[clfft] 98/128: simplifying transform kernels for 3, 5, 7 combined sizes
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:43 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 7e6a2df25352cdf31a0880d4ab9d2ffe05fe26cc
Author: bragadeesh <bragadeesh.natarajan at amd.com>
Date: Wed Oct 7 12:36:19 2015 -0700
simplifying transform kernels for 3,5,7 combined sizes
---
src/library/generator.stockham.cpp | 3 +++
src/library/plan.cpp | 2 +-
src/library/plan.h | 14 ++++++++++----
src/library/transform.cpp | 2 +-
4 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index 6ee21d1..c3fa5f9 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -2746,6 +2746,9 @@ namespace StockhamGenerator
// Set half lds for real transforms
halfLds = r2c2r ? true : halfLds;
+ // Set half lds for radix7
+ halfLds = (length % 7 == 0) ? true : halfLds;
+
linearRegs = halfLds;
realSpecial = params.fft_realSpecial;
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 43e37c2..62a3da4 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -494,7 +494,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
{
case CLFFT_1D:
{
- if ( fftPlan->length[0] > Large1DThreshold )
+ if ( !Is1DPossible(fftPlan->length[0], Large1DThreshold) )
{
size_t clLengths[] = { 1, 1, 0 };
size_t in_1d, in_x, count;
diff --git a/src/library/plan.h b/src/library/plan.h
index c5b0b58..ef7937f 100644
--- a/src/library/plan.h
+++ b/src/library/plan.h
@@ -516,10 +516,6 @@ public:
clfftStatus GetMax1DLength (size_t *longest ) const;
- void ResetBinarySizes();
- void ResetBinaries();
-
- clfftStatus CompressPlan();
clfftStatus ConstructAndEnqueueConstantBuffers( cl_command_queue* commQueueFFT );
clfftStatus GetEnvelope (const FFTEnvelope **) const;
@@ -533,5 +529,15 @@ public:
}
};
+static bool Is1DPossible(size_t length, size_t large1DThreshold) // returns false when callers must take the branch formerly selected by "length > Large1DThreshold"
+{
+ if (length > large1DThreshold) // length exceeds the caller-supplied threshold
+ return false;
+ if ( (length%7 == 0) && (length%5 == 0) && (length%3 == 0) ) // length divisible by 7, 5 and 3 at once, i.e. a multiple of 105 (length 0 also matches)
+ return false;
+
+ return true;
+}
+
#endif // AMD_CLFFT_plan_H
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index a45a2bf..d88aab4 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -126,7 +126,7 @@ clfftStatus clfftEnqueueTransform(
{
case CLFFT_1D:
{
- if (fftPlan->length[0] <= Large1DThreshold)
+ if ( Is1DPossible(fftPlan->length[0], Large1DThreshold) )
break;
if( ( fftPlan->inputLayout == CLFFT_REAL ) && ( fftPlan->planTZ != 0) )
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list