[clfft] 98/128: simplifying transform kernels for 3, 5, 7 combined sizes

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:43 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 7e6a2df25352cdf31a0880d4ab9d2ffe05fe26cc
Author: bragadeesh <bragadeesh.natarajan at amd.com>
Date:   Wed Oct 7 12:36:19 2015 -0700

    simplifying transform kernels for 3,5,7 combined sizes
---
 src/library/generator.stockham.cpp |  3 +++
 src/library/plan.cpp               |  2 +-
 src/library/plan.h                 | 14 ++++++++++----
 src/library/transform.cpp          |  2 +-
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index 6ee21d1..c3fa5f9 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -2746,6 +2746,9 @@ namespace StockhamGenerator
 			// Set half lds for real transforms
 			halfLds = r2c2r ? true : halfLds;
 
+			// Set half lds for radix7
+			halfLds = (length % 7 == 0) ? true : halfLds;
+
 			linearRegs = halfLds;
 
 			realSpecial = params.fft_realSpecial;
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 43e37c2..62a3da4 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -494,7 +494,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 	{
 	case CLFFT_1D:
 		{
-			if ( fftPlan->length[0] > Large1DThreshold )
+			if ( !Is1DPossible(fftPlan->length[0], Large1DThreshold) )
 			{
 				size_t clLengths[] = { 1, 1, 0 };
 				size_t in_1d, in_x, count;
diff --git a/src/library/plan.h b/src/library/plan.h
index c5b0b58..ef7937f 100644
--- a/src/library/plan.h
+++ b/src/library/plan.h
@@ -516,10 +516,6 @@ public:
 
 	clfftStatus GetMax1DLength (size_t *longest ) const;
 
-	void ResetBinarySizes();
-	void ResetBinaries();
-
-	clfftStatus CompressPlan();
 	clfftStatus ConstructAndEnqueueConstantBuffers( cl_command_queue* commQueueFFT );
 
 	clfftStatus GetEnvelope (const FFTEnvelope **) const;
@@ -533,5 +529,15 @@ public:
 	}
 };
 
+static bool Is1DPossible(size_t length, size_t large1DThreshold)
+{
+	if (length > large1DThreshold)
+		return false;
+	if ( (length%7 == 0) && (length%5 == 0) && (length%3 == 0) )
+		return false;
+
+	return true;
+}
+
 #endif // AMD_CLFFT_plan_H
 
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index a45a2bf..d88aab4 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -126,7 +126,7 @@ clfftStatus clfftEnqueueTransform(
 	{
 		case CLFFT_1D:
 		{
-			if (fftPlan->length[0] <= Large1DThreshold)
+			if ( Is1DPossible(fftPlan->length[0], Large1DThreshold) )
 				break;
 
 			if( ( fftPlan->inputLayout == CLFFT_REAL ) && ( fftPlan->planTZ != 0) )

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list