[clfft] 08/10: fixing r2c issues in large lengths

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Wed Mar 30 15:49:30 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 9649fd85c1fa7d4f355d784379d36135add5ae66
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Fri Mar 25 15:14:57 2016 -0700

    fixing r2c issues in large lengths
---
 src/library/generator.stockham.cpp      |  2 +-
 src/library/generator.transpose.gcn.cpp | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index a2ddfa9..3d425a8 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -4010,7 +4010,7 @@ namespace StockhamGenerator
 				std::string rw, me;
 
 				if(r2c2r && !rcSimple)	rw = "rw, b, ";
-				else					rw = (numTrans > 1) ? "rw, b, " : "1, b, ";
+				else					rw = ((numTrans > 1) || realSpecial) ? "rw, b, " : "1, b, ";
 
 				if(numTrans > 1)	{ me += "me%"; me += SztToStr(workGroupSizePerTrans); me += ", "; }
 				else				{ me += "me, "; }
diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index 808bdf7..6fc7472 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -756,20 +756,20 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 				break;
 			}
 
-			if(branchingInAny)
-			{
-				clKernWrite( transKernel, 9 ) << "}" << std::endl;
-				clKernWrite( transKernel, 9 ) << std::endl;
-			}
-
 			clKernWrite( transKernel, 9 ) << "// Transpose of Tile data happens here" << std::endl;
 
-
 			// If requested, generate the Twiddle math to multiply constant values
 			if( params.fft_3StepTwiddle )
 				genTwiddleMath( params, transKernel, dtComplex, fwd );
 
 			clKernWrite( transKernel, 9 ) << "lds[ xInd ][ yInd ] = tmp; " << std::endl;
+
+			if (branchingInAny)
+			{
+				clKernWrite(transKernel, 9) << "}" << std::endl;
+				clKernWrite(transKernel, 9) << std::endl;
+			}
+
 			clKernWrite( transKernel, 6 ) << "}" << std::endl;
 
 			if(branchingInAny)
@@ -915,7 +915,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 						if(params.fft_realSpecial)
 						{
 							clKernWrite( transKernel, 9 ) << "if( ((" << wIndexY << " == " << wIndexXEnd - 1 << ") && (" <<
-								wIndexX << " < 1) && (" << limitToWGForRealSpecial << " == " << cornerGroupX << ")) ";
+								wIndexX << " < 1) && (" << limitToWGForRealSpecial << " == 0)) ";
 							if(wIndexXEnd > 1)
 							{
 								clKernWrite( transKernel, 0 ) << "|| (" << wIndexY << " < " << wIndexXEnd - 1 << ") )" << std::endl;
@@ -937,7 +937,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 						if(params.fft_realSpecial)
 						{
 							clKernWrite( transKernel, 9 ) << "if( ((" << wIndexX << " == " << wIndexYEnd - 1 << ") && (" <<
-								wIndexY << " < 1) && (" << limitToWGForRealSpecial << " == " << cornerGroupY << ")) ";
+								wIndexY << " < 1) && (" << limitToWGForRealSpecial << " == 0)) ";
 							if(wIndexYEnd > 1)
 							{
 								clKernWrite( transKernel, 0 ) << "|| (" << wIndexX << " < " << wIndexYEnd - 1 << ") )" << std::endl;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list