[clfft] 08/10: fixing r2c issues in large lengths
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Wed Mar 30 15:49:30 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 9649fd85c1fa7d4f355d784379d36135add5ae66
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Fri Mar 25 15:14:57 2016 -0700
fixing r2c issues in large lengths
---
src/library/generator.stockham.cpp | 2 +-
src/library/generator.transpose.gcn.cpp | 18 +++++++++---------
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index a2ddfa9..3d425a8 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -4010,7 +4010,7 @@ namespace StockhamGenerator
std::string rw, me;
if(r2c2r && !rcSimple) rw = "rw, b, ";
- else rw = (numTrans > 1) ? "rw, b, " : "1, b, ";
+ else rw = ((numTrans > 1) || realSpecial) ? "rw, b, " : "1, b, ";
if(numTrans > 1) { me += "me%"; me += SztToStr(workGroupSizePerTrans); me += ", "; }
else { me += "me, "; }
diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index 808bdf7..6fc7472 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -756,20 +756,20 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
break;
}
- if(branchingInAny)
- {
- clKernWrite( transKernel, 9 ) << "}" << std::endl;
- clKernWrite( transKernel, 9 ) << std::endl;
- }
-
clKernWrite( transKernel, 9 ) << "// Transpose of Tile data happens here" << std::endl;
-
// If requested, generate the Twiddle math to multiply constant values
if( params.fft_3StepTwiddle )
genTwiddleMath( params, transKernel, dtComplex, fwd );
clKernWrite( transKernel, 9 ) << "lds[ xInd ][ yInd ] = tmp; " << std::endl;
+
+ if (branchingInAny)
+ {
+ clKernWrite(transKernel, 9) << "}" << std::endl;
+ clKernWrite(transKernel, 9) << std::endl;
+ }
+
clKernWrite( transKernel, 6 ) << "}" << std::endl;
if(branchingInAny)
@@ -915,7 +915,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
if(params.fft_realSpecial)
{
clKernWrite( transKernel, 9 ) << "if( ((" << wIndexY << " == " << wIndexXEnd - 1 << ") && (" <<
- wIndexX << " < 1) && (" << limitToWGForRealSpecial << " == " << cornerGroupX << ")) ";
+ wIndexX << " < 1) && (" << limitToWGForRealSpecial << " == 0)) ";
if(wIndexXEnd > 1)
{
clKernWrite( transKernel, 0 ) << "|| (" << wIndexY << " < " << wIndexXEnd - 1 << ") )" << std::endl;
@@ -937,7 +937,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
if(params.fft_realSpecial)
{
clKernWrite( transKernel, 9 ) << "if( ((" << wIndexX << " == " << wIndexYEnd - 1 << ") && (" <<
- wIndexY << " < 1) && (" << limitToWGForRealSpecial << " == " << cornerGroupY << ")) ";
+ wIndexY << " < 1) && (" << limitToWGForRealSpecial << " == 0)) ";
if(wIndexYEnd > 1)
{
clKernWrite( transKernel, 0 ) << "|| (" << wIndexX << " < " << wIndexYEnd - 1 << ") )" << std::endl;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list