[clfft] 40/128: removing some unneeded branch blocks for small size transposes
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:36 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 4423f3d73b8c9a3988b908403ae20b98fd5a4903
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Fri Aug 21 18:46:08 2015 -0500
removing some unneeded branch blocks for small size transposes
---
src/library/generator.transpose.gcn.cpp | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index 4069824..4efa281 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -383,10 +383,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
clKernWrite( transKernel, 3 ) << "size_t y;" << std::endl;
clKernWrite( transKernel, 0 ) << "} Tile;" << std::endl << std::endl;
- // This detects whether the input matrix is square
- bool notSquare = ( params.fft_N[ 0 ] == params.fft_N[ 1 ] ) ? false : true;
-
- if( notSquare && (params.fft_placeness == CLFFT_INPLACE) )
+ if( params.fft_placeness == CLFFT_INPLACE )
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
@@ -535,18 +532,24 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
}
else if(i == 1)
{
+ if(!cornerGroupY) continue;
+
clKernWrite( transKernel, 3 ) << "else if( " << gIndexX << " == " <<
cornerGroupX << " )" << std::endl;
clKernWrite( transKernel, 3 ) << "{" << std::endl;
}
else if(i == 2)
{
+ if(!cornerGroupX) continue;
+
clKernWrite( transKernel, 3 ) << "else if( " << gIndexY << " == " <<
cornerGroupY << " )" << std::endl;
clKernWrite( transKernel, 3 ) << "{" << std::endl;
}
else
{
+ if( (!cornerGroupX) || (!cornerGroupY) ) continue;
+
clKernWrite( transKernel, 3 ) << "else" << std::endl;
clKernWrite( transKernel, 3 ) << "{" << std::endl;
}
@@ -568,6 +571,8 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
}
else
{
+ if( (!cornerGroupX) || (!cornerGroupY) ) continue;
+
clKernWrite( transKernel, 3 ) << "else" << std::endl;
clKernWrite( transKernel, 3 ) << "{" << std::endl;
}
@@ -708,18 +713,24 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
}
else if(i == 1)
{
+ if(!cornerGroupY) continue;
+
clKernWrite( transKernel, 3 ) << "else if( " << gIndexX << " == " <<
cornerGroupX << " )" << std::endl;
clKernWrite( transKernel, 3 ) << "{" << std::endl;
}
else if(i == 2)
{
+ if(!cornerGroupX) continue;
+
clKernWrite( transKernel, 3 ) << "else if( " << gIndexY << " == " <<
cornerGroupY << " )" << std::endl;
clKernWrite( transKernel, 3 ) << "{" << std::endl;
}
else
{
+ if( (!cornerGroupX) || (!cornerGroupY) ) continue;
+
clKernWrite( transKernel, 3 ) << "else" << std::endl;
clKernWrite( transKernel, 3 ) << "{" << std::endl;
}
@@ -741,6 +752,8 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
}
else
{
+ if( (!cornerGroupX) || (!cornerGroupY) ) continue;
+
clKernWrite( transKernel, 3 ) << "else" << std::endl;
clKernWrite( transKernel, 3 ) << "{" << std::endl;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list