[clfft] 40/128: removing some unneeded branch blocks for small size transposes

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:36 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 4423f3d73b8c9a3988b908403ae20b98fd5a4903
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Fri Aug 21 18:46:08 2015 -0500

    removing some unneeded branch blocks for small size transposes
---
 src/library/generator.transpose.gcn.cpp | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index 4069824..4efa281 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -383,10 +383,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
     clKernWrite( transKernel, 3 ) << "size_t y;" << std::endl;
     clKernWrite( transKernel, 0 ) << "} Tile;" << std::endl << std::endl;
 
-    // This detects whether the input matrix is square
-    bool notSquare = ( params.fft_N[ 0 ] == params.fft_N[ 1 ] ) ? false : true;
-
-    if( notSquare && (params.fft_placeness == CLFFT_INPLACE) )
+    if( params.fft_placeness == CLFFT_INPLACE )
         return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
 
 
@@ -535,18 +532,24 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 				}
 				else if(i == 1)
 				{
+					if(!cornerGroupY) continue;
+
 					clKernWrite( transKernel, 3 ) << "else if( " << gIndexX << " == " << 
 						cornerGroupX << " )" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
 				else if(i == 2)
 				{
+					if(!cornerGroupX) continue;
+
 					clKernWrite( transKernel, 3 ) << "else if( " << gIndexY << " == " <<
 						cornerGroupY << " )" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
 				else
 				{
+					if( (!cornerGroupX) || (!cornerGroupY) ) continue;
+
 					clKernWrite( transKernel, 3 ) << "else" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
@@ -568,6 +571,8 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 				}
 				else
 				{
+					if( (!cornerGroupX) || (!cornerGroupY) ) continue;
+
 					clKernWrite( transKernel, 3 ) << "else" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
@@ -708,18 +713,24 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 				}
 				else if(i == 1)
 				{
+					if(!cornerGroupY) continue;
+
 					clKernWrite( transKernel, 3 ) << "else if( " << gIndexX << " == " << 
 						cornerGroupX << " )" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
 				else if(i == 2)
 				{
+					if(!cornerGroupX) continue;
+
 					clKernWrite( transKernel, 3 ) << "else if( " << gIndexY << " == " <<
 						cornerGroupY << " )" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
 				else
 				{
+					if( (!cornerGroupX) || (!cornerGroupY) ) continue;
+
 					clKernWrite( transKernel, 3 ) << "else" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
@@ -741,6 +752,8 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 				}
 				else
 				{
+					if( (!cornerGroupX) || (!cornerGroupY) ) continue;
+
 					clKernWrite( transKernel, 3 ) << "else" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list