[clfft] 80/107: clarifying logic of transposes w.r.t transOutHorizontal

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:39 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit b03c26b9a891841cf7567aaf83f1ee9fe8dd1c29
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Wed Apr 15 18:42:58 2015 -0500

    clarifying logic of transposes w.r.t transOutHorizontal
---
 src/library/generator.transpose.gcn.cpp | 87 +++++++++++++++++++--------------
 src/library/plan.cpp                    |  1 +
 2 files changed, 51 insertions(+), 37 deletions(-)

diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index ecf73bc..78c2111 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -430,9 +430,9 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 
 		size_t numGroupsTemp;
 		if(params.transOutHorizontal)
-			numGroupsTemp = DivRoundingUp( params.fft_N[0], lwSize.y / reShapeFactor * loopCount );
+			numGroupsTemp = DivRoundingUp( params.fft_N[0], blockSize.x );
 		else
-			numGroupsTemp = DivRoundingUp( params.fft_N[1], lwSize.y / reShapeFactor * loopCount );
+			numGroupsTemp = DivRoundingUp( params.fft_N[1], blockSize.y );
 
 		clKernWrite( transKernel, 3 ) << "const size_t numGroupsY_1" << " = " << numGroupsTemp << ";" << std::endl;
 		for(int i = 2; i < params.fft_DataDim - 1; i++)
@@ -444,7 +444,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 
 		// Generate the amount of local data share we need
 		// Assumption: Even for planar data, we will still store values in LDS as interleaved
-		tile ldsSize = { lwSize.x * reShapeFactor, lwSize.y / reShapeFactor * loopCount };
+		tile ldsSize = { blockSize.x, blockSize.y };
 		switch( params.fft_outputLayout )
 		{
 		case CLFFT_COMPLEX_INTERLEAVED:
@@ -486,18 +486,31 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 		clKernWrite( transKernel, 3 ) << "rowSizeinUnits = " << params.fft_inStride[ 1 ] << ";" << std::endl; 
 		clKernWrite( transKernel, 3 ) << std::endl << std::endl;
 
-		bool branchingInX = params.transOutHorizontal ? ((params.fft_N[1] % blockSize.x) != 0) : ((params.fft_N[0] % blockSize.x) != 0);
-		bool branchingInY = params.transOutHorizontal ? ((params.fft_N[0] % blockSize.y) != 0) : ((params.fft_N[1] % blockSize.y) != 0);
-		bool branchingInBoth = branchingInX && branchingInY;
-		bool branchingInAny = branchingInX || branchingInY;
+		//
+		// Group index traversal is logical: by default, the X direction is horizontal in the input buffer and vertical in the output buffer.
+		// When transOutHorizontal is enabled, the X direction is vertical in the input buffer and horizontal in the output buffer.
+		// Not to be confused with indexing within a tile, where X is always horizontal in the input and vertical in the output.
+		
+
+
+		bool branchingInGroupX = params.transOutHorizontal ? ((params.fft_N[1] % blockSize.y) != 0) : ((params.fft_N[0] % blockSize.x) != 0);
+		bool branchingInGroupY = params.transOutHorizontal ? ((params.fft_N[0] % blockSize.x) != 0) : ((params.fft_N[1] % blockSize.y) != 0);
+		bool branchingInBoth = branchingInGroupX && branchingInGroupY;
+		bool branchingInAny = branchingInGroupX || branchingInGroupY;
 
 		size_t branchBlocks = branchingInBoth ? 4 : ( branchingInAny ? 2 : 1 );
 
-		size_t validX = params.transOutHorizontal ? params.fft_N[0] % blockSize.y : params.fft_N[0] % blockSize.x;
-		size_t validY = params.transOutHorizontal ? params.fft_N[1] % blockSize.x : params.fft_N[1] % blockSize.y;
+		size_t cornerGroupX = params.transOutHorizontal ? (params.fft_N[1] / blockSize.y) : (params.fft_N[0] / blockSize.x);
+		size_t cornerGroupY = params.transOutHorizontal ? (params.fft_N[0] / blockSize.x) : (params.fft_N[1] / blockSize.y);
 
-		std::string gIndexX = params.transOutHorizontal ? "currDimIndex" : "groupIndex.x";
-		std::string gIndexY = params.transOutHorizontal ? "groupIndex.x" : "currDimIndex";		
+		std::string gIndexX = "groupIndex.x"; //params.transOutHorizontal ? "currDimIndex" : "groupIndex.x";
+		std::string gIndexY = "currDimIndex"; //params.transOutHorizontal ? "groupIndex.x" : "currDimIndex";		
+		
+		std::string wIndexX = params.transOutHorizontal ? "yInd" : "xInd";
+		std::string wIndexY = params.transOutHorizontal ? "xInd" : "yInd";
+				
+		size_t wIndexXEnd = params.transOutHorizontal ? params.fft_N[1] % blockSize.y : params.fft_N[0] % blockSize.x;
+		size_t wIndexYEnd = params.transOutHorizontal ? params.fft_N[0] % blockSize.x : params.fft_N[1] % blockSize.y;
 
 
 		for(size_t i = 0; i<branchBlocks; i++)
@@ -506,20 +519,20 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 				if(i == 0)
 				{
 					clKernWrite( transKernel, 3 ) << "if( (" << gIndexX << " == " << 
-						(params.fft_N[0] / blockSize.x) << ") && (" << gIndexY << " == " <<
-						(params.fft_N[1] / blockSize.y) << ") )" << std::endl;
+						cornerGroupX << ") && (" << gIndexY << " == " <<
+						cornerGroupY << ") )" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
 				else if(i == 1)
 				{
 					clKernWrite( transKernel, 3 ) << "else if( " << gIndexX << " == " << 
-						(params.fft_N[0] / blockSize.x) << " )" << std::endl;
+						cornerGroupX << " )" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
 				else if(i == 2)
 				{
 					clKernWrite( transKernel, 3 ) << "else if( " << gIndexY << " == " <<
-						(params.fft_N[1] / blockSize.y) << " )" << std::endl;
+						cornerGroupY << " )" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
 				else
@@ -530,16 +543,16 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 			else if(branchingInAny)
 				if(i == 0)
 				{
-					if(branchingInX)
+					if(branchingInGroupX)
 					{
 						clKernWrite( transKernel, 3 ) << "if( " << gIndexX << " == " << 
-							(params.fft_N[0] / blockSize.x) << " )" << std::endl;
+							cornerGroupX << " )" << std::endl;
 						clKernWrite( transKernel, 3 ) << "{" << std::endl;
 					}
 					else
 					{
 						clKernWrite( transKernel, 3 ) << "if( " << gIndexY << " == " <<
-							(params.fft_N[1] / blockSize.y) << " )" << std::endl;
+							cornerGroupY << " )" << std::endl;
 						clKernWrite( transKernel, 3 ) << "{" << std::endl;
 					}
 				}
@@ -566,19 +579,19 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 				if(i == 0)
 				{
 					clKernWrite( transKernel, 9 ) << std::endl;
-					clKernWrite( transKernel, 9 ) << "if( (xInd < " << validX << ") && (yInd < " << validY << ") )" << std::endl;
+					clKernWrite( transKernel, 9 ) << "if( (" << wIndexX << "< " << wIndexXEnd << ") && (" << wIndexY << " < " << wIndexYEnd << ") )" << std::endl;
 					clKernWrite( transKernel, 9 ) << "{" << std::endl;
 				}
 				else if(i == 1)
 				{
 					clKernWrite( transKernel, 9 ) << std::endl;
-					clKernWrite( transKernel, 9 ) << "if( (xInd < " << validX << ") )" << std::endl;
+					clKernWrite( transKernel, 9 ) << "if( (" << wIndexX << " < " << wIndexXEnd << ") )" << std::endl;
 					clKernWrite( transKernel, 9 ) << "{" << std::endl;
 				}
 				else if(i == 2)
 				{
 					clKernWrite( transKernel, 9 ) << std::endl;
-					clKernWrite( transKernel, 9 ) << "if( (yInd < " << validY << ") )" << std::endl;
+					clKernWrite( transKernel, 9 ) << "if( (" << wIndexY << " < " << wIndexYEnd << ") )" << std::endl;
 					clKernWrite( transKernel, 9 ) << "{" << std::endl;
 				}
 				else
@@ -588,16 +601,16 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 			{
 				if(i == 0)
 				{
-					if(branchingInX)
+					if(branchingInGroupX)
 					{
 						clKernWrite( transKernel, 9 ) << std::endl;
-						clKernWrite( transKernel, 9 ) << "if( (xInd < " << validX << ") )" << std::endl;
+						clKernWrite( transKernel, 9 ) << "if( (" << wIndexX << " < " << wIndexXEnd << ") )" << std::endl;
 						clKernWrite( transKernel, 9 ) << "{" << std::endl;
 					}
 					else
 					{
 						clKernWrite( transKernel, 9 ) << std::endl;
-						clKernWrite( transKernel, 9 ) << "if( (yInd < " << validY << ") )" << std::endl;
+						clKernWrite( transKernel, 9 ) << "if( (" << wIndexY << " < " << wIndexYEnd << ") )" << std::endl;
 						clKernWrite( transKernel, 9 ) << "{" << std::endl;
 					}
 				}
@@ -674,20 +687,20 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 				if(i == 0)
 				{
 					clKernWrite( transKernel, 3 ) << "if( (" << gIndexX << " == " << 
-						(params.fft_N[0] / blockSize.x) << ") && (" << gIndexY << " == " <<
-						(params.fft_N[1] / blockSize.y) << ") )" << std::endl;
+						cornerGroupX << ") && (" << gIndexY << " == " <<
+						cornerGroupY << ") )" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
 				else if(i == 1)
 				{
 					clKernWrite( transKernel, 3 ) << "else if( " << gIndexX << " == " << 
-						(params.fft_N[0] / blockSize.x) << " )" << std::endl;
+						cornerGroupX << " )" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
 				else if(i == 2)
 				{
 					clKernWrite( transKernel, 3 ) << "else if( " << gIndexY << " == " <<
-						(params.fft_N[1] / blockSize.y) << " )" << std::endl;
+						cornerGroupY << " )" << std::endl;
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
 				else
@@ -698,16 +711,16 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 			else if(branchingInAny)
 				if(i == 0)
 				{
-					if(branchingInX)
+					if(branchingInGroupX)
 					{
 						clKernWrite( transKernel, 3 ) << "if( " << gIndexX << " == " << 
-							(params.fft_N[0] / blockSize.x) << " )" << std::endl;
+							cornerGroupX << " )" << std::endl;
 						clKernWrite( transKernel, 3 ) << "{" << std::endl;
 					}
 					else
 					{
 						clKernWrite( transKernel, 3 ) << "if( " << gIndexY << " == " <<
-							(params.fft_N[1] / blockSize.y) << " )" << std::endl;
+							cornerGroupY << " )" << std::endl;
 						clKernWrite( transKernel, 3 ) << "{" << std::endl;
 					}
 				}
@@ -730,20 +743,20 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 				if(i == 0)
 				{
 					clKernWrite( transKernel, 9 ) << std::endl;
-					clKernWrite( transKernel, 9 ) << "if( (xInd < " << validY << ") && (yInd < " << validX << ") )" << std::endl;
+					clKernWrite( transKernel, 9 ) << "if( (" << wIndexY << " < " << wIndexXEnd << ") && (" << wIndexX << " < " << wIndexYEnd << ") )" << std::endl;
 					clKernWrite( transKernel, 9 ) << "{" << std::endl;
 				}
 				else if(i == 1)
 				{
 					clKernWrite( transKernel, 9 ) << std::endl;
-					clKernWrite( transKernel, 9 ) << "if( (yInd < " << validX << ") )" << std::endl;
+					clKernWrite( transKernel, 9 ) << "if( (" << wIndexY << " < " << wIndexXEnd << ") )" << std::endl;
 					clKernWrite( transKernel, 9 ) << "{" << std::endl;
 
 				}
 				else if(i == 2)
 				{
 					clKernWrite( transKernel, 9 ) << std::endl;
-					clKernWrite( transKernel, 9 ) << "if( (xInd < " << validY << ") )" << std::endl;
+					clKernWrite( transKernel, 9 ) << "if( (" << wIndexX << " < " << wIndexYEnd << ") )" << std::endl;
 					clKernWrite( transKernel, 9 ) << "{" << std::endl;
 				}
 				else
@@ -753,16 +766,16 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 			{
 				if(i == 0)
 				{
-					if(branchingInX)
+					if(branchingInGroupX)
 					{
 						clKernWrite( transKernel, 9 ) << std::endl;
-						clKernWrite( transKernel, 9 ) << "if( (yInd < " << validX << ") )" << std::endl;
+						clKernWrite( transKernel, 9 ) << "if( (" << wIndexY << " < " << wIndexXEnd << ") )" << std::endl;
 						clKernWrite( transKernel, 9 ) << "{" << std::endl;
 					}
 					else
 					{
 						clKernWrite( transKernel, 9 ) << std::endl;
-						clKernWrite( transKernel, 9 ) << "if( (xInd < " << validY << ") )" << std::endl;
+						clKernWrite( transKernel, 9 ) << "if( (" << wIndexX << " < " << wIndexYEnd << ") )" << std::endl;
 						clKernWrite( transKernel, 9 ) << "{" << std::endl;
 					}
 				}
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index d7a9e0a..e23f5e5 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -1910,6 +1910,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					trans1Plan->outStride[1]  = length1;
 					trans1Plan->iDist         = rowPlan->oDist;
 					trans1Plan->oDist		  = Nt*length1;
+					trans1Plan->transOutHorizontal = true;
 
 					trans1Plan->gen           = Transpose_GCN;
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list