[clfft] 20/21: making some more changes to better fix 2d perf issues

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Wed Mar 16 13:14:05 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 5c685c18d0dff7bcc0f511adcf2f10cddb1fdb65
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Tue Mar 15 11:24:10 2016 -0700

    making some more changes to better fix 2d perf issues
---
 src/library/plan.cpp      | 14 +++++++-------
 src/library/plan.h        |  4 ++++
 src/library/transform.cpp |  3 +--
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index d7a52dd..65b201a 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -2190,8 +2190,8 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 				//if (fftPlan->large2D) break;
 				//Performance show 512 is the good case with transpose
 				//if user want the result to be transposed, then we will.
-				if (fftPlan->length[0] < 512 && fftPlan->transposed == CLFFT_NOTRANSPOSE) break;
-				if (fftPlan->length[0] < 32) break;
+
+				if (fftPlan->length[0] < 64) break;
 				//x!=y case, we need tmp buffer, currently temp buffer only support interleaved format
 				//if (fftPlan->length[0] != fftPlan->length[1] && fftPlan->outputLayout == CLFFT_COMPLEX_PLANAR) break;
 				if (fftPlan->inStride[0] != 1 || fftPlan->outStride[0] != 1 ||
@@ -2253,8 +2253,8 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 				size_t smallerDim = biggerDim == clLengths[0] ? clLengths[1] : clLengths[0];
 				size_t padding = 0;
 
-				bool xyflag = (clLengths[0]==clLengths[1]) ? false : true;
-				if (xyflag && fftPlan->tmpBufSize==0 && fftPlan->length.size()<=2)
+				fftPlan->transpose_in_2d_inplace = (clLengths[0]==clLengths[1]) ? true : false;
+				if ( (!fftPlan->transpose_in_2d_inplace) && fftPlan->tmpBufSize==0 && fftPlan->length.size()<=2 )
 				{
 					if ((smallerDim % 64 == 0) || (biggerDim % 64 == 0))
 						if(biggerDim > 512)
@@ -2284,7 +2284,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 				transPlanX->iDist           = fftPlan->oDist;
 				transPlanX->transflag       = true;
 
-				if (xyflag)
+				if (!fftPlan->transpose_in_2d_inplace)
 				{
 					transPlanX->gen = Transpose_GCN;
 					transPlanX->outputLayout    = CLFFT_COMPLEX_INTERLEAVED;
@@ -2316,7 +2316,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 				lockRAII* colLock	= NULL;
 				OPENCL_V( fftRepo.getPlan( fftPlan->planY, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
 
-				if (xyflag)
+				if (!fftPlan->transpose_in_2d_inplace)
 				{
 					colPlan->inputLayout     = CLFFT_COMPLEX_INTERLEAVED;
 					colPlan->inStride[0]     = 1;
@@ -2382,7 +2382,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 				lockRAII* transLockY	= NULL;
 				OPENCL_V( fftRepo.getPlan( fftPlan->planTY, transPlanY, transLockY ), _T( "fftRepo.getPlan failed" ) );
 
-				if (xyflag)
+				if (!fftPlan->transpose_in_2d_inplace)
 				{
 					transPlanY->gen = Transpose_GCN;
 					transPlanY->inputLayout     = CLFFT_COMPLEX_INTERLEAVED;
diff --git a/src/library/plan.h b/src/library/plan.h
index 9d4f1e6..2b53df4 100644
--- a/src/library/plan.h
+++ b/src/library/plan.h
@@ -470,6 +470,9 @@ public:
 	// Allocate no extra memory
 	bool allOpsInplace;
 
+	// flag to indicate whether the transpose steps in the 2D breakdown are performed in place
+	bool transpose_in_2d_inplace;
+
 
 	// A flag to say that blocked FFTs are going to be performed
 	// It can only be one of these: column to row, row to column or column to column
@@ -531,6 +534,7 @@ public:
 	,	realSpecial_Nr(0)
 	,	userPlan(false)
 	,	allOpsInplace(false)
+	,	transpose_in_2d_inplace(false)
 	,	blockCompute(false)
 	,	blockComputeType(BCT_C2C)
 	,   planTX( 0 )
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index 0dc557b..7b5a9dc 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -693,9 +693,8 @@ clfftStatus clfftEnqueueTransform(
 
 				cl_event transXOutEvents = NULL;
 				cl_event colOutEvents = NULL;
-				bool xyflag = (fftPlan->length[0] == fftPlan->length[1]) ? false : true;
 
-				if (xyflag)
+				if (!fftPlan->transpose_in_2d_inplace)
 				{
 					//First transpose
 					OPENCL_V( clfftEnqueueTransform( fftPlan->planTX, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list