[clfft] 20/21: making some more changes to better fix 2d perf issues
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Wed Mar 16 13:14:05 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master in repository clfft.
commit 5c685c18d0dff7bcc0f511adcf2f10cddb1fdb65
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Tue Mar 15 11:24:10 2016 -0700
making some more changes to better fix 2d perf issues
---
src/library/plan.cpp | 14 +++++++-------
src/library/plan.h | 4 ++++
src/library/transform.cpp | 3 +--
3 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index d7a52dd..65b201a 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -2190,8 +2190,8 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
//if (fftPlan->large2D) break;
//Performance show 512 is the good case with transpose
//if user want the result to be transposed, then we will.
- if (fftPlan->length[0] < 512 && fftPlan->transposed == CLFFT_NOTRANSPOSE) break;
- if (fftPlan->length[0] < 32) break;
+
+ if (fftPlan->length[0] < 64) break;
//x!=y case, we need tmp buffer, currently temp buffer only support interleaved format
//if (fftPlan->length[0] != fftPlan->length[1] && fftPlan->outputLayout == CLFFT_COMPLEX_PLANAR) break;
if (fftPlan->inStride[0] != 1 || fftPlan->outStride[0] != 1 ||
@@ -2253,8 +2253,8 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
size_t smallerDim = biggerDim == clLengths[0] ? clLengths[1] : clLengths[0];
size_t padding = 0;
- bool xyflag = (clLengths[0]==clLengths[1]) ? false : true;
- if (xyflag && fftPlan->tmpBufSize==0 && fftPlan->length.size()<=2)
+ fftPlan->transpose_in_2d_inplace = (clLengths[0]==clLengths[1]) ? true : false;
+ if ( (!fftPlan->transpose_in_2d_inplace) && fftPlan->tmpBufSize==0 && fftPlan->length.size()<=2 )
{
if ((smallerDim % 64 == 0) || (biggerDim % 64 == 0))
if(biggerDim > 512)
@@ -2284,7 +2284,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
transPlanX->iDist = fftPlan->oDist;
transPlanX->transflag = true;
- if (xyflag)
+ if (!fftPlan->transpose_in_2d_inplace)
{
transPlanX->gen = Transpose_GCN;
transPlanX->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
@@ -2316,7 +2316,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
lockRAII* colLock = NULL;
OPENCL_V( fftRepo.getPlan( fftPlan->planY, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
- if (xyflag)
+ if (!fftPlan->transpose_in_2d_inplace)
{
colPlan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
colPlan->inStride[0] = 1;
@@ -2382,7 +2382,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
lockRAII* transLockY = NULL;
OPENCL_V( fftRepo.getPlan( fftPlan->planTY, transPlanY, transLockY ), _T( "fftRepo.getPlan failed" ) );
- if (xyflag)
+ if (!fftPlan->transpose_in_2d_inplace)
{
transPlanY->gen = Transpose_GCN;
transPlanY->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
diff --git a/src/library/plan.h b/src/library/plan.h
index 9d4f1e6..2b53df4 100644
--- a/src/library/plan.h
+++ b/src/library/plan.h
@@ -470,6 +470,9 @@ public:
// Allocate no extra memory
bool allOpsInplace;
+ // flag to indicate transpose placeness in 2D breakdown
+ bool transpose_in_2d_inplace;
+
// A flag to say that blocked FFTs are going to be performed
// It can only be one of these: column to row, row to column or column to column
@@ -531,6 +534,7 @@ public:
, realSpecial_Nr(0)
, userPlan(false)
, allOpsInplace(false)
+ , transpose_in_2d_inplace(false)
, blockCompute(false)
, blockComputeType(BCT_C2C)
, planTX( 0 )
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index 0dc557b..7b5a9dc 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -693,9 +693,8 @@ clfftStatus clfftEnqueueTransform(
cl_event transXOutEvents = NULL;
cl_event colOutEvents = NULL;
- bool xyflag = (fftPlan->length[0] == fftPlan->length[1]) ? false : true;
- if (xyflag)
+ if (!fftPlan->transpose_in_2d_inplace)
{
//First transpose
OPENCL_V( clfftEnqueueTransform( fftPlan->planTX, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list