[clfft] 93/107: fixing rc issues and adding minor features
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:41 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 797715de64e28d0c165cd12fe97d022f0a1eed49
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Sun Jun 21 21:30:07 2015 -0500
fixing rc issues and adding minor features
---
src/library/action.cpp | 19 +++++++++++++++++--
src/library/generator.transpose.gcn.cpp | 22 ++++++++++++++++++++--
src/library/plan.cpp | 16 ++++++++++++++--
src/library/transform.cpp | 8 ++++----
4 files changed, 55 insertions(+), 10 deletions(-)
diff --git a/src/library/action.cpp b/src/library/action.cpp
index b9b9884..e1506ff 100644
--- a/src/library/action.cpp
+++ b/src/library/action.cpp
@@ -486,8 +486,23 @@ clfftStatus FFTAction::selectBufferArguments(FFTPlan * fftPlan,
}
default:
{
- // Don't recognize output layout
- return CLFFT_INVALID_ARG_VALUE;
+ if(fftPlan->transflag)
+ {
+ if( fftPlan->placeness == CLFFT_INPLACE )
+ {
+ return CLFFT_INVALID_ARG_VALUE;
+ }
+ else
+ {
+ inputBuff.push_back( clInputBuffers[ 0 ] );
+ outputBuff.push_back( clOutputBuffers[ 0 ] );
+ }
+ }
+ else
+ {
+ // Don't recognize output layout
+ return CLFFT_INVALID_ARG_VALUE;
+ }
}
}
diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index c5f7ebf..c4d6e0b 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -784,13 +784,29 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
if(branchingInGroupX)
{
clKernWrite( transKernel, 9 ) << std::endl;
- clKernWrite( transKernel, 9 ) << "if( (" << wIndexY << " < " << wIndexXEnd << ") )" << std::endl;
+ if(params.fft_realSpecial)
+ {
+ clKernWrite( transKernel, 9 ) << "if( (" << wIndexY << " < " << wIndexXEnd << ") && (" <<
+ wIndexX << " < 1) )" << std::endl;
+ }
+ else
+ {
+ clKernWrite( transKernel, 9 ) << "if( (" << wIndexY << " < " << wIndexXEnd << ") )" << std::endl;
+ }
clKernWrite( transKernel, 9 ) << "{" << std::endl;
}
else
{
clKernWrite( transKernel, 9 ) << std::endl;
- clKernWrite( transKernel, 9 ) << "if( (" << wIndexX << " < " << wIndexYEnd << ") )" << std::endl;
+ if(params.fft_realSpecial)
+ {
+ clKernWrite( transKernel, 9 ) << "if( (" << wIndexX << " < " << wIndexYEnd << ") && (" <<
+ wIndexY << " < 1) )" << std::endl;
+ }
+ else
+ {
+ clKernWrite( transKernel, 9 ) << "if( (" << wIndexX << " < " << wIndexYEnd << ") )" << std::endl;
+ }
clKernWrite( transKernel, 9 ) << "{" << std::endl;
}
}
@@ -848,6 +864,8 @@ clfftStatus FFTGeneratedTransposeGCNAction::initParams ()
this->signature.fft_outputLayout = this->plan->outputLayout;
this->signature.fft_3StepTwiddle = false;
+ this->signature.fft_realSpecial = this->plan->realSpecial;
+
this->signature.transOutHorizontal = this->plan->transOutHorizontal; // using the twiddle front flag to specify horizontal write
// we do this so as to reuse flags in FFTKernelGenKeyParams
// and to avoid making a new one
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 5234d72..013ade0 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -796,7 +796,8 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
// For real transforms
// Special case optimization with 5-step algorithm
- if( (fftPlan->inputLayout == CLFFT_REAL) && IsPo2(fftPlan->length[0]) )
+ if( (fftPlan->inputLayout == CLFFT_REAL) && IsPo2(fftPlan->length[0])
+ && (fftPlan->inStride[0] == 1) && (fftPlan->outStride[0] == 1) )
{
if (fftPlan->length.size() > 1) break;
if (fftPlan->inStride[0] != 1 || fftPlan->outStride[0] != 1) break;
@@ -814,6 +815,16 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
{
fftPlan->tmpBufSize = (smallerDim + padding) * biggerDim *
fftPlan->batchsize * fftPlan->ElementSize() / 2;
+
+ for (size_t index=1; index < fftPlan->length.size(); index++)
+ {
+ fftPlan->tmpBufSizeRC *= fftPlan->length[index];
+ }
+ }
+
+ if (fftPlan->tmpBufSizeRC==0 )
+ {
+ fftPlan->tmpBufSizeRC = fftPlan->tmpBufSize;
}
//Transpose
@@ -947,7 +958,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
row2Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
row2Plan->inStride[0] = 1;
row2Plan->outStride[0] = 1;
- row2Plan->inStride.push_back(clLengths[0] + padding);
+ row2Plan->inStride.push_back(clLengths[0]);
row2Plan->outStride.push_back(1 + clLengths[0]/2);
row2Plan->iDist = (1 + clLengths[1]/2) * row2Plan->inStride[1];
row2Plan->oDist = clLengths[1] * row2Plan->outStride[1];
@@ -989,6 +1000,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans3Plan->oDist = fftPlan->oDist;
trans3Plan->gen = Transpose_GCN;
trans3Plan->transflag = true;
+ trans3Plan->realSpecial = true;
trans3Plan->transOutHorizontal = true;
OPENCL_V(clfftBakePlan(fftPlan->planTZ, numQueues, commQueueFFT, NULL, NULL ),
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index f9f9b2c..29cdc46 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -157,7 +157,7 @@ clfftStatus clfftEnqueueTransform(
//tmp->output
cl_event rowXOutEvents = NULL;
OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, 1,
- &transTXOutEvents, &rowXOutEvents, &localIntBuffer, mybuffers, NULL ),
+ &transTXOutEvents, &rowXOutEvents, &localIntBuffer, &(fftPlan->intBufferRC), NULL ),
_T("clfftEnqueueTransform for large1D rowX failed"));
clReleaseEvent(transTXOutEvents);
@@ -175,7 +175,7 @@ clfftStatus clfftEnqueueTransform(
// output->tmp
cl_event transTYOutEvents = NULL;
OPENCL_V( clfftEnqueueTransform( fftPlan->planTY, dir, numQueuesAndEvents, commQueues, 1,
- &rowXOutEvents, &transTYOutEvents, mybuffers, &localIntBuffer, NULL ),
+ &rowXOutEvents, &transTYOutEvents, &(fftPlan->intBufferRC), &localIntBuffer, NULL ),
_T("clfftEnqueueTransform for large1D transTY failed"));
clReleaseEvent(rowXOutEvents);
@@ -193,7 +193,7 @@ clfftStatus clfftEnqueueTransform(
//tmp->tmp, inplace
cl_event rowYOutEvents = NULL;
OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1,
- &transTYOutEvents, &rowYOutEvents, &localIntBuffer, NULL, NULL ),
+ &transTYOutEvents, &rowYOutEvents, &localIntBuffer, &(fftPlan->intBufferRC), NULL ),
_T("clfftEnqueueTransform for large1D rowY failed"));
clReleaseEvent(transTYOutEvents);
@@ -209,7 +209,7 @@ clfftStatus clfftEnqueueTransform(
//Third Transpose
// tmp->output
OPENCL_V( clfftEnqueueTransform( fftPlan->planTZ, dir, numQueuesAndEvents, commQueues, 1,
- &rowYOutEvents, outEvents, &localIntBuffer, mybuffers, NULL ),
+ &rowYOutEvents, outEvents, &(fftPlan->intBufferRC), mybuffers, NULL ),
_T("clfftEnqueueTransform for large1D transTZ failed"));
clReleaseEvent(rowYOutEvents);
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list