[clfft] 82/107: updating 3D planner logic real forward
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:39 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 05f24ca7a98f9b3257c86a9bb2dd58a00b4bd4eb
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Thu Apr 16 21:01:07 2015 -0500
updating 3D planner logic real forward
---
src/library/plan.cpp | 330 +++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 278 insertions(+), 52 deletions(-)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index e23f5e5..747a472 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -1800,15 +1800,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
length1 = fftPlan->length[1];
size_t Nt = (1 + length0/2);
- if (fftPlan->tmpBufSize==0)
- {
- fftPlan->tmpBufSize = Nt * length1 * fftPlan->batchsize * fftPlan->ElementSize();
- for (size_t index=2; index < fftPlan->length.size(); index++)
- {
- fftPlan->tmpBufSize *= fftPlan->length[index];
- }
- }
// create row plan
// real to hermitian
@@ -1861,6 +1853,17 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
((rowPlan->inStride[1] == length0) && (rowPlan->placeness == CLFFT_OUTOFPLACE)) )
&& (rowPlan->outStride[1] == Nt) )
{
+ // calc temp buf size
+ if (fftPlan->tmpBufSize==0)
+ {
+ fftPlan->tmpBufSize = Nt * length1 * fftPlan->batchsize * fftPlan->ElementSize();
+
+ for (size_t index=2; index < fftPlan->length.size(); index++)
+ {
+ fftPlan->tmpBufSize *= fftPlan->length[index];
+ }
+ }
+
// create first transpose plan
//Transpose
@@ -2368,11 +2371,16 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
{
if(fftPlan->inputLayout == CLFFT_REAL)
{
- size_t clLengths[] = { 1, 1, 0 };
- clLengths[0] = fftPlan->length[ DimX ];
- clLengths[1] = fftPlan->length[ DimY ];
+
+ size_t length0 = fftPlan->length[ DimX ];
+ size_t length1 = fftPlan->length[ DimY ];
+ size_t length2 = fftPlan->length[ DimZ ];
+
+ size_t Nt = (1 + length0/2);
+
//create 2D xy plan
+ size_t clLengths[] = { length0, length1, 0 };
OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planX, fftPlan->context, CLFFT_2D, clLengths ),
_T( "CreateDefaultPlan 2D planX failed" ) );
@@ -2406,61 +2414,279 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
xyPlan->iDist = fftPlan->iDist;
xyPlan->oDist = fftPlan->oDist;
+ //this 3d is decomposed from 4d
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ xyPlan->length.push_back(fftPlan->length[index]);
+ xyPlan->inStride.push_back(fftPlan->inStride[index]);
+ xyPlan->outStride.push_back(fftPlan->outStride[index]);
+ }
+
OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->2D planX failed" ) );
- clLengths[0] = fftPlan->length[ DimZ ];
- clLengths[1] = clLengths[2] = 0;
- //create 1D col plan
- OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planZ, fftPlan->context, CLFFT_1D, clLengths ),
- _T( "CreateDefaultPlan for planZ failed" ) );
+ if( (xyPlan->outStride[2] == Nt*length1) &&
+ ( ((xyPlan->inStride[2] == Nt*2*length1) && (xyPlan->placeness == CLFFT_INPLACE)) ||
+ ((xyPlan->inStride[2] == length0*length1) && (xyPlan->placeness == CLFFT_OUTOFPLACE)) ) )
+ {
- FFTPlan* colPlan = NULL;
- lockRAII* colLock = NULL;
- OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+ if (fftPlan->tmpBufSize==0)
+ {
+ fftPlan->tmpBufSize = Nt * length1 * length2 * fftPlan->batchsize * fftPlan->ElementSize();
- switch(fftPlan->outputLayout)
- {
- case CLFFT_HERMITIAN_INTERLEAVED:
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ fftPlan->tmpBufSize *= fftPlan->length[index];
+ }
+ }
+
+ // create first transpose plan
+
+ //Transpose
+ // output --> tmp
+ size_t transLengths[2] = { length0*length1, length2 };
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planTX, fftPlan->context, CLFFT_2D, transLengths ),
+ _T( "CreateDefaultPlan for planTX transpose failed" ) );
+
+ FFTPlan* trans1Plan = NULL;
+ lockRAII* trans1Lock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planTX, trans1Plan, trans1Lock ), _T( "fftRepo.getPlan failed" ) );
+
+ trans1Plan->transflag = true;
+
+ transLengths[0] = Nt*length1;
+ OPENCL_V(clfftSetPlanLength( fftPlan->planTX, CLFFT_2D, transLengths ),
+ _T( "clfftSetPlanLength for planTX transpose failed" ) );
+
+ switch(fftPlan->outputLayout)
{
- colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
- colPlan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ case CLFFT_HERMITIAN_INTERLEAVED:
+ {
+ trans1Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ trans1Plan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ }
+ break;
+ case CLFFT_HERMITIAN_PLANAR:
+ {
+ trans1Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ trans1Plan->inputLayout = CLFFT_COMPLEX_PLANAR;
+ }
+ break;
+ default: assert(false);
}
- break;
- case CLFFT_HERMITIAN_PLANAR:
+
+ trans1Plan->placeness = CLFFT_OUTOFPLACE;
+ trans1Plan->precision = fftPlan->precision;
+ trans1Plan->tmpBufSize = 0;
+ trans1Plan->batchsize = fftPlan->batchsize;
+ trans1Plan->envelope = fftPlan->envelope;
+ trans1Plan->forwardScale = 1.0f;
+ trans1Plan->backwardScale = 1.0f;
+
+ trans1Plan->inStride[0] = 1;
+ trans1Plan->inStride[1] = Nt*length1;
+ trans1Plan->outStride[0] = 1;
+ trans1Plan->outStride[1] = length2;
+ trans1Plan->iDist = xyPlan->oDist;
+ trans1Plan->oDist = Nt*length1*length2;
+ trans1Plan->transOutHorizontal = true;
+
+ trans1Plan->gen = Transpose_GCN;
+
+
+ for (size_t index=3; index < fftPlan->length.size(); index++)
{
- colPlan->outputLayout = CLFFT_COMPLEX_PLANAR;
- colPlan->inputLayout = CLFFT_COMPLEX_PLANAR;
+ trans1Plan->length.push_back(fftPlan->length[index]);
+ trans1Plan->inStride.push_back(xyPlan->outStride[index]);
+ trans1Plan->outStride.push_back(trans1Plan->oDist);
+ trans1Plan->oDist *= fftPlan->length[index];
}
- break;
- default: assert(false);
+
+ OPENCL_V(clfftBakePlan(fftPlan->planTX, numQueues, commQueueFFT, NULL, NULL ),
+ _T( "BakePlan for planTX failed" ) );
+
+ // Create column plan as a row plan
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planZ, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimZ ] ),
+ _T( "CreateDefaultPlan for planZ failed" ) );
+
+ FFTPlan* colPlan = NULL;
+ lockRAII* colLock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+ colPlan->outputLayout = trans1Plan->outputLayout;
+ colPlan->inputLayout = trans1Plan->outputLayout;
+ colPlan->placeness = CLFFT_INPLACE;
+ colPlan->length.push_back(Nt*length1);
+
+ colPlan->inStride[0] = 1;
+ colPlan->inStride.push_back(length2);
+ colPlan->iDist = Nt*length1*length2;
+
+ colPlan->outStride[0] = 1;
+ colPlan->outStride.push_back(length2);
+ colPlan->oDist = Nt*length1*length2;
+
+ colPlan->precision = fftPlan->precision;
+ colPlan->forwardScale = fftPlan->forwardScale;
+ colPlan->backwardScale = fftPlan->backwardScale;
+ colPlan->tmpBufSize = 0;
+
+ colPlan->gen = fftPlan->gen;
+ colPlan->envelope = fftPlan->envelope;
+
+ colPlan->batchsize = fftPlan->batchsize;
+
+ //this 2d is decomposed from 3d
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ colPlan->length.push_back(fftPlan->length[index]);
+ colPlan->inStride.push_back(colPlan->iDist);
+ colPlan->outStride.push_back(colPlan->oDist);
+ colPlan->iDist *= fftPlan->length[index];
+ colPlan->oDist *= fftPlan->length[index];
+ }
+
+ OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ),
+ _T( "BakePlan for planZ failed" ) );
+
+ if (fftPlan->transposed == CLFFT_TRANSPOSED)
+ {
+ fftPlan->baked = true;
+ return CLFFT_SUCCESS;
+ }
+
+ // create second transpose plan
+
+ //Transpose
+ //output --> tmp
+ size_t trans2Lengths[2] = { length2, length0*length1 };
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planTY, fftPlan->context, CLFFT_2D, trans2Lengths ),
+ _T( "CreateDefaultPlan for planTY transpose failed" ) );
+
+ FFTPlan* trans2Plan = NULL;
+ lockRAII* trans2Lock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planTY, trans2Plan, trans2Lock ), _T( "fftRepo.getPlan failed" ) );
+
+ trans2Plan->transflag = true;
+
+ trans2Lengths[1] = Nt*length1;
+ OPENCL_V(clfftSetPlanLength( fftPlan->planTY, CLFFT_2D, trans2Lengths ),
+ _T( "clfftSetPlanLength for planTY transpose failed" ) );
+
+ switch(fftPlan->outputLayout)
+ {
+ case CLFFT_HERMITIAN_INTERLEAVED:
+ {
+ trans2Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ trans2Plan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ }
+ break;
+ case CLFFT_HERMITIAN_PLANAR:
+ {
+ trans2Plan->outputLayout = CLFFT_COMPLEX_PLANAR;
+ trans2Plan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ }
+ break;
+ default: assert(false);
+ }
+
+ trans2Plan->placeness = CLFFT_OUTOFPLACE;
+ trans2Plan->precision = fftPlan->precision;
+ trans2Plan->tmpBufSize = 0;
+ trans2Plan->batchsize = fftPlan->batchsize;
+ trans2Plan->envelope = fftPlan->envelope;
+ trans2Plan->forwardScale = 1.0f;
+ trans2Plan->backwardScale = 1.0f;
+
+ trans2Plan->inStride[0] = 1;
+ trans2Plan->inStride[1] = length2;
+ trans2Plan->outStride[0] = 1;
+ trans2Plan->outStride[1] = Nt*length1;
+ trans2Plan->iDist = Nt*length1*length2;
+ trans2Plan->oDist = fftPlan->oDist;
+
+ trans2Plan->gen = Transpose_GCN;
+ trans2Plan->transflag = true;
+
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ trans2Plan->length.push_back(fftPlan->length[index]);
+ trans2Plan->inStride.push_back(trans2Plan->iDist);
+ trans2Plan->iDist *= fftPlan->length[index];
+ trans2Plan->outStride.push_back(fftPlan->outStride[index]);
+ }
+
+ OPENCL_V(clfftBakePlan(fftPlan->planTY, numQueues, commQueueFFT, NULL, NULL ),
+ _T( "BakePlan for planTY failed" ) );
+
+
+ fftPlan->baked = true;
+ return CLFFT_SUCCESS;
}
+ else
+ {
- colPlan->placeness = CLFFT_INPLACE;
- colPlan->precision = fftPlan->precision;
- colPlan->forwardScale = fftPlan->forwardScale;
- colPlan->backwardScale = fftPlan->backwardScale;
- colPlan->tmpBufSize = fftPlan->tmpBufSize;
+ clLengths[0] = fftPlan->length[ DimZ ];
+ clLengths[1] = clLengths[2] = 0;
+ //create 1D col plan
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planZ, fftPlan->context, CLFFT_1D, clLengths ),
+ _T( "CreateDefaultPlan for planZ failed" ) );
- colPlan->gen = fftPlan->gen;
- colPlan->envelope = fftPlan->envelope;
+ FFTPlan* colPlan = NULL;
+ lockRAII* colLock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
- // This is a column FFT, the first elements distance between each FFT is the distance of the first two
- // elements in the original buffer. Like a transpose of the matrix
- colPlan->batchsize = fftPlan->batchsize;
- colPlan->inStride[0] = fftPlan->outStride[2];
- colPlan->outStride[0] = fftPlan->outStride[2];
+ switch(fftPlan->outputLayout)
+ {
+ case CLFFT_HERMITIAN_INTERLEAVED:
+ {
+ colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ colPlan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ }
+ break;
+ case CLFFT_HERMITIAN_PLANAR:
+ {
+ colPlan->outputLayout = CLFFT_COMPLEX_PLANAR;
+ colPlan->inputLayout = CLFFT_COMPLEX_PLANAR;
+ }
+ break;
+ default: assert(false);
+ }
- //pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
- colPlan->length.push_back(1 + fftPlan->length[0]/2);
- colPlan->length.push_back(fftPlan->length[1]);
- colPlan->inStride.push_back(fftPlan->outStride[0]);
- colPlan->inStride.push_back(fftPlan->outStride[1]);
- colPlan->outStride.push_back(fftPlan->outStride[0]);
- colPlan->outStride.push_back(fftPlan->outStride[1]);
- colPlan->iDist = fftPlan->oDist;
- colPlan->oDist = fftPlan->oDist;
+ colPlan->placeness = CLFFT_INPLACE;
+ colPlan->precision = fftPlan->precision;
+ colPlan->forwardScale = fftPlan->forwardScale;
+ colPlan->backwardScale = fftPlan->backwardScale;
+ colPlan->tmpBufSize = fftPlan->tmpBufSize;
- OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
+ colPlan->gen = fftPlan->gen;
+ colPlan->envelope = fftPlan->envelope;
+
+ // This is a column FFT, the first elements distance between each FFT is the distance of the first two
+ // elements in the original buffer. Like a transpose of the matrix
+ colPlan->batchsize = fftPlan->batchsize;
+ colPlan->inStride[0] = fftPlan->outStride[2];
+ colPlan->outStride[0] = fftPlan->outStride[2];
+
+ //pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
+ colPlan->length.push_back(1 + fftPlan->length[0]/2);
+ colPlan->length.push_back(fftPlan->length[1]);
+ colPlan->inStride.push_back(fftPlan->outStride[0]);
+ colPlan->inStride.push_back(fftPlan->outStride[1]);
+ colPlan->outStride.push_back(fftPlan->outStride[0]);
+ colPlan->outStride.push_back(fftPlan->outStride[1]);
+ colPlan->iDist = fftPlan->oDist;
+ colPlan->oDist = fftPlan->oDist;
+
+ //this 3d is decomposed from 4d
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ colPlan->length.push_back(fftPlan->length[index]);
+ colPlan->inStride.push_back(xyPlan->outStride[index]);
+ colPlan->outStride.push_back(fftPlan->outStride[index]);
+ }
+
+ OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
+ }
}
else if(fftPlan->outputLayout == CLFFT_REAL)
{
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list