[clfft] 86/107: updating 2D real backward
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:40 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 157b0c961964e2758dced2e7d053b998b754456b
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Sat Apr 25 17:13:19 2015 -0500
updating 2D real backward
---
src/library/plan.cpp | 423 +++++++++++++++++++++++++++++++++++-----------
src/library/transform.cpp | 115 ++++++++++---
2 files changed, 417 insertions(+), 121 deletions(-)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index c82aab7..760da7b 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -2046,9 +2046,6 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
OPENCL_V(clfftBakePlan(fftPlan->planTY, numQueues, commQueueFFT, NULL, NULL ),
_T( "BakePlan for planTY failed" ) );
-
- fftPlan->baked = true;
- return CLFFT_SUCCESS;
}
else
{
@@ -2131,56 +2128,96 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
fftPlan->tmpBufSizeC2R = fftPlan->tmpBufSize;
}
- // create col plan
- // complex to complex
+ if( (fftPlan->inStride[0] == 1) && (fftPlan->outStride[0] == 1) &&
+ ( ((fftPlan->outStride[1] == Nt*2) && (fftPlan->placeness == CLFFT_INPLACE)) ||
+ ((fftPlan->outStride[1] == length0) && (fftPlan->placeness == CLFFT_OUTOFPLACE)) )
+ && (fftPlan->inStride[1] == Nt) )
+ {
+ // create first transpose plan
+
+ //Transpose
+ // input --> tmp
+ size_t transLengths[2] = { length0, length1 };
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planTY, fftPlan->context, CLFFT_2D, transLengths ),
+ _T( "CreateDefaultPlan for planTY transpose failed" ) );
- OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planY, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimY ] ),
- _T( "CreateDefaultPlan for planY failed" ) );
+ FFTPlan* trans1Plan = NULL;
+ lockRAII* trans1Lock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planTY, trans1Plan, trans1Lock ), _T( "fftRepo.getPlan failed" ) );
- FFTPlan* colPlan = NULL;
- lockRAII* colLock = NULL;
- OPENCL_V( fftRepo.getPlan( fftPlan->planY, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+ trans1Plan->transflag = true;
+ transLengths[0] = Nt;
+ OPENCL_V(clfftSetPlanLength( fftPlan->planTY, CLFFT_2D, transLengths ),
+ _T( "clfftSetPlanLength for planTY transpose failed" ) );
- switch(fftPlan->inputLayout)
- {
- case CLFFT_HERMITIAN_INTERLEAVED:
+ switch(fftPlan->inputLayout)
{
- colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
- colPlan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ case CLFFT_HERMITIAN_INTERLEAVED:
+ {
+ trans1Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ trans1Plan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ }
+ break;
+ case CLFFT_HERMITIAN_PLANAR:
+ {
+ trans1Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ trans1Plan->inputLayout = CLFFT_COMPLEX_PLANAR;
+ }
+ break;
+ default: assert(false);
}
- break;
- case CLFFT_HERMITIAN_PLANAR:
+
+ trans1Plan->placeness = CLFFT_OUTOFPLACE;
+ trans1Plan->precision = fftPlan->precision;
+ trans1Plan->tmpBufSize = 0;
+ trans1Plan->batchsize = fftPlan->batchsize;
+ trans1Plan->envelope = fftPlan->envelope;
+ trans1Plan->forwardScale = 1.0f;
+ trans1Plan->backwardScale = 1.0f;
+
+ trans1Plan->inStride[0] = 1;
+ trans1Plan->inStride[1] = Nt;
+ trans1Plan->outStride[0] = 1;
+ trans1Plan->outStride[1] = length1;
+ trans1Plan->iDist = fftPlan->iDist;
+ trans1Plan->oDist = Nt*length1;
+ trans1Plan->transOutHorizontal = true;
+
+ trans1Plan->gen = Transpose_GCN;
+
+
+ for (size_t index=2; index < fftPlan->length.size(); index++)
{
- colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
- colPlan->inputLayout = CLFFT_COMPLEX_PLANAR;
+ trans1Plan->length.push_back(fftPlan->length[index]);
+ trans1Plan->inStride.push_back(fftPlan->inStride[index]);
+ trans1Plan->outStride.push_back(trans1Plan->oDist);
+ trans1Plan->oDist *= fftPlan->length[index];
}
- break;
- default: assert(false);
- }
+ OPENCL_V(clfftBakePlan(fftPlan->planTY, numQueues, commQueueFFT, NULL, NULL ),
+ _T( "BakePlan for planTY failed" ) );
- colPlan->length.push_back(Nt);
+ // create col plan
+ // complex to complex
- colPlan->inStride[0] = fftPlan->inStride[1];
- colPlan->inStride.push_back(fftPlan->inStride[0]);
- colPlan->iDist = fftPlan->iDist;
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planY, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimY ] ),
+ _T( "CreateDefaultPlan for planY failed" ) );
+
+ FFTPlan* colPlan = NULL;
+ lockRAII* colLock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planY, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+ colPlan->length.push_back(Nt);
+
+ colPlan->inStride[0] = 1;
+ colPlan->inStride.push_back(length1);
+ colPlan->iDist = trans1Plan->oDist;
- if (fftPlan->placeness == CLFFT_INPLACE)
- {
colPlan->placeness = CLFFT_INPLACE;
- }
- else
- {
- if(fftPlan->length.size() > 2)
- colPlan->placeness = CLFFT_INPLACE;
- else
- colPlan->placeness = CLFFT_OUTOFPLACE;
- }
+ colPlan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
- if(colPlan->placeness == CLFFT_INPLACE)
- {
colPlan->outStride[0] = colPlan->inStride[0];
colPlan->outStride.push_back(colPlan->inStride[1]);
colPlan->oDist = colPlan->iDist;
@@ -2188,102 +2225,286 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
for (size_t index=2; index < fftPlan->length.size(); index++)
{
colPlan->length.push_back(fftPlan->length[index]);
- colPlan->inStride.push_back(fftPlan->inStride[index]);
- colPlan->outStride.push_back(fftPlan->inStride[index]);
+ colPlan->inStride.push_back(trans1Plan->outStride[index]);
+ colPlan->outStride.push_back(trans1Plan->outStride[index]);
}
- }
- else
- {
- colPlan->outStride[0] = Nt;
- colPlan->outStride.push_back(1);
- colPlan->oDist = Nt*length1;
- for (size_t index=2; index < fftPlan->length.size(); index++)
- {
- colPlan->length.push_back(fftPlan->length[index]);
- colPlan->inStride.push_back(fftPlan->inStride[index]);
- colPlan->outStride.push_back(colPlan->oDist);
- colPlan->oDist *= fftPlan->length[index];
- }
- }
- colPlan->precision = fftPlan->precision;
- colPlan->forwardScale = 1.0f;
- colPlan->backwardScale = 1.0f;
- colPlan->tmpBufSize = 0;
+ colPlan->precision = fftPlan->precision;
+ colPlan->forwardScale = 1.0f;
+ colPlan->backwardScale = 1.0f;
+ colPlan->tmpBufSize = 0;
- colPlan->gen = fftPlan->gen;
- colPlan->envelope = fftPlan->envelope;
+ colPlan->gen = fftPlan->gen;
+ colPlan->envelope = fftPlan->envelope;
- colPlan->batchsize = fftPlan->batchsize;
+ colPlan->batchsize = fftPlan->batchsize;
- OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planY failed" ) );
+ OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planY failed" ) );
- // create row plan
- // hermitian to real
+ // create second transpose plan
+
+ //Transpose
+ //tmp --> output
+ size_t trans2Lengths[2] = { length1, length0 };
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planTX, fftPlan->context, CLFFT_2D, trans2Lengths ),
+ _T( "CreateDefaultPlan for planTX transpose failed" ) );
- //create row plan
- OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planX, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimX ] ),
- _T( "CreateDefaultPlan for planX failed" ) );
+ FFTPlan* trans2Plan = NULL;
+ lockRAII* trans2Lock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planTX, trans2Plan, trans2Lock ), _T( "fftRepo.getPlan failed" ) );
- FFTPlan* rowPlan = NULL;
- lockRAII* rowLock = NULL;
- OPENCL_V( fftRepo.getPlan( fftPlan->planX, rowPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+ trans2Plan->transflag = true;
- rowPlan->outputLayout = fftPlan->outputLayout;
- rowPlan->inputLayout = CLFFT_HERMITIAN_INTERLEAVED;
+ trans2Lengths[1] = Nt;
+ OPENCL_V(clfftSetPlanLength( fftPlan->planTX, CLFFT_2D, trans2Lengths ),
+ _T( "clfftSetPlanLength for planTX transpose failed" ) );
- rowPlan->length.push_back(length1);
- rowPlan->outStride[0] = fftPlan->outStride[0];
- rowPlan->outStride.push_back(fftPlan->outStride[1]);
- rowPlan->oDist = fftPlan->oDist;
+ trans2Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ trans2Plan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
- if (fftPlan->placeness == CLFFT_INPLACE)
- {
- rowPlan->placeness = CLFFT_INPLACE;
- rowPlan->inStride[0] = colPlan->outStride[1];
- rowPlan->inStride.push_back(colPlan->outStride[0]);
- rowPlan->iDist = colPlan->oDist;
+ trans2Plan->placeness = CLFFT_OUTOFPLACE;
+ trans2Plan->precision = fftPlan->precision;
+ trans2Plan->tmpBufSize = 0;
+ trans2Plan->batchsize = fftPlan->batchsize;
+ trans2Plan->envelope = fftPlan->envelope;
+ trans2Plan->forwardScale = 1.0f;
+ trans2Plan->backwardScale = 1.0f;
+
+ trans2Plan->inStride[0] = 1;
+ trans2Plan->inStride[1] = length1;
+ trans2Plan->outStride[0] = 1;
+ trans2Plan->outStride[1] = Nt;
+ trans2Plan->iDist = colPlan->oDist;
+ trans2Plan->oDist = Nt*length1;
+
+ trans2Plan->gen = Transpose_GCN;
+ trans2Plan->transflag = true;
+
+ for (size_t index=2; index < fftPlan->length.size(); index++)
+ {
+ trans2Plan->length.push_back(fftPlan->length[index]);
+ trans2Plan->inStride.push_back(colPlan->outStride[index]);
+ trans2Plan->outStride.push_back(trans2Plan->oDist);
+ trans2Plan->oDist *= fftPlan->length[index];
+
+ }
+
+ OPENCL_V(clfftBakePlan(fftPlan->planTX, numQueues, commQueueFFT, NULL, NULL ),
+ _T( "BakePlan for planTX failed" ) );
+
+ // create row plan
+ // hermitian to real
+
+ //create row plan
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planX, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimX ] ),
+ _T( "CreateDefaultPlan for planX failed" ) );
+
+ FFTPlan* rowPlan = NULL;
+ lockRAII* rowLock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planX, rowPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+ rowPlan->outputLayout = fftPlan->outputLayout;
+ rowPlan->inputLayout = CLFFT_HERMITIAN_INTERLEAVED;
+
+ rowPlan->length.push_back(length1);
+
+ rowPlan->outStride[0] = fftPlan->outStride[0];
+ rowPlan->outStride.push_back(fftPlan->outStride[1]);
+ rowPlan->oDist = fftPlan->oDist;
+
+ rowPlan->inStride[0] = trans2Plan->outStride[0];
+ rowPlan->inStride.push_back(trans2Plan->outStride[1]);
+ rowPlan->iDist = trans2Plan->oDist;
for (size_t index=2; index < fftPlan->length.size(); index++)
{
rowPlan->length.push_back(fftPlan->length[index]);
- rowPlan->inStride.push_back(colPlan->outStride[index]);
+ rowPlan->inStride.push_back(trans2Plan->outStride[index]);
rowPlan->outStride.push_back(fftPlan->outStride[index]);
}
+
+ if (fftPlan->placeness == CLFFT_INPLACE)
+ {
+ rowPlan->placeness = CLFFT_INPLACE;
+ }
+ else
+ {
+ rowPlan->placeness = CLFFT_OUTOFPLACE;
+ }
+
+
+ rowPlan->precision = fftPlan->precision;
+ rowPlan->forwardScale = fftPlan->forwardScale;
+ rowPlan->backwardScale = fftPlan->backwardScale;
+ rowPlan->tmpBufSize = 0;
+
+ rowPlan->gen = fftPlan->gen;
+ rowPlan->envelope = fftPlan->envelope;
+
+ rowPlan->batchsize = fftPlan->batchsize;
+
+ OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planX failed" ) );
}
else
{
- rowPlan->placeness = CLFFT_OUTOFPLACE;
- rowPlan->inStride[0] = 1;
- rowPlan->inStride.push_back(Nt);
- rowPlan->iDist = Nt*length1;
+ // create col plan
+ // complex to complex
- for (size_t index=2; index < fftPlan->length.size(); index++)
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planY, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimY ] ),
+ _T( "CreateDefaultPlan for planY failed" ) );
+
+ FFTPlan* colPlan = NULL;
+ lockRAII* colLock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planY, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+
+ switch(fftPlan->inputLayout)
{
- rowPlan->length.push_back(fftPlan->length[index]);
- rowPlan->outStride.push_back(fftPlan->outStride[index]);
- rowPlan->inStride.push_back(rowPlan->iDist);
- rowPlan->iDist *= fftPlan->length[index];
+ case CLFFT_HERMITIAN_INTERLEAVED:
+ {
+ colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ colPlan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ }
+ break;
+ case CLFFT_HERMITIAN_PLANAR:
+ {
+ colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ colPlan->inputLayout = CLFFT_COMPLEX_PLANAR;
+ }
+ break;
+ default: assert(false);
+ }
+
+
+ colPlan->length.push_back(Nt);
+
+ colPlan->inStride[0] = fftPlan->inStride[1];
+ colPlan->inStride.push_back(fftPlan->inStride[0]);
+ colPlan->iDist = fftPlan->iDist;
+
+
+ if (fftPlan->placeness == CLFFT_INPLACE)
+ {
+ colPlan->placeness = CLFFT_INPLACE;
+ }
+ else
+ {
+ if(fftPlan->length.size() > 2)
+ colPlan->placeness = CLFFT_INPLACE;
+ else
+ colPlan->placeness = CLFFT_OUTOFPLACE;
+ }
+
+ if(colPlan->placeness == CLFFT_INPLACE)
+ {
+ colPlan->outStride[0] = colPlan->inStride[0];
+ colPlan->outStride.push_back(colPlan->inStride[1]);
+ colPlan->oDist = colPlan->iDist;
+
+ for (size_t index=2; index < fftPlan->length.size(); index++)
+ {
+ colPlan->length.push_back(fftPlan->length[index]);
+ colPlan->inStride.push_back(fftPlan->inStride[index]);
+ colPlan->outStride.push_back(fftPlan->inStride[index]);
+ }
+ }
+ else
+ {
+ colPlan->outStride[0] = Nt;
+ colPlan->outStride.push_back(1);
+ colPlan->oDist = Nt*length1;
+
+ for (size_t index=2; index < fftPlan->length.size(); index++)
+ {
+ colPlan->length.push_back(fftPlan->length[index]);
+ colPlan->inStride.push_back(fftPlan->inStride[index]);
+ colPlan->outStride.push_back(colPlan->oDist);
+ colPlan->oDist *= fftPlan->length[index];
+ }
+ }
+
+ colPlan->precision = fftPlan->precision;
+ colPlan->forwardScale = 1.0f;
+ colPlan->backwardScale = 1.0f;
+ colPlan->tmpBufSize = 0;
+
+ colPlan->gen = fftPlan->gen;
+ colPlan->envelope = fftPlan->envelope;
+
+ colPlan->batchsize = fftPlan->batchsize;
+
+ OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planY failed" ) );
+
+ // create row plan
+ // hermitian to real
+
+ //create row plan
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planX, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimX ] ),
+ _T( "CreateDefaultPlan for planX failed" ) );
+
+ FFTPlan* rowPlan = NULL;
+ lockRAII* rowLock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planX, rowPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+ rowPlan->outputLayout = fftPlan->outputLayout;
+ rowPlan->inputLayout = CLFFT_HERMITIAN_INTERLEAVED;
+
+ rowPlan->length.push_back(length1);
+
+ rowPlan->outStride[0] = fftPlan->outStride[0];
+ rowPlan->outStride.push_back(fftPlan->outStride[1]);
+ rowPlan->oDist = fftPlan->oDist;
+
+ if (fftPlan->placeness == CLFFT_INPLACE)
+ {
+ rowPlan->placeness = CLFFT_INPLACE;
+
+ rowPlan->inStride[0] = colPlan->outStride[1];
+ rowPlan->inStride.push_back(colPlan->outStride[0]);
+ rowPlan->iDist = colPlan->oDist;
+
+ for (size_t index=2; index < fftPlan->length.size(); index++)
+ {
+ rowPlan->length.push_back(fftPlan->length[index]);
+ rowPlan->inStride.push_back(colPlan->outStride[index]);
+ rowPlan->outStride.push_back(fftPlan->outStride[index]);
+ }
+ }
+ else
+ {
+ rowPlan->placeness = CLFFT_OUTOFPLACE;
+
+ rowPlan->inStride[0] = 1;
+ rowPlan->inStride.push_back(Nt);
+ rowPlan->iDist = Nt*length1;
+
+ for (size_t index=2; index < fftPlan->length.size(); index++)
+ {
+ rowPlan->length.push_back(fftPlan->length[index]);
+ rowPlan->outStride.push_back(fftPlan->outStride[index]);
+ rowPlan->inStride.push_back(rowPlan->iDist);
+ rowPlan->iDist *= fftPlan->length[index];
+ }
}
- }
- rowPlan->precision = fftPlan->precision;
- rowPlan->forwardScale = fftPlan->forwardScale;
- rowPlan->backwardScale = fftPlan->backwardScale;
- rowPlan->tmpBufSize = 0;
+ rowPlan->precision = fftPlan->precision;
+ rowPlan->forwardScale = fftPlan->forwardScale;
+ rowPlan->backwardScale = fftPlan->backwardScale;
+ rowPlan->tmpBufSize = 0;
- rowPlan->gen = fftPlan->gen;
- rowPlan->envelope = fftPlan->envelope;
+ rowPlan->gen = fftPlan->gen;
+ rowPlan->envelope = fftPlan->envelope;
- rowPlan->batchsize = fftPlan->batchsize;
+ rowPlan->batchsize = fftPlan->batchsize;
- OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planX failed" ) );
+ OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planX failed" ) );
+ }
}
else
{
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index d8d09db..9680fc2 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -740,40 +740,115 @@ clfftStatus clfftEnqueueTransform(
}
else if(fftPlan->outputLayout == CLFFT_REAL)
{
- cl_mem *out_local, *int_local, *out_y;
-
- if(fftPlan->placeness == CLFFT_INPLACE)
+ if(fftPlan->planTY)
{
- out_local = NULL;
- int_local = NULL;
- out_y = clInputBuffers;
+ cl_mem *mybuffers;
+
+ if (fftPlan->placeness==CLFFT_INPLACE)
+ mybuffers = clInputBuffers;
+ else
+ mybuffers = &(fftPlan->intBufferC2R);
+
+ cl_event transYOutEvents = NULL;
+ cl_event transXOutEvents = NULL;
+
+ //First transpose
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planTY, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+ waitEvents, &transYOutEvents, clInputBuffers, &localIntBuffer, NULL ),
+ _T("clfftEnqueueTransform for first transpose failed"));
+
+
+#if defined(DEBUGGING)
+ OPENCL_V( clEnqueueReadBuffer( *commQueues, mybuffers[0], CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+ NULL, NULL ),
+ _T("Reading the result buffer failed") );
+#endif
+
+ //First row
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1, &transYOutEvents,
+ &rowOutEvents, &localIntBuffer, NULL, NULL ),
+ _T("clfftEnqueueTransform for col failed"));
+ clReleaseEvent(transYOutEvents);
+
+
+#if defined(DEBUGGING)
+ OPENCL_V( clEnqueueReadBuffer( *commQueues, mybuffers[0], CL_TRUE, 0, buffSizeBytes*2, &output2[ 0 ], 0,
+ NULL, NULL ),
+ _T("Reading the result buffer failed") );
+#endif
+
+ //Second transpose
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planTX, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+ &transXOutEvents, &localIntBuffer, mybuffers, NULL ),
+ _T("clfftEnqueueTransform for second transpose failed"));
+
+
+#if defined(DEBUGGING)
+ OPENCL_V( clEnqueueReadBuffer( *commQueues, localIntBuffer, CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+ NULL, NULL ),
+ _T("Reading the result buffer failed") );
+#endif
+
+
+ //Second Row transform
+ if(fftPlan->placeness == CLFFT_INPLACE)
+ {
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, 1, &transXOutEvents,
+ outEvents, clInputBuffers, NULL, NULL ),
+ _T("clfftEnqueueTransform for second row failed"));
+ }
+ else
+ {
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, 1, &transXOutEvents,
+ outEvents, mybuffers, clOutputBuffers, NULL ),
+ _T("clfftEnqueueTransform for second row failed"));
+ }
+ clReleaseEvent(transXOutEvents);
+#if defined(DEBUGGING)
+ OPENCL_V( clEnqueueReadBuffer( *commQueues, localIntBuffer, CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+ NULL, NULL ),
+ _T("Reading the result buffer failed") );
+#endif
+
+
}
else
{
- if(fftPlan->length.size() > 2)
+ cl_mem *out_local, *int_local, *out_y;
+
+ if(fftPlan->placeness == CLFFT_INPLACE)
{
- out_local = clOutputBuffers;
+ out_local = NULL;
int_local = NULL;
out_y = clInputBuffers;
}
else
{
- out_local = clOutputBuffers;
- int_local = &(fftPlan->intBufferC2R);
- out_y = int_local;
+ if(fftPlan->length.size() > 2)
+ {
+ out_local = clOutputBuffers;
+ int_local = NULL;
+ out_y = clInputBuffers;
+ }
+ else
+ {
+ out_local = clOutputBuffers;
+ int_local = &(fftPlan->intBufferC2R);
+ out_y = int_local;
+ }
}
- }
- // deal with column
- OPENCL_V( clfftEnqueueTransform( fftPlan->planY, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, numWaitEvents,
- waitEvents, &rowOutEvents, clInputBuffers, int_local, localIntBuffer ),
- _T("clfftEnqueueTransform for row failed"));
+ // deal with column
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planY, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, numWaitEvents,
+ waitEvents, &rowOutEvents, clInputBuffers, int_local, localIntBuffer ),
+ _T("clfftEnqueueTransform for row failed"));
- // deal with row
- OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
- outEvents, out_y, out_local, localIntBuffer ),
- _T("clfftEnqueueTransform for column failed"));
+ // deal with row
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+ outEvents, out_y, out_local, localIntBuffer ),
+ _T("clfftEnqueueTransform for column failed"));
+ }
}
else
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list