[clfft] 100/128: fixing more bugs
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:44 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 618cdd6f66e1f01eaead97c40f8006c24e920094
Author: bragadeesh <bragadeesh.natarajan at amd.com>
Date: Thu Oct 8 15:08:46 2015 -0700
fixing more bugs
---
src/library/plan.cpp | 112 ++++++++++++++++++++++++++++------------------
src/library/transform.cpp | 34 +++++---------
2 files changed, 80 insertions(+), 66 deletions(-)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index a90f43d..cd4ef28 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -597,8 +597,11 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
{
if( fftPlan->length[0] % supported[i] == 0 )
{
- clLengths[1] = supported[i];
- break;
+ if (Is1DPossible(supported[i], Large1DThreshold))
+ {
+ clLengths[1] = supported[i];
+ break;
+ }
}
}
}
@@ -608,10 +611,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
// Start of block where transposes are generated; 1D FFT
while (1 && (fftPlan->inputLayout != CLFFT_REAL) && (fftPlan->outputLayout != CLFFT_REAL))
{
- //if (!IsPo2(fftPlan->length[0])) break;
- //TBD, only one dimension?
- if (fftPlan->length.size() > 1) break;
if (fftPlan->inStride[0] != 1 || fftPlan->outStride[0] != 1) break;
if ( IsPo2(fftPlan->length[0])
@@ -644,6 +644,11 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
{
fftPlan->tmpBufSize = (smallerDim + padding) * biggerDim *
fftPlan->batchsize * fftPlan->ElementSize();
+
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ fftPlan->tmpBufSize *= fftPlan->length[index];
+ }
}
//Transpose
@@ -671,6 +676,14 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans1Plan->gen = transGen;
trans1Plan->transflag = true;
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ trans1Plan->length.push_back(fftPlan->length[index]);
+ trans1Plan->inStride.push_back(fftPlan->inStride[index]);
+ trans1Plan->outStride.push_back(trans1Plan->oDist);
+ trans1Plan->oDist *= fftPlan->length[index];
+ }
+
//Set callback data if set on top level plan
if (fftPlan->hasPreCallback)
{
@@ -715,6 +728,13 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
row1Plan->iDist = clLengths[0] * row1Plan->inStride[1];
row1Plan->oDist = fftPlan->oDist;
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ row1Plan->length.push_back(fftPlan->length[index]);
+ row1Plan->inStride.push_back(row1Plan->iDist);
+ row1Plan->iDist *= fftPlan->length[index];
+ row1Plan->outStride.push_back(fftPlan->outStride[index]);
+ }
OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ),
_T( "BakePlan large1d first row plan failed" ) );
@@ -746,6 +766,14 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans2Plan->large1D = fftPlan->length[0];
trans2Plan->transflag = true;
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ trans2Plan->length.push_back(fftPlan->length[index]);
+ trans2Plan->inStride.push_back(fftPlan->outStride[index]);
+ trans2Plan->outStride.push_back(trans2Plan->oDist);
+ trans2Plan->oDist *= fftPlan->length[index];
+ }
+
OPENCL_V(clfftBakePlan(fftPlan->planTY, numQueues, commQueueFFT, NULL, NULL ),
_T( "BakePlan large1d trans2 plan failed" ) );
@@ -780,6 +808,14 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
row2Plan->iDist = clLengths[1] * row2Plan->inStride[1];
row2Plan->oDist = clLengths[1] * row2Plan->outStride[1];
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ row2Plan->length.push_back(fftPlan->length[index]);
+ row2Plan->inStride.push_back(row2Plan->iDist);
+ row2Plan->outStride.push_back(row2Plan->oDist);
+ row2Plan->iDist *= fftPlan->length[index];
+ row2Plan->oDist *= fftPlan->length[index];
+ }
OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ),
_T( "BakePlan large1d second row plan failed" ) );
@@ -810,6 +846,15 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans3Plan->transflag = true;
trans3Plan->transOutHorizontal = true;
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ trans3Plan->length.push_back(fftPlan->length[index]);
+ trans3Plan->inStride.push_back(trans3Plan->iDist);
+ trans3Plan->iDist *= fftPlan->length[index];
+ trans3Plan->outStride.push_back(fftPlan->outStride[index]);
+ }
+
+
OPENCL_V(clfftBakePlan(fftPlan->planTZ, numQueues, commQueueFFT, NULL, NULL ),
_T( "BakePlan large1d trans3 plan failed" ) );
@@ -825,6 +870,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
// For real transforms
// Special case optimization with 5-step algorithm
if( (fftPlan->inputLayout == CLFFT_REAL) && IsPo2(fftPlan->length[0])
+ && (fftPlan->length.size() == 1)
&& (fftPlan->inStride[0] == 1) && (fftPlan->outStride[0] == 1)
&& (fftPlan->length[0] > 4096) && (fftPlan->length.size() == 1) )
{
@@ -1155,51 +1201,29 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
col2Plan->inStride.push_back(1);
col2Plan->iDist = length0 * length1;
- if (colTPlan->planX)
- {
- col2Plan->large1D = fftPlan->length[0];
- col2Plan->twiddleFront = true;
- }
-
- if ((fftPlan->outputLayout == CLFFT_HERMITIAN_INTERLEAVED) ||
- (fftPlan->outputLayout == CLFFT_HERMITIAN_PLANAR))
- {
- col2Plan->placeness = CLFFT_INPLACE;
- col2Plan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
- col2Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ // make sure colTPlan (first column plan) does not recurse, otherwise large twiddle mul
+ // cannot be done with this algorithm sequence
+ assert(colTPlan->planX == 0);
- col2Plan->outStride[0] = length1;
- col2Plan->outStride.push_back(1);
- col2Plan->oDist = length0 * length1;
- for (size_t index = 1; index < fftPlan->length.size(); index++)
- {
- col2Plan->length.push_back(fftPlan->length[index]);
- col2Plan->inStride.push_back(col2Plan->iDist);
- col2Plan->outStride.push_back(col2Plan->oDist);
- col2Plan->iDist *= fftPlan->length[index];
- col2Plan->oDist *= fftPlan->length[index];
- }
- }
- else
- {
- col2Plan->placeness = CLFFT_OUTOFPLACE;
- col2Plan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
- col2Plan->outputLayout = fftPlan->outputLayout;
+ col2Plan->placeness = CLFFT_INPLACE;
+ col2Plan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ col2Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
- col2Plan->outStride[0] = length1*fftPlan->outStride[0];
- col2Plan->outStride.push_back(fftPlan->outStride[0]);
- col2Plan->oDist = fftPlan->oDist;
+ col2Plan->outStride[0] = length1;
+ col2Plan->outStride.push_back(1);
+ col2Plan->oDist = length0 * length1;
- for (size_t index = 1; index < fftPlan->length.size(); index++)
- {
- col2Plan->length.push_back(fftPlan->length[index]);
- col2Plan->inStride.push_back(col2Plan->iDist);
- col2Plan->outStride.push_back(fftPlan->outStride[index]);
- col2Plan->iDist *= fftPlan->length[index];
- }
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ col2Plan->length.push_back(fftPlan->length[index]);
+ col2Plan->inStride.push_back(col2Plan->iDist);
+ col2Plan->outStride.push_back(col2Plan->oDist);
+ col2Plan->iDist *= fftPlan->length[index];
+ col2Plan->oDist *= fftPlan->length[index];
}
+
OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan large1d second column plan failed" ) );
if ( (fftPlan->outputLayout == CLFFT_HERMITIAN_INTERLEAVED) ||
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index bd8bc5d..354c1ed 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -228,29 +228,19 @@ clfftStatus clfftEnqueueTransform(
cl_mem *out_local;
out_local = (fftPlan->placeness==CLFFT_INPLACE) ? clInputBuffers : clOutputBuffers;
- if ((fftPlan->outputLayout == CLFFT_HERMITIAN_INTERLEAVED) ||
- (fftPlan->outputLayout == CLFFT_HERMITIAN_PLANAR))
- {
- // another column FFT output, INPLACE
- OPENCL_V(clfftEnqueueTransform(fftPlan->planY, CLFFT_FORWARD, numQueuesAndEvents, commQueues, 1, &colOutEvents,
&copyInEvents, &(fftPlan->intBufferRC), &(fftPlan->intBufferRC), localIntBuffer),
- _T("clfftEnqueueTransform large1D second column failed"));
- clReleaseEvent(colOutEvents);
- // copy from full complex to hermitian
- OPENCL_V(clfftEnqueueTransform(fftPlan->planRCcopy, CLFFT_FORWARD, numQueuesAndEvents, commQueues, 1, &copyInEvents,
- outEvents, &(fftPlan->intBufferRC), out_local, localIntBuffer),
- _T("clfftEnqueueTransform large1D RC copy failed"));
- clReleaseEvent(copyInEvents);
- }
- else
- {
- // another column FFT output, OUTOFPLACE
- OPENCL_V(clfftEnqueueTransform(fftPlan->planY, CLFFT_FORWARD, numQueuesAndEvents, commQueues, 1, &colOutEvents,
- outEvents, &(fftPlan->intBufferRC), out_local, localIntBuffer),
- _T("clfftEnqueueTransform large1D second column failed"));
- clReleaseEvent(colOutEvents);
- }
+ // another column FFT output, INPLACE
+ OPENCL_V(clfftEnqueueTransform(fftPlan->planY, CLFFT_FORWARD, numQueuesAndEvents, commQueues, 1, &colOutEvents,
&copyInEvents, &(fftPlan->intBufferRC), &(fftPlan->intBufferRC), localIntBuffer),
+ _T("clfftEnqueueTransform large1D second column failed"));
+ clReleaseEvent(colOutEvents);
+
+ // copy from full complex to hermitian
+ OPENCL_V(clfftEnqueueTransform(fftPlan->planRCcopy, CLFFT_FORWARD, numQueuesAndEvents, commQueues, 1, &copyInEvents,
+ outEvents, &(fftPlan->intBufferRC), out_local, localIntBuffer),
+ _T("clfftEnqueueTransform large1D RC copy failed"));
+ clReleaseEvent(copyInEvents);
+
}
else if( fftPlan->outputLayout == CLFFT_REAL )
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list