[clfft] 84/107: cleaning up real backward; updating the C2R logic and temp memory use
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:39 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit a2e4b0b040cb2f0293786a4b10d1d250e43161b6
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Sat Apr 18 21:48:02 2015 -0500
cleaning up real backward; updating the C2R logic and temp memory use
---
src/library/plan.cpp | 282 ++++++++++++++++++++++++++++++----------------
src/library/plan.h | 4 +-
src/library/transform.cpp | 27 +++--
3 files changed, 209 insertions(+), 104 deletions(-)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 112a542..e1da880 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -2102,11 +2102,11 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
colPlan->iDist = rowPlan->oDist;
//this 2d is decomposed from 3d
- if (fftPlan->length.size()>2)
+ for (size_t index=2; index < fftPlan->length.size(); index++)
{
- colPlan->length.push_back(fftPlan->length[2]);
- colPlan->outStride.push_back(fftPlan->outStride[2]);
- colPlan->inStride.push_back(rowPlan->outStride[2]);
+ colPlan->length.push_back(fftPlan->length[index]);
+ colPlan->outStride.push_back(fftPlan->outStride[index]);
+ colPlan->inStride.push_back(rowPlan->outStride[index]);
}
OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planY failed" ) );
@@ -2122,7 +2122,13 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
if (fftPlan->tmpBufSize==0)
{
fftPlan->tmpBufSize = Nt * length1 * fftPlan->batchsize * fftPlan->ElementSize();
- if(fftPlan->length.size() > 2) fftPlan->tmpBufSize *= fftPlan->length[2];
+ for (size_t index=2; index < fftPlan->length.size(); index++)
+ fftPlan->tmpBufSize *= fftPlan->length[index];
+ }
+
+ if ((fftPlan->tmpBufSizeC2R==0) && (fftPlan->placeness == CLFFT_OUTOFPLACE))
+ {
+ fftPlan->tmpBufSizeC2R = fftPlan->tmpBufSize;
}
// create col plan
@@ -2135,6 +2141,25 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
lockRAII* colLock = NULL;
OPENCL_V( fftRepo.getPlan( fftPlan->planY, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+ switch(fftPlan->inputLayout)
+ {
+ case CLFFT_HERMITIAN_INTERLEAVED:
+ {
+ colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ colPlan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ }
+ break;
+ case CLFFT_HERMITIAN_PLANAR:
+ {
+ colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ colPlan->inputLayout = CLFFT_COMPLEX_PLANAR;
+ }
+ break;
+ default: assert(false);
+ }
+
+
colPlan->length.push_back(Nt);
colPlan->inStride[0] = fftPlan->inStride[1];
@@ -2142,17 +2167,20 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
colPlan->iDist = fftPlan->iDist;
- //this 2d is decomposed from 3d
- if (fftPlan->length.size()>2)
+ if (fftPlan->placeness == CLFFT_INPLACE)
{
colPlan->placeness = CLFFT_INPLACE;
- colPlan->length.push_back(fftPlan->length[2]);
- colPlan->inStride.push_back(fftPlan->inStride[2]);
colPlan->outStride[0] = colPlan->inStride[0];
colPlan->outStride.push_back(colPlan->inStride[1]);
- colPlan->outStride.push_back(colPlan->inStride[2]);
- colPlan->oDist = fftPlan->iDist;
+ colPlan->oDist = colPlan->iDist;
+
+ for (size_t index=2; index < fftPlan->length.size(); index++)
+ {
+ colPlan->length.push_back(fftPlan->length[index]);
+ colPlan->inStride.push_back(fftPlan->inStride[index]);
+ colPlan->outStride.push_back(fftPlan->inStride[index]);
+ }
}
else
{
@@ -2161,43 +2189,26 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
colPlan->outStride[0] = Nt;
colPlan->outStride.push_back(1);
colPlan->oDist = Nt*length1;
- }
-
- switch(fftPlan->inputLayout)
- {
- case CLFFT_HERMITIAN_INTERLEAVED:
- {
- colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
- colPlan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
- }
- break;
- case CLFFT_HERMITIAN_PLANAR:
+ for (size_t index=2; index < fftPlan->length.size(); index++)
{
- colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
- colPlan->inputLayout = CLFFT_COMPLEX_PLANAR;
+ colPlan->length.push_back(fftPlan->length[index]);
+ colPlan->inStride.push_back(fftPlan->inStride[index]);
+ colPlan->outStride.push_back(colPlan->oDist);
+ colPlan->oDist *= fftPlan->length[index];
}
- break;
- default: assert(false);
}
-
colPlan->precision = fftPlan->precision;
colPlan->forwardScale = 1.0f;
colPlan->backwardScale = 1.0f;
- colPlan->tmpBufSize = fftPlan->tmpBufSize;
+ colPlan->tmpBufSize = 0;
colPlan->gen = fftPlan->gen;
colPlan->envelope = fftPlan->envelope;
colPlan->batchsize = fftPlan->batchsize;
- if ((fftPlan->tmpBufSizeC2R==0) && (length1 > Large1DThreshold) && (fftPlan->length.size()<=2))
- {
- fftPlan->tmpBufSizeC2R = Nt * length1 * fftPlan->batchsize * fftPlan->ElementSize();
- if(fftPlan->length.size() > 2) fftPlan->tmpBufSizeC2R *= fftPlan->length[2];
- }
-
OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planY failed" ) );
// create row plan
@@ -2211,38 +2222,58 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
lockRAII* rowLock = NULL;
OPENCL_V( fftRepo.getPlan( fftPlan->planX, rowPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
-
rowPlan->outputLayout = fftPlan->outputLayout;
rowPlan->inputLayout = CLFFT_HERMITIAN_INTERLEAVED;
- rowPlan->placeness = CLFFT_OUTOFPLACE;
+
rowPlan->length.push_back(length1);
- rowPlan->inStride[0] = 1;
- rowPlan->inStride.push_back(Nt);
- rowPlan->iDist = colPlan->oDist;
+ rowPlan->outStride[0] = fftPlan->outStride[0];
+ rowPlan->outStride.push_back(fftPlan->outStride[1]);
+ rowPlan->oDist = fftPlan->oDist;
+
+ if (fftPlan->placeness == CLFFT_INPLACE)
+ {
+ rowPlan->placeness = CLFFT_INPLACE;
+
+ rowPlan->inStride[0] = colPlan->outStride[1];
+ rowPlan->inStride.push_back(colPlan->outStride[0]);
+ rowPlan->iDist = colPlan->oDist;
+
+ for (size_t index=2; index < fftPlan->length.size(); index++)
+ {
+ rowPlan->length.push_back(fftPlan->length[index]);
+ rowPlan->inStride.push_back(colPlan->outStride[index]);
+ rowPlan->outStride.push_back(fftPlan->outStride[index]);
+ }
+ }
+ else
+ {
+ rowPlan->placeness = CLFFT_OUTOFPLACE;
+
+ rowPlan->inStride[0] = 1;
+ rowPlan->inStride.push_back(Nt);
+ rowPlan->iDist = Nt*length1;
+
+ for (size_t index=2; index < fftPlan->length.size(); index++)
+ {
+ rowPlan->length.push_back(fftPlan->length[index]);
+ rowPlan->outStride.push_back(fftPlan->outStride[index]);
+ rowPlan->inStride.push_back(rowPlan->iDist);
+ rowPlan->iDist *= fftPlan->length[index];
+ }
+ }
+
rowPlan->precision = fftPlan->precision;
rowPlan->forwardScale = fftPlan->forwardScale;
rowPlan->backwardScale = fftPlan->backwardScale;
- rowPlan->tmpBufSize = fftPlan->tmpBufSize;
+ rowPlan->tmpBufSize = 0;
rowPlan->gen = fftPlan->gen;
- rowPlan->envelope = fftPlan->envelope;
+ rowPlan->envelope = fftPlan->envelope;
rowPlan->batchsize = fftPlan->batchsize;
- rowPlan->outStride[0] = fftPlan->outStride[0];
- rowPlan->outStride.push_back(fftPlan->outStride[1]);
- rowPlan->oDist = fftPlan->oDist;
-
- //this 2d is decomposed from 3d
- if (fftPlan->length.size()>2)
- {
- rowPlan->length.push_back(fftPlan->length[2]);
- rowPlan->inStride.push_back(Nt*length1);
- rowPlan->outStride.push_back(fftPlan->outStride[2]);
- }
-
OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planX failed" ) );
}
@@ -2691,16 +2722,27 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
}
else if(fftPlan->outputLayout == CLFFT_REAL)
{
+ size_t length0 = fftPlan->length[ DimX ];
+ size_t length1 = fftPlan->length[ DimY ];
+ size_t length2 = fftPlan->length[ DimZ ];
+
+ size_t Nt = (1 + length0/2);
+
if (fftPlan->tmpBufSize == 0)
{
- fftPlan->tmpBufSize = fftPlan->length[2] * fftPlan->length[1] * (1 + fftPlan->length[0]/2);
- fftPlan->tmpBufSize *= fftPlan->batchsize * fftPlan->ElementSize();
+ fftPlan->tmpBufSize = Nt * length1 * length2 * fftPlan->batchsize * fftPlan->ElementSize();
+ for (size_t index=2; index < fftPlan->length.size(); index++)
+ fftPlan->tmpBufSize *= fftPlan->length[index];
}
- size_t clLengths[] = { 1, 1, 0 };
+ if ((fftPlan->tmpBufSizeC2R==0) && (fftPlan->placeness == CLFFT_OUTOFPLACE))
+ {
+ fftPlan->tmpBufSizeC2R = fftPlan->tmpBufSize;
+ }
+
+ size_t clLengths[] = { 1, 0, 0 };
clLengths[0] = fftPlan->length[ DimZ ];
- clLengths[1] = clLengths[2] = 0;
//create 1D col plan
OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planZ, fftPlan->context, CLFFT_1D, clLengths ),
@@ -2727,40 +2769,64 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
default: assert(false);
}
- colPlan->placeness = CLFFT_OUTOFPLACE;
+ colPlan->length.push_back(Nt);
+ colPlan->length.push_back(length1);
+
+ colPlan->inStride[0] = fftPlan->inStride[2];
+ colPlan->inStride.push_back(fftPlan->inStride[0]);
+ colPlan->inStride.push_back(fftPlan->inStride[1]);
+ colPlan->iDist = fftPlan->iDist;
+
+
+ if (fftPlan->placeness == CLFFT_INPLACE)
+ {
+ colPlan->placeness = CLFFT_INPLACE;
+
+ colPlan->outStride[0] = colPlan->inStride[0];
+ colPlan->outStride.push_back(colPlan->inStride[1]);
+ colPlan->outStride.push_back(colPlan->inStride[2]);
+ colPlan->oDist = colPlan->iDist;
+
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ colPlan->length.push_back(fftPlan->length[index]);
+ colPlan->inStride.push_back(fftPlan->inStride[index]);
+ colPlan->outStride.push_back(fftPlan->inStride[index]);
+ }
+ }
+ else
+ {
+ colPlan->placeness = CLFFT_OUTOFPLACE;
+
+ colPlan->outStride[0] = Nt*length1;
+ colPlan->outStride.push_back(1);
+ colPlan->outStride.push_back(Nt);
+ colPlan->oDist = Nt*length1*length2;
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ colPlan->length.push_back(fftPlan->length[index]);
+ colPlan->inStride.push_back(fftPlan->inStride[index]);
+ colPlan->outStride.push_back(colPlan->oDist);
+ colPlan->oDist *= fftPlan->length[index];
+ }
+ }
+
+
colPlan->precision = fftPlan->precision;
colPlan->forwardScale = 1.0f;
colPlan->backwardScale = 1.0f;
- colPlan->tmpBufSize = fftPlan->tmpBufSize;
+ colPlan->tmpBufSize = 0;
colPlan->gen = fftPlan->gen;
- colPlan->envelope = fftPlan->envelope;
+ colPlan->envelope = fftPlan->envelope;
- // This is a column FFT, the first elements distance between each FFT is the distance of the first two
- // elements in the original buffer. Like a transpose of the matrix
colPlan->batchsize = fftPlan->batchsize;
- colPlan->inStride[0] = fftPlan->inStride[2];
- colPlan->outStride[0] = fftPlan->length[1] * (1 + fftPlan->length[0]/2);
-
- //pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
- colPlan->length.push_back(1 + fftPlan->length[0]/2);
- colPlan->length.push_back(fftPlan->length[1]);
- colPlan->inStride.push_back(fftPlan->inStride[0]);
- colPlan->inStride.push_back(fftPlan->inStride[1]);
- colPlan->outStride.push_back(1);
- colPlan->outStride.push_back(1 + fftPlan->length[0]/2);
- colPlan->iDist = fftPlan->iDist;
- colPlan->oDist = fftPlan->length[2] * fftPlan->length[1] * (1 + fftPlan->length[0]/2);
-
- if ((fftPlan->tmpBufSizeC2R==0) && ((fftPlan->length[2] > Large1DThreshold) || (fftPlan->length[1] > Large1DThreshold)))
- {
- fftPlan->tmpBufSizeC2R = (1 + fftPlan->length[0]/2) * (fftPlan->length[1]) * (fftPlan->length[2]) *
- fftPlan->batchsize * fftPlan->ElementSize();
- }
+
OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
+
clLengths[0] = fftPlan->length[ DimX ];
clLengths[1] = fftPlan->length[ DimY ];
@@ -2775,7 +2841,47 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
xyPlan->inputLayout = CLFFT_HERMITIAN_INTERLEAVED;
xyPlan->outputLayout = fftPlan->outputLayout;
- xyPlan->placeness = CLFFT_OUTOFPLACE;
+ xyPlan->length.push_back(length2);
+
+ xyPlan->outStride[0] = fftPlan->outStride[0];
+ xyPlan->outStride[1] = fftPlan->outStride[1];
+ xyPlan->outStride.push_back(fftPlan->outStride[2]);
+ xyPlan->oDist = fftPlan->oDist;
+
+ if (fftPlan->placeness == CLFFT_INPLACE)
+ {
+ xyPlan->placeness = CLFFT_INPLACE;
+
+ xyPlan->inStride[0] = colPlan->outStride[1];
+ xyPlan->inStride[1] = colPlan->outStride[2];
+ xyPlan->inStride.push_back(colPlan->outStride[0]);
+ xyPlan->iDist = colPlan->oDist;
+
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ xyPlan->length.push_back(fftPlan->length[index]);
+ xyPlan->inStride.push_back(colPlan->outStride[index]);
+ xyPlan->outStride.push_back(fftPlan->outStride[index]);
+ }
+ }
+ else
+ {
+ xyPlan->placeness = CLFFT_OUTOFPLACE;
+
+ xyPlan->inStride[0] = 1;
+ xyPlan->inStride[1] = Nt;
+ xyPlan->inStride.push_back(Nt*length1);
+ xyPlan->iDist = Nt*length1*length2;
+
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ xyPlan->length.push_back(fftPlan->length[index]);
+ xyPlan->outStride.push_back(fftPlan->outStride[index]);
+ xyPlan->inStride.push_back(xyPlan->iDist);
+ xyPlan->iDist *= fftPlan->length[index];
+ }
+ }
+
xyPlan->precision = fftPlan->precision;
xyPlan->forwardScale = fftPlan->forwardScale;
@@ -2783,22 +2889,10 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
xyPlan->tmpBufSize = fftPlan->tmpBufSize;
xyPlan->gen = fftPlan->gen;
- xyPlan->envelope = fftPlan->envelope;
+ xyPlan->envelope = fftPlan->envelope;
- // This is the xy fft, the first elements distance between the first two FFTs is the distance of the first elements
- // of the first two rows in the original buffer.
xyPlan->batchsize = fftPlan->batchsize;
- xyPlan->inStride[0] = 1;
- xyPlan->inStride[1] = (1 + fftPlan->length[0]/2);
- xyPlan->outStride[0] = fftPlan->outStride[0];
- xyPlan->outStride[1] = fftPlan->outStride[1];
- //pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
- xyPlan->length.push_back(fftPlan->length[2]);
- xyPlan->inStride.push_back(fftPlan->length[1] * (1 + fftPlan->length[0]/2));
- xyPlan->outStride.push_back(fftPlan->outStride[2]);
- xyPlan->iDist = colPlan->oDist;
- xyPlan->oDist = fftPlan->oDist;
OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->2D planX failed" ) );
}
diff --git a/src/library/plan.h b/src/library/plan.h
index 787755c..a250e08 100644
--- a/src/library/plan.h
+++ b/src/library/plan.h
@@ -369,7 +369,9 @@ public:
size_t tmpBufSizeRC;
cl_mem intBufferRC;
- // for C-to-R transforms with largeness in Y or Z dimension
+ // for C-to-R transforms that are OUTOFPLACE
+ // we need this because the user supplied output buffer is not big enough
+ // to hold intermediate results for any problem other than normal 1D
size_t tmpBufSizeC2R;
cl_mem intBufferC2R;
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index 8ca90ee..a2be264 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -742,16 +742,16 @@ clfftStatus clfftEnqueueTransform(
{
cl_mem *out_local, *int_local, *out_y;
- if(fftPlan->length.size() > 2)
+ if(fftPlan->placeness == CLFFT_INPLACE)
{
- out_local = clOutputBuffers;
+ out_local = NULL;
int_local = NULL;
out_y = clInputBuffers;
}
else
{
- out_local = (fftPlan->placeness==CLFFT_INPLACE) ? clInputBuffers : clOutputBuffers;
- int_local = fftPlan->tmpBufSizeC2R ? &(fftPlan->intBufferC2R) : &localIntBuffer;
+ out_local = clOutputBuffers;
+ int_local = &(fftPlan->intBufferC2R);
out_y = int_local;
}
@@ -904,11 +904,20 @@ clfftStatus clfftEnqueueTransform(
}
else if(fftPlan->outputLayout == CLFFT_REAL)
{
- cl_mem *out_local;
- out_local = (fftPlan->placeness==CLFFT_INPLACE) ? clInputBuffers : clOutputBuffers;
+ cl_mem *out_local, *int_local, *out_z;
- cl_mem *int_local;
- int_local = fftPlan->tmpBufSizeC2R ? &(fftPlan->intBufferC2R) : &localIntBuffer;
+ if(fftPlan->placeness == CLFFT_INPLACE)
+ {
+ out_local = NULL;
+ int_local = NULL;
+ out_z = clInputBuffers;
+ }
+ else
+ {
+ out_local = clOutputBuffers;
+ int_local = &(fftPlan->intBufferC2R);
+ out_z = int_local;
+ }
//deal with 1D Z column first
OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, numWaitEvents,
@@ -917,7 +926,7 @@ clfftStatus clfftEnqueueTransform(
//deal with 2D row
OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
- outEvents, int_local, out_local, localIntBuffer ),
+ outEvents, out_z, out_local, localIntBuffer ),
_T("clfftEnqueueTransform for 3D-XY row failed"));
}
else
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list