[clfft] 15/128: Precallback - C2R 2D and 3D SP
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:33 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master in repository clfft.
commit 0c5763820ba85b6f1a2ee0725a985216065e69e7
Author: Pradeep <pradeep.rao at amd.com>
Date: Fri Aug 14 19:23:03 2015 +0530
Precallback - C2R 2D and 3D SP
---
src/client-callback/callback-client.cpp | 18 ++++++++++--------
src/library/generator.transpose.gcn.cpp | 12 ++++++------
src/library/plan.cpp | 31 +++++++++++++++++++++++++++++++
3 files changed, 47 insertions(+), 14 deletions(-)
diff --git a/src/client-callback/callback-client.cpp b/src/client-callback/callback-client.cpp
index 973237e..2b1f810 100644
--- a/src/client-callback/callback-client.cpp
+++ b/src/client-callback/callback-client.cpp
@@ -206,7 +206,7 @@ bool compare(T1 *refData, std::valarray< T2 > real,
{
size_t p0 = p1 + i * o_strides[0];
- diff = refData[p0] - (real[p0] * lengths[0]);
+ diff = refData[p0] - (real[p0] * lengths[0] * lengths[1] * lengths[2]);
error += (float)(diff * diff);
ref += refData[p0] * refData[p0];
}
@@ -326,19 +326,22 @@ fftw_complex* get_fftw_output(size_t* lengths, const size_t *inStrides, const si
// Compute C2R reference output using fftw for float type
float* get_fftwf_output_c2r(size_t* lengths, size_t *strides, const size_t *inStrides, const size_t *outStrides, size_t batch_size,
size_t fftBatchSize, size_t outfftBatchSize, size_t fftVectorSizePadded, clfftLayout in_layout,
- size_t outfftVectorSizePadded, size_t outfftVectorSize, clfftDim dim, clfftDirection dir)
+ size_t outfftVectorSizePadded, size_t outfftVectorSize, clfftDim dim, clfftDirection dir, clfftResultLocation place)
{
//In FFTW last dimension has the fastest changing index
int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
-
+ int inembed[3] = {(int)lengths[2], (int)lengths[1], (int)(lengths[0]/2 + 1)};
+ int lsd = (place == CLFFT_INPLACE) ? (int)(lengths[0]/2 + 1)*2 : (int)(lengths[0]);
+ int outembed[3] = {(int)lengths[2], (int)lengths[1], lsd};
+
fftwf_plan refPlan;
fftwf_complex *refin = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*fftBatchSize);
float *refout = (float*) malloc(sizeof(float)*outfftBatchSize);
refPlan = fftwf_plan_many_dft_c2r(dim, &fftwLengths[3 - dim], (int)batch_size,
- refin, &fftwLengths[3 - dim], (int)inStrides[0], (int)fftVectorSizePadded,
- refout, &fftwLengths[3 - dim], (int)outStrides[0], (int)outfftVectorSizePadded,
+ refin, &inembed[3 - dim], (int)inStrides[0], (int)fftVectorSizePadded,
+ refout, &outembed[3 - dim], (int)outStrides[0], (int)outfftVectorSizePadded,
FFTW_ESTIMATE);
// set zero
@@ -1027,7 +1030,7 @@ void compareWithReference(clfftLayout in_layout, clfftLayout out_layout, size_t
float *refout;
refout = get_fftwf_output_c2r(lengths, strides, inStrides, outStrides, batch_size, fftBatchSize, outfftBatchSize, fftVectorSizePadded,
- in_layout, outfftVectorSizePadded, outfftVectorSize, dim, dir);
+ in_layout, outfftVectorSizePadded, outfftVectorSize, dim, dir, place);
if (!compare<float, T>(refout, real, batch_size, o_strides, lengths))
checkflag = true;
@@ -1060,8 +1063,7 @@ void compareWithReference(clfftLayout in_layout, clfftLayout out_layout, size_t
{
checkflag = true;
break;
- }
-
+ }
}
}
}
diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index aebdfdc..7740592 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -557,6 +557,11 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
size_t wIndexXEnd = params.transOutHorizontal ? params.fft_N[1] % blockSize.y : params.fft_N[0] % blockSize.x;
size_t wIndexYEnd = params.transOutHorizontal ? params.fft_N[0] % blockSize.x : params.fft_N[1] % blockSize.y;
+ //If precallback is set
+ if (params.fft_hasPreCallback)
+ {
+ clKernWrite( transKernel, 3 ) << dtComplex << " retCallback;" << std::endl;
+ }
for(size_t i = 0; i<branchBlocks; i++)
{
@@ -607,12 +612,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
clKernWrite( transKernel, 3 ) << "{" << std::endl;
}
- //If precallback is set
- if (params.fft_hasPreCallback)
- {
- clKernWrite( transKernel, 6 ) << dtComplex << " retCallback;" << std::endl;
- }
-
+
clKernWrite( transKernel, 6 ) << "for( uint t=0; t < wgUnroll; t++ )" << std::endl;
clKernWrite( transKernel, 6 ) << "{" << std::endl;
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 7ad7845..26c7af6 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -2466,6 +2466,14 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans1Plan->oDist *= fftPlan->length[index];
}
+ //Set callback data if set on top level plan
+ if (fftPlan->hasPreCallback)
+ {
+ trans1Plan->hasPreCallback = true;
+ trans1Plan->preCallback = fftPlan->preCallback;
+ trans1Plan->precallUserData = fftPlan->precallUserData;
+ }
+
OPENCL_V(clfftBakePlan(fftPlan->planTY, numQueues, commQueueFFT, NULL, NULL ),
_T( "BakePlan for planTY failed" ) );
@@ -2708,6 +2716,14 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
colPlan->batchsize = fftPlan->batchsize;
+ //Set callback data if set on top level plan
+ if (fftPlan->hasPreCallback)
+ {
+ colPlan->hasPreCallback = true;
+ colPlan->preCallback = fftPlan->preCallback;
+ colPlan->precallUserData = fftPlan->precallUserData;
+ }
+
OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planY failed" ) );
// create row plan
@@ -3313,6 +3329,14 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans1Plan->oDist *= fftPlan->length[index];
}
+ //Set callback data if set on top level plan
+ if (fftPlan->hasPreCallback)
+ {
+ trans1Plan->hasPreCallback = true;
+ trans1Plan->preCallback = fftPlan->preCallback;
+ trans1Plan->precallUserData = fftPlan->precallUserData;
+ }
+
OPENCL_V(clfftBakePlan(fftPlan->planTZ, numQueues, commQueueFFT, NULL, NULL ),
_T( "BakePlan for planTZ failed" ) );
@@ -3555,6 +3579,13 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
colPlan->batchsize = fftPlan->batchsize;
+ //Set callback data if set on top level plan
+ if (fftPlan->hasPreCallback)
+ {
+ colPlan->hasPreCallback = true;
+ colPlan->preCallback = fftPlan->preCallback;
+ colPlan->precallUserData = fftPlan->precallUserData;
+ }
OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list