[clfft] 09/128: Precallback - C2C 2D and 3D support
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:33 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master in repository clfft.
commit bac0f46c7cd5ab74c21a0cbbd650fa50a57d486d
Author: Pradeep <pradeep.rao at amd.com>
Date: Wed Jul 29 11:50:28 2015 +0530
Precallback - C2C 2D and 3D support
---
src/client-callback/callback-client.cpp | 75 ++++++++++++++++++++++-----------
src/library/accessors.cpp | 2 +-
src/library/plan.cpp | 15 +++++++
3 files changed, 66 insertions(+), 26 deletions(-)
diff --git a/src/client-callback/callback-client.cpp b/src/client-callback/callback-client.cpp
index 6bcfbcf..336d4ee 100644
--- a/src/client-callback/callback-client.cpp
+++ b/src/client-callback/callback-client.cpp
@@ -48,6 +48,8 @@ bool compare(fftw_complex *refData, std::vector< std::complex< T > > data,
float error = 0.0f;
float ref = 0.0f;
float diff = 0.0f;
+ float normRef = 0.0f;
+ float normError = 0.0f;
for(int i = 0; i < length; ++i)
{
@@ -55,16 +57,19 @@ bool compare(fftw_complex *refData, std::vector< std::complex< T > > data,
error += diff * diff;
ref += refData[i][0] * refData[i][0];
}
- float normRef =::sqrtf((float) ref);
- if (::fabs((float) ref) < 1e-7f)
- {
- return false;
- }
- float normError = ::sqrtf((float) error);
- error = normError / normRef;
+ if (error != 0)
+ {
+ normRef =::sqrtf((float) ref);
+ if (::fabs((float) ref) < 1e-7f)
+ {
+ return false;
+ }
+ normError = ::sqrtf((float) error);
+ error = normError / normRef;
- if (error > epsilon)
- return false;
+ if (error > epsilon)
+ return false;
+ }
//imag
error = 0.0f;
@@ -75,6 +80,10 @@ bool compare(fftw_complex *refData, std::vector< std::complex< T > > data,
error += diff * diff;
ref += refData[i][1] * refData[i][1];
}
+
+ if (error == 0)
+ return true;
+
normRef =::sqrtf((float) ref);
if (::fabs((float) ref) < 1e-7f)
{
@@ -96,6 +105,8 @@ bool compare(fftw_complex *refData, std::valarray< T > real, std::valarray< T >
float error = 0.0f;
float ref = 0.0f;
float diff = 0.0f;
+ float normRef = 0.0f;
+ float normError = 0.0f;
//real compare
for(int i = 0; i < length; ++i)
@@ -104,16 +115,19 @@ bool compare(fftw_complex *refData, std::valarray< T > real, std::valarray< T >
error += diff * diff;
ref += refData[i][0] * refData[i][0];
}
- float normRef =::sqrtf((float) ref);
- if (::fabs((float) ref) < 1e-7f)
- {
- return false;
- }
- float normError = ::sqrtf((float) error);
- error = normError / normRef;
+ if (error != 0)
+ {
+ normRef =::sqrtf((float) ref);
+ if (::fabs((float) ref) < 1e-7f)
+ {
+ return false;
+ }
+ normError = ::sqrtf((float) error);
+ error = normError / normRef;
- if (error > epsilon)
- return false;
+ if (error > epsilon)
+ return false;
+ }
//imag compare
error = 0.0f;
@@ -125,6 +139,10 @@ bool compare(fftw_complex *refData, std::valarray< T > real, std::valarray< T >
error += diff * diff;
ref += refData[i][1] * refData[i][1];
}
+
+ if (error == 0)
+ return true;
+
normRef =::sqrtf((float) ref);
if (::fabs((float) ref) < 1e-7f)
{
@@ -250,9 +268,9 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
return 1;
}
- if (hasPrecallback && (dim != CLFFT_1D || sizeof(T) != sizeof(float)))
+ if (hasPrecallback && (sizeof(T) != sizeof(float)))
{
- terr << _T("Pre-callback feature is currently supported only for Single Precision 1D FFT and size upto 4096" ) << std::endl;
+ terr << _T("Pre-callback feature is currently supported only for Single Precision FFT " ) << std::endl;
return 1;
}
@@ -403,7 +421,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
//Check for Precallback
//Currently test includes only for 1D
- if (hasPrecallback && dim == CLFFT_1D)
+ if (hasPrecallback)
{
int precallbakType = PRECALLBACKTYPE;
cl_mem userdata;
@@ -569,7 +587,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
}
//check output data
- if (hasPrecallback && dim == CLFFT_1D)
+ if (hasPrecallback)
{
switch(in_layout)
{
@@ -580,8 +598,12 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
refin = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*fftBatchSize);
refout = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*outfftBatchSize);
- refPlan = fftw_plan_many_dft(1, (const int*)lengths, batch_size, refin, 0, inStrides[0], fftVectorSizePadded, refout, 0, outStrides[0], outfftVectorSizePadded, dir, FFTW_ESTIMATE);
+ //In FFTW last dimension has the fastest changing index
+ int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
+ refPlan = fftw_plan_many_dft(dim, &fftwLengths[3 - dim], batch_size, refin, &fftwLengths[3 - dim], inStrides[0], fftVectorSizePadded, refout, &fftwLengths[3 - dim]
+ , outStrides[0], outfftVectorSizePadded, dir, FFTW_ESTIMATE);
+
int scalar;
for( cl_uint i = 0; i < fftBatchSize; i = i + inStrides[0])
{
@@ -669,7 +691,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
}
// Check output data
- if (hasPrecallback && dim == CLFFT_1D)
+ if (hasPrecallback)
{
switch(in_layout)
{
@@ -680,8 +702,11 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
refin = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*fftBatchSize);
refout = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*outfftBatchSize);
- refPlan = fftw_plan_many_dft(1, (const int*)lengths, batch_size, refin, 0, inStrides[0], fftVectorSizePadded, refout, 0, outStrides[0], outfftVectorSizePadded, dir, FFTW_ESTIMATE);
+ //In FFTW last dimension has the fastest changing index
+ int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
+ refPlan = fftw_plan_many_dft(dim, &fftwLengths[3 - dim], batch_size, refin, &fftwLengths[3 - dim], inStrides[0], fftVectorSizePadded, refout, &fftwLengths[3 - dim]
+ , outStrides[0], outfftVectorSizePadded, dir, FFTW_ESTIMATE);
int scalar;
for( cl_uint i = 0; i < fftBatchSize; i = i + inStrides[0])
{
diff --git a/src/library/accessors.cpp b/src/library/accessors.cpp
index c47075e..cbd06fc 100644
--- a/src/library/accessors.cpp
+++ b/src/library/accessors.cpp
@@ -781,7 +781,7 @@ clfftStatus clFFTSetPlanCallback(clfftPlanHandle plHandle, const char* funcName,
if (callbackType == PRECALLBACK)
{
- if (fftPlan->dim == CLFFT_1D && (fftPlan->inputLayout == CLFFT_COMPLEX_INTERLEAVED || fftPlan->inputLayout == CLFFT_COMPLEX_PLANAR))
+ if (fftPlan->inputLayout == CLFFT_COMPLEX_INTERLEAVED || fftPlan->inputLayout == CLFFT_COMPLEX_PLANAR)
{
if (funcName != NULL && funcString != NULL)
{
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index d0a0a84..0e3b330 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -2829,6 +2829,13 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
rowPlan->iDist = fftPlan->iDist;
+ //Set callback data if set on top level plan
+ if (fftPlan->hasPreCallback)
+ {
+ rowPlan->hasPreCallback = true;
+ rowPlan->preCallback = fftPlan->preCallback;
+ rowPlan->precallUserData = fftPlan->precallUserData;
+ }
OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planX failed" ) );
@@ -3664,6 +3671,14 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
xyPlan->iDist = fftPlan->iDist;
xyPlan->oDist = fftPlan->oDist;
+ //Set callback data if set on top level plan
+ if (fftPlan->hasPreCallback)
+ {
+ xyPlan->hasPreCallback = true;
+ xyPlan->preCallback = fftPlan->preCallback;
+ xyPlan->precallUserData = fftPlan->precallUserData;
+ }
+
OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->2D planX failed" ) );
clLengths[0] = fftPlan->length[ DimZ ];
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list