[clfft] 10/128: Precallback - C2C double precision updates
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:33 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 5e9b96bdf92716d6224b81760d7d350c40384c63
Author: Pradeep <pradeep.rao at amd.com>
Date: Thu Jul 30 12:26:19 2015 +0530
Precallback - C2C double precision updates
---
src/client-callback/callback-client.cpp | 243 +++++++++++++++++++++++---------
src/library/generator.stockham.cpp | 2 +-
src/library/plan.cpp | 8 ++
3 files changed, 185 insertions(+), 68 deletions(-)
diff --git a/src/client-callback/callback-client.cpp b/src/client-callback/callback-client.cpp
index 336d4ee..5fb6c9e 100644
--- a/src/client-callback/callback-client.cpp
+++ b/src/client-callback/callback-client.cpp
@@ -23,26 +23,43 @@ namespace po = boost::program_options;
return ret; \n \
}
+// Double-precision counterpart of MULVAL for interleaved complex input.
+// The macro text is the source of the pre-callback function "mulval"; it is passed
+// through STRINGIFY to build precallbackstr when the precision is not CLFFT_SINGLE.
+// The callback reads the int stored at userdata[offset] and returns the double2 element
+// at in[offset] multiplied by that int.
+#define MULVAL_DP double2 mulval(__global void* in, uint offset, __global void* userdata)\n \
+ { \n \
+ int scalar = *((__global int*)userdata + offset); \n \
+ double2 ret = *((__global double2*)in + offset) * scalar; \n \
+ return ret; \n \
+ }
+
+// Single-precision pre-callback source for planar complex input (real and imaginary parts
+// in separate buffers). The macro text is passed through STRINGIFY to build precallbackstr.
+// The callback reads the USER_DATA entry at userdata[offset], adds its scalar members and
+// multiplies both inRe[offset] and inIm[offset] by that sum, returning them as a float2.
#define MULVAL_PLANAR float2 mulval(__global void* inRe, __global void* inIm, uint offset, __global void* userdata)\n \
{ \n \
__global USER_DATA *data = ((__global USER_DATA *)userdata + offset); \n \
- int scalar = (int)data->scalar1 + (int)data->scalar2 + (int)data->scalar3; \n \
+ int scalar = (int)data->scalar1 + (int)data->scalar2; \n \
float2 ret; \n \
ret.x = *((__global float*)inRe + offset) * scalar; \n \
ret.y = *((__global float*)inIm + offset) * scalar; \n \
return ret; \n \
}
+// Double-precision counterpart of MULVAL_PLANAR for planar complex input.
+// The macro text is passed through STRINGIFY to build precallbackstr when the precision
+// is not CLFFT_SINGLE. The callback reads the USER_DATA entry at userdata[offset], sums
+// scalar1 and scalar2, and multiplies both inRe[offset] and inIm[offset] (read as double)
+// by that sum, returning them as a double2.
+#define MULVAL_PLANAR_DP double2 mulval(__global void* inRe, __global void* inIm, uint offset, __global void* userdata)\n \
+ { \n \
+ __global USER_DATA *data = ((__global USER_DATA *)userdata + offset); \n \
+ int scalar = (int)data->scalar1 + (int)data->scalar2; \n \
+ double2 ret; \n \
+ ret.x = *((__global double*)inRe + offset) * scalar; \n \
+ ret.y = *((__global double*)inIm + offset) * scalar; \n \
+ return ret; \n \
+ }
+
+// Definition of the per-element USER_DATA record read by the planar pre-callbacks.
+// Wrapping the typedef in a macro and expanding it right below declares the type for the
+// host code, which fills an array of USER_DATA and uploads it as the callback user data.
+// NOTE(review): presumably the same macro text is also supplied to the OpenCL program so the
+// kernel sees an identical layout -- confirm where STRUCT_USERDATA is stringified.
#define STRUCT_USERDATA typedef struct USER_DATA \
{ \
int scalar1; \
int scalar2; \
- int scalar3; \
} USER_DATA;
STRUCT_USERDATA
-template < typename T >
-bool compare(fftw_complex *refData, std::vector< std::complex< T > > data,
+//Compare reference and opencl output
+template < typename T1, typename T2>
+bool compare(T1 *refData, std::vector< std::complex< T2 > > data,
const int length, const float epsilon = 1e-6f)
{
float error = 0.0f;
@@ -98,8 +115,9 @@ bool compare(fftw_complex *refData, std::vector< std::complex< T > > data,
return true;
}
-template < typename T >
-bool compare(fftw_complex *refData, std::valarray< T > real, std::valarray< T > imag,
+//Compare reference and opencl output
+template < typename T1, typename T2 >
+bool compare(T1 *refData, std::valarray< T2 > real, std::valarray< T2 > imag,
const int length, const float epsilon = 1e-6f)
{
float error = 0.0f;
@@ -157,6 +175,98 @@ bool compare(fftw_complex *refData, std::valarray< T > real, std::valarray< T >
return true;
}
+// Compute the single-precision reference output with FFTW.
+//
+// Builds a reference input that mirrors what the pre-callback is expected to produce on the
+// device (real part = per-element scalar, imaginary part = 0), runs a batched
+// complex-to-complex FFTW transform on it and returns the transformed data.
+//
+// lengths                 transform lengths; entries 0..2 are read and reversed for FFTW
+// inStrides / outStrides  only element [0] is used, as the FFTW input / output stride
+// batch_size              number of transforms in the batch (FFTW "howmany")
+// fftBatchSize            number of complex elements allocated for the reference input
+// outfftBatchSize         number of complex elements allocated for the reference output
+// fftVectorSizePadded     distance between consecutive input transforms
+// in_layout               selects how the per-element scalar is derived (interleaved / planar)
+// outfftVectorSizePadded  distance between consecutive output transforms
+// fftVectorSize           period of the scalar pattern (index modulo this value)
+// dim                     transform rank; the last `dim` entries of the reversed lengths are used
+// dir                     transform direction, passed to FFTW as the sign
+//
+// Returns a buffer of outfftBatchSize elements obtained from fftwf_malloc; the caller owns it
+// and must release it with fftwf_free.
+fftwf_complex* get_fftwf_output(size_t* lengths, const size_t *inStrides, const size_t *outStrides, size_t batch_size,
+                                size_t fftBatchSize, size_t outfftBatchSize, size_t fftVectorSizePadded, clfftLayout in_layout,
+                                size_t outfftVectorSizePadded, size_t fftVectorSize, clfftDim dim, clfftDirection dir)
+{
+    //In FFTW last dimension has the fastest changing index
+    int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
+
+    // Allocate with the single-precision (fftwf_) allocator so that every buffer is released by
+    // the matching fftwf_free, both here and in the caller that frees the returned pointer.
+    fftwf_complex *refin = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*fftBatchSize);
+    fftwf_complex *refout = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*outfftBatchSize);
+
+    fftwf_plan refPlan = fftwf_plan_many_dft(dim, &fftwLengths[3 - dim], batch_size,
+                                             refin, &fftwLengths[3 - dim], inStrides[0], fftVectorSizePadded,
+                                             refout, &fftwLengths[3 - dim], outStrides[0], outfftVectorSizePadded,
+                                             dir, FFTW_ESTIMATE);
+
+    // size_t index: the bound is a size_t, so a 32-bit counter could wrap before reaching it.
+    for( size_t i = 0; i < fftBatchSize; i = i + inStrides[0])
+    {
+        // Starts at 0 so an unexpected layout never reads an indeterminate value.
+        int scalar = 0;
+        switch (in_layout)
+        {
+        case CLFFT_COMPLEX_INTERLEAVED:
+            // Same value the host stores in the int user-data array.
+            scalar = SCALAR + (i % fftVectorSize);
+            break;
+        case CLFFT_COMPLEX_PLANAR:
+            // Sum of USER_DATA::scalar1 and USER_DATA::scalar2 as filled in by the host.
+            scalar = (SCALAR + (i % fftVectorSize)) + (SCALAR + (i % fftVectorSize) + 1);
+            break;
+        default:
+            break;
+        }
+
+        refin[i][0] = 1 * scalar;
+        refin[i][1] = 0 * scalar;
+    }
+
+    fftwf_execute(refPlan);
+
+    fftwf_free(refin);
+
+    fftwf_destroy_plan(refPlan);
+
+    return refout;
+}
+
+// Compute the double-precision reference output with FFTW.
+//
+// Builds a reference input that mirrors what the pre-callback is expected to produce on the
+// device (real part = per-element scalar, imaginary part = 0), runs a batched
+// complex-to-complex FFTW transform on it and returns the transformed data.
+//
+// lengths                 transform lengths; entries 0..2 are read and reversed for FFTW
+// inStrides / outStrides  only element [0] is used, as the FFTW input / output stride
+// batch_size              number of transforms in the batch (FFTW "howmany")
+// fftBatchSize            number of complex elements allocated for the reference input
+// outfftBatchSize         number of complex elements allocated for the reference output
+// fftVectorSizePadded     distance between consecutive input transforms
+// in_layout               selects how the per-element scalar is derived (interleaved / planar)
+// outfftVectorSizePadded  distance between consecutive output transforms
+// fftVectorSize           period of the scalar pattern (index modulo this value)
+// dim                     transform rank; the last `dim` entries of the reversed lengths are used
+// dir                     transform direction, passed to FFTW as the sign
+//
+// Returns a buffer of outfftBatchSize elements obtained from fftw_malloc; the caller owns it
+// and must release it with fftw_free.
+fftw_complex* get_fftw_output(size_t* lengths, const size_t *inStrides, const size_t *outStrides, size_t batch_size,
+                              size_t fftBatchSize, size_t outfftBatchSize, size_t fftVectorSizePadded, clfftLayout in_layout,
+                              size_t outfftVectorSizePadded, size_t fftVectorSize, clfftDim dim, clfftDirection dir)
+{
+    fftw_complex *refin = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*fftBatchSize);
+    fftw_complex *refout = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*outfftBatchSize);
+
+    //In FFTW last dimension has the fastest changing index
+    int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
+
+    fftw_plan refPlan = fftw_plan_many_dft(dim, &fftwLengths[3 - dim], batch_size,
+                                           refin, &fftwLengths[3 - dim], inStrides[0], fftVectorSizePadded,
+                                           refout, &fftwLengths[3 - dim], outStrides[0], outfftVectorSizePadded,
+                                           dir, FFTW_ESTIMATE);
+
+    // size_t index: the bound is a size_t, so a 32-bit counter could wrap before reaching it.
+    for( size_t i = 0; i < fftBatchSize; i = i + inStrides[0])
+    {
+        // Starts at 0 so an unexpected layout never reads an indeterminate value.
+        int scalar = 0;
+        switch (in_layout)
+        {
+        case CLFFT_COMPLEX_INTERLEAVED:
+            // Same value the host stores in the int user-data array.
+            scalar = SCALAR + (i % fftVectorSize);
+            break;
+        case CLFFT_COMPLEX_PLANAR:
+            // Sum of USER_DATA::scalar1 and USER_DATA::scalar2 as filled in by the host.
+            scalar = (SCALAR + (i % fftVectorSize)) + (SCALAR + (i % fftVectorSize) + 1);
+            break;
+        default:
+            break;
+        }
+
+        refin[i][0] = 1 * scalar;
+        refin[i][1] = 0 * scalar;
+    }
+
+    fftw_execute(refPlan);
+
+    fftw_free(refin);
+
+    fftw_destroy_plan(refPlan);
+
+    return refout;
+}
+
// This is used with the program_options class so that the user can type an integer on the command line
// and we store into an enum varaible
template<class _Elem, class _Traits>
@@ -268,9 +378,9 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
return 1;
}
- if (hasPrecallback && (sizeof(T) != sizeof(float)))
+ if (hasPrecallback && !(in_layout == CLFFT_COMPLEX_INTERLEAVED || in_layout == CLFFT_COMPLEX_PLANAR))
{
- terr << _T("Pre-callback feature is currently supported only for Single Precision FFT " ) << std::endl;
+ terr << _T("Pre-callback feature is currently supported only for Complex-Complex FFT " ) << std::endl;
return 1;
}
@@ -432,7 +542,8 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
{
case 1: //C2C 1D Interleaved
{
- char* precallbackstr = STRINGIFY(MULVAL);
+ char* precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL) : STRINGIFY(MULVAL_DP);
+
int *h_userdata = (int*)malloc(sizeof(int)*fftBatchSize);
for( cl_uint i = 0; i < fftBatchSize; i = i + inStrides[0])
{
@@ -455,13 +566,12 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
{
case 1: //C2C 1D PLANAR
{
- char* precallbackstr = STRINGIFY(MULVAL_PLANAR);
+ char* precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL_PLANAR) : STRINGIFY(MULVAL_PLANAR_DP);
USER_DATA *h_userdata = (USER_DATA*)malloc(sizeof(USER_DATA) * fftBatchSize);
for( cl_uint i = 0; i < fftBatchSize; i = i + inStrides[0])
{
h_userdata[i].scalar1 = SCALAR + (i % fftVectorSize);
h_userdata[i].scalar2 = SCALAR + (i % fftVectorSize) + 1;
- h_userdata[i].scalar3 = SCALAR + (i % fftVectorSize) + 2;
}
userdata = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(USER_DATA) * fftBatchSize, (void*)h_userdata, NULL);
@@ -593,39 +703,40 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
{
case CLFFT_COMPLEX_INTERLEAVED:
{
- fftw_complex *refin, *refout;
- fftw_plan refPlan;
- refin = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*fftBatchSize);
- refout = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*outfftBatchSize);
-
- //In FFTW last dimension has the fastest changing index
- int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
-
- refPlan = fftw_plan_many_dft(dim, &fftwLengths[3 - dim], batch_size, refin, &fftwLengths[3 - dim], inStrides[0], fftVectorSizePadded, refout, &fftwLengths[3 - dim]
- , outStrides[0], outfftVectorSizePadded, dir, FFTW_ESTIMATE);
-
- int scalar;
- for( cl_uint i = 0; i < fftBatchSize; i = i + inStrides[0])
+ if (precision == CLFFT_SINGLE)
{
- scalar = SCALAR + (i % fftVectorSize);
- refin[i][0] = 1 * scalar;
- refin[i][1] = 0 * scalar;
- }
+ fftwf_complex *refout;
- fftw_execute(refPlan);
+ refout = get_fftwf_output(lengths, inStrides, outStrides, batch_size, fftBatchSize, outfftBatchSize, fftVectorSizePadded,
+ in_layout, outfftVectorSizePadded, fftVectorSize, dim, dir);
- if (!compare(refout, output, outfftBatchSize))
- checkflag = true;
+ if (!compare(refout, output, outfftBatchSize))
+ checkflag = true;
- fftw_destroy_plan(refPlan);
+ //for( cl_uint i = 0; i < outfftBatchSize; i = i + outStrides[0])
+ //{
+ // std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
+ //}
- /*for( cl_uint i = 0; i < outfftBatchSize; i = i + outStrides[0])
+ fftwf_free(refout);
+ }
+ else if (precision == CLFFT_DOUBLE)
{
- std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
- }*/
+ fftw_complex *refout;
- fftw_free(refin);
- fftw_free(refout);
+ refout = get_fftw_output(lengths, inStrides, outStrides, batch_size, fftBatchSize, outfftBatchSize, fftVectorSizePadded,
+ in_layout, outfftVectorSizePadded, fftVectorSize, dim, dir);
+
+ if (!compare(refout, output, outfftBatchSize))
+ checkflag = true;
+
+ /*for( cl_uint i = 0; i < outfftBatchSize; i = i + outStrides[0])
+ {
+ std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
+ }*/
+
+ fftw_free(refout);
+ }
}
break;
}
@@ -658,10 +769,6 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
break;
}
}
- /*for( cl_uint i = 0; i < outfftBatchSize; i = i + outStrides[0])
- {
- std::cout << "i " << i << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
- }*/
}
}
break;
@@ -697,38 +804,40 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
{
case CLFFT_COMPLEX_PLANAR:
{
- fftw_complex *refin, *refout;
- fftw_plan refPlan;
- refin = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*fftBatchSize);
- refout = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*outfftBatchSize);
-
- //In FFTW last dimension has the fastest changing index
- int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
-
- refPlan = fftw_plan_many_dft(dim, &fftwLengths[3 - dim], batch_size, refin, &fftwLengths[3 - dim], inStrides[0], fftVectorSizePadded, refout, &fftwLengths[3 - dim]
- , outStrides[0], outfftVectorSizePadded, dir, FFTW_ESTIMATE);
- int scalar;
- for( cl_uint i = 0; i < fftBatchSize; i = i + inStrides[0])
+ if (precision == CLFFT_SINGLE)
{
- scalar = (SCALAR + (i % fftVectorSize)) + (SCALAR + (i % fftVectorSize) + 1) + (SCALAR + (i % fftVectorSize) + 2);
- refin[i][0] = 1 * scalar;
- refin[i][1] = 0 * scalar;
- }
-
- fftw_execute(refPlan);
+ fftwf_complex *refout;
- if (!compare(refout, real, imag, outfftBatchSize))
- checkflag = true;
+ refout = get_fftwf_output(lengths, inStrides, outStrides, batch_size, fftBatchSize, outfftBatchSize, fftVectorSizePadded,
+ in_layout, outfftVectorSizePadded, fftVectorSize, dim, dir);
- fftw_destroy_plan(refPlan);
+ if (!compare(refout, real, imag, outfftBatchSize))
+ checkflag = true;
- /*for( cl_uint i = 0; i < outfftBatchSize; i = i + outStrides[0])
+ /*for( cl_uint i = 0; i < outfftBatchSize; i = i + outStrides[0])
+ {
+ std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << real[i] << " climag " << imag[i] << std::endl;
+ }*/
+
+ fftwf_free(refout);
+ }
+ else if (precision == CLFFT_DOUBLE)
{
- std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << real[i] << " climag " << imag[i] << std::endl;
- }*/
+ fftw_complex *refout;
- fftw_free(refin);
- fftw_free(refout);
+ refout = get_fftw_output(lengths, inStrides, outStrides, batch_size, fftBatchSize, outfftBatchSize, fftVectorSizePadded,
+ in_layout, outfftVectorSizePadded, fftVectorSize, dim, dir);
+
+ if (!compare(refout, real, imag, outfftBatchSize))
+ checkflag = true;
+
+ /*for( cl_uint i = 0; i < outfftBatchSize; i = i + outStrides[0])
+ {
+ std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << real[i] << " climag " << imag[i] << std::endl;
+ }*/
+
+ fftw_free(refout);
+ }
}
break;
}
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index b4b1c29..854ffcc 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -1934,7 +1934,7 @@ namespace StockhamGenerator
//If precallback is set
if (fft_doPreCallback)
{
- passStr += "\n\tfloat2 retPrecallback[";
+ passStr += "\n\t"; passStr += regB2Type; passStr += " retPrecallback[";
passStr += (numB4 > 0) ? "4" : (numB2 > 0) ? "2" : "1";
passStr += "];";
}
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 0e3b330..7ad7845 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -1886,6 +1886,14 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
rowPlan->inStride.push_back(fftPlan->inStride[1]);
rowPlan->iDist = fftPlan->iDist;
+ //Set callback data if set on top level plan
+ if (fftPlan->hasPreCallback)
+ {
+ rowPlan->hasPreCallback = true;
+ rowPlan->preCallback = fftPlan->preCallback;
+ rowPlan->precallUserData = fftPlan->precallUserData;
+ }
+
OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ),
_T( "BakePlan for planX failed" ) );
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list