[clfft] 55/128: Precallback - exclude R2C function from client
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:38 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master in repository clfft.
commit 40cec4eeae1e2c669da78f83b8a59fdd314212d3
Author: Pradeep <pradeep.rao at amd.com>
Date: Thu Sep 3 19:19:24 2015 +0530
Precallback - exclude R2C function from client
---
src/callback-client/callback-client.cpp | 476 +-------------------------------
src/callback-client/client.h | 56 +---
2 files changed, 6 insertions(+), 526 deletions(-)
diff --git a/src/callback-client/callback-client.cpp b/src/callback-client/callback-client.cpp
index b9b81ee..9a7e72b 100644
--- a/src/callback-client/callback-client.cpp
+++ b/src/callback-client/callback-client.cpp
@@ -5,8 +5,6 @@
#include "../library/private.h"
#include "openCL.misc.h"
#include "../include/sharedLibrary.h"
-#include "../include/unicode.compatibility.h"
-
namespace po = boost::program_options;
@@ -18,9 +16,6 @@ int main(int argc, char **argv)
size_t batchSize = 1;
- int fftType = 1;
- FFTType clFFTType = FFT_C2C;
-
// Initialize flags for FFT library
std::auto_ptr< clfftSetupData > setupData( new clfftSetupData );
OPENCL_V_THROW( clfftInitSetupData( setupData.get( ) ),
@@ -35,7 +30,6 @@ int main(int argc, char **argv)
( "dumpKernels,d", "FFT engine will dump generated OpenCL FFT kernels to disk (default: dump off)" )
( "batchSize,b", po::value< size_t >( &batchSize )->default_value( 1 ), "If this value is greater than one, arrays will be used " )
( "profile,p", po::value< cl_uint >( &profile_count )->default_value( 10 ), "Time and report the kernel speed of the FFT (default: profiling off)" )
- ( "type,t", po::value< int >( &fftType )->default_value( 1 ), "Type of FFT:\n1) Complex-Complex (default)\n2) Real-Complex\n3) Complex-Real\n" )
;
po::variables_map vm;
@@ -52,17 +46,7 @@ int main(int argc, char **argv)
{
setupData->debugFlags |= CLFFT_DUMP_PROGRAMS;
}
-
- switch (fftType)
- {
- case 1: clFFTType = FFT_C2C; break;
- case 2: clFFTType = FFT_R2C; break;
- case 3: clFFTType = FFT_C2R; break;
- default:
- throw std::runtime_error( "Invalid FFT type" );
- break;
- }
-
+
clfftDim dim = CLFFT_1D;
if( lengths[ 1 ] > 1 )
{
@@ -73,18 +57,10 @@ int main(int argc, char **argv)
dim = CLFFT_3D;
}
- if( clFFTType == FFT_C2C) // Complex-Complex cases
- {
- C2C_transform<float>(setupData, lengths, batchSize, dim, precision, profile_count);
- }
- else if (clFFTType == FFT_R2C) // Complex-Complex cases
- {
- R2C_transform<float>(setupData, lengths, batchSize, dim, precision, profile_count);
- }
- else // Complex-Complex cases
- {
- C2R_transform<float>();
- }
+ // Complex-Complex cases, SP
+
+ C2C_transform<float>(setupData, lengths, batchSize, dim, precision, profile_count);
+
}
catch( std::exception& e )
{
@@ -132,53 +108,6 @@ void C2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths,
}
template < typename T >
-void R2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize,
- clfftDim dim, clfftPrecision precision, cl_uint profile_count)
-{
- // OpenCL state
- cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
- cl_int deviceId = 0;
- std::vector< cl_device_id > device_id;
- cl_int platformId = 0;
- cl_context context;
- cl_uint command_queue_flags = 0;
- command_queue_flags |= CL_QUEUE_PROFILING_ENABLE;
-
- // Test for in-place Hermitian Interleaved output
- // Hence output size is N/2 + 1 complex. So allocate N + 2 real input
- size_t Nt = inlengths[0] + 2;
- size_t vectorLength = Nt * inlengths[1] * inlengths[2];
- size_t fftLength = vectorLength * batchSize;
-
- //OpenCL initializations
- device_id = initializeCL( deviceType, deviceId, platformId, context, false);
-
- cl_int status = 0;
-
- cl_command_queue commandQueue = ::clCreateCommandQueue( context, device_id[0], command_queue_flags, &status );
- OPENCL_V_THROW( status, "Creating Command Queue ( ::clCreateCommandQueue() )" );
-
- if (precision == CLFFT_SINGLE)
- {
- //Run clFFT with seaparate Pre-process Kernel
- runR2CPreprocessKernelFFT<float>(setupData, context, commandQueue, device_id[0], inlengths, dim, precision,
- batchSize, vectorLength, fftLength, profile_count);
-
- //Run clFFT using pre-callback
- runR2CPrecallbackFFT<float>(setupData, context, commandQueue, inlengths, dim, precision,
- batchSize, vectorLength, fftLength, profile_count);
- }
-
- OPENCL_V_THROW( clReleaseCommandQueue( commandQueue ), "Error: In clReleaseCommandQueue\n" );
- OPENCL_V_THROW( clReleaseContext( context ), "Error: In clReleaseContext\n" );
-}
-
-template < typename T >
-void C2R_transform()
-{
-}
-
-template < typename T >
void runC2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue,
size_t* inlengths, clfftDim dim, clfftPrecision precision,
size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count)
@@ -531,358 +460,6 @@ void runC2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_con
OPENCL_V_THROW( clReleaseMemObject( userdatabuffer ), "Error: In clReleaseMemObject\n" );
}
-template < typename T >
-void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue,
- size_t* inlengths, clfftDim dim, clfftPrecision precision,
- size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count)
-{
- cl_int status = 0;
-
- size_t userdataLengths[ 3 ] = {USERDATA_LENGTH,1,1};
- size_t vectorLength_userdata = userdataLengths[0] * userdataLengths[1] * userdataLengths[2];
- size_t userdataLength = vectorLength_userdata * batchSize;
-
- //input/output allocation sizes
- size_t size_of_buffers = fftLength * sizeof( T );
- size_t size_of_buffers_userdata = userdataLength * sizeof( T );
-
- //in-place transform. Same buffer for input and output
- cl_mem fftbuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE, size_of_buffers, NULL, &status);
- OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(buffer) )" );
-
- //Initialize Data
- std::vector< T > userdata( userdataLength );
-
- // impulse test case
- for (size_t idx = 0; idx < userdataLength; ++idx)
- {
- userdata[idx] = 1;
- }
-
- //user data buffer
- cl_mem userDatabuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size_of_buffers_userdata, &userdata[0], &status);
- OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(userDatabuffer) )" );
-
- //clFFT initializations
-
- // FFT state
- clfftResultLocation place = CLFFT_INPLACE;
- clfftLayout inLayout = CLFFT_REAL;
- clfftLayout outLayout = CLFFT_HERMITIAN_INTERLEAVED;
-
- clfftPlanHandle plan_handle;
- OPENCL_V_THROW( clfftSetup( setupData.get( ) ), "clfftSetup failed" );
- OPENCL_V_THROW( clfftCreateDefaultPlan( &plan_handle, context, dim, inlengths ), "clfftCreateDefaultPlan failed" );
-
- //Precallback setup
- char* precallbackstr = STRINGIFY(ZERO_PAD_R2C);
-
- //Register the callback
- OPENCL_V_THROW (clFFTSetPlanCallback(plan_handle, "zeroPad", precallbackstr, NULL, 0, PRECALLBACK, userDatabuffer), "clFFTSetPlanCallback failed");
-
- // Default plan creates a plan that expects an inPlace transform with interleaved complex numbers
- OPENCL_V_THROW( clfftSetResultLocation( plan_handle, place ), "clfftSetResultLocation failed" );
- OPENCL_V_THROW( clfftSetLayout( plan_handle, inLayout, outLayout ), "clfftSetLayout failed" );
- OPENCL_V_THROW( clfftSetPlanBatchSize( plan_handle, batchSize ), "clfftSetPlanBatchSize failed" );
- OPENCL_V_THROW( clfftSetPlanPrecision( plan_handle, precision ), "clfftSetPlanPrecision failed" );
-
- //Bake Plan
- OPENCL_V_THROW( clfftBakePlan( plan_handle, 1, &commandQueue, NULL, NULL ), "clfftBakePlan failed" );
-
- //get the buffersize
- size_t buffersize=0;
- OPENCL_V_THROW( clfftGetTmpBufSize(plan_handle, &buffersize ), "clfftGetTmpBufSize failed" );
-
- //allocate the intermediate buffer
- cl_mem clMedBuffer=NULL;
-
- if (buffersize)
- {
- cl_int medstatus;
- clMedBuffer = clCreateBuffer ( context, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
- OPENCL_V_THROW( medstatus, "Creating intmediate Buffer failed" );
- }
-
- cl_mem * buffersOut = NULL; //NULL for in-place
-
- //for functional test
- OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, CLFFT_FORWARD, 1, &commandQueue, 0, NULL, NULL,
- &fftbuffer, buffersOut, clMedBuffer ),
- "clfftEnqueueTransform failed" );
-
- OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-
- if (profile_count > 1)
- {
- Timer tr;
- tr.Start();
-
- // Loop as many times as the user specifies to average out the timings
- for( cl_uint i = 0; i < profile_count; ++i )
- {
- OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, CLFFT_FORWARD, 1, &commandQueue, 0, NULL, NULL,
- &fftbuffer, buffersOut, clMedBuffer ),
- "clfftEnqueueTransform failed" );
-
- OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
- }
- double wtimesample = tr.Sample();
- double wtime = wtimesample/((double)profile_count);
-
- tout << "\nExecution wall time (with clFFT Pre-callback): " << 1000.0*wtime << " ms" << std::endl;
- }
-
- if(clMedBuffer) clReleaseMemObject(clMedBuffer);
-
- if (profile_count == 1)
- {
- std::vector< std::complex< T > > output( fftLength/2 );
-
- OPENCL_V_THROW( clEnqueueReadBuffer( commandQueue, fftbuffer, CL_TRUE, 0, size_of_buffers, &output[ 0 ],
- 0, NULL, NULL ), "Reading the result buffer failed" );
-
- /*for( cl_uint i = 0; i < fftLength/2; i++)
- {
- std::cout << "i " << i << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
- }*/
-
- ////Reference fftw output
- //fftwf_complex *refout;
-
- //refout = get_R2C_fftwf_output(inlengths, fftLength, batchSize, inLayout, dim);
-
- ///*for( cl_uint i = 0; i < fftLength; i++)
- //{
- // std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
- //}*/
- //if (!compare<fftwf_complex, T>(refout, output, fftLength))
- //{
- // std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****FAIL*****" << std::endl;
- //}
- //else
- //{
- // std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****PASS*****" << std::endl;
- //}
-
- //fftwf_free(refout);
- }
-
- OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
- OPENCL_V_THROW( clfftTeardown( ), "clfftTeardown failed" );
-
- //cleanup
- OPENCL_V_THROW( clReleaseMemObject( fftbuffer ), "Error: In clReleaseMemObject\n" );
- OPENCL_V_THROW( clReleaseMemObject( userDatabuffer ), "Error: In clReleaseMemObject\n" );
-}
-
-template < typename T >
-void runR2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context,
- cl_command_queue commandQueue, cl_device_id device_id,
- size_t* inlengths, clfftDim dim, clfftPrecision precision,
- size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count)
-{
- cl_int status = 0;
-
- size_t userdataLengths[ 3 ] = {USERDATA_LENGTH,1,1};
- size_t vectorLength_userdata = userdataLengths[0] * userdataLengths[1] * userdataLengths[2];
- size_t userdataLength = vectorLength_userdata * batchSize;
-
- //input/output allocation sizes
- size_t size_of_buffers = fftLength * sizeof( T );
- size_t size_of_buffers_userdata = userdataLength * sizeof( T );
-
- //in-place transform. Same buffer for input and output
- cl_mem fftbuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE, size_of_buffers, NULL, &status);
- OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(buffer) )" );
-
- //Initialize Data
- std::vector< T > userdata( userdataLength );
-
- // impulse test case
- for (size_t idx = 0; idx < userdataLength; ++idx)
- {
- userdata[idx] = 1;
- }
-
- //user data buffer
- cl_mem userdatabuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size_of_buffers_userdata, &userdata[0], &status);
- OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(userdatabuffer) )" );
-
- //clFFT initializations
-
- // FFT state
- clfftResultLocation place = CLFFT_INPLACE;
- clfftLayout inLayout = CLFFT_REAL;
- clfftLayout outLayout = CLFFT_HERMITIAN_INTERLEAVED;
-
- clfftPlanHandle plan_handle;
- OPENCL_V_THROW( clfftSetup( setupData.get( ) ), "clfftSetup failed" );
- OPENCL_V_THROW( clfftCreateDefaultPlan( &plan_handle, context, dim, inlengths ), "clfftCreateDefaultPlan failed" );
-
- // Default plan creates a plan that expects an inPlace transform with interleaved complex numbers
- OPENCL_V_THROW( clfftSetResultLocation( plan_handle, place ), "clfftSetResultLocation failed" );
- OPENCL_V_THROW( clfftSetLayout( plan_handle, inLayout, outLayout ), "clfftSetLayout failed" );
- OPENCL_V_THROW( clfftSetPlanBatchSize( plan_handle, batchSize ), "clfftSetPlanBatchSize failed" );
- OPENCL_V_THROW( clfftSetPlanPrecision( plan_handle, precision ), "clfftSetPlanPrecision failed" );
-
- //Bake Plan
- OPENCL_V_THROW( clfftBakePlan( plan_handle, 1, &commandQueue, NULL, NULL ), "clfftBakePlan failed" );
-
- //get the buffersize
- size_t buffersize=0;
- OPENCL_V_THROW( clfftGetTmpBufSize(plan_handle, &buffersize ), "clfftGetTmpBufSize failed" );
-
- //allocate the intermediate buffer
- cl_mem clMedBuffer=NULL;
-
- if (buffersize)
- {
- cl_int medstatus;
- clMedBuffer = clCreateBuffer ( context, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
- OPENCL_V_THROW( medstatus, "Creating intmediate Buffer failed" );
- }
-
- cl_mem * buffersOut = NULL; //NULL for in-place
-
- //Pre-process kernel string
- const char* source = STRINGIFY(ZERO_PAD_R2C_KERNEL);
-
- cl_program program = clCreateProgramWithSource( context, 1, &source, NULL, &status );
- OPENCL_V_THROW( status, "clCreateProgramWithSource failed." );
-
- status = clBuildProgram( program, 1, &device_id, NULL, NULL, NULL);
- OPENCL_V_THROW( status, "clBuildProgram failed" );
-
-#if defined( _DEBUG )
- if( status != CL_SUCCESS )
- {
- if( status == CL_BUILD_PROGRAM_FAILURE )
- {
- size_t buildLogSize = 0;
- OPENCL_V_THROW( clGetProgramBuildInfo( program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize ),
- "clGetProgramBuildInfo failed" );
-
- std::vector< char > buildLog( buildLogSize );
- ::memset( &buildLog[ 0 ], 0x0, buildLogSize );
-
- OPENCL_V_THROW( clGetProgramBuildInfo( program, device_id, CL_PROGRAM_BUILD_LOG, buildLogSize, &buildLog[ 0 ], NULL ),
- "clGetProgramBuildInfo failed" );
-
- std::cerr << "\n\t\t\tBUILD LOG\n";
- std::cerr << "************************************************\n";
- std::cerr << &buildLog[ 0 ] << std::endl;
- std::cerr << "************************************************\n";
- }
-
- OPENCL_V_THROW( status, "clBuildProgram failed" );
- }
-#endif
-
- cl_kernel kernel = clCreateKernel( program, "zeroPad", &status );
- OPENCL_V_THROW( status, "clCreateKernel failed" );
-
- //for functional test
- cl_uint uarg = 0;
-
- //Buffer to be zero-padded
- OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&fftbuffer ), "clSetKernelArg failed" );
-
- //originial data
- OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&userdatabuffer ), "clSetKernelArg failed" );
-
- //Launch pre-process kernel
- size_t gSize = fftLength;
- status = clEnqueueNDRangeKernel( commandQueue, kernel, 1,
- NULL, &gSize, NULL, 0, NULL, NULL );
- OPENCL_V_THROW( status, "clEnqueueNDRangeKernel failed" );
-
- OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-
- //Now invoke the clfft execute
- OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, CLFFT_FORWARD, 1, &commandQueue, 0, NULL, NULL,
- &fftbuffer, buffersOut, clMedBuffer ),
- "clfftEnqueueTransform failed" );
-
- OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-
- if (profile_count > 1)
- {
- Timer tr;
- tr.Start();
-
- // Loop as many times as the user specifies to average out the timings
- for( cl_uint i = 0; i < profile_count; ++i )
- {
- uarg = 0;
-
- //Buffer to be zero-padded
- OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&fftbuffer ), "clSetKernelArg failed" );
-
- //originial data
- OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&userdatabuffer ), "clSetKernelArg failed" );
-
- //Launch pre-process kernel
- status = clEnqueueNDRangeKernel( commandQueue, kernel, 1,
- NULL, &gSize, NULL, 0, NULL, NULL );
- OPENCL_V_THROW( status, "clEnqueueNDRangeKernel failed" );
-
- OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-
- //Now invoke the clfft execute
- OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, CLFFT_FORWARD, 1, &commandQueue, 0, NULL, NULL,
- &fftbuffer, buffersOut, clMedBuffer ),
- "clfftEnqueueTransform failed" );
-
- OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
- }
- double wtimesample = tr.Sample();
- double wtime = wtimesample/((double)profile_count);
-
- tout << "\nExecution wall time (Separate Pre-process Kernel): " << 1000.0*wtime << " ms" << std::endl;
- }
-
- //cleanup preprocess kernel opencl objects
- OPENCL_V_THROW( clReleaseProgram( program ), "Error: In clReleaseProgram\n" );
- OPENCL_V_THROW( clReleaseKernel( kernel ), "Error: In clReleaseKernel\n" );
-
- if(clMedBuffer) clReleaseMemObject(clMedBuffer);
-
- if (profile_count == 1)
- {
- std::vector< std::complex< T > > output( fftLength/2 );
-
- OPENCL_V_THROW( clEnqueueReadBuffer( commandQueue, fftbuffer, CL_TRUE, 0, size_of_buffers, &output[ 0 ],
- 0, NULL, NULL ), "Reading the result buffer failed" );
-
- ////Reference fftw output
- //fftwf_complex *refout;
-
- //refout = get_C2C_fftwf_output(inlengths, fftLength, batchSize, inLayout, dim, dir);
-
- ///*for( cl_uint i = 0; i < fftLength; i++)
- //{
- // std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
- //}*/
- //if (!compare<fftwf_complex, T>(refout, output, fftLength))
- //{
- // std::cout << "\n\n\t\tInternal Client Test (Separate Pre-process Kernel) *****FAIL*****" << std::endl;
- //}
- //else
- //{
- // std::cout << "\n\n\t\tInternal Client Test (Separate Pre-process Kernel) *****PASS*****" << std::endl;
- //}
-
- //fftwf_free(refout);
- }
-
- OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
- OPENCL_V_THROW( clfftTeardown( ), "clfftTeardown failed" );
-
- //cleanup
- OPENCL_V_THROW( clReleaseMemObject( fftbuffer ), "Error: In clReleaseMemObject\n" );
- OPENCL_V_THROW( clReleaseMemObject( userdatabuffer ), "Error: In clReleaseMemObject\n" );
-}
-
//Compare reference and opencl output
template < typename T1, typename T2>
bool compare(T1 *refData, std::vector< std::complex< T2 > > data,
@@ -990,46 +567,3 @@ fftwf_complex* get_C2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int
return refout;
}
-
-// Compute reference output using fftw for float type
-fftwf_complex* get_R2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int batch_size,
- clfftLayout in_layout, clfftDim dim)
-{
- //In FFTW last dimension has the fastest changing index
- int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
- int inembed[3] = {(int)lengths[2], (int)lengths[1], (int)(lengths[0] + 2)};
- int outembed[3] = {(int)lengths[2], (int)lengths[1], (int)(lengths[0]/2 + 1)};
-
- fftwf_plan refPlan;
-
- size_t infftVectorLength = inembed[0] * inembed[1] * inembed[2];
- size_t outfftVectorLength = outembed[0] * outembed[1] * outembed[2];
-
- float *refin = (float*) malloc(sizeof(float)*fftbatchLength);
- fftwf_complex *refout = (fftwf_complex*)refin; //(fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*outfftVectorLength*batch_size);
-
- refPlan = fftwf_plan_many_dft_r2c(dim, &fftwLengths[3 - dim], batch_size,
- refin, &inembed[3 - dim], 1, infftVectorLength,
- refout, &outembed[3 - dim], 1, outfftVectorLength, FFTW_ESTIMATE);
-
- float scalar;
-
- for( size_t i = 0; i < fftbatchLength; i++)
- {
- scalar = 0.0f;
- if ( (i % infftVectorLength) < USERDATA_LENGTH)
- {
- scalar = 1.0f;
- }
-
- refin[i] = scalar;
- }
-
- fftwf_execute(refPlan);
-
- fftw_free(refin);
-
- fftwf_destroy_plan(refPlan);
-
- return refout;
-}
\ No newline at end of file
diff --git a/src/callback-client/client.h b/src/callback-client/client.h
index 9ca44dd..502bed9 100644
--- a/src/callback-client/client.h
+++ b/src/callback-client/client.h
@@ -24,18 +24,13 @@
#include <boost/program_options.hpp>
#include "stdafx.h"
#include "../statTimer/statisticalTimer.extern.h"
+#include "../include/unicode.compatibility.h"
#include <fftw3.h>
#define CALLBCKSTR(...) #__VA_ARGS__
#define STRINGIFY(...) CALLBCKSTR(__VA_ARGS__)
-enum FFTType
-{
- FFT_C2C,
- FFT_R2C,
- FFT_C2R,
-};
#define USERDATA_LENGTH 512
#define BATCH_LENGTH 1024 // Must be >= USERDATA_LENGTH
@@ -67,51 +62,13 @@ enum FFTType
*((__global float2*)input + inoffset) = scalar; \n \
} \n
-#define ZERO_PAD_R2C __attribute__((always_inline)) \n float zeroPad (__global void *input, \n \
- uint inoffset, \n \
- __global void *userdata) \n \
- { \n \
- float scalar = 0.0f; \n \
- uint udoffset; \n \
- if ((inoffset % (BATCH_LENGTH + 2)) < USERDATA_LENGTH) \n \
- { \n \
- udoffset = ((inoffset/(BATCH_LENGTH + 2)) * USERDATA_LENGTH) + (inoffset % (BATCH_LENGTH + 2)); \n \
- scalar = *((__global float*)userdata + udoffset); \n \
- } \n \
- return scalar; \n \
- } \n
-
-#define ZERO_PAD_R2C_KERNEL __kernel void zeroPad (__global void *input, \n \
- __global void *userdata) \n \
- { \n \
- uint inoffset = get_global_id(0); \n \
- float scalar = 0.0f; \n \
- uint udoffset; \n \
- if ((inoffset % (BATCH_LENGTH + 2)) < USERDATA_LENGTH) \n \
- { \n \
- udoffset = ((inoffset/(BATCH_LENGTH + 2)) * USERDATA_LENGTH) + (inoffset % (BATCH_LENGTH + 2)); \n \
- scalar = *((__global float*)userdata + udoffset); \n \
- } \n \
- *((__global float*)input + inoffset) = scalar; \n \
- } \n
-
template < typename T >
void C2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize,
clfftDim dim, clfftPrecision precision, cl_uint profile_count);
-template < typename T >
-void R2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize,
- clfftDim dim, clfftPrecision precision, cl_uint profile_count);
-
-template < typename T >
-void C2R_transform();
-
fftwf_complex* get_C2C_fftwf_output(size_t* lengths, size_t fftBatchSize, int batch_size, clfftLayout in_layout,
clfftDim dim, clfftDirection dir);
-fftwf_complex* get_R2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int batch_size,
- clfftLayout in_layout, clfftDim dim);
-
template < typename T1, typename T2>
bool compare(T1 *refData, std::vector< std::complex< T2 > > data,
size_t length, const float epsilon = 1e-6f);
@@ -122,17 +79,6 @@ void runC2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context
template < typename T >
void runC2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue, cl_device_id device_id, size_t* inlengths, clfftDim dim, clfftPrecision precision, size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
-template < typename T >
-void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue,
- size_t* inlengths, clfftDim dim, clfftPrecision precision,
- size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
-
-template < typename T >
-void runR2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context,
- cl_command_queue commandQueue, cl_device_id device_id,
- size_t* inlengths, clfftDim dim, clfftPrecision precision,
- size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
-
#ifdef WIN32
struct Timer
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list