[clfft] 55/128: Precallback - exclude R2C function from client

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:38 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 40cec4eeae1e2c669da78f83b8a59fdd314212d3
Author: Pradeep <pradeep.rao at amd.com>
Date:   Thu Sep 3 19:19:24 2015 +0530

    Precallback - exclude R2C function from client
---
 src/callback-client/callback-client.cpp | 476 +-------------------------------
 src/callback-client/client.h            |  56 +---
 2 files changed, 6 insertions(+), 526 deletions(-)

diff --git a/src/callback-client/callback-client.cpp b/src/callback-client/callback-client.cpp
index b9b81ee..9a7e72b 100644
--- a/src/callback-client/callback-client.cpp
+++ b/src/callback-client/callback-client.cpp
@@ -5,8 +5,6 @@
 #include "../library/private.h"
 #include "openCL.misc.h"
 #include "../include/sharedLibrary.h"
-#include "../include/unicode.compatibility.h"
-
 
 namespace po = boost::program_options;
 
@@ -18,9 +16,6 @@ int main(int argc, char **argv)
 
 	size_t batchSize = 1; 
 
-	int fftType = 1;
-	FFTType clFFTType = FFT_C2C;
-
 	//	Initialize flags for FFT library
 	std::auto_ptr< clfftSetupData > setupData( new clfftSetupData );
 	OPENCL_V_THROW( clfftInitSetupData( setupData.get( ) ),
@@ -35,7 +30,6 @@ int main(int argc, char **argv)
 			( "dumpKernels,d", "FFT engine will dump generated OpenCL FFT kernels to disk (default: dump off)" )
 			( "batchSize,b",   po::value< size_t >( &batchSize )->default_value( 1 ), "If this value is greater than one, arrays will be used " )
 			( "profile,p",     po::value< cl_uint >( &profile_count )->default_value( 10 ), "Time and report the kernel speed of the FFT (default: profiling off)" )
-			( "type,t",		   po::value< int	>( &fftType )->default_value( 1 ), "Type of FFT:\n1) Complex-Complex (default)\n2) Real-Complex\n3) Complex-Real\n" )
 			;
 
 		po::variables_map vm;
@@ -52,17 +46,7 @@ int main(int argc, char **argv)
 		{
 			setupData->debugFlags	|= CLFFT_DUMP_PROGRAMS;
 		}
-
-		switch (fftType)
-		{
-		case 1: clFFTType = FFT_C2C; break;
-		case 2: clFFTType = FFT_R2C; break;
-		case 3: clFFTType = FFT_C2R; break;
-		default:
-			throw std::runtime_error( "Invalid FFT type" );
-			break;
-		}
-	
+			
 		clfftDim dim = CLFFT_1D;
 		if( lengths[ 1 ] > 1 )
 		{
@@ -73,18 +57,10 @@ int main(int argc, char **argv)
 			dim	= CLFFT_3D;
 		}
 
-		if( clFFTType == FFT_C2C) // Complex-Complex cases
-		{
-			C2C_transform<float>(setupData, lengths, batchSize, dim, precision, profile_count);
-		}
-		else if (clFFTType == FFT_R2C) // Complex-Complex cases
-		{
-			R2C_transform<float>(setupData, lengths, batchSize, dim, precision, profile_count);
-		}
-		else                          // Complex-Complex cases
-		{
-			C2R_transform<float>();
-		}
+		 // Complex-Complex cases, SP
+		
+		C2C_transform<float>(setupData, lengths, batchSize, dim, precision, profile_count);
+		
 	}
 	catch( std::exception& e )
 	{
@@ -132,53 +108,6 @@ void C2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths,
 }
 
 template < typename T >
-void R2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize, 
-				   clfftDim dim, clfftPrecision precision,  cl_uint profile_count)
-{
-	//	OpenCL state 
-	cl_device_type		deviceType	= CL_DEVICE_TYPE_ALL;
-	cl_int			deviceId = 0;
-	std::vector< cl_device_id > device_id;
-	cl_int				platformId = 0;
-	cl_context			context;
-	cl_uint command_queue_flags = 0;
-	command_queue_flags |= CL_QUEUE_PROFILING_ENABLE;
-	
-	// Test for in-place Hermitian Interleaved output 
-	// Hence output size is N/2 + 1 complex. So allocate N + 2 real input
-	size_t Nt = inlengths[0] + 2;
-	size_t vectorLength = Nt * inlengths[1] * inlengths[2];
-	size_t fftLength = vectorLength * batchSize;
-
-	//OpenCL initializations
-	device_id = initializeCL( deviceType, deviceId, platformId, context, false);
-
-	cl_int status = 0;
-    
-	cl_command_queue commandQueue = ::clCreateCommandQueue( context, device_id[0], command_queue_flags, &status );
-    OPENCL_V_THROW( status, "Creating Command Queue ( ::clCreateCommandQueue() )" );
-
-	if (precision == CLFFT_SINGLE)
-	{
-		//Run clFFT with seaparate Pre-process Kernel
-		runR2CPreprocessKernelFFT<float>(setupData, context, commandQueue, device_id[0], inlengths, dim, precision, 
-										batchSize, vectorLength, fftLength, profile_count);
-
-		//Run clFFT using pre-callback 
-		runR2CPrecallbackFFT<float>(setupData, context, commandQueue, inlengths, dim, precision, 
-									batchSize, vectorLength, fftLength, profile_count);
-	}
-
-	OPENCL_V_THROW( clReleaseCommandQueue( commandQueue ), "Error: In clReleaseCommandQueue\n" );
-    OPENCL_V_THROW( clReleaseContext( context ), "Error: In clReleaseContext\n" );
-}
-
-template < typename T >
-void C2R_transform()
-{
-}
-
-template < typename T >
 void runC2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue,
 						size_t* inlengths, clfftDim dim, clfftPrecision precision,
 						size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count)
@@ -531,358 +460,6 @@ void runC2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_con
 	OPENCL_V_THROW( clReleaseMemObject( userdatabuffer ), "Error: In clReleaseMemObject\n" );
 }
 
-template < typename T >
-void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue,
-						size_t* inlengths, clfftDim dim, clfftPrecision precision,
-						size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count)
-{
-	cl_int status = 0;
-
-	size_t userdataLengths[ 3 ] = {USERDATA_LENGTH,1,1};
-	size_t vectorLength_userdata = userdataLengths[0] * userdataLengths[1] * userdataLengths[2];
-	size_t userdataLength = vectorLength_userdata * batchSize;
-
-	//input/output allocation sizes
-	size_t size_of_buffers = fftLength * sizeof( T  );
-	size_t size_of_buffers_userdata = userdataLength * sizeof( T );
-
-	//in-place transform. Same buffer for input and output
-	cl_mem fftbuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE, size_of_buffers, NULL, &status);
-    OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(buffer) )" );
-
-	//Initialize Data
-	std::vector< T > userdata( userdataLength );
-
-	// impulse test case
-	for (size_t idx = 0; idx < userdataLength; ++idx)
-	{
-		userdata[idx] = 1;
-	}
-
-	//user data buffer
-	cl_mem userDatabuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size_of_buffers_userdata, &userdata[0], &status);
-    OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(userDatabuffer) )" );
-
-	//clFFT initializations
-	
-	//	FFT state
-	clfftResultLocation	place = CLFFT_INPLACE;
-	clfftLayout	inLayout  = CLFFT_REAL;
-	clfftLayout	outLayout = CLFFT_HERMITIAN_INTERLEAVED;
-
-	clfftPlanHandle plan_handle;
-	OPENCL_V_THROW( clfftSetup( setupData.get( ) ), "clfftSetup failed" );
-	OPENCL_V_THROW( clfftCreateDefaultPlan( &plan_handle, context, dim, inlengths ), "clfftCreateDefaultPlan failed" );
-
-	//Precallback setup
-	char* precallbackstr = STRINGIFY(ZERO_PAD_R2C);
-
-	//Register the callback
-	OPENCL_V_THROW (clFFTSetPlanCallback(plan_handle, "zeroPad", precallbackstr, NULL, 0, PRECALLBACK, userDatabuffer), "clFFTSetPlanCallback failed");
-
-	//	Default plan creates a plan that expects an inPlace transform with interleaved complex numbers
-	OPENCL_V_THROW( clfftSetResultLocation( plan_handle, place ), "clfftSetResultLocation failed" );
-	OPENCL_V_THROW( clfftSetLayout( plan_handle, inLayout, outLayout ), "clfftSetLayout failed" );
-	OPENCL_V_THROW( clfftSetPlanBatchSize( plan_handle, batchSize ), "clfftSetPlanBatchSize failed" );
-	OPENCL_V_THROW( clfftSetPlanPrecision( plan_handle, precision ), "clfftSetPlanPrecision failed" );
-
-	//Bake Plan
-	OPENCL_V_THROW( clfftBakePlan( plan_handle, 1, &commandQueue, NULL, NULL ), "clfftBakePlan failed" );
-
-	//get the buffersize
-	size_t buffersize=0;
-	OPENCL_V_THROW( clfftGetTmpBufSize(plan_handle, &buffersize ), "clfftGetTmpBufSize failed" );
-
-	//allocate the intermediate buffer
-	cl_mem clMedBuffer=NULL;
-
-	if (buffersize)
-	{
-		cl_int medstatus;
-		clMedBuffer = clCreateBuffer ( context, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
-		OPENCL_V_THROW( medstatus, "Creating intmediate Buffer failed" );
-	}
-
-	cl_mem * buffersOut = NULL; //NULL for in-place
-
-	//for functional test
-	OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, CLFFT_FORWARD, 1, &commandQueue, 0, NULL, NULL,
-		&fftbuffer, buffersOut, clMedBuffer ),
-		"clfftEnqueueTransform failed" );
-		
-	OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-
-	if (profile_count > 1)
-	{
-		Timer tr;
-		tr.Start();
-
-		//	Loop as many times as the user specifies to average out the timings
-		for( cl_uint i = 0; i < profile_count; ++i )
-		{
-			OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, CLFFT_FORWARD, 1, &commandQueue, 0, NULL, NULL,
-				&fftbuffer, buffersOut, clMedBuffer ),
-				"clfftEnqueueTransform failed" );
-		
-			OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-		}
-		double wtimesample = tr.Sample();
-		double wtime = wtimesample/((double)profile_count);
-	
-		tout << "\nExecution wall time (with clFFT Pre-callback): " << 1000.0*wtime << " ms" << std::endl;
-	}
-
-	if(clMedBuffer) clReleaseMemObject(clMedBuffer);
-	
-	if (profile_count == 1)
-	{
-		std::vector< std::complex< T > > output( fftLength/2 );
-
-		OPENCL_V_THROW( clEnqueueReadBuffer( commandQueue, fftbuffer, CL_TRUE, 0, size_of_buffers, &output[ 0 ],
-			0, NULL, NULL ), "Reading the result buffer failed" );
-
-		/*for( cl_uint i = 0; i < fftLength/2; i++)
-		{
-			std::cout << "i " << i << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
-		}*/
-
-		////Reference fftw output
-		//fftwf_complex *refout;
-
-		//refout = get_R2C_fftwf_output(inlengths, fftLength, batchSize, inLayout, dim);
-
-		///*for( cl_uint i = 0; i < fftLength; i++)
-		//{
-		//	std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
-		//}*/
-		//if (!compare<fftwf_complex, T>(refout, output, fftLength))
-		//{
-		//	std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****FAIL*****" << std::endl;
-		//}
-		//else
-		//{
-		//	std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****PASS*****" << std::endl;
-		//}
-
-		//fftwf_free(refout);
-	}
-
-	OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
-	OPENCL_V_THROW( clfftTeardown( ), "clfftTeardown failed" );
-
-	//cleanup
-	OPENCL_V_THROW( clReleaseMemObject( fftbuffer ), "Error: In clReleaseMemObject\n" );
-	OPENCL_V_THROW( clReleaseMemObject( userDatabuffer ), "Error: In clReleaseMemObject\n" );
-}
-
-template < typename T >
-void runR2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, 
-							cl_command_queue commandQueue, cl_device_id device_id,
-							size_t* inlengths, clfftDim dim, clfftPrecision precision,
-							size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count)
-{
-	cl_int status = 0;
-
-	size_t userdataLengths[ 3 ] = {USERDATA_LENGTH,1,1}; 
-	size_t vectorLength_userdata = userdataLengths[0] * userdataLengths[1] * userdataLengths[2];
-	size_t userdataLength = vectorLength_userdata * batchSize;
-
-	//input/output allocation sizes
-	size_t size_of_buffers = fftLength * sizeof( T  );
-	size_t size_of_buffers_userdata = userdataLength * sizeof( T  );
-
-	//in-place transform. Same buffer for input and output
-	cl_mem fftbuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE, size_of_buffers, NULL, &status);
-    OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(buffer) )" );
-
-	//Initialize Data
-	std::vector< T > userdata( userdataLength );
-
-	// impulse test case
-	for (size_t idx = 0; idx < userdataLength; ++idx)
-	{
-		userdata[idx] = 1;
-	}
-
-	//user data buffer
-	cl_mem userdatabuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size_of_buffers_userdata, &userdata[0], &status);
-    OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(userdatabuffer) )" );
-
-	//clFFT initializations
-
-	//	FFT state
-	clfftResultLocation	place = CLFFT_INPLACE;
-	clfftLayout	inLayout  = CLFFT_REAL;
-	clfftLayout	outLayout = CLFFT_HERMITIAN_INTERLEAVED;
-
-	clfftPlanHandle plan_handle;
-	OPENCL_V_THROW( clfftSetup( setupData.get( ) ), "clfftSetup failed" );
-	OPENCL_V_THROW( clfftCreateDefaultPlan( &plan_handle, context, dim, inlengths ), "clfftCreateDefaultPlan failed" );
-
-	//	Default plan creates a plan that expects an inPlace transform with interleaved complex numbers
-	OPENCL_V_THROW( clfftSetResultLocation( plan_handle, place ), "clfftSetResultLocation failed" );
-	OPENCL_V_THROW( clfftSetLayout( plan_handle, inLayout, outLayout ), "clfftSetLayout failed" );
-	OPENCL_V_THROW( clfftSetPlanBatchSize( plan_handle, batchSize ), "clfftSetPlanBatchSize failed" );
-	OPENCL_V_THROW( clfftSetPlanPrecision( plan_handle, precision ), "clfftSetPlanPrecision failed" );
-
-		//Bake Plan
-	OPENCL_V_THROW( clfftBakePlan( plan_handle, 1, &commandQueue, NULL, NULL ), "clfftBakePlan failed" );
-
-	//get the buffersize
-	size_t buffersize=0;
-	OPENCL_V_THROW( clfftGetTmpBufSize(plan_handle, &buffersize ), "clfftGetTmpBufSize failed" );
-
-	//allocate the intermediate buffer
-	cl_mem clMedBuffer=NULL;
-
-	if (buffersize)
-	{
-		cl_int medstatus;
-		clMedBuffer = clCreateBuffer ( context, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
-		OPENCL_V_THROW( medstatus, "Creating intmediate Buffer failed" );
-	}
-
-	cl_mem * buffersOut = NULL; //NULL for in-place
-
-	//Pre-process kernel string
-	const char* source = STRINGIFY(ZERO_PAD_R2C_KERNEL);
-	
-	cl_program program = clCreateProgramWithSource( context, 1, &source, NULL, &status );
-	OPENCL_V_THROW( status, "clCreateProgramWithSource failed." );
-
-	status = clBuildProgram( program, 1, &device_id, NULL, NULL, NULL);
-	OPENCL_V_THROW( status, "clBuildProgram failed" );
-
-#if defined( _DEBUG )
-	if( status != CL_SUCCESS )
-	{
-		if( status == CL_BUILD_PROGRAM_FAILURE )
-		{
-			size_t buildLogSize = 0;
-			OPENCL_V_THROW( clGetProgramBuildInfo( program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize ),
-							"clGetProgramBuildInfo failed"  );
-
-			std::vector< char > buildLog( buildLogSize );
-			::memset( &buildLog[ 0 ], 0x0, buildLogSize );
-
-			OPENCL_V_THROW( clGetProgramBuildInfo( program, device_id, CL_PROGRAM_BUILD_LOG, buildLogSize, &buildLog[ 0 ], NULL ),
-						"clGetProgramBuildInfo failed"  );
-
-			std::cerr << "\n\t\t\tBUILD LOG\n";
-			std::cerr << "************************************************\n";
-			std::cerr << &buildLog[ 0 ] << std::endl;
-			std::cerr << "************************************************\n";
-		}
-
-		OPENCL_V_THROW( status, "clBuildProgram failed" );
-	}
-#endif
-
-	cl_kernel kernel = clCreateKernel( program, "zeroPad", &status );
-	OPENCL_V_THROW( status, "clCreateKernel failed" );
-
-	//for functional test
-	cl_uint uarg = 0;
-
-	//Buffer to be zero-padded
-	OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&fftbuffer ), "clSetKernelArg failed" );
-
-	//originial data
-	OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&userdatabuffer ), "clSetKernelArg failed" );
-
-	//Launch pre-process kernel
-	size_t gSize = fftLength;
-	status = clEnqueueNDRangeKernel( commandQueue, kernel, 1,
-											NULL, &gSize, NULL, 0, NULL, NULL );
-	OPENCL_V_THROW( status, "clEnqueueNDRangeKernel failed" );
-	
-	OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-
-	//Now invoke the clfft execute
-	OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, CLFFT_FORWARD, 1, &commandQueue, 0, NULL, NULL,
-		&fftbuffer, buffersOut, clMedBuffer ),
-		"clfftEnqueueTransform failed" );
-		
-	OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-	
-	if (profile_count > 1)
-	{
-		Timer tr;
-		tr.Start();
-
-		//	Loop as many times as the user specifies to average out the timings
-		for( cl_uint i = 0; i < profile_count; ++i )
-		{
-			uarg = 0;
-
-			//Buffer to be zero-padded
-			OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&fftbuffer ), "clSetKernelArg failed" );
-
-			//originial data
-			OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&userdatabuffer ), "clSetKernelArg failed" );
-
-			//Launch pre-process kernel
-			status = clEnqueueNDRangeKernel( commandQueue, kernel, 1,
-													NULL, &gSize, NULL, 0, NULL, NULL );
-			OPENCL_V_THROW( status, "clEnqueueNDRangeKernel failed" );
-	
-			OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-
-			//Now invoke the clfft execute
-			OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, CLFFT_FORWARD, 1, &commandQueue, 0, NULL, NULL,
-				&fftbuffer, buffersOut, clMedBuffer ),
-				"clfftEnqueueTransform failed" );
-		
-			OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-		}
-		double wtimesample = tr.Sample();
-		double wtime = wtimesample/((double)profile_count);
-	
-		tout << "\nExecution wall time (Separate Pre-process Kernel): " << 1000.0*wtime << " ms" << std::endl;
-	}
-
-	//cleanup preprocess kernel opencl objects
-	OPENCL_V_THROW( clReleaseProgram( program ), "Error: In clReleaseProgram\n" );
-	OPENCL_V_THROW( clReleaseKernel( kernel ), "Error: In clReleaseKernel\n" );
-
-	if(clMedBuffer) clReleaseMemObject(clMedBuffer);
-
-	if (profile_count == 1)
-	{
-		std::vector< std::complex< T > > output( fftLength/2 );
-
-		OPENCL_V_THROW( clEnqueueReadBuffer( commandQueue, fftbuffer, CL_TRUE, 0, size_of_buffers, &output[ 0 ],
-			0, NULL, NULL ), "Reading the result buffer failed" );
-
-		////Reference fftw output
-		//fftwf_complex *refout;
-
-		//refout = get_C2C_fftwf_output(inlengths, fftLength, batchSize, inLayout, dim, dir);
-
-		///*for( cl_uint i = 0; i < fftLength; i++)
-		//{
-		//	std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
-		//}*/
-		//if (!compare<fftwf_complex, T>(refout, output, fftLength))
-		//{
-		//	std::cout << "\n\n\t\tInternal Client Test (Separate Pre-process Kernel) *****FAIL*****" << std::endl;
-		//}
-		//else
-		//{
-		//	std::cout << "\n\n\t\tInternal Client Test (Separate Pre-process Kernel) *****PASS*****" << std::endl;
-		//}
-
-		//fftwf_free(refout);
-	}
-
-	OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
-	OPENCL_V_THROW( clfftTeardown( ), "clfftTeardown failed" );
-
-	//cleanup
-	OPENCL_V_THROW( clReleaseMemObject( fftbuffer ), "Error: In clReleaseMemObject\n" );
-	OPENCL_V_THROW( clReleaseMemObject( userdatabuffer ), "Error: In clReleaseMemObject\n" );
-}
-
 //Compare reference and opencl output 
 template < typename T1, typename T2>
 bool compare(T1 *refData, std::vector< std::complex< T2 > > data,
@@ -990,46 +567,3 @@ fftwf_complex* get_C2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int
 
 	return refout;
 }
-
-// Compute reference output using fftw for float type
-fftwf_complex* get_R2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int batch_size,
-									clfftLayout in_layout, clfftDim dim)
-{
-	//In FFTW last dimension has the fastest changing index
-	int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
-	int inembed[3] = {(int)lengths[2], (int)lengths[1], (int)(lengths[0] + 2)};
-	int outembed[3] = {(int)lengths[2], (int)lengths[1], (int)(lengths[0]/2 + 1)};
-
-	fftwf_plan refPlan;
-		
-	size_t infftVectorLength = inembed[0] * inembed[1] * inembed[2];
-	size_t outfftVectorLength = outembed[0] * outembed[1] * outembed[2];
-
-	float *refin = (float*) malloc(sizeof(float)*fftbatchLength);
-	fftwf_complex *refout = (fftwf_complex*)refin; //(fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*outfftVectorLength*batch_size);
-
-	refPlan = fftwf_plan_many_dft_r2c(dim, &fftwLengths[3 - dim], batch_size, 
-									refin, &inembed[3 - dim], 1, infftVectorLength,
-									refout, &outembed[3 - dim], 1, outfftVectorLength, FFTW_ESTIMATE);
-	
-	float scalar; 
-	
-	for( size_t i = 0; i < fftbatchLength; i++)
-	{
-		scalar = 0.0f;
-		if ( (i % infftVectorLength)  < USERDATA_LENGTH)
-		{
-			scalar = 1.0f;
-		}
-		
-		refin[i] = scalar;
-	}
-
-	fftwf_execute(refPlan);
-
-	fftw_free(refin);
-
-	fftwf_destroy_plan(refPlan);
-
-	return refout;
-}
\ No newline at end of file
diff --git a/src/callback-client/client.h b/src/callback-client/client.h
index 9ca44dd..502bed9 100644
--- a/src/callback-client/client.h
+++ b/src/callback-client/client.h
@@ -24,18 +24,13 @@
 #include <boost/program_options.hpp>
 #include "stdafx.h"
 #include "../statTimer/statisticalTimer.extern.h"
+#include "../include/unicode.compatibility.h"
 
 #include <fftw3.h>
 
 #define CALLBCKSTR(...) #__VA_ARGS__
 #define STRINGIFY(...) 	CALLBCKSTR(__VA_ARGS__)
 
-enum FFTType
-{
-	FFT_C2C,
-	FFT_R2C,
-	FFT_C2R,
-};
 #define USERDATA_LENGTH 512
 #define BATCH_LENGTH 1024 // Must be >= USERDATA_LENGTH
 
@@ -67,51 +62,13 @@ enum FFTType
 					 *((__global float2*)input + inoffset) = scalar; \n \
 				} \n
 
-#define ZERO_PAD_R2C __attribute__((always_inline)) \n float zeroPad (__global void *input, \n \
-								uint inoffset, \n \
-							__global void *userdata) \n \
-				 { \n \
-					 float scalar = 0.0f; \n \
-					 uint udoffset; \n \
-					 if ((inoffset % (BATCH_LENGTH + 2)) < USERDATA_LENGTH) \n \
-					 { \n \
-					    udoffset = ((inoffset/(BATCH_LENGTH + 2)) * USERDATA_LENGTH) + (inoffset % (BATCH_LENGTH + 2)); \n \
-						scalar = *((__global float*)userdata + udoffset); \n \
-					 } \n \
-					 return scalar; \n \
-				} \n
-
-#define ZERO_PAD_R2C_KERNEL __kernel void zeroPad (__global void *input, \n \
-								__global void *userdata) \n \
-				 { \n \
-					uint inoffset = get_global_id(0); \n \
-					 float scalar = 0.0f; \n \
-					 uint udoffset; \n \
-					 if ((inoffset % (BATCH_LENGTH + 2)) < USERDATA_LENGTH) \n \
-					 { \n \
-					   udoffset = ((inoffset/(BATCH_LENGTH + 2)) * USERDATA_LENGTH) + (inoffset % (BATCH_LENGTH + 2)); \n \
-					   scalar = *((__global float*)userdata + udoffset); \n \
-					 } \n \
-					 *((__global float*)input + inoffset) = scalar; \n \
-				} \n
-
 template < typename T >
 void C2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize, 
 				   clfftDim dim, clfftPrecision precision, cl_uint profile_count);
 
-template < typename T >
-void R2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize, 
-				   clfftDim dim, clfftPrecision precision,  cl_uint profile_count);
-
-template < typename T >
-void C2R_transform();
-
 fftwf_complex* get_C2C_fftwf_output(size_t* lengths, size_t fftBatchSize, int batch_size, clfftLayout in_layout,
 								clfftDim dim, clfftDirection dir);
 
-fftwf_complex* get_R2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int batch_size,
-									clfftLayout in_layout, clfftDim dim);
-
 template < typename T1, typename T2>
 bool compare(T1 *refData, std::vector< std::complex< T2 > > data,
              size_t length, const float epsilon = 1e-6f);
@@ -122,17 +79,6 @@ void runC2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context
 template < typename T >
 void runC2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue, cl_device_id device_id, size_t* inlengths, clfftDim dim, clfftPrecision precision, size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
 
-template < typename T >
-void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue,
-						size_t* inlengths, clfftDim dim, clfftPrecision precision,
-						size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
-
-template < typename T >
-void runR2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, 
-							cl_command_queue commandQueue, cl_device_id device_id,
-							size_t* inlengths, clfftDim dim, clfftPrecision precision,
-							size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
-
 #ifdef WIN32
 
 struct Timer

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list