[clfft] 68/128: Precallback - Remove userdatastructString parameter from SetPlanCallback API

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:40 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 857c7b28c4994950ae4c673716cbee8d0abfeb6d
Author: Pradeep <pradeep.rao at amd.com>
Date:   Mon Sep 14 11:58:39 2015 +0530

    Precallback - Remove userdatastructString parameter from SetPlanCallback API
---
 src/callback-client/callback-client.cpp | 283 ++++++++++++++++++++++++++++----
 src/callback-client/client.h            |  17 +-
 src/include/clFFT.h                     |   3 +-
 src/library/accessors.cpp               |   6 +-
 src/library/generator.copy.cpp          |   7 -
 src/library/generator.stockham.cpp      |   7 -
 src/library/generator.transpose.gcn.cpp |   7 -
 src/library/mainpage.h                  |  24 ++-
 src/library/plan.h                      |   1 -
 src/tests/cl_transform.h                |   4 +-
 src/tests/test_constants.h              |  30 ++--
 11 files changed, 299 insertions(+), 90 deletions(-)

diff --git a/src/callback-client/callback-client.cpp b/src/callback-client/callback-client.cpp
index abae16d..990b94f 100644
--- a/src/callback-client/callback-client.cpp
+++ b/src/callback-client/callback-client.cpp
@@ -100,8 +100,8 @@ void R2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths,
 	if (precision == CLFFT_SINGLE)
 	{
 		//Run clFFT with seaparate Pre-process Kernel
-		/*runR2CPreprocessKernelFFT<float>(setupData, context, commandQueue, device_id[0], inlengths, dim, precision, 
-										batchSize, vectorLength, fftLength, profile_count);*/
+		runR2CPreprocessKernelFFT<float>(setupData, context, commandQueue, device_id[0], inlengths, dim, precision, 
+										batchSize, vectorLength, fftLength, profile_count);
 
 		//Run clFFT using pre-callback 
 		runR2CPrecallbackFFT<float>(setupData, context, commandQueue, inlengths, dim, precision, 
@@ -126,6 +126,7 @@ void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context
 	char* in24bitData = (char*)malloc(in_size_of_buffers);
 
 	//Initialize Data
+	srand(1);
 	for (size_t idx = 0; idx < fftLength; ++idx)
 	{
 		in24bitData[3 * idx + 2] = (char)(rand() % 256);
@@ -156,7 +157,7 @@ void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context
 	char* precallbackstr = STRINGIFY(ConvertToFloat);
 
 	//Register the callback
-	OPENCL_V_THROW (clfftSetPlanCallback(plan_handle, "convert24To32bit", precallbackstr, NULL, 0, PRECALLBACK, NULL, 0), "clFFTSetPlanCallback failed");
+	OPENCL_V_THROW (clfftSetPlanCallback(plan_handle, "convert24To32bit", precallbackstr, 0, PRECALLBACK, NULL, 0), "clFFTSetPlanCallback failed");
 
 	//	Default plan creates a plan that expects an inPlace transform with interleaved complex numbers
 	OPENCL_V_THROW( clfftSetResultLocation( plan_handle, place ), "clfftSetResultLocation failed" );
@@ -222,10 +223,222 @@ void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context
 			std::cout << "i " << i << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
 		}
 
+		//Reference fftw output
+		/*fftwf_complex *refout;
+
+		refout = get_R2C_fftwf_output(inlengths, fftLength, batchSize, inLayout, dim);
+
+		for( cl_uint i = 0; i < fftLength; i++)
+		{
+			std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
+		}
+		if (!compare<fftwf_complex, T>(refout, output, fftLength))
+		{
+			std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****FAIL*****" << std::endl;
+		}
+		else
+		{
+			std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****PASS*****" << std::endl;
+		}
+
+		fftwf_free(refout);*/
+	}
+
+	OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
+	OPENCL_V_THROW( clfftTeardown( ), "clfftTeardown failed" );
+
+	//cleanup
+	OPENCL_V_THROW( clReleaseMemObject( infftbuffer ), "Error: In clReleaseMemObject\n" );
+	OPENCL_V_THROW( clReleaseMemObject( outfftbuffer ), "Error: In clReleaseMemObject\n" );
+}
+
+template < typename T >
+void runR2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, 
+							cl_command_queue commandQueue, cl_device_id device_id,
+							size_t* inlengths, clfftDim dim, clfftPrecision precision,
+							size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count)
+{
+	cl_int status = 0;
+
+	//input/output allocation sizes
+	size_t in_size_of_buffers = fftLength * sizeof(char) * 3 ;
+	size_t out_size_of_buffers = fftLength * sizeof( T  );
+
+	char* in24bitData = (char*)malloc(in_size_of_buffers);
+
+	//Initialize Data
+	srand(1);
+	for (size_t idx = 0; idx < fftLength; ++idx)
+	{
+		in24bitData[3 * idx + 2] = (char)(rand() % 256);
+		in24bitData[3 * idx + 1] = (char)(rand() % 256);
+		in24bitData[3 * idx] = (char)(rand() % 256);
+	}
+
+	//input data buffer
+	cl_mem in24bitfftbuffer = ::clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, in_size_of_buffers, (void*)in24bitData, &status);
+    OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(in24bitfftbuffer) )" );
+
+	cl_mem in32bitfftbuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE, out_size_of_buffers, NULL, &status);
+    OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(in32bitfftbuffer) )" );
+
+	//out-place transform. 	
+	cl_mem outfftbuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE, out_size_of_buffers, NULL, &status);
+    OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(oufftbuffer) )" );
+
+	//clFFT initializations
+	
+	//	FFT state
+	clfftResultLocation	place = CLFFT_OUTOFPLACE;
+	clfftLayout	inLayout  = CLFFT_REAL;
+	clfftLayout	outLayout = CLFFT_HERMITIAN_INTERLEAVED;
+
+	clfftPlanHandle plan_handle;
+	OPENCL_V_THROW( clfftSetup( setupData.get( ) ), "clfftSetup failed" );
+	OPENCL_V_THROW( clfftCreateDefaultPlan( &plan_handle, context, dim, inlengths ), "clfftCreateDefaultPlan failed" );
+
+	//	Default plan creates a plan that expects an inPlace transform with interleaved complex numbers
+	OPENCL_V_THROW( clfftSetResultLocation( plan_handle, place ), "clfftSetResultLocation failed" );
+	OPENCL_V_THROW( clfftSetLayout( plan_handle, inLayout, outLayout ), "clfftSetLayout failed" );
+	OPENCL_V_THROW( clfftSetPlanBatchSize( plan_handle, batchSize ), "clfftSetPlanBatchSize failed" );
+	OPENCL_V_THROW( clfftSetPlanPrecision( plan_handle, precision ), "clfftSetPlanPrecision failed" );
+
+		//Bake Plan
+	OPENCL_V_THROW( clfftBakePlan( plan_handle, 1, &commandQueue, NULL, NULL ), "clfftBakePlan failed" );
+
+	//get the buffersize
+	size_t buffersize=0;
+	OPENCL_V_THROW( clfftGetTmpBufSize(plan_handle, &buffersize ), "clfftGetTmpBufSize failed" );
+
+	//allocate the intermediate buffer
+	cl_mem clMedBuffer=NULL;
+
+	if (buffersize)
+	{
+		cl_int medstatus;
+		clMedBuffer = clCreateBuffer ( context, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
+		OPENCL_V_THROW( medstatus, "Creating intmediate Buffer failed" );
+	}
+
+	//Pre-process kernel string
+	const char* source = STRINGIFY(ConvertToFloat_KERNEL);
+	
+	cl_program program = clCreateProgramWithSource( context, 1, &source, NULL, &status );
+	OPENCL_V_THROW( status, "clCreateProgramWithSource failed." );
+
+	status = clBuildProgram( program, 1, &device_id, NULL, NULL, NULL);
+	OPENCL_V_THROW( status, "clBuildProgram failed" );
+
+#if defined( _DEBUG )
+	if( status != CL_SUCCESS )
+	{
+		if( status == CL_BUILD_PROGRAM_FAILURE )
+		{
+			size_t buildLogSize = 0;
+			OPENCL_V_THROW( clGetProgramBuildInfo( program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize ),
+							"clGetProgramBuildInfo failed"  );
+
+			std::vector< char > buildLog( buildLogSize );
+			::memset( &buildLog[ 0 ], 0x0, buildLogSize );
+
+			OPENCL_V_THROW( clGetProgramBuildInfo( program, device_id, CL_PROGRAM_BUILD_LOG, buildLogSize, &buildLog[ 0 ], NULL ),
+						"clGetProgramBuildInfo failed"  );
+
+			std::cerr << "\n\t\t\tBUILD LOG\n";
+			std::cerr << "************************************************\n";
+			std::cerr << &buildLog[ 0 ] << std::endl;
+			std::cerr << "************************************************\n";
+		}
+
+		OPENCL_V_THROW( status, "clBuildProgram failed" );
+	}
+#endif
+
+	cl_kernel kernel = clCreateKernel( program, "convert24To32bit", &status );
+	OPENCL_V_THROW( status, "clCreateKernel failed" );
+
+	//for functional test
+	cl_uint uarg = 0;
+
+	//Input 24bit Buffer 
+	OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&in24bitfftbuffer ), "clSetKernelArg failed" );
+	
+	//output 32bit Buffer 
+	OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&in32bitfftbuffer ), "clSetKernelArg failed" );
+
+	//Launch pre-process kernel
+	size_t gSize = fftLength;
+	status = clEnqueueNDRangeKernel( commandQueue, kernel, 1,
+											NULL, &gSize, NULL, 0, NULL, NULL );
+	OPENCL_V_THROW( status, "clEnqueueNDRangeKernel failed" );
+	
+	OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
+
+	//Now invoke the clfft execute
+	OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, CLFFT_FORWARD, 1, &commandQueue, 0, NULL, NULL,
+		&in32bitfftbuffer, &outfftbuffer, clMedBuffer ),
+		"clfftEnqueueTransform failed" );
+		
+	OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
+	
+	if (profile_count > 1)
+	{
+		Timer tr;
+		tr.Start();
+
+		//	Loop as many times as the user specifies to average out the timings
+		for( cl_uint i = 0; i < profile_count; ++i )
+		{
+			uarg = 0;
+
+			//Input 24bit Buffer 
+			OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&in24bitfftbuffer ), "clSetKernelArg failed" );
+	
+			//output 32bit Buffer 
+			OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&in32bitfftbuffer ), "clSetKernelArg failed" );
+
+			//Launch pre-process kernel
+			status = clEnqueueNDRangeKernel( commandQueue, kernel, 1,
+													NULL, &gSize, NULL, 0, NULL, NULL );
+			OPENCL_V_THROW( status, "clEnqueueNDRangeKernel failed" );
+	
+			OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
+
+			//Now invoke the clfft execute
+			OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, CLFFT_FORWARD, 1, &commandQueue, 0, NULL, NULL,
+				&in32bitfftbuffer,  &outfftbuffer, clMedBuffer ),
+				"clfftEnqueueTransform failed" );
+		
+			OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
+		}
+		double wtimesample = tr.Sample();
+		double wtime = wtimesample/((double)profile_count);
+	
+		tout << "\nExecution wall time (Separate Pre-process Kernel): " << 1000.0*wtime << " ms" << std::endl;
+	}
+
+	//cleanup preprocess kernel opencl objects
+	OPENCL_V_THROW( clReleaseProgram( program ), "Error: In clReleaseProgram\n" );
+	OPENCL_V_THROW( clReleaseKernel( kernel ), "Error: In clReleaseKernel\n" );
+
+	if(clMedBuffer) clReleaseMemObject(clMedBuffer);
+
+	if (profile_count == 1)
+	{
+		std::vector< std::complex< T > > output( fftLength/2 );
+
+		OPENCL_V_THROW( clEnqueueReadBuffer( commandQueue, outfftbuffer, CL_TRUE, 0, out_size_of_buffers, &output[ 0 ],
+			0, NULL, NULL ), "Reading the result buffer failed" );
+
+		for( cl_uint i = 0; i < fftLength/2; i++)
+		{
+			std::cout << "i " << i << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
+		}
+
 		////Reference fftw output
 		//fftwf_complex *refout;
 
-		//refout = get_R2C_fftwf_output(inlengths, fftLength, batchSize, inLayout, dim);
+		//refout = get_C2C_fftwf_output(inlengths, fftLength, batchSize, inLayout, dim, dir);
 
 		///*for( cl_uint i = 0; i < fftLength; i++)
 		//{
@@ -233,11 +446,11 @@ void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context
 		//}*/
 		//if (!compare<fftwf_complex, T>(refout, output, fftLength))
 		//{
-		//	std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****FAIL*****" << std::endl;
+		//	std::cout << "\n\n\t\tInternal Client Test (Separate Pre-process Kernel) *****FAIL*****" << std::endl;
 		//}
 		//else
 		//{
-		//	std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****PASS*****" << std::endl;
+		//	std::cout << "\n\n\t\tInternal Client Test (Separate Pre-process Kernel) *****PASS*****" << std::endl;
 		//}
 
 		//fftwf_free(refout);
@@ -247,7 +460,8 @@ void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context
 	OPENCL_V_THROW( clfftTeardown( ), "clfftTeardown failed" );
 
 	//cleanup
-	OPENCL_V_THROW( clReleaseMemObject( infftbuffer ), "Error: In clReleaseMemObject\n" );
+	OPENCL_V_THROW( clReleaseMemObject( in24bitfftbuffer ), "Error: In clReleaseMemObject\n" );
+	OPENCL_V_THROW( clReleaseMemObject( in32bitfftbuffer ), "Error: In clReleaseMemObject\n" );
 	OPENCL_V_THROW( clReleaseMemObject( outfftbuffer ), "Error: In clReleaseMemObject\n" );
 }
 
@@ -311,43 +525,44 @@ bool compare(T1 *refData, std::vector< std::complex< T2 > > data,
 
 
 // Compute reference output using fftw for float type
-fftwf_complex* get_C2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int batch_size, clfftLayout in_layout,
-								clfftDim dim, clfftDirection dir)
+fftwf_complex* get_R2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int batch_size,
+									clfftLayout in_layout, clfftDim dim)
 {
 	//In FFTW last dimension has the fastest changing index
 	int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
+	int inembed[3] = {(int)lengths[2], (int)lengths[1], (int)(lengths[0] + 2)};
+	int outembed[3] = {(int)lengths[2], (int)lengths[1], (int)(lengths[0]/2 + 1)};
 
 	fftwf_plan refPlan;
+		
+	size_t infftVectorLength = inembed[0] * inembed[1] * inembed[2];
+	size_t outfftVectorLength = outembed[0] * outembed[1] * outembed[2];
 
-	fftwf_complex *refin = (fftwf_complex*) fftw_malloc(sizeof(fftwf_complex)*fftbatchLength);
-	fftwf_complex *refout = (fftwf_complex*) fftw_malloc(sizeof(fftwf_complex)*fftbatchLength);
-
-	size_t fftVectorLength = fftbatchLength/batch_size;
+	float *refin = (float*) malloc(sizeof(float)*fftbatchLength);
+	fftwf_complex *refout = (fftwf_complex*)fftwf_malloc(sizeof(fftwf_complex)*outfftVectorLength*batch_size);
 
-	refPlan = fftwf_plan_many_dft(dim, &fftwLengths[3 - dim], batch_size, 
-									refin, &fftwLengths[3 - dim], 1, fftVectorLength, 
-									refout, &fftwLengths[3 - dim], 1, fftVectorLength, 
-									dir, FFTW_ESTIMATE);
+	refPlan = fftwf_plan_many_dft_r2c(dim, &fftwLengths[3 - dim], batch_size, 
+									refin, &inembed[3 - dim], 1, infftVectorLength,
+									refout, &outembed[3 - dim], 1, outfftVectorLength, FFTW_ESTIMATE);
 	
-	float scalar; 
+	char* in24bitData = (char*)malloc(sizeof(char) * 3 * fftbatchLength);
+
+	//Initialize Data
+	srand(1);
+	for (size_t idx = 0; idx < fftbatchLength; ++idx)
+	{
+		in24bitData[3 * idx + 2] = (char)(rand() % 256);
+		in24bitData[3 * idx + 1] = (char)(rand() % 256);
+		in24bitData[3 * idx] = (char)(rand() % 256);
+	}
+
+	float val; 
 	
 	for( size_t i = 0; i < fftbatchLength; i++)
 	{
-		scalar = 0.0f;
-		switch (in_layout)
-		{
-		case CLFFT_COMPLEX_INTERLEAVED:
-			if ( (i % fftVectorLength)  < USERDATA_LENGTH)
-			{
-				scalar = 1.0f;
-			}
-			break;
-		default:
-			break;
-		}
-
-		refin[i][0] = scalar;
-		refin[i][1] = 0;
+		val = in24bitData[3*i + 2] << 24 | in24bitData[3*i + 1] << 16 | in24bitData[3*i] << 8 ;
+		
+		refin[i] = val;
 	}
 
 	fftwf_execute(refPlan);
@@ -357,4 +572,4 @@ fftwf_complex* get_C2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int
 	fftwf_destroy_plan(refPlan);
 
 	return refout;
-}
+}
\ No newline at end of file
diff --git a/src/callback-client/client.h b/src/callback-client/client.h
index 819735d..d229d41 100644
--- a/src/callback-client/client.h
+++ b/src/callback-client/client.h
@@ -41,13 +41,12 @@
 				return val / (float)(INT_MAX - 256);  \n \
 				}
 
-#define ConvertToFloat_KERNEL __kernel void convert24To32bit (__global void *input, \n \
-								__global void *userdata) \n \
+#define ConvertToFloat_KERNEL __kernel void convert24To32bit (__global void *input, __global void *output) \n \
 				 { \n \
 					uint inoffset = get_global_id(0); \n \
-					__global char* inData =  (__global char*)in; \n \
+					__global char* inData =  (__global char*)input; \n \
 					float val = inData[3*inoffset+2] << 24 | inData[3*inoffset+1] << 16 | inData[3*inoffset] << 8 ; \n \
-					*((__global float*)input + inoffset) = val / (float)(INT_MAX - 256);  \n \
+					*((__global float*)output + inoffset) = val / (float)(INT_MAX - 256);  \n \
 				} \n
 
 
@@ -60,8 +59,14 @@ void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context
 						size_t* inlengths, clfftDim dim, clfftPrecision precision,
 						size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
 
-fftwf_complex* get_C2C_fftwf_output(size_t* lengths, size_t fftBatchSize, int batch_size, clfftLayout in_layout,
-								clfftDim dim, clfftDirection dir);
+template < typename T >
+void runR2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, 
+							cl_command_queue commandQueue, cl_device_id device_id,
+							size_t* inlengths, clfftDim dim, clfftPrecision precision,
+							size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
+
+fftwf_complex* get_R2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int batch_size,
+									clfftLayout in_layout, clfftDim dim);
 
 template < typename T1, typename T2>
 bool compare(T1 *refData, std::vector< std::complex< T2 > > data,
diff --git a/src/include/clFFT.h b/src/include/clFFT.h
index 9fb9186..5d9f8e1 100644
--- a/src/include/clFFT.h
+++ b/src/include/clFFT.h
@@ -557,13 +557,12 @@ extern "C" {
 	 *  @param[in] plHandle Handle to a plan previously created
 	 *  @param[funcName] Callback function name
 	 *  @param[funcString] Callback function in string form
-	 *  @param[userStructString] Optional - Custom data struct in string form used by Callback function. Pass NULL callback has no custom data type
 	 *  @param[localMemSize] Optional - Local memory size if needed by callback. Pass 0 if local memory not needed by callback
 	 *  @param[callbackType] Type of callback - Pre-Callback or Post-Callback
 	 *  @param[userdata] cl_mem object passed as paarameter to callback function
 	 *  @param[numUserdataBuffers] Number of userdata buffers
 	 */
-	CLFFTAPI clfftStatus clfftSetPlanCallback(clfftPlanHandle plHandle, const char* funcName, const char* funcString, const char* userStructString, 
+	CLFFTAPI clfftStatus clfftSetPlanCallback(clfftPlanHandle plHandle, const char* funcName, const char* funcString,  
 										int localMemSize, clFFTCallbackType callbackType, cl_mem *userdata, int numUserdataBuffers);
 
 
diff --git a/src/library/accessors.cpp b/src/library/accessors.cpp
index c67373d..8ebba2b 100644
--- a/src/library/accessors.cpp
+++ b/src/library/accessors.cpp
@@ -768,9 +768,8 @@ clfftStatus clfftLocalMemSize( const clfftPlanHandle plHandle, cl_ulong* local_m
 }
 
 clfftStatus clfftSetPlanCallback(clfftPlanHandle plHandle, const char* funcName, 
-								 const char* funcString, const char* userStructString, 
-								 int localMemSize, clFFTCallbackType callbackType, 
-								 cl_mem *userdata, int numUserdataBuffers)
+								 const char* funcString, int localMemSize, 
+								 clFFTCallbackType callbackType, cl_mem *userdata, int numUserdataBuffers)
 {
 	FFTRepo& fftRepo	= FFTRepo::getInstance( );
 	FFTPlan* fftPlan	= NULL;
@@ -795,7 +794,6 @@ clfftStatus clfftSetPlanCallback(clfftPlanHandle plHandle, const char* funcName,
 
 			fftPlan->preCallback.funcname = funcName;
 			fftPlan->preCallback.funcstring = funcString;
-			fftPlan->preCallback.userdatastruct = userStructString;
 			fftPlan->preCallback.localMemSize = (localMemSize > 0) ? localMemSize : 0;
 
 			cl_mem userdataBuf = NULL;
diff --git a/src/library/generator.copy.cpp b/src/library/generator.copy.cpp
index 68cb77b..67d5d90 100644
--- a/src/library/generator.copy.cpp
+++ b/src/library/generator.copy.cpp
@@ -164,13 +164,6 @@ namespace CopyGenerator
 			//If pre-callback is set for the plan
 			if (params.fft_hasPreCallback && h2c)
 			{
-				//If user defined struct defined for callback function add it to opencl source string
-				if (params.fft_preCallback.userdatastruct != NULL)
-				{
-					str += params.fft_preCallback.userdatastruct;
-					str += "\n";
-				}
-
 				//Insert callback function code at the beginning 
 				str += params.fft_preCallback.funcstring;
 				str += "\n\n";
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index 4d0c126..ba7febe 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -3038,13 +3038,6 @@ namespace StockhamGenerator
 			std::string callbackstr;
 			if (params.fft_hasPreCallback)
 			{
-				//If user defined struct defined for callback function add it to opencl source string
-				if (params.fft_preCallback.userdatastruct != NULL)
-				{
-					callbackstr += params.fft_preCallback.userdatastruct;
-					callbackstr += "\n";
-				}
-
 				//Insert callback function code at the beginning 
 				callbackstr += params.fft_preCallback.funcstring;
 				callbackstr += "\n\n";
diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index 5359ec4..638ae3b 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -398,13 +398,6 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 	//If pre-callback is set for the plan
 	if (params.fft_hasPreCallback)
 	{
-		//If user defined struct defined for callback function add it to opencl source string
-		if (params.fft_preCallback.userdatastruct != NULL)
-		{
-			clKernWrite( transKernel, 0 ) <<  params.fft_preCallback.userdatastruct;
-			clKernWrite( transKernel, 0 ) << std::endl;
-		}
-
 		//Insert callback function code at the beginning 
 		clKernWrite( transKernel, 0 ) << params.fft_preCallback.funcstring << std::endl;
 		clKernWrite( transKernel, 0 ) << std::endl;
diff --git a/src/library/mainpage.h b/src/library/mainpage.h
index 6f3f31d..09a6b6d 100644
--- a/src/library/mainpage.h
+++ b/src/library/mainpage.h
@@ -561,7 +561,12 @@ FFT features of this library.
 
 @section Callbacks  clFFT Callbacks
 
-Callback feature of clFFT provides ability to do custom processing when reading input data or when writing output data. There are 2 types of callback, Pre-callback and Post-callback. Pre-callback invokes user callback function to do custom preprocessing of input data before FFT is executed. Post-callback invokes user callback function to do custom post-processing of output data after FFT is executed. The intent is to avoid additional kernels and kernel launches to carry out the pre/post  [...]
+Callback feature of clFFT provides ability to do custom processing when reading input data or when writing output data.
+There are 2 types of callback, Pre-callback and Post-callback. Pre-callback invokes user callback function to do 
+custom preprocessing of input data before FFT is executed. Post-callback invokes user callback function to do custom 
+post-processing of output data after FFT is executed. The intent is to avoid additional kernels and kernel launches to 
+carry out the pre/post processing. Instead, the pre/post processing logic is included in an inline opencl function 
+(one each for pre and post) and passed as a string to library which would then be incorporated into the generated FFT kernel.
 
 The current release of clFFT includes Pre-callback feature. Post-callback will be supported in a future release.
 
@@ -573,15 +578,18 @@ The workflow for callback is as below
 	<li> User registers the callback function with library by passing the OpenCL inline function as a string
 	<li> User initializes other standard FFT parameters
 	<li> User invokes Bake Plan step
-	<li> Library inserts the callback code into main FFT kernel during bake plan and compiles it. If the registered callback function does not adhere to required function prototype, the compilation fails and is reported to the user
+	<li> Library inserts the callback code into main FFT kernel during bake plan and compiles it. If the registered callback 
+	function does not adhere to required function prototype, the compilation fails and is reported to the user
 	<li> User invokes Execute FFT
 </ol>
 
-The caller is responsible to provide a string-ified callback function that matches the function prototype based on the type of callback(pre/post), type of transform(real/complex) and whether LDS is used. The bake plan step does the function prototype checking.
+The caller is responsible to provide a string-ified callback function that matches the function prototype based on the type of 
+callback(pre/post), type of transform(real/complex) and whether LDS is used. The bake plan step does the function prototype checking.
 
 @subsection CallbackFunctionPrototype Callback Function Prototypes
 
-clFFT expects the callback function to be of a specific prototype depending on the type of callback(pre/post), type of transform(real/complex) and whether LDS is used. These are as following.
+clFFT expects the callback function to be of a specific prototype depending on the type of callback(pre/post), type of transform(real/complex)
+and whether LDS is used. These are as following.
 
 @subsubsection PrecallbackProtyotype Pre-callback Prototypes
 
@@ -602,10 +610,12 @@ Parameters
 	<li> \c inputIm : Start pointer of the “Imaginary” part input buffer for Planar C2C transforms
 	<li> \c inoffset : Offset of the input buffer from the start
 	<li> \c userdata : Buffer containing optional caller specified data
-	<li> \c localmem : Pointer to local memory. This memory is allocated by library based on the size specified by user and subject to local memory availability
+	<li> \c localmem : Pointer to local memory. This memory is allocated by library based on the size specified by 
+	user and subject to local memory availability
 </ul>
 
-For Planar C2C, the return type of callback is a vector (float2/double2) whose elements contain the result for Real and Imaginary as computed in the callback
+For Planar C2C, the return type of callback is a vector (float2/double2) whose elements contain the result for Real 
+and Imaginary as computed in the callback
 
 @subsubsection SamplePrecallbackCode Sample Pre-Callback Code
 
@@ -635,7 +645,7 @@ cl_mem userdata = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PT
 //* Step 3 : Register the callback
 //**************************************************************************
 
-status = clfftSetPlanCallback(plan_handle, "mulval", precallbackstr, NULL, 0, PRECALLBACK, &userdata, 1);
+status = clfftSetPlanCallback(plan_handle, "mulval", precallbackstr, 0, PRECALLBACK, &userdata, 1);
 
 
 //**************************************************************************
diff --git a/src/library/plan.h b/src/library/plan.h
index 53be126..3dce7f9 100644
--- a/src/library/plan.h
+++ b/src/library/plan.h
@@ -100,7 +100,6 @@ typedef struct clfftCallbackParam_
 	int localMemSize;			/*!< optional local memory size if needed by callback */
 	const char* funcname;		/*!< callback function name */
 	const char* funcstring;		/*!< callback function in string form */
-	const char* userdatastruct;	/*!< optional custom data struct in string form */
 }clfftCallbackParam;
 
 struct FFTKernelGenKeyParams {
diff --git a/src/tests/cl_transform.h b/src/tests/cl_transform.h
index 4720462..99ec67b 100644
--- a/src/tests/cl_transform.h
+++ b/src/tests/cl_transform.h
@@ -665,7 +665,7 @@ public:
 		OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer() )" );
 
 		//Register the callback
-		OPENCL_V_THROW (clfftSetPlanCallback(*plan_handle, "mulval", precallbackstr, NULL, localMemSize, PRECALLBACK, &userdataBuff, 1), "clFFTSetPlanCallback failed");
+		OPENCL_V_THROW (clfftSetPlanCallback(*plan_handle, "mulval", precallbackstr, localMemSize, PRECALLBACK, &userdataBuff, 1), "clFFTSetPlanCallback failed");
 	}
 
 		/*****************************************************/
@@ -704,7 +704,7 @@ public:
 		OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer() )" );
 
 		//Register the callback
-		OPENCL_V_THROW (clfftSetPlanCallback(*plan_handle, "mulval", precallbackstr, STRINGIFY(STRUCT_USERDATA), 0, PRECALLBACK, &userdataBuff, 1), "clFFTSetPlanCallback failed");
+		OPENCL_V_THROW (clfftSetPlanCallback(*plan_handle, "mulval", precallbackstr, 0, PRECALLBACK, &userdataBuff, 1), "clFFTSetPlanCallback failed");
 	}
 
 	/*****************************************************/
diff --git a/src/tests/test_constants.h b/src/tests/test_constants.h
index 5a27db8..a83e3f7 100644
--- a/src/tests/test_constants.h
+++ b/src/tests/test_constants.h
@@ -30,13 +30,18 @@
 				return ret; \n \
 				}
 
-#define MULVAL_UDT float2 mulval(__global void* in, uint offset, __global void* userdata)\n \
-				{ \n \
-				__global USER_DATA *data = ((__global USER_DATA *)userdata + offset); \n \
-				float scalar = data->scalar1 * data->scalar2; \n \
-				float2 ret = *((__global float2*)in + offset) * scalar; \n \
-				return ret; \n \
-				}
+#define MULVAL_UDT typedef struct USER_DATA  \
+					   {  \
+						float scalar1;  \
+						float scalar2;  \
+						} USER_DATA; \n \
+					float2 mulval(__global void* in, uint offset, __global void* userdata)\n \
+					{ \n \
+					__global USER_DATA *data = ((__global USER_DATA *)userdata + offset); \n \
+					float scalar = data->scalar1 * data->scalar2; \n \
+					float2 ret = *((__global float2*)in + offset) * scalar; \n \
+					return ret; \n \
+					}
 
 #define MULVAL_DP double2 mulval(__global void* in, uint offset, __global void* userdata)\n \
 				{ \n \
@@ -91,12 +96,11 @@
 				return ret; \n \
 				}
 
-#define STRUCT_USERDATA typedef struct USER_DATA  \
-					   {  \
-						float scalar1;  \
-						float scalar2;  \
-						} USER_DATA; 
-STRUCT_USERDATA
+typedef struct USER_DATA  
+				{  
+				float scalar1;  
+				float scalar2; 
+				} USER_DATA;
 
 #define CALLBCKSTR(...) #__VA_ARGS__
 #define STRINGIFY(...) 	CALLBCKSTR(__VA_ARGS__)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list