[clfft] 65/128: Precallback - Few review comment fixes

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:39 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 0967583457a94023ad317a2fc7969644115f6c35
Author: Pradeep <pradeep.rao at amd.com>
Date:   Fri Sep 11 15:11:48 2015 +0530

    Precallback - Few review comment fixes
    
    clfftSetPlanCallback proper case for clfft prefix, cl_mem instead of
    void* for userdata parameter, multiple user data buffers parameter
    (support only 1 though for now)
---
 src/callback-client/callback-client.cpp | 339 ++++++--------------------------
 src/callback-client/client.h            |  50 ++---
 src/include/clFFT.h                     |   4 +-
 src/library/accessors.cpp               |  33 +++-
 src/library/mainpage.h                  |   2 +-
 src/library/plan.h                      |   4 +-
 src/tests/cl_transform.h                |   4 +-
 7 files changed, 118 insertions(+), 318 deletions(-)

diff --git a/src/callback-client/callback-client.cpp b/src/callback-client/callback-client.cpp
index 9a7e72b..abae16d 100644
--- a/src/callback-client/callback-client.cpp
+++ b/src/callback-client/callback-client.cpp
@@ -57,9 +57,9 @@ int main(int argc, char **argv)
 			dim	= CLFFT_3D;
 		}
 
-		 // Complex-Complex cases, SP
+		 // Real-Complex cases, SP
 		
-		C2C_transform<float>(setupData, lengths, batchSize, dim, precision, profile_count);
+		R2C_transform<float>(setupData, lengths, batchSize, dim, precision, profile_count);
 		
 	}
 	catch( std::exception& e )
@@ -71,7 +71,7 @@ int main(int argc, char **argv)
 }
 
 template < typename T >
-void C2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize, 
+void R2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize, 
 				   clfftDim dim, clfftPrecision precision,  cl_uint profile_count)
 {
 	//	OpenCL state 
@@ -83,7 +83,10 @@ void C2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths,
 	cl_uint command_queue_flags = 0;
 	command_queue_flags |= CL_QUEUE_PROFILING_ENABLE;
 	
-	size_t vectorLength = inlengths[0] * inlengths[1] * inlengths[2];
+	// Test for in-place Hermitian Interleaved output 
+	// Hence output size is N/2 + 1 complex. So allocate N + 2 real input
+	size_t Nt = inlengths[0] + 2;
+	size_t vectorLength = Nt * inlengths[1] * inlengths[2];
 	size_t fftLength = vectorLength * batchSize;
 
 	//OpenCL initializations
@@ -97,10 +100,12 @@ void C2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths,
 	if (precision == CLFFT_SINGLE)
 	{
 		//Run clFFT with seaparate Pre-process Kernel
-		runC2CPreprocessKernelFFT<float>(setupData, context, commandQueue, device_id[0], inlengths, dim, precision, batchSize, vectorLength, fftLength, profile_count);
+		/*runR2CPreprocessKernelFFT<float>(setupData, context, commandQueue, device_id[0], inlengths, dim, precision, 
+										batchSize, vectorLength, fftLength, profile_count);*/
 
 		//Run clFFT using pre-callback 
-		runC2CPrecallbackFFT<float>(setupData, context, commandQueue, inlengths, dim, precision, batchSize, vectorLength, fftLength, profile_count);
+		runR2CPrecallbackFFT<float>(setupData, context, commandQueue, inlengths, dim, precision, 
+									batchSize, vectorLength, fftLength, profile_count);
 	}
 
 	OPENCL_V_THROW( clReleaseCommandQueue( commandQueue ), "Error: In clReleaseCommandQueue\n" );
@@ -108,55 +113,50 @@ void C2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths,
 }
 
 template < typename T >
-void runC2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue,
+void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue,
 						size_t* inlengths, clfftDim dim, clfftPrecision precision,
 						size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count)
 {
 	cl_int status = 0;
-
-	size_t userdataLengths[ 3 ] = {USERDATA_LENGTH,1,1};
-	size_t vectorLength_userdata = userdataLengths[0] * userdataLengths[1] * userdataLengths[2];
-	size_t userdataLength = vectorLength_userdata * batchSize;
-
+	
 	//input/output allocation sizes
-	size_t size_of_buffers = fftLength * sizeof( std::complex< T > );
-	size_t size_of_buffers_userdata = userdataLength * sizeof( std::complex< T > );
+	size_t in_size_of_buffers = fftLength * sizeof(char) * 3 ;
+	size_t out_size_of_buffers = fftLength * sizeof( T  );
 
-	//in-place transform. Same buffer for input and output
-	cl_mem fftbuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE, size_of_buffers, NULL, &status);
-    OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(buffer) )" );
+	char* in24bitData = (char*)malloc(in_size_of_buffers);
 
 	//Initialize Data
-	std::vector< std::complex< T > > userdata( userdataLength );
-
-	// impulse test case
-	std::complex< T > impulsedata(1,0);
-	for (size_t idx = 0; idx < userdataLength; ++idx)
+	for (size_t idx = 0; idx < fftLength; ++idx)
 	{
-		userdata[idx] = impulsedata;
+		in24bitData[3 * idx + 2] = (char)(rand() % 256);
+		in24bitData[3 * idx + 1] = (char)(rand() % 256);
+		in24bitData[3 * idx] = (char)(rand() % 256);
 	}
 
-	//user data buffer
-	cl_mem userDatabuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size_of_buffers_userdata, &userdata[0], &status);
-    OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(userDatabuffer) )" );
+	//input data buffer
+	cl_mem infftbuffer = ::clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, in_size_of_buffers, (void*)in24bitData, &status);
+    OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(infftbuffer) )" );
+
+	//out-place transform. 	
+	cl_mem outfftbuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE, out_size_of_buffers, NULL, &status);
+    OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(oufftbuffer) )" );
 
 	//clFFT initializations
 	
 	//	FFT state
-	clfftResultLocation	place = CLFFT_INPLACE;
-	clfftLayout	inLayout  = CLFFT_COMPLEX_INTERLEAVED;
-	clfftLayout	outLayout = CLFFT_COMPLEX_INTERLEAVED;
-	clfftDirection dir = CLFFT_FORWARD;
+	clfftResultLocation	place = CLFFT_OUTOFPLACE;
+	clfftLayout	inLayout  = CLFFT_REAL;
+	clfftLayout	outLayout = CLFFT_HERMITIAN_INTERLEAVED;
 
 	clfftPlanHandle plan_handle;
 	OPENCL_V_THROW( clfftSetup( setupData.get( ) ), "clfftSetup failed" );
 	OPENCL_V_THROW( clfftCreateDefaultPlan( &plan_handle, context, dim, inlengths ), "clfftCreateDefaultPlan failed" );
 
 	//Precallback setup
-	char* precallbackstr = STRINGIFY(ZERO_PAD_C2C);
+	char* precallbackstr = STRINGIFY(ConvertToFloat);
 
 	//Register the callback
-	OPENCL_V_THROW (clFFTSetPlanCallback(plan_handle, "zeroPad", precallbackstr, NULL, 0, PRECALLBACK, userDatabuffer), "clFFTSetPlanCallback failed");
+	OPENCL_V_THROW (clfftSetPlanCallback(plan_handle, "convert24To32bit", precallbackstr, NULL, 0, PRECALLBACK, NULL, 0), "clFFTSetPlanCallback failed");
 
 	//	Default plan creates a plan that expects an inPlace transform with interleaved complex numbers
 	OPENCL_V_THROW( clfftSetResultLocation( plan_handle, place ), "clfftSetResultLocation failed" );
@@ -181,206 +181,13 @@ void runC2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context
 		OPENCL_V_THROW( medstatus, "Creating intmediate Buffer failed" );
 	}
 
-	cl_mem * buffersOut = NULL; //NULL for in-place
-
-	// for functional test
-	OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &commandQueue, 0, NULL, NULL,
-			&fftbuffer, buffersOut, clMedBuffer ),
-			"clfftEnqueueTransform failed" );
-		
-	OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-
-	//	Loop as many times as the user specifies to average out the timings
-	if (profile_count > 1)
-	{
-		Timer tr;
-		tr.Start();
-		
-		for( cl_uint i = 0; i < profile_count; ++i )
-		{
-			OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &commandQueue, 0, NULL, NULL,
-				&fftbuffer, buffersOut, clMedBuffer ),
-				"clfftEnqueueTransform failed" );
-		
-			OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-		}
-		double wtimesample = tr.Sample();
-		double wtime = wtimesample/((double)profile_count);
-
-		tout << "\nExecution wall time (with clFFT Pre-callback): " << 1000.0*wtime << " ms" << std::endl;
-	}
-
-	if(clMedBuffer) clReleaseMemObject(clMedBuffer);
-	
-	if (profile_count == 1)
-	{
-		std::vector< std::complex< T > > output( fftLength );
-
-		OPENCL_V_THROW( clEnqueueReadBuffer( commandQueue, fftbuffer, CL_TRUE, 0, size_of_buffers, &output[ 0 ],
-			0, NULL, NULL ), "Reading the result buffer failed" );
-
-		//Reference fftw output
-		fftwf_complex *refout;
-
-		refout = get_C2C_fftwf_output(inlengths, fftLength, batchSize, inLayout, dim, dir);
-
-		/*for( cl_uint i = 0; i < fftLength; i++)
-		{
-			std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
-		}*/
-		if (!compare<fftwf_complex, T>(refout, output, fftLength))
-		{
-			std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****FAIL*****" << std::endl;
-		}
-		else
-		{
-			std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****PASS*****" << std::endl;
-		}
-
-		fftwf_free(refout);
-	}
-
-	OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
-	OPENCL_V_THROW( clfftTeardown( ), "clfftTeardown failed" );
-
-	//cleanup
-	OPENCL_V_THROW( clReleaseMemObject( fftbuffer ), "Error: In clReleaseMemObject\n" );
-	OPENCL_V_THROW( clReleaseMemObject( userDatabuffer ), "Error: In clReleaseMemObject\n" );
-}
-
-template < typename T >
-void runC2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, 
-							cl_command_queue commandQueue, cl_device_id device_id,
-							size_t* inlengths, clfftDim dim, clfftPrecision precision,
-							size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count)
-{
-	cl_int status = 0;
-
-	size_t userdataLengths[ 3 ] = {USERDATA_LENGTH,1,1}; 
-	size_t vectorLength_userdata = userdataLengths[0] * userdataLengths[1] * userdataLengths[2];
-	size_t userdataLength = vectorLength_userdata * batchSize;
-
-	//input/output allocation sizes
-	size_t size_of_buffers = fftLength * sizeof( std::complex< T > );
-	size_t size_of_buffers_userdata = userdataLength * sizeof( std::complex< T > );
-
-	//in-place transform. Same buffer for input and output
-	cl_mem fftbuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE, size_of_buffers, NULL, &status);
-    OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(buffer) )" );
-
-	//Initialize Data
-	std::vector< std::complex< T > > userdata( userdataLength );
-
-	// impulse test case
-	std::complex< T > impulsedata(1,0);
-	for (size_t idx = 0; idx < userdataLength; ++idx)
-	{
-		userdata[idx] = impulsedata;
-	}
-
-	//user data buffer
-	cl_mem userdatabuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size_of_buffers_userdata, &userdata[0], &status);
-    OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(userdatabuffer) )" );
-
-	//clFFT initializations
-
-	//	FFT state
-	clfftResultLocation	place = CLFFT_INPLACE;
-	clfftLayout	inLayout  = CLFFT_COMPLEX_INTERLEAVED;
-	clfftLayout	outLayout = CLFFT_COMPLEX_INTERLEAVED;
-	clfftDirection dir = CLFFT_FORWARD;
-
-	clfftPlanHandle plan_handle;
-	OPENCL_V_THROW( clfftSetup( setupData.get( ) ), "clfftSetup failed" );
-	OPENCL_V_THROW( clfftCreateDefaultPlan( &plan_handle, context, dim, inlengths ), "clfftCreateDefaultPlan failed" );
-
-	//	Default plan creates a plan that expects an inPlace transform with interleaved complex numbers
-	OPENCL_V_THROW( clfftSetResultLocation( plan_handle, place ), "clfftSetResultLocation failed" );
-	OPENCL_V_THROW( clfftSetLayout( plan_handle, inLayout, outLayout ), "clfftSetLayout failed" );
-	OPENCL_V_THROW( clfftSetPlanBatchSize( plan_handle, batchSize ), "clfftSetPlanBatchSize failed" );
-	OPENCL_V_THROW( clfftSetPlanPrecision( plan_handle, precision ), "clfftSetPlanPrecision failed" );
-
-		//Bake Plan
-	OPENCL_V_THROW( clfftBakePlan( plan_handle, 1, &commandQueue, NULL, NULL ), "clfftBakePlan failed" );
-
-	//get the buffersize
-	size_t buffersize=0;
-	OPENCL_V_THROW( clfftGetTmpBufSize(plan_handle, &buffersize ), "clfftGetTmpBufSize failed" );
-
-	//allocate the intermediate buffer
-	cl_mem clMedBuffer=NULL;
-
-	if (buffersize)
-	{
-		cl_int medstatus;
-		clMedBuffer = clCreateBuffer ( context, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
-		OPENCL_V_THROW( medstatus, "Creating intmediate Buffer failed" );
-	}
-
-	cl_mem * buffersOut = NULL; //NULL for in-place
-
-	//Pre-process kernel string
-	const char* source = STRINGIFY(ZERO_PAD_C2C_KERNEL);
-	
-	cl_program program = clCreateProgramWithSource( context, 1, &source, NULL, &status );
-	OPENCL_V_THROW( status, "clCreateProgramWithSource failed." );
-
-	status = clBuildProgram( program, 1, &device_id, NULL, NULL, NULL);
-	OPENCL_V_THROW( status, "clBuildProgram failed" );
-
-#if defined( _DEBUG )
-	if( status != CL_SUCCESS )
-	{
-		if( status == CL_BUILD_PROGRAM_FAILURE )
-		{
-			size_t buildLogSize = 0;
-			OPENCL_V_THROW( clGetProgramBuildInfo( program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize ),
-							"clGetProgramBuildInfo failed"  );
-
-			std::vector< char > buildLog( buildLogSize );
-			::memset( &buildLog[ 0 ], 0x0, buildLogSize );
-
-			OPENCL_V_THROW( clGetProgramBuildInfo( program, device_id, CL_PROGRAM_BUILD_LOG, buildLogSize, &buildLog[ 0 ], NULL ),
-						"clGetProgramBuildInfo failed"  );
-
-			std::cerr << "\n\t\t\tBUILD LOG\n";
-			std::cerr << "************************************************\n";
-			std::cerr << &buildLog[ 0 ] << std::endl;
-			std::cerr << "************************************************\n";
-		}
-
-		OPENCL_V_THROW( status, "clBuildProgram failed" );
-	}
-#endif
-
-	cl_kernel kernel = clCreateKernel( program, "zeroPad", &status );
-	OPENCL_V_THROW( status, "clCreateKernel failed" );
-
 	//for functional test
-	cl_uint uarg = 0;
-
-	//Buffer to be zero-padded
-	OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&fftbuffer ), "clSetKernelArg failed" );
-
-	//originial data
-	OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&userdatabuffer ), "clSetKernelArg failed" );
-
-	//Launch pre-process kernel
-	size_t gSize = fftLength;
-	size_t lSize = 64;
-	status = clEnqueueNDRangeKernel( commandQueue, kernel, 1,
-											NULL, &gSize, &lSize, 0, NULL, NULL );
-	OPENCL_V_THROW( status, "clEnqueueNDRangeKernel failed" );
-	
-	OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-
-	//Now invoke the clfft execute
-	OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &commandQueue, 0, NULL, NULL,
-		&fftbuffer, buffersOut, clMedBuffer ),
+	OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, CLFFT_FORWARD, 1, &commandQueue, 0, NULL, NULL,
+		&infftbuffer, &outfftbuffer, clMedBuffer ),
 		"clfftEnqueueTransform failed" );
 		
 	OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-	
+
 	if (profile_count > 1)
 	{
 		Timer tr;
@@ -389,75 +196,59 @@ void runC2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_con
 		//	Loop as many times as the user specifies to average out the timings
 		for( cl_uint i = 0; i < profile_count; ++i )
 		{
-			uarg = 0;
-
-			//Buffer to be zero-padded
-			OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&fftbuffer ), "clSetKernelArg failed" );
-
-			//originial data
-			OPENCL_V_THROW( clSetKernelArg( kernel, uarg++, sizeof( cl_mem ), (void*)&userdatabuffer ), "clSetKernelArg failed" );
-
-			//Launch pre-process kernel
-			status = clEnqueueNDRangeKernel( commandQueue, kernel, 1,
-													NULL, &gSize, &lSize, 0, NULL, NULL );
-			OPENCL_V_THROW( status, "clEnqueueNDRangeKernel failed" );
-	
-			OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
-
-			//Now invoke the clfft execute
-			OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &commandQueue, 0, NULL, NULL,
-				&fftbuffer, buffersOut, clMedBuffer ),
+			OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, CLFFT_FORWARD, 1, &commandQueue, 0, NULL, NULL,
+				&infftbuffer, &outfftbuffer, clMedBuffer ),
 				"clfftEnqueueTransform failed" );
 		
 			OPENCL_V_THROW( clFinish( commandQueue ), "clFinish failed" );
 		}
-		double wtimesample =  tr.Sample();
-	
+		double wtimesample = tr.Sample();
 		double wtime = wtimesample/((double)profile_count);
 	
-		tout << "\nExecution wall time (Separate Pre-process Kernel): " << 1000.0*wtime << " ms" << std::endl;
+		tout << "\nExecution wall time (with clFFT Pre-callback): " << 1000.0*wtime << " ms" << std::endl;
 	}
 
-	//cleanup preprocess kernel opencl objects
-	OPENCL_V_THROW( clReleaseProgram( program ), "Error: In clReleaseProgram\n" );
-	OPENCL_V_THROW( clReleaseKernel( kernel ), "Error: In clReleaseKernel\n" );
-
 	if(clMedBuffer) clReleaseMemObject(clMedBuffer);
-
+	
 	if (profile_count == 1)
 	{
-		std::vector< std::complex< T > > output( fftLength );
+		std::vector< std::complex< T > > output( fftLength/2 );
 
-		OPENCL_V_THROW( clEnqueueReadBuffer( commandQueue, fftbuffer, CL_TRUE, 0, size_of_buffers, &output[ 0 ],
+		OPENCL_V_THROW( clEnqueueReadBuffer( commandQueue, outfftbuffer, CL_TRUE, 0, out_size_of_buffers, &output[ 0 ],
 			0, NULL, NULL ), "Reading the result buffer failed" );
 
-		//Reference fftw output
-		fftwf_complex *refout;
-
-		refout = get_C2C_fftwf_output(inlengths, fftLength, batchSize, inLayout, dim, dir);
-
-		/*for( cl_uint i = 0; i < fftLength; i++)
-		{
-			std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
-		}*/
-		if (!compare<fftwf_complex, T>(refout, output, fftLength))
-		{
-			std::cout << "\n\n\t\tInternal Client Test (Separate Pre-process Kernel) *****FAIL*****" << std::endl;
-		}
-		else
+		for( cl_uint i = 0; i < fftLength/2; i++)
 		{
-			std::cout << "\n\n\t\tInternal Client Test (Separate Pre-process Kernel) *****PASS*****" << std::endl;
+			std::cout << "i " << i << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
 		}
 
-		fftwf_free(refout);
+		////Reference fftw output
+		//fftwf_complex *refout;
+
+		//refout = get_R2C_fftwf_output(inlengths, fftLength, batchSize, inLayout, dim);
+
+		///*for( cl_uint i = 0; i < fftLength; i++)
+		//{
+		//	std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
+		//}*/
+		//if (!compare<fftwf_complex, T>(refout, output, fftLength))
+		//{
+		//	std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****FAIL*****" << std::endl;
+		//}
+		//else
+		//{
+		//	std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****PASS*****" << std::endl;
+		//}
+
+		//fftwf_free(refout);
 	}
 
 	OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
 	OPENCL_V_THROW( clfftTeardown( ), "clfftTeardown failed" );
 
 	//cleanup
-	OPENCL_V_THROW( clReleaseMemObject( fftbuffer ), "Error: In clReleaseMemObject\n" );
-	OPENCL_V_THROW( clReleaseMemObject( userdatabuffer ), "Error: In clReleaseMemObject\n" );
+	OPENCL_V_THROW( clReleaseMemObject( infftbuffer ), "Error: In clReleaseMemObject\n" );
+	OPENCL_V_THROW( clReleaseMemObject( outfftbuffer ), "Error: In clReleaseMemObject\n" );
 }
 
 //Compare reference and opencl output 
diff --git a/src/callback-client/client.h b/src/callback-client/client.h
index 502bed9..819735d 100644
--- a/src/callback-client/client.h
+++ b/src/callback-client/client.h
@@ -34,37 +34,31 @@
 #define USERDATA_LENGTH 512
 #define BATCH_LENGTH 1024 // Must be >= USERDATA_LENGTH
 
-#define ZERO_PAD_C2C __attribute__((always_inline)) \n float2 zeroPad (__global void *input, \n \
-								uint inoffset, \n \
-							__global void *userdata) \n \
-				 { \n \
-					 float2 scalar = 0.0f; \n \
-					 uint udoffset; \n \
-					 if ((inoffset % BATCH_LENGTH) < USERDATA_LENGTH) \n \
-					 { \n \
-					    udoffset = ((inoffset/BATCH_LENGTH) * USERDATA_LENGTH) + (inoffset % BATCH_LENGTH); \n \
-						scalar = *((__global float2*)userdata + udoffset); \n \
-					 } \n \
-					 return scalar; \n \
-				} \n
-
-#define ZERO_PAD_C2C_KERNEL __kernel void zeroPad (__global void *input, \n \
+#define ConvertToFloat float convert24To32bit(__global void* in, uint inoffset, __global void* userdata)\n \
+				{ \n \
+				__global char* inData =  (__global char*)in; \n \
+				float val = inData[3*inoffset+2] << 24 | inData[3*inoffset+1] << 16 | inData[3*inoffset] << 8 ; \n \
+				return val / (float)(INT_MAX - 256);  \n \
+				}
+
+#define ConvertToFloat_KERNEL __kernel void convert24To32bit (__global void *input, \n \
 								__global void *userdata) \n \
 				 { \n \
 					uint inoffset = get_global_id(0); \n \
-					 float2 scalar = 0.0f; \n \
-					 uint udoffset; \n \
-					 if ((inoffset % BATCH_LENGTH) < USERDATA_LENGTH) \n \
-					 { \n \
-					   udoffset = ((inoffset/BATCH_LENGTH) * USERDATA_LENGTH) + (inoffset % BATCH_LENGTH); \n \
-					   scalar = *((__global float2*)userdata + udoffset); \n \
-					 } \n \
-					 *((__global float2*)input + inoffset) = scalar; \n \
+					__global char* inData =  (__global char*)in; \n \
+					float val = inData[3*inoffset+2] << 24 | inData[3*inoffset+1] << 16 | inData[3*inoffset] << 8 ; \n \
+					*((__global float*)input + inoffset) = val / (float)(INT_MAX - 256);  \n \
 				} \n
 
+
+template < typename T >
+void R2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize, 
+				   clfftDim dim, clfftPrecision precision,  cl_uint profile_count);
+
 template < typename T >
-void C2C_transform(std::auto_ptr< clfftSetupData > setupData, size_t* inlengths, size_t batchSize, 
-				   clfftDim dim, clfftPrecision precision, cl_uint profile_count);
+void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue,
+						size_t* inlengths, clfftDim dim, clfftPrecision precision,
+						size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
 
 fftwf_complex* get_C2C_fftwf_output(size_t* lengths, size_t fftBatchSize, int batch_size, clfftLayout in_layout,
 								clfftDim dim, clfftDirection dir);
@@ -73,12 +67,6 @@ template < typename T1, typename T2>
 bool compare(T1 *refData, std::vector< std::complex< T2 > > data,
              size_t length, const float epsilon = 1e-6f);
 
-template < typename T >
-void runC2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue, size_t* inlengths, clfftDim dim, clfftPrecision precision, size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
-
-template < typename T >
-void runC2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_context context, cl_command_queue commandQueue, cl_device_id device_id, size_t* inlengths, clfftDim dim, clfftPrecision precision, size_t batchSize, size_t vectorLength, size_t fftLength, cl_uint profile_count);
-
 #ifdef WIN32
 
 struct Timer
diff --git a/src/include/clFFT.h b/src/include/clFFT.h
index 5de09b1..9fb9186 100644
--- a/src/include/clFFT.h
+++ b/src/include/clFFT.h
@@ -561,8 +561,10 @@ extern "C" {
 	 *  @param[localMemSize] Optional - Local memory size if needed by callback. Pass 0 if local memory not needed by callback
 	 *  @param[callbackType] Type of callback - Pre-Callback or Post-Callback
 	 *  @param[userdata] cl_mem object passed as paarameter to callback function
+	 *  @param[numUserdataBuffers] Number of userdata buffers
 	 */
-	CLFFTAPI clfftStatus clFFTSetPlanCallback(clfftPlanHandle plHandle, const char* funcName, const char* funcString, const char* userStructString, int localMemSize, clFFTCallbackType callbackType, void *userdata);
+	CLFFTAPI clfftStatus clfftSetPlanCallback(clfftPlanHandle plHandle, const char* funcName, const char* funcString, const char* userStructString, 
+										int localMemSize, clFFTCallbackType callbackType, cl_mem *userdata, int numUserdataBuffers);
 
 
 	/*! @brief Enqueue an FFT transform operation, and return immediately (non-blocking)
diff --git a/src/library/accessors.cpp b/src/library/accessors.cpp
index 32de44d..c67373d 100644
--- a/src/library/accessors.cpp
+++ b/src/library/accessors.cpp
@@ -767,22 +767,30 @@ clfftStatus clfftLocalMemSize( const clfftPlanHandle plHandle, cl_ulong* local_m
 	return CLFFT_SUCCESS;
 }
 
-clfftStatus clFFTSetPlanCallback(clfftPlanHandle plHandle, const char* funcName, 
+clfftStatus clfftSetPlanCallback(clfftPlanHandle plHandle, const char* funcName, 
 								 const char* funcString, const char* userStructString, 
 								 int localMemSize, clFFTCallbackType callbackType, 
-								 void *userdata)
+								 cl_mem *userdata, int numUserdataBuffers)
 {
 	FFTRepo& fftRepo	= FFTRepo::getInstance( );
 	FFTPlan* fftPlan	= NULL;
 	lockRAII* planLock	= NULL;
 
 	OPENCL_V( fftRepo.getPlan( plHandle, fftPlan, planLock ), _T( "fftRepo.getPlan failed" ) );
-	scopedLock sLock( *planLock, _T( "clFFTSetPlanCallback" ) );
+	scopedLock sLock( *planLock, _T( "clfftSetPlanCallback" ) );
 
-	if (callbackType == PRECALLBACK)
+	switch (callbackType)
 	{
-		if (funcName != NULL && funcString != NULL)
+	case PRECALLBACK:
 		{
+			ARG_CHECK(funcName != NULL);
+			ARG_CHECK(funcString != NULL);
+			ARG_CHECK(numUserdataBuffers >= 0);
+
+			//	We do not currently support multiple user data buffers
+			if( numUserdataBuffers > 1 )
+				return CLFFT_NOTIMPLEMENTED;
+
 			fftPlan->hasPreCallback = true;
 
 			fftPlan->preCallback.funcname = funcName;
@@ -790,8 +798,19 @@ clfftStatus clFFTSetPlanCallback(clfftPlanHandle plHandle, const char* funcName,
 			fftPlan->preCallback.userdatastruct = userStructString;
 			fftPlan->preCallback.localMemSize = (localMemSize > 0) ? localMemSize : 0;
 
-			fftPlan->precallUserData = userdata;
-		}		
+			cl_mem userdataBuf = NULL;
+			
+			if (userdata)
+				userdataBuf = userdata[0];
+
+			fftPlan->precallUserData = userdataBuf;
+		}
+
+		break;
+	case POSTCALLBACK:
+		return CLFFT_NOTIMPLEMENTED;
+	default:
+		ARG_CHECK (false);
 	}
 
 	return	CLFFT_SUCCESS;
diff --git a/src/library/mainpage.h b/src/library/mainpage.h
index 268da3b..6f3f31d 100644
--- a/src/library/mainpage.h
+++ b/src/library/mainpage.h
@@ -635,7 +635,7 @@ cl_mem userdata = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PT
 //* Step 3 : Register the callback
 //**************************************************************************
 
-status = clFFTSetPlanCallback(plan_handle, "mulval", precallbackstr, NULL, 0, PRECALLBACK, userdata);
+status = clfftSetPlanCallback(plan_handle, "mulval", precallbackstr, NULL, 0, PRECALLBACK, &userdata, 1);
 
 
 //**************************************************************************
diff --git a/src/library/plan.h b/src/library/plan.h
index 747137c..53be126 100644
--- a/src/library/plan.h
+++ b/src/library/plan.h
@@ -92,7 +92,7 @@ enum BlockComputeType
 #define CLFFT_MAX_INTERNAL_DIM 16
 
 /*! @brief Data structure to store the callback function string and other metadata passed by client 
-*  @details Client sets the callback function and other required parameters through clFFTSetPlanCallback() 
+*  @details Client sets the callback function and other required parameters through clfftSetPlanCallback() 
 *  in order to register the callback function. The library populates these values into this data structure
 */ 
 typedef struct clfftCallbackParam_
@@ -448,7 +448,7 @@ public:
 	bool hasPreCallback;
 
 	clfftCallbackParam preCallback;
-	void *precallUserData;
+	cl_mem precallUserData;
 
     clfftPlanHandle plHandle;
 
diff --git a/src/tests/cl_transform.h b/src/tests/cl_transform.h
index 9c6d2b2..4720462 100644
--- a/src/tests/cl_transform.h
+++ b/src/tests/cl_transform.h
@@ -665,7 +665,7 @@ public:
 		OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer() )" );
 
 		//Register the callback
-		OPENCL_V_THROW (clFFTSetPlanCallback(*plan_handle, "mulval", precallbackstr, NULL, localMemSize, PRECALLBACK, userdataBuff), "clFFTSetPlanCallback failed");
+		OPENCL_V_THROW (clfftSetPlanCallback(*plan_handle, "mulval", precallbackstr, NULL, localMemSize, PRECALLBACK, &userdataBuff, 1), "clFFTSetPlanCallback failed");
 	}
 
 		/*****************************************************/
@@ -704,7 +704,7 @@ public:
 		OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer() )" );
 
 		//Register the callback
-		OPENCL_V_THROW (clFFTSetPlanCallback(*plan_handle, "mulval", precallbackstr, STRINGIFY(STRUCT_USERDATA), 0, PRECALLBACK, userdataBuff), "clFFTSetPlanCallback failed");
+		OPENCL_V_THROW (clfftSetPlanCallback(*plan_handle, "mulval", precallbackstr, STRINGIFY(STRUCT_USERDATA), 0, PRECALLBACK, &userdataBuff, 1), "clFFTSetPlanCallback failed");
 	}
 
 	/*****************************************************/

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list