[clfft] 15/128: Precallback - C2R 2D and 3D SP

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:33 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 0c5763820ba85b6f1a2ee0725a985216065e69e7
Author: Pradeep <pradeep.rao at amd.com>
Date:   Fri Aug 14 19:23:03 2015 +0530

    Precallback - C2R 2D and 3D SP
---
 src/client-callback/callback-client.cpp | 18 ++++++++++--------
 src/library/generator.transpose.gcn.cpp | 12 ++++++------
 src/library/plan.cpp                    | 31 +++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/src/client-callback/callback-client.cpp b/src/client-callback/callback-client.cpp
index 973237e..2b1f810 100644
--- a/src/client-callback/callback-client.cpp
+++ b/src/client-callback/callback-client.cpp
@@ -206,7 +206,7 @@ bool compare(T1 *refData, std::valarray< T2 > real,
 				{
 					size_t p0 = p1 + i * o_strides[0];
 
-					diff = refData[p0] - (real[p0] * lengths[0]);
+					diff = refData[p0] - (real[p0] * lengths[0] * lengths[1] * lengths[2]);
 					error += (float)(diff * diff);
 					ref += refData[p0] * refData[p0];
 				}
@@ -326,19 +326,22 @@ fftw_complex* get_fftw_output(size_t* lengths, const size_t *inStrides, const si
 // Compute C2R reference output using fftw for float type
 float* get_fftwf_output_c2r(size_t* lengths, size_t *strides, const size_t *inStrides, const size_t *outStrides, size_t batch_size,
 								size_t fftBatchSize, size_t outfftBatchSize, size_t fftVectorSizePadded, clfftLayout in_layout,
-								size_t outfftVectorSizePadded, size_t outfftVectorSize, clfftDim dim, clfftDirection dir)
+								size_t outfftVectorSizePadded, size_t outfftVectorSize, clfftDim dim, clfftDirection dir, clfftResultLocation place)
 {
 	//In FFTW last dimension has the fastest changing index
 	int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
-
+	int inembed[3] = {(int)lengths[2], (int)lengths[1], (int)(lengths[0]/2 + 1)};
+	int lsd = (place == CLFFT_INPLACE) ? (int)(lengths[0]/2 + 1)*2 : (int)(lengths[0]);
+	int outembed[3] = {(int)lengths[2], (int)lengths[1], lsd};
+	
 	fftwf_plan refPlan;
 
 	fftwf_complex *refin = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*fftBatchSize);
 	float *refout = (float*) malloc(sizeof(float)*outfftBatchSize);
 
 	refPlan = fftwf_plan_many_dft_c2r(dim, &fftwLengths[3 - dim], (int)batch_size, 
-									refin, &fftwLengths[3 - dim], (int)inStrides[0], (int)fftVectorSizePadded, 
-									refout, &fftwLengths[3 - dim], (int)outStrides[0], (int)outfftVectorSizePadded,
+									refin, &inembed[3 - dim], (int)inStrides[0], (int)fftVectorSizePadded, 
+									refout, &outembed[3 - dim], (int)outStrides[0], (int)outfftVectorSizePadded,
 									FFTW_ESTIMATE);
 
 	// set zero
@@ -1027,7 +1030,7 @@ void compareWithReference(clfftLayout in_layout, clfftLayout out_layout, size_t
 					float *refout;
 
 					refout = get_fftwf_output_c2r(lengths, strides,  inStrides, outStrides, batch_size, fftBatchSize, outfftBatchSize, fftVectorSizePadded,
-												in_layout, outfftVectorSizePadded, outfftVectorSize, dim, dir);
+												in_layout, outfftVectorSizePadded, outfftVectorSize, dim, dir, place);
 
 					if (!compare<float, T>(refout, real, batch_size, o_strides, lengths))
 						checkflag = true;
@@ -1060,8 +1063,7 @@ void compareWithReference(clfftLayout in_layout, clfftLayout out_layout, size_t
 								{
 									checkflag = true;
 									break;
-								}
-
+								}							
 							}
 						}
 					}
diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index aebdfdc..7740592 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -557,6 +557,11 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 		size_t wIndexXEnd = params.transOutHorizontal ? params.fft_N[1] % blockSize.y : params.fft_N[0] % blockSize.x;
 		size_t wIndexYEnd = params.transOutHorizontal ? params.fft_N[0] % blockSize.x : params.fft_N[1] % blockSize.y;
 
+		//If precallback is set
+		if (params.fft_hasPreCallback)
+		{
+			clKernWrite( transKernel, 3 ) << dtComplex << " retCallback;" << std::endl;
+		}
 
 		for(size_t i = 0; i<branchBlocks; i++)
 		{
@@ -607,12 +612,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 					clKernWrite( transKernel, 3 ) << "{" << std::endl;
 				}
 
-			//If precallback is set
-			if (params.fft_hasPreCallback)
-			{
-				clKernWrite( transKernel, 6 ) << dtComplex << " retCallback;" << std::endl;
-			}
-
+			
 			clKernWrite( transKernel, 6 ) << "for( uint t=0; t < wgUnroll; t++ )" << std::endl;
 			clKernWrite( transKernel, 6 ) << "{" << std::endl;
 
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 7ad7845..26c7af6 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -2466,6 +2466,14 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 						trans1Plan->oDist *= fftPlan->length[index];
 					}
 
+					//Set callback data if set on top level plan
+					if (fftPlan->hasPreCallback)
+					{
+						trans1Plan->hasPreCallback = true;
+						trans1Plan->preCallback = fftPlan->preCallback;
+						trans1Plan->precallUserData = fftPlan->precallUserData;
+					}
+
 					OPENCL_V(clfftBakePlan(fftPlan->planTY, numQueues, commQueueFFT, NULL, NULL ),
 						_T( "BakePlan for planTY failed" ) );
 
@@ -2708,6 +2716,14 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 
 					colPlan->batchsize = fftPlan->batchsize;
 
+					//Set callback data if set on top level plan
+					if (fftPlan->hasPreCallback)
+					{
+						colPlan->hasPreCallback = true;
+						colPlan->preCallback = fftPlan->preCallback;
+						colPlan->precallUserData = fftPlan->precallUserData;
+					}
+
 					OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planY failed" ) );
 
 					// create row plan
@@ -3313,6 +3329,14 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 						trans1Plan->oDist *= fftPlan->length[index];
 					}
 
+					//Set callback data if set on top level plan
+					if (fftPlan->hasPreCallback)
+					{
+						trans1Plan->hasPreCallback = true;
+						trans1Plan->preCallback = fftPlan->preCallback;
+						trans1Plan->precallUserData = fftPlan->precallUserData;
+					}
+
 					OPENCL_V(clfftBakePlan(fftPlan->planTZ, numQueues, commQueueFFT, NULL, NULL ),
 						_T( "BakePlan for planTZ failed" ) );
 
@@ -3555,6 +3579,13 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 
 					colPlan->batchsize = fftPlan->batchsize;
 
+					//Set callback data if set on top level plan
+					if (fftPlan->hasPreCallback)
+					{
+						colPlan->hasPreCallback = true;
+						colPlan->preCallback = fftPlan->preCallback;
+						colPlan->precallUserData = fftPlan->precallUserData;
+					}
 				
 					OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list