[clfft] 78/128: merging from pradeep fork to pull in pre-callback feature

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:41 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit c55b715cd92605d2ed07366ea0d586f4e5c0773e
Merge: 1e661fb 9ac208d
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Wed Sep 16 12:19:50 2015 -0500

    merging from pradeep fork to pull in pre-callback feature

 src/CMakeLists.txt                            |   15 +
 src/callback-client/CMakeLists.txt            |   62 +
 src/callback-client/callback-client.cpp       |  573 ++
 src/callback-client/client.h                  |  119 +
 src/callback-client/openCL.misc.cpp           |  533 ++
 src/callback-client/openCL.misc.h             |  151 +
 src/callback-client/stdafx.cpp                |   25 +
 src/include/clFFT.h                           |   25 +
 src/library/accessors.cpp                     |   47 +
 src/library/action.cpp                        |   13 +
 src/library/generator.copy.cpp                |  114 +-
 src/library/generator.stockham.cpp            |  850 ++-
 src/library/generator.transpose.gcn.cpp       |  130 +-
 src/library/mainpage.h                        |  142 +
 src/library/plan.cpp                          |  117 +
 src/library/plan.h                            |   23 +-
 src/tests/CMakeLists.txt                      |    4 +
 src/tests/accuracy_test_common.h              |  434 ++
 src/tests/accuracy_test_mixed_precallback.cpp |  306 +
 src/tests/accuracy_test_pow2_precallback.cpp  | 7526 +++++++++++++++++++++++++
 src/tests/accuracy_test_pow3_precallback.cpp  | 7357 ++++++++++++++++++++++++
 src/tests/accuracy_test_pow5_precallback.cpp  | 7357 ++++++++++++++++++++++++
 src/tests/buffer.h                            |  125 +
 src/tests/cl_transform.h                      |   92 +-
 src/tests/fftw_transform.h                    |   34 +
 src/tests/test_constants.h                    |   82 +
 26 files changed, 26058 insertions(+), 198 deletions(-)

diff --cc src/library/generator.transpose.gcn.cpp
index 0be927b,638ae3b..36c3d66
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@@ -383,9 -395,20 +395,17 @@@ static clfftStatus genTransposeKernel( 
      clKernWrite( transKernel, 3 ) << "size_t y;" << std::endl;
      clKernWrite( transKernel, 0 ) << "} Tile;" << std::endl << std::endl;
  
 +    if( params.fft_placeness == CLFFT_INPLACE )
-         return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
++		return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
++
+ 	//If pre-callback is set for the plan
+ 	if (params.fft_hasPreCallback)
+ 	{
+ 		//Insert callback function code at the beginning 
+ 		clKernWrite( transKernel, 0 ) << params.fft_preCallback.funcstring << std::endl;
+ 		clKernWrite( transKernel, 0 ) << std::endl;
+ 	}
  
 -    // This detects whether the input matrix is square
 -    bool notSquare = ( params.fft_N[ 0 ] == params.fft_N[ 1 ] ) ? false : true;
 -
 -    if( notSquare && (params.fft_placeness == CLFFT_INPLACE) )
 -        return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
 -
  
  	for(size_t bothDir=0; bothDir<2; bothDir++)
  	{
@@@ -976,23 -1045,23 +1066,39 @@@ static clfftStatus CalculateBlockSize(c
  	blockSize.x = lwSize.x * reShapeFactor;
  	blockSize.y = lwSize.y / reShapeFactor * loopCount;
  
 +	return CLFFT_SUCCESS;
 +}
 +
 +
 +
 +
 +//	OpenCL does not take unicode strings as input, so this routine returns only ASCII strings
 +//	Feed this generator the FFTPlan, and it returns the generated program as a string
 +clfftStatus FFTGeneratedTransposeGCNAction::generateKernel ( FFTRepo& fftRepo, const cl_command_queue commQueueFFT )
 +{
 +	
 +	size_t loopCount = 0;
 +	tile blockSize = {0, 0};
 +	OPENCL_V( CalculateBlockSize(this->signature.fft_precision, loopCount, blockSize), _T("CalculateBlockSize() failed!") );
 +
+ 	//Local memory requested by the pre-callback, added to the LDS this transpose kernel needs for its blockSize.x * blockSize.y tile, must stay strictly below the device LDS limit
+ 	if (this->signature.fft_hasPreCallback && this->signature.fft_preCallback.localMemSize > 0)
+ 	{
+ 		bool validLDSSize = false;
+ 		size_t length = blockSize.x * blockSize.y;
+ 		
+ 		validLDSSize = ((length * this->plan->ElementSize()) + this->signature.fft_preCallback.localMemSize) < this->plan->envelope.limit_LocalMemSize;
+ 		
+ 		if(!validLDSSize)
+ 		{
+ 			fprintf(stderr, "Requested local memory size not available\n");
+ 			return CLFFT_INVALID_ARG_VALUE;
+ 		}
+ 	}
+ 
++
      std::string programCode;
 -    OPENCL_V( genTransposeKernel( this->signature, programCode, lwSize, reShapeFactor, loopCount, blockSize, outRowPadding ), _T( "GenerateTransposeKernel() failed!" ) );
 +    OPENCL_V( genTransposeKernel( this->signature, programCode, lwSize, reShapeFactor, loopCount, blockSize ), _T( "GenerateTransposeKernel() failed!" ) );
  
      cl_int status = CL_SUCCESS;
      cl_device_id Device = NULL;
diff --cc src/library/plan.cpp
index e0b7181,67f9b3b..33241fb
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@@ -661,9 -648,17 +661,17 @@@ clfftStatus	clfftBakePlan( clfftPlanHan
  					trans1Plan->outStride[1]  = clLengths[1] + padding;
  					trans1Plan->iDist         = fftPlan->iDist;
  					trans1Plan->oDist         = clLengths[0] * trans1Plan->outStride[1];
 -					trans1Plan->gen           = Transpose_GCN;
 +					trans1Plan->gen           = transGen;
  					trans1Plan->transflag     = true;
  
+ 					//Propagate the pre-callback settings from the top-level plan to this transpose sub-plan
+ 					if (fftPlan->hasPreCallback)
+ 					{
+ 						trans1Plan->hasPreCallback = true;
+ 						trans1Plan->preCallback = fftPlan->preCallback;
+ 						trans1Plan->precallUserData = fftPlan->precallUserData;
+ 					}
+ 
  					OPENCL_V(clfftBakePlan(fftPlan->planTX, numQueues, commQueueFFT, NULL, NULL ),
  						_T( "BakePlan large1d trans1 plan failed" ) );
  

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list