[clfft] 78/128: merging from pradeep fork to pull in pre-callback feature
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:41 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master in repository clfft.
commit c55b715cd92605d2ed07366ea0d586f4e5c0773e
Merge: 1e661fb 9ac208d
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Wed Sep 16 12:19:50 2015 -0500
merging from pradeep fork to pull in pre-callback feature
src/CMakeLists.txt | 15 +
src/callback-client/CMakeLists.txt | 62 +
src/callback-client/callback-client.cpp | 573 ++
src/callback-client/client.h | 119 +
src/callback-client/openCL.misc.cpp | 533 ++
src/callback-client/openCL.misc.h | 151 +
src/callback-client/stdafx.cpp | 25 +
src/include/clFFT.h | 25 +
src/library/accessors.cpp | 47 +
src/library/action.cpp | 13 +
src/library/generator.copy.cpp | 114 +-
src/library/generator.stockham.cpp | 850 ++-
src/library/generator.transpose.gcn.cpp | 130 +-
src/library/mainpage.h | 142 +
src/library/plan.cpp | 117 +
src/library/plan.h | 23 +-
src/tests/CMakeLists.txt | 4 +
src/tests/accuracy_test_common.h | 434 ++
src/tests/accuracy_test_mixed_precallback.cpp | 306 +
src/tests/accuracy_test_pow2_precallback.cpp | 7526 +++++++++++++++++++++++++
src/tests/accuracy_test_pow3_precallback.cpp | 7357 ++++++++++++++++++++++++
src/tests/accuracy_test_pow5_precallback.cpp | 7357 ++++++++++++++++++++++++
src/tests/buffer.h | 125 +
src/tests/cl_transform.h | 92 +-
src/tests/fftw_transform.h | 34 +
src/tests/test_constants.h | 82 +
26 files changed, 26058 insertions(+), 198 deletions(-)
diff --cc src/library/generator.transpose.gcn.cpp
index 0be927b,638ae3b..36c3d66
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@@ -383,9 -395,20 +395,17 @@@ static clfftStatus genTransposeKernel(
clKernWrite( transKernel, 3 ) << "size_t y;" << std::endl;
clKernWrite( transKernel, 0 ) << "} Tile;" << std::endl << std::endl;
+ if( params.fft_placeness == CLFFT_INPLACE )
- return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
++ return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
++
+ //If pre-callback is set for the plan
+ if (params.fft_hasPreCallback)
+ {
+ //Insert callback function code at the beginning
+ clKernWrite( transKernel, 0 ) << params.fft_preCallback.funcstring << std::endl;
+ clKernWrite( transKernel, 0 ) << std::endl;
+ }
- // This detects whether the input matrix is square
- bool notSquare = ( params.fft_N[ 0 ] == params.fft_N[ 1 ] ) ? false : true;
-
- if( notSquare && (params.fft_placeness == CLFFT_INPLACE) )
- return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
-
for(size_t bothDir=0; bothDir<2; bothDir++)
{
@@@ -976,23 -1045,23 +1066,39 @@@ static clfftStatus CalculateBlockSize(c
blockSize.x = lwSize.x * reShapeFactor;
blockSize.y = lwSize.y / reShapeFactor * loopCount;
+ return CLFFT_SUCCESS;
+}
+
+
+
+
+// OpenCL does not take unicode strings as input, so this routine returns only ASCII strings
+// Feed this generator the FFTPlan, and it returns the generated program as a string
+clfftStatus FFTGeneratedTransposeGCNAction::generateKernel ( FFTRepo& fftRepo, const cl_command_queue commQueueFFT )
+{
+
+ size_t loopCount = 0;
+ tile blockSize = {0, 0};
+ OPENCL_V( CalculateBlockSize(this->signature.fft_precision, loopCount, blockSize), _T("CalculateBlockSize() failed!") );
+
+ //Requested local memory size by callback must not exceed the device LDS limits after factoring the LDS size required by main FFT kernel
+ if (this->signature.fft_hasPreCallback && this->signature.fft_preCallback.localMemSize > 0)
+ {
+ bool validLDSSize = false;
+ size_t length = blockSize.x * blockSize.y;
+
+ validLDSSize = ((length * this->plan->ElementSize()) + this->signature.fft_preCallback.localMemSize) < this->plan->envelope.limit_LocalMemSize;
+
+ if(!validLDSSize)
+ {
+ fprintf(stderr, "Requested local memory size not available\n");
+ return CLFFT_INVALID_ARG_VALUE;
+ }
+ }
+
++
std::string programCode;
- OPENCL_V( genTransposeKernel( this->signature, programCode, lwSize, reShapeFactor, loopCount, blockSize, outRowPadding ), _T( "GenerateTransposeKernel() failed!" ) );
+ OPENCL_V( genTransposeKernel( this->signature, programCode, lwSize, reShapeFactor, loopCount, blockSize ), _T( "GenerateTransposeKernel() failed!" ) );
cl_int status = CL_SUCCESS;
cl_device_id Device = NULL;
diff --cc src/library/plan.cpp
index e0b7181,67f9b3b..33241fb
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@@ -661,9 -648,17 +661,17 @@@ clfftStatus clfftBakePlan( clfftPlanHan
trans1Plan->outStride[1] = clLengths[1] + padding;
trans1Plan->iDist = fftPlan->iDist;
trans1Plan->oDist = clLengths[0] * trans1Plan->outStride[1];
- trans1Plan->gen = Transpose_GCN;
+ trans1Plan->gen = transGen;
trans1Plan->transflag = true;
+ //Set callback data if set on top level plan
+ if (fftPlan->hasPreCallback)
+ {
+ trans1Plan->hasPreCallback = true;
+ trans1Plan->preCallback = fftPlan->preCallback;
+ trans1Plan->precallUserData = fftPlan->precallUserData;
+ }
+
OPENCL_V(clfftBakePlan(fftPlan->planTX, numQueues, commQueueFFT, NULL, NULL ),
_T( "BakePlan large1d trans1 plan failed" ) );
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list