[clfft] 92/107: fixing dev issues
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:41 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 862cb2c7194525ed91c38def1a9953e0e1425c75
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Mon Jun 15 20:30:06 2015 -0500
fixing dev issues
---
src/library/generator.transpose.gcn.cpp | 32 ++++++++++++++++++++++-----
src/library/plan.cpp | 39 ++++++++++++++++++++++-----------
2 files changed, 52 insertions(+), 19 deletions(-)
diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index 01afca1..c5f7ebf 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -451,8 +451,10 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
break;
case CLFFT_HERMITIAN_INTERLEAVED:
case CLFFT_HERMITIAN_PLANAR:
- case CLFFT_REAL:
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+ case CLFFT_REAL:
+ clKernWrite( transKernel, 3 ) << "local " << dtPlanar << " lds[ " << ldsSize.x << " ][ " << ldsSize.y << " ];" << std::endl << std::endl;
+ break;
}
@@ -474,12 +476,23 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
break;
case CLFFT_HERMITIAN_INTERLEAVED:
case CLFFT_HERMITIAN_PLANAR:
- case CLFFT_REAL:
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+ case CLFFT_REAL:
+ clKernWrite( transKernel, 3 ) << "global " << dtInput << "* tileIn = " << pmRealIn << " + iOffset;" << std::endl;
+ break;
+
}
// This is the loop reading through the Tile
- clKernWrite( transKernel, 3 ) << dtComplex << " tmp;" << std::endl;
+ if( params.fft_inputLayout == CLFFT_REAL )
+ {
+ clKernWrite( transKernel, 3 ) << dtPlanar << " tmp;" << std::endl;
+ }
+ else
+ {
+ clKernWrite( transKernel, 3 ) << dtComplex << " tmp;" << std::endl;
+ }
+
clKernWrite( transKernel, 3 ) << "rowSizeinUnits = " << params.fft_inStride[ 1 ] << ";" << std::endl;
clKernWrite( transKernel, 3 ) << std::endl << std::endl;
@@ -626,8 +639,11 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
break;
case CLFFT_HERMITIAN_INTERLEAVED:
case CLFFT_HERMITIAN_PLANAR:
- case CLFFT_REAL:
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+ case CLFFT_REAL:
+ clKernWrite( transKernel, 9 ) << "tmp = tileIn[ gInd ];" << std::endl;
+ break;
+
}
if(branchingInAny)
@@ -668,8 +684,10 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
break;
case CLFFT_HERMITIAN_INTERLEAVED:
case CLFFT_HERMITIAN_PLANAR:
- case CLFFT_REAL:
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+ case CLFFT_REAL:
+ clKernWrite( transKernel, 3 ) << "global " << dtOutput << "* tileOut = " << pmRealOut << " + oOffset;" << std::endl << std::endl;
+ break;
}
// Write the transposed values from LDS into global memory
@@ -791,8 +809,10 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
break;
case CLFFT_HERMITIAN_INTERLEAVED:
case CLFFT_HERMITIAN_PLANAR:
- case CLFFT_REAL:
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+ case CLFFT_REAL:
+ clKernWrite( transKernel, 9 ) << "tileOut[ gInd ] = tmp;" << std::endl;
+ break;
}
if(branchingInAny)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 1355a6c..5234d72 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -891,6 +891,8 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
lockRAII* trans2Lock = NULL;
OPENCL_V( fftRepo.getPlan( fftPlan->planTY, trans2Plan, trans2Lock ), _T( "fftRepo.getPlan failed" ) );
+ trans2Plan->transflag = true;
+
size_t transLengths[2];
transLengths[0] = 1 + clLengths[1]/2;
transLengths[1] = clLengths[0];
@@ -909,11 +911,12 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans2Plan->inStride[0] = 1;
trans2Plan->inStride[1] = 1 + clLengths[1]/2;
trans2Plan->outStride[0] = 1;
- trans2Plan->outStride[1] = clLengths[0] + padding;
+ trans2Plan->outStride[1] = clLengths[0];
trans2Plan->iDist = clLengths[0] * trans2Plan->inStride[1];
- trans2Plan->oDist = (1 + clLengths[1]/2) * trans2Plan->outStride[1];
+ trans2Plan->oDist = fftPlan->oDist;
trans2Plan->gen = Transpose_GCN;
trans2Plan->transflag = true;
+ trans2Plan->transOutHorizontal = true;
OPENCL_V(clfftBakePlan(fftPlan->planTY, numQueues, commQueueFFT, NULL, NULL ),
_T( "BakePlan large1d trans2 plan failed" ) );
@@ -964,6 +967,13 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
lockRAII* trans3Lock = NULL;
OPENCL_V( fftRepo.getPlan( fftPlan->planTZ, trans3Plan, trans3Lock ), _T( "fftRepo.getPlan failed" ) );
+ trans3Plan->transflag = true;
+
+ transLengths[0] = 1 + clLengths[0]/2;
+ transLengths[1] = clLengths[1];
+ OPENCL_V(clfftSetPlanLength( fftPlan->planTZ, CLFFT_2D, transLengths ),
+ _T( "clfftSetPlanLength for planTZ transpose failed" ) );
+
trans3Plan->placeness = CLFFT_OUTOFPLACE;
trans3Plan->precision = fftPlan->precision;
trans3Plan->tmpBufSize = 0;
@@ -1707,6 +1717,20 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
break;
case CLFFT_2D:
{
+
+ if (fftPlan->transflag) //Transpose for 2D
+ {
+ clfftStatus err;
+ if(fftPlan->gen == Transpose_GCN)
+ fftPlan->action = new FFTGeneratedTransposeGCNAction(plHandle, fftPlan, *commQueueFFT, err);
+ else
+ fftPlan->action = new FFTGeneratedTransposeVLIWAction(plHandle, fftPlan, *commQueueFFT, err);
+ OPENCL_V( err, "FFTGeneratedTransposeVLIWAction failed");
+
+ fftPlan->baked = true;
+ return CLFFT_SUCCESS;
+ }
+
size_t length0 = fftPlan->length[0];
size_t length1 = fftPlan->length[1];
@@ -1740,18 +1764,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
while (1 && (fftPlan->inputLayout != CLFFT_REAL) && (fftPlan->outputLayout != CLFFT_REAL))
{
//break;
- if (fftPlan->transflag) //Transpose for 2D
- {
- clfftStatus err;
- if(fftPlan->gen == Transpose_GCN)
- fftPlan->action = new FFTGeneratedTransposeGCNAction(plHandle, fftPlan, *commQueueFFT, err);
- else
- fftPlan->action = new FFTGeneratedTransposeVLIWAction(plHandle, fftPlan, *commQueueFFT, err);
- OPENCL_V( err, "FFTGeneratedTransposeVLIWAction failed");
- fftPlan->baked = true;
- return CLFFT_SUCCESS;
- }
// TODO : Check for a better way to do this.
bool isnvidia = false;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list