[clfft] 72/107: 2D logic update for real

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:38 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit b34393b694e491cab43f539cb268e2607e172ae6
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Fri Apr 10 18:38:17 2015 -0500

    2D logic update for real
---
 src/library/plan.cpp | 272 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 225 insertions(+), 47 deletions(-)

diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 31aca3e..4c61355 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -1803,7 +1803,11 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 				if (fftPlan->tmpBufSize==0)
 				{
 					fftPlan->tmpBufSize = Nt * length1 * fftPlan->batchsize * fftPlan->ElementSize();
-					if(fftPlan->length.size() > 2) fftPlan->tmpBufSize *= fftPlan->length[2];
+
+					for (size_t index=2; index < fftPlan->length.size(); index++)
+					{
+						fftPlan->tmpBufSize *= fftPlan->length[index];
+					}
 				}
 
 				// create row plan
@@ -1833,7 +1837,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 				rowPlan->tmpBufSize    = fftPlan->tmpBufSize;
 
 				rowPlan->gen			= fftPlan->gen;
-				rowPlan->envelope			= fftPlan->envelope;
+				rowPlan->envelope		= fftPlan->envelope;
 
 				rowPlan->batchsize    = fftPlan->batchsize;
 
@@ -1842,74 +1846,248 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 				rowPlan->oDist         = fftPlan->oDist;
 
 				//this 2d is decomposed from 3d
-				if (fftPlan->length.size()>2)
+				for (size_t index=2; index < fftPlan->length.size(); index++)
 				{
-					rowPlan->length.push_back(fftPlan->length[2]);
-					rowPlan->inStride.push_back(fftPlan->inStride[2]);
-					rowPlan->outStride.push_back(fftPlan->outStride[2]);
+					rowPlan->length.push_back(fftPlan->length[index]);
+					rowPlan->inStride.push_back(fftPlan->inStride[index]);
+					rowPlan->outStride.push_back(fftPlan->outStride[index]);
 				}
 
 
 				OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planX failed" ) );
 
-				// create col plan
-				// complex to complex
+				if( (rowPlan->inStride[0] == 1) && (rowPlan->outStride[0] == 1) &&
+					(rowPlan->inStride[1] == length0) && (rowPlan->outStride[1] == Nt) )
+				{
+					// create first transpose plan
+					
+					//Transpose 
+					// output --> tmp
+					size_t transLengths[2] = { Nt, length1 };
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planTX, fftPlan->context, CLFFT_2D, transLengths ),
+						_T( "CreateDefaultPlan Large1d transpose failed" ) );
 
-				OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planY, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimY ] ),
-					_T( "CreateDefaultPlan for planY failed" ) );
+					FFTPlan* trans1Plan	= NULL;
+					lockRAII* trans1Lock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planTX, trans1Plan, trans1Lock ), _T( "fftRepo.getPlan failed" ) );
 
-				FFTPlan* colPlan	= NULL;
-				lockRAII* colLock	= NULL;
-				OPENCL_V( fftRepo.getPlan( fftPlan->planY, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+					switch(fftPlan->outputLayout)
+					{
+					case CLFFT_HERMITIAN_INTERLEAVED:
+						{
+							trans1Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+							trans1Plan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						}
+						break;
+					case CLFFT_HERMITIAN_PLANAR:
+						{
+							trans1Plan->outputLayout = CLFFT_COMPLEX_PLANAR;
+							trans1Plan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+						}
+						break;
+					default: assert(false);
+					}
 
-				switch(fftPlan->outputLayout)
-				{
-				case CLFFT_HERMITIAN_INTERLEAVED:
+					trans1Plan->placeness     = CLFFT_OUTOFPLACE;
+					trans1Plan->precision     = fftPlan->precision;
+					trans1Plan->tmpBufSize    = fftPlan->tmpBufSize;
+					trans1Plan->batchsize     = fftPlan->batchsize;
+					trans1Plan->envelope	  = fftPlan->envelope;
+
+					trans1Plan->inStride[0]   = 1;
+					trans1Plan->inStride[1]   = Nt;
+					trans1Plan->outStride[0]  = 1;
+					trans1Plan->outStride[1]  = length1;
+					trans1Plan->iDist         = fftPlan->iDist;
+					trans1Plan->oDist		  = Nt*length1;
+
+					trans1Plan->gen           = Transpose_GCN;
+					trans1Plan->transflag     = true;
+
+					for (size_t index=2; index < fftPlan->length.size(); index++)
 					{
-						colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
-						colPlan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						trans1Plan->length.push_back(fftPlan->length[index]);
+						trans1Plan->inStride.push_back(fftPlan->inStride[index]);
+						trans1Plan->outStride.push_back(trans1Plan->oDist);
+						trans1Plan->oDist *= fftPlan->length[index];
 					}
-					break;
-				case CLFFT_HERMITIAN_PLANAR:
+
+					OPENCL_V(clfftBakePlan(fftPlan->planTX, numQueues, commQueueFFT, NULL, NULL ),
+						_T( "BakePlan for planTX failed" ) );
+
+
+					// Create column plan as a row plan
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planY, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimY ] ),
+						_T( "CreateDefaultPlan for planY failed" ) );
+
+					FFTPlan* colPlan	= NULL;
+					lockRAII* colLock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planY, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+					colPlan->outputLayout  = trans1Plan->outputLayout;
+					colPlan->inputLayout   = trans1Plan->outputLayout;
+					colPlan->placeness     = CLFFT_INPLACE;
+					colPlan->length.push_back(Nt);
+
+					colPlan->inStride[0]  = 1;
+					colPlan->inStride.push_back(length1);
+					colPlan->iDist         = Nt*length1;
+
+					colPlan->outStride[0]  = 1;
+					colPlan->outStride.push_back(length1);
+					colPlan->oDist         = Nt*length1;
+
+					colPlan->precision     = fftPlan->precision;
+					colPlan->forwardScale  = 1.0f;
+					colPlan->backwardScale = 1.0f;
+					colPlan->tmpBufSize    = fftPlan->tmpBufSize;
+
+					colPlan->gen			= fftPlan->gen;
+					colPlan->envelope		= fftPlan->envelope;
+
+					colPlan->batchsize    = fftPlan->batchsize;
+
+					//this 2d is decomposed from 3d
+					for (size_t index=2; index < fftPlan->length.size(); index++)
 					{
-						colPlan->outputLayout = CLFFT_COMPLEX_PLANAR;
-						colPlan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+						colPlan->length.push_back(fftPlan->length[index]);
+						colPlan->inStride.push_back(colPlan->iDist);
+						colPlan->outStride.push_back(colPlan->oDist);
+						colPlan->iDist *= fftPlan->length[index];
+						colPlan->oDist *= fftPlan->length[index];
 					}
-					break;
-				default: assert(false);
-				}
 
-				colPlan->placeness     = CLFFT_INPLACE;
-				colPlan->length.push_back(Nt);
+					OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ),
+						_T( "BakePlan for planY failed" ) );
 
-				colPlan->outStride[0]  = fftPlan->outStride[1];
-				colPlan->outStride.push_back(fftPlan->outStride[0]);
-				colPlan->oDist         = fftPlan->oDist;
+					if (fftPlan->transposed == CLFFT_TRANSPOSED)
+					{
+						fftPlan->baked = true;
+						return	CLFFT_SUCCESS;
+					}
 
+					// create second transpose plan
+					
+					//Transpose 
+					//output --> tmp
+					size_t trans2Lengths[2] = { length1, Nt };
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planTY, fftPlan->context, CLFFT_2D, trans2Lengths ),
+						_T( "CreateDefaultPlan Large1d transpose failed" ) );
 
-				colPlan->precision     = fftPlan->precision;
-				colPlan->forwardScale  = fftPlan->forwardScale;
-				colPlan->backwardScale = fftPlan->backwardScale;
-				colPlan->tmpBufSize    = fftPlan->tmpBufSize;
+					FFTPlan* trans2Plan	= NULL;
+					lockRAII* trans2Lock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planTY, trans2Plan, trans2Lock ), _T( "fftRepo.getPlan failed" ) );
 
-				colPlan->gen			= fftPlan->gen;
-				colPlan->envelope			= fftPlan->envelope;
+					switch(fftPlan->outputLayout)
+					{
+					case CLFFT_HERMITIAN_INTERLEAVED:
+						{
+							trans2Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+							trans2Plan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						}
+						break;
+					case CLFFT_HERMITIAN_PLANAR:
+						{
+							trans2Plan->outputLayout = CLFFT_COMPLEX_PLANAR;
+							trans2Plan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+						}
+						break;
+					default: assert(false);
+					}
 
-				colPlan->batchsize = fftPlan->batchsize;
+					trans2Plan->placeness     = CLFFT_OUTOFPLACE;
+					trans2Plan->precision     = fftPlan->precision;
+					trans2Plan->tmpBufSize    = fftPlan->tmpBufSize;
+					trans2Plan->batchsize     = fftPlan->batchsize;
+					trans2Plan->envelope	  = fftPlan->envelope;
+
+					trans2Plan->inStride[0]   = 1;
+					trans2Plan->inStride[1]   = length1;
+					trans2Plan->outStride[0]  = 1;
+					trans2Plan->outStride[1]  = Nt;
+					trans2Plan->iDist         = Nt*length1;
+					trans2Plan->oDist		  = fftPlan->oDist;
 
-				colPlan->inStride[0]  = rowPlan->outStride[1];
-				colPlan->inStride.push_back(rowPlan->outStride[0]);
-				colPlan->iDist         = rowPlan->oDist;
+					trans2Plan->gen           = Transpose_GCN;
+					trans2Plan->transflag     = true;
 
-				//this 2d is decomposed from 3d
-				if (fftPlan->length.size()>2)
-				{
-					colPlan->length.push_back(fftPlan->length[2]);
-					colPlan->outStride.push_back(fftPlan->outStride[2]);
-					colPlan->inStride.push_back(rowPlan->outStride[2]);
+					for (size_t index=2; index < fftPlan->length.size(); index++)
+					{
+						trans2Plan->length.push_back(fftPlan->length[index]);
+						trans2Plan->inStride.push_back(trans2Plan->iDist);
+						trans2Plan->iDist *= fftPlan->length[index];
+						trans2Plan->outStride.push_back(fftPlan->outStride[index]);
+
+					}
+
+					OPENCL_V(clfftBakePlan(fftPlan->planTY, numQueues, commQueueFFT, NULL, NULL ),
+						_T( "BakePlan for planTY failed" ) );
+
+
+					fftPlan->baked = true;
+					return	CLFFT_SUCCESS;
 				}
+				else
+				{
+					// create col plan
+					// complex to complex
 
-				OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planY failed" ) );
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planY, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimY ] ),
+						_T( "CreateDefaultPlan for planY failed" ) );
+
+					FFTPlan* colPlan	= NULL;
+					lockRAII* colLock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planY, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+					switch(fftPlan->outputLayout)
+					{
+					case CLFFT_HERMITIAN_INTERLEAVED:
+						{
+							colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+							colPlan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						}
+						break;
+					case CLFFT_HERMITIAN_PLANAR:
+						{
+							colPlan->outputLayout = CLFFT_COMPLEX_PLANAR;
+							colPlan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+						}
+						break;
+					default: assert(false);
+					}
+
+					colPlan->placeness     = CLFFT_INPLACE;
+					colPlan->length.push_back(Nt);
+
+					colPlan->outStride[0]  = fftPlan->outStride[1];
+					colPlan->outStride.push_back(fftPlan->outStride[0]);
+					colPlan->oDist         = fftPlan->oDist;
+
+
+					colPlan->precision     = fftPlan->precision;
+					colPlan->forwardScale  = fftPlan->forwardScale;
+					colPlan->backwardScale = fftPlan->backwardScale;
+					colPlan->tmpBufSize    = fftPlan->tmpBufSize;
+
+					colPlan->gen			= fftPlan->gen;
+					colPlan->envelope			= fftPlan->envelope;
+
+					colPlan->batchsize = fftPlan->batchsize;
+
+					colPlan->inStride[0]  = rowPlan->outStride[1];
+					colPlan->inStride.push_back(rowPlan->outStride[0]);
+					colPlan->iDist         = rowPlan->oDist;
+
+					//this 2d is decomposed from 3d
+					if (fftPlan->length.size()>2)
+					{
+						colPlan->length.push_back(fftPlan->length[2]);
+						colPlan->outStride.push_back(fftPlan->outStride[2]);
+						colPlan->inStride.push_back(rowPlan->outStride[2]);
+					}
+
+					OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planY failed" ) );
+				}
 
 			}
 			else if(fftPlan->outputLayout == CLFFT_REAL)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list