[clfft] 82/107: updating 3D planner logic real forward

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:39 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 05f24ca7a98f9b3257c86a9bb2dd58a00b4bd4eb
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Thu Apr 16 21:01:07 2015 -0500

    updating 3D planner logic real forward
---
 src/library/plan.cpp | 330 +++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 278 insertions(+), 52 deletions(-)

diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index e23f5e5..747a472 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -1800,15 +1800,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 				length1 = fftPlan->length[1];
 
 				size_t Nt = (1 + length0/2);
-				if (fftPlan->tmpBufSize==0)
-				{
-					fftPlan->tmpBufSize = Nt * length1 * fftPlan->batchsize * fftPlan->ElementSize();
 
-					for (size_t index=2; index < fftPlan->length.size(); index++)
-					{
-						fftPlan->tmpBufSize *= fftPlan->length[index];
-					}
-				}
 
 				// create row plan
 				// real to hermitian
@@ -1861,6 +1853,17 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					  ((rowPlan->inStride[1] == length0) && (rowPlan->placeness == CLFFT_OUTOFPLACE)) )
 					&& (rowPlan->outStride[1] == Nt) )
 				{
+					// calc temp buf size
+					if (fftPlan->tmpBufSize==0)
+					{
+						fftPlan->tmpBufSize = Nt * length1 * fftPlan->batchsize * fftPlan->ElementSize();
+
+						for (size_t index=2; index < fftPlan->length.size(); index++)
+						{
+							fftPlan->tmpBufSize *= fftPlan->length[index];
+						}
+					}
+
 					// create first transpose plan
 					
 					//Transpose 
@@ -2368,11 +2371,16 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 		{
 			if(fftPlan->inputLayout == CLFFT_REAL)
 			{
-				size_t clLengths[] = { 1, 1, 0 };
-				clLengths[0] = fftPlan->length[ DimX ];
-				clLengths[1] = fftPlan->length[ DimY ];
+
+				size_t length0 = fftPlan->length[ DimX ];
+				size_t length1 = fftPlan->length[ DimY ];
+				size_t length2 = fftPlan->length[ DimZ ];
+
+				size_t Nt = (1 + length0/2);
+
 
 				//create 2D xy plan
+				size_t clLengths[] = { length0, length1, 0 };
 				OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planX, fftPlan->context, CLFFT_2D, clLengths ),
 					_T( "CreateDefaultPlan 2D planX failed" ) );
 
@@ -2406,61 +2414,279 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 				xyPlan->iDist    = fftPlan->iDist;
 				xyPlan->oDist    = fftPlan->oDist;
 
+				//this 3d is decomposed from 4d
+				for (size_t index=3; index < fftPlan->length.size(); index++)
+				{
+					xyPlan->length.push_back(fftPlan->length[index]);
+					xyPlan->inStride.push_back(fftPlan->inStride[index]);
+					xyPlan->outStride.push_back(fftPlan->outStride[index]);
+				}
+
 				OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->2D planX failed" ) );
 
-				clLengths[0] = fftPlan->length[ DimZ ];
-				clLengths[1] = clLengths[2] = 0;
-				//create 1D col plan
-				OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planZ, fftPlan->context, CLFFT_1D, clLengths ),
-					_T( "CreateDefaultPlan for planZ failed" ) );
+				if( (xyPlan->outStride[2] == Nt*length1) &&
+					( ((xyPlan->inStride[2] == Nt*2*length1) && (xyPlan->placeness == CLFFT_INPLACE)) ||
+					  ((xyPlan->inStride[2] == length0*length1) && (xyPlan->placeness == CLFFT_OUTOFPLACE)) ) )
+				{
 
-				FFTPlan* colPlan	= NULL;
-				lockRAII* colLock	= NULL;
-				OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+					if (fftPlan->tmpBufSize==0)
+					{
+						fftPlan->tmpBufSize = Nt * length1 * length2 * fftPlan->batchsize * fftPlan->ElementSize();
 
-				switch(fftPlan->outputLayout)
-				{
-				case CLFFT_HERMITIAN_INTERLEAVED:
+						for (size_t index=3; index < fftPlan->length.size(); index++)
+						{
+							fftPlan->tmpBufSize *= fftPlan->length[index];
+						}
+					}
+
+					// create first transpose plan
+					
+					//Transpose 
+					// output --> tmp
+					size_t transLengths[2] = { length0*length1, length2 };
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planTX, fftPlan->context, CLFFT_2D, transLengths ),
+						_T( "CreateDefaultPlan for planTX transpose failed" ) );
+
+					FFTPlan* trans1Plan	= NULL;
+					lockRAII* trans1Lock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planTX, trans1Plan, trans1Lock ), _T( "fftRepo.getPlan failed" ) );
+
+					trans1Plan->transflag = true;
+
+					transLengths[0] = Nt*length1;
+					OPENCL_V(clfftSetPlanLength( fftPlan->planTX, CLFFT_2D, transLengths ),
+						_T( "clfftSetPlanLength for planTX transpose failed" ) );
+
+					switch(fftPlan->outputLayout)
 					{
-						colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
-						colPlan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+					case CLFFT_HERMITIAN_INTERLEAVED:
+						{
+							trans1Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+							trans1Plan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						}
+						break;
+					case CLFFT_HERMITIAN_PLANAR:
+						{
+							trans1Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+							trans1Plan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+						}
+						break;
+					default: assert(false);
 					}
-					break;
-				case CLFFT_HERMITIAN_PLANAR:
+
+					trans1Plan->placeness     = CLFFT_OUTOFPLACE;
+					trans1Plan->precision     = fftPlan->precision;
+					trans1Plan->tmpBufSize    = 0;
+					trans1Plan->batchsize     = fftPlan->batchsize;
+					trans1Plan->envelope	  = fftPlan->envelope;
+					trans1Plan->forwardScale  = 1.0f;
+					trans1Plan->backwardScale = 1.0f;
+
+					trans1Plan->inStride[0]   = 1;
+					trans1Plan->inStride[1]   = Nt*length1;
+					trans1Plan->outStride[0]  = 1;
+					trans1Plan->outStride[1]  = length2;
+					trans1Plan->iDist         = xyPlan->oDist;
+					trans1Plan->oDist		  = Nt*length1*length2;
+					trans1Plan->transOutHorizontal = true;
+
+					trans1Plan->gen           = Transpose_GCN;
+
+
+					for (size_t index=3; index < fftPlan->length.size(); index++)
 					{
-						colPlan->outputLayout = CLFFT_COMPLEX_PLANAR;
-						colPlan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+						trans1Plan->length.push_back(fftPlan->length[index]);
+						trans1Plan->inStride.push_back(xyPlan->outStride[index]);
+						trans1Plan->outStride.push_back(trans1Plan->oDist);
+						trans1Plan->oDist *= fftPlan->length[index];
 					}
-					break;
-				default: assert(false);
+
+					OPENCL_V(clfftBakePlan(fftPlan->planTX, numQueues, commQueueFFT, NULL, NULL ),
+						_T( "BakePlan for planTX failed" ) );
+
+					// Create column plan as a row plan
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planZ, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimZ ] ),
+						_T( "CreateDefaultPlan for planZ failed" ) );
+
+					FFTPlan* colPlan	= NULL;
+					lockRAII* colLock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+					colPlan->outputLayout  = trans1Plan->outputLayout;
+					colPlan->inputLayout   = trans1Plan->outputLayout;
+					colPlan->placeness     = CLFFT_INPLACE;
+					colPlan->length.push_back(Nt*length1);
+
+					colPlan->inStride[0]  = 1;
+					colPlan->inStride.push_back(length2);
+					colPlan->iDist         = Nt*length1*length2;
+
+					colPlan->outStride[0]  = 1;
+					colPlan->outStride.push_back(length2);
+					colPlan->oDist         = Nt*length1*length2;
+
+					colPlan->precision     = fftPlan->precision;
+					colPlan->forwardScale  = fftPlan->forwardScale;
+					colPlan->backwardScale = fftPlan->backwardScale;
+					colPlan->tmpBufSize    = 0;
+
+					colPlan->gen			= fftPlan->gen;
+					colPlan->envelope		= fftPlan->envelope;
+
+					colPlan->batchsize    = fftPlan->batchsize;
+
+					//this 3d is decomposed from 4d
+					for (size_t index=3; index < fftPlan->length.size(); index++)
+					{
+						colPlan->length.push_back(fftPlan->length[index]);
+						colPlan->inStride.push_back(colPlan->iDist);
+						colPlan->outStride.push_back(colPlan->oDist);
+						colPlan->iDist *= fftPlan->length[index];
+						colPlan->oDist *= fftPlan->length[index];
+					}
+
+					OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ),
+						_T( "BakePlan for planZ failed" ) );
+
+					if (fftPlan->transposed == CLFFT_TRANSPOSED)
+					{
+						fftPlan->baked = true;
+						return	CLFFT_SUCCESS;
+					}
+
+					// create second transpose plan
+					
+					//Transpose 
+					//tmp --> output
+					size_t trans2Lengths[2] = { length2, length0*length1 };
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planTY, fftPlan->context, CLFFT_2D, trans2Lengths ),
+						_T( "CreateDefaultPlan for planTY transpose failed" ) );
+
+					FFTPlan* trans2Plan	= NULL;
+					lockRAII* trans2Lock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planTY, trans2Plan, trans2Lock ), _T( "fftRepo.getPlan failed" ) );
+
+					trans2Plan->transflag = true;
+
+					trans2Lengths[1] = Nt*length1;
+					OPENCL_V(clfftSetPlanLength( fftPlan->planTY, CLFFT_2D, trans2Lengths ),
+						_T( "clfftSetPlanLength for planTY transpose failed" ) );
+
+					switch(fftPlan->outputLayout)
+					{
+					case CLFFT_HERMITIAN_INTERLEAVED:
+						{
+							trans2Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+							trans2Plan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						}
+						break;
+					case CLFFT_HERMITIAN_PLANAR:
+						{
+							trans2Plan->outputLayout = CLFFT_COMPLEX_PLANAR;
+							trans2Plan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						}
+						break;
+					default: assert(false);
+					}
+
+					trans2Plan->placeness     = CLFFT_OUTOFPLACE;
+					trans2Plan->precision     = fftPlan->precision;
+					trans2Plan->tmpBufSize    = 0;
+					trans2Plan->batchsize     = fftPlan->batchsize;
+					trans2Plan->envelope	  = fftPlan->envelope;
+					trans2Plan->forwardScale  = 1.0f;
+					trans2Plan->backwardScale = 1.0f;
+
+					trans2Plan->inStride[0]   = 1;
+					trans2Plan->inStride[1]   = length2;
+					trans2Plan->outStride[0]  = 1;
+					trans2Plan->outStride[1]  = Nt*length1;
+					trans2Plan->iDist         = Nt*length1*length2;
+					trans2Plan->oDist		  = fftPlan->oDist;
+
+					trans2Plan->gen           = Transpose_GCN;
+					trans2Plan->transflag     = true;
+
+					for (size_t index=3; index < fftPlan->length.size(); index++)
+					{
+						trans2Plan->length.push_back(fftPlan->length[index]);
+						trans2Plan->inStride.push_back(trans2Plan->iDist);
+						trans2Plan->iDist *= fftPlan->length[index];
+						trans2Plan->outStride.push_back(fftPlan->outStride[index]);
+					}
+
+					OPENCL_V(clfftBakePlan(fftPlan->planTY, numQueues, commQueueFFT, NULL, NULL ),
+						_T( "BakePlan for planTY failed" ) );
+
+
+					fftPlan->baked = true;
+					return	CLFFT_SUCCESS;
 				}
+				else
+				{
 
-				colPlan->placeness     = CLFFT_INPLACE;
-				colPlan->precision     = fftPlan->precision;
-				colPlan->forwardScale  = fftPlan->forwardScale;
-				colPlan->backwardScale = fftPlan->backwardScale;
-				colPlan->tmpBufSize    = fftPlan->tmpBufSize;
+					clLengths[0] = fftPlan->length[ DimZ ];
+					clLengths[1] = clLengths[2] = 0;
+					//create 1D col plan
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planZ, fftPlan->context, CLFFT_1D, clLengths ),
+						_T( "CreateDefaultPlan for planZ failed" ) );
 
-				colPlan->gen			 = fftPlan->gen;
-				colPlan->envelope			 = fftPlan->envelope;
+					FFTPlan* colPlan	= NULL;
+					lockRAII* colLock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
 
-				// This is a column FFT, the first elements distance between each FFT is the distance of the first two
-				// elements in the original buffer. Like a transpose of the matrix
-				colPlan->batchsize = fftPlan->batchsize;
-				colPlan->inStride[0] = fftPlan->outStride[2];
-				colPlan->outStride[0] = fftPlan->outStride[2];
+					switch(fftPlan->outputLayout)
+					{
+					case CLFFT_HERMITIAN_INTERLEAVED:
+						{
+							colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+							colPlan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						}
+						break;
+					case CLFFT_HERMITIAN_PLANAR:
+						{
+							colPlan->outputLayout = CLFFT_COMPLEX_PLANAR;
+							colPlan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+						}
+						break;
+					default: assert(false);
+					}
 
-				//pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
-				colPlan->length.push_back(1 + fftPlan->length[0]/2);
-				colPlan->length.push_back(fftPlan->length[1]);
-				colPlan->inStride.push_back(fftPlan->outStride[0]);
-				colPlan->inStride.push_back(fftPlan->outStride[1]);
-				colPlan->outStride.push_back(fftPlan->outStride[0]);
-				colPlan->outStride.push_back(fftPlan->outStride[1]);
-				colPlan->iDist    = fftPlan->oDist;
-				colPlan->oDist    = fftPlan->oDist;
+					colPlan->placeness     = CLFFT_INPLACE;
+					colPlan->precision     = fftPlan->precision;
+					colPlan->forwardScale  = fftPlan->forwardScale;
+					colPlan->backwardScale = fftPlan->backwardScale;
+					colPlan->tmpBufSize    = fftPlan->tmpBufSize;
 
-				OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
+					colPlan->gen			 = fftPlan->gen;
+					colPlan->envelope			 = fftPlan->envelope;
+
+					// This is a column FFT, the first elements distance between each FFT is the distance of the first two
+					// elements in the original buffer. Like a transpose of the matrix
+					colPlan->batchsize = fftPlan->batchsize;
+					colPlan->inStride[0] = fftPlan->outStride[2];
+					colPlan->outStride[0] = fftPlan->outStride[2];
+
+					//pass length and other info to kernel, so the kernel knows this is decomposed from higher dimension
+					colPlan->length.push_back(1 + fftPlan->length[0]/2);
+					colPlan->length.push_back(fftPlan->length[1]);
+					colPlan->inStride.push_back(fftPlan->outStride[0]);
+					colPlan->inStride.push_back(fftPlan->outStride[1]);
+					colPlan->outStride.push_back(fftPlan->outStride[0]);
+					colPlan->outStride.push_back(fftPlan->outStride[1]);
+					colPlan->iDist    = fftPlan->oDist;
+					colPlan->oDist    = fftPlan->oDist;
+
+					//this 3d is decomposed from 4d
+					for (size_t index=3; index < fftPlan->length.size(); index++)
+					{
+						colPlan->length.push_back(fftPlan->length[index]);
+						colPlan->inStride.push_back(xyPlan->outStride[index]);
+						colPlan->outStride.push_back(fftPlan->outStride[index]);
+					}
+
+					OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
+				}
 			}
 			else if(fftPlan->outputLayout == CLFFT_REAL)
 			{

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list