[clfft] 14/128: Precallback - Handle C2R Hermitian Planar and single pass use cases

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:33 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 37b5a1fadfbb99bfcce4bca0a01402b9b5592b81
Author: Pradeep <pradeep.rao at amd.com>
Date:   Thu Aug 13 10:56:29 2015 +0530

    Precallback - Handle C2R Hermitian Planar and single pass use cases
---
 src/client-callback/callback-client.cpp | 43 +++++++++++--------
 src/library/accessors.cpp               |  3 +-
 src/library/generator.stockham.cpp      | 74 +++++++++++++++++++++++----------
 3 files changed, 81 insertions(+), 39 deletions(-)

diff --git a/src/client-callback/callback-client.cpp b/src/client-callback/callback-client.cpp
index c660154..973237e 100644
--- a/src/client-callback/callback-client.cpp
+++ b/src/client-callback/callback-client.cpp
@@ -361,18 +361,8 @@ float* get_fftwf_output_c2r(size_t* lengths, size_t *strides, const size_t *inSt
 	
 		for( size_t i = 0; i < fftVectorSizePadded; i = i + inStrides[0])
 		{
-			switch (in_layout)
-			{
-			case CLFFT_HERMITIAN_INTERLEAVED:
-				scalar = SCALAR + i;
-				break;
-			case CLFFT_HERMITIAN_PLANAR:
-				scalar = (int)(SCALAR + i + (SCALAR + i + 1));
-				break;
-			default:
-				break;
-			}
-
+			scalar = SCALAR + i;
+			
 			refin[p3 + i][0] *= (float)(scalar);
 			refin[p3 + i][1] *= (float)(scalar);
 		}
@@ -1131,9 +1121,9 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
 	//Valudate input and output data layout
 	validateDataLayout(in_layout, out_layout, place);
 	
-	if (hasPrecallback && !(in_layout == CLFFT_COMPLEX_INTERLEAVED || in_layout == CLFFT_COMPLEX_PLANAR || in_layout == CLFFT_HERMITIAN_INTERLEAVED))
+	if (hasPrecallback && !(in_layout == CLFFT_COMPLEX_INTERLEAVED || in_layout == CLFFT_COMPLEX_PLANAR || in_layout == CLFFT_HERMITIAN_INTERLEAVED || in_layout == CLFFT_HERMITIAN_PLANAR))
 	{
-		terr << _T("Pre-callback feature is currently supported only for Complex-Complex and Complex-Real Interleaved FFT " ) << std::endl;
+		terr << _T("Pre-callback feature is currently supported only for Complex-Complex and Complex-Real FFT " ) << std::endl;
 		return 1;
 	}
 
@@ -1224,10 +1214,9 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
 			//Register the callback
 			OPENCL_V_THROW (clFFTSetPlanCallback(plan_handle, "mulval", precallbackstr, NULL, 0, PRECALLBACK, userdata), "clFFTSetPlanCallback failed");
 		}
-
-		//C2C PLANAR 
-		if (in_layout == CLFFT_COMPLEX_PLANAR)
+		else if (in_layout == CLFFT_COMPLEX_PLANAR)
 		{	
+			//C2C PLANAR 
 			char* precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL_PLANAR) : STRINGIFY(MULVAL_PLANAR_DP);
 			USER_DATA *h_userdata = (USER_DATA*)malloc(sizeof(USER_DATA) * fftBatchSize);
 			for( size_t i = 0; i < fftBatchSize; i = i + inStrides[0])
@@ -1240,6 +1229,26 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
 			//Register the callback
 			OPENCL_V_THROW (clFFTSetPlanCallback(plan_handle, "mulval", precallbackstr, STRINGIFY(STRUCT_USERDATA), 0, PRECALLBACK, userdata), "clFFTSetPlanCallback failed");
 		}
+		else if (in_layout == CLFFT_HERMITIAN_PLANAR)
+		{	
+			//C2R HERMITIAN PLANAR 
+			char* precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL_PLANAR) : STRINGIFY(MULVAL_PLANAR_DP);
+			USER_DATA *h_userdata = (USER_DATA*)malloc(sizeof(USER_DATA) * fftBatchSize);
+			for(size_t b = 0; b < batch_size; b++)
+			{
+				size_t p3 = b * strides[3];
+	
+				for( size_t i = 0; i < fftVectorSizePadded; i = i + inStrides[0])
+				{
+					h_userdata[p3 + i].scalar1 = SCALAR + i ;
+					h_userdata[p3 + i].scalar2 = 0;
+				}
+			}
+			userdata = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(USER_DATA) * fftBatchSize, (void*)h_userdata, NULL);
+
+			//Register the callback
+			OPENCL_V_THROW (clFFTSetPlanCallback(plan_handle, "mulval", precallbackstr, STRINGIFY(STRUCT_USERDATA), 0, PRECALLBACK, userdata), "clFFTSetPlanCallback failed");
+		}
 	}
 
 	OPENCL_V_THROW( clfftBakePlan( plan_handle, 1, &queue, NULL, NULL ), "clfftBakePlan failed" );
diff --git a/src/library/accessors.cpp b/src/library/accessors.cpp
index 493c05d..808b202 100644
--- a/src/library/accessors.cpp
+++ b/src/library/accessors.cpp
@@ -781,7 +781,8 @@ clfftStatus clFFTSetPlanCallback(clfftPlanHandle plHandle, const char* funcName,
 
 	if (callbackType == PRECALLBACK)
 	{
-		if (fftPlan->inputLayout == CLFFT_COMPLEX_INTERLEAVED || fftPlan->inputLayout == CLFFT_COMPLEX_PLANAR || fftPlan->inputLayout == CLFFT_HERMITIAN_INTERLEAVED)
+		if (fftPlan->inputLayout == CLFFT_COMPLEX_INTERLEAVED || fftPlan->inputLayout == CLFFT_COMPLEX_PLANAR 
+			|| fftPlan->inputLayout == CLFFT_HERMITIAN_INTERLEAVED || fftPlan->inputLayout == CLFFT_HERMITIAN_PLANAR)
 		{
 			if (funcName != NULL && funcString != NULL)
 			{
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index 9c7e734..03771e5 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -862,7 +862,7 @@ namespace StockhamGenerator
 									{ 
 										RegBaseAndCountAndPos("", i*radix + r, regIndex); 
 									
-										hid = r / (numB * radix / 2);
+										hid = (i * radix + r) / (numB * radix / 2);
 										if (fft_doPreCallback && component == SR_COMP_REAL && hid != 0)
 										{
 											regIndexC = regIndex; regIndexC += ").y";
@@ -1229,10 +1229,17 @@ namespace StockhamGenerator
 						std::string buffer;
 
 						RegBaseAndCountAndPos("", r, regIndex); 
-						if (fft_doPreCallback && interleaved)
+						if (fft_doPreCallback)
 						{
 							 regIndex += ")";
-							 buffer = (c == 0) ? bufferRe : bufferIm;
+							 if (interleaved)
+							 {
+								buffer = (c == 0) ? bufferRe : bufferIm;
+							 }
+							 else
+							 {
+								 buffer += bufferRe; buffer += ", "; buffer += bufferIm;
+							 }
 						}
 						else
 						{
@@ -1884,7 +1891,9 @@ namespace StockhamGenerator
 					
 					if (fft_doPreCallback)
 					{
-						passStr += fft_preCallback.funcname; passStr += "("; passStr += bufferInRe; passStr += ", inOffset, userdata";
+						passStr += fft_preCallback.funcname; passStr += "("; passStr += bufferInRe; 
+						if (!inInterleaved) { passStr += ", "; passStr += bufferInIm; }
+						passStr += ", inOffset, userdata";
 						passStr += fft_preCallback.localMemSize > 0 ? ", localmem)" : ")";
 					}
 					else
@@ -1892,18 +1901,25 @@ namespace StockhamGenerator
 						passStr += bufferInRe; passStr+= "[inOffset]";
 					}
 
-					if(inInterleaved) passStr += ".x;\n\t}"; else passStr += ";\n\t}";
+					if(inInterleaved || fft_doPreCallback) passStr += ".x;\n\t}"; else passStr += ";\n\t}";
 
 					if(length > 1)
 					{
 						passStr += "\n\n\tif(rw)\n\t{";
-						SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, false, false, bufferInRe, bufferInRe, "inOffset", passStr);
+						if (fft_doPreCallback && !inInterleaved)
+						{
+							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, false, false, bufferInRe, bufferInIm, "inOffset", passStr);
+						}
+						else
+						{
+							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, false, false, bufferInRe, bufferInRe, "inOffset", passStr);
+						}
 						passStr += "\n\t}\n";
 
 						passStr += "\n\tif(rw > 1)\n\t{";
 						if (fft_doPreCallback)
 						{
-							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, true, false, bufferInIm2, bufferInIm2, "inOffset2", passStr);
+							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, true, false, bufferInRe2, bufferInIm2, "inOffset2", passStr);
 						}
 						else
 						{
@@ -1951,14 +1967,16 @@ namespace StockhamGenerator
 					
 					if (fft_doPreCallback)
 					{
-						passStr += fft_preCallback.funcname; passStr += "("; passStr += bufferInRe2; passStr += ", inOffset, userdata";
+						passStr += fft_preCallback.funcname; passStr += "("; passStr += bufferInRe2; 
+						if (!inInterleaved) { passStr += ", "; passStr += bufferInIm2; }
+						passStr += ", inOffset2, userdata";
 						passStr += fft_preCallback.localMemSize > 0 ? ", localmem)" : ")";
 					}
 					else
 					{
 						passStr += bufferInRe2; passStr+= "[inOffset]";
 					}
-					if(inInterleaved) passStr += ".x;\n\t}"; else passStr += ";\n\t}";
+					if(inInterleaved || fft_doPreCallback) passStr += ".x;\n\t}"; else passStr += ";\n\t}";
 					passStr += "\n\tif((rw == 1) && !me)\n\t{\n\t"; passStr += processBufIm; passStr += "["; passStr += processBufOffset; passStr += "] = 0;\n\t}";
 
 
@@ -1974,8 +1992,9 @@ namespace StockhamGenerator
 							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, true, false, bufferInRe2, bufferInRe2, "inOffset", passStr);
 							passStr += "\n\t}\n\telse\n\t{";
 							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, true, true, false, bufferInRe2, bufferInRe2, "inOffset", passStr);
-							passStr += "\n\t}\n";
+							passStr += "\n\t}";
 						}
+						passStr += "\n";
 
 						if(oddp)
 						{
@@ -2520,7 +2539,7 @@ namespace StockhamGenerator
 
 					//Pass precallback information to Pass object if its the first pass. 
 					//This will be used in single kernel transforms
-					if (!r2c2r && pid == 0 && params.fft_hasPreCallback)
+					if (!r2c && pid == 0 && params.fft_hasPreCallback)
 					{
 						passes[0].SetPrecallback(params.fft_hasPreCallback, params.fft_preCallback);
 					}
@@ -3226,16 +3245,21 @@ namespace StockhamGenerator
 					{
 						if(inInterleaved || inReal)
 						{
-							if(!rcSimple && !params.fft_hasPreCallback) {	str += "lwbIn2 = gbIn + iOffset2;\n\t"; }
-							if(!params.fft_hasPreCallback) { str += "lwbIn = gbIn + iOffset;\n\t"; }
+							if (!params.fft_hasPreCallback)
+							{
+								if(!rcSimple) {	str += "lwbIn2 = gbIn + iOffset2;\n\t"; }
+								str += "lwbIn = gbIn + iOffset;\n\t"; 
+							}
 						}
 						else
 						{
-							if(!rcSimple) {	str += "lwbInRe2 = gbInRe + iOffset2;\n\t"; }
-							if(!rcSimple) {	str += "lwbInIm2 = gbInIm + iOffset2;\n\t"; }
-											str += "lwbInRe = gbInRe + iOffset;\n\t";
-											str += "lwbInIm = gbInIm + iOffset;\n\t";
-
+							if (!params.fft_hasPreCallback)
+							{
+								if(!rcSimple) {	str += "lwbInRe2 = gbInRe + iOffset2;\n\t"; }
+								if(!rcSimple) {	str += "lwbInIm2 = gbInIm + iOffset2;\n\t"; }
+												str += "lwbInRe = gbInRe + iOffset;\n\t";
+												str += "lwbInIm = gbInIm + iOffset;\n\t";
+							}
 						}
 
 						if(outInterleaved || outReal)
@@ -3448,7 +3472,15 @@ namespace StockhamGenerator
 							}
 							else
 							{
-								inBuf  = (params.fft_placeness == CLFFT_INPLACE) ? "gb, gb, " : "gbIn, gbIn, " ;
+								if (params.fft_placeness == CLFFT_INPLACE) 
+								{
+									inBuf = "(__global "; inBuf += r2Type; inBuf += "*) gb, ";
+									inBuf += "(__global "; inBuf += r2Type; inBuf += "*) gb, ";
+								}
+								else
+								{
+									inBuf  = "gbIn, gbIn, " ;
+								}
 							}	
 						}
 						else							inBuf  = (params.fft_hasPreCallback) ? "gbInRe, gbInRe, gbInIm, gbInIm, " : "lwbInRe, lwbInRe2, lwbInIm, lwbInIm2, ";
@@ -3514,9 +3546,9 @@ namespace StockhamGenerator
 					str += IterRegs("&");
 
 					//if precalback set 
-					if (!r2c2r && params.fft_hasPreCallback)
+					if (!r2c && params.fft_hasPreCallback)
 					{
-						str += ", userdata";
+						str += c2r ?  ", iOffset2, userdata" : ", userdata";
 
 						if (params.fft_preCallback.localMemSize > 0)
 						{

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list