[clfft] 14/128: Precallback - Handle C2R Hermitian Planar and single pass use cases
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:33 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master in repository clfft.
commit 37b5a1fadfbb99bfcce4bca0a01402b9b5592b81
Author: Pradeep <pradeep.rao at amd.com>
Date: Thu Aug 13 10:56:29 2015 +0530
Precallback - Handle C2R Hermitian Planar and single pass use cases
---
src/client-callback/callback-client.cpp | 43 +++++++++++--------
src/library/accessors.cpp | 3 +-
src/library/generator.stockham.cpp | 74 +++++++++++++++++++++++----------
3 files changed, 81 insertions(+), 39 deletions(-)
diff --git a/src/client-callback/callback-client.cpp b/src/client-callback/callback-client.cpp
index c660154..973237e 100644
--- a/src/client-callback/callback-client.cpp
+++ b/src/client-callback/callback-client.cpp
@@ -361,18 +361,8 @@ float* get_fftwf_output_c2r(size_t* lengths, size_t *strides, const size_t *inSt
for( size_t i = 0; i < fftVectorSizePadded; i = i + inStrides[0])
{
- switch (in_layout)
- {
- case CLFFT_HERMITIAN_INTERLEAVED:
- scalar = SCALAR + i;
- break;
- case CLFFT_HERMITIAN_PLANAR:
- scalar = (int)(SCALAR + i + (SCALAR + i + 1));
- break;
- default:
- break;
- }
-
+ scalar = SCALAR + i;
+
refin[p3 + i][0] *= (float)(scalar);
refin[p3 + i][1] *= (float)(scalar);
}
@@ -1131,9 +1121,9 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
//Valudate input and output data layout
validateDataLayout(in_layout, out_layout, place);
- if (hasPrecallback && !(in_layout == CLFFT_COMPLEX_INTERLEAVED || in_layout == CLFFT_COMPLEX_PLANAR || in_layout == CLFFT_HERMITIAN_INTERLEAVED))
+ if (hasPrecallback && !(in_layout == CLFFT_COMPLEX_INTERLEAVED || in_layout == CLFFT_COMPLEX_PLANAR || in_layout == CLFFT_HERMITIAN_INTERLEAVED || in_layout == CLFFT_HERMITIAN_PLANAR))
{
- terr << _T("Pre-callback feature is currently supported only for Complex-Complex and Complex-Real Interleaved FFT " ) << std::endl;
+ terr << _T("Pre-callback feature is currently supported only for Complex-Complex and Complex-Real FFT " ) << std::endl;
return 1;
}
@@ -1224,10 +1214,9 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
//Register the callback
OPENCL_V_THROW (clFFTSetPlanCallback(plan_handle, "mulval", precallbackstr, NULL, 0, PRECALLBACK, userdata), "clFFTSetPlanCallback failed");
}
-
- //C2C PLANAR
- if (in_layout == CLFFT_COMPLEX_PLANAR)
+ else if (in_layout == CLFFT_COMPLEX_PLANAR)
{
+ //C2C PLANAR
char* precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL_PLANAR) : STRINGIFY(MULVAL_PLANAR_DP);
USER_DATA *h_userdata = (USER_DATA*)malloc(sizeof(USER_DATA) * fftBatchSize);
for( size_t i = 0; i < fftBatchSize; i = i + inStrides[0])
@@ -1240,6 +1229,26 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
//Register the callback
OPENCL_V_THROW (clFFTSetPlanCallback(plan_handle, "mulval", precallbackstr, STRINGIFY(STRUCT_USERDATA), 0, PRECALLBACK, userdata), "clFFTSetPlanCallback failed");
}
+ else if (in_layout == CLFFT_HERMITIAN_PLANAR)
+ {
+ //C2C PLANAR
+ char* precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL_PLANAR) : STRINGIFY(MULVAL_PLANAR_DP);
+ USER_DATA *h_userdata = (USER_DATA*)malloc(sizeof(USER_DATA) * fftBatchSize);
+ for(size_t b = 0; b < batch_size; b++)
+ {
+ size_t p3 = b * strides[3];
+
+ for( size_t i = 0; i < fftVectorSizePadded; i = i + inStrides[0])
+ {
+ h_userdata[p3 + i].scalar1 = SCALAR + i ;
+ h_userdata[p3 + i].scalar2 = 0;
+ }
+ }
+ userdata = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(USER_DATA) * fftBatchSize, (void*)h_userdata, NULL);
+
+ //Register the callback
+ OPENCL_V_THROW (clFFTSetPlanCallback(plan_handle, "mulval", precallbackstr, STRINGIFY(STRUCT_USERDATA), 0, PRECALLBACK, userdata), "clFFTSetPlanCallback failed");
+ }
}
OPENCL_V_THROW( clfftBakePlan( plan_handle, 1, &queue, NULL, NULL ), "clfftBakePlan failed" );
diff --git a/src/library/accessors.cpp b/src/library/accessors.cpp
index 493c05d..808b202 100644
--- a/src/library/accessors.cpp
+++ b/src/library/accessors.cpp
@@ -781,7 +781,8 @@ clfftStatus clFFTSetPlanCallback(clfftPlanHandle plHandle, const char* funcName,
if (callbackType == PRECALLBACK)
{
- if (fftPlan->inputLayout == CLFFT_COMPLEX_INTERLEAVED || fftPlan->inputLayout == CLFFT_COMPLEX_PLANAR || fftPlan->inputLayout == CLFFT_HERMITIAN_INTERLEAVED)
+ if (fftPlan->inputLayout == CLFFT_COMPLEX_INTERLEAVED || fftPlan->inputLayout == CLFFT_COMPLEX_PLANAR
+ || fftPlan->inputLayout == CLFFT_HERMITIAN_INTERLEAVED || fftPlan->inputLayout == CLFFT_HERMITIAN_PLANAR)
{
if (funcName != NULL && funcString != NULL)
{
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index 9c7e734..03771e5 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -862,7 +862,7 @@ namespace StockhamGenerator
{
RegBaseAndCountAndPos("", i*radix + r, regIndex);
- hid = r / (numB * radix / 2);
+ hid = (i * radix + r) / (numB * radix / 2);
if (fft_doPreCallback && component == SR_COMP_REAL && hid != 0)
{
regIndexC = regIndex; regIndexC += ").y";
@@ -1229,10 +1229,17 @@ namespace StockhamGenerator
std::string buffer;
RegBaseAndCountAndPos("", r, regIndex);
- if (fft_doPreCallback && interleaved)
+ if (fft_doPreCallback)
{
regIndex += ")";
- buffer = (c == 0) ? bufferRe : bufferIm;
+ if (interleaved)
+ {
+ buffer = (c == 0) ? bufferRe : bufferIm;
+ }
+ else
+ {
+ buffer += bufferRe; buffer += ", "; buffer += bufferIm;
+ }
}
else
{
@@ -1884,7 +1891,9 @@ namespace StockhamGenerator
if (fft_doPreCallback)
{
- passStr += fft_preCallback.funcname; passStr += "("; passStr += bufferInRe; passStr += ", inOffset, userdata";
+ passStr += fft_preCallback.funcname; passStr += "("; passStr += bufferInRe;
+ if (!inInterleaved) { passStr += ", "; passStr += bufferInIm; }
+ passStr += ", inOffset, userdata";
passStr += fft_preCallback.localMemSize > 0 ? ", localmem)" : ")";
}
else
@@ -1892,18 +1901,25 @@ namespace StockhamGenerator
passStr += bufferInRe; passStr+= "[inOffset]";
}
- if(inInterleaved) passStr += ".x;\n\t}"; else passStr += ";\n\t}";
+ if(inInterleaved || fft_doPreCallback) passStr += ".x;\n\t}"; else passStr += ";\n\t}";
if(length > 1)
{
passStr += "\n\n\tif(rw)\n\t{";
- SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, false, false, bufferInRe, bufferInRe, "inOffset", passStr);
+ if (fft_doPreCallback && !inInterleaved)
+ {
+ SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, false, false, bufferInRe, bufferInIm, "inOffset", passStr);
+ }
+ else
+ {
+ SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, false, false, bufferInRe, bufferInRe, "inOffset", passStr);
+ }
passStr += "\n\t}\n";
passStr += "\n\tif(rw > 1)\n\t{";
if (fft_doPreCallback)
{
- SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, true, false, bufferInIm2, bufferInIm2, "inOffset2", passStr);
+ SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, true, false, bufferInRe2, bufferInIm2, "inOffset2", passStr);
}
else
{
@@ -1951,14 +1967,16 @@ namespace StockhamGenerator
if (fft_doPreCallback)
{
- passStr += fft_preCallback.funcname; passStr += "("; passStr += bufferInRe2; passStr += ", inOffset, userdata";
+ passStr += fft_preCallback.funcname; passStr += "("; passStr += bufferInRe2;
+ if (!inInterleaved) { passStr += ", "; passStr += bufferInIm2; }
+ passStr += ", inOffset2, userdata";
passStr += fft_preCallback.localMemSize > 0 ? ", localmem)" : ")";
}
else
{
passStr += bufferInRe2; passStr+= "[inOffset]";
}
- if(inInterleaved) passStr += ".x;\n\t}"; else passStr += ";\n\t}";
+ if(inInterleaved || fft_doPreCallback) passStr += ".x;\n\t}"; else passStr += ";\n\t}";
passStr += "\n\tif((rw == 1) && !me)\n\t{\n\t"; passStr += processBufIm; passStr += "["; passStr += processBufOffset; passStr += "] = 0;\n\t}";
@@ -1974,8 +1992,9 @@ namespace StockhamGenerator
SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, true, false, bufferInRe2, bufferInRe2, "inOffset", passStr);
passStr += "\n\t}\n\telse\n\t{";
SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, true, true, false, bufferInRe2, bufferInRe2, "inOffset", passStr);
- passStr += "\n\t}\n";
+ passStr += "\n\t}";
}
+ passStr += "\n";
if(oddp)
{
@@ -2520,7 +2539,7 @@ namespace StockhamGenerator
//Pass precallback information to Pass object if its the first pass.
//This will be used in single kernel transforms
- if (!r2c2r && pid == 0 && params.fft_hasPreCallback)
+ if (!r2c && pid == 0 && params.fft_hasPreCallback)
{
passes[0].SetPrecallback(params.fft_hasPreCallback, params.fft_preCallback);
}
@@ -3226,16 +3245,21 @@ namespace StockhamGenerator
{
if(inInterleaved || inReal)
{
- if(!rcSimple && !params.fft_hasPreCallback) { str += "lwbIn2 = gbIn + iOffset2;\n\t"; }
- if(!params.fft_hasPreCallback) { str += "lwbIn = gbIn + iOffset;\n\t"; }
+ if (!params.fft_hasPreCallback)
+ {
+ if(!rcSimple) { str += "lwbIn2 = gbIn + iOffset2;\n\t"; }
+ str += "lwbIn = gbIn + iOffset;\n\t";
+ }
}
else
{
- if(!rcSimple) { str += "lwbInRe2 = gbInRe + iOffset2;\n\t"; }
- if(!rcSimple) { str += "lwbInIm2 = gbInIm + iOffset2;\n\t"; }
- str += "lwbInRe = gbInRe + iOffset;\n\t";
- str += "lwbInIm = gbInIm + iOffset;\n\t";
-
+ if (!params.fft_hasPreCallback)
+ {
+ if(!rcSimple) { str += "lwbInRe2 = gbInRe + iOffset2;\n\t"; }
+ if(!rcSimple) { str += "lwbInIm2 = gbInIm + iOffset2;\n\t"; }
+ str += "lwbInRe = gbInRe + iOffset;\n\t";
+ str += "lwbInIm = gbInIm + iOffset;\n\t";
+ }
}
if(outInterleaved || outReal)
@@ -3448,7 +3472,15 @@ namespace StockhamGenerator
}
else
{
- inBuf = (params.fft_placeness == CLFFT_INPLACE) ? "gb, gb, " : "gbIn, gbIn, " ;
+ if (params.fft_placeness == CLFFT_INPLACE)
+ {
+ inBuf = "(__global "; inBuf += r2Type; inBuf += "*) gb, ";
+ inBuf += "(__global "; inBuf += r2Type; inBuf += "*) gb, ";
+ }
+ else
+ {
+ inBuf = "gbIn, gbIn, " ;
+ }
}
}
else inBuf = (params.fft_hasPreCallback) ? "gbInRe, gbInRe, gbInIm, gbInIm, " : "lwbInRe, lwbInRe2, lwbInIm, lwbInIm2, ";
@@ -3514,9 +3546,9 @@ namespace StockhamGenerator
str += IterRegs("&");
//if precalback set
- if (!r2c2r && params.fft_hasPreCallback)
+ if (!r2c && params.fft_hasPreCallback)
{
- str += ", userdata";
+ str += c2r ? ", iOffset2, userdata" : ", userdata";
if (params.fft_preCallback.localMemSize > 0)
{
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list