[clfft] 71/128: Precallback - client sample for converting 24bit input to 32bit format using precallback
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:40 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 9ac208d10921b8608d8e13fcd85b73a7f0baf4a8
Author: Pradeep <pradeep.rao at amd.com>
Date: Tue Sep 15 14:30:28 2015 +0530
Precallback - client sample for converting 24bit input to 32bit format using precallback
---
.gitignore | 7 +++
src/callback-client/callback-client.cpp | 100 ++++++++++++++++----------------
src/callback-client/client.h | 37 ++++++------
3 files changed, 76 insertions(+), 68 deletions(-)
diff --git a/.gitignore b/.gitignore
index 0788f46..c4239c4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,10 @@ build/
src/client-callback/client.cpp
*.pack
src/client-callback/callback_tests.bat
+src/client-callback/callback-client.cpp
+src/client-callback/CMakeLists.txt
+src/client-callback/README.md
+src/client-callback/client.h
+src/client-callback/openCL.misc.h
+src/client-callback/openCL.misc.cpp
+src/client-callback/stdafx.cpp
diff --git a/src/callback-client/callback-client.cpp b/src/callback-client/callback-client.cpp
index 990b94f..9b404ff 100644
--- a/src/callback-client/callback-client.cpp
+++ b/src/callback-client/callback-client.cpp
@@ -120,22 +120,24 @@ void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context
cl_int status = 0;
//input/output allocation sizes
- size_t in_size_of_buffers = fftLength * sizeof(char) * 3 ;
+ size_t in_size_of_buffers = fftLength * sizeof(uint24_t);
size_t out_size_of_buffers = fftLength * sizeof( T );
- char* in24bitData = (char*)malloc(in_size_of_buffers);
+ uint24_t *input24bitData = (uint24_t*)malloc(in_size_of_buffers);
//Initialize Data
srand(1);
for (size_t idx = 0; idx < fftLength; ++idx)
{
- in24bitData[3 * idx + 2] = (char)(rand() % 256);
- in24bitData[3 * idx + 1] = (char)(rand() % 256);
- in24bitData[3 * idx] = (char)(rand() % 256);
- }
+ int randomVal = (int)rand();
+ input24bitData[idx][0] = (randomVal >> 16) & 0xFF;
+ input24bitData[idx][1] = (randomVal >> 8) & 0xFF;
+ input24bitData[idx][2] = randomVal & 0xFF;
+ }
+
//input data buffer
- cl_mem infftbuffer = ::clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, in_size_of_buffers, (void*)in24bitData, &status);
+ cl_mem infftbuffer = ::clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, in_size_of_buffers, (void*)input24bitData, &status);
OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(infftbuffer) )" );
//out-place transform.
@@ -164,6 +166,7 @@ void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context
OPENCL_V_THROW( clfftSetLayout( plan_handle, inLayout, outLayout ), "clfftSetLayout failed" );
OPENCL_V_THROW( clfftSetPlanBatchSize( plan_handle, batchSize ), "clfftSetPlanBatchSize failed" );
OPENCL_V_THROW( clfftSetPlanPrecision( plan_handle, precision ), "clfftSetPlanPrecision failed" );
+ OPENCL_V_THROW( clfftSetPlanDistance( plan_handle, BATCH_LENGTH + 2, (BATCH_LENGTH/2 + 1)), "clfftSetPlanDistance failed" );
//Bake Plan
OPENCL_V_THROW( clfftBakePlan( plan_handle, 1, &commandQueue, NULL, NULL ), "clfftBakePlan failed" );
@@ -218,21 +221,16 @@ void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context
OPENCL_V_THROW( clEnqueueReadBuffer( commandQueue, outfftbuffer, CL_TRUE, 0, out_size_of_buffers, &output[ 0 ],
0, NULL, NULL ), "Reading the result buffer failed" );
- for( cl_uint i = 0; i < fftLength/2; i++)
- {
- std::cout << "i " << i << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
- }
-
//Reference fftw output
- /*fftwf_complex *refout;
+ fftwf_complex *refout;
refout = get_R2C_fftwf_output(inlengths, fftLength, batchSize, inLayout, dim);
- for( cl_uint i = 0; i < fftLength; i++)
+ /*for( cl_uint i = 0; i < fftLength/2; i++)
{
std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
- }
- if (!compare<fftwf_complex, T>(refout, output, fftLength))
+ }*/
+ if (!compare<fftwf_complex, T>(refout, output, fftLength/2))
{
std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****FAIL*****" << std::endl;
}
@@ -241,7 +239,7 @@ void runR2CPrecallbackFFT(std::auto_ptr< clfftSetupData > setupData, cl_context
std::cout << "\n\n\t\tInternal Client Test (with clFFT Pre-callback) *****PASS*****" << std::endl;
}
- fftwf_free(refout);*/
+ fftwf_free(refout);
}
OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
@@ -261,22 +259,24 @@ void runR2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_con
cl_int status = 0;
//input/output allocation sizes
- size_t in_size_of_buffers = fftLength * sizeof(char) * 3 ;
+ size_t in_size_of_buffers = fftLength * sizeof(uint24_t);
size_t out_size_of_buffers = fftLength * sizeof( T );
- char* in24bitData = (char*)malloc(in_size_of_buffers);
+ uint24_t *input24bitData = (uint24_t*)malloc(in_size_of_buffers);
//Initialize Data
srand(1);
for (size_t idx = 0; idx < fftLength; ++idx)
{
- in24bitData[3 * idx + 2] = (char)(rand() % 256);
- in24bitData[3 * idx + 1] = (char)(rand() % 256);
- in24bitData[3 * idx] = (char)(rand() % 256);
+ int randomVal = (int)rand();
+
+ input24bitData[idx][0] = (randomVal >> 16) & 0xFF;
+ input24bitData[idx][1] = (randomVal >> 8) & 0xFF;
+ input24bitData[idx][2] = randomVal & 0xFF;
}
//input data buffer
- cl_mem in24bitfftbuffer = ::clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, in_size_of_buffers, (void*)in24bitData, &status);
+ cl_mem in24bitfftbuffer = ::clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, in_size_of_buffers, (void*)input24bitData, &status);
OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer(in24bitfftbuffer) )" );
cl_mem in32bitfftbuffer = ::clCreateBuffer( context, CL_MEM_READ_WRITE, out_size_of_buffers, NULL, &status);
@@ -302,6 +302,7 @@ void runR2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_con
OPENCL_V_THROW( clfftSetLayout( plan_handle, inLayout, outLayout ), "clfftSetLayout failed" );
OPENCL_V_THROW( clfftSetPlanBatchSize( plan_handle, batchSize ), "clfftSetPlanBatchSize failed" );
OPENCL_V_THROW( clfftSetPlanPrecision( plan_handle, precision ), "clfftSetPlanPrecision failed" );
+ OPENCL_V_THROW( clfftSetPlanDistance( plan_handle, BATCH_LENGTH + 2, (BATCH_LENGTH/2 + 1)), "clfftSetPlanDistance failed" );
//Bake Plan
OPENCL_V_THROW( clfftBakePlan( plan_handle, 1, &commandQueue, NULL, NULL ), "clfftBakePlan failed" );
@@ -430,30 +431,25 @@ void runR2CPreprocessKernelFFT(std::auto_ptr< clfftSetupData > setupData, cl_con
OPENCL_V_THROW( clEnqueueReadBuffer( commandQueue, outfftbuffer, CL_TRUE, 0, out_size_of_buffers, &output[ 0 ],
0, NULL, NULL ), "Reading the result buffer failed" );
- for( cl_uint i = 0; i < fftLength/2; i++)
+ //Reference fftw output
+ fftwf_complex *refout;
+
+ refout = get_R2C_fftwf_output(inlengths, fftLength, batchSize, inLayout, dim);
+
+ /*for( cl_uint i = 0; i < fftLength/2; i++)
{
- std::cout << "i " << i << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
+ std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
+ }*/
+ if (!compare<fftwf_complex, T>(refout, output, fftLength/2))
+ {
+ std::cout << "\n\n\t\tInternal Client Test (Separate Pre-process Kernel) *****FAIL*****" << std::endl;
+ }
+ else
+ {
+ std::cout << "\n\n\t\tInternal Client Test (Separate Pre-process Kernel) *****PASS*****" << std::endl;
}
- ////Reference fftw output
- //fftwf_complex *refout;
-
- //refout = get_C2C_fftwf_output(inlengths, fftLength, batchSize, inLayout, dim, dir);
-
- ///*for( cl_uint i = 0; i < fftLength; i++)
- //{
- // std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << output[i].real() << " climag " << output[i].imag() << std::endl;
- //}*/
- //if (!compare<fftwf_complex, T>(refout, output, fftLength))
- //{
- // std::cout << "\n\n\t\tInternal Client Test (Separate Pre-process Kernel) *****FAIL*****" << std::endl;
- //}
- //else
- //{
- // std::cout << "\n\n\t\tInternal Client Test (Separate Pre-process Kernel) *****PASS*****" << std::endl;
- //}
-
- //fftwf_free(refout);
+ fftwf_free(refout);
}
OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
@@ -545,31 +541,33 @@ fftwf_complex* get_R2C_fftwf_output(size_t* lengths, size_t fftbatchLength, int
refin, &inembed[3 - dim], 1, infftVectorLength,
refout, &outembed[3 - dim], 1, outfftVectorLength, FFTW_ESTIMATE);
- char* in24bitData = (char*)malloc(sizeof(char) * 3 * fftbatchLength);
+ uint24_t* in24bitData = (uint24_t*)malloc(sizeof(uint24_t) * fftbatchLength);
//Initialize Data
srand(1);
for (size_t idx = 0; idx < fftbatchLength; ++idx)
{
- in24bitData[3 * idx + 2] = (char)(rand() % 256);
- in24bitData[3 * idx + 1] = (char)(rand() % 256);
- in24bitData[3 * idx] = (char)(rand() % 256);
+ int randomVal = (int)rand();
+
+ in24bitData[idx][0] = (randomVal >> 16) & 0xFF;
+ in24bitData[idx][1] = (randomVal >> 8) & 0xFF;
+ in24bitData[idx][2] = randomVal & 0xFF;
}
float val;
for( size_t i = 0; i < fftbatchLength; i++)
{
- val = in24bitData[3*i + 2] << 24 | in24bitData[3*i + 1] << 16 | in24bitData[3*i] << 8 ;
+ val = in24bitData[i][0] << 16 | in24bitData[i][1] << 8 | in24bitData[i][2] ;
refin[i] = val;
}
fftwf_execute(refPlan);
- fftw_free(refin);
+ free(refin);
fftwf_destroy_plan(refPlan);
return refout;
-}
\ No newline at end of file
+}
diff --git a/src/callback-client/client.h b/src/callback-client/client.h
index d229d41..1b75e29 100644
--- a/src/callback-client/client.h
+++ b/src/callback-client/client.h
@@ -28,26 +28,29 @@
#include <fftw3.h>
+typedef unsigned char uint24_t[3];
+
#define CALLBCKSTR(...) #__VA_ARGS__
#define STRINGIFY(...) CALLBCKSTR(__VA_ARGS__)
-#define USERDATA_LENGTH 512
-#define BATCH_LENGTH 1024 // Must be >= USERDATA_LENGTH
-
-#define ConvertToFloat float convert24To32bit(__global void* in, uint inoffset, __global void* userdata)\n \
- { \n \
- __global char* inData = (__global char*)in; \n \
- float val = inData[3*inoffset+2] << 24 | inData[3*inoffset+1] << 16 | inData[3*inoffset] << 8 ; \n \
- return val / (float)(INT_MAX - 256); \n \
- }
-
-#define ConvertToFloat_KERNEL __kernel void convert24To32bit (__global void *input, __global void *output) \n \
- { \n \
- uint inoffset = get_global_id(0); \n \
- __global char* inData = (__global char*)input; \n \
- float val = inData[3*inoffset+2] << 24 | inData[3*inoffset+1] << 16 | inData[3*inoffset] << 8 ; \n \
- *((__global float*)output + inoffset) = val / (float)(INT_MAX - 256); \n \
- } \n
+#define BATCH_LENGTH 1024
+
+#define ConvertToFloat typedef unsigned char uint24_t[3]; \n \
+ float convert24To32bit(__global void* in, uint inoffset, __global void* userdata) \n \
+ { \n \
+ __global uint24_t* inData = (__global uint24_t*)in; \n \
+ float val = inData[inoffset][0] << 16 | inData[inoffset][1] << 8 | inData[inoffset][2] ; \n \
+ return val; \n \
+ }
+
+#define ConvertToFloat_KERNEL typedef unsigned char uint24_t[3]; \n \
+ __kernel void convert24To32bit (__global void *input, __global void *output) \n \
+ { \n \
+ uint inoffset = get_global_id(0); \n \
+ __global uint24_t* inData = (__global uint24_t*)input; \n \
+ float val = inData[inoffset][0] << 16 | inData[inoffset][1] << 8 | inData[inoffset][2] ; \n \
+ *((__global float*)output + inoffset) = val; \n \
+ } \n
template < typename T >
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list