[clfft] 69/128: precallback-remove old client code
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:40 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit fab73e7590b69f241c6c6590c0c02ebf8a3bc3e9
Author: Pradeep <pradeep.rao at amd.com>
Date: Mon Sep 14 12:13:07 2015 +0530
precallback-remove old client code
---
src/client-callback/CMakeLists.txt | 62 --
src/client-callback/README.md | 117 ---
src/client-callback/callback-client.cpp | 1524 -------------------------------
src/client-callback/client.h | 70 --
src/client-callback/openCL.misc.cpp | 536 -----------
src/client-callback/openCL.misc.h | 151 ---
src/client-callback/stdafx.cpp | 25 -
7 files changed, 2485 deletions(-)
diff --git a/src/client-callback/CMakeLists.txt b/src/client-callback/CMakeLists.txt
deleted file mode 100644
index 81c7096..0000000
--- a/src/client-callback/CMakeLists.txt
+++ /dev/null
@@ -1,62 +0,0 @@
-# ########################################################################
-# Copyright 2013 Advanced Micro Devices, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ########################################################################
-
-
-# client
-set( Client.Source callback-client.cpp
- openCL.misc.cpp
- stdafx.cpp )
-
-set( Client.Headers client.h
- openCL.misc.h
- ../statTimer/statisticalTimer.extern.h
- ../include/unicode.compatibility.h
- ../include/stdafx.h
- ../include/targetver.h
- ../include/clFFT.h )
-
-set( Client.Files ${Client.Source} ${Client.Headers} )
-
-set( DL_LIB "" )
-if( WIN32 )
- add_definitions( "/D_CONSOLE" )
-elseif( APPLE )
- set( CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++ ${CMAKE_CXX_FLAGS}" )
-else( )
- # To use the dlopen() and dlclose() functions, we should link with libdl
- set( DL_LIB "-ldl -lrt" )
-endif( )
-
-# Include standard OpenCL headers
-include_directories( ${Boost_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS} ${FFTW_INCLUDE_DIRS} ../../../common ${PROJECT_BINARY_DIR}/include ../include )
-
-add_executable( clFFT-callback ${Client.Files} )
-
-target_link_libraries( clFFT-callback clFFT ${Boost_LIBRARIES} ${OPENCL_LIBRARIES} ${FFTW_LIBRARIES} ${DL_LIB} )
-
-set_target_properties( clFFT-callback PROPERTIES VERSION ${CLFFT_VERSION} )
-set_target_properties( clFFT-callback PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
-if( APPLE )
- # properly deal with RPATH on mac
- set_target_properties( clFFT-callback PROPERTIES INSTALL_RPATH "@loader_path/../lib${SUFFIX_LIB}")
-endif()
-
-# CPack configuration; include the executable into the package
-install( TARGETS clFFT-callback
- RUNTIME DESTINATION bin${SUFFIX_BIN}
- LIBRARY DESTINATION lib${SUFFIX_LIB}
- ARCHIVE DESTINATION lib${SUFFIX_LIB}/import
- )
diff --git a/src/client-callback/README.md b/src/client-callback/README.md
deleted file mode 100644
index 108c3dd..0000000
--- a/src/client-callback/README.md
+++ /dev/null
@@ -1,117 +0,0 @@
-clFFT - Callback Client
-=======================
-
-
-clFFT Callback client is a sample application demonstrating the use of
-callback feature of clFFT.
-
-Callback feature provides ability to do custom processing when reading
-input data or when writing output data. There are 2 types of callback,
-Pre-callback and Post-callback. Pre-callback invokes user callback
-function to do custom preprocessing of input data before FFT is executed.
-Post-callback invokes user callback function to do custom post-processing
-of output data after FFT is executed. The intent is to avoid additional
-kernels and kernel launches to carry out the pre/post processing. Instead
-the pre/post processing logic can be included in an inline opencl function
-(one each for pre and post) and passed as a string to library which would
-then be incorporated into the generated FFT kernel.
-
-The block below shows the help message given by the callback client listing
-all the command line options.
-
-```c
-C:\clFFT\src\build\staging\Debug>clFFT-callback.exe -h
-clFFT client command line options:
- -h [ --help ] produces this help message
- -g [ --gpu ] Force selection of OpenCL GPU devices only
- -c [ --cpu ] Force selection of OpenCL CPU devices only
- -a [ --all ] Force selection of all OpenCL devices (default)
- -o [ --outPlace ] Out of place FFT transform (default: in place)
- --double Double precision transform (default: single)
- --inv Backward transform (default: forward)
- -d [ --dumpKernels ] FFT engine will dump generated OpenCL FFT kernels
- to disk (default: dump off)
- --noprecall Disable Precallback (default: precallback on)
- -x [ --lenX ] arg (=1024) Specify the length of the 1st dimension of a test
- array
- -y [ --lenY ] arg (=1) Specify the length of the 2nd dimension of a test
- array
- -z [ --lenZ ] arg (=1) Specify the length of the 3rd dimension of a test
- array
- --isX arg (=1) Specify the input stride of the 1st dimension of
- a test array
- --isY arg (=0) Specify the input stride of the 2nd dimension of
- a test array
- --isZ arg (=0) Specify the input stride of the 3rd dimension of
- a test array
- --iD arg (=0) input distance between subsequent sets of data
- when batch size > 1
- --osX arg (=1) Specify the output stride of the 1st dimension of
- a test array
- --osY arg (=0) Specify the output stride of the 2nd dimension of
- a test array
- --osZ arg (=0) Specify the output stride of the 3rd dimension of
- a test array
- --oD arg (=0) output distance between subsequent sets of data
- when batch size > 1
- -b [ --batchSize ] arg (=1) If this value is greater than one, arrays will be
- used
- -p [ --profile ] arg (=1) Time and report the kernel speed of the FFT
- (default: profiling off)
- --inLayout arg (=1) Layout of input data:
- 1) interleaved
- 2) planar
- 3) hermitian interleaved
- 4) hermitian planar
- 5) real
- --outLayout arg (=1) Layout of input data:
- 1) interleaved
- 2) planar
- 3) hermitian interleaved
- 4) hermitian planar
- 5) real
-
-```
-"--noprecall" option can be used to disable Pre-callback (default: precallback on)
-
-## What's New
-
-Callback client in the develop branch demonstrates use of pre-callback
-for Single Precision Complex-Complex 1D transforms for lengths upto 4096. Output data
-is verified against fftw library.
-
-## Example
-
-Some examples are shown below.
-
-1D Complex-Complex Interleaved transform with pre-callback for length 1024
-```c
-C:\clFFT\src\build\staging\Debug>clFFT-callback.exe -x 1024 --inLayout 1 --outLayout 1
-
-
- Internal Client Test *****PASS*****
-```
-
-1D Complex-Complex Planar transform with pre-callback for length 1024
-```c
-C:\clFFT\src\build\staging\Debug>clFFT-callback.exe -x 1024 --inLayout 2 --outLayout 2
-
-
- Internal Client Test *****PASS*****
-```
-
-1D Complex-Complex Interleaved transform with pre-callback for length 1024 and batch size of 2
-```c
-C:\clFFT\src\build\staging\Debug>clFFT-callback.exe -x 1024 --inLayout 1 --outLayout 1 -b 2
-
-
- Internal Client Test *****PASS*****
-```
-
-1D Complex-Complex Interleaved transform without pre-callback for length 1024
-```c
-C:\clFFT\src\build\staging\Debug>clFFT-callback.exe -x 1024 --inLayout 1 --outLayout 1 --noprecall
-
-
- Internal Client Test *****PASS*****
-```
\ No newline at end of file
diff --git a/src/client-callback/callback-client.cpp b/src/client-callback/callback-client.cpp
deleted file mode 100644
index c3d424b..0000000
--- a/src/client-callback/callback-client.cpp
+++ /dev/null
@@ -1,1524 +0,0 @@
-#include "stdafx.h"
-#include <functional>
-#include <cmath>
-
-#include "client.h"
-#include "../library/private.h"
-#include "openCL.misc.h"
-#include "../statTimer/statisticalTimer.extern.h"
-#include "../include/sharedLibrary.h"
-#include "../include/unicode.compatibility.h"
-
-#include <fftw3.h>
-
-namespace po = boost::program_options;
-
-#define SCALAR 10
-
-#define MULVAL float2 mulval(__global void* in, uint offset, __global void* userdata)\n \
- { \n \
- int scalar = *((__global int*)userdata + offset); \n \
- float2 ret = *((__global float2*)in + offset) * scalar; \n \
- return ret; \n \
- }
-
-#define MULVAL_C2R float2 mulval(__global void* in, uint offset, __global void* userdata)\n \
- { \n \
- int scalar = *((__global int*)userdata + offset); \n \
- float2 ret = *((__global float2*)in + offset) * scalar; \n \
- return ret; \n \
- }
-
-#define MULVAL_DP double2 mulval(__global void* in, uint offset, __global void* userdata)\n \
- { \n \
- int scalar = *((__global int*)userdata + offset); \n \
- double2 ret = *((__global double2*)in + offset) * scalar; \n \
- return ret; \n \
- }
-
-#define MULVAL_PLANAR float2 mulval(__global void* inRe, __global void* inIm, uint offset, __global void* userdata)\n \
- { \n \
- __global USER_DATA *data = ((__global USER_DATA *)userdata + offset); \n \
- int scalar = (int)data->scalar1 + (int)data->scalar2; \n \
- float2 ret; \n \
- ret.x = *((__global float*)inRe + offset) * scalar; \n \
- ret.y = *((__global float*)inIm + offset) * scalar; \n \
- return ret; \n \
- }
-
-#define MULVAL_PLANAR_DP double2 mulval(__global void* inRe, __global void* inIm, uint offset, __global void* userdata)\n \
- { \n \
- __global USER_DATA *data = ((__global USER_DATA *)userdata + offset); \n \
- int scalar = (int)data->scalar1 + (int)data->scalar2; \n \
- double2 ret; \n \
- ret.x = *((__global double*)inRe + offset) * scalar; \n \
- ret.y = *((__global double*)inIm + offset) * scalar; \n \
- return ret; \n \
- }
-
-#define STRUCT_USERDATA typedef struct USER_DATA \
- { \
- int scalar1; \
- int scalar2; \
- } USER_DATA;
-STRUCT_USERDATA
-
-//Compare reference and opencl output
-template < typename T1, typename T2>
-bool compare(T1 *refData, std::vector< std::complex< T2 > > data,
- size_t length, const float epsilon = 1e-6f)
-{
- float error = 0.0f;
- T1 ref;
- T1 diff;
- float normRef = 0.0f;
- float normError = 0.0f;
-
- for(size_t i = 0; i < length; ++i)
- {
- diff[0] = refData[i][0] - data[i].real();
- error += (float)(diff[0] * diff[0]);
- ref[0] += refData[i][0] * refData[i][0];
- }
- if (error != 0)
- {
- normRef =::sqrtf((float) ref[0]);
- if (::fabs((float) ref[0]) < 1e-7f)
- {
- return false;
- }
- normError = ::sqrtf((float) error);
- error = normError / normRef;
-
- if (error > epsilon)
- return false;
- }
-
- //imag
- error = 0.0f;
- ref[1] = 0.0;
- for(size_t i = 0; i < length; ++i)
- {
- diff[1] = refData[i][1] - data[i].imag();
- error += (float)(diff[1] * diff[1]);
- ref[1] += refData[i][1] * refData[i][1];
- }
-
- if (error == 0)
- return true;
-
- normRef =::sqrtf((float) ref[1]);
- if (::fabs((float) ref[1]) < 1e-7f)
- {
- return false;
- }
- normError = ::sqrtf((float) error);
- error = normError / normRef;
-
- if (error > epsilon)
- return false;
-
- return true;
-}
-
-//Compare reference and opencl output
-template < typename T1, typename T2 >
-bool compare(T1 *refData, std::valarray< T2 > real, std::valarray< T2 > imag,
- size_t length, const float epsilon = 1e-6f)
-{
- float error = 0.0f;
- T1 ref;
- T1 diff;
- float normRef = 0.0f;
- float normError = 0.0f;
-
- //real compare
- for(size_t i = 0; i < length; ++i)
- {
- diff[0] = refData[i][0] - real[i];
- error += (float)(diff[0] * diff[0]);
- ref[0] += refData[i][0] * refData[i][0];
- }
- if (error != 0)
- {
- normRef =::sqrtf((float) ref[0]);
- if (::fabs((float) ref[0]) < 1e-7f)
- {
- return false;
- }
- normError = ::sqrtf((float) error);
- error = normError / normRef;
-
- if (error > epsilon)
- return false;
- }
-
- //imag compare
- error = 0.0f;
- ref[1] = 0.0;
-
- for(size_t i = 0; i < length; ++i)
- {
- diff[1] = refData[i][1] - imag[i];
- error += (float)(diff[1] * diff[1]);
- ref[1] += refData[i][1] * refData[i][1];
- }
-
- if (error == 0)
- return true;
-
- normRef =::sqrtf((float) ref[1]);
- if (::fabs((float) ref[1]) < 1e-7f)
- {
- return false;
- }
- normError = ::sqrtf((float) error);
- error = normError / normRef;
-
- if (error > epsilon)
- return false;
-
- return true;
-}
-
-//Compare reference and opencl output
-template < typename T1 , typename T2 >
-bool compare(T1 *refData, std::valarray< T2 > real,
- size_t batch_size, size_t *o_strides, size_t *lengths, const float epsilon = 1e-6f)
-{
- float error = 0.0f;
- T1 ref = 0.0;
- T1 diff;
- float normRef = 0.0f;
- float normError = 0.0f;
-
- //real compare
- for(size_t b = 0; b < batch_size; b++)
- {
- size_t p3 = b * o_strides[3];
- for(size_t k = 0; k < lengths[2]; k++)
- {
- size_t p2 = p3 + k * o_strides[2];
- for(size_t j = 0; j < lengths[1]; j++)
- {
- size_t p1 = p2 + j * o_strides[1];
- for(size_t i = 0; i < lengths[0]; i++)
- {
- size_t p0 = p1 + i * o_strides[0];
-
- diff = refData[p0] - (real[p0] * lengths[0] * lengths[1] * lengths[2]);
- error += (float)(diff * diff);
- ref += refData[p0] * refData[p0];
- }
- }
- }
- }
-
- if (error != 0)
- {
- normRef =::sqrtf((float) ref);
- if (::fabs((float) ref) < 1e-7f)
- {
- return false;
- }
- normError = ::sqrtf((float) error);
- error = normError / normRef;
-
- if (error > epsilon)
- return false;
- }
-
- return true;
-}
-
-// Compute reference output using fftw for float type
-fftwf_complex* get_fftwf_output(size_t* lengths, const size_t *inStrides, const size_t *outStrides, size_t batch_size,
- size_t fftBatchSize, size_t outfftBatchSize, size_t fftVectorSizePadded, clfftLayout in_layout,
- size_t outfftVectorSizePadded, size_t fftVectorSize, clfftDim dim, clfftDirection dir)
-{
- //In FFTW last dimension has the fastest changing index
- int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
-
- fftwf_plan refPlan;
-
- fftwf_complex *refin = (fftwf_complex*) fftw_malloc(sizeof(fftwf_complex)*fftBatchSize);
- fftwf_complex *refout = (fftwf_complex*) fftw_malloc(sizeof(fftwf_complex)*outfftBatchSize);
-
- refPlan = fftwf_plan_many_dft(dim, &fftwLengths[3 - dim], (int)batch_size,
- refin, &fftwLengths[3 - dim], (int)inStrides[0], (int)fftVectorSizePadded,
- refout, &fftwLengths[3 - dim], (int)outStrides[0], (int)outfftVectorSizePadded,
- dir, FFTW_ESTIMATE);
-
- int scalar;
- for( size_t i = 0; i < fftBatchSize; i = i + inStrides[0])
- {
- switch (in_layout)
- {
- case CLFFT_COMPLEX_INTERLEAVED:
- scalar = SCALAR + (int)(i % fftVectorSize);
- break;
- case CLFFT_COMPLEX_PLANAR:
- scalar = (int)((SCALAR + (i % fftVectorSize)) + (SCALAR + (i % fftVectorSize) + 1));
- break;
- default:
- break;
- }
-
- refin[i][0] = (float)(1 * scalar);
- refin[i][1] = (float)(0 * scalar);
- }
-
- fftwf_execute(refPlan);
-
- fftw_free(refin);
-
- fftwf_destroy_plan(refPlan);
-
- return refout;
-}
-
-// Compute reference output using fftw for double type
-fftw_complex* get_fftw_output(size_t* lengths, const size_t *inStrides, const size_t *outStrides, size_t batch_size,
- size_t fftBatchSize, size_t outfftBatchSize, size_t fftVectorSizePadded, clfftLayout in_layout,
- size_t outfftVectorSizePadded, size_t fftVectorSize, clfftDim dim, clfftDirection dir)
-{
- fftw_plan refPlan;
-
- fftw_complex *refin = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*fftBatchSize);
- fftw_complex *refout = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*outfftBatchSize);
-
- //In FFTW last dimension has the fastest changing index
- int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
-
- refPlan = fftw_plan_many_dft(dim, &fftwLengths[3 - dim], (int)batch_size,
- refin, &fftwLengths[3 - dim], (int)inStrides[0], (int)fftVectorSizePadded,
- refout, &fftwLengths[3 - dim], (int)outStrides[0], (int)outfftVectorSizePadded,
- dir, FFTW_ESTIMATE);
-
- int scalar;
- for( size_t i = 0; i < fftBatchSize; i = i + inStrides[0])
- {
- switch (in_layout)
- {
- case CLFFT_COMPLEX_INTERLEAVED:
- scalar = SCALAR + (int)(i % fftVectorSize);
- break;
- case CLFFT_COMPLEX_PLANAR:
- scalar = (int)((SCALAR + (i % fftVectorSize)) + (SCALAR + (i % fftVectorSize) + 1));
- break;
- default:
- break;
- }
-
- refin[i][0] = 1 * scalar;
- refin[i][1] = 0 * scalar;
- }
-
- fftw_execute(refPlan);
-
- fftw_free(refin);
-
- fftw_destroy_plan(refPlan);
-
- return refout;
-}
-
-// Compute C2R reference output using fftw for float type
-float* get_fftwf_output_c2r(size_t* lengths, size_t *strides, const size_t *inStrides, const size_t *outStrides, size_t batch_size,
- size_t fftBatchSize, size_t outfftBatchSize, size_t fftVectorSizePadded, clfftLayout in_layout,
- size_t outfftVectorSizePadded, size_t outfftVectorSize, clfftDim dim, clfftDirection dir, clfftResultLocation place)
-{
- //In FFTW last dimension has the fastest changing index
- int fftwLengths[3] = {(int)lengths[2], (int)lengths[1], (int)lengths[0]};
- int inembed[3] = {(int)lengths[2], (int)lengths[1], (int)(lengths[0]/2 + 1)};
- int lsd = (place == CLFFT_INPLACE) ? (int)(lengths[0]/2 + 1)*2 : (int)(lengths[0]);
- int outembed[3] = {(int)lengths[2], (int)lengths[1], lsd};
-
- fftwf_plan refPlan;
-
- fftwf_complex *refin = (fftwf_complex*) fftwf_malloc(sizeof(fftwf_complex)*fftBatchSize);
- float *refout = (float*) malloc(sizeof(float)*outfftBatchSize);
-
- refPlan = fftwf_plan_many_dft_c2r(dim, &fftwLengths[3 - dim], (int)batch_size,
- refin, &inembed[3 - dim], (int)inStrides[0], (int)fftVectorSizePadded,
- refout, &outembed[3 - dim], (int)outStrides[0], (int)outfftVectorSizePadded,
- FFTW_ESTIMATE);
-
- // set zero
- for( cl_uint i = 0; i < fftBatchSize; i = i + inStrides[0] )
- {
- refin[ i ][0] = 0; refin[ i ][1] = 0;
- }
-
- // impulse test case
- for(size_t b = 0; b < batch_size; b++)
- {
- size_t p3 = b * strides[3];
- refin[ p3 ][0] = static_cast<float>(outfftVectorSize);
- }
-
- int scalar;
- for(size_t b = 0; b < batch_size; b++)
- {
- size_t p3 = b * strides[3];
-
- for( size_t i = 0; i < fftVectorSizePadded; i = i + inStrides[0])
- {
- scalar = SCALAR + i;
-
- refin[p3 + i][0] *= (float)(scalar);
- refin[p3 + i][1] *= (float)(scalar);
- }
- }
-
- fftwf_execute(refPlan);
-
- fftw_free(refin);
-
- fftwf_destroy_plan(refPlan);
-
- return refout;
-}
-
-// This is used with the program_options class so that the user can type an integer on the command line
-// and we store into an enum varaible
-template<class _Elem, class _Traits>
-std::basic_istream<_Elem, _Traits> & operator>> (std::basic_istream<_Elem, _Traits> & stream, clfftLayout & layout)
-{
- cl_uint tmp;
- stream >> tmp;
- layout = clfftLayout(tmp);
- return stream;
-}
-
-//Validate the input and output data layout
-void validateDataLayout(clfftLayout in_layout, clfftLayout out_layout, clfftResultLocation place)
-{
- switch( in_layout )
- {
- case CLFFT_COMPLEX_INTERLEAVED:
- case CLFFT_COMPLEX_PLANAR:
- case CLFFT_HERMITIAN_INTERLEAVED:
- case CLFFT_HERMITIAN_PLANAR:
- case CLFFT_REAL:
- break;
- default:
- // Don't recognize input layout
- {
- throw std::runtime_error( "Un-recognized data layout" );
- }
- break;
- }
-
- switch( out_layout )
- {
- case CLFFT_COMPLEX_INTERLEAVED:
- case CLFFT_COMPLEX_PLANAR:
- case CLFFT_HERMITIAN_INTERLEAVED:
- case CLFFT_HERMITIAN_PLANAR:
- case CLFFT_REAL:
- break;
- default:
- // Don't recognize output layout
- {
- throw std::runtime_error( "Un-recognized data layout" );
- }
- break;
- }
-
- if (( place == CLFFT_INPLACE ) && ( in_layout != out_layout ))
- {
- switch( in_layout )
- {
- case CLFFT_COMPLEX_INTERLEAVED:
- {
- if( (out_layout == CLFFT_COMPLEX_PLANAR) || (out_layout == CLFFT_HERMITIAN_PLANAR) )
- {
- throw std::runtime_error( "Cannot use the same buffer for interleaved->planar in-place transforms" );
- }
- break;
- }
- case CLFFT_COMPLEX_PLANAR:
- {
- if( (out_layout == CLFFT_COMPLEX_INTERLEAVED) || (out_layout == CLFFT_HERMITIAN_INTERLEAVED) )
- {
- throw std::runtime_error( "Cannot use the same buffer for planar->interleaved in-place transforms" );
- }
- break;
- }
- case CLFFT_HERMITIAN_INTERLEAVED:
- {
- if( out_layout != CLFFT_REAL )
- {
- throw std::runtime_error( "Cannot use the same buffer for interleaved->planar in-place transforms" );
- }
- break;
- }
- case CLFFT_HERMITIAN_PLANAR:
- {
- throw std::runtime_error( "Cannot use the same buffer for planar->interleaved in-place transforms" );
- break;
- }
- case CLFFT_REAL:
- {
- if( (out_layout == CLFFT_COMPLEX_PLANAR) || (out_layout == CLFFT_HERMITIAN_PLANAR) )
- {
- throw std::runtime_error( "Cannot use the same buffer for interleaved->planar in-place transforms" );
- }
- break;
- }
- default:
- {
- throw std::runtime_error( "Input layout format not yet supported" );
- }
- break;
- }
- }
-}
-
-//FFT data initializations
-template < typename T >
-cl_int dataInitialize(size_t* lengths, clfftDim *dim, size_t batch_size,
- const size_t *inStrides, size_t *strides, const size_t *outStrides, size_t *o_strides,
- size_t *fftBatchSize, size_t *outfftBatchSize, size_t *fftVectorSizePadded, clfftLayout in_layout, clfftLayout out_layout,
- size_t *outfftVectorSizePadded, size_t *fftVectorSize, size_t *outfftVectorSize,
- clfftResultLocation place, size_t *size_of_output_buffers_in_bytes,
- size_t *size_of_input_buffers_in_bytes, cl_mem *input_cl_mem_buffers, cl_mem *output_cl_mem_buffers,
- cl_context *context, cl_command_queue *queue,
- cl_device_type deviceType, cl_int deviceId, cl_int platformId, cl_uint command_queue_flags)
-{
- cl_event outEvent = NULL;
- cl_uint number_of_output_buffers = 0;
- const size_t max_dimensions = 3;
- std::vector< cl_device_id > device_id;
-
- for (unsigned u = 0; u < max_dimensions; ++u) {
- if (0 != lengths[u])
- continue;
- lengths[u] = 1;
- }
-
- if( lengths[ 1 ] > 1 )
- {
- *dim = CLFFT_2D;
- }
- if( lengths[ 2 ] > 1 )
- {
- *dim = CLFFT_3D;
- }
-
- strides[ 0 ] = inStrides[0];
- strides[ 1 ] = inStrides[1];
- strides[ 2 ] = inStrides[2];
- strides[ 3 ] = inStrides[3];
-
- o_strides[ 0 ] = outStrides[0];
- o_strides[ 1 ] = outStrides[1];
- o_strides[ 2 ] = outStrides[2];
- o_strides[ 3 ] = outStrides[3];
-
- *fftVectorSize = lengths[0] * lengths[1] * lengths[2];
- *fftVectorSizePadded = strides[3];
- *fftBatchSize = *fftVectorSizePadded * batch_size;
-
- if(place == CLFFT_INPLACE)
- {
- *outfftVectorSize = *fftVectorSize;
- *outfftVectorSizePadded = *fftVectorSizePadded;
- *outfftBatchSize = *fftBatchSize;
- }
- else
- {
- *outfftVectorSize = lengths[0] * lengths[1] * lengths[2];
- *outfftVectorSizePadded = o_strides[3];
- *outfftBatchSize = *outfftVectorSizePadded * batch_size;
- }
-
- // Real to complex case
- if( (in_layout == CLFFT_REAL) || (out_layout == CLFFT_REAL) )
- {
- *fftVectorSizePadded = strides[3];
- *fftBatchSize = *fftVectorSizePadded * batch_size;
-
- *outfftVectorSizePadded = o_strides[3];
- *outfftBatchSize = *outfftVectorSizePadded * batch_size;
-
- *fftVectorSize = lengths[0] * lengths[1] * lengths[2];
- *outfftVectorSize = *fftVectorSize;
- }
-
- switch( out_layout )
- {
- case CLFFT_COMPLEX_INTERLEAVED:
- number_of_output_buffers = 1;
- *size_of_output_buffers_in_bytes = *outfftBatchSize * sizeof( std::complex< T > );
- break;
- case CLFFT_COMPLEX_PLANAR:
- number_of_output_buffers = 2;
- *size_of_output_buffers_in_bytes = *outfftBatchSize * sizeof(T);
- break;
- case CLFFT_HERMITIAN_INTERLEAVED:
- number_of_output_buffers = 1;
- *size_of_output_buffers_in_bytes = *outfftBatchSize * sizeof( std::complex< T > );
- break;
- case CLFFT_HERMITIAN_PLANAR:
- number_of_output_buffers = 2;
- *size_of_output_buffers_in_bytes = *outfftBatchSize * sizeof(T);
- break;
- case CLFFT_REAL:
- number_of_output_buffers = 1;
- *size_of_output_buffers_in_bytes = *outfftBatchSize * sizeof(T);
- break;
- }
-
-
- // Fill the input buffers
- switch( in_layout )
- {
- case CLFFT_COMPLEX_INTERLEAVED:
- {
- // This call creates our openCL context and sets up our devices; expected to throw on error
- *size_of_input_buffers_in_bytes = *fftBatchSize * sizeof( std::complex< T > );
-
- device_id = initializeCL( deviceType, deviceId, platformId, *context, false );
- createOpenCLCommandQueue( *context,
- command_queue_flags, *queue,
- device_id,
- *size_of_input_buffers_in_bytes, 1, input_cl_mem_buffers,
- *size_of_output_buffers_in_bytes, number_of_output_buffers, output_cl_mem_buffers);
-
- std::vector< std::complex< T > > input( *fftBatchSize );
-
- // set zero
- for( cl_uint i = 0; i < *fftBatchSize; ++i )
- {
- input[ i ] = 0;
- }
-
- // impulse test case
- for(size_t b = 0; b < batch_size; b++)
- {
- size_t p3 = b * strides[3];
- for(size_t k = 0; k < lengths[2]; k++)
- {
- size_t p2 = p3 + k * strides[2];
- for(size_t j = 0; j < lengths[1]; j++)
- {
- size_t p1 = p2 + j * strides[1];
- for(size_t i = 0; i < lengths[0]; i++)
- {
- size_t p0 = p1 + i * strides[0];
- input[p0] = 1;
- }
- }
- }
- }
-
- OPENCL_V_THROW( clEnqueueWriteBuffer( *queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, *size_of_input_buffers_in_bytes, &input[ 0 ],
- 0, NULL, &outEvent ),
- "clEnqueueWriteBuffer failed" );
-
- }
- break;
- case CLFFT_COMPLEX_PLANAR:
- {
- // This call creates our openCL context and sets up our devices; expected to throw on error
- *size_of_input_buffers_in_bytes = *fftBatchSize * sizeof( T );
-
- device_id = initializeCL( deviceType, deviceId, platformId, *context, false );
- createOpenCLCommandQueue( *context,
- command_queue_flags, *queue,
- device_id,
- *size_of_input_buffers_in_bytes, 2, input_cl_mem_buffers,
- *size_of_output_buffers_in_bytes, number_of_output_buffers, output_cl_mem_buffers);
-
- std::vector< T > real( *fftBatchSize );
- std::vector< T > imag( *fftBatchSize );
-
- // set zero
- for( cl_uint i = 0; i < *fftBatchSize; ++i )
- {
- real[ i ] = 0;
- imag[ i ] = 0;
- }
-
- // impulse test case
- for(size_t b = 0; b < batch_size; b++)
- {
- size_t p3 = b * strides[3];
- for(size_t k = 0; k < lengths[2]; k++)
- {
- size_t p2 = p3 + k * strides[2];
- for(size_t j = 0; j < lengths[1]; j++)
- {
- size_t p1 = p2 + j * strides[1];
- for(size_t i = 0; i < lengths[0]; i++)
- {
- size_t p0 = p1 + i * strides[0];
- real[p0] = 1;
- }
- }
- }
- }
-
- OPENCL_V_THROW( clEnqueueWriteBuffer( *queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, *size_of_input_buffers_in_bytes, &real[ 0 ],
- 0, NULL, &outEvent ),
- "clEnqueueWriteBuffer failed" );
- OPENCL_V_THROW( clEnqueueWriteBuffer( *queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0, *size_of_input_buffers_in_bytes, &imag[ 0 ],
- 0, NULL, &outEvent ),
- "clEnqueueWriteBuffer failed" );
- }
- break;
- case CLFFT_HERMITIAN_INTERLEAVED:
- {
- // This call creates our openCL context and sets up our devices; expected to throw on error
- *size_of_input_buffers_in_bytes = *fftBatchSize * sizeof( std::complex< T > );
-
- device_id = initializeCL( deviceType, deviceId, platformId, *context, false );
- createOpenCLCommandQueue( *context,
- command_queue_flags, *queue,
- device_id,
- *size_of_input_buffers_in_bytes, 1, input_cl_mem_buffers,
- *size_of_output_buffers_in_bytes, number_of_output_buffers, output_cl_mem_buffers);
-
- std::vector< std::complex< T > > input( *fftBatchSize );
-
- // set zero
- for( cl_uint i = 0; i < *fftBatchSize; ++i )
- {
- input[ i ] = 0;
- }
-
- // impulse test case
- for(size_t b = 0; b < batch_size; b++)
- {
- size_t p3 = b * strides[3];
- input[p3] = static_cast<T>(*outfftVectorSize);
-
- }
-
- OPENCL_V_THROW( clEnqueueWriteBuffer( *queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, *size_of_input_buffers_in_bytes, &input[ 0 ],
- 0, NULL, &outEvent ),
- "clEnqueueWriteBuffer failed" );
- }
- break;
- case CLFFT_HERMITIAN_PLANAR:
- {
- // This call creates our openCL context and sets up our devices; expected to throw on error
- *size_of_input_buffers_in_bytes = *fftBatchSize * sizeof( T );
-
- device_id = initializeCL( deviceType, deviceId, platformId, *context, false );
- createOpenCLCommandQueue( *context,
- command_queue_flags, *queue,
- device_id,
- *size_of_input_buffers_in_bytes, 2, input_cl_mem_buffers,
- *size_of_output_buffers_in_bytes, number_of_output_buffers, output_cl_mem_buffers);
-
- std::vector< T > real( *fftBatchSize );
- std::vector< T > imag( *fftBatchSize );
-
- // set zero
- for( cl_uint i = 0; i < *fftBatchSize; ++i )
- {
- real[ i ] = 0;
- imag[ i ] = 0;
- }
-
- // impulse test case
- for(size_t b = 0; b < batch_size; b++)
- {
- size_t p3 = b * strides[3];
- real[p3] = static_cast<T>(*outfftVectorSize);
- }
-
- OPENCL_V_THROW( clEnqueueWriteBuffer( *queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, *size_of_input_buffers_in_bytes, &real[ 0 ],
- 0, NULL, &outEvent ),
- "clEnqueueWriteBuffer failed" );
- OPENCL_V_THROW( clEnqueueWriteBuffer( *queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0, *size_of_input_buffers_in_bytes, &imag[ 0 ],
- 0, NULL, &outEvent ),
- "clEnqueueWriteBuffer failed" );
- }
- break;
- case CLFFT_REAL:
- {
- // This call creates our openCL context and sets up our devices; expected to throw on error
- *size_of_input_buffers_in_bytes = *fftBatchSize * sizeof( T );
-
- device_id = initializeCL( deviceType, deviceId, platformId, *context, false );
- createOpenCLCommandQueue( *context,
- command_queue_flags, *queue,
- device_id,
- *size_of_input_buffers_in_bytes, 1, input_cl_mem_buffers,
- *size_of_output_buffers_in_bytes, number_of_output_buffers, output_cl_mem_buffers);
-
- std::vector< T > real( *fftBatchSize );
-
- // set zero
- for( cl_uint i = 0; i < *fftBatchSize; ++i )
- {
- real[ i ] = 0;
- }
-
- // impulse test case
- for(size_t b = 0; b < batch_size; b++)
- {
- size_t p3 = b * strides[3];
- for(size_t k = 0; k < lengths[2]; k++)
- {
- size_t p2 = p3 + k * strides[2];
- for(size_t j = 0; j < lengths[1]; j++)
- {
- size_t p1 = p2 + j * strides[1];
- for(size_t i = 0; i < lengths[0]; i++)
- {
- size_t p0 = p1 + i * strides[0];
- real[p0] = 1;
- }
- }
- }
- }
-
- OPENCL_V_THROW( clEnqueueWriteBuffer( *queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, *size_of_input_buffers_in_bytes, &real[ 0 ],
- 0, NULL, &outEvent ),
- "clEnqueueWriteBuffer failed" );
- }
- break;
- default:
- {
- throw std::runtime_error( "Input layout format not yet supported" );
- }
- break;
- }
-
- return 0;
-}
-
-//Compare output with reference C/FFTW code
-template < typename T >
-void compareWithReference(clfftLayout in_layout, clfftLayout out_layout, size_t outfftBatchSize, clfftResultLocation place, clfftPrecision precision,
- cl_command_queue queue, cl_mem *input_cl_mem_buffers, size_t size_of_input_buffers_in_bytes, size_t size_of_output_buffers_in_bytes,
- cl_mem *BuffersOut, size_t* lengths, size_t * strides, const size_t *inStrides, const size_t *outStrides, size_t *o_strides,
- size_t batch_size, size_t fftBatchSize, size_t fftVectorSizePadded, size_t outfftVectorSize,
- size_t outfftVectorSizePadded, size_t fftVectorSize, clfftDim dim, clfftDirection dir, bool hasPrecallback)
-{
- bool checkflag= false;
-
- switch( out_layout )
- {
- case CLFFT_HERMITIAN_INTERLEAVED:
- case CLFFT_COMPLEX_INTERLEAVED:
- {
- std::vector< std::complex< T > > output( outfftBatchSize );
-
- if( place == CLFFT_INPLACE )
- {
- OPENCL_V_THROW( clEnqueueReadBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &output[ 0 ],
- 0, NULL, NULL ),
- "Reading the result buffer failed" );
- }
- else
- {
- OPENCL_V_THROW( clEnqueueReadBuffer( queue, BuffersOut[ 0 ], CL_TRUE, 0, size_of_output_buffers_in_bytes, &output[ 0 ],
- 0, NULL, NULL ),
- "Reading the result buffer failed" );
- }
-
- //check output data
- if (hasPrecallback)
- {
- switch(in_layout)
- {
- case CLFFT_HERMITIAN_INTERLEAVED:
- case CLFFT_COMPLEX_INTERLEAVED:
- {
- if (precision == CLFFT_SINGLE)
- {
- fftwf_complex *refout;
-
- refout = get_fftwf_output(lengths, inStrides, outStrides, batch_size, fftBatchSize, outfftBatchSize, fftVectorSizePadded,
- in_layout, outfftVectorSizePadded, fftVectorSize, dim, dir);
-
- if (!compare<fftwf_complex, T>(refout, output, outfftBatchSize))
- checkflag = true;
-
- fftwf_free(refout);
- }
- else if (precision == CLFFT_DOUBLE)
- {
- fftw_complex *refout;
-
- refout = get_fftw_output(lengths, inStrides, outStrides, batch_size, fftBatchSize, outfftBatchSize, fftVectorSizePadded,
- in_layout, outfftVectorSizePadded, fftVectorSize, dim, dir);
-
- if (!compare<fftw_complex, T>(refout, output, outfftBatchSize))
- checkflag = true;
-
- fftw_free(refout);
- }
- }
- break;
- }
- }
- else
- {
- for( cl_uint i = 0; i < outfftBatchSize; ++i )
- {
- if (0 == (i % outfftVectorSizePadded))
- {
- if (output[i].real() != outfftVectorSize)
- {
- checkflag = true;
- break;
- }
-
- }
- else
- {
- if (output[ i ].real() != 0)
- {
- checkflag = true;
- break;
- }
- }
-
- if (output[ i ].imag() != 0)
- {
- checkflag = true;
- break;
- }
- }
- }
- }
- break;
- case CLFFT_HERMITIAN_PLANAR:
- case CLFFT_COMPLEX_PLANAR:
- {
- std::valarray< T > real( outfftBatchSize );
- std::valarray< T > imag( outfftBatchSize );
-
- if( place == CLFFT_INPLACE )
- {
- OPENCL_V_THROW( clEnqueueReadBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
- 0, NULL, NULL ),
- "Reading the result buffer failed" );
- OPENCL_V_THROW( clEnqueueReadBuffer( queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &imag[ 0 ],
- 0, NULL, NULL ),
- "Reading the result buffer failed" );
- }
- else
- {
- OPENCL_V_THROW( clEnqueueReadBuffer( queue, BuffersOut[ 0 ], CL_TRUE, 0, size_of_output_buffers_in_bytes, &real[ 0 ],
- 0, NULL, NULL ),
- "Reading the result buffer failed" );
- OPENCL_V_THROW( clEnqueueReadBuffer( queue, BuffersOut[ 1 ], CL_TRUE, 0, size_of_output_buffers_in_bytes, &imag[ 0 ],
- 0, NULL, NULL ),
- "Reading the result buffer failed" );
- }
-
- // Check output data
- if (hasPrecallback)
- {
- switch(in_layout)
- {
- case CLFFT_COMPLEX_PLANAR:
- {
- if (precision == CLFFT_SINGLE)
- {
- fftwf_complex *refout;
-
- refout = get_fftwf_output(lengths, inStrides, outStrides, batch_size, fftBatchSize, outfftBatchSize, fftVectorSizePadded,
- in_layout, outfftVectorSizePadded, fftVectorSize, dim, dir);
-
- if (!compare<fftwf_complex, T>(refout, real, imag, outfftBatchSize))
- checkflag = true;
-
- /*for( cl_uint i = 0; i < outfftBatchSize; i = i + outStrides[0])
- {
- std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << real[i] << " climag " << imag[i] << std::endl;
- }*/
-
- fftwf_free(refout);
- }
- else if (precision == CLFFT_DOUBLE)
- {
- fftw_complex *refout;
-
- refout = get_fftw_output(lengths, inStrides, outStrides, batch_size, fftBatchSize, outfftBatchSize, fftVectorSizePadded,
- in_layout, outfftVectorSizePadded, fftVectorSize, dim, dir);
-
- if (!compare<fftw_complex, T>(refout, real, imag, outfftBatchSize))
- checkflag = true;
-
- /*for( cl_uint i = 0; i < outfftBatchSize; i = i + outStrides[0])
- {
- std::cout << "i " << i << " refreal " << refout[i][0] << " refimag " << refout[i][1] << " clreal " << real[i] << " climag " << imag[i] << std::endl;
- }*/
-
- fftw_free(refout);
- }
- }
- break;
- }
- }
- else
- {
- for( cl_uint i = 0; i < outfftBatchSize; ++i )
- {
- if (0 == (i % outfftVectorSizePadded))
- {
- if (real[i] != outfftVectorSize)
- {
- checkflag = true;
- break;
- }
- }
- else
- {
- if (real[i] != 0)
- {
- checkflag = true;
- break;
- }
- }
-
- if (imag[i] != 0)
- {
- checkflag = true;
- break;
- }
- }
- }
- }
- break;
- case CLFFT_REAL:
- {
- std::valarray< T > real( outfftBatchSize );
-
- if( place == CLFFT_INPLACE )
- {
- OPENCL_V_THROW( clEnqueueReadBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
- 0, NULL, NULL ),
- "Reading the result buffer failed" );
- }
- else
- {
- OPENCL_V_THROW( clEnqueueReadBuffer( queue, BuffersOut[ 0 ], CL_TRUE, 0, size_of_output_buffers_in_bytes, &real[ 0 ],
- 0, NULL, NULL ),
- "Reading the result buffer failed" );
- }
-
- // Check output data
- if (hasPrecallback)
- {
- if (precision == CLFFT_SINGLE)
- {
- float *refout;
-
- refout = get_fftwf_output_c2r(lengths, strides, inStrides, outStrides, batch_size, fftBatchSize, outfftBatchSize, fftVectorSizePadded,
- in_layout, outfftVectorSizePadded, outfftVectorSize, dim, dir, place);
-
- if (!compare<float, T>(refout, real, batch_size, o_strides, lengths))
- checkflag = true;
-
- /*for( cl_uint i = 0; i < outfftBatchSize; i = i + outStrides[0])
- {
- std::cout << "i " << i << " refreal " << refout[i] << " clreal " << (real[i] * outfftVectorSize) << std::endl;
- }*/
-
- if (refout)
- free(refout);
- }
- }
- else
- {
- for(size_t b = 0; b < batch_size; b++)
- {
- size_t p3 = b * o_strides[3];
- for(size_t k = 0; k < lengths[2]; k++)
- {
- size_t p2 = p3 + k * o_strides[2];
- for(size_t j = 0; j < lengths[1]; j++)
- {
- size_t p1 = p2 + j * o_strides[1];
- for(size_t i = 0; i < lengths[0]; i++)
- {
- size_t p0 = p1 + i * o_strides[0];
-
- if (real[p0] != 1)
- {
- checkflag = true;
- break;
- }
- }
- }
- }
- }
- }
- }
- break;
- default:
- {
- throw std::runtime_error( "Input layout format not yet supported" );
- }
- break;
- }
-
- if (checkflag)
- {
- std::cout << "\n\n\t\tInternal Client Test *****FAIL*****" << std::endl;
- }
- else
- {
- std::cout << "\n\n\t\tInternal Client Test *****PASS*****" << std::endl;
- }
-}
-
-template < typename T >
-int transform( size_t* lengths, const size_t *inStrides, const size_t *outStrides, size_t batch_size,
- clfftLayout in_layout, clfftLayout out_layout,
- clfftResultLocation place, clfftPrecision precision, clfftDirection dir,
- cl_device_type deviceType, cl_int deviceId, cl_int platformId, bool printInfo,
- cl_uint command_queue_flags, cl_uint profile_count,
- std::auto_ptr< clfftSetupData > setupData,
- bool hasPrecallback)
-{
- // Our command line does not specify what dimension FFT we wish to transform; we decode
- // this from the lengths that the user specifies for X, Y, Z. A length of one means that
- // The user does not want that dimension.
-
- size_t strides[ 4 ];
- size_t o_strides[ 4 ];
- size_t fftVectorSize = 0;
- size_t fftVectorSizePadded = 0;
- size_t fftBatchSize = 0;
- size_t outfftVectorSize = 0;
- size_t outfftVectorSizePadded = 0;
- size_t outfftBatchSize = 0;
- size_t size_of_input_buffers_in_bytes = 0;
- size_t size_of_output_buffers_in_bytes = 0;
-
- clfftDim dim = CLFFT_1D;
- cl_mem input_cl_mem_buffers [2] = { NULL, NULL };
- cl_mem output_cl_mem_buffers[2] = { NULL, NULL };
- cl_context context;
- cl_command_queue queue;
- cl_event outEvent = NULL;
- clfftPlanHandle plan_handle;
-
- //Valudate input and output data layout
- validateDataLayout(in_layout, out_layout, place);
-
- //Initializations
- OPENCL_V_THROW( dataInitialize<T>(lengths, &dim, batch_size, inStrides, strides, outStrides, o_strides, &fftBatchSize, &outfftBatchSize,
- &fftVectorSizePadded, in_layout, out_layout, &outfftVectorSizePadded, &fftVectorSize, &outfftVectorSize, place,
- &size_of_output_buffers_in_bytes, &size_of_input_buffers_in_bytes, input_cl_mem_buffers, output_cl_mem_buffers, &context, &queue,
- deviceType, deviceId, platformId, command_queue_flags), "Data Initialization failed");
-
- // Discover and load the timer module if present
- void* timerLibHandle = LoadSharedLibrary( "lib", "StatTimer", false );
- if( timerLibHandle == NULL )
- {
- terr << _T( "Could not find the external timing library; timings disabled" ) << std::endl;
- }
-
-
- // Timer module discovered and loaded successfully
- // Initialize function pointers to call into the shared module
- PFGETSTATTIMER get_timer = reinterpret_cast< PFGETSTATTIMER > ( LoadFunctionAddr( timerLibHandle, "getStatTimer" ) );
-
- // Create and initialize our timer class, if the external timer shared library loaded
- baseStatTimer* timer = NULL;
- size_t clFFTID = 0;
- if( get_timer )
- {
- timer = get_timer( CLFFT_GPU );
- timer->Reserve( 1, profile_count );
- timer->setNormalize( true );
-
- clFFTID = timer->getUniqueID( "clFFT", 0 );
- }
-
- OPENCL_V_THROW( clfftSetup( setupData.get( ) ), "clfftSetup failed" );
- OPENCL_V_THROW( clfftCreateDefaultPlan( &plan_handle, context, dim, lengths ), "clfftCreateDefaultPlan failed" );
-
- // Default plan creates a plan that expects an inPlace transform with interleaved complex numbers
- OPENCL_V_THROW( clfftSetResultLocation( plan_handle, place ), "clfftSetResultLocation failed" );
- OPENCL_V_THROW( clfftSetLayout( plan_handle, in_layout, out_layout ), "clfftSetLayout failed" );
- OPENCL_V_THROW( clfftSetPlanBatchSize( plan_handle, batch_size ), "clfftSetPlanBatchSize failed" );
- OPENCL_V_THROW( clfftSetPlanPrecision( plan_handle, precision ), "clfftSetPlanPrecision failed" );
-
- OPENCL_V_THROW (clfftSetPlanInStride ( plan_handle, dim, strides ), "clfftSetPlanInStride failed" );
- OPENCL_V_THROW (clfftSetPlanOutStride ( plan_handle, dim, o_strides ), "clfftSetPlanOutStride failed" );
- OPENCL_V_THROW (clfftSetPlanDistance ( plan_handle, strides[ 3 ], o_strides[ 3 ]), "clfftSetPlanDistance failed" );
-
- // Set backward scale factor to 1.0 for non real FFTs to do correct output checks
- if(dir == CLFFT_BACKWARD && in_layout != CLFFT_REAL && out_layout != CLFFT_REAL)
- OPENCL_V_THROW (clfftSetPlanScale( plan_handle, CLFFT_BACKWARD, (cl_float)1.0f ), "clfftSetPlanScale failed" );
-
- //Check for Precallback
- //Currently test includes only for 1D
- if (hasPrecallback)
- {
- cl_mem userdata;
-
- //C2C 1D Interleaved
- if (in_layout == CLFFT_COMPLEX_INTERLEAVED )
- {
- const char* precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL) : STRINGIFY(MULVAL_DP);
-
- int *h_userdata = (int*)malloc(sizeof(int)*fftBatchSize);
- for( cl_uint i = 0; i < fftBatchSize; i = i + inStrides[0])
- {
- h_userdata[ i ] = SCALAR + (i % fftVectorSize);
- }
- userdata = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * fftBatchSize, (void*)h_userdata, NULL);
-
- //Register the callback
- OPENCL_V_THROW (clFFTSetPlanCallback(plan_handle, "mulval", precallbackstr, NULL, 0, PRECALLBACK, userdata), "clFFTSetPlanCallback failed");
- }
- else if (in_layout == CLFFT_HERMITIAN_INTERLEAVED)
- {
- const char* precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL_C2R) : STRINGIFY(MULVAL_DP);
-
- int *h_userdata = (int*)malloc(sizeof(int)*fftBatchSize);
- for(size_t b = 0; b < batch_size; b++)
- {
- size_t p3 = b * strides[3];
-
- for( size_t i = 0; i < fftVectorSizePadded; i = i + inStrides[0])
- {
- h_userdata[ p3 + i ] = SCALAR + i;
- }
- }
- userdata = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * fftBatchSize, (void*)h_userdata, NULL);
-
- //Register the callback
- OPENCL_V_THROW (clFFTSetPlanCallback(plan_handle, "mulval", precallbackstr, NULL, 0, PRECALLBACK, userdata), "clFFTSetPlanCallback failed");
- }
- else if (in_layout == CLFFT_COMPLEX_PLANAR)
- {
- //C2C PLANAR
- const char* precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL_PLANAR) : STRINGIFY(MULVAL_PLANAR_DP);
- USER_DATA *h_userdata = (USER_DATA*)malloc(sizeof(USER_DATA) * fftBatchSize);
- for( size_t i = 0; i < fftBatchSize; i = i + inStrides[0])
- {
- h_userdata[i].scalar1 = SCALAR + (int)(i % fftVectorSize);
- h_userdata[i].scalar2 = SCALAR + (int)(i % fftVectorSize) + 1;
- }
- userdata = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(USER_DATA) * fftBatchSize, (void*)h_userdata, NULL);
-
- //Register the callback
- OPENCL_V_THROW (clFFTSetPlanCallback(plan_handle, "mulval", precallbackstr, STRINGIFY(STRUCT_USERDATA), 0, PRECALLBACK, userdata), "clFFTSetPlanCallback failed");
- }
- else if (in_layout == CLFFT_HERMITIAN_PLANAR)
- {
- //C2C PLANAR
- const char* precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL_PLANAR) : STRINGIFY(MULVAL_PLANAR_DP);
- USER_DATA *h_userdata = (USER_DATA*)malloc(sizeof(USER_DATA) * fftBatchSize);
- for(size_t b = 0; b < batch_size; b++)
- {
- size_t p3 = b * strides[3];
-
- for( size_t i = 0; i < fftVectorSizePadded; i = i + inStrides[0])
- {
- h_userdata[p3 + i].scalar1 = SCALAR + i ;
- h_userdata[p3 + i].scalar2 = 0;
- }
- }
- userdata = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(USER_DATA) * fftBatchSize, (void*)h_userdata, NULL);
-
- //Register the callback
- OPENCL_V_THROW (clFFTSetPlanCallback(plan_handle, "mulval", precallbackstr, STRINGIFY(STRUCT_USERDATA), 0, PRECALLBACK, userdata), "clFFTSetPlanCallback failed");
- }
- }
-
- OPENCL_V_THROW( clfftBakePlan( plan_handle, 1, &queue, NULL, NULL ), "clfftBakePlan failed" );
-
- //get the buffersize
- size_t buffersize=0;
- OPENCL_V_THROW( clfftGetTmpBufSize(plan_handle, &buffersize ), "clfftGetTmpBufSize failed" );
-
- //allocate the intermediate buffer
- cl_mem clMedBuffer=NULL;
-
- if (buffersize)
- {
- cl_int medstatus;
- clMedBuffer = clCreateBuffer ( context, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
- OPENCL_V_THROW( medstatus, "Creating intmediate Buffer failed" );
- }
-
-
- cl_mem * BuffersOut = ( place == CLFFT_INPLACE ) ? NULL : &output_cl_mem_buffers[ 0 ];
-
- Timer tr;
- tr.Start();
-
- // Loop as many times as the user specifies to average out the timings
- for( cl_uint i = 0; i < profile_count; ++i )
- {
- if( timer ) timer->Start( clFFTID );
-
- OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, &outEvent,
- &input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
- "clfftEnqueueTransform failed" );
-
- if( timer ) timer->Stop( clFFTID );
- }
- OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );
- if(clMedBuffer) clReleaseMemObject(clMedBuffer);
-
- double wtime = tr.Sample()/((double)profile_count);
- size_t totalLen = 1;
- for(int i=0; i<dim; i++) totalLen *= lengths[i];
- double opsconst = 5.0 * (double)totalLen * log((double)totalLen) / log(2.0);
-
- if(profile_count > 1)
- {
- tout << "\nExecution wall time: " << 1000.0*wtime << " ms" << std::endl;
- tout << "Execution gflops: " << ((double)batch_size * opsconst)/(1000000000.0*wtime) << std::endl;
- }
-
- if( timer && (command_queue_flags & CL_QUEUE_PROFILING_ENABLE) )
- {
- // Remove all timings that are outside of 2 stddev (keep 65% of samples); we ignore outliers to get a more consistent result
- timer->pruneOutliers( 2.0 );
- timer->Print( );
- timer->Reset( );
- }
-
- /*****************/
- FreeSharedLibrary( timerLibHandle );
-
- // Read and check output data
- // This check is not valid if the FFT is executed multiple times inplace.
- //
- if (( place == CLFFT_OUTOFPLACE )
- || ( profile_count == 1))
- {
- compareWithReference<T>(in_layout, out_layout, outfftBatchSize, place, precision, queue, input_cl_mem_buffers, size_of_input_buffers_in_bytes, size_of_output_buffers_in_bytes,
- BuffersOut, lengths, strides, inStrides, outStrides, o_strides, batch_size, fftBatchSize, fftVectorSizePadded, outfftVectorSize, outfftVectorSizePadded, fftVectorSize,
- dim, dir, hasPrecallback);
- }
-
- OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
- OPENCL_V_THROW( clfftTeardown( ), "clfftTeardown failed" );
-
- cleanupCL( &context, &queue, countOf( input_cl_mem_buffers ), input_cl_mem_buffers, countOf( output_cl_mem_buffers ), output_cl_mem_buffers, &outEvent );
- return 0;
-}
-
-int main(int argc, char **argv)
-{
- // OpenCL state
- cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
- cl_int deviceId = 0;
- cl_int platformId = 0;
-
- // FFT state
-
- clfftResultLocation place = CLFFT_INPLACE;
- clfftLayout inLayout = CLFFT_COMPLEX_INTERLEAVED;
- clfftLayout outLayout = CLFFT_COMPLEX_INTERLEAVED;
- clfftPrecision precision = CLFFT_SINGLE;
- clfftDirection dir = CLFFT_FORWARD;
- size_t lengths[ 3 ] = {1,1,1};
- size_t iStrides[ 4 ] = {0,0,0,0};
- size_t oStrides[ 4 ] = {0,0,0,0};
- cl_uint profile_count = 0;
-
- cl_uint command_queue_flags = 0;
- size_t batchSize = 1;
-
- //callback
- bool hasPrecallback = true;
-
- // Initialize flags for FFT library
- std::auto_ptr< clfftSetupData > setupData( new clfftSetupData );
- OPENCL_V_THROW( clfftInitSetupData( setupData.get( ) ),
- "clfftInitSetupData failed" );
-
- try
- {
- // Declare the supported options.
- po::options_description desc( "clFFT client command line options" );
- desc.add_options()
- ( "help,h", "produces this help message" )
- ( "gpu,g", "Force selection of OpenCL GPU devices only" )
- ( "cpu,c", "Force selection of OpenCL CPU devices only" )
- ( "all,a", "Force selection of all OpenCL devices (default)" )
- ( "outPlace,o", "Out of place FFT transform (default: in place)" )
- ( "double", "Double precision transform (default: single)" )
- ( "inv", "Backward transform (default: forward)" )
- ( "dumpKernels,d", "FFT engine will dump generated OpenCL FFT kernels to disk (default: dump off)" )
- ( "noprecall", "Disable Precallback (default: precallback on)" )
- ( "lenX,x", po::value< size_t >( &lengths[ 0 ] )->default_value( 1024 ), "Specify the length of the 1st dimension of a test array" )
- ( "lenY,y", po::value< size_t >( &lengths[ 1 ] )->default_value( 1 ), "Specify the length of the 2nd dimension of a test array" )
- ( "lenZ,z", po::value< size_t >( &lengths[ 2 ] )->default_value( 1 ), "Specify the length of the 3rd dimension of a test array" )
- ( "isX", po::value< size_t >( &iStrides[ 0 ] )->default_value( 1 ), "Specify the input stride of the 1st dimension of a test array" )
- ( "isY", po::value< size_t >( &iStrides[ 1 ] )->default_value( 0 ), "Specify the input stride of the 2nd dimension of a test array" )
- ( "isZ", po::value< size_t >( &iStrides[ 2 ] )->default_value( 0 ), "Specify the input stride of the 3rd dimension of a test array" )
- ( "iD", po::value< size_t >( &iStrides[ 3 ] )->default_value( 0 ), "input distance between subsequent sets of data when batch size > 1" )
- ( "osX", po::value< size_t >( &oStrides[ 0 ] )->default_value( 1 ), "Specify the output stride of the 1st dimension of a test array" )
- ( "osY", po::value< size_t >( &oStrides[ 1 ] )->default_value( 0 ), "Specify the output stride of the 2nd dimension of a test array" )
- ( "osZ", po::value< size_t >( &oStrides[ 2 ] )->default_value( 0 ), "Specify the output stride of the 3rd dimension of a test array" )
- ( "oD", po::value< size_t >( &oStrides[ 3 ] )->default_value( 0 ), "output distance between subsequent sets of data when batch size > 1" )
- ( "batchSize,b", po::value< size_t >( &batchSize )->default_value( 1 ), "If this value is greater than one, arrays will be used " )
- ( "profile,p", po::value< cl_uint >( &profile_count )->default_value( 1 ), "Time and report the kernel speed of the FFT (default: profiling off)" )
- ( "inLayout", po::value< clfftLayout >( &inLayout )->default_value( CLFFT_COMPLEX_INTERLEAVED ), "Layout of input data:\n1) interleaved\n2) planar\n3) hermitian interleaved\n4) hermitian planar\n5) real" )
- ( "outLayout", po::value< clfftLayout >( &outLayout )->default_value( CLFFT_COMPLEX_INTERLEAVED ), "Layout of input data:\n1) interleaved\n2) planar\n3) hermitian interleaved\n4) hermitian planar\n5) real" )
- ;
-
- po::variables_map vm;
- po::store( po::parse_command_line( argc, argv, desc ), vm );
- po::notify( vm );
-
- if( vm.count( "help" ) )
- {
- std::cout << desc << std::endl;
- return 0;
- }
-
- size_t mutex = ((vm.count( "gpu" ) > 0) ? 1 : 0)
- | ((vm.count( "cpu" ) > 0) ? 2 : 0)
- | ((vm.count( "all" ) > 0) ? 4 : 0);
- if ((mutex & (mutex-1)) != 0) {
- terr << _T("You have selected mutually-exclusive OpenCL device options:") << std::endl;
- if (vm.count ( "gpu" ) > 0) terr << _T(" gpu,g Force selection of OpenCL GPU devices only" ) << std::endl;
- if (vm.count ( "cpu" ) > 0) terr << _T(" cpu,c Force selection of OpenCL CPU devices only" ) << std::endl;
- if (vm.count ( "all" ) > 0) terr << _T(" all,a Force selection of all OpenCL devices (default)" ) << std::endl;
- return 1;
- }
-
- if( vm.count( "gpu" ) )
- {
- deviceType = CL_DEVICE_TYPE_GPU;
- }
-
- if( vm.count( "cpu" ) )
- {
- deviceType = CL_DEVICE_TYPE_CPU;
- }
-
- if( vm.count( "all" ) )
- {
- deviceType = CL_DEVICE_TYPE_ALL;
- }
-
- if( vm.count( "outPlace" ) )
- {
- place = CLFFT_OUTOFPLACE;
- }
-
- if( vm.count( "double" ) )
- {
- precision = CLFFT_DOUBLE;
- }
-
- if( vm.count( "inv" ) )
- {
- dir = CLFFT_BACKWARD;
- }
-
- if( profile_count > 1 )
- {
- command_queue_flags |= CL_QUEUE_PROFILING_ENABLE;
- }
-
- if( vm.count( "dumpKernels" ) )
- {
- setupData->debugFlags |= CLFFT_DUMP_PROGRAMS;
- }
-
- if( vm.count( "noprecall" ) )
- {
- hasPrecallback = false;
- }
-
- int inL = (int)inLayout;
- int otL = (int)outLayout;
-
- // input output layout support matrix
- int ioLayoutSupport[5][5] = {
- { 1, 1, 0, 0, 1 },
- { 1, 1, 0, 0, 1 },
- { 0, 0, 0, 0, 1 },
- { 0, 0, 0, 0, 1 },
- { 1, 1, 1, 1, 0 },
- };
-
- if((inL < 1) || (inL > 5)) throw std::runtime_error( "Invalid Input layout format" );
- if((otL < 1) || (otL > 5)) throw std::runtime_error( "Invalid Output layout format" );
-
- if(ioLayoutSupport[inL-1][otL-1] == 0) throw std::runtime_error( "Invalid combination of Input/Output layout formats" );
-
- if( ((inL == 1) || (inL == 2)) && ((otL == 1) || (otL == 2)) ) // Complex-Complex cases
- {
- iStrides[1] = iStrides[1] ? iStrides[1] : lengths[0] * iStrides[0];
- iStrides[2] = iStrides[2] ? iStrides[2] : lengths[1] * iStrides[1];
- iStrides[3] = iStrides[3] ? iStrides[3] : lengths[2] * iStrides[2];
-
- if(place == CLFFT_INPLACE)
- {
- oStrides[0] = iStrides[0];
- oStrides[1] = iStrides[1];
- oStrides[2] = iStrides[2];
- oStrides[3] = iStrides[3];
- }
- else
- {
- oStrides[1] = oStrides[1] ? oStrides[1] : lengths[0] * oStrides[0];
- oStrides[2] = oStrides[2] ? oStrides[2] : lengths[1] * oStrides[1];
- oStrides[3] = oStrides[3] ? oStrides[3] : lengths[2] * oStrides[2];
- }
- }
- else // Real-Complex and Complex-Real cases
- {
- size_t *rst, *cst;
- size_t N = lengths[0];
- size_t Nt = 1 + lengths[0]/2;
- bool iflag = false;
- bool rcFull = (inL == 1) || (inL == 2) || (otL == 1) || (otL == 2);
-
- if(inLayout == CLFFT_REAL) { iflag = true; rst = iStrides; }
- else { rst = oStrides; } // either in or out should be REAL
-
- // Set either in or out strides whichever is real
- if(place == CLFFT_INPLACE)
- {
- if(rcFull) { rst[1] = rst[1] ? rst[1] : N * 2 * rst[0]; }
- else { rst[1] = rst[1] ? rst[1] : Nt * 2 * rst[0]; }
-
- rst[2] = rst[2] ? rst[2] : lengths[1] * rst[1];
- rst[3] = rst[3] ? rst[3] : lengths[2] * rst[2];
- }
- else
- {
- rst[1] = rst[1] ? rst[1] : lengths[0] * rst[0];
- rst[2] = rst[2] ? rst[2] : lengths[1] * rst[1];
- rst[3] = rst[3] ? rst[3] : lengths[2] * rst[2];
- }
-
- // Set the remaining of in or out strides that is not real
- if(iflag) { cst = oStrides; }
- else { cst = iStrides; }
-
- if(rcFull) { cst[1] = cst[1] ? cst[1] : N * cst[0]; }
- else { cst[1] = cst[1] ? cst[1] : Nt * cst[0]; }
-
- cst[2] = cst[2] ? cst[2] : lengths[1] * cst[1];
- cst[3] = cst[3] ? cst[3] : lengths[2] * cst[2];
- }
-
- if( precision == CLFFT_SINGLE )
- transform<float>( lengths, iStrides, oStrides, batchSize, inLayout, outLayout, place, precision, dir, deviceType, deviceId, platformId, false, command_queue_flags, profile_count, setupData, hasPrecallback );
- else
- transform<double>( lengths, iStrides, oStrides, batchSize, inLayout, outLayout, place, precision, dir, deviceType, deviceId, platformId, false, command_queue_flags, profile_count, setupData, hasPrecallback );
- }
- catch( std::exception& e )
- {
- terr << _T( "clFFT error condition reported:" ) << std::endl << e.what() << std::endl;
- return 1;
- }
- return 0;
-}
diff --git a/src/client-callback/client.h b/src/client-callback/client.h
deleted file mode 100644
index a1e100d..0000000
--- a/src/client-callback/client.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/* ************************************************************************
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ************************************************************************/
-
-
-#pragma once
-#if !defined( CLIENT_H )
-#define CLIENT_H
-
-// Boost headers that we want to use
-// #define BOOST_PROGRAM_OPTIONS_DYN_LINK
-#include <boost/program_options.hpp>
-
-#define CALLBCKSTR(...) #__VA_ARGS__
-#define STRINGIFY(...) CALLBCKSTR(__VA_ARGS__)
-
-#ifdef WIN32
-
-struct Timer
-{
- LARGE_INTEGER start, stop, freq;
-
-public:
- Timer() { QueryPerformanceFrequency( &freq ); }
-
- void Start() { QueryPerformanceCounter(&start); }
- double Sample()
- {
- QueryPerformanceCounter ( &stop );
- double time = (double)(stop.QuadPart-start.QuadPart) / (double)(freq.QuadPart);
- return time;
- }
-};
-
-#else
-
-#include <time.h>
-#include <math.h>
-
-struct Timer
-{
- struct timespec start, end;
-
-public:
- Timer() { }
-
- void Start() { clock_gettime(CLOCK_MONOTONIC, &start); }
- double Sample()
- {
- clock_gettime(CLOCK_MONOTONIC, &end);
- double time = 1000000000L * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec;
- return time * 1E-9;
- }
-};
-
-#endif
-
-#endif
diff --git a/src/client-callback/openCL.misc.cpp b/src/client-callback/openCL.misc.cpp
deleted file mode 100644
index cb5db29..0000000
--- a/src/client-callback/openCL.misc.cpp
+++ /dev/null
@@ -1,536 +0,0 @@
-/* ************************************************************************
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ************************************************************************/
-
-
-// clfft.opencl.cpp : Provides functions to set up openCL
-//
-
-#include "stdafx.h"
-#include <stdexcept>
-#include <iomanip>
-#include <sstream>
-#include <cstring>
-#include <vector>
-#include "clFFT.h"
-#include "openCL.misc.h"
-
-
-
-void prettyPrintPlatformInfo( const cl_platform_id& pId )
-{
- size_t platformProfileSize = 0;
- OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_PROFILE, 0, NULL, &platformProfileSize ),
- "Getting CL_PLATFORM_PROFILE Platform Info string size ( ::clGetPlatformInfo() )" );
-
- std::vector< char > szPlatformProfile( platformProfileSize );
- OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_PROFILE, platformProfileSize, &szPlatformProfile[ 0 ], NULL),
- "Getting CL_PLATFORM_PROFILE Platform Info string ( ::clGetPlatformInfo() )" );
-
- size_t platformVersionSize = 0;
- OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VERSION, 0, NULL, &platformVersionSize ),
- "Getting CL_PLATFORM_VERSION Platform Info string size ( ::clGetPlatformInfo() )" );
-
- std::vector< char > szPlatformVersion( platformVersionSize );
- OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VERSION, platformVersionSize, &szPlatformVersion[ 0 ], NULL),
- "Getting CL_PLATFORM_VERSION Platform Info string ( ::clGetPlatformInfo() )" );
-
- size_t platformNameSize = 0;
- OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_NAME, 0, NULL, &platformNameSize ),
- "Getting CL_PLATFORM_NAME Platform Info string size ( ::clGetPlatformInfo() )" );
-
- std::vector< char > szPlatformName( platformNameSize );
- OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_NAME, platformNameSize, &szPlatformName[ 0 ], NULL),
- "Getting CL_PLATFORM_NAME Platform Info string ( ::clGetPlatformInfo() )" );
-
- size_t vendorStringSize = 0;
- OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VENDOR, 0, NULL, &vendorStringSize ),
- "Getting CL_PLATFORM_VENDOR Platform Info string size ( ::clGetPlatformInfo() )" );
-
- std::vector< char > szPlatformVendor( vendorStringSize );
- OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VENDOR, vendorStringSize, &szPlatformVendor[ 0 ], NULL),
- "Getting CL_PLATFORM_VENDOR Platform Info string ( ::clGetPlatformInfo() )" );
-
- size_t platformExtensionsSize = 0;
- OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_EXTENSIONS, 0, NULL, &platformExtensionsSize ),
- "Getting CL_PLATFORM_EXTENSIONS Platform Info string size ( ::clGetPlatformInfo() )" );
-
- std::vector< char > szPlatformExtensions( platformExtensionsSize );
- OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_EXTENSIONS, platformExtensionsSize, &szPlatformExtensions[ 0 ], NULL),
- "Getting CL_PLATFORM_EXTENSIONS Platform Info string ( ::clGetPlatformInfo() )" );
-
- const int indent = countOf( " CL_PLATFORM_EXTENSIONS: " );
- std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_PROFILE: " << &szPlatformProfile[ 0 ] << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_VERSION: " << &szPlatformVersion[ 0 ] << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_NAME: " << &szPlatformName[ 0 ] << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_VENDOR: " << &szPlatformVendor[ 0 ] << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_EXTENSIONS: " << &szPlatformExtensions[ 0 ] << std::endl;
- std::cout << std::right << std::endl;
-}
-
-void prettyPrintDeviceInfo( const cl_device_id& dId )
-{
- size_t deviceNameSize = 0;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_NAME, 0, NULL, &deviceNameSize ),
- "Getting CL_DEVICE_NAME Platform Info string size ( ::clGetDeviceInfo() )" );
-
- std::vector< char > szDeviceName( deviceNameSize );
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_NAME, deviceNameSize, &szDeviceName[ 0 ], NULL ),
- "Getting CL_DEVICE_NAME Platform Info string ( ::clGetDeviceInfo() )" );
-
- size_t deviceVersionSize = 0;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_VERSION, 0, NULL, &deviceVersionSize ),
- "Getting CL_DEVICE_VERSION Platform Info string size ( ::clGetDeviceInfo() )" );
-
- std::vector< char > szDeviceVersion( deviceVersionSize );
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_VERSION, deviceVersionSize, &szDeviceVersion[ 0 ], NULL ),
- "Getting CL_DEVICE_VERSION Platform Info string ( ::clGetDeviceInfo() )" );
-
- size_t driverVersionSize = 0;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DRIVER_VERSION, 0, NULL, &driverVersionSize ),
- "Getting CL_DRIVER_VERSION Platform Info string size ( ::clGetDeviceInfo() )" );
-
- std::vector< char > szDriverVersion( driverVersionSize );
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DRIVER_VERSION, driverVersionSize, &szDriverVersion[ 0 ], NULL ),
- "Getting CL_DRIVER_VERSION Platform Info string ( ::clGetDeviceInfo() )" );
-
- size_t openCLVersionSize = 0;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &openCLVersionSize ),
- "Getting CL_DEVICE_OPENCL_C_VERSION Platform Info string size ( ::clGetDeviceInfo() )" );
-
- std::vector< char > szOpenCLVersion( openCLVersionSize );
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_OPENCL_C_VERSION, openCLVersionSize, &szOpenCLVersion[ 0 ], NULL ),
- "Getting CL_DEVICE_OPENCL_C_VERSION Platform Info string ( ::clGetDeviceInfo() )" );
-
- cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_TYPE, sizeof( cl_device_type ), &devType, NULL ),
- "Getting CL_DEVICE_TYPE device info ( ::clGetDeviceInfo() )" );
-
- cl_uint devAddrBits = 0;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_ADDRESS_BITS, sizeof( cl_uint ), &devAddrBits, NULL ),
- "Getting CL_DEVICE_ADDRESS_BITS device info ( ::clGetDeviceInfo() )" );
-
- cl_uint maxClockFreq = 0;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof( cl_uint ), &maxClockFreq, NULL ),
- "Getting CL_DEVICE_MAX_CLOCK_FREQUENCY device info ( ::clGetDeviceInfo() )" );
-
- cl_bool devAvailable = CL_FALSE;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_AVAILABLE, sizeof( cl_bool ), &devAvailable, NULL ),
- "Getting CL_DEVICE_AVAILABLE device info ( ::clGetDeviceInfo() )" );
-
- cl_bool devCompAvailable = CL_FALSE;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_COMPILER_AVAILABLE, sizeof( cl_bool ), &devCompAvailable, NULL ),
- "Getting CL_DEVICE_COMPILER_AVAILABLE device info ( ::clGetDeviceInfo() )" );
-
- size_t devMaxWorkGroup = 0;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size_t ), &devMaxWorkGroup, NULL ),
- "Getting CL_DEVICE_MAX_WORK_GROUP_SIZE device info ( ::clGetDeviceInfo() )" );
-
- cl_uint devMaxWorkItemDim = CL_FALSE;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( cl_uint ), &devMaxWorkItemDim, NULL ),
- "Getting CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS device info ( ::clGetDeviceInfo() )" );
-
- std::vector< size_t > devMaxWorkItemSizes( devMaxWorkItemDim );
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof( size_t )*devMaxWorkItemSizes.size( ), &devMaxWorkItemSizes[0], NULL),
- "Getting CL_DEVICE_MAX_WORK_ITEM_SIZES device info ( ::clGetDeviceInfo() )" );
-
- cl_bool deviceHostUnified = 0;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof( cl_bool ), &deviceHostUnified, NULL ),
- "Getting CL_DEVICE_HOST_UNIFIED_MEMORY Platform Info string ( ::clGetDeviceInfo() )" );
-
- cl_ulong devMaxConstantBuffer = 0;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( cl_ulong ), &devMaxConstantBuffer, NULL ),
- "Getting CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE device info ( ::clGetDeviceInfo() )" );
-
- cl_ulong devLocalMemSize = 0;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( cl_ulong ), &devLocalMemSize, NULL ),
- "Getting CL_DEVICE_LOCAL_MEM_SIZE device info ( ::clGetDeviceInfo() )" );
-
- cl_ulong deviceGlobalMemSize = 0;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( cl_ulong ), &deviceGlobalMemSize, NULL ),
- "Getting CL_DEVICE_GLOBAL_MEM_SIZE device info ( ::clGetDeviceInfo() )" );
-
- cl_ulong deviceMaxMemAllocSize = 0;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( cl_ulong ), &deviceMaxMemAllocSize, NULL ),
- "Getting CL_DEVICE_MAX_MEM_ALLOC_SIZE device info ( ::clGetDeviceInfo() )" );
-
- size_t deviceExtSize = 0;
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_EXTENSIONS, 0, NULL, &deviceExtSize ),
- "Getting CL_DEVICE_EXTENSIONS Platform Info string size ( ::clGetDeviceInfo() )" );
-
- std::vector< char > szDeviceExt( deviceExtSize );
- OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_EXTENSIONS, deviceExtSize, &szDeviceExt[ 0 ], NULL ),
- "Getting CL_DEVICE_EXTENSIONS Platform Info string ( ::clGetDeviceInfo() )" );
-
- const int indent = countOf( " CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: " );
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_NAME: " << &szDeviceName[ 0 ] << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_VERSION: " << &szDeviceVersion[ 0 ] << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DRIVER_VERSION: " << &szDriverVersion[ 0 ] << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_TYPE: "
- << (CL_DEVICE_TYPE_DEFAULT & devType ? "default" : "")
- << (CL_DEVICE_TYPE_CPU & devType ? "CPU" : "")
- << (CL_DEVICE_TYPE_GPU & devType ? "GPU" : "")
- << (CL_DEVICE_TYPE_ACCELERATOR & devType ? "Accelerator" : "")
- << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_CLOCK_FREQUENCY: " << maxClockFreq << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_ADDRESS_BITS: " << devAddrBits << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_AVAILABLE: " << ( devAvailable ? "TRUE": "FALSE") << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_COMPILER_AVAILABLE: " << ( devCompAvailable ? "TRUE": "FALSE") << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_OPENCL_C_VERSION: " << &szOpenCLVersion[ 0 ] << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_WORK_GROUP_SIZE: " << devMaxWorkGroup << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: " << devMaxWorkItemDim << std::endl;
- for( cl_uint wis = 0; wis < devMaxWorkItemSizes.size( ); ++wis )
- {
- std::stringstream dimString;
- dimString << "Dimension[ " << wis << " ] ";
- std::cout << std::right << std::setw( indent ) << dimString.str( ) << devMaxWorkItemSizes[wis] << std::endl;
- }
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_HOST_UNIFIED_MEMORY: " << ( deviceHostUnified ? "TRUE": "FALSE") << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: " << devMaxConstantBuffer;
- std::cout << " ( " << devMaxConstantBuffer / 1024 << " KB )" << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_LOCAL_MEM_SIZE: " << devLocalMemSize;
- std::cout << " ( " << devLocalMemSize / 1024 << " KB )" << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_GLOBAL_MEM_SIZE: " << deviceGlobalMemSize;
- std::cout << " ( " << deviceGlobalMemSize / 1048576 << " MB )" << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_MEM_ALLOC_SIZE: " << deviceMaxMemAllocSize;
- std::cout << " ( " << deviceMaxMemAllocSize / 1048576 << " MB )" << std::endl;
- std::cout << std::left << std::setw( indent ) << " CL_DEVICE_EXTENSIONS: " << &szDeviceExt[ 0 ] << std::endl;
-
- std::cout << std::right << std::endl;
-}
-
-void prettyPrintCLPlatforms(std::vector< cl_platform_id >& platforms,
- std::vector< std::vector< cl_device_id > >& devices)
-{
- for (unsigned int i = 0; i < platforms.size(); ++i)
- {
- std::cout << "OpenCL platform [ " << i << " ]:" << std::endl;
- prettyPrintPlatformInfo(platforms[i]);
-
- for (unsigned int n = 0; n < devices[i].size(); ++n)
- {
- std::cout << "OpenCL platform [ " << i << " ], device [ " << n << " ]:" << std::endl;
- prettyPrintDeviceInfo((devices[i])[n]);
- }
- }
-
-}
-
-// Verify a failed condition; return true on fail
-inline cl_bool OPENCL_V_FAIL( cl_int res )
-{
- if( res == CL_SUCCESS )
- return CL_FALSE;
- else
- return CL_TRUE;
-}
-
-std::string prettyPrintclFFTStatus( const cl_int& status )
-{
- switch( status )
- {
- case CLFFT_INVALID_GLOBAL_WORK_SIZE:
- return "CLFFT_INVALID_GLOBAL_WORK_SIZE";
- case CLFFT_INVALID_MIP_LEVEL:
- return "CLFFT_INVALID_MIP_LEVEL";
- case CLFFT_INVALID_BUFFER_SIZE:
- return "CLFFT_INVALID_BUFFER_SIZE";
- case CLFFT_INVALID_GL_OBJECT:
- return "CLFFT_INVALID_GL_OBJECT";
- case CLFFT_INVALID_OPERATION:
- return "CLFFT_INVALID_OPERATION";
- case CLFFT_INVALID_EVENT:
- return "CLFFT_INVALID_EVENT";
- case CLFFT_INVALID_EVENT_WAIT_LIST:
- return "CLFFT_INVALID_EVENT_WAIT_LIST";
- case CLFFT_INVALID_GLOBAL_OFFSET:
- return "CLFFT_INVALID_GLOBAL_OFFSET";
- case CLFFT_INVALID_WORK_ITEM_SIZE:
- return "CLFFT_INVALID_WORK_ITEM_SIZE";
- case CLFFT_INVALID_WORK_GROUP_SIZE:
- return "CLFFT_INVALID_WORK_GROUP_SIZE";
- case CLFFT_INVALID_WORK_DIMENSION:
- return "CLFFT_INVALID_WORK_DIMENSION";
- case CLFFT_INVALID_KERNEL_ARGS:
- return "CLFFT_INVALID_KERNEL_ARGS";
- case CLFFT_INVALID_ARG_SIZE:
- return "CLFFT_INVALID_ARG_SIZE";
- case CLFFT_INVALID_ARG_VALUE:
- return "CLFFT_INVALID_ARG_VALUE";
- case CLFFT_INVALID_ARG_INDEX:
- return "CLFFT_INVALID_ARG_INDEX";
- case CLFFT_INVALID_KERNEL:
- return "CLFFT_INVALID_KERNEL";
- case CLFFT_INVALID_KERNEL_DEFINITION:
- return "CLFFT_INVALID_KERNEL_DEFINITION";
- case CLFFT_INVALID_KERNEL_NAME:
- return "CLFFT_INVALID_KERNEL_NAME";
- case CLFFT_INVALID_PROGRAM_EXECUTABLE:
- return "CLFFT_INVALID_PROGRAM_EXECUTABLE";
- case CLFFT_INVALID_PROGRAM:
- return "CLFFT_INVALID_PROGRAM";
- case CLFFT_INVALID_BUILD_OPTIONS:
- return "CLFFT_INVALID_BUILD_OPTIONS";
- case CLFFT_INVALID_BINARY:
- return "CLFFT_INVALID_BINARY";
- case CLFFT_INVALID_SAMPLER:
- return "CLFFT_INVALID_SAMPLER";
- case CLFFT_INVALID_IMAGE_SIZE:
- return "CLFFT_INVALID_IMAGE_SIZE";
- case CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR:
- return "CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR";
- case CLFFT_INVALID_MEM_OBJECT:
- return "CLFFT_INVALID_MEM_OBJECT";
- case CLFFT_INVALID_HOST_PTR:
- return "CLFFT_INVALID_HOST_PTR";
- case CLFFT_INVALID_COMMAND_QUEUE:
- return "CLFFT_INVALID_COMMAND_QUEUE";
- case CLFFT_INVALID_QUEUE_PROPERTIES:
- return "CLFFT_INVALID_QUEUE_PROPERTIES";
- case CLFFT_INVALID_CONTEXT:
- return "CLFFT_INVALID_CONTEXT";
- case CLFFT_INVALID_DEVICE:
- return "CLFFT_INVALID_DEVICE";
- case CLFFT_INVALID_PLATFORM:
- return "CLFFT_INVALID_PLATFORM";
- case CLFFT_INVALID_DEVICE_TYPE:
- return "CLFFT_INVALID_DEVICE_TYPE";
- case CLFFT_INVALID_VALUE:
- return "CLFFT_INVALID_VALUE";
- case CLFFT_MAP_FAILURE:
- return "CLFFT_MAP_FAILURE";
- case CLFFT_BUILD_PROGRAM_FAILURE:
- return "CLFFT_BUILD_PROGRAM_FAILURE";
- case CLFFT_IMAGE_FORMAT_NOT_SUPPORTED:
- return "CLFFT_IMAGE_FORMAT_NOT_SUPPORTED";
- case CLFFT_IMAGE_FORMAT_MISMATCH:
- return "CLFFT_IMAGE_FORMAT_MISMATCH";
- case CLFFT_MEM_COPY_OVERLAP:
- return "CLFFT_MEM_COPY_OVERLAP";
- case CLFFT_PROFILING_INFO_NOT_AVAILABLE:
- return "CLFFT_PROFILING_INFO_NOT_AVAILABLE";
- case CLFFT_OUT_OF_HOST_MEMORY:
- return "CLFFT_OUT_OF_HOST_MEMORY";
- case CLFFT_OUT_OF_RESOURCES:
- return "CLFFT_OUT_OF_RESOURCES";
- case CLFFT_MEM_OBJECT_ALLOCATION_FAILURE:
- return "CLFFT_MEM_OBJECT_ALLOCATION_FAILURE";
- case CLFFT_COMPILER_NOT_AVAILABLE:
- return "CLFFT_COMPILER_NOT_AVAILABLE";
- case CLFFT_DEVICE_NOT_AVAILABLE:
- return "CLFFT_DEVICE_NOT_AVAILABLE";
- case CLFFT_DEVICE_NOT_FOUND:
- return "CLFFT_DEVICE_NOT_FOUND";
- case CLFFT_SUCCESS:
- return "CLFFT_SUCCESS";
- case CLFFT_NOTIMPLEMENTED:
- return "CLFFT_NOTIMPLEMENTED";
- case CLFFT_TRANSPOSED_NOTIMPLEMENTED:
- return "CLFFT_TRANSPOSED_NOTIMPLEMENTED";
- case CLFFT_FILE_NOT_FOUND:
- return "CLFFT_FILE_NOT_FOUND";
- case CLFFT_FILE_CREATE_FAILURE:
- return "CLFFT_FILE_CREATE_FAILURE";
- case CLFFT_VERSION_MISMATCH:
- return "CLFFT_VERSION_MISMATCH";
- case CLFFT_INVALID_PLAN:
- return "CLFFT_INVALID_PLAN";
- default:
- return "Error code not defined";
- break;
- }
-}
-
-
-int discoverCLPlatforms( cl_device_type deviceType,
- std::vector< cl_platform_id >& platforms,
- std::vector< std::vector< cl_device_id > >& devices )
-{
- cl_int status = 0;
-
- /*
- * Find all OpenCL platforms this system has to offer.
- */
-
- cl_uint numPlatforms = 0;
- cl_platform_id platform = NULL;
- OPENCL_V_THROW(::clGetPlatformIDs(0, NULL, &numPlatforms),
- "Getting number of platforms( ::clGetPlatformsIDs() )");
-
- if (numPlatforms > 0)
- {
- platforms.resize( numPlatforms );
- devices.resize( numPlatforms );
- OPENCL_V_THROW(::clGetPlatformIDs(numPlatforms, &platforms[0], NULL),
- "Getting Platform Id's ( ::clGetPlatformsIDs() )");
-
- if (NULL == platforms[0])
- {
- throw std::runtime_error("No appropriate OpenCL platform could be found");
- }
-
- /*
- * Now, for each platform get all available devices matching deviceType.
- */
- for (unsigned int i = 0; i < numPlatforms; ++i)
- {
- // Get the device list for deviceType.
- //
- cl_uint numDevices = 0;
- OPENCL_V_WARN(::clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &numDevices),
- "Getting OpenCL devices ( ::clGetDeviceIDs() )");
- if (0 == numDevices)
- {
- // OPENCL_V_WARN(CLFFT_DEVICE_NOT_AVAILABLE, "No devices available");
- continue;
- }
-
- devices[i].resize(numDevices);
- OPENCL_V_THROW(::clGetDeviceIDs(platforms[i], deviceType, numDevices, &(devices[i])[0], NULL),
- "Getting OpenCL deviceIDs ( ::clGetDeviceIDs() )");
- }
- }
-
- return 0;
-}
-
-std::vector< cl_device_id > initializeCL( cl_device_type deviceType,
- cl_int deviceId,
- cl_int platformId,
- cl_context& context,
- bool printclInfo )
-{
- cl_int status = 0;
- cl_platform_id platform = NULL;
- std::vector< cl_device_id > devices(1);
- devices[0] = NULL;
-
- // Have a look at all the available platforms on this system
- std::vector< cl_platform_id > platformInfos;
- std::vector< std::vector< cl_device_id > > deviceInfos;
- discoverCLPlatforms( deviceType, platformInfos, deviceInfos );
-
-
- for (unsigned int i = 0; i < platformInfos.size(); ++i)
- {
- if(i == platformId)
- {
- for (unsigned int n = 0; n < deviceInfos[i].size(); ++n)
- {
- if (n == deviceId)
- {
- platform = platformInfos[i];
- devices[0] = deviceInfos[i][n];
-
- if(printclInfo)
- {
- prettyPrintPlatformInfo(platform);
- prettyPrintDeviceInfo(devices[0]);
- }
-
- break;
- }
- }
-
- break;
- }
- }
-
-
-
- // Do some error checking if we really selected a valid platform and a valid device
- if (NULL == devices[0])
- {
- OPENCL_V_THROW(CLFFT_DEVICE_NOT_AVAILABLE, "No devices available");
- }
-
- if (NULL == platform)
- {
- throw std::runtime_error("No appropriate OpenCL platform could be found");
- }
-
- // Create an OpenCL context
- cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0 };
- context = clCreateContext(cps,
- (cl_uint)devices.size(),
- &devices[0],
- NULL,
- NULL,
- &status);
- OPENCL_V_THROW(status, "Creating Context ( ::clCreateContextFromType() )");
-
- return devices;
-}
-
-int cleanupCL( cl_context* context, cl_command_queue* commandQueue,
- const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[], cl_event* outEvent )
-{
- if( *outEvent != NULL )
- OPENCL_V_THROW( clReleaseEvent( *outEvent ), "Error: In clReleaseEvent\n" );
-
- releaseOpenCLMemBuffer( numBuffersIn, inputBuffer);
- releaseOpenCLMemBuffer( numBuffersOut, outputBuffer);
-
- if( *commandQueue != NULL )
- OPENCL_V_THROW( clReleaseCommandQueue( *commandQueue ), "Error: In clReleaseCommandQueue\n" );
-
- if( *context != NULL )
- OPENCL_V_THROW( clReleaseContext( *context ), "Error: In clReleaseContext\n" );
-
- return 0;
-}
-
-int createOpenCLMemoryBuffer( cl_context& context, const size_t bufferSizeBytes, const cl_uint numBuffers, cl_mem buffer[], cl_mem_flags accessibility) {
- cl_int status = 0;
-
- for( cl_uint i = 0; i < numBuffers; ++i )
- {
- buffer[ i ] = ::clCreateBuffer( context, accessibility, bufferSizeBytes, NULL, &status);
- OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer() )" );
- }
-
- return 0;
-}
-
-int releaseOpenCLMemBuffer( const cl_uint numBuffers, cl_mem buffer[])
-{
- for( cl_uint i = 0; i < numBuffers; ++i )
- {
- if( buffer[ i ] != NULL )
- OPENCL_V_THROW( clReleaseMemObject( buffer[ i ] ), "Error: In clReleaseMemObject\n" );
- }
-
- return 0;
-}
-
-void createOpenCLCommandQueue( cl_context& context,
- cl_uint commandQueueFlags,
- cl_command_queue& commandQueue,
- std::vector< cl_device_id > devices,
- const size_t bufferSizeBytesIn,
- const cl_uint numBuffersIn,
- cl_mem clMemBufferIn[],
- const size_t bufferSizeBytesOut,
- const cl_uint numBuffersOut,
- cl_mem clMemBufferOut[] )
-{
- cl_int status = 0;
- commandQueue = ::clCreateCommandQueue( context, devices[0], commandQueueFlags, &status );
- OPENCL_V_THROW( status, "Creating Command Queue ( ::clCreateCommandQueue() )" );
-
- createOpenCLMemoryBuffer( context, bufferSizeBytesIn, numBuffersIn, clMemBufferIn, CL_MEM_READ_WRITE);
- createOpenCLMemoryBuffer( context, bufferSizeBytesOut, numBuffersOut, clMemBufferOut, CL_MEM_READ_WRITE);
-}
-
diff --git a/src/client-callback/openCL.misc.h b/src/client-callback/openCL.misc.h
deleted file mode 100644
index 07cc416..0000000
--- a/src/client-callback/openCL.misc.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/* ************************************************************************
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ************************************************************************/
-
-
-#pragma once
-#if !defined( OPENCL_MISC_H )
-#define OPENCL_MISC_H
-#include <memory>
-#include <stdexcept>
-#include "unicode.compatibility.h"
-
-// Creating a portable defintion of countof
-#if defined( _MSC_VER )
- #define countOf _countof
-#else
- #define countOf( arr ) ( sizeof( arr ) / sizeof( arr[ 0 ] ) )
-#endif
-
-/*
- * \brief OpenCL platform and device discovery
- * Creates a list of OpenCL platforms
- * and their associated devices
- */
-int discoverCLPlatforms( cl_device_type deviceType,
- std::vector< cl_platform_id >& platforms,
- std::vector< std::vector< cl_device_id > >& devices );
-
-void prettyPrintCLPlatforms(std::vector< cl_platform_id >& platforms,
- std::vector< std::vector< cl_device_id > >& devices);
-
-/*
- * \brief OpenCL related initialization
- * Create Context, Device list
- * Load CL file, compile, link CL source
- * Build program and kernel objects
- */
-std::vector< cl_device_id > initializeCL( cl_device_type deviceType,
- cl_int deviceId,
- cl_int platformId,
- cl_context& context,
- bool printclInfo );
-
-/*
- * \brief OpenCL memory buffer creation
- */
-int createOpenCLMemoryBuffer(
- cl_context& context,
- const size_t bufferSizeBytes,
- const cl_uint numBuffers,
- cl_mem buffer[],
- cl_mem_flags accessibility
- );
-
-/*
- * \brief OpenCL command queue creation
- * Create Command Queue
- * Create OpenCL memory buffer objects
- */
-void createOpenCLCommandQueue( cl_context& context,
- cl_uint commandQueueFlags,
- cl_command_queue& commandQueue,
- std::vector< cl_device_id > devices,
- const size_t bufferSizeBytesIn,
- const cl_uint numBuffersIn,
- cl_mem clMemBufferIn[],
- const size_t bufferSizeBytesOut,
- const cl_uint numBuffersOut,
- cl_mem clMemBufferOut[] );
-
-/*
- * \brief release OpenCL memory buffer
- */
-int releaseOpenCLMemBuffer( const cl_uint numBuffers, cl_mem buffer[] );
-
-std::string prettyPrintclFFTStatus( const cl_int& status );
-
-// This is used to either wrap an OpenCL function call, or to explicitly check a variable for an OpenCL error condition.
-// If an error occurs, we throw.
-// Note: std::runtime_error does not take unicode strings as input, so only strings supported
-inline cl_int OpenCL_V_Throw ( cl_int res, const std::string& msg, size_t lineno )
-{
- switch( res )
- {
- case CL_SUCCESS: /**< No error */
- break;
- default:
- {
- std::stringstream tmp;
- tmp << "OPENCL_V_THROWERROR< ";
- tmp << prettyPrintclFFTStatus( res );
- tmp << " > (";
- tmp << lineno;
- tmp << "): ";
- tmp << msg;
- std::string errorm (tmp.str());
- std::cout << errorm<< std::endl;
- throw std::runtime_error( errorm );
- }
- }
-
- return res;
-}
-#define OPENCL_V_THROW(_status,_message) OpenCL_V_Throw (_status, _message, __LINE__)
-
-inline cl_int OpenCL_V_Warn(cl_int res, const std::string& msg, size_t lineno)
-{
- switch (res)
- {
- case CL_SUCCESS: /**< No error */
- break;
- case CL_DEVICE_NOT_FOUND:
- // This happens all the time when discovering the OpenCL capabilities of the system,
- // so do nothing here.
- break;
- default:
- {
- std::stringstream tmp;
- tmp << "OPENCL_V_WARN< ";
- tmp << prettyPrintclFFTStatus(res);
- tmp << " > (";
- tmp << lineno;
- tmp << "): ";
- tmp << msg;
- std::string errorm(tmp.str());
- std::cout << errorm << std::endl;
- }
- }
-
- return res;
-}
-#define OPENCL_V_WARN(_status,_message) OpenCL_V_Warn (_status, _message, __LINE__);
-
-/*
- * \brief Release OpenCL resources (Context, Memory etc.)
- */
-int cleanupCL( cl_context* context, cl_command_queue* commandQueue, const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[], cl_event* outEvent );
-
-#endif
diff --git a/src/client-callback/stdafx.cpp b/src/client-callback/stdafx.cpp
deleted file mode 100644
index 2587b2c..0000000
--- a/src/client-callback/stdafx.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-/* ************************************************************************
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ************************************************************************/
-
-
-// stdafx.cpp : source file that includes just the standard includes
-// clFFT.pch will be the pre-compiled header
-// stdafx.obj will contain the pre-compiled type information
-
-#include "stdafx.h"
-
-// TODO: reference any additional headers you need in STDAFX.H
-// and not in this file
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list