[arrayfire] 264/408: Moved OpenCL's conv2Helper to kernel directory
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:11 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 3ada93a102aeae15a57f62167cb0e01415e491e0
Author: Peter Andreas Entschev <peter at arrayfire.com>
Date: Fri Aug 14 14:09:14 2015 -0400
Moved OpenCL's conv2Helper to kernel directory
---
src/backend/opencl/convolve_separable.cpp | 44 +------
src/backend/opencl/kernel/convolve_separable.cpp | 151 +++++++++++++++++++++++
src/backend/opencl/kernel/convolve_separable.hpp | 78 +-----------
3 files changed, 157 insertions(+), 116 deletions(-)
diff --git a/src/backend/opencl/convolve_separable.cpp b/src/backend/opencl/convolve_separable.cpp
index fa2e8f0..3bd304e 100644
--- a/src/backend/opencl/convolve_separable.cpp
+++ b/src/backend/opencl/convolve_separable.cpp
@@ -20,44 +20,6 @@ using af::dim4;
namespace opencl
{
-template<typename T, typename accT, dim_t cDim, bool expand>
-void conv2Helper(Array<T>& out, const Array<T>& sig, const Array<accT>& filt, dim_t f)
-{
- switch(f) {
- case 2: kernel::convolve2<T, accT, cDim, expand, 2>(out, sig, filt); break;
- case 3: kernel::convolve2<T, accT, cDim, expand, 3>(out, sig, filt); break;
- case 4: kernel::convolve2<T, accT, cDim, expand, 4>(out, sig, filt); break;
- case 5: kernel::convolve2<T, accT, cDim, expand, 5>(out, sig, filt); break;
- case 6: kernel::convolve2<T, accT, cDim, expand, 6>(out, sig, filt); break;
- case 7: kernel::convolve2<T, accT, cDim, expand, 7>(out, sig, filt); break;
- case 8: kernel::convolve2<T, accT, cDim, expand, 8>(out, sig, filt); break;
- case 9: kernel::convolve2<T, accT, cDim, expand, 9>(out, sig, filt); break;
- case 10: kernel::convolve2<T, accT, cDim, expand, 10>(out, sig, filt); break;
- case 11: kernel::convolve2<T, accT, cDim, expand, 11>(out, sig, filt); break;
- case 12: kernel::convolve2<T, accT, cDim, expand, 12>(out, sig, filt); break;
- case 13: kernel::convolve2<T, accT, cDim, expand, 13>(out, sig, filt); break;
- case 14: kernel::convolve2<T, accT, cDim, expand, 14>(out, sig, filt); break;
- case 15: kernel::convolve2<T, accT, cDim, expand, 15>(out, sig, filt); break;
- case 16: kernel::convolve2<T, accT, cDim, expand, 16>(out, sig, filt); break;
- case 17: kernel::convolve2<T, accT, cDim, expand, 17>(out, sig, filt); break;
- case 18: kernel::convolve2<T, accT, cDim, expand, 18>(out, sig, filt); break;
- case 19: kernel::convolve2<T, accT, cDim, expand, 19>(out, sig, filt); break;
- case 20: kernel::convolve2<T, accT, cDim, expand, 20>(out, sig, filt); break;
- case 21: kernel::convolve2<T, accT, cDim, expand, 21>(out, sig, filt); break;
- case 22: kernel::convolve2<T, accT, cDim, expand, 22>(out, sig, filt); break;
- case 23: kernel::convolve2<T, accT, cDim, expand, 23>(out, sig, filt); break;
- case 24: kernel::convolve2<T, accT, cDim, expand, 24>(out, sig, filt); break;
- case 25: kernel::convolve2<T, accT, cDim, expand, 25>(out, sig, filt); break;
- case 26: kernel::convolve2<T, accT, cDim, expand, 26>(out, sig, filt); break;
- case 27: kernel::convolve2<T, accT, cDim, expand, 27>(out, sig, filt); break;
- case 28: kernel::convolve2<T, accT, cDim, expand, 28>(out, sig, filt); break;
- case 29: kernel::convolve2<T, accT, cDim, expand, 29>(out, sig, filt); break;
- case 30: kernel::convolve2<T, accT, cDim, expand, 30>(out, sig, filt); break;
- case 31: kernel::convolve2<T, accT, cDim, expand, 31>(out, sig, filt); break;
- default: OPENCL_NOT_SUPPORTED();
- }
-}
-
template<typename T, typename accT, bool expand>
Array<T> convolve2(Array<T> const& signal, Array<accT> const& c_filter, Array<accT> const& r_filter)
{
@@ -65,7 +27,7 @@ Array<T> convolve2(Array<T> const& signal, Array<accT> const& c_filter, Array<ac
const dim_t rflen = (dim_t)r_filter.elements();
if ((cflen > kernel::MAX_SCONV_FILTER_LEN) ||
- (rflen > kernel::MAX_SCONV_FILTER_LEN)) {
+ (rflen > kernel::MAX_SCONV_FILTER_LEN)) {
// call upon fft
OPENCL_NOT_SUPPORTED();
}
@@ -83,8 +45,8 @@ Array<T> convolve2(Array<T> const& signal, Array<accT> const& c_filter, Array<ac
Array<T> temp= createEmptyArray<T>(tDims);
Array<T> out = createEmptyArray<T>(oDims);
- conv2Helper<T, accT, 0, expand>(temp, signal, c_filter, cflen);
- conv2Helper<T, accT, 1, expand>( out, temp, r_filter, rflen);
+ kernel::conv2Helper<T, accT, 0, expand>(temp, signal, c_filter, cflen);
+ kernel::conv2Helper<T, accT, 1, expand>( out, temp, r_filter, rflen);
return out;
}
diff --git a/src/backend/opencl/kernel/convolve_separable.cpp b/src/backend/opencl/kernel/convolve_separable.cpp
new file mode 100644
index 0000000..f37ef18
--- /dev/null
+++ b/src/backend/opencl/kernel/convolve_separable.cpp
@@ -0,0 +1,151 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <kernel_headers/convolve_separable.hpp>
+#include <program.hpp>
+#include <traits.hpp>
+#include <string>
+#include <mutex>
+#include <map>
+#include <dispatch.hpp>
+#include <Param.hpp>
+#include <debug_opencl.hpp>
+#include <memory.hpp>
+
+using cl::Buffer;
+using cl::Program;
+using cl::Kernel;
+using cl::make_kernel;
+using cl::EnqueueArgs;
+using cl::NDRange;
+using std::string;
+
+namespace opencl
+{
+
+namespace kernel
+{
+
+static const int THREADS_X = 16;
+static const int THREADS_Y = 16;
+
+template<typename T, typename accType, int conv_dim, bool expand, int fLen>
+void convolve2(Param out, const Param signal, const Param filter)
+{
+ try {
+ static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
+ static std::map<int, Program*> convProgs;
+ static std::map<int, Kernel*> convKernels;
+
+ int device = getActiveDeviceId();
+
+ std::call_once( compileFlags[device], [device] () {
+ const size_t C0_SIZE = (THREADS_X+2*(fLen-1))* THREADS_Y;
+ const size_t C1_SIZE = (THREADS_Y+2*(fLen-1))* THREADS_X;
+
+ size_t locSize = (conv_dim==0 ? C0_SIZE : C1_SIZE);
+
+ std::ostringstream options;
+ options << " -D T=" << dtype_traits<T>::getName()
+ << " -D accType="<< dtype_traits<accType>::getName()
+ << " -D CONV_DIM="<< conv_dim
+ << " -D EXPAND="<< expand
+ << " -D FLEN="<< fLen
+ << " -D LOCAL_MEM_SIZE="<<locSize;
+ if (std::is_same<T, double>::value ||
+ std::is_same<T, cdouble>::value) {
+ options << " -D USE_DOUBLE";
+ }
+ Program prog;
+ buildProgram(prog, convolve_separable_cl, convolve_separable_cl_len, options.str());
+ convProgs[device] = new Program(prog);
+ convKernels[device] = new Kernel(*convProgs[device], "convolve");
+ });
+
+ auto convOp = make_kernel<Buffer, KParam, Buffer, KParam, Buffer,
+ int, int>(*convKernels[device]);
+
+ NDRange local(THREADS_X, THREADS_Y);
+
+ int blk_x = divup(out.info.dims[0], THREADS_X);
+ int blk_y = divup(out.info.dims[1], THREADS_Y);
+
+ NDRange global(blk_x*signal.info.dims[2]*THREADS_X,
+ blk_y*signal.info.dims[3]*THREADS_Y);
+
+ cl::Buffer *mBuff = bufferAlloc(fLen*sizeof(accType));
+ // FIX ME: if the filter array is strided, direct might cause issues
+ getQueue().enqueueCopyBuffer(*filter.data, *mBuff, 0, 0, fLen*sizeof(accType));
+
+ convOp(EnqueueArgs(getQueue(), global, local),
+ *out.data, out.info, *signal.data, signal.info, *mBuff, blk_x, blk_y);
+
+ bufferFree(mBuff);
+ } catch (cl::Error err) {
+ CL_TO_AF_ERROR(err);
+ throw;
+ }
+}
+
+template<typename T, typename accT, dim_t cDim, bool expand>
+void conv2Helper(Param out, const Param sig, const Param filt, dim_t f)
+{
+ switch(f) {
+ case 2: kernel::convolve2<T, accT, cDim, expand, 2>(out, sig, filt); break;
+ case 3: kernel::convolve2<T, accT, cDim, expand, 3>(out, sig, filt); break;
+ case 4: kernel::convolve2<T, accT, cDim, expand, 4>(out, sig, filt); break;
+ case 5: kernel::convolve2<T, accT, cDim, expand, 5>(out, sig, filt); break;
+ case 6: kernel::convolve2<T, accT, cDim, expand, 6>(out, sig, filt); break;
+ case 7: kernel::convolve2<T, accT, cDim, expand, 7>(out, sig, filt); break;
+ case 8: kernel::convolve2<T, accT, cDim, expand, 8>(out, sig, filt); break;
+ case 9: kernel::convolve2<T, accT, cDim, expand, 9>(out, sig, filt); break;
+ case 10: kernel::convolve2<T, accT, cDim, expand, 10>(out, sig, filt); break;
+ case 11: kernel::convolve2<T, accT, cDim, expand, 11>(out, sig, filt); break;
+ case 12: kernel::convolve2<T, accT, cDim, expand, 12>(out, sig, filt); break;
+ case 13: kernel::convolve2<T, accT, cDim, expand, 13>(out, sig, filt); break;
+ case 14: kernel::convolve2<T, accT, cDim, expand, 14>(out, sig, filt); break;
+ case 15: kernel::convolve2<T, accT, cDim, expand, 15>(out, sig, filt); break;
+ case 16: kernel::convolve2<T, accT, cDim, expand, 16>(out, sig, filt); break;
+ case 17: kernel::convolve2<T, accT, cDim, expand, 17>(out, sig, filt); break;
+ case 18: kernel::convolve2<T, accT, cDim, expand, 18>(out, sig, filt); break;
+ case 19: kernel::convolve2<T, accT, cDim, expand, 19>(out, sig, filt); break;
+ case 20: kernel::convolve2<T, accT, cDim, expand, 20>(out, sig, filt); break;
+ case 21: kernel::convolve2<T, accT, cDim, expand, 21>(out, sig, filt); break;
+ case 22: kernel::convolve2<T, accT, cDim, expand, 22>(out, sig, filt); break;
+ case 23: kernel::convolve2<T, accT, cDim, expand, 23>(out, sig, filt); break;
+ case 24: kernel::convolve2<T, accT, cDim, expand, 24>(out, sig, filt); break;
+ case 25: kernel::convolve2<T, accT, cDim, expand, 25>(out, sig, filt); break;
+ case 26: kernel::convolve2<T, accT, cDim, expand, 26>(out, sig, filt); break;
+ case 27: kernel::convolve2<T, accT, cDim, expand, 27>(out, sig, filt); break;
+ case 28: kernel::convolve2<T, accT, cDim, expand, 28>(out, sig, filt); break;
+ case 29: kernel::convolve2<T, accT, cDim, expand, 29>(out, sig, filt); break;
+ case 30: kernel::convolve2<T, accT, cDim, expand, 30>(out, sig, filt); break;
+ case 31: kernel::convolve2<T, accT, cDim, expand, 31>(out, sig, filt); break;
+ default: OPENCL_NOT_SUPPORTED();
+ }
+}
+
+#define INSTANTIATE(T, accT) \
+ template void conv2Helper<T, accT, 0, true >(Param out, const Param sig, const Param filt, dim_t f); \
+ template void conv2Helper<T, accT, 1, true >(Param out, const Param sig, const Param filt, dim_t f); \
+ template void conv2Helper<T, accT, 0, false>(Param out, const Param sig, const Param filt, dim_t f); \
+ template void conv2Helper<T, accT, 1, false>(Param out, const Param sig, const Param filt, dim_t f);
+
+INSTANTIATE(cdouble, cdouble)
+INSTANTIATE(cfloat , cfloat)
+INSTANTIATE(double , double)
+INSTANTIATE(float , float)
+INSTANTIATE(uint , float)
+INSTANTIATE(int , float)
+INSTANTIATE(uchar , float)
+INSTANTIATE(char , float)
+
+}
+
+}
diff --git a/src/backend/opencl/kernel/convolve_separable.hpp b/src/backend/opencl/kernel/convolve_separable.hpp
index 1a9edc3..7fb9aa3 100644
--- a/src/backend/opencl/kernel/convolve_separable.hpp
+++ b/src/backend/opencl/kernel/convolve_separable.hpp
@@ -8,24 +8,7 @@
********************************************************/
#pragma once
-#include <kernel_headers/convolve_separable.hpp>
-#include <program.hpp>
-#include <traits.hpp>
-#include <string>
-#include <mutex>
-#include <map>
-#include <dispatch.hpp>
#include <Param.hpp>
-#include <debug_opencl.hpp>
-#include <memory.hpp>
-
-using cl::Buffer;
-using cl::Program;
-using cl::Kernel;
-using cl::make_kernel;
-using cl::EnqueueArgs;
-using cl::NDRange;
-using std::string;
namespace opencl
{
@@ -33,71 +16,16 @@ namespace opencl
namespace kernel
{
-static const int THREADS_X = 16;
-static const int THREADS_Y = 16;
-
// below shared MAX_*_LEN's are calculated based on
// a maximum shared memory configuration of 48KB per block
// considering complex types as well
static const int MAX_SCONV_FILTER_LEN = 31;
template<typename T, typename accType, int conv_dim, bool expand, int fLen>
-void convolve2(Param out, const Param signal, const Param filter)
-{
- try {
- static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
- static std::map<int, Program*> convProgs;
- static std::map<int, Kernel*> convKernels;
-
- int device = getActiveDeviceId();
-
- std::call_once( compileFlags[device], [device] () {
- const size_t C0_SIZE = (THREADS_X+2*(fLen-1))* THREADS_Y;
- const size_t C1_SIZE = (THREADS_Y+2*(fLen-1))* THREADS_X;
-
- size_t locSize = (conv_dim==0 ? C0_SIZE : C1_SIZE);
-
- std::ostringstream options;
- options << " -D T=" << dtype_traits<T>::getName()
- << " -D accType="<< dtype_traits<accType>::getName()
- << " -D CONV_DIM="<< conv_dim
- << " -D EXPAND="<< expand
- << " -D FLEN="<< fLen
- << " -D LOCAL_MEM_SIZE="<<locSize;
- if (std::is_same<T, double>::value ||
- std::is_same<T, cdouble>::value) {
- options << " -D USE_DOUBLE";
- }
- Program prog;
- buildProgram(prog, convolve_separable_cl, convolve_separable_cl_len, options.str());
- convProgs[device] = new Program(prog);
- convKernels[device] = new Kernel(*convProgs[device], "convolve");
- });
+void convolve2(Param out, const Param signal, const Param filter);
- auto convOp = make_kernel<Buffer, KParam, Buffer, KParam, Buffer,
- int, int>(*convKernels[device]);
-
- NDRange local(THREADS_X, THREADS_Y);
-
- int blk_x = divup(out.info.dims[0], THREADS_X);
- int blk_y = divup(out.info.dims[1], THREADS_Y);
-
- NDRange global(blk_x*signal.info.dims[2]*THREADS_X,
- blk_y*signal.info.dims[3]*THREADS_Y);
-
- cl::Buffer *mBuff = bufferAlloc(fLen*sizeof(accType));
- // FIX ME: if the filter array is strided, direct might cause issues
- getQueue().enqueueCopyBuffer(*filter.data, *mBuff, 0, 0, fLen*sizeof(accType));
-
- convOp(EnqueueArgs(getQueue(), global, local),
- *out.data, out.info, *signal.data, signal.info, *mBuff, blk_x, blk_y);
-
- bufferFree(mBuff);
- } catch (cl::Error err) {
- CL_TO_AF_ERROR(err);
- throw;
- }
-}
+template<typename T, typename accT, dim_t cDim, bool expand>
+void conv2Helper(Param out, const Param sig, const Param filt, dim_t f);
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list