[arrayfire] 322/408: Templated options are now runtime compile options for opencl convolutions
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:22 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 8bf658998c998ae37161e9e114ac613325c4bd77
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Mon Aug 24 05:56:16 2015 -0400
Templated options are now runtime compile options for opencl convolutions
---
src/backend/opencl/convolve_separable.cpp | 4 +-
src/backend/opencl/kernel/convolve/conv2_impl.hpp | 117 ++++++++-------------
src/backend/opencl/kernel/convolve_separable.cpp | 122 +++++++++-------------
src/backend/opencl/kernel/convolve_separable.hpp | 7 +-
src/backend/opencl/kernel/harris.hpp | 47 ++-------
src/backend/opencl/kernel/orb.hpp | 4 +-
src/backend/opencl/kernel/sift.hpp | 9 +-
7 files changed, 113 insertions(+), 197 deletions(-)
diff --git a/src/backend/opencl/convolve_separable.cpp b/src/backend/opencl/convolve_separable.cpp
index 3bd304e..fede1d7 100644
--- a/src/backend/opencl/convolve_separable.cpp
+++ b/src/backend/opencl/convolve_separable.cpp
@@ -45,8 +45,8 @@ Array<T> convolve2(Array<T> const& signal, Array<accT> const& c_filter, Array<ac
Array<T> temp= createEmptyArray<T>(tDims);
Array<T> out = createEmptyArray<T>(oDims);
- kernel::conv2Helper<T, accT, 0, expand>(temp, signal, c_filter, cflen);
- kernel::conv2Helper<T, accT, 1, expand>( out, temp, r_filter, rflen);
+ kernel::convSep<T, accT, 0, expand>(temp, signal, c_filter);
+ kernel::convSep<T, accT, 1, expand>( out, temp, r_filter);
return out;
}
diff --git a/src/backend/opencl/kernel/convolve/conv2_impl.hpp b/src/backend/opencl/kernel/convolve/conv2_impl.hpp
index 4c1ac5f..ee2555e 100644
--- a/src/backend/opencl/kernel/convolve/conv2_impl.hpp
+++ b/src/backend/opencl/kernel/convolve/conv2_impl.hpp
@@ -8,6 +8,7 @@
********************************************************/
#include <kernel/convolve/conv_common.hpp>
+#include <cache.hpp>
namespace opencl
{
@@ -15,42 +16,59 @@ namespace opencl
namespace kernel
{
-template<typename T, typename aT, bool expand, int f0, int f1>
+template<typename T, typename aT, bool expand>
void conv2Helper(const conv_kparam_t& param, Param out, const Param signal, const Param filter)
{
try {
- static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
- static std::map<int, Program*> convProgs;
- static std::map<int, Kernel*> convKernels;
+ int f0 = filter.info.dims[0];
+ int f1 = filter.info.dims[1];
+
+ std::string ref_name =
+ std::string("conv2_") +
+ std::string(dtype_traits<T>::getName()) +
+ std::string("_") +
+ std::string(dtype_traits<aT>::getName()) +
+ std::string("_") +
+ std::to_string(expand) +
+ std::string("_") +
+ std::to_string(f0) +
+ std::string("_") +
+ std::to_string(f1);
int device = getActiveDeviceId();
-
- std::call_once( compileFlags[device], [device] () {
- size_t LOC_SIZE = (THREADS_X+2*(f0-1))*(THREADS_Y+2*(f1-1));
-
- std::ostringstream options;
- options << " -D T=" << dtype_traits<T>::getName()
- << " -D accType="<< dtype_traits<aT>::getName()
- << " -D BASE_DIM="<< 2 /* hard constant specific to this convolution type */
- << " -D FLEN0=" << f0
- << " -D FLEN1=" << f1
- << " -D EXPAND="<< expand
- << " -D C_SIZE="<< LOC_SIZE;
- if (std::is_same<T, double>::value ||
- std::is_same<T, cdouble>::value) {
- options << " -D USE_DOUBLE";
- }
- Program prog;
- buildProgram(prog, convolve_cl, convolve_cl_len, options.str());
- convProgs[device] = new Program(prog);
- convKernels[device] = new Kernel(*convProgs[device], "convolve");
- });
+ kc_t::iterator idx = kernelCaches[device].find(ref_name);
+
+ kc_entry_t entry;
+ if (idx == kernelCaches[device].end()) {
+ size_t LOC_SIZE = (THREADS_X+2*(f0-1))*(THREADS_Y+2*(f1-1));
+
+ std::ostringstream options;
+ options << " -D T=" << dtype_traits<T>::getName()
+ << " -D accType="<< dtype_traits<aT>::getName()
+ << " -D BASE_DIM="<< 2 /* hard constant specific to this convolution type */
+ << " -D FLEN0=" << f0
+ << " -D FLEN1=" << f1
+ << " -D EXPAND="<< expand
+ << " -D C_SIZE="<< LOC_SIZE;
+ if (std::is_same<T, double>::value ||
+ std::is_same<T, cdouble>::value) {
+ options << " -D USE_DOUBLE";
+ }
+ Program prog;
+ buildProgram(prog, convolve_cl, convolve_cl_len, options.str());
+ entry.prog = new Program(prog);
+ entry.ker = new Kernel(*entry.prog, "convolve");
+
+ kernelCaches[device][ref_name] = entry;
+ } else {
+ entry = idx->second;
+ }
auto convOp = make_kernel<Buffer, KParam, Buffer, KParam,
Buffer, KParam, int, int,
int, int,
int, int
- >(*convKernels[device]);
+ >(*entry.ker);
convOp(EnqueueArgs(getQueue(), param.global, param.local),
*out.data, out.info, *signal.data, signal.info,
@@ -63,53 +81,6 @@ void conv2Helper(const conv_kparam_t& param, Param out, const Param signal, cons
}
}
-template<typename T, typename aT, bool expand, int f>
-void conv2Helper(const conv_kparam_t& p, Param out, const Param sig, const Param filt)
-{
- switch(filt.info.dims[1]) {
- case 1: conv2Helper<T, aT, expand, f, 1>(p, out, sig, filt); break;
- case 2: conv2Helper<T, aT, expand, f, 2>(p, out, sig, filt); break;
- case 3: conv2Helper<T, aT, expand, f, 3>(p, out, sig, filt); break;
- case 4: conv2Helper<T, aT, expand, f, 4>(p, out, sig, filt); break;
- case 5: conv2Helper<T, aT, expand, f, 5>(p, out, sig, filt); break;
- default: OPENCL_NOT_SUPPORTED();
- }
-}
-
-template<typename T, typename aT, bool expand>
-void conv2Helper(const conv_kparam_t& p, Param& out, const Param& sig, const Param& filt)
-{
- int f0 = filt.info.dims[0];
- int f1 = filt.info.dims[1];
- switch(f0) {
- case 1: conv2Helper<T, aT, expand, 1>(p, out, sig, filt); break;
- case 2: conv2Helper<T, aT, expand, 2>(p, out, sig, filt); break;
- case 3: conv2Helper<T, aT, expand, 3>(p, out, sig, filt); break;
- case 4: conv2Helper<T, aT, expand, 4>(p, out, sig, filt); break;
- case 5: conv2Helper<T, aT, expand, 5>(p, out, sig, filt); break;
- default: {
- if (f0==f1) {
- switch(f1) {
- case 6: conv2Helper<T, aT, expand, 6, 6>(p, out, sig, filt); break;
- case 7: conv2Helper<T, aT, expand, 7, 7>(p, out, sig, filt); break;
- case 8: conv2Helper<T, aT, expand, 8, 8>(p, out, sig, filt); break;
- case 9: conv2Helper<T, aT, expand, 9, 9>(p, out, sig, filt); break;
- case 10: conv2Helper<T, aT, expand, 10, 10>(p, out, sig, filt); break;
- case 11: conv2Helper<T, aT, expand, 11, 11>(p, out, sig, filt); break;
- case 12: conv2Helper<T, aT, expand, 12, 12>(p, out, sig, filt); break;
- case 13: conv2Helper<T, aT, expand, 13, 13>(p, out, sig, filt); break;
- case 14: conv2Helper<T, aT, expand, 14, 14>(p, out, sig, filt); break;
- case 15: conv2Helper<T, aT, expand, 15, 15>(p, out, sig, filt); break;
- case 16: conv2Helper<T, aT, expand, 16, 16>(p, out, sig, filt); break;
- case 17: conv2Helper<T, aT, expand, 17, 17>(p, out, sig, filt); break;
- default: OPENCL_NOT_SUPPORTED();
- }
- } else
- OPENCL_NOT_SUPPORTED();
- } break;
- }
-}
-
template<typename T, typename aT, bool expand>
void conv2(conv_kparam_t& p, Param& out, const Param& sig, const Param& filt)
{
diff --git a/src/backend/opencl/kernel/convolve_separable.cpp b/src/backend/opencl/kernel/convolve_separable.cpp
index f37ef18..e546cc4 100644
--- a/src/backend/opencl/kernel/convolve_separable.cpp
+++ b/src/backend/opencl/kernel/convolve_separable.cpp
@@ -17,6 +17,7 @@
#include <Param.hpp>
#include <debug_opencl.hpp>
#include <memory.hpp>
+#include <cache.hpp>
using cl::Buffer;
using cl::Program;
@@ -35,41 +36,58 @@ namespace kernel
static const int THREADS_X = 16;
static const int THREADS_Y = 16;
-template<typename T, typename accType, int conv_dim, bool expand, int fLen>
-void convolve2(Param out, const Param signal, const Param filter)
+template<typename T, typename accType, int conv_dim, bool expand>
+void convSep(Param out, const Param signal, const Param filter)
{
try {
- static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
- static std::map<int, Program*> convProgs;
- static std::map<int, Kernel*> convKernels;
- int device = getActiveDeviceId();
+ const int fLen = filter.info.dims[0] * filter.info.dims[1];
+
+ std::string ref_name =
+ std::string("convsep_") +
+ std::to_string(conv_dim) +
+ std::string("_") +
+ std::string(dtype_traits<T>::getName()) +
+ std::string("_") +
+ std::string(dtype_traits<accType>::getName()) +
+ std::string("_") +
+ std::to_string(expand) +
+ std::string("_") +
+ std::to_string(fLen);
- std::call_once( compileFlags[device], [device] () {
- const size_t C0_SIZE = (THREADS_X+2*(fLen-1))* THREADS_Y;
- const size_t C1_SIZE = (THREADS_Y+2*(fLen-1))* THREADS_X;
-
- size_t locSize = (conv_dim==0 ? C0_SIZE : C1_SIZE);
-
- std::ostringstream options;
- options << " -D T=" << dtype_traits<T>::getName()
- << " -D accType="<< dtype_traits<accType>::getName()
- << " -D CONV_DIM="<< conv_dim
- << " -D EXPAND="<< expand
- << " -D FLEN="<< fLen
- << " -D LOCAL_MEM_SIZE="<<locSize;
- if (std::is_same<T, double>::value ||
- std::is_same<T, cdouble>::value) {
- options << " -D USE_DOUBLE";
- }
- Program prog;
- buildProgram(prog, convolve_separable_cl, convolve_separable_cl_len, options.str());
- convProgs[device] = new Program(prog);
- convKernels[device] = new Kernel(*convProgs[device], "convolve");
- });
+ int device = getActiveDeviceId();
+ kc_t::iterator idx = kernelCaches[device].find(ref_name);
+
+ kc_entry_t entry;
+ if (idx == kernelCaches[device].end()) {
+ const size_t C0_SIZE = (THREADS_X+2*(fLen-1))* THREADS_Y;
+ const size_t C1_SIZE = (THREADS_Y+2*(fLen-1))* THREADS_X;
+
+ size_t locSize = (conv_dim==0 ? C0_SIZE : C1_SIZE);
+
+ std::ostringstream options;
+ options << " -D T=" << dtype_traits<T>::getName()
+ << " -D accType="<< dtype_traits<accType>::getName()
+ << " -D CONV_DIM="<< conv_dim
+ << " -D EXPAND="<< expand
+ << " -D FLEN="<< fLen
+ << " -D LOCAL_MEM_SIZE="<<locSize;
+ if (std::is_same<T, double>::value ||
+ std::is_same<T, cdouble>::value) {
+ options << " -D USE_DOUBLE";
+ }
+ Program prog;
+ buildProgram(prog, convolve_separable_cl, convolve_separable_cl_len, options.str());
+
+ entry.prog = new Program(prog);
+ entry.ker = new Kernel(*entry.prog, "convolve");
+ kernelCaches[device][ref_name] = entry;
+ } else {
+ entry = idx->second;
+ }
auto convOp = make_kernel<Buffer, KParam, Buffer, KParam, Buffer,
- int, int>(*convKernels[device]);
+ int, int>(*entry.ker);
NDRange local(THREADS_X, THREADS_Y);
@@ -93,49 +111,11 @@ void convolve2(Param out, const Param signal, const Param filter)
}
}
-template<typename T, typename accT, dim_t cDim, bool expand>
-void conv2Helper(Param out, const Param sig, const Param filt, dim_t f)
-{
- switch(f) {
- case 2: kernel::convolve2<T, accT, cDim, expand, 2>(out, sig, filt); break;
- case 3: kernel::convolve2<T, accT, cDim, expand, 3>(out, sig, filt); break;
- case 4: kernel::convolve2<T, accT, cDim, expand, 4>(out, sig, filt); break;
- case 5: kernel::convolve2<T, accT, cDim, expand, 5>(out, sig, filt); break;
- case 6: kernel::convolve2<T, accT, cDim, expand, 6>(out, sig, filt); break;
- case 7: kernel::convolve2<T, accT, cDim, expand, 7>(out, sig, filt); break;
- case 8: kernel::convolve2<T, accT, cDim, expand, 8>(out, sig, filt); break;
- case 9: kernel::convolve2<T, accT, cDim, expand, 9>(out, sig, filt); break;
- case 10: kernel::convolve2<T, accT, cDim, expand, 10>(out, sig, filt); break;
- case 11: kernel::convolve2<T, accT, cDim, expand, 11>(out, sig, filt); break;
- case 12: kernel::convolve2<T, accT, cDim, expand, 12>(out, sig, filt); break;
- case 13: kernel::convolve2<T, accT, cDim, expand, 13>(out, sig, filt); break;
- case 14: kernel::convolve2<T, accT, cDim, expand, 14>(out, sig, filt); break;
- case 15: kernel::convolve2<T, accT, cDim, expand, 15>(out, sig, filt); break;
- case 16: kernel::convolve2<T, accT, cDim, expand, 16>(out, sig, filt); break;
- case 17: kernel::convolve2<T, accT, cDim, expand, 17>(out, sig, filt); break;
- case 18: kernel::convolve2<T, accT, cDim, expand, 18>(out, sig, filt); break;
- case 19: kernel::convolve2<T, accT, cDim, expand, 19>(out, sig, filt); break;
- case 20: kernel::convolve2<T, accT, cDim, expand, 20>(out, sig, filt); break;
- case 21: kernel::convolve2<T, accT, cDim, expand, 21>(out, sig, filt); break;
- case 22: kernel::convolve2<T, accT, cDim, expand, 22>(out, sig, filt); break;
- case 23: kernel::convolve2<T, accT, cDim, expand, 23>(out, sig, filt); break;
- case 24: kernel::convolve2<T, accT, cDim, expand, 24>(out, sig, filt); break;
- case 25: kernel::convolve2<T, accT, cDim, expand, 25>(out, sig, filt); break;
- case 26: kernel::convolve2<T, accT, cDim, expand, 26>(out, sig, filt); break;
- case 27: kernel::convolve2<T, accT, cDim, expand, 27>(out, sig, filt); break;
- case 28: kernel::convolve2<T, accT, cDim, expand, 28>(out, sig, filt); break;
- case 29: kernel::convolve2<T, accT, cDim, expand, 29>(out, sig, filt); break;
- case 30: kernel::convolve2<T, accT, cDim, expand, 30>(out, sig, filt); break;
- case 31: kernel::convolve2<T, accT, cDim, expand, 31>(out, sig, filt); break;
- default: OPENCL_NOT_SUPPORTED();
- }
-}
-
#define INSTANTIATE(T, accT) \
- template void conv2Helper<T, accT, 0, true >(Param out, const Param sig, const Param filt, dim_t f); \
- template void conv2Helper<T, accT, 1, true >(Param out, const Param sig, const Param filt, dim_t f); \
- template void conv2Helper<T, accT, 0, false>(Param out, const Param sig, const Param filt, dim_t f); \
- template void conv2Helper<T, accT, 1, false>(Param out, const Param sig, const Param filt, dim_t f);
+ template void convSep<T, accT, 0, true >(Param out, const Param sig, const Param filt); \
+ template void convSep<T, accT, 1, true >(Param out, const Param sig, const Param filt); \
+ template void convSep<T, accT, 0, false>(Param out, const Param sig, const Param filt); \
+ template void convSep<T, accT, 1, false>(Param out, const Param sig, const Param filt);
INSTANTIATE(cdouble, cdouble)
INSTANTIATE(cfloat , cfloat)
diff --git a/src/backend/opencl/kernel/convolve_separable.hpp b/src/backend/opencl/kernel/convolve_separable.hpp
index 7fb9aa3..265dc6e 100644
--- a/src/backend/opencl/kernel/convolve_separable.hpp
+++ b/src/backend/opencl/kernel/convolve_separable.hpp
@@ -21,11 +21,8 @@ namespace kernel
// considering complex types as well
static const int MAX_SCONV_FILTER_LEN = 31;
-template<typename T, typename accType, int conv_dim, bool expand, int fLen>
-void convolve2(Param out, const Param signal, const Param filter);
-
-template<typename T, typename accT, dim_t cDim, bool expand>
-void conv2Helper(Param out, const Param sig, const Param filt, dim_t f);
+template<typename T, typename accT, int cDim, bool expand>
+void convSep(Param out, const Param sig, const Param filt);
}
diff --git a/src/backend/opencl/kernel/harris.hpp b/src/backend/opencl/kernel/harris.hpp
index bb9325f..7fffdee 100644
--- a/src/backend/opencl/kernel/harris.hpp
+++ b/src/backend/opencl/kernel/harris.hpp
@@ -56,7 +56,7 @@ void gaussian1D(T* out, const int dim, double sigma=0.0)
out[k] /= sum;
}
-template<typename T, typename convAccT, unsigned fLen>
+template<typename T, typename convAccT>
void conv_helper(Param &ixx, Param &ixy, Param &iyy, Param &filter)
{
Param ixx_tmp, ixy_tmp, iyy_tmp;
@@ -73,12 +73,12 @@ void conv_helper(Param &ixx, Param &ixy, Param &iyy, Param &filter)
ixy_tmp.data = bufferAlloc(ixy_tmp.info.dims[3] * ixy_tmp.info.strides[3] * sizeof(convAccT));
iyy_tmp.data = bufferAlloc(iyy_tmp.info.dims[3] * iyy_tmp.info.strides[3] * sizeof(convAccT));
- convolve2<T, convAccT, 0, false, fLen>(ixx_tmp, ixx, filter);
- convolve2<T, convAccT, 1, false, fLen>(ixx, ixx_tmp, filter);
- convolve2<T, convAccT, 0, false, fLen>(ixy_tmp, ixy, filter);
- convolve2<T, convAccT, 1, false, fLen>(ixy, ixy_tmp, filter);
- convolve2<T, convAccT, 0, false, fLen>(iyy_tmp, iyy, filter);
- convolve2<T, convAccT, 1, false, fLen>(iyy, iyy_tmp, filter);
+ convSep<T, convAccT, 0, false>(ixx_tmp, ixx, filter);
+ convSep<T, convAccT, 1, false>(ixx, ixx_tmp, filter);
+ convSep<T, convAccT, 0, false>(ixy_tmp, ixy, filter);
+ convSep<T, convAccT, 1, false>(ixy, ixy_tmp, filter);
+ convSep<T, convAccT, 0, false>(iyy_tmp, iyy, filter);
+ convSep<T, convAccT, 1, false>(iyy, iyy_tmp, filter);
bufferFree(ixx_tmp.data);
bufferFree(ixy_tmp.data);
@@ -195,38 +195,7 @@ void harris(unsigned* corners_out,
bufferFree(iy.data);
// Convolve second order derivatives with proper window filter
- switch (filter_len) {
- case 3: conv_helper<T, convAccT, 3 >(ixx, ixy, iyy, filter); break;
- case 4: conv_helper<T, convAccT, 4 >(ixx, ixy, iyy, filter); break;
- case 5: conv_helper<T, convAccT, 5 >(ixx, ixy, iyy, filter); break;
- case 6: conv_helper<T, convAccT, 6 >(ixx, ixy, iyy, filter); break;
- case 7: conv_helper<T, convAccT, 7 >(ixx, ixy, iyy, filter); break;
- case 8: conv_helper<T, convAccT, 8 >(ixx, ixy, iyy, filter); break;
- case 9: conv_helper<T, convAccT, 9 >(ixx, ixy, iyy, filter); break;
- case 10: conv_helper<T, convAccT, 10>(ixx, ixy, iyy, filter); break;
- case 11: conv_helper<T, convAccT, 11>(ixx, ixy, iyy, filter); break;
- case 12: conv_helper<T, convAccT, 12>(ixx, ixy, iyy, filter); break;
- case 13: conv_helper<T, convAccT, 13>(ixx, ixy, iyy, filter); break;
- case 14: conv_helper<T, convAccT, 14>(ixx, ixy, iyy, filter); break;
- case 15: conv_helper<T, convAccT, 15>(ixx, ixy, iyy, filter); break;
- case 16: conv_helper<T, convAccT, 16>(ixx, ixy, iyy, filter); break;
- case 17: conv_helper<T, convAccT, 17>(ixx, ixy, iyy, filter); break;
- case 18: conv_helper<T, convAccT, 18>(ixx, ixy, iyy, filter); break;
- case 19: conv_helper<T, convAccT, 19>(ixx, ixy, iyy, filter); break;
- case 20: conv_helper<T, convAccT, 20>(ixx, ixy, iyy, filter); break;
- case 21: conv_helper<T, convAccT, 21>(ixx, ixy, iyy, filter); break;
- case 22: conv_helper<T, convAccT, 22>(ixx, ixy, iyy, filter); break;
- case 23: conv_helper<T, convAccT, 23>(ixx, ixy, iyy, filter); break;
- case 24: conv_helper<T, convAccT, 24>(ixx, ixy, iyy, filter); break;
- case 25: conv_helper<T, convAccT, 25>(ixx, ixy, iyy, filter); break;
- case 26: conv_helper<T, convAccT, 26>(ixx, ixy, iyy, filter); break;
- case 27: conv_helper<T, convAccT, 27>(ixx, ixy, iyy, filter); break;
- case 28: conv_helper<T, convAccT, 28>(ixx, ixy, iyy, filter); break;
- case 29: conv_helper<T, convAccT, 29>(ixx, ixy, iyy, filter); break;
- case 30: conv_helper<T, convAccT, 30>(ixx, ixy, iyy, filter); break;
- case 31: conv_helper<T, convAccT, 31>(ixx, ixy, iyy, filter); break;
- }
-
+ conv_helper<T, convAccT>(ixx, ixy, iyy, filter);
bufferFree(filter.data);
cl::Buffer *d_responses = bufferAlloc(in.info.dims[3] * in.info.strides[3] * sizeof(T));
diff --git a/src/backend/opencl/kernel/orb.hpp b/src/backend/opencl/kernel/orb.hpp
index 36a16c3..0c662ba 100644
--- a/src/backend/opencl/kernel/orb.hpp
+++ b/src/backend/opencl/kernel/orb.hpp
@@ -360,8 +360,8 @@ void orb(unsigned* out_feat,
}
// Filter level image with Gaussian kernel to reduce noise sensitivity
- convolve2<T, convAccT, 0, false, gauss_len>(lvl_tmp, lvl_img, gauss_filter);
- convolve2<T, convAccT, 1, false, gauss_len>(lvl_filt, lvl_tmp, gauss_filter);
+ convSep<T, convAccT, 0, false>(lvl_tmp, lvl_img, gauss_filter);
+ convSep<T, convAccT, 1, false>(lvl_filt, lvl_tmp, gauss_filter);
bufferFree(lvl_tmp.data);
}
diff --git a/src/backend/opencl/kernel/sift.hpp b/src/backend/opencl/kernel/sift.hpp
index 5fcead5..eaca91d 100644
--- a/src/backend/opencl/kernel/sift.hpp
+++ b/src/backend/opencl/kernel/sift.hpp
@@ -172,7 +172,7 @@ Param gaussFilter(float sigma)
}
template<typename T, typename convAccT>
-void conv2HelperFull(Param& dst, Param src, Param filter)
+void convSepFull(Param& dst, Param src, Param filter)
{
Param tmp;
tmp.info.offset = 0;
@@ -185,8 +185,7 @@ void conv2HelperFull(Param& dst, Param src, Param filter)
tmp.data = bufferAlloc(src_el * sizeof(T));
const dim_t fLen = filter.info.dims[0];
- conv2Helper<T, convAccT, 0, false>(tmp, src, filter, fLen);
- conv2Helper<T, convAccT, 1, false>(dst, tmp, filter, fLen);
+ convSep<T, convAccT, 0, false>(tmp, src, filter);
bufferFree(tmp.data);
}
@@ -220,7 +219,7 @@ Param createInitialImage(
if (double_input)
resize<T, AF_INTERP_BILINEAR>(init_img, img);
- conv2HelperFull<T, convAccT>(init_img, (double_input) ? init_img : img, filter);
+ convSepFull<T, convAccT>(init_img, (double_input) ? init_img : img, filter);
bufferFree(filter.data);
@@ -301,7 +300,7 @@ std::vector<Param> buildGaussPyr(
Param filter = gaussFilter<convAccT>(sig_layers[l]);
- conv2HelperFull<T, convAccT>(tmp_pyr[idx], tmp_pyr[src_idx], filter);
+ convSepFull<T, convAccT>(tmp_pyr[idx], tmp_pyr[src_idx], filter);
bufferFree(filter.data);
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list