[arrayfire] 322/408: Templated options are now runtime compile options for opencl convolutions

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:22 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 8bf658998c998ae37161e9e114ac613325c4bd77
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Mon Aug 24 05:56:16 2015 -0400

    Templated options are now runtime compile options for opencl convolutions
---
 src/backend/opencl/convolve_separable.cpp         |   4 +-
 src/backend/opencl/kernel/convolve/conv2_impl.hpp | 117 ++++++++-------------
 src/backend/opencl/kernel/convolve_separable.cpp  | 122 +++++++++-------------
 src/backend/opencl/kernel/convolve_separable.hpp  |   7 +-
 src/backend/opencl/kernel/harris.hpp              |  47 ++-------
 src/backend/opencl/kernel/orb.hpp                 |   4 +-
 src/backend/opencl/kernel/sift.hpp                |   9 +-
 7 files changed, 113 insertions(+), 197 deletions(-)

diff --git a/src/backend/opencl/convolve_separable.cpp b/src/backend/opencl/convolve_separable.cpp
index 3bd304e..fede1d7 100644
--- a/src/backend/opencl/convolve_separable.cpp
+++ b/src/backend/opencl/convolve_separable.cpp
@@ -45,8 +45,8 @@ Array<T> convolve2(Array<T> const& signal, Array<accT> const& c_filter, Array<ac
     Array<T> temp= createEmptyArray<T>(tDims);
     Array<T> out = createEmptyArray<T>(oDims);
 
-    kernel::conv2Helper<T, accT, 0, expand>(temp, signal, c_filter, cflen);
-    kernel::conv2Helper<T, accT, 1, expand>( out,   temp, r_filter, rflen);
+    kernel::convSep<T, accT, 0, expand>(temp, signal, c_filter);
+    kernel::convSep<T, accT, 1, expand>( out,   temp, r_filter);
 
     return out;
 }
diff --git a/src/backend/opencl/kernel/convolve/conv2_impl.hpp b/src/backend/opencl/kernel/convolve/conv2_impl.hpp
index 4c1ac5f..ee2555e 100644
--- a/src/backend/opencl/kernel/convolve/conv2_impl.hpp
+++ b/src/backend/opencl/kernel/convolve/conv2_impl.hpp
@@ -8,6 +8,7 @@
  ********************************************************/
 
 #include <kernel/convolve/conv_common.hpp>
+#include <cache.hpp>
 
 namespace opencl
 {
@@ -15,42 +16,59 @@ namespace opencl
 namespace kernel
 {
 
-template<typename T, typename aT, bool expand, int f0, int f1>
+template<typename T, typename aT, bool expand>
 void conv2Helper(const conv_kparam_t& param, Param out, const Param signal, const Param filter)
 {
     try {
-        static std::once_flag  compileFlags[DeviceManager::MAX_DEVICES];
-        static std::map<int, Program*> convProgs;
-        static std::map<int, Kernel*>  convKernels;
+        int f0 = filter.info.dims[0];
+        int f1 = filter.info.dims[1];
+
+        std::string ref_name =
+            std::string("conv2_") +
+            std::string(dtype_traits<T>::getName()) +
+            std::string("_") +
+            std::string(dtype_traits<aT>::getName()) +
+            std::string("_") +
+            std::to_string(expand) +
+            std::string("_") +
+            std::to_string(f0) +
+            std::string("_") +
+            std::to_string(f1);
 
         int device = getActiveDeviceId();
-
-        std::call_once( compileFlags[device], [device] () {
-                    size_t LOC_SIZE = (THREADS_X+2*(f0-1))*(THREADS_Y+2*(f1-1));
-
-                    std::ostringstream options;
-                    options << " -D T=" << dtype_traits<T>::getName()
-                            << " -D accType="<< dtype_traits<aT>::getName()
-                            << " -D BASE_DIM="<< 2 /* hard constant specific to this convolution type */
-                            << " -D FLEN0=" << f0
-                            << " -D FLEN1=" << f1
-                            << " -D EXPAND="<< expand
-                            << " -D C_SIZE="<< LOC_SIZE;
-                    if (std::is_same<T, double>::value ||
-                        std::is_same<T, cdouble>::value) {
-                        options << " -D USE_DOUBLE";
-                    }
-                    Program prog;
-                    buildProgram(prog, convolve_cl, convolve_cl_len, options.str());
-                    convProgs[device]   = new Program(prog);
-                    convKernels[device] = new Kernel(*convProgs[device], "convolve");
-                });
+        kc_t::iterator idx = kernelCaches[device].find(ref_name);
+
+        kc_entry_t entry;
+        if (idx == kernelCaches[device].end()) {
+            size_t LOC_SIZE = (THREADS_X+2*(f0-1))*(THREADS_Y+2*(f1-1));
+
+            std::ostringstream options;
+            options << " -D T=" << dtype_traits<T>::getName()
+                    << " -D accType="<< dtype_traits<aT>::getName()
+                    << " -D BASE_DIM="<< 2 /* hard constant specific to this convolution type */
+                    << " -D FLEN0=" << f0
+                    << " -D FLEN1=" << f1
+                    << " -D EXPAND="<< expand
+                    << " -D C_SIZE="<< LOC_SIZE;
+            if (std::is_same<T, double>::value ||
+                std::is_same<T, cdouble>::value) {
+                options << " -D USE_DOUBLE";
+            }
+            Program prog;
+            buildProgram(prog, convolve_cl, convolve_cl_len, options.str());
+            entry.prog   = new Program(prog);
+            entry.ker = new Kernel(*entry.prog, "convolve");
+
+            kernelCaches[device][ref_name] = entry;
+        } else {
+            entry = idx->second;
+        }
 
         auto convOp = make_kernel<Buffer, KParam, Buffer, KParam,
                                   Buffer, KParam, int, int,
                                   int, int,
                                   int, int
-                                 >(*convKernels[device]);
+                                 >(*entry.ker);
 
         convOp(EnqueueArgs(getQueue(), param.global, param.local),
                 *out.data, out.info, *signal.data, signal.info,
@@ -63,53 +81,6 @@ void conv2Helper(const conv_kparam_t& param, Param out, const Param signal, cons
     }
 }
 
-template<typename T, typename aT, bool expand, int f>
-void conv2Helper(const conv_kparam_t& p, Param out, const Param sig, const Param filt)
-{
-    switch(filt.info.dims[1]) {
-        case  1: conv2Helper<T, aT, expand, f,  1>(p, out, sig, filt); break;
-        case  2: conv2Helper<T, aT, expand, f,  2>(p, out, sig, filt); break;
-        case  3: conv2Helper<T, aT, expand, f,  3>(p, out, sig, filt); break;
-        case  4: conv2Helper<T, aT, expand, f,  4>(p, out, sig, filt); break;
-        case  5: conv2Helper<T, aT, expand, f,  5>(p, out, sig, filt); break;
-        default: OPENCL_NOT_SUPPORTED();
-    }
-}
-
-template<typename T, typename aT, bool expand>
-void conv2Helper(const conv_kparam_t& p, Param& out, const Param& sig, const Param& filt)
-{
-    int f0 = filt.info.dims[0];
-    int f1 = filt.info.dims[1];
-    switch(f0) {
-        case  1: conv2Helper<T, aT, expand,  1>(p, out, sig, filt); break;
-        case  2: conv2Helper<T, aT, expand,  2>(p, out, sig, filt); break;
-        case  3: conv2Helper<T, aT, expand,  3>(p, out, sig, filt); break;
-        case  4: conv2Helper<T, aT, expand,  4>(p, out, sig, filt); break;
-        case  5: conv2Helper<T, aT, expand,  5>(p, out, sig, filt); break;
-        default: {
-                     if (f0==f1) {
-                         switch(f1) {
-                             case  6: conv2Helper<T, aT, expand,  6,  6>(p, out, sig, filt); break;
-                             case  7: conv2Helper<T, aT, expand,  7,  7>(p, out, sig, filt); break;
-                             case  8: conv2Helper<T, aT, expand,  8,  8>(p, out, sig, filt); break;
-                             case  9: conv2Helper<T, aT, expand,  9,  9>(p, out, sig, filt); break;
-                             case 10: conv2Helper<T, aT, expand, 10, 10>(p, out, sig, filt); break;
-                             case 11: conv2Helper<T, aT, expand, 11, 11>(p, out, sig, filt); break;
-                             case 12: conv2Helper<T, aT, expand, 12, 12>(p, out, sig, filt); break;
-                             case 13: conv2Helper<T, aT, expand, 13, 13>(p, out, sig, filt); break;
-                             case 14: conv2Helper<T, aT, expand, 14, 14>(p, out, sig, filt); break;
-                             case 15: conv2Helper<T, aT, expand, 15, 15>(p, out, sig, filt); break;
-                             case 16: conv2Helper<T, aT, expand, 16, 16>(p, out, sig, filt); break;
-                             case 17: conv2Helper<T, aT, expand, 17, 17>(p, out, sig, filt); break;
-                             default: OPENCL_NOT_SUPPORTED();
-                         }
-                     } else
-                         OPENCL_NOT_SUPPORTED();
-                 } break;
-    }
-}
-
 template<typename T, typename aT, bool expand>
 void conv2(conv_kparam_t& p, Param& out, const Param& sig, const Param& filt)
 {
diff --git a/src/backend/opencl/kernel/convolve_separable.cpp b/src/backend/opencl/kernel/convolve_separable.cpp
index f37ef18..e546cc4 100644
--- a/src/backend/opencl/kernel/convolve_separable.cpp
+++ b/src/backend/opencl/kernel/convolve_separable.cpp
@@ -17,6 +17,7 @@
 #include <Param.hpp>
 #include <debug_opencl.hpp>
 #include <memory.hpp>
+#include <cache.hpp>
 
 using cl::Buffer;
 using cl::Program;
@@ -35,41 +36,58 @@ namespace kernel
 static const int THREADS_X = 16;
 static const int THREADS_Y = 16;
 
-template<typename T, typename accType, int conv_dim, bool expand, int fLen>
-void convolve2(Param out, const Param signal, const Param filter)
+template<typename T, typename accType, int conv_dim, bool expand>
+void convSep(Param out, const Param signal, const Param filter)
 {
     try {
-        static std::once_flag  compileFlags[DeviceManager::MAX_DEVICES];
-        static std::map<int, Program*>   convProgs;
-        static std::map<int, Kernel*>  convKernels;
 
-        int device = getActiveDeviceId();
+        const int fLen = filter.info.dims[0] * filter.info.dims[1];
+
+        std::string ref_name =
+            std::string("convsep_") +
+            std::to_string(conv_dim) +
+            std::string("_") +
+            std::string(dtype_traits<T>::getName()) +
+            std::string("_") +
+            std::string(dtype_traits<accType>::getName()) +
+            std::string("_") +
+            std::to_string(expand) +
+            std::string("_") +
+            std::to_string(fLen);
 
-        std::call_once( compileFlags[device], [device] () {
-                const size_t C0_SIZE  = (THREADS_X+2*(fLen-1))* THREADS_Y;
-                const size_t C1_SIZE  = (THREADS_Y+2*(fLen-1))* THREADS_X;
-
-                size_t locSize = (conv_dim==0 ? C0_SIZE : C1_SIZE);
-
-                    std::ostringstream options;
-                    options << " -D T=" << dtype_traits<T>::getName()
-                            << " -D accType="<< dtype_traits<accType>::getName()
-                            << " -D CONV_DIM="<< conv_dim
-                            << " -D EXPAND="<< expand
-                            << " -D FLEN="<< fLen
-                            << " -D LOCAL_MEM_SIZE="<<locSize;
-                    if (std::is_same<T, double>::value ||
-                        std::is_same<T, cdouble>::value) {
-                        options << " -D USE_DOUBLE";
-                    }
-                    Program prog;
-                    buildProgram(prog, convolve_separable_cl, convolve_separable_cl_len, options.str());
-                    convProgs[device]   = new Program(prog);
-                    convKernels[device] = new Kernel(*convProgs[device], "convolve");
-                });
+        int device = getActiveDeviceId();
+        kc_t::iterator idx = kernelCaches[device].find(ref_name);
+
+        kc_entry_t entry;
+        if (idx == kernelCaches[device].end()) {
+            const size_t C0_SIZE  = (THREADS_X+2*(fLen-1))* THREADS_Y;
+            const size_t C1_SIZE  = (THREADS_Y+2*(fLen-1))* THREADS_X;
+
+            size_t locSize = (conv_dim==0 ? C0_SIZE : C1_SIZE);
+
+            std::ostringstream options;
+            options << " -D T=" << dtype_traits<T>::getName()
+                    << " -D accType="<< dtype_traits<accType>::getName()
+                    << " -D CONV_DIM="<< conv_dim
+                    << " -D EXPAND="<< expand
+                    << " -D FLEN="<< fLen
+                    << " -D LOCAL_MEM_SIZE="<<locSize;
+            if (std::is_same<T, double>::value ||
+                std::is_same<T, cdouble>::value) {
+                options << " -D USE_DOUBLE";
+            }
+            Program prog;
+            buildProgram(prog, convolve_separable_cl, convolve_separable_cl_len, options.str());
+
+            entry.prog   = new Program(prog);
+            entry.ker  = new Kernel(*entry.prog, "convolve");
+            kernelCaches[device][ref_name] = entry;
+        } else {
+            entry = idx->second;
+        }
 
         auto convOp = make_kernel<Buffer, KParam, Buffer, KParam, Buffer,
-                                  int, int>(*convKernels[device]);
+                                  int, int>(*entry.ker);
 
         NDRange local(THREADS_X, THREADS_Y);
 
@@ -93,49 +111,11 @@ void convolve2(Param out, const Param signal, const Param filter)
     }
 }
 
-template<typename T, typename accT, dim_t cDim, bool expand>
-void conv2Helper(Param out, const Param sig, const Param filt, dim_t f)
-{
-    switch(f) {
-        case  2: kernel::convolve2<T, accT, cDim, expand,  2>(out, sig, filt); break;
-        case  3: kernel::convolve2<T, accT, cDim, expand,  3>(out, sig, filt); break;
-        case  4: kernel::convolve2<T, accT, cDim, expand,  4>(out, sig, filt); break;
-        case  5: kernel::convolve2<T, accT, cDim, expand,  5>(out, sig, filt); break;
-        case  6: kernel::convolve2<T, accT, cDim, expand,  6>(out, sig, filt); break;
-        case  7: kernel::convolve2<T, accT, cDim, expand,  7>(out, sig, filt); break;
-        case  8: kernel::convolve2<T, accT, cDim, expand,  8>(out, sig, filt); break;
-        case  9: kernel::convolve2<T, accT, cDim, expand,  9>(out, sig, filt); break;
-        case 10: kernel::convolve2<T, accT, cDim, expand, 10>(out, sig, filt); break;
-        case 11: kernel::convolve2<T, accT, cDim, expand, 11>(out, sig, filt); break;
-        case 12: kernel::convolve2<T, accT, cDim, expand, 12>(out, sig, filt); break;
-        case 13: kernel::convolve2<T, accT, cDim, expand, 13>(out, sig, filt); break;
-        case 14: kernel::convolve2<T, accT, cDim, expand, 14>(out, sig, filt); break;
-        case 15: kernel::convolve2<T, accT, cDim, expand, 15>(out, sig, filt); break;
-        case 16: kernel::convolve2<T, accT, cDim, expand, 16>(out, sig, filt); break;
-        case 17: kernel::convolve2<T, accT, cDim, expand, 17>(out, sig, filt); break;
-        case 18: kernel::convolve2<T, accT, cDim, expand, 18>(out, sig, filt); break;
-        case 19: kernel::convolve2<T, accT, cDim, expand, 19>(out, sig, filt); break;
-        case 20: kernel::convolve2<T, accT, cDim, expand, 20>(out, sig, filt); break;
-        case 21: kernel::convolve2<T, accT, cDim, expand, 21>(out, sig, filt); break;
-        case 22: kernel::convolve2<T, accT, cDim, expand, 22>(out, sig, filt); break;
-        case 23: kernel::convolve2<T, accT, cDim, expand, 23>(out, sig, filt); break;
-        case 24: kernel::convolve2<T, accT, cDim, expand, 24>(out, sig, filt); break;
-        case 25: kernel::convolve2<T, accT, cDim, expand, 25>(out, sig, filt); break;
-        case 26: kernel::convolve2<T, accT, cDim, expand, 26>(out, sig, filt); break;
-        case 27: kernel::convolve2<T, accT, cDim, expand, 27>(out, sig, filt); break;
-        case 28: kernel::convolve2<T, accT, cDim, expand, 28>(out, sig, filt); break;
-        case 29: kernel::convolve2<T, accT, cDim, expand, 29>(out, sig, filt); break;
-        case 30: kernel::convolve2<T, accT, cDim, expand, 30>(out, sig, filt); break;
-        case 31: kernel::convolve2<T, accT, cDim, expand, 31>(out, sig, filt); break;
-        default: OPENCL_NOT_SUPPORTED();
-    }
-}
-
 #define INSTANTIATE(T, accT)  \
-    template void conv2Helper<T, accT, 0, true >(Param out, const Param sig, const Param filt, dim_t f); \
-    template void conv2Helper<T, accT, 1, true >(Param out, const Param sig, const Param filt, dim_t f); \
-    template void conv2Helper<T, accT, 0, false>(Param out, const Param sig, const Param filt, dim_t f); \
-    template void conv2Helper<T, accT, 1, false>(Param out, const Param sig, const Param filt, dim_t f);
+    template void convSep<T, accT, 0, true >(Param out, const Param sig, const Param filt); \
+    template void convSep<T, accT, 1, true >(Param out, const Param sig, const Param filt); \
+    template void convSep<T, accT, 0, false>(Param out, const Param sig, const Param filt); \
+    template void convSep<T, accT, 1, false>(Param out, const Param sig, const Param filt);
 
 INSTANTIATE(cdouble, cdouble)
 INSTANTIATE(cfloat ,  cfloat)
diff --git a/src/backend/opencl/kernel/convolve_separable.hpp b/src/backend/opencl/kernel/convolve_separable.hpp
index 7fb9aa3..265dc6e 100644
--- a/src/backend/opencl/kernel/convolve_separable.hpp
+++ b/src/backend/opencl/kernel/convolve_separable.hpp
@@ -21,11 +21,8 @@ namespace kernel
 // considering complex types as well
 static const int MAX_SCONV_FILTER_LEN = 31;
 
-template<typename T, typename accType, int conv_dim, bool expand, int fLen>
-void convolve2(Param out, const Param signal, const Param filter);
-
-template<typename T, typename accT, dim_t cDim, bool expand>
-void conv2Helper(Param out, const Param sig, const Param filt, dim_t f);
+template<typename T, typename accT, int cDim, bool expand>
+void convSep(Param out, const Param sig, const Param filt);
 
 }
 
diff --git a/src/backend/opencl/kernel/harris.hpp b/src/backend/opencl/kernel/harris.hpp
index bb9325f..7fffdee 100644
--- a/src/backend/opencl/kernel/harris.hpp
+++ b/src/backend/opencl/kernel/harris.hpp
@@ -56,7 +56,7 @@ void gaussian1D(T* out, const int dim, double sigma=0.0)
         out[k] /= sum;
 }
 
-template<typename T, typename convAccT, unsigned fLen>
+template<typename T, typename convAccT>
 void conv_helper(Param &ixx, Param &ixy, Param &iyy, Param &filter)
 {
     Param ixx_tmp, ixy_tmp, iyy_tmp;
@@ -73,12 +73,12 @@ void conv_helper(Param &ixx, Param &ixy, Param &iyy, Param &filter)
     ixy_tmp.data = bufferAlloc(ixy_tmp.info.dims[3] * ixy_tmp.info.strides[3] * sizeof(convAccT));
     iyy_tmp.data = bufferAlloc(iyy_tmp.info.dims[3] * iyy_tmp.info.strides[3] * sizeof(convAccT));
 
-    convolve2<T, convAccT, 0, false, fLen>(ixx_tmp, ixx, filter);
-    convolve2<T, convAccT, 1, false, fLen>(ixx, ixx_tmp, filter);
-    convolve2<T, convAccT, 0, false, fLen>(ixy_tmp, ixy, filter);
-    convolve2<T, convAccT, 1, false, fLen>(ixy, ixy_tmp, filter);
-    convolve2<T, convAccT, 0, false, fLen>(iyy_tmp, iyy, filter);
-    convolve2<T, convAccT, 1, false, fLen>(iyy, iyy_tmp, filter);
+    convSep<T, convAccT, 0, false>(ixx_tmp, ixx, filter);
+    convSep<T, convAccT, 1, false>(ixx, ixx_tmp, filter);
+    convSep<T, convAccT, 0, false>(ixy_tmp, ixy, filter);
+    convSep<T, convAccT, 1, false>(ixy, ixy_tmp, filter);
+    convSep<T, convAccT, 0, false>(iyy_tmp, iyy, filter);
+    convSep<T, convAccT, 1, false>(iyy, iyy_tmp, filter);
 
     bufferFree(ixx_tmp.data);
     bufferFree(ixy_tmp.data);
@@ -195,38 +195,7 @@ void harris(unsigned* corners_out,
         bufferFree(iy.data);
 
         // Convolve second order derivatives with proper window filter
-        switch (filter_len) {
-            case 3:  conv_helper<T, convAccT, 3 >(ixx, ixy, iyy, filter); break;
-            case 4:  conv_helper<T, convAccT, 4 >(ixx, ixy, iyy, filter); break;
-            case 5:  conv_helper<T, convAccT, 5 >(ixx, ixy, iyy, filter); break;
-            case 6:  conv_helper<T, convAccT, 6 >(ixx, ixy, iyy, filter); break;
-            case 7:  conv_helper<T, convAccT, 7 >(ixx, ixy, iyy, filter); break;
-            case 8:  conv_helper<T, convAccT, 8 >(ixx, ixy, iyy, filter); break;
-            case 9:  conv_helper<T, convAccT, 9 >(ixx, ixy, iyy, filter); break;
-            case 10: conv_helper<T, convAccT, 10>(ixx, ixy, iyy, filter); break;
-            case 11: conv_helper<T, convAccT, 11>(ixx, ixy, iyy, filter); break;
-            case 12: conv_helper<T, convAccT, 12>(ixx, ixy, iyy, filter); break;
-            case 13: conv_helper<T, convAccT, 13>(ixx, ixy, iyy, filter); break;
-            case 14: conv_helper<T, convAccT, 14>(ixx, ixy, iyy, filter); break;
-            case 15: conv_helper<T, convAccT, 15>(ixx, ixy, iyy, filter); break;
-            case 16: conv_helper<T, convAccT, 16>(ixx, ixy, iyy, filter); break;
-            case 17: conv_helper<T, convAccT, 17>(ixx, ixy, iyy, filter); break;
-            case 18: conv_helper<T, convAccT, 18>(ixx, ixy, iyy, filter); break;
-            case 19: conv_helper<T, convAccT, 19>(ixx, ixy, iyy, filter); break;
-            case 20: conv_helper<T, convAccT, 20>(ixx, ixy, iyy, filter); break;
-            case 21: conv_helper<T, convAccT, 21>(ixx, ixy, iyy, filter); break;
-            case 22: conv_helper<T, convAccT, 22>(ixx, ixy, iyy, filter); break;
-            case 23: conv_helper<T, convAccT, 23>(ixx, ixy, iyy, filter); break;
-            case 24: conv_helper<T, convAccT, 24>(ixx, ixy, iyy, filter); break;
-            case 25: conv_helper<T, convAccT, 25>(ixx, ixy, iyy, filter); break;
-            case 26: conv_helper<T, convAccT, 26>(ixx, ixy, iyy, filter); break;
-            case 27: conv_helper<T, convAccT, 27>(ixx, ixy, iyy, filter); break;
-            case 28: conv_helper<T, convAccT, 28>(ixx, ixy, iyy, filter); break;
-            case 29: conv_helper<T, convAccT, 29>(ixx, ixy, iyy, filter); break;
-            case 30: conv_helper<T, convAccT, 30>(ixx, ixy, iyy, filter); break;
-            case 31: conv_helper<T, convAccT, 31>(ixx, ixy, iyy, filter); break;
-        }
-
+        conv_helper<T, convAccT>(ixx, ixy, iyy, filter);
         bufferFree(filter.data);
 
         cl::Buffer *d_responses = bufferAlloc(in.info.dims[3] * in.info.strides[3] * sizeof(T));
diff --git a/src/backend/opencl/kernel/orb.hpp b/src/backend/opencl/kernel/orb.hpp
index 36a16c3..0c662ba 100644
--- a/src/backend/opencl/kernel/orb.hpp
+++ b/src/backend/opencl/kernel/orb.hpp
@@ -360,8 +360,8 @@ void orb(unsigned* out_feat,
                 }
 
                 // Filter level image with Gaussian kernel to reduce noise sensitivity
-                convolve2<T, convAccT, 0, false, gauss_len>(lvl_tmp, lvl_img, gauss_filter);
-                convolve2<T, convAccT, 1, false, gauss_len>(lvl_filt, lvl_tmp, gauss_filter);
+                convSep<T, convAccT, 0, false>(lvl_tmp, lvl_img, gauss_filter);
+                convSep<T, convAccT, 1, false>(lvl_filt, lvl_tmp, gauss_filter);
 
                 bufferFree(lvl_tmp.data);
             }
diff --git a/src/backend/opencl/kernel/sift.hpp b/src/backend/opencl/kernel/sift.hpp
index 5fcead5..eaca91d 100644
--- a/src/backend/opencl/kernel/sift.hpp
+++ b/src/backend/opencl/kernel/sift.hpp
@@ -172,7 +172,7 @@ Param gaussFilter(float sigma)
 }
 
 template<typename T, typename convAccT>
-void conv2HelperFull(Param& dst, Param src, Param filter)
+void convSepFull(Param& dst, Param src, Param filter)
 {
     Param tmp;
     tmp.info.offset = 0;
@@ -185,8 +185,7 @@ void conv2HelperFull(Param& dst, Param src, Param filter)
     tmp.data = bufferAlloc(src_el * sizeof(T));
 
     const dim_t fLen = filter.info.dims[0];
-    conv2Helper<T, convAccT, 0, false>(tmp, src, filter, fLen);
-    conv2Helper<T, convAccT, 1, false>(dst, tmp, filter, fLen);
+    convSep<T, convAccT, 0, false>(tmp, src, filter);
 
     bufferFree(tmp.data);
 }
@@ -220,7 +219,7 @@ Param createInitialImage(
     if (double_input)
         resize<T, AF_INTERP_BILINEAR>(init_img, img);
 
-    conv2HelperFull<T, convAccT>(init_img, (double_input) ? init_img : img, filter);
+    convSepFull<T, convAccT>(init_img, (double_input) ? init_img : img, filter);
 
     bufferFree(filter.data);
 
@@ -301,7 +300,7 @@ std::vector<Param> buildGaussPyr(
 
                 Param filter = gaussFilter<convAccT>(sig_layers[l]);
 
-                conv2HelperFull<T, convAccT>(tmp_pyr[idx], tmp_pyr[src_idx], filter);
+                convSepFull<T, convAccT>(tmp_pyr[idx], tmp_pyr[src_idx], filter);
 
                 bufferFree(filter.data);
             }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list