[arrayfire] 327/408: Templated options are now runtime compile options for opencl FAST

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:23 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 2c00e646364fec97a85765f48bef89064313202f
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Mon Aug 24 07:17:28 2015 -0400

    Templated options are now runtime compile options for opencl FAST
---
 src/backend/opencl/kernel/fast.hpp | 98 ++++++++++++--------------------------
 src/backend/opencl/kernel/orb.hpp  |  4 +-
 2 files changed, 33 insertions(+), 69 deletions(-)

diff --git a/src/backend/opencl/kernel/fast.hpp b/src/backend/opencl/kernel/fast.hpp
index 68cb767..fcc5a6c 100644
--- a/src/backend/opencl/kernel/fast.hpp
+++ b/src/backend/opencl/kernel/fast.hpp
@@ -12,6 +12,7 @@
 #include <dispatch.hpp>
 #include <err_opencl.hpp>
 #include <debug_opencl.hpp>
+#include <cache.hpp>
 #include <kernel_headers/fast.hpp>
 #include <memory.hpp>
 #include <map>
@@ -34,8 +35,9 @@ static const int FAST_THREADS_Y = 16;
 static const int FAST_THREADS_NONMAX_X = 32;
 static const int FAST_THREADS_NONMAX_Y = 8;
 
-template<typename T, const unsigned arc_length, const bool nonmax>
-void fast(unsigned* out_feat,
+template<typename T, const bool nonmax>
+void fast(const unsigned arc_length,
+          unsigned* out_feat,
           Param &x_out,
           Param &y_out,
           Param &score_out,
@@ -45,15 +47,19 @@ void fast(unsigned* out_feat,
           const unsigned edge)
 {
     try {
-        static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
-        static std::map<int, Program*> fastProgs;
-        static std::map<int, Kernel*>  lfKernel;
-        static std::map<int, Kernel*>  nmKernel;
-        static std::map<int, Kernel*>  gfKernel;
+        std::string ref_name =
+            std::string("fast_") +
+            std::to_string(arc_length) +
+            std::string("_") +
+            std::to_string(nonmax) +
+            std::string("_") +
+            std::string(dtype_traits<T>::getName());
 
         int device = getActiveDeviceId();
+        kc_t::iterator cache_idx = kernelCaches[device].find(ref_name);
 
-        std::call_once( compileFlags[device], [device] () {
+        kc_entry_t entry;
+        if (cache_idx == kernelCaches[device].end()) {
 
                 std::ostringstream options;
                 options << " -D T=" << dtype_traits<T>::getName()
@@ -67,12 +73,17 @@ void fast(unsigned* out_feat,
 
                 cl::Program prog;
                 buildProgram(prog, fast_cl, fast_cl_len, options.str());
-                fastProgs[device] = new Program(prog);
+                entry.prog = new Program(prog);
+                entry.ker = new Kernel[3];
 
-                lfKernel[device] = new Kernel(*fastProgs[device], "locate_features");
-                nmKernel[device] = new Kernel(*fastProgs[device], "non_max_counts");
-                gfKernel[device] = new Kernel(*fastProgs[device], "get_features");
-            });
+                entry.ker[0] = Kernel(*entry.prog, "locate_features");
+                entry.ker[1] = Kernel(*entry.prog, "non_max_counts");
+                entry.ker[2] = Kernel(*entry.prog, "get_features");
+
+                kernelCaches[device][ref_name] = entry;
+        } else {
+            entry = cache_idx -> second;
+        }
 
         const unsigned max_feat = ceil(in.info.dims[0] * in.info.dims[1] * feature_ratio);
 
@@ -96,7 +107,7 @@ void fast(unsigned* out_feat,
 
         auto lfOp = make_kernel<Buffer, KParam,
                                 Buffer, const float, const unsigned,
-                                LocalSpaceArg> (*lfKernel[device]);
+                                LocalSpaceArg> (entry.ker[0]);
 
         lfOp(EnqueueArgs(getQueue(), global, local),
              *in.data, in.info, *d_score, thr, edge,
@@ -121,7 +132,7 @@ void fast(unsigned* out_feat,
 
         auto nmOp = make_kernel<Buffer, Buffer, Buffer,
                                 Buffer, Buffer,
-                                KParam, const unsigned> (*nmKernel[device]);
+                                KParam, const unsigned> (entry.ker[1]);
         nmOp(EnqueueArgs(getQueue(), global_nonmax, local_nonmax),
                          *d_counts, *d_offsets, *d_total, *d_flags, *d_score, in.info, edge);
         CL_DEBUG_FINISH(getQueue());
@@ -139,7 +150,7 @@ void fast(unsigned* out_feat,
             auto gfOp = make_kernel<Buffer, Buffer, Buffer,
                                     Buffer, Buffer, Buffer,
                                     KParam, const unsigned,
-                                    const unsigned> (*gfKernel[device]);
+                                    const unsigned> (entry.ker[2]);
             gfOp(EnqueueArgs(getQueue(), global_nonmax, local_nonmax),
                              *x_out.data, *y_out.data, *score_out.data,
                              *d_flags, *d_counts, *d_offsets,
@@ -176,53 +187,6 @@ void fast(unsigned* out_feat,
     }
 }
 
-template<typename T, bool nonmax>
-void fast_dispatch_nonmax(const unsigned arc_length,
-                          unsigned* out_feat,
-                          Param &x_out,
-                          Param &y_out,
-                          Param &score_out,
-                          Param in,
-                          const float thr,
-                          const float feature_ratio,
-                          const unsigned edge)
-{
-    switch (arc_length) {
-    case 9:
-        fast<T,  9, nonmax>(out_feat, x_out, y_out, score_out, in,
-                            thr, feature_ratio, edge);
-        break;
-    case 10:
-        fast<T, 10, nonmax>(out_feat, x_out, y_out, score_out, in,
-                            thr, feature_ratio, edge);
-        break;
-    case 11:
-        fast<T, 11, nonmax>(out_feat, x_out, y_out, score_out, in,
-                            thr, feature_ratio, edge);
-        break;
-    case 12:
-        fast<T, 12, nonmax>(out_feat, x_out, y_out, score_out, in,
-                            thr, feature_ratio, edge);
-        break;
-    case 13:
-        fast<T, 13, nonmax>(out_feat, x_out, y_out, score_out, in,
-                            thr, feature_ratio, edge);
-        break;
-    case 14:
-        fast<T, 14, nonmax>(out_feat, x_out, y_out, score_out, in,
-                            thr, feature_ratio, edge);
-        break;
-    case 15:
-        fast<T, 15, nonmax>(out_feat, x_out, y_out, score_out, in,
-                            thr, feature_ratio, edge);
-        break;
-    case 16:
-        fast<T, 16, nonmax>(out_feat, x_out, y_out, score_out, in,
-                            thr, feature_ratio, edge);
-        break;
-    }
-}
-
 template<typename T>
 void fast_dispatch(const unsigned arc_length, const bool nonmax,
                    unsigned* out_feat,
@@ -235,11 +199,11 @@ void fast_dispatch(const unsigned arc_length, const bool nonmax,
                    const unsigned edge)
 {
     if (!nonmax) {
-        fast_dispatch_nonmax<T, 0>(arc_length, out_feat, x_out, y_out, score_out, in,
-                                   thr, feature_ratio, edge);
+        fast<T, 0>(arc_length, out_feat, x_out, y_out, score_out, in,
+                   thr, feature_ratio, edge);
     } else {
-        fast_dispatch_nonmax<T, 1>(arc_length, out_feat, x_out, y_out, score_out, in,
-                                   thr, feature_ratio, edge);
+        fast<T, 1>(arc_length, out_feat, x_out, y_out, score_out, in,
+                   thr, feature_ratio, edge);
     }
 }
 
diff --git a/src/backend/opencl/kernel/orb.hpp b/src/backend/opencl/kernel/orb.hpp
index 0c662ba..71d6ae0 100644
--- a/src/backend/opencl/kernel/orb.hpp
+++ b/src/backend/opencl/kernel/orb.hpp
@@ -201,8 +201,8 @@ void orb(unsigned* out_feat,
             unsigned edge = ceil(size * sqrt(2.f) / 2.f);
 
             // Detect FAST features
-            fast<T, 9, true>(&lvl_feat, d_x_feat, d_y_feat, d_score_feat,
-                             lvl_img, fast_thr, 0.15f, edge);
+            fast<T, true>(9, &lvl_feat, d_x_feat, d_y_feat, d_score_feat,
+                          lvl_img, fast_thr, 0.15f, edge);
 
             if (lvl_feat == 0) {
                 feat_pyr[i] = 0;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list