[arrayfire] 325/408: Templated options are now runtime compile options for opencl nearest neighbor

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:22 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 39f99774fbea082a0f4cbd104bfb7215729b56d8
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Mon Aug 24 07:00:56 2015 -0400

    Templated options are now runtime compile options for opencl nearest neighbor
---
 src/backend/opencl/kernel/nearest_neighbour.hpp | 55 +++++++++++++++++--------
 src/backend/opencl/nearest_neighbour.cpp        | 54 +-----------------------
 2 files changed, 39 insertions(+), 70 deletions(-)

diff --git a/src/backend/opencl/kernel/nearest_neighbour.hpp b/src/backend/opencl/kernel/nearest_neighbour.hpp
index 5e021f9..d7c800d 100644
--- a/src/backend/opencl/kernel/nearest_neighbour.hpp
+++ b/src/backend/opencl/kernel/nearest_neighbour.hpp
@@ -15,6 +15,8 @@
 #include <kernel_headers/nearest_neighbour.hpp>
 #include <memory.hpp>
 #include <math.hpp>
+#include <dispatch.hpp>
+#include <cache.hpp>
 
 using cl::LocalSpaceArg;
 
@@ -26,7 +28,7 @@ namespace kernel
 
 static const unsigned THREADS = 256;
 
-template<typename T, typename To, af_match_type dist_type, bool use_lmem, unsigned unroll_len>
+template<typename T, typename To, af_match_type dist_type, bool use_lmem>
 void nearest_neighbour(Param idx,
                        Param dist,
                        Param query,
@@ -36,14 +38,6 @@ void nearest_neighbour(Param idx,
                        const size_t lmem_sz)
 {
     try {
-        static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
-        static Program        nearest_neighbourProgs[DeviceManager::MAX_DEVICES];
-        static Kernel             huKernel[DeviceManager::MAX_DEVICES];
-        static Kernel             hmKernel[DeviceManager::MAX_DEVICES];
-        static Kernel             smKernel[DeviceManager::MAX_DEVICES];
-
-        int device = getActiveDeviceId();
-
         const unsigned feat_len = query.info.dims[dist_dim];
         const To max_dist = limit_max<To>();
 
@@ -51,7 +45,24 @@ void nearest_neighbour(Param idx,
             OPENCL_NOT_SUPPORTED();
         }
 
-        std::call_once( compileFlags[device], [device] () {
+        unsigned unroll_len = nextpow2(feat_len);
+        if (unroll_len != feat_len) unroll_len = 0;
+
+        std::string ref_name =
+            std::string("knn_") +
+            std::to_string(dist_type) +
+            std::string("_") +
+            std::to_string(use_lmem) +
+            std::string("_") +
+            std::string(dtype_traits<T>::getName()) +
+            std::string("_") +
+            std::to_string(unroll_len);
+
+        int device = getActiveDeviceId();
+        kc_t::iterator cache_idx = kernelCaches[device].find(ref_name);
+
+        kc_entry_t entry;
+        if (cache_idx == kernelCaches[device].end()) {
 
                 std::ostringstream options;
                 options << " -D T=" << dtype_traits<T>::getName()
@@ -75,15 +86,23 @@ void nearest_neighbour(Param idx,
                 if (use_lmem)
                     options << " -D USE_LOCAL_MEM";
 
-                buildProgram(nearest_neighbourProgs[device],
+                cl::Program prog;
+                buildProgram(prog,
                              nearest_neighbour_cl,
                              nearest_neighbour_cl_len,
                              options.str());
 
-                huKernel[device] = Kernel(nearest_neighbourProgs[device], "nearest_neighbour_unroll");
-                hmKernel[device] = Kernel(nearest_neighbourProgs[device], "nearest_neighbour");
-                smKernel[device] = Kernel(nearest_neighbourProgs[device], "select_matches");
-            });
+                entry.prog = new Program(prog);
+                entry.ker = new Kernel[3];
+
+                entry.ker[0] = Kernel(*entry.prog, "nearest_neighbour_unroll");
+                entry.ker[1] = Kernel(*entry.prog, "nearest_neighbour");
+                entry.ker[2] = Kernel(*entry.prog, "select_matches");
+
+                kernelCaches[device][ref_name] = entry;
+        } else {
+            entry = cache_idx->second;
+        }
 
         const dim_t sample_dim = (dist_dim == 0) ? 1 : 0;
 
@@ -104,7 +123,7 @@ void nearest_neighbour(Param idx,
                                     Buffer, KParam,
                                     Buffer, KParam,
                                     const To,
-                                    LocalSpaceArg> (huKernel[device]);
+                                    LocalSpaceArg> (entry.ker[0]);
 
             huOp(EnqueueArgs(getQueue(), global, local),
                  *d_blk_idx, *d_blk_dist,
@@ -116,7 +135,7 @@ void nearest_neighbour(Param idx,
                                     Buffer, KParam,
                                     Buffer, KParam,
                                     const To, const unsigned,
-                                    LocalSpaceArg> (hmKernel[device]);
+                                    LocalSpaceArg> (entry.ker[1]);
 
             hmOp(EnqueueArgs(getQueue(), global, local),
                  *d_blk_idx, *d_blk_dist,
@@ -132,7 +151,7 @@ void nearest_neighbour(Param idx,
         // best match
         auto smOp = make_kernel<Buffer, Buffer, Buffer, Buffer,
                                 const unsigned, const unsigned,
-                                const To> (smKernel[device]);
+                                const To> (entry.ker[2]);
 
         smOp(EnqueueArgs(getQueue(), global_sm, local_sm),
              *idx.data, *dist.data,
diff --git a/src/backend/opencl/nearest_neighbour.cpp b/src/backend/opencl/nearest_neighbour.cpp
index cd86b7f..b2cb142 100644
--- a/src/backend/opencl/nearest_neighbour.cpp
+++ b/src/backend/opencl/nearest_neighbour.cpp
@@ -76,59 +76,9 @@ void nearest_neighbour_(Array<uint>& idx, Array<To>& dist,
     }
 
     if (use_lmem) {
-        switch (feat_len) {
-        case 1:
-            kernel::nearest_neighbour<T, To, dist_type, true , 1 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        case 2:
-            kernel::nearest_neighbour<T, To, dist_type, true , 2 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        case 4:
-            kernel::nearest_neighbour<T, To, dist_type, true , 4 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        case 8:
-            kernel::nearest_neighbour<T, To, dist_type, true , 8 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        case 16:
-            kernel::nearest_neighbour<T, To, dist_type, true , 16>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        case 32:
-            kernel::nearest_neighbour<T, To, dist_type, true , 32>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        case 64:
-            kernel::nearest_neighbour<T, To, dist_type, true , 64>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        default:
-            kernel::nearest_neighbour<T, To, dist_type, true , 0 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        }
+        kernel::nearest_neighbour<T, To, dist_type, true >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
     } else {
-        switch (feat_len) {
-        case 1:
-            kernel::nearest_neighbour<T, To, dist_type, false, 1 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        case 2:
-            kernel::nearest_neighbour<T, To, dist_type, false, 2 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        case 4:
-            kernel::nearest_neighbour<T, To, dist_type, false, 4 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        case 8:
-            kernel::nearest_neighbour<T, To, dist_type, false, 8 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        case 16:
-            kernel::nearest_neighbour<T, To, dist_type, false, 16>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        case 32:
-            kernel::nearest_neighbour<T, To, dist_type, false, 32>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        case 64:
-            kernel::nearest_neighbour<T, To, dist_type, false, 64>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        default:
-            kernel::nearest_neighbour<T, To, dist_type, false, 0 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-            break;
-        }
+        kernel::nearest_neighbour<T, To, dist_type, false>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
     }
 }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list