[arrayfire] 325/408: Templated options are now runtime compile options for opencl nearest neighbor
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:22 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 39f99774fbea082a0f4cbd104bfb7215729b56d8
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Mon Aug 24 07:00:56 2015 -0400
Templated options are now runtime compile options for opencl nearest neighbor
---
src/backend/opencl/kernel/nearest_neighbour.hpp | 55 +++++++++++++++++--------
src/backend/opencl/nearest_neighbour.cpp | 54 +-----------------------
2 files changed, 39 insertions(+), 70 deletions(-)
diff --git a/src/backend/opencl/kernel/nearest_neighbour.hpp b/src/backend/opencl/kernel/nearest_neighbour.hpp
index 5e021f9..d7c800d 100644
--- a/src/backend/opencl/kernel/nearest_neighbour.hpp
+++ b/src/backend/opencl/kernel/nearest_neighbour.hpp
@@ -15,6 +15,8 @@
#include <kernel_headers/nearest_neighbour.hpp>
#include <memory.hpp>
#include <math.hpp>
+#include <dispatch.hpp>
+#include <cache.hpp>
using cl::LocalSpaceArg;
@@ -26,7 +28,7 @@ namespace kernel
static const unsigned THREADS = 256;
-template<typename T, typename To, af_match_type dist_type, bool use_lmem, unsigned unroll_len>
+template<typename T, typename To, af_match_type dist_type, bool use_lmem>
void nearest_neighbour(Param idx,
Param dist,
Param query,
@@ -36,14 +38,6 @@ void nearest_neighbour(Param idx,
const size_t lmem_sz)
{
try {
- static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
- static Program nearest_neighbourProgs[DeviceManager::MAX_DEVICES];
- static Kernel huKernel[DeviceManager::MAX_DEVICES];
- static Kernel hmKernel[DeviceManager::MAX_DEVICES];
- static Kernel smKernel[DeviceManager::MAX_DEVICES];
-
- int device = getActiveDeviceId();
-
const unsigned feat_len = query.info.dims[dist_dim];
const To max_dist = limit_max<To>();
@@ -51,7 +45,24 @@ void nearest_neighbour(Param idx,
OPENCL_NOT_SUPPORTED();
}
- std::call_once( compileFlags[device], [device] () {
+ unsigned unroll_len = nextpow2(feat_len);
+ if (unroll_len != feat_len) unroll_len = 0;
+
+ std::string ref_name =
+ std::string("knn_") +
+ std::to_string(dist_type) +
+ std::string("_") +
+ std::to_string(use_lmem) +
+ std::string("_") +
+ std::string(dtype_traits<T>::getName()) +
+ std::string("_") +
+ std::to_string(unroll_len);
+
+ int device = getActiveDeviceId();
+ kc_t::iterator cache_idx = kernelCaches[device].find(ref_name);
+
+ kc_entry_t entry;
+ if (cache_idx == kernelCaches[device].end()) {
std::ostringstream options;
options << " -D T=" << dtype_traits<T>::getName()
@@ -75,15 +86,23 @@ void nearest_neighbour(Param idx,
if (use_lmem)
options << " -D USE_LOCAL_MEM";
- buildProgram(nearest_neighbourProgs[device],
+ cl::Program prog;
+ buildProgram(prog,
nearest_neighbour_cl,
nearest_neighbour_cl_len,
options.str());
- huKernel[device] = Kernel(nearest_neighbourProgs[device], "nearest_neighbour_unroll");
- hmKernel[device] = Kernel(nearest_neighbourProgs[device], "nearest_neighbour");
- smKernel[device] = Kernel(nearest_neighbourProgs[device], "select_matches");
- });
+ entry.prog = new Program(prog);
+ entry.ker = new Kernel[3];
+
+ entry.ker[0] = Kernel(*entry.prog, "nearest_neighbour_unroll");
+ entry.ker[1] = Kernel(*entry.prog, "nearest_neighbour");
+ entry.ker[2] = Kernel(*entry.prog, "select_matches");
+
+ kernelCaches[device][ref_name] = entry;
+ } else {
+ entry = cache_idx->second;
+ }
const dim_t sample_dim = (dist_dim == 0) ? 1 : 0;
@@ -104,7 +123,7 @@ void nearest_neighbour(Param idx,
Buffer, KParam,
Buffer, KParam,
const To,
- LocalSpaceArg> (huKernel[device]);
+ LocalSpaceArg> (entry.ker[0]);
huOp(EnqueueArgs(getQueue(), global, local),
*d_blk_idx, *d_blk_dist,
@@ -116,7 +135,7 @@ void nearest_neighbour(Param idx,
Buffer, KParam,
Buffer, KParam,
const To, const unsigned,
- LocalSpaceArg> (hmKernel[device]);
+ LocalSpaceArg> (entry.ker[1]);
hmOp(EnqueueArgs(getQueue(), global, local),
*d_blk_idx, *d_blk_dist,
@@ -132,7 +151,7 @@ void nearest_neighbour(Param idx,
// best match
auto smOp = make_kernel<Buffer, Buffer, Buffer, Buffer,
const unsigned, const unsigned,
- const To> (smKernel[device]);
+ const To> (entry.ker[2]);
smOp(EnqueueArgs(getQueue(), global_sm, local_sm),
*idx.data, *dist.data,
diff --git a/src/backend/opencl/nearest_neighbour.cpp b/src/backend/opencl/nearest_neighbour.cpp
index cd86b7f..b2cb142 100644
--- a/src/backend/opencl/nearest_neighbour.cpp
+++ b/src/backend/opencl/nearest_neighbour.cpp
@@ -76,59 +76,9 @@ void nearest_neighbour_(Array<uint>& idx, Array<To>& dist,
}
if (use_lmem) {
- switch (feat_len) {
- case 1:
- kernel::nearest_neighbour<T, To, dist_type, true , 1 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- case 2:
- kernel::nearest_neighbour<T, To, dist_type, true , 2 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- case 4:
- kernel::nearest_neighbour<T, To, dist_type, true , 4 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- case 8:
- kernel::nearest_neighbour<T, To, dist_type, true , 8 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- case 16:
- kernel::nearest_neighbour<T, To, dist_type, true , 16>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- case 32:
- kernel::nearest_neighbour<T, To, dist_type, true , 32>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- case 64:
- kernel::nearest_neighbour<T, To, dist_type, true , 64>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- default:
- kernel::nearest_neighbour<T, To, dist_type, true , 0 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- }
+ kernel::nearest_neighbour<T, To, dist_type, true >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
} else {
- switch (feat_len) {
- case 1:
- kernel::nearest_neighbour<T, To, dist_type, false, 1 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- case 2:
- kernel::nearest_neighbour<T, To, dist_type, false, 2 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- case 4:
- kernel::nearest_neighbour<T, To, dist_type, false, 4 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- case 8:
- kernel::nearest_neighbour<T, To, dist_type, false, 8 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- case 16:
- kernel::nearest_neighbour<T, To, dist_type, false, 16>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- case 32:
- kernel::nearest_neighbour<T, To, dist_type, false, 32>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- case 64:
- kernel::nearest_neighbour<T, To, dist_type, false, 64>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- default:
- kernel::nearest_neighbour<T, To, dist_type, false, 0 >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
- break;
- }
+ kernel::nearest_neighbour<T, To, dist_type, false>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
}
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list