[arrayfire] 327/408: Templated options are now runtime compile options for opencl FAST
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:23 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 2c00e646364fec97a85765f48bef89064313202f
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Mon Aug 24 07:17:28 2015 -0400
Templated options are now runtime compile options for opencl FAST
---
src/backend/opencl/kernel/fast.hpp | 98 ++++++++++++--------------------------
src/backend/opencl/kernel/orb.hpp | 4 +-
2 files changed, 33 insertions(+), 69 deletions(-)
diff --git a/src/backend/opencl/kernel/fast.hpp b/src/backend/opencl/kernel/fast.hpp
index 68cb767..fcc5a6c 100644
--- a/src/backend/opencl/kernel/fast.hpp
+++ b/src/backend/opencl/kernel/fast.hpp
@@ -12,6 +12,7 @@
#include <dispatch.hpp>
#include <err_opencl.hpp>
#include <debug_opencl.hpp>
+#include <cache.hpp>
#include <kernel_headers/fast.hpp>
#include <memory.hpp>
#include <map>
@@ -34,8 +35,9 @@ static const int FAST_THREADS_Y = 16;
static const int FAST_THREADS_NONMAX_X = 32;
static const int FAST_THREADS_NONMAX_Y = 8;
-template<typename T, const unsigned arc_length, const bool nonmax>
-void fast(unsigned* out_feat,
+template<typename T, const bool nonmax>
+void fast(const unsigned arc_length,
+ unsigned* out_feat,
Param &x_out,
Param &y_out,
Param &score_out,
@@ -45,15 +47,19 @@ void fast(unsigned* out_feat,
const unsigned edge)
{
try {
- static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
- static std::map<int, Program*> fastProgs;
- static std::map<int, Kernel*> lfKernel;
- static std::map<int, Kernel*> nmKernel;
- static std::map<int, Kernel*> gfKernel;
+ std::string ref_name =
+ std::string("fast_") +
+ std::to_string(arc_length) +
+ std::string("_") +
+ std::to_string(nonmax) +
+ std::string("_") +
+ std::string(dtype_traits<T>::getName());
int device = getActiveDeviceId();
+ kc_t::iterator cache_idx = kernelCaches[device].find(ref_name);
- std::call_once( compileFlags[device], [device] () {
+ kc_entry_t entry;
+ if (cache_idx == kernelCaches[device].end()) {
std::ostringstream options;
options << " -D T=" << dtype_traits<T>::getName()
@@ -67,12 +73,17 @@ void fast(unsigned* out_feat,
cl::Program prog;
buildProgram(prog, fast_cl, fast_cl_len, options.str());
- fastProgs[device] = new Program(prog);
+ entry.prog = new Program(prog);
+ entry.ker = new Kernel[3];
- lfKernel[device] = new Kernel(*fastProgs[device], "locate_features");
- nmKernel[device] = new Kernel(*fastProgs[device], "non_max_counts");
- gfKernel[device] = new Kernel(*fastProgs[device], "get_features");
- });
+ entry.ker[0] = Kernel(*entry.prog, "locate_features");
+ entry.ker[1] = Kernel(*entry.prog, "non_max_counts");
+ entry.ker[2] = Kernel(*entry.prog, "get_features");
+
+ kernelCaches[device][ref_name] = entry;
+ } else {
+ entry = cache_idx -> second;
+ }
const unsigned max_feat = ceil(in.info.dims[0] * in.info.dims[1] * feature_ratio);
@@ -96,7 +107,7 @@ void fast(unsigned* out_feat,
auto lfOp = make_kernel<Buffer, KParam,
Buffer, const float, const unsigned,
- LocalSpaceArg> (*lfKernel[device]);
+ LocalSpaceArg> (entry.ker[0]);
lfOp(EnqueueArgs(getQueue(), global, local),
*in.data, in.info, *d_score, thr, edge,
@@ -121,7 +132,7 @@ void fast(unsigned* out_feat,
auto nmOp = make_kernel<Buffer, Buffer, Buffer,
Buffer, Buffer,
- KParam, const unsigned> (*nmKernel[device]);
+ KParam, const unsigned> (entry.ker[1]);
nmOp(EnqueueArgs(getQueue(), global_nonmax, local_nonmax),
*d_counts, *d_offsets, *d_total, *d_flags, *d_score, in.info, edge);
CL_DEBUG_FINISH(getQueue());
@@ -139,7 +150,7 @@ void fast(unsigned* out_feat,
auto gfOp = make_kernel<Buffer, Buffer, Buffer,
Buffer, Buffer, Buffer,
KParam, const unsigned,
- const unsigned> (*gfKernel[device]);
+ const unsigned> (entry.ker[2]);
gfOp(EnqueueArgs(getQueue(), global_nonmax, local_nonmax),
*x_out.data, *y_out.data, *score_out.data,
*d_flags, *d_counts, *d_offsets,
@@ -176,53 +187,6 @@ void fast(unsigned* out_feat,
}
}
-template<typename T, bool nonmax>
-void fast_dispatch_nonmax(const unsigned arc_length,
- unsigned* out_feat,
- Param &x_out,
- Param &y_out,
- Param &score_out,
- Param in,
- const float thr,
- const float feature_ratio,
- const unsigned edge)
-{
- switch (arc_length) {
- case 9:
- fast<T, 9, nonmax>(out_feat, x_out, y_out, score_out, in,
- thr, feature_ratio, edge);
- break;
- case 10:
- fast<T, 10, nonmax>(out_feat, x_out, y_out, score_out, in,
- thr, feature_ratio, edge);
- break;
- case 11:
- fast<T, 11, nonmax>(out_feat, x_out, y_out, score_out, in,
- thr, feature_ratio, edge);
- break;
- case 12:
- fast<T, 12, nonmax>(out_feat, x_out, y_out, score_out, in,
- thr, feature_ratio, edge);
- break;
- case 13:
- fast<T, 13, nonmax>(out_feat, x_out, y_out, score_out, in,
- thr, feature_ratio, edge);
- break;
- case 14:
- fast<T, 14, nonmax>(out_feat, x_out, y_out, score_out, in,
- thr, feature_ratio, edge);
- break;
- case 15:
- fast<T, 15, nonmax>(out_feat, x_out, y_out, score_out, in,
- thr, feature_ratio, edge);
- break;
- case 16:
- fast<T, 16, nonmax>(out_feat, x_out, y_out, score_out, in,
- thr, feature_ratio, edge);
- break;
- }
-}
-
template<typename T>
void fast_dispatch(const unsigned arc_length, const bool nonmax,
unsigned* out_feat,
@@ -235,11 +199,11 @@ void fast_dispatch(const unsigned arc_length, const bool nonmax,
const unsigned edge)
{
if (!nonmax) {
- fast_dispatch_nonmax<T, 0>(arc_length, out_feat, x_out, y_out, score_out, in,
- thr, feature_ratio, edge);
+ fast<T, 0>(arc_length, out_feat, x_out, y_out, score_out, in,
+ thr, feature_ratio, edge);
} else {
- fast_dispatch_nonmax<T, 1>(arc_length, out_feat, x_out, y_out, score_out, in,
- thr, feature_ratio, edge);
+ fast<T, 1>(arc_length, out_feat, x_out, y_out, score_out, in,
+ thr, feature_ratio, edge);
}
}
diff --git a/src/backend/opencl/kernel/orb.hpp b/src/backend/opencl/kernel/orb.hpp
index 0c662ba..71d6ae0 100644
--- a/src/backend/opencl/kernel/orb.hpp
+++ b/src/backend/opencl/kernel/orb.hpp
@@ -201,8 +201,8 @@ void orb(unsigned* out_feat,
unsigned edge = ceil(size * sqrt(2.f) / 2.f);
// Detect FAST features
- fast<T, 9, true>(&lvl_feat, d_x_feat, d_y_feat, d_score_feat,
- lvl_img, fast_thr, 0.15f, edge);
+ fast<T, true>(9, &lvl_feat, d_x_feat, d_y_feat, d_score_feat,
+ lvl_img, fast_thr, 0.15f, edge);
if (lvl_feat == 0) {
feat_pyr[i] = 0;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list.