[arrayfire] 286/408: FEAT: wrap for OpenCL backend
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:15 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 2e05dd4738430932417c75e0956ebf9d44357b9f
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Wed Aug 19 02:49:35 2015 -0400
FEAT: wrap for OpenCL backend
---
src/backend/opencl/kernel/wrap.cl | 74 ++++++++++++++++++++++++
src/backend/opencl/kernel/wrap.hpp | 112 +++++++++++++++++++++++++++++++++++++
src/backend/opencl/wrap.cpp | 8 ++-
3 files changed, 193 insertions(+), 1 deletion(-)
diff --git a/src/backend/opencl/kernel/wrap.cl b/src/backend/opencl/kernel/wrap.cl
new file mode 100644
index 0000000..8171f9e
--- /dev/null
+++ b/src/backend/opencl/kernel/wrap.cl
@@ -0,0 +1,74 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+__kernel
+void wrap_kernel(__global T *optr, KParam out,
+ __global T *iptr, KParam in,
+ const dim_t wx, const dim_t wy,
+ const dim_t sx, const dim_t sy,
+ const dim_t px, const dim_t py,
+ const dim_t nx, const dim_t ny,
+ dim_t groups_x,
+ dim_t groups_y)
+{
+ dim_t idx2 = get_group_id(0) / groups_x;
+ dim_t idx3 = get_group_id(1) / groups_y;
+
+ dim_t groupId_x = get_group_id(0) - idx2 * groups_x;
+ dim_t groupId_y = get_group_id(1) - idx3 * groups_y;
+
+ dim_t oidx0 = get_local_id(0) + get_local_size(0) * groupId_x;
+ dim_t oidx1 = get_local_id(1) + get_local_size(1) * groupId_y;
+
+ optr += idx2 * out.strides[2] + idx3 * out.strides[3];
+ iptr += idx2 * in.strides[2] + idx3 * in.strides[3];
+
+
+ if (oidx0 >= out.dims[0] || oidx1 >= out.dims[1]) return;
+
+ dim_t pidx0 = oidx0 + px;
+ dim_t pidx1 = oidx1 + py;
+
+ // The last time a value appears in the unwrapped index is padded_index / stride
+ // Each previous index has the value appear "stride" locations earlier
+ // We work our way back from the last index
+
+ const dim_t x_end = min(pidx0 / sx, nx - 1);
+ const dim_t y_end = min(pidx1 / sy, ny - 1);
+
+ const dim_t x_off = pidx0 - sx * x_end;
+ const dim_t y_off = pidx1 - sy * y_end;
+
+ T val = ZERO;
+ dim_t idx = 1;
+
+ for (dim_t y = y_end, yo = y_off; y >= 0 && yo < wy; yo += sy, y--) {
+ dim_t win_end_y = yo * wx;
+ dim_t dim_end_y = y * nx;
+
+ for (dim_t x = x_end, xo = x_off; x >= 0 && xo < wx; xo += sx, x--) {
+
+ dim_t win_end = win_end_y + xo;
+ dim_t dim_end = dim_end_y + x;
+
+ if (is_column) {
+ idx = dim_end * in.strides[1] + win_end;
+ } else {
+ idx = dim_end + win_end * in.strides[1];
+ }
+
+ // No need to include anything special for complex
+ // Add for complex numbers is just vector add of reals
+ // Might need to change if we generalize add to more binary ops
+ val = val + iptr[idx];
+ }
+ }
+
+ optr[oidx1 * out.strides[1] + oidx0] = val;
+}
diff --git a/src/backend/opencl/kernel/wrap.hpp b/src/backend/opencl/kernel/wrap.hpp
new file mode 100644
index 0000000..b99c06d
--- /dev/null
+++ b/src/backend/opencl/kernel/wrap.hpp
@@ -0,0 +1,112 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#pragma once
+#include <kernel_headers/wrap.hpp>
+#include <program.hpp>
+#include <traits.hpp>
+#include <string>
+#include <map>
+#include <mutex>
+#include <dispatch.hpp>
+#include <Param.hpp>
+#include <debug_opencl.hpp>
+#include <type_util.hpp>
+#include <math.hpp>
+#include "config.hpp"
+#include <cache.hpp>
+
+using cl::Buffer;
+using cl::Program;
+using cl::Kernel;
+using cl::make_kernel;
+using cl::EnqueueArgs;
+using cl::NDRange;
+using std::string;
+
+namespace opencl
+{
+ namespace kernel
+ {
+ template<typename T>
+ void wrap(Param out, const Param in,
+ const dim_t wx, const dim_t wy,
+ const dim_t sx, const dim_t sy,
+ const dim_t px, const dim_t py,
+ const bool is_column)
+ {
+ try {
+
+ std::string ref_name =
+ std::string("wrap_") +
+ std::string(dtype_traits<T>::getName()) +
+ std::string("_") +
+ std::to_string(is_column);
+
+ int device = getActiveDeviceId();
+ kc_t::iterator idx = kernelCaches[device].find(ref_name);
+
+ kc_entry_t entry;
+ if (idx == kernelCaches[device].end()) {
+
+ ToNum<T> toNum;
+ std::ostringstream options;
+ options << " -D is_column=" << is_column
+ << " -D ZERO=" << toNum(scalar<T>(0))
+ << " -D T=" << dtype_traits<T>::getName();
+
+ if (std::is_same<T, double>::value ||
+ std::is_same<T, cdouble>::value) {
+ options << " -D USE_DOUBLE";
+ }
+
+ Program prog;
+ buildProgram(prog, wrap_cl, wrap_cl_len, options.str());
+
+ entry.prog = new Program(prog);
+ entry.ker = new Kernel(*entry.prog, "wrap_kernel");
+
+ kernelCaches[device][ref_name] = entry;
+ } else {
+ entry = idx->second;
+ }
+
+ dim_t nx = (out.info.dims[0] + 2 * px - wx) / sx + 1;
+ dim_t ny = (out.info.dims[1] + 2 * py - wy) / sy + 1;
+
+ NDRange local(THREADS_X, THREADS_Y);
+
+ dim_t groups_x = divup(out.info.dims[0], local[0]);
+ dim_t groups_y = divup(out.info.dims[1], local[1]);
+
+ NDRange global(local[0] * groups_x * out.info.dims[2],
+ local[1] * groups_y * out.info.dims[3]);
+
+
+ auto wrapOp = make_kernel<Buffer, const KParam,
+ const Buffer, const KParam,
+ const dim_t, const dim_t,
+ const dim_t, const dim_t,
+ const dim_t, const dim_t,
+ const dim_t, const dim_t,
+ const dim_t, const dim_t> (*entry.ker);
+
+ wrapOp(EnqueueArgs(getQueue(), global, local),
+ *out.data, out.info, *in.data, in.info,
+ wx, wy, sx, sy, px, py, nx, ny, groups_x, groups_y);
+
+ CL_DEBUG_FINISH(getQueue());
+
+ } catch (cl::Error err) {
+ CL_TO_AF_ERROR(err);
+ throw;
+ }
+ }
+ }
+}
diff --git a/src/backend/opencl/wrap.cpp b/src/backend/opencl/wrap.cpp
index 240b4a6..f3a5e1b 100644
--- a/src/backend/opencl/wrap.cpp
+++ b/src/backend/opencl/wrap.cpp
@@ -13,6 +13,7 @@
#include <err_opencl.hpp>
#include <dispatch.hpp>
#include <math.hpp>
+#include <kernel/wrap.hpp>
namespace opencl
{
@@ -25,7 +26,12 @@ namespace opencl
const dim_t px, const dim_t py,
const bool is_column)
{
- OPENCL_NOT_SUPPORTED();
+ af::dim4 idims = in.dims();
+ af::dim4 odims(ox, oy, idims[2], idims[3]);
+ Array<T> out = createValueArray<T>(odims, scalar<T>(0));
+
+ kernel::wrap<T>(out, in, wx, wy, sx, sy, px, py, is_column);
+ return out;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list