[arrayfire] 286/408: FEAT: wrap for OpenCL backend

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:15 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 2e05dd4738430932417c75e0956ebf9d44357b9f
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Wed Aug 19 02:49:35 2015 -0400

    FEAT: wrap for OpenCL backend
---
 src/backend/opencl/kernel/wrap.cl  |  74 ++++++++++++++++++++++++
 src/backend/opencl/kernel/wrap.hpp | 112 +++++++++++++++++++++++++++++++++++++
 src/backend/opencl/wrap.cpp        |   8 ++-
 3 files changed, 193 insertions(+), 1 deletion(-)

diff --git a/src/backend/opencl/kernel/wrap.cl b/src/backend/opencl/kernel/wrap.cl
new file mode 100644
index 0000000..8171f9e
--- /dev/null
+++ b/src/backend/opencl/kernel/wrap.cl
@@ -0,0 +1,74 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+__kernel
+void wrap_kernel(__global T *optr, KParam out,
+                 __global T *iptr, KParam in,
+                 const dim_t wx, const dim_t wy,
+                 const dim_t sx, const dim_t sy,
+                 const dim_t px, const dim_t py,
+                 const dim_t nx, const dim_t ny,
+                 dim_t groups_x,
+                 dim_t groups_y)
+{
+    dim_t idx2 = get_group_id(0) / groups_x;
+    dim_t idx3 = get_group_id(1) / groups_y;
+
+    dim_t groupId_x = get_group_id(0) - idx2 * groups_x;
+    dim_t groupId_y = get_group_id(1) - idx3 * groups_y;
+
+    dim_t oidx0 = get_local_id(0) + get_local_size(0) * groupId_x;
+    dim_t oidx1 = get_local_id(1) + get_local_size(1) * groupId_y;
+
+    optr += idx2 * out.strides[2] + idx3 * out.strides[3];
+    iptr += idx2 *  in.strides[2] + idx3 *  in.strides[3];
+
+
+    if (oidx0 >= out.dims[0] || oidx1 >= out.dims[1]) return;
+
+    dim_t pidx0 = oidx0 + px;
+    dim_t pidx1 = oidx1 + py;
+
+    // The last time a value appears in the unwrapped index is padded_index / stride
+    // Each previous index has the value appear "stride" locations earlier
+    // We work our way back from the last index
+
+    const dim_t x_end = min(pidx0 / sx, nx - 1);
+    const dim_t y_end = min(pidx1 / sy, ny - 1);
+
+    const dim_t x_off = pidx0 - sx * x_end;
+    const dim_t y_off = pidx1 - sy * y_end;
+
+    T val = ZERO;
+    dim_t idx = 1;
+
+    for (dim_t y = y_end, yo = y_off; y >= 0 && yo < wy; yo += sy, y--) {
+        dim_t win_end_y = yo * wx;
+        dim_t dim_end_y = y * nx;
+
+        for (dim_t x = x_end, xo = x_off; x >= 0 && xo < wx; xo += sx, x--) {
+
+            dim_t win_end = win_end_y + xo;
+            dim_t dim_end = dim_end_y + x;
+
+            if (is_column) {
+                idx = dim_end * in.strides[1] + win_end;
+            } else {
+                idx = dim_end + win_end * in.strides[1];
+            }
+
+            // No need to include anything special for complex
+            // Add for complex numbers is just vector add of reals
+            // Might need to change if we generalize add to more binary ops
+            val = val + iptr[idx];
+        }
+    }
+
+    optr[oidx1 * out.strides[1] + oidx0] = val;
+}
diff --git a/src/backend/opencl/kernel/wrap.hpp b/src/backend/opencl/kernel/wrap.hpp
new file mode 100644
index 0000000..b99c06d
--- /dev/null
+++ b/src/backend/opencl/kernel/wrap.hpp
@@ -0,0 +1,112 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#pragma once
+#include <kernel_headers/wrap.hpp>
+#include <program.hpp>
+#include <traits.hpp>
+#include <string>
+#include <map>
+#include <mutex>
+#include <dispatch.hpp>
+#include <Param.hpp>
+#include <debug_opencl.hpp>
+#include <type_util.hpp>
+#include <math.hpp>
+#include "config.hpp"
+#include <cache.hpp>
+
+using cl::Buffer;
+using cl::Program;
+using cl::Kernel;
+using cl::make_kernel;
+using cl::EnqueueArgs;
+using cl::NDRange;
+using std::string;
+
+namespace opencl
+{
+    namespace kernel
+    {
+        template<typename T>
+        void wrap(Param out, const Param in,
+                  const dim_t wx, const dim_t wy,
+                  const dim_t sx, const dim_t sy,
+                  const dim_t px, const dim_t py,
+                  const bool is_column)
+        {
+            try {
+
+                std::string ref_name =
+                    std::string("wrap_") +
+                    std::string(dtype_traits<T>::getName()) +
+                    std::string("_") +
+                    std::to_string(is_column);
+
+                int device = getActiveDeviceId();
+                kc_t::iterator idx = kernelCaches[device].find(ref_name);
+
+                kc_entry_t entry;
+                if (idx == kernelCaches[device].end()) {
+
+                    ToNum<T> toNum;
+                    std::ostringstream options;
+                    options << " -D is_column=" << is_column
+                            << " -D ZERO=" << toNum(scalar<T>(0))
+                            << " -D T="    << dtype_traits<T>::getName();
+
+                    if (std::is_same<T, double>::value ||
+                        std::is_same<T, cdouble>::value) {
+                        options << " -D USE_DOUBLE";
+                    }
+
+                    Program prog;
+                    buildProgram(prog, wrap_cl, wrap_cl_len, options.str());
+
+                    entry.prog = new Program(prog);
+                    entry.ker = new Kernel(*entry.prog, "wrap_kernel");
+
+                    kernelCaches[device][ref_name] = entry;
+                } else {
+                    entry = idx->second;
+                }
+
+                dim_t nx = (out.info.dims[0] + 2 * px - wx) / sx + 1;
+                dim_t ny = (out.info.dims[1] + 2 * py - wy) / sy + 1;
+
+                NDRange local(THREADS_X, THREADS_Y);
+
+                dim_t groups_x = divup(out.info.dims[0], local[0]);
+                dim_t groups_y = divup(out.info.dims[1], local[1]);
+
+                NDRange global(local[0] * groups_x * out.info.dims[2],
+                               local[1] * groups_y * out.info.dims[3]);
+
+
+                auto wrapOp = make_kernel<Buffer, const KParam,
+                                          const Buffer, const KParam,
+                                          const dim_t, const dim_t,
+                                          const dim_t, const dim_t,
+                                          const dim_t, const dim_t,
+                                          const dim_t, const dim_t,
+                                          const dim_t, const dim_t> (*entry.ker);
+
+                wrapOp(EnqueueArgs(getQueue(), global, local),
+                       *out.data, out.info, *in.data, in.info,
+                       wx, wy, sx, sy, px, py, nx, ny, groups_x, groups_y);
+
+                CL_DEBUG_FINISH(getQueue());
+
+            } catch (cl::Error err) {
+                CL_TO_AF_ERROR(err);
+                throw;
+            }
+        }
+    }
+}
diff --git a/src/backend/opencl/wrap.cpp b/src/backend/opencl/wrap.cpp
index 240b4a6..f3a5e1b 100644
--- a/src/backend/opencl/wrap.cpp
+++ b/src/backend/opencl/wrap.cpp
@@ -13,6 +13,7 @@
 #include <err_opencl.hpp>
 #include <dispatch.hpp>
 #include <math.hpp>
+#include <kernel/wrap.hpp>
 
 namespace opencl
 {
@@ -25,7 +26,12 @@ namespace opencl
                   const dim_t px, const dim_t py,
                   const bool is_column)
     {
-        OPENCL_NOT_SUPPORTED();
+        af::dim4 idims = in.dims();
+        af::dim4 odims(ox, oy, idims[2], idims[3]);
+        Array<T> out = createValueArray<T>(odims, scalar<T>(0));
+
+        kernel::wrap<T>(out, in, wx, wy, sx, sy, px, py, is_column);
+        return out;
     }
 
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list