[arrayfire] 09/408: Added OpenCL backend for Unwrap

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:04 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 669b4b2954d6fad143b552613b97c64489a69427
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Wed Jun 17 16:54:56 2015 -0400

    Added OpenCL backend for Unwrap
---
 src/backend/opencl/kernel/unwrap.cl  | 53 ++++++++++++++++++++
 src/backend/opencl/kernel/unwrap.hpp | 93 ++++++++++++++++++++++++++++++++++++
 src/backend/opencl/unwrap.cpp        | 14 +++++-
 3 files changed, 159 insertions(+), 1 deletion(-)

diff --git a/src/backend/opencl/kernel/unwrap.cl b/src/backend/opencl/kernel/unwrap.cl
new file mode 100644
index 0000000..a6370f2
--- /dev/null
+++ b/src/backend/opencl/kernel/unwrap.cl
@@ -0,0 +1,53 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+__kernel
+void unwrap_kernel(__global T *d_out, const KParam out,
+                   __global const T *d_in, const KParam in,
+                   const dim_t wx, const dim_t wy, const dim_t sx, const dim_t sy,
+                   const dim_t repsPerColumn)
+{
+    const dim_t w = get_group_id(1) / in.dims[2];
+    const dim_t z = get_group_id(1) % in.dims[2];
+
+    if(w >= in.dims[3] || z >= in.dims[2])
+        return;
+
+    const dim_t cOut = w * out.strides[3] + z * out.strides[2];
+    const dim_t cIn  = w *  in.strides[3] + z *  in.strides[2];
+
+    const dim_t nx = (in.dims[0] - wx) / sx + 1;
+    //dim_t ny = (in.dims[1] - wy) / sy + 1;
+
+    const dim_t colId = get_group_id(0) * get_local_size(1) + get_local_id(1);
+
+    if(colId >= out.dims[1])
+        return;
+
+    const dim_t startx = (colId % nx) * sx;
+    const dim_t starty = (colId / nx) * sy;
+
+    __global T* optr = d_out + cOut + colId * out.strides[1];
+    __global T* iptr = d_in  + cIn  + starty * in.strides[1] + startx + in.offset;
+
+    for(int i = 0; i < repsPerColumn; i++) {
+        const dim_t colIndex = i * TX + get_local_id(0);
+
+        if(colIndex >= out.dims[0])
+            return;
+
+        const dim_t x = colIndex % wx;
+        const dim_t y = colIndex / wx;
+
+        const dim_t outIdx = (y * wx + x) * out.strides[0];
+        const dim_t inIdx = y * in.strides[1] + x * in.strides[0];
+
+        optr[outIdx] = iptr[inIdx];
+    }
+}
diff --git a/src/backend/opencl/kernel/unwrap.hpp b/src/backend/opencl/kernel/unwrap.hpp
new file mode 100644
index 0000000..957dbe8
--- /dev/null
+++ b/src/backend/opencl/kernel/unwrap.hpp
@@ -0,0 +1,93 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#pragma once
+#include <kernel_headers/unwrap.hpp>
+#include <program.hpp>
+#include <traits.hpp>
+#include <string>
+#include <map>
+#include <mutex>
+#include <dispatch.hpp>
+#include <Param.hpp>
+#include <debug_opencl.hpp>
+
+using cl::Buffer;
+using cl::Program;
+using cl::Kernel;
+using cl::make_kernel;
+using cl::EnqueueArgs;
+using cl::NDRange;
+using std::string;
+
+namespace opencl
+{
+    namespace kernel
+    {
+        template<typename T, int TX>
+        void unwrap(Param out, const Param in, const dim_t wx, const dim_t wy,
+                    const dim_t sx, const dim_t sy)
+        {
+            try {
+                static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
+                static std::map<int, Program*>   unwrapProgs;
+                static std::map<int, Kernel *> unwrapKernels;
+
+                int device = getActiveDeviceId();
+
+                std::call_once( compileFlags[device], [device] () {
+                    std::ostringstream options;
+                    options << " -D T="        << dtype_traits<T>::getName();
+                    options << " -D TX="       << TX;
+
+                    if((af_dtype) dtype_traits<T>::af_type == c32 ||
+                       (af_dtype) dtype_traits<T>::af_type == c64) {
+                        options << " -D CPLX=1";
+                    } else {
+                        options << " -D CPLX=0";
+                    }
+
+                    if (std::is_same<T, double>::value ||
+                        std::is_same<T, cdouble>::value) {
+                        options << " -D USE_DOUBLE";
+                    }
+
+                    Program prog;
+                    buildProgram(prog, unwrap_cl, unwrap_cl_len, options.str());
+                    unwrapProgs[device] = new Program(prog);
+                    unwrapKernels[device] = new Kernel(*unwrapProgs[device], "unwrap_kernel");
+                });
+
+                auto unwrapOp = make_kernel<Buffer, const KParam, const Buffer, const KParam,
+                                      const dim_t, const dim_t, const dim_t, const dim_t, const dim_t>
+                                      (*unwrapKernels[device]);
+
+                const dim_t TY = 256 / TX;
+                dim_t repsPerColumn = 1;
+                if(TX == 256 && wx * wy > 256) {
+                    repsPerColumn = (wx * wy) / 256;
+                }
+
+                NDRange local(TX, TY, 1);
+
+                NDRange global(local[0] * divup(in.info.dims[1], TY),
+                               local[1] * in.info.dims[2] * in.info.dims[3],
+                               1);
+
+                unwrapOp(EnqueueArgs(getQueue(), global, local),
+                       *out.data, out.info, *in.data, in.info, wx, wy, sx, sy, repsPerColumn);
+
+                CL_DEBUG_FINISH(getQueue());
+            } catch (cl::Error err) {
+                CL_TO_AF_ERROR(err);
+                throw;
+            }
+        }
+    }
+}
diff --git a/src/backend/opencl/unwrap.cpp b/src/backend/opencl/unwrap.cpp
index 7403bb4..e9f36a0 100644
--- a/src/backend/opencl/unwrap.cpp
+++ b/src/backend/opencl/unwrap.cpp
@@ -9,7 +9,7 @@
 
 #include <Array.hpp>
 #include <unwrap.hpp>
-//#include <kernel/unwrap.hpp>
+#include <kernel/unwrap.hpp>
 #include <stdexcept>
 #include <err_opencl.hpp>
 
@@ -29,6 +29,18 @@ namespace opencl
         // Create output placeholder
         Array<T> outArray = createEmptyArray<T>(odims);
 
+        if(odims[0] <= 16) {
+            kernel::unwrap<T, 16 >(outArray, in, wx, wy, sx, sy);
+        } else if (odims[0] <= 32) {
+            kernel::unwrap<T, 32 >(outArray, in, wx, wy, sx, sy);
+        } else if (odims[0] <= 64) {
+            kernel::unwrap<T, 64 >(outArray, in, wx, wy, sx, sy);
+        } else if(odims[0] <= 128) {
+            kernel::unwrap<T, 128>(outArray, in, wx, wy, sx, sy);
+        } else {
+            kernel::unwrap<T, 256>(outArray, in, wx, wy, sx, sy);
+        }
+
         return outArray;
     }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list