[arrayfire] 214/408: FEAT: Select for opencl backend

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:00 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 773c02da54f7e1b4786f5bd0cf2efc0d1ffcf674
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Sat Aug 8 22:20:00 2015 -0400

    FEAT: Select for opencl backend
---
 src/backend/opencl/kernel/select.cl  |  90 ++++++++++++++++++
 src/backend/opencl/kernel/select.hpp | 175 +++++++++++++++++++++++++++++++++++
 src/backend/opencl/select.cpp        |   5 +-
 3 files changed, 268 insertions(+), 2 deletions(-)

diff --git a/src/backend/opencl/kernel/select.cl b/src/backend/opencl/kernel/select.cl
new file mode 100644
index 0000000..2cfebb8
--- /dev/null
+++ b/src/backend/opencl/kernel/select.cl
@@ -0,0 +1,90 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#ifndef flip
+#define flip 0
+#endif
+
+#ifndef is_same
+#define is_same 0
+#endif
+
+int getOffset(dim_t *dims, dim_t *strides, dim_t *refdims)
+{
+    int off = 0;
+    off += (dims[3] == refdims[3]) * strides[3];
+    off += (dims[2] == refdims[2]) * strides[2];
+    off += (dims[1] == refdims[1]) * strides[1];
+    off += (dims[0] == refdims[0]);
+    return off;
+}
+
+__kernel
+void select_kernel(__global T *optr, KParam oinfo,
+                   __global char *cptr, KParam cinfo,
+                   __global T *aptr, KParam ainfo,
+                   __global T *bptr, KParam binfo,
+                   int groups_0,
+                   int groups_1)
+{
+    const int idz = get_group_id(0) / groups_0;
+    const int idw = get_group_id(1) / groups_1;
+
+    const int group_id_0 = get_group_id(0) - idz * groups_0;
+    const int group_id_1 = get_group_id(1) - idz * groups_1;
+
+    const int idx = group_id_0 * get_local_size(0) + get_local_id(0);
+    const int idy = group_id_1 * get_local_size(1) + get_local_id(1);
+
+    const int off = idw * oinfo.strides[3] + idz * oinfo.strides[2] + idy * oinfo.strides[1] + idx;
+
+    optr += off;
+
+    if (is_same) {
+        aptr += off;
+        bptr += off;
+        cptr += off;
+    } else {
+        aptr += getOffset(ainfo.dims, ainfo.strides, oinfo.dims);
+        bptr += getOffset(binfo.dims, binfo.strides, oinfo.dims);
+        cptr += getOffset(cinfo.dims, cinfo.strides, oinfo.dims);
+    }
+
+    if (idx < oinfo.dims[0] && idy < oinfo.dims[1] && idz < oinfo.dims[2] && idw < oinfo.dims[3]) {
+        *optr = (*cptr) ? *aptr : *bptr;
+    }
+}
+
+__kernel
+void select_scalar_kernel(__global T *optr, KParam oinfo,
+                          __global char *cptr, KParam cinfo,
+                          __global T *aptr, KParam ainfo,
+                          T b,
+                          int groups_0,
+                          int groups_1)
+{
+    const int idz = get_group_id(0) / groups_0;
+    const int idw = get_group_id(1) / groups_1;
+
+    const int group_id_0 = get_group_id(0) - idz * groups_0;
+    const int group_id_1 = get_group_id(1) - idz * groups_1;
+
+    const int idx = group_id_0 * get_local_size(0) + get_local_id(0);
+    const int idy = group_id_1 * get_local_size(1) + get_local_id(1);
+
+    const int off = idw * oinfo.strides[3] + idz * oinfo.strides[2] + idy * oinfo.strides[1] + idx;
+
+    optr += off;
+    aptr += off;
+    cptr += off;
+
+    if (idx < oinfo.dims[0] && idy < oinfo.dims[1] && idz < oinfo.dims[2] && idw < oinfo.dims[3]) {
+        *optr = ((*cptr) ^ flip) ? *aptr : b;
+    }
+}
diff --git a/src/backend/opencl/kernel/select.hpp b/src/backend/opencl/kernel/select.hpp
new file mode 100644
index 0000000..8f17cf9
--- /dev/null
+++ b/src/backend/opencl/kernel/select.hpp
@@ -0,0 +1,175 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#pragma once
+#include <kernel_headers/select.hpp>
+#include <program.hpp>
+#include <traits.hpp>
+#include <string>
+#include <mutex>
+#include <map>
+#include <dispatch.hpp>
+#include <Param.hpp>
+#include <debug_opencl.hpp>
+#include <types.hpp>
+#include <math.hpp>
+
+using cl::Buffer;
+using cl::Program;
+using cl::Kernel;
+using cl::make_kernel;
+using cl::EnqueueArgs;
+using cl::NDRange;
+using std::string;
+
+namespace opencl
+{
+    namespace kernel
+    {
+        static const uint DIMX = 32;
+        static const uint DIMY =  8;
+
+        template<typename T, bool is_same>
+        void select_launcher(Param out, Param cond, Param a, Param b, int ndims)
+        {
+            static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
+            static std::map<int, Program*>  selProgs;
+            static std::map<int, Kernel*> selKernels;
+
+            int device = getActiveDeviceId();
+
+            std::call_once(compileFlags[device], [device] () {
+
+                    std::ostringstream options;
+                    options << " -D is_same=" << is_same
+                            << " -D T=" << dtype_traits<T>::getName();
+
+                    if (std::is_same<T, double>::value ||
+                        std::is_same<T, cdouble>::value) {
+                        options << " -D USE_DOUBLE";
+                    }
+
+                    cl::Program prog;
+                    buildProgram(prog, select_cl, select_cl_len, options.str());
+                    selProgs[device] = new Program(prog);
+
+                    selKernels[device] = new Kernel(*selProgs[device], "select_kernel");
+                });
+
+
+            int threads[] = {DIMX, DIMY};
+
+            if (ndims == 1) {
+                threads[0] *= threads[1];
+                threads[1] = 1;
+            }
+
+            NDRange local(threads[0],
+                          threads[1]);
+
+
+            int groups_0 = divup(out.info.dims[0], local[0]);
+            int groups_1 = divup(out.info.dims[1], local[1]);
+
+            NDRange global(groups_0 * out.info.dims[2] * local[0],
+                           groups_1 * out.info.dims[3] * local[1]);
+
+            auto selectOp = make_kernel<Buffer, KParam,
+                                        Buffer, KParam,
+                                        Buffer, KParam,
+                                        Buffer, KParam,
+                                        int, int>(*selKernels[device]);
+
+            selectOp(EnqueueArgs(getQueue(), global, local),
+                     *out.data, out.info,
+                     *cond.data, cond.info,
+                     *a.data, a.info,
+                     *b.data, b.info,
+                     groups_0, groups_1);
+
+        }
+
+        template<typename T>
+        void select(Param out, Param cond, Param a, Param b, int ndims)
+        {
+            try {
+                bool is_same = true;
+                for (int i = 0; i < 4; i++) {
+                    is_same &= (a.info.dims[i] == b.info.dims[i]);
+                }
+
+                if (is_same) {
+                    select_launcher<T, true >(out, cond, a, b, ndims);
+                } else {
+                    select_launcher<T, false>(out, cond, a, b, ndims);
+                }
+            } catch (cl::Error err) {
+                CL_TO_AF_ERROR(err);
+            }
+        }
+
+        template<typename T, bool flip>
+        void select_scalar(Param out, Param cond, Param a, const double b, int ndims)
+        {
+            static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
+            static std::map<int, Program*>  selProgs;
+            static std::map<int, Kernel*> selKernels;
+
+            int device = getActiveDeviceId();
+
+            std::call_once(compileFlags[device], [device] () {
+
+                    std::ostringstream options;
+                    options << " -D flip=" << flip
+                            << " -D T=" << dtype_traits<T>::getName();
+
+                    if (std::is_same<T, double>::value ||
+                        std::is_same<T, cdouble>::value) {
+                        options << " -D USE_DOUBLE";
+                    }
+
+                    cl::Program prog;
+                    buildProgram(prog, select_cl, select_cl_len, options.str());
+                    selProgs[device] = new Program(prog);
+
+                    selKernels[device] = new Kernel(*selProgs[device], "select_scalar_kernel");
+                });
+
+
+            int threads[] = {DIMX, DIMY};
+
+            if (ndims == 1) {
+                threads[0] *= threads[1];
+                threads[1] = 1;
+            }
+
+            NDRange local(threads[0],
+                          threads[1]);
+
+            int groups_0 = divup(out.info.dims[0], local[0]);
+            int groups_1 = divup(out.info.dims[1], local[1]);
+
+            NDRange global(groups_0 * out.info.dims[2] * local[0],
+                           groups_1 * out.info.dims[3] * local[1]);
+
+            auto selectOp = make_kernel<Buffer, KParam,
+                                        Buffer, KParam,
+                                        Buffer, KParam,
+                                        T,
+                                        int, int>(*selKernels[device]);
+
+            selectOp(EnqueueArgs(getQueue(), global, local),
+                     *out.data, out.info,
+                     *cond.data, cond.info,
+                     *a.data, a.info,
+                     scalar<T>(b),
+                     groups_0, groups_1);
+        }
+    }
+}
diff --git a/src/backend/opencl/select.cpp b/src/backend/opencl/select.cpp
index 92bcc2b..5c9a5d0 100644
--- a/src/backend/opencl/select.cpp
+++ b/src/backend/opencl/select.cpp
@@ -10,19 +10,20 @@
 #include <Array.hpp>
 #include <select.hpp>
 #include <err_opencl.hpp>
+#include <kernel/select.hpp>
 
 namespace opencl
 {
     template<typename T>
     void select(Array<T> &out, const Array<char> &cond, const Array<T> &a, const Array<T> &b)
     {
-        OPENCL_NOT_SUPPORTED();
+        kernel::select<T>(out, cond, a, b, out.ndims());
     }
 
     template<typename T, bool flip>
     void select_scalar(Array<T> &out, const Array<char> &cond, const Array<T> &a, const double &b)
     {
-        OPENCL_NOT_SUPPORTED();
+        kernel::select_scalar<T, flip>(out, cond, a, b, out.ndims());
     }
 
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list