[arrayfire] 214/408: FEAT: Select for opencl backend
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:00 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 773c02da54f7e1b4786f5bd0cf2efc0d1ffcf674
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Sat Aug 8 22:20:00 2015 -0400
FEAT: Select for opencl backend
---
src/backend/opencl/kernel/select.cl | 90 ++++++++++++++++++
src/backend/opencl/kernel/select.hpp | 175 +++++++++++++++++++++++++++++++++++
src/backend/opencl/select.cpp | 5 +-
3 files changed, 268 insertions(+), 2 deletions(-)
diff --git a/src/backend/opencl/kernel/select.cl b/src/backend/opencl/kernel/select.cl
new file mode 100644
index 0000000..2cfebb8
--- /dev/null
+++ b/src/backend/opencl/kernel/select.cl
@@ -0,0 +1,90 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#ifndef flip
+#define flip 0
+#endif
+
+#ifndef is_same
+#define is_same 0
+#endif
+
+int getOffset(dim_t *dims, dim_t *strides, dim_t *refdims)
+{
+ int off = 0;
+ off += (dims[3] == refdims[3]) * strides[3];
+ off += (dims[2] == refdims[2]) * strides[2];
+ off += (dims[1] == refdims[1]) * strides[1];
+ off += (dims[0] == refdims[0]);
+ return off;
+}
+
+__kernel
+void select_kernel(__global T *optr, KParam oinfo,
+ __global char *cptr, KParam cinfo,
+ __global T *aptr, KParam ainfo,
+ __global T *bptr, KParam binfo,
+ int groups_0,
+ int groups_1)
+{
+ const int idz = get_group_id(0) / groups_0;
+ const int idw = get_group_id(1) / groups_1;
+
+ const int group_id_0 = get_group_id(0) - idz * groups_0;
+ const int group_id_1 = get_group_id(1) - idz * groups_1;
+
+ const int idx = group_id_0 * get_local_size(0) + get_local_id(0);
+ const int idy = group_id_1 * get_local_size(1) + get_local_id(1);
+
+ const int off = idw * oinfo.strides[3] + idz * oinfo.strides[2] + idy * oinfo.strides[1] + idx;
+
+ optr += off;
+
+ if (is_same) {
+ aptr += off;
+ bptr += off;
+ cptr += off;
+ } else {
+ aptr += getOffset(ainfo.dims, ainfo.strides, oinfo.dims);
+ bptr += getOffset(binfo.dims, binfo.strides, oinfo.dims);
+ cptr += getOffset(cinfo.dims, cinfo.strides, oinfo.dims);
+ }
+
+ if (idx < oinfo.dims[0] && idy < oinfo.dims[1] && idz < oinfo.dims[2] && idw < oinfo.dims[3]) {
+ *optr = (*cptr) ? *aptr : *bptr;
+ }
+}
+
+__kernel
+void select_scalar_kernel(__global T *optr, KParam oinfo,
+ __global char *cptr, KParam cinfo,
+ __global T *aptr, KParam ainfo,
+ T b,
+ int groups_0,
+ int groups_1)
+{
+ const int idz = get_group_id(0) / groups_0;
+ const int idw = get_group_id(1) / groups_1;
+
+ const int group_id_0 = get_group_id(0) - idz * groups_0;
+ const int group_id_1 = get_group_id(1) - idz * groups_1;
+
+ const int idx = group_id_0 * get_local_size(0) + get_local_id(0);
+ const int idy = group_id_1 * get_local_size(1) + get_local_id(1);
+
+ const int off = idw * oinfo.strides[3] + idz * oinfo.strides[2] + idy * oinfo.strides[1] + idx;
+
+ optr += off;
+ aptr += off;
+ cptr += off;
+
+ if (idx < oinfo.dims[0] && idy < oinfo.dims[1] && idz < oinfo.dims[2] && idw < oinfo.dims[3]) {
+ *optr = ((*cptr) ^ flip) ? *aptr : b;
+ }
+}
diff --git a/src/backend/opencl/kernel/select.hpp b/src/backend/opencl/kernel/select.hpp
new file mode 100644
index 0000000..8f17cf9
--- /dev/null
+++ b/src/backend/opencl/kernel/select.hpp
@@ -0,0 +1,175 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#pragma once
+#include <kernel_headers/select.hpp>
+#include <program.hpp>
+#include <traits.hpp>
+#include <string>
+#include <mutex>
+#include <map>
+#include <dispatch.hpp>
+#include <Param.hpp>
+#include <debug_opencl.hpp>
+#include <types.hpp>
+#include <math.hpp>
+
+using cl::Buffer;
+using cl::Program;
+using cl::Kernel;
+using cl::make_kernel;
+using cl::EnqueueArgs;
+using cl::NDRange;
+using std::string;
+
+namespace opencl
+{
+ namespace kernel
+ {
+ static const uint DIMX = 32;
+ static const uint DIMY = 8;
+
+ template<typename T, bool is_same>
+ void select_launcher(Param out, Param cond, Param a, Param b, int ndims)
+ {
+ static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
+ static std::map<int, Program*> selProgs;
+ static std::map<int, Kernel*> selKernels;
+
+ int device = getActiveDeviceId();
+
+ std::call_once(compileFlags[device], [device] () {
+
+ std::ostringstream options;
+ options << " -D is_same=" << is_same
+ << " -D T=" << dtype_traits<T>::getName();
+
+ if (std::is_same<T, double>::value ||
+ std::is_same<T, cdouble>::value) {
+ options << " -D USE_DOUBLE";
+ }
+
+ cl::Program prog;
+ buildProgram(prog, select_cl, select_cl_len, options.str());
+ selProgs[device] = new Program(prog);
+
+ selKernels[device] = new Kernel(*selProgs[device], "select_kernel");
+ });
+
+
+ int threads[] = {DIMX, DIMY};
+
+ if (ndims == 1) {
+ threads[0] *= threads[1];
+ threads[1] = 1;
+ }
+
+ NDRange local(threads[0],
+ threads[1]);
+
+
+ int groups_0 = divup(out.info.dims[0], local[0]);
+ int groups_1 = divup(out.info.dims[1], local[1]);
+
+ NDRange global(groups_0 * out.info.dims[2] * local[0],
+ groups_1 * out.info.dims[3] * local[1]);
+
+ auto selectOp = make_kernel<Buffer, KParam,
+ Buffer, KParam,
+ Buffer, KParam,
+ Buffer, KParam,
+ int, int>(*selKernels[device]);
+
+ selectOp(EnqueueArgs(getQueue(), global, local),
+ *out.data, out.info,
+ *cond.data, cond.info,
+ *a.data, a.info,
+ *b.data, b.info,
+ groups_0, groups_1);
+
+ }
+
+ template<typename T>
+ void select(Param out, Param cond, Param a, Param b, int ndims)
+ {
+ try {
+ bool is_same = true;
+ for (int i = 0; i < 4; i++) {
+ is_same &= (a.info.dims[i] == b.info.dims[i]);
+ }
+
+ if (is_same) {
+ select_launcher<T, true >(out, cond, a, b, ndims);
+ } else {
+ select_launcher<T, false>(out, cond, a, b, ndims);
+ }
+ } catch (cl::Error err) {
+ CL_TO_AF_ERROR(err);
+ }
+ }
+
+ template<typename T, bool flip>
+ void select_scalar(Param out, Param cond, Param a, const double b, int ndims)
+ {
+ static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
+ static std::map<int, Program*> selProgs;
+ static std::map<int, Kernel*> selKernels;
+
+ int device = getActiveDeviceId();
+
+ std::call_once(compileFlags[device], [device] () {
+
+ std::ostringstream options;
+ options << " -D flip=" << flip
+ << " -D T=" << dtype_traits<T>::getName();
+
+ if (std::is_same<T, double>::value ||
+ std::is_same<T, cdouble>::value) {
+ options << " -D USE_DOUBLE";
+ }
+
+ cl::Program prog;
+ buildProgram(prog, select_cl, select_cl_len, options.str());
+ selProgs[device] = new Program(prog);
+
+ selKernels[device] = new Kernel(*selProgs[device], "select_scalar_kernel");
+ });
+
+
+ int threads[] = {DIMX, DIMY};
+
+ if (ndims == 1) {
+ threads[0] *= threads[1];
+ threads[1] = 1;
+ }
+
+ NDRange local(threads[0],
+ threads[1]);
+
+ int groups_0 = divup(out.info.dims[0], local[0]);
+ int groups_1 = divup(out.info.dims[1], local[1]);
+
+ NDRange global(groups_0 * out.info.dims[2] * local[0],
+ groups_1 * out.info.dims[3] * local[1]);
+
+ auto selectOp = make_kernel<Buffer, KParam,
+ Buffer, KParam,
+ Buffer, KParam,
+ T,
+ int, int>(*selKernels[device]);
+
+ selectOp(EnqueueArgs(getQueue(), global, local),
+ *out.data, out.info,
+ *cond.data, cond.info,
+ *a.data, a.info,
+ scalar<T>(b),
+ groups_0, groups_1);
+ }
+ }
+}
diff --git a/src/backend/opencl/select.cpp b/src/backend/opencl/select.cpp
index 92bcc2b..5c9a5d0 100644
--- a/src/backend/opencl/select.cpp
+++ b/src/backend/opencl/select.cpp
@@ -10,19 +10,20 @@
#include <Array.hpp>
#include <select.hpp>
#include <err_opencl.hpp>
+#include <kernel/select.hpp>
namespace opencl
{
template<typename T>
void select(Array<T> &out, const Array<char> &cond, const Array<T> &a, const Array<T> &b)
{
- OPENCL_NOT_SUPPORTED();
+ kernel::select<T>(out, cond, a, b, out.ndims());
}
template<typename T, bool flip>
void select_scalar(Array<T> &out, const Array<char> &cond, const Array<T> &a, const double &b)
{
- OPENCL_NOT_SUPPORTED();
+ kernel::select_scalar<T, flip>(out, cond, a, b, out.ndims());
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list