[arrayfire] 25/29: Adding missing offsets for various OpenCL kernels

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Oct 19 17:48:55 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository arrayfire.

commit 2dfad04a4797dc52e21756fbc540d7e315e507ea
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Sat Oct 17 20:53:31 2015 -0400

    Adding missing offsets for various OpenCL kernels
---
 src/backend/opencl/kernel/assign.cl             |  5 +++--
 src/backend/opencl/kernel/fast.cl               |  2 +-
 src/backend/opencl/kernel/hsv_rgb.cl            |  2 +-
 src/backend/opencl/kernel/iir.cl                |  4 ++--
 src/backend/opencl/kernel/index.cl              |  5 +++--
 src/backend/opencl/kernel/lookup.cl             |  5 +++--
 src/backend/opencl/kernel/nearest_neighbour.cl  | 12 ++++++------
 src/backend/opencl/kernel/nearest_neighbour.hpp |  5 +++--
 src/backend/opencl/kernel/regions.cl            |  6 ++++--
 src/backend/opencl/kernel/scan_dim.cl           |  2 ++
 src/backend/opencl/kernel/select.cl             | 17 ++++++++++++-----
 src/backend/opencl/kernel/susan.cl              |  5 ++++-
 src/backend/opencl/kernel/susan.hpp             |  4 +++-
 src/backend/opencl/kernel/where.cl              |  2 +-
 src/backend/opencl/kernel/wrap.cl               |  2 +-
 src/backend/opencl/nearest_neighbour.cpp        | 11 ++++-------
 src/backend/opencl/susan.cpp                    | 18 +++++++++---------
 17 files changed, 62 insertions(+), 45 deletions(-)

diff --git a/src/backend/opencl/kernel/assign.cl b/src/backend/opencl/kernel/assign.cl
index 167031b..927ccdd 100644
--- a/src/backend/opencl/kernel/assign.cl
+++ b/src/backend/opencl/kernel/assign.cl
@@ -52,8 +52,9 @@ void assignKernel(global T * optr, KParam oInfo, global const T * iptr, KParam i
         global const T *src = iptr + (gx*iInfo.strides[0]+
                                       gy*iInfo.strides[1]+
                                       gz*iInfo.strides[2]+
-                                      gw*iInfo.strides[3]);
-        global T *dst = optr + (i+j+k+l);
+                                      gw*iInfo.strides[3]+
+                                      iInfo.offset);
+        global T *dst = optr + (i+j+k+l) + oInfo.offset;
         // set the output
         dst[0] = src[0];
     }
diff --git a/src/backend/opencl/kernel/fast.cl b/src/backend/opencl/kernel/fast.cl
index 1c9fdf3..695e167 100644
--- a/src/backend/opencl/kernel/fast.cl
+++ b/src/backend/opencl/kernel/fast.cl
@@ -165,7 +165,7 @@ void locate_features(
     unsigned lx = bx / 2 + 3;
     unsigned ly = by / 2 + 3;
 
-    load_shared_image(in, iInfo, local_image, ix, iy, bx, by, x, y, lx, ly);
+    load_shared_image(in + iInfo.offset, iInfo, local_image, ix, iy, bx, by, x, y, lx, ly);
     barrier(CLK_LOCAL_MEM_FENCE);
     locate_features_core(local_image, score,
                          iInfo, thr, x, y, edge);
diff --git a/src/backend/opencl/kernel/hsv_rgb.cl b/src/backend/opencl/kernel/hsv_rgb.cl
index a62ab6a..d309525 100644
--- a/src/backend/opencl/kernel/hsv_rgb.cl
+++ b/src/backend/opencl/kernel/hsv_rgb.cl
@@ -24,7 +24,7 @@ void convert(global T * out, KParam oInfo, global const T * in, KParam iInfo, in
         int oIdx1 = oIdx0 + oInfo.strides[2];
         int oIdx2 = oIdx1 + oInfo.strides[2];
 
-        int iIdx0 = gx * iInfo.strides[0] + gy * iInfo.strides[1];
+        int iIdx0 = gx * iInfo.strides[0] + gy * iInfo.strides[1] + iInfo.offset;
         int iIdx1 = iIdx0 + iInfo.strides[2];
         int iIdx2 = iIdx1 + iInfo.strides[2];
 
diff --git a/src/backend/opencl/kernel/iir.cl b/src/backend/opencl/kernel/iir.cl
index ce970d7..b189065 100644
--- a/src/backend/opencl/kernel/iir.cl
+++ b/src/backend/opencl/kernel/iir.cl
@@ -72,8 +72,8 @@ void iir_kernel(      __global T *yptr, const KParam yinfo,
 #endif
 
     __global T *d_y = yptr + y_off;
-    const __global T *d_c = cptr + c_off;
-    const __global T *d_a = aptr + a_off;
+    const __global T *d_c = cptr + c_off + cinfo.offset;
+    const __global T *d_a = aptr + a_off + ainfo.offset;
     const int repeat = (num_a + get_local_size(0) - 1) / get_local_size(0);
 
     for (int ii = 0; ii < MAX_A_SIZE / get_local_size(0); ii++) {
diff --git a/src/backend/opencl/kernel/index.cl b/src/backend/opencl/kernel/index.cl
index 2fe9287..0d2839d 100644
--- a/src/backend/opencl/kernel/index.cl
+++ b/src/backend/opencl/kernel/index.cl
@@ -49,11 +49,12 @@ void indexKernel(global T * optr, KParam oInfo, global const T * iptr, KParam iI
         int k = p.strds[2] * trimIndex(s2 ? gz+p.offs[2] : ptr2[gz], iInfo.dims[2]);
         int l = p.strds[3] * trimIndex(s3 ? gw+p.offs[3] : ptr3[gw], iInfo.dims[3]);
         // offset input and output pointers
-        global const T *src = iptr + (i+j+k+l);
+        global const T *src = iptr + (i+j+k+l) + iInfo.offset;
         global T *dst = optr + (gx*oInfo.strides[0]+
                                 gy*oInfo.strides[1]+
                                 gz*oInfo.strides[2]+
-                                gw*oInfo.strides[3]);
+                                gw*oInfo.strides[3]+
+                                oInfo.offset);
         // set the output
         dst[0] = src[0];
     }
diff --git a/src/backend/opencl/kernel/lookup.cl b/src/backend/opencl/kernel/lookup.cl
index b573429..d24572f 100644
--- a/src/backend/opencl/kernel/lookup.cl
+++ b/src/backend/opencl/kernel/lookup.cl
@@ -45,9 +45,10 @@ void lookupND(global in_t * out,
     int k = iInfo.strides[2]*(DIM==2 ? trimIndex((int)idxPtr[gz], iInfo.dims[2]): gz);
     int l = iInfo.strides[3]*(DIM==3 ? trimIndex((int)idxPtr[gw], iInfo.dims[3]): gw);
 
-    global const in_t *inPtr = in + (i+j+k+l);
+    global const in_t *inPtr = in + (i+j+k+l) + iInfo.offset;
     global in_t *outPtr = out + (gx*oInfo.strides[0]+gy*oInfo.strides[1]+
-                                 gz*oInfo.strides[2]+gw*oInfo.strides[3]);
+                                 gz*oInfo.strides[2]+gw*oInfo.strides[3]+
+                                 oInfo.offset);
 
     if (gx<oInfo.dims[0] && gy<oInfo.dims[1] && gz<oInfo.dims[2] && gw<oInfo.dims[3]) {
         outPtr[0] = inPtr[0];
diff --git a/src/backend/opencl/kernel/nearest_neighbour.cl b/src/backend/opencl/kernel/nearest_neighbour.cl
index 024fdc6..6247d1d 100644
--- a/src/backend/opencl/kernel/nearest_neighbour.cl
+++ b/src/backend/opencl/kernel/nearest_neighbour.cl
@@ -77,7 +77,7 @@ void nearest_neighbour_unroll(
         // Copy local_size(0) training features to shared memory
         #pragma unroll
         for (unsigned i = 0; i < FEAT_LEN; i++) {
-            l_train[i * get_local_size(0) + tid] = train[i * ntrain + f];
+            l_train[i * get_local_size(0) + tid] = train[i * ntrain + f + tInfo.offset];
         }
     }
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -89,7 +89,7 @@ void nearest_neighbour_unroll(
         // Load one query feature that will be tested against all training
         // features in current block
         if (tid < FEAT_LEN && valid_feat) {
-            l_query[tid] = query[tid * nquery + j];
+            l_query[tid] = query[tid * nquery + j + qInfo.offset];
         }
         barrier(CLK_LOCAL_MEM_FENCE);
 
@@ -102,7 +102,7 @@ void nearest_neighbour_unroll(
 #ifdef USE_LOCAL_MEM
                 dist += DISTOP(l_train[k * get_local_size(0) + tid], l_query[k]);
 #else
-                dist += DISTOP(train[k * ntrain + f], l_query[k]);
+                dist += DISTOP(train[k * ntrain + f + tInfo.offset], l_query[k]);
 #endif
             }
         }
@@ -217,7 +217,7 @@ void nearest_neighbour(
     if (valid_feat) {
         // Copy local_size(0) training features to shared memory
         for (unsigned i = 0; i < feat_len; i++) {
-            l_train[i * get_local_size(0) + tid] = train[i * ntrain + f];
+            l_train[i * get_local_size(0) + tid] = train[i * ntrain + f + tInfo.offset];
         }
     }
     barrier(CLK_LOCAL_MEM_FENCE);
@@ -229,7 +229,7 @@ void nearest_neighbour(
         // Load one query feature that will be tested against all training
         // features in current block
         if (tid < feat_len && valid_feat) {
-            l_query[tid] = query[tid * nquery + j];
+            l_query[tid] = query[tid * nquery + j + qInfo.offset];
         }
         barrier(CLK_LOCAL_MEM_FENCE);
 
@@ -241,7 +241,7 @@ void nearest_neighbour(
 #ifdef USE_LOCAL_MEM
                 dist += DISTOP(l_train[k * get_local_size(0) + tid], l_query[k]);
 #else
-                dist += DISTOP(train[k * ntrain + f], l_query[k]);
+                dist += DISTOP(train[k * ntrain + f + tInfo.offset], l_query[k]);
 #endif
             }
         }
diff --git a/src/backend/opencl/kernel/nearest_neighbour.hpp b/src/backend/opencl/kernel/nearest_neighbour.hpp
index d7c800d..34688b2 100644
--- a/src/backend/opencl/kernel/nearest_neighbour.hpp
+++ b/src/backend/opencl/kernel/nearest_neighbour.hpp
@@ -28,14 +28,15 @@ namespace kernel
 
 static const unsigned THREADS = 256;
 
-template<typename T, typename To, af_match_type dist_type, bool use_lmem>
+template<typename T, typename To, af_match_type dist_type>
 void nearest_neighbour(Param idx,
                        Param dist,
                        Param query,
                        Param train,
                        const dim_t dist_dim,
                        const unsigned n_dist,
-                       const size_t lmem_sz)
+                       const size_t lmem_sz,
+                       bool use_lmem)
 {
     try {
         const unsigned feat_len = query.info.dims[dist_dim];
diff --git a/src/backend/opencl/kernel/regions.cl b/src/backend/opencl/kernel/regions.cl
index 38bc159..51e4d9a 100644
--- a/src/backend/opencl/kernel/regions.cl
+++ b/src/backend/opencl/kernel/regions.cl
@@ -12,9 +12,10 @@
 __kernel
 void initial_label(global    T * equiv_map,
                    KParam        eInfo,
-                   global char * bin,
+                   global char * bin_,
                    KParam        bInfo)
 {
+    global char *bin = bin_ + bInfo.offset;
     const int base_x = (get_group_id(0) * get_local_size(0) * N_PER_THREAD) + get_local_id(0);
     const int base_y = (get_group_id(1) * get_local_size(1) * N_PER_THREAD) + get_local_id(1);
 
@@ -36,10 +37,11 @@ void initial_label(global    T * equiv_map,
 __kernel
 void final_relabel(global       T    * equiv_map,
                    KParam              eInfo,
-                   global       char * bin,
+                   global       char * bin_,
                    KParam              bInfo,
                    global const T    * d_tmp)
 {
+    global char *bin = bin_ + bInfo.offset;
     const int base_x = (get_group_id(0) * get_local_size(0) * N_PER_THREAD) + get_local_id(0);
     const int base_y = (get_group_id(1) * get_local_size(1) * N_PER_THREAD) + get_local_id(1);
 
diff --git a/src/backend/opencl/kernel/scan_dim.cl b/src/backend/opencl/kernel/scan_dim.cl
index 5434189..b15d8ed 100644
--- a/src/backend/opencl/kernel/scan_dim.cl
+++ b/src/backend/opencl/kernel/scan_dim.cl
@@ -38,6 +38,8 @@ void scan_dim_kernel(__global To *oData, KParam oInfo,
     ids[dim] = ids[dim] * DIMY * lim + lidy;
     oData  += ids[3] * oInfo.strides[3] + ids[2] * oInfo.strides[2] + ids[1] * oInfo.strides[1] + ids[0];
     iData  += ids[3] *  iInfo.strides[3] + ids[2] *  iInfo.strides[2] + ids[1] *  iInfo.strides[1] + ids[0];
+    iData  += iInfo.offset;
+
     int id_dim = ids[dim];
     const int out_dim = oInfo.dims[dim];
 
diff --git a/src/backend/opencl/kernel/select.cl b/src/backend/opencl/kernel/select.cl
index 2cfebb8..94a3603 100644
--- a/src/backend/opencl/kernel/select.cl
+++ b/src/backend/opencl/kernel/select.cl
@@ -27,12 +27,16 @@ int getOffset(dim_t *dims, dim_t *strides, dim_t *refdims)
 
 __kernel
 void select_kernel(__global T *optr, KParam oinfo,
-                   __global char *cptr, KParam cinfo,
-                   __global T *aptr, KParam ainfo,
-                   __global T *bptr, KParam binfo,
+                   __global char *cptr_, KParam cinfo,
+                   __global T *aptr_, KParam ainfo,
+                   __global T *bptr_, KParam binfo,
                    int groups_0,
                    int groups_1)
 {
+    __global char *cptr = cptr_ + cinfo.offset;
+    __global T *aptr = aptr_ + ainfo.offset;
+    __global T *bptr = bptr_ + binfo.offset;
+
     const int idz = get_group_id(0) / groups_0;
     const int idw = get_group_id(1) / groups_1;
 
@@ -63,12 +67,15 @@ void select_kernel(__global T *optr, KParam oinfo,
 
 __kernel
 void select_scalar_kernel(__global T *optr, KParam oinfo,
-                          __global char *cptr, KParam cinfo,
-                          __global T *aptr, KParam ainfo,
+                          __global char *cptr_, KParam cinfo,
+                          __global T *aptr_, KParam ainfo,
                           T b,
                           int groups_0,
                           int groups_1)
 {
+    __global char *cptr = cptr_ + cinfo.offset;
+    __global T *aptr = aptr_ + ainfo.offset;
+
     const int idz = get_group_id(0) / groups_0;
     const int idw = get_group_id(1) / groups_1;
 
diff --git a/src/backend/opencl/kernel/susan.cl b/src/backend/opencl/kernel/susan.cl
index 5bfd094..97dc8d6 100644
--- a/src/backend/opencl/kernel/susan.cl
+++ b/src/backend/opencl/kernel/susan.cl
@@ -11,11 +11,14 @@
 
 #ifdef RESPONSE
 kernel
-void susan_responses(global T* out, global const T* in,
+void susan_responses(global T* out, global const T* in_,
+                     const unsigned in_off,
                      const unsigned idim0, const unsigned idim1,
                      const float t, const float g,
                      const unsigned edge)
 {
+    global const T* in = in_ + in_off;
+
     const int rSqrd   = RADIUS*RADIUS;
     const int windLen = 2*RADIUS+1;
     const int shrdLen = BLOCK_X + windLen-1;
diff --git a/src/backend/opencl/kernel/susan.hpp b/src/backend/opencl/kernel/susan.hpp
index 814616e..a3f669d 100644
--- a/src/backend/opencl/kernel/susan.hpp
+++ b/src/backend/opencl/kernel/susan.hpp
@@ -36,6 +36,7 @@ static const unsigned SUSAN_THREADS_Y = 16;
 
 template<typename T, unsigned radius>
 void susan(cl::Buffer* out, const cl::Buffer* in,
+           const unsigned in_off,
            const unsigned idim0, const unsigned idim1,
            const float t, const float g, const unsigned edge)
 {
@@ -69,6 +70,7 @@ void susan(cl::Buffer* out, const cl::Buffer* in,
             });
 
         auto susanOp = make_kernel<Buffer, Buffer,
+                                   unsigned,
                                    unsigned, unsigned,
                                    float, float, unsigned>(*suKernel[device]);
 
@@ -76,7 +78,7 @@ void susan(cl::Buffer* out, const cl::Buffer* in,
         NDRange global(divup(idim0-2*edge, local[0])*local[0],
                        divup(idim1-2*edge, local[1])*local[1]);
 
-        susanOp(EnqueueArgs(getQueue(), global, local), *out, *in, idim0, idim1, t, g, edge);
+        susanOp(EnqueueArgs(getQueue(), global, local), *out, *in, in_off, idim0, idim1, t, g, edge);
 
     } catch (cl::Error err) {
         CL_TO_AF_ERROR(err);
diff --git a/src/backend/opencl/kernel/where.cl b/src/backend/opencl/kernel/where.cl
index ab4b31e..08c4731 100644
--- a/src/backend/opencl/kernel/where.cl
+++ b/src/backend/opencl/kernel/where.cl
@@ -42,7 +42,7 @@ void get_out_idx_kernel(__global uint *oData,
     const uint gid = wid * rtInfo.strides[3] + zid * rtInfo.strides[2] + yid * rtInfo.strides[1] + groupId_x;
 
     otData += wid * otInfo.strides[3] + zid * otInfo.strides[2] + yid * otInfo.strides[1];
-    iData  += wid *  iInfo.strides[3] + zid *  iInfo.strides[2] + yid *  iInfo.strides[1];
+    iData  += wid *  iInfo.strides[3] + zid *  iInfo.strides[2] + yid *  iInfo.strides[1] + iInfo.offset;
 
     bool cond = (yid < otInfo.dims[1]) && (zid < otInfo.dims[2]) && (wid < otInfo.dims[3]);
     if (!cond) return;
diff --git a/src/backend/opencl/kernel/wrap.cl b/src/backend/opencl/kernel/wrap.cl
index 8171f9e..c8242a7 100644
--- a/src/backend/opencl/kernel/wrap.cl
+++ b/src/backend/opencl/kernel/wrap.cl
@@ -27,7 +27,7 @@ void wrap_kernel(__global T *optr, KParam out,
     dim_t oidx1 = get_local_id(1) + get_local_size(1) * groupId_y;
 
     optr += idx2 * out.strides[2] + idx3 * out.strides[3];
-    iptr += idx2 *  in.strides[2] + idx3 *  in.strides[3];
+    iptr += idx2 *  in.strides[2] + idx3 *  in.strides[3] + in.offset;
 
 
     if (oidx0 >= out.dims[0] || oidx1 >= out.dims[1]) return;
diff --git a/src/backend/opencl/nearest_neighbour.cpp b/src/backend/opencl/nearest_neighbour.cpp
index b2cb142..11fc832 100644
--- a/src/backend/opencl/nearest_neighbour.cpp
+++ b/src/backend/opencl/nearest_neighbour.cpp
@@ -26,8 +26,8 @@ static const unsigned THREADS = 256;
 
 template<typename T, typename To, af_match_type dist_type>
 void nearest_neighbour_(Array<uint>& idx, Array<To>& dist,
-                     const Array<T>& query, const Array<T>& train,
-                     const uint dist_dim, const uint n_dist)
+                        const Array<T>& query, const Array<T>& train,
+                        const uint dist_dim, const uint n_dist)
 {
     uint sample_dim = (dist_dim == 0) ? 1 : 0;
     const dim4 qDims = query.dims();
@@ -75,11 +75,8 @@ void nearest_neighbour_(Array<uint>& idx, Array<To>& dist,
             kernel::transpose<T, false, false>(trainT, train);
     }
 
-    if (use_lmem) {
-        kernel::nearest_neighbour<T, To, dist_type, true >(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-    } else {
-        kernel::nearest_neighbour<T, To, dist_type, false>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz);
-    }
+    kernel::nearest_neighbour<T, To, dist_type>(idx, dist, queryT, trainT, 1, n_dist, lmem_sz, use_lmem);
+
 }
 
 template<typename T, typename To>
diff --git a/src/backend/opencl/susan.cpp b/src/backend/opencl/susan.cpp
index 71aca1b..5e9b1a6 100644
--- a/src/backend/opencl/susan.cpp
+++ b/src/backend/opencl/susan.cpp
@@ -35,15 +35,15 @@ unsigned susan(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out,
     cl::Buffer* resp = bufferAlloc(in.elements()*sizeof(float));
 
     switch(radius) {
-        case 1: kernel::susan<T, 1>(resp, in.get(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
-        case 2: kernel::susan<T, 2>(resp, in.get(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
-        case 3: kernel::susan<T, 3>(resp, in.get(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
-        case 4: kernel::susan<T, 4>(resp, in.get(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
-        case 5: kernel::susan<T, 5>(resp, in.get(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
-        case 6: kernel::susan<T, 6>(resp, in.get(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
-        case 7: kernel::susan<T, 7>(resp, in.get(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
-        case 8: kernel::susan<T, 8>(resp, in.get(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
-        case 9: kernel::susan<T, 9>(resp, in.get(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
+    case 1: kernel::susan<T, 1>(resp, in.get(), in.getOffset(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
+    case 2: kernel::susan<T, 2>(resp, in.get(), in.getOffset(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
+    case 3: kernel::susan<T, 3>(resp, in.get(), in.getOffset(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
+    case 4: kernel::susan<T, 4>(resp, in.get(), in.getOffset(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
+    case 5: kernel::susan<T, 5>(resp, in.get(), in.getOffset(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
+    case 6: kernel::susan<T, 6>(resp, in.get(), in.getOffset(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
+    case 7: kernel::susan<T, 7>(resp, in.get(), in.getOffset(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
+    case 8: kernel::susan<T, 8>(resp, in.get(), in.getOffset(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
+    case 9: kernel::susan<T, 9>(resp, in.get(), in.getOffset(), idims[0], idims[1], diff_thr, geom_thr, edge); break;
     }
 
     unsigned corners_found = kernel::nonMaximal<T>(x_corners, y_corners, resp_corners,

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list