[arrayfire] 11/408: Adding padding for strides > 1

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:04 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 9b13e8a7a0f323cbc44d25e2a96553e60d25fdcd
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Thu Jun 18 14:02:02 2015 -0400

    Adding padding for strides > 1
---
 src/backend/cpu/unwrap.cpp          | 19 ++++++++++++++-----
 src/backend/cuda/kernel/unwrap.hpp  | 12 +++++++++---
 src/backend/cuda/unwrap.cu          |  4 ++--
 src/backend/opencl/kernel/unwrap.cl | 31 ++++++++++++++++++++++++++-----
 src/backend/opencl/unwrap.cpp       |  4 ++--
 5 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/src/backend/cpu/unwrap.cpp b/src/backend/cpu/unwrap.cpp
index 3d25661..76bc915 100644
--- a/src/backend/cpu/unwrap.cpp
+++ b/src/backend/cpu/unwrap.cpp
@@ -11,6 +11,8 @@
 #include <unwrap.hpp>
 #include <stdexcept>
 #include <err_cpu.hpp>
+#include <dispatch.hpp>
+#include <math.hpp>
 
 namespace cpu
 {
@@ -19,8 +21,7 @@ namespace cpu
                  const af::dim4 &ostrides, const af::dim4 &istrides,
                  const dim_t wx, const dim_t wy, const dim_t sx, const dim_t sy)
     {
-        dim_t nx = (idims[0] - wx) / sx + 1;
-        //dim_t ny = (idims[1] - wy) / sy + 1;
+        dim_t nx = divup(idims[0] - wx, sx) + (sx >= idims[0] ? 0 : 1);
 
         for(dim_t w = 0; w < odims[3]; w++) {
             for(dim_t z = 0; z < odims[2]; z++) {
@@ -37,11 +38,19 @@ namespace cpu
                           T* optr = outPtr + cOut + col * ostrides[1];
                     const T* iptr = inPtr  + cIn  + starty * istrides[1] + startx;
 
+                    // Condition shortcuts
+                    bool cond = true;
+                    if((startx + wx >= idims[0]) || (starty + wy >= idims[1]))
+                        cond = false;
+
                     for(dim_t y = 0; y < wy; y++) {
                         for(dim_t x = 0; x < wx; x++) {
                             dim_t oloc = (y * wx + x) * ostrides[0];
                             dim_t iloc = (y * istrides[1] + x * istrides[0]);
-                            optr[oloc] = iptr[iloc];
+                            if(cond || (startx + x < idims[0] && starty + y < idims[1]))
+                                optr[oloc] = iptr[iloc];
+                            else
+                                optr[oloc] = scalar<T>(0.0);
                         }
                     }
                 }
@@ -55,8 +64,8 @@ namespace cpu
     {
         af::dim4 idims = in.dims();
 
-        dim_t nx = (idims[0] - wx) / sx + 1;
-        dim_t ny = (idims[1] - wy) / sy + 1;
+        dim_t nx = divup(idims[0] - wx, sx) + (sx >= idims[0] ? 0 : 1);
+        dim_t ny = divup(idims[1] - wy, sy) + (sy >= idims[1] ? 0 : 1);
 
         af::dim4 odims(wx * wy, nx * ny, idims[2], idims[3]);
 
diff --git a/src/backend/cuda/kernel/unwrap.hpp b/src/backend/cuda/kernel/unwrap.hpp
index 32449d4..d5921df 100644
--- a/src/backend/cuda/kernel/unwrap.hpp
+++ b/src/backend/cuda/kernel/unwrap.hpp
@@ -38,8 +38,7 @@ namespace cuda
             const dim_t cIn  = w *  in.strides[3] + z *  in.strides[2];
 
             // Compute the number of windows along dim0 of input
-            const dim_t nx = (in.dims[0] - wx) / sx + 1;
-            //dim_t ny = (in.dims[1] - wy) / sy + 1;
+            const dim_t nx = divup(in.dims[0] - wx, sx) + (sx >= in.dims[0] ? 0 : 1);
 
             // Compute the output column index
             const dim_t colId = blockIdx.x * blockDim.y + threadIdx.y;
@@ -55,6 +54,10 @@ namespace cuda
                   T* optr = out.ptr + cOut + colId * out.strides[1];
             const T* iptr = in.ptr  + cIn  + starty * in.strides[1] + startx;
 
+            bool cond = true;
+            if((startx + wx >= in.dims[0]) || (starty + wy >= in.dims[1]))
+                cond = false;
+
             for(int i = 0; i < repsPerColumn; i++) {
                 // Compute output index local to column
                 const dim_t colIndex = i * threads + threadIdx.x;
@@ -70,7 +73,10 @@ namespace cuda
                 const dim_t inIdx = y * in.strides[1] + x * in.strides[0];
 
                 // Copy
-                optr[outIdx] = iptr[inIdx];
+                if(cond || (startx + x < in.dims[0] && starty + y < in.dims[1]))
+                    optr[outIdx] = iptr[inIdx];
+                else
+                    optr[outIdx] = scalar<T>(0.0);
             }
         }
 
diff --git a/src/backend/cuda/unwrap.cu b/src/backend/cuda/unwrap.cu
index b180420..41aa8dc 100644
--- a/src/backend/cuda/unwrap.cu
+++ b/src/backend/cuda/unwrap.cu
@@ -21,8 +21,8 @@ namespace cuda
     {
         af::dim4 idims = in.dims();
 
-        dim_t nx = (idims[0] - wx) / sx + 1;
-        dim_t ny = (idims[1] - wy) / sy + 1;
+        dim_t nx = divup(idims[0] - wx, sx) + (sx >= idims[0] ? 0 : 1);
+        dim_t ny = divup(idims[1] - wy, sy) + (sy >= idims[1] ? 0 : 1);
 
         af::dim4 odims(wx * wy, nx * ny, idims[2], idims[3]);
 
diff --git a/src/backend/opencl/kernel/unwrap.cl b/src/backend/opencl/kernel/unwrap.cl
index eec5446..331f0da 100644
--- a/src/backend/opencl/kernel/unwrap.cl
+++ b/src/backend/opencl/kernel/unwrap.cl
@@ -7,6 +7,19 @@
  * http://arrayfire.com/licenses/BSD-3-Clause
  ********************************************************/
 
+#define divup(a, b) (((a)+(b)-1)/(b))
+
+#if CPLX
+#define set(a, b) a = b
+#define set_scalar(a, b) do {                   \
+        a.x = b;                                \
+        a.y = 0;                                \
+    } while(0)
+#else
+#define set(a, b) a = b
+#define set_scalar(a, b) a = b
+#endif
+
 __kernel
 void unwrap_kernel(__global T *d_out, const KParam out,
                    __global const T *d_in, const KParam in,
@@ -25,8 +38,9 @@ void unwrap_kernel(__global T *d_out, const KParam out,
     const dim_t cIn  = w *  in.strides[3] + z *  in.strides[2];
 
     // Compute the number of windows along dim0 of input
-    const dim_t nx = (in.dims[0] - wx) / sx + 1;
-    //dim_t ny = (in.dims[1] - wy) / sy + 1;
+    int nx_add = 1;
+    if(sx >= in.dims[0]) nx_add = 0;
+    const dim_t nx = divup(in.dims[0] - wx, sx) + nx_add;
 
     // Compute the output column index
     const dim_t colId = get_group_id(0) * get_local_size(1) + get_local_id(1);
@@ -39,8 +53,12 @@ void unwrap_kernel(__global T *d_out, const KParam out,
     const dim_t starty = (colId / nx) * sy;
 
     // Offset the global pointers to the respective starting indices
-    __global T* optr = d_out + cOut + colId * out.strides[1];
-    __global T* iptr = d_in  + cIn  + starty * in.strides[1] + startx + in.offset;
+    __global       T* optr = d_out + cOut + colId * out.strides[1];
+    __global const T* iptr = d_in  + cIn  + starty * in.strides[1] + startx + in.offset;
+
+    bool cond = true;
+    if((startx + wx >= in.dims[0]) || (starty + wy >= in.dims[1]))
+        cond = false;
 
     for(int i = 0; i < repsPerColumn; i++) {
         // Compute output index local to column
@@ -57,6 +75,9 @@ void unwrap_kernel(__global T *d_out, const KParam out,
         const dim_t inIdx = y * in.strides[1] + x * in.strides[0];
 
         // Copy
-        optr[outIdx] = iptr[inIdx];
+        if(cond || (startx + x < in.dims[0] && starty + y < in.dims[1]))
+            optr[outIdx] = iptr[inIdx];
+        else
+            set_scalar(optr[outIdx], 0);
     }
 }
diff --git a/src/backend/opencl/unwrap.cpp b/src/backend/opencl/unwrap.cpp
index e9f36a0..61f2ddb 100644
--- a/src/backend/opencl/unwrap.cpp
+++ b/src/backend/opencl/unwrap.cpp
@@ -21,8 +21,8 @@ namespace opencl
     {
         af::dim4 idims = in.dims();
 
-        dim_t nx = (idims[0] - wx) / sx + 1;
-        dim_t ny = (idims[1] - wy) / sy + 1;
+        dim_t nx = divup(idims[0] - wx, sx) + (sx >= idims[0] ? 0 : 1);
+        dim_t ny = divup(idims[1] - wy, sy) + (sy >= idims[1] ? 0 : 1);
 
         af::dim4 odims(wx * wy, nx * ny, idims[2], idims[3]);
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list