[arrayfire] 10/408: Bugfixes, comments

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:04 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 4badf65315400472780957fd5131c5191d2b00df
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Thu Jun 18 12:39:54 2015 -0400

    Bugfixes, comments
---
 src/backend/cuda/kernel/unwrap.hpp   | 11 ++++++++++-
 src/backend/opencl/kernel/unwrap.cl  | 13 +++++++++++--
 src/backend/opencl/kernel/unwrap.hpp |  6 +++---
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/src/backend/cuda/kernel/unwrap.hpp b/src/backend/cuda/kernel/unwrap.hpp
index 9ac0663..32449d4 100644
--- a/src/backend/cuda/kernel/unwrap.hpp
+++ b/src/backend/cuda/kernel/unwrap.hpp
@@ -26,41 +26,50 @@ namespace cuda
                            const dim_t wx, const dim_t wy, const dim_t sx, const dim_t sy,
                            dim_t repsPerColumn)
         {
+            // Compute channel and volume
             const dim_t w = blockIdx.y / in.dims[2];
             const dim_t z = blockIdx.y % in.dims[2];
 
             if(w >= in.dims[3] || z >= in.dims[2])
                 return;
 
+            // Compute offset for channel and volume
             const dim_t cOut = w * out.strides[3] + z * out.strides[2];
             const dim_t cIn  = w *  in.strides[3] + z *  in.strides[2];
 
+            // Compute the number of windows along dim0 of input
             const dim_t nx = (in.dims[0] - wx) / sx + 1;
             //dim_t ny = (in.dims[1] - wy) / sy + 1;
 
+            // Compute the output column index
             const dim_t colId = blockIdx.x * blockDim.y + threadIdx.y;
 
             if(colId >= out.dims[1])
                 return;
 
+            // Compute the starting index of window in x and y of input
             const dim_t startx = (colId % nx) * sx;
             const dim_t starty = (colId / nx) * sy;
 
+            // Offset the global pointers to the respective starting indices
                   T* optr = out.ptr + cOut + colId * out.strides[1];
             const T* iptr = in.ptr  + cIn  + starty * in.strides[1] + startx;
 
             for(int i = 0; i < repsPerColumn; i++) {
+                // Compute output index local to column
                 const dim_t colIndex = i * threads + threadIdx.x;
 
                 if(colIndex >= out.dims[0])
                     return;
 
+                // Compute input index local to window
                 const dim_t x = colIndex % wx;
                 const dim_t y = colIndex / wx;
 
                 const dim_t outIdx = (y * wx + x) * out.strides[0];
                 const dim_t inIdx = y * in.strides[1] + x * in.strides[0];
 
+                // Copy
                 optr[outIdx] = iptr[inIdx];
             }
         }
@@ -76,7 +85,7 @@ namespace cuda
 
             dim_t repsPerColumn = 1;
             if(TX == 256 && wx * wy > 256) {
-                repsPerColumn = (wx * wy) / 256;
+                repsPerColumn = divup((wx * wy), 256);
             }
 
             dim3 blocks(divup(out.dims[1], threads.y), out.dims[2] * out.dims[3]);
diff --git a/src/backend/opencl/kernel/unwrap.cl b/src/backend/opencl/kernel/unwrap.cl
index a6370f2..eec5446 100644
--- a/src/backend/opencl/kernel/unwrap.cl
+++ b/src/backend/opencl/kernel/unwrap.cl
@@ -13,41 +13,50 @@ void unwrap_kernel(__global T *d_out, const KParam out,
                    const dim_t wx, const dim_t wy, const dim_t sx, const dim_t sy,
                    const dim_t repsPerColumn)
 {
+    // Compute channel and volume
     const dim_t w = get_group_id(1) / in.dims[2];
-    const dim_t z = get_group_id(1) % in.dims[2];
+    const dim_t z = get_group_id(1) - w * in.dims[2]; // get_group_id(1) % in.dims[2];
 
     if(w >= in.dims[3] || z >= in.dims[2])
         return;
 
+    // Compute offset for channel and volume
     const dim_t cOut = w * out.strides[3] + z * out.strides[2];
     const dim_t cIn  = w *  in.strides[3] + z *  in.strides[2];
 
+    // Compute the number of windows along dim0 of input
     const dim_t nx = (in.dims[0] - wx) / sx + 1;
     //dim_t ny = (in.dims[1] - wy) / sy + 1;
 
+    // Compute the output column index
     const dim_t colId = get_group_id(0) * get_local_size(1) + get_local_id(1);
 
     if(colId >= out.dims[1])
         return;
 
+    // Compute the starting index of window in x and y of input
     const dim_t startx = (colId % nx) * sx;
     const dim_t starty = (colId / nx) * sy;
 
+    // Offset the global pointers to the respective starting indices
     __global T* optr = d_out + cOut + colId * out.strides[1];
     __global T* iptr = d_in  + cIn  + starty * in.strides[1] + startx + in.offset;
 
     for(int i = 0; i < repsPerColumn; i++) {
+        // Compute output index local to column
         const dim_t colIndex = i * TX + get_local_id(0);
 
         if(colIndex >= out.dims[0])
             return;
 
-        const dim_t x = colIndex % wx;
+        // Compute input index local to window
         const dim_t y = colIndex / wx;
+        const dim_t x = colIndex - y * wx;  // colIndex % wx
 
         const dim_t outIdx = (y * wx + x) * out.strides[0];
         const dim_t inIdx = y * in.strides[1] + x * in.strides[0];
 
+        // Copy
         optr[outIdx] = iptr[inIdx];
     }
 }
diff --git a/src/backend/opencl/kernel/unwrap.hpp b/src/backend/opencl/kernel/unwrap.hpp
index 957dbe8..8341e72 100644
--- a/src/backend/opencl/kernel/unwrap.hpp
+++ b/src/backend/opencl/kernel/unwrap.hpp
@@ -71,13 +71,13 @@ namespace opencl
                 const dim_t TY = 256 / TX;
                 dim_t repsPerColumn = 1;
                 if(TX == 256 && wx * wy > 256) {
-                    repsPerColumn = (wx * wy) / 256;
+                    repsPerColumn = divup((wx * wy), 256);
                 }
 
                 NDRange local(TX, TY, 1);
 
-                NDRange global(local[0] * divup(in.info.dims[1], TY),
-                               local[1] * in.info.dims[2] * in.info.dims[3],
+                NDRange global(local[0] * divup(out.info.dims[1], TY),
+                               local[1] * out.info.dims[2] * out.info.dims[3],
                                1);
 
                 unwrapOp(EnqueueArgs(getQueue(), global, local),

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list