[arrayfire] 11/408: Adding padding for strides > 1
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:04 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 9b13e8a7a0f323cbc44d25e2a96553e60d25fdcd
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Thu Jun 18 14:02:02 2015 -0400
Adding padding for strides > 1
---
src/backend/cpu/unwrap.cpp | 19 ++++++++++++++-----
src/backend/cuda/kernel/unwrap.hpp | 12 +++++++++---
src/backend/cuda/unwrap.cu | 4 ++--
src/backend/opencl/kernel/unwrap.cl | 31 ++++++++++++++++++++++++++-----
src/backend/opencl/unwrap.cpp | 4 ++--
5 files changed, 53 insertions(+), 17 deletions(-)
diff --git a/src/backend/cpu/unwrap.cpp b/src/backend/cpu/unwrap.cpp
index 3d25661..76bc915 100644
--- a/src/backend/cpu/unwrap.cpp
+++ b/src/backend/cpu/unwrap.cpp
@@ -11,6 +11,8 @@
#include <unwrap.hpp>
#include <stdexcept>
#include <err_cpu.hpp>
+#include <dispatch.hpp>
+#include <math.hpp>
namespace cpu
{
@@ -19,8 +21,7 @@ namespace cpu
const af::dim4 &ostrides, const af::dim4 &istrides,
const dim_t wx, const dim_t wy, const dim_t sx, const dim_t sy)
{
- dim_t nx = (idims[0] - wx) / sx + 1;
- //dim_t ny = (idims[1] - wy) / sy + 1;
+ dim_t nx = divup(idims[0] - wx, sx) + (sx >= idims[0] ? 0 : 1);
for(dim_t w = 0; w < odims[3]; w++) {
for(dim_t z = 0; z < odims[2]; z++) {
@@ -37,11 +38,19 @@ namespace cpu
T* optr = outPtr + cOut + col * ostrides[1];
const T* iptr = inPtr + cIn + starty * istrides[1] + startx;
+ // Condition shortcuts
+ bool cond = true;
+ if((startx + wx >= idims[0]) || (starty + wy >= idims[1]))
+ cond = false;
+
for(dim_t y = 0; y < wy; y++) {
for(dim_t x = 0; x < wx; x++) {
dim_t oloc = (y * wx + x) * ostrides[0];
dim_t iloc = (y * istrides[1] + x * istrides[0]);
- optr[oloc] = iptr[iloc];
+ if(cond || (startx + x < idims[0] && starty + y < idims[1]))
+ optr[oloc] = iptr[iloc];
+ else
+ optr[oloc] = scalar<T>(0.0);
}
}
}
@@ -55,8 +64,8 @@ namespace cpu
{
af::dim4 idims = in.dims();
- dim_t nx = (idims[0] - wx) / sx + 1;
- dim_t ny = (idims[1] - wy) / sy + 1;
+ dim_t nx = divup(idims[0] - wx, sx) + (sx >= idims[0] ? 0 : 1);
+ dim_t ny = divup(idims[1] - wy, sy) + (sy >= idims[1] ? 0 : 1);
af::dim4 odims(wx * wy, nx * ny, idims[2], idims[3]);
diff --git a/src/backend/cuda/kernel/unwrap.hpp b/src/backend/cuda/kernel/unwrap.hpp
index 32449d4..d5921df 100644
--- a/src/backend/cuda/kernel/unwrap.hpp
+++ b/src/backend/cuda/kernel/unwrap.hpp
@@ -38,8 +38,7 @@ namespace cuda
const dim_t cIn = w * in.strides[3] + z * in.strides[2];
// Compute the number of windows along dim0 of input
- const dim_t nx = (in.dims[0] - wx) / sx + 1;
- //dim_t ny = (in.dims[1] - wy) / sy + 1;
+ const dim_t nx = divup(in.dims[0] - wx, sx) + (sx >= in.dims[0] ? 0 : 1);
// Compute the output column index
const dim_t colId = blockIdx.x * blockDim.y + threadIdx.y;
@@ -55,6 +54,10 @@ namespace cuda
T* optr = out.ptr + cOut + colId * out.strides[1];
const T* iptr = in.ptr + cIn + starty * in.strides[1] + startx;
+ bool cond = true;
+ if((startx + wx >= in.dims[0]) || (starty + wy >= in.dims[1]))
+ cond = false;
+
for(int i = 0; i < repsPerColumn; i++) {
// Compute output index local to column
const dim_t colIndex = i * threads + threadIdx.x;
@@ -70,7 +73,10 @@ namespace cuda
const dim_t inIdx = y * in.strides[1] + x * in.strides[0];
// Copy
- optr[outIdx] = iptr[inIdx];
+ if(cond || (startx + x < in.dims[0] && starty + y < in.dims[1]))
+ optr[outIdx] = iptr[inIdx];
+ else
+ optr[outIdx] = scalar<T>(0.0);
}
}
diff --git a/src/backend/cuda/unwrap.cu b/src/backend/cuda/unwrap.cu
index b180420..41aa8dc 100644
--- a/src/backend/cuda/unwrap.cu
+++ b/src/backend/cuda/unwrap.cu
@@ -21,8 +21,8 @@ namespace cuda
{
af::dim4 idims = in.dims();
- dim_t nx = (idims[0] - wx) / sx + 1;
- dim_t ny = (idims[1] - wy) / sy + 1;
+ dim_t nx = divup(idims[0] - wx, sx) + (sx >= idims[0] ? 0 : 1);
+ dim_t ny = divup(idims[1] - wy, sy) + (sy >= idims[1] ? 0 : 1);
af::dim4 odims(wx * wy, nx * ny, idims[2], idims[3]);
diff --git a/src/backend/opencl/kernel/unwrap.cl b/src/backend/opencl/kernel/unwrap.cl
index eec5446..331f0da 100644
--- a/src/backend/opencl/kernel/unwrap.cl
+++ b/src/backend/opencl/kernel/unwrap.cl
@@ -7,6 +7,19 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#define divup(a, b) (((a)+(b)-1)/(b))
+
+#if CPLX
+#define set(a, b) a = b
+#define set_scalar(a, b) do { \
+ a.x = b; \
+ a.y = 0; \
+ } while(0)
+#else
+#define set(a, b) a = b
+#define set_scalar(a, b) a = b
+#endif
+
__kernel
void unwrap_kernel(__global T *d_out, const KParam out,
__global const T *d_in, const KParam in,
@@ -25,8 +38,9 @@ void unwrap_kernel(__global T *d_out, const KParam out,
const dim_t cIn = w * in.strides[3] + z * in.strides[2];
// Compute the number of windows along dim0 of input
- const dim_t nx = (in.dims[0] - wx) / sx + 1;
- //dim_t ny = (in.dims[1] - wy) / sy + 1;
+ int nx_add = 1;
+ if(sx >= in.dims[0]) nx_add = 0;
+ const dim_t nx = divup(in.dims[0] - wx, sx) + nx_add;
// Compute the output column index
const dim_t colId = get_group_id(0) * get_local_size(1) + get_local_id(1);
@@ -39,8 +53,12 @@ void unwrap_kernel(__global T *d_out, const KParam out,
const dim_t starty = (colId / nx) * sy;
// Offset the global pointers to the respective starting indices
- __global T* optr = d_out + cOut + colId * out.strides[1];
- __global T* iptr = d_in + cIn + starty * in.strides[1] + startx + in.offset;
+ __global T* optr = d_out + cOut + colId * out.strides[1];
+ __global const T* iptr = d_in + cIn + starty * in.strides[1] + startx + in.offset;
+
+ bool cond = true;
+ if((startx + wx >= in.dims[0]) || (starty + wy >= in.dims[1]))
+ cond = false;
for(int i = 0; i < repsPerColumn; i++) {
// Compute output index local to column
@@ -57,6 +75,9 @@ void unwrap_kernel(__global T *d_out, const KParam out,
const dim_t inIdx = y * in.strides[1] + x * in.strides[0];
// Copy
- optr[outIdx] = iptr[inIdx];
+ if(cond || (startx + x < in.dims[0] && starty + y < in.dims[1]))
+ optr[outIdx] = iptr[inIdx];
+ else
+ set_scalar(optr[outIdx], 0);
}
}
diff --git a/src/backend/opencl/unwrap.cpp b/src/backend/opencl/unwrap.cpp
index e9f36a0..61f2ddb 100644
--- a/src/backend/opencl/unwrap.cpp
+++ b/src/backend/opencl/unwrap.cpp
@@ -21,8 +21,8 @@ namespace opencl
{
af::dim4 idims = in.dims();
- dim_t nx = (idims[0] - wx) / sx + 1;
- dim_t ny = (idims[1] - wy) / sy + 1;
+ dim_t nx = divup(idims[0] - wx, sx) + (sx >= idims[0] ? 0 : 1);
+ dim_t ny = divup(idims[1] - wy, sy) + (sy >= idims[1] ? 0 : 1);
af::dim4 odims(wx * wy, nx * ny, idims[2], idims[3]);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list