[arrayfire] 10/408: Bugfixes, comments
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:04 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 4badf65315400472780957fd5131c5191d2b00df
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Thu Jun 18 12:39:54 2015 -0400
Bugfixes, comments
---
src/backend/cuda/kernel/unwrap.hpp | 11 ++++++++++-
src/backend/opencl/kernel/unwrap.cl | 13 +++++++++++--
src/backend/opencl/kernel/unwrap.hpp | 6 +++---
3 files changed, 24 insertions(+), 6 deletions(-)
diff --git a/src/backend/cuda/kernel/unwrap.hpp b/src/backend/cuda/kernel/unwrap.hpp
index 9ac0663..32449d4 100644
--- a/src/backend/cuda/kernel/unwrap.hpp
+++ b/src/backend/cuda/kernel/unwrap.hpp
@@ -26,41 +26,50 @@ namespace cuda
const dim_t wx, const dim_t wy, const dim_t sx, const dim_t sy,
dim_t repsPerColumn)
{
+ // Compute channel and volume
const dim_t w = blockIdx.y / in.dims[2];
const dim_t z = blockIdx.y % in.dims[2];
if(w >= in.dims[3] || z >= in.dims[2])
return;
+ // Compute offset for channel and volume
const dim_t cOut = w * out.strides[3] + z * out.strides[2];
const dim_t cIn = w * in.strides[3] + z * in.strides[2];
+ // Compute the number of windows along dim0 of input
const dim_t nx = (in.dims[0] - wx) / sx + 1;
//dim_t ny = (in.dims[1] - wy) / sy + 1;
+ // Compute the output column index
const dim_t colId = blockIdx.x * blockDim.y + threadIdx.y;
if(colId >= out.dims[1])
return;
+ // Compute the starting index of window in x and y of input
const dim_t startx = (colId % nx) * sx;
const dim_t starty = (colId / nx) * sy;
+ // Offset the global pointers to the respective starting indices
T* optr = out.ptr + cOut + colId * out.strides[1];
const T* iptr = in.ptr + cIn + starty * in.strides[1] + startx;
for(int i = 0; i < repsPerColumn; i++) {
+ // Compute output index local to column
const dim_t colIndex = i * threads + threadIdx.x;
if(colIndex >= out.dims[0])
return;
+ // Compute input index local to window
const dim_t x = colIndex % wx;
const dim_t y = colIndex / wx;
const dim_t outIdx = (y * wx + x) * out.strides[0];
const dim_t inIdx = y * in.strides[1] + x * in.strides[0];
+ // Copy
optr[outIdx] = iptr[inIdx];
}
}
@@ -76,7 +85,7 @@ namespace cuda
dim_t repsPerColumn = 1;
if(TX == 256 && wx * wy > 256) {
- repsPerColumn = (wx * wy) / 256;
+ repsPerColumn = divup((wx * wy), 256);
}
dim3 blocks(divup(out.dims[1], threads.y), out.dims[2] * out.dims[3]);
diff --git a/src/backend/opencl/kernel/unwrap.cl b/src/backend/opencl/kernel/unwrap.cl
index a6370f2..eec5446 100644
--- a/src/backend/opencl/kernel/unwrap.cl
+++ b/src/backend/opencl/kernel/unwrap.cl
@@ -13,41 +13,50 @@ void unwrap_kernel(__global T *d_out, const KParam out,
const dim_t wx, const dim_t wy, const dim_t sx, const dim_t sy,
const dim_t repsPerColumn)
{
+ // Compute channel and volume
const dim_t w = get_group_id(1) / in.dims[2];
- const dim_t z = get_group_id(1) % in.dims[2];
+ const dim_t z = get_group_id(1) - w * in.dims[2]; // get_group_id(1) % in.dims[2];
if(w >= in.dims[3] || z >= in.dims[2])
return;
+ // Compute offset for channel and volume
const dim_t cOut = w * out.strides[3] + z * out.strides[2];
const dim_t cIn = w * in.strides[3] + z * in.strides[2];
+ // Compute the number of windows along dim0 of input
const dim_t nx = (in.dims[0] - wx) / sx + 1;
//dim_t ny = (in.dims[1] - wy) / sy + 1;
+ // Compute the output column index
const dim_t colId = get_group_id(0) * get_local_size(1) + get_local_id(1);
if(colId >= out.dims[1])
return;
+ // Compute the starting index of window in x and y of input
const dim_t startx = (colId % nx) * sx;
const dim_t starty = (colId / nx) * sy;
+ // Offset the global pointers to the respective starting indices
__global T* optr = d_out + cOut + colId * out.strides[1];
__global T* iptr = d_in + cIn + starty * in.strides[1] + startx + in.offset;
for(int i = 0; i < repsPerColumn; i++) {
+ // Compute output index local to column
const dim_t colIndex = i * TX + get_local_id(0);
if(colIndex >= out.dims[0])
return;
- const dim_t x = colIndex % wx;
+ // Compute input index local to window
const dim_t y = colIndex / wx;
+ const dim_t x = colIndex - y * wx; // colIndex % wx
const dim_t outIdx = (y * wx + x) * out.strides[0];
const dim_t inIdx = y * in.strides[1] + x * in.strides[0];
+ // Copy
optr[outIdx] = iptr[inIdx];
}
}
diff --git a/src/backend/opencl/kernel/unwrap.hpp b/src/backend/opencl/kernel/unwrap.hpp
index 957dbe8..8341e72 100644
--- a/src/backend/opencl/kernel/unwrap.hpp
+++ b/src/backend/opencl/kernel/unwrap.hpp
@@ -71,13 +71,13 @@ namespace opencl
const dim_t TY = 256 / TX;
dim_t repsPerColumn = 1;
if(TX == 256 && wx * wy > 256) {
- repsPerColumn = (wx * wy) / 256;
+ repsPerColumn = divup((wx * wy), 256);
}
NDRange local(TX, TY, 1);
- NDRange global(local[0] * divup(in.info.dims[1], TY),
- local[1] * in.info.dims[2] * in.info.dims[3],
+ NDRange global(local[0] * divup(out.info.dims[1], TY),
+ local[1] * out.info.dims[2] * out.info.dims[3],
1);
unwrapOp(EnqueueArgs(getQueue(), global, local),
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list