[arrayfire] 08/34: BUGFIX: Converting non-linear indices to linear indices in ireduce
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Sep 27 14:46:02 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.
commit 139de0424e6341127b26b5c7176e923c0b4cb01b
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Fri Sep 18 18:51:14 2015 -0400
BUGFIX: Converting non-linear indices to linear indices in ireduce
- Fixes in both CUDA and OpenCL backends
---
src/backend/cuda/kernel/ireduce.hpp | 11 +++++++++++
src/backend/opencl/kernel/ireduce.hpp | 13 ++++++++++++-
2 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/src/backend/cuda/kernel/ireduce.hpp b/src/backend/cuda/kernel/ireduce.hpp
index 4a2ab14..4354f2a 100644
--- a/src/backend/cuda/kernel/ireduce.hpp
+++ b/src/backend/cuda/kernel/ireduce.hpp
@@ -497,6 +497,17 @@ namespace kernel
memFree(tmp.ptr);
memFree(tlptr);
+ if (!is_linear) {
+ // Converting n-d index into a linear index
+ // in is of size [ dims0, dims1, dims2, dims3]
+ // tidx is of size [blocks_x, dims1, dims2, dims3]
+ // i / blocks_x gives you the batch number "N"
+ // "N * dims0 + h_lptr_raw[i]" gives the linear index
+ for (int i = 0; i < tmp_elements; i++) {
+ h_lptr_raw[i] += (i / blocks_x) * in.dims[0];
+ }
+ }
+
MinMaxOp<op, T> Op(h_ptr_raw[0], h_lptr_raw[0]);
for (int i = 1; i < tmp_elements; i++) {
diff --git a/src/backend/opencl/kernel/ireduce.hpp b/src/backend/opencl/kernel/ireduce.hpp
index e4147ab..0adc0c8 100644
--- a/src/backend/opencl/kernel/ireduce.hpp
+++ b/src/backend/opencl/kernel/ireduce.hpp
@@ -381,8 +381,19 @@ namespace kernel
T* h_ptr_raw = h_ptr.get();
uint* h_iptr_raw = h_iptr.get();
- MinMaxOp<op, T> Op(h_ptr_raw[0], h_iptr_raw[0]);
+ if (!is_linear) {
+ // Converting n-d index into a linear index
+ // in is of size [ dims0, dims1, dims2, dims3]
+ // tidx is of size [groups_x, dims1, dims2, dims3]
+ // i / groups_x gives you the batch number "N"
+ // "N * dims0 + h_iptr_raw[i]" gives the linear index
+ for (int i = 0; i < tmp_elements; i++) {
+ h_iptr_raw[i] += (i / groups_x) * in.info.dims[0];
+ }
+ }
+
+ MinMaxOp<op, T> Op(h_ptr_raw[0], h_iptr_raw[0]);
for (int i = 1; i < (int)tmp_elements; i++) {
Op(h_ptr_raw[i], h_iptr_raw[i]);
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list