[arrayfire] 375/408: BUGFIX: For calculating number of elements for a buffer in CUDA backend

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:34 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 095f29e9ebdb47c917197d914039730d3f8a80b7
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Wed Sep 2 14:03:05 2015 -0400

    BUGFIX: For calculating number of elements for a buffer in CUDA backend
---
 src/backend/cuda/kernel/fftconvolve.hpp | 18 ++++++++++++++----
 src/backend/cuda/kernel/ireduce.hpp     |  2 +-
 src/backend/cuda/kernel/reduce.hpp      |  2 +-
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/backend/cuda/kernel/fftconvolve.hpp b/src/backend/cuda/kernel/fftconvolve.hpp
index 2acf2db..186fb11 100644
--- a/src/backend/cuda/kernel/fftconvolve.hpp
+++ b/src/backend/cuda/kernel/fftconvolve.hpp
@@ -262,8 +262,13 @@ void packDataHelper(Param<convT> sig_packed,
 {
     dim_t *sd = sig.dims;
 
-    int sig_packed_elem = sig_packed.strides[3] * sig_packed.dims[3];
-    int filter_packed_elem = filter_packed.strides[3] * filter_packed.dims[3];
+    int sig_packed_elem = 1;
+    int filter_packed_elem = 1;
+
+    for (int i = 0; i < 4; i++) {
+        sig_packed_elem *= sig_packed.dims[i];
+        filter_packed_elem *= filter_packed.dims[i];
+    }
 
     // Number of packed complex elements in dimension 0
     int sig_half_d0 = divup(sd[0], 2);
@@ -292,8 +297,13 @@ void complexMultiplyHelper(Param<T> out,
                            CParam<T> filter,
                            ConvolveBatchKind kind)
 {
-    int sig_packed_elem = sig_packed.strides[3] * sig_packed.dims[3];
-    int filter_packed_elem = filter_packed.strides[3] * filter_packed.dims[3];
+    int sig_packed_elem = 1;
+    int filter_packed_elem = 1;
+
+    for (int i = 0; i < 4; i++) {
+        sig_packed_elem *= sig_packed.dims[i];
+        filter_packed_elem *= filter_packed.dims[i];
+    }
 
     dim3 threads(THREADS);
     dim3 blocks(divup(sig_packed_elem / 2, threads.x));
diff --git a/src/backend/cuda/kernel/ireduce.hpp b/src/backend/cuda/kernel/ireduce.hpp
index 4f9cac5..4a2ab14 100644
--- a/src/backend/cuda/kernel/ireduce.hpp
+++ b/src/backend/cuda/kernel/ireduce.hpp
@@ -444,7 +444,7 @@ namespace kernel
     template<typename T, af_op_t op>
     T ireduce_all(uint *idx, CParam<T> in)
     {
-        int in_elements = in.strides[3] * in.dims[3];
+        int in_elements = in.dims[0] * in.dims[1] * in.dims[2] * in.dims[3];
 
         // FIXME: Use better heuristics to get to the optimum number
         if (in_elements > 4096) {
diff --git a/src/backend/cuda/kernel/reduce.hpp b/src/backend/cuda/kernel/reduce.hpp
index be52375..8cc720f 100644
--- a/src/backend/cuda/kernel/reduce.hpp
+++ b/src/backend/cuda/kernel/reduce.hpp
@@ -371,7 +371,7 @@ namespace kernel
     template<typename Ti, typename To, af_op_t op>
     To reduce_all(CParam<Ti> in, bool change_nan, double nanval)
     {
-        int in_elements = in.strides[3] * in.dims[3];
+        int in_elements = in.dims[0] * in.dims[1] * in.dims[2] * in.dims[3];
 
         // FIXME: Use better heuristics to get to the optimum number
         if (in_elements > 4096) {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list