[arrayfire] 375/408: BUGFIX: For calculating number of elements for a buffer in CUDA backend
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:34 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid in repository arrayfire.
commit 095f29e9ebdb47c917197d914039730d3f8a80b7
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Wed Sep 2 14:03:05 2015 -0400
BUGFIX: For calculating number of elements for a buffer in CUDA backend
---
src/backend/cuda/kernel/fftconvolve.hpp | 18 ++++++++++++++----
src/backend/cuda/kernel/ireduce.hpp | 2 +-
src/backend/cuda/kernel/reduce.hpp | 2 +-
3 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/src/backend/cuda/kernel/fftconvolve.hpp b/src/backend/cuda/kernel/fftconvolve.hpp
index 2acf2db..186fb11 100644
--- a/src/backend/cuda/kernel/fftconvolve.hpp
+++ b/src/backend/cuda/kernel/fftconvolve.hpp
@@ -262,8 +262,13 @@ void packDataHelper(Param<convT> sig_packed,
{
dim_t *sd = sig.dims;
- int sig_packed_elem = sig_packed.strides[3] * sig_packed.dims[3];
- int filter_packed_elem = filter_packed.strides[3] * filter_packed.dims[3];
+ int sig_packed_elem = 1;
+ int filter_packed_elem = 1;
+
+ for (int i = 0; i < 4; i++) {
+ sig_packed_elem *= sig_packed.dims[i];
+ filter_packed_elem *= filter_packed.dims[i];
+ }
// Number of packed complex elements in dimension 0
int sig_half_d0 = divup(sd[0], 2);
@@ -292,8 +297,13 @@ void complexMultiplyHelper(Param<T> out,
CParam<T> filter,
ConvolveBatchKind kind)
{
- int sig_packed_elem = sig_packed.strides[3] * sig_packed.dims[3];
- int filter_packed_elem = filter_packed.strides[3] * filter_packed.dims[3];
+ int sig_packed_elem = 1;
+ int filter_packed_elem = 1;
+
+ for (int i = 0; i < 4; i++) {
+ sig_packed_elem *= sig_packed.dims[i];
+ filter_packed_elem *= filter_packed.dims[i];
+ }
dim3 threads(THREADS);
dim3 blocks(divup(sig_packed_elem / 2, threads.x));
diff --git a/src/backend/cuda/kernel/ireduce.hpp b/src/backend/cuda/kernel/ireduce.hpp
index 4f9cac5..4a2ab14 100644
--- a/src/backend/cuda/kernel/ireduce.hpp
+++ b/src/backend/cuda/kernel/ireduce.hpp
@@ -444,7 +444,7 @@ namespace kernel
template<typename T, af_op_t op>
T ireduce_all(uint *idx, CParam<T> in)
{
- int in_elements = in.strides[3] * in.dims[3];
+ int in_elements = in.dims[0] * in.dims[1] * in.dims[2] * in.dims[3];
// FIXME: Use better heuristics to get to the optimum number
if (in_elements > 4096) {
diff --git a/src/backend/cuda/kernel/reduce.hpp b/src/backend/cuda/kernel/reduce.hpp
index be52375..8cc720f 100644
--- a/src/backend/cuda/kernel/reduce.hpp
+++ b/src/backend/cuda/kernel/reduce.hpp
@@ -371,7 +371,7 @@ namespace kernel
template<typename Ti, typename To, af_op_t op>
To reduce_all(CParam<Ti> in, bool change_nan, double nanval)
{
- int in_elements = in.strides[3] * in.dims[3];
+ int in_elements = in.dims[0] * in.dims[1] * in.dims[2] * in.dims[3];
// FIXME: Use better heuristics to get to the optimum number
if (in_elements > 4096) {
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list