[arrayfire] 15/284: Async CPU reduce and ireduce
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:14 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 1a0802fb2fe22930e81613faa340038b68e0e2e2
Author: Umar Arshad <umar at arrayfire.com>
Date: Tue Sep 22 13:05:17 2015 -0400
Async CPU reduce and ireduce
---
src/backend/cpu/ireduce.cpp | 42 +++++++++++++++++-------------------------
src/backend/cpu/reduce.cpp | 15 ++++++++++-----
2 files changed, 27 insertions(+), 30 deletions(-)
diff --git a/src/backend/cpu/ireduce.cpp b/src/backend/cpu/ireduce.cpp
index 199a0be..d3a76d9 100644
--- a/src/backend/cpu/ireduce.cpp
+++ b/src/backend/cpu/ireduce.cpp
@@ -14,6 +14,9 @@
#include <Array.hpp>
#include <ireduce.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
+
using af::dim4;
namespace cpu
@@ -106,42 +109,31 @@ namespace cpu
};
template<af_op_t op, typename T>
+ using ireduce_dim_func = std::function<void(T *out, const dim4 ostrides, const dim4 odims,
+ uint *loc,
+ const T *in , const dim4 istrides, const dim4 idims,
+ const int dim)>;
+
+ template<af_op_t op, typename T>
void ireduce(Array<T> &out, Array<uint> &loc,
const Array<T> &in, const int dim)
{
dim4 odims = in.dims();
odims[dim] = 1;
+ static const ireduce_dim_func<op, T> ireduce_funcs[] = { ireduce_dim<op, T, 1>()
+ , ireduce_dim<op, T, 2>()
+ , ireduce_dim<op, T, 3>()
+ , ireduce_dim<op, T, 4>()};
- switch (in.ndims()) {
- case 1:
- ireduce_dim<op, T, 1>()(out.get(), out.strides(), out.dims(),
- loc.get(),
- in.get(), in.strides(), in.dims(), dim);
- break;
-
- case 2:
- ireduce_dim<op, T, 2>()(out.get(), out.strides(), out.dims(),
- loc.get(),
- in.get(), in.strides(), in.dims(), dim);
- break;
-
- case 3:
- ireduce_dim<op, T, 3>()(out.get(), out.strides(), out.dims(),
- loc.get(),
- in.get(), in.strides(), in.dims(), dim);
- break;
-
- case 4:
- ireduce_dim<op, T, 4>()(out.get(), out.strides(), out.dims(),
- loc.get(),
- in.get(), in.strides(), in.dims(), dim);
- break;
- }
+ getQueue().enqueue(ireduce_funcs[in.ndims() - 1], out.get(), out.strides(), out.dims(),
+ loc.get(), in.get(), in.strides(), in.dims(), dim);
}
template<af_op_t op, typename T>
T ireduce_all(unsigned *loc, const Array<T> &in)
{
+ evalArray(in);
+ getQueue().sync();
af::dim4 dims = in.dims();
af::dim4 strides = in.strides();
const T *inPtr = in.get();
diff --git a/src/backend/cpu/reduce.cpp b/src/backend/cpu/reduce.cpp
index 5724508..8ce7d0d 100644
--- a/src/backend/cpu/reduce.cpp
+++ b/src/backend/cpu/reduce.cpp
@@ -16,6 +16,9 @@
#include <functional>
#include <complex>
+#include <platform.hpp>
+#include <async_queue.hpp>
+
using af::dim4;
namespace cpu
@@ -74,12 +77,12 @@ namespace cpu
odims[dim] = 1;
Array<To> out = createEmptyArray<To>(odims);
- static reduce_dim_func<op, Ti, To> reduce_funcs[4] = { reduce_dim<op, Ti, To, 1>()
- , reduce_dim<op, Ti, To, 2>()
- , reduce_dim<op, Ti, To, 3>()
- , reduce_dim<op, Ti, To, 4>()};
+ static const reduce_dim_func<op, Ti, To> reduce_funcs[4] = { reduce_dim<op, Ti, To, 1>()
+ , reduce_dim<op, Ti, To, 2>()
+ , reduce_dim<op, Ti, To, 3>()
+ , reduce_dim<op, Ti, To, 4>()};
- reduce_funcs[in.ndims() - 1](out.get(), out.strides(), out.dims(),
+ getQueue().enqueue(reduce_funcs[in.ndims() - 1],out.get(), out.strides(), out.dims(),
in.get(), in.strides(), in.dims(), dim,
change_nan, nanval);
@@ -89,6 +92,8 @@ namespace cpu
template<af_op_t op, typename Ti, typename To>
To reduce_all(const Array<Ti> &in, bool change_nan, double nanval)
{
+ evalArray(in);
+ getQueue().sync();
Transform<Ti, To, op> transform;
Binary<To, op> reduce;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list