[arrayfire] 43/284: Converted cpu scan function to async call
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:17 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 9510fcb1e2554078070c28ea88c2e5078353f72d
Author: pradeep <pradeep at arrayfire.com>
Date: Wed Dec 2 10:44:06 2015 -0500
Converted cpu scan function to async call
Added `.eval()` calls on the input Array objects inside the following
functions to ensure that the inputs have been computed by the time
`.get()` is called on them to read their data values:
* reduce
* setUnique
* setIntersection
* setUnion
---
src/backend/cpu/reduce.cpp | 1 +
src/backend/cpu/scan.cpp | 154 ++++++++++++++++++++++++---------------------
src/backend/cpu/set.cpp | 13 ++++
3 files changed, 96 insertions(+), 72 deletions(-)
diff --git a/src/backend/cpu/reduce.cpp b/src/backend/cpu/reduce.cpp
index ffe9185..e01f0c5 100644
--- a/src/backend/cpu/reduce.cpp
+++ b/src/backend/cpu/reduce.cpp
@@ -89,6 +89,7 @@ namespace cpu
{
dim4 odims = in.dims();
odims[dim] = 1;
+ in.eval();
Array<To> out = createEmptyArray<To>(odims);
static const reduce_dim_func<op, Ti, To> reduce_funcs[4] = { reduce_dim<op, Ti, To, 1>()
diff --git a/src/backend/cpu/scan.cpp b/src/backend/cpu/scan.cpp
index 2bdda21..39157ca 100644
--- a/src/backend/cpu/scan.cpp
+++ b/src/backend/cpu/scan.cpp
@@ -14,102 +14,112 @@
#include <Array.hpp>
#include <scan.hpp>
#include <ops.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
using af::dim4;
namespace cpu
{
- template<af_op_t op, typename Ti, typename To, int D>
- struct scan_dim
- {
- void operator()(To *out, const dim4 ostrides, const dim4 odims,
- const Ti *in , const dim4 istrides, const dim4 idims,
- const int dim)
- {
- const int D1 = D - 1;
- for (dim_t i = 0; i < odims[D1]; i++) {
- scan_dim<op, Ti, To, D1>()(out + i * ostrides[D1],
- ostrides, odims,
- in + i * istrides[D1],
- istrides, idims,
- dim);
- if (D1 == dim) break;
- }
- }
- };
- template<af_op_t op, typename Ti, typename To>
- struct scan_dim<op, Ti, To, 0>
+template<af_op_t op, typename Ti, typename To, int D>
+struct scan_dim
+{
+ void operator()(Array<To> out, dim_t outOffset,
+ const Array<Ti> in, dim_t inOffset,
+ const int dim) const
{
- void operator()(To *out, const dim4 ostrides, const dim4 odims,
- const Ti *in , const dim4 istrides, const dim4 idims,
- const int dim)
- {
-
- dim_t istride = istrides[dim];
- dim_t ostride = ostrides[dim];
-
- Transform<Ti, To, op> transform;
- // FIXME: Change the name to something better
- Binary<To, op> scan;
-
- To out_val = scan.init();
- for (dim_t i = 0; i < idims[dim]; i++) {
- To in_val = transform(in[i * istride]);
- out_val = scan(in_val, out_val);
- out[i * ostride] = out_val;
- }
+ const dim4 odims = out.dims();
+ const dim4 ostrides = out.strides();
+ const dim4 istrides = in.strides();
+
+ const int D1 = D - 1;
+ for (dim_t i = 0; i < odims[D1]; i++) {
+ scan_dim<op, Ti, To, D1> func;
+ getQueue().enqueue(func,
+ out, outOffset + i * ostrides[D1],
+ in, inOffset + i * istrides[D1], dim);
+ if (D1 == dim) break;
}
- };
+ }
+};
- template<af_op_t op, typename Ti, typename To>
- Array<To> scan(const Array<Ti>& in, const int dim)
+template<af_op_t op, typename Ti, typename To>
+struct scan_dim<op, Ti, To, 0>
+{
+ void operator()(Array<To> output, dim_t outOffset,
+ const Array<Ti> input, dim_t inOffset,
+ const int dim) const
{
- dim4 dims = in.dims();
+ const Ti* in = input.get() + inOffset;
+ To* out= output.get()+ outOffset;
- Array<To> out = createValueArray<To>(dims, 0);
+ const dim4 ostrides = output.strides();
+ const dim4 istrides = input.strides();
+ const dim4 idims = input.dims();
+
+ dim_t istride = istrides[dim];
+ dim_t ostride = ostrides[dim];
+
+ Transform<Ti, To, op> transform;
+ // FIXME: Change the name to something better
+ Binary<To, op> scan;
+
+ To out_val = scan.init();
+ for (dim_t i = 0; i < idims[dim]; i++) {
+ To in_val = transform(in[i * istride]);
+ out_val = scan(in_val, out_val);
+ out[i * ostride] = out_val;
+ }
+ }
+};
- switch (in.ndims()) {
+template<af_op_t op, typename Ti, typename To>
+Array<To> scan(const Array<Ti>& in, const int dim)
+{
+ dim4 dims = in.dims();
+ Array<To> out = createValueArray<To>(dims, 0);
+ out.eval();
+ in.eval();
+
+ switch (in.ndims()) {
case 1:
- scan_dim<op, Ti, To, 1>()(out.get(), out.strides(), out.dims(),
- in.get(), in.strides(), in.dims(), dim);
+ scan_dim<op, Ti, To, 1> func1;
+ getQueue().enqueue(func1, out, 0, in, 0, dim);
break;
-
case 2:
- scan_dim<op, Ti, To, 2>()(out.get(), out.strides(), out.dims(),
- in.get(), in.strides(), in.dims(), dim);
+ scan_dim<op, Ti, To, 2> func2;
+ getQueue().enqueue(func2, out, 0, in, 0, dim);
break;
-
case 3:
- scan_dim<op, Ti, To, 3>()(out.get(), out.strides(), out.dims(),
- in.get(), in.strides(), in.dims(), dim);
+ scan_dim<op, Ti, To, 3> func3;
+ getQueue().enqueue(func3, out, 0, in, 0, dim);
break;
-
case 4:
- scan_dim<op, Ti, To, 4>()(out.get(), out.strides(), out.dims(),
- in.get(), in.strides(), in.dims(), dim);
+ scan_dim<op, Ti, To, 4> func4;
+ getQueue().enqueue(func4, out, 0, in, 0, dim);
break;
- }
-
- return out;
}
+ return out;
+}
+
#define INSTANTIATE(ROp, Ti, To) \
template Array<To> scan<ROp, Ti, To>(const Array<Ti> &in, const int dim); \
- //accum
- INSTANTIATE(af_add_t, float , float )
- INSTANTIATE(af_add_t, double , double )
- INSTANTIATE(af_add_t, cfloat , cfloat )
- INSTANTIATE(af_add_t, cdouble, cdouble)
- INSTANTIATE(af_add_t, int , int )
- INSTANTIATE(af_add_t, uint , uint )
- INSTANTIATE(af_add_t, intl , intl )
- INSTANTIATE(af_add_t, uintl , uintl )
- INSTANTIATE(af_add_t, char , int )
- INSTANTIATE(af_add_t, uchar , uint )
- INSTANTIATE(af_add_t, short , int )
- INSTANTIATE(af_add_t, ushort , uint )
- INSTANTIATE(af_notzero_t, char , uint )
+//accum
+INSTANTIATE(af_add_t, float , float )
+INSTANTIATE(af_add_t, double , double )
+INSTANTIATE(af_add_t, cfloat , cfloat )
+INSTANTIATE(af_add_t, cdouble, cdouble)
+INSTANTIATE(af_add_t, int , int )
+INSTANTIATE(af_add_t, uint , uint )
+INSTANTIATE(af_add_t, intl , intl )
+INSTANTIATE(af_add_t, uintl , uintl )
+INSTANTIATE(af_add_t, char , int )
+INSTANTIATE(af_add_t, uchar , uint )
+INSTANTIATE(af_add_t, short , int )
+INSTANTIATE(af_add_t, ushort , uint )
+INSTANTIATE(af_notzero_t, char , uint)
}
diff --git a/src/backend/cpu/set.cpp b/src/backend/cpu/set.cpp
index 3215e6d..d9ca084 100644
--- a/src/backend/cpu/set.cpp
+++ b/src/backend/cpu/set.cpp
@@ -18,6 +18,8 @@
#include <sort.hpp>
#include <err_cpu.hpp>
#include <vector>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
@@ -28,6 +30,9 @@ namespace cpu
Array<T> setUnique(const Array<T> &in,
const bool is_sorted)
{
+ in.eval();
+ getQueue().sync();
+
Array<T> out = createEmptyArray<T>(af::dim4());
if (is_sorted) out = copyArray<T>(in);
else out = sort<T, 1>(in, 0);
@@ -46,6 +51,10 @@ namespace cpu
const Array<T> &second,
const bool is_unique)
{
+ first.eval();
+ second.eval();
+ getQueue().sync();
+
Array<T> uFirst = first;
Array<T> uSecond = second;
@@ -78,6 +87,10 @@ namespace cpu
const Array<T> &second,
const bool is_unique)
{
+ first.eval();
+ second.eval();
+ getQueue().sync();
+
Array<T> uFirst = first;
Array<T> uSecond = second;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list