[arrayfire] 37/284: convert select & rotate cpu fns to async calls
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:16 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 3c2bc65b12fe04c455837790cbba27b2417bc9a1
Author: pradeep <pradeep at arrayfire.com>
Date: Tue Nov 24 14:37:14 2015 -0500
convert select & rotate cpu fns to async calls
---
src/backend/cpu/rotate.cpp | 23 +++++++++--------
src/backend/cpu/select.cpp | 64 +++++++++++++++++++++++++++++-----------------
2 files changed, 53 insertions(+), 34 deletions(-)
diff --git a/src/backend/cpu/rotate.cpp b/src/backend/cpu/rotate.cpp
index a4af64b..01ec962 100644
--- a/src/backend/cpu/rotate.cpp
+++ b/src/backend/cpu/rotate.cpp
@@ -12,15 +12,22 @@
#include <math.hpp>
#include <stdexcept>
#include <err_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
#include "transform_interp.hpp"
namespace cpu
{
template<typename T, af_interp_type method>
- void rotate_(T *out, const T *in, const float theta,
- const af::dim4 &odims, const af::dim4 &idims,
- const af::dim4 &ostrides, const af::dim4 &istrides)
+ void rotate_(Array<T> output, const Array<T> input, const float theta)
{
+ const af::dim4 odims = output.dims();
+ const af::dim4 idims = input.dims();
+ const af::dim4 ostrides = output.strides();
+ const af::dim4 istrides = input.strides();
+
+ const T* in = input.get();
+ T* out = output.get();
dim_t nimages = idims[2];
void (*t_fn)(T *, const T *, const float *, const af::dim4 &,
@@ -77,20 +84,16 @@ namespace cpu
const af_interp_type method)
{
Array<T> out = createEmptyArray<T>(odims);
- const af::dim4 idims = in.dims();
switch(method) {
case AF_INTERP_NEAREST:
- rotate_<T, AF_INTERP_NEAREST>
- (out.get(), in.get(), theta, odims, idims, out.strides(), in.strides());
+ getQueue().enqueue(rotate_<T, AF_INTERP_NEAREST>, out, in, theta);
break;
case AF_INTERP_BILINEAR:
- rotate_<T, AF_INTERP_BILINEAR>
- (out.get(), in.get(), theta, odims, idims, out.strides(), in.strides());
+ getQueue().enqueue(rotate_<T, AF_INTERP_BILINEAR>, out, in, theta);
break;
case AF_INTERP_LOWER:
- rotate_<T, AF_INTERP_LOWER>
- (out.get(), in.get(), theta, odims, idims, out.strides(), in.strides());
+ getQueue().enqueue(rotate_<T, AF_INTERP_LOWER>, out, in, theta);
break;
default:
AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
diff --git a/src/backend/cpu/select.cpp b/src/backend/cpu/select.cpp
index 7b2cc81..4a219ed 100644
--- a/src/backend/cpu/select.cpp
+++ b/src/backend/cpu/select.cpp
@@ -10,14 +10,22 @@
#include <Array.hpp>
#include <select.hpp>
#include <err_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
using af::dim4;
namespace cpu
{
- template<typename T>
- void select(Array<T> &out, const Array<char> &cond, const Array<T> &a, const Array<T> &b)
- {
+
+template<typename T>
+void select(Array<T> &out, const Array<char> &cond, const Array<T> &a, const Array<T> &b)
+{
+ out.eval();
+ cond.eval();
+ a.eval();
+ b.eval();
+ auto func = [=] (Array<T> out, const Array<char> cond, const Array<T> a, const Array<T> b) {
dim4 adims = a.dims();
dim4 astrides = a.strides();
dim4 bdims = b.dims();
@@ -30,13 +38,13 @@ namespace cpu
dim4 ostrides = out.strides();
bool is_a_same[] = {adims[0] == odims[0], adims[1] == odims[1],
- adims[2] == odims[2], adims[3] == odims[3]};
+ adims[2] == odims[2], adims[3] == odims[3]};
bool is_b_same[] = {bdims[0] == odims[0], bdims[1] == odims[1],
- bdims[2] == odims[2], bdims[3] == odims[3]};
+ bdims[2] == odims[2], bdims[3] == odims[3]};
bool is_c_same[] = {cdims[0] == odims[0], cdims[1] == odims[1],
- cdims[2] == odims[2], cdims[3] == odims[3]};
+ cdims[2] == odims[2], cdims[3] == odims[3]};
const T *aptr = a.get();
const T *bptr = b.get();
@@ -75,11 +83,17 @@ namespace cpu
}
}
}
- }
+ };
+ getQueue().enqueue(func, out, cond, a, b);
+}
- template<typename T, bool flip>
- void select_scalar(Array<T> &out, const Array<char> &cond, const Array<T> &a, const double &b)
- {
+template<typename T, bool flip>
+void select_scalar(Array<T> &out, const Array<char> &cond, const Array<T> &a, const double &b)
+{
+ out.eval();
+ cond.eval();
+ a.eval();
+ auto func = [=] (Array<T> out, const Array<char> cond, const Array<T> a, const double b) {
dim4 astrides = a.strides();
dim4 cstrides = cond.strides();
@@ -115,8 +129,9 @@ namespace cpu
}
}
}
- }
-
+ };
+ getQueue().enqueue(func, out, cond, a, b);
+}
#define INSTANTIATE(T) \
template void select<T>(Array<T> &out, const Array<char> &cond, \
@@ -130,16 +145,17 @@ namespace cpu
const Array<T> &a, \
const double &b); \
- INSTANTIATE(float )
- INSTANTIATE(double )
- INSTANTIATE(cfloat )
- INSTANTIATE(cdouble)
- INSTANTIATE(int )
- INSTANTIATE(uint )
- INSTANTIATE(intl )
- INSTANTIATE(uintl )
- INSTANTIATE(char )
- INSTANTIATE(uchar )
- INSTANTIATE(short )
- INSTANTIATE(ushort )
+INSTANTIATE(float )
+INSTANTIATE(double )
+INSTANTIATE(cfloat )
+INSTANTIATE(cdouble)
+INSTANTIATE(int )
+INSTANTIATE(uint )
+INSTANTIATE(intl )
+INSTANTIATE(uintl )
+INSTANTIATE(char )
+INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort )
+
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list