[arrayfire] 09/284: Async CPU Copy, Assign, and Index
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:13 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 96c5602965334c5f36f33dc69ad81314fd6e6bd7
Author: Umar Arshad <umar at arrayfire.com>
Date: Thu Aug 13 17:25:31 2015 -0400
Async CPU Copy, Assign, and Index
---
src/backend/cpu/assign.cpp | 9 ++++++--
src/backend/cpu/copy.cpp | 6 ++---
src/backend/cpu/index.cpp | 56 ++++++++++++++++++++++++++--------------------
3 files changed, 42 insertions(+), 29 deletions(-)
diff --git a/src/backend/cpu/assign.cpp b/src/backend/cpu/assign.cpp
index c0a177f..589fa53 100644
--- a/src/backend/cpu/assign.cpp
+++ b/src/backend/cpu/assign.cpp
@@ -16,9 +16,12 @@
#include <err_cpu.hpp>
#include <platform.hpp>
#include <async_queue.hpp>
+#include <array>
using af::dim4;
using std::ref;
+using std::copy;
+using std::array;
namespace cpu
{
@@ -37,7 +40,7 @@ dim_t trimIndex(int idx, const dim_t &len)
}
template<typename T>
-void assign_(Array<T>& out, const af_index_t idxrs[], const Array<T>& rhs)
+void assign_(Array<T> out, const array<af_index_t, 4> idxrs, const Array<T> rhs)
{
bool isSeq[4];
std::vector<af_seq> seqs(4, af_span);
@@ -117,7 +120,9 @@ void assign_(Array<T>& out, const af_index_t idxrs[], const Array<T>& rhs)
template<typename T>
void assign(Array<T>& out, const af_index_t idxrs[], const Array<T>& rhs)
{
- getQueue().enqueue(assign_<T>, ref(out), idxrs, ref(rhs));
+ array<af_index_t, 4> idx;
+ copy(idxrs, idxrs+4, begin(idx));
+ getQueue().enqueue(assign_<T>, out, move(idx), rhs);
}
#define INSTANTIATE(T) \
diff --git a/src/backend/cpu/copy.cpp b/src/backend/cpu/copy.cpp
index 35c1ebe..433e718 100644
--- a/src/backend/cpu/copy.cpp
+++ b/src/backend/cpu/copy.cpp
@@ -117,7 +117,7 @@ namespace cpu
template<typename T>
void multiply_inplace(Array<T> &in, double val)
{
- copy<T, T>(in, in, 0, val);
+ getQueue().enqueue(copy<T, T>,in, in, 0, val);
}
template<typename inType, typename outType>
@@ -126,14 +126,14 @@ namespace cpu
outType default_value, double factor)
{
Array<outType> ret = createValueArray<outType>(dims, default_value);
- copy<inType, outType>(ret, in, outType(default_value), factor);
+ getQueue().enqueue(copy<inType, outType>,ret, in, outType(default_value), factor);
return ret;
}
template<typename inType, typename outType>
void copyArray(Array<outType> &out, Array<inType> const &in)
{
- copy<inType, outType>(out, in, scalar<outType>(0), 1.0);
+ getQueue().enqueue(copy<inType, outType>,out, in, scalar<outType>(0), 1.0);
}
diff --git a/src/backend/cpu/index.cpp b/src/backend/cpu/index.cpp
index 162e67f..c6112fa 100644
--- a/src/backend/cpu/index.cpp
+++ b/src/backend/cpu/index.cpp
@@ -15,6 +15,8 @@
#include <handle.hpp>
#include <err_cpu.hpp>
#include <vector>
+#include <platform.hpp>
+#include <async_queue.hpp>
using af::dim4;
@@ -68,43 +70,49 @@ Array<T> index(const Array<T>& in, const af_index_t idxrs[])
Array<T> out = createEmptyArray<T>(oDims);
dim4 oStrides= out.strides();
- const T *src = in.get();
- T *dst = out.get();
- const uint* ptr0 = idxArrs[0].get();
- const uint* ptr1 = idxArrs[1].get();
- const uint* ptr2 = idxArrs[2].get();
- const uint* ptr3 = idxArrs[3].get();
+ auto func = [=] (Array<T> out, const Array<T> in) {
- for (dim_t l=0; l<oDims[3]; ++l) {
+ const T *src = in.get();
+ T *dst = out.get();
- dim_t lOff = l*oStrides[3];
- dim_t inIdx3 = trimIndex(isSeq[3] ? l+iOffs[3] : ptr3[l], iDims[3]);
- dim_t inOff3 = inIdx3*iStrds[3];
+ const uint* ptr0 = idxArrs[0].get();
+ const uint* ptr1 = idxArrs[1].get();
+ const uint* ptr2 = idxArrs[2].get();
+ const uint* ptr3 = idxArrs[3].get();
- for (dim_t k=0; k<oDims[2]; ++k) {
+ for (dim_t l=0; l<oDims[3]; ++l) {
- dim_t kOff = k*oStrides[2];
- dim_t inIdx2 = trimIndex(isSeq[2] ? k+iOffs[2] : ptr2[k], iDims[2]);
- dim_t inOff2 = inIdx2*iStrds[2];
+ dim_t lOff = l*oStrides[3];
+ dim_t inIdx3 = trimIndex(isSeq[3] ? l+iOffs[3] : ptr3[l], iDims[3]);
+ dim_t inOff3 = inIdx3*iStrds[3];
- for (dim_t j=0; j<oDims[1]; ++j) {
+ for (dim_t k=0; k<oDims[2]; ++k) {
- dim_t jOff = j*oStrides[1];
- dim_t inIdx1 = trimIndex(isSeq[1] ? j+iOffs[1] : ptr1[j], iDims[1]);
- dim_t inOff1 = inIdx1*iStrds[1];
+ dim_t kOff = k*oStrides[2];
+ dim_t inIdx2 = trimIndex(isSeq[2] ? k+iOffs[2] : ptr2[k], iDims[2]);
+ dim_t inOff2 = inIdx2*iStrds[2];
- for (dim_t i=0; i<oDims[0]; ++i) {
+ for (dim_t j=0; j<oDims[1]; ++j) {
- dim_t iOff = i*oStrides[0];
- dim_t inIdx0 = trimIndex(isSeq[0] ? i+iOffs[0] : ptr0[i], iDims[0]);
- dim_t inOff0 = inIdx0*iStrds[0];
+ dim_t jOff = j*oStrides[1];
+ dim_t inIdx1 = trimIndex(isSeq[1] ? j+iOffs[1] : ptr1[j], iDims[1]);
+ dim_t inOff1 = inIdx1*iStrds[1];
- dst[lOff+kOff+jOff+iOff] = src[inOff3+inOff2+inOff1+inOff0];
+ for (dim_t i=0; i<oDims[0]; ++i) {
+
+ dim_t iOff = i*oStrides[0];
+ dim_t inIdx0 = trimIndex(isSeq[0] ? i+iOffs[0] : ptr0[i], iDims[0]);
+ dim_t inOff0 = inIdx0*iStrds[0];
+
+ dst[lOff+kOff+jOff+iOff] = src[inOff3+inOff2+inOff1+inOff0];
+ }
}
}
}
- }
+ };
+
+ getQueue().enqueue(func, out, in);
return out;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list