[arrayfire] 48/284: converted join cpu func to async call
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:18 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental in repository arrayfire.
commit e5ab6713a4260e3b89109f048f133db71f571174
Author: pradeep <pradeep at arrayfire.com>
Date: Wed Dec 2 16:18:11 2015 -0500
converted join cpu func to async call
---
src/backend/cpu/join.cpp | 373 ++++++++++++++++++++++++-----------------------
1 file changed, 193 insertions(+), 180 deletions(-)
diff --git a/src/backend/cpu/join.cpp b/src/backend/cpu/join.cpp
index 78d2a51..8af9c24 100644
--- a/src/backend/cpu/join.cpp
+++ b/src/backend/cpu/join.cpp
@@ -11,241 +11,254 @@
#include <join.hpp>
#include <stdexcept>
#include <err_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
- template<typename To, typename Tx, int dim>
- void join_append(To *out, const Tx *X, const af::dim4 &offset,
- const af::dim4 &odims, const af::dim4 &xdims,
- const af::dim4 &ost, const af::dim4 &xst)
- {
- for(dim_t ow = 0; ow < xdims[3]; ow++) {
- const dim_t xW = ow * xst[3];
- const dim_t oW = (ow + offset[3]) * ost[3];
-
- for(dim_t oz = 0; oz < xdims[2]; oz++) {
- const dim_t xZW = xW + oz * xst[2];
- const dim_t oZW = oW + (oz + offset[2]) * ost[2];
-
- for(dim_t oy = 0; oy < xdims[1]; oy++) {
- const dim_t xYZW = xZW + oy * xst[1];
- const dim_t oYZW = oZW + (oy + offset[1]) * ost[1];
-
- for(dim_t ox = 0; ox < xdims[0]; ox++) {
- const dim_t iMem = xYZW + ox;
- const dim_t oMem = oYZW + (ox + offset[0]);
- out[oMem] = X[iMem];
- }
+template<typename To, typename Tx, int dim>
+void join_append(To *out, const Tx *X, const af::dim4 &offset,
+ const af::dim4 &odims, const af::dim4 &xdims,
+ const af::dim4 &ost, const af::dim4 &xst)
+{
+ for(dim_t ow = 0; ow < xdims[3]; ow++) {
+ const dim_t xW = ow * xst[3];
+ const dim_t oW = (ow + offset[3]) * ost[3];
+
+ for(dim_t oz = 0; oz < xdims[2]; oz++) {
+ const dim_t xZW = xW + oz * xst[2];
+ const dim_t oZW = oW + (oz + offset[2]) * ost[2];
+
+ for(dim_t oy = 0; oy < xdims[1]; oy++) {
+ const dim_t xYZW = xZW + oy * xst[1];
+ const dim_t oYZW = oZW + (oy + offset[1]) * ost[1];
+
+ for(dim_t ox = 0; ox < xdims[0]; ox++) {
+ const dim_t iMem = xYZW + ox;
+ const dim_t oMem = oYZW + (ox + offset[0]);
+ out[oMem] = X[iMem];
}
}
}
}
+}
- template<int dim>
- af::dim4 calcOffset(const af::dim4 dims)
- {
- af::dim4 offset;
- offset[0] = (dim == 0) ? dims[0] : 0;
- offset[1] = (dim == 1) ? dims[1] : 0;
- offset[2] = (dim == 2) ? dims[2] : 0;
- offset[3] = (dim == 3) ? dims[3] : 0;
- return offset;
- }
+template<int dim>
+af::dim4 calcOffset(const af::dim4 dims)
+{
+ af::dim4 offset;
+ offset[0] = (dim == 0) ? dims[0] : 0;
+ offset[1] = (dim == 1) ? dims[1] : 0;
+ offset[2] = (dim == 2) ? dims[2] : 0;
+ offset[3] = (dim == 3) ? dims[3] : 0;
+ return offset;
+}
- template<typename Tx, typename Ty>
- Array<Tx> join(const int dim, const Array<Tx> &first, const Array<Ty> &second)
- {
- // All dimensions except join dimension must be equal
- // Compute output dims
- af::dim4 odims;
- af::dim4 fdims = first.dims();
- af::dim4 sdims = second.dims();
-
- for(int i = 0; i < 4; i++) {
- if(i == dim) {
- odims[i] = fdims[i] + sdims[i];
- } else {
- odims[i] = fdims[i];
- }
+template<typename Tx, typename Ty>
+Array<Tx> join(const int dim, const Array<Tx> &first, const Array<Ty> &second)
+{
+ first.eval();
+ second.eval();
+
+ // All dimensions except join dimension must be equal
+ // Compute output dims
+ af::dim4 odims;
+ af::dim4 fdims = first.dims();
+ af::dim4 sdims = second.dims();
+
+ for(int i = 0; i < 4; i++) {
+ if(i == dim) {
+ odims[i] = fdims[i] + sdims[i];
+ } else {
+ odims[i] = fdims[i];
}
+ }
- Array<Tx> out = createEmptyArray<Tx>(odims);
+ Array<Tx> out = createEmptyArray<Tx>(odims);
+ auto func = [=] (Array<Tx> out, const Array<Tx> first, const Array<Ty> second) {
Tx* outPtr = out.get();
const Tx* fptr = first.get();
const Ty* sptr = second.get();
af::dim4 zero(0,0,0,0);
+ const af::dim4 odims = out.dims();
+ const af::dim4 fdims = first.dims();
+ const af::dim4 sdims = second.dims();
switch(dim) {
case 0:
join_append<Tx, Tx, 0>(outPtr, fptr, zero,
- odims, fdims, out.strides(), first.strides());
+ odims, fdims, out.strides(), first.strides());
join_append<Tx, Ty, 0>(outPtr, sptr, calcOffset<0>(fdims),
- odims, sdims, out.strides(), second.strides());
+ odims, sdims, out.strides(), second.strides());
break;
case 1:
join_append<Tx, Tx, 1>(outPtr, fptr, zero,
- odims, fdims, out.strides(), first.strides());
+ odims, fdims, out.strides(), first.strides());
join_append<Tx, Ty, 1>(outPtr, sptr, calcOffset<1>(fdims),
- odims, sdims, out.strides(), second.strides());
+ odims, sdims, out.strides(), second.strides());
break;
case 2:
join_append<Tx, Tx, 2>(outPtr, fptr, zero,
- odims, fdims, out.strides(), first.strides());
+ odims, fdims, out.strides(), first.strides());
join_append<Tx, Ty, 2>(outPtr, sptr, calcOffset<2>(fdims),
- odims, sdims, out.strides(), second.strides());
+ odims, sdims, out.strides(), second.strides());
break;
case 3:
join_append<Tx, Tx, 3>(outPtr, fptr, zero,
- odims, fdims, out.strides(), first.strides());
+ odims, fdims, out.strides(), first.strides());
join_append<Tx, Ty, 3>(outPtr, sptr, calcOffset<3>(fdims),
- odims, sdims, out.strides(), second.strides());
+ odims, sdims, out.strides(), second.strides());
break;
}
+ };
+ getQueue().enqueue(func, out, first, second);
- return out;
- }
+ return out;
+}
- template<typename T, int n_arrays>
- void join_wrapper(const int dim, Array<T> &out, const std::vector<Array<T>> &inputs)
- {
- af::dim4 zero(0,0,0,0);
- af::dim4 d = zero;
- switch(dim) {
- case 0:
- join_append<T, T, 0>(out.get(), inputs[0].get(), zero,
- out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
- for(int i = 1; i < n_arrays; i++) {
- d += inputs[i - 1].dims();
- join_append<T, T, 0>(out.get(), inputs[i].get(), calcOffset<0>(d),
- out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
- }
- break;
- case 1:
- join_append<T, T, 1>(out.get(), inputs[0].get(), zero,
- out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
- for(int i = 1; i < n_arrays; i++) {
- d += inputs[i - 1].dims();
- join_append<T, T, 1>(out.get(), inputs[i].get(), calcOffset<1>(d),
- out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
- }
- break;
- case 2:
- join_append<T, T, 2>(out.get(), inputs[0].get(), zero,
- out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
- for(int i = 1; i < n_arrays; i++) {
- d += inputs[i - 1].dims();
- join_append<T, T, 2>(out.get(), inputs[i].get(), calcOffset<2>(d),
- out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
- }
- break;
- case 3:
- join_append<T, T, 3>(out.get(), inputs[0].get(), zero,
- out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
- for(int i = 1; i < n_arrays; i++) {
- d += inputs[i - 1].dims();
- join_append<T, T, 3>(out.get(), inputs[i].get(), calcOffset<3>(d),
- out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
- }
- break;
- }
+template<typename T, int n_arrays>
+void join_wrapper(const int dim, Array<T> out, const std::vector<Array<T>> inputs)
+{
+ af::dim4 zero(0,0,0,0);
+ af::dim4 d = zero;
+ switch(dim) {
+ case 0:
+ join_append<T, T, 0>(out.get(), inputs[0].get(), zero,
+ out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
+ for(int i = 1; i < n_arrays; i++) {
+ d += inputs[i - 1].dims();
+ join_append<T, T, 0>(out.get(), inputs[i].get(), calcOffset<0>(d),
+ out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
+ }
+ break;
+ case 1:
+ join_append<T, T, 1>(out.get(), inputs[0].get(), zero,
+ out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
+ for(int i = 1; i < n_arrays; i++) {
+ d += inputs[i - 1].dims();
+ join_append<T, T, 1>(out.get(), inputs[i].get(), calcOffset<1>(d),
+ out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
+ }
+ break;
+ case 2:
+ join_append<T, T, 2>(out.get(), inputs[0].get(), zero,
+ out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
+ for(int i = 1; i < n_arrays; i++) {
+ d += inputs[i - 1].dims();
+ join_append<T, T, 2>(out.get(), inputs[i].get(), calcOffset<2>(d),
+ out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
+ }
+ break;
+ case 3:
+ join_append<T, T, 3>(out.get(), inputs[0].get(), zero,
+ out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
+ for(int i = 1; i < n_arrays; i++) {
+ d += inputs[i - 1].dims();
+ join_append<T, T, 3>(out.get(), inputs[i].get(), calcOffset<3>(d),
+ out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
+ }
+ break;
}
+}
- template<typename T>
- Array<T> join(const int dim, const std::vector<Array<T>> &inputs)
- {
- // All dimensions except join dimension must be equal
- // Compute output dims
- af::dim4 odims;
- const dim_t n_arrays = inputs.size();
- std::vector<af::dim4> idims(n_arrays);
-
- dim_t dim_size = 0;
- for(int i = 0; i < (int)idims.size(); i++) {
- idims[i] = inputs[i].dims();
- dim_size += idims[i][dim];
- }
-
- for(int i = 0; i < 4; i++) {
- if(i == dim) {
- odims[i] = dim_size;
- } else {
- odims[i] = idims[0][i];
- }
- }
+template<typename T>
+Array<T> join(const int dim, const std::vector<Array<T>> &inputs)
+{
+ for (int i=0; i<inputs.size(); ++i)
+ inputs[i].eval();
+ // All dimensions except join dimension must be equal
+ // Compute output dims
+ af::dim4 odims;
+ const dim_t n_arrays = inputs.size();
+ std::vector<af::dim4> idims(n_arrays);
- Array<T> out = createEmptyArray<T>(odims);
+ dim_t dim_size = 0;
+ for(int i = 0; i < (int)idims.size(); i++) {
+ idims[i] = inputs[i].dims();
+ dim_size += idims[i][dim];
+ }
- switch(n_arrays) {
- case 1:
- join_wrapper<T, 1>(dim, out, inputs);
- break;
- case 2:
- join_wrapper<T, 2>(dim, out, inputs);
- break;
- case 3:
- join_wrapper<T, 3>(dim, out, inputs);
- break;
- case 4:
- join_wrapper<T, 4>(dim, out, inputs);
- break;
- case 5:
- join_wrapper<T, 5>(dim, out, inputs);
- break;
- case 6:
- join_wrapper<T, 6>(dim, out, inputs);
- break;
- case 7:
- join_wrapper<T, 7>(dim, out, inputs);
- break;
- case 8:
- join_wrapper<T, 8>(dim, out, inputs);
- break;
- case 9:
- join_wrapper<T, 9>(dim, out, inputs);
- break;
- case 10:
- join_wrapper<T,10>(dim, out, inputs);
- break;
+ for(int i = 0; i < 4; i++) {
+ if(i == dim) {
+ odims[i] = dim_size;
+ } else {
+ odims[i] = idims[0][i];
}
+ }
- return out;
+ Array<T> out = createEmptyArray<T>(odims);
+
+ switch(n_arrays) {
+ case 1:
+ getQueue().enqueue(join_wrapper<T, 1>, dim, out, inputs);
+ break;
+ case 2:
+ getQueue().enqueue(join_wrapper<T, 2>, dim, out, inputs);
+ break;
+ case 3:
+ getQueue().enqueue(join_wrapper<T, 3>, dim, out, inputs);
+ break;
+ case 4:
+ getQueue().enqueue(join_wrapper<T, 4>, dim, out, inputs);
+ break;
+ case 5:
+ getQueue().enqueue(join_wrapper<T, 5>, dim, out, inputs);
+ break;
+ case 6:
+ getQueue().enqueue(join_wrapper<T, 6>, dim, out, inputs);
+ break;
+ case 7:
+ getQueue().enqueue(join_wrapper<T, 7>, dim, out, inputs);
+ break;
+ case 8:
+ getQueue().enqueue(join_wrapper<T, 8>, dim, out, inputs);
+ break;
+ case 9:
+ getQueue().enqueue(join_wrapper<T, 9>, dim, out, inputs);
+ break;
+ case 10:
+ getQueue().enqueue(join_wrapper<T,10>, dim, out, inputs);
+ break;
}
+ return out;
+}
+
#define INSTANTIATE(Tx, Ty) \
template Array<Tx> join<Tx, Ty>(const int dim, const Array<Tx> &first, const Array<Ty> &second);
- INSTANTIATE(float, float)
- INSTANTIATE(double, double)
- INSTANTIATE(cfloat, cfloat)
- INSTANTIATE(cdouble, cdouble)
- INSTANTIATE(int, int)
- INSTANTIATE(uint, uint)
- INSTANTIATE(intl, intl)
- INSTANTIATE(uintl, uintl)
- INSTANTIATE(uchar, uchar)
- INSTANTIATE(char, char)
- INSTANTIATE(ushort, ushort)
- INSTANTIATE(short, short)
+INSTANTIATE(float, float)
+INSTANTIATE(double, double)
+INSTANTIATE(cfloat, cfloat)
+INSTANTIATE(cdouble, cdouble)
+INSTANTIATE(int, int)
+INSTANTIATE(uint, uint)
+INSTANTIATE(intl, intl)
+INSTANTIATE(uintl, uintl)
+INSTANTIATE(uchar, uchar)
+INSTANTIATE(char, char)
+INSTANTIATE(ushort, ushort)
+INSTANTIATE(short, short)
#undef INSTANTIATE
#define INSTANTIATE(T) \
template Array<T> join<T>(const int dim, const std::vector<Array<T>> &inputs);
- INSTANTIATE(float)
- INSTANTIATE(double)
- INSTANTIATE(cfloat)
- INSTANTIATE(cdouble)
- INSTANTIATE(int)
- INSTANTIATE(uint)
- INSTANTIATE(intl)
- INSTANTIATE(uintl)
- INSTANTIATE(uchar)
- INSTANTIATE(char)
- INSTANTIATE(ushort)
- INSTANTIATE(short)
+INSTANTIATE(float)
+INSTANTIATE(double)
+INSTANTIATE(cfloat)
+INSTANTIATE(cdouble)
+INSTANTIATE(int)
+INSTANTIATE(uint)
+INSTANTIATE(intl)
+INSTANTIATE(uintl)
+INSTANTIATE(uchar)
+INSTANTIATE(char)
+INSTANTIATE(ushort)
+INSTANTIATE(short)
#undef INSTANTIATE
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list