[arrayfire] 48/284: converted join cpu func to async call

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:18 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.

commit e5ab6713a4260e3b89109f048f133db71f571174
Author: pradeep <pradeep at arrayfire.com>
Date:   Wed Dec 2 16:18:11 2015 -0500

    converted join cpu func to async call
---
 src/backend/cpu/join.cpp | 373 ++++++++++++++++++++++++-----------------------
 1 file changed, 193 insertions(+), 180 deletions(-)

diff --git a/src/backend/cpu/join.cpp b/src/backend/cpu/join.cpp
index 78d2a51..8af9c24 100644
--- a/src/backend/cpu/join.cpp
+++ b/src/backend/cpu/join.cpp
@@ -11,241 +11,254 @@
 #include <join.hpp>
 #include <stdexcept>
 #include <err_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 
 namespace cpu
 {
-    template<typename To, typename Tx, int dim>
-    void join_append(To *out, const Tx *X, const af::dim4 &offset,
-               const af::dim4 &odims, const af::dim4 &xdims,
-               const af::dim4 &ost, const af::dim4 &xst)
-    {
-        for(dim_t ow = 0; ow < xdims[3]; ow++) {
-            const dim_t xW = ow * xst[3];
-            const dim_t oW = (ow + offset[3]) * ost[3];
-
-            for(dim_t oz = 0; oz < xdims[2]; oz++) {
-                const dim_t xZW = xW + oz * xst[2];
-                const dim_t oZW = oW + (oz + offset[2]) * ost[2];
-
-                for(dim_t oy = 0; oy < xdims[1]; oy++) {
-                    const dim_t xYZW = xZW + oy * xst[1];
-                    const dim_t oYZW = oZW + (oy + offset[1]) * ost[1];
-
-                    for(dim_t ox = 0; ox < xdims[0]; ox++) {
-                        const dim_t iMem = xYZW + ox;
-                        const dim_t oMem = oYZW + (ox + offset[0]);
-                        out[oMem] = X[iMem];
-                    }
+template<typename To, typename Tx, int dim>
+void join_append(To *out, const Tx *X, const af::dim4 &offset,
+           const af::dim4 &odims, const af::dim4 &xdims,
+           const af::dim4 &ost, const af::dim4 &xst)
+{
+    for(dim_t ow = 0; ow < xdims[3]; ow++) {
+        const dim_t xW = ow * xst[3];
+        const dim_t oW = (ow + offset[3]) * ost[3];
+
+        for(dim_t oz = 0; oz < xdims[2]; oz++) {
+            const dim_t xZW = xW + oz * xst[2];
+            const dim_t oZW = oW + (oz + offset[2]) * ost[2];
+
+            for(dim_t oy = 0; oy < xdims[1]; oy++) {
+                const dim_t xYZW = xZW + oy * xst[1];
+                const dim_t oYZW = oZW + (oy + offset[1]) * ost[1];
+
+                for(dim_t ox = 0; ox < xdims[0]; ox++) {
+                    const dim_t iMem = xYZW + ox;
+                    const dim_t oMem = oYZW + (ox + offset[0]);
+                    out[oMem] = X[iMem];
                 }
             }
         }
     }
+}
 
-    template<int dim>
-    af::dim4 calcOffset(const af::dim4 dims)
-    {
-        af::dim4 offset;
-        offset[0] = (dim == 0) ? dims[0] : 0;
-        offset[1] = (dim == 1) ? dims[1] : 0;
-        offset[2] = (dim == 2) ? dims[2] : 0;
-        offset[3] = (dim == 3) ? dims[3] : 0;
-        return offset;
-    }
+template<int dim>
+af::dim4 calcOffset(const af::dim4 dims)
+{
+    af::dim4 offset;
+    offset[0] = (dim == 0) ? dims[0] : 0;
+    offset[1] = (dim == 1) ? dims[1] : 0;
+    offset[2] = (dim == 2) ? dims[2] : 0;
+    offset[3] = (dim == 3) ? dims[3] : 0;
+    return offset;
+}
 
-    template<typename Tx, typename Ty>
-    Array<Tx> join(const int dim, const Array<Tx> &first, const Array<Ty> &second)
-    {
-        // All dimensions except join dimension must be equal
-        // Compute output dims
-        af::dim4 odims;
-        af::dim4 fdims = first.dims();
-        af::dim4 sdims = second.dims();
-
-        for(int i = 0; i < 4; i++) {
-            if(i == dim) {
-                odims[i] = fdims[i] + sdims[i];
-            } else {
-                odims[i] = fdims[i];
-            }
+template<typename Tx, typename Ty>
+Array<Tx> join(const int dim, const Array<Tx> &first, const Array<Ty> &second)
+{
+    first.eval();
+    second.eval();
+
+    // All dimensions except join dimension must be equal
+    // Compute output dims
+    af::dim4 odims;
+    af::dim4 fdims = first.dims();
+    af::dim4 sdims = second.dims();
+
+    for(int i = 0; i < 4; i++) {
+        if(i == dim) {
+            odims[i] = fdims[i] + sdims[i];
+        } else {
+            odims[i] = fdims[i];
         }
+    }
 
-        Array<Tx> out = createEmptyArray<Tx>(odims);
+    Array<Tx> out = createEmptyArray<Tx>(odims);
 
+    auto func = [=] (Array<Tx> out, const Array<Tx> first, const Array<Ty> second) {
         Tx* outPtr = out.get();
         const Tx* fptr = first.get();
         const Ty* sptr = second.get();
 
         af::dim4 zero(0,0,0,0);
+        const af::dim4 odims = out.dims();
+        const af::dim4 fdims = first.dims();
+        const af::dim4 sdims = second.dims();
 
         switch(dim) {
             case 0:
                 join_append<Tx, Tx, 0>(outPtr, fptr, zero,
-                                       odims, fdims, out.strides(), first.strides());
+                        odims, fdims, out.strides(), first.strides());
                 join_append<Tx, Ty, 0>(outPtr, sptr, calcOffset<0>(fdims),
-                                       odims, sdims, out.strides(), second.strides());
+                        odims, sdims, out.strides(), second.strides());
                 break;
             case 1:
                 join_append<Tx, Tx, 1>(outPtr, fptr, zero,
-                                       odims, fdims, out.strides(), first.strides());
+                        odims, fdims, out.strides(), first.strides());
                 join_append<Tx, Ty, 1>(outPtr, sptr, calcOffset<1>(fdims),
-                                       odims, sdims, out.strides(), second.strides());
+                        odims, sdims, out.strides(), second.strides());
                 break;
             case 2:
                 join_append<Tx, Tx, 2>(outPtr, fptr, zero,
-                                       odims, fdims, out.strides(), first.strides());
+                        odims, fdims, out.strides(), first.strides());
                 join_append<Tx, Ty, 2>(outPtr, sptr, calcOffset<2>(fdims),
-                                       odims, sdims, out.strides(), second.strides());
+                        odims, sdims, out.strides(), second.strides());
                 break;
             case 3:
                 join_append<Tx, Tx, 3>(outPtr, fptr, zero,
-                                       odims, fdims, out.strides(), first.strides());
+                        odims, fdims, out.strides(), first.strides());
                 join_append<Tx, Ty, 3>(outPtr, sptr, calcOffset<3>(fdims),
-                                       odims, sdims, out.strides(), second.strides());
+                        odims, sdims, out.strides(), second.strides());
                 break;
         }
+    };
+    getQueue().enqueue(func, out, first, second);
 
-        return out;
-    }
+    return out;
+}
 
-    template<typename T, int n_arrays>
-    void join_wrapper(const int dim, Array<T> &out, const std::vector<Array<T>> &inputs)
-    {
-        af::dim4 zero(0,0,0,0);
-        af::dim4 d = zero;
-        switch(dim) {
-            case 0:
-                join_append<T, T, 0>(out.get(), inputs[0].get(), zero,
-                            out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
-                for(int i = 1; i < n_arrays; i++) {
-                    d += inputs[i - 1].dims();
-                    join_append<T, T, 0>(out.get(), inputs[i].get(), calcOffset<0>(d),
-                            out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
-                }
-                break;
-            case 1:
-                join_append<T, T, 1>(out.get(), inputs[0].get(), zero,
-                            out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
-                for(int i = 1; i < n_arrays; i++) {
-                    d += inputs[i - 1].dims();
-                    join_append<T, T, 1>(out.get(), inputs[i].get(), calcOffset<1>(d),
-                            out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
-                }
-                break;
-            case 2:
-                join_append<T, T, 2>(out.get(), inputs[0].get(), zero,
-                            out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
-                for(int i = 1; i < n_arrays; i++) {
-                    d += inputs[i - 1].dims();
-                    join_append<T, T, 2>(out.get(), inputs[i].get(), calcOffset<2>(d),
-                            out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
-                }
-                break;
-            case 3:
-                join_append<T, T, 3>(out.get(), inputs[0].get(), zero,
-                            out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
-                for(int i = 1; i < n_arrays; i++) {
-                    d += inputs[i - 1].dims();
-                    join_append<T, T, 3>(out.get(), inputs[i].get(), calcOffset<3>(d),
-                            out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
-                }
-                break;
-        }
+template<typename T, int n_arrays>
+void join_wrapper(const int dim, Array<T> out, const std::vector<Array<T>> inputs)
+{
+    af::dim4 zero(0,0,0,0);
+    af::dim4 d = zero;
+    switch(dim) {
+        case 0:
+            join_append<T, T, 0>(out.get(), inputs[0].get(), zero,
+                        out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
+            for(int i = 1; i < n_arrays; i++) {
+                d += inputs[i - 1].dims();
+                join_append<T, T, 0>(out.get(), inputs[i].get(), calcOffset<0>(d),
+                        out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
+            }
+            break;
+        case 1:
+            join_append<T, T, 1>(out.get(), inputs[0].get(), zero,
+                        out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
+            for(int i = 1; i < n_arrays; i++) {
+                d += inputs[i - 1].dims();
+                join_append<T, T, 1>(out.get(), inputs[i].get(), calcOffset<1>(d),
+                        out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
+            }
+            break;
+        case 2:
+            join_append<T, T, 2>(out.get(), inputs[0].get(), zero,
+                        out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
+            for(int i = 1; i < n_arrays; i++) {
+                d += inputs[i - 1].dims();
+                join_append<T, T, 2>(out.get(), inputs[i].get(), calcOffset<2>(d),
+                        out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
+            }
+            break;
+        case 3:
+            join_append<T, T, 3>(out.get(), inputs[0].get(), zero,
+                        out.dims(), inputs[0].dims(), out.strides(), inputs[0].strides());
+            for(int i = 1; i < n_arrays; i++) {
+                d += inputs[i - 1].dims();
+                join_append<T, T, 3>(out.get(), inputs[i].get(), calcOffset<3>(d),
+                        out.dims(), inputs[i].dims(), out.strides(), inputs[i].strides());
+            }
+            break;
     }
+}
 
-    template<typename T>
-    Array<T> join(const int dim, const std::vector<Array<T>> &inputs)
-    {
-        // All dimensions except join dimension must be equal
-        // Compute output dims
-        af::dim4 odims;
-        const dim_t n_arrays = inputs.size();
-        std::vector<af::dim4> idims(n_arrays);
-
-        dim_t dim_size = 0;
-        for(int i = 0; i < (int)idims.size(); i++) {
-            idims[i] = inputs[i].dims();
-            dim_size += idims[i][dim];
-        }
-
-        for(int i = 0; i < 4; i++) {
-            if(i == dim) {
-                odims[i] = dim_size;
-            } else {
-                odims[i] = idims[0][i];
-            }
-        }
+template<typename T>
+Array<T> join(const int dim, const std::vector<Array<T>> &inputs)
+{
+    for (int i=0; i<inputs.size(); ++i)
+        inputs[i].eval();
+    // All dimensions except join dimension must be equal
+    // Compute output dims
+    af::dim4 odims;
+    const dim_t n_arrays = inputs.size();
+    std::vector<af::dim4> idims(n_arrays);
 
-        Array<T> out = createEmptyArray<T>(odims);
+    dim_t dim_size = 0;
+    for(int i = 0; i < (int)idims.size(); i++) {
+        idims[i] = inputs[i].dims();
+        dim_size += idims[i][dim];
+    }
 
-        switch(n_arrays) {
-            case 1:
-                join_wrapper<T, 1>(dim, out, inputs);
-                break;
-            case 2:
-                join_wrapper<T, 2>(dim, out, inputs);
-                break;
-            case 3:
-                join_wrapper<T, 3>(dim, out, inputs);
-                break;
-            case 4:
-                join_wrapper<T, 4>(dim, out, inputs);
-                break;
-            case 5:
-                join_wrapper<T, 5>(dim, out, inputs);
-                break;
-            case 6:
-                join_wrapper<T, 6>(dim, out, inputs);
-                break;
-            case 7:
-                join_wrapper<T, 7>(dim, out, inputs);
-                break;
-            case 8:
-                join_wrapper<T, 8>(dim, out, inputs);
-                break;
-            case 9:
-                join_wrapper<T, 9>(dim, out, inputs);
-                break;
-            case 10:
-                join_wrapper<T,10>(dim, out, inputs);
-                break;
+    for(int i = 0; i < 4; i++) {
+        if(i == dim) {
+            odims[i] = dim_size;
+        } else {
+            odims[i] = idims[0][i];
         }
+    }
 
-        return out;
+    Array<T> out = createEmptyArray<T>(odims);
+
+    switch(n_arrays) {
+        case 1:
+            getQueue().enqueue(join_wrapper<T, 1>, dim, out, inputs);
+            break;
+        case 2:
+            getQueue().enqueue(join_wrapper<T, 2>, dim, out, inputs);
+            break;
+        case 3:
+            getQueue().enqueue(join_wrapper<T, 3>, dim, out, inputs);
+            break;
+        case 4:
+            getQueue().enqueue(join_wrapper<T, 4>, dim, out, inputs);
+            break;
+        case 5:
+            getQueue().enqueue(join_wrapper<T, 5>, dim, out, inputs);
+            break;
+        case 6:
+            getQueue().enqueue(join_wrapper<T, 6>, dim, out, inputs);
+            break;
+        case 7:
+            getQueue().enqueue(join_wrapper<T, 7>, dim, out, inputs);
+            break;
+        case 8:
+            getQueue().enqueue(join_wrapper<T, 8>, dim, out, inputs);
+            break;
+        case 9:
+            getQueue().enqueue(join_wrapper<T, 9>, dim, out, inputs);
+            break;
+        case 10:
+            getQueue().enqueue(join_wrapper<T,10>, dim, out, inputs);
+            break;
     }
 
+    return out;
+}
+
 #define INSTANTIATE(Tx, Ty) \
     template Array<Tx> join<Tx, Ty>(const int dim, const Array<Tx> &first, const Array<Ty> &second);
 
-    INSTANTIATE(float,   float)
-    INSTANTIATE(double,  double)
-    INSTANTIATE(cfloat,  cfloat)
-    INSTANTIATE(cdouble, cdouble)
-    INSTANTIATE(int,     int)
-    INSTANTIATE(uint,    uint)
-    INSTANTIATE(intl,    intl)
-    INSTANTIATE(uintl,   uintl)
-    INSTANTIATE(uchar,   uchar)
-    INSTANTIATE(char,    char)
-    INSTANTIATE(ushort,  ushort)
-    INSTANTIATE(short,   short)
+INSTANTIATE(float,   float)
+INSTANTIATE(double,  double)
+INSTANTIATE(cfloat,  cfloat)
+INSTANTIATE(cdouble, cdouble)
+INSTANTIATE(int,     int)
+INSTANTIATE(uint,    uint)
+INSTANTIATE(intl,    intl)
+INSTANTIATE(uintl,   uintl)
+INSTANTIATE(uchar,   uchar)
+INSTANTIATE(char,    char)
+INSTANTIATE(ushort,  ushort)
+INSTANTIATE(short,   short)
 
 #undef INSTANTIATE
 
 #define INSTANTIATE(T)      \
     template Array<T> join<T>(const int dim, const std::vector<Array<T>> &inputs);
 
-    INSTANTIATE(float)
-    INSTANTIATE(double)
-    INSTANTIATE(cfloat)
-    INSTANTIATE(cdouble)
-    INSTANTIATE(int)
-    INSTANTIATE(uint)
-    INSTANTIATE(intl)
-    INSTANTIATE(uintl)
-    INSTANTIATE(uchar)
-    INSTANTIATE(char)
-    INSTANTIATE(ushort)
-    INSTANTIATE(short)
+INSTANTIATE(float)
+INSTANTIATE(double)
+INSTANTIATE(cfloat)
+INSTANTIATE(cdouble)
+INSTANTIATE(int)
+INSTANTIATE(uint)
+INSTANTIATE(intl)
+INSTANTIATE(uintl)
+INSTANTIATE(uchar)
+INSTANTIATE(char)
+INSTANTIATE(ushort)
+INSTANTIATE(short)
 
 #undef INSTANTIATE
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list