[arrayfire] 36/284: Converted resize & shift cpu fns to async calls

Sun Feb 7 18:59:16 UTC 2016

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.

commit 14e9d3180ecc4dde44bf66dba743524408030ba7
Author: pradeep <pradeep at arrayfire.com>
Date:   Tue Nov 24 13:29:57 2015 -0500

    Converted resize & shift cpu fns to async calls
---
 src/backend/cpu/resize.cpp | 348 ++++++++++++++++++++++-----------------------
 src/backend/cpu/shift.cpp  |  60 ++++----
 2 files changed, 205 insertions(+), 203 deletions(-)

diff --git a/src/backend/cpu/resize.cpp b/src/backend/cpu/resize.cpp
index 8c4da58..160ed46 100644
--- a/src/backend/cpu/resize.cpp
+++ b/src/backend/cpu/resize.cpp
@@ -14,209 +14,205 @@
 #include <math.hpp>
 #include <types.hpp>
 #include <af/traits.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 
 namespace cpu
 {
-    /**
-     * noop function for round to avoid compilation
-     * issues due to lack of this function in C90 based
-     * compilers, it is only present in C99 and C++11
-     *
-     * This is not a full fledged implementation, this function
-     * is to be used only for positive numbers, i m using it here
-     * for calculating dimensions of arrays
-     */
-    dim_t round2int(float value)
-    {
-        return (dim_t)(value+0.5f);
-    }
-
-    using std::conditional;
-    using std::is_same;
+/**
+ * noop function for round to avoid compilation
+ * issues due to lack of this function in C90 based
+ * compilers, it is only present in C99 and C++11
+ *
+ * This is not a full fledged implementation, this function
+ * is to be used only for positive numbers, i m using it here
+ * for calculating dimensions of arrays
+ */
+dim_t round2int(float value)
+{
+    return (dim_t)(value+0.5f);
+}
 
-    template<typename T>
-    using wtype_t = typename conditional<is_same<T, double>::value, double, float>::type;
+using std::conditional;
+using std::is_same;
 
-    template<typename T>
-    using vtype_t = typename conditional<is_complex<T>::value,
-                                         T, wtype_t<T>
-                                        >::type;
+template<typename T>
+using wtype_t = typename conditional<is_same<T, double>::value, double, float>::type;
 
-    template<typename T, af_interp_type method>
-    struct resize_op
-    {
-        void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
-                  const af::dim4 &ostrides, const af::dim4 &istrides,
-                  const dim_t x, const dim_t y)
-        {
-            return;
-        }
-    };
+template<typename T>
+using vtype_t = typename conditional<is_complex<T>::value,
+                                     T, wtype_t<T>
+                                    >::type;
 
-    template<typename T>
-    struct resize_op<T, AF_INTERP_NEAREST>
+template<typename T, af_interp_type method>
+struct resize_op
+{
+    void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
+              const af::dim4 &ostrides, const af::dim4 &istrides,
+              const dim_t x, const dim_t y)
     {
-        void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
-                const af::dim4 &ostrides, const af::dim4 &istrides,
-                const dim_t x, const dim_t y)
-        {
-            // Compute Indices
-            dim_t i_x = round2int((float)x / (odims[0] / (float)idims[0]));
-            dim_t i_y = round2int((float)y / (odims[1] / (float)idims[1]));
-
-            if (i_x >= idims[0]) i_x = idims[0] - 1;
-            if (i_y >= idims[1]) i_y = idims[1] - 1;
-
-            dim_t i_off = i_y * istrides[1] + i_x;
-            dim_t o_off =   y * ostrides[1] + x;
-            // Copy values from all channels
-            for(dim_t w = 0; w < odims[3]; w++) {
-                dim_t wost = w * ostrides[3];
-                dim_t wist = w * istrides[3];
-                for(dim_t z = 0; z < odims[2]; z++) {
-                    outPtr[o_off + z * ostrides[2] + wost] = inPtr[i_off + z * istrides[2] + wist];
-                }
-            }
-        }
-    };
+        return;
+    }
+};
 
-    template<typename T>
-    struct resize_op<T, AF_INTERP_BILINEAR>
+template<typename T>
+struct resize_op<T, AF_INTERP_NEAREST>
+{
+    void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
+            const af::dim4 &ostrides, const af::dim4 &istrides,
+            const dim_t x, const dim_t y)
     {
-        void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
-                const af::dim4 &ostrides, const af::dim4 &istrides,
-                const dim_t x, const dim_t y)
-        {
-            // Compute Indices
-            float f_x = (float)x / (odims[0] / (float)idims[0]);
-            float f_y = (float)y / (odims[1] / (float)idims[1]);
-
-            dim_t i1_x  = floor(f_x);
-            dim_t i1_y  = floor(f_y);
-
-            if (i1_x >= idims[0]) i1_x = idims[0] - 1;
-            if (i1_y >= idims[1]) i1_y = idims[1] - 1;
-
-            float b   = f_x - i1_x;
-            float a   = f_y - i1_y;
-
-            dim_t i2_x  = (i1_x + 1 >= idims[0] ? idims[0] - 1 : i1_x + 1);
-            dim_t i2_y  = (i1_y + 1 >= idims[1] ? idims[1] - 1 : i1_y + 1);
-
-            typedef typename dtype_traits<T>::base_type BT;
-            typedef wtype_t<BT> WT;
-            typedef vtype_t<T> VT;
-
-            dim_t o_off = y * ostrides[1] + x;
-            // Copy values from all channels
-            for(dim_t w = 0; w < odims[3]; w++) {
-                dim_t wst = w * istrides[3];
-                for(dim_t z = 0; z < odims[2]; z++) {
-                    dim_t zst = z * istrides[2];
-                    dim_t channel_off = zst + wst;
-                    VT p1 = inPtr[i1_y * istrides[1] + i1_x + channel_off];
-                    VT p2 = inPtr[i2_y * istrides[1] + i1_x + channel_off];
-                    VT p3 = inPtr[i1_y * istrides[1] + i2_x + channel_off];
-                    VT p4 = inPtr[i2_y * istrides[1] + i2_x + channel_off];
-
-                    outPtr[o_off + z * ostrides[2] + w * ostrides[3]] =
-                                    scalar<WT>((1.0f - a) * (1.0f - b)) * p1 +
-                                    scalar<WT>((    a   ) * (1.0f - b)) * p2 +
-                                    scalar<WT>((1.0f - a) * (    b   )) * p3 +
-                                    scalar<WT>((    a   ) * (    b   )) * p4;
-                }
+        // Compute Indices
+        dim_t i_x = round2int((float)x / (odims[0] / (float)idims[0]));
+        dim_t i_y = round2int((float)y / (odims[1] / (float)idims[1]));
+
+        if (i_x >= idims[0]) i_x = idims[0] - 1;
+        if (i_y >= idims[1]) i_y = idims[1] - 1;
+
+        dim_t i_off = i_y * istrides[1] + i_x;
+        dim_t o_off =   y * ostrides[1] + x;
+        // Copy values from all channels
+        for(dim_t w = 0; w < odims[3]; w++) {
+            dim_t wost = w * ostrides[3];
+            dim_t wist = w * istrides[3];
+            for(dim_t z = 0; z < odims[2]; z++) {
+                outPtr[o_off + z * ostrides[2] + wost] = inPtr[i_off + z * istrides[2] + wist];
             }
         }
-    };
+    }
+};
 
-    template<typename T>
-    struct resize_op<T, AF_INTERP_LOWER>
+template<typename T>
+struct resize_op<T, AF_INTERP_BILINEAR>
+{
+    void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
+            const af::dim4 &ostrides, const af::dim4 &istrides,
+            const dim_t x, const dim_t y)
     {
-        void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
-                const af::dim4 &ostrides, const af::dim4 &istrides,
-                const dim_t x, const dim_t y)
-        {
-            // Compute Indices
-            dim_t i_x = floor((float)x / (odims[0] / (float)idims[0]));
-            dim_t i_y = floor((float)y / (odims[1] / (float)idims[1]));
-
-            if (i_x >= idims[0]) i_x = idims[0] - 1;
-            if (i_y >= idims[1]) i_y = idims[1] - 1;
-
-            dim_t i_off = i_y * istrides[1] + i_x;
-            dim_t o_off =   y * ostrides[1] + x;
-            // Copy values from all channels
-            for(dim_t w = 0; w < odims[3]; w++) {
-                dim_t wost = w * ostrides[3];
-                dim_t wist = w * istrides[3];
-                for(dim_t z = 0; z < odims[2]; z++) {
-                    outPtr[o_off + z * ostrides[2] + wost] = inPtr[i_off + z * istrides[2] + wist];
-                }
+        // Compute Indices
+        float f_x = (float)x / (odims[0] / (float)idims[0]);
+        float f_y = (float)y / (odims[1] / (float)idims[1]);
+
+        dim_t i1_x  = floor(f_x);
+        dim_t i1_y  = floor(f_y);
+
+        if (i1_x >= idims[0]) i1_x = idims[0] - 1;
+        if (i1_y >= idims[1]) i1_y = idims[1] - 1;
+
+        float b   = f_x - i1_x;
+        float a   = f_y - i1_y;
+
+        dim_t i2_x  = (i1_x + 1 >= idims[0] ? idims[0] - 1 : i1_x + 1);
+        dim_t i2_y  = (i1_y + 1 >= idims[1] ? idims[1] - 1 : i1_y + 1);
+
+        typedef typename dtype_traits<T>::base_type BT;
+        typedef wtype_t<BT> WT;
+        typedef vtype_t<T> VT;
+
+        dim_t o_off = y * ostrides[1] + x;
+        // Copy values from all channels
+        for(dim_t w = 0; w < odims[3]; w++) {
+            dim_t wst = w * istrides[3];
+            for(dim_t z = 0; z < odims[2]; z++) {
+                dim_t zst = z * istrides[2];
+                dim_t channel_off = zst + wst;
+                VT p1 = inPtr[i1_y * istrides[1] + i1_x + channel_off];
+                VT p2 = inPtr[i2_y * istrides[1] + i1_x + channel_off];
+                VT p3 = inPtr[i1_y * istrides[1] + i2_x + channel_off];
+                VT p4 = inPtr[i2_y * istrides[1] + i2_x + channel_off];
+
+                outPtr[o_off + z * ostrides[2] + w * ostrides[3]] =
+                                scalar<WT>((1.0f - a) * (1.0f - b)) * p1 +
+                                scalar<WT>((    a   ) * (1.0f - b)) * p2 +
+                                scalar<WT>((1.0f - a) * (    b   )) * p3 +
+                                scalar<WT>((    a   ) * (    b   )) * p4;
             }
         }
-    };
+    }
+};
 
-    template<typename T, af_interp_type method>
-    void resize_(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
-                 const af::dim4 &ostrides, const af::dim4 &istrides)
+template<typename T>
+struct resize_op<T, AF_INTERP_LOWER>
+{
+    void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
+            const af::dim4 &ostrides, const af::dim4 &istrides,
+            const dim_t x, const dim_t y)
     {
-        resize_op<T, method> op;
-        for(dim_t y = 0; y < odims[1]; y++) {
-            for(dim_t x = 0; x < odims[0]; x++) {
-                op(outPtr, inPtr, odims, idims, ostrides, istrides, x, y);
+        // Compute Indices
+        dim_t i_x = floor((float)x / (odims[0] / (float)idims[0]));
+        dim_t i_y = floor((float)y / (odims[1] / (float)idims[1]));
+
+        if (i_x >= idims[0]) i_x = idims[0] - 1;
+        if (i_y >= idims[1]) i_y = idims[1] - 1;
+
+        dim_t i_off = i_y * istrides[1] + i_x;
+        dim_t o_off =   y * ostrides[1] + x;
+        // Copy values from all channels
+        for(dim_t w = 0; w < odims[3]; w++) {
+            dim_t wost = w * ostrides[3];
+            dim_t wist = w * istrides[3];
+            for(dim_t z = 0; z < odims[2]; z++) {
+                outPtr[o_off + z * ostrides[2] + wost] = inPtr[i_off + z * istrides[2] + wist];
             }
         }
     }
+};
 
-    template<typename T>
-    Array<T> resize(const Array<T> &in, const dim_t odim0, const dim_t odim1,
-                    const af_interp_type method)
-    {
-        af::dim4 idims = in.dims();
-        af::dim4 odims(odim0, odim1, idims[2], idims[3]);
-
-        // Create output placeholder
-        Array<T> outArray = createValueArray(odims, (T)0);
-
-        // Get pointers to raw data
-        const T *inPtr = in.get();
-              T *outPtr = outArray.get();
-
-        af::dim4 ostrides = outArray.strides();
-        af::dim4 istrides = in.strides();
-
-        switch(method) {
-            case AF_INTERP_NEAREST:
-                resize_<T, AF_INTERP_NEAREST>(outPtr, inPtr, odims, idims, ostrides, istrides);
-                break;
-            case AF_INTERP_BILINEAR:
-                resize_<T, AF_INTERP_BILINEAR>(outPtr, inPtr, odims, idims, ostrides, istrides);
-                break;
-            case AF_INTERP_LOWER:
-                resize_<T, AF_INTERP_LOWER>(outPtr, inPtr, odims, idims, ostrides, istrides);
-                break;
-            default:
-                break;
+template<typename T, af_interp_type method>
+void resize_(Array<T> out, const Array<T> in)
+{
+    af::dim4 idims    = in.dims();
+    af::dim4 odims    = out.dims();
+    const T *inPtr    = in.get();
+          T *outPtr   = out.get();
+    af::dim4 ostrides = out.strides();
+    af::dim4 istrides = in.strides();
+
+    resize_op<T, method> op;
+    for(dim_t y = 0; y < odims[1]; y++) {
+        for(dim_t x = 0; x < odims[0]; x++) {
+            op(outPtr, inPtr, odims, idims, ostrides, istrides, x, y);
         }
-        return outArray;
     }
+}
 
+template<typename T>
+Array<T> resize(const Array<T> &in, const dim_t odim0, const dim_t odim1,
+                const af_interp_type method)
+{
+    af::dim4 idims = in.dims();
+    af::dim4 odims(odim0, odim1, idims[2], idims[3]);
+    // Create output placeholder
+    Array<T> out = createValueArray(odims, (T)0);
+    out.eval();
+    in.eval();
+
+    switch(method) {
+        case AF_INTERP_NEAREST:
+            getQueue().enqueue(resize_<T, AF_INTERP_NEAREST>, out, in); break;
+        case AF_INTERP_BILINEAR:
+            getQueue().enqueue(resize_<T, AF_INTERP_BILINEAR>, out, in); break;
+        case AF_INTERP_LOWER:
+            getQueue().enqueue(resize_<T, AF_INTERP_LOWER>, out, in); break;
+        default: break;
+    }
+    return out;
+}
 
-#define INSTANTIATE(T)                                                                            \
+#define INSTANTIATE(T)                                                                     \
     template Array<T> resize<T> (const Array<T> &in, const dim_t odim0, const dim_t odim1, \
                                  const af_interp_type method);
 
-
-    INSTANTIATE(float)
-    INSTANTIATE(double)
-    INSTANTIATE(cfloat)
-    INSTANTIATE(cdouble)
-    INSTANTIATE(int)
-    INSTANTIATE(uint)
-    INSTANTIATE(intl)
-    INSTANTIATE(uintl)
-    INSTANTIATE(uchar)
-    INSTANTIATE(char)
-    INSTANTIATE(short)
-    INSTANTIATE(ushort)
+INSTANTIATE(float)
+INSTANTIATE(double)
+INSTANTIATE(cfloat)
+INSTANTIATE(cdouble)
+INSTANTIATE(int)
+INSTANTIATE(uint)
+INSTANTIATE(intl)
+INSTANTIATE(uintl)
+INSTANTIATE(uchar)
+INSTANTIATE(char)
+INSTANTIATE(short)
+INSTANTIATE(ushort)
 }
diff --git a/src/backend/cpu/shift.cpp b/src/backend/cpu/shift.cpp
index 05cac4c..6a2b939 100644
--- a/src/backend/cpu/shift.cpp
+++ b/src/backend/cpu/shift.cpp
@@ -12,27 +12,32 @@
 #include <stdexcept>
 #include <err_cpu.hpp>
 #include <cassert>
+#include <platform.hpp>
+#include <async_queue.hpp>
 
 namespace cpu
 {
-    static inline dim_t simple_mod(const dim_t i, const dim_t dim)
-    {
-        return (i < dim) ? i : (i - dim);
-    }
+static inline dim_t simple_mod(const dim_t i, const dim_t dim)
+{
+    return (i < dim) ? i : (i - dim);
+}
 
-    template<typename T>
-    Array<T> shift(const Array<T> &in, const int sdims[4])
-    {
-        const af::dim4 iDims = in.dims();
-        af::dim4 oDims = iDims;
+template<typename T>
+Array<T> shift(const Array<T> &in, const int sdims[4])
+{
+    Array<T> out = createEmptyArray<T>(in.dims());
+    out.eval();
+    in.eval();
+    const af::dim4 temp(sdims[0], sdims[1], sdims[2], sdims[3]);
 
-        Array<T> out = createEmptyArray<T>(oDims);
+    auto func = [=] (Array<T> out, const Array<T> in, const af::dim4 sdims) {
 
         T* outPtr = out.get();
         const T* inPtr = in.get();
 
-        const af::dim4 ist = in.strides();
-        const af::dim4 ost = out.strides();
+        const af::dim4 oDims = out.dims();
+        const af::dim4 ist   = in.strides();
+        const af::dim4 ost   = out.strides();
 
         int sdims_[4];
         // Need to do this because we are mapping output to input in the kernel
@@ -65,24 +70,25 @@ namespace cpu
                 }
             }
         }
+    };
+    getQueue().enqueue(func, out, in, temp);
 
-        return out;
-    }
+    return out;
+}
 
 #define INSTANTIATE(T)                                                  \
     template Array<T> shift<T>(const Array<T> &in, const int sdims[4]); \
 
-    INSTANTIATE(float)
-    INSTANTIATE(double)
-    INSTANTIATE(cfloat)
-    INSTANTIATE(cdouble)
-    INSTANTIATE(int)
-    INSTANTIATE(uint)
-    INSTANTIATE(intl)
-    INSTANTIATE(uintl)
-    INSTANTIATE(uchar)
-    INSTANTIATE(char)
-    INSTANTIATE(short)
-    INSTANTIATE(ushort)
-
+INSTANTIATE(float)
+INSTANTIATE(double)
+INSTANTIATE(cfloat)
+INSTANTIATE(cdouble)
+INSTANTIATE(int)
+INSTANTIATE(uint)
+INSTANTIATE(intl)
+INSTANTIATE(uintl)
+INSTANTIATE(uchar)
+INSTANTIATE(char)
+INSTANTIATE(short)
+INSTANTIATE(ushort)
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git