[arrayfire] 28/284: converted qr & solve cpu functions to async calls

Sun Feb 7 18:59:15 UTC 2016

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.

commit e0d7c12d97a69d950623691b4c17c3755b1387f1
Author: pradeep <pradeep at arrayfire.com>
Date:   Fri Nov 20 13:58:35 2015 -0500

    converted qr & solve cpu functions to async calls
    
    Fixed lu async function
---
 src/backend/cpu/lu.cpp    |  66 ++++++++++--------------
 src/backend/cpu/qr.cpp    |  56 ++++++++------------
 src/backend/cpu/solve.cpp | 127 +++++++++++++++++++++-------------------------
 3 files changed, 105 insertions(+), 144 deletions(-)

diff --git a/src/backend/cpu/lu.cpp b/src/backend/cpu/lu.cpp
index ed165cb..ff0be43 100644
--- a/src/backend/cpu/lu.cpp
+++ b/src/backend/cpu/lu.cpp
@@ -11,7 +11,6 @@
 #include <err_common.hpp>
 
 #if defined(WITH_CPU_LINEAR_ALGEBRA)
-
 #include <af/dim4.hpp>
 #include <handle.hpp>
 #include <iostream>
@@ -26,9 +25,7 @@ namespace cpu
 {
 
 template<typename T>
-using getrf_func_def = int (*)(ORDER_TYPE, int, int,
-                               T*, int,
-                               int*);
+using getrf_func_def = int (*)(ORDER_TYPE, int, int, T*, int, int*);
 
 #define LU_FUNC_DEF( FUNC )                                     \
 template<typename T> FUNC##_func_def<T> FUNC##_func();
@@ -45,7 +42,7 @@ LU_FUNC(getrf , cfloat , c)
 LU_FUNC(getrf , cdouble, z)
 
 template<typename T>
-void lu_split(Array<T> &lower, Array<T> &upper, const Array<T> &in)
+void lu_split(Array<T> lower, Array<T> upper, const Array<T> in)
 {
     T *l = lower.get();
     T *u = upper.get();
@@ -54,7 +51,6 @@ void lu_split(Array<T> &lower, Array<T> &upper, const Array<T> &in)
     dim4 ldm = lower.dims();
     dim4 udm = upper.dims();
     dim4 idm = in.dims();
-
     dim4 lst = lower.strides();
     dim4 ust = upper.strides();
     dim4 ist = in.strides();
@@ -79,20 +75,14 @@ void lu_split(Array<T> &lower, Array<T> &upper, const Array<T> &in)
                     const dim_t uMem = uYZW + ox;
                     const dim_t iMem = iYZW + ox;
                     if(ox > oy) {
-                        if(oy < ldm[1])
-                            l[lMem] = i[iMem];
-                        if(ox < udm[0])
-                            u[uMem] = scalar<T>(0);
+                        if(oy < ldm[1]) l[lMem] = i[iMem];
+                        if(ox < udm[0]) u[uMem] = scalar<T>(0);
                     } else if (oy > ox) {
-                        if(oy < ldm[1])
-                            l[lMem] = scalar<T>(0);
-                        if(ox < udm[0])
-                            u[uMem] = i[iMem];
+                        if(oy < ldm[1]) l[lMem] = scalar<T>(0);
+                        if(ox < udm[0]) u[uMem] = i[iMem];
                     } else if(ox == oy) {
-                        if(oy < ldm[1])
-                            l[lMem] = scalar<T>(1.0);
-                        if(ox < udm[0])
-                            u[uMem] = i[iMem];
+                        if(oy < ldm[1]) l[lMem] = scalar<T>(1.0);
+                        if(ox < udm[0]) u[uMem] = i[iMem];
                     }
                 }
             }
@@ -100,17 +90,15 @@ void lu_split(Array<T> &lower, Array<T> &upper, const Array<T> &in)
     }
 }
 
-void convertPivot(Array<int> &pivot, int out_sz)
+void convertPivot(Array<int> p, Array<int> pivot)
 {
-    Array<int> p = range<int>(dim4(out_sz), 0);
     int *d_pi = pivot.get();
     int *d_po = p.get();
-    dim_t d0 = pivot.dims()[0];
+    dim_t d0  = pivot.dims()[0];
     for(int j = 0; j < (int)d0; j++) {
         // 1 indexed in pivot
         std::swap(d_po[j], d_po[d_pi[j] - 1]);
     }
-    pivot = p;
 }
 
 template<typename T>
@@ -138,26 +126,21 @@ Array<int> lu_inplace(Array<T> &in, const bool convert_pivot)
     dim4 iDims = in.dims();
     Array<int> pivot = createEmptyArray<int>(af::dim4(min(iDims[0], iDims[1]), 1, 1, 1));
 
-    auto func = [=] (Array<T> in, Array<int> pivot, const bool convert_pivot) {
+    auto func = [=] (Array<T> in, Array<int> pivot) {
         dim4 iDims = in.dims();
         getrf_func<T>()(AF_LAPACK_COL_MAJOR, iDims[0], iDims[1], in.get(), in.strides()[1], pivot.get());
-        if(convert_pivot) convertPivot(pivot, iDims[0]);
     };
-
-    getQueue().enqueue(func, in, pivot, convert_pivot);
-
-    return pivot;
+    getQueue().enqueue(func, in, pivot);
+
+    if(convert_pivot) {
+        Array<int> p = range<int>(dim4(iDims[0]), 0);
+        getQueue().enqueue(convertPivot, p, pivot);
+        return p;
+    } else {
+        return pivot;
+    }
 }
 
-#define INSTANTIATE_LU(T)                                                                           \
-    template Array<int> lu_inplace<T>(Array<T> &in, const bool convert_pivot);                      \
-    template void lu<T>(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in);
-
-INSTANTIATE_LU(float)
-INSTANTIATE_LU(cfloat)
-INSTANTIATE_LU(double)
-INSTANTIATE_LU(cdouble)
-
 }
 
 #else
@@ -177,6 +160,12 @@ Array<int> lu_inplace(Array<T> &in, const bool convert_pivot)
     AF_ERROR("Linear Algebra is disabled on CPU", AF_ERR_NOT_CONFIGURED);
 }
 
+}
+
+#endif
+
+namespace cpu
+{
 #define INSTANTIATE_LU(T)                                                                           \
     template Array<int> lu_inplace<T>(Array<T> &in, const bool convert_pivot);                      \
     template void lu<T>(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in);
@@ -185,7 +174,4 @@ INSTANTIATE_LU(float)
 INSTANTIATE_LU(cfloat)
 INSTANTIATE_LU(double)
 INSTANTIATE_LU(cdouble)
-
 }
-
-#endif
diff --git a/src/backend/cpu/qr.cpp b/src/backend/cpu/qr.cpp
index d1c3e23..b5f1806 100644
--- a/src/backend/cpu/qr.cpp
+++ b/src/backend/cpu/qr.cpp
@@ -11,28 +11,23 @@
 #include <err_common.hpp>
 
 #if defined(WITH_CPU_LINEAR_ALGEBRA)
-
 #include <af/dim4.hpp>
 #include <handle.hpp>
-#include <iostream>
 #include <cassert>
 #include <err_cpu.hpp>
 #include <triangle.hpp>
-
 #include <lapack_helper.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 
 namespace cpu
 {
 
 template<typename T>
-using geqrf_func_def = int (*)(ORDER_TYPE, int, int,
-                               T*, int,
-                               T*);
+using geqrf_func_def = int (*)(ORDER_TYPE, int, int, T*, int, T*);
 
 template<typename T>
-using gqr_func_def = int (*)(ORDER_TYPE, int, int, int,
-                             T*, int,
-                             const T*);
+using gqr_func_def = int (*)(ORDER_TYPE, int, int, int, T*, int, const T*);
 
 #define QR_FUNC_DEF( FUNC )                                         \
 template<typename T> FUNC##_func_def<T> FUNC##_func();
@@ -65,8 +60,8 @@ template<typename T>
 void qr(Array<T> &q, Array<T> &r, Array<T> &t, const Array<T> &in)
 {
     dim4 iDims = in.dims();
-    int M = iDims[0];
-    int N = iDims[1];
+    int M      = iDims[0];
+    int N      = iDims[1];
 
     q = padArray<T, T>(in, dim4(M, max(M, N)));
     q.resetDims(iDims);
@@ -78,39 +73,29 @@ void qr(Array<T> &q, Array<T> &r, Array<T> &t, const Array<T> &in)
 
     triangle<T, true, false>(r, q);
 
-    gqr_func<T>()(AF_LAPACK_COL_MAJOR,
-                  M, M, min(M, N),
-                  q.get(), q.strides()[1],
-                  t.get());
-
+    auto func = [=] (Array<T> q, Array<T> t, int M, int N) {
+        gqr_func<T>()(AF_LAPACK_COL_MAJOR, M, M, min(M, N), q.get(), q.strides()[1], t.get());
+    };
     q.resetDims(dim4(M, M));
+    getQueue().enqueue(func, q, t, M, N);
 }
 
 template<typename T>
 Array<T> qr_inplace(Array<T> &in)
 {
     dim4 iDims = in.dims();
-    int M = iDims[0];
-    int N = iDims[1];
-
+    int M      = iDims[0];
+    int N      = iDims[1];
     Array<T> t = createEmptyArray<T>(af::dim4(min(M, N), 1, 1, 1));
 
-    geqrf_func<T>()(AF_LAPACK_COL_MAJOR, M, N,
-                    in.get(), in.strides()[1],
-                    t.get());
+    auto func = [=] (Array<T> in, Array<T> t, int M, int N) {
+        geqrf_func<T>()(AF_LAPACK_COL_MAJOR, M, N, in.get(), in.strides()[1], t.get());
+    };
+    getQueue().enqueue(func, in, t, M, N);
 
     return t;
 }
 
-#define INSTANTIATE_QR(T)                                                                           \
-    template Array<T> qr_inplace<T>(Array<T> &in);                                                \
-    template void qr<T>(Array<T> &q, Array<T> &r, Array<T> &t, const Array<T> &in);
-
-INSTANTIATE_QR(float)
-INSTANTIATE_QR(cfloat)
-INSTANTIATE_QR(double)
-INSTANTIATE_QR(cdouble)
-
 }
 
 #else
@@ -130,6 +115,12 @@ Array<T> qr_inplace(Array<T> &in)
     AF_ERROR("Linear Algebra is disabled on CPU", AF_ERR_NOT_CONFIGURED);
 }
 
+}
+
+#endif
+
+namespace cpu
+{
 #define INSTANTIATE_QR(T)                                                                           \
     template Array<T> qr_inplace<T>(Array<T> &in);                                                \
     template void qr<T>(Array<T> &q, Array<T> &r, Array<T> &t, const Array<T> &in);
@@ -138,7 +129,4 @@ INSTANTIATE_QR(float)
 INSTANTIATE_QR(cfloat)
 INSTANTIATE_QR(double)
 INSTANTIATE_QR(cdouble)
-
 }
-
-#endif
diff --git a/src/backend/cpu/solve.cpp b/src/backend/cpu/solve.cpp
index 1e88e8d..b279971 100644
--- a/src/backend/cpu/solve.cpp
+++ b/src/backend/cpu/solve.cpp
@@ -11,52 +11,40 @@
 #include <err_common.hpp>
 
 #if defined(WITH_CPU_LINEAR_ALGEBRA)
-
 #include <af/dim4.hpp>
 #include <handle.hpp>
-#include <range.hpp>
-#include <iostream>
 #include <cassert>
 #include <err_cpu.hpp>
-
 #include <lapack_helper.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 
 namespace cpu
 {
 
 template<typename T>
 using gesv_func_def = int (*)(ORDER_TYPE, int, int,
-                              T *, int,
-                              int *,
-                              T *, int);
+                              T *, int, int *, T *, int);
 
 template<typename T>
-using gels_func_def = int (*)(ORDER_TYPE, char,
-                              int, int, int,
-                              T *, int,
-                              T *, int);
+using gels_func_def = int (*)(ORDER_TYPE, char, int, int, int,
+                              T *, int, T *, int);
 
 template<typename T>
-using getrs_func_def = int (*)(ORDER_TYPE, char,
-                               int, int,
-                               const T *, int,
-                               const int *,
-                               T *, int);
+using getrs_func_def = int (*)(ORDER_TYPE, char, int, int,
+                               const T *, int, const int *, T *, int);
 
 template<typename T>
-using trtrs_func_def = int (*)(ORDER_TYPE,
-                               char, char, char,
-                               int, int,
-                               const T *, int,
-                               T *, int);
+using trtrs_func_def = int (*)(ORDER_TYPE, char, char, char, int, int,
+                               const T *, int, T *, int);
 
 
-#define SOLVE_FUNC_DEF( FUNC )                                      \
+#define SOLVE_FUNC_DEF( FUNC )                                 \
 template<typename T> FUNC##_func_def<T> FUNC##_func();
 
 
-#define SOLVE_FUNC( FUNC, TYPE, PREFIX )                            \
-template<> FUNC##_func_def<TYPE>     FUNC##_func<TYPE>()            \
+#define SOLVE_FUNC( FUNC, TYPE, PREFIX )                       \
+template<> FUNC##_func_def<TYPE>     FUNC##_func<TYPE>()       \
 { return & LAPACK_NAME(PREFIX##FUNC); }
 
 SOLVE_FUNC_DEF( gesv )
@@ -87,16 +75,16 @@ template<typename T>
 Array<T> solveLU(const Array<T> &A, const Array<int> &pivot,
                  const Array<T> &b, const af_mat_prop options)
 {
-    int N = A.dims()[0];
-    int NRHS = b.dims()[1];
-
+    int N        = A.dims()[0];
+    int NRHS     = b.dims()[1];
     Array< T > B = copyArray<T>(b);
 
-    getrs_func<T>()(AF_LAPACK_COL_MAJOR, 'N',
-                    N, NRHS,
-                    A.get(), A.strides()[1],
-                    pivot.get(),
-                    B.get(), B.strides()[1]);
+    auto func = [=] (Array<T> A, Array<T> B, Array<int> pivot, int N, int NRHS) {
+        getrs_func<T>()(AF_LAPACK_COL_MAJOR, 'N',
+                        N, NRHS, A.get(), A.strides()[1],
+                        pivot.get(), B.get(), B.strides()[1]);
+    };
+    getQueue().enqueue(func, A, B, pivot, N, NRHS);
 
     return B;
 }
@@ -105,16 +93,20 @@ template<typename T>
 Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop options)
 {
     Array<T> B = copyArray<T>(b);
-    int N = B.dims()[0];
-    int NRHS = B.dims()[1];
-
-    trtrs_func<T>()(AF_LAPACK_COL_MAJOR,
-                    options & AF_MAT_UPPER ? 'U' : 'L',
-                    'N', // transpose flag
-                    options & AF_MAT_DIAG_UNIT ? 'U' : 'N',
-                    N, NRHS,
-                    A.get(), A.strides()[1],
-                    B.get(), B.strides()[1]);
+    int N      = B.dims()[0];
+    int NRHS   = B.dims()[1];
+
+    auto func = [=] (Array<T> A, Array<T> B, int N, int NRHS, const af_mat_prop options) {
+        trtrs_func<T>()(AF_LAPACK_COL_MAJOR,
+                        options & AF_MAT_UPPER ? 'U' : 'L',
+                        'N', // transpose flag
+                        options & AF_MAT_DIAG_UNIT ? 'U' : 'N',
+                        N, NRHS,
+                        A.get(), A.strides()[1],
+                        B.get(), B.strides()[1]);
+    };
+    getQueue().enqueue(func, A, B, N, NRHS, options);
+
     return B;
 }
 
@@ -132,41 +124,34 @@ Array<T> solve(const Array<T> &a, const Array<T> &b, const af_mat_prop options)
     int N = a.dims()[1];
     int K = b.dims()[1];
 
-
     Array<T> A = copyArray<T>(a);
     Array<T> B = padArray<T, T>(b, dim4(max(M, N), K));
 
     if(M == N) {
         Array<int> pivot = createEmptyArray<int>(dim4(N, 1, 1));
-        gesv_func<T>()(AF_LAPACK_COL_MAJOR, N, K,
-                       A.get(), A.strides()[1],
-                       pivot.get(),
-                       B.get(), B.strides()[1]);
+
+        auto func = [=] (Array<T> A, Array<T> B, Array<int> pivot, int N, int K) {
+            gesv_func<T>()(AF_LAPACK_COL_MAJOR, N, K, A.get(), A.strides()[1],
+                           pivot.get(), B.get(), B.strides()[1]);
+        };
+        getQueue().enqueue(func, A, B, pivot, N, K);
     } else {
-        int sM = a.strides()[1];
-        int sN = a.strides()[2] / sM;
+        auto func = [=] (Array<T> A, Array<T> B, int M, int N, int K) {
+            int sM = A.strides()[1];
+            int sN = A.strides()[2] / sM;
 
-        gels_func<T>()(AF_LAPACK_COL_MAJOR, 'N',
-                       M, N, K,
-                       A.get(), A.strides()[1],
-                       B.get(), max(sM, sN));
+            gels_func<T>()(AF_LAPACK_COL_MAJOR, 'N',
+                    M, N, K,
+                    A.get(), A.strides()[1],
+                    B.get(), max(sM, sN));
+        };
         B.resetDims(dim4(N, K));
+        getQueue().enqueue(func, A, B, M, N, K);
     }
 
     return B;
 }
 
-#define INSTANTIATE_SOLVE(T)                                            \
-    template Array<T> solve<T>(const Array<T> &a, const Array<T> &b,    \
-                               const af_mat_prop options);              \
-    template Array<T> solveLU<T>(const Array<T> &A, const Array<int> &pivot, \
-                                 const Array<T> &b, const af_mat_prop options); \
-
-INSTANTIATE_SOLVE(float)
-INSTANTIATE_SOLVE(cfloat)
-INSTANTIATE_SOLVE(double)
-INSTANTIATE_SOLVE(cdouble)
-
 }
 
 #else
@@ -178,17 +163,21 @@ template<typename T>
 Array<T> solveLU(const Array<T> &A, const Array<int> &pivot,
                  const Array<T> &b, const af_mat_prop options)
 {
-    AF_ERROR("Linear Algebra is diabled on CPU",
-             AF_ERR_NOT_CONFIGURED);
+    AF_ERROR("Linear Algebra is diabled on CPU", AF_ERR_NOT_CONFIGURED);
 }
 
 template<typename T>
 Array<T> solve(const Array<T> &a, const Array<T> &b, const af_mat_prop options)
 {
-    AF_ERROR("Linear Algebra is diabled on CPU",
-              AF_ERR_NOT_CONFIGURED);
+    AF_ERROR("Linear Algebra is diabled on CPU", AF_ERR_NOT_CONFIGURED);
+}
+
 }
 
+#endif
+
+namespace cpu
+{
 #define INSTANTIATE_SOLVE(T)                                            \
     template Array<T> solve<T>(const Array<T> &a, const Array<T> &b,    \
                                const af_mat_prop options);              \
@@ -200,5 +189,3 @@ INSTANTIATE_SOLVE(cfloat)
 INSTANTIATE_SOLVE(double)
 INSTANTIATE_SOLVE(cdouble)
 }
-
-#endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git