[arrayfire] 28/284: converted qr & solve cpu functions to async calls
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:15 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit e0d7c12d97a69d950623691b4c17c3755b1387f1
Author: pradeep <pradeep at arrayfire.com>
Date: Fri Nov 20 13:58:35 2015 -0500
converted qr & solve cpu functions to async calls
Fixed lu async function
---
src/backend/cpu/lu.cpp | 66 ++++++++++--------------
src/backend/cpu/qr.cpp | 56 ++++++++------------
src/backend/cpu/solve.cpp | 127 +++++++++++++++++++++-------------------------
3 files changed, 105 insertions(+), 144 deletions(-)
diff --git a/src/backend/cpu/lu.cpp b/src/backend/cpu/lu.cpp
index ed165cb..ff0be43 100644
--- a/src/backend/cpu/lu.cpp
+++ b/src/backend/cpu/lu.cpp
@@ -11,7 +11,6 @@
#include <err_common.hpp>
#if defined(WITH_CPU_LINEAR_ALGEBRA)
-
#include <af/dim4.hpp>
#include <handle.hpp>
#include <iostream>
@@ -26,9 +25,7 @@ namespace cpu
{
template<typename T>
-using getrf_func_def = int (*)(ORDER_TYPE, int, int,
- T*, int,
- int*);
+using getrf_func_def = int (*)(ORDER_TYPE, int, int, T*, int, int*);
#define LU_FUNC_DEF( FUNC ) \
template<typename T> FUNC##_func_def<T> FUNC##_func();
@@ -45,7 +42,7 @@ LU_FUNC(getrf , cfloat , c)
LU_FUNC(getrf , cdouble, z)
template<typename T>
-void lu_split(Array<T> &lower, Array<T> &upper, const Array<T> &in)
+void lu_split(Array<T> lower, Array<T> upper, const Array<T> in)
{
T *l = lower.get();
T *u = upper.get();
@@ -54,7 +51,6 @@ void lu_split(Array<T> &lower, Array<T> &upper, const Array<T> &in)
dim4 ldm = lower.dims();
dim4 udm = upper.dims();
dim4 idm = in.dims();
-
dim4 lst = lower.strides();
dim4 ust = upper.strides();
dim4 ist = in.strides();
@@ -79,20 +75,14 @@ void lu_split(Array<T> &lower, Array<T> &upper, const Array<T> &in)
const dim_t uMem = uYZW + ox;
const dim_t iMem = iYZW + ox;
if(ox > oy) {
- if(oy < ldm[1])
- l[lMem] = i[iMem];
- if(ox < udm[0])
- u[uMem] = scalar<T>(0);
+ if(oy < ldm[1]) l[lMem] = i[iMem];
+ if(ox < udm[0]) u[uMem] = scalar<T>(0);
} else if (oy > ox) {
- if(oy < ldm[1])
- l[lMem] = scalar<T>(0);
- if(ox < udm[0])
- u[uMem] = i[iMem];
+ if(oy < ldm[1]) l[lMem] = scalar<T>(0);
+ if(ox < udm[0]) u[uMem] = i[iMem];
} else if(ox == oy) {
- if(oy < ldm[1])
- l[lMem] = scalar<T>(1.0);
- if(ox < udm[0])
- u[uMem] = i[iMem];
+ if(oy < ldm[1]) l[lMem] = scalar<T>(1.0);
+ if(ox < udm[0]) u[uMem] = i[iMem];
}
}
}
@@ -100,17 +90,15 @@ void lu_split(Array<T> &lower, Array<T> &upper, const Array<T> &in)
}
}
-void convertPivot(Array<int> &pivot, int out_sz)
+void convertPivot(Array<int> p, Array<int> pivot)
{
- Array<int> p = range<int>(dim4(out_sz), 0);
int *d_pi = pivot.get();
int *d_po = p.get();
- dim_t d0 = pivot.dims()[0];
+ dim_t d0 = pivot.dims()[0];
for(int j = 0; j < (int)d0; j++) {
// 1 indexed in pivot
std::swap(d_po[j], d_po[d_pi[j] - 1]);
}
- pivot = p;
}
template<typename T>
@@ -138,26 +126,21 @@ Array<int> lu_inplace(Array<T> &in, const bool convert_pivot)
dim4 iDims = in.dims();
Array<int> pivot = createEmptyArray<int>(af::dim4(min(iDims[0], iDims[1]), 1, 1, 1));
- auto func = [=] (Array<T> in, Array<int> pivot, const bool convert_pivot) {
+ auto func = [=] (Array<T> in, Array<int> pivot) {
dim4 iDims = in.dims();
getrf_func<T>()(AF_LAPACK_COL_MAJOR, iDims[0], iDims[1], in.get(), in.strides()[1], pivot.get());
- if(convert_pivot) convertPivot(pivot, iDims[0]);
};
-
- getQueue().enqueue(func, in, pivot, convert_pivot);
-
- return pivot;
+ getQueue().enqueue(func, in, pivot);
+
+ if(convert_pivot) {
+ Array<int> p = range<int>(dim4(iDims[0]), 0);
+ getQueue().enqueue(convertPivot, p, pivot);
+ return p;
+ } else {
+ return pivot;
+ }
}
-#define INSTANTIATE_LU(T) \
- template Array<int> lu_inplace<T>(Array<T> &in, const bool convert_pivot); \
- template void lu<T>(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in);
-
-INSTANTIATE_LU(float)
-INSTANTIATE_LU(cfloat)
-INSTANTIATE_LU(double)
-INSTANTIATE_LU(cdouble)
-
}
#else
@@ -177,6 +160,12 @@ Array<int> lu_inplace(Array<T> &in, const bool convert_pivot)
AF_ERROR("Linear Algebra is disabled on CPU", AF_ERR_NOT_CONFIGURED);
}
+}
+
+#endif
+
+namespace cpu
+{
#define INSTANTIATE_LU(T) \
template Array<int> lu_inplace<T>(Array<T> &in, const bool convert_pivot); \
template void lu<T>(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in);
@@ -185,7 +174,4 @@ INSTANTIATE_LU(float)
INSTANTIATE_LU(cfloat)
INSTANTIATE_LU(double)
INSTANTIATE_LU(cdouble)
-
}
-
-#endif
diff --git a/src/backend/cpu/qr.cpp b/src/backend/cpu/qr.cpp
index d1c3e23..b5f1806 100644
--- a/src/backend/cpu/qr.cpp
+++ b/src/backend/cpu/qr.cpp
@@ -11,28 +11,23 @@
#include <err_common.hpp>
#if defined(WITH_CPU_LINEAR_ALGEBRA)
-
#include <af/dim4.hpp>
#include <handle.hpp>
-#include <iostream>
#include <cassert>
#include <err_cpu.hpp>
#include <triangle.hpp>
-
#include <lapack_helper.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
template<typename T>
-using geqrf_func_def = int (*)(ORDER_TYPE, int, int,
- T*, int,
- T*);
+using geqrf_func_def = int (*)(ORDER_TYPE, int, int, T*, int, T*);
template<typename T>
-using gqr_func_def = int (*)(ORDER_TYPE, int, int, int,
- T*, int,
- const T*);
+using gqr_func_def = int (*)(ORDER_TYPE, int, int, int, T*, int, const T*);
#define QR_FUNC_DEF( FUNC ) \
template<typename T> FUNC##_func_def<T> FUNC##_func();
@@ -65,8 +60,8 @@ template<typename T>
void qr(Array<T> &q, Array<T> &r, Array<T> &t, const Array<T> &in)
{
dim4 iDims = in.dims();
- int M = iDims[0];
- int N = iDims[1];
+ int M = iDims[0];
+ int N = iDims[1];
q = padArray<T, T>(in, dim4(M, max(M, N)));
q.resetDims(iDims);
@@ -78,39 +73,29 @@ void qr(Array<T> &q, Array<T> &r, Array<T> &t, const Array<T> &in)
triangle<T, true, false>(r, q);
- gqr_func<T>()(AF_LAPACK_COL_MAJOR,
- M, M, min(M, N),
- q.get(), q.strides()[1],
- t.get());
-
+ auto func = [=] (Array<T> q, Array<T> t, int M, int N) {
+ gqr_func<T>()(AF_LAPACK_COL_MAJOR, M, M, min(M, N), q.get(), q.strides()[1], t.get());
+ };
q.resetDims(dim4(M, M));
+ getQueue().enqueue(func, q, t, M, N);
}
template<typename T>
Array<T> qr_inplace(Array<T> &in)
{
dim4 iDims = in.dims();
- int M = iDims[0];
- int N = iDims[1];
-
+ int M = iDims[0];
+ int N = iDims[1];
Array<T> t = createEmptyArray<T>(af::dim4(min(M, N), 1, 1, 1));
- geqrf_func<T>()(AF_LAPACK_COL_MAJOR, M, N,
- in.get(), in.strides()[1],
- t.get());
+ auto func = [=] (Array<T> in, Array<T> t, int M, int N) {
+ geqrf_func<T>()(AF_LAPACK_COL_MAJOR, M, N, in.get(), in.strides()[1], t.get());
+ };
+ getQueue().enqueue(func, in, t, M, N);
return t;
}
-#define INSTANTIATE_QR(T) \
- template Array<T> qr_inplace<T>(Array<T> &in); \
- template void qr<T>(Array<T> &q, Array<T> &r, Array<T> &t, const Array<T> &in);
-
-INSTANTIATE_QR(float)
-INSTANTIATE_QR(cfloat)
-INSTANTIATE_QR(double)
-INSTANTIATE_QR(cdouble)
-
}
#else
@@ -130,6 +115,12 @@ Array<T> qr_inplace(Array<T> &in)
AF_ERROR("Linear Algebra is disabled on CPU", AF_ERR_NOT_CONFIGURED);
}
+}
+
+#endif
+
+namespace cpu
+{
#define INSTANTIATE_QR(T) \
template Array<T> qr_inplace<T>(Array<T> &in); \
template void qr<T>(Array<T> &q, Array<T> &r, Array<T> &t, const Array<T> &in);
@@ -138,7 +129,4 @@ INSTANTIATE_QR(float)
INSTANTIATE_QR(cfloat)
INSTANTIATE_QR(double)
INSTANTIATE_QR(cdouble)
-
}
-
-#endif
diff --git a/src/backend/cpu/solve.cpp b/src/backend/cpu/solve.cpp
index 1e88e8d..b279971 100644
--- a/src/backend/cpu/solve.cpp
+++ b/src/backend/cpu/solve.cpp
@@ -11,52 +11,40 @@
#include <err_common.hpp>
#if defined(WITH_CPU_LINEAR_ALGEBRA)
-
#include <af/dim4.hpp>
#include <handle.hpp>
-#include <range.hpp>
-#include <iostream>
#include <cassert>
#include <err_cpu.hpp>
-
#include <lapack_helper.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
template<typename T>
using gesv_func_def = int (*)(ORDER_TYPE, int, int,
- T *, int,
- int *,
- T *, int);
+ T *, int, int *, T *, int);
template<typename T>
-using gels_func_def = int (*)(ORDER_TYPE, char,
- int, int, int,
- T *, int,
- T *, int);
+using gels_func_def = int (*)(ORDER_TYPE, char, int, int, int,
+ T *, int, T *, int);
template<typename T>
-using getrs_func_def = int (*)(ORDER_TYPE, char,
- int, int,
- const T *, int,
- const int *,
- T *, int);
+using getrs_func_def = int (*)(ORDER_TYPE, char, int, int,
+ const T *, int, const int *, T *, int);
template<typename T>
-using trtrs_func_def = int (*)(ORDER_TYPE,
- char, char, char,
- int, int,
- const T *, int,
- T *, int);
+using trtrs_func_def = int (*)(ORDER_TYPE, char, char, char, int, int,
+ const T *, int, T *, int);
-#define SOLVE_FUNC_DEF( FUNC ) \
+#define SOLVE_FUNC_DEF( FUNC ) \
template<typename T> FUNC##_func_def<T> FUNC##_func();
-#define SOLVE_FUNC( FUNC, TYPE, PREFIX ) \
-template<> FUNC##_func_def<TYPE> FUNC##_func<TYPE>() \
+#define SOLVE_FUNC( FUNC, TYPE, PREFIX ) \
+template<> FUNC##_func_def<TYPE> FUNC##_func<TYPE>() \
{ return & LAPACK_NAME(PREFIX##FUNC); }
SOLVE_FUNC_DEF( gesv )
@@ -87,16 +75,16 @@ template<typename T>
Array<T> solveLU(const Array<T> &A, const Array<int> &pivot,
const Array<T> &b, const af_mat_prop options)
{
- int N = A.dims()[0];
- int NRHS = b.dims()[1];
-
+ int N = A.dims()[0];
+ int NRHS = b.dims()[1];
Array< T > B = copyArray<T>(b);
- getrs_func<T>()(AF_LAPACK_COL_MAJOR, 'N',
- N, NRHS,
- A.get(), A.strides()[1],
- pivot.get(),
- B.get(), B.strides()[1]);
+ auto func = [=] (Array<T> A, Array<T> B, Array<int> pivot, int N, int NRHS) {
+ getrs_func<T>()(AF_LAPACK_COL_MAJOR, 'N',
+ N, NRHS, A.get(), A.strides()[1],
+ pivot.get(), B.get(), B.strides()[1]);
+ };
+ getQueue().enqueue(func, A, B, pivot, N, NRHS);
return B;
}
@@ -105,16 +93,20 @@ template<typename T>
Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop options)
{
Array<T> B = copyArray<T>(b);
- int N = B.dims()[0];
- int NRHS = B.dims()[1];
-
- trtrs_func<T>()(AF_LAPACK_COL_MAJOR,
- options & AF_MAT_UPPER ? 'U' : 'L',
- 'N', // transpose flag
- options & AF_MAT_DIAG_UNIT ? 'U' : 'N',
- N, NRHS,
- A.get(), A.strides()[1],
- B.get(), B.strides()[1]);
+ int N = B.dims()[0];
+ int NRHS = B.dims()[1];
+
+ auto func = [=] (Array<T> A, Array<T> B, int N, int NRHS, const af_mat_prop options) {
+ trtrs_func<T>()(AF_LAPACK_COL_MAJOR,
+ options & AF_MAT_UPPER ? 'U' : 'L',
+ 'N', // transpose flag
+ options & AF_MAT_DIAG_UNIT ? 'U' : 'N',
+ N, NRHS,
+ A.get(), A.strides()[1],
+ B.get(), B.strides()[1]);
+ };
+ getQueue().enqueue(func, A, B, N, NRHS, options);
+
return B;
}
@@ -132,41 +124,34 @@ Array<T> solve(const Array<T> &a, const Array<T> &b, const af_mat_prop options)
int N = a.dims()[1];
int K = b.dims()[1];
-
Array<T> A = copyArray<T>(a);
Array<T> B = padArray<T, T>(b, dim4(max(M, N), K));
if(M == N) {
Array<int> pivot = createEmptyArray<int>(dim4(N, 1, 1));
- gesv_func<T>()(AF_LAPACK_COL_MAJOR, N, K,
- A.get(), A.strides()[1],
- pivot.get(),
- B.get(), B.strides()[1]);
+
+ auto func = [=] (Array<T> A, Array<T> B, Array<int> pivot, int N, int K) {
+ gesv_func<T>()(AF_LAPACK_COL_MAJOR, N, K, A.get(), A.strides()[1],
+ pivot.get(), B.get(), B.strides()[1]);
+ };
+ getQueue().enqueue(func, A, B, pivot, N, K);
} else {
- int sM = a.strides()[1];
- int sN = a.strides()[2] / sM;
+ auto func = [=] (Array<T> A, Array<T> B, int M, int N, int K) {
+ int sM = A.strides()[1];
+ int sN = A.strides()[2] / sM;
- gels_func<T>()(AF_LAPACK_COL_MAJOR, 'N',
- M, N, K,
- A.get(), A.strides()[1],
- B.get(), max(sM, sN));
+ gels_func<T>()(AF_LAPACK_COL_MAJOR, 'N',
+ M, N, K,
+ A.get(), A.strides()[1],
+ B.get(), max(sM, sN));
+ };
B.resetDims(dim4(N, K));
+ getQueue().enqueue(func, A, B, M, N, K);
}
return B;
}
-#define INSTANTIATE_SOLVE(T) \
- template Array<T> solve<T>(const Array<T> &a, const Array<T> &b, \
- const af_mat_prop options); \
- template Array<T> solveLU<T>(const Array<T> &A, const Array<int> &pivot, \
- const Array<T> &b, const af_mat_prop options); \
-
-INSTANTIATE_SOLVE(float)
-INSTANTIATE_SOLVE(cfloat)
-INSTANTIATE_SOLVE(double)
-INSTANTIATE_SOLVE(cdouble)
-
}
#else
@@ -178,17 +163,21 @@ template<typename T>
Array<T> solveLU(const Array<T> &A, const Array<int> &pivot,
const Array<T> &b, const af_mat_prop options)
{
- AF_ERROR("Linear Algebra is diabled on CPU",
- AF_ERR_NOT_CONFIGURED);
+ AF_ERROR("Linear Algebra is diabled on CPU", AF_ERR_NOT_CONFIGURED);
}
template<typename T>
Array<T> solve(const Array<T> &a, const Array<T> &b, const af_mat_prop options)
{
- AF_ERROR("Linear Algebra is diabled on CPU",
- AF_ERR_NOT_CONFIGURED);
+ AF_ERROR("Linear Algebra is diabled on CPU", AF_ERR_NOT_CONFIGURED);
+}
+
}
+#endif
+
+namespace cpu
+{
#define INSTANTIATE_SOLVE(T) \
template Array<T> solve<T>(const Array<T> &a, const Array<T> &b, \
const af_mat_prop options); \
@@ -200,5 +189,3 @@ INSTANTIATE_SOLVE(cfloat)
INSTANTIATE_SOLVE(double)
INSTANTIATE_SOLVE(cdouble)
}
-
-#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list