[arrayfire] 113/248: Added CPU fallback for CUDA Solve when CUDA older than 7
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Nov 17 15:54:11 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.
commit 876494d6b4f7ad63d49b1cbf1637b66093ebd7ea
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Thu Oct 8 18:44:46 2015 -0400
Added CPU fallback for CUDA Solve when CUDA older than 7
---
src/backend/cuda/cpu_lapack/cpu_cholesky.cpp | 5 +-
src/backend/cuda/cpu_lapack/cpu_lu.cpp | 4 +-
src/backend/cuda/cpu_lapack/cpu_qr.cpp | 5 +-
src/backend/cuda/cpu_lapack/cpu_solve.cpp | 206 +++++++++++++++++++++++++++
src/backend/cuda/cpu_lapack/cpu_solve.hpp | 23 +++
src/backend/cuda/solve.cu | 31 ++++
6 files changed, 266 insertions(+), 8 deletions(-)
diff --git a/src/backend/cuda/cpu_lapack/cpu_cholesky.cpp b/src/backend/cuda/cpu_lapack/cpu_cholesky.cpp
index 4fb2644..29826dc 100644
--- a/src/backend/cuda/cpu_lapack/cpu_cholesky.cpp
+++ b/src/backend/cuda/cpu_lapack/cpu_cholesky.cpp
@@ -7,11 +7,10 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
-#include <cholesky.hpp>
-#include <err_common.hpp>
-
#if defined(WITH_CPU_LINEAR_ALGEBRA)
+#include <cpu_lapack/cpu_cholesky.hpp>
+#include <err_common.hpp>
#include <af/dim4.hpp>
#include <handle.hpp>
#include <copy.hpp>
diff --git a/src/backend/cuda/cpu_lapack/cpu_lu.cpp b/src/backend/cuda/cpu_lapack/cpu_lu.cpp
index df7dde6..ea83132 100644
--- a/src/backend/cuda/cpu_lapack/cpu_lu.cpp
+++ b/src/backend/cuda/cpu_lapack/cpu_lu.cpp
@@ -7,11 +7,11 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#if defined(WITH_CPU_LINEAR_ALGEBRA)
+
#include <cpu_lapack/cpu_lu.hpp>
#include <err_common.hpp>
-#if defined(WITH_CPU_LINEAR_ALGEBRA)
-
#include <af/dim4.hpp>
#include <handle.hpp>
#include <iostream>
diff --git a/src/backend/cuda/cpu_lapack/cpu_qr.cpp b/src/backend/cuda/cpu_lapack/cpu_qr.cpp
index 22447a2..853119f 100644
--- a/src/backend/cuda/cpu_lapack/cpu_qr.cpp
+++ b/src/backend/cuda/cpu_lapack/cpu_qr.cpp
@@ -7,11 +7,10 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
-#include <qr.hpp>
-#include <err_common.hpp>
-
#if defined(WITH_CPU_LINEAR_ALGEBRA)
+#include <cpu_lapack/cpu_qr.hpp>
+#include <err_common.hpp>
#include <af/dim4.hpp>
#include <handle.hpp>
#include <copy.hpp>
diff --git a/src/backend/cuda/cpu_lapack/cpu_solve.cpp b/src/backend/cuda/cpu_lapack/cpu_solve.cpp
new file mode 100644
index 0000000..c9d0803
--- /dev/null
+++ b/src/backend/cuda/cpu_lapack/cpu_solve.cpp
@@ -0,0 +1,206 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#if defined(WITH_CPU_LINEAR_ALGEBRA)
+
+#include <cpu_lapack/cpu_solve.hpp>
+#include <err_common.hpp>
+
+#include <af/dim4.hpp>
+#include <handle.hpp>
+#include <iostream>
+#include <cassert>
+#include <err_cuda.hpp>
+
+#include "lapack_helper.hpp"
+
+namespace cuda
+{
+namespace cpu
+{
+
+template<typename T>
+using gesv_func_def = int (*)(ORDER_TYPE, int, int,
+ T *, int,
+ int *,
+ T *, int);
+
+template<typename T>
+using gels_func_def = int (*)(ORDER_TYPE, char,
+ int, int, int,
+ T *, int,
+ T *, int);
+
+template<typename T>
+using getrs_func_def = int (*)(ORDER_TYPE, char,
+ int, int,
+ const T *, int,
+ const int *,
+ T *, int);
+
+template<typename T>
+using trtrs_func_def = int (*)(ORDER_TYPE,
+ char, char, char,
+ int, int,
+ const T *, int,
+ T *, int);
+
+
+#define SOLVE_FUNC_DEF( FUNC ) \
+template<typename T> FUNC##_func_def<T> FUNC##_func();
+
+
+#define SOLVE_FUNC( FUNC, TYPE, PREFIX ) \
+template<> FUNC##_func_def<TYPE> FUNC##_func<TYPE>() \
+{ return & LAPACK_NAME(PREFIX##FUNC); }
+
+SOLVE_FUNC_DEF( gesv )
+SOLVE_FUNC(gesv , float , s)
+SOLVE_FUNC(gesv , double , d)
+SOLVE_FUNC(gesv , cfloat , c)
+SOLVE_FUNC(gesv , cdouble, z)
+
+SOLVE_FUNC_DEF( gels )
+SOLVE_FUNC(gels , float , s)
+SOLVE_FUNC(gels , double , d)
+SOLVE_FUNC(gels , cfloat , c)
+SOLVE_FUNC(gels , cdouble, z)
+
+SOLVE_FUNC_DEF( getrs )
+SOLVE_FUNC(getrs , float , s)
+SOLVE_FUNC(getrs , double , d)
+SOLVE_FUNC(getrs , cfloat , c)
+SOLVE_FUNC(getrs , cdouble, z)
+
+SOLVE_FUNC_DEF( trtrs )
+SOLVE_FUNC(trtrs , float , s)
+SOLVE_FUNC(trtrs , double , d)
+SOLVE_FUNC(trtrs , cfloat , c)
+SOLVE_FUNC(trtrs , cdouble, z)
+
+template<typename T>
+Array<T> solveLU(const Array<T> &A, const Array<int> &pivot,
+ const Array<T> &b, const af_mat_prop options)
+{
+ int N = A.dims()[0];
+ int NRHS = b.dims()[1];
+
+ T *aPtr = pinnedAlloc<T>(A.elements());
+ T *bPtr = pinnedAlloc<T>(b.elements());
+ int *pPtr = pinnedAlloc<int>(pivot.elements());
+
+ copyData(aPtr, A);
+ copyData(bPtr, b);
+ copyData(pPtr, pivot);
+
+ getrs_func<T>()(AF_LAPACK_COL_MAJOR, 'N',
+ N, NRHS,
+ aPtr, A.strides()[1],
+ pPtr,
+ bPtr, b.strides()[1]);
+
+ Array<T> B = createHostDataArray<T>(b.dims(), bPtr);
+
+ pinnedFree(aPtr);
+ pinnedFree(bPtr);
+ pinnedFree(pPtr);
+
+ return B;
+}
+
+template<typename T>
+Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop options)
+{
+ int N = b.dims()[0];
+ int NRHS = b.dims()[1];
+
+ T *aPtr = pinnedAlloc<T>(A.elements());
+ T *bPtr = pinnedAlloc<T>(b.elements());
+ copyData(aPtr, A);
+ copyData(bPtr, b);
+
+ trtrs_func<T>()(AF_LAPACK_COL_MAJOR,
+ options & AF_MAT_UPPER ? 'U' : 'L',
+ 'N', // transpose flag
+ options & AF_MAT_DIAG_UNIT ? 'U' : 'N',
+ N, NRHS,
+ aPtr, A.strides()[1],
+ bPtr, b.strides()[1]);
+
+ Array<T> B = createHostDataArray<T>(b.dims(), bPtr);
+
+ pinnedFree(aPtr);
+ pinnedFree(bPtr);
+
+ return B;
+}
+
+
+template<typename T>
+Array<T> solve(const Array<T> &a, const Array<T> &b, const af_mat_prop options)
+{
+
+ if (options & AF_MAT_UPPER ||
+ options & AF_MAT_LOWER) {
+ return triangleSolve<T>(a, b, options);
+ }
+
+ int M = a.dims()[0];
+ int N = a.dims()[1];
+ int K = b.dims()[1];
+
+ Array<T> B = padArray<T, T>(b, dim4(max(M, N), K), scalar<T>(0));
+
+ T *aPtr = pinnedAlloc<T>(a.elements());
+ T *bPtr = pinnedAlloc<T>(B.elements());
+ copyData(aPtr, a);
+ copyData(bPtr, B);
+
+ if(M == N) {
+ int *pivotPtr = pinnedAlloc<int>(N);
+ gesv_func<T>()(AF_LAPACK_COL_MAJOR, N, K,
+ aPtr, a.strides()[1],
+ pivotPtr,
+ bPtr, B.strides()[1]);
+ pinnedFree(pivotPtr);
+
+ writeHostDataArray<T>(B, bPtr, B.elements() * sizeof(T));
+ } else {
+ int sM = a.strides()[1];
+ int sN = a.strides()[2] / sM;
+
+ gels_func<T>()(AF_LAPACK_COL_MAJOR, 'N',
+ M, N, K,
+ aPtr, a.strides()[1],
+ bPtr, max(sM, sN));
+ writeHostDataArray<T>(B, bPtr, B.elements() * sizeof(T));
+ B.resetDims(dim4(N, K));
+ }
+
+ pinnedFree(aPtr);
+ pinnedFree(bPtr);
+
+ return B;
+}
+
+#define INSTANTIATE_SOLVE(T) \
+ template Array<T> solve<T>(const Array<T> &a, const Array<T> &b, \
+ const af_mat_prop options); \
+ template Array<T> solveLU<T>(const Array<T> &A, const Array<int> &pivot, \
+ const Array<T> &b, const af_mat_prop options); \
+
+INSTANTIATE_SOLVE(float)
+INSTANTIATE_SOLVE(cfloat)
+INSTANTIATE_SOLVE(double)
+INSTANTIATE_SOLVE(cdouble)
+
+}
+}
+
+#endif
diff --git a/src/backend/cuda/cpu_lapack/cpu_solve.hpp b/src/backend/cuda/cpu_lapack/cpu_solve.hpp
new file mode 100644
index 0000000..64a1ef3
--- /dev/null
+++ b/src/backend/cuda/cpu_lapack/cpu_solve.hpp
@@ -0,0 +1,23 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <Array.hpp>
+
+namespace cuda
+{
+namespace cpu
+{
+ template<typename T>
+ Array<T> solve(const Array<T> &a, const Array<T> &b, const af_mat_prop options = AF_MAT_NONE);
+
+ template<typename T>
+ Array<T> solveLU(const Array<T> &a, const Array<int> &pivot,
+ const Array<T> &b, const af_mat_prop options = AF_MAT_NONE);
+}
+}
diff --git a/src/backend/cuda/solve.cu b/src/backend/cuda/solve.cu
index 7077c1f..8008ba1 100644
--- a/src/backend/cuda/solve.cu
+++ b/src/backend/cuda/solve.cu
@@ -384,6 +384,37 @@ INSTANTIATE_SOLVE(cdouble)
}
+#elif defined(WITH_CPU_LINEAR_ALGEBRA)
+#include<cpu_lapack/cpu_solve.hpp>
+
+namespace cuda
+{
+
+template<typename T>
+Array<T> solveLU(const Array<T> &A, const Array<int> &pivot,
+ const Array<T> &b, const af_mat_prop options)
+{
+ return cpu::solveLU(A, pivot, b, options);
+}
+
+template<typename T>
+Array<T> solve(const Array<T> &a, const Array<T> &b, const af_mat_prop options)
+{
+ return cpu::solve(a, b, options);
+}
+
+#define INSTANTIATE_SOLVE(T) \
+ template Array<T> solve<T>(const Array<T> &a, const Array<T> &b, \
+ const af_mat_prop options); \
+ template Array<T> solveLU<T>(const Array<T> &A, const Array<int> &pivot, \
+ const Array<T> &b, const af_mat_prop options); \
+
+INSTANTIATE_SOLVE(float)
+INSTANTIATE_SOLVE(cfloat)
+INSTANTIATE_SOLVE(double)
+INSTANTIATE_SOLVE(cdouble)
+}
+
#else
namespace cuda
{
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list