[arrayfire] 112/248: Added CPU fallback for CUDA QR when CUDA older than 7

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Tue Nov 17 15:54:11 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.

commit 37e0658559fca47cac8bcab2cf1991de74b57061
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Thu Oct 8 17:58:07 2015 -0400

    Added CPU fallback for CUDA QR when CUDA older than 7
    
    * Moved triangle function into its own header file
---
 src/backend/cuda/cholesky.cu                 |  28 +++++++
 src/backend/cuda/cpu_lapack/cpu_cholesky.cpp | 110 +++++++++++++++++++++++++++
 src/backend/cuda/cpu_lapack/cpu_cholesky.hpp |  22 ++++++
 src/backend/cuda/cpu_lapack/cpu_qr.cpp       |  34 +--------
 src/backend/cuda/cpu_lapack/cpu_triangle.hpp |  52 +++++++++++++
 5 files changed, 213 insertions(+), 33 deletions(-)

diff --git a/src/backend/cuda/cholesky.cu b/src/backend/cuda/cholesky.cu
index d785eef..c6869dc 100644
--- a/src/backend/cuda/cholesky.cu
+++ b/src/backend/cuda/cholesky.cu
@@ -148,6 +148,34 @@ INSTANTIATE_CH(double)
 INSTANTIATE_CH(cdouble)
 }
 
+#elif defined(WITH_CPU_LINEAR_ALGEBRA)
+#include <cpu_lapack/cpu_cholesky.hpp>
+namespace cuda  // Compiled under the `#elif defined(WITH_CPU_LINEAR_ALGEBRA)` branch above: both entry points forward to cuda::cpu.
+{
+
+template<typename T>
+Array<T> cholesky(int *info, const Array<T> &in, const bool is_upper)  // Out-of-place variant; returns whatever cpu::cholesky returns.
+{
+    return cpu::cholesky(info, in, is_upper);  // Arguments are passed through unchanged; the callee stores the potrf return value in *info.
+}
+
+template<typename T>
+int cholesky_inplace(Array<T> &in, const bool is_upper)  // In-place variant; the int result is the callee's return value.
+{
+    return cpu::cholesky_inplace(in, is_upper);  // Forwards to the function declared in cpu_lapack/cpu_cholesky.hpp.
+}
+
+#define INSTANTIATE_CH(T)                                                                   \
+    template int cholesky_inplace<T>(Array<T> &in, const bool is_upper);                    \
+    template Array<T> cholesky<T>   (int *info, const Array<T> &in, const bool is_upper);
+
+INSTANTIATE_CH(float)  // Explicit instantiations of both templates for float, cfloat, double and cdouble.
+INSTANTIATE_CH(cfloat)
+INSTANTIATE_CH(double)
+INSTANTIATE_CH(cdouble)
+
+}
+
 #else
 namespace cuda
 {
diff --git a/src/backend/cuda/cpu_lapack/cpu_cholesky.cpp b/src/backend/cuda/cpu_lapack/cpu_cholesky.cpp
new file mode 100644
index 0000000..4fb2644
--- /dev/null
+++ b/src/backend/cuda/cpu_lapack/cpu_cholesky.cpp
@@ -0,0 +1,110 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <cholesky.hpp>
+#include <err_common.hpp>
+
+#if defined(WITH_CPU_LINEAR_ALGEBRA)
+
+#include <af/dim4.hpp>
+#include <handle.hpp>
+#include <copy.hpp>
+#include <iostream>
+#include <cassert>
+
+#include <cpu_lapack/cpu_triangle.hpp>
+#include "lapack_helper.hpp"
+
+namespace cuda
+{
+namespace cpu
+{
+
+template<typename T>  // Pointer type for the potrf routines; invoked below as (order, uplo, N, buffer, in.strides()[1]).
+using potrf_func_def = int (*)(ORDER_TYPE, char,
+                               int,
+                               T*, int);
+
+#define CH_FUNC_DEF( FUNC )                                     \
+template<typename T> FUNC##_func_def<T> FUNC##_func();
+
+
+#define CH_FUNC( FUNC, TYPE, PREFIX )                           \
+template<> FUNC##_func_def<TYPE>     FUNC##_func<TYPE>()        \
+{ return & LAPACK_NAME(PREFIX##FUNC); }
+
+CH_FUNC_DEF( potrf )  // Expands to the declaration: template<typename T> potrf_func_def<T> potrf_func();
+CH_FUNC(potrf , float  , s)  // Each CH_FUNC line specialises potrf_func<TYPE>() to return &LAPACK_NAME(<PREFIX>potrf); here spotrf.
+CH_FUNC(potrf , double , d)
+CH_FUNC(potrf , cfloat , c)
+CH_FUNC(potrf , cdouble, z)
+
+template<typename T>
+Array<T> cholesky(int *info, const Array<T> &in, const bool is_upper)  // Out-of-place: `in` is not modified; the result is a new Array<T>.
+{
+    dim4 iDims = in.dims();
+    int N = iDims[0];  // Extent of dimension 0; passed as the size argument of potrf.
+
+    char uplo = 'L';  // Becomes 'U' when is_upper is set.
+    if(is_upper)
+        uplo = 'U';
+
+    T *inPtr = pinnedAlloc<T>(in.elements());  // Scratch buffer with one T per element of `in`; released with pinnedFree below.
+    copyData(inPtr, in);  // presumably copies the contents of `in` into the scratch buffer -- confirm against copy.hpp
+
+    *info = potrf_func<T>()(AF_LAPACK_COL_MAJOR, uplo,  // Calls the type-specific potrf on the buffer and stores its return value in *info.
+                            N, inPtr, in.strides()[1]);  // NOTE(review): strides()[1] is the last argument while the buffer holds in.elements() values -- confirm copyData keeps that layout.
+
+    if (is_upper) triangle<T, true , false>(inPtr, inPtr, in.dims(), in.strides(), in.strides());  // In place (o == i == inPtr): keeps entries whose dim-1 index >= dim-0 index, zeroes the rest. NOTE(review): runs without checking *info.
+    else          triangle<T, false, false>(inPtr, inPtr, in.dims(), in.strides(), in.strides());  // Same, keeping entries whose dim-1 index <= dim-0 index; diagonal values are kept (is_unit_diag = false).
+
+    Array<T> out = createHostDataArray<T>(in.dims(), inPtr);  // Builds the result Array from the buffer, using the dimensions of `in`.
+
+    pinnedFree(inPtr);
+
+    return out;
+}
+
+template<typename T>
+int cholesky_inplace(Array<T> &in, const bool is_upper)  // In-place: the potrf output is written back into `in`; returns the potrf return value.
+{
+    dim4 iDims = in.dims();
+    int N = iDims[0];  // Extent of dimension 0; passed as the size argument of potrf.
+
+    char uplo = 'L';  // Becomes 'U' when is_upper is set.
+    if(is_upper)
+        uplo = 'U';
+
+    T *inPtr = pinnedAlloc<T>(in.elements());  // Scratch buffer with one T per element of `in`; released with pinnedFree below.
+    copyData(inPtr, in);  // presumably copies the contents of `in` into the scratch buffer -- confirm against copy.hpp
+
+    int info = potrf_func<T>()(AF_LAPACK_COL_MAJOR, uplo,  // Same potrf call as in cholesky(); the return value is kept in a local.
+                               N, inPtr, in.strides()[1]);
+
+    writeHostDataArray<T>(in, inPtr, in.elements() * sizeof(T));  // Passes in.elements() * sizeof(T) as the byte count; unlike cholesky(), triangle() is not applied first.
+
+    pinnedFree(inPtr);
+
+    return info;
+}
+
+#define INSTANTIATE_CH(T)                                                                   \
+    template int cholesky_inplace<T>(Array<T> &in, const bool is_upper);                    \
+    template Array<T> cholesky<T>   (int *info, const Array<T> &in, const bool is_upper);   \
+
+
+INSTANTIATE_CH(float)  // Explicit instantiations of both templates for float, cfloat, double and cdouble.
+INSTANTIATE_CH(cfloat)
+INSTANTIATE_CH(double)
+INSTANTIATE_CH(cdouble)
+
+}
+}
+
+#endif
diff --git a/src/backend/cuda/cpu_lapack/cpu_cholesky.hpp b/src/backend/cuda/cpu_lapack/cpu_cholesky.hpp
new file mode 100644
index 0000000..03f9fa8
--- /dev/null
+++ b/src/backend/cuda/cpu_lapack/cpu_cholesky.hpp
@@ -0,0 +1,22 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <Array.hpp>
+
+namespace cuda  // NOTE(review): this new header has no include guard, while cpu_triangle.hpp added in the same commit has one.
+{
+namespace cpu
+{
+    template<typename T>
+    Array<T> cholesky(int *info, const Array<T> &in, const bool is_upper);  // Out-of-place: returns a new Array<T> and writes the potrf return value to *info.
+
+    template<typename T>
+    int cholesky_inplace(Array<T> &in, const bool is_upper);  // Writes the potrf output back into `in` and returns the potrf return value.
+}
+}
diff --git a/src/backend/cuda/cpu_lapack/cpu_qr.cpp b/src/backend/cuda/cpu_lapack/cpu_qr.cpp
index 0fd04b1..22447a2 100644
--- a/src/backend/cuda/cpu_lapack/cpu_qr.cpp
+++ b/src/backend/cuda/cpu_lapack/cpu_qr.cpp
@@ -17,8 +17,8 @@
 #include <copy.hpp>
 #include <iostream>
 #include <cassert>
-#include <triangle.hpp>
 
+#include <cpu_lapack/cpu_triangle.hpp>
 #include "lapack_helper.hpp"
 
 namespace cuda
@@ -63,38 +63,6 @@ GQR_FUNC(gqr , double , dorgqr)
 GQR_FUNC(gqr , cfloat , cungqr)
 GQR_FUNC(gqr , cdouble, zungqr)
 
-template<typename T, bool is_upper, bool is_unit_diag>
-void triangle(T *o, const T *i, const dim4 odm, const dim4 ost, const dim4 ist)
-{
-    for(dim_t ow = 0; ow < odm[3]; ow++) {
-        const dim_t oW = ow * ost[3];
-        const dim_t iW = ow * ist[3];
-
-        for(dim_t oz = 0; oz < odm[2]; oz++) {
-            const dim_t oZW = oW + oz * ost[2];
-            const dim_t iZW = iW + oz * ist[2];
-
-            for(dim_t oy = 0; oy < odm[1]; oy++) {
-                const dim_t oYZW = oZW + oy * ost[1];
-                const dim_t iYZW = iZW + oy * ist[1];
-
-                for(dim_t ox = 0; ox < odm[0]; ox++) {
-                    const dim_t oMem = oYZW + ox;
-                    const dim_t iMem = iYZW + ox;
-
-                    bool cond = is_upper ? (oy >= ox) : (oy <= ox);
-                    bool do_unit_diag = (is_unit_diag && ox == oy);
-                    if(cond) {
-                        o[oMem] = do_unit_diag ? scalar<T>(1) : i[iMem];
-                    } else {
-                        o[oMem] = scalar<T>(0);
-                    }
-                }
-            }
-        }
-    }
-}
-
 template<typename T>
 void qr(Array<T> &q, Array<T> &r, Array<T> &t, const Array<T> &in)
 {
diff --git a/src/backend/cuda/cpu_lapack/cpu_triangle.hpp b/src/backend/cuda/cpu_lapack/cpu_triangle.hpp
new file mode 100644
index 0000000..fb8fea1
--- /dev/null
+++ b/src/backend/cuda/cpu_lapack/cpu_triangle.hpp
@@ -0,0 +1,52 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#ifndef CPU_LAPACK_TRIANGLE
+#define CPU_LAPACK_TRIANGLE
+namespace cuda  // NOTE(review): this header names dim4, dim_t and scalar<T> but contains no #include; it relies on the including file.
+{
+namespace cpu
+{
+
+template<typename T, bool is_upper, bool is_unit_diag>  // is_upper selects which triangle is kept; is_unit_diag forces 1 on the diagonal.
+void triangle(T *o, const T *i, const dim4 odm, const dim4 ost, const dim4 ist)  // o/i: output/input buffers; odm: output dims; ost/ist: output/input strides.
+{
+    for(dim_t ow = 0; ow < odm[3]; ow++) {  // Dimensions 3 and 2 are walked as outer loops; each (ow, oz) pair gets the same treatment.
+        const dim_t oW = ow * ost[3];
+        const dim_t iW = ow * ist[3];
+
+        for(dim_t oz = 0; oz < odm[2]; oz++) {
+            const dim_t oZW = oW + oz * ost[2];
+            const dim_t iZW = iW + oz * ist[2];
+
+            for(dim_t oy = 0; oy < odm[1]; oy++) {
+                const dim_t oYZW = oZW + oy * ost[1];
+                const dim_t iYZW = iZW + oy * ist[1];
+
+                for(dim_t ox = 0; ox < odm[0]; ox++) {
+                    const dim_t oMem = oYZW + ox;  // Dimension 0 is indexed with an implicit stride of 1 (ost[0] and ist[0] are not read).
+                    const dim_t iMem = iYZW + ox;
+
+                    bool cond = is_upper ? (oy >= ox) : (oy <= ox);  // True for positions that are kept; the diagonal is included in both cases.
+                    bool do_unit_diag = (is_unit_diag && ox == oy);
+                    if(cond) {
+                        o[oMem] = do_unit_diag ? scalar<T>(1) : i[iMem];  // Kept entry: copy the input, or write 1 on the diagonal when is_unit_diag.
+                    } else {
+                        o[oMem] = scalar<T>(0);  // Entry outside the selected triangle is set to zero.
+                    }
+                }
+            }
+        }
+    }
+}
+
+}
+}
+
+#endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list