[arrayfire] 295/408: Cleaning up cpu blas / lapack in OpenCL backend
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:17 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 20b5f5183c4e4bb28babf4119e5a84b7f48a5bad
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Wed Aug 19 22:54:36 2015 -0400
Cleaning up cpu blas / lapack in OpenCL backend
---
src/api/c/svd.cpp | 6 --
src/backend/cblas.cpp | 74 +++++++++++--------
src/backend/opencl/CMakeLists.txt | 22 +++++-
src/backend/opencl/magma/geqrf2.cpp | 10 +--
src/backend/opencl/magma/geqrf3.cpp | 10 +--
src/backend/opencl/magma/getrf.cpp | 40 +++++------
src/backend/opencl/magma/getrs.cpp | 30 ++++----
src/backend/opencl/magma/larfb.cpp | 31 ++++----
src/backend/opencl/magma/magma_blas.h | 73 ++++++++-----------
src/backend/opencl/magma/magma_cpu_blas.h | 90 +++++++++++++++++++++++
src/backend/opencl/magma/magma_cpu_lapack.h | 107 +++++++++++++---------------
src/backend/opencl/magma/magma_helper.cpp | 22 ++++++
src/backend/opencl/magma/magma_helper.h | 3 +
src/backend/opencl/magma/potrf.cpp | 40 +++++------
src/backend/opencl/magma/ungqr.cpp | 4 +-
src/backend/opencl/magma/unmqr.cpp | 6 +-
src/backend/opencl/magma/unmqr2.cpp | 4 +-
src/backend/opencl/solve.cpp | 14 ++--
18 files changed, 355 insertions(+), 231 deletions(-)
diff --git a/src/api/c/svd.cpp b/src/api/c/svd.cpp
index 31f9aae..fc465dd 100644
--- a/src/api/c/svd.cpp
+++ b/src/api/c/svd.cpp
@@ -12,17 +12,11 @@
#include <af/lapack.h>
#include <af/util.h>
-
#include <af/defines.h>
-
#include <err_common.hpp>
-
#include <backend.hpp>
-
#include <Array.hpp>
-
#include <handle.hpp>
-
#include <svd.hpp>
using namespace detail;
diff --git a/src/backend/cblas.cpp b/src/backend/cblas.cpp
index 1b582c5..5400740 100644
--- a/src/backend/cblas.cpp
+++ b/src/backend/cblas.cpp
@@ -23,34 +23,52 @@ static char transChar(CBLAS_TRANSPOSE Trans)
}
}
-#define GEMM_F77(X, TS, TV, TY) \
-void cblas_##X##gemm( \
- const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA, \
- const CBLAS_TRANSPOSE TransB, const int M, const int N, \
- const int K, const TS alpha, const TV *A, \
- const int lda, const TV *B, const int ldb, \
- const TS beta, TV *C, const int ldc) \
-{ \
- char aT = transChar(TransA); \
- char bT = transChar(TransB); \
- X##gemm_(&aT, &bT, &M, &N, &K, \
- (const TY *)ADDR(alpha), (const TY *)A, &lda, \
- (const TY *)B, &ldb, \
- (const TY *)ADDR(beta), (TY *)C, &ldc); \
-} \
-void cblas_##X##gemv( \
- const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, \
- const int M, const int N, \
- const TS alpha, const TV *A, const int lda, \
- const TV *X, const int incX, const TS beta, \
- TV *Y, const int incY) \
-{ \
- char aT = transChar(TransA); \
- X##gemv_(&aT, &M, &N, \
- (const TY *)ADDR(alpha), (const TY *)A, &lda, \
- (const TY *)X, &incX, \
- (const TY *)ADDR(beta), (TY *)Y, &incY); \
-} \
+#define GEMM_F77(X, TS, TV, TY) \
+ void cblas_##X##gemm( \
+ const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA, \
+ const CBLAS_TRANSPOSE TransB, const int M, const int N, \
+ const int K, const TS alpha, const TV *A, \
+ const int lda, const TV *B, const int ldb, \
+ const TS beta, TV *C, const int ldc) \
+ { \
+ char aT = transChar(TransA); \
+ char bT = transChar(TransB); \
+ X##gemm_(&aT, &bT, &M, &N, &K, \
+ (const TY *)ADDR(alpha), (const TY *)A, &lda, \
+ (const TY *)B, &ldb, \
+ (const TY *)ADDR(beta), (TY *)C, &ldc); \
+ } \
+ void cblas_##X##gemv( \
+ const CBLAS_ORDER order, const CBLAS_TRANSPOSE TransA, \
+ const int M, const int N, \
+ const TS alpha, const TV *A, const int lda, \
+ const TV *X, const int incX, const TS beta, \
+ TV *Y, const int incY) \
+ { \
+ char aT = transChar(TransA); \
+ X##gemv_(&aT, &M, &N, \
+ (const TY *)ADDR(alpha), (const TY *)A, &lda, \
+ (const TY *)X, &incX, \
+ (const TY *)ADDR(beta), (TY *)Y, &incY); \
+ } \
+ void cblas_##X##axpy( \
+ const int N, const TS alpha, \
+ const TV *X, const int incX, \
+ TV *Y, const int incY) \
+ { \
+ X##axpy_(&N, \
+ (const TY *)ADDR(alpha), \
+ (const TY *)X, &incX, \
+ (TY *)Y, &incY); \
+ } \
+ void cblas_##X##scal( \
+ const int N, const TS alpha, \
+ TV *X, const int incX) \
+ { \
+ X##scal_(&N, \
+ (const TY *)ADDR(alpha), \
+ (TY *)X, &incX); \
+ } \
#define ADDR(val) &val
GEMM_F77(s, float, float, float)
diff --git a/src/backend/opencl/CMakeLists.txt b/src/backend/opencl/CMakeLists.txt
index 3c6bc37..767a292 100644
--- a/src/backend/opencl/CMakeLists.txt
+++ b/src/backend/opencl/CMakeLists.txt
@@ -24,7 +24,22 @@ ENDIF(APPLE)
IF(NOT LAPACK_FOUND)
MESSAGE(WARNING "LAPACK not found. Functionality will be disabled")
ELSE(NOT LAPACK_FOUND)
- ADD_DEFINITIONS(-DWITH_OPENCL_LINEAR_ALGEBRA)
+ ADD_DEFINITIONS(-DWITH_OPENCL_LINEAR_ALGEBRA)
+
+ IF(NOT USE_OPENCL_MKL)
+ FIND_PACKAGE(CBLAS REQUIRED)
+
+ IF(USE_CPU_F77_BLAS)
+ MESSAGE("Using F77 BLAS")
+ ADD_DEFINITIONS(-DUSE_F77_BLAS)
+ ENDIF()
+
+ IF (NOT CBLAS_LIBRARIES)
+ MESSAGE(SEND_ERROR "CBLAS Library not set")
+ ELSE()
+ MESSAGE(STATUS "Using CBLAS Library: ${CBLAS_LIBRARIES}")
+ ENDIF()
+ ENDIF()
ENDIF()
IF(NOT UNIX)
@@ -75,6 +90,7 @@ INCLUDE_DIRECTORIES(
${CLFFT_INCLUDE_DIRS}
${Boost_INCLUDE_DIR}
${BoostCompute_INCLUDE_DIRS}
+ ${CBLAS_INCLUDE_DIR}
${LAPACK_INCLUDE_DIR}
)
@@ -243,7 +259,9 @@ IF(FORGE_FOUND AND NOT USE_SYSTEM_FORGE)
ENDIF()
IF(LAPACK_FOUND)
- TARGET_LINK_LIBRARIES(afopencl PRIVATE ${LAPACK_LIBRARIES})
+ TARGET_LINK_LIBRARIES(afopencl
+ PRIVATE ${LAPACK_LIBRARIES}
+ PRIVATE ${CBLAS_LIBRARIES})
ENDIF()
SET_TARGET_PROPERTIES(afopencl PROPERTIES
diff --git a/src/backend/opencl/magma/geqrf2.cpp b/src/backend/opencl/magma/geqrf2.cpp
index 3784eda..4041976 100644
--- a/src/backend/opencl/magma/geqrf2.cpp
+++ b/src/backend/opencl/magma/geqrf2.cpp
@@ -245,8 +245,8 @@ magma_geqrf2_gpu(
0, lwork*sizeof(Ty),
0, NULL, NULL, NULL);
- geqrf_work_func<Ty> cpu_geqrf;
- larft_func<Ty> cpu_larft;
+ cpu_geqrf_work_func<Ty> cpu_geqrf;
+ cpu_larft_func<Ty> cpu_larft;
nbmin = 2;
nx = nb;
@@ -275,11 +275,11 @@ magma_geqrf2_gpu(
}
magma_queue_sync(queue[0]);
- *info = cpu_geqrf(LAPACK_COL_MAJOR, rows, ib, work(i), ldwork, tau+i, hwork, lhwork);
+ *info = cpu_geqrf( rows, ib, work(i), ldwork, tau+i, hwork, lhwork);
/* Form the triangular factor of the block reflector
H = H(i) H(i+1) . . . H(i+ib-1) */
- cpu_larft(LAPACK_COL_MAJOR,
+ cpu_larft(
*MagmaForwardStr, *MagmaColumnwiseStr,
rows, ib,
work(i), ldwork, tau+i, hwork, ib);
@@ -329,7 +329,7 @@ magma_geqrf2_gpu(
magma_queue_sync(queue[1]);
lhwork = lwork - rows*ib;
- *info = cpu_geqrf(LAPACK_COL_MAJOR, rows, ib, work, rows, tau+i, work+ib*rows, lhwork);
+ *info = cpu_geqrf( rows, ib, work, rows, tau+i, work+ib*rows, lhwork);
magma_setmatrix_async<Ty>(rows, ib, work, rows, dA(i, i), ldda, queue[1], NULL);
}
diff --git a/src/backend/opencl/magma/geqrf3.cpp b/src/backend/opencl/magma/geqrf3.cpp
index ce7a1c9..192bd45 100644
--- a/src/backend/opencl/magma/geqrf3.cpp
+++ b/src/backend/opencl/magma/geqrf3.cpp
@@ -217,8 +217,8 @@ magma_geqrf3_gpu(
ldwork = m;
lddwork= n;
- geqrf_work_func<Ty> cpu_geqrf;
- larft_func<Ty> cpu_larft;
+ cpu_geqrf_work_func<Ty> cpu_geqrf;
+ cpu_larft_func<Ty> cpu_larft;
if ( (nb > 1) && (nb < k) ) {
/* Use blocked code initially */
@@ -244,11 +244,11 @@ magma_geqrf3_gpu(
}
magma_event_sync(event[1]);
- *info = cpu_geqrf(LAPACK_COL_MAJOR, rows, ib, work_ref(i), ldwork, tau+i, hwork, lhwork);
+ *info = cpu_geqrf( rows, ib, work_ref(i), ldwork, tau+i, hwork, lhwork);
/* Form the triangular factor of the block reflector
H = H(i) H(i+1) . . . H(i+ib-1) */
- cpu_larft(LAPACK_COL_MAJOR,
+ cpu_larft(
*MagmaForwardStr, *MagmaColumnwiseStr,
rows, ib,
work_ref(i), ldwork,
@@ -296,7 +296,7 @@ magma_geqrf3_gpu(
magma_getmatrix<Ty>( rows, ib, a_ref(i, i), ldda, work, rows, queue );
lhwork = lwork - rows*ib;
- *info = cpu_geqrf(LAPACK_COL_MAJOR, rows, ib, work, rows, tau+i, work+ib*rows, lhwork);
+ *info = cpu_geqrf( rows, ib, work, rows, tau+i, work+ib*rows, lhwork);
magma_setmatrix<Ty>( rows, ib, work, rows, a_ref(i, i), ldda, queue );
}
diff --git a/src/backend/opencl/magma/getrf.cpp b/src/backend/opencl/magma/getrf.cpp
index a79bd7c..b398afd 100644
--- a/src/backend/opencl/magma/getrf.cpp
+++ b/src/backend/opencl/magma/getrf.cpp
@@ -31,22 +31,22 @@
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
+ * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * * Neither the name of the University of Tennessee, Knoxville nor the
+ * * Neither the name of the University of Tennessee, Knoxville nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**********************************************************************/
@@ -149,9 +149,9 @@ magma_int_t magma_getrf_gpu(
if (m == 0 || n == 0)
return *info;
- gemm_func<Ty> gpu_gemm;
- trsm_func<Ty> gpu_trsm;
- getrf_func<Ty> cpu_getrf;
+ gpu_gemm_func<Ty> gpu_gemm;
+ gpu_trsm_func<Ty> gpu_trsm;
+ cpu_getrf_func<Ty> cpu_getrf;
/* Function Body */
mindim = std::min(m, n);
@@ -165,7 +165,7 @@ magma_int_t magma_getrf_gpu(
return *info;
}
magma_getmatrix<Ty>(m, n, dA(0,0), ldda, work(0), m, queue);
- cpu_getrf(LAPACK_COL_MAJOR, m, n, work, m, ipiv);
+ cpu_getrf( m, n, work, m, ipiv);
magma_setmatrix<Ty>(m, n, work(0), m, dA(0,0), ldda, queue);
magma_free_cpu(work);
}
@@ -219,7 +219,7 @@ magma_int_t magma_getrf_gpu(
magma_getmatrix<Ty>(m-j*nb, nb, dAP(0,0), maxm, work(0), ldwork, queue);
if (j > 0 && n > (j + 1) * nb) {
- gpu_trsm(clblasColumnMajor,
+ gpu_trsm(
clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
n - (j+1)*nb, nb,
c_one,
@@ -228,7 +228,7 @@ magma_int_t magma_getrf_gpu(
1, &queue, 0, nullptr, &event);
if (m > j * nb) {
- gpu_gemm(clblasColumnMajor, clblasNoTrans, clblasNoTrans,
+ gpu_gemm( clblasNoTrans, clblasNoTrans,
n-(j+1)*nb, m-j*nb, nb,
c_neg_one,
dAT(j-1,j+1), lddat,
@@ -241,7 +241,7 @@ magma_int_t magma_getrf_gpu(
// do the cpu part
rows = m - j*nb;
- cpu_getrf(LAPACK_COL_MAJOR, rows, nb, work, ldwork, ipiv+j*nb);
+ cpu_getrf( rows, nb, work, ldwork, ipiv+j*nb);
if (*info == 0 && iinfo > 0)
*info = iinfo + j*nb;
@@ -257,7 +257,7 @@ magma_int_t magma_getrf_gpu(
// do the small non-parallel computations (next panel update)
if (s > (j+1)) {
- gpu_trsm(clblasColumnMajor,
+ gpu_trsm(
clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
nb, nb,
c_one,
@@ -266,7 +266,7 @@ magma_int_t magma_getrf_gpu(
1, &queue, 0, nullptr, &event);
- gpu_gemm(clblasColumnMajor, clblasNoTrans, clblasNoTrans,
+ gpu_gemm( clblasNoTrans, clblasNoTrans,
nb, m-(j+1)*nb, nb,
c_neg_one,
dAT(j, j+1), lddat,
@@ -277,7 +277,7 @@ magma_int_t magma_getrf_gpu(
}
else {
if (n > s * nb) {
- gpu_trsm(clblasColumnMajor,
+ gpu_trsm(
clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
n-s*nb, nb,
c_one,
@@ -287,7 +287,7 @@ magma_int_t magma_getrf_gpu(
}
if ((n > (j+1) * nb) && (m > (j+1) * nb)) {
- gpu_gemm(clblasColumnMajor, clblasNoTrans, clblasNoTrans,
+ gpu_gemm( clblasNoTrans, clblasNoTrans,
n-(j+1)*nb, m-(j+1)*nb, nb,
c_neg_one,
dAT(j, j+1), lddat,
@@ -308,7 +308,7 @@ magma_int_t magma_getrf_gpu(
magma_getmatrix<Ty>(rows, nb0, dAP(0,0), maxm, work(0), ldwork, queue);
// do the cpu part
- cpu_getrf(LAPACK_COL_MAJOR, rows, nb0, work, ldwork, ipiv+s*nb);
+ cpu_getrf( rows, nb0, work, ldwork, ipiv+s*nb);
if (*info == 0 && iinfo > 0)
*info = iinfo + s*nb;
@@ -322,7 +322,7 @@ magma_int_t magma_getrf_gpu(
magmablas_transpose<Ty>(rows, nb0, dAP(0,0), maxm, dAT(s,s), lddat, queue);
if (n > s * nb + nb0) {
- gpu_trsm(clblasColumnMajor,
+ gpu_trsm(
clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
n-s*nb-nb0, nb0,
c_one, dAT(s,s), lddat,
diff --git a/src/backend/opencl/magma/getrs.cpp b/src/backend/opencl/magma/getrs.cpp
index 3b83179..6ad943b 100644
--- a/src/backend/opencl/magma/getrs.cpp
+++ b/src/backend/opencl/magma/getrs.cpp
@@ -159,9 +159,9 @@ magma_getrs_gpu(magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
i1 = 1;
i2 = n;
- laswp_func<Ty> cpu_laswp;
- trsm_func<Ty> gpu_trsm;
- trsv_func<Ty> gpu_trsv;
+ cpu_laswp_func<Ty> cpu_laswp;
+ gpu_trsm_func<Ty> gpu_trsm;
+ gpu_trsv_func<Ty> gpu_trsv;
cl_event event = NULL;
@@ -180,18 +180,18 @@ magma_getrs_gpu(magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
/* Solve A * X = B. */
magma_getmatrix<Ty>( n, nrhs, dB, dB_offset, lddb, work, n, queue );
- cpu_laswp(LAPACK_COL_MAJOR, nrhs, work, n, i1, i2, ipiv, inc);
+ cpu_laswp( nrhs, work, n, i1, i2, ipiv, inc);
magma_setmatrix<Ty>( n, nrhs, work, n, dB, dB_offset, lddb, queue );
if ( nrhs == 1) {
- gpu_trsv(clblasColumnMajor, clblasLower, clblasNoTrans, clblasUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
- gpu_trsv(clblasColumnMajor, clblasUpper, clblasNoTrans, clblasNonUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
+ gpu_trsv( clblasLower, clblasNoTrans, clblasUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
+ gpu_trsv( clblasUpper, clblasNoTrans, clblasNonUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
} else {
- gpu_trsm(clblasColumnMajor, clblasLeft, clblasLower, clblasNoTrans, clblasUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+ gpu_trsm( clblasLeft, clblasLower, clblasNoTrans, clblasUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
if(cond) {
- gpu_trsm(clblasColumnMajor, clblasLeft, clblasLower, clblasTrans, clblasNonUnit, n, nrhs, c_one, dAT, 0, n, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+ gpu_trsm( clblasLeft, clblasLower, clblasTrans, clblasNonUnit, n, nrhs, c_one, dAT, 0, n, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
} else {
- gpu_trsm(clblasColumnMajor, clblasLeft, clblasUpper, clblasNoTrans, clblasNonUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+ gpu_trsm( clblasLeft, clblasUpper, clblasNoTrans, clblasNonUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
}
}
} else {
@@ -199,18 +199,18 @@ magma_getrs_gpu(magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
/* Solve A' * X = B. */
if ( nrhs == 1) {
- gpu_trsv(clblasColumnMajor, clblasUpper, cltrans, clblasNonUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
- gpu_trsv(clblasColumnMajor, clblasLower, cltrans, clblasUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
+ gpu_trsv( clblasUpper, cltrans, clblasNonUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
+ gpu_trsv( clblasLower, cltrans, clblasUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
} else {
if(cond) {
- gpu_trsm(clblasColumnMajor, clblasLeft, clblasLower, clblasNoTrans, clblasNonUnit, n, nrhs, c_one, dAT, 0, n, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+ gpu_trsm( clblasLeft, clblasLower, clblasNoTrans, clblasNonUnit, n, nrhs, c_one, dAT, 0, n, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
} else {
- gpu_trsm(clblasColumnMajor, clblasLeft, clblasUpper, cltrans, clblasNonUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+ gpu_trsm( clblasLeft, clblasUpper, cltrans, clblasNonUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
}
- gpu_trsm(clblasColumnMajor, clblasLeft, clblasLower, cltrans, clblasUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+ gpu_trsm( clblasLeft, clblasLower, cltrans, clblasUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
}
magma_getmatrix<Ty>( n, nrhs, dB, dB_offset, lddb, work, n, queue );
- cpu_laswp(LAPACK_COL_MAJOR, nrhs, work, n, i1, i2, ipiv, inc);
+ cpu_laswp( nrhs, work, n, i1, i2, ipiv, inc);
magma_setmatrix<Ty>( n, nrhs, work, n, dB, dB_offset, lddb, queue );
}
diff --git a/src/backend/opencl/magma/larfb.cpp b/src/backend/opencl/magma/larfb.cpp
index 747e16a..5b188f4 100644
--- a/src/backend/opencl/magma/larfb.cpp
+++ b/src/backend/opencl/magma/larfb.cpp
@@ -33,22 +33,22 @@
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
+ * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * * Neither the name of the University of Tennessee, Knoxville nor the
+ * * Neither the name of the University of Tennessee, Knoxville nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**********************************************************************/
@@ -56,7 +56,6 @@
#include "magma.h"
#include "magma_blas.h"
#include "magma_data.h"
-#include "magma_cpu_lapack.h"
#include "magma_helper.h"
#include "magma_sync.h"
@@ -255,8 +254,8 @@ magma_larfb_gpu(
transV = clblasNoTrans;
}
- gemm_func<Ty> gpu_gemm;
- trmm_func<Ty> gpu_trmm;
+ gpu_gemm_func<Ty> gpu_gemm;
+ gpu_trmm_func<Ty> gpu_trmm;
cl_event event = NULL;
@@ -265,7 +264,7 @@ magma_larfb_gpu(
// Comments assume H C. When forming H^H C, T gets transposed via transt.
// W = C^H V
- gpu_gemm(clblasColumnMajor,
+ gpu_gemm(
transType, notransV,
n, k, m,
c_one,
@@ -276,7 +275,7 @@ magma_larfb_gpu(
1, &queue, 0, nullptr, &event);
// W = W T^H = C^H V T^H
- gpu_trmm(clblasColumnMajor,
+ gpu_trmm(
clblasRight,
uplo, transt, clblasNonUnit,
n, k,
@@ -286,7 +285,7 @@ magma_larfb_gpu(
1, &queue, 0, nullptr, &event);
// C = C - V W^H = C - V T V^H C = (I - V T V^H) C = H C
- gpu_gemm(clblasColumnMajor,
+ gpu_gemm(
notransV, transType,
m, n, k,
c_neg_one,
@@ -301,7 +300,7 @@ magma_larfb_gpu(
// Comments assume C H. When forming C H^H, T gets transposed via trans.
// W = C V
- gpu_gemm(clblasColumnMajor,
+ gpu_gemm(
clblasNoTrans, notransV,
m, k, n,
c_one,
@@ -312,7 +311,7 @@ magma_larfb_gpu(
1, &queue, 0, nullptr, &event);
// W = W T = C V T
- gpu_trmm(clblasColumnMajor,
+ gpu_trmm(
clblasRight, uplo,
cltrans,
clblasNonUnit,
@@ -323,7 +322,7 @@ magma_larfb_gpu(
1, &queue, 0, nullptr, &event);
// C = C - W V^H = C - C V T V^H = C (I - V T V^H) = C H
- gpu_gemm(clblasColumnMajor,
+ gpu_gemm(
clblasNoTrans, transV,
m, n, k,
c_neg_one,
diff --git a/src/backend/opencl/magma/magma_blas.h b/src/backend/opencl/magma/magma_blas.h
index 8314bb7..44ebc03 100644
--- a/src/backend/opencl/magma/magma_blas.h
+++ b/src/backend/opencl/magma/magma_blas.h
@@ -19,53 +19,38 @@
using opencl::cfloat;
using opencl::cdouble;
+#define clblasSherk(...) clblasSsyrk(__VA_ARGS__)
+#define clblasDherk(...) clblasDsyrk(__VA_ARGS__)
+
#define BLAS_FUNC_DEF(NAME) \
template<typename T> \
- struct NAME##_func;
-
-#define BLAS_FUNC(NAME, TYPE, PREFIX) \
- template<> \
- struct NAME##_func<TYPE> \
- { \
- template<typename... Args> \
- void \
- operator() (Args... args) \
- { \
- CLBLAS_CHECK(clblas##PREFIX##NAME(args...)); \
- } \
+ struct gpu_##NAME##_func;
+
+#define BLAS_FUNC(NAME, TYPE, PREFIX) \
+ template<> \
+ struct gpu_##NAME##_func<TYPE> \
+ { \
+ template<typename... Args> \
+ void \
+ operator() (Args... args) \
+ { \
+ CLBLAS_CHECK(clblas##PREFIX##NAME(clblasColumnMajor, \
+ args...)); \
+ } \
};
-BLAS_FUNC_DEF(gemm)
-BLAS_FUNC(gemm, float, S)
-BLAS_FUNC(gemm, double, D)
-BLAS_FUNC(gemm, cfloat, C)
-BLAS_FUNC(gemm, cdouble, Z)
-
-BLAS_FUNC_DEF(trmm)
-BLAS_FUNC(trmm, float, S)
-BLAS_FUNC(trmm, double, D)
-BLAS_FUNC(trmm, cfloat, C)
-BLAS_FUNC(trmm, cdouble, Z)
-
-BLAS_FUNC_DEF(trsm)
-BLAS_FUNC(trsm, float, S)
-BLAS_FUNC(trsm, double, D)
-BLAS_FUNC(trsm, cfloat, C)
-BLAS_FUNC(trsm, cdouble, Z)
-
-BLAS_FUNC_DEF(trsv)
-BLAS_FUNC(trsv, float, S)
-BLAS_FUNC(trsv, double, D)
-BLAS_FUNC(trsv, cfloat, C)
-BLAS_FUNC(trsv, cdouble, Z)
-
-#define clblasSherk(...) clblasSsyrk(__VA_ARGS__)
-#define clblasDherk(...) clblasDsyrk(__VA_ARGS__)
-
-BLAS_FUNC_DEF(herk)
-BLAS_FUNC(herk, float, S)
-BLAS_FUNC(herk, double, D)
-BLAS_FUNC(herk, cfloat, C)
-BLAS_FUNC(herk, cdouble, Z)
+#define BLAS_FUNC_DECL(NAME) \
+ BLAS_FUNC_DEF(NAME) \
+ BLAS_FUNC(NAME, float, S) \
+ BLAS_FUNC(NAME, double, D) \
+ BLAS_FUNC(NAME, cfloat, C) \
+ BLAS_FUNC(NAME, cdouble, Z) \
+
+BLAS_FUNC_DECL(gemm)
+BLAS_FUNC_DECL(gemv)
+BLAS_FUNC_DECL(trmm)
+BLAS_FUNC_DECL(trsm)
+BLAS_FUNC_DECL(trsv)
+BLAS_FUNC_DECL(herk)
#endif
diff --git a/src/backend/opencl/magma/magma_cpu_blas.h b/src/backend/opencl/magma/magma_cpu_blas.h
new file mode 100644
index 0000000..f5df93d
--- /dev/null
+++ b/src/backend/opencl/magma/magma_cpu_blas.h
@@ -0,0 +1,90 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#ifndef MAGMA_CPU_BLAS
+#define MAGMA_CPU_BLAS
+
+#include "magma_types.h"
+
+#ifdef __APPLE__
+#include <Accelerate/Accelerate.h>
+#else
+#ifdef USE_MKL
+#include <mkl_cblas.h>
+#else
+extern "C" {
+#include <cblas.h>
+}
+#endif
+#endif
+
+// TODO: Ask upstream for a more official way to detect it
+#ifdef OPENBLAS_CONST
+#define IS_OPENBLAS
+#endif
+
+// Make sure we get the correct type signature for OpenBLAS
+// OpenBLAS defines blasint as its index type. Emulate this
+// if we're not dealing with openblas and use it where applicable
+#ifndef IS_OPENBLAS
+typedef int blasint;
+#endif
+
+#define CPU_BLAS_FUNC_DEF(NAME) \
+ template<typename T> \
+ struct cpu_##NAME##_func;
+
+#define CPU_BLAS_FUNC1(NAME, TYPE, X) \
+ template<> \
+ struct cpu_##NAME##_func<TYPE> \
+ { \
+ template<typename... Args> \
+ void \
+ operator() (Args... args) \
+ { return cblas_##X##NAME(CblasColMajor, args...); } \
+ };
+
+#define CPU_BLAS_FUNC2(NAME, TYPE, X) \
+ template<> \
+ struct cpu_##NAME##_func<TYPE> \
+ { \
+ template<typename... Args> \
+ void \
+ operator() (Args... args) \
+ { return cblas_##X##NAME(args...); } \
+ };
+
+#define CPU_BLAS_DECL1(NAME) \
+ CPU_BLAS_FUNC_DEF(NAME) \
+ CPU_BLAS_FUNC1(NAME, float, s) \
+ CPU_BLAS_FUNC1(NAME, double, d) \
+ CPU_BLAS_FUNC1(NAME, magmaFloatComplex, c) \
+ CPU_BLAS_FUNC1(NAME, magmaDoubleComplex, z) \
+
+#define CPU_BLAS_DECL2(NAME) \
+ CPU_BLAS_FUNC_DEF(NAME) \
+ CPU_BLAS_FUNC2(NAME, float, s) \
+ CPU_BLAS_FUNC2(NAME, double, d) \
+ CPU_BLAS_FUNC2(NAME, magmaFloatComplex, c) \
+ CPU_BLAS_FUNC2(NAME, magmaDoubleComplex, z) \
+
+CPU_BLAS_DECL1(gemv)
+CPU_BLAS_DECL2(scal)
+CPU_BLAS_DECL2(axpy)
+
+inline float * cblas_ptr(float *in) { return in; }
+inline double * cblas_ptr(double *in) { return in; }
+inline void * cblas_ptr(magmaFloatComplex *in) { return (void *)in; }
+inline void * cblas_ptr(magmaDoubleComplex *in) { return (void *)in; }
+
+inline float cblas_scalar(float *in) { return *in; }
+inline double cblas_scalar(double *in) { return *in; }
+inline void *cblas_scalar(magmaFloatComplex *in) { return (void *)in; }
+inline void *cblas_scalar(magmaDoubleComplex *in) { return (void *)in; }
+#endif
diff --git a/src/backend/opencl/magma/magma_cpu_lapack.h b/src/backend/opencl/magma/magma_cpu_lapack.h
index 4410e1b..d406508 100644
--- a/src/backend/opencl/magma/magma_cpu_lapack.h
+++ b/src/backend/opencl/magma/magma_cpu_lapack.h
@@ -17,6 +17,12 @@
#define LAPACKE_sungqr_work(...) LAPACKE_sorgqr_work(__VA_ARGS__)
#define LAPACKE_dungqr_work(...) LAPACKE_dorgqr_work(__VA_ARGS__)
+template<typename... Args>
+int LAPACKE_slacgv(Args... args) { return 0; }
+
+template<typename... Args>
+int LAPACKE_dlacgv(Args... args) { return 0; }
+
#define lapack_complex_float magmaFloatComplex
#define lapack_complex_double magmaDoubleComplex
#define LAPACK_PREFIX LAPACKE_
@@ -36,64 +42,53 @@
#define CPU_LAPACK_FUNC_DEF(NAME) \
template<typename T> \
- struct NAME##_func;
-
-#define CPU_LAPACK_FUNC(NAME, TYPE, X) \
- template<> \
- struct NAME##_func<TYPE> \
- { \
- template<typename... Args> \
- int \
- operator() (Args... args) \
- { return LAPACK_NAME(X##NAME)(args...); } \
+ struct cpu_##NAME##_func;
+
+#define CPU_LAPACK_FUNC1(NAME, TYPE, X) \
+ template<> \
+ struct cpu_##NAME##_func<TYPE> \
+ { \
+ template<typename... Args> \
+ int \
+ operator() (Args... args) \
+ { return LAPACK_NAME(X##NAME)(LAPACK_COL_MAJOR, args...); } \
};
-CPU_LAPACK_FUNC_DEF(getrf)
-CPU_LAPACK_FUNC(getrf, float, s)
-CPU_LAPACK_FUNC(getrf, double, d)
-CPU_LAPACK_FUNC(getrf, magmaFloatComplex, c)
-CPU_LAPACK_FUNC(getrf, magmaDoubleComplex, z)
-
-CPU_LAPACK_FUNC_DEF(potrf)
-CPU_LAPACK_FUNC(potrf, float, s)
-CPU_LAPACK_FUNC(potrf, double, d)
-CPU_LAPACK_FUNC(potrf, magmaFloatComplex, c)
-CPU_LAPACK_FUNC(potrf, magmaDoubleComplex, z)
-
-CPU_LAPACK_FUNC_DEF(trtri)
-CPU_LAPACK_FUNC(trtri, float, s)
-CPU_LAPACK_FUNC(trtri, double, d)
-CPU_LAPACK_FUNC(trtri, magmaFloatComplex, c)
-CPU_LAPACK_FUNC(trtri, magmaDoubleComplex, z)
-
-CPU_LAPACK_FUNC_DEF(geqrf_work)
-CPU_LAPACK_FUNC(geqrf_work, float, s)
-CPU_LAPACK_FUNC(geqrf_work, double, d)
-CPU_LAPACK_FUNC(geqrf_work, magmaFloatComplex, c)
-CPU_LAPACK_FUNC(geqrf_work, magmaDoubleComplex, z)
-
-CPU_LAPACK_FUNC_DEF(larft)
-CPU_LAPACK_FUNC(larft, float, s)
-CPU_LAPACK_FUNC(larft, double, d)
-CPU_LAPACK_FUNC(larft, magmaFloatComplex, c)
-CPU_LAPACK_FUNC(larft, magmaDoubleComplex, z)
-
-CPU_LAPACK_FUNC_DEF(unmqr_work)
-CPU_LAPACK_FUNC(unmqr_work, float, s)
-CPU_LAPACK_FUNC(unmqr_work, double, d)
-CPU_LAPACK_FUNC(unmqr_work, magmaFloatComplex, c)
-CPU_LAPACK_FUNC(unmqr_work, magmaDoubleComplex, z)
-
-CPU_LAPACK_FUNC_DEF(ungqr_work)
-CPU_LAPACK_FUNC(ungqr_work, float, s)
-CPU_LAPACK_FUNC(ungqr_work, double, d)
-CPU_LAPACK_FUNC(ungqr_work, magmaFloatComplex, c)
-CPU_LAPACK_FUNC(ungqr_work, magmaDoubleComplex, z)
+#define CPU_LAPACK_FUNC2(NAME, TYPE, X) \
+ template<> \
+ struct cpu_##NAME##_func<TYPE> \
+ { \
+ template<typename... Args> \
+ int \
+ operator() (Args... args) \
+ { return LAPACK_NAME(X##NAME)(args...); } \
+ };
-CPU_LAPACK_FUNC_DEF(laswp)
-CPU_LAPACK_FUNC(laswp, float, s)
-CPU_LAPACK_FUNC(laswp, double, d)
-CPU_LAPACK_FUNC(laswp, magmaFloatComplex, c)
-CPU_LAPACK_FUNC(laswp, magmaDoubleComplex, z)
+#define CPU_LAPACK_DECL1(NAME) \
+ CPU_LAPACK_FUNC_DEF(NAME) \
+ CPU_LAPACK_FUNC1(NAME, float, s) \
+ CPU_LAPACK_FUNC1(NAME, double, d) \
+ CPU_LAPACK_FUNC1(NAME, magmaFloatComplex, c) \
+ CPU_LAPACK_FUNC1(NAME, magmaDoubleComplex, z) \
+
+#define CPU_LAPACK_DECL2(NAME) \
+ CPU_LAPACK_FUNC_DEF(NAME) \
+ CPU_LAPACK_FUNC2(NAME, float, s) \
+ CPU_LAPACK_FUNC2(NAME, double, d) \
+ CPU_LAPACK_FUNC2(NAME, magmaFloatComplex, c) \
+ CPU_LAPACK_FUNC2(NAME, magmaDoubleComplex, z) \
+
+CPU_LAPACK_DECL1(getrf)
+CPU_LAPACK_DECL1(gebrd)
+CPU_LAPACK_DECL1(potrf)
+CPU_LAPACK_DECL1(trtri)
+CPU_LAPACK_DECL1(geqrf_work)
+CPU_LAPACK_DECL1(larft)
+CPU_LAPACK_DECL1(unmqr_work)
+CPU_LAPACK_DECL1(ungqr_work)
+CPU_LAPACK_DECL1(laswp)
+CPU_LAPACK_DECL1(laset)
+CPU_LAPACK_DECL2(lacgv)
+CPU_LAPACK_DECL2(larfg)
#endif
diff --git a/src/backend/opencl/magma/magma_helper.cpp b/src/backend/opencl/magma/magma_helper.cpp
index b38045a..a3a3d9e 100644
--- a/src/backend/opencl/magma/magma_helper.cpp
+++ b/src/backend/opencl/magma/magma_helper.cpp
@@ -43,6 +43,12 @@ template<typename T> T magma_scalar(double val) { return (T)val; }
template float magma_scalar<float>(double val);
template double magma_scalar<double>(double val);
+template<typename T> double magma_real(T val) { return (double)val; }
+template double magma_real<float>(float val);
+template double magma_real<double>(double val);
+template<> double magma_real<magmaFloatComplex>(magmaFloatComplex val) { return (double)val.s[0]; }
+template<> double magma_real<magmaDoubleComplex>(magmaDoubleComplex val) { return (double)val.s[0]; }
+
#define INSTANTIATE_CPLX_SCALAR(T) \
template<> T magma_scalar<T>(double val) \
{ \
@@ -152,3 +158,19 @@ magma_int_t magma_get_geqrf_nb<magmaDoubleComplex>( magma_int_t m )
else if (m <= 4032) return 64;
else return 128;
}
+
+template<typename T> magma_int_t magma_get_gebrd_nb(int num) { return 256; }
+
+template<typename T> T magma_make(double r, double i) { return (T) r; }
+template float magma_make<float>(double r, double i);
+template double magma_make<double>(double r, double i);
+template<> magmaFloatComplex magma_make<magmaFloatComplex>(double r, double i)
+{
+ magmaFloatComplex tmp = {r, i};
+ return tmp;
+}
+template<> magmaDoubleComplex magma_make<magmaDoubleComplex>(double r, double i)
+{
+ magmaDoubleComplex tmp = {r, i};
+ return tmp;
+}
diff --git a/src/backend/opencl/magma/magma_helper.h b/src/backend/opencl/magma/magma_helper.h
index 32fd065..a63c8d1 100644
--- a/src/backend/opencl/magma/magma_helper.h
+++ b/src/backend/opencl/magma/magma_helper.h
@@ -14,11 +14,14 @@ template<typename T> T magma_zero();
template<typename T> T magma_one();
template<typename T> T magma_neg_one();
template<typename T> T magma_scalar(double val);
+template<typename T> double magma_real(T val);
+template<typename T> T magma_make(double r, double i);
template<typename T> bool magma_is_real();
template<typename T> magma_int_t magma_get_getrf_nb(int num);
template<typename T> magma_int_t magma_get_potrf_nb(int num);
template<typename T> magma_int_t magma_get_geqrf_nb(int num);
+template<typename T> magma_int_t magma_get_gebrd_nb(int num);
#endif
diff --git a/src/backend/opencl/magma/potrf.cpp b/src/backend/opencl/magma/potrf.cpp
index f09cd22..ddc4f46 100644
--- a/src/backend/opencl/magma/potrf.cpp
+++ b/src/backend/opencl/magma/potrf.cpp
@@ -31,22 +31,22 @@
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
+ * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * * Neither the name of the University of Tennessee, Knoxville nor the
+ * * Neither the name of the University of Tennessee, Knoxville nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**********************************************************************/
@@ -151,10 +151,10 @@ magma_int_t magma_potrf_gpu(
nb = magma_get_potrf_nb<Ty>(n);
- gemm_func<Ty> gpu_gemm;
- trsm_func<Ty> gpu_trsm;
- herk_func<Ty> gpu_herk;
- potrf_func<Ty> cpu_potrf;
+ gpu_gemm_func<Ty> gpu_gemm;
+ gpu_trsm_func<Ty> gpu_trsm;
+ gpu_herk_func<Ty> gpu_herk;
+ cpu_potrf_func<Ty> cpu_potrf;
err = magma_malloc_cpu<Ty>( &work, nb*nb);
@@ -170,7 +170,7 @@ magma_int_t magma_potrf_gpu(
// use unblocked code
magma_getmatrix<Ty>(n, n, dA, dA_offset, ldda, work, n, queue);
- cpu_potrf(LAPACK_COL_MAJOR,
+ cpu_potrf(
uplo == MagmaUpper ? *MagmaUpperStr : *MagmaLowerStr,
n, work, n);
@@ -185,7 +185,7 @@ magma_int_t magma_potrf_gpu(
// apply all previous updates to diagonal block
jb = std::min(nb, n-j);
if (j > 0) {
- gpu_herk(clblasColumnMajor,
+ gpu_herk(
clblasUpper, transType,
jb, j,
m_one,
@@ -200,7 +200,7 @@ magma_int_t magma_potrf_gpu(
// apply all previous updates to block row right of diagonal block
if (j+jb < n) {
- gpu_gemm(clblasColumnMajor,
+ gpu_gemm(
transType, clblasNoTrans,
jb, n-j-jb, j,
mz_one,
@@ -214,7 +214,7 @@ magma_int_t magma_potrf_gpu(
// simultaneous with above zgemm, transfer data, factor
// diagonal block on CPU, and test for positive definiteness
magma_event_sync(event);
- *info =cpu_potrf(LAPACK_COL_MAJOR, *MagmaUpperStr, jb, work, jb);
+ *info =cpu_potrf( *MagmaUpperStr, jb, work, jb);
if (*info != 0) {
assert(*info > 0);
@@ -227,7 +227,7 @@ magma_int_t magma_potrf_gpu(
// apply diagonal block to block row right of diagonal block
if (j+jb < n) {
magma_event_sync(event);
- gpu_trsm(clblasColumnMajor,
+ gpu_trsm(
clblasLeft, clblasUpper,
transType, clblasNonUnit,
jb, n-j-jb,
@@ -246,7 +246,7 @@ magma_int_t magma_potrf_gpu(
// apply all previous updates to diagonal block
jb = std::min(nb, n-j);
if (j>0) {
- gpu_herk(clblasColumnMajor,
+ gpu_herk(
clblasLower, clblasNoTrans, jb, j,
m_one,
dA(j, 0), ldda,
@@ -260,7 +260,7 @@ magma_int_t magma_potrf_gpu(
// apply all previous updates to block column below diagonal block
if (j+jb < n) {
- gpu_gemm(clblasColumnMajor,
+ gpu_gemm(
clblasNoTrans, transType,
n-j-jb, jb, j,
mz_one,
@@ -274,7 +274,7 @@ magma_int_t magma_potrf_gpu(
// simultaneous with above zgemm, transfer data, factor
// diagonal block on CPU, and test for positive definiteness
magma_event_sync(event);
- *info = cpu_potrf(LAPACK_COL_MAJOR,
+ *info = cpu_potrf(
*MagmaLowerStr, jb, work, jb);
if (*info != 0) {
assert(*info > 0);
@@ -286,7 +286,7 @@ magma_int_t magma_potrf_gpu(
// apply diagonal block to block column below diagonal
if (j+jb < n) {
magma_event_sync(event);
- gpu_trsm(clblasColumnMajor,
+ gpu_trsm(
clblasRight, clblasLower, transType, clblasNonUnit,
n-j-jb, jb,
z_one,
diff --git a/src/backend/opencl/magma/ungqr.cpp b/src/backend/opencl/magma/ungqr.cpp
index c0dd47b..49a120a 100644
--- a/src/backend/opencl/magma/ungqr.cpp
+++ b/src/backend/opencl/magma/ungqr.cpp
@@ -137,7 +137,7 @@ magma_ungqr_gpu(
cl_mem dW;
magma_malloc<Ty>(&dW, (((n+31)/32)*32)*nb);
- ungqr_work_func<Ty> cpu_ungqr;
+ cpu_ungqr_work_func<Ty> cpu_ungqr;
// Use unblocked code for the last or only block.
if (kk < n) {
@@ -147,7 +147,7 @@ magma_ungqr_gpu(
magma_getmatrix<Ty>(m_kk, k_kk,
dA(kk, kk), ldda, panel, m_kk, queue);
- cpu_ungqr(LAPACK_COL_MAJOR,
+ cpu_ungqr(
m_kk, n_kk, k_kk,
panel, m_kk,
&tau[kk], work, lwork);
diff --git a/src/backend/opencl/magma/unmqr.cpp b/src/backend/opencl/magma/unmqr.cpp
index b740a87..ed69e51 100644
--- a/src/backend/opencl/magma/unmqr.cpp
+++ b/src/backend/opencl/magma/unmqr.cpp
@@ -227,7 +227,7 @@ magma_unmqr_gpu(
magma_malloc<Ty>(&dwork, (((n+31)/32)*32)*nb);
- unmqr_work_func<Ty> cpu_unmqr;
+ cpu_unmqr_work_func<Ty> cpu_unmqr;
if ( (left && (! notran)) || ( (!left) && notran ) ) {
i1 = 0;
@@ -283,7 +283,7 @@ magma_unmqr_gpu(
magma_getmatrix<Ty>(ma, ib, a_ref(i, i ), ldda, hA, ma, queue);
magma_getmatrix<Ty>(mi, ni, c_ref(ic, jc), lddc, hC, mi, queue);
- *info = cpu_unmqr(LAPACK_COL_MAJOR,
+ *info = cpu_unmqr(
side == MagmaRight ? 'R' : 'L',
notran ? 'N' : (is_real ? 'T' : 'C'),
mi, ni, ib,
@@ -351,7 +351,7 @@ magma_unmqr_gpu(
magma_getmatrix<Ty>(ma, ib, a_ref(i, i ), ldda, hA, ma, queue);
magma_getmatrix<Ty>(mi, ni, c_ref(ic, jc), lddc, hC, mi, queue);
- *info = cpu_unmqr(LAPACK_COL_MAJOR,
+ *info = cpu_unmqr(
side == MagmaRight ? 'R' : 'L',
notran ? 'N' : (is_real ? 'T' : 'C'),
mi, ni, ib,
diff --git a/src/backend/opencl/magma/unmqr2.cpp b/src/backend/opencl/magma/unmqr2.cpp
index 6de4caf..4da4143 100644
--- a/src/backend/opencl/magma/unmqr2.cpp
+++ b/src/backend/opencl/magma/unmqr2.cpp
@@ -251,7 +251,7 @@ magma_unmqr2_gpu(
ic = 1;
}
- larft_func<Ty> cpu_larft;
+ cpu_larft_func<Ty> cpu_larft;
// set nb-1 super-diagonals to 0, and diagonal to 1.
// This way we can copy V directly to the GPU,
@@ -265,7 +265,7 @@ magma_unmqr2_gpu(
/* Form the triangular factor of the block reflector
H = H(i) H(i+1) . . . H(i+ib-1) */
i__4 = nq - i + 1;
- cpu_larft(LAPACK_COL_MAJOR,
+ cpu_larft(
*MagmaForwardStr, *MagmaColumnwiseStr,
i__4, ib,
wA(i,i), ldwa, &tau[i], T, ib);
diff --git a/src/backend/opencl/solve.cpp b/src/backend/opencl/solve.cpp
index 34a357e..fa101e8 100644
--- a/src/backend/opencl/solve.cpp
+++ b/src/backend/opencl/solve.cpp
@@ -89,7 +89,7 @@ Array<T> leastSquares(const Array<T> &a, const Array<T> &b)
int MN = std::min(M, N);
Array<T> B = createEmptyArray<T>(dim4());
- trsm_func<T> gpu_trsm;
+ gpu_trsm_func<T> gpu_trsm;
cl_event event;
cl_command_queue queue = getQueue()();
@@ -137,7 +137,7 @@ Array<T> leastSquares(const Array<T> &a, const Array<T> &b)
(*dA)(), A.getOffset(), A.strides()[1], 1,
(*dT)(), tmp.getOffset() + MN * NB, NB, 0, queue);
- gpu_trsm(clblasColumnMajor,
+ gpu_trsm(
clblasLeft, clblasUpper,
clblasConjTrans, clblasNonUnit,
B.dims()[0], B.dims()[1],
@@ -225,14 +225,14 @@ Array<T> leastSquares(const Array<T> &a, const Array<T> &b)
{
Array<T> AT = transpose<T>(A, true);
cl::Buffer* AT_buf = AT.get();
- gpu_trsm(clblasColumnMajor,
+ gpu_trsm(
clblasLeft, clblasLower, clblasConjTrans, clblasNonUnit,
N, NRHS, scalar<T>(1),
(*AT_buf)(), AT.getOffset(), AT.strides()[1],
(*B_buf)(), B.getOffset(), B.strides()[1],
1, &queue, 0, nullptr, &event);
} else {
- gpu_trsm(clblasColumnMajor,
+ gpu_trsm(
clblasLeft, clblasUpper, clblasNoTrans, clblasNonUnit,
N, NRHS, scalar<T>(1),
(*A_buf)(), A.getOffset(), A.strides()[1],
@@ -248,7 +248,7 @@ Array<T> leastSquares(const Array<T> &a, const Array<T> &b)
template<typename T>
Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop options)
{
- trsm_func<T> gpu_trsm;
+ gpu_trsm_func<T> gpu_trsm;
Array<T> B = copyArray<T>(b);
@@ -267,7 +267,7 @@ Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop o
Array<T> AT = transpose<T>(A, true);
cl::Buffer* AT_buf = AT.get();
- gpu_trsm(clblasColumnMajor,
+ gpu_trsm(
clblasLeft,
clblasLower,
clblasConjTrans,
@@ -277,7 +277,7 @@ Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop o
(*B_buf)(), B.getOffset(), B.strides()[1],
1, &queue, 0, nullptr, &event);
} else {
- gpu_trsm(clblasColumnMajor,
+ gpu_trsm(
clblasLeft,
options & AF_MAT_LOWER ? clblasLower : clblasUpper,
clblasNoTrans,
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list