[arrayfire] 307/408: Adding proper error checking in magma
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:19 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 163ab3733748ccc52865285eecc33749f809dc57
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Fri Aug 21 16:21:47 2015 -0400
Adding proper error checking in magma
---
src/backend/opencl/magma/gebrd.cpp | 37 ++++-----
src/backend/opencl/magma/geqrf2.cpp | 17 ++--
src/backend/opencl/magma/geqrf3.cpp | 19 +++--
src/backend/opencl/magma/getrf.cpp | 116 ++++++++++++++--------------
src/backend/opencl/magma/getrs.cpp | 30 +++----
src/backend/opencl/magma/labrd.cpp | 113 ++++++++++++++-------------
src/backend/opencl/magma/larfb.cpp | 110 +++++++++++++-------------
src/backend/opencl/magma/magma_blas.h | 26 +++----
src/backend/opencl/magma/magma_cpu_blas.h | 26 +++----
src/backend/opencl/magma/magma_cpu_lapack.h | 65 +++++++++-------
src/backend/opencl/magma/potrf.cpp | 116 ++++++++++++++--------------
src/backend/opencl/magma/ungqr.cpp | 11 ++-
src/backend/opencl/magma/unmqr.cpp | 11 ++-
src/backend/opencl/magma/unmqr2.cpp | 10 +--
src/backend/opencl/solve.cpp | 80 +++++++++----------
src/backend/opencl/svd.cpp | 23 +++---
16 files changed, 412 insertions(+), 398 deletions(-)
diff --git a/src/backend/opencl/magma/gebrd.cpp b/src/backend/opencl/magma/gebrd.cpp
index e4df977..dbeeb1f 100644
--- a/src/backend/opencl/magma/gebrd.cpp
+++ b/src/backend/opencl/magma/gebrd.cpp
@@ -266,8 +266,8 @@ magma_gebrd_hybrid(
magma_setmatrix<Ty>(m, n, a, lda, da, da_offset, ldda, queue);
}
- gpu_gemm_func<Ty> gpu_blas_gemm;
- cpu_gebrd_work_func<Ty> cpu_lapack_gebrd_work;
+ gpu_blas_gemm_func<Ty> gpu_blas_gemm;
+ cpu_lapack_gebrd_work_func<Ty> cpu_lapack_gebrd_work;
for (i=0; i< (minmn - nx); i += nb) {
/* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
@@ -302,19 +302,19 @@ magma_gebrd_hybrid(
work + (ldwrkx+1)*nb, ldwrky,
dwork, dwork_offset + (ldwrkx+1)*nb, ldwrky, queue);
- gpu_blas_gemm(clblasNoTrans, clblasConjTrans,
- nrow, ncol, nb,
- c_neg_one, dA(i+nb, i ), ldda,
- dwork, dwork_offset+(ldwrkx+1)*nb, ldwrky,
- c_one, dA(i+nb, i+nb), ldda,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_gemm(clblasNoTrans, clblasConjTrans,
+ nrow, ncol, nb,
+ c_neg_one, dA(i+nb, i ), ldda,
+ dwork, dwork_offset+(ldwrkx+1)*nb, ldwrky,
+ c_one, dA(i+nb, i+nb), ldda,
+ 1, &queue, 0, nullptr, &event));
- gpu_blas_gemm(clblasNoTrans, clblasNoTrans,
- nrow, ncol, nb,
- c_neg_one, dwork, dwork_offset+nb, ldwrkx,
- dA(i, i+nb), ldda,
- c_one, dA(i+nb, i+nb), ldda,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_gemm(clblasNoTrans, clblasNoTrans,
+ nrow, ncol, nb,
+ c_neg_one, dwork, dwork_offset+nb, ldwrkx,
+ dA(i, i+nb), ldda,
+ c_one, dA(i+nb, i+nb), ldda,
+ 1, &queue, 0, nullptr, &event));
/* Copy diagonal and off-diagonal elements of B back into A */
if (m >= n) {
@@ -340,13 +340,14 @@ magma_gebrd_hybrid(
magma_getmatrix<Ty>(nrow, ncol, dA(i, i), ldda, A(i, i), lda, queue);
}
- *info = cpu_lapack_gebrd_work(nrow, ncol,
- A(i, i), lda, d+i, e+i,
- tauq+i, taup+i, work, lwork);
+ LAPACKE_CHECK(cpu_lapack_gebrd_work(nrow, ncol,
+ A(i, i), lda, d+i, e+i,
+ tauq+i, taup+i, work, lwork));
work[0] = magma_make<Ty>(lwkopt, 0.);
magma_free(dwork);
- return *info;
+ *info = 0;
+ return 0;
} /* magma_zgebrd */
#define INSTANTIATE(Ty) \
diff --git a/src/backend/opencl/magma/geqrf2.cpp b/src/backend/opencl/magma/geqrf2.cpp
index 4041976..3191954 100644
--- a/src/backend/opencl/magma/geqrf2.cpp
+++ b/src/backend/opencl/magma/geqrf2.cpp
@@ -52,7 +52,6 @@
**********************************************************************/
#include "magma.h"
-#include "magma_blas.h"
#include "magma_data.h"
#include "magma_cpu_lapack.h"
#include "magma_helper.h"
@@ -245,8 +244,8 @@ magma_geqrf2_gpu(
0, lwork*sizeof(Ty),
0, NULL, NULL, NULL);
- cpu_geqrf_work_func<Ty> cpu_geqrf;
- cpu_larft_func<Ty> cpu_larft;
+ cpu_lapack_geqrf_work_func<Ty> cpu_lapack_geqrf;
+ cpu_lapack_larft_func<Ty> cpu_lapack_larft;
nbmin = 2;
nx = nb;
@@ -275,14 +274,14 @@ magma_geqrf2_gpu(
}
magma_queue_sync(queue[0]);
- *info = cpu_geqrf( rows, ib, work(i), ldwork, tau+i, hwork, lhwork);
+ LAPACKE_CHECK(cpu_lapack_geqrf( rows, ib, work(i), ldwork, tau+i, hwork, lhwork));
/* Form the triangular factor of the block reflector
H = H(i) H(i+1) . . . H(i+ib-1) */
- cpu_larft(
- *MagmaForwardStr, *MagmaColumnwiseStr,
- rows, ib,
- work(i), ldwork, tau+i, hwork, ib);
+ LAPACKE_CHECK(cpu_lapack_larft(
+ *MagmaForwardStr, *MagmaColumnwiseStr,
+ rows, ib,
+ work(i), ldwork, tau+i, hwork, ib));
panel_to_q<Ty>( MagmaUpper, ib, work(i), ldwork, hwork+ib*ib );
@@ -329,7 +328,7 @@ magma_geqrf2_gpu(
magma_queue_sync(queue[1]);
lhwork = lwork - rows*ib;
- *info = cpu_geqrf( rows, ib, work, rows, tau+i, work+ib*rows, lhwork);
+ LAPACKE_CHECK(cpu_lapack_geqrf( rows, ib, work, rows, tau+i, work+ib*rows, lhwork));
magma_setmatrix_async<Ty>(rows, ib, work, rows, dA(i, i), ldda, queue[1], NULL);
}
diff --git a/src/backend/opencl/magma/geqrf3.cpp b/src/backend/opencl/magma/geqrf3.cpp
index 192bd45..8a6a05f 100644
--- a/src/backend/opencl/magma/geqrf3.cpp
+++ b/src/backend/opencl/magma/geqrf3.cpp
@@ -52,7 +52,6 @@
**********************************************************************/
#include "magma.h"
-#include "magma_blas.h"
#include "magma_data.h"
#include "magma_cpu_lapack.h"
#include "magma_helper.h"
@@ -217,8 +216,8 @@ magma_geqrf3_gpu(
ldwork = m;
lddwork= n;
- cpu_geqrf_work_func<Ty> cpu_geqrf;
- cpu_larft_func<Ty> cpu_larft;
+ cpu_lapack_geqrf_work_func<Ty> cpu_lapack_geqrf;
+ cpu_lapack_larft_func<Ty> cpu_lapack_larft;
if ( (nb > 1) && (nb < k) ) {
/* Use blocked code initially */
@@ -244,15 +243,15 @@ magma_geqrf3_gpu(
}
magma_event_sync(event[1]);
- *info = cpu_geqrf( rows, ib, work_ref(i), ldwork, tau+i, hwork, lhwork);
+ LAPACKE_CHECK(cpu_lapack_geqrf( rows, ib, work_ref(i), ldwork, tau+i, hwork, lhwork));
/* Form the triangular factor of the block reflector
H = H(i) H(i+1) . . . H(i+ib-1) */
- cpu_larft(
- *MagmaForwardStr, *MagmaColumnwiseStr,
- rows, ib,
- work_ref(i), ldwork,
- tau+i, hwork, ib);
+ LAPACKE_CHECK(cpu_lapack_larft(
+ *MagmaForwardStr, *MagmaColumnwiseStr,
+ rows, ib,
+ work_ref(i), ldwork,
+ tau+i, hwork, ib));
/* Put 0s in the upper triangular part of a panel (and 1s on the
diagonal); copy the upper triangular in ut and invert it. */
@@ -296,7 +295,7 @@ magma_geqrf3_gpu(
magma_getmatrix<Ty>( rows, ib, a_ref(i, i), ldda, work, rows, queue );
lhwork = lwork - rows*ib;
- *info = cpu_geqrf( rows, ib, work, rows, tau+i, work+ib*rows, lhwork);
+ LAPACKE_CHECK(cpu_lapack_geqrf( rows, ib, work, rows, tau+i, work+ib*rows, lhwork));
magma_setmatrix<Ty>( rows, ib, work, rows, a_ref(i, i), ldda, queue );
}
diff --git a/src/backend/opencl/magma/getrf.cpp b/src/backend/opencl/magma/getrf.cpp
index b398afd..bd9c9d2 100644
--- a/src/backend/opencl/magma/getrf.cpp
+++ b/src/backend/opencl/magma/getrf.cpp
@@ -149,9 +149,9 @@ magma_int_t magma_getrf_gpu(
if (m == 0 || n == 0)
return *info;
- gpu_gemm_func<Ty> gpu_gemm;
- gpu_trsm_func<Ty> gpu_trsm;
- cpu_getrf_func<Ty> cpu_getrf;
+ gpu_blas_gemm_func<Ty> gpu_blas_gemm;
+ gpu_blas_trsm_func<Ty> gpu_blas_trsm;
+ cpu_lapack_getrf_func<Ty> cpu_lapack_getrf;
/* Function Body */
mindim = std::min(m, n);
@@ -165,7 +165,7 @@ magma_int_t magma_getrf_gpu(
return *info;
}
magma_getmatrix<Ty>(m, n, dA(0,0), ldda, work(0), m, queue);
- cpu_getrf( m, n, work, m, ipiv);
+ LAPACKE_CHECK(cpu_lapack_getrf( m, n, work, m, ipiv));
magma_setmatrix<Ty>(m, n, work(0), m, dA(0,0), ldda, queue);
magma_free_cpu(work);
}
@@ -219,29 +219,29 @@ magma_int_t magma_getrf_gpu(
magma_getmatrix<Ty>(m-j*nb, nb, dAP(0,0), maxm, work(0), ldwork, queue);
if (j > 0 && n > (j + 1) * nb) {
- gpu_trsm(
- clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
- n - (j+1)*nb, nb,
- c_one,
- dAT(j-1,j-1), lddat,
- dAT(j-1,j+1), lddat,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm(
+ clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
+ n - (j+1)*nb, nb,
+ c_one,
+ dAT(j-1,j-1), lddat,
+ dAT(j-1,j+1), lddat,
+ 1, &queue, 0, nullptr, &event));
if (m > j * nb) {
- gpu_gemm( clblasNoTrans, clblasNoTrans,
- n-(j+1)*nb, m-j*nb, nb,
- c_neg_one,
- dAT(j-1,j+1), lddat,
- dAT(j, j-1), lddat,
- c_one,
- dAT(j, j+1), lddat,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_gemm( clblasNoTrans, clblasNoTrans,
+ n-(j+1)*nb, m-j*nb, nb,
+ c_neg_one,
+ dAT(j-1,j+1), lddat,
+ dAT(j, j-1), lddat,
+ c_one,
+ dAT(j, j+1), lddat,
+ 1, &queue, 0, nullptr, &event));
}
}
// do the cpu part
rows = m - j*nb;
- cpu_getrf( rows, nb, work, ldwork, ipiv+j*nb);
+ LAPACKE_CHECK(cpu_lapack_getrf( rows, nb, work, ldwork, ipiv+j*nb));
if (*info == 0 && iinfo > 0)
*info = iinfo + j*nb;
@@ -257,44 +257,44 @@ magma_int_t magma_getrf_gpu(
// do the small non-parallel computations (next panel update)
if (s > (j+1)) {
- gpu_trsm(
- clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
- nb, nb,
- c_one,
- dAT(j, j ), lddat,
- dAT(j, j+1), lddat,
- 1, &queue, 0, nullptr, &event);
-
-
- gpu_gemm( clblasNoTrans, clblasNoTrans,
- nb, m-(j+1)*nb, nb,
- c_neg_one,
- dAT(j, j+1), lddat,
- dAT(j+1, j ), lddat,
- c_one,
- dAT(j+1, j+1), lddat,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm(
+ clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
+ nb, nb,
+ c_one,
+ dAT(j, j ), lddat,
+ dAT(j, j+1), lddat,
+ 1, &queue, 0, nullptr, &event));
+
+
+ CLBLAS_CHECK(gpu_blas_gemm( clblasNoTrans, clblasNoTrans,
+ nb, m-(j+1)*nb, nb,
+ c_neg_one,
+ dAT(j, j+1), lddat,
+ dAT(j+1, j ), lddat,
+ c_one,
+ dAT(j+1, j+1), lddat,
+ 1, &queue, 0, nullptr, &event));
}
else {
if (n > s * nb) {
- gpu_trsm(
- clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
- n-s*nb, nb,
- c_one,
- dAT(j, j ), lddat,
- dAT(j, j+1), lddat,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm(
+ clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
+ n-s*nb, nb,
+ c_one,
+ dAT(j, j ), lddat,
+ dAT(j, j+1), lddat,
+ 1, &queue, 0, nullptr, &event));
}
if ((n > (j+1) * nb) && (m > (j+1) * nb)) {
- gpu_gemm( clblasNoTrans, clblasNoTrans,
- n-(j+1)*nb, m-(j+1)*nb, nb,
- c_neg_one,
- dAT(j, j+1), lddat,
- dAT(j+1, j ), lddat,
- c_one,
- dAT(j+1, j+1), lddat,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_gemm( clblasNoTrans, clblasNoTrans,
+ n-(j+1)*nb, m-(j+1)*nb, nb,
+ c_neg_one,
+ dAT(j, j+1), lddat,
+ dAT(j+1, j ), lddat,
+ c_one,
+ dAT(j+1, j+1), lddat,
+ 1, &queue, 0, nullptr, &event));
}
}
}
@@ -308,7 +308,7 @@ magma_int_t magma_getrf_gpu(
magma_getmatrix<Ty>(rows, nb0, dAP(0,0), maxm, work(0), ldwork, queue);
// do the cpu part
- cpu_getrf( rows, nb0, work, ldwork, ipiv+s*nb);
+ LAPACKE_CHECK(cpu_lapack_getrf( rows, nb0, work, ldwork, ipiv+s*nb));
if (*info == 0 && iinfo > 0)
*info = iinfo + s*nb;
@@ -322,11 +322,11 @@ magma_int_t magma_getrf_gpu(
magmablas_transpose<Ty>(rows, nb0, dAP(0,0), maxm, dAT(s,s), lddat, queue);
if (n > s * nb + nb0) {
- gpu_trsm(
- clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
- n-s*nb-nb0, nb0,
- c_one, dAT(s,s), lddat,
- dAT(s,s)+nb0, lddat, 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm(
+ clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
+ n-s*nb-nb0, nb0,
+ c_one, dAT(s,s), lddat,
+ dAT(s,s)+nb0, lddat, 1, &queue, 0, nullptr, &event));
}
}
diff --git a/src/backend/opencl/magma/getrs.cpp b/src/backend/opencl/magma/getrs.cpp
index 6ad943b..1dc106c 100644
--- a/src/backend/opencl/magma/getrs.cpp
+++ b/src/backend/opencl/magma/getrs.cpp
@@ -159,9 +159,9 @@ magma_getrs_gpu(magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
i1 = 1;
i2 = n;
- cpu_laswp_func<Ty> cpu_laswp;
- gpu_trsm_func<Ty> gpu_trsm;
- gpu_trsv_func<Ty> gpu_trsv;
+ cpu_lapack_laswp_func<Ty> cpu_lapack_laswp;
+ gpu_blas_trsm_func<Ty> gpu_blas_trsm;
+ gpu_blas_trsv_func<Ty> gpu_blas_trsv;
cl_event event = NULL;
@@ -180,18 +180,18 @@ magma_getrs_gpu(magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
/* Solve A * X = B. */
magma_getmatrix<Ty>( n, nrhs, dB, dB_offset, lddb, work, n, queue );
- cpu_laswp( nrhs, work, n, i1, i2, ipiv, inc);
+ LAPACKE_CHECK(cpu_lapack_laswp( nrhs, work, n, i1, i2, ipiv, inc));
magma_setmatrix<Ty>( n, nrhs, work, n, dB, dB_offset, lddb, queue );
if ( nrhs == 1) {
- gpu_trsv( clblasLower, clblasNoTrans, clblasUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
- gpu_trsv( clblasUpper, clblasNoTrans, clblasNonUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsv( clblasLower, clblasNoTrans, clblasUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event));
+ CLBLAS_CHECK(gpu_blas_trsv( clblasUpper, clblasNoTrans, clblasNonUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event));
} else {
- gpu_trsm( clblasLeft, clblasLower, clblasNoTrans, clblasUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm( clblasLeft, clblasLower, clblasNoTrans, clblasUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event));
if(cond) {
- gpu_trsm( clblasLeft, clblasLower, clblasTrans, clblasNonUnit, n, nrhs, c_one, dAT, 0, n, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm( clblasLeft, clblasLower, clblasTrans, clblasNonUnit, n, nrhs, c_one, dAT, 0, n, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event));
} else {
- gpu_trsm( clblasLeft, clblasUpper, clblasNoTrans, clblasNonUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm( clblasLeft, clblasUpper, clblasNoTrans, clblasNonUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event));
}
}
} else {
@@ -199,18 +199,18 @@ magma_getrs_gpu(magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
/* Solve A' * X = B. */
if ( nrhs == 1) {
- gpu_trsv( clblasUpper, cltrans, clblasNonUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
- gpu_trsv( clblasLower, cltrans, clblasUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsv( clblasUpper, cltrans, clblasNonUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event));
+ CLBLAS_CHECK(gpu_blas_trsv( clblasLower, cltrans, clblasUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event));
} else {
if(cond) {
- gpu_trsm( clblasLeft, clblasLower, clblasNoTrans, clblasNonUnit, n, nrhs, c_one, dAT, 0, n, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm( clblasLeft, clblasLower, clblasNoTrans, clblasNonUnit, n, nrhs, c_one, dAT, 0, n, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event));
} else {
- gpu_trsm( clblasLeft, clblasUpper, cltrans, clblasNonUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm( clblasLeft, clblasUpper, cltrans, clblasNonUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event));
}
- gpu_trsm( clblasLeft, clblasLower, cltrans, clblasUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm( clblasLeft, clblasLower, cltrans, clblasUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event));
}
magma_getmatrix<Ty>( n, nrhs, dB, dB_offset, lddb, work, n, queue );
- cpu_laswp( nrhs, work, n, i1, i2, ipiv, inc);
+ LAPACKE_CHECK(cpu_lapack_laswp( nrhs, work, n, i1, i2, ipiv, inc));
magma_setmatrix<Ty>( n, nrhs, work, n, dB, dB_offset, lddb, queue );
}
diff --git a/src/backend/opencl/magma/labrd.cpp b/src/backend/opencl/magma/labrd.cpp
index ee7f120..2487190 100644
--- a/src/backend/opencl/magma/labrd.cpp
+++ b/src/backend/opencl/magma/labrd.cpp
@@ -244,12 +244,12 @@ magma_labrd_gpu(
magma_event_t event = NULL;
- gpu_gemv_func<Ty> gpu_blas_gemv;
- cpu_gemv_func<Ty> cpu_blas_gemv;
- cpu_scal_func<Ty> cpu_blas_scal;
- cpu_axpy_func<Ty> cpu_blas_axpy;
- cpu_larfg_func<Ty> cpu_lapack_larfg;
- cpu_lacgv_func<Ty> cpu_lapack_lacgv;
+ gpu_blas_gemv_func<Ty> gpu_blas_gemv;
+ cpu_blas_gemv_func<Ty> cpu_blas_gemv;
+ cpu_blas_scal_func<Ty> cpu_blas_scal;
+ cpu_blas_axpy_func<Ty> cpu_blas_axpy;
+ cpu_lapack_larfg_func<Ty> cpu_lapack_larfg;
+ cpu_lapack_lacgv_func<Ty> cpu_lapack_lacgv;
CBLAS_TRANSPOSE CblasTransParam = is_cplx ? CblasConjTrans : CblasTrans;
@@ -261,14 +261,14 @@ magma_labrd_gpu(
i__3 = i__ - 1;
if (is_cplx) {
- cpu_lapack_lacgv(i__3, &y[i__+y_dim1], ldy);
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__+y_dim1], ldy));
}
cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &a[i__ + a_dim1], lda,
&y[i__+y_dim1], ldy, cblas_scalar(&c_one), &a[i__ + i__ * a_dim1], c__1);
if (is_cplx) {
- cpu_lapack_lacgv(i__3, &y[i__+y_dim1], ldy);
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__+y_dim1], ldy));
}
cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &x[i__ + x_dim1], ldx,
@@ -278,8 +278,11 @@ magma_labrd_gpu(
alpha = a[i__ + i__ * a_dim1];
i__2 = m - i__ + 1;
i__3 = i__ + 1;
- cpu_lapack_larfg(i__2, &alpha,
- &a[std::min(i__3,m) + i__ * a_dim1], c__1, &tauq[i__]);
+
+ LAPACKE_CHECK(cpu_lapack_larfg(i__2, &alpha,
+ &a[std::min(i__3,m) + i__ * a_dim1],
+ c__1, &tauq[i__]));
+
d[i__] = magma_real<Ty>(alpha);
if (i__ < n) {
a[i__ + i__ * a_dim1] = c_one;
@@ -294,11 +297,11 @@ magma_labrd_gpu(
da, da_offset + (i__-1)+(i__-1)* (ldda), 1,
queue);
// 2. Multiply ---------------------------------------------
- gpu_blas_gemv(clblasConjTrans, i__2, i__3, c_one,
- da, da_offset + (i__-1) + ((i__-1) + 1) * (ldda), ldda,
- da, da_offset + (i__-1) + (i__-1) * (ldda), c__1, c_zero,
- dy, dy_offset + i__ + 1 + i__ * y_dim1, c__1,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_gemv(clblasConjTrans, i__2, i__3, c_one,
+ da, da_offset + (i__-1) + ((i__-1) + 1) * (ldda), ldda,
+ da, da_offset + (i__-1) + (i__-1) * (ldda), c__1, c_zero,
+ dy, dy_offset + i__ + 1 + i__ * y_dim1, c__1,
+ 1, &queue, 0, nullptr, &event));
// 3. Put the result back ----------------------------------
magma_getmatrix_async<Ty>(i__3, 1,
@@ -341,8 +344,8 @@ magma_labrd_gpu(
/* Update A(i,i+1:n) */
i__2 = n - i__;
if (is_cplx) {
- cpu_lapack_lacgv(i__2, &a[i__+(i__+1)*a_dim1], lda);
- cpu_lapack_lacgv(i__, &a[i__+a_dim1], lda);
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &a[i__+(i__+1)*a_dim1], lda));
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__, &a[i__+a_dim1], lda));
}
cpu_blas_gemv(CblasNoTrans, i__2, i__, cblas_scalar(&c_neg_one),
@@ -352,15 +355,15 @@ magma_labrd_gpu(
i__3 = n - i__;
if (is_cplx) {
- cpu_lapack_lacgv(i__, &a[i__+a_dim1], lda);
- cpu_lapack_lacgv(i__2, &x[i__+x_dim1], ldx);
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__, &a[i__+a_dim1], lda));
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &x[i__+x_dim1], ldx));
}
cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_neg_one), &a[(i__ + 1) *
a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, cblas_scalar(&c_one), &a[
i__ + (i__ + 1) * a_dim1], lda);
if (is_cplx) {
- cpu_lapack_lacgv(i__2, &x[i__+x_dim1], ldx);
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &x[i__+x_dim1], ldx));
}
/* Generate reflection P(i) to annihilate A(i,i+2:n) */
@@ -368,8 +371,9 @@ magma_labrd_gpu(
/* Computing MIN */
i__3 = i__ + 2;
alpha = a[i__ + (i__ + 1) * a_dim1];
- cpu_lapack_larfg(i__2, &alpha, &a[i__ + std::min(
- i__3,n) * a_dim1], lda, &taup[i__]);
+ LAPACKE_CHECK(cpu_lapack_larfg(i__2, &alpha,
+ &a[i__ + std::min(i__3,n) * a_dim1],
+ lda, &taup[i__]));
e[i__] = magma_real<Ty>(alpha);
a[i__ + (i__ + 1) * a_dim1] = c_one;
@@ -384,12 +388,12 @@ magma_labrd_gpu(
// 2. Multiply ---------------------------------------------
//magma_zcopy(i__3, da+(i__-1)+((i__-1)+1)*(ldda), ldda,
// dy + 1 + lddy, 1);
- gpu_blas_gemv(clblasNoTrans, i__2, i__3, c_one,
- da, da_offset + (i__-1)+1+ ((i__-1)+1) * (ldda), ldda,
- da, da_offset + (i__-1) + ((i__-1)+1) * (ldda), ldda,
- //dy + 1 + lddy, 1,
- c_zero, dx, dx_offset + i__ + 1 + i__ * x_dim1, c__1,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_gemv(clblasNoTrans, i__2, i__3, c_one,
+ da, da_offset + (i__-1)+1+ ((i__-1)+1) * (ldda), ldda,
+ da, da_offset + (i__-1) + ((i__-1)+1) * (ldda), ldda,
+ //dy + 1 + lddy, 1,
+ c_zero, dx, dx_offset + i__ + 1 + i__ * x_dim1, c__1,
+ 1, &queue, 0, nullptr, &event));
// 3. Put the result back ----------------------------------
magma_getmatrix_async<Ty>(i__2, 1,
@@ -430,7 +434,7 @@ magma_labrd_gpu(
if (is_cplx) {
i__2 = n - i__;
- cpu_lapack_lacgv(i__2, &a[i__+(i__+1)*a_dim1], lda);
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &a[i__+(i__+1)*a_dim1], lda));
// 4. Send the block reflector A(i+1:m,i) to the GPU after ZLACGV()
magma_setvector<Ty>(i__2,
a + i__ + (i__ +1)* a_dim1, lda,
@@ -448,21 +452,21 @@ magma_labrd_gpu(
i__2 = n - i__ + 1;
i__3 = i__ - 1;
if (is_cplx) {
- cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda);
- cpu_lapack_lacgv(i__3, &a[i__ + a_dim1], lda);
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda));
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &a[i__ + a_dim1], lda));
}
cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &y[i__ + y_dim1], ldy,
&a[i__ + a_dim1], lda, cblas_scalar(&c_one), &a[i__ + i__ * a_dim1], lda);
i__2 = i__ - 1;
if (is_cplx) {
- cpu_lapack_lacgv(i__3, &a[i__ + a_dim1], lda);
- cpu_lapack_lacgv(i__3, &x[i__ + x_dim1], ldx);
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &a[i__ + a_dim1], lda));
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &x[i__ + x_dim1], ldx));
}
i__3 = n - i__ + 1;
cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_neg_one), &a[i__ * a_dim1 + 1],
lda, &x[i__ + x_dim1], ldx, cblas_scalar(&c_one), &a[i__ + i__ * a_dim1], lda);
if (is_cplx) {
- cpu_lapack_lacgv(i__2, &x[i__ + x_dim1], ldx);
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &x[i__ + x_dim1], ldx));
}
/* Generate reflection P(i) to annihilate A(i,i+1:n) */
@@ -470,8 +474,8 @@ magma_labrd_gpu(
/* Computing MIN */
i__3 = i__ + 1;
alpha = a[i__ + i__ * a_dim1];
- cpu_lapack_larfg(i__2, &alpha,
- &a[i__ + std::min(i__3,n) * a_dim1], lda, &taup[i__]);
+ LAPACKE_CHECK(cpu_lapack_larfg(i__2, &alpha,
+ &a[i__ + std::min(i__3,n) * a_dim1], lda, &taup[i__]));
d[i__] = magma_real<Ty>(alpha);
if (i__ < m) {
a[i__ + i__ * a_dim1] = c_one;
@@ -489,13 +493,13 @@ magma_labrd_gpu(
// 2. Multiply ---------------------------------------------
//magma_zcopy(i__3, da+(i__-1)+(i__-1)*(ldda), ldda,
// dy + 1 + lddy, 1);
- gpu_blas_gemv(clblasNoTrans, i__2, i__3, c_one,
- da, da_offset + (i__-1)+1 + (i__-1) * ldda, ldda,
- da, da_offset + (i__-1) + (i__-1) * ldda, ldda,
- // dy + 1 + lddy, 1,
- c_zero,
- dx, dx_offset + i__ + 1 + i__ * x_dim1, c__1,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_gemv(clblasNoTrans, i__2, i__3, c_one,
+ da, da_offset + (i__-1)+1 + (i__-1) * ldda, ldda,
+ da, da_offset + (i__-1) + (i__-1) * ldda, ldda,
+ // dy + 1 + lddy, 1,
+ c_zero,
+ dx, dx_offset + i__ + 1 + i__ * x_dim1, c__1,
+ 1, &queue, 0, nullptr, &event));
// 3. Put the result back ----------------------------------
@@ -538,7 +542,7 @@ magma_labrd_gpu(
i__2 = n - i__ + 1;
if (is_cplx) {
- cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda);
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda));
magma_setvector<Ty>(i__2,
a + i__ + (i__ )* a_dim1, lda,
da, da_offset + (i__-1)+ (i__-1)*(ldda), ldda,
@@ -550,7 +554,7 @@ magma_labrd_gpu(
i__3 = i__ - 1;
if (is_cplx) {
- cpu_lapack_lacgv(i__3, &y[i__ + y_dim1], ldy);
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__ + y_dim1], ldy));
}
cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one),
@@ -558,7 +562,7 @@ magma_labrd_gpu(
&a[i__ + 1 + i__ * a_dim1], c__1);
i__2 = m - i__;
if (is_cplx) {
- cpu_lapack_lacgv(i__3, &y[i__ + y_dim1], ldy);
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__ + y_dim1], ldy));
}
cpu_blas_gemv(CblasNoTrans, i__2, i__, cblas_scalar(&c_neg_one),
&x[i__ + 1 + x_dim1], ldx, &a[i__ * a_dim1 + 1], c__1, cblas_scalar(&c_one),
@@ -568,8 +572,9 @@ magma_labrd_gpu(
i__2 = m - i__;
i__3 = i__ + 2;
alpha = a[i__ + 1 + i__ * a_dim1];
- cpu_lapack_larfg(i__2, &alpha,
- &a[std::min(i__3,m) + i__ * a_dim1], c__1, &tauq[i__]);
+ LAPACKE_CHECK(cpu_lapack_larfg(i__2, &alpha,
+ &a[std::min(i__3,m) + i__ * a_dim1],
+ c__1, &tauq[i__]));
e[i__] = magma_real<Ty>(alpha);
a[i__ + 1 + i__ * a_dim1] = c_one;
@@ -583,11 +588,11 @@ magma_labrd_gpu(
da, da_offset + (i__-1)+1+ (i__-1)*(ldda), 1,
queue);
// 2. Multiply ---------------------------------------------
- gpu_blas_gemv(clblasConjTrans, i__2, i__3, c_one,
- da, da_offset + (i__-1)+1+ ((i__-1)+1) * ldda, ldda,
- da, da_offset + (i__-1)+1+ (i__-1) * ldda, c__1,
- c_zero, dy, dy_offset + i__ + 1 + i__ * y_dim1, c__1,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_gemv(clblasConjTrans, i__2, i__3, c_one,
+ da, da_offset + (i__-1)+1+ ((i__-1)+1) * ldda, ldda,
+ da, da_offset + (i__-1)+1+ (i__-1) * ldda, c__1,
+ c_zero, dy, dy_offset + i__ + 1 + i__ * y_dim1, c__1,
+ 1, &queue, 0, nullptr, &event));
// 3. Put the result back ----------------------------------
magma_getmatrix_async<Ty>(i__3, 1,
@@ -628,7 +633,7 @@ magma_labrd_gpu(
else {
if (is_cplx) {
i__2 = n - i__ + 1;
- cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda);
+ LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda));
magma_setvector<Ty>(i__2,
a + i__ + (i__ )* a_dim1, lda,
da, da_offset + (i__-1)+ (i__-1)*(ldda), ldda,
diff --git a/src/backend/opencl/magma/larfb.cpp b/src/backend/opencl/magma/larfb.cpp
index 5b188f4..20d2902 100644
--- a/src/backend/opencl/magma/larfb.cpp
+++ b/src/backend/opencl/magma/larfb.cpp
@@ -254,8 +254,8 @@ magma_larfb_gpu(
transV = clblasNoTrans;
}
- gpu_gemm_func<Ty> gpu_gemm;
- gpu_trmm_func<Ty> gpu_trmm;
+ gpu_blas_gemm_func<Ty> gpu_blas_gemm;
+ gpu_blas_trmm_func<Ty> gpu_blas_trmm;
cl_event event = NULL;
@@ -264,73 +264,73 @@ magma_larfb_gpu(
// Comments assume H C. When forming H^H C, T gets transposed via transt.
// W = C^H V
- gpu_gemm(
- transType, notransV,
- n, k, m,
- c_one,
- dC(0,0), lddc,
- dV(0,0), lddv,
- c_zero,
- dwork(0), ldwork,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_gemm(
+ transType, notransV,
+ n, k, m,
+ c_one,
+ dC(0,0), lddc,
+ dV(0,0), lddv,
+ c_zero,
+ dwork(0), ldwork,
+ 1, &queue, 0, nullptr, &event));
// W = W T^H = C^H V T^H
- gpu_trmm(
- clblasRight,
- uplo, transt, clblasNonUnit,
- n, k,
- c_one,
- dT(0,0) , lddt,
- dwork(0), ldwork,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trmm(
+ clblasRight,
+ uplo, transt, clblasNonUnit,
+ n, k,
+ c_one,
+ dT(0,0) , lddt,
+ dwork(0), ldwork,
+ 1, &queue, 0, nullptr, &event));
// C = C - V W^H = C - V T V^H C = (I - V T V^H) C = H C
- gpu_gemm(
- notransV, transType,
- m, n, k,
- c_neg_one,
- dV(0,0), lddv,
- dwork(0), ldwork,
- c_one,
- dC(0,0), lddc,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_gemm(
+ notransV, transType,
+ m, n, k,
+ c_neg_one,
+ dV(0,0), lddv,
+ dwork(0), ldwork,
+ c_one,
+ dC(0,0), lddc,
+ 1, &queue, 0, nullptr, &event));
}
else {
// Form C H or C H^H
// Comments assume C H. When forming C H^H, T gets transposed via trans.
// W = C V
- gpu_gemm(
- clblasNoTrans, notransV,
- m, k, n,
- c_one,
- dC(0,0), lddc,
- dV(0,0), lddv,
- c_zero,
- dwork(0), ldwork,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_gemm(
+ clblasNoTrans, notransV,
+ m, k, n,
+ c_one,
+ dC(0,0), lddc,
+ dV(0,0), lddv,
+ c_zero,
+ dwork(0), ldwork,
+ 1, &queue, 0, nullptr, &event));
// W = W T = C V T
- gpu_trmm(
- clblasRight, uplo,
- cltrans,
- clblasNonUnit,
- m, k,
- c_one,
- dT(0,0), lddt,
- dwork(0), ldwork,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trmm(
+ clblasRight, uplo,
+ cltrans,
+ clblasNonUnit,
+ m, k,
+ c_one,
+ dT(0,0), lddt,
+ dwork(0), ldwork,
+ 1, &queue, 0, nullptr, &event));
// C = C - W V^H = C - C V T V^H = C (I - V T V^H) = C H
- gpu_gemm(
- clblasNoTrans, transV,
- m, n, k,
- c_neg_one,
- dwork(0), ldwork,
- dV(0,0), lddv,
- c_one,
- dC(0,0), lddc,
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_gemm(
+ clblasNoTrans, transV,
+ m, n, k,
+ c_neg_one,
+ dwork(0), ldwork,
+ dV(0,0), lddv,
+ c_one,
+ dC(0,0), lddc,
+ 1, &queue, 0, nullptr, &event));
}
return info;
diff --git a/src/backend/opencl/magma/magma_blas.h b/src/backend/opencl/magma/magma_blas.h
index 44ebc03..8e2565b 100644
--- a/src/backend/opencl/magma/magma_blas.h
+++ b/src/backend/opencl/magma/magma_blas.h
@@ -24,19 +24,19 @@ using opencl::cdouble;
#define BLAS_FUNC_DEF(NAME) \
template<typename T> \
- struct gpu_##NAME##_func;
-
-#define BLAS_FUNC(NAME, TYPE, PREFIX) \
- template<> \
- struct gpu_##NAME##_func<TYPE> \
- { \
- template<typename... Args> \
- void \
- operator() (Args... args) \
- { \
- CLBLAS_CHECK(clblas##PREFIX##NAME(clblasColumnMajor, \
- args...)); \
- } \
+ struct gpu_blas_##NAME##_func;
+
+#define BLAS_FUNC(NAME, TYPE, PREFIX) \
+ template<> \
+ struct gpu_blas_##NAME##_func<TYPE> \
+ { \
+ template<typename... Args> \
+ clblasStatus \
+ operator() (Args... args) \
+ { \
+ return clblas##PREFIX##NAME(clblasColumnMajor, \
+ args...); \
+ } \
};
#define BLAS_FUNC_DECL(NAME) \
diff --git a/src/backend/opencl/magma/magma_cpu_blas.h b/src/backend/opencl/magma/magma_cpu_blas.h
index f5df93d..e5f7184 100644
--- a/src/backend/opencl/magma/magma_cpu_blas.h
+++ b/src/backend/opencl/magma/magma_cpu_blas.h
@@ -9,7 +9,7 @@
#ifndef MAGMA_CPU_BLAS
#define MAGMA_CPU_BLAS
-
+#include <err_common.hpp>
#include "magma_types.h"
#ifdef __APPLE__
@@ -38,11 +38,11 @@ typedef int blasint;
#define CPU_BLAS_FUNC_DEF(NAME) \
template<typename T> \
- struct cpu_##NAME##_func;
+ struct cpu_blas_##NAME##_func;
#define CPU_BLAS_FUNC1(NAME, TYPE, X) \
template<> \
- struct cpu_##NAME##_func<TYPE> \
+ struct cpu_blas_##NAME##_func<TYPE> \
{ \
template<typename... Args> \
void \
@@ -50,24 +50,24 @@ typedef int blasint;
{ return cblas_##X##NAME(CblasColMajor, args...); } \
};
-#define CPU_BLAS_FUNC2(NAME, TYPE, X) \
- template<> \
- struct cpu_##NAME##_func<TYPE> \
- { \
- template<typename... Args> \
- void \
- operator() (Args... args) \
- { return cblas_##X##NAME(args...); } \
+#define CPU_BLAS_FUNC2(NAME, TYPE, X) \
+ template<> \
+ struct cpu_blas_##NAME##_func<TYPE> \
+ { \
+ template<typename... Args> \
+ void \
+ operator() (Args... args) \
+ { return cblas_##X##NAME(args...); } \
};
-#define CPU_BLAS_DECL1(NAME) \
+#define CPU_BLAS_DECL1(NAME) \
CPU_BLAS_FUNC_DEF(NAME) \
CPU_BLAS_FUNC1(NAME, float, s) \
CPU_BLAS_FUNC1(NAME, double, d) \
CPU_BLAS_FUNC1(NAME, magmaFloatComplex, c) \
CPU_BLAS_FUNC1(NAME, magmaDoubleComplex, z) \
-#define CPU_BLAS_DECL2(NAME) \
+#define CPU_BLAS_DECL2(NAME) \
CPU_BLAS_FUNC_DEF(NAME) \
CPU_BLAS_FUNC2(NAME, float, s) \
CPU_BLAS_FUNC2(NAME, double, d) \
diff --git a/src/backend/opencl/magma/magma_cpu_lapack.h b/src/backend/opencl/magma/magma_cpu_lapack.h
index c051306..431923c 100644
--- a/src/backend/opencl/magma/magma_cpu_lapack.h
+++ b/src/backend/opencl/magma/magma_cpu_lapack.h
@@ -10,6 +10,7 @@
#ifndef MAGMA_CPU_LAPACK
#define MAGMA_CPU_LAPACK
+#include <err_common.hpp>
#include "magma_types.h"
#define LAPACKE_sunmqr_work(...) LAPACKE_sormqr_work(__VA_ARGS__)
@@ -42,41 +43,51 @@ int LAPACKE_dlacgv(Args... args) { return 0; }
#endif // MKL/NETLIB
#endif //APPLE
+#define LAPACKE_CHECK(fn) do { \
+ int __info = fn; \
+ if (__info != 0) { \
+ char lapacke_st_msg[32]; \
+ snprintf(lapacke_st_msg, \
+ sizeof(lapacke_st_msg), \
+ "LAPACKE Error (%d)", \
+ (int)(__info)); \
+ AF_ERROR(lapacke_st_msg, \
+ AF_ERR_INTERNAL); \
+ } \
+ } while(0)
+
#define CPU_LAPACK_FUNC_DEF(NAME) \
template<typename T> \
- struct cpu_##NAME##_func;
-
-#define CPU_LAPACK_FUNC1(NAME, TYPE, X) \
- template<> \
- struct cpu_##NAME##_func<TYPE> \
- { \
- template<typename... Args> \
- int \
- operator() (Args... args) \
- { \
- int err = LAPACK_NAME(X##NAME)(LAPACK_COL_MAJOR, args...); \
- if (err != 0) AF_ERROR("Error in "#NAME, AF_ERR_INTERNAL); \
- return err; \
- } \
+ struct cpu_lapack_##NAME##_func;
+
+#define CPU_LAPACK_FUNC1(NAME, TYPE, X) \
+ template<> \
+ struct cpu_lapack_##NAME##_func<TYPE> \
+ { \
+ template<typename... Args> \
+ int \
+ operator() (Args... args) \
+ { \
+ return LAPACK_NAME(X##NAME)(LAPACK_COL_MAJOR, \
+ args...); \
+ } \
};
-#define CPU_LAPACK_FUNC2(NAME, TYPE, X) \
- template<> \
- struct cpu_##NAME##_func<TYPE> \
- { \
- template<typename... Args> \
- int \
- operator() (Args... args) \
- { \
- int err = LAPACK_NAME(X##NAME)(args...); \
- if (err != 0) AF_ERROR("Error in "#NAME, AF_ERR_INTERNAL); \
- return err; \
- } \
+#define CPU_LAPACK_FUNC2(NAME, TYPE, X) \
+ template<> \
+ struct cpu_lapack_##NAME##_func<TYPE> \
+ { \
+ template<typename... Args> \
+ int \
+ operator() (Args... args) \
+ { \
+ return LAPACK_NAME(X##NAME)(args...); \
+ } \
};
#define CPU_LAPACK_FUNC3(NAME, TYPE, X) \
template<> \
- struct cpu_##NAME##_func<TYPE> \
+ struct cpu_lapack_##NAME##_func<TYPE> \
{ \
template<typename... Args> \
double \
diff --git a/src/backend/opencl/magma/potrf.cpp b/src/backend/opencl/magma/potrf.cpp
index ddc4f46..d048ed4 100644
--- a/src/backend/opencl/magma/potrf.cpp
+++ b/src/backend/opencl/magma/potrf.cpp
@@ -151,10 +151,10 @@ magma_int_t magma_potrf_gpu(
nb = magma_get_potrf_nb<Ty>(n);
- gpu_gemm_func<Ty> gpu_gemm;
- gpu_trsm_func<Ty> gpu_trsm;
- gpu_herk_func<Ty> gpu_herk;
- cpu_potrf_func<Ty> cpu_potrf;
+ gpu_blas_gemm_func<Ty> gpu_blas_gemm;
+ gpu_blas_trsm_func<Ty> gpu_blas_trsm;
+ gpu_blas_herk_func<Ty> gpu_blas_herk;
+ cpu_lapack_potrf_func<Ty> cpu_lapack_potrf;
err = magma_malloc_cpu<Ty>( &work, nb*nb);
@@ -170,9 +170,9 @@ magma_int_t magma_potrf_gpu(
// use unblocked code
magma_getmatrix<Ty>(n, n, dA, dA_offset, ldda, work, n, queue);
- cpu_potrf(
- uplo == MagmaUpper ? *MagmaUpperStr : *MagmaLowerStr,
- n, work, n);
+ LAPACKE_CHECK(cpu_lapack_potrf(
+ uplo == MagmaUpper ? *MagmaUpperStr : *MagmaLowerStr,
+ n, work, n));
magma_setmatrix<Ty>(n, n, work, n, dA, dA_offset, ldda, queue);
}
@@ -185,14 +185,14 @@ magma_int_t magma_potrf_gpu(
// apply all previous updates to diagonal block
jb = std::min(nb, n-j);
if (j > 0) {
- gpu_herk(
- clblasUpper, transType,
- jb, j,
- m_one,
- dA(0,j), ldda,
- one,
- dA(j,j), ldda,
- 1, &queue, 0, nullptr, &blas_event);
+ CLBLAS_CHECK(gpu_blas_herk(
+ clblasUpper, transType,
+ jb, j,
+ m_one,
+ dA(0,j), ldda,
+ one,
+ dA(j,j), ldda,
+ 1, &queue, 0, nullptr, &blas_event));
}
// start asynchronous data transfer
@@ -200,21 +200,21 @@ magma_int_t magma_potrf_gpu(
// apply all previous updates to block row right of diagonal block
if (j+jb < n) {
- gpu_gemm(
- transType, clblasNoTrans,
- jb, n-j-jb, j,
- mz_one,
- dA(0, j ), ldda,
- dA(0, j+jb), ldda,
- z_one,
- dA(j, j+jb), ldda,
- 1, &queue, 0, nullptr, &blas_event);
+ CLBLAS_CHECK(gpu_blas_gemm(
+ transType, clblasNoTrans,
+ jb, n-j-jb, j,
+ mz_one,
+ dA(0, j ), ldda,
+ dA(0, j+jb), ldda,
+ z_one,
+ dA(j, j+jb), ldda,
+ 1, &queue, 0, nullptr, &blas_event));
}
// simultaneous with above zgemm, transfer data, factor
// diagonal block on CPU, and test for positive definiteness
magma_event_sync(event);
- *info =cpu_potrf( *MagmaUpperStr, jb, work, jb);
+ LAPACKE_CHECK(cpu_lapack_potrf( *MagmaUpperStr, jb, work, jb));
if (*info != 0) {
assert(*info > 0);
@@ -227,14 +227,14 @@ magma_int_t magma_potrf_gpu(
// apply diagonal block to block row right of diagonal block
if (j+jb < n) {
magma_event_sync(event);
- gpu_trsm(
- clblasLeft, clblasUpper,
- transType, clblasNonUnit,
- jb, n-j-jb,
- z_one,
- dA(j, j ), ldda,
- dA(j, j+jb), ldda,
- 1, &queue, 0, nullptr, &blas_event);
+ CLBLAS_CHECK(gpu_blas_trsm(
+ clblasLeft, clblasUpper,
+ transType, clblasNonUnit,
+ jb, n-j-jb,
+ z_one,
+ dA(j, j ), ldda,
+ dA(j, j+jb), ldda,
+ 1, &queue, 0, nullptr, &blas_event));
}
}
}
@@ -246,13 +246,13 @@ magma_int_t magma_potrf_gpu(
// apply all previous updates to diagonal block
jb = std::min(nb, n-j);
if (j>0) {
- gpu_herk(
- clblasLower, clblasNoTrans, jb, j,
- m_one,
- dA(j, 0), ldda,
- one,
- dA(j, j), ldda,
- 1, &queue, 0, nullptr, &blas_event);
+ CLBLAS_CHECK(gpu_blas_herk(
+ clblasLower, clblasNoTrans, jb, j,
+ m_one,
+ dA(j, 0), ldda,
+ one,
+ dA(j, j), ldda,
+ 1, &queue, 0, nullptr, &blas_event));
}
// start asynchronous data transfer
@@ -260,22 +260,22 @@ magma_int_t magma_potrf_gpu(
// apply all previous updates to block column below diagonal block
if (j+jb < n) {
- gpu_gemm(
- clblasNoTrans, transType,
- n-j-jb, jb, j,
- mz_one,
- dA(j+jb, 0), ldda,
- dA(j, 0), ldda,
- z_one,
- dA(j+jb, j), ldda,
- 1, &queue, 0, nullptr, &blas_event);
+ CLBLAS_CHECK(gpu_blas_gemm(
+ clblasNoTrans, transType,
+ n-j-jb, jb, j,
+ mz_one,
+ dA(j+jb, 0), ldda,
+ dA(j, 0), ldda,
+ z_one,
+ dA(j+jb, j), ldda,
+ 1, &queue, 0, nullptr, &blas_event));
}
// simultaneous with above zgemm, transfer data, factor
// diagonal block on CPU, and test for positive definiteness
magma_event_sync(event);
- *info = cpu_potrf(
- *MagmaLowerStr, jb, work, jb);
+ LAPACKE_CHECK(cpu_lapack_potrf(
+ *MagmaLowerStr, jb, work, jb));
if (*info != 0) {
assert(*info > 0);
*info += j;
@@ -286,13 +286,13 @@ magma_int_t magma_potrf_gpu(
// apply diagonal block to block column below diagonal
if (j+jb < n) {
magma_event_sync(event);
- gpu_trsm(
- clblasRight, clblasLower, transType, clblasNonUnit,
- n-j-jb, jb,
- z_one,
- dA(j , j), ldda,
- dA(j+jb, j), ldda,
- 1, &queue, 0, nullptr, &blas_event);
+ CLBLAS_CHECK(gpu_blas_trsm(
+ clblasRight, clblasLower, transType, clblasNonUnit,
+ n-j-jb, jb,
+ z_one,
+ dA(j , j), ldda,
+ dA(j+jb, j), ldda,
+ 1, &queue, 0, nullptr, &blas_event));
}
}
}
diff --git a/src/backend/opencl/magma/ungqr.cpp b/src/backend/opencl/magma/ungqr.cpp
index 49a120a..5ea05ac 100644
--- a/src/backend/opencl/magma/ungqr.cpp
+++ b/src/backend/opencl/magma/ungqr.cpp
@@ -52,7 +52,6 @@
**********************************************************************/
#include "magma.h"
-#include "magma_blas.h"
#include "magma_data.h"
#include "magma_cpu_lapack.h"
#include "magma_helper.h"
@@ -137,7 +136,7 @@ magma_ungqr_gpu(
cl_mem dW;
magma_malloc<Ty>(&dW, (((n+31)/32)*32)*nb);
- cpu_ungqr_work_func<Ty> cpu_ungqr;
+ cpu_lapack_ungqr_work_func<Ty> cpu_lapack_ungqr;
// Use unblocked code for the last or only block.
if (kk < n) {
@@ -147,10 +146,10 @@ magma_ungqr_gpu(
magma_getmatrix<Ty>(m_kk, k_kk,
dA(kk, kk), ldda, panel, m_kk, queue);
- cpu_ungqr(
- m_kk, n_kk, k_kk,
- panel, m_kk,
- &tau[kk], work, lwork);
+ LAPACKE_CHECK(cpu_lapack_ungqr(
+ m_kk, n_kk, k_kk,
+ panel, m_kk,
+ &tau[kk], work, lwork));
magma_setmatrix<Ty>(m_kk, n_kk,
panel, m_kk, dA(kk, kk), ldda, queue);
diff --git a/src/backend/opencl/magma/unmqr.cpp b/src/backend/opencl/magma/unmqr.cpp
index ed69e51..366810e 100644
--- a/src/backend/opencl/magma/unmqr.cpp
+++ b/src/backend/opencl/magma/unmqr.cpp
@@ -52,7 +52,6 @@
**********************************************************************/
#include "magma.h"
-#include "magma_blas.h"
#include "magma_data.h"
#include "magma_cpu_lapack.h"
#include "magma_helper.h"
@@ -227,7 +226,7 @@ magma_unmqr_gpu(
magma_malloc<Ty>(&dwork, (((n+31)/32)*32)*nb);
- cpu_unmqr_work_func<Ty> cpu_unmqr;
+ cpu_lapack_unmqr_work_func<Ty> cpu_lapack_unmqr;
if ( (left && (! notran)) || ( (!left) && notran ) ) {
i1 = 0;
@@ -283,13 +282,13 @@ magma_unmqr_gpu(
magma_getmatrix<Ty>(ma, ib, a_ref(i, i ), ldda, hA, ma, queue);
magma_getmatrix<Ty>(mi, ni, c_ref(ic, jc), lddc, hC, mi, queue);
- *info = cpu_unmqr(
+ LAPACKE_CHECK(cpu_lapack_unmqr(
side == MagmaRight ? 'R' : 'L',
notran ? 'N' : (is_real ? 'T' : 'C'),
mi, ni, ib,
hA, ma, tau+i,
hC, mi,
- hW, lhwork);
+ hW, lhwork));
// send the updated part of C back to the GPU
magma_setmatrix<Ty>( mi, ni, hC, mi, c_ref(ic, jc), lddc, queue);
@@ -351,13 +350,13 @@ magma_unmqr_gpu(
magma_getmatrix<Ty>(ma, ib, a_ref(i, i ), ldda, hA, ma, queue);
magma_getmatrix<Ty>(mi, ni, c_ref(ic, jc), lddc, hC, mi, queue);
- *info = cpu_unmqr(
+ LAPACKE_CHECK(cpu_lapack_unmqr(
side == MagmaRight ? 'R' : 'L',
notran ? 'N' : (is_real ? 'T' : 'C'),
mi, ni, ib,
hA, ma, tau+i,
hC, mi,
- hW, lhwork);
+ hW, lhwork));
// send the updated part of C back to the GPU
magma_setmatrix<Ty>(mi, ni, hC, mi, c_ref(ic, jc), lddc, queue);
diff --git a/src/backend/opencl/magma/unmqr2.cpp b/src/backend/opencl/magma/unmqr2.cpp
index 4da4143..0cfc275 100644
--- a/src/backend/opencl/magma/unmqr2.cpp
+++ b/src/backend/opencl/magma/unmqr2.cpp
@@ -251,7 +251,7 @@ magma_unmqr2_gpu(
ic = 1;
}
- cpu_larft_func<Ty> cpu_larft;
+ cpu_lapack_larft_func<Ty> cpu_lapack_larft;
// set nb-1 super-diagonals to 0, and diagonal to 1.
// This way we can copy V directly to the GPU,
@@ -265,10 +265,10 @@ magma_unmqr2_gpu(
/* Form the triangular factor of the block reflector
H = H(i) H(i+1) . . . H(i+ib-1) */
i__4 = nq - i + 1;
- cpu_larft(
- *MagmaForwardStr, *MagmaColumnwiseStr,
- i__4, ib,
- wA(i,i), ldwa, &tau[i], T, ib);
+ LAPACKE_CHECK(cpu_lapack_larft(
+ *MagmaForwardStr, *MagmaColumnwiseStr,
+ i__4, ib,
+ wA(i,i), ldwa, &tau[i], T, ib));
if (left) {
/* H or H' is applied to C(i:m,1:n) */
diff --git a/src/backend/opencl/solve.cpp b/src/backend/opencl/solve.cpp
index fa101e8..6d2bea4 100644
--- a/src/backend/opencl/solve.cpp
+++ b/src/backend/opencl/solve.cpp
@@ -89,7 +89,7 @@ Array<T> leastSquares(const Array<T> &a, const Array<T> &b)
int MN = std::min(M, N);
Array<T> B = createEmptyArray<T>(dim4());
- gpu_trsm_func<T> gpu_trsm;
+ gpu_blas_trsm_func<T> gpu_blas_trsm;
cl_event event;
cl_command_queue queue = getQueue()();
@@ -137,14 +137,14 @@ Array<T> leastSquares(const Array<T> &a, const Array<T> &b)
(*dA)(), A.getOffset(), A.strides()[1], 1,
(*dT)(), tmp.getOffset() + MN * NB, NB, 0, queue);
- gpu_trsm(
- clblasLeft, clblasUpper,
- clblasConjTrans, clblasNonUnit,
- B.dims()[0], B.dims()[1],
- scalar<T>(1),
- (*dA)(), A.getOffset(), A.strides()[1],
- (*dB)(), B.getOffset(), B.strides()[1],
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm(
+ clblasLeft, clblasUpper,
+ clblasConjTrans, clblasNonUnit,
+ B.dims()[0], B.dims()[1],
+ scalar<T>(1),
+ (*dA)(), A.getOffset(), A.strides()[1],
+ (*dB)(), B.getOffset(), B.strides()[1],
+ 1, &queue, 0, nullptr, &event));
magmablas_swapdblk<T>(MN - 1, NB,
(*dT)(), tmp.getOffset() + MN * NB, NB, 0,
@@ -225,19 +225,19 @@ Array<T> leastSquares(const Array<T> &a, const Array<T> &b)
{
Array<T> AT = transpose<T>(A, true);
cl::Buffer* AT_buf = AT.get();
- gpu_trsm(
- clblasLeft, clblasLower, clblasConjTrans, clblasNonUnit,
- N, NRHS, scalar<T>(1),
- (*AT_buf)(), AT.getOffset(), AT.strides()[1],
- (*B_buf)(), B.getOffset(), B.strides()[1],
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm(
+ clblasLeft, clblasLower, clblasConjTrans, clblasNonUnit,
+ N, NRHS, scalar<T>(1),
+ (*AT_buf)(), AT.getOffset(), AT.strides()[1],
+ (*B_buf)(), B.getOffset(), B.strides()[1],
+ 1, &queue, 0, nullptr, &event));
} else {
- gpu_trsm(
- clblasLeft, clblasUpper, clblasNoTrans, clblasNonUnit,
- N, NRHS, scalar<T>(1),
- (*A_buf)(), A.getOffset(), A.strides()[1],
- (*B_buf)(), B.getOffset(), B.strides()[1],
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm(
+ clblasLeft, clblasUpper, clblasNoTrans, clblasNonUnit,
+ N, NRHS, scalar<T>(1),
+ (*A_buf)(), A.getOffset(), A.strides()[1],
+ (*B_buf)(), B.getOffset(), B.strides()[1],
+ 1, &queue, 0, nullptr, &event));
}
B.resetDims(dim4(N, K));
}
@@ -248,7 +248,7 @@ Array<T> leastSquares(const Array<T> &a, const Array<T> &b)
template<typename T>
Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop options)
{
- gpu_trsm_func<T> gpu_trsm;
+ gpu_blas_trsm_func<T> gpu_blas_trsm;
Array<T> B = copyArray<T>(b);
@@ -267,25 +267,25 @@ Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop o
Array<T> AT = transpose<T>(A, true);
cl::Buffer* AT_buf = AT.get();
- gpu_trsm(
- clblasLeft,
- clblasLower,
- clblasConjTrans,
- options & AF_MAT_DIAG_UNIT ? clblasUnit : clblasNonUnit,
- N, NRHS, scalar<T>(1),
- (*AT_buf)(), AT.getOffset(), AT.strides()[1],
- (*B_buf)(), B.getOffset(), B.strides()[1],
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm(
+ clblasLeft,
+ clblasLower,
+ clblasConjTrans,
+ options & AF_MAT_DIAG_UNIT ? clblasUnit : clblasNonUnit,
+ N, NRHS, scalar<T>(1),
+ (*AT_buf)(), AT.getOffset(), AT.strides()[1],
+ (*B_buf)(), B.getOffset(), B.strides()[1],
+ 1, &queue, 0, nullptr, &event));
} else {
- gpu_trsm(
- clblasLeft,
- options & AF_MAT_LOWER ? clblasLower : clblasUpper,
- clblasNoTrans,
- options & AF_MAT_DIAG_UNIT ? clblasUnit : clblasNonUnit,
- N, NRHS, scalar<T>(1),
- (*A_buf)(), A.getOffset(), A.strides()[1],
- (*B_buf)(), B.getOffset(), B.strides()[1],
- 1, &queue, 0, nullptr, &event);
+ CLBLAS_CHECK(gpu_blas_trsm(
+ clblasLeft,
+ options & AF_MAT_LOWER ? clblasLower : clblasUpper,
+ clblasNoTrans,
+ options & AF_MAT_DIAG_UNIT ? clblasUnit : clblasNonUnit,
+ N, NRHS, scalar<T>(1),
+ (*A_buf)(), A.getOffset(), A.strides()[1],
+ (*B_buf)(), B.getOffset(), B.strides()[1],
+ 1, &queue, 0, nullptr, &event));
}
return B;
diff --git a/src/backend/opencl/svd.cpp b/src/backend/opencl/svd.cpp
index ca1378d..1467d9e 100644
--- a/src/backend/opencl/svd.cpp
+++ b/src/backend/opencl/svd.cpp
@@ -28,7 +28,7 @@ Tr calc_scale(Tr From, Tr To)
//FIXME: I am not sure this is correct, removing this for now
#if 0
//http://www.netlib.org/lapack/explore-3.1.1-html/dlascl.f.html
- cpu_lamch_func<Tr> cpu_lapack_lamch;
+ cpu_lapack_lamch_func<Tr> cpu_lapack_lamch;
Tr S = cpu_lapack_lamch('S');
Tr B = 1.0 / S;
@@ -79,10 +79,10 @@ void svd(Array<T > &arrU,
const int nb = magma_get_gebrd_nb<T>(n);
const int lwork = (m + n) * nb;
- cpu_lacpy_func<T> cpu_lapack_lacpy;
- cpu_bdsqr_work_func<T> cpu_lapack_bdsqr_work;
- cpu_ungbr_work_func<T> cpu_lapack_ungbr_work;
- cpu_lamch_func<Tr> cpu_lapack_lamch;
+ cpu_lapack_lacpy_func<T> cpu_lapack_lacpy;
+ cpu_lapack_bdsqr_work_func<T> cpu_lapack_bdsqr_work;
+ cpu_lapack_ungbr_work_func<T> cpu_lapack_ungbr_work;
+ cpu_lapack_lamch_func<Tr> cpu_lapack_lamch;
// Get machine constants
static const double eps = cpu_lapack_lamch('P');
@@ -144,17 +144,17 @@ void svd(Array<T > &arrU,
// and generate left bidiagonalizing vectors in U
// (CWorkspace: need 2*N + NCU, prefer 2*N + NCU*NB)
// (RWorkspace: 0)
- cpu_lapack_lacpy('L', m, n, &A[0], lda, &U[0], ldu);
+ LAPACKE_CHECK(cpu_lapack_lacpy('L', m, n, &A[0], lda, &U[0], ldu));
int ncu = m;
- cpu_lapack_ungbr_work('Q', m, ncu, n, &U[0], ldu, &tauq[0], &work[0], lwork);
+ LAPACKE_CHECK(cpu_lapack_ungbr_work('Q', m, ncu, n, &U[0], ldu, &tauq[0], &work[0], lwork));
// If right singular vectors desired in VT, copy result to
// VT and generate right bidiagonalizing vectors in VT
// (CWorkspace: need 3*N-1, prefer 2*N + (N-1)*NB)
// (RWorkspace: 0)
- cpu_lapack_lacpy('U', n, n, &A[0], lda, &VT[0], ldvt);
- cpu_lapack_ungbr_work('P', n, n, n, &VT[0], ldvt, &taup[0], &work[0], lwork);
+ LAPACKE_CHECK(cpu_lapack_lacpy('U', n, n, &A[0], lda, &VT[0], ldvt));
+ LAPACKE_CHECK(cpu_lapack_ungbr_work('P', n, n, n, &VT[0], ldvt, &taup[0], &work[0], lwork));
nru = m;
ncvt = n;
@@ -165,8 +165,9 @@ void svd(Array<T > &arrU,
// vectors in VT
// (CWorkspace: need 0)
// (RWorkspace: need BDSPAC)
- cpu_lapack_bdsqr_work('U', n, ncvt, nru, izero, &s0[0], &s1[0], &VT[0], ldvt, &U[0], ldu,
- &cdummy[0], ione, &work[0]);
+ LAPACKE_CHECK(cpu_lapack_bdsqr_work('U', n, ncvt, nru, izero,
+ &s0[0], &s1[0], &VT[0], ldvt, &U[0], ldu,
+ &cdummy[0], ione, &work[0]));
if (want_vectors) {
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list