[arrayfire] 307/408: Adding proper error checking in magma

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:19 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 163ab3733748ccc52865285eecc33749f809dc57
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Fri Aug 21 16:21:47 2015 -0400

    Adding proper error checking in magma
---
 src/backend/opencl/magma/gebrd.cpp          |  37 ++++-----
 src/backend/opencl/magma/geqrf2.cpp         |  17 ++--
 src/backend/opencl/magma/geqrf3.cpp         |  19 +++--
 src/backend/opencl/magma/getrf.cpp          | 116 ++++++++++++++--------------
 src/backend/opencl/magma/getrs.cpp          |  30 +++----
 src/backend/opencl/magma/labrd.cpp          | 113 ++++++++++++++-------------
 src/backend/opencl/magma/larfb.cpp          | 110 +++++++++++++-------------
 src/backend/opencl/magma/magma_blas.h       |  26 +++----
 src/backend/opencl/magma/magma_cpu_blas.h   |  26 +++----
 src/backend/opencl/magma/magma_cpu_lapack.h |  65 +++++++++-------
 src/backend/opencl/magma/potrf.cpp          | 116 ++++++++++++++--------------
 src/backend/opencl/magma/ungqr.cpp          |  11 ++-
 src/backend/opencl/magma/unmqr.cpp          |  11 ++-
 src/backend/opencl/magma/unmqr2.cpp         |  10 +--
 src/backend/opencl/solve.cpp                |  80 +++++++++----------
 src/backend/opencl/svd.cpp                  |  23 +++---
 16 files changed, 412 insertions(+), 398 deletions(-)

diff --git a/src/backend/opencl/magma/gebrd.cpp b/src/backend/opencl/magma/gebrd.cpp
index e4df977..dbeeb1f 100644
--- a/src/backend/opencl/magma/gebrd.cpp
+++ b/src/backend/opencl/magma/gebrd.cpp
@@ -266,8 +266,8 @@ magma_gebrd_hybrid(
         magma_setmatrix<Ty>(m, n, a, lda, da, da_offset, ldda, queue);
     }
 
-    gpu_gemm_func<Ty> gpu_blas_gemm;
-    cpu_gebrd_work_func<Ty> cpu_lapack_gebrd_work;
+    gpu_blas_gemm_func<Ty> gpu_blas_gemm;
+    cpu_lapack_gebrd_work_func<Ty> cpu_lapack_gebrd_work;
 
     for (i=0; i< (minmn - nx); i += nb) {
         /*  Reduce rows and columns i:i+nb-1 to bidiagonal form and return
@@ -302,19 +302,19 @@ magma_gebrd_hybrid(
                             work  +               (ldwrkx+1)*nb, ldwrky,
                             dwork, dwork_offset + (ldwrkx+1)*nb, ldwrky, queue);
 
-        gpu_blas_gemm(clblasNoTrans, clblasConjTrans,
-                      nrow, ncol, nb,
-                      c_neg_one, dA(i+nb, i  ),      ldda,
-                      dwork, dwork_offset+(ldwrkx+1)*nb, ldwrky,
-                      c_one,     dA(i+nb, i+nb), ldda,
-                      1, &queue, 0, nullptr, &event);
+        CLBLAS_CHECK(gpu_blas_gemm(clblasNoTrans, clblasConjTrans,
+                                   nrow, ncol, nb,
+                                   c_neg_one, dA(i+nb, i  ),      ldda,
+                                   dwork, dwork_offset+(ldwrkx+1)*nb, ldwrky,
+                                   c_one,     dA(i+nb, i+nb), ldda,
+                                   1, &queue, 0, nullptr, &event));
 
-        gpu_blas_gemm(clblasNoTrans, clblasNoTrans,
-                      nrow, ncol, nb,
-                      c_neg_one, dwork, dwork_offset+nb, ldwrkx,
-                      dA(i,    i+nb), ldda,
-                      c_one,     dA(i+nb, i+nb), ldda,
-                      1, &queue, 0, nullptr, &event);
+        CLBLAS_CHECK(gpu_blas_gemm(clblasNoTrans, clblasNoTrans,
+                                   nrow, ncol, nb,
+                                   c_neg_one, dwork, dwork_offset+nb, ldwrkx,
+                                   dA(i,    i+nb), ldda,
+                                   c_one,     dA(i+nb, i+nb), ldda,
+                                   1, &queue, 0, nullptr, &event));
 
         /* Copy diagonal and off-diagonal elements of B back into A */
         if (m >= n) {
@@ -340,13 +340,14 @@ magma_gebrd_hybrid(
         magma_getmatrix<Ty>(nrow, ncol, dA(i, i), ldda, A(i, i), lda, queue);
     }
 
-    *info = cpu_lapack_gebrd_work(nrow, ncol,
-                                  A(i, i), lda, d+i, e+i,
-                                  tauq+i, taup+i, work, lwork);
+    LAPACKE_CHECK(cpu_lapack_gebrd_work(nrow, ncol,
+                                        A(i, i), lda, d+i, e+i,
+                                        tauq+i, taup+i, work, lwork));
     work[0] = magma_make<Ty>(lwkopt, 0.);
 
     magma_free(dwork);
-    return *info;
+    *info = 0;
+    return 0;
 } /* magma_zgebrd */
 
 #define INSTANTIATE(Ty)                                 \
diff --git a/src/backend/opencl/magma/geqrf2.cpp b/src/backend/opencl/magma/geqrf2.cpp
index 4041976..3191954 100644
--- a/src/backend/opencl/magma/geqrf2.cpp
+++ b/src/backend/opencl/magma/geqrf2.cpp
@@ -52,7 +52,6 @@
  **********************************************************************/
 
 #include "magma.h"
-#include "magma_blas.h"
 #include "magma_data.h"
 #include "magma_cpu_lapack.h"
 #include "magma_helper.h"
@@ -245,8 +244,8 @@ magma_geqrf2_gpu(
                                    0, lwork*sizeof(Ty),
                                    0, NULL, NULL, NULL);
 
-    cpu_geqrf_work_func<Ty> cpu_geqrf;
-    cpu_larft_func<Ty> cpu_larft;
+    cpu_lapack_geqrf_work_func<Ty> cpu_lapack_geqrf;
+    cpu_lapack_larft_func<Ty> cpu_lapack_larft;
 
     nbmin = 2;
     nx    = nb;
@@ -275,14 +274,14 @@ magma_geqrf2_gpu(
             }
 
             magma_queue_sync(queue[0]);
-            *info = cpu_geqrf( rows, ib, work(i), ldwork, tau+i, hwork, lhwork);
+            LAPACKE_CHECK(cpu_lapack_geqrf( rows, ib, work(i), ldwork, tau+i, hwork, lhwork));
 
             /* Form the triangular factor of the block reflector
                H = H(i) H(i+1) . . . H(i+ib-1) */
-            cpu_larft(
-                      *MagmaForwardStr, *MagmaColumnwiseStr,
-                      rows, ib,
-                      work(i), ldwork, tau+i, hwork, ib);
+            LAPACKE_CHECK(cpu_lapack_larft(
+                              *MagmaForwardStr, *MagmaColumnwiseStr,
+                              rows, ib,
+                              work(i), ldwork, tau+i, hwork, ib));
 
             panel_to_q<Ty>( MagmaUpper, ib, work(i), ldwork, hwork+ib*ib );
 
@@ -329,7 +328,7 @@ magma_geqrf2_gpu(
         magma_queue_sync(queue[1]);
 
         lhwork = lwork - rows*ib;
-        *info = cpu_geqrf( rows, ib, work, rows, tau+i, work+ib*rows, lhwork);
+        LAPACKE_CHECK(cpu_lapack_geqrf( rows, ib, work, rows, tau+i, work+ib*rows, lhwork));
 
         magma_setmatrix_async<Ty>(rows, ib, work, rows, dA(i, i), ldda, queue[1], NULL);
     }
diff --git a/src/backend/opencl/magma/geqrf3.cpp b/src/backend/opencl/magma/geqrf3.cpp
index 192bd45..8a6a05f 100644
--- a/src/backend/opencl/magma/geqrf3.cpp
+++ b/src/backend/opencl/magma/geqrf3.cpp
@@ -52,7 +52,6 @@
  **********************************************************************/
 
 #include "magma.h"
-#include "magma_blas.h"
 #include "magma_data.h"
 #include "magma_cpu_lapack.h"
 #include "magma_helper.h"
@@ -217,8 +216,8 @@ magma_geqrf3_gpu(
     ldwork = m;
     lddwork= n;
 
-    cpu_geqrf_work_func<Ty> cpu_geqrf;
-    cpu_larft_func<Ty> cpu_larft;
+    cpu_lapack_geqrf_work_func<Ty> cpu_lapack_geqrf;
+    cpu_lapack_larft_func<Ty> cpu_lapack_larft;
 
     if ( (nb > 1) && (nb < k) ) {
         /* Use blocked code initially */
@@ -244,15 +243,15 @@ magma_geqrf3_gpu(
             }
 
             magma_event_sync(event[1]);
-            *info = cpu_geqrf( rows, ib, work_ref(i), ldwork, tau+i, hwork, lhwork);
+            LAPACKE_CHECK(cpu_lapack_geqrf( rows, ib, work_ref(i), ldwork, tau+i, hwork, lhwork));
 
             /* Form the triangular factor of the block reflector
                H = H(i) H(i+1) . . . H(i+ib-1) */
-            cpu_larft(
-                      *MagmaForwardStr, *MagmaColumnwiseStr,
-                      rows, ib,
-                      work_ref(i), ldwork,
-                      tau+i, hwork, ib);
+            LAPACKE_CHECK(cpu_lapack_larft(
+                              *MagmaForwardStr, *MagmaColumnwiseStr,
+                              rows, ib,
+                              work_ref(i), ldwork,
+                              tau+i, hwork, ib));
 
             /* Put 0s in the upper triangular part of a panel (and 1s on the
                diagonal); copy the upper triangular in ut and invert it. */
@@ -296,7 +295,7 @@ magma_geqrf3_gpu(
         magma_getmatrix<Ty>( rows, ib, a_ref(i, i), ldda, work, rows, queue );
 
         lhwork = lwork - rows*ib;
-        *info = cpu_geqrf( rows, ib, work, rows, tau+i, work+ib*rows, lhwork);
+        LAPACKE_CHECK(cpu_lapack_geqrf( rows, ib, work, rows, tau+i, work+ib*rows, lhwork));
 
         magma_setmatrix<Ty>( rows, ib, work, rows, a_ref(i, i), ldda, queue );
     }
diff --git a/src/backend/opencl/magma/getrf.cpp b/src/backend/opencl/magma/getrf.cpp
index b398afd..bd9c9d2 100644
--- a/src/backend/opencl/magma/getrf.cpp
+++ b/src/backend/opencl/magma/getrf.cpp
@@ -149,9 +149,9 @@ magma_int_t magma_getrf_gpu(
     if (m == 0 || n == 0)
         return *info;
 
-    gpu_gemm_func<Ty> gpu_gemm;
-    gpu_trsm_func<Ty> gpu_trsm;
-    cpu_getrf_func<Ty> cpu_getrf;
+    gpu_blas_gemm_func<Ty> gpu_blas_gemm;
+    gpu_blas_trsm_func<Ty> gpu_blas_trsm;
+    cpu_lapack_getrf_func<Ty> cpu_lapack_getrf;
 
     /* Function Body */
     mindim = std::min(m, n);
@@ -165,7 +165,7 @@ magma_int_t magma_getrf_gpu(
             return *info;
         }
         magma_getmatrix<Ty>(m, n, dA(0,0), ldda, work(0), m, queue);
-        cpu_getrf( m, n, work, m, ipiv);
+        LAPACKE_CHECK(cpu_lapack_getrf( m, n, work, m, ipiv));
         magma_setmatrix<Ty>(m, n, work(0), m, dA(0,0), ldda, queue);
         magma_free_cpu(work);
     }
@@ -219,29 +219,29 @@ magma_int_t magma_getrf_gpu(
             magma_getmatrix<Ty>(m-j*nb, nb, dAP(0,0), maxm, work(0), ldwork, queue);
 
             if (j > 0 && n > (j + 1) * nb) {
-                gpu_trsm(
-                         clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
-                         n - (j+1)*nb, nb,
-                         c_one,
-                         dAT(j-1,j-1), lddat,
-                         dAT(j-1,j+1), lddat,
-                         1, &queue, 0, nullptr, &event);
+                CLBLAS_CHECK(gpu_blas_trsm(
+                                 clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
+                                 n - (j+1)*nb, nb,
+                                 c_one,
+                                 dAT(j-1,j-1), lddat,
+                                 dAT(j-1,j+1), lddat,
+                                 1, &queue, 0, nullptr, &event));
 
                 if (m > j * nb)  {
-                    gpu_gemm( clblasNoTrans, clblasNoTrans,
-                         n-(j+1)*nb, m-j*nb, nb,
-                         c_neg_one,
-                         dAT(j-1,j+1), lddat,
-                         dAT(j,  j-1), lddat,
-                         c_one,
-                         dAT(j,  j+1), lddat,
-                         1, &queue, 0, nullptr, &event);
+                    CLBLAS_CHECK(gpu_blas_gemm( clblasNoTrans, clblasNoTrans,
+                                                n-(j+1)*nb, m-j*nb, nb,
+                                                c_neg_one,
+                                                dAT(j-1,j+1), lddat,
+                                                dAT(j,  j-1), lddat,
+                                                c_one,
+                                                dAT(j,  j+1), lddat,
+                                                1, &queue, 0, nullptr, &event));
                 }
             }
 
             // do the cpu part
             rows = m - j*nb;
-            cpu_getrf( rows, nb, work, ldwork, ipiv+j*nb);
+            LAPACKE_CHECK(cpu_lapack_getrf( rows, nb, work, ldwork, ipiv+j*nb));
             if (*info == 0 && iinfo > 0)
                 *info = iinfo + j*nb;
 
@@ -257,44 +257,44 @@ magma_int_t magma_getrf_gpu(
 
             // do the small non-parallel computations (next panel update)
             if (s > (j+1)) {
-                gpu_trsm(
-                         clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
-                         nb, nb,
-                         c_one,
-                         dAT(j, j  ), lddat,
-                         dAT(j, j+1), lddat,
-                         1, &queue, 0, nullptr, &event);
-
-
-                gpu_gemm( clblasNoTrans, clblasNoTrans,
-                         nb, m-(j+1)*nb, nb,
-                         c_neg_one,
-                         dAT(j,   j+1), lddat,
-                         dAT(j+1, j  ), lddat,
-                         c_one,
-                         dAT(j+1, j+1), lddat,
-                         1, &queue, 0, nullptr, &event);
+                CLBLAS_CHECK(gpu_blas_trsm(
+                                 clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
+                                 nb, nb,
+                                 c_one,
+                                 dAT(j, j  ), lddat,
+                                 dAT(j, j+1), lddat,
+                                 1, &queue, 0, nullptr, &event));
+
+
+                CLBLAS_CHECK(gpu_blas_gemm( clblasNoTrans, clblasNoTrans,
+                                            nb, m-(j+1)*nb, nb,
+                                            c_neg_one,
+                                            dAT(j,   j+1), lddat,
+                                            dAT(j+1, j  ), lddat,
+                                            c_one,
+                                            dAT(j+1, j+1), lddat,
+                                            1, &queue, 0, nullptr, &event));
             }
             else {
                 if (n > s * nb) {
-                    gpu_trsm(
-                             clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
-                             n-s*nb, nb,
-                             c_one,
-                             dAT(j, j  ), lddat,
-                             dAT(j, j+1), lddat,
-                             1, &queue, 0, nullptr, &event);
+                    CLBLAS_CHECK(gpu_blas_trsm(
+                                     clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
+                                     n-s*nb, nb,
+                                     c_one,
+                                     dAT(j, j  ), lddat,
+                                     dAT(j, j+1), lddat,
+                                     1, &queue, 0, nullptr, &event));
                 }
 
                 if ((n > (j+1) * nb) && (m > (j+1) * nb)) {
-                    gpu_gemm( clblasNoTrans, clblasNoTrans,
-                             n-(j+1)*nb, m-(j+1)*nb, nb,
-                             c_neg_one,
-                             dAT(j,   j+1), lddat,
-                             dAT(j+1, j  ), lddat,
-                             c_one,
-                             dAT(j+1, j+1), lddat,
-                             1, &queue, 0, nullptr, &event);
+                    CLBLAS_CHECK(gpu_blas_gemm( clblasNoTrans, clblasNoTrans,
+                                                n-(j+1)*nb, m-(j+1)*nb, nb,
+                                                c_neg_one,
+                                                dAT(j,   j+1), lddat,
+                                                dAT(j+1, j  ), lddat,
+                                                c_one,
+                                                dAT(j+1, j+1), lddat,
+                                                1, &queue, 0, nullptr, &event));
                 }
             }
         }
@@ -308,7 +308,7 @@ magma_int_t magma_getrf_gpu(
             magma_getmatrix<Ty>(rows, nb0, dAP(0,0), maxm, work(0), ldwork, queue);
 
             // do the cpu part
-            cpu_getrf( rows, nb0, work, ldwork, ipiv+s*nb);
+            LAPACKE_CHECK(cpu_lapack_getrf( rows, nb0, work, ldwork, ipiv+s*nb));
             if (*info == 0 && iinfo > 0)
                 *info = iinfo + s*nb;
 
@@ -322,11 +322,11 @@ magma_int_t magma_getrf_gpu(
             magmablas_transpose<Ty>(rows, nb0, dAP(0,0), maxm, dAT(s,s), lddat, queue);
 
             if (n > s * nb + nb0) {
-                gpu_trsm(
-                         clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
-                         n-s*nb-nb0, nb0,
-                         c_one, dAT(s,s),     lddat,
-                         dAT(s,s)+nb0, lddat, 1, &queue, 0, nullptr, &event);
+                CLBLAS_CHECK(gpu_blas_trsm(
+                                 clblasRight, clblasUpper, clblasNoTrans, clblasUnit,
+                                 n-s*nb-nb0, nb0,
+                                 c_one, dAT(s,s),     lddat,
+                                 dAT(s,s)+nb0, lddat, 1, &queue, 0, nullptr, &event));
             }
         }
 
diff --git a/src/backend/opencl/magma/getrs.cpp b/src/backend/opencl/magma/getrs.cpp
index 6ad943b..1dc106c 100644
--- a/src/backend/opencl/magma/getrs.cpp
+++ b/src/backend/opencl/magma/getrs.cpp
@@ -159,9 +159,9 @@ magma_getrs_gpu(magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
     i1 = 1;
     i2 = n;
 
-    cpu_laswp_func<Ty> cpu_laswp;
-    gpu_trsm_func<Ty> gpu_trsm;
-    gpu_trsv_func<Ty> gpu_trsv;
+    cpu_lapack_laswp_func<Ty> cpu_lapack_laswp;
+    gpu_blas_trsm_func<Ty> gpu_blas_trsm;
+    gpu_blas_trsv_func<Ty> gpu_blas_trsv;
 
     cl_event event = NULL;
 
@@ -180,18 +180,18 @@ magma_getrs_gpu(magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
 
         /* Solve A * X = B. */
         magma_getmatrix<Ty>( n, nrhs, dB, dB_offset, lddb, work, n, queue );
-        cpu_laswp( nrhs, work, n, i1, i2, ipiv, inc);
+        LAPACKE_CHECK(cpu_lapack_laswp( nrhs, work, n, i1, i2, ipiv, inc));
         magma_setmatrix<Ty>( n, nrhs, work, n, dB, dB_offset, lddb, queue );
         if ( nrhs == 1) {
-            gpu_trsv( clblasLower, clblasNoTrans, clblasUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
-            gpu_trsv( clblasUpper, clblasNoTrans, clblasNonUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
+            CLBLAS_CHECK(gpu_blas_trsv( clblasLower, clblasNoTrans, clblasUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event));
+            CLBLAS_CHECK(gpu_blas_trsv( clblasUpper, clblasNoTrans, clblasNonUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event));
         } else {
-            gpu_trsm( clblasLeft, clblasLower, clblasNoTrans, clblasUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+            CLBLAS_CHECK(gpu_blas_trsm( clblasLeft, clblasLower, clblasNoTrans, clblasUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event));
 
             if(cond) {
-                gpu_trsm( clblasLeft, clblasLower, clblasTrans, clblasNonUnit, n, nrhs, c_one, dAT, 0, n, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+                CLBLAS_CHECK(gpu_blas_trsm( clblasLeft, clblasLower, clblasTrans, clblasNonUnit, n, nrhs, c_one, dAT, 0, n, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event));
             } else {
-                gpu_trsm( clblasLeft, clblasUpper, clblasNoTrans, clblasNonUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+                CLBLAS_CHECK(gpu_blas_trsm( clblasLeft, clblasUpper, clblasNoTrans, clblasNonUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event));
             }
         }
     } else {
@@ -199,18 +199,18 @@ magma_getrs_gpu(magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
 
         /* Solve A' * X = B. */
         if ( nrhs == 1) {
-            gpu_trsv( clblasUpper, cltrans, clblasNonUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
-            gpu_trsv( clblasLower, cltrans, clblasUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event);
+            CLBLAS_CHECK(gpu_blas_trsv( clblasUpper, cltrans, clblasNonUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event));
+            CLBLAS_CHECK(gpu_blas_trsv( clblasLower, cltrans, clblasUnit, n, dA, dA_offset, ldda, dB, dB_offset, 1, 1, &queue, 0, nullptr, &event));
         } else {
             if(cond) {
-                gpu_trsm( clblasLeft, clblasLower, clblasNoTrans, clblasNonUnit, n, nrhs, c_one, dAT, 0, n, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+                CLBLAS_CHECK(gpu_blas_trsm( clblasLeft, clblasLower, clblasNoTrans, clblasNonUnit, n, nrhs, c_one, dAT, 0, n, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event));
             } else {
-                gpu_trsm( clblasLeft, clblasUpper, cltrans, clblasNonUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+                CLBLAS_CHECK(gpu_blas_trsm( clblasLeft, clblasUpper, cltrans, clblasNonUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event));
             }
-            gpu_trsm( clblasLeft, clblasLower, cltrans, clblasUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event);
+            CLBLAS_CHECK(gpu_blas_trsm( clblasLeft, clblasLower, cltrans, clblasUnit, n, nrhs, c_one, dA, dA_offset, ldda, dB, dB_offset, lddb, 1, &queue, 0, nullptr, &event));
         }
         magma_getmatrix<Ty>( n, nrhs, dB, dB_offset, lddb, work, n, queue );
-        cpu_laswp( nrhs, work, n, i1, i2, ipiv, inc);
+        LAPACKE_CHECK(cpu_lapack_laswp( nrhs, work, n, i1, i2, ipiv, inc));
         magma_setmatrix<Ty>( n, nrhs, work, n, dB, dB_offset, lddb, queue );
     }
 
diff --git a/src/backend/opencl/magma/labrd.cpp b/src/backend/opencl/magma/labrd.cpp
index ee7f120..2487190 100644
--- a/src/backend/opencl/magma/labrd.cpp
+++ b/src/backend/opencl/magma/labrd.cpp
@@ -244,12 +244,12 @@ magma_labrd_gpu(
 
     magma_event_t event = NULL;
 
-    gpu_gemv_func<Ty> gpu_blas_gemv;
-    cpu_gemv_func<Ty> cpu_blas_gemv;
-    cpu_scal_func<Ty> cpu_blas_scal;
-    cpu_axpy_func<Ty> cpu_blas_axpy;
-    cpu_larfg_func<Ty> cpu_lapack_larfg;
-    cpu_lacgv_func<Ty> cpu_lapack_lacgv;
+    gpu_blas_gemv_func<Ty> gpu_blas_gemv;
+    cpu_blas_gemv_func<Ty> cpu_blas_gemv;
+    cpu_blas_scal_func<Ty> cpu_blas_scal;
+    cpu_blas_axpy_func<Ty> cpu_blas_axpy;
+    cpu_lapack_larfg_func<Ty> cpu_lapack_larfg;
+    cpu_lapack_lacgv_func<Ty> cpu_lapack_lacgv;
 
     CBLAS_TRANSPOSE CblasTransParam = is_cplx ? CblasConjTrans : CblasTrans;
 
@@ -261,14 +261,14 @@ magma_labrd_gpu(
             i__3 = i__ - 1;
 
             if (is_cplx) {
-                cpu_lapack_lacgv(i__3, &y[i__+y_dim1], ldy);
+                LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__+y_dim1], ldy));
             }
 
             cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &a[i__ + a_dim1], lda,
                           &y[i__+y_dim1], ldy, cblas_scalar(&c_one), &a[i__ + i__ * a_dim1], c__1);
 
             if (is_cplx) {
-                cpu_lapack_lacgv(i__3, &y[i__+y_dim1], ldy);
+                LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__+y_dim1], ldy));
             }
 
             cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &x[i__ + x_dim1], ldx,
@@ -278,8 +278,11 @@ magma_labrd_gpu(
             alpha = a[i__ + i__ * a_dim1];
             i__2 = m - i__ + 1;
             i__3 = i__ + 1;
-            cpu_lapack_larfg(i__2, &alpha,
-                             &a[std::min(i__3,m) + i__ * a_dim1], c__1, &tauq[i__]);
+
+            LAPACKE_CHECK(cpu_lapack_larfg(i__2, &alpha,
+                                          &a[std::min(i__3,m) + i__ * a_dim1],
+                                          c__1, &tauq[i__]));
+
             d[i__] = magma_real<Ty>(alpha);
             if (i__ < n) {
                 a[i__ + i__ * a_dim1] = c_one;
@@ -294,11 +297,11 @@ magma_labrd_gpu(
                                     da, da_offset + (i__-1)+(i__-1)* (ldda), 1,
                                     queue);
                 // 2. Multiply ---------------------------------------------
-                gpu_blas_gemv(clblasConjTrans, i__2, i__3, c_one,
-                              da, da_offset + (i__-1) + ((i__-1) + 1) * (ldda), ldda,
-                              da, da_offset + (i__-1) + (i__-1) * (ldda), c__1, c_zero,
-                              dy, dy_offset + i__ + 1 + i__ * y_dim1, c__1,
-                              1, &queue, 0, nullptr, &event);
+                CLBLAS_CHECK(gpu_blas_gemv(clblasConjTrans, i__2, i__3, c_one,
+                                           da, da_offset + (i__-1) + ((i__-1) + 1) * (ldda), ldda,
+                                           da, da_offset + (i__-1) + (i__-1) * (ldda), c__1, c_zero,
+                                           dy, dy_offset + i__ + 1 + i__ * y_dim1, c__1,
+                                           1, &queue, 0, nullptr, &event));
 
                 // 3. Put the result back ----------------------------------
                 magma_getmatrix_async<Ty>(i__3, 1,
@@ -341,8 +344,8 @@ magma_labrd_gpu(
                 /* Update A(i,i+1:n) */
                 i__2 = n - i__;
                 if (is_cplx) {
-                    cpu_lapack_lacgv(i__2, &a[i__+(i__+1)*a_dim1], lda);
-                    cpu_lapack_lacgv(i__,  &a[i__+a_dim1], lda);
+                    LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &a[i__+(i__+1)*a_dim1], lda));
+                    LAPACKE_CHECK(cpu_lapack_lacgv(i__,  &a[i__+a_dim1], lda));
                 }
 
                 cpu_blas_gemv(CblasNoTrans, i__2, i__, cblas_scalar(&c_neg_one),
@@ -352,15 +355,15 @@ magma_labrd_gpu(
                 i__3 = n - i__;
 
                 if (is_cplx) {
-                    cpu_lapack_lacgv(i__,  &a[i__+a_dim1], lda);
-                    cpu_lapack_lacgv(i__2, &x[i__+x_dim1], ldx);
+                    LAPACKE_CHECK(cpu_lapack_lacgv(i__,  &a[i__+a_dim1], lda));
+                    LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &x[i__+x_dim1], ldx));
                 }
 
                 cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_neg_one), &a[(i__ + 1) *
                                                                               a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, cblas_scalar(&c_one), &a[
                                                                                   i__ + (i__ + 1) * a_dim1], lda);
                 if (is_cplx) {
-                    cpu_lapack_lacgv(i__2, &x[i__+x_dim1], ldx);
+                    LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &x[i__+x_dim1], ldx));
                 }
 
                 /* Generate reflection P(i) to annihilate A(i,i+2:n) */
@@ -368,8 +371,9 @@ magma_labrd_gpu(
                 /* Computing MIN */
                 i__3 = i__ + 2;
                 alpha = a[i__ + (i__ + 1) * a_dim1];
-                cpu_lapack_larfg(i__2, &alpha, &a[i__ + std::min(
-                                         i__3,n) * a_dim1], lda, &taup[i__]);
+                LAPACKE_CHECK(cpu_lapack_larfg(i__2, &alpha,
+                                              &a[i__ + std::min(i__3,n) * a_dim1],
+                                              lda, &taup[i__]));
                 e[i__] = magma_real<Ty>(alpha);
                 a[i__ + (i__ + 1) * a_dim1] = c_one;
 
@@ -384,12 +388,12 @@ magma_labrd_gpu(
                 // 2. Multiply ---------------------------------------------
                 //magma_zcopy(i__3, da+(i__-1)+((i__-1)+1)*(ldda), ldda,
                 //            dy + 1 + lddy, 1);
-                gpu_blas_gemv(clblasNoTrans, i__2, i__3, c_one,
-                              da, da_offset + (i__-1)+1+ ((i__-1)+1) * (ldda), ldda,
-                              da, da_offset + (i__-1) +  ((i__-1)+1) * (ldda), ldda,
-                              //dy + 1 + lddy, 1,
-                              c_zero, dx, dx_offset + i__ + 1 + i__ * x_dim1, c__1,
-                              1, &queue, 0, nullptr, &event);
+                CLBLAS_CHECK(gpu_blas_gemv(clblasNoTrans, i__2, i__3, c_one,
+                                           da, da_offset + (i__-1)+1+ ((i__-1)+1) * (ldda), ldda,
+                                           da, da_offset + (i__-1) +  ((i__-1)+1) * (ldda), ldda,
+                                           //dy + 1 + lddy, 1,
+                                           c_zero, dx, dx_offset + i__ + 1 + i__ * x_dim1, c__1,
+                                           1, &queue, 0, nullptr, &event));
 
                 // 3. Put the result back ----------------------------------
                 magma_getmatrix_async<Ty>(i__2, 1,
@@ -430,7 +434,7 @@ magma_labrd_gpu(
 
                 if (is_cplx) {
                     i__2 = n - i__;
-                    cpu_lapack_lacgv(i__2,  &a[i__+(i__+1)*a_dim1], lda);
+                    LAPACKE_CHECK(cpu_lapack_lacgv(i__2,  &a[i__+(i__+1)*a_dim1], lda));
                     // 4. Send the block reflector  A(i+1:m,i) to the GPU after ZLACGV()
                     magma_setvector<Ty>(i__2,
                                         a + i__   + (i__   +1)* a_dim1, lda,
@@ -448,21 +452,21 @@ magma_labrd_gpu(
             i__2 = n - i__ + 1;
             i__3 = i__ - 1;
             if (is_cplx) {
-                cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda);
-                cpu_lapack_lacgv(i__3, &a[i__ + a_dim1], lda);
+                LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda));
+                LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &a[i__ + a_dim1], lda));
             }
             cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &y[i__ + y_dim1], ldy,
                           &a[i__ + a_dim1], lda, cblas_scalar(&c_one), &a[i__ + i__ * a_dim1], lda);
             i__2 = i__ - 1;
             if (is_cplx) {
-                cpu_lapack_lacgv(i__3, &a[i__ + a_dim1], lda);
-                cpu_lapack_lacgv(i__3, &x[i__ + x_dim1], ldx);
+                LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &a[i__ + a_dim1], lda));
+                LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &x[i__ + x_dim1], ldx));
             }
             i__3 = n - i__ + 1;
             cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_neg_one), &a[i__ * a_dim1 + 1],
                           lda, &x[i__ + x_dim1], ldx, cblas_scalar(&c_one), &a[i__ + i__ * a_dim1], lda);
             if (is_cplx) {
-                cpu_lapack_lacgv(i__2, &x[i__ + x_dim1], ldx);
+                LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &x[i__ + x_dim1], ldx));
             }
 
             /* Generate reflection P(i) to annihilate A(i,i+1:n) */
@@ -470,8 +474,8 @@ magma_labrd_gpu(
             /* Computing MIN */
             i__3 = i__ + 1;
             alpha = a[i__ + i__ * a_dim1];
-            cpu_lapack_larfg(i__2, &alpha,
-                             &a[i__ + std::min(i__3,n) * a_dim1], lda, &taup[i__]);
+            LAPACKE_CHECK(cpu_lapack_larfg(i__2, &alpha,
+                                           &a[i__ + std::min(i__3,n) * a_dim1], lda, &taup[i__]));
             d[i__] = magma_real<Ty>(alpha);
             if (i__ < m) {
                 a[i__ + i__ * a_dim1] = c_one;
@@ -489,13 +493,13 @@ magma_labrd_gpu(
                 // 2. Multiply ---------------------------------------------
                 //magma_zcopy(i__3, da+(i__-1)+(i__-1)*(ldda), ldda,
                 //            dy + 1 + lddy, 1);
-                gpu_blas_gemv(clblasNoTrans, i__2, i__3, c_one,
-                              da, da_offset + (i__-1)+1 + (i__-1) * ldda, ldda,
-                              da, da_offset + (i__-1)   + (i__-1) * ldda, ldda,
-                              // dy + 1 + lddy, 1,
-                              c_zero,
-                              dx, dx_offset + i__ + 1 + i__ * x_dim1, c__1,
-                              1, &queue, 0, nullptr, &event);
+                CLBLAS_CHECK(gpu_blas_gemv(clblasNoTrans, i__2, i__3, c_one,
+                                           da, da_offset + (i__-1)+1 + (i__-1) * ldda, ldda,
+                                           da, da_offset + (i__-1)   + (i__-1) * ldda, ldda,
+                                           // dy + 1 + lddy, 1,
+                                           c_zero,
+                                           dx, dx_offset + i__ + 1 + i__ * x_dim1, c__1,
+                                           1, &queue, 0, nullptr, &event));
 
 
                 // 3. Put the result back ----------------------------------
@@ -538,7 +542,7 @@ magma_labrd_gpu(
                 i__2 = n - i__ + 1;
 
                 if (is_cplx) {
-                    cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda);
+                    LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda));
                     magma_setvector<Ty>(i__2,
                                         a + i__   + (i__ )* a_dim1, lda,
                                         da, da_offset + (i__-1)+ (i__-1)*(ldda), ldda,
@@ -550,7 +554,7 @@ magma_labrd_gpu(
                 i__3 = i__ - 1;
 
                 if (is_cplx) {
-                    cpu_lapack_lacgv(i__3, &y[i__ + y_dim1], ldy);
+                    LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__ + y_dim1], ldy));
                 }
 
                 cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one),
@@ -558,7 +562,7 @@ magma_labrd_gpu(
                               &a[i__ + 1 + i__ * a_dim1], c__1);
                 i__2 = m - i__;
                 if (is_cplx) {
-                    cpu_lapack_lacgv(i__3, &y[i__ + y_dim1], ldy);
+                    LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__ + y_dim1], ldy));
                 }
                 cpu_blas_gemv(CblasNoTrans, i__2, i__, cblas_scalar(&c_neg_one),
                               &x[i__ + 1 + x_dim1], ldx, &a[i__ * a_dim1 + 1], c__1, cblas_scalar(&c_one),
@@ -568,8 +572,9 @@ magma_labrd_gpu(
                 i__2 = m - i__;
                 i__3 = i__ + 2;
                 alpha = a[i__ + 1 + i__ * a_dim1];
-                cpu_lapack_larfg(i__2, &alpha,
-                                 &a[std::min(i__3,m) + i__ * a_dim1], c__1, &tauq[i__]);
+                LAPACKE_CHECK(cpu_lapack_larfg(i__2, &alpha,
+                                               &a[std::min(i__3,m) + i__ * a_dim1],
+                                               c__1, &tauq[i__]));
                 e[i__] = magma_real<Ty>(alpha);
                 a[i__ + 1 + i__ * a_dim1] = c_one;
 
@@ -583,11 +588,11 @@ magma_labrd_gpu(
                                     da, da_offset + (i__-1)+1+ (i__-1)*(ldda),  1,
                                     queue);
                 // 2. Multiply ---------------------------------------------
-                gpu_blas_gemv(clblasConjTrans, i__2, i__3, c_one,
-                              da, da_offset + (i__-1)+1+ ((i__-1)+1) * ldda, ldda,
-                              da, da_offset + (i__-1)+1+  (i__-1)    * ldda, c__1,
-                              c_zero, dy, dy_offset + i__ + 1 + i__ * y_dim1, c__1,
-                              1, &queue, 0, nullptr, &event);
+                CLBLAS_CHECK(gpu_blas_gemv(clblasConjTrans, i__2, i__3, c_one,
+                                           da, da_offset + (i__-1)+1+ ((i__-1)+1) * ldda, ldda,
+                                           da, da_offset + (i__-1)+1+  (i__-1)    * ldda, c__1,
+                                           c_zero, dy, dy_offset + i__ + 1 + i__ * y_dim1, c__1,
+                                           1, &queue, 0, nullptr, &event));
 
                 // 3. Put the result back ----------------------------------
                 magma_getmatrix_async<Ty>(i__3, 1,
@@ -628,7 +633,7 @@ magma_labrd_gpu(
             else {
                 if (is_cplx) {
                     i__2 = n - i__ + 1;
-                    cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda);
+                    LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda));
                     magma_setvector<Ty>(i__2,
                                         a + i__   + (i__ )* a_dim1, lda,
                                         da, da_offset + (i__-1)+ (i__-1)*(ldda), ldda,
diff --git a/src/backend/opencl/magma/larfb.cpp b/src/backend/opencl/magma/larfb.cpp
index 5b188f4..20d2902 100644
--- a/src/backend/opencl/magma/larfb.cpp
+++ b/src/backend/opencl/magma/larfb.cpp
@@ -254,8 +254,8 @@ magma_larfb_gpu(
         transV   = clblasNoTrans;
     }
 
-    gpu_gemm_func<Ty> gpu_gemm;
-    gpu_trmm_func<Ty> gpu_trmm;
+    gpu_blas_gemm_func<Ty> gpu_blas_gemm;
+    gpu_blas_trmm_func<Ty> gpu_blas_trmm;
 
     cl_event event = NULL;
 
@@ -264,73 +264,73 @@ magma_larfb_gpu(
         // Comments assume H C. When forming H^H C, T gets transposed via transt.
 
         // W = C^H V
-        gpu_gemm(
-                 transType, notransV,
-                 n, k, m,
-                 c_one,
-                 dC(0,0),  lddc,
-                 dV(0,0),  lddv,
-                 c_zero,
-                 dwork(0), ldwork,
-                 1, &queue, 0, nullptr, &event);
+        CLBLAS_CHECK(gpu_blas_gemm(
+                         transType, notransV,
+                         n, k, m,
+                         c_one,
+                         dC(0,0),  lddc,
+                         dV(0,0),  lddv,
+                         c_zero,
+                         dwork(0), ldwork,
+                         1, &queue, 0, nullptr, &event));
 
         // W = W T^H = C^H V T^H
-        gpu_trmm(
-                 clblasRight,
-                 uplo, transt, clblasNonUnit,
-                 n, k,
-                 c_one,
-                 dT(0,0) , lddt,
-                 dwork(0), ldwork,
-                 1, &queue, 0, nullptr, &event);
+        CLBLAS_CHECK(gpu_blas_trmm(
+                         clblasRight,
+                         uplo, transt, clblasNonUnit,
+                         n, k,
+                         c_one,
+                         dT(0,0) , lddt,
+                         dwork(0), ldwork,
+                         1, &queue, 0, nullptr, &event));
 
         // C = C - V W^H = C - V T V^H C = (I - V T V^H) C = H C
-        gpu_gemm(
-                 notransV, transType,
-                 m, n, k,
-                 c_neg_one,
-                 dV(0,0),  lddv,
-                 dwork(0), ldwork,
-                 c_one,
-                 dC(0,0),  lddc,
-                 1, &queue, 0, nullptr, &event);
+        CLBLAS_CHECK(gpu_blas_gemm(
+                         notransV, transType,
+                         m, n, k,
+                         c_neg_one,
+                         dV(0,0),  lddv,
+                         dwork(0), ldwork,
+                         c_one,
+                         dC(0,0),  lddc,
+                         1, &queue, 0, nullptr, &event));
     }
     else {
         // Form C H or C H^H
         // Comments assume C H. When forming C H^H, T gets transposed via trans.
 
         // W = C V
-        gpu_gemm(
-                 clblasNoTrans, notransV,
-                 m, k, n,
-                 c_one,
-                 dC(0,0),  lddc,
-                 dV(0,0),  lddv,
-                 c_zero,
-                 dwork(0), ldwork,
-                 1, &queue, 0, nullptr, &event);
+        CLBLAS_CHECK(gpu_blas_gemm(
+                         clblasNoTrans, notransV,
+                         m, k, n,
+                         c_one,
+                         dC(0,0),  lddc,
+                         dV(0,0),  lddv,
+                         c_zero,
+                         dwork(0), ldwork,
+                         1, &queue, 0, nullptr, &event));
 
         // W = W T = C V T
-        gpu_trmm(
-                 clblasRight, uplo,
-                 cltrans,
-                 clblasNonUnit,
-                 m, k,
-                 c_one,
-                 dT(0,0),  lddt,
-                 dwork(0), ldwork,
-                 1, &queue, 0, nullptr, &event);
+        CLBLAS_CHECK(gpu_blas_trmm(
+                         clblasRight, uplo,
+                         cltrans,
+                         clblasNonUnit,
+                         m, k,
+                         c_one,
+                         dT(0,0),  lddt,
+                         dwork(0), ldwork,
+                         1, &queue, 0, nullptr, &event));
 
         // C = C - W V^H = C - C V T V^H = C (I - V T V^H) = C H
-        gpu_gemm(
-                 clblasNoTrans, transV,
-                 m, n, k,
-                 c_neg_one,
-                 dwork(0), ldwork,
-                 dV(0,0),  lddv,
-                 c_one,
-                 dC(0,0),  lddc,
-                 1, &queue, 0, nullptr, &event);
+        CLBLAS_CHECK(gpu_blas_gemm(
+                         clblasNoTrans, transV,
+                         m, n, k,
+                         c_neg_one,
+                         dwork(0), ldwork,
+                         dV(0,0),  lddv,
+                         c_one,
+                         dC(0,0),  lddc,
+                         1, &queue, 0, nullptr, &event));
     }
 
     return info;
diff --git a/src/backend/opencl/magma/magma_blas.h b/src/backend/opencl/magma/magma_blas.h
index 44ebc03..8e2565b 100644
--- a/src/backend/opencl/magma/magma_blas.h
+++ b/src/backend/opencl/magma/magma_blas.h
@@ -24,19 +24,19 @@ using opencl::cdouble;
 
 #define BLAS_FUNC_DEF(NAME)                     \
     template<typename T>                        \
-    struct gpu_##NAME##_func;
-
-#define BLAS_FUNC(NAME, TYPE, PREFIX)                               \
-    template<>                                                      \
-    struct gpu_##NAME##_func<TYPE>                                  \
-    {                                                               \
-        template<typename... Args>                                  \
-            void                                                    \
-            operator() (Args... args)                               \
-        {                                                           \
-            CLBLAS_CHECK(clblas##PREFIX##NAME(clblasColumnMajor,    \
-                                              args...));            \
-        }                                                           \
+    struct gpu_blas_##NAME##_func;
+
+#define BLAS_FUNC(NAME, TYPE, PREFIX)                       \
+    template<>                                              \
+    struct gpu_blas_##NAME##_func<TYPE>                     \
+    {                                                       \
+        template<typename... Args>                          \
+            clblasStatus                                    \
+            operator() (Args... args)                       \
+        {                                                   \
+            return clblas##PREFIX##NAME(clblasColumnMajor,  \
+                                        args...);           \
+        }                                                   \
     };
 
 #define BLAS_FUNC_DECL(NAME)                    \
diff --git a/src/backend/opencl/magma/magma_cpu_blas.h b/src/backend/opencl/magma/magma_cpu_blas.h
index f5df93d..e5f7184 100644
--- a/src/backend/opencl/magma/magma_cpu_blas.h
+++ b/src/backend/opencl/magma/magma_cpu_blas.h
@@ -9,7 +9,7 @@
 
 #ifndef MAGMA_CPU_BLAS
 #define MAGMA_CPU_BLAS
-
+#include <err_common.hpp>
 #include "magma_types.h"
 
 #ifdef __APPLE__
@@ -38,11 +38,11 @@ typedef int blasint;
 
 #define CPU_BLAS_FUNC_DEF(NAME)                 \
     template<typename T>                        \
-    struct cpu_##NAME##_func;
+    struct cpu_blas_##NAME##_func;
 
 #define CPU_BLAS_FUNC1(NAME, TYPE, X)                       \
     template<>                                              \
-    struct cpu_##NAME##_func<TYPE>                          \
+    struct cpu_blas_##NAME##_func<TYPE>                     \
     {                                                       \
         template<typename... Args>                          \
             void                                            \
@@ -50,24 +50,24 @@ typedef int blasint;
         { return cblas_##X##NAME(CblasColMajor, args...); } \
     };
 
-#define CPU_BLAS_FUNC2(NAME, TYPE, X)                       \
-    template<>                                              \
-    struct cpu_##NAME##_func<TYPE>                          \
-    {                                                       \
-        template<typename... Args>                          \
-            void                                            \
-            operator() (Args... args)                       \
-        { return cblas_##X##NAME(args...); }                \
+#define CPU_BLAS_FUNC2(NAME, TYPE, X)           \
+    template<>                                  \
+    struct cpu_blas_##NAME##_func<TYPE>         \
+    {                                           \
+        template<typename... Args>              \
+            void                                \
+            operator() (Args... args)           \
+        { return cblas_##X##NAME(args...); }    \
     };
 
-#define CPU_BLAS_DECL1(NAME)                         \
+#define CPU_BLAS_DECL1(NAME)                        \
     CPU_BLAS_FUNC_DEF(NAME)                         \
     CPU_BLAS_FUNC1(NAME, float,      s)             \
     CPU_BLAS_FUNC1(NAME, double,     d)             \
     CPU_BLAS_FUNC1(NAME, magmaFloatComplex,     c)  \
     CPU_BLAS_FUNC1(NAME, magmaDoubleComplex,    z)  \
 
-#define CPU_BLAS_DECL2(NAME)                         \
+#define CPU_BLAS_DECL2(NAME)                        \
     CPU_BLAS_FUNC_DEF(NAME)                         \
     CPU_BLAS_FUNC2(NAME, float,      s)             \
     CPU_BLAS_FUNC2(NAME, double,     d)             \
diff --git a/src/backend/opencl/magma/magma_cpu_lapack.h b/src/backend/opencl/magma/magma_cpu_lapack.h
index c051306..431923c 100644
--- a/src/backend/opencl/magma/magma_cpu_lapack.h
+++ b/src/backend/opencl/magma/magma_cpu_lapack.h
@@ -10,6 +10,7 @@
 #ifndef MAGMA_CPU_LAPACK
 #define MAGMA_CPU_LAPACK
 
+#include <err_common.hpp>
 #include "magma_types.h"
 
 #define LAPACKE_sunmqr_work(...) LAPACKE_sormqr_work(__VA_ARGS__)
@@ -42,41 +43,51 @@ int LAPACKE_dlacgv(Args... args) { return 0; }
     #endif  // MKL/NETLIB
 #endif  //APPLE
 
+#define LAPACKE_CHECK(fn) do { /* nonzero info -> AF_ERROR */ \
+        int lapacke_info_ = (fn);               \
+        if (lapacke_info_ != 0) {               \
+            char lapacke_st_msg[32];            \
+            snprintf(lapacke_st_msg,            \
+                     sizeof(lapacke_st_msg),    \
+                     "LAPACKE Error (%d)",      \
+                     lapacke_info_);            \
+            AF_ERROR(lapacke_st_msg,            \
+                     AF_ERR_INTERNAL);          \
+        }                                       \
+    } while(0)
+
 #define CPU_LAPACK_FUNC_DEF(NAME)               \
     template<typename T>                        \
-    struct cpu_##NAME##_func;
-
-#define CPU_LAPACK_FUNC1(NAME, TYPE, X)                                 \
-    template<>                                                          \
-    struct cpu_##NAME##_func<TYPE>                                      \
-    {                                                                   \
-        template<typename... Args>                                      \
-            int                                                         \
-            operator() (Args... args)                                   \
-        {                                                               \
-            int err = LAPACK_NAME(X##NAME)(LAPACK_COL_MAJOR, args...);  \
-            if (err != 0) AF_ERROR("Error in "#NAME, AF_ERR_INTERNAL);  \
-            return err;                                                 \
-        }                                                               \
+    struct cpu_lapack_##NAME##_func;
+
+#define CPU_LAPACK_FUNC1(NAME, TYPE, X)                     \
+    template<>                                              \
+    struct cpu_lapack_##NAME##_func<TYPE>                   \
+    {                                                       \
+        template<typename... Args>                          \
+            int                                             \
+            operator() (Args... args)                       \
+        {                                                   \
+            return LAPACK_NAME(X##NAME)(LAPACK_COL_MAJOR,   \
+                                        args...);           \
+        }                                                   \
     };
 
-#define CPU_LAPACK_FUNC2(NAME, TYPE, X)                                 \
-    template<>                                                          \
-    struct cpu_##NAME##_func<TYPE>                                      \
-    {                                                                   \
-        template<typename... Args>                                      \
-            int                                                         \
-            operator() (Args... args)                                   \
-        {                                                               \
-            int err = LAPACK_NAME(X##NAME)(args...);                    \
-            if (err != 0) AF_ERROR("Error in "#NAME, AF_ERR_INTERNAL);  \
-            return err;                                                 \
-        }                                                               \
+#define CPU_LAPACK_FUNC2(NAME, TYPE, X)             \
+    template<>                                      \
+    struct cpu_lapack_##NAME##_func<TYPE>           \
+    {                                               \
+        template<typename... Args>                  \
+            int                                     \
+            operator() (Args... args)               \
+        {                                           \
+            return LAPACK_NAME(X##NAME)(args...);   \
+        }                                           \
     };
 
 #define CPU_LAPACK_FUNC3(NAME, TYPE, X)             \
     template<>                                      \
-    struct cpu_##NAME##_func<TYPE>                  \
+    struct cpu_lapack_##NAME##_func<TYPE>           \
     {                                               \
         template<typename... Args>                  \
             double                                  \
diff --git a/src/backend/opencl/magma/potrf.cpp b/src/backend/opencl/magma/potrf.cpp
index ddc4f46..d048ed4 100644
--- a/src/backend/opencl/magma/potrf.cpp
+++ b/src/backend/opencl/magma/potrf.cpp
@@ -151,10 +151,10 @@ magma_int_t magma_potrf_gpu(
 
     nb = magma_get_potrf_nb<Ty>(n);
 
-    gpu_gemm_func<Ty> gpu_gemm;
-    gpu_trsm_func<Ty> gpu_trsm;
-    gpu_herk_func<Ty> gpu_herk;
-    cpu_potrf_func<Ty> cpu_potrf;
+    gpu_blas_gemm_func<Ty> gpu_blas_gemm;
+    gpu_blas_trsm_func<Ty> gpu_blas_trsm;
+    gpu_blas_herk_func<Ty> gpu_blas_herk;
+    cpu_lapack_potrf_func<Ty> cpu_lapack_potrf;
 
 
     err = magma_malloc_cpu<Ty>( &work, nb*nb);
@@ -170,9 +170,9 @@ magma_int_t magma_potrf_gpu(
         // use unblocked code
         magma_getmatrix<Ty>(n, n, dA, dA_offset, ldda, work, n, queue);
 
-        cpu_potrf(
-                  uplo == MagmaUpper ? *MagmaUpperStr : *MagmaLowerStr,
-                  n, work, n);
+        LAPACKE_CHECK(cpu_lapack_potrf(
+                          uplo == MagmaUpper ? *MagmaUpperStr : *MagmaLowerStr,
+                          n, work, n));
 
         magma_setmatrix<Ty>(n, n, work, n, dA, dA_offset, ldda, queue);
     }
@@ -185,14 +185,14 @@ magma_int_t magma_potrf_gpu(
                 // apply all previous updates to diagonal block
                 jb = std::min(nb, n-j);
                 if (j > 0) {
-                    gpu_herk(
-                             clblasUpper, transType,
-                             jb, j,
-                             m_one,
-                             dA(0,j), ldda,
-                             one,
-                             dA(j,j), ldda,
-                             1, &queue, 0, nullptr, &blas_event);
+                    CLBLAS_CHECK(gpu_blas_herk(
+                                     clblasUpper, transType,
+                                     jb, j,
+                                     m_one,
+                                     dA(0,j), ldda,
+                                     one,
+                                     dA(j,j), ldda,
+                                     1, &queue, 0, nullptr, &blas_event));
                 }
 
                 // start asynchronous data transfer
@@ -200,21 +200,21 @@ magma_int_t magma_potrf_gpu(
 
                 // apply all previous updates to block row right of diagonal block
                 if (j+jb < n) {
-                    gpu_gemm(
-                             transType, clblasNoTrans,
-                             jb, n-j-jb, j,
-                             mz_one,
-                             dA(0, j   ), ldda,
-                             dA(0, j+jb), ldda,
-                             z_one,
-                             dA(j, j+jb), ldda,
-                             1, &queue, 0, nullptr, &blas_event);
+                    CLBLAS_CHECK(gpu_blas_gemm(
+                                     transType, clblasNoTrans,
+                                     jb, n-j-jb, j,
+                                     mz_one,
+                                     dA(0, j   ), ldda,
+                                     dA(0, j+jb), ldda,
+                                     z_one,
+                                     dA(j, j+jb), ldda,
+                                     1, &queue, 0, nullptr, &blas_event));
                 }
 
                 // simultaneous with above zgemm, transfer data, factor
                 // diagonal block on CPU, and test for positive definiteness
                 magma_event_sync(event);
-                *info =cpu_potrf( *MagmaUpperStr, jb, work, jb);
+                *info = cpu_lapack_potrf(*MagmaUpperStr, jb, work, jb); // keep info for the check below
 
                 if (*info != 0) {
                     assert(*info > 0);
@@ -227,14 +227,14 @@ magma_int_t magma_potrf_gpu(
                 // apply diagonal block to block row right of diagonal block
                 if (j+jb < n) {
                     magma_event_sync(event);
-                    gpu_trsm(
-                             clblasLeft, clblasUpper,
-                             transType, clblasNonUnit,
-                             jb, n-j-jb,
-                             z_one,
-                             dA(j, j   ), ldda,
-                             dA(j, j+jb), ldda,
-                             1, &queue, 0, nullptr, &blas_event);
+                    CLBLAS_CHECK(gpu_blas_trsm(
+                                     clblasLeft, clblasUpper,
+                                     transType, clblasNonUnit,
+                                     jb, n-j-jb,
+                                     z_one,
+                                     dA(j, j   ), ldda,
+                                     dA(j, j+jb), ldda,
+                                     1, &queue, 0, nullptr, &blas_event));
                 }
             }
         }
@@ -246,13 +246,13 @@ magma_int_t magma_potrf_gpu(
                 // apply all previous updates to diagonal block
                 jb = std::min(nb, n-j);
                 if (j>0) {
-                    gpu_herk(
-                             clblasLower, clblasNoTrans, jb, j,
-                             m_one,
-                             dA(j, 0), ldda,
-                             one,
-                             dA(j, j), ldda,
-                             1, &queue, 0, nullptr, &blas_event);
+                    CLBLAS_CHECK(gpu_blas_herk(
+                                     clblasLower, clblasNoTrans, jb, j,
+                                     m_one,
+                                     dA(j, 0), ldda,
+                                     one,
+                                     dA(j, j), ldda,
+                                     1, &queue, 0, nullptr, &blas_event));
                 }
 
                 // start asynchronous data transfer
@@ -260,22 +260,22 @@ magma_int_t magma_potrf_gpu(
 
                 // apply all previous updates to block column below diagonal block
                 if (j+jb < n) {
-                    gpu_gemm(
-                             clblasNoTrans, transType,
-                             n-j-jb, jb, j,
-                             mz_one,
-                             dA(j+jb, 0), ldda,
-                             dA(j,    0), ldda,
-                             z_one,
-                             dA(j+jb, j), ldda,
-                             1, &queue, 0, nullptr, &blas_event);
+                    CLBLAS_CHECK(gpu_blas_gemm(
+                                     clblasNoTrans, transType,
+                                     n-j-jb, jb, j,
+                                     mz_one,
+                                     dA(j+jb, 0), ldda,
+                                     dA(j,    0), ldda,
+                                     z_one,
+                                     dA(j+jb, j), ldda,
+                                     1, &queue, 0, nullptr, &blas_event));
                 }
 
                 // simultaneous with above zgemm, transfer data, factor
                 // diagonal block on CPU, and test for positive definiteness
                 magma_event_sync(event);
-                *info = cpu_potrf(
-                                  *MagmaLowerStr, jb, work, jb);
+                *info = cpu_lapack_potrf(   // keep info for the check below
+                                  *MagmaLowerStr, jb, work, jb);
                 if (*info != 0) {
                     assert(*info > 0);
                     *info += j;
@@ -286,13 +286,13 @@ magma_int_t magma_potrf_gpu(
                 // apply diagonal block to block column below diagonal
                 if (j+jb < n) {
                     magma_event_sync(event);
-                    gpu_trsm(
-                             clblasRight, clblasLower, transType, clblasNonUnit,
-                             n-j-jb, jb,
-                             z_one,
-                             dA(j   , j), ldda,
-                             dA(j+jb, j), ldda,
-                             1, &queue, 0, nullptr, &blas_event);
+                    CLBLAS_CHECK(gpu_blas_trsm(
+                                     clblasRight, clblasLower, transType, clblasNonUnit,
+                                     n-j-jb, jb,
+                                     z_one,
+                                     dA(j   , j), ldda,
+                                     dA(j+jb, j), ldda,
+                                     1, &queue, 0, nullptr, &blas_event));
                 }
             }
         }
diff --git a/src/backend/opencl/magma/ungqr.cpp b/src/backend/opencl/magma/ungqr.cpp
index 49a120a..5ea05ac 100644
--- a/src/backend/opencl/magma/ungqr.cpp
+++ b/src/backend/opencl/magma/ungqr.cpp
@@ -52,7 +52,6 @@
  **********************************************************************/
 
 #include "magma.h"
-#include "magma_blas.h"
 #include "magma_data.h"
 #include "magma_cpu_lapack.h"
 #include "magma_helper.h"
@@ -137,7 +136,7 @@ magma_ungqr_gpu(
     cl_mem dW;
     magma_malloc<Ty>(&dW, (((n+31)/32)*32)*nb);
 
-    cpu_ungqr_work_func<Ty> cpu_ungqr;
+    cpu_lapack_ungqr_work_func<Ty> cpu_lapack_ungqr;
 
     // Use unblocked code for the last or only block.
     if (kk < n) {
@@ -147,10 +146,10 @@ magma_ungqr_gpu(
         magma_getmatrix<Ty>(m_kk, k_kk,
                             dA(kk, kk), ldda, panel, m_kk, queue);
 
-        cpu_ungqr(
-                  m_kk, n_kk, k_kk,
-                  panel, m_kk,
-                  &tau[kk], work, lwork);
+        LAPACKE_CHECK(cpu_lapack_ungqr(
+                          m_kk, n_kk, k_kk,
+                          panel, m_kk,
+                          &tau[kk], work, lwork));
 
         magma_setmatrix<Ty>(m_kk, n_kk,
                             panel, m_kk, dA(kk, kk), ldda, queue);
diff --git a/src/backend/opencl/magma/unmqr.cpp b/src/backend/opencl/magma/unmqr.cpp
index ed69e51..366810e 100644
--- a/src/backend/opencl/magma/unmqr.cpp
+++ b/src/backend/opencl/magma/unmqr.cpp
@@ -52,7 +52,6 @@
  **********************************************************************/
 
 #include "magma.h"
-#include "magma_blas.h"
 #include "magma_data.h"
 #include "magma_cpu_lapack.h"
 #include "magma_helper.h"
@@ -227,7 +226,7 @@ magma_unmqr_gpu(
 
     magma_malloc<Ty>(&dwork, (((n+31)/32)*32)*nb);
 
-    cpu_unmqr_work_func<Ty> cpu_unmqr;
+    cpu_lapack_unmqr_work_func<Ty> cpu_lapack_unmqr;
 
     if ( (left && (! notran)) || ( (!left) && notran ) ) {
         i1 = 0;
@@ -283,13 +282,13 @@ magma_unmqr_gpu(
         magma_getmatrix<Ty>(ma, ib, a_ref(i,  i ), ldda, hA, ma, queue);
         magma_getmatrix<Ty>(mi, ni, c_ref(ic, jc), lddc, hC, mi, queue);
 
-        *info = cpu_unmqr(
+        LAPACKE_CHECK(cpu_lapack_unmqr(
                           side == MagmaRight ? 'R' : 'L',
                           notran ? 'N' : (is_real ? 'T' : 'C'),
                           mi, ni, ib,
                           hA, ma, tau+i,
                           hC, mi,
-                          hW, lhwork);
+                          hW, lhwork));
 
         // send the updated part of C back to the GPU
         magma_setmatrix<Ty>( mi, ni, hC, mi, c_ref(ic, jc), lddc, queue);
@@ -351,13 +350,13 @@ magma_unmqr_gpu(
         magma_getmatrix<Ty>(ma, ib, a_ref(i,  i ), ldda, hA, ma, queue);
         magma_getmatrix<Ty>(mi, ni, c_ref(ic, jc), lddc, hC, mi, queue);
 
-        *info = cpu_unmqr(
+        LAPACKE_CHECK(cpu_lapack_unmqr(
                           side == MagmaRight ? 'R' : 'L',
                           notran ? 'N' : (is_real ? 'T' : 'C'),
                           mi, ni, ib,
                           hA, ma, tau+i,
                           hC, mi,
-                          hW, lhwork);
+                          hW, lhwork));
 
         // send the updated part of C back to the GPU
         magma_setmatrix<Ty>(mi, ni, hC, mi, c_ref(ic, jc), lddc, queue);
diff --git a/src/backend/opencl/magma/unmqr2.cpp b/src/backend/opencl/magma/unmqr2.cpp
index 4da4143..0cfc275 100644
--- a/src/backend/opencl/magma/unmqr2.cpp
+++ b/src/backend/opencl/magma/unmqr2.cpp
@@ -251,7 +251,7 @@ magma_unmqr2_gpu(
         ic = 1;
     }
 
-    cpu_larft_func<Ty> cpu_larft;
+    cpu_lapack_larft_func<Ty> cpu_lapack_larft;
 
     // set nb-1 super-diagonals to 0, and diagonal to 1.
     // This way we can copy V directly to the GPU,
@@ -265,10 +265,10 @@ magma_unmqr2_gpu(
         /* Form the triangular factor of the block reflector
            H = H(i) H(i+1) . . . H(i+ib-1) */
         i__4 = nq - i + 1;
-        cpu_larft(
-                  *MagmaForwardStr, *MagmaColumnwiseStr,
-                  i__4, ib,
-                  wA(i,i), ldwa, &tau[i], T, ib);
+        LAPACKE_CHECK(cpu_lapack_larft(
+                          *MagmaForwardStr, *MagmaColumnwiseStr,
+                          i__4, ib,
+                          wA(i,i), ldwa, &tau[i], T, ib));
 
         if (left) {
             /* H or H' is applied to C(i:m,1:n) */
diff --git a/src/backend/opencl/solve.cpp b/src/backend/opencl/solve.cpp
index fa101e8..6d2bea4 100644
--- a/src/backend/opencl/solve.cpp
+++ b/src/backend/opencl/solve.cpp
@@ -89,7 +89,7 @@ Array<T> leastSquares(const Array<T> &a, const Array<T> &b)
     int MN = std::min(M, N);
 
     Array<T> B = createEmptyArray<T>(dim4());
-    gpu_trsm_func<T> gpu_trsm;
+    gpu_blas_trsm_func<T> gpu_blas_trsm;
 
     cl_event event;
     cl_command_queue queue = getQueue()();
@@ -137,14 +137,14 @@ Array<T> leastSquares(const Array<T> &a, const Array<T> &b)
                               (*dA)(), A.getOffset(), A.strides()[1], 1,
                               (*dT)(), tmp.getOffset() + MN * NB, NB, 0, queue);
 
-        gpu_trsm(
-                 clblasLeft, clblasUpper,
-                 clblasConjTrans, clblasNonUnit,
-                 B.dims()[0], B.dims()[1],
-                 scalar<T>(1),
-                 (*dA)(), A.getOffset(), A.strides()[1],
-                 (*dB)(), B.getOffset(), B.strides()[1],
-                 1, &queue, 0, nullptr, &event);
+        CLBLAS_CHECK(gpu_blas_trsm(
+                         clblasLeft, clblasUpper,
+                         clblasConjTrans, clblasNonUnit,
+                         B.dims()[0], B.dims()[1],
+                         scalar<T>(1),
+                         (*dA)(), A.getOffset(), A.strides()[1],
+                         (*dB)(), B.getOffset(), B.strides()[1],
+                         1, &queue, 0, nullptr, &event));
 
         magmablas_swapdblk<T>(MN - 1, NB,
                               (*dT)(), tmp.getOffset() + MN * NB, NB, 0,
@@ -225,19 +225,19 @@ Array<T> leastSquares(const Array<T> &a, const Array<T> &b)
         {
             Array<T> AT = transpose<T>(A, true);
             cl::Buffer* AT_buf = AT.get();
-            gpu_trsm(
-                     clblasLeft, clblasLower, clblasConjTrans, clblasNonUnit,
-                     N, NRHS, scalar<T>(1),
-                     (*AT_buf)(), AT.getOffset(), AT.strides()[1],
-                     (*B_buf)(), B.getOffset(), B.strides()[1],
-                     1, &queue, 0, nullptr, &event);
+            CLBLAS_CHECK(gpu_blas_trsm(
+                             clblasLeft, clblasLower, clblasConjTrans, clblasNonUnit,
+                             N, NRHS, scalar<T>(1),
+                             (*AT_buf)(), AT.getOffset(), AT.strides()[1],
+                             (*B_buf)(), B.getOffset(), B.strides()[1],
+                             1, &queue, 0, nullptr, &event));
         } else {
-            gpu_trsm(
-                     clblasLeft, clblasUpper, clblasNoTrans, clblasNonUnit,
-                     N, NRHS, scalar<T>(1),
-                     (*A_buf)(), A.getOffset(), A.strides()[1],
-                     (*B_buf)(), B.getOffset(), B.strides()[1],
-                     1, &queue, 0, nullptr, &event);
+            CLBLAS_CHECK(gpu_blas_trsm(
+                             clblasLeft, clblasUpper, clblasNoTrans, clblasNonUnit,
+                             N, NRHS, scalar<T>(1),
+                             (*A_buf)(), A.getOffset(), A.strides()[1],
+                             (*B_buf)(), B.getOffset(), B.strides()[1],
+                             1, &queue, 0, nullptr, &event));
         }
         B.resetDims(dim4(N, K));
     }
@@ -248,7 +248,7 @@ Array<T> leastSquares(const Array<T> &a, const Array<T> &b)
 template<typename T>
 Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop options)
 {
-    gpu_trsm_func<T> gpu_trsm;
+    gpu_blas_trsm_func<T> gpu_blas_trsm;
 
     Array<T> B = copyArray<T>(b);
 
@@ -267,25 +267,25 @@ Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop o
         Array<T> AT = transpose<T>(A, true);
 
         cl::Buffer* AT_buf = AT.get();
-        gpu_trsm(
-                 clblasLeft,
-                 clblasLower,
-                 clblasConjTrans,
-                 options & AF_MAT_DIAG_UNIT ? clblasUnit : clblasNonUnit,
-                 N, NRHS, scalar<T>(1),
-                 (*AT_buf)(), AT.getOffset(), AT.strides()[1],
-                 (*B_buf)(), B.getOffset(), B.strides()[1],
-                 1, &queue, 0, nullptr, &event);
+        CLBLAS_CHECK(gpu_blas_trsm(
+                         clblasLeft,
+                         clblasLower,
+                         clblasConjTrans,
+                         options & AF_MAT_DIAG_UNIT ? clblasUnit : clblasNonUnit,
+                         N, NRHS, scalar<T>(1),
+                         (*AT_buf)(), AT.getOffset(), AT.strides()[1],
+                         (*B_buf)(), B.getOffset(), B.strides()[1],
+                         1, &queue, 0, nullptr, &event));
     } else {
-        gpu_trsm(
-                 clblasLeft,
-                 options & AF_MAT_LOWER ? clblasLower : clblasUpper,
-                 clblasNoTrans,
-                 options & AF_MAT_DIAG_UNIT ? clblasUnit : clblasNonUnit,
-                 N, NRHS, scalar<T>(1),
-                 (*A_buf)(), A.getOffset(), A.strides()[1],
-                 (*B_buf)(), B.getOffset(), B.strides()[1],
-                 1, &queue, 0, nullptr, &event);
+        CLBLAS_CHECK(gpu_blas_trsm(
+                         clblasLeft,
+                         options & AF_MAT_LOWER ? clblasLower : clblasUpper,
+                         clblasNoTrans,
+                         options & AF_MAT_DIAG_UNIT ? clblasUnit : clblasNonUnit,
+                         N, NRHS, scalar<T>(1),
+                         (*A_buf)(), A.getOffset(), A.strides()[1],
+                         (*B_buf)(), B.getOffset(), B.strides()[1],
+                         1, &queue, 0, nullptr, &event));
     }
 
     return B;
diff --git a/src/backend/opencl/svd.cpp b/src/backend/opencl/svd.cpp
index ca1378d..1467d9e 100644
--- a/src/backend/opencl/svd.cpp
+++ b/src/backend/opencl/svd.cpp
@@ -28,7 +28,7 @@ Tr calc_scale(Tr From, Tr To)
     //FIXME: I am not sure this is correct, removing this for now
 #if 0
     //http://www.netlib.org/lapack/explore-3.1.1-html/dlascl.f.html
-    cpu_lamch_func<Tr> cpu_lapack_lamch;
+    cpu_lapack_lamch_func<Tr> cpu_lapack_lamch;
 
     Tr S = cpu_lapack_lamch('S');
     Tr B = 1.0 / S;
@@ -79,10 +79,10 @@ void svd(Array<T > &arrU,
     const int nb   = magma_get_gebrd_nb<T>(n);
     const int lwork = (m + n) * nb;
 
-    cpu_lacpy_func<T> cpu_lapack_lacpy;
-    cpu_bdsqr_work_func<T> cpu_lapack_bdsqr_work;
-    cpu_ungbr_work_func<T> cpu_lapack_ungbr_work;
-    cpu_lamch_func<Tr> cpu_lapack_lamch;
+    cpu_lapack_lacpy_func<T> cpu_lapack_lacpy;
+    cpu_lapack_bdsqr_work_func<T> cpu_lapack_bdsqr_work;
+    cpu_lapack_ungbr_work_func<T> cpu_lapack_ungbr_work;
+    cpu_lapack_lamch_func<Tr> cpu_lapack_lamch;
 
     // Get machine constants
     static const double eps = cpu_lapack_lamch('P');
@@ -144,17 +144,17 @@ void svd(Array<T > &arrU,
         // and generate left bidiagonalizing vectors in U
         // (CWorkspace: need 2*N + NCU, prefer 2*N + NCU*NB)
         // (RWorkspace: 0)
-        cpu_lapack_lacpy('L', m, n, &A[0], lda, &U[0], ldu);
+        LAPACKE_CHECK(cpu_lapack_lacpy('L', m, n, &A[0], lda, &U[0], ldu));
 
         int ncu = m;
-        cpu_lapack_ungbr_work('Q', m, ncu, n, &U[0], ldu, &tauq[0], &work[0], lwork);
+        LAPACKE_CHECK(cpu_lapack_ungbr_work('Q', m, ncu, n, &U[0], ldu, &tauq[0], &work[0], lwork));
 
         // If right singular vectors desired in VT, copy result to
         // VT and generate right bidiagonalizing vectors in VT
         // (CWorkspace: need 3*N-1, prefer 2*N + (N-1)*NB)
         // (RWorkspace: 0)
-        cpu_lapack_lacpy('U', n, n, &A[0], lda, &VT[0], ldvt);
-        cpu_lapack_ungbr_work('P', n, n, n, &VT[0], ldvt, &taup[0], &work[0], lwork);
+        LAPACKE_CHECK(cpu_lapack_lacpy('U', n, n, &A[0], lda, &VT[0], ldvt));
+        LAPACKE_CHECK(cpu_lapack_ungbr_work('P', n, n, n, &VT[0], ldvt, &taup[0], &work[0], lwork));
 
         nru = m;
         ncvt = n;
@@ -165,8 +165,9 @@ void svd(Array<T > &arrU,
     // vectors in VT
     // (CWorkspace: need 0)
     // (RWorkspace: need BDSPAC)
-    cpu_lapack_bdsqr_work('U', n, ncvt, nru, izero, &s0[0], &s1[0], &VT[0], ldvt, &U[0], ldu,
-                          &cdummy[0], ione, &work[0]);
+    LAPACKE_CHECK(cpu_lapack_bdsqr_work('U', n, ncvt, nru, izero,
+                                        &s0[0], &s1[0], &VT[0], ldvt, &U[0], ldu,
+                                        &cdummy[0], ione, &work[0]));
 
 
     if (want_vectors) {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list