[arrayfire] 305/408: Initial support for SVD in OpenCL backend
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:19 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 73b808193eef3d1d7cb837f6611fac777c935ea3
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Thu Aug 20 20:12:38 2015 -0400
Initial support for SVD in OpenCL backend
- Compiles and runs but errors out at certain sizes
---
src/api/c/svd.cpp | 8 +-
src/backend/opencl/platform.hpp | 1 +
src/backend/opencl/svd.cpp | 204 +++++++++++++++++++++++++++++++++++++++-
3 files changed, 203 insertions(+), 10 deletions(-)
diff --git a/src/api/c/svd.cpp b/src/api/c/svd.cpp
index fc465dd..84f859c 100644
--- a/src/api/c/svd.cpp
+++ b/src/api/c/svd.cpp
@@ -65,11 +65,9 @@ af_err af_svd(af_array *s, af_array *u, af_array *vt, const af_array in)
{
try {
ArrayInfo info = getInfo(in);
-
af::dim4 dims = info.dims();
- ARG_ASSERT(2, (dims.ndims() >= 0 && dims.ndims() <= 3));
-
+ ARG_ASSERT(3, (dims.ndims() >= 0 && dims.ndims() <= 3));
af_dtype type = info.getType();
switch (type) {
@@ -91,10 +89,10 @@ af_err af_svd_inplace(af_array *s, af_array *u, af_array *vt, af_array in)
{
try {
ArrayInfo info = getInfo(in);
-
af::dim4 dims = info.dims();
- ARG_ASSERT(2, (dims.ndims() >= 0 && dims.ndims() <= 3));
+ DIM_ASSERT(3, dims[0] <= dims[1]);
+ ARG_ASSERT(3, (dims.ndims() >= 0 && dims.ndims() <= 3));
af_dtype type = info.getType();
diff --git a/src/backend/opencl/platform.hpp b/src/backend/opencl/platform.hpp
index 7676871..8d3a1d0 100644
--- a/src/backend/opencl/platform.hpp
+++ b/src/backend/opencl/platform.hpp
@@ -11,6 +11,7 @@
#if defined(WITH_GRAPHICS)
#include <fg/window.h>
#endif
+
#include <cl.hpp>
#include <vector>
#include <string>
diff --git a/src/backend/opencl/svd.cpp b/src/backend/opencl/svd.cpp
index 406242e..ca1378d 100644
--- a/src/backend/opencl/svd.cpp
+++ b/src/backend/opencl/svd.cpp
@@ -8,15 +8,209 @@
********************************************************/
#include <Array.hpp>
+#include <svd.hpp> // opencl backend function header
+#include <err_opencl.hpp> // error check functions and Macros
+#include <reduce.hpp>
+#include <copy.hpp>
+#include <blas.hpp>
-#include <svd.hpp> // opencl backend function header
-
-#include <err_opencl.hpp> // error check functions and Macros
- // specific to opencl backend
+#include <magma/magma.h>
+#include <magma/magma_cpu_lapack.h>
+#include <magma/magma_helper.h>
+#if defined(WITH_OPENCL_LINEAR_ALGEBRA)
namespace opencl
{
+template<typename Tr>
+Tr calc_scale(Tr From, Tr To)
+{
+ //FIXME: I am not sure this is correct, removing this for now
+#if 0
+ //http://www.netlib.org/lapack/explore-3.1.1-html/dlascl.f.html
+ cpu_lamch_func<Tr> cpu_lapack_lamch;
+
+ Tr S = cpu_lapack_lamch('S');
+ Tr B = 1.0 / S;
+
+ Tr FromCopy = From, ToCopy = To;
+
+ Tr Mul = 1;
+
+ while (true) {
+ Tr From1 = FromCopy * S, To1 = ToCopy / B;
+ if (std::abs(From1) > std::abs(ToCopy) && ToCopy != 0) {
+ Mul *= S;
+ FromCopy = From1;
+ } else if (std::abs(To1) > std::abs(FromCopy)) {
+ Mul *= B;
+ ToCopy = To1;
+ } else {
+ Mul *= (ToCopy) / (FromCopy);
+ break;
+ }
+ }
+
+ return Mul;
+#else
+ return To / From;
+#endif
+}
+
+template<typename T, typename Tr>
+void svd(Array<T > &arrU,
+ Array<Tr> &arrS,
+ Array<T > &arrVT,
+ Array<T > &arrA,
+ bool want_vectors=true)
+{
+
+ dim4 idims = arrA.dims();
+ dim4 istrides = arrA.dims();
+
+ const int m = (int)idims[0];
+ const int n = (int)idims[1];
+ const int ldda = (int)istrides[1];
+ const int lda = m;
+ const int min_mn = std::min(m, n);
+ const int ldu = m;
+ const int ldvt = n;
+
+ const int nb = magma_get_gebrd_nb<T>(n);
+ const int lwork = (m + n) * nb;
+
+ cpu_lacpy_func<T> cpu_lapack_lacpy;
+ cpu_bdsqr_work_func<T> cpu_lapack_bdsqr_work;
+ cpu_ungbr_work_func<T> cpu_lapack_ungbr_work;
+ cpu_lamch_func<Tr> cpu_lapack_lamch;
+
+ // Get machine constants
+ static const double eps = cpu_lapack_lamch('P');
+ static const double smlnum = std::sqrt(cpu_lapack_lamch('S')) / eps;
+ static const double bignum = 1. / smlnum;
+
+ Tr anrm = abs(reduce_all<af_max_t, T, T>(arrA));
+
+ T scale = scalar<T>(1);
+ static const int ione = 1;
+ static const int izero = 0;
+
+
+ bool iscl = 0;
+ if (anrm > 0. && anrm < smlnum) {
+ iscl = 1;
+ scale = scalar<T>(calc_scale<Tr>(anrm, smlnum));
+ } else if (anrm > bignum) {
+ iscl = 1;
+ scale = scalar<T>(calc_scale<Tr>(anrm, bignum));
+ }
+
+ if (iscl == 1) {
+ multiply_inplace(arrA, abs(scale));
+ }
+
+ int nru = 0;
+ int ncvt = 0;
+
+ std::vector<T> A(m * n);
+ std::vector<Tr> s0(min_mn), s1(min_mn - 1);
+ std::vector<T> tauq(min_mn), taup(min_mn), work(lwork);
+
+ int info = 0;
+
+ copyData(&A[0], arrA);
+
+
+ // Bidiagonalize A
+ // (CWorkspace: need 2*N + M, prefer 2*N + (M + N)*NB)
+ // (RWorkspace: need N)
+ magma_gebrd_hybrid<T>(m, n,
+ &A[0], lda,
+ (*arrA.get())(), arrA.getOffset(), ldda,
+ (void *)&s0[0], (void *)&s1[0],
+ &tauq[0], &taup[0],
+ &work[0], lwork,
+ getQueue()(), &info, false);
+
+ std::vector<T> U(1), VT(1);
+ std::vector<T> cdummy(1);
+
+ if (want_vectors) {
+
+ U = std::vector<T>(m * m);
+ VT = std::vector<T>(n * n);
+
+ // If left singular vectors desired in U, copy result to U
+ // and generate left bidiagonalizing vectors in U
+ // (CWorkspace: need 2*N + NCU, prefer 2*N + NCU*NB)
+ // (RWorkspace: 0)
+ cpu_lapack_lacpy('L', m, n, &A[0], lda, &U[0], ldu);
+
+ int ncu = m;
+ cpu_lapack_ungbr_work('Q', m, ncu, n, &U[0], ldu, &tauq[0], &work[0], lwork);
+
+ // If right singular vectors desired in VT, copy result to
+ // VT and generate right bidiagonalizing vectors in VT
+ // (CWorkspace: need 3*N-1, prefer 2*N + (N-1)*NB)
+ // (RWorkspace: 0)
+ cpu_lapack_lacpy('U', n, n, &A[0], lda, &VT[0], ldvt);
+ cpu_lapack_ungbr_work('P', n, n, n, &VT[0], ldvt, &taup[0], &work[0], lwork);
+
+ nru = m;
+ ncvt = n;
+ }
+
+ // Perform bidiagonal QR iteration, if desired, computing
+ // left singular vectors in U and computing right singular
+ // vectors in VT
+ // (CWorkspace: need 0)
+ // (RWorkspace: need BDSPAC)
+ cpu_lapack_bdsqr_work('U', n, ncvt, nru, izero, &s0[0], &s1[0], &VT[0], ldvt, &U[0], ldu,
+ &cdummy[0], ione, &work[0]);
+
+
+ if (want_vectors) {
+ writeHostDataArray(arrU, &U[0], arrU.elements() * sizeof(T));
+ writeHostDataArray(arrVT, &VT[0], arrVT.elements() * sizeof(T));
+ }
+
+ writeHostDataArray(arrS, &s0[0], arrS.elements() * sizeof(Tr));
+
+ if (iscl == 1) {
+ Tr rscale = scalar<Tr>(1);
+ if (anrm > bignum) {
+ rscale = calc_scale<Tr>(bignum, anrm);
+ } else if (anrm < smlnum) {
+ rscale = calc_scale<Tr>(smlnum, anrm);
+ }
+ multiply_inplace(arrS, rscale);
+ }
+}
+
+
+template<typename T>
+void svdInPlace(Array<T> &s, Array<T> &u, Array<T> &vt, Array<T> &in)
+{
+ initBlas();
+ dim4 iDims = in.dims();
+ int M = iDims[0];
+ int N = iDims[1];
+
+ if (M < N) OPENCL_NOT_SUPPORTED();
+
+ typedef typename af::dtype_traits<T>::base_type Tr;
+ svd<T, Tr>(u, s, vt, in, true);
+}
+
+template<typename T>
+void svd(Array<T> &s, Array<T> &u, Array<T> &vt, const Array<T> &in)
+{
+ Array<T> in_copy = copyArray(in);
+ return svdInPlace(s, u, vt, in_copy);
+}
+
+#else
+
template<typename T>
void svd(Array<T> &s, Array<T> &u, Array<T> &vt, const Array<T> &in)
{
@@ -28,7 +222,7 @@ void svdInPlace(Array<T> &s, Array<T> &u, Array<T> &vt, Array<T> &in)
{
OPENCL_NOT_SUPPORTED();
}
-
+#endif
#define INSTANTIATE(T) \
template void svd(Array<T> &s, Array<T> &u, Array<T> &vt, const Array<T> &in); \
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list