[arrayfire] 164/284: Added matmul offloading to CPU
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:29 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 3c1ab9f0902a37bd7b3c31bc533790d950407b52
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Thu Jan 7 16:10:43 2016 -0500
Added matmul offloading to CPU
---
src/backend/cpu/blas.hpp | 6 +
src/backend/opencl/CMakeLists.txt | 12 +
src/backend/opencl/blas.cpp | 6 +
src/backend/opencl/cpu/cpu_blas.cpp | 268 +++++++++++++++++++++
src/backend/opencl/cpu/cpu_blas.hpp | 23 ++
.../{cpu/blas.hpp => opencl/cpu/cpu_helper.hpp} | 14 +-
test/blas.cpp | 1 +
7 files changed, 322 insertions(+), 8 deletions(-)
diff --git a/src/backend/cpu/blas.hpp b/src/backend/cpu/blas.hpp
index 117d3a2..934a2c6 100644
--- a/src/backend/cpu/blas.hpp
+++ b/src/backend/cpu/blas.hpp
@@ -45,4 +45,10 @@ template<typename T>
Array<T> dot(const Array<T> &lhs, const Array<T> &rhs,
af_mat_prop optLhs, af_mat_prop optRhs);
+typedef std::complex<float> cfloat;
+typedef std::complex<double> cdouble;
+// is_complex<T>::value is true only for cfloat and cdouble, false for every other T.
+template<typename T> struct is_complex { static const bool value = false; };
+template<> struct is_complex<cfloat> { static const bool value = true; };
+template<> struct is_complex<cdouble> { static const bool value = true; };
}
diff --git a/src/backend/opencl/CMakeLists.txt b/src/backend/opencl/CMakeLists.txt
index 86ba1b2..c9c47d0 100644
--- a/src/backend/opencl/CMakeLists.txt
+++ b/src/backend/opencl/CMakeLists.txt
@@ -123,6 +123,12 @@ FILE(GLOB conv_ker_headers
FILE(GLOB conv_ker_sources
"kernel/convolve/*.cpp")
+FILE(GLOB cpu_headers
+ "cpu/*.hpp")
+
+FILE(GLOB cpu_sources
+ "cpu/*.cpp")
+
source_group(backend\\opencl\\Headers FILES ${opencl_headers})
source_group(backend\\opencl\\Sources FILES ${opencl_sources})
source_group(backend\\opencl\\JIT FILES ${jit_sources})
@@ -131,6 +137,8 @@ source_group(backend\\opencl\\kernel\\cl FILES ${opencl_kernels})
source_group(backend\\opencl\\kernel\\Sources FILES ${kernel_sources})
source_group(backend\\opencl\\kernel\\convolve\\Headers FILES ${conv_ker_headers})
source_group(backend\\opencl\\kernel\\convolve\\Sources FILES ${conv_ker_sources})
+source_group(backend\\opencl\\cpu\\Headers FILES ${cpu_headers})
+source_group(backend\\opencl\\cpu\\Sources FILES ${cpu_sources})
IF(LAPACK_FOUND)
FILE(GLOB magma_sources
@@ -206,6 +214,8 @@ IF(DEFINED BLAS_SYM_FILE)
${kernel_sources}
${conv_ker_headers}
${conv_ker_sources}
+ ${cpu_headers}
+ ${cpu_sources}
${backend_headers}
${backend_sources}
${magma_sources}
@@ -244,6 +254,8 @@ ELSE(DEFINED BLAS_SYM_FILE)
${kernel_sources}
${conv_ker_headers}
${conv_ker_sources}
+ ${cpu_headers}  # headers first, then sources, as in the BLAS_SYM_FILE branch
+ ${cpu_sources}
${backend_headers}
${backend_sources}
${c_headers}
diff --git a/src/backend/opencl/blas.cpp b/src/backend/opencl/blas.cpp
index 6173a68..f9f8af1 100644
--- a/src/backend/opencl/blas.cpp
+++ b/src/backend/opencl/blas.cpp
@@ -20,6 +20,8 @@
#include <math.hpp>
#include <transpose.hpp>
+#include <cpu/cpu_blas.hpp>
+
namespace opencl
{
@@ -113,6 +115,10 @@ template<typename T>
Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs,
af_mat_prop optLhs, af_mat_prop optRhs)
{
+ if(OpenCLCPUOffload()) {
+ return cpu::matmul(lhs, rhs, optLhs, optRhs);
+ }
+
initBlas();
clblasTranspose lOpts = toClblasTranspose(optLhs);
clblasTranspose rOpts = toClblasTranspose(optRhs);
diff --git a/src/backend/opencl/cpu/cpu_blas.cpp b/src/backend/opencl/cpu/cpu_blas.cpp
new file mode 100644
index 0000000..524777a
--- /dev/null
+++ b/src/backend/opencl/cpu/cpu_blas.cpp
@@ -0,0 +1,268 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <cpu/cpu_blas.hpp>
+#include <af/dim4.hpp>
+#include <handle.hpp>
+#include <cassert>
+#include <err_common.hpp>
+#include <platform.hpp>
+#include <af/macros.h>
+
+namespace opencl
+{
+namespace cpu
+{
+
+using std::add_const;
+using std::add_pointer;
+using std::enable_if;
+using std::is_floating_point;
+using std::remove_const;
+using std::conditional;
+
+// Some implementations of BLAS require void* for complex pointers while others use float*/double*
+//
+// Sample cgemm API
+// OpenBLAS
+// void cblas_cgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB,
+// OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
+// OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda,
+// OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta,
+// float *C, OPENBLAS_CONST blasint ldc);
+//
+// MKL
+// void cblas_cgemm(const CBLAS_LAYOUT Layout, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB,
+// const MKL_INT M, const MKL_INT N, const MKL_INT K,
+// const void *alpha, const void *A, const MKL_INT lda,
+// const void *B, const MKL_INT ldb, const void *beta,
+// void *C, const MKL_INT ldc);
+// atlas cblas
+// void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+// const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
+// const void *alpha, const void *A, const int lda,
+// const void *B, const int ldb, const void *beta,
+// void *C, const int ldc);
+//
+// LAPACKE
+// void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+// const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
+// const void *alpha, const void *A, const int lda,
+// const void *B, const int ldb, const void *beta,
+// void *C, const int ldc);
+#if defined(IS_OPENBLAS)
+ static const bool cplx_void_ptr = false; // OpenBLAS: complex arguments are float*/double*
+#else
+ static const bool cplx_void_ptr = true; // other BLAS listed above: complex arguments are void*
+#endif
+// Pointee type of the BLAS pointer arguments for T: dtype_traits<T>::base_type by default.
+template<typename T, class Enable = void>
+struct blas_base {
+ using type = typename dtype_traits<T>::base_type;
+};
+// Complex T while cplx_void_ptr is true: the BLAS pointer arguments are void*.
+template<typename T>
+struct blas_base <T, typename enable_if<is_complex<T>::value && cplx_void_ptr>::type> {
+ using type = void;
+};
+// Argument types used by gemm_func_def / gemv_func_def for element type T: cptr_type is a
+// const pointer, ptr_type a mutable pointer, scale_type a value for real T and a pointer for complex T.
+template<typename T>
+using cptr_type = typename conditional< is_complex<T>::value,
+ const typename blas_base<T>::type *,
+ const T*>::type;
+template<typename T>
+using ptr_type = typename conditional< is_complex<T>::value,
+ typename blas_base<T>::type *,
+ T*>::type;
+template<typename T>
+using scale_type = typename conditional< is_complex<T>::value,
+ const typename blas_base<T>::type *,
+ const T>::type;
+// Function-pointer type carrying the parameter list of the cblas gemm routine for element type T.
+template<typename T>
+using gemm_func_def = void (*)( const CBLAS_ORDER, const CBLAS_TRANSPOSE, const CBLAS_TRANSPOSE,
+ const blasint, const blasint, const blasint,
+ scale_type<T>, cptr_type<T>, const blasint,
+ cptr_type<T>, const blasint,
+ scale_type<T>, ptr_type<T>, const blasint);
+// Function-pointer type carrying the parameter list of the cblas gemv routine for element type T.
+template<typename T>
+using gemv_func_def = void (*)( const CBLAS_ORDER, const CBLAS_TRANSPOSE,
+ const blasint, const blasint,
+ scale_type<T>, cptr_type<T>, const blasint,
+ cptr_type<T>, const blasint,
+ scale_type<T>, ptr_type<T>, const blasint);
+// BLAS_FUNC_DEF declares the accessor template FUNC_func<T>(); BLAS_FUNC specialises it for one TYPE.
+#define BLAS_FUNC_DEF( FUNC ) \
+template<typename T> FUNC##_func_def<T> FUNC##_func();
+// Each specialisation returns the address of cblas_<PREFIX><FUNC>.
+#define BLAS_FUNC( FUNC, TYPE, PREFIX ) \
+ template<> FUNC##_func_def<TYPE> FUNC##_func<TYPE>() \
+{ return &cblas_##PREFIX##FUNC; }
+// Accessors for gemm (below) and gemv (further down), one per supported element type.
+BLAS_FUNC_DEF( gemm )
+BLAS_FUNC(gemm , float , s)
+BLAS_FUNC(gemm , double , d)
+BLAS_FUNC(gemm , cfloat , c)
+BLAS_FUNC(gemm , cdouble , z)
+
+BLAS_FUNC_DEF(gemv)
+BLAS_FUNC(gemv , float , s)
+BLAS_FUNC(gemv , double , d)
+BLAS_FUNC(gemv , cfloat , c)
+BLAS_FUNC(gemv , cdouble , z)
+
+// Real T: the scale factor is handed to BLAS by value.
+template<typename T, int value>
+typename enable_if<is_floating_point<T>::value, scale_type<T>>::type
+getScale() { return static_cast<T>(value); }
+// Complex T: handed over by pointer, so the value lives in a function-local static.
+template<typename T, int value>
+typename enable_if<is_complex<T>::value, scale_type<T>>::type
+getScale() {
+    static T scaleValue = scalar<T>(value);
+    return (const typename blas_base<T>::type *)&scaleValue;
+}
+
+// Translates an ArrayFire matrix property into the matching CBLAS transpose
+// flag; any property other than NONE / TRANS / CTRANS goes through AF_ERROR.
+CBLAS_TRANSPOSE toCblasTranspose(af_mat_prop opt)
+{
+    if (opt == AF_MAT_TRANS)  { return CblasTrans; }
+    if (opt == AF_MAT_CTRANS) { return CblasConjTrans; }
+    if (opt != AF_MAT_NONE) {
+        // Should AF_ERROR hand control back, the result is CblasNoTrans.
+        AF_ERROR("INVALID af_mat_prop", AF_ERR_ARG);
+    }
+    return CblasNoTrans;
+}
+
+// Multiplies op(lhs) by op(rhs) on the host: the memory of both operands and of
+// the result is mapped to host pointers and handed to the CBLAS gemv / gemm call.
+template<typename T>
+Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs,
+                af_mat_prop optLhs, af_mat_prop optRhs)
+{
+    const CBLAS_TRANSPOSE lOpts = toCblasTranspose(optLhs);
+    const CBLAS_TRANSPOSE rOpts = toCblasTranspose(optRhs);
+
+    const int aRowDim = (lOpts == CblasNoTrans) ? 0 : 1;
+    const int aColDim = (lOpts == CblasNoTrans) ? 1 : 0;
+    const int bColDim = (rOpts == CblasNoTrans) ? 1 : 0;
+
+    dim4 lDims = lhs.dims();
+    dim4 rDims = rhs.dims();
+    const int M = lDims[aRowDim];
+    const int N = rDims[bColDim];
+    const int K = lDims[aColDim];
+
+    //FIXME: Leaks on errors.
+    Array<T> out = createValueArray<T>(af::dim4(M, N, 1, 1), scalar<T>(0));
+    auto alpha = getScale<T, 1>();
+    auto beta = getScale<T, 0>();
+    dim4 lStrides = lhs.strides();
+    dim4 rStrides = rhs.strides();
+    using BT = typename blas_base<T>::type;
+
+    // get host pointers from mapped memory
+    BT *lPtr = getMappedPtr<BT>(lhs.get());
+    BT *rPtr = getMappedPtr<BT>(rhs.get());
+    BT *oPtr = getMappedPtr<BT>(out.get());
+
+    if(rDims[bColDim] == 1) {
+        // op(rhs) has a single column: matrix-vector product on lhs as stored
+        gemv_func<T>()(
+            CblasColMajor, lOpts,
+            lDims[0], lDims[1],
+            alpha,
+            lPtr, lStrides[1],
+            rPtr, rStrides[0],
+            beta,
+            oPtr, 1);
+    } else {
+        gemm_func<T>()(
+            CblasColMajor, lOpts, rOpts,
+            M, N, K,
+            alpha,
+            lPtr, lStrides[1],
+            rPtr, rStrides[1],
+            beta,
+            oPtr, out.dims()[0]);
+    }
+
+    unmapPtr(lhs.get(), lPtr);
+    unmapPtr(rhs.get(), rPtr);
+    unmapPtr(out.get(), oPtr);
+
+    return out;
+}
+
+//template<typename T> T
+//conj(T x) { return x; }
+//
+//template<> cfloat conj<cfloat> (cfloat c) { return std::conj(c); }
+//template<> cdouble conj<cdouble>(cdouble c) { return std::conj(c); }
+//
+//template<typename T, bool conjugate, bool both_conjugate>
+//Array<T> dot_(const Array<T> &lhs, const Array<T> &rhs,
+// af_mat_prop optLhs, af_mat_prop optRhs)
+//{
+// int N = lhs.dims()[0];
+//
+// T out = 0;
+// const T *pL = lhs.get();
+// const T *pR = rhs.get();
+//
+// for(int i = 0; i < N; i++)
+// out += (conjugate ? cpu::conj(pL[i]) : pL[i]) * pR[i];
+//
+// if(both_conjugate) out = cpu::conj(out);
+//
+// return createValueArray(af::dim4(1), out);
+//}
+//
+//template<typename T>
+//Array<T> dot(const Array<T> &lhs, const Array<T> &rhs,
+// af_mat_prop optLhs, af_mat_prop optRhs)
+//{
+// if(optLhs == AF_MAT_CONJ && optRhs == AF_MAT_CONJ) {
+// return dot_<T, false, true>(lhs, rhs, optLhs, optRhs);
+// } else if (optLhs == AF_MAT_CONJ && optRhs == AF_MAT_NONE) {
+// return dot_<T, true, false>(lhs, rhs, optLhs, optRhs);
+// } else if (optLhs == AF_MAT_NONE && optRhs == AF_MAT_CONJ) {
+// return dot_<T, true, false>(rhs, lhs, optRhs, optLhs);
+// } else {
+// return dot_<T, false, false>(lhs, rhs, optLhs, optRhs);
+// }
+//}
+
+// Explicit instantiations of matmul for the element types that have gemm/gemv
+// accessors (BLAS_FUNC) in this file.
+
+#define INSTANTIATE_BLAS(TYPE) \
+    template Array<TYPE> matmul<TYPE>(const Array<TYPE> &lhs, const Array<TYPE> &rhs, \
+                                      af_mat_prop optLhs, af_mat_prop optRhs);
+
+INSTANTIATE_BLAS(float)
+INSTANTIATE_BLAS(cfloat)
+INSTANTIATE_BLAS(double)
+INSTANTIATE_BLAS(cdouble)
+
+//#define INSTANTIATE_DOT(TYPE) \
+// template Array<TYPE> dot<TYPE>(const Array<TYPE> &lhs, const Array<TYPE> &rhs, \
+// af_mat_prop optLhs, af_mat_prop optRhs);
+//
+//INSTANTIATE_DOT(float)
+//INSTANTIATE_DOT(double)
+//INSTANTIATE_DOT(cfloat)
+//INSTANTIATE_DOT(cdouble)
+
+}
+}
diff --git a/src/backend/opencl/cpu/cpu_blas.hpp b/src/backend/opencl/cpu/cpu_blas.hpp
new file mode 100644
index 0000000..303b60c
--- /dev/null
+++ b/src/backend/opencl/cpu/cpu_blas.hpp
@@ -0,0 +1,23 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#pragma once
+#include <cpu/cpu_helper.hpp>
+
+namespace opencl {
+namespace cpu {
+    // Host BLAS matrix product; defined and explicitly instantiated for
+    // float, double, cfloat and cdouble in cpu_blas.cpp.
+    template<typename T>
+    Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs,
+                    af_mat_prop optLhs, af_mat_prop optRhs);
+//  template<typename T>
+//  Array<T> dot(const Array<T> &lhs, const Array<T> &rhs,
+//               af_mat_prop optLhs, af_mat_prop optRhs);
+}
+}
diff --git a/src/backend/cpu/blas.hpp b/src/backend/opencl/cpu/cpu_helper.hpp
similarity index 77%
copy from src/backend/cpu/blas.hpp
copy to src/backend/opencl/cpu/cpu_helper.hpp
index 117d3a2..afc60d3 100644
--- a/src/backend/cpu/blas.hpp
+++ b/src/backend/opencl/cpu/cpu_helper.hpp
@@ -9,7 +9,9 @@
#include <af/defines.h>
#include <af/blas.h>
+#include <af/lapack.h>
#include <Array.hpp>
+#include <memory.hpp>
#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
@@ -35,14 +37,10 @@ extern "C" {
typedef int blasint;
#endif
+namespace opencl
+{
namespace cpu
{
-
-template<typename T>
-Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs,
- af_mat_prop optLhs, af_mat_prop optRhs);
-template<typename T>
-Array<T> dot(const Array<T> &lhs, const Array<T> &rhs,
- af_mat_prop optLhs, af_mat_prop optRhs);
-
}
+}
+
diff --git a/test/blas.cpp b/test/blas.cpp
index 507cc6d..b5d92f1 100644
--- a/test/blas.cpp
+++ b/test/blas.cpp
@@ -36,6 +36,7 @@ template<typename T, bool isBVector>
void MatMulCheck(string TestFile)
{
if (noDoubleTests<T>()) return;
+ af::info();
using std::vector;
vector<af::dim4> numDims;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list