[arrayfire] 34/75: Force offload OSX LAPACK on unified memory devices
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Feb 29 08:01:13 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.
commit 3c06fa081f781d5d590fc6f9f00a76ea09b1d80a
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Tue Feb 16 14:59:18 2016 -0500
Force offload OSX LAPACK on unified memory devices
---
src/backend/opencl/blas.cpp | 2 +-
src/backend/opencl/platform.cpp | 17 ++++++++++++++---
src/backend/opencl/platform.hpp | 2 +-
3 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/src/backend/opencl/blas.cpp b/src/backend/opencl/blas.cpp
index 365e6e5..7753115 100644
--- a/src/backend/opencl/blas.cpp
+++ b/src/backend/opencl/blas.cpp
@@ -121,7 +121,7 @@ Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs,
af_mat_prop optLhs, af_mat_prop optRhs)
{
#if defined(WITH_OPENCL_LINEAR_ALGEBRA)
- if(OpenCLCPUOffload()) {
+ if(OpenCLCPUOffload(false)) { // Do not force offload gemm on OSX Intel devices
return cpu::matmul(lhs, rhs, optLhs, optRhs);
}
#endif
diff --git a/src/backend/opencl/platform.cpp b/src/backend/opencl/platform.cpp
index 6855e79..c2c13c7 100644
--- a/src/backend/opencl/platform.cpp
+++ b/src/backend/opencl/platform.cpp
@@ -514,11 +514,22 @@ bool isHostUnifiedMemory(const cl::Device &device)
return device.getInfo<CL_DEVICE_HOST_UNIFIED_MEMORY>();
}
-bool OpenCLCPUOffload()
+bool OpenCLCPUOffload(bool forceOffloadOSX)
{
- static const bool sync = getEnvVar("AF_OPENCL_CPU_OFFLOAD") == "1";
+ static const bool offloadEnv = getEnvVar("AF_OPENCL_CPU_OFFLOAD") == "1";
bool offload = false;
- if(sync) offload = isHostUnifiedMemory(getDevice());
+ if(offloadEnv) offload = isHostUnifiedMemory(getDevice());
+#if OS_MAC
+ // FORCED OFFLOAD FOR LAPACK FUNCTIONS ON OSX UNIFIED MEMORY DEVICES
+ //
+ // On OSX Unified Memory devices (Intel), always offload LAPACK, but not GEMM,
+ // irrespective of the AF_OPENCL_CPU_OFFLOAD value.
+ // GEMM calls OpenCLCPUOffload(false), so the forceOffloadOSX argument has no
+ // effect on the result returned to GEMM.
+ //
+ // Issue https://github.com/arrayfire/arrayfire/issues/662
+ offload = offload || forceOffloadOSX;
+#endif
return offload;
}
diff --git a/src/backend/opencl/platform.hpp b/src/backend/opencl/platform.hpp
index 4c745e0..095fdf9 100644
--- a/src/backend/opencl/platform.hpp
+++ b/src/backend/opencl/platform.hpp
@@ -114,7 +114,7 @@ cl_device_type getDeviceType();
bool isHostUnifiedMemory(const cl::Device &device);
-bool OpenCLCPUOffload();
+bool OpenCLCPUOffload(bool forceOffloadOSX = true);
bool isGLSharingSupported();
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list