[arrayfire] 30/61: BUGFIX: Getting the device pointer performs memory copy when needed
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Dec 8 11:55:06 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.
commit 37b3c8c84f1da81fa7c37065d57c0152dc10948c
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Mon Nov 30 19:38:52 2015 -0500
BUGFIX: Getting the device pointer performs memory copy when needed
- This includes when data is being accessed by other arrays
- Added necessary tests
---
src/backend/cpu/Array.cpp | 6 +++---
src/backend/cpu/Array.hpp | 20 +++++++++++++++++---
src/backend/cuda/Array.cpp | 10 +++++++---
src/backend/cuda/Array.hpp | 20 +++++++++++++++++---
src/backend/opencl/Array.cpp | 11 +++++++++--
src/backend/opencl/Array.hpp | 21 ++++++++++++++++++---
test/array.cpp | 45 ++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 116 insertions(+), 17 deletions(-)
diff --git a/src/backend/cpu/Array.cpp b/src/backend/cpu/Array.cpp
index 5321137..3829a9a 100644
--- a/src/backend/cpu/Array.cpp
+++ b/src/backend/cpu/Array.cpp
@@ -34,14 +34,14 @@ namespace cpu
{ }
template<typename T>
- Array<T>::Array(dim4 dims, const T * const in_data, bool is_device):
+ Array<T>::Array(dim4 dims, const T * const in_data, bool is_device, bool copy_device):
info(getActiveDeviceId(), dims, dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
- data(is_device ? (T*)in_data : memAlloc<T>(dims.elements()), memFree<T>), data_dims(dims),
+ data((is_device & !copy_device) ? (T*)in_data : memAlloc<T>(dims.elements()), memFree<T>), data_dims(dims),
node(), offset(0), ready(true), owner(true)
{
static_assert(std::is_standard_layout<Array<T>>::value, "Array<T> must be a standard layout type");
static_assert(offsetof(Array<T>, info) == 0, "Array<T>::info must be the first member variable of Array<T>");
- if (!is_device) {
+ if (!is_device || copy_device) {
std::copy(in_data, in_data + dims.elements(), data.get());
}
}
diff --git a/src/backend/cpu/Array.hpp b/src/backend/cpu/Array.hpp
index 7f23bc8..e9e40db 100644
--- a/src/backend/cpu/Array.hpp
+++ b/src/backend/cpu/Array.hpp
@@ -73,8 +73,9 @@ namespace cpu
template<typename T>
void *getDevicePtr(const Array<T>& arr)
{
- memPop((T *)arr.get());
- return (void *)arr.get();
+ T *ptr = arr.device();
+ memPop(ptr);
+ return (void *)ptr;
}
// Array Array Implementation
@@ -95,7 +96,7 @@ namespace cpu
Array() = default;
Array(dim4 dims);
- explicit Array(dim4 dims, const T * const in_data, bool is_device);
+ explicit Array(dim4 dims, const T * const in_data, bool is_device, bool copy_device=false);
Array(const Array<T>& parnt, const dim4 &dims, const dim4 &offset, const dim4 &stride);
explicit Array(af::dim4 dims, TNJ::Node_ptr n);
@@ -159,6 +160,19 @@ namespace cpu
return isOwner() ? info.dims() : data_dims;
}
+ T* device()
+ {
+ if (!isOwner() || data.use_count() > 1) {
+ *this = Array(dims(), get(), true, true);
+ }
+ return this->data.get();
+ }
+
+ T* device() const
+ {
+ return const_cast<Array<T>*>(this)->device();
+ }
+
T* get(bool withOffset = true)
{
return const_cast<T*>(static_cast<const Array<T>*>(this)->get(withOffset));
diff --git a/src/backend/cuda/Array.cpp b/src/backend/cuda/Array.cpp
index d7dbec5..8b05fc0 100644
--- a/src/backend/cuda/Array.cpp
+++ b/src/backend/cuda/Array.cpp
@@ -35,9 +35,9 @@ namespace cuda
{}
template<typename T>
- Array<T>::Array(af::dim4 dims, const T * const in_data, bool is_device) :
+ Array<T>::Array(af::dim4 dims, const T * const in_data, bool is_device, bool copy_device) :
info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
- data((is_device ? (T *)in_data : memAlloc<T>(dims.elements())), memFree<T>),
+ data(((is_device & !copy_device) ? (T *)in_data : memAlloc<T>(dims.elements())), memFree<T>),
data_dims(dims),
node(), offset(0), ready(true), owner(true)
{
@@ -47,7 +47,11 @@ namespace cuda
#endif
if (!is_device) {
CUDA_CHECK(cudaMemcpyAsync(data.get(), in_data, dims.elements() * sizeof(T),
- cudaMemcpyHostToDevice, cuda::getStream(cuda::getActiveDeviceId())));
+ cudaMemcpyHostToDevice, cuda::getStream(cuda::getActiveDeviceId())));
+ CUDA_CHECK(cudaStreamSynchronize(cuda::getStream(cuda::getActiveDeviceId())));
+ } else if (copy_device) {
+ CUDA_CHECK(cudaMemcpyAsync(data.get(), in_data, dims.elements() * sizeof(T),
+ cudaMemcpyDeviceToDevice, cuda::getStream(cuda::getActiveDeviceId())));
CUDA_CHECK(cudaStreamSynchronize(cuda::getStream(cuda::getActiveDeviceId())));
}
}
diff --git a/src/backend/cuda/Array.hpp b/src/backend/cuda/Array.hpp
index 3616ebb..3117675 100644
--- a/src/backend/cuda/Array.hpp
+++ b/src/backend/cuda/Array.hpp
@@ -88,8 +88,9 @@ namespace cuda
template<typename T>
void *getDevicePtr(const Array<T>& arr)
{
- memPop((T *)arr.get());
- return (void *)arr.get();
+ T *ptr = arr.device();
+ memPop(ptr);
+ return (void *)ptr;
}
template<typename T>
@@ -105,7 +106,7 @@ namespace cuda
bool owner;
Array(af::dim4 dims);
- explicit Array(af::dim4 dims, const T * const in_data, bool is_device = false);
+ explicit Array(af::dim4 dims, const T * const in_data, bool is_device = false, bool copy_device = false);
Array(const Array<T>& parnt, const dim4 &dims, const dim4 &offset, const dim4 &stride);
Array(Param<T> &tmp);
Array(af::dim4 dims, JIT::Node_ptr n);
@@ -168,6 +169,19 @@ namespace cuda
return isOwner() ? dims() : data_dims;
}
+ T* device()
+ {
+ if (!isOwner() || data.use_count() > 1) {
+ *this = Array(dims(), get(), true, true);
+ }
+ return this->data.get();
+ }
+
+ T* device() const
+ {
+ return const_cast<Array<T>*>(this)->device();
+ }
+
T* get(bool withOffset = true)
{
if (!isReady()) eval();
diff --git a/src/backend/opencl/Array.cpp b/src/backend/opencl/Array.cpp
index 466666f..00635e1 100644
--- a/src/backend/opencl/Array.cpp
+++ b/src/backend/opencl/Array.cpp
@@ -58,12 +58,19 @@ namespace opencl
}
template<typename T>
- Array<T>::Array(af::dim4 dims, cl_mem mem) :
+ Array<T>::Array(af::dim4 dims, cl_mem mem, size_t src_offset, bool copy) :
info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
- data(new cl::Buffer(mem), bufferFree),
+ data(copy ? bufferAlloc(info.elements() * sizeof(T)) : new cl::Buffer(mem), bufferFree),
data_dims(dims),
node(), offset(0), ready(true), owner(true)
{
+ if (copy) {
+ clRetainMemObject(mem);
+ cl::Buffer src_buf = cl::Buffer((cl_mem)(mem));
+ getQueue().enqueueCopyBuffer(src_buf, *data.get(),
+ src_offset, 0,
+ sizeof(T) * info.elements());
+ }
}
template<typename T>
diff --git a/src/backend/opencl/Array.hpp b/src/backend/opencl/Array.hpp
index a4ecd2c..50da72e 100644
--- a/src/backend/opencl/Array.hpp
+++ b/src/backend/opencl/Array.hpp
@@ -20,6 +20,7 @@
#include <JIT/Node.hpp>
#include <memory.hpp>
#include <memory>
+#include <err_common.hpp>
namespace opencl
{
@@ -78,8 +79,9 @@ namespace opencl
template<typename T>
void *getDevicePtr(const Array<T>& arr)
{
- memPop((T *)arr.get());
- return (void *)((*arr.get())());
+ cl::Buffer *buf = arr.device();
+ memPop((T *)buf);
+ return (void *)((*buf)());
}
template<typename T>
@@ -99,7 +101,7 @@ namespace opencl
Array(Param &tmp);
explicit Array(af::dim4 dims, JIT::Node_ptr n);
explicit Array(af::dim4 dims, const T * const in_data);
- explicit Array(af::dim4 dims, cl_mem mem);
+ explicit Array(af::dim4 dims, cl_mem mem, size_t offset = 0, bool copy = false);
public:
@@ -149,6 +151,19 @@ namespace opencl
void eval();
void eval() const;
+ cl::Buffer* device()
+ {
+ if (!isOwner() || data.use_count() > 1) {
+ *this = Array(dims(), (*get())(), getOffset(), true);
+ }
+ return this->data.get();
+ }
+
+ cl::Buffer* device() const
+ {
+ return const_cast<Array<T>*>(this)->device();
+ }
+
//FIXME: This should do a copy if it is not owner. You do not want to overwrite parents data
cl::Buffer *get()
{
diff --git a/test/array.cpp b/test/array.cpp
index e3cb622..6c1f511 100644
--- a/test/array.cpp
+++ b/test/array.cpp
@@ -409,3 +409,48 @@ TEST(Array, ISSUE_951)
const af::array a = randu(100, 100);
af::array b = a.cols(0, 20).rows(10, 20);
}
+
+
+TEST(Device, simple)
+{
+ array a = randu(5,5);
+ {
+ float *ptr0 = a.device<float>();
+ float *ptr1 = a.device<float>();
+ ASSERT_EQ(ptr0, ptr1);
+ }
+
+ {
+ float *ptr0 = a.device<float>();
+ a.unlock();
+ float *ptr1 = a.device<float>();
+ ASSERT_EQ(ptr0, ptr1);
+ }
+}
+
+TEST(Device, index)
+{
+ array a = randu(5,5);
+ array b = a(span, 0);
+
+ ASSERT_NE(a.device<float>(), b.device<float>());
+}
+
+TEST(Device, unequal)
+{
+ {
+ array a = randu(5,5);
+ float *ptr = a.device<float>();
+ array b = a;
+ ASSERT_NE(ptr, b.device<float>());
+ ASSERT_EQ(ptr, a.device<float>());
+ }
+
+ {
+ array a = randu(5,5);
+ float *ptr = a.device<float>();
+ array b = a;
+ ASSERT_NE(ptr, a.device<float>());
+ ASSERT_EQ(ptr, b.device<float>());
+ }
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list