[arrayfire] 30/61: BUGFIX: Getting the device pointer performs memory copy when needed

Tue Dec 8 11:55:06 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.

commit 37b3c8c84f1da81fa7c37065d57c0152dc10948c
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Mon Nov 30 19:38:52 2015 -0500

    BUGFIX: Getting the device pointer performs memory copy when needed
    
    - This includes when data is being accessed by other arrays
    - Added necessary tests
---
 src/backend/cpu/Array.cpp    |  6 +++---
 src/backend/cpu/Array.hpp    | 20 +++++++++++++++++---
 src/backend/cuda/Array.cpp   | 10 +++++++---
 src/backend/cuda/Array.hpp   | 20 +++++++++++++++++---
 src/backend/opencl/Array.cpp | 11 +++++++++--
 src/backend/opencl/Array.hpp | 21 ++++++++++++++++++---
 test/array.cpp               | 45 ++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 116 insertions(+), 17 deletions(-)

diff --git a/src/backend/cpu/Array.cpp b/src/backend/cpu/Array.cpp
index 5321137..3829a9a 100644
--- a/src/backend/cpu/Array.cpp
+++ b/src/backend/cpu/Array.cpp
@@ -34,14 +34,14 @@ namespace cpu
     { }
 
     template<typename T>
-    Array<T>::Array(dim4 dims, const T * const in_data, bool is_device):
+    Array<T>::Array(dim4 dims, const T * const in_data, bool is_device, bool copy_device):
         info(getActiveDeviceId(), dims, dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
-        data(is_device ? (T*)in_data : memAlloc<T>(dims.elements()), memFree<T>), data_dims(dims),
+        data((is_device & !copy_device) ? (T*)in_data : memAlloc<T>(dims.elements()), memFree<T>), data_dims(dims),
         node(), offset(0), ready(true), owner(true)
     {
         static_assert(std::is_standard_layout<Array<T>>::value, "Array<T> must be a standard layout type");
         static_assert(offsetof(Array<T>, info) == 0, "Array<T>::info must be the first member variable of Array<T>");
-        if (!is_device) {
+        if (!is_device || copy_device) {
             std::copy(in_data, in_data + dims.elements(), data.get());
         }
     }
diff --git a/src/backend/cpu/Array.hpp b/src/backend/cpu/Array.hpp
index 7f23bc8..e9e40db 100644
--- a/src/backend/cpu/Array.hpp
+++ b/src/backend/cpu/Array.hpp
@@ -73,8 +73,9 @@ namespace cpu
     template<typename T>
     void *getDevicePtr(const Array<T>& arr)
     {
-        memPop((T *)arr.get());
-        return (void *)arr.get();
+        T *ptr = arr.device();
+        memPop(ptr);
+        return (void *)ptr;
     }
 
     // Array Array Implementation
@@ -95,7 +96,7 @@ namespace cpu
 
         Array() = default;
         Array(dim4 dims);
-        explicit Array(dim4 dims, const T * const in_data, bool is_device);
+        explicit Array(dim4 dims, const T * const in_data, bool is_device, bool copy_device=false);
         Array(const Array<T>& parnt, const dim4 &dims, const dim4 &offset, const dim4 &stride);
         explicit Array(af::dim4 dims, TNJ::Node_ptr n);
 
@@ -159,6 +160,19 @@ namespace cpu
             return isOwner() ? info.dims() : data_dims;
         }
 
+        T* device()
+        {
+            if (!isOwner() || data.use_count() > 1) {
+                *this = Array(dims(), get(), true, true);
+            }
+            return this->data.get();
+        }
+
+        T* device() const
+        {
+            return const_cast<Array<T>*>(this)->device();
+        }
+
         T* get(bool withOffset = true)
         {
             return const_cast<T*>(static_cast<const Array<T>*>(this)->get(withOffset));
diff --git a/src/backend/cuda/Array.cpp b/src/backend/cuda/Array.cpp
index d7dbec5..8b05fc0 100644
--- a/src/backend/cuda/Array.cpp
+++ b/src/backend/cuda/Array.cpp
@@ -35,9 +35,9 @@ namespace cuda
     {}
 
     template<typename T>
-    Array<T>::Array(af::dim4 dims, const T * const in_data, bool is_device) :
+    Array<T>::Array(af::dim4 dims, const T * const in_data, bool is_device, bool copy_device) :
         info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
-        data((is_device ? (T *)in_data : memAlloc<T>(dims.elements())), memFree<T>),
+        data(((is_device & !copy_device) ? (T *)in_data : memAlloc<T>(dims.elements())), memFree<T>),
         data_dims(dims),
         node(), offset(0), ready(true), owner(true)
     {
@@ -47,7 +47,11 @@ namespace cuda
 #endif
         if (!is_device) {
             CUDA_CHECK(cudaMemcpyAsync(data.get(), in_data, dims.elements() * sizeof(T),
-                        cudaMemcpyHostToDevice, cuda::getStream(cuda::getActiveDeviceId())));
+                                       cudaMemcpyHostToDevice, cuda::getStream(cuda::getActiveDeviceId())));
+            CUDA_CHECK(cudaStreamSynchronize(cuda::getStream(cuda::getActiveDeviceId())));
+        } else if (copy_device) {
+            CUDA_CHECK(cudaMemcpyAsync(data.get(), in_data, dims.elements() * sizeof(T),
+                                       cudaMemcpyDeviceToDevice, cuda::getStream(cuda::getActiveDeviceId())));
             CUDA_CHECK(cudaStreamSynchronize(cuda::getStream(cuda::getActiveDeviceId())));
         }
     }
diff --git a/src/backend/cuda/Array.hpp b/src/backend/cuda/Array.hpp
index 3616ebb..3117675 100644
--- a/src/backend/cuda/Array.hpp
+++ b/src/backend/cuda/Array.hpp
@@ -88,8 +88,9 @@ namespace cuda
     template<typename T>
     void *getDevicePtr(const Array<T>& arr)
     {
-        memPop((T *)arr.get());
-        return (void *)arr.get();
+        T *ptr = arr.device();
+        memPop(ptr);
+        return (void *)ptr;
     }
 
     template<typename T>
@@ -105,7 +106,7 @@ namespace cuda
         bool owner;
 
         Array(af::dim4 dims);
-        explicit Array(af::dim4 dims, const T * const in_data, bool is_device = false);
+        explicit Array(af::dim4 dims, const T * const in_data, bool is_device = false, bool copy_device = false);
         Array(const Array<T>& parnt, const dim4 &dims, const dim4 &offset, const dim4 &stride);
         Array(Param<T> &tmp);
         Array(af::dim4 dims, JIT::Node_ptr n);
@@ -168,6 +169,19 @@ namespace cuda
             return isOwner() ? dims() : data_dims;
         }
 
+        T* device()
+        {
+            if (!isOwner() || data.use_count() > 1) {
+                *this = Array(dims(), get(), true, true);
+            }
+            return this->data.get();
+        }
+
+        T* device() const
+        {
+            return const_cast<Array<T>*>(this)->device();
+        }
+
         T* get(bool withOffset = true)
         {
             if (!isReady()) eval();
diff --git a/src/backend/opencl/Array.cpp b/src/backend/opencl/Array.cpp
index 466666f..00635e1 100644
--- a/src/backend/opencl/Array.cpp
+++ b/src/backend/opencl/Array.cpp
@@ -58,12 +58,19 @@ namespace opencl
     }
 
     template<typename T>
-    Array<T>::Array(af::dim4 dims, cl_mem mem) :
+    Array<T>::Array(af::dim4 dims, cl_mem mem, size_t src_offset, bool copy) :
         info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
-        data(new cl::Buffer(mem), bufferFree),
+        data(copy ? bufferAlloc(info.elements() * sizeof(T)) : new cl::Buffer(mem), bufferFree),
         data_dims(dims),
         node(), offset(0), ready(true), owner(true)
     {
+        if (copy) {
+            clRetainMemObject(mem);
+            cl::Buffer src_buf = cl::Buffer((cl_mem)(mem));
+            getQueue().enqueueCopyBuffer(src_buf, *data.get(),
+                                         src_offset, 0,
+                                         sizeof(T) * info.elements());
+        }
     }
 
     template<typename T>
diff --git a/src/backend/opencl/Array.hpp b/src/backend/opencl/Array.hpp
index a4ecd2c..50da72e 100644
--- a/src/backend/opencl/Array.hpp
+++ b/src/backend/opencl/Array.hpp
@@ -20,6 +20,7 @@
 #include <JIT/Node.hpp>
 #include <memory.hpp>
 #include <memory>
+#include <err_common.hpp>
 
 namespace opencl
 {
@@ -78,8 +79,9 @@ namespace opencl
     template<typename T>
     void *getDevicePtr(const Array<T>& arr)
     {
-        memPop((T *)arr.get());
-        return (void *)((*arr.get())());
+        cl::Buffer *buf = arr.device();
+        memPop((T *)buf);
+        return (void *)((*buf)());
     }
 
     template<typename T>
@@ -99,7 +101,7 @@ namespace opencl
         Array(Param &tmp);
         explicit Array(af::dim4 dims, JIT::Node_ptr n);
         explicit Array(af::dim4 dims, const T * const in_data);
-        explicit Array(af::dim4 dims, cl_mem mem);
+        explicit Array(af::dim4 dims, cl_mem mem, size_t offset = 0, bool copy = false);
 
     public:
 
@@ -149,6 +151,19 @@ namespace opencl
         void eval();
         void eval() const;
 
+        cl::Buffer* device()
+        {
+            if (!isOwner() || data.use_count() > 1) {
+                *this = Array(dims(), (*get())(), getOffset(), true);
+            }
+            return this->data.get();
+        }
+
+        cl::Buffer* device() const
+        {
+            return const_cast<Array<T>*>(this)->device();
+        }
+
         //FIXME: This should do a copy if it is not owner. You do not want to overwrite parents data
         cl::Buffer *get()
         {
diff --git a/test/array.cpp b/test/array.cpp
index e3cb622..6c1f511 100644
--- a/test/array.cpp
+++ b/test/array.cpp
@@ -409,3 +409,48 @@ TEST(Array, ISSUE_951)
     const af::array a = randu(100, 100);
     af::array b = a.cols(0, 20).rows(10, 20);
 }
+
+
+TEST(Device, simple)
+{
+    array a = randu(5,5);
+    {
+        float *ptr0 = a.device<float>();
+        float *ptr1 = a.device<float>();
+        ASSERT_EQ(ptr0, ptr1);
+    }
+
+    {
+        float *ptr0 = a.device<float>();
+        a.unlock();
+        float *ptr1 = a.device<float>();
+        ASSERT_EQ(ptr0, ptr1);
+    }
+}
+
+TEST(Device, index)
+{
+    array a = randu(5,5);
+    array b = a(span, 0);
+
+    ASSERT_NE(a.device<float>(), b.device<float>());
+}
+
+TEST(Device, unequal)
+{
+    {
+        array a = randu(5,5);
+        float *ptr = a.device<float>();
+        array b = a;
+        ASSERT_NE(ptr, b.device<float>());
+        ASSERT_EQ(ptr, a.device<float>());
+    }
+
+    {
+        array a = randu(5,5);
+        float *ptr = a.device<float>();
+        array b = a;
+        ASSERT_NE(ptr, a.device<float>());
+        ASSERT_EQ(ptr, b.device<float>());
+    }
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git