[arrayfire] 16/75: Reorganizing offset to be inside ArrayInfo

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Feb 29 08:01:09 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.

commit 1b623a01721ba9ee4857fa7a3e88f6b0925fafe1
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Thu Feb 11 00:09:47 2016 -0500

    Reorganizing offset to be inside ArrayInfo
    
    - Removed unnecessary dim_offsets
---
 src/api/c/internal.cpp              | 21 +-------------
 src/api/c/print.cpp                 |  3 +-
 src/backend/ArrayInfo.cpp           |  9 ------
 src/backend/ArrayInfo.hpp           | 14 ++++-----
 src/backend/cpu/Array.cpp           | 43 ++++++++++++++-------------
 src/backend/cpu/Array.hpp           |  8 ++---
 src/backend/cpu/exampleFunction.cpp |  3 +-
 src/backend/cuda/Array.cpp          | 52 ++++++++++++++++++---------------
 src/backend/cuda/Array.hpp          |  8 ++---
 src/backend/opencl/Array.cpp        | 58 ++++++++++++++++++++-----------------
 src/backend/opencl/Array.hpp        |  6 ++--
 11 files changed, 100 insertions(+), 125 deletions(-)

diff --git a/src/api/c/internal.cpp b/src/api/c/internal.cpp
index d086d43..d5f449e 100644
--- a/src/api/c/internal.cpp
+++ b/src/api/c/internal.cpp
@@ -84,26 +84,7 @@ af_err af_get_offset(dim_t *offset, const af_array arr)
 {
     try {
 
-        dim_t res = 0;
-
-        af_dtype ty = getInfo(arr).getType();
-
-        switch (ty) {
-        case f32: res = getArray<float  >(arr).getOffset(); break;
-        case f64: res = getArray<double >(arr).getOffset(); break;
-        case c32: res = getArray<cfloat >(arr).getOffset(); break;
-        case c64: res = getArray<cdouble>(arr).getOffset(); break;
-        case u32: res = getArray<uint   >(arr).getOffset(); break;
-        case s32: res = getArray<int    >(arr).getOffset(); break;
-        case u64: res = getArray<uintl  >(arr).getOffset(); break;
-        case s64: res = getArray<intl   >(arr).getOffset(); break;
-        case u16: res = getArray<ushort >(arr).getOffset(); break;
-        case s16: res = getArray<short  >(arr).getOffset(); break;
-        case b8 : res = getArray<char   >(arr).getOffset(); break;
-        case u8 : res = getArray<uchar  >(arr).getOffset(); break;
-        default: TYPE_ERROR(6, ty);
-        }
-
+        dim_t res = getInfo(arr).getOffset();
         std::swap(*offset, res);
     }
     CATCHALL;
diff --git a/src/api/c/print.cpp b/src/api/c/print.cpp
index 181dd35..b243491 100644
--- a/src/api/c/print.cpp
+++ b/src/api/c/print.cpp
@@ -14,6 +14,7 @@
 #include <sstream>
 #include <af/array.h>
 #include <af/data.h>
+#include <af/internal.h>
 #include <copy.hpp>
 #include <print.hpp>
 #include <ArrayInfo.hpp>
@@ -69,7 +70,7 @@ static void print(const char *exp, af_array arr, const int precision, std::ostre
 
     os << "[" << info.dims() << "]\n";
 #ifndef NDEBUG
-    os <<"   Offsets: [" << info.offsets() << "]" << std::endl;
+    os <<"   Offset: " << info.getOffset() << std::endl;
     os <<"   Strides: [" << info.strides() << "]" << std::endl;
 #endif
 
diff --git a/src/backend/ArrayInfo.cpp b/src/backend/ArrayInfo.cpp
index 219bc19..43d2627 100644
--- a/src/backend/ArrayInfo.cpp
+++ b/src/backend/ArrayInfo.cpp
@@ -18,15 +18,6 @@
 
 using af::dim4;
 
-dim_t
-calcOffset(const af::dim4 &strides, const af::dim4 &offsets)
-{
-    dim_t offset = 0;
-    for (int i = 0; i < 4; i++) offset += offsets[i] * strides[i];
-    return offset;
-}
-
-
 const ArrayInfo&
 getInfo(af_array arr)
 {
diff --git a/src/backend/ArrayInfo.hpp b/src/backend/ArrayInfo.hpp
index ca6fcd3..38e5ea6 100644
--- a/src/backend/ArrayInfo.hpp
+++ b/src/backend/ArrayInfo.hpp
@@ -16,9 +16,6 @@
 #include <vector>
 #include <cstddef>
 
-dim_t
-calcOffset(const af::dim4 &strides, const af::dim4 &offsets);
-
 af::dim4
 calcStrides(const af::dim4 &parentDim);
 
@@ -48,14 +45,15 @@ private:
     int             devId;
     af_dtype        type;
     af::dim4        dim_size;
-    af::dim4        dim_offsets, dim_strides;
+    dim_t           offset;
+    af::dim4        dim_strides;
 
 public:
-    ArrayInfo(int id, af::dim4 size, af::dim4 offset, af::dim4 stride, af_dtype af_type):
+    ArrayInfo(int id, af::dim4 size, dim_t offset_, af::dim4 stride, af_dtype af_type):
         devId(id),
         type(af_type),
         dim_size(size),
-        dim_offsets(offset),
+        offset(offset_),
         dim_strides(stride)
     {
         af_init();
@@ -77,7 +75,7 @@ public:
 
     const af_dtype& getType() const     { return type;                  }
 
-    const af::dim4& offsets() const     { return dim_offsets;           }
+    dim_t getOffset() const             { return offset;                }
 
     const af::dim4& strides() const     { return dim_strides;           }
 
@@ -97,7 +95,7 @@ public:
     {
         dim_size = dims;
         dim_strides = calcStrides(dims);
-        dim_offsets = af::dim4(0,0,0,0);
+        offset = 0;
     }
 
     void resetDims(const af::dim4& dims)
diff --git a/src/backend/cpu/Array.cpp b/src/backend/cpu/Array.cpp
index 0db8a82..1b6098d 100644
--- a/src/backend/cpu/Array.cpp
+++ b/src/backend/cpu/Array.cpp
@@ -33,16 +33,16 @@ using af::dim4;
 
 template<typename T>
 Array<T>::Array(dim4 dims):
-    info(getActiveDeviceId(), dims, dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
+    info(getActiveDeviceId(), dims, 0, calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
     data(memAlloc<T>(dims.elements()), memFree<T>), data_dims(dims),
-    node(), offset(0), ready(true), owner(true)
+    node(), ready(true), owner(true)
 { }
 
 template<typename T>
 Array<T>::Array(dim4 dims, const T * const in_data, bool is_device, bool copy_device):
-    info(getActiveDeviceId(), dims, dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
+    info(getActiveDeviceId(), dims, 0, calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
     data((is_device & !copy_device) ? (T*)in_data : memAlloc<T>(dims.elements()), memFree<T>), data_dims(dims),
-    node(), offset(0), ready(true), owner(true)
+    node(), ready(true), owner(true)
 {
     static_assert(std::is_standard_layout<Array<T>>::value, "Array<T> must be a standard layout type");
     static_assert(offsetof(Array<T>, info) == 0, "Array<T>::info must be the first member variable of Array<T>");
@@ -53,29 +53,27 @@ Array<T>::Array(dim4 dims, const T * const in_data, bool is_device, bool copy_de
 
 template<typename T>
 Array<T>::Array(af::dim4 dims, TNJ::Node_ptr n) :
-    info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
+    info(getActiveDeviceId(), dims, 0, calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
     data(), data_dims(dims),
-    node(n), offset(0), ready(false), owner(true)
+    node(n), ready(false), owner(true)
 {
 }
 
 template<typename T>
-Array<T>::Array(const Array<T>& parent, const dim4 &dims, const dim4 &offsets, const dim4 &strides) :
-    info(parent.getDevId(), dims, offsets, strides, (af_dtype)dtype_traits<T>::af_type),
+Array<T>::Array(const Array<T>& parent, const dim4 &dims, const dim_t &offset_, const dim4 &strides) :
+    info(parent.getDevId(), dims, offset_, strides, (af_dtype)dtype_traits<T>::af_type),
     data(parent.getData()), data_dims(parent.getDataDims()),
     node(),
-    offset(parent.getOffset() + calcOffset(parent.strides(), offsets)),
     ready(true), owner(false)
 { }
 
 template<typename T>
 Array<T>::Array(af::dim4 dims, af::dim4 strides, dim_t offset_,
                 const T * const in_data, bool is_device) :
-    info(getActiveDeviceId(), dims, af::dim4(offset_), strides, (af_dtype)dtype_traits<T>::af_type),
+    info(getActiveDeviceId(), dims, offset_, strides, (af_dtype)dtype_traits<T>::af_type),
     data(is_device ? (T*)in_data : memAlloc<T>(info.elements()), memFree<T>),
     data_dims(dims),
     node(),
-    offset(offset_),
     ready(true),
     owner(true)
 {
@@ -119,7 +117,7 @@ Node_ptr Array<T>::getNode() const
 
         BufferNode<T> *buf_node = new BufferNode<T>(data,
                                                     bytes,
-                                                    offset,
+                                                    getOffset(),
                                                     dims().get(),
                                                     strides().get(),
                                                     isLinear());
@@ -194,18 +192,23 @@ Array<T> createSubArray(const Array<T>& parent,
     dim4 dDims = parent.getDataDims();
     dim4 pDims = parent.dims();
 
-    dim4 dims   = toDims  (index, pDims);
-    dim4 offset = toOffset(index, dDims);
-    dim4 stride = toStride (index, dDims);
+    dim4 dims    = toDims  (index, pDims);
+    dim4 strides = toStride (index, dDims);
 
-    Array<T> out = Array<T>(parent, dims, offset, stride);
+    // Find total offsets after indexing
+    dim4 offsets = toOffset(index, pDims);
+    dim4 parent_strides = parent.strides();
+    dim_t offset = parent.getOffset();
+    for (int i = 0; i < 4; i++) offset += offsets[i] * parent_strides[i];
+
+    Array<T> out = Array<T>(parent, dims, offset, strides);
 
     if (!copy) return out;
 
-    if (stride[0] != 1 ||
-        stride[1] <  0 ||
-        stride[2] <  0 ||
-        stride[3] <  0) {
+    if (strides[0] != 1 ||
+        strides[1] <  0 ||
+        strides[2] <  0 ||
+        strides[3] <  0) {
 
         out = copyArray(out);
     }
diff --git a/src/backend/cpu/Array.hpp b/src/backend/cpu/Array.hpp
index 891d867..eb17852 100644
--- a/src/backend/cpu/Array.hpp
+++ b/src/backend/cpu/Array.hpp
@@ -100,7 +100,6 @@ namespace cpu
         af::dim4 data_dims;
         TNJ::Node_ptr node;
 
-        dim_t offset;
         bool ready;
         bool owner;
 
@@ -108,7 +107,7 @@ namespace cpu
         Array(dim4 dims);
 
         explicit Array(dim4 dims, const T * const in_data, bool is_device, bool copy_device=false);
-        Array(const Array<T>& parnt, const dim4 &dims, const dim4 &offset, const dim4 &stride);
+        Array(const Array<T>& parnt, const dim4 &dims, const dim_t &offset, const dim4 &stride);
         explicit Array(af::dim4 dims, TNJ::Node_ptr n);
 
     public:
@@ -127,7 +126,6 @@ namespace cpu
     RET_TYPE NAME() const { return info.NAME(); }
 
         INFO_FUNC(const af_dtype& ,getType)
-        INFO_FUNC(const af::dim4& ,offsets)
         INFO_FUNC(const af::dim4& ,strides)
         INFO_FUNC(size_t          ,elements)
         INFO_FUNC(size_t          ,ndims)
@@ -165,7 +163,7 @@ namespace cpu
         void eval();
         void eval() const;
 
-        dim_t getOffset() const { return offset; }
+        dim_t getOffset() const { return info.getOffset(); }
         shared_ptr<T> getData() const {return data; }
 
         dim4 getDataDims() const
@@ -197,7 +195,7 @@ namespace cpu
         const T* get(bool withOffset = true) const
         {
             if (!isReady()) eval();
-            return data.get() + (withOffset ? offset : 0);
+            return data.get() + (withOffset ? getOffset() : 0);
         }
 
         int useCount() const
diff --git a/src/backend/cpu/exampleFunction.cpp b/src/backend/cpu/exampleFunction.cpp
index d45b8a2..0eb8646 100644
--- a/src/backend/cpu/exampleFunction.cpp
+++ b/src/backend/cpu/exampleFunction.cpp
@@ -44,7 +44,7 @@ Array<T> exampleFunction(const Array<T> &in, const af_someenum_t method)
 
     //dim4 in_dims    = in.dims();        // you can retrieve dimensions
 
-    //dim4 in_offsets = in.offsets();     // you can retrieve offsets - used when given array
+    //dim_t in_offset = in.getOffset(); // you can retrieve the offset - used when given array
                                         // is an sub-array pointing to some other array and
                                         // doesn't have memory of its own
 
@@ -77,4 +77,3 @@ INSTANTIATE(cfloat)
 INSTANTIATE(cdouble)
 
 }
-
diff --git a/src/backend/cuda/Array.cpp b/src/backend/cuda/Array.cpp
index c44db35..370e8ec 100644
--- a/src/backend/cuda/Array.cpp
+++ b/src/backend/cuda/Array.cpp
@@ -30,17 +30,17 @@ namespace cuda
 
     template<typename T>
     Array<T>::Array(af::dim4 dims) :
-        info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
+        info(getActiveDeviceId(), dims, 0, calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
         data(memAlloc<T>(dims.elements()), memFree<T>), data_dims(dims),
-        node(), offset(0), ready(true), owner(true)
+        node(), ready(true), owner(true)
     {}
 
     template<typename T>
     Array<T>::Array(af::dim4 dims, const T * const in_data, bool is_device, bool copy_device) :
-        info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
+        info(getActiveDeviceId(), dims, 0, calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
         data(((is_device & !copy_device) ? (T *)in_data : memAlloc<T>(dims.elements())), memFree<T>),
         data_dims(dims),
-        node(), offset(0), ready(true), owner(true)
+        node(), ready(true), owner(true)
     {
 #if __cplusplus > 199711L
         static_assert(std::is_standard_layout<Array<T>>::value, "Array<T> must be a standard layout type");
@@ -58,42 +58,41 @@ namespace cuda
     }
 
     template<typename T>
-    Array<T>::Array(const Array<T>& parent, const dim4 &dims, const dim4 &offsets, const dim4 &strides) :
-        info(parent.getDevId(), dims, offsets, strides, (af_dtype)dtype_traits<T>::af_type),
+    Array<T>::Array(const Array<T>& parent, const dim4 &dims, const dim_t &offset_, const dim4 &strides) :
+        info(parent.getDevId(), dims, offset_, strides, (af_dtype)dtype_traits<T>::af_type),
         data(parent.getData()), data_dims(parent.getDataDims()),
         node(),
-        offset(parent.getOffset() + calcOffset(parent.strides(), offsets)),
         ready(true), owner(false)
     { }
 
     template<typename T>
     Array<T>::Array(Param<T> &tmp) :
-        info(getActiveDeviceId(), af::dim4(tmp.dims[0], tmp.dims[1], tmp.dims[2], tmp.dims[3]),
-                  af::dim4(0, 0, 0, 0),
-                  af::dim4(tmp.strides[0], tmp.strides[1], tmp.strides[2], tmp.strides[3]),
-                  (af_dtype)dtype_traits<T>::af_type),
+        info(getActiveDeviceId(),
+             af::dim4(tmp.dims[0], tmp.dims[1], tmp.dims[2], tmp.dims[3]),
+             0,
+             af::dim4(tmp.strides[0], tmp.strides[1], tmp.strides[2], tmp.strides[3]),
+             (af_dtype)dtype_traits<T>::af_type),
         data(tmp.ptr, memFree<T>),
         data_dims(af::dim4(tmp.dims[0], tmp.dims[1], tmp.dims[2], tmp.dims[3])),
-        node(), offset(0), ready(true), owner(true)
+        node(), ready(true), owner(true)
     {
     }
 
     template<typename T>
     Array<T>::Array(af::dim4 dims, JIT::Node_ptr n) :
-        info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
+        info(getActiveDeviceId(), dims, 0, calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
         data(), data_dims(dims),
-        node(n), offset(0), ready(false), owner(true)
+        node(n), ready(false), owner(true)
     {
     }
 
     template<typename T>
     Array<T>::Array(af::dim4 dims, af::dim4 strides, dim_t offset_,
                     const T * const in_data, bool is_device) :
-        info(getActiveDeviceId(), dims, af::dim4(offset_), strides, (af_dtype)dtype_traits<T>::af_type),
+        info(getActiveDeviceId(), dims, offset_, strides, (af_dtype)dtype_traits<T>::af_type),
         data(is_device ? (T*)in_data : memAlloc<T>(info.elements()), memFree<T>),
         data_dims(dims),
         node(),
-        offset(offset_),
         ready(true),
         owner(true)
     {
@@ -216,18 +215,23 @@ namespace cuda
         dim4 dDims = parent.getDataDims();
         dim4 pDims = parent.dims();
 
-        dim4 dims   = toDims  (index, pDims);
-        dim4 offset = toOffset(index, dDims);
-        dim4 stride = toStride (index, dDims);
+        dim4 dims    = toDims  (index, pDims);
+        dim4 strides = toStride (index, dDims);
 
-        Array<T> out = Array<T>(parent, dims, offset, stride);
+        // Find total offsets after indexing
+        dim4 offsets = toOffset(index, pDims);
+        dim4 parent_strides = parent.strides();
+        dim_t offset = parent.getOffset();
+        for (int i = 0; i < 4; i++) offset += offsets[i] * parent_strides[i];
+
+        Array<T> out = Array<T>(parent, dims, offset, strides);
 
         if (!copy) return out;
 
-        if (stride[0] != 1 ||
-            stride[1] <  0 ||
-            stride[2] <  0 ||
-            stride[3] <  0) {
+        if (strides[0] != 1 ||
+            strides[1] <  0 ||
+            strides[2] <  0 ||
+            strides[3] <  0) {
 
             out = copyArray(out);
         }
diff --git a/src/backend/cuda/Array.hpp b/src/backend/cuda/Array.hpp
index b8832db..c6cdd21 100644
--- a/src/backend/cuda/Array.hpp
+++ b/src/backend/cuda/Array.hpp
@@ -98,14 +98,13 @@ namespace cuda
         af::dim4 data_dims;
 
         JIT::Node_ptr node;
-        dim_t offset;
         bool ready;
         bool owner;
 
         Array(af::dim4 dims);
 
         explicit Array(af::dim4 dims, const T * const in_data, bool is_device = false, bool copy_device = false);
-        Array(const Array<T>& parnt, const dim4 &dims, const dim4 &offset, const dim4 &stride);
+        Array(const Array<T>& parnt, const dim4 &dims, const dim_t &offset, const dim4 &stride);
         Array(Param<T> &tmp);
         Array(af::dim4 dims, JIT::Node_ptr n);
     public:
@@ -123,7 +122,6 @@ namespace cuda
     RET_TYPE NAME() const { return info.NAME(); }
 
         INFO_FUNC(const af_dtype& ,getType)
-        INFO_FUNC(const af::dim4& ,offsets)
         INFO_FUNC(const af::dim4& ,strides)
         INFO_FUNC(size_t          ,elements)
         INFO_FUNC(size_t          ,ndims)
@@ -160,7 +158,7 @@ namespace cuda
         void eval();
         void eval() const;
 
-        dim_t getOffset() const { return offset; }
+        dim_t getOffset() const { return info.getOffset(); }
         shared_ptr<T> getData() const { return data; }
 
         dim4 getDataDims() const
@@ -193,7 +191,7 @@ namespace cuda
         const   T* get(bool withOffset = true) const
         {
             if (!isReady()) eval();
-            return data.get() + (withOffset ? offset : 0);
+            return data.get() + (withOffset ? getOffset() : 0);
         }
 
         int useCount() const
diff --git a/src/backend/opencl/Array.cpp b/src/backend/opencl/Array.cpp
index f41b2c7..fb3e63b 100644
--- a/src/backend/opencl/Array.cpp
+++ b/src/backend/opencl/Array.cpp
@@ -30,28 +30,28 @@ namespace opencl
 
     template<typename T>
     Array<T>::Array(af::dim4 dims) :
-        info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
+        info(getActiveDeviceId(), dims, 0, calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
         data(bufferAlloc(info.elements() * sizeof(T)), bufferFree),
         data_dims(dims),
-        node(), offset(0), ready(true), owner(true)
+        node(), ready(true), owner(true)
     {
     }
 
     template<typename T>
     Array<T>::Array(af::dim4 dims, JIT::Node_ptr n) :
-        info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
+        info(getActiveDeviceId(), dims, 0, calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
         data(),
         data_dims(dims),
-        node(n), offset(0), ready(false), owner(true)
+        node(n), ready(false), owner(true)
     {
     }
 
     template<typename T>
     Array<T>::Array(af::dim4 dims, const T * const in_data) :
-        info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
+        info(getActiveDeviceId(), dims, 0, calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
         data(bufferAlloc(info.elements()*sizeof(T)), bufferFree),
         data_dims(dims),
-        node(), offset(0), ready(true), owner(true)
+        node(), ready(true), owner(true)
     {
         static_assert(std::is_standard_layout<Array<T>>::value, "Array<T> must be a standard layout type");
         static_assert(offsetof(Array<T>, info) == 0, "Array<T>::info must be the first member variable of Array<T>");
@@ -60,10 +60,10 @@ namespace opencl
 
     template<typename T>
     Array<T>::Array(af::dim4 dims, cl_mem mem, size_t src_offset, bool copy) :
-        info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
+        info(getActiveDeviceId(), dims, 0, calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
         data(copy ? bufferAlloc(info.elements() * sizeof(T)) : new cl::Buffer(mem), bufferFree),
         data_dims(dims),
-        node(), offset(0), ready(true), owner(true)
+        node(), ready(true), owner(true)
     {
         if (copy) {
             clRetainMemObject(mem);
@@ -75,12 +75,11 @@ namespace opencl
     }
 
     template<typename T>
-    Array<T>::Array(const Array<T>& parent, const dim4 &dims, const dim4 &offsets, const dim4 &stride) :
-        info(parent.getDevId(), dims, offsets, stride, (af_dtype)dtype_traits<T>::af_type),
+    Array<T>::Array(const Array<T>& parent, const dim4 &dims, const dim_t &offset_, const dim4 &stride) :
+        info(parent.getDevId(), dims, offset_, stride, (af_dtype)dtype_traits<T>::af_type),
         data(parent.getData()),
         data_dims(parent.getDataDims()),
         node(),
-        offset(parent.getOffset() + calcOffset(parent.strides(), offsets)),
         ready(true),
         owner(false)
     { }
@@ -88,27 +87,27 @@ namespace opencl
 
     template<typename T>
     Array<T>::Array(Param &tmp) :
-        info(getActiveDeviceId(), af::dim4(tmp.info.dims[0], tmp.info.dims[1], tmp.info.dims[2], tmp.info.dims[3]),
-                  af::dim4(0, 0, 0, 0),
-                  af::dim4(tmp.info.strides[0], tmp.info.strides[1],
-                           tmp.info.strides[2], tmp.info.strides[3]),
-                  (af_dtype)dtype_traits<T>::af_type),
+        info(getActiveDeviceId(),
+             af::dim4(tmp.info.dims[0], tmp.info.dims[1], tmp.info.dims[2], tmp.info.dims[3]),
+             0,
+             af::dim4(tmp.info.strides[0], tmp.info.strides[1],
+                      tmp.info.strides[2], tmp.info.strides[3]),
+             (af_dtype)dtype_traits<T>::af_type),
         data(tmp.data, bufferFree),
         data_dims(af::dim4(tmp.info.dims[0], tmp.info.dims[1], tmp.info.dims[2], tmp.info.dims[3])),
-        node(), offset(0), ready(true), owner(true)
+        node(), ready(true), owner(true)
     {
     }
 
     template<typename T>
     Array<T>::Array(af::dim4 dims, af::dim4 strides, dim_t offset_,
                     const T * const in_data, bool is_device) :
-        info(getActiveDeviceId(), dims, af::dim4(offset_), strides, (af_dtype)dtype_traits<T>::af_type),
+        info(getActiveDeviceId(), dims, offset_, strides, (af_dtype)dtype_traits<T>::af_type),
         data(is_device ?
              (new cl::Buffer((cl_mem)in_data)) :
              (bufferAlloc(info.elements() * sizeof(T))), bufferFree),
         data_dims(dims),
         node(),
-        offset(offset_),
         ready(true),
         owner(true)
     {
@@ -204,18 +203,23 @@ namespace opencl
         dim4 dDims = parent.getDataDims();
         dim4 pDims = parent.dims();
 
-        dim4 dims   = toDims  (index, pDims);
-        dim4 offset = toOffset(index, dDims);
-        dim4 stride = toStride (index, dDims);
+        dim4 dims    = toDims  (index, pDims);
+        dim4 strides = toStride (index, dDims);
 
-        Array<T> out = Array<T>(parent, dims, offset, stride);
+        // Find total offsets after indexing
+        dim4 offsets = toOffset(index, pDims);
+        dim4 parent_strides = parent.strides();
+        dim_t offset = parent.getOffset();
+        for (int i = 0; i < 4; i++) offset += offsets[i] * parent_strides[i];
+
+        Array<T> out = Array<T>(parent, dims, offset, strides);
 
         if (!copy) return out;
 
-        if (stride[0] != 1 ||
-            stride[1] <  0 ||
-            stride[2] <  0 ||
-            stride[3] <  0) {
+        if (strides[0] != 1 ||
+            strides[1] <  0 ||
+            strides[2] <  0 ||
+            strides[3] <  0) {
 
             out = copyArray(out);
         }
diff --git a/src/backend/opencl/Array.hpp b/src/backend/opencl/Array.hpp
index d1a4d97..207e303 100644
--- a/src/backend/opencl/Array.hpp
+++ b/src/backend/opencl/Array.hpp
@@ -90,13 +90,12 @@ namespace opencl
         af::dim4 data_dims;
 
         JIT::Node_ptr node;
-        dim_t offset;
         bool ready;
         bool owner;
 
         Array(af::dim4 dims);
 
-        Array(const Array<T>& parnt, const dim4 &dims, const dim4 &offset, const dim4 &stride);
+        Array(const Array<T>& parnt, const dim4 &dims, const dim_t &offset, const dim4 &stride);
         Array(Param &tmp);
         explicit Array(af::dim4 dims, JIT::Node_ptr n);
         explicit Array(af::dim4 dims, const T * const in_data);
@@ -117,7 +116,6 @@ namespace opencl
     RET_TYPE NAME() const { return info.NAME(); }
 
         INFO_FUNC(const af_dtype& ,getType)
-        INFO_FUNC(const af::dim4& ,offsets)
         INFO_FUNC(const af::dim4& ,strides)
         INFO_FUNC(size_t          ,elements)
         INFO_FUNC(size_t          ,ndims)
@@ -187,7 +185,7 @@ namespace opencl
 
         const dim_t getOffset() const
         {
-            return offset;
+            return info.getOffset();
         }
 
         Buffer_ptr getData() const

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list