[arrayfire] 18/75: Fixes to internal functions
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Feb 29 08:01:10 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.
commit b260cc8ffb2261b5a00cb36d6531fa4b43b747ea
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Thu Feb 11 13:42:19 2016 -0500
Fixes to internal functions
- Was using incorrect number of elements for the total
- Fixed copy because right now isOwner() does not mean isLinear()
- Potentially improves performance when isLinear() is not isOwner()
---
src/api/c/internal.cpp | 4 ++++
src/backend/ArrayInfo.hpp | 1 +
src/backend/cpu/Array.cpp | 4 ++--
src/backend/cpu/copy.cpp | 2 +-
src/backend/cuda/Array.cpp | 4 ++--
src/backend/cuda/copy.cu | 2 +-
src/backend/opencl/Array.cpp | 4 ++--
src/backend/opencl/copy.cpp | 2 +-
8 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/src/api/c/internal.cpp b/src/api/c/internal.cpp
index d5f449e..cad4a46 100644
--- a/src/api/c/internal.cpp
+++ b/src/api/c/internal.cpp
@@ -40,6 +40,10 @@ af_err af_create_array_with_strides(af_array *arr,
dim4 dims(ndims, dims_);
dim4 strides(ndims, strides_);
+ for (int i = ndims; i < 4; i++) {
+ strides[i] = strides[i - 1] * dims[i - 1];
+ }
+
bool isdev = location == afDevice;
af_array res;
diff --git a/src/backend/ArrayInfo.hpp b/src/backend/ArrayInfo.hpp
index 38e5ea6..0983f06 100644
--- a/src/backend/ArrayInfo.hpp
+++ b/src/backend/ArrayInfo.hpp
@@ -82,6 +82,7 @@ public:
size_t elements() const { return dim_size.elements(); }
size_t ndims() const { return dim_size.ndims(); }
const af::dim4& dims() const { return dim_size; }
+ size_t total() const { return offset + dim_strides[3] * dim_size[3]; }
int getDevId() const;
diff --git a/src/backend/cpu/Array.cpp b/src/backend/cpu/Array.cpp
index 1b6098d..3edca87 100644
--- a/src/backend/cpu/Array.cpp
+++ b/src/backend/cpu/Array.cpp
@@ -71,14 +71,14 @@ template<typename T>
Array<T>::Array(af::dim4 dims, af::dim4 strides, dim_t offset_,
const T * const in_data, bool is_device) :
info(getActiveDeviceId(), dims, offset_, strides, (af_dtype)dtype_traits<T>::af_type),
- data(is_device ? (T*)in_data : memAlloc<T>(info.elements()), memFree<T>),
+ data(is_device ? (T*)in_data : memAlloc<T>(info.total()), memFree<T>),
data_dims(dims),
node(),
ready(true),
owner(true)
{
if (!is_device) {
- std::copy(in_data, in_data + dims.elements(), data.get());
+ std::copy(in_data, in_data + info.total(), data.get());
}
}
diff --git a/src/backend/cpu/copy.cpp b/src/backend/cpu/copy.cpp
index f844d95..0da304b 100644
--- a/src/backend/cpu/copy.cpp
+++ b/src/backend/cpu/copy.cpp
@@ -30,7 +30,7 @@ void copyData(T *to, const Array<T> &from)
{
from.eval();
getQueue().sync();
- if(from.isOwner()) {
+ if(from.isLinear()) {
// FIXME: Check for errors / exceptions
memcpy(to, from.get(), from.elements()*sizeof(T));
} else {
diff --git a/src/backend/cuda/Array.cpp b/src/backend/cuda/Array.cpp
index 370e8ec..c1cf810 100644
--- a/src/backend/cuda/Array.cpp
+++ b/src/backend/cuda/Array.cpp
@@ -90,7 +90,7 @@ namespace cuda
Array<T>::Array(af::dim4 dims, af::dim4 strides, dim_t offset_,
const T * const in_data, bool is_device) :
info(getActiveDeviceId(), dims, offset_, strides, (af_dtype)dtype_traits<T>::af_type),
- data(is_device ? (T*)in_data : memAlloc<T>(info.elements()), memFree<T>),
+ data(is_device ? (T*)in_data : memAlloc<T>(info.total()), memFree<T>),
data_dims(dims),
node(),
ready(true),
@@ -98,7 +98,7 @@ namespace cuda
{
if (!is_device) {
cudaStream_t stream = getStream(getActiveDeviceId());
- CUDA_CHECK(cudaMemcpyAsync(data.get(), in_data, info.elements() * sizeof(T),
+ CUDA_CHECK(cudaMemcpyAsync(data.get(), in_data, info.total() * sizeof(T),
cudaMemcpyHostToDevice, stream));
CUDA_CHECK(cudaStreamSynchronize(stream));
}
diff --git a/src/backend/cuda/copy.cu b/src/backend/cuda/copy.cu
index df435d2..35e5c83 100644
--- a/src/backend/cuda/copy.cu
+++ b/src/backend/cuda/copy.cu
@@ -28,7 +28,7 @@ namespace cuda
Array<T> out = A;
const T *ptr = NULL;
- if (A.isOwner() || // No offsets, No strides
+ if (A.isLinear() || // No offsets, No strides
A.ndims() == 1 // Simple offset, no strides.
) {
diff --git a/src/backend/opencl/Array.cpp b/src/backend/opencl/Array.cpp
index fb3e63b..bd576ca 100644
--- a/src/backend/opencl/Array.cpp
+++ b/src/backend/opencl/Array.cpp
@@ -105,14 +105,14 @@ namespace opencl
info(getActiveDeviceId(), dims, offset_, strides, (af_dtype)dtype_traits<T>::af_type),
data(is_device ?
(new cl::Buffer((cl_mem)in_data)) :
- (bufferAlloc(info.elements() * sizeof(T))), bufferFree),
+ (bufferAlloc(info.total() * sizeof(T))), bufferFree),
data_dims(dims),
node(),
ready(true),
owner(true)
{
if (!is_device) {
- getQueue().enqueueWriteBuffer(*data.get(), CL_TRUE, 0, sizeof(T) * info.elements(), in_data);
+ getQueue().enqueueWriteBuffer(*data.get(), CL_TRUE, 0, sizeof(T) * info.total(), in_data);
}
}
diff --git a/src/backend/opencl/copy.cpp b/src/backend/opencl/copy.cpp
index 39cbf4b..e1716f1 100644
--- a/src/backend/opencl/copy.cpp
+++ b/src/backend/opencl/copy.cpp
@@ -29,7 +29,7 @@ namespace opencl
cl::Buffer buf;
Array<T> out = A;
- if (A.isOwner() || // No offsets, No strides
+ if (A.isLinear() || // No offsets, No strides
A.ndims() == 1 // Simple offset, no strides.
) {
buf = *A.get();
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list