[arrayfire] 235/408: PERF: improvements to element wise operations in CPU backend
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:04 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 26a2e408a747bd17c4722e2b9832ff13d0479635
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Wed Aug 12 15:33:16 2015 -0400
PERF: improvements to element wise operations in CPU backend
- Improved performance when all buffers can be indexed linearly
---
src/backend/cpu/Array.cpp | 51 +++++++++++++++++++++++++-------------
src/backend/cpu/TNJ/BinaryNode.hpp | 13 ++++++++++
src/backend/cpu/TNJ/BufferNode.hpp | 42 +++++++++++++++++++++----------
src/backend/cpu/TNJ/Node.hpp | 8 +++++-
src/backend/cpu/TNJ/ScalarNode.hpp | 7 ++++++
src/backend/cpu/TNJ/UnaryNode.hpp | 11 ++++++++
6 files changed, 101 insertions(+), 31 deletions(-)
diff --git a/src/backend/cpu/Array.cpp b/src/backend/cpu/Array.cpp
index 56bd9f8..683fc1a 100644
--- a/src/backend/cpu/Array.cpp
+++ b/src/backend/cpu/Array.cpp
@@ -65,38 +65,54 @@ namespace cpu
{ }
template<typename T>
- void Array<T>::eval()
+ std::shared_ptr<T> evalNodes(const int &num,
+ const dim4 &odims,
+ const dim4 &ostrs,
+ TNJ::Node_ptr &node)
{
- if (isReady()) return;
- this->setId(getActiveDeviceId());
- data = std::shared_ptr<T>(memAlloc<T>(elements()), memFree<T>);
+ std::shared_ptr<T> data(memAlloc<T>(num), memFree<T>);
T *ptr = data.get();
- dim4 ostrs = strides();
- dim4 odims = dims();
+ bool is_linear = node->isLinear(odims.get());
- for (int w = 0; w < (int)odims[3]; w++) {
- dim_t offw = w * ostrs[3];
+ if (is_linear) {
+ for (int i = 0; i < num; i++) {
+ ptr[i] = *(T *)node->calc(i);
+ }
+ } else {
+ for (int w = 0; w < (int)odims[3]; w++) {
+ dim_t offw = w * ostrs[3];
- for (int z = 0; z < (int)odims[2]; z++) {
- dim_t offz = z * ostrs[2] + offw;
+ for (int z = 0; z < (int)odims[2]; z++) {
+ dim_t offz = z * ostrs[2] + offw;
- for (int y = 0; y < (int)odims[1]; y++) {
- dim_t offy = y * ostrs[1] + offz;
+ for (int y = 0; y < (int)odims[1]; y++) {
+ dim_t offy = y * ostrs[1] + offz;
- for (int x = 0; x < (int)odims[0]; x++) {
- dim_t id = x + offy;
+ for (int x = 0; x < (int)odims[0]; x++) {
+ dim_t id = x + offy;
- ptr[id] = *(T *)node->calc(x, y, z, w);
+ ptr[id] = *(T *)node->calc(x, y, z, w);
+ }
}
}
}
}
+ return data;
+ }
- ready = true;
+ template<typename T>
+ void Array<T>::eval()
+ {
+ if (isReady()) return;
+ this->setId(getActiveDeviceId());
+
+ data = evalNodes<T>(elements(), dims(), strides(), node);
+
+ ready = true;
Node_ptr prev = node;
prev->reset();
// FIXME: Replace the current node in any JIT possible trees with the new BufferNode
@@ -121,7 +137,8 @@ namespace cpu
bytes,
offset,
dims().get(),
- strides().get());
+ strides().get(),
+ isLinear());
const_cast<Array<T> *>(this)->node = Node_ptr(reinterpret_cast<Node *>(buf_node));
}
diff --git a/src/backend/cpu/TNJ/BinaryNode.hpp b/src/backend/cpu/TNJ/BinaryNode.hpp
index 5c1f5b6..f86869b 100644
--- a/src/backend/cpu/TNJ/BinaryNode.hpp
+++ b/src/backend/cpu/TNJ/BinaryNode.hpp
@@ -12,6 +12,7 @@
#include <optypes.hpp>
#include <vector>
#include <math.hpp>
+#include "Node.hpp"
namespace cpu
{
@@ -54,6 +55,13 @@ namespace TNJ
return (void *)&m_val;
}
+ void *calc(int idx)
+ {
+ m_val = m_op.eval(*(Ti *)m_lhs->calc(idx),
+ *(Ti *)m_rhs->calc(idx));
+ return (void *)&m_val;
+ }
+
void getInfo(unsigned &len, unsigned &buf_count, unsigned &bytes)
{
if (m_is_eval) return;
@@ -72,6 +80,11 @@ namespace TNJ
m_rhs->reset();
m_is_eval = false;
}
+
+ bool isLinear(const dim_t *dims)
+ {
+ return m_lhs->isLinear(dims) && m_rhs->isLinear(dims);
+ }
};
}
diff --git a/src/backend/cpu/TNJ/BufferNode.hpp b/src/backend/cpu/TNJ/BufferNode.hpp
index 581c047..a215aac 100644
--- a/src/backend/cpu/TNJ/BufferNode.hpp
+++ b/src/backend/cpu/TNJ/BufferNode.hpp
@@ -27,36 +27,43 @@ namespace TNJ
protected:
shared_ptr<T> ptr;
unsigned m_bytes;
- dim_t off;
- dim_t strides[4];
- dim_t dims[4];
-
+ bool m_is_linear;
+ dim_t m_off;
+ dim_t m_strides[4];
+ dim_t m_dims[4];
public:
BufferNode(shared_ptr<T> data,
unsigned bytes,
dim_t data_off,
const dim_t *dms,
- const dim_t *strs) :
+ const dim_t *strs,
+ const bool is_linear) :
Node(),
ptr(data),
m_bytes(bytes),
- off(data_off)
+ m_is_linear(is_linear),
+ m_off(data_off)
{
for (int i = 0; i < 4; i++) {
- strides[i] = strs[i];
- dims[i] = dms[i];
+ m_strides[i] = strs[i];
+ m_dims[i] = dms[i];
}
}
void *calc(int x, int y, int z, int w)
{
dim_t l_off = 0;
- l_off += (w < (int)dims[3]) * w * strides[3];
- l_off += (z < (int)dims[2]) * z * strides[2];
- l_off += (y < (int)dims[1]) * y * strides[1];
- l_off += (x < (int)dims[0]) * x;
- return (void *)(ptr.get() + off + l_off);
+ l_off += (w < (int)m_dims[3]) * w * m_strides[3];
+ l_off += (z < (int)m_dims[2]) * z * m_strides[2];
+ l_off += (y < (int)m_dims[1]) * y * m_strides[1];
+ l_off += (x < (int)m_dims[0]) * x;
+ return (void *)(ptr.get() + m_off + l_off);
+ }
+
+ void *calc(int idx)
+ {
+ return (void *)(ptr.get() + idx + m_off);
}
void getInfo(unsigned &len, unsigned &buf_count, unsigned &bytes)
@@ -74,6 +81,15 @@ namespace TNJ
{
m_is_eval = false;
}
+
+ bool isLinear(const dim_t *dims)
+ {
+ return m_is_linear &&
+ dims[0] == m_dims[0] &&
+ dims[1] == m_dims[1] &&
+ dims[2] == m_dims[2] &&
+ dims[3] == m_dims[3];
+ }
};
}
diff --git a/src/backend/cpu/TNJ/Node.hpp b/src/backend/cpu/TNJ/Node.hpp
index 09faefd..21c672d 100644
--- a/src/backend/cpu/TNJ/Node.hpp
+++ b/src/backend/cpu/TNJ/Node.hpp
@@ -11,7 +11,6 @@
#include <af/array.h>
#include <optypes.hpp>
#include <vector>
-#include "Node.hpp"
#include <memory>
namespace cpu
@@ -35,6 +34,12 @@ namespace TNJ
return NULL;
}
+ virtual void *calc(int idx)
+ {
+ m_is_eval = true;
+ return NULL;
+ }
+
virtual void getInfo(unsigned &len, unsigned &buf_count, unsigned &bytes)
{
len = 0;
@@ -42,6 +47,7 @@ namespace TNJ
bytes = 0;
}
+ virtual bool isLinear(const dim_t *dims) { return true; }
virtual void reset() { m_is_eval = false;}
virtual ~Node() {}
diff --git a/src/backend/cpu/TNJ/ScalarNode.hpp b/src/backend/cpu/TNJ/ScalarNode.hpp
index ee2bfbc..c6527fd 100644
--- a/src/backend/cpu/TNJ/ScalarNode.hpp
+++ b/src/backend/cpu/TNJ/ScalarNode.hpp
@@ -34,6 +34,11 @@ namespace TNJ
return (void *)(&m_val);
}
+ void *calc(int idx)
+ {
+ return (void *)&m_val;
+ }
+
void getInfo(unsigned &len, unsigned &buf_count, unsigned &bytes)
{
if (m_is_eval) return;
@@ -43,6 +48,8 @@ namespace TNJ
}
void reset() { m_is_eval = false; }
+
+ bool isLinear(const dim_t *dims) { return true; }
};
}
diff --git a/src/backend/cpu/TNJ/UnaryNode.hpp b/src/backend/cpu/TNJ/UnaryNode.hpp
index 035a756..4320eb3 100644
--- a/src/backend/cpu/TNJ/UnaryNode.hpp
+++ b/src/backend/cpu/TNJ/UnaryNode.hpp
@@ -52,6 +52,12 @@ namespace TNJ
return (void *)(&m_val);
}
+ void *calc(int idx)
+ {
+ m_val = m_op.eval(*(Ti *)m_child->calc(idx));
+ return (void *)&m_val;
+ }
+
void getInfo(unsigned &len, unsigned &buf_count, unsigned &bytes)
{
if (m_is_eval) return;
@@ -68,6 +74,11 @@ namespace TNJ
m_child->reset();
m_is_eval = false;
}
+
+ bool isLinear(const dim_t *dims)
+ {
+ return m_child->isLinear(dims);
+ }
};
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list