[arrayfire] 235/408: PERF: improvements to element-wise operations in CPU backend

Mon Sep 21 19:12:04 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 26a2e408a747bd17c4722e2b9832ff13d0479635
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Wed Aug 12 15:33:16 2015 -0400

    PERF: improvements to element-wise operations in CPU backend
    
    - Improved performance when all buffers can be indexed linearly
---
 src/backend/cpu/Array.cpp          | 51 +++++++++++++++++++++++++-------------
 src/backend/cpu/TNJ/BinaryNode.hpp | 13 ++++++++++
 src/backend/cpu/TNJ/BufferNode.hpp | 42 +++++++++++++++++++++----------
 src/backend/cpu/TNJ/Node.hpp       |  8 +++++-
 src/backend/cpu/TNJ/ScalarNode.hpp |  7 ++++++
 src/backend/cpu/TNJ/UnaryNode.hpp  | 11 ++++++++
 6 files changed, 101 insertions(+), 31 deletions(-)

diff --git a/src/backend/cpu/Array.cpp b/src/backend/cpu/Array.cpp
index 56bd9f8..683fc1a 100644
--- a/src/backend/cpu/Array.cpp
+++ b/src/backend/cpu/Array.cpp
@@ -65,38 +65,54 @@ namespace cpu
     { }
 
     template<typename T>
-    void Array<T>::eval()
+    std::shared_ptr<T> evalNodes(const int &num,
+                                 const dim4 &odims,
+                                 const dim4 &ostrs,
+                                 TNJ::Node_ptr &node)
     {
-        if (isReady()) return;
 
-        this->setId(getActiveDeviceId());
-        data = std::shared_ptr<T>(memAlloc<T>(elements()), memFree<T>);
+        std::shared_ptr<T> data(memAlloc<T>(num), memFree<T>);
         T *ptr = data.get();
 
-        dim4 ostrs = strides();
-        dim4 odims = dims();
+        bool is_linear = node->isLinear(odims.get());
 
-        for (int w = 0; w < (int)odims[3]; w++) {
-            dim_t offw = w * ostrs[3];
+        if (is_linear) {
+            for (int i = 0; i < num; i++) {
+                ptr[i] = *(T *)node->calc(i);
+            }
+        } else {
+            for (int w = 0; w < (int)odims[3]; w++) {
+                dim_t offw = w * ostrs[3];
 
-            for (int z = 0; z < (int)odims[2]; z++) {
-                dim_t offz = z * ostrs[2] + offw;
+                for (int z = 0; z < (int)odims[2]; z++) {
+                    dim_t offz = z * ostrs[2] + offw;
 
-                for (int y = 0; y < (int)odims[1]; y++) {
-                    dim_t offy = y * ostrs[1] + offz;
+                    for (int y = 0; y < (int)odims[1]; y++) {
+                        dim_t offy = y * ostrs[1] + offz;
 
-                    for (int x = 0; x < (int)odims[0]; x++) {
-                        dim_t id = x + offy;
+                        for (int x = 0; x < (int)odims[0]; x++) {
+                            dim_t id = x + offy;
 
-                        ptr[id] = *(T *)node->calc(x, y, z, w);
+                            ptr[id] = *(T *)node->calc(x, y, z, w);
+                        }
                     }
                 }
             }
         }
 
+        return data;
+    }
 
-        ready = true;
+    template<typename T>
+    void Array<T>::eval()
+    {
+        if (isReady()) return;
 
+        this->setId(getActiveDeviceId());
+
+        data = evalNodes<T>(elements(), dims(), strides(), node);
+
+        ready = true;
         Node_ptr prev = node;
         prev->reset();
         // FIXME: Replace the current node in any JIT possible trees with the new BufferNode
@@ -121,7 +137,8 @@ namespace cpu
                                                         bytes,
                                                         offset,
                                                         dims().get(),
-                                                        strides().get());
+                                                        strides().get(),
+                                                        isLinear());
 
             const_cast<Array<T> *>(this)->node = Node_ptr(reinterpret_cast<Node *>(buf_node));
         }
diff --git a/src/backend/cpu/TNJ/BinaryNode.hpp b/src/backend/cpu/TNJ/BinaryNode.hpp
index 5c1f5b6..f86869b 100644
--- a/src/backend/cpu/TNJ/BinaryNode.hpp
+++ b/src/backend/cpu/TNJ/BinaryNode.hpp
@@ -12,6 +12,7 @@
 #include <optypes.hpp>
 #include <vector>
 #include <math.hpp>
+#include "Node.hpp"
 
 namespace cpu
 {
@@ -54,6 +55,13 @@ namespace TNJ
             return  (void *)&m_val;
         }
 
+        void *calc(int idx)
+        {
+            m_val = m_op.eval(*(Ti *)m_lhs->calc(idx),
+                              *(Ti *)m_rhs->calc(idx));
+            return (void *)&m_val;
+        }
+
         void getInfo(unsigned &len, unsigned &buf_count, unsigned &bytes)
         {
             if (m_is_eval) return;
@@ -72,6 +80,11 @@ namespace TNJ
             m_rhs->reset();
             m_is_eval = false;
         }
+
+        bool isLinear(const dim_t *dims)
+        {
+            return m_lhs->isLinear(dims) && m_rhs->isLinear(dims);
+        }
     };
 
 }
diff --git a/src/backend/cpu/TNJ/BufferNode.hpp b/src/backend/cpu/TNJ/BufferNode.hpp
index 581c047..a215aac 100644
--- a/src/backend/cpu/TNJ/BufferNode.hpp
+++ b/src/backend/cpu/TNJ/BufferNode.hpp
@@ -27,36 +27,43 @@ namespace TNJ
     protected:
         shared_ptr<T> ptr;
         unsigned m_bytes;
-        dim_t off;
-        dim_t strides[4];
-        dim_t dims[4];
-
+        bool m_is_linear;
+        dim_t m_off;
+        dim_t m_strides[4];
+        dim_t m_dims[4];
     public:
 
         BufferNode(shared_ptr<T> data,
                    unsigned bytes,
                    dim_t data_off,
                    const dim_t *dms,
-                   const dim_t *strs) :
+                   const dim_t *strs,
+                   const bool is_linear) :
             Node(),
             ptr(data),
             m_bytes(bytes),
-            off(data_off)
+            m_is_linear(is_linear),
+            m_off(data_off)
         {
             for (int i = 0; i < 4; i++) {
-                strides[i] = strs[i];
-                dims[i] = dms[i];
+                m_strides[i] = strs[i];
+                m_dims[i] = dms[i];
             }
         }
 
         void *calc(int x, int y, int z, int w)
         {
             dim_t l_off = 0;
-            l_off += (w < (int)dims[3]) * w * strides[3];
-            l_off += (z < (int)dims[2]) * z * strides[2];
-            l_off += (y < (int)dims[1]) * y * strides[1];
-            l_off += (x < (int)dims[0]) * x;
-            return (void *)(ptr.get() + off + l_off);
+            l_off += (w < (int)m_dims[3]) * w * m_strides[3];
+            l_off += (z < (int)m_dims[2]) * z * m_strides[2];
+            l_off += (y < (int)m_dims[1]) * y * m_strides[1];
+            l_off += (x < (int)m_dims[0]) * x;
+            return (void *)(ptr.get() + m_off + l_off);
+        }
+
+        void *calc(int idx)
+        {
+            return (void *)(ptr.get() + idx + m_off);
         }
 
         void getInfo(unsigned &len, unsigned &buf_count, unsigned &bytes)
@@ -74,6 +81,15 @@ namespace TNJ
         {
             m_is_eval = false;
         }
+
+        bool isLinear(const dim_t *dims)
+        {
+            return m_is_linear &&
+                dims[0] == m_dims[0] &&
+                dims[1] == m_dims[1] &&
+                dims[2] == m_dims[2] &&
+                dims[3] == m_dims[3];
+        }
     };
 
 }
diff --git a/src/backend/cpu/TNJ/Node.hpp b/src/backend/cpu/TNJ/Node.hpp
index 09faefd..21c672d 100644
--- a/src/backend/cpu/TNJ/Node.hpp
+++ b/src/backend/cpu/TNJ/Node.hpp
@@ -11,7 +11,6 @@
 #include <af/array.h>
 #include <optypes.hpp>
 #include <vector>
-#include "Node.hpp"
 #include <memory>
 
 namespace cpu
@@ -35,6 +34,12 @@ namespace TNJ
             return NULL;
         }
 
+        virtual void *calc(int idx)
+        {
+            m_is_eval = true;
+            return NULL;
+        }
+
         virtual void getInfo(unsigned &len, unsigned &buf_count, unsigned &bytes)
         {
             len = 0;
@@ -42,6 +47,7 @@ namespace TNJ
             bytes = 0;
         }
 
+        virtual bool isLinear(const dim_t *dims) { return true; }
         virtual void reset() { m_is_eval = false;}
 
         virtual ~Node() {}
diff --git a/src/backend/cpu/TNJ/ScalarNode.hpp b/src/backend/cpu/TNJ/ScalarNode.hpp
index ee2bfbc..c6527fd 100644
--- a/src/backend/cpu/TNJ/ScalarNode.hpp
+++ b/src/backend/cpu/TNJ/ScalarNode.hpp
@@ -34,6 +34,11 @@ namespace TNJ
             return (void *)(&m_val);
         }
 
+        void *calc(int idx)
+        {
+            return (void *)&m_val;
+        }
+
         void getInfo(unsigned &len, unsigned &buf_count, unsigned &bytes)
         {
             if (m_is_eval) return;
@@ -43,6 +48,8 @@ namespace TNJ
         }
 
         void reset() { m_is_eval = false; }
+
+        bool isLinear(const dim_t *dims) { return true; }
     };
 }
 
diff --git a/src/backend/cpu/TNJ/UnaryNode.hpp b/src/backend/cpu/TNJ/UnaryNode.hpp
index 035a756..4320eb3 100644
--- a/src/backend/cpu/TNJ/UnaryNode.hpp
+++ b/src/backend/cpu/TNJ/UnaryNode.hpp
@@ -52,6 +52,12 @@ namespace TNJ
             return (void *)(&m_val);
         }
 
+        void *calc(int idx)
+        {
+            m_val = m_op.eval(*(Ti *)m_child->calc(idx));
+            return (void *)&m_val;
+        }
+
         void getInfo(unsigned &len, unsigned &buf_count, unsigned &bytes)
         {
             if (m_is_eval) return;
@@ -68,6 +74,11 @@ namespace TNJ
             m_child->reset();
             m_is_eval = false;
         }
+
+        bool isLinear(const dim_t *dims)
+        {
+            return m_child->isLinear(dims);
+        }
     };
 
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git