[DRE-commits] [ruby-numo-narray] 06/13: consider alignment. modify loop for simd optimization

Wed Jun 21 08:37:49 UTC 2017

This is an automated email from the git hooks/post-receive script.

uwabami-guest pushed a commit to branch patch-queue/master
in repository ruby-numo-narray.

commit f244001fb810d88d647f45e8a964a353893fd000
Author: Masahiro TANAKA <masa16.tanaka at gmail.com>
Date:   Fri Jun 2 15:14:16 2017 +0900

    consider alignment. modify loop for simd optimization
    
    
    Gbp-Pq: Name 0006-consider-alignment.-modify-loop-for-simd-optimizatio.patch
---
 ext/numo/narray/gen/def/bit.rb          |  1 +
 ext/numo/narray/gen/def/dcomplex.rb     |  1 +
 ext/numo/narray/gen/def/dfloat.rb       |  1 +
 ext/numo/narray/gen/def/int16.rb        |  1 +
 ext/numo/narray/gen/def/int32.rb        |  1 +
 ext/numo/narray/gen/def/int64.rb        |  1 +
 ext/numo/narray/gen/def/int8.rb         |  1 +
 ext/numo/narray/gen/def/robject.rb      |  1 +
 ext/numo/narray/gen/def/scomplex.rb     |  1 +
 ext/numo/narray/gen/def/sfloat.rb       |  1 +
 ext/numo/narray/gen/def/uint16.rb       |  1 +
 ext/numo/narray/gen/def/uint32.rb       |  1 +
 ext/numo/narray/gen/def/uint64.rb       |  1 +
 ext/numo/narray/gen/def/uint8.rb        |  1 +
 ext/numo/narray/gen/tmpl/accum_binary.c | 34 ++++++++++++++++++--------------
 ext/numo/narray/gen/tmpl/binary.c       | 26 +++++++++++-------------
 ext/numo/narray/gen/tmpl/unary.c        | 35 +++++++++++++++++++++++++++------
 ext/numo/narray/numo/template.h         | 15 +++++++++++++-
 18 files changed, 88 insertions(+), 36 deletions(-)

diff --git a/ext/numo/narray/gen/def/bit.rb b/ext/numo/narray/gen/def/bit.rb
index 9173546..c14aeee 100644
--- a/ext/numo/narray/gen/def/bit.rb
+++ b/ext/numo/narray/gen/def/bit.rb
@@ -16,6 +16,7 @@ set is_object:     false
 set is_real:       false
 set is_comparable: false
 set is_double_precision: false
+set need_align:    false
 
 upcast_rb "Integer"
 upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/dcomplex.rb b/ext/numo/narray/gen/def/dcomplex.rb
index 7ced724..6282faf 100644
--- a/ext/numo/narray/gen/def/dcomplex.rb
+++ b/ext/numo/narray/gen/def/dcomplex.rb
@@ -18,6 +18,7 @@ set is_complex:          true
 set is_object:           false
 set is_comparable:       false
 set is_double_precision: true
+set need_align:          true
 
 upcast_rb "Integer"
 upcast_rb "Float"
diff --git a/ext/numo/narray/gen/def/dfloat.rb b/ext/numo/narray/gen/def/dfloat.rb
index fcbe812..a78a367 100644
--- a/ext/numo/narray/gen/def/dfloat.rb
+++ b/ext/numo/narray/gen/def/dfloat.rb
@@ -16,6 +16,7 @@ set is_object:           false
 set is_real:             true
 set is_comparable:       true
 set is_double_precision: true
+set need_align:          true
 
 upcast_rb "Integer"
 upcast_rb "Float"
diff --git a/ext/numo/narray/gen/def/int16.rb b/ext/numo/narray/gen/def/int16.rb
index 99fae54..bbc1b7d 100644
--- a/ext/numo/narray/gen/def/int16.rb
+++ b/ext/numo/narray/gen/def/int16.rb
@@ -15,6 +15,7 @@ set is_object:           false
 set is_real:             true
 set is_comparable:       true
 set is_double_precision: false
+set need_align:          true
 
 upcast_rb "Integer"
 upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/int32.rb b/ext/numo/narray/gen/def/int32.rb
index dc519b0..7ea2882 100644
--- a/ext/numo/narray/gen/def/int32.rb
+++ b/ext/numo/narray/gen/def/int32.rb
@@ -15,6 +15,7 @@ set is_object:           false
 set is_real:             true
 set is_comparable:       true
 set is_double_precision: false
+set need_align:          true
 
 upcast_rb "Integer"
 upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/int64.rb b/ext/numo/narray/gen/def/int64.rb
index 221bad2..cded10c 100644
--- a/ext/numo/narray/gen/def/int64.rb
+++ b/ext/numo/narray/gen/def/int64.rb
@@ -15,6 +15,7 @@ set is_object:           false
 set is_real:             true
 set is_comparable:       true
 set is_double_precision: false
+set need_align:          true
 
 upcast_rb "Integer"
 upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/int8.rb b/ext/numo/narray/gen/def/int8.rb
index 121a205..f171325 100644
--- a/ext/numo/narray/gen/def/int8.rb
+++ b/ext/numo/narray/gen/def/int8.rb
@@ -15,6 +15,7 @@ set is_object:           false
 set is_real:             true
 set is_comparable:       true
 set is_double_precision: false
+set need_align:          false
 
 upcast_rb "Integer"
 upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/robject.rb b/ext/numo/narray/gen/def/robject.rb
index e0996ca..6067d33 100644
--- a/ext/numo/narray/gen/def/robject.rb
+++ b/ext/numo/narray/gen/def/robject.rb
@@ -17,6 +17,7 @@ set is_complex:          false
 set is_object:           true
 set is_comparable:       true
 set is_double_precision: false
+set need_align:          false
 
 upcast_rb "Integer"
 upcast_rb "Float"
diff --git a/ext/numo/narray/gen/def/scomplex.rb b/ext/numo/narray/gen/def/scomplex.rb
index b0ceffb..63cf678 100644
--- a/ext/numo/narray/gen/def/scomplex.rb
+++ b/ext/numo/narray/gen/def/scomplex.rb
@@ -18,6 +18,7 @@ set is_complex:          true
 set is_object:           false
 set is_comparable:       false
 set is_double_precision: false
+set need_align:          true
 
 upcast_rb "Integer"
 upcast_rb "Float"
diff --git a/ext/numo/narray/gen/def/sfloat.rb b/ext/numo/narray/gen/def/sfloat.rb
index 47be181..cebc46f 100644
--- a/ext/numo/narray/gen/def/sfloat.rb
+++ b/ext/numo/narray/gen/def/sfloat.rb
@@ -16,6 +16,7 @@ set is_object:           false
 set is_real:             true
 set is_comparable:       true
 set is_double_precision: false
+set need_align:          true
 
 upcast_rb "Integer"
 upcast_rb "Float"
diff --git a/ext/numo/narray/gen/def/uint16.rb b/ext/numo/narray/gen/def/uint16.rb
index 83012c6..000014a 100644
--- a/ext/numo/narray/gen/def/uint16.rb
+++ b/ext/numo/narray/gen/def/uint16.rb
@@ -15,6 +15,7 @@ set is_object:           false
 set is_real:             true
 set is_comparable:       true
 set is_double_precision: false
+set need_align:          true
 
 upcast_rb "Integer"
 upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/uint32.rb b/ext/numo/narray/gen/def/uint32.rb
index 1269c4e..6556e11 100644
--- a/ext/numo/narray/gen/def/uint32.rb
+++ b/ext/numo/narray/gen/def/uint32.rb
@@ -15,6 +15,7 @@ set is_object:           false
 set is_real:             true
 set is_comparable:       true
 set is_double_precision: false
+set need_align:          true
 
 upcast_rb "Integer"
 upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/uint64.rb b/ext/numo/narray/gen/def/uint64.rb
index 4db9bef..8912e25 100644
--- a/ext/numo/narray/gen/def/uint64.rb
+++ b/ext/numo/narray/gen/def/uint64.rb
@@ -15,6 +15,7 @@ set is_object:           false
 set is_real:             true
 set is_comparable:       true
 set is_double_precision: false
+set need_align:          true
 
 upcast_rb "Integer"
 upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/uint8.rb b/ext/numo/narray/gen/def/uint8.rb
index 39ed153..74b70f3 100644
--- a/ext/numo/narray/gen/def/uint8.rb
+++ b/ext/numo/narray/gen/def/uint8.rb
@@ -15,6 +15,7 @@ set is_object:           false
 set is_real:             true
 set is_comparable:       true
 set is_double_precision: false
+set need_align:          false
 
 upcast_rb "Integer"
 upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/tmpl/accum_binary.c b/ext/numo/narray/gen/tmpl/accum_binary.c
index ed2579f..fa6d562 100644
--- a/ext/numo/narray/gen/tmpl/accum_binary.c
+++ b/ext/numo/narray/gen/tmpl/accum_binary.c
@@ -1,27 +1,31 @@
-<% (is_float ? ["","_nan"] : [""]).each do |j| %>
+//<% (is_float ? ["","_nan"] : [""]).each do |j| %>
 static void
 <%=c_iter%><%=j%>(na_loop_t *const lp)
 {
-    size_t   i;
+    size_t   i, n;
     char    *p1, *p2, *p3;
     ssize_t  s1, s2, s3;
-    dtype    x, y, z;
 
-    INIT_COUNTER(lp, i);
+    INIT_COUNTER(lp, n);
     INIT_PTR(lp, 0, p1, s1);
     INIT_PTR(lp, 1, p2, s2);
     INIT_PTR(lp, 2, p3, s3);
+
     if (s3==0) {
+        dtype z;
         // Reduce loop
         GET_DATA(p3,dtype,z);
-        for (; i--;) {
+        for (i=0; i<n; i++) {
+            dtype x, y;
             GET_DATA_STRIDE(p1,s1,dtype,x);
             GET_DATA_STRIDE(p2,s2,dtype,y);
             m_<%=name%><%=j%>(x,y,z);
         }
         SET_DATA(p3,dtype,z);
+        return;
     } else {
-        for (; i--;) {
+        for (i=0; i<n; i++) {
+            dtype x, y, z;
             GET_DATA_STRIDE(p1,s1,dtype,x);
             GET_DATA_STRIDE(p2,s2,dtype,y);
             GET_DATA(p3,dtype,z);
@@ -30,7 +34,7 @@ static void
         }
     }
 }
-<% end %>
+//<% end %>
 
 static VALUE
 <%=c_func%>_self(int argc, VALUE *argv, VALUE self)
@@ -47,11 +51,11 @@ static VALUE
     // should fix below: [self.ndim,other.ndim].max or?
     naryv[0] = self;
     naryv[1] = argv[0];
-  <% if is_float %>
+    //<% if is_float %>
     reduce = na_reduce_dimension(argc-1, argv+1, 2, naryv, &ndf, <%=c_iter%>_nan);
-  <% else %>
+    //<% else %>
     reduce = na_reduce_dimension(argc-1, argv+1, 2, naryv, &ndf, 0);
-  <% end %>
+    //<% end %>
 
     v =  na_ndloop(&ndf, 4, self, argv[0], reduce, m_<%=name%>_init);
     return <%=type_name%>_extract(v);
@@ -76,15 +80,15 @@ static VALUE
 static VALUE
 <%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
 {
-    <% if !is_object %>
+    //<% if !is_object %>
     VALUE klass, v;
-    <% end %>
+    //<% end %>
     if (argc < 1) {
         rb_raise(rb_eArgError,"wrong number of arguments (%d for >=1)",argc);
     }
-    <% if is_object %>
+    //<% if is_object %>
     return <%=c_func%>_self(argc, argv, self);
-    <% else %>
+    //<% else %>
     klass = na_upcast(CLASS_OF(self),CLASS_OF(argv[0]));
     if (klass==cT) {
         return <%=c_func%>_self(argc, argv, self);
@@ -92,5 +96,5 @@ static VALUE
         v = rb_funcall(klass, id_cast, 1, self);
         return rb_funcall2(v, rb_intern("<%=name%>"), argc, argv);
     }
-    <% end %>
+    //<% end %>
 }
diff --git a/ext/numo/narray/gen/tmpl/binary.c b/ext/numo/narray/gen/tmpl/binary.c
index 302635b..060b9c0 100644
--- a/ext/numo/narray/gen/tmpl/binary.c
+++ b/ext/numo/narray/gen/tmpl/binary.c
@@ -20,11 +20,10 @@ static void
     INIT_PTR(lp, 1, p2, s2);
     INIT_PTR(lp, 2, p3, s3);
 
-<% if /Int8$/ !~ class_name %>
-    if ((size_t)p1 % sizeof(dtype) == 0 &&
-        (size_t)p2 % sizeof(dtype) == 0 &&
-        (size_t)p3 % sizeof(dtype) == 0 ) {
-<% end %>
+    //<% if need_align %>
+    if (is_aligned(p1,sizeof(dtype)) &&
+        is_aligned(p2,sizeof(dtype)) &&
+        is_aligned(p3,sizeof(dtype)) ) {
 
         if (s1 == sizeof(dtype) &&
             s2 == sizeof(dtype) &&
@@ -35,11 +34,11 @@ static void
                 ((dtype*)p3)[i] = m_<%=name%>(((dtype*)p1)[i],((dtype*)p2)[i]);
             }
             return;
-        } else
-        if (s1 % sizeof(dtype) == 0 &&
-            s2 % sizeof(dtype) == 0 &&
-            s3 % sizeof(dtype) == 0 ) {
-
+        }
+        if (is_aligned_step(s1,sizeof(dtype)) &&
+            is_aligned_step(s2,sizeof(dtype)) &&
+            is_aligned_step(s3,sizeof(dtype)) ) {
+            //<% end %>
             for (i=0; i<n; i++) {
                 check_intdivzero(*(dtype*)p2);
                 *(dtype*)p3 = m_<%=name%>(*(dtype*)p1,*(dtype*)p2);
@@ -48,11 +47,10 @@ static void
                 p3 += s3;
             }
             return;
+            //<% if need_align %>
         }
-
-<% if /Int8$/ !~ class_name %>
     }
-    for (i=0; i<n; i+=2) {
+    for (i=0; i<n; i++) {
         dtype x, y, z;
         GET_DATA_STRIDE(p1,s1,dtype,x);
         GET_DATA_STRIDE(p2,s2,dtype,y);
@@ -60,7 +58,7 @@ static void
         z = m_<%=name%>(x,y);
         SET_DATA_STRIDE(p3,s3,dtype,z);
     }
-<% end %>
+    //<% end %>
 }
 #undef check_intdivzero
 
diff --git a/ext/numo/narray/gen/tmpl/unary.c b/ext/numo/narray/gen/tmpl/unary.c
index 382c638..0dcc94c 100644
--- a/ext/numo/narray/gen/tmpl/unary.c
+++ b/ext/numo/narray/gen/tmpl/unary.c
@@ -1,25 +1,25 @@
 static void
 <%=c_iter%>(na_loop_t *const lp)
 {
-    size_t  i;
+    size_t  i, n;
     char   *p1, *p2;
     ssize_t s1, s2;
     size_t *idx1, *idx2;
     dtype   x;
 
-    INIT_COUNTER(lp, i);
+    INIT_COUNTER(lp, n);
     INIT_PTR_IDX(lp, 0, p1, s1, idx1);
     INIT_PTR_IDX(lp, 1, p2, s2, idx2);
 
     if (idx1) {
         if (idx2) {
-            for (; i--;) {
+            for (i=0; i<n; i++) {
                 GET_DATA_INDEX(p1,idx1,dtype,x);
                 x = m_<%=name%>(x);
                 SET_DATA_INDEX(p2,idx2,dtype,x);
             }
         } else {
-            for (; i--;) {
+            for (i=0; i<n; i++) {
                 GET_DATA_INDEX(p1,idx1,dtype,x);
                 x = m_<%=name%>(x);
                 SET_DATA_STRIDE(p2,s2,dtype,x);
@@ -27,17 +27,40 @@ static void
         }
     } else {
         if (idx2) {
-            for (; i--;) {
+            for (i=0; i<n; i++) {
                 GET_DATA_STRIDE(p1,s1,dtype,x);
                 x = m_<%=name%>(x);
                 SET_DATA_INDEX(p2,idx2,dtype,x);
             }
         } else {
-            for (; i--;) {
+            //<% if need_align %>
+            if (is_aligned(p1,sizeof(dtype)) &&
+                is_aligned(p2,sizeof(dtype)) ) {
+                if (s1 == sizeof(dtype) &&
+                    s2 == sizeof(dtype) ) {
+                    for (i=0; i<n; i++) {
+                        ((dtype*)p2)[i] = m_<%=name%>(((dtype*)p1)[i]);
+                    }
+                    return;
+                }
+                if (is_aligned_step(s1,sizeof(dtype)) &&
+                    is_aligned_step(s2,sizeof(dtype)) ) {
+                    //<% end %>
+                    for (i=0; i<n; i++) {
+                        *(dtype*)p2 = m_<%=name%>(*(dtype*)p1);
+                        p1 += s1;
+                        p2 += s2;
+                    }
+                    return;
+                    //<% if need_align %>
+                }
+            }
+            for (i=0; i<n; i++) {
                 GET_DATA_STRIDE(p1,s1,dtype,x);
                 x = m_<%=name%>(x);
                 SET_DATA_STRIDE(p2,s2,dtype,x);
             }
+            //<% end %>
         }
     }
 }
diff --git a/ext/numo/narray/numo/template.h b/ext/numo/narray/numo/template.h
index 2fd6e0e..1d69996 100644
--- a/ext/numo/narray/numo/template.h
+++ b/ext/numo/narray/numo/template.h
@@ -133,4 +133,17 @@
     }
 // val -> val&1 ??
 
-#endif /* ifndef NARRAY_H */
+static inline int  /* returns nonzero when the address in ptr is a multiple of alignment */
+is_aligned(const void *ptr, const size_t alignment)
+{
+    return ((size_t)(ptr) & ((alignment)-1)) == 0;  /* low-bit mask test: equivalent to ptr % alignment == 0 only when alignment is a power of two */
+}
+
+static inline int  /* returns nonzero when the byte stride step is a multiple of alignment */
+is_aligned_step(const ssize_t step, const size_t alignment)
+{
+    return ((step) & ((alignment)-1)) == 0;  /* step is converted to unsigned for the mask, so negative multiples also pass; requires power-of-two alignment */
+}
+
+
+#endif /* ifndef TEMPLATE_H */

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ruby-extras/ruby-numo-narray.git