[DRE-commits] [ruby-numo-narray] 06/13: consider alignment. modify loop for simd optimization
Youhei SASAKI
uwabami-guest at moszumanska.debian.org
Wed Jun 21 08:37:49 UTC 2017
This is an automated email from the git hooks/post-receive script.
uwabami-guest pushed a commit to branch patch-queue/master
in repository ruby-numo-narray.
commit f244001fb810d88d647f45e8a964a353893fd000
Author: Masahiro TANAKA <masa16.tanaka at gmail.com>
Date: Fri Jun 2 15:14:16 2017 +0900
consider alignment. modify loop for simd optimization
Gbp-Pq: Name 0006-consider-alignment.-modify-loop-for-simd-optimizatio.patch
---
ext/numo/narray/gen/def/bit.rb | 1 +
ext/numo/narray/gen/def/dcomplex.rb | 1 +
ext/numo/narray/gen/def/dfloat.rb | 1 +
ext/numo/narray/gen/def/int16.rb | 1 +
ext/numo/narray/gen/def/int32.rb | 1 +
ext/numo/narray/gen/def/int64.rb | 1 +
ext/numo/narray/gen/def/int8.rb | 1 +
ext/numo/narray/gen/def/robject.rb | 1 +
ext/numo/narray/gen/def/scomplex.rb | 1 +
ext/numo/narray/gen/def/sfloat.rb | 1 +
ext/numo/narray/gen/def/uint16.rb | 1 +
ext/numo/narray/gen/def/uint32.rb | 1 +
ext/numo/narray/gen/def/uint64.rb | 1 +
ext/numo/narray/gen/def/uint8.rb | 1 +
ext/numo/narray/gen/tmpl/accum_binary.c | 34 ++++++++++++++++++--------------
ext/numo/narray/gen/tmpl/binary.c | 26 +++++++++++-------------
ext/numo/narray/gen/tmpl/unary.c | 35 +++++++++++++++++++++++++++------
ext/numo/narray/numo/template.h | 15 +++++++++++++-
18 files changed, 88 insertions(+), 36 deletions(-)
diff --git a/ext/numo/narray/gen/def/bit.rb b/ext/numo/narray/gen/def/bit.rb
index 9173546..c14aeee 100644
--- a/ext/numo/narray/gen/def/bit.rb
+++ b/ext/numo/narray/gen/def/bit.rb
@@ -16,6 +16,7 @@ set is_object: false
set is_real: false
set is_comparable: false
set is_double_precision: false
+set need_align: false
upcast_rb "Integer"
upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/dcomplex.rb b/ext/numo/narray/gen/def/dcomplex.rb
index 7ced724..6282faf 100644
--- a/ext/numo/narray/gen/def/dcomplex.rb
+++ b/ext/numo/narray/gen/def/dcomplex.rb
@@ -18,6 +18,7 @@ set is_complex: true
set is_object: false
set is_comparable: false
set is_double_precision: true
+set need_align: true
upcast_rb "Integer"
upcast_rb "Float"
diff --git a/ext/numo/narray/gen/def/dfloat.rb b/ext/numo/narray/gen/def/dfloat.rb
index fcbe812..a78a367 100644
--- a/ext/numo/narray/gen/def/dfloat.rb
+++ b/ext/numo/narray/gen/def/dfloat.rb
@@ -16,6 +16,7 @@ set is_object: false
set is_real: true
set is_comparable: true
set is_double_precision: true
+set need_align: true
upcast_rb "Integer"
upcast_rb "Float"
diff --git a/ext/numo/narray/gen/def/int16.rb b/ext/numo/narray/gen/def/int16.rb
index 99fae54..bbc1b7d 100644
--- a/ext/numo/narray/gen/def/int16.rb
+++ b/ext/numo/narray/gen/def/int16.rb
@@ -15,6 +15,7 @@ set is_object: false
set is_real: true
set is_comparable: true
set is_double_precision: false
+set need_align: true
upcast_rb "Integer"
upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/int32.rb b/ext/numo/narray/gen/def/int32.rb
index dc519b0..7ea2882 100644
--- a/ext/numo/narray/gen/def/int32.rb
+++ b/ext/numo/narray/gen/def/int32.rb
@@ -15,6 +15,7 @@ set is_object: false
set is_real: true
set is_comparable: true
set is_double_precision: false
+set need_align: true
upcast_rb "Integer"
upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/int64.rb b/ext/numo/narray/gen/def/int64.rb
index 221bad2..cded10c 100644
--- a/ext/numo/narray/gen/def/int64.rb
+++ b/ext/numo/narray/gen/def/int64.rb
@@ -15,6 +15,7 @@ set is_object: false
set is_real: true
set is_comparable: true
set is_double_precision: false
+set need_align: true
upcast_rb "Integer"
upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/int8.rb b/ext/numo/narray/gen/def/int8.rb
index 121a205..f171325 100644
--- a/ext/numo/narray/gen/def/int8.rb
+++ b/ext/numo/narray/gen/def/int8.rb
@@ -15,6 +15,7 @@ set is_object: false
set is_real: true
set is_comparable: true
set is_double_precision: false
+set need_align: false
upcast_rb "Integer"
upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/robject.rb b/ext/numo/narray/gen/def/robject.rb
index e0996ca..6067d33 100644
--- a/ext/numo/narray/gen/def/robject.rb
+++ b/ext/numo/narray/gen/def/robject.rb
@@ -17,6 +17,7 @@ set is_complex: false
set is_object: true
set is_comparable: true
set is_double_precision: false
+set need_align: false
upcast_rb "Integer"
upcast_rb "Float"
diff --git a/ext/numo/narray/gen/def/scomplex.rb b/ext/numo/narray/gen/def/scomplex.rb
index b0ceffb..63cf678 100644
--- a/ext/numo/narray/gen/def/scomplex.rb
+++ b/ext/numo/narray/gen/def/scomplex.rb
@@ -18,6 +18,7 @@ set is_complex: true
set is_object: false
set is_comparable: false
set is_double_precision: false
+set need_align: true
upcast_rb "Integer"
upcast_rb "Float"
diff --git a/ext/numo/narray/gen/def/sfloat.rb b/ext/numo/narray/gen/def/sfloat.rb
index 47be181..cebc46f 100644
--- a/ext/numo/narray/gen/def/sfloat.rb
+++ b/ext/numo/narray/gen/def/sfloat.rb
@@ -16,6 +16,7 @@ set is_object: false
set is_real: true
set is_comparable: true
set is_double_precision: false
+set need_align: true
upcast_rb "Integer"
upcast_rb "Float"
diff --git a/ext/numo/narray/gen/def/uint16.rb b/ext/numo/narray/gen/def/uint16.rb
index 83012c6..000014a 100644
--- a/ext/numo/narray/gen/def/uint16.rb
+++ b/ext/numo/narray/gen/def/uint16.rb
@@ -15,6 +15,7 @@ set is_object: false
set is_real: true
set is_comparable: true
set is_double_precision: false
+set need_align: true
upcast_rb "Integer"
upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/uint32.rb b/ext/numo/narray/gen/def/uint32.rb
index 1269c4e..6556e11 100644
--- a/ext/numo/narray/gen/def/uint32.rb
+++ b/ext/numo/narray/gen/def/uint32.rb
@@ -15,6 +15,7 @@ set is_object: false
set is_real: true
set is_comparable: true
set is_double_precision: false
+set need_align: true
upcast_rb "Integer"
upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/uint64.rb b/ext/numo/narray/gen/def/uint64.rb
index 4db9bef..8912e25 100644
--- a/ext/numo/narray/gen/def/uint64.rb
+++ b/ext/numo/narray/gen/def/uint64.rb
@@ -15,6 +15,7 @@ set is_object: false
set is_real: true
set is_comparable: true
set is_double_precision: false
+set need_align: true
upcast_rb "Integer"
upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/def/uint8.rb b/ext/numo/narray/gen/def/uint8.rb
index 39ed153..74b70f3 100644
--- a/ext/numo/narray/gen/def/uint8.rb
+++ b/ext/numo/narray/gen/def/uint8.rb
@@ -15,6 +15,7 @@ set is_object: false
set is_real: true
set is_comparable: true
set is_double_precision: false
+set need_align: false
upcast_rb "Integer"
upcast_rb "Float", "DFloat"
diff --git a/ext/numo/narray/gen/tmpl/accum_binary.c b/ext/numo/narray/gen/tmpl/accum_binary.c
index ed2579f..fa6d562 100644
--- a/ext/numo/narray/gen/tmpl/accum_binary.c
+++ b/ext/numo/narray/gen/tmpl/accum_binary.c
@@ -1,27 +1,31 @@
-<% (is_float ? ["","_nan"] : [""]).each do |j| %>
+//<% (is_float ? ["","_nan"] : [""]).each do |j| %>
static void
<%=c_iter%><%=j%>(na_loop_t *const lp)
{
- size_t i;
+ size_t i, n;
char *p1, *p2, *p3;
ssize_t s1, s2, s3;
- dtype x, y, z;
- INIT_COUNTER(lp, i);
+ INIT_COUNTER(lp, n);
INIT_PTR(lp, 0, p1, s1);
INIT_PTR(lp, 1, p2, s2);
INIT_PTR(lp, 2, p3, s3);
+
if (s3==0) {
+ dtype z;
// Reduce loop
GET_DATA(p3,dtype,z);
- for (; i--;) {
+ for (i=0; i<n; i++) {
+ dtype x, y;
GET_DATA_STRIDE(p1,s1,dtype,x);
GET_DATA_STRIDE(p2,s2,dtype,y);
m_<%=name%><%=j%>(x,y,z);
}
SET_DATA(p3,dtype,z);
+ return;
} else {
- for (; i--;) {
+ for (i=0; i<n; i++) {
+ dtype x, y, z;
GET_DATA_STRIDE(p1,s1,dtype,x);
GET_DATA_STRIDE(p2,s2,dtype,y);
GET_DATA(p3,dtype,z);
@@ -30,7 +34,7 @@ static void
}
}
}
-<% end %>
+//<% end %>
static VALUE
<%=c_func%>_self(int argc, VALUE *argv, VALUE self)
@@ -47,11 +51,11 @@ static VALUE
// should fix below: [self.ndim,other.ndim].max or?
naryv[0] = self;
naryv[1] = argv[0];
- <% if is_float %>
+ //<% if is_float %>
reduce = na_reduce_dimension(argc-1, argv+1, 2, naryv, &ndf, <%=c_iter%>_nan);
- <% else %>
+ //<% else %>
reduce = na_reduce_dimension(argc-1, argv+1, 2, naryv, &ndf, 0);
- <% end %>
+ //<% end %>
v = na_ndloop(&ndf, 4, self, argv[0], reduce, m_<%=name%>_init);
return <%=type_name%>_extract(v);
@@ -76,15 +80,15 @@ static VALUE
static VALUE
<%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
{
- <% if !is_object %>
+ //<% if !is_object %>
VALUE klass, v;
- <% end %>
+ //<% end %>
if (argc < 1) {
rb_raise(rb_eArgError,"wrong number of arguments (%d for >=1)",argc);
}
- <% if is_object %>
+ //<% if is_object %>
return <%=c_func%>_self(argc, argv, self);
- <% else %>
+ //<% else %>
klass = na_upcast(CLASS_OF(self),CLASS_OF(argv[0]));
if (klass==cT) {
return <%=c_func%>_self(argc, argv, self);
@@ -92,5 +96,5 @@ static VALUE
v = rb_funcall(klass, id_cast, 1, self);
return rb_funcall2(v, rb_intern("<%=name%>"), argc, argv);
}
- <% end %>
+ //<% end %>
}
diff --git a/ext/numo/narray/gen/tmpl/binary.c b/ext/numo/narray/gen/tmpl/binary.c
index 302635b..060b9c0 100644
--- a/ext/numo/narray/gen/tmpl/binary.c
+++ b/ext/numo/narray/gen/tmpl/binary.c
@@ -20,11 +20,10 @@ static void
INIT_PTR(lp, 1, p2, s2);
INIT_PTR(lp, 2, p3, s3);
-<% if /Int8$/ !~ class_name %>
- if ((size_t)p1 % sizeof(dtype) == 0 &&
- (size_t)p2 % sizeof(dtype) == 0 &&
- (size_t)p3 % sizeof(dtype) == 0 ) {
-<% end %>
+ //<% if need_align %>
+ if (is_aligned(p1,sizeof(dtype)) &&
+ is_aligned(p2,sizeof(dtype)) &&
+ is_aligned(p3,sizeof(dtype)) ) {
if (s1 == sizeof(dtype) &&
s2 == sizeof(dtype) &&
@@ -35,11 +34,11 @@ static void
((dtype*)p3)[i] = m_<%=name%>(((dtype*)p1)[i],((dtype*)p2)[i]);
}
return;
- } else
- if (s1 % sizeof(dtype) == 0 &&
- s2 % sizeof(dtype) == 0 &&
- s3 % sizeof(dtype) == 0 ) {
-
+ }
+ if (is_aligned_step(s1,sizeof(dtype)) &&
+ is_aligned_step(s2,sizeof(dtype)) &&
+ is_aligned_step(s3,sizeof(dtype)) ) {
+ //<% end %>
for (i=0; i<n; i++) {
check_intdivzero(*(dtype*)p2);
*(dtype*)p3 = m_<%=name%>(*(dtype*)p1,*(dtype*)p2);
@@ -48,11 +47,10 @@ static void
p3 += s3;
}
return;
+ //<% if need_align %>
}
-
-<% if /Int8$/ !~ class_name %>
}
- for (i=0; i<n; i+=2) {
+ for (i=0; i<n; i++) {
dtype x, y, z;
GET_DATA_STRIDE(p1,s1,dtype,x);
GET_DATA_STRIDE(p2,s2,dtype,y);
@@ -60,7 +58,7 @@ static void
z = m_<%=name%>(x,y);
SET_DATA_STRIDE(p3,s3,dtype,z);
}
-<% end %>
+ //<% end %>
}
#undef check_intdivzero
diff --git a/ext/numo/narray/gen/tmpl/unary.c b/ext/numo/narray/gen/tmpl/unary.c
index 382c638..0dcc94c 100644
--- a/ext/numo/narray/gen/tmpl/unary.c
+++ b/ext/numo/narray/gen/tmpl/unary.c
@@ -1,25 +1,25 @@
static void
<%=c_iter%>(na_loop_t *const lp)
{
- size_t i;
+ size_t i, n;
char *p1, *p2;
ssize_t s1, s2;
size_t *idx1, *idx2;
dtype x;
- INIT_COUNTER(lp, i);
+ INIT_COUNTER(lp, n);
INIT_PTR_IDX(lp, 0, p1, s1, idx1);
INIT_PTR_IDX(lp, 1, p2, s2, idx2);
if (idx1) {
if (idx2) {
- for (; i--;) {
+ for (i=0; i<n; i++) {
GET_DATA_INDEX(p1,idx1,dtype,x);
x = m_<%=name%>(x);
SET_DATA_INDEX(p2,idx2,dtype,x);
}
} else {
- for (; i--;) {
+ for (i=0; i<n; i++) {
GET_DATA_INDEX(p1,idx1,dtype,x);
x = m_<%=name%>(x);
SET_DATA_STRIDE(p2,s2,dtype,x);
@@ -27,17 +27,40 @@ static void
}
} else {
if (idx2) {
- for (; i--;) {
+ for (i=0; i<n; i++) {
GET_DATA_STRIDE(p1,s1,dtype,x);
x = m_<%=name%>(x);
SET_DATA_INDEX(p2,idx2,dtype,x);
}
} else {
- for (; i--;) {
+ //<% if need_align %>
+ if (is_aligned(p1,sizeof(dtype)) &&
+ is_aligned(p2,sizeof(dtype)) ) {
+ if (s1 == sizeof(dtype) &&
+ s2 == sizeof(dtype) ) {
+ for (i=0; i<n; i++) {
+ ((dtype*)p2)[i] = m_<%=name%>(((dtype*)p1)[i]);
+ }
+ return;
+ }
+ if (is_aligned_step(s1,sizeof(dtype)) &&
+ is_aligned_step(s2,sizeof(dtype)) ) {
+ //<% end %>
+ for (i=0; i<n; i++) {
+ *(dtype*)p2 = m_<%=name%>(*(dtype*)p1);
+ p1 += s1;
+ p2 += s2;
+ }
+ return;
+ //<% if need_align %>
+ }
+ }
+ for (i=0; i<n; i++) {
GET_DATA_STRIDE(p1,s1,dtype,x);
x = m_<%=name%>(x);
SET_DATA_STRIDE(p2,s2,dtype,x);
}
+ //<% end %>
}
}
}
diff --git a/ext/numo/narray/numo/template.h b/ext/numo/narray/numo/template.h
index 2fd6e0e..1d69996 100644
--- a/ext/numo/narray/numo/template.h
+++ b/ext/numo/narray/numo/template.h
@@ -133,4 +133,17 @@
}
// val -> val&1 ??
-#endif /* ifndef NARRAY_H */
+static inline int
+is_aligned(const void *ptr, const size_t alignment)
+{
+ return ((size_t)(ptr) & ((alignment)-1)) == 0;
+}
+
+static inline int
+is_aligned_step(const ssize_t step, const size_t alignment)
+{
+ return ((step) & ((alignment)-1)) == 0;
+}
+
+
+#endif /* ifndef TEMPLATE_H */
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ruby-extras/ruby-numo-narray.git
More information about the Pkg-ruby-extras-commits mailing list