[DRE-commits] [ruby-numo-narray] 01/13: Import Git HEAD: 2017-06-09 761e796
Youhei SASAKI
uwabami-guest at moszumanska.debian.org
Wed Jun 21 08:37:48 UTC 2017
This is an automated email from the git hooks/post-receive script.
uwabami-guest pushed a commit to branch patch-queue/master
in repository ruby-numo-narray.
commit 48638b1aa2c2bf6969094ac4a1b218b81766922d
Author: Youhei SASAKI <uwabami at gfd-dennou.org>
Date: Wed Jun 21 17:05:40 2017 +0900
Import Git HEAD: 2017-06-09 761e796
Signed-off-by: Youhei SASAKI <uwabami at gfd-dennou.org>
---
debian/patches/0002-load-numo-extra-in-rspec.patch | 24 ++
.../0003-modify-loop-for-SIMD-optimization.patch | 103 +++++
.../0004-not_nan-macro-for-fast-NaN-check.patch | 329 +++++++++++++++
debian/patches/0005-reduce-alloc-times.patch | 105 +++++
...ignment.-modify-loop-for-simd-optimizatio.patch | 450 +++++++++++++++++++++
.../0007-fix-bug-in-methods-min-max_index.patch | 34 ++
.../patches/0008-fix-cheking-negative-index.patch | 56 +++
.../0009-new-error-Numo-NArray-ValueError.patch | 242 +++++++++++
debian/patches/0010-add-nary_eValueError.patch | 24 ++
...ort_index-avoid-zero-division-when-n-1-38.patch | 37 ++
...num-of-inputs-should-be-lp-nin-not-nf-nin.patch | 25 ++
debian/patches/series | 11 +
12 files changed, 1440 insertions(+)
diff --git a/debian/patches/0002-load-numo-extra-in-rspec.patch b/debian/patches/0002-load-numo-extra-in-rspec.patch
new file mode 100644
index 0000000..27b6eba
--- /dev/null
+++ b/debian/patches/0002-load-numo-extra-in-rspec.patch
@@ -0,0 +1,24 @@
+From 3650f44660039f7a4f392dd8988a1806671404fc Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Thu, 1 Jun 2017 08:43:38 +0900
+Subject: [PATCH 02/12] load numo/extra in rspec
+
+---
+ spec/narray_spec.rb | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/spec/narray_spec.rb b/spec/narray_spec.rb
+index 6fec60e..237b165 100644
+--- a/spec/narray_spec.rb
++++ b/spec/narray_spec.rb
+@@ -1,4 +1,6 @@
+-require File.join(File.dirname(__FILE__), "../ext/numo/narray/narray")
++d=File.dirname(__FILE__)
++require File.join(d, "../ext/numo/narray/narray")
++require File.join(d, "../lib/numo/narray/extra")
+ #Numo::NArray.debug = true
+
+ RSpec.configure do |config|
+--
+2.11.0
+
diff --git a/debian/patches/0003-modify-loop-for-SIMD-optimization.patch b/debian/patches/0003-modify-loop-for-SIMD-optimization.patch
new file mode 100644
index 0000000..6f2c827
--- /dev/null
+++ b/debian/patches/0003-modify-loop-for-SIMD-optimization.patch
@@ -0,0 +1,103 @@
+From 5d0e677447f2d348c563d06788e65f49b41e4ce4 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Thu, 1 Jun 2017 09:29:02 +0900
+Subject: [PATCH 03/12] modify loop for SIMD optimization
+
+---
+ ext/numo/narray/gen/tmpl/binary.c | 62 ++++++++++++++++++++++++++++++++-------
+ 1 file changed, 52 insertions(+), 10 deletions(-)
+
+diff --git a/ext/numo/narray/gen/tmpl/binary.c b/ext/numo/narray/gen/tmpl/binary.c
+index 135f7b4..302635b 100644
+--- a/ext/numo/narray/gen/tmpl/binary.c
++++ b/ext/numo/narray/gen/tmpl/binary.c
+@@ -1,27 +1,68 @@
++<% if is_int and %w[div mod divmod].include? name %>
++#define check_intdivzero(y) \
++ if ((y)==0) { \
++ lp->err_type = rb_eZeroDivError; \
++ return; \
++ }
++<% else %>
++#define check_intdivzero(y) {}
++<% end %>
++
+ static void
+ <%=c_iter%>(na_loop_t *const lp)
+ {
+ size_t i, n;
+ char *p1, *p2, *p3;
+ ssize_t s1, s2, s3;
+- dtype x, y;
++
+ INIT_COUNTER(lp, n);
+ INIT_PTR(lp, 0, p1, s1);
+ INIT_PTR(lp, 1, p2, s2);
+ INIT_PTR(lp, 2, p3, s3);
+- for (i=n; i--;) {
+- GET_DATA_STRIDE(p1,s1,dtype,x);
+- GET_DATA_STRIDE(p2,s2,dtype,y);
+-<% if is_int and %w[div mod divmod].include? name %>
+- if (y==0) {
+- lp->err_type = rb_eZeroDivError;
++
++<% if /Int8$/ !~ class_name %>
++ if ((size_t)p1 % sizeof(dtype) == 0 &&
++ (size_t)p2 % sizeof(dtype) == 0 &&
++ (size_t)p3 % sizeof(dtype) == 0 ) {
++<% end %>
++
++ if (s1 == sizeof(dtype) &&
++ s2 == sizeof(dtype) &&
++ s3 == sizeof(dtype) ) {
++
++ for (i=0; i<n; i++) {
++ check_intdivzero(*(dtype*)p2);
++ ((dtype*)p3)[i] = m_<%=name%>(((dtype*)p1)[i],((dtype*)p2)[i]);
++ }
++ return;
++ } else
++ if (s1 % sizeof(dtype) == 0 &&
++ s2 % sizeof(dtype) == 0 &&
++ s3 % sizeof(dtype) == 0 ) {
++
++ for (i=0; i<n; i++) {
++ check_intdivzero(*(dtype*)p2);
++ *(dtype*)p3 = m_<%=name%>(*(dtype*)p1,*(dtype*)p2);
++ p1 += s1;
++ p2 += s2;
++ p3 += s3;
++ }
+ return;
+ }
+-<% end %>
+- x = m_<%=name%>(x,y);
+- SET_DATA_STRIDE(p3,s3,dtype,x);
++
++<% if /Int8$/ !~ class_name %>
+ }
++ for (i=0; i<n; i+=2) {
++ dtype x, y, z;
++ GET_DATA_STRIDE(p1,s1,dtype,x);
++ GET_DATA_STRIDE(p2,s2,dtype,y);
++ check_intdivzero(y);
++ z = m_<%=name%>(x,y);
++ SET_DATA_STRIDE(p3,s3,dtype,z);
++ }
++<% end %>
+ }
++#undef check_intdivzero
+
+ static VALUE
+ <%=c_func%>_self(VALUE self, VALUE other)
+@@ -46,6 +87,7 @@ static VALUE
+ return <%=c_func%>_self(self, other);
+ <% else %>
+ VALUE klass, v;
++
+ klass = na_upcast(CLASS_OF(self),CLASS_OF(other));
+ if (klass==cT) {
+ return <%=c_func%>_self(self, other);
+--
+2.11.0
+
diff --git a/debian/patches/0004-not_nan-macro-for-fast-NaN-check.patch b/debian/patches/0004-not_nan-macro-for-fast-NaN-check.patch
new file mode 100644
index 0000000..f78963d
--- /dev/null
+++ b/debian/patches/0004-not_nan-macro-for-fast-NaN-check.patch
@@ -0,0 +1,329 @@
+From 996851fd3ce3bca1bd33edc0d41a3ce83be49090 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Thu, 1 Jun 2017 09:29:55 +0900
+Subject: [PATCH 04/12] not_nan macro for fast NaN check
+
+---
+ ext/numo/narray/numo/types/complex_macro.h | 40 +++++++++--------
+ ext/numo/narray/numo/types/real_accum.h | 69 +++++++++++++++---------------
+ 2 files changed, 56 insertions(+), 53 deletions(-)
+
+diff --git a/ext/numo/narray/numo/types/complex_macro.h b/ext/numo/narray/numo/types/complex_macro.h
+index 7adadbb..19c52fd 100644
+--- a/ext/numo/narray/numo/types/complex_macro.h
++++ b/ext/numo/narray/numo/types/complex_macro.h
+@@ -117,26 +117,28 @@ static inline dtype c_from_dcomplex(dcomplex x) {
+ #define m_sum_init INT2FIX(0)
+ #define m_mulsum_init INT2FIX(0)
+
++#define not_nan(x) (REAL(x)==REAL(x) && IMAG(x)==IMAG(x))
++
+ #define m_mulsum(x,y,z) {z = m_add(m_mul(x,y),z);}
+-#define m_mulsum_nan(x,y,z) { \
+- if(!m_isnan(x) && !m_isnan(y)) { \
+- z = m_add(m_mul(x,y),z); \
++#define m_mulsum_nan(x,y,z) { \
++ if(not_nan(x) && not_nan(y)) { \
++ z = m_add(m_mul(x,y),z); \
+ }}
+
+ #define m_cumsum(x,y) {(x)=m_add(x,y);}
+-#define m_cumsum_nan(x,y) { \
+- if (m_isnan(x)) { \
+- (x) = (y); \
+- } else if (!m_isnan(y)) { \
+- (x) = m_add(x,y); \
++#define m_cumsum_nan(x,y) { \
++ if (!not_nan(x)) { \
++ (x) = (y); \
++ } else if (not_nan(y)) { \
++ (x) = m_add(x,y); \
+ }}
+
+ #define m_cumprod(x,y) {(x)=m_mul(x,y);}
+-#define m_cumprod_nan(x,y) { \
+- if (m_isnan(x)) { \
+- (x) = (y); \
+- } else if (!m_isnan(y)) { \
+- (x) = m_mul(x,y); \
++#define m_cumprod_nan(x,y) { \
++ if (!not_nan(x)) { \
++ (x) = (y); \
++ } else if (not_nan(y)) { \
++ (x) = m_mul(x,y); \
+ }}
+
+ static inline dtype f_sum(size_t n, char *p, ssize_t stride)
+@@ -161,7 +163,7 @@ static inline dtype f_sum_nan(size_t n, char *p, ssize_t stride)
+ y = c_zero();
+ for (; i--;) {
+ x = *(dtype*)p;
+- if (!c_isnan(x)) {
++ if (not_nan(x)) {
+ y = c_add(x,y);
+ }
+ p += stride;
+@@ -205,7 +207,7 @@ static inline dtype f_kahan_sum_nan(size_t n, char *p, ssize_t stride)
+ r = c_zero();
+ for (; i--;) {
+ x = *(dtype*)p;
+- if (!c_isnan(x)) {
++ if (not_nan(x)) {
+ if (fabs(REAL(x)) > fabs(REAL(y))) {
+ double z=REAL(x); REAL(x)=REAL(y); REAL(y)=z;
+ }
+@@ -245,7 +247,7 @@ static inline dtype f_prod_nan(size_t n, char *p, ssize_t stride)
+ y = c_one();
+ for (; i--;) {
+ x = *(dtype*)p;
+- if (!c_isnan(x)) {
++ if (not_nan(x)) {
+ y = c_mul(x,y);
+ }
+ p += stride;
+@@ -278,7 +280,7 @@ static inline dtype f_mean_nan(size_t n, char *p, ssize_t stride)
+ y = c_zero();
+ for (; i--;) {
+ x = *(dtype*)p;
+- if (!c_isnan(x)) {
++ if (not_nan(x)) {
+ y = c_add(x,y);
+ count++;
+ }
+@@ -316,7 +318,7 @@ static inline rtype f_var_nan(size_t n, char *p, ssize_t stride)
+
+ for (; i--;) {
+ x = *(dtype*)p;
+- if (!c_isnan(x)) {
++ if (not_nan(x)) {
+ y += c_abs_square(c_sub(x,m));
+ count++;
+ }
+@@ -360,7 +362,7 @@ static inline rtype f_rms_nan(size_t n, char *p, ssize_t stride)
+
+ for (; i--;) {
+ x = *(dtype*)p;
+- if (!c_isnan(x)) {
++ if (not_nan(x)) {
+ y += c_abs_square(x);
+ count++;
+ }
+diff --git a/ext/numo/narray/numo/types/real_accum.h b/ext/numo/narray/numo/types/real_accum.h
+index d0f68f6..b22f453 100644
+--- a/ext/numo/narray/numo/types/real_accum.h
++++ b/ext/numo/narray/numo/types/real_accum.h
+@@ -1,24 +1,25 @@
++#define not_nan(x) ((x)==(x))
+
+ #define m_mulsum(x,y,z) {z = m_add(m_mul(x,y),z);}
+-#define m_mulsum_nan(x,y,z) { \
+- if(!m_isnan(x) && !m_isnan(y)) { \
+- z = m_add(m_mul(x,y),z); \
++#define m_mulsum_nan(x,y,z) { \
++ if(not_nan(x) && not_nan(y)) { \
++ z = m_add(m_mul(x,y),z); \
+ }}
+
+ #define m_cumsum(x,y) {(x)=m_add(x,y);}
+-#define m_cumsum_nan(x,y) { \
+- if (m_isnan(x)) { \
+- (x) = (y); \
+- } else if (!m_isnan(y)) { \
+- (x) = m_add(x,y); \
++#define m_cumsum_nan(x,y) { \
++ if (!not_nan(x)) { \
++ (x) = (y); \
++ } else if (not_nan(y)) { \
++ (x) = m_add(x,y); \
+ }}
+
+ #define m_cumprod(x,y) {(x)=m_mul(x,y);}
+-#define m_cumprod_nan(x,y) { \
+- if (m_isnan(x)) { \
+- (x) = (y); \
+- } else if (!m_isnan(y)) { \
+- (x) = m_mul(x,y); \
++#define m_cumprod_nan(x,y) { \
++ if (!not_nan(x)) { \
++ (x) = (y); \
++ } else if (not_nan(y)) { \
++ (x) = m_mul(x,y); \
+ }}
+
+ static inline dtype f_sum(size_t n, char *p, ssize_t stride)
+@@ -28,8 +29,8 @@ static inline dtype f_sum(size_t n, char *p, ssize_t stride)
+
+ for (; i--;) {
+ x = *(dtype*)p;
+- p += stride;
+ y = m_add(x,y);
++ p += stride;
+ }
+ return y;
+ }
+@@ -42,7 +43,7 @@ static inline dtype f_sum_nan(size_t n, char *p, ssize_t stride)
+ for (; i--;) {
+ x = *(dtype*)p;
+ p += stride;
+- if (!m_isnan(x)) {
++ if (not_nan(x)) {
+ y = m_add(x,y);
+ }
+ }
+@@ -71,7 +72,7 @@ static inline dtype f_prod_nan(size_t n, char *p, ssize_t stride)
+ for (; i--;) {
+ x = *(dtype*)p;
+ p += stride;
+- if (!m_isnan(x)) {
++ if (not_nan(x)) {
+ y = m_mul(x,y);
+ }
+ }
+@@ -102,7 +103,7 @@ static inline dtype f_mean_nan(size_t n, char *p, ssize_t stride)
+ for (; i--;) {
+ x = *(dtype*)p;
+ p += stride;
+- if (!m_isnan(x)) {
++ if (not_nan(x)) {
+ y = m_add(x,y);
+ count++;
+ }
+@@ -141,7 +142,7 @@ static inline dtype f_var_nan(size_t n, char *p, ssize_t stride)
+ for (; i--;) {
+ x = *(dtype*)p;
+ p += stride;
+- if (!m_isnan(x)) {
++ if (not_nan(x)) {
+ a = m_abs(m_sub(x,m));
+ y = m_add(y,m_square(a));
+ count++;
+@@ -184,7 +185,7 @@ static inline dtype f_rms_nan(size_t n, char *p, ssize_t stride)
+ for (; i--;) {
+ x = *(dtype*)p;
+ p += stride;
+- if (!m_isnan(x)) {
++ if (not_nan(x)) {
+ y = m_add(y,m_square(m_abs(x)));
+ count++;
+ }
+@@ -201,11 +202,11 @@ static inline dtype f_min_nan(size_t n, char *p, ssize_t stride)
+
+ y = *(dtype*)p;
+ p += stride;
+- if (m_isnan(y)) {return y;}
++ if (!not_nan(y)) {return y;}
+ for (i--; i--;) {
+ x = *(dtype*)p;
+ p += stride;
+- if (m_isnan(x)) {return x;}
++ if (!not_nan(x)) {return x;}
+ if (m_lt(x,y)) {
+ y = x;
+ }
+@@ -221,7 +222,7 @@ static inline dtype f_min(size_t n, char *p, ssize_t stride)
+ for (; i--; ) {
+ y = *(dtype*)p;
+ p += stride;
+- if (!m_isnan(y)) {
++ if (not_nan(y)) {
+ for (; i--;) {
+ x = *(dtype*)p;
+ p += stride;
+@@ -242,11 +243,11 @@ static inline dtype f_max_nan(size_t n, char *p, ssize_t stride)
+
+ y = *(dtype*)p;
+ p += stride;
+- if (m_isnan(y)) {return y;}
++ if (!not_nan(y)) {return y;}
+ for (i--; i--;) {
+ x = *(dtype*)p;
+ p += stride;
+- if (m_isnan(x)) {return x;}
++ if (!not_nan(x)) {return x;}
+ if (m_gt(x,y)) {
+ y = x;
+ }
+@@ -262,7 +263,7 @@ static inline dtype f_max(size_t n, char *p, ssize_t stride)
+ for (; i--; ) {
+ y = *(dtype*)p;
+ p += stride;
+- if (!m_isnan(y)) {
++ if (not_nan(y)) {
+ for (; i--;) {
+ x = *(dtype*)p;
+ p += stride;
+@@ -283,11 +284,11 @@ static inline size_t f_min_index_nan(size_t n, char *p, ssize_t stride)
+
+ y = *(dtype*)p;
+ p += stride;
+- if (m_isnan(y)) {return j;}
++ if (!not_nan(y)) {return j;}
+ for (i=1; i<n; i++) {
+ x = *(dtype*)p;
+ p += stride;
+- if (m_isnan(x)) {return i;}
++ if (!not_nan(x)) {return i;}
+ if (m_lt(x,y)) {
+ y = x;
+ j = i;
+@@ -304,7 +305,7 @@ static inline size_t f_min_index(size_t n, char *p, ssize_t stride)
+ for (i=0; i<n; i++) {
+ y = *(dtype*)p;
+ p += stride;
+- if (!m_isnan(y)) {
++ if (not_nan(y)) {
+ j = i;
+ for (; i<n; i++) {
+ x = *(dtype*)p;
+@@ -327,11 +328,11 @@ static inline size_t f_max_index_nan(size_t n, char *p, ssize_t stride)
+
+ y = *(dtype*)p;
+ p += stride;
+- if (m_isnan(y)) {return j;}
++ if (!not_nan(y)) {return j;}
+ for (i=1; i<n; i++) {
+ x = *(dtype*)p;
+ p += stride;
+- if (m_isnan(x)) {return i;}
++ if (!not_nan(x)) {return i;}
+ if (m_gt(x,y)) {
+ y = x;
+ j = i;
+@@ -348,7 +349,7 @@ static inline size_t f_max_index(size_t n, char *p, ssize_t stride)
+ for (i=0; i<n; i++) {
+ y = *(dtype*)p;
+ p += stride;
+- if (!m_isnan(y)) {
++ if (not_nan(y)) {
+ j = i;
+ for (; i<n; i++) {
+ x = *(dtype*)p;
+@@ -372,14 +373,14 @@ f_minmax_nan(size_t n, char *p, ssize_t stride, dtype *amin, dtype *amax)
+
+ min = max = *(dtype*)p;
+ p += stride;
+- if (m_isnan(min)) {
++ if (!not_nan(min)) {
+ *amin = *amax = min;
+ return;
+ }
+ for (i--; i--;) {
+ x = *(dtype*)p;
+ p += stride;
+- if (m_isnan(x)) {
++ if (!not_nan(x)) {
+ *amin = *amax = x;
+ return;
+ }
+@@ -412,7 +413,7 @@ f_minmax(size_t n, char *p, ssize_t stride, dtype *amin, dtype *amax)
+ for (; i--; ) {
+ min = *(dtype*)p;
+ p += stride;
+- if (!m_isnan(min)) {
++ if (not_nan(min)) {
+ max = min;
+ for (; i--;) {
+ x = *(dtype*)p;
+--
+2.11.0
+
diff --git a/debian/patches/0005-reduce-alloc-times.patch b/debian/patches/0005-reduce-alloc-times.patch
new file mode 100644
index 0000000..bfdd8a2
--- /dev/null
+++ b/debian/patches/0005-reduce-alloc-times.patch
@@ -0,0 +1,105 @@
+From 48bb77db5680f940984a166013cd70b19110859d Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Fri, 2 Jun 2017 15:12:27 +0900
+Subject: [PATCH 05/12] reduce alloc times
+
+---
+ ext/numo/narray/ndloop.c | 41 +++++++++++++++++++++--------------------
+ 1 file changed, 21 insertions(+), 20 deletions(-)
+
+diff --git a/ext/numo/narray/ndloop.c b/ext/numo/narray/ndloop.c
+index e431a7f..39c6ead 100644
+--- a/ext/numo/narray/ndloop.c
++++ b/ext/numo/narray/ndloop.c
+@@ -43,7 +43,7 @@ typedef struct NA_MD_LOOP {
+ int nin;
+ int ndim; // n of total dimention
+ unsigned int copy_flag; // set i-th bit if i-th arg is cast
+- size_t *n_ptr; // memory for n
++ void *ptr; // memory for n
+ na_loop_iter_t *iter_ptr; // memory for iter
+ size_t *n; // n of elements for each dim
+ na_loop_t user; // loop in user function
+@@ -337,6 +337,9 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
+ int narg;
+ int max_nd;
+
++ char *buf;
++ size_t n1, n2, n3, n4, n5;
++
+ long args_len;
+
+ na_loop_iter_t *iter;
+@@ -364,23 +367,28 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
+ lp->writeback = -1;
+ lp->init_aidx = -1;
+
+- lp->n = NULL;
+- lp->n_ptr = NULL;
+- lp->xargs = NULL;
+- lp->user.args = NULL;
++ lp->ptr = NULL;
+ lp->user.n = NULL;
+- lp->iter_ptr = NULL;
+- lp->trans_map = NULL;
+
+ ndloop_find_max_dimension(lp, nf, args);
+ narg = lp->nin + nf->nout;
+ max_nd = lp->ndim + lp->user.ndim;
+
+- lp->n = lp->n_ptr = ALLOC_N(size_t, max_nd+1);
+- lp->xargs = ALLOC_N(na_loop_xargs_t, narg);
+- lp->user.args = ALLOC_N(na_loop_args_t, narg);
+- iter = ALLOC_N(na_loop_iter_t, narg*(max_nd+1));
+- lp->iter_ptr = iter;
++ n1 = sizeof(size_t)*(max_nd+1);
++ n2 = sizeof(na_loop_xargs_t)*narg;
++ n2 = ((n2-1)/8+1)*8;
++ n3 = sizeof(na_loop_args_t)*narg;
++ n3 = ((n3-1)/8+1)*8;
++ n4 = sizeof(na_loop_iter_t)*narg*(max_nd+1);
++ n4 = ((n4-1)/8+1)*8;
++ n5 = sizeof(int)*(max_nd+1);
++
++ lp->ptr = buf = (char*)xmalloc(n1+n2+n3+n4+n5);
++ lp->n = (size_t*)buf; buf+=n1;
++ lp->xargs = (na_loop_xargs_t*)buf; buf+=n2;
++ lp->user.args = (na_loop_args_t*)buf; buf+=n3;
++ lp->iter_ptr = iter = (na_loop_iter_t*)buf; buf+=n4;
++ lp->trans_map = (int*)buf;
+
+ for (j=0; j<narg; j++) {
+ LARG(lp,j).value = Qnil;
+@@ -406,7 +414,6 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
+ // array loop
+ // [*,+,*,+,*] => [*,*,*,+,+]
+ // trans_map=[0,3,1,4,2] <= [0,1,2,3,4]
+- lp->trans_map = ALLOC_N(int, max_nd+1);
+ if (NDF_TEST(nf,NDF_FLAT_REDUCE) && RTEST(lp->reduce)) {
+ trans_dim = 0;
+ for (i=0; i<max_nd; i++) {
+@@ -450,7 +457,6 @@ ndloop_release(VALUE vlp)
+ na_release_lock(v);
+ }
+ }
+- //xfree(lp);
+ for (j=0; j<lp->narg; j++) {
+ //printf("lp->xargs[%d].bufcp=%lx\n",j,(size_t)(lp->xargs[j].bufcp));
+ if (lp->xargs[j].bufcp) {
+@@ -463,12 +469,7 @@ ndloop_release(VALUE vlp)
+ }
+ }
+ }
+- if (lp->trans_map) xfree(lp->trans_map);
+- xfree(lp->xargs);
+- xfree(lp->iter_ptr);
+- xfree(lp->user.args);
+- xfree(lp->n_ptr);
+- //rb_gc_force_recycle(vlp);
++ xfree(lp->ptr);
+ return Qnil;
+ }
+
+--
+2.11.0
+
diff --git a/debian/patches/0006-consider-alignment.-modify-loop-for-simd-optimizatio.patch b/debian/patches/0006-consider-alignment.-modify-loop-for-simd-optimizatio.patch
new file mode 100644
index 0000000..df5d60e
--- /dev/null
+++ b/debian/patches/0006-consider-alignment.-modify-loop-for-simd-optimizatio.patch
@@ -0,0 +1,450 @@
+From b06d7394bd90ac271de05e3f42213a0daf653204 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Fri, 2 Jun 2017 15:14:16 +0900
+Subject: [PATCH 06/12] consider alignment. modify loop for simd optimization
+
+---
+ ext/numo/narray/gen/def/bit.rb | 1 +
+ ext/numo/narray/gen/def/dcomplex.rb | 1 +
+ ext/numo/narray/gen/def/dfloat.rb | 1 +
+ ext/numo/narray/gen/def/int16.rb | 1 +
+ ext/numo/narray/gen/def/int32.rb | 1 +
+ ext/numo/narray/gen/def/int64.rb | 1 +
+ ext/numo/narray/gen/def/int8.rb | 1 +
+ ext/numo/narray/gen/def/robject.rb | 1 +
+ ext/numo/narray/gen/def/scomplex.rb | 1 +
+ ext/numo/narray/gen/def/sfloat.rb | 1 +
+ ext/numo/narray/gen/def/uint16.rb | 1 +
+ ext/numo/narray/gen/def/uint32.rb | 1 +
+ ext/numo/narray/gen/def/uint64.rb | 1 +
+ ext/numo/narray/gen/def/uint8.rb | 1 +
+ ext/numo/narray/gen/tmpl/accum_binary.c | 34 ++++++++++++++++++--------------
+ ext/numo/narray/gen/tmpl/binary.c | 26 +++++++++++-------------
+ ext/numo/narray/gen/tmpl/unary.c | 35 +++++++++++++++++++++++++++------
+ ext/numo/narray/numo/template.h | 15 +++++++++++++-
+ 18 files changed, 88 insertions(+), 36 deletions(-)
+
+diff --git a/ext/numo/narray/gen/def/bit.rb b/ext/numo/narray/gen/def/bit.rb
+index 9173546..c14aeee 100644
+--- a/ext/numo/narray/gen/def/bit.rb
++++ b/ext/numo/narray/gen/def/bit.rb
+@@ -16,6 +16,7 @@ set is_object: false
+ set is_real: false
+ set is_comparable: false
+ set is_double_precision: false
++set need_align: false
+
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/dcomplex.rb b/ext/numo/narray/gen/def/dcomplex.rb
+index 7ced724..6282faf 100644
+--- a/ext/numo/narray/gen/def/dcomplex.rb
++++ b/ext/numo/narray/gen/def/dcomplex.rb
+@@ -18,6 +18,7 @@ set is_complex: true
+ set is_object: false
+ set is_comparable: false
+ set is_double_precision: true
++set need_align: true
+
+ upcast_rb "Integer"
+ upcast_rb "Float"
+diff --git a/ext/numo/narray/gen/def/dfloat.rb b/ext/numo/narray/gen/def/dfloat.rb
+index fcbe812..a78a367 100644
+--- a/ext/numo/narray/gen/def/dfloat.rb
++++ b/ext/numo/narray/gen/def/dfloat.rb
+@@ -16,6 +16,7 @@ set is_object: false
+ set is_real: true
+ set is_comparable: true
+ set is_double_precision: true
++set need_align: true
+
+ upcast_rb "Integer"
+ upcast_rb "Float"
+diff --git a/ext/numo/narray/gen/def/int16.rb b/ext/numo/narray/gen/def/int16.rb
+index 99fae54..bbc1b7d 100644
+--- a/ext/numo/narray/gen/def/int16.rb
++++ b/ext/numo/narray/gen/def/int16.rb
+@@ -15,6 +15,7 @@ set is_object: false
+ set is_real: true
+ set is_comparable: true
+ set is_double_precision: false
++set need_align: true
+
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/int32.rb b/ext/numo/narray/gen/def/int32.rb
+index dc519b0..7ea2882 100644
+--- a/ext/numo/narray/gen/def/int32.rb
++++ b/ext/numo/narray/gen/def/int32.rb
+@@ -15,6 +15,7 @@ set is_object: false
+ set is_real: true
+ set is_comparable: true
+ set is_double_precision: false
++set need_align: true
+
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/int64.rb b/ext/numo/narray/gen/def/int64.rb
+index 221bad2..cded10c 100644
+--- a/ext/numo/narray/gen/def/int64.rb
++++ b/ext/numo/narray/gen/def/int64.rb
+@@ -15,6 +15,7 @@ set is_object: false
+ set is_real: true
+ set is_comparable: true
+ set is_double_precision: false
++set need_align: true
+
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/int8.rb b/ext/numo/narray/gen/def/int8.rb
+index 121a205..f171325 100644
+--- a/ext/numo/narray/gen/def/int8.rb
++++ b/ext/numo/narray/gen/def/int8.rb
+@@ -15,6 +15,7 @@ set is_object: false
+ set is_real: true
+ set is_comparable: true
+ set is_double_precision: false
++set need_align: false
+
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/robject.rb b/ext/numo/narray/gen/def/robject.rb
+index e0996ca..6067d33 100644
+--- a/ext/numo/narray/gen/def/robject.rb
++++ b/ext/numo/narray/gen/def/robject.rb
+@@ -17,6 +17,7 @@ set is_complex: false
+ set is_object: true
+ set is_comparable: true
+ set is_double_precision: false
++set need_align: false
+
+ upcast_rb "Integer"
+ upcast_rb "Float"
+diff --git a/ext/numo/narray/gen/def/scomplex.rb b/ext/numo/narray/gen/def/scomplex.rb
+index b0ceffb..63cf678 100644
+--- a/ext/numo/narray/gen/def/scomplex.rb
++++ b/ext/numo/narray/gen/def/scomplex.rb
+@@ -18,6 +18,7 @@ set is_complex: true
+ set is_object: false
+ set is_comparable: false
+ set is_double_precision: false
++set need_align: true
+
+ upcast_rb "Integer"
+ upcast_rb "Float"
+diff --git a/ext/numo/narray/gen/def/sfloat.rb b/ext/numo/narray/gen/def/sfloat.rb
+index 47be181..cebc46f 100644
+--- a/ext/numo/narray/gen/def/sfloat.rb
++++ b/ext/numo/narray/gen/def/sfloat.rb
+@@ -16,6 +16,7 @@ set is_object: false
+ set is_real: true
+ set is_comparable: true
+ set is_double_precision: false
++set need_align: true
+
+ upcast_rb "Integer"
+ upcast_rb "Float"
+diff --git a/ext/numo/narray/gen/def/uint16.rb b/ext/numo/narray/gen/def/uint16.rb
+index 83012c6..000014a 100644
+--- a/ext/numo/narray/gen/def/uint16.rb
++++ b/ext/numo/narray/gen/def/uint16.rb
+@@ -15,6 +15,7 @@ set is_object: false
+ set is_real: true
+ set is_comparable: true
+ set is_double_precision: false
++set need_align: true
+
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/uint32.rb b/ext/numo/narray/gen/def/uint32.rb
+index 1269c4e..6556e11 100644
+--- a/ext/numo/narray/gen/def/uint32.rb
++++ b/ext/numo/narray/gen/def/uint32.rb
+@@ -15,6 +15,7 @@ set is_object: false
+ set is_real: true
+ set is_comparable: true
+ set is_double_precision: false
++set need_align: true
+
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/uint64.rb b/ext/numo/narray/gen/def/uint64.rb
+index 4db9bef..8912e25 100644
+--- a/ext/numo/narray/gen/def/uint64.rb
++++ b/ext/numo/narray/gen/def/uint64.rb
+@@ -15,6 +15,7 @@ set is_object: false
+ set is_real: true
+ set is_comparable: true
+ set is_double_precision: false
++set need_align: true
+
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/uint8.rb b/ext/numo/narray/gen/def/uint8.rb
+index 39ed153..74b70f3 100644
+--- a/ext/numo/narray/gen/def/uint8.rb
++++ b/ext/numo/narray/gen/def/uint8.rb
+@@ -15,6 +15,7 @@ set is_object: false
+ set is_real: true
+ set is_comparable: true
+ set is_double_precision: false
++set need_align: false
+
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/tmpl/accum_binary.c b/ext/numo/narray/gen/tmpl/accum_binary.c
+index ed2579f..fa6d562 100644
+--- a/ext/numo/narray/gen/tmpl/accum_binary.c
++++ b/ext/numo/narray/gen/tmpl/accum_binary.c
+@@ -1,27 +1,31 @@
+-<% (is_float ? ["","_nan"] : [""]).each do |j| %>
++//<% (is_float ? ["","_nan"] : [""]).each do |j| %>
+ static void
+ <%=c_iter%><%=j%>(na_loop_t *const lp)
+ {
+- size_t i;
++ size_t i, n;
+ char *p1, *p2, *p3;
+ ssize_t s1, s2, s3;
+- dtype x, y, z;
+
+- INIT_COUNTER(lp, i);
++ INIT_COUNTER(lp, n);
+ INIT_PTR(lp, 0, p1, s1);
+ INIT_PTR(lp, 1, p2, s2);
+ INIT_PTR(lp, 2, p3, s3);
++
+ if (s3==0) {
++ dtype z;
+ // Reduce loop
+ GET_DATA(p3,dtype,z);
+- for (; i--;) {
++ for (i=0; i<n; i++) {
++ dtype x, y;
+ GET_DATA_STRIDE(p1,s1,dtype,x);
+ GET_DATA_STRIDE(p2,s2,dtype,y);
+ m_<%=name%><%=j%>(x,y,z);
+ }
+ SET_DATA(p3,dtype,z);
++ return;
+ } else {
+- for (; i--;) {
++ for (i=0; i<n; i++) {
++ dtype x, y, z;
+ GET_DATA_STRIDE(p1,s1,dtype,x);
+ GET_DATA_STRIDE(p2,s2,dtype,y);
+ GET_DATA(p3,dtype,z);
+@@ -30,7 +34,7 @@ static void
+ }
+ }
+ }
+-<% end %>
++//<% end %>
+
+ static VALUE
+ <%=c_func%>_self(int argc, VALUE *argv, VALUE self)
+@@ -47,11 +51,11 @@ static VALUE
+ // should fix below: [self.ndim,other.ndim].max or?
+ naryv[0] = self;
+ naryv[1] = argv[0];
+- <% if is_float %>
++ //<% if is_float %>
+ reduce = na_reduce_dimension(argc-1, argv+1, 2, naryv, &ndf, <%=c_iter%>_nan);
+- <% else %>
++ //<% else %>
+ reduce = na_reduce_dimension(argc-1, argv+1, 2, naryv, &ndf, 0);
+- <% end %>
++ //<% end %>
+
+ v = na_ndloop(&ndf, 4, self, argv[0], reduce, m_<%=name%>_init);
+ return <%=type_name%>_extract(v);
+@@ -76,15 +80,15 @@ static VALUE
+ static VALUE
+ <%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
+ {
+- <% if !is_object %>
++ //<% if !is_object %>
+ VALUE klass, v;
+- <% end %>
++ //<% end %>
+ if (argc < 1) {
+ rb_raise(rb_eArgError,"wrong number of arguments (%d for >=1)",argc);
+ }
+- <% if is_object %>
++ //<% if is_object %>
+ return <%=c_func%>_self(argc, argv, self);
+- <% else %>
++ //<% else %>
+ klass = na_upcast(CLASS_OF(self),CLASS_OF(argv[0]));
+ if (klass==cT) {
+ return <%=c_func%>_self(argc, argv, self);
+@@ -92,5 +96,5 @@ static VALUE
+ v = rb_funcall(klass, id_cast, 1, self);
+ return rb_funcall2(v, rb_intern("<%=name%>"), argc, argv);
+ }
+- <% end %>
++ //<% end %>
+ }
+diff --git a/ext/numo/narray/gen/tmpl/binary.c b/ext/numo/narray/gen/tmpl/binary.c
+index 302635b..060b9c0 100644
+--- a/ext/numo/narray/gen/tmpl/binary.c
++++ b/ext/numo/narray/gen/tmpl/binary.c
+@@ -20,11 +20,10 @@ static void
+ INIT_PTR(lp, 1, p2, s2);
+ INIT_PTR(lp, 2, p3, s3);
+
+-<% if /Int8$/ !~ class_name %>
+- if ((size_t)p1 % sizeof(dtype) == 0 &&
+- (size_t)p2 % sizeof(dtype) == 0 &&
+- (size_t)p3 % sizeof(dtype) == 0 ) {
+-<% end %>
++ //<% if need_align %>
++ if (is_aligned(p1,sizeof(dtype)) &&
++ is_aligned(p2,sizeof(dtype)) &&
++ is_aligned(p3,sizeof(dtype)) ) {
+
+ if (s1 == sizeof(dtype) &&
+ s2 == sizeof(dtype) &&
+@@ -35,11 +34,11 @@ static void
+ ((dtype*)p3)[i] = m_<%=name%>(((dtype*)p1)[i],((dtype*)p2)[i]);
+ }
+ return;
+- } else
+- if (s1 % sizeof(dtype) == 0 &&
+- s2 % sizeof(dtype) == 0 &&
+- s3 % sizeof(dtype) == 0 ) {
+-
++ }
++ if (is_aligned_step(s1,sizeof(dtype)) &&
++ is_aligned_step(s2,sizeof(dtype)) &&
++ is_aligned_step(s3,sizeof(dtype)) ) {
++ //<% end %>
+ for (i=0; i<n; i++) {
+ check_intdivzero(*(dtype*)p2);
+ *(dtype*)p3 = m_<%=name%>(*(dtype*)p1,*(dtype*)p2);
+@@ -48,11 +47,10 @@ static void
+ p3 += s3;
+ }
+ return;
++ //<% if need_align %>
+ }
+-
+-<% if /Int8$/ !~ class_name %>
+ }
+- for (i=0; i<n; i+=2) {
++ for (i=0; i<n; i++) {
+ dtype x, y, z;
+ GET_DATA_STRIDE(p1,s1,dtype,x);
+ GET_DATA_STRIDE(p2,s2,dtype,y);
+@@ -60,7 +58,7 @@ static void
+ z = m_<%=name%>(x,y);
+ SET_DATA_STRIDE(p3,s3,dtype,z);
+ }
+-<% end %>
++ //<% end %>
+ }
+ #undef check_intdivzero
+
+diff --git a/ext/numo/narray/gen/tmpl/unary.c b/ext/numo/narray/gen/tmpl/unary.c
+index 382c638..0dcc94c 100644
+--- a/ext/numo/narray/gen/tmpl/unary.c
++++ b/ext/numo/narray/gen/tmpl/unary.c
+@@ -1,25 +1,25 @@
+ static void
+ <%=c_iter%>(na_loop_t *const lp)
+ {
+- size_t i;
++ size_t i, n;
+ char *p1, *p2;
+ ssize_t s1, s2;
+ size_t *idx1, *idx2;
+ dtype x;
+
+- INIT_COUNTER(lp, i);
++ INIT_COUNTER(lp, n);
+ INIT_PTR_IDX(lp, 0, p1, s1, idx1);
+ INIT_PTR_IDX(lp, 1, p2, s2, idx2);
+
+ if (idx1) {
+ if (idx2) {
+- for (; i--;) {
++ for (i=0; i<n; i++) {
+ GET_DATA_INDEX(p1,idx1,dtype,x);
+ x = m_<%=name%>(x);
+ SET_DATA_INDEX(p2,idx2,dtype,x);
+ }
+ } else {
+- for (; i--;) {
++ for (i=0; i<n; i++) {
+ GET_DATA_INDEX(p1,idx1,dtype,x);
+ x = m_<%=name%>(x);
+ SET_DATA_STRIDE(p2,s2,dtype,x);
+@@ -27,17 +27,40 @@ static void
+ }
+ } else {
+ if (idx2) {
+- for (; i--;) {
++ for (i=0; i<n; i++) {
+ GET_DATA_STRIDE(p1,s1,dtype,x);
+ x = m_<%=name%>(x);
+ SET_DATA_INDEX(p2,idx2,dtype,x);
+ }
+ } else {
+- for (; i--;) {
++ //<% if need_align %>
++ if (is_aligned(p1,sizeof(dtype)) &&
++ is_aligned(p2,sizeof(dtype)) ) {
++ if (s1 == sizeof(dtype) &&
++ s2 == sizeof(dtype) ) {
++ for (i=0; i<n; i++) {
++ ((dtype*)p2)[i] = m_<%=name%>(((dtype*)p1)[i]);
++ }
++ return;
++ }
++ if (is_aligned_step(s1,sizeof(dtype)) &&
++ is_aligned_step(s2,sizeof(dtype)) ) {
++ //<% end %>
++ for (i=0; i<n; i++) {
++ *(dtype*)p2 = m_<%=name%>(*(dtype*)p1);
++ p1 += s1;
++ p2 += s2;
++ }
++ return;
++ //<% if need_align %>
++ }
++ }
++ for (i=0; i<n; i++) {
+ GET_DATA_STRIDE(p1,s1,dtype,x);
+ x = m_<%=name%>(x);
+ SET_DATA_STRIDE(p2,s2,dtype,x);
+ }
++ //<% end %>
+ }
+ }
+ }
+diff --git a/ext/numo/narray/numo/template.h b/ext/numo/narray/numo/template.h
+index 2fd6e0e..1d69996 100644
+--- a/ext/numo/narray/numo/template.h
++++ b/ext/numo/narray/numo/template.h
+@@ -133,4 +133,17 @@
+ }
+ // val -> val&1 ??
+
+-#endif /* ifndef NARRAY_H */
++static inline int
++is_aligned(const void *ptr, const size_t alignment)
++{
++ return ((size_t)(ptr) & ((alignment)-1)) == 0;
++}
++
++static inline int
++is_aligned_step(const ssize_t step, const size_t alignment)
++{
++ return ((step) & ((alignment)-1)) == 0;
++}
++
++
++#endif /* ifndef TEMPLATE_H */
+--
+2.11.0
+
diff --git a/debian/patches/0007-fix-bug-in-methods-min-max_index.patch b/debian/patches/0007-fix-bug-in-methods-min-max_index.patch
new file mode 100644
index 0000000..f846b9c
--- /dev/null
+++ b/debian/patches/0007-fix-bug-in-methods-min-max_index.patch
@@ -0,0 +1,34 @@
+From abe5ea8aa92a18ea7c7eaec8e46e8b7738426ec4 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Sat, 3 Jun 2017 17:13:22 +0900
+Subject: [PATCH 07/12] fix bug in methods: min/max_index
+
+---
+ ext/numo/narray/numo/types/real_accum.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/ext/numo/narray/numo/types/real_accum.h b/ext/numo/narray/numo/types/real_accum.h
+index b22f453..8435075 100644
+--- a/ext/numo/narray/numo/types/real_accum.h
++++ b/ext/numo/narray/numo/types/real_accum.h
+@@ -306,7 +306,7 @@ static inline size_t f_min_index(size_t n, char *p, ssize_t stride)
+ y = *(dtype*)p;
+ p += stride;
+ if (not_nan(y)) {
+- j = i;
++ j = i; i++;
+ for (; i<n; i++) {
+ x = *(dtype*)p;
+ p += stride;
+@@ -350,7 +350,7 @@ static inline size_t f_max_index(size_t n, char *p, ssize_t stride)
+ y = *(dtype*)p;
+ p += stride;
+ if (not_nan(y)) {
+- j = i;
++ j = i; i++;
+ for (; i<n; i++) {
+ x = *(dtype*)p;
+ p += stride;
+--
+2.11.0
+
diff --git a/debian/patches/0008-fix-cheking-negative-index.patch b/debian/patches/0008-fix-cheking-negative-index.patch
new file mode 100644
index 0000000..b5ea7e1
--- /dev/null
+++ b/debian/patches/0008-fix-cheking-negative-index.patch
@@ -0,0 +1,56 @@
+From 18def25c4b241c987508a3f7570b7b5a67b651b4 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Mon, 5 Jun 2017 01:59:45 +0900
+Subject: [PATCH 08/12] fix cheking negative index
+
+---
+ ext/numo/narray/index.c | 25 +++++++++++++------------
+ 1 file changed, 13 insertions(+), 12 deletions(-)
+
+diff --git a/ext/numo/narray/index.c b/ext/numo/narray/index.c
+index 3a649b3..ae25845 100644
+--- a/ext/numo/narray/index.c
++++ b/ext/numo/narray/index.c
+@@ -173,26 +173,27 @@ static void
+ na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, na_index_arg_t *q)
+ {
+ int n;
+- ssize_t beg, end;
++ VALUE excl_end;
++ ssize_t beg, end, beg_orig, end_orig;
++ const char *dot = "..", *edot = "...";
+
+- beg = NUM2LONG(rb_funcall(range,id_beg,0));
+- if (beg<0) {
++ beg = beg_orig = NUM2SSIZET(rb_funcall(range,id_beg,0));
++ if (beg < 0) {
+ beg += size;
+ }
+-
+- end = NUM2LONG(rb_funcall(range,id_end,0));
+- if (end<0) {
++ end = end_orig = NUM2SSIZET(rb_funcall(range,id_end,0));
++ if (end < 0) {
+ end += size;
+ }
+-
+- if (RTEST(rb_funcall(range,id_exclude_end,0))) {
++ excl_end = rb_funcall(range,id_exclude_end,0);
++ if (RTEST(excl_end)) {
+ end--;
++ dot = edot;
+ }
+- if (beg < -size || beg >= size ||
+- end < -size || end >= size) {
++ if (beg < 0 || beg >= size || end < 0 || end >= size) {
+ rb_raise(rb_eRangeError,
+- "beg=%"SZF"d,end=%"SZF"d is out of array size (%"SZF"d)",
+- beg, end, size);
++ "%"SZF"d%s%"SZF"d is out of range for size=%"SZF"d",
++ beg_orig, dot, end_orig, size);
+ }
+ n = (end-beg)/step+1;
+ if (n<0) n=0;
+--
+2.11.0
+
diff --git a/debian/patches/0009-new-error-Numo-NArray-ValueError.patch b/debian/patches/0009-new-error-Numo-NArray-ValueError.patch
new file mode 100644
index 0000000..5aa5dc9
--- /dev/null
+++ b/debian/patches/0009-new-error-Numo-NArray-ValueError.patch
@@ -0,0 +1,242 @@
+From bf18ff52eb031d4b7e3a6d73eb65b83d2dcb695f Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Thu, 8 Jun 2017 21:56:13 +0900
+Subject: [PATCH 09/12] new error: Numo::NArray::ValueError. raise error when
+ Numo::Int16[-32768].abs
+
+---
+ ext/numo/narray/narray.c | 2 ++
+ ext/numo/narray/numo/types/int16.h | 7 +++++--
+ ext/numo/narray/numo/types/int32.h | 7 +++++--
+ ext/numo/narray/numo/types/int64.h | 7 +++++--
+ ext/numo/narray/numo/types/int8.h | 7 +++++--
+ ext/numo/narray/numo/types/int_macro.h | 8 +++++++-
+ ext/numo/narray/numo/types/uint16.h | 7 +++++--
+ ext/numo/narray/numo/types/uint32.h | 7 +++++--
+ ext/numo/narray/numo/types/uint64.h | 7 +++++--
+ ext/numo/narray/numo/types/uint8.h | 7 +++++--
+ 10 files changed, 49 insertions(+), 17 deletions(-)
+
+diff --git a/ext/numo/narray/narray.c b/ext/numo/narray/narray.c
+index ab38812..743cac2 100644
+--- a/ext/numo/narray/narray.c
++++ b/ext/numo/narray/narray.c
+@@ -14,6 +14,7 @@ VALUE nary_eCastError;
+ VALUE nary_eShapeError;
+ VALUE nary_eOperationError;
+ VALUE nary_eDimensionError;
++VALUE nary_eValueError;
+
+ static ID id_contiguous_stride;
+ static ID id_allocate;
+@@ -1851,6 +1852,7 @@ Init_narray()
+ nary_eShapeError = rb_define_class_under(cNArray, "ShapeError", rb_eStandardError);
+ nary_eOperationError = rb_define_class_under(cNArray, "OperationError", rb_eStandardError);
+ nary_eDimensionError = rb_define_class_under(cNArray, "DimensionError", rb_eStandardError);
++ nary_eValueError = rb_define_class_under(cNArray, "ValueError", rb_eStandardError);
+
+ rb_define_singleton_method(cNArray, "debug=", na_debug_set, 1);
+ rb_define_singleton_method(cNArray, "profile", na_profile, 0);
+diff --git a/ext/numo/narray/numo/types/int16.h b/ext/numo/narray/numo/types/int16.h
+index 9342f6a..0115b80 100644
+--- a/ext/numo/narray/numo/types/int16.h
++++ b/ext/numo/narray/numo/types/int16.h
+@@ -8,8 +8,6 @@ typedef int16_t rtype;
+ #define m_extract(x) INT2NUM((int)*(dtype*)(x))
+ #define m_sprintf(s,x) sprintf(s,"%d",(int)(x))
+
+-#include "int_macro.h"
+-
+ #ifndef INT16_MIN
+ #define INT16_MIN (-32767-1)
+ #endif
+@@ -17,5 +15,10 @@ typedef int16_t rtype;
+ #define INT16_MAX (32767)
+ #endif
+
++#define DATA_MIN INT16_MIN
++#define DATA_MAX INT16_MAX
++
+ #define M_MIN m_data_to_num(INT16_MIN)
+ #define M_MAX m_data_to_num(INT16_MAX)
++
++#include "int_macro.h"
+diff --git a/ext/numo/narray/numo/types/int32.h b/ext/numo/narray/numo/types/int32.h
+index 5d472d2..059dee4 100644
+--- a/ext/numo/narray/numo/types/int32.h
++++ b/ext/numo/narray/numo/types/int32.h
+@@ -8,8 +8,6 @@ typedef int32_t rtype;
+ #define m_extract(x) INT322NUM((int32_t)*(dtype*)(x))
+ #define m_sprintf(s,x) sprintf(s,"%"PRId32,(int32_t)(x))
+
+-#include "int_macro.h"
+-
+ #ifndef INT32_MIN
+ #define INT32_MIN (-2147483647-1)
+ #endif
+@@ -17,5 +15,10 @@ typedef int32_t rtype;
+ #define INT32_MAX (2147483647)
+ #endif
+
++#define DATA_MIN INT32_MIN
++#define DATA_MAX INT32_MAX
++
+ #define M_MIN m_data_to_num(INT32_MIN)
+ #define M_MAX m_data_to_num(INT32_MAX)
++
++#include "int_macro.h"
+diff --git a/ext/numo/narray/numo/types/int64.h b/ext/numo/narray/numo/types/int64.h
+index bfb9426..e3ed8ba 100644
+--- a/ext/numo/narray/numo/types/int64.h
++++ b/ext/numo/narray/numo/types/int64.h
+@@ -8,8 +8,6 @@ typedef int64_t rtype;
+ #define m_extract(x) INT642NUM((int64_t)*(dtype*)(x))
+ #define m_sprintf(s,x) sprintf(s,"%"PRId64,(int64_t)(x))
+
+-#include "int_macro.h"
+-
+ #ifndef INT64_MIN
+ #define INT64_MIN (-9223372036854775807l-1)
+ #endif
+@@ -17,5 +15,10 @@ typedef int64_t rtype;
+ #define INT64_MAX (9223372036854775807l)
+ #endif
+
++#define DATA_MIN INT64_MIN
++#define DATA_MAX INT64_MAX
++
+ #define M_MIN m_data_to_num(INT64_MIN)
+ #define M_MAX m_data_to_num(INT64_MAX)
++
++#include "int_macro.h"
+diff --git a/ext/numo/narray/numo/types/int8.h b/ext/numo/narray/numo/types/int8.h
+index 676d5e9..5c1f8ba 100644
+--- a/ext/numo/narray/numo/types/int8.h
++++ b/ext/numo/narray/numo/types/int8.h
+@@ -8,8 +8,6 @@ typedef int8_t rtype;
+ #define m_extract(x) INT2NUM((int)*(dtype*)(x))
+ #define m_sprintf(s,x) sprintf(s,"%d",(int)(x))
+
+-#include "int_macro.h"
+-
+ #ifndef INT8_MIN
+ #define INT8_MIN (-127-1)
+ #endif
+@@ -17,5 +15,10 @@ typedef int8_t rtype;
+ #define INT8_MAX (127)
+ #endif
+
++#define DATA_MIN INT8_MIN
++#define DATA_MAX INT8_MAX
++
+ #define M_MIN INT2FIX(INT8_MIN)
+ #define M_MAX INT2FIX(INT8_MAX)
++
++#include "int_macro.h"
+diff --git a/ext/numo/narray/numo/types/int_macro.h b/ext/numo/narray/numo/types/int_macro.h
+index d795426..e3b4dd8 100644
+--- a/ext/numo/narray/numo/types/int_macro.h
++++ b/ext/numo/narray/numo/types/int_macro.h
+@@ -1,8 +1,14 @@
+ #include "xint_macro.h"
+
+-#define m_abs(x) ((x<0)?-x:x)
+ #define m_sign(x) (((x)==0) ? 0 : (((x)>0) ? 1 : -1))
+
++static inline dtype m_abs(dtype x) {
++ if (x==DATA_MIN) {
++ rb_raise(nary_eValueError, "cannot convert the minimum integer");
++ }
++ return (x<0)?-x:x;
++}
++
+ static inline dtype int_reciprocal(dtype x) {
+ switch (x) {
+ case 1:
+diff --git a/ext/numo/narray/numo/types/uint16.h b/ext/numo/narray/numo/types/uint16.h
+index 880c861..c6623fb 100644
+--- a/ext/numo/narray/numo/types/uint16.h
++++ b/ext/numo/narray/numo/types/uint16.h
+@@ -8,11 +8,14 @@ typedef u_int16_t rtype;
+ #define m_extract(x) UINT2NUM((unsigned int)*(dtype*)(x))
+ #define m_sprintf(s,x) sprintf(s,"%u",(unsigned int)(x))
+
+-#include "uint_macro.h"
+-
+ #ifndef UINT16_MAX
+ #define UINT16_MAX (65535)
+ #endif
+
++#define DATA_MIN UINT16_MIN
++#define DATA_MAX UINT16_MAX
++
+ #define M_MIN INT2FIX(0)
+ #define M_MAX m_data_to_num(UINT16_MAX)
++
++#include "uint_macro.h"
+diff --git a/ext/numo/narray/numo/types/uint32.h b/ext/numo/narray/numo/types/uint32.h
+index 8435271..4f7b3c2 100644
+--- a/ext/numo/narray/numo/types/uint32.h
++++ b/ext/numo/narray/numo/types/uint32.h
+@@ -8,11 +8,14 @@ typedef u_int32_t rtype;
+ #define m_extract(x) UINT322NUM((u_int32_t)*(dtype*)(x))
+ #define m_sprintf(s,x) sprintf(s,"%"PRIu32,(u_int32_t)(x))
+
+-#include "uint_macro.h"
+-
+ #ifndef UINT32_MAX
+ #define UINT32_MAX (4294967295u)
+ #endif
+
++#define DATA_MIN UINT32_MIN
++#define DATA_MAX UINT32_MAX
++
+ #define M_MIN INT2FIX(0)
+ #define M_MAX m_data_to_num(UINT32_MAX)
++
++#include "uint_macro.h"
+diff --git a/ext/numo/narray/numo/types/uint64.h b/ext/numo/narray/numo/types/uint64.h
+index 0ad200e..655fb34 100644
+--- a/ext/numo/narray/numo/types/uint64.h
++++ b/ext/numo/narray/numo/types/uint64.h
+@@ -8,11 +8,14 @@ typedef u_int64_t rtype;
+ #define m_extract(x) UINT642NUM((u_int64_t)*(dtype*)(x))
+ #define m_sprintf(s,x) sprintf(s,"%"PRIu64,(u_int64_t)(x))
+
+-#include "uint_macro.h"
+-
+ #ifndef UINT64_MAX
+ #define UINT64_MAX (18446744073709551615ul)
+ #endif
+
++#define DATA_MIN UINT64_MIN
++#define DATA_MAX UINT64_MAX
++
+ #define M_MIN INT2FIX(0)
+ #define M_MAX m_data_to_num(UINT64_MAX)
++
++#include "uint_macro.h"
+diff --git a/ext/numo/narray/numo/types/uint8.h b/ext/numo/narray/numo/types/uint8.h
+index 4fe24e5..da573fc 100644
+--- a/ext/numo/narray/numo/types/uint8.h
++++ b/ext/numo/narray/numo/types/uint8.h
+@@ -8,11 +8,14 @@ typedef u_int8_t rtype;
+ #define m_extract(x) UINT2NUM((unsigned int)*(dtype*)(x))
+ #define m_sprintf(s,x) sprintf(s,"%u",(unsigned int)(x))
+
+-#include "uint_macro.h"
+-
+ #ifndef UINT8_MAX
+ #define UINT8_MAX (255)
+ #endif
+
++#define DATA_MIN UINT8_MIN
++#define DATA_MAX UINT8_MAX
++
+ #define M_MIN INT2FIX(0)
+ #define M_MAX m_data_to_num(UINT8_MAX)
++
++#include "uint_macro.h"
+--
+2.11.0
+
diff --git a/debian/patches/0010-add-nary_eValueError.patch b/debian/patches/0010-add-nary_eValueError.patch
new file mode 100644
index 0000000..538dc9d
--- /dev/null
+++ b/debian/patches/0010-add-nary_eValueError.patch
@@ -0,0 +1,24 @@
+From ae3e2e1ee186e15c5a434feba4cf84ea4a2d0b43 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Thu, 8 Jun 2017 22:09:00 +0900
+Subject: [PATCH 10/12] add nary_eValueError
+
+---
+ ext/numo/narray/numo/narray.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/ext/numo/narray/numo/narray.h b/ext/numo/narray/numo/narray.h
+index 7f7ebf2..e8d67db 100644
+--- a/ext/numo/narray/numo/narray.h
++++ b/ext/numo/narray/numo/narray.h
+@@ -133,6 +133,7 @@ extern VALUE nary_eCastError;
+ extern VALUE nary_eShapeError;
+ extern VALUE nary_eOperationError;
+ extern VALUE nary_eDimensionError;
++extern VALUE nary_eValueError;
+ extern const rb_data_type_t na_data_type;
+
+ //EXTERN const int na_sizeof[NA_NTYPES+1];
+--
+2.11.0
+
diff --git a/debian/patches/0011-fix-sort_index-avoid-zero-division-when-n-1-38.patch b/debian/patches/0011-fix-sort_index-avoid-zero-division-when-n-1-38.patch
new file mode 100644
index 0000000..3a1a6d0
--- /dev/null
+++ b/debian/patches/0011-fix-sort_index-avoid-zero-division-when-n-1-38.patch
@@ -0,0 +1,37 @@
+From ec7513a46a8ed69e1eabf02a897944ced320bf5a Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Fri, 9 Jun 2017 19:56:40 +0900
+Subject: [PATCH 11/12] fix sort_index: avoid zero-division when n==1 (#38)
+
+---
+ ext/numo/narray/gen/tmpl/sort_index.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/ext/numo/narray/gen/tmpl/sort_index.c b/ext/numo/narray/gen/tmpl/sort_index.c
+index 39dc160..35687ff 100644
+--- a/ext/numo/narray/gen/tmpl/sort_index.c
++++ b/ext/numo/narray/gen/tmpl/sort_index.c
+@@ -18,6 +18,11 @@ static void
+
+ //printf("(ptr=%lx, d_ptr=%lx,d_step=%ld, i_ptr=%lx,i_step=%ld, o_ptr=%lx,o_step=%ld)\n",(size_t)ptr,(size_t)d_ptr,(ssize_t)d_step,(size_t)i_ptr,(ssize_t)i_step,(size_t)o_ptr,(ssize_t)o_step);
+
++ if (n==1) {
++ *(idx_t*)o_ptr = *(idx_t*)(i_ptr);
++ return;
++ }
++
+ for (i=0; i<n; i++) {
+ ptr[i] = d_ptr + d_step * i;
+ //printf("(%ld,%.3f)",i,*(double*)ptr[i]);
+@@ -94,7 +99,7 @@ static VALUE
+ }
+ rb_funcall(idx, rb_intern("seq"), 0);
+
+- size = na->size*sizeof(void*);
++ size = na->size*sizeof(void*); // max capa
+ buf = rb_alloc_tmp_buffer(&tmp, size);
+ res = na_ndloop3(&ndf, buf, 3, self, idx, reduce);
+ rb_free_tmp_buffer(&tmp);
+--
+2.11.0
+
diff --git a/debian/patches/0012-fix-bug-num-of-inputs-should-be-lp-nin-not-nf-nin.patch b/debian/patches/0012-fix-bug-num-of-inputs-should-be-lp-nin-not-nf-nin.patch
new file mode 100644
index 0000000..150e31a
--- /dev/null
+++ b/debian/patches/0012-fix-bug-num-of-inputs-should-be-lp-nin-not-nf-nin.patch
@@ -0,0 +1,25 @@
+From 761e7967df379d5c6a551f79db34e9bc36c999c6 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Fri, 9 Jun 2017 19:58:02 +0900
+Subject: [PATCH 12/12] fix bug: num of inputs should be lp->nin, not nf->nin.
+
+---
+ ext/numo/narray/ndloop.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/ext/numo/narray/ndloop.c b/ext/numo/narray/ndloop.c
+index 39c6ead..57ae075 100644
+--- a/ext/numo/narray/ndloop.c
++++ b/ext/numo/narray/ndloop.c
+@@ -397,7 +397,7 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
+ LARG(lp,j).ndim = 0;
+ lp->xargs[j].iter = &(iter[(max_nd+1)*j]);
+ lp->xargs[j].bufcp = NULL;
+- lp->xargs[j].flag = (j<nf->nin) ? NDL_READ : NDL_WRITE;
++ lp->xargs[j].flag = (j<lp->nin) ? NDL_READ : NDL_WRITE;
+ lp->xargs[j].free_user_iter = 0;
+ }
+
+--
+2.11.0
+
diff --git a/debian/patches/series b/debian/patches/series
new file mode 100644
index 0000000..32f6065
--- /dev/null
+++ b/debian/patches/series
@@ -0,0 +1,11 @@
+0002-load-numo-extra-in-rspec.patch
+0003-modify-loop-for-SIMD-optimization.patch
+0004-not_nan-macro-for-fast-NaN-check.patch
+0005-reduce-alloc-times.patch
+0006-consider-alignment.-modify-loop-for-simd-optimizatio.patch
+0007-fix-bug-in-methods-min-max_index.patch
+0008-fix-cheking-negative-index.patch
+0009-new-error-Numo-NArray-ValueError.patch
+0010-add-nary_eValueError.patch
+0011-fix-sort_index-avoid-zero-division-when-n-1-38.patch
+0012-fix-bug-num-of-inputs-should-be-lp-nin-not-nf-nin.patch
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ruby-extras/ruby-numo-narray.git
More information about the Pkg-ruby-extras-commits
mailing list