[DRE-commits] [ruby-numo-narray] 01/13: Import Git HEAD: 2017-06-09 761e796

Wed Jun 21 08:37:48 UTC 2017

This is an automated email from the git hooks/post-receive script.

uwabami-guest pushed a commit to branch patch-queue/master
in repository ruby-numo-narray.

commit 48638b1aa2c2bf6969094ac4a1b218b81766922d
Author: Youhei SASAKI <uwabami at gfd-dennou.org>
Date:   Wed Jun 21 17:05:40 2017 +0900

    Import Git HEAD: 2017-06-09 761e796
    
    Signed-off-by: Youhei SASAKI <uwabami at gfd-dennou.org>
---
 debian/patches/0002-load-numo-extra-in-rspec.patch |  24 ++
 .../0003-modify-loop-for-SIMD-optimization.patch   | 103 +++++
 .../0004-not_nan-macro-for-fast-NaN-check.patch    | 329 +++++++++++++++
 debian/patches/0005-reduce-alloc-times.patch       | 105 +++++
 ...ignment.-modify-loop-for-simd-optimizatio.patch | 450 +++++++++++++++++++++
 .../0007-fix-bug-in-methods-min-max_index.patch    |  34 ++
 .../patches/0008-fix-cheking-negative-index.patch  |  56 +++
 .../0009-new-error-Numo-NArray-ValueError.patch    | 242 +++++++++++
 debian/patches/0010-add-nary_eValueError.patch     |  24 ++
 ...ort_index-avoid-zero-division-when-n-1-38.patch |  37 ++
 ...num-of-inputs-should-be-lp-nin-not-nf-nin.patch |  25 ++
 debian/patches/series                              |  11 +
 12 files changed, 1440 insertions(+)

diff --git a/debian/patches/0002-load-numo-extra-in-rspec.patch b/debian/patches/0002-load-numo-extra-in-rspec.patch
new file mode 100644
index 0000000..27b6eba
--- /dev/null
+++ b/debian/patches/0002-load-numo-extra-in-rspec.patch
@@ -0,0 +1,24 @@
+From 3650f44660039f7a4f392dd8988a1806671404fc Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Thu, 1 Jun 2017 08:43:38 +0900
+Subject: [PATCH 02/12] load numo/extra in rspec
+
+---
+ spec/narray_spec.rb | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/spec/narray_spec.rb b/spec/narray_spec.rb
+index 6fec60e..237b165 100644
+--- a/spec/narray_spec.rb
++++ b/spec/narray_spec.rb
+@@ -1,4 +1,6 @@
+-require File.join(File.dirname(__FILE__), "../ext/numo/narray/narray")
++d=File.dirname(__FILE__)
++require File.join(d, "../ext/numo/narray/narray")
++require File.join(d, "../lib/numo/narray/extra")
+ #Numo::NArray.debug = true
+ 
+ RSpec.configure do |config|
+-- 
+2.11.0
+
diff --git a/debian/patches/0003-modify-loop-for-SIMD-optimization.patch b/debian/patches/0003-modify-loop-for-SIMD-optimization.patch
new file mode 100644
index 0000000..6f2c827
--- /dev/null
+++ b/debian/patches/0003-modify-loop-for-SIMD-optimization.patch
@@ -0,0 +1,103 @@
+From 5d0e677447f2d348c563d06788e65f49b41e4ce4 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Thu, 1 Jun 2017 09:29:02 +0900
+Subject: [PATCH 03/12] modify loop for SIMD optimization
+
+---
+ ext/numo/narray/gen/tmpl/binary.c | 62 ++++++++++++++++++++++++++++++++-------
+ 1 file changed, 52 insertions(+), 10 deletions(-)
+
+diff --git a/ext/numo/narray/gen/tmpl/binary.c b/ext/numo/narray/gen/tmpl/binary.c
+index 135f7b4..302635b 100644
+--- a/ext/numo/narray/gen/tmpl/binary.c
++++ b/ext/numo/narray/gen/tmpl/binary.c
+@@ -1,27 +1,68 @@
++<% if is_int and %w[div mod divmod].include? name %>
++#define check_intdivzero(y)              \
++    if ((y)==0) {                        \
++        lp->err_type = rb_eZeroDivError; \
++        return;                          \
++    }
++<% else %>
++#define check_intdivzero(y) {}
++<% end %>
++
+ static void
+ <%=c_iter%>(na_loop_t *const lp)
+ {
+     size_t   i, n;
+     char    *p1, *p2, *p3;
+     ssize_t  s1, s2, s3;
+-    dtype    x, y;
++
+     INIT_COUNTER(lp, n);
+     INIT_PTR(lp, 0, p1, s1);
+     INIT_PTR(lp, 1, p2, s2);
+     INIT_PTR(lp, 2, p3, s3);
+-    for (i=n; i--;) {
+-        GET_DATA_STRIDE(p1,s1,dtype,x);
+-        GET_DATA_STRIDE(p2,s2,dtype,y);
+-<% if is_int and %w[div mod divmod].include? name %>
+-        if (y==0) {
+-            lp->err_type = rb_eZeroDivError;
++
++<% if /Int8$/ !~ class_name %>
++    if ((size_t)p1 % sizeof(dtype) == 0 &&
++        (size_t)p2 % sizeof(dtype) == 0 &&
++        (size_t)p3 % sizeof(dtype) == 0 ) {
++<% end %>
++
++        if (s1 == sizeof(dtype) &&
++            s2 == sizeof(dtype) &&
++            s3 == sizeof(dtype) ) {
++
++            for (i=0; i<n; i++) {
++                check_intdivzero(*(dtype*)p2);
++                ((dtype*)p3)[i] = m_<%=name%>(((dtype*)p1)[i],((dtype*)p2)[i]);
++            }
++            return;
++        } else
++        if (s1 % sizeof(dtype) == 0 &&
++            s2 % sizeof(dtype) == 0 &&
++            s3 % sizeof(dtype) == 0 ) {
++
++            for (i=0; i<n; i++) {
++                check_intdivzero(*(dtype*)p2);
++                *(dtype*)p3 = m_<%=name%>(*(dtype*)p1,*(dtype*)p2);
++                p1 += s1;
++                p2 += s2;
++                p3 += s3;
++            }
+             return;
+         }
+-<% end %>
+-        x = m_<%=name%>(x,y);
+-        SET_DATA_STRIDE(p3,s3,dtype,x);
++
++<% if /Int8$/ !~ class_name %>
+     }
++    for (i=0; i<n; i+=2) {
++        dtype x, y, z;
++        GET_DATA_STRIDE(p1,s1,dtype,x);
++        GET_DATA_STRIDE(p2,s2,dtype,y);
++        check_intdivzero(y);
++        z = m_<%=name%>(x,y);
++        SET_DATA_STRIDE(p3,s3,dtype,z);
++    }
++<% end %>
+ }
++#undef check_intdivzero
+ 
+ static VALUE
+ <%=c_func%>_self(VALUE self, VALUE other)
+@@ -46,6 +87,7 @@ static VALUE
+     return <%=c_func%>_self(self, other);
+     <% else %>
+     VALUE klass, v;
++
+     klass = na_upcast(CLASS_OF(self),CLASS_OF(other));
+     if (klass==cT) {
+         return <%=c_func%>_self(self, other);
+-- 
+2.11.0
+
diff --git a/debian/patches/0004-not_nan-macro-for-fast-NaN-check.patch b/debian/patches/0004-not_nan-macro-for-fast-NaN-check.patch
new file mode 100644
index 0000000..f78963d
--- /dev/null
+++ b/debian/patches/0004-not_nan-macro-for-fast-NaN-check.patch
@@ -0,0 +1,329 @@
+From 996851fd3ce3bca1bd33edc0d41a3ce83be49090 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Thu, 1 Jun 2017 09:29:55 +0900
+Subject: [PATCH 04/12] not_nan macro for fast NaN check
+
+---
+ ext/numo/narray/numo/types/complex_macro.h | 40 +++++++++--------
+ ext/numo/narray/numo/types/real_accum.h    | 69 +++++++++++++++---------------
+ 2 files changed, 56 insertions(+), 53 deletions(-)
+
+diff --git a/ext/numo/narray/numo/types/complex_macro.h b/ext/numo/narray/numo/types/complex_macro.h
+index 7adadbb..19c52fd 100644
+--- a/ext/numo/narray/numo/types/complex_macro.h
++++ b/ext/numo/narray/numo/types/complex_macro.h
+@@ -117,26 +117,28 @@ static inline dtype c_from_dcomplex(dcomplex x) {
+ #define m_sum_init INT2FIX(0)
+ #define m_mulsum_init INT2FIX(0)
+ 
++#define not_nan(x) (REAL(x)==REAL(x) && IMAG(x)==IMAG(x))
++
+ #define m_mulsum(x,y,z) {z = m_add(m_mul(x,y),z);}
+-#define m_mulsum_nan(x,y,z) {            \
+-        if(!m_isnan(x) && !m_isnan(y)) { \
+-            z = m_add(m_mul(x,y),z);     \
++#define m_mulsum_nan(x,y,z) {          \
++        if(not_nan(x) && not_nan(y)) { \
++            z = m_add(m_mul(x,y),z);   \
+         }}
+ 
+ #define m_cumsum(x,y) {(x)=m_add(x,y);}
+-#define m_cumsum_nan(x,y) {       \
+-        if (m_isnan(x)) {         \
+-            (x) = (y);            \
+-        } else if (!m_isnan(y)) { \
+-            (x) = m_add(x,y);     \
++#define m_cumsum_nan(x,y) {      \
++        if (!not_nan(x)) {       \
++            (x) = (y);           \
++        } else if (not_nan(y)) { \
++            (x) = m_add(x,y);    \
+         }}
+ 
+ #define m_cumprod(x,y) {(x)=m_mul(x,y);}
+-#define m_cumprod_nan(x,y) {      \
+-        if (m_isnan(x)) {         \
+-            (x) = (y);            \
+-        } else if (!m_isnan(y)) { \
+-            (x) = m_mul(x,y);     \
++#define m_cumprod_nan(x,y) {     \
++        if (!not_nan(x)) {       \
++            (x) = (y);           \
++        } else if (not_nan(y)) { \
++            (x) = m_mul(x,y);    \
+         }}
+ 
+ static inline dtype f_sum(size_t n, char *p, ssize_t stride)
+@@ -161,7 +163,7 @@ static inline dtype f_sum_nan(size_t n, char *p, ssize_t stride)
+     y = c_zero();
+     for (; i--;) {
+         x = *(dtype*)p;
+-        if (!c_isnan(x)) {
++        if (not_nan(x)) {
+             y = c_add(x,y);
+         }
+         p += stride;
+@@ -205,7 +207,7 @@ static inline dtype f_kahan_sum_nan(size_t n, char *p, ssize_t stride)
+     r = c_zero();
+     for (; i--;) {
+         x = *(dtype*)p;
+-        if (!c_isnan(x)) {
++        if (not_nan(x)) {
+             if (fabs(REAL(x)) > fabs(REAL(y))) {
+                 double z=REAL(x); REAL(x)=REAL(y); REAL(y)=z;
+             }
+@@ -245,7 +247,7 @@ static inline dtype f_prod_nan(size_t n, char *p, ssize_t stride)
+     y = c_one();
+     for (; i--;) {
+         x = *(dtype*)p;
+-        if (!c_isnan(x)) {
++        if (not_nan(x)) {
+             y = c_mul(x,y);
+         }
+         p += stride;
+@@ -278,7 +280,7 @@ static inline dtype f_mean_nan(size_t n, char *p, ssize_t stride)
+     y = c_zero();
+     for (; i--;) {
+         x = *(dtype*)p;
+-        if (!c_isnan(x)) {
++        if (not_nan(x)) {
+             y = c_add(x,y);
+             count++;
+         }
+@@ -316,7 +318,7 @@ static inline rtype f_var_nan(size_t n, char *p, ssize_t stride)
+ 
+     for (; i--;) {
+         x = *(dtype*)p;
+-        if (!c_isnan(x)) {
++        if (not_nan(x)) {
+             y += c_abs_square(c_sub(x,m));
+             count++;
+         }
+@@ -360,7 +362,7 @@ static inline rtype f_rms_nan(size_t n, char *p, ssize_t stride)
+ 
+     for (; i--;) {
+         x = *(dtype*)p;
+-        if (!c_isnan(x)) {
++        if (not_nan(x)) {
+             y += c_abs_square(x);
+             count++;
+         }
+diff --git a/ext/numo/narray/numo/types/real_accum.h b/ext/numo/narray/numo/types/real_accum.h
+index d0f68f6..b22f453 100644
+--- a/ext/numo/narray/numo/types/real_accum.h
++++ b/ext/numo/narray/numo/types/real_accum.h
+@@ -1,24 +1,25 @@
++#define not_nan(x) ((x)==(x))
+ 
+ #define m_mulsum(x,y,z) {z = m_add(m_mul(x,y),z);}
+-#define m_mulsum_nan(x,y,z) {            \
+-        if(!m_isnan(x) && !m_isnan(y)) { \
+-            z = m_add(m_mul(x,y),z);     \
++#define m_mulsum_nan(x,y,z) {          \
++        if(not_nan(x) && not_nan(y)) { \
++            z = m_add(m_mul(x,y),z);   \
+         }}
+ 
+ #define m_cumsum(x,y) {(x)=m_add(x,y);}
+-#define m_cumsum_nan(x,y) {       \
+-        if (m_isnan(x)) {         \
+-            (x) = (y);            \
+-        } else if (!m_isnan(y)) { \
+-            (x) = m_add(x,y);     \
++#define m_cumsum_nan(x,y) {      \
++        if (!not_nan(x)) {       \
++            (x) = (y);           \
++        } else if (not_nan(y)) { \
++            (x) = m_add(x,y);    \
+         }}
+ 
+ #define m_cumprod(x,y) {(x)=m_mul(x,y);}
+-#define m_cumprod_nan(x,y) {      \
+-        if (m_isnan(x)) {         \
+-            (x) = (y);            \
+-        } else if (!m_isnan(y)) { \
+-            (x) = m_mul(x,y);     \
++#define m_cumprod_nan(x,y) {     \
++        if (!not_nan(x)) {       \
++            (x) = (y);           \
++        } else if (not_nan(y)) { \
++            (x) = m_mul(x,y);    \
+         }}
+ 
+ static inline dtype f_sum(size_t n, char *p, ssize_t stride)
+@@ -28,8 +29,8 @@ static inline dtype f_sum(size_t n, char *p, ssize_t stride)
+ 
+     for (; i--;) {
+         x = *(dtype*)p;
+-        p += stride;
+         y = m_add(x,y);
++        p += stride;
+     }
+     return y;
+ }
+@@ -42,7 +43,7 @@ static inline dtype f_sum_nan(size_t n, char *p, ssize_t stride)
+     for (; i--;) {
+         x = *(dtype*)p;
+         p += stride;
+-        if (!m_isnan(x)) {
++        if (not_nan(x)) {
+             y = m_add(x,y);
+         }
+     }
+@@ -71,7 +72,7 @@ static inline dtype f_prod_nan(size_t n, char *p, ssize_t stride)
+     for (; i--;) {
+         x = *(dtype*)p;
+         p += stride;
+-        if (!m_isnan(x)) {
++        if (not_nan(x)) {
+             y = m_mul(x,y);
+         }
+     }
+@@ -102,7 +103,7 @@ static inline dtype f_mean_nan(size_t n, char *p, ssize_t stride)
+     for (; i--;) {
+         x = *(dtype*)p;
+         p += stride;
+-        if (!m_isnan(x)) {
++        if (not_nan(x)) {
+             y = m_add(x,y);
+             count++;
+         }
+@@ -141,7 +142,7 @@ static inline dtype f_var_nan(size_t n, char *p, ssize_t stride)
+     for (; i--;) {
+         x = *(dtype*)p;
+         p += stride;
+-        if (!m_isnan(x)) {
++        if (not_nan(x)) {
+             a = m_abs(m_sub(x,m));
+             y = m_add(y,m_square(a));
+             count++;
+@@ -184,7 +185,7 @@ static inline dtype f_rms_nan(size_t n, char *p, ssize_t stride)
+     for (; i--;) {
+         x = *(dtype*)p;
+         p += stride;
+-        if (!m_isnan(x)) {
++        if (not_nan(x)) {
+             y = m_add(y,m_square(m_abs(x)));
+             count++;
+         }
+@@ -201,11 +202,11 @@ static inline dtype f_min_nan(size_t n, char *p, ssize_t stride)
+ 
+     y = *(dtype*)p;
+     p += stride;
+-    if (m_isnan(y)) {return y;}
++    if (!not_nan(y)) {return y;}
+     for (i--; i--;) {
+         x = *(dtype*)p;
+         p += stride;
+-        if (m_isnan(x)) {return x;}
++        if (!not_nan(x)) {return x;}
+         if (m_lt(x,y)) {
+             y = x;
+         }
+@@ -221,7 +222,7 @@ static inline dtype f_min(size_t n, char *p, ssize_t stride)
+     for (; i--; ) {
+         y = *(dtype*)p;
+         p += stride;
+-        if (!m_isnan(y)) {
++        if (not_nan(y)) {
+             for (; i--;) {
+                 x = *(dtype*)p;
+                 p += stride;
+@@ -242,11 +243,11 @@ static inline dtype f_max_nan(size_t n, char *p, ssize_t stride)
+ 
+     y = *(dtype*)p;
+     p += stride;
+-    if (m_isnan(y)) {return y;}
++    if (!not_nan(y)) {return y;}
+     for (i--; i--;) {
+         x = *(dtype*)p;
+         p += stride;
+-        if (m_isnan(x)) {return x;}
++        if (!not_nan(x)) {return x;}
+         if (m_gt(x,y)) {
+             y = x;
+         }
+@@ -262,7 +263,7 @@ static inline dtype f_max(size_t n, char *p, ssize_t stride)
+     for (; i--; ) {
+         y = *(dtype*)p;
+         p += stride;
+-        if (!m_isnan(y)) {
++        if (not_nan(y)) {
+             for (; i--;) {
+                 x = *(dtype*)p;
+                 p += stride;
+@@ -283,11 +284,11 @@ static inline size_t f_min_index_nan(size_t n, char *p, ssize_t stride)
+ 
+     y = *(dtype*)p;
+     p += stride;
+-    if (m_isnan(y)) {return j;}
++    if (!not_nan(y)) {return j;}
+     for (i=1; i<n; i++) {
+         x = *(dtype*)p;
+         p += stride;
+-        if (m_isnan(x)) {return i;}
++        if (!not_nan(x)) {return i;}
+         if (m_lt(x,y)) {
+             y = x;
+             j = i;
+@@ -304,7 +305,7 @@ static inline size_t f_min_index(size_t n, char *p, ssize_t stride)
+     for (i=0; i<n; i++) {
+         y = *(dtype*)p;
+         p += stride;
+-        if (!m_isnan(y)) {
++        if (not_nan(y)) {
+             j = i;
+             for (; i<n; i++) {
+                 x = *(dtype*)p;
+@@ -327,11 +328,11 @@ static inline size_t f_max_index_nan(size_t n, char *p, ssize_t stride)
+ 
+     y = *(dtype*)p;
+     p += stride;
+-    if (m_isnan(y)) {return j;}
++    if (!not_nan(y)) {return j;}
+     for (i=1; i<n; i++) {
+         x = *(dtype*)p;
+         p += stride;
+-        if (m_isnan(x)) {return i;}
++        if (!not_nan(x)) {return i;}
+         if (m_gt(x,y)) {
+             y = x;
+             j = i;
+@@ -348,7 +349,7 @@ static inline size_t f_max_index(size_t n, char *p, ssize_t stride)
+     for (i=0; i<n; i++) {
+         y = *(dtype*)p;
+         p += stride;
+-        if (!m_isnan(y)) {
++        if (not_nan(y)) {
+             j = i;
+             for (; i<n; i++) {
+                 x = *(dtype*)p;
+@@ -372,14 +373,14 @@ f_minmax_nan(size_t n, char *p, ssize_t stride, dtype *amin, dtype *amax)
+ 
+     min = max = *(dtype*)p;
+     p += stride;
+-    if (m_isnan(min)) {
++    if (!not_nan(min)) {
+         *amin = *amax = min;
+         return;
+     }
+     for (i--; i--;) {
+         x = *(dtype*)p;
+         p += stride;
+-        if (m_isnan(x)) {
++        if (!not_nan(x)) {
+             *amin = *amax = x;
+             return;
+         }
+@@ -412,7 +413,7 @@ f_minmax(size_t n, char *p, ssize_t stride, dtype *amin, dtype *amax)
+     for (; i--; ) {
+         min = *(dtype*)p;
+         p += stride;
+-        if (!m_isnan(min)) {
++        if (not_nan(min)) {
+             max = min;
+             for (; i--;) {
+                 x = *(dtype*)p;
+-- 
+2.11.0
+
diff --git a/debian/patches/0005-reduce-alloc-times.patch b/debian/patches/0005-reduce-alloc-times.patch
new file mode 100644
index 0000000..bfdd8a2
--- /dev/null
+++ b/debian/patches/0005-reduce-alloc-times.patch
@@ -0,0 +1,105 @@
+From 48bb77db5680f940984a166013cd70b19110859d Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Fri, 2 Jun 2017 15:12:27 +0900
+Subject: [PATCH 05/12] reduce alloc times
+
+---
+ ext/numo/narray/ndloop.c | 41 +++++++++++++++++++++--------------------
+ 1 file changed, 21 insertions(+), 20 deletions(-)
+
+diff --git a/ext/numo/narray/ndloop.c b/ext/numo/narray/ndloop.c
+index e431a7f..39c6ead 100644
+--- a/ext/numo/narray/ndloop.c
++++ b/ext/numo/narray/ndloop.c
+@@ -43,7 +43,7 @@ typedef struct NA_MD_LOOP {
+     int  nin;
+     int  ndim;                // n of total dimention
+     unsigned int copy_flag;   // set i-th bit if i-th arg is cast
+-    size_t  *n_ptr;           // memory for n
++    void    *ptr;             // memory for n
+     na_loop_iter_t *iter_ptr; // memory for iter
+     size_t  *n;               // n of elements for each dim
+     na_loop_t  user;          // loop in user function
+@@ -337,6 +337,9 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
+     int narg;
+     int max_nd;
+ 
++    char *buf;
++    size_t n1, n2, n3, n4, n5;
++
+     long args_len;
+ 
+     na_loop_iter_t *iter;
+@@ -364,23 +367,28 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
+     lp->writeback = -1;
+     lp->init_aidx = -1;
+ 
+-    lp->n = NULL;
+-    lp->n_ptr = NULL;
+-    lp->xargs = NULL;
+-    lp->user.args = NULL;
++    lp->ptr = NULL;
+     lp->user.n = NULL;
+-    lp->iter_ptr = NULL;
+-    lp->trans_map = NULL;
+ 
+     ndloop_find_max_dimension(lp, nf, args);
+     narg = lp->nin + nf->nout;
+     max_nd = lp->ndim + lp->user.ndim;
+ 
+-    lp->n    = lp->n_ptr = ALLOC_N(size_t, max_nd+1);
+-    lp->xargs = ALLOC_N(na_loop_xargs_t, narg);
+-    lp->user.args = ALLOC_N(na_loop_args_t, narg);
+-    iter = ALLOC_N(na_loop_iter_t, narg*(max_nd+1));
+-    lp->iter_ptr = iter;
++    n1 = sizeof(size_t)*(max_nd+1);
++    n2 = sizeof(na_loop_xargs_t)*narg;
++    n2 = ((n2-1)/8+1)*8;
++    n3 = sizeof(na_loop_args_t)*narg;
++    n3 = ((n3-1)/8+1)*8;
++    n4 = sizeof(na_loop_iter_t)*narg*(max_nd+1);
++    n4 = ((n4-1)/8+1)*8;
++    n5 = sizeof(int)*(max_nd+1);
++
++    lp->ptr = buf = (char*)xmalloc(n1+n2+n3+n4+n5);
++    lp->n = (size_t*)buf; buf+=n1;
++    lp->xargs = (na_loop_xargs_t*)buf; buf+=n2;
++    lp->user.args = (na_loop_args_t*)buf; buf+=n3;
++    lp->iter_ptr = iter = (na_loop_iter_t*)buf; buf+=n4;
++    lp->trans_map = (int*)buf;
+ 
+     for (j=0; j<narg; j++) {
+         LARG(lp,j).value = Qnil;
+@@ -406,7 +414,6 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
+     //              array          loop
+     //           [*,+,*,+,*] => [*,*,*,+,+]
+     // trans_map=[0,3,1,4,2] <= [0,1,2,3,4]
+-    lp->trans_map = ALLOC_N(int, max_nd+1);
+     if (NDF_TEST(nf,NDF_FLAT_REDUCE) && RTEST(lp->reduce)) {
+         trans_dim = 0;
+         for (i=0; i<max_nd; i++) {
+@@ -450,7 +457,6 @@ ndloop_release(VALUE vlp)
+             na_release_lock(v);
+         }
+     }
+-    //xfree(lp);
+     for (j=0; j<lp->narg; j++) {
+         //printf("lp->xargs[%d].bufcp=%lx\n",j,(size_t)(lp->xargs[j].bufcp));
+         if (lp->xargs[j].bufcp) {
+@@ -463,12 +469,7 @@ ndloop_release(VALUE vlp)
+             }
+         }
+     }
+-    if (lp->trans_map) xfree(lp->trans_map);
+-    xfree(lp->xargs);
+-    xfree(lp->iter_ptr);
+-    xfree(lp->user.args);
+-    xfree(lp->n_ptr);
+-    //rb_gc_force_recycle(vlp);
++    xfree(lp->ptr);
+     return Qnil;
+ }
+ 
+-- 
+2.11.0
+
diff --git a/debian/patches/0006-consider-alignment.-modify-loop-for-simd-optimizatio.patch b/debian/patches/0006-consider-alignment.-modify-loop-for-simd-optimizatio.patch
new file mode 100644
index 0000000..df5d60e
--- /dev/null
+++ b/debian/patches/0006-consider-alignment.-modify-loop-for-simd-optimizatio.patch
@@ -0,0 +1,450 @@
+From b06d7394bd90ac271de05e3f42213a0daf653204 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Fri, 2 Jun 2017 15:14:16 +0900
+Subject: [PATCH 06/12] consider alignment. modify loop for simd optimization
+
+---
+ ext/numo/narray/gen/def/bit.rb          |  1 +
+ ext/numo/narray/gen/def/dcomplex.rb     |  1 +
+ ext/numo/narray/gen/def/dfloat.rb       |  1 +
+ ext/numo/narray/gen/def/int16.rb        |  1 +
+ ext/numo/narray/gen/def/int32.rb        |  1 +
+ ext/numo/narray/gen/def/int64.rb        |  1 +
+ ext/numo/narray/gen/def/int8.rb         |  1 +
+ ext/numo/narray/gen/def/robject.rb      |  1 +
+ ext/numo/narray/gen/def/scomplex.rb     |  1 +
+ ext/numo/narray/gen/def/sfloat.rb       |  1 +
+ ext/numo/narray/gen/def/uint16.rb       |  1 +
+ ext/numo/narray/gen/def/uint32.rb       |  1 +
+ ext/numo/narray/gen/def/uint64.rb       |  1 +
+ ext/numo/narray/gen/def/uint8.rb        |  1 +
+ ext/numo/narray/gen/tmpl/accum_binary.c | 34 ++++++++++++++++++--------------
+ ext/numo/narray/gen/tmpl/binary.c       | 26 +++++++++++-------------
+ ext/numo/narray/gen/tmpl/unary.c        | 35 +++++++++++++++++++++++++++------
+ ext/numo/narray/numo/template.h         | 15 +++++++++++++-
+ 18 files changed, 88 insertions(+), 36 deletions(-)
+
+diff --git a/ext/numo/narray/gen/def/bit.rb b/ext/numo/narray/gen/def/bit.rb
+index 9173546..c14aeee 100644
+--- a/ext/numo/narray/gen/def/bit.rb
++++ b/ext/numo/narray/gen/def/bit.rb
+@@ -16,6 +16,7 @@ set is_object:     false
+ set is_real:       false
+ set is_comparable: false
+ set is_double_precision: false
++set need_align:    false
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/dcomplex.rb b/ext/numo/narray/gen/def/dcomplex.rb
+index 7ced724..6282faf 100644
+--- a/ext/numo/narray/gen/def/dcomplex.rb
++++ b/ext/numo/narray/gen/def/dcomplex.rb
+@@ -18,6 +18,7 @@ set is_complex:          true
+ set is_object:           false
+ set is_comparable:       false
+ set is_double_precision: true
++set need_align:          true
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float"
+diff --git a/ext/numo/narray/gen/def/dfloat.rb b/ext/numo/narray/gen/def/dfloat.rb
+index fcbe812..a78a367 100644
+--- a/ext/numo/narray/gen/def/dfloat.rb
++++ b/ext/numo/narray/gen/def/dfloat.rb
+@@ -16,6 +16,7 @@ set is_object:           false
+ set is_real:             true
+ set is_comparable:       true
+ set is_double_precision: true
++set need_align:          true
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float"
+diff --git a/ext/numo/narray/gen/def/int16.rb b/ext/numo/narray/gen/def/int16.rb
+index 99fae54..bbc1b7d 100644
+--- a/ext/numo/narray/gen/def/int16.rb
++++ b/ext/numo/narray/gen/def/int16.rb
+@@ -15,6 +15,7 @@ set is_object:           false
+ set is_real:             true
+ set is_comparable:       true
+ set is_double_precision: false
++set need_align:          true
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/int32.rb b/ext/numo/narray/gen/def/int32.rb
+index dc519b0..7ea2882 100644
+--- a/ext/numo/narray/gen/def/int32.rb
++++ b/ext/numo/narray/gen/def/int32.rb
+@@ -15,6 +15,7 @@ set is_object:           false
+ set is_real:             true
+ set is_comparable:       true
+ set is_double_precision: false
++set need_align:          true
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/int64.rb b/ext/numo/narray/gen/def/int64.rb
+index 221bad2..cded10c 100644
+--- a/ext/numo/narray/gen/def/int64.rb
++++ b/ext/numo/narray/gen/def/int64.rb
+@@ -15,6 +15,7 @@ set is_object:           false
+ set is_real:             true
+ set is_comparable:       true
+ set is_double_precision: false
++set need_align:          true
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/int8.rb b/ext/numo/narray/gen/def/int8.rb
+index 121a205..f171325 100644
+--- a/ext/numo/narray/gen/def/int8.rb
++++ b/ext/numo/narray/gen/def/int8.rb
+@@ -15,6 +15,7 @@ set is_object:           false
+ set is_real:             true
+ set is_comparable:       true
+ set is_double_precision: false
++set need_align:          false
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/robject.rb b/ext/numo/narray/gen/def/robject.rb
+index e0996ca..6067d33 100644
+--- a/ext/numo/narray/gen/def/robject.rb
++++ b/ext/numo/narray/gen/def/robject.rb
+@@ -17,6 +17,7 @@ set is_complex:          false
+ set is_object:           true
+ set is_comparable:       true
+ set is_double_precision: false
++set need_align:          false
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float"
+diff --git a/ext/numo/narray/gen/def/scomplex.rb b/ext/numo/narray/gen/def/scomplex.rb
+index b0ceffb..63cf678 100644
+--- a/ext/numo/narray/gen/def/scomplex.rb
++++ b/ext/numo/narray/gen/def/scomplex.rb
+@@ -18,6 +18,7 @@ set is_complex:          true
+ set is_object:           false
+ set is_comparable:       false
+ set is_double_precision: false
++set need_align:          true
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float"
+diff --git a/ext/numo/narray/gen/def/sfloat.rb b/ext/numo/narray/gen/def/sfloat.rb
+index 47be181..cebc46f 100644
+--- a/ext/numo/narray/gen/def/sfloat.rb
++++ b/ext/numo/narray/gen/def/sfloat.rb
+@@ -16,6 +16,7 @@ set is_object:           false
+ set is_real:             true
+ set is_comparable:       true
+ set is_double_precision: false
++set need_align:          true
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float"
+diff --git a/ext/numo/narray/gen/def/uint16.rb b/ext/numo/narray/gen/def/uint16.rb
+index 83012c6..000014a 100644
+--- a/ext/numo/narray/gen/def/uint16.rb
++++ b/ext/numo/narray/gen/def/uint16.rb
+@@ -15,6 +15,7 @@ set is_object:           false
+ set is_real:             true
+ set is_comparable:       true
+ set is_double_precision: false
++set need_align:          true
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/uint32.rb b/ext/numo/narray/gen/def/uint32.rb
+index 1269c4e..6556e11 100644
+--- a/ext/numo/narray/gen/def/uint32.rb
++++ b/ext/numo/narray/gen/def/uint32.rb
+@@ -15,6 +15,7 @@ set is_object:           false
+ set is_real:             true
+ set is_comparable:       true
+ set is_double_precision: false
++set need_align:          true
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/uint64.rb b/ext/numo/narray/gen/def/uint64.rb
+index 4db9bef..8912e25 100644
+--- a/ext/numo/narray/gen/def/uint64.rb
++++ b/ext/numo/narray/gen/def/uint64.rb
+@@ -15,6 +15,7 @@ set is_object:           false
+ set is_real:             true
+ set is_comparable:       true
+ set is_double_precision: false
++set need_align:          true
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/def/uint8.rb b/ext/numo/narray/gen/def/uint8.rb
+index 39ed153..74b70f3 100644
+--- a/ext/numo/narray/gen/def/uint8.rb
++++ b/ext/numo/narray/gen/def/uint8.rb
+@@ -15,6 +15,7 @@ set is_object:           false
+ set is_real:             true
+ set is_comparable:       true
+ set is_double_precision: false
++set need_align:          false
+ 
+ upcast_rb "Integer"
+ upcast_rb "Float", "DFloat"
+diff --git a/ext/numo/narray/gen/tmpl/accum_binary.c b/ext/numo/narray/gen/tmpl/accum_binary.c
+index ed2579f..fa6d562 100644
+--- a/ext/numo/narray/gen/tmpl/accum_binary.c
++++ b/ext/numo/narray/gen/tmpl/accum_binary.c
+@@ -1,27 +1,31 @@
+-<% (is_float ? ["","_nan"] : [""]).each do |j| %>
++//<% (is_float ? ["","_nan"] : [""]).each do |j| %>
+ static void
+ <%=c_iter%><%=j%>(na_loop_t *const lp)
+ {
+-    size_t   i;
++    size_t   i, n;
+     char    *p1, *p2, *p3;
+     ssize_t  s1, s2, s3;
+-    dtype    x, y, z;
+ 
+-    INIT_COUNTER(lp, i);
++    INIT_COUNTER(lp, n);
+     INIT_PTR(lp, 0, p1, s1);
+     INIT_PTR(lp, 1, p2, s2);
+     INIT_PTR(lp, 2, p3, s3);
++
+     if (s3==0) {
++        dtype z;
+         // Reduce loop
+         GET_DATA(p3,dtype,z);
+-        for (; i--;) {
++        for (i=0; i<n; i++) {
++            dtype x, y;
+             GET_DATA_STRIDE(p1,s1,dtype,x);
+             GET_DATA_STRIDE(p2,s2,dtype,y);
+             m_<%=name%><%=j%>(x,y,z);
+         }
+         SET_DATA(p3,dtype,z);
++        return;
+     } else {
+-        for (; i--;) {
++        for (i=0; i<n; i++) {
++            dtype x, y, z;
+             GET_DATA_STRIDE(p1,s1,dtype,x);
+             GET_DATA_STRIDE(p2,s2,dtype,y);
+             GET_DATA(p3,dtype,z);
+@@ -30,7 +34,7 @@ static void
+         }
+     }
+ }
+-<% end %>
++//<% end %>
+ 
+ static VALUE
+ <%=c_func%>_self(int argc, VALUE *argv, VALUE self)
+@@ -47,11 +51,11 @@ static VALUE
+     // should fix below: [self.ndim,other.ndim].max or?
+     naryv[0] = self;
+     naryv[1] = argv[0];
+-  <% if is_float %>
++    //<% if is_float %>
+     reduce = na_reduce_dimension(argc-1, argv+1, 2, naryv, &ndf, <%=c_iter%>_nan);
+-  <% else %>
++    //<% else %>
+     reduce = na_reduce_dimension(argc-1, argv+1, 2, naryv, &ndf, 0);
+-  <% end %>
++    //<% end %>
+ 
+     v =  na_ndloop(&ndf, 4, self, argv[0], reduce, m_<%=name%>_init);
+     return <%=type_name%>_extract(v);
+@@ -76,15 +80,15 @@ static VALUE
+ static VALUE
+ <%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
+ {
+-    <% if !is_object %>
++    //<% if !is_object %>
+     VALUE klass, v;
+-    <% end %>
++    //<% end %>
+     if (argc < 1) {
+         rb_raise(rb_eArgError,"wrong number of arguments (%d for >=1)",argc);
+     }
+-    <% if is_object %>
++    //<% if is_object %>
+     return <%=c_func%>_self(argc, argv, self);
+-    <% else %>
++    //<% else %>
+     klass = na_upcast(CLASS_OF(self),CLASS_OF(argv[0]));
+     if (klass==cT) {
+         return <%=c_func%>_self(argc, argv, self);
+@@ -92,5 +96,5 @@ static VALUE
+         v = rb_funcall(klass, id_cast, 1, self);
+         return rb_funcall2(v, rb_intern("<%=name%>"), argc, argv);
+     }
+-    <% end %>
++    //<% end %>
+ }
+diff --git a/ext/numo/narray/gen/tmpl/binary.c b/ext/numo/narray/gen/tmpl/binary.c
+index 302635b..060b9c0 100644
+--- a/ext/numo/narray/gen/tmpl/binary.c
++++ b/ext/numo/narray/gen/tmpl/binary.c
+@@ -20,11 +20,10 @@ static void
+     INIT_PTR(lp, 1, p2, s2);
+     INIT_PTR(lp, 2, p3, s3);
+ 
+-<% if /Int8$/ !~ class_name %>
+-    if ((size_t)p1 % sizeof(dtype) == 0 &&
+-        (size_t)p2 % sizeof(dtype) == 0 &&
+-        (size_t)p3 % sizeof(dtype) == 0 ) {
+-<% end %>
++    //<% if need_align %>
++    if (is_aligned(p1,sizeof(dtype)) &&
++        is_aligned(p2,sizeof(dtype)) &&
++        is_aligned(p3,sizeof(dtype)) ) {
+ 
+         if (s1 == sizeof(dtype) &&
+             s2 == sizeof(dtype) &&
+@@ -35,11 +34,11 @@ static void
+                 ((dtype*)p3)[i] = m_<%=name%>(((dtype*)p1)[i],((dtype*)p2)[i]);
+             }
+             return;
+-        } else
+-        if (s1 % sizeof(dtype) == 0 &&
+-            s2 % sizeof(dtype) == 0 &&
+-            s3 % sizeof(dtype) == 0 ) {
+-
++        }
++        if (is_aligned_step(s1,sizeof(dtype)) &&
++            is_aligned_step(s2,sizeof(dtype)) &&
++            is_aligned_step(s3,sizeof(dtype)) ) {
++            //<% end %>
+             for (i=0; i<n; i++) {
+                 check_intdivzero(*(dtype*)p2);
+                 *(dtype*)p3 = m_<%=name%>(*(dtype*)p1,*(dtype*)p2);
+@@ -48,11 +47,10 @@ static void
+                 p3 += s3;
+             }
+             return;
++            //<% if need_align %>
+         }
+-
+-<% if /Int8$/ !~ class_name %>
+     }
+-    for (i=0; i<n; i+=2) {
++    for (i=0; i<n; i++) {
+         dtype x, y, z;
+         GET_DATA_STRIDE(p1,s1,dtype,x);
+         GET_DATA_STRIDE(p2,s2,dtype,y);
+@@ -60,7 +58,7 @@ static void
+         z = m_<%=name%>(x,y);
+         SET_DATA_STRIDE(p3,s3,dtype,z);
+     }
+-<% end %>
++    //<% end %>
+ }
+ #undef check_intdivzero
+ 
+diff --git a/ext/numo/narray/gen/tmpl/unary.c b/ext/numo/narray/gen/tmpl/unary.c
+index 382c638..0dcc94c 100644
+--- a/ext/numo/narray/gen/tmpl/unary.c
++++ b/ext/numo/narray/gen/tmpl/unary.c
+@@ -1,25 +1,25 @@
+ static void
+ <%=c_iter%>(na_loop_t *const lp)
+ {
+-    size_t  i;
++    size_t  i, n;
+     char   *p1, *p2;
+     ssize_t s1, s2;
+     size_t *idx1, *idx2;
+     dtype   x;
+ 
+-    INIT_COUNTER(lp, i);
++    INIT_COUNTER(lp, n);
+     INIT_PTR_IDX(lp, 0, p1, s1, idx1);
+     INIT_PTR_IDX(lp, 1, p2, s2, idx2);
+ 
+     if (idx1) {
+         if (idx2) {
+-            for (; i--;) {
++            for (i=0; i<n; i++) {
+                 GET_DATA_INDEX(p1,idx1,dtype,x);
+                 x = m_<%=name%>(x);
+                 SET_DATA_INDEX(p2,idx2,dtype,x);
+             }
+         } else {
+-            for (; i--;) {
++            for (i=0; i<n; i++) {
+                 GET_DATA_INDEX(p1,idx1,dtype,x);
+                 x = m_<%=name%>(x);
+                 SET_DATA_STRIDE(p2,s2,dtype,x);
+@@ -27,17 +27,40 @@ static void
+         }
+     } else {
+         if (idx2) {
+-            for (; i--;) {
++            for (i=0; i<n; i++) {
+                 GET_DATA_STRIDE(p1,s1,dtype,x);
+                 x = m_<%=name%>(x);
+                 SET_DATA_INDEX(p2,idx2,dtype,x);
+             }
+         } else {
+-            for (; i--;) {
++            //<% if need_align %>
++            if (is_aligned(p1,sizeof(dtype)) &&
++                is_aligned(p2,sizeof(dtype)) ) {
++                if (s1 == sizeof(dtype) &&
++                    s2 == sizeof(dtype) ) {
++                    for (i=0; i<n; i++) {
++                        ((dtype*)p2)[i] = m_<%=name%>(((dtype*)p1)[i]);
++                    }
++                    return;
++                }
++                if (is_aligned_step(s1,sizeof(dtype)) &&
++                    is_aligned_step(s2,sizeof(dtype)) ) {
++                    //<% end %>
++                    for (i=0; i<n; i++) {
++                        *(dtype*)p2 = m_<%=name%>(*(dtype*)p1);
++                        p1 += s1;
++                        p2 += s2;
++                    }
++                    return;
++                    //<% if need_align %>
++                }
++            }
++            for (i=0; i<n; i++) {
+                 GET_DATA_STRIDE(p1,s1,dtype,x);
+                 x = m_<%=name%>(x);
+                 SET_DATA_STRIDE(p2,s2,dtype,x);
+             }
++            //<% end %>
+         }
+     }
+ }
+diff --git a/ext/numo/narray/numo/template.h b/ext/numo/narray/numo/template.h
+index 2fd6e0e..1d69996 100644
+--- a/ext/numo/narray/numo/template.h
++++ b/ext/numo/narray/numo/template.h
+@@ -133,4 +133,17 @@
+     }
+ // val -> val&1 ??
+ 
+-#endif /* ifndef NARRAY_H */
++static inline int
++is_aligned(const void *ptr, const size_t alignment)
++{
++    return ((size_t)(ptr) & ((alignment)-1)) == 0;
++}
++
++static inline int
++is_aligned_step(const ssize_t step, const size_t alignment)
++{
++    return ((step) & ((alignment)-1)) == 0;
++}
++
++
++#endif /* ifndef TEMPLATE_H */
+-- 
+2.11.0
+
diff --git a/debian/patches/0007-fix-bug-in-methods-min-max_index.patch b/debian/patches/0007-fix-bug-in-methods-min-max_index.patch
new file mode 100644
index 0000000..f846b9c
--- /dev/null
+++ b/debian/patches/0007-fix-bug-in-methods-min-max_index.patch
@@ -0,0 +1,34 @@
+From abe5ea8aa92a18ea7c7eaec8e46e8b7738426ec4 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Sat, 3 Jun 2017 17:13:22 +0900
+Subject: [PATCH 07/12] fix bug in methods: min/max_index
+
+---
+ ext/numo/narray/numo/types/real_accum.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/ext/numo/narray/numo/types/real_accum.h b/ext/numo/narray/numo/types/real_accum.h
+index b22f453..8435075 100644
+--- a/ext/numo/narray/numo/types/real_accum.h
++++ b/ext/numo/narray/numo/types/real_accum.h
+@@ -306,7 +306,7 @@ static inline size_t f_min_index(size_t n, char *p, ssize_t stride)
+         y = *(dtype*)p;
+         p += stride;
+         if (not_nan(y)) {
+-            j = i;
++            j = i; i++;
+             for (; i<n; i++) {
+                 x = *(dtype*)p;
+                 p += stride;
+@@ -350,7 +350,7 @@ static inline size_t f_max_index(size_t n, char *p, ssize_t stride)
+         y = *(dtype*)p;
+         p += stride;
+         if (not_nan(y)) {
+-            j = i;
++            j = i; i++;
+             for (; i<n; i++) {
+                 x = *(dtype*)p;
+                 p += stride;
+-- 
+2.11.0
+
diff --git a/debian/patches/0008-fix-cheking-negative-index.patch b/debian/patches/0008-fix-cheking-negative-index.patch
new file mode 100644
index 0000000..b5ea7e1
--- /dev/null
+++ b/debian/patches/0008-fix-cheking-negative-index.patch
@@ -0,0 +1,56 @@
+From 18def25c4b241c987508a3f7570b7b5a67b651b4 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Mon, 5 Jun 2017 01:59:45 +0900
+Subject: [PATCH 08/12] fix cheking negative index
+
+---
+ ext/numo/narray/index.c | 25 +++++++++++++------------
+ 1 file changed, 13 insertions(+), 12 deletions(-)
+
+diff --git a/ext/numo/narray/index.c b/ext/numo/narray/index.c
+index 3a649b3..ae25845 100644
+--- a/ext/numo/narray/index.c
++++ b/ext/numo/narray/index.c
+@@ -173,26 +173,27 @@ static void
+ na_parse_range(VALUE range, ssize_t step, int orig_dim, ssize_t size, na_index_arg_t *q)
+ {
+     int n;
+-    ssize_t beg, end;
++    VALUE excl_end;
++    ssize_t beg, end, beg_orig, end_orig;
++    const char *dot = "..", *edot = "...";
+ 
+-    beg = NUM2LONG(rb_funcall(range,id_beg,0));
+-    if (beg<0) {
++    beg = beg_orig = NUM2SSIZET(rb_funcall(range,id_beg,0));
++    if (beg < 0) {
+         beg += size;
+     }
+-
+-    end = NUM2LONG(rb_funcall(range,id_end,0));
+-    if (end<0) {
++    end = end_orig = NUM2SSIZET(rb_funcall(range,id_end,0));
++    if (end < 0) {
+         end += size;
+     }
+-
+-    if (RTEST(rb_funcall(range,id_exclude_end,0))) {
++    excl_end = rb_funcall(range,id_exclude_end,0);
++    if (RTEST(excl_end)) {
+         end--;
++        dot = edot;
+     }
+-    if (beg < -size || beg >= size ||
+-        end < -size || end >= size) {
++    if (beg < 0 || beg >= size || end < 0 || end >= size) {
+         rb_raise(rb_eRangeError,
+-                 "beg=%"SZF"d,end=%"SZF"d is out of array size (%"SZF"d)",
+-                 beg, end, size);
++                 "%"SZF"d%s%"SZF"d is out of range for size=%"SZF"d",
++                 beg_orig, dot, end_orig, size);
+     }
+     n = (end-beg)/step+1;
+     if (n<0) n=0;
+-- 
+2.11.0
+
diff --git a/debian/patches/0009-new-error-Numo-NArray-ValueError.patch b/debian/patches/0009-new-error-Numo-NArray-ValueError.patch
new file mode 100644
index 0000000..5aa5dc9
--- /dev/null
+++ b/debian/patches/0009-new-error-Numo-NArray-ValueError.patch
@@ -0,0 +1,242 @@
+From bf18ff52eb031d4b7e3a6d73eb65b83d2dcb695f Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Thu, 8 Jun 2017 21:56:13 +0900
+Subject: [PATCH 09/12] new error: Numo::NArray::ValueError. raise error when
+ Numo::Int16[-32768].abs
+
+---
+ ext/numo/narray/narray.c               | 2 ++
+ ext/numo/narray/numo/types/int16.h     | 7 +++++--
+ ext/numo/narray/numo/types/int32.h     | 7 +++++--
+ ext/numo/narray/numo/types/int64.h     | 7 +++++--
+ ext/numo/narray/numo/types/int8.h      | 7 +++++--
+ ext/numo/narray/numo/types/int_macro.h | 8 +++++++-
+ ext/numo/narray/numo/types/uint16.h    | 7 +++++--
+ ext/numo/narray/numo/types/uint32.h    | 7 +++++--
+ ext/numo/narray/numo/types/uint64.h    | 7 +++++--
+ ext/numo/narray/numo/types/uint8.h     | 7 +++++--
+ 10 files changed, 49 insertions(+), 17 deletions(-)
+
+diff --git a/ext/numo/narray/narray.c b/ext/numo/narray/narray.c
+index ab38812..743cac2 100644
+--- a/ext/numo/narray/narray.c
++++ b/ext/numo/narray/narray.c
+@@ -14,6 +14,7 @@ VALUE nary_eCastError;
+ VALUE nary_eShapeError;
+ VALUE nary_eOperationError;
+ VALUE nary_eDimensionError;
++VALUE nary_eValueError;
+ 
+ static ID id_contiguous_stride;
+ static ID id_allocate;
+@@ -1851,6 +1852,7 @@ Init_narray()
+     nary_eShapeError = rb_define_class_under(cNArray, "ShapeError", rb_eStandardError);
+     nary_eOperationError = rb_define_class_under(cNArray, "OperationError", rb_eStandardError);
+     nary_eDimensionError = rb_define_class_under(cNArray, "DimensionError", rb_eStandardError);
++    nary_eValueError = rb_define_class_under(cNArray, "ValueError", rb_eStandardError);
+ 
+     rb_define_singleton_method(cNArray, "debug=", na_debug_set, 1);
+     rb_define_singleton_method(cNArray, "profile", na_profile, 0);
+diff --git a/ext/numo/narray/numo/types/int16.h b/ext/numo/narray/numo/types/int16.h
+index 9342f6a..0115b80 100644
+--- a/ext/numo/narray/numo/types/int16.h
++++ b/ext/numo/narray/numo/types/int16.h
+@@ -8,8 +8,6 @@ typedef int16_t rtype;
+ #define m_extract(x)     INT2NUM((int)*(dtype*)(x))
+ #define m_sprintf(s,x)   sprintf(s,"%d",(int)(x))
+ 
+-#include "int_macro.h"
+-
+ #ifndef INT16_MIN
+ #define INT16_MIN (-32767-1)
+ #endif
+@@ -17,5 +15,10 @@ typedef int16_t rtype;
+ #define INT16_MAX (32767)
+ #endif
+ 
++#define DATA_MIN INT16_MIN
++#define DATA_MAX INT16_MAX
++
+ #define M_MIN  m_data_to_num(INT16_MIN)
+ #define M_MAX  m_data_to_num(INT16_MAX)
++
++#include "int_macro.h"
+diff --git a/ext/numo/narray/numo/types/int32.h b/ext/numo/narray/numo/types/int32.h
+index 5d472d2..059dee4 100644
+--- a/ext/numo/narray/numo/types/int32.h
++++ b/ext/numo/narray/numo/types/int32.h
+@@ -8,8 +8,6 @@ typedef int32_t rtype;
+ #define m_extract(x)     INT322NUM((int32_t)*(dtype*)(x))
+ #define m_sprintf(s,x)   sprintf(s,"%"PRId32,(int32_t)(x))
+ 
+-#include "int_macro.h"
+-
+ #ifndef INT32_MIN
+ #define INT32_MIN (-2147483647-1)
+ #endif
+@@ -17,5 +15,10 @@ typedef int32_t rtype;
+ #define INT32_MAX (2147483647)
+ #endif
+ 
++#define DATA_MIN INT32_MIN
++#define DATA_MAX INT32_MAX
++
+ #define M_MIN  m_data_to_num(INT32_MIN)
+ #define M_MAX  m_data_to_num(INT32_MAX)
++
++#include "int_macro.h"
+diff --git a/ext/numo/narray/numo/types/int64.h b/ext/numo/narray/numo/types/int64.h
+index bfb9426..e3ed8ba 100644
+--- a/ext/numo/narray/numo/types/int64.h
++++ b/ext/numo/narray/numo/types/int64.h
+@@ -8,8 +8,6 @@ typedef int64_t rtype;
+ #define m_extract(x)     INT642NUM((int64_t)*(dtype*)(x))
+ #define m_sprintf(s,x)   sprintf(s,"%"PRId64,(int64_t)(x))
+ 
+-#include "int_macro.h"
+-
+ #ifndef INT64_MIN
+ #define INT64_MIN (-9223372036854775807l-1)
+ #endif
+@@ -17,5 +15,10 @@ typedef int64_t rtype;
+ #define INT64_MAX (9223372036854775807l)
+ #endif
+ 
++#define DATA_MIN INT64_MIN
++#define DATA_MAX INT64_MAX
++
+ #define M_MIN  m_data_to_num(INT64_MIN)
+ #define M_MAX  m_data_to_num(INT64_MAX)
++
++#include "int_macro.h"
+diff --git a/ext/numo/narray/numo/types/int8.h b/ext/numo/narray/numo/types/int8.h
+index 676d5e9..5c1f8ba 100644
+--- a/ext/numo/narray/numo/types/int8.h
++++ b/ext/numo/narray/numo/types/int8.h
+@@ -8,8 +8,6 @@ typedef int8_t rtype;
+ #define m_extract(x)     INT2NUM((int)*(dtype*)(x))
+ #define m_sprintf(s,x)   sprintf(s,"%d",(int)(x))
+ 
+-#include "int_macro.h"
+-
+ #ifndef INT8_MIN
+ #define INT8_MIN (-127-1)
+ #endif
+@@ -17,5 +15,10 @@ typedef int8_t rtype;
+ #define INT8_MAX (127)
+ #endif
+ 
++#define DATA_MIN INT8_MIN
++#define DATA_MAX INT8_MAX
++
+ #define M_MIN  INT2FIX(INT8_MIN)
+ #define M_MAX  INT2FIX(INT8_MAX)
++
++#include "int_macro.h"
+diff --git a/ext/numo/narray/numo/types/int_macro.h b/ext/numo/narray/numo/types/int_macro.h
+index d795426..e3b4dd8 100644
+--- a/ext/numo/narray/numo/types/int_macro.h
++++ b/ext/numo/narray/numo/types/int_macro.h
+@@ -1,8 +1,14 @@
+ #include "xint_macro.h"
+ 
+-#define m_abs(x)     ((x<0)?-x:x)
+ #define m_sign(x)    (((x)==0) ? 0 : (((x)>0) ? 1 : -1))
+ 
++static inline dtype m_abs(dtype x) {
++    if (x==DATA_MIN) {
++        rb_raise(nary_eValueError, "cannot convert the minimum integer");
++    }
++    return (x<0)?-x:x;
++}
++
+ static inline dtype int_reciprocal(dtype x) {
+     switch (x) {
+     case 1:
+diff --git a/ext/numo/narray/numo/types/uint16.h b/ext/numo/narray/numo/types/uint16.h
+index 880c861..c6623fb 100644
+--- a/ext/numo/narray/numo/types/uint16.h
++++ b/ext/numo/narray/numo/types/uint16.h
+@@ -8,11 +8,14 @@ typedef u_int16_t rtype;
+ #define m_extract(x)     UINT2NUM((unsigned int)*(dtype*)(x))
+ #define m_sprintf(s,x)   sprintf(s,"%u",(unsigned int)(x))
+ 
+-#include "uint_macro.h"
+-
+ #ifndef UINT16_MAX
+ #define UINT16_MAX (65535)
+ #endif
+ 
++#define DATA_MIN UINT16_MIN
++#define DATA_MAX UINT16_MAX
++
+ #define M_MIN  INT2FIX(0)
+ #define M_MAX  m_data_to_num(UINT16_MAX)
++
++#include "uint_macro.h"
+diff --git a/ext/numo/narray/numo/types/uint32.h b/ext/numo/narray/numo/types/uint32.h
+index 8435271..4f7b3c2 100644
+--- a/ext/numo/narray/numo/types/uint32.h
++++ b/ext/numo/narray/numo/types/uint32.h
+@@ -8,11 +8,14 @@ typedef u_int32_t rtype;
+ #define m_extract(x)     UINT322NUM((u_int32_t)*(dtype*)(x))
+ #define m_sprintf(s,x)   sprintf(s,"%"PRIu32,(u_int32_t)(x))
+ 
+-#include "uint_macro.h"
+-
+ #ifndef UINT32_MAX
+ #define UINT32_MAX (4294967295u)
+ #endif
+ 
++#define DATA_MIN UINT32_MIN
++#define DATA_MAX UINT32_MAX
++
+ #define M_MIN  INT2FIX(0)
+ #define M_MAX  m_data_to_num(UINT32_MAX)
++
++#include "uint_macro.h"
+diff --git a/ext/numo/narray/numo/types/uint64.h b/ext/numo/narray/numo/types/uint64.h
+index 0ad200e..655fb34 100644
+--- a/ext/numo/narray/numo/types/uint64.h
++++ b/ext/numo/narray/numo/types/uint64.h
+@@ -8,11 +8,14 @@ typedef u_int64_t rtype;
+ #define m_extract(x)     UINT642NUM((u_int64_t)*(dtype*)(x))
+ #define m_sprintf(s,x)   sprintf(s,"%"PRIu64,(u_int64_t)(x))
+ 
+-#include "uint_macro.h"
+-
+ #ifndef UINT64_MAX
+ #define UINT64_MAX (18446744073709551615ul)
+ #endif
+ 
++#define DATA_MIN UINT64_MIN
++#define DATA_MAX UINT64_MAX
++
+ #define M_MIN  INT2FIX(0)
+ #define M_MAX  m_data_to_num(UINT64_MAX)
++
++#include "uint_macro.h"
+diff --git a/ext/numo/narray/numo/types/uint8.h b/ext/numo/narray/numo/types/uint8.h
+index 4fe24e5..da573fc 100644
+--- a/ext/numo/narray/numo/types/uint8.h
++++ b/ext/numo/narray/numo/types/uint8.h
+@@ -8,11 +8,14 @@ typedef u_int8_t rtype;
+ #define m_extract(x)     UINT2NUM((unsigned int)*(dtype*)(x))
+ #define m_sprintf(s,x)   sprintf(s,"%u",(unsigned int)(x))
+ 
+-#include "uint_macro.h"
+-
+ #ifndef UINT8_MAX
+ #define UINT8_MAX (255)
+ #endif
+ 
++#define DATA_MIN UINT8_MIN
++#define DATA_MAX UINT8_MAX
++
+ #define M_MIN  INT2FIX(0)
+ #define M_MAX  m_data_to_num(UINT8_MAX)
++
++#include "uint_macro.h"
+-- 
+2.11.0
+
diff --git a/debian/patches/0010-add-nary_eValueError.patch b/debian/patches/0010-add-nary_eValueError.patch
new file mode 100644
index 0000000..538dc9d
--- /dev/null
+++ b/debian/patches/0010-add-nary_eValueError.patch
@@ -0,0 +1,24 @@
+From ae3e2e1ee186e15c5a434feba4cf84ea4a2d0b43 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Thu, 8 Jun 2017 22:09:00 +0900
+Subject: [PATCH 10/12] add nary_eValueError
+
+---
+ ext/numo/narray/numo/narray.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/ext/numo/narray/numo/narray.h b/ext/numo/narray/numo/narray.h
+index 7f7ebf2..e8d67db 100644
+--- a/ext/numo/narray/numo/narray.h
++++ b/ext/numo/narray/numo/narray.h
+@@ -133,6 +133,7 @@ extern VALUE nary_eCastError;
+ extern VALUE nary_eShapeError;
+ extern VALUE nary_eOperationError;
+ extern VALUE nary_eDimensionError;
++extern VALUE nary_eValueError;
+ extern const rb_data_type_t na_data_type;
+ 
+ //EXTERN const int na_sizeof[NA_NTYPES+1];
+-- 
+2.11.0
+
diff --git a/debian/patches/0011-fix-sort_index-avoid-zero-division-when-n-1-38.patch b/debian/patches/0011-fix-sort_index-avoid-zero-division-when-n-1-38.patch
new file mode 100644
index 0000000..3a1a6d0
--- /dev/null
+++ b/debian/patches/0011-fix-sort_index-avoid-zero-division-when-n-1-38.patch
@@ -0,0 +1,37 @@
+From ec7513a46a8ed69e1eabf02a897944ced320bf5a Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Fri, 9 Jun 2017 19:56:40 +0900
+Subject: [PATCH 11/12] fix sort_index: avoid zero-division when n==1 (#38)
+
+---
+ ext/numo/narray/gen/tmpl/sort_index.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/ext/numo/narray/gen/tmpl/sort_index.c b/ext/numo/narray/gen/tmpl/sort_index.c
+index 39dc160..35687ff 100644
+--- a/ext/numo/narray/gen/tmpl/sort_index.c
++++ b/ext/numo/narray/gen/tmpl/sort_index.c
+@@ -18,6 +18,11 @@ static void
+ 
+     //printf("(ptr=%lx, d_ptr=%lx,d_step=%ld, i_ptr=%lx,i_step=%ld, o_ptr=%lx,o_step=%ld)\n",(size_t)ptr,(size_t)d_ptr,(ssize_t)d_step,(size_t)i_ptr,(ssize_t)i_step,(size_t)o_ptr,(ssize_t)o_step);
+ 
++    if (n==1) {
++        *(idx_t*)o_ptr = *(idx_t*)(i_ptr);
++        return;
++    }
++
+     for (i=0; i<n; i++) {
+         ptr[i] = d_ptr + d_step * i;
+         //printf("(%ld,%.3f)",i,*(double*)ptr[i]);
+@@ -94,7 +99,7 @@ static VALUE
+     }
+     rb_funcall(idx, rb_intern("seq"), 0);
+ 
+-    size = na->size*sizeof(void*);
++    size = na->size*sizeof(void*); // max capa
+     buf = rb_alloc_tmp_buffer(&tmp, size);
+     res = na_ndloop3(&ndf, buf, 3, self, idx, reduce);
+     rb_free_tmp_buffer(&tmp);
+-- 
+2.11.0
+
diff --git a/debian/patches/0012-fix-bug-num-of-inputs-should-be-lp-nin-not-nf-nin.patch b/debian/patches/0012-fix-bug-num-of-inputs-should-be-lp-nin-not-nf-nin.patch
new file mode 100644
index 0000000..150e31a
--- /dev/null
+++ b/debian/patches/0012-fix-bug-num-of-inputs-should-be-lp-nin-not-nf-nin.patch
@@ -0,0 +1,25 @@
+From 761e7967df379d5c6a551f79db34e9bc36c999c6 Mon Sep 17 00:00:00 2001
+From: Masahiro TANAKA <masa16.tanaka at gmail.com>
+Date: Fri, 9 Jun 2017 19:58:02 +0900
+Subject: [PATCH 12/12] fix bug: num of inputs should be lp->nin, not nf->nin.
+
+---
+ ext/numo/narray/ndloop.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/ext/numo/narray/ndloop.c b/ext/numo/narray/ndloop.c
+index 39c6ead..57ae075 100644
+--- a/ext/numo/narray/ndloop.c
++++ b/ext/numo/narray/ndloop.c
+@@ -397,7 +397,7 @@ ndloop_alloc(na_md_loop_t *lp, ndfunc_t *nf, VALUE args,
+         LARG(lp,j).ndim = 0;
+         lp->xargs[j].iter = &(iter[(max_nd+1)*j]);
+         lp->xargs[j].bufcp = NULL;
+-        lp->xargs[j].flag = (j<nf->nin) ? NDL_READ : NDL_WRITE;
++        lp->xargs[j].flag = (j<lp->nin) ? NDL_READ : NDL_WRITE;
+         lp->xargs[j].free_user_iter = 0;
+     }
+ 
+-- 
+2.11.0
+
diff --git a/debian/patches/series b/debian/patches/series
new file mode 100644
index 0000000..32f6065
--- /dev/null
+++ b/debian/patches/series
@@ -0,0 +1,11 @@
+0002-load-numo-extra-in-rspec.patch
+0003-modify-loop-for-SIMD-optimization.patch
+0004-not_nan-macro-for-fast-NaN-check.patch
+0005-reduce-alloc-times.patch
+0006-consider-alignment.-modify-loop-for-simd-optimizatio.patch
+0007-fix-bug-in-methods-min-max_index.patch
+0008-fix-cheking-negative-index.patch
+0009-new-error-Numo-NArray-ValueError.patch
+0010-add-nary_eValueError.patch
+0011-fix-sort_index-avoid-zero-division-when-n-1-38.patch
+0012-fix-bug-num-of-inputs-should-be-lp-nin-not-nf-nin.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ruby-extras/ruby-numo-narray.git