r45498 - in /packages/openblas/trunk/debian: changelog patches/32bit_athlon.diff patches/gemv_crash_big_data.diff patches/series patches/sgemv_unitialized_buffer.diff
sebastien at users.alioth.debian.org
sebastien at users.alioth.debian.org
Sat Jan 5 14:07:33 UTC 2013
Author: sebastien
Date: Sat Jan 5 14:07:32 2013
New Revision: 45498
URL: http://svn.debian.org/wsvn/debian-science/?sc=1&rev=45498
Log:
Backport fixes for #696000, #697231 and #697233
Added:
packages/openblas/trunk/debian/patches/32bit_athlon.diff
packages/openblas/trunk/debian/patches/gemv_crash_big_data.diff
packages/openblas/trunk/debian/patches/sgemv_unitialized_buffer.diff
Modified:
packages/openblas/trunk/debian/changelog
packages/openblas/trunk/debian/patches/series
Modified: packages/openblas/trunk/debian/changelog
URL: http://svn.debian.org/wsvn/debian-science/packages/openblas/trunk/debian/changelog?rev=45498&op=diff
==============================================================================
--- packages/openblas/trunk/debian/changelog (original)
+++ packages/openblas/trunk/debian/changelog Sat Jan 5 14:07:32 2013
@@ -1,3 +1,14 @@
+openblas (0.1.1-7) unstable; urgency=low
+
+ * sgemv_unitialized_buffer.diff: new patch taken from upstream, ensures that
+ vectorized sgemv does not use uninitialized data (Closes: #696000)
+ * gemv_crash_big_data.diff: new patch taken from upstream, fixes crashes of
+ gemv on big input data (Closes: #697231)
+ * 32bit_athlon.diff: new patch taken from upstream, fixes crashes on 32-bit
+ Athlon CPUs (Closes: #697233)
+
+ -- Sébastien Villemot <sebastien at debian.org> Sat, 05 Jan 2013 14:13:23 +0100
+
openblas (0.1.1-6) unstable; urgency=low
* kill_threads_at_unload.diff: new patch, taken upstream (Closes: #673061)
Added: packages/openblas/trunk/debian/patches/32bit_athlon.diff
URL: http://svn.debian.org/wsvn/debian-science/packages/openblas/trunk/debian/patches/32bit_athlon.diff?rev=45498&op=file
==============================================================================
--- packages/openblas/trunk/debian/patches/32bit_athlon.diff (added)
+++ packages/openblas/trunk/debian/patches/32bit_athlon.diff Sat Jan 5 14:07:32 2013
@@ -1,0 +1,21 @@
+Description: Fix crash on 32-bit Athlon CPU
+Origin: upstream, https://github.com/xianyi/OpenBLAS/commit/9fb341a9f8d94e4d532d51b1216d92e74a67a569
+Bug-Debian: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=697233
+Last-Update: 2013-01-04
+---
+This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
+--- a/kernel/setparam-ref.c
++++ b/kernel/setparam-ref.c
+@@ -634,10 +634,10 @@ static void init_parameter(void) {
+ TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
+ #endif
+
+-#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH)
++#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)
+
+ #ifdef DEBUG
+- fprintf(stderr, "Katmai, Coppermine, Banias\n");
++ fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
+ #endif
+
+ TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
Added: packages/openblas/trunk/debian/patches/gemv_crash_big_data.diff
URL: http://svn.debian.org/wsvn/debian-science/packages/openblas/trunk/debian/patches/gemv_crash_big_data.diff?rev=45498&op=file
==============================================================================
--- packages/openblas/trunk/debian/patches/gemv_crash_big_data.diff (added)
+++ packages/openblas/trunk/debian/patches/gemv_crash_big_data.diff Sat Jan 5 14:07:32 2013
@@ -1,0 +1,680 @@
+Description: Fix crashes of gemv on big input data
+Origin: upstream,
+ https://github.com/xianyi/OpenBLAS/commit/fd3046b32a1f7049fcb2bfb255d72e4204e5522e
+ https://github.com/xianyi/OpenBLAS/commit/0d1518add98bc3c0e83887be74cda3b23c8937ee
+ https://github.com/xianyi/OpenBLAS/commit/69200884e13e98b79487cfd1c78faf054278ec2f
+ https://github.com/xianyi/OpenBLAS/commit/5f0117385e1d4f986ad75fa66b873b014a7792c2
+ https://github.com/xianyi/OpenBLAS/commit/cea1a885b5cd38bea67feb6437ef0c3622a96c58
+Bug: https://github.com/xianyi/OpenBLAS/issues/154
+ https://github.com/xianyi/OpenBLAS/issues/173
+Bug-Debian: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=697231
+Last-Update: 2013-01-04
+---
+This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
+--- a/kernel/x86/gemv_t_sse.S
++++ b/kernel/x86/gemv_t_sse.S
+@@ -89,17 +89,24 @@
+ #endif
+
+ #define STACKSIZE 16
++#define ARGS 20
+
+-#define M 4 + STACKSIZE(%esp)
+-#define N 8 + STACKSIZE(%esp)
+-#define ALPHA 16 + STACKSIZE(%esp)
+-#define A 20 + STACKSIZE(%esp)
+-#define STACK_LDA 24 + STACKSIZE(%esp)
+-#define STACK_X 28 + STACKSIZE(%esp)
+-#define STACK_INCX 32 + STACKSIZE(%esp)
+-#define Y 36 + STACKSIZE(%esp)
+-#define STACK_INCY 40 + STACKSIZE(%esp)
+-#define BUFFER 44 + STACKSIZE(%esp)
++#define M 4 + STACKSIZE+ARGS(%esp)
++#define N 8 + STACKSIZE+ARGS(%esp)
++#define ALPHA 16 + STACKSIZE+ARGS(%esp)
++#define A 20 + STACKSIZE+ARGS(%esp)
++#define STACK_LDA 24 + STACKSIZE+ARGS(%esp)
++#define STACK_X 28 + STACKSIZE+ARGS(%esp)
++#define STACK_INCX 32 + STACKSIZE+ARGS(%esp)
++#define Y 36 + STACKSIZE+ARGS(%esp)
++#define STACK_INCY 40 + STACKSIZE+ARGS(%esp)
++#define BUFFER 44 + STACKSIZE+ARGS(%esp)
++
++#define MMM 0+STACKSIZE(%esp)
++#define NN 4+STACKSIZE(%esp)
++#define AA 8+STACKSIZE(%esp)
++#define LDAX 12+STACKSIZE(%esp)
++#define XX 16+STACKSIZE(%esp)
+
+ #define I %eax
+ #define J %ebx
+@@ -114,6 +121,7 @@
+
+ PROLOGUE
+
++ subl $ARGS,%esp
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+@@ -122,7 +130,40 @@
+ PROFCODE
+
+ movl STACK_LDA, LDA
++ movl LDA,LDAX # backup LDA
+ movl STACK_X, X
++ movl X,XX
++ movl N,J
++ movl J,NN # backup N
++ movl A,J
++ movl J,AA # backup A
++ movl M,J
++ movl J,MMM # mov M to MMM
++.L0t:
++ xorl J,J
++ addl $1,J
+ sall $21,J # J=2^21
++ subl J,MMM # MMM=MMM-J
++ movl J,M
++ jge .L00t
++ ALIGN_4
++
++ movl MMM,%eax
++ addl J,%eax
++ jle .L999x
++ movl %eax,M
++
++.L00t:
++ movl AA,%eax
++ movl %eax,A # mov AA to A
++
++ movl NN,%eax
++ movl %eax,N # reset N
++
++
++ movl LDAX, LDA # reset LDA
++ movl XX,X
++
+ movl STACK_INCX, INCX
+ movl STACK_INCY, INCY
+
+@@ -642,10 +683,22 @@
+ ALIGN_4
+
+ .L999:
++ movl M,J
++ leal (,J,SIZE),%eax
++ addl %eax,AA
++ movl XX,J
++ addl %eax,J
++ movl J,XX
++ jmp .L0t
++ ALIGN_4
++
++.L999x:
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
++
++ addl $ARGS,%esp
+ ret
+
+ EPILOGUE
+--- a/kernel/x86/gemv_t_sse2.S
++++ b/kernel/x86/gemv_t_sse2.S
+@@ -76,18 +76,24 @@
+ #endif
+
+ #define STACKSIZE 16
++#define ARGS 16
++
++#define M 4 + STACKSIZE+ARGS(%esp)
++#define N 8 + STACKSIZE+ARGS(%esp)
++#define ALPHA 16 + STACKSIZE+ARGS(%esp)
++#define A 24 + STACKSIZE+ARGS(%esp)
++#define STACK_LDA 28 + STACKSIZE+ARGS(%esp)
++#define STACK_X 32 + STACKSIZE+ARGS(%esp)
++#define STACK_INCX 36 + STACKSIZE+ARGS(%esp)
++#define Y 40 + STACKSIZE+ARGS(%esp)
++#define STACK_INCY 44 + STACKSIZE+ARGS(%esp)
++#define BUFFER 48 + STACKSIZE+ARGS(%esp)
++
++#define MMM 0+STACKSIZE(%esp)
++#define AA 4+STACKSIZE(%esp)
++#define LDAX 8+STACKSIZE(%esp)
++#define NN 12+STACKSIZE(%esp)
+
+-#define M 4 + STACKSIZE(%esp)
+-#define N 8 + STACKSIZE(%esp)
+-#define ALPHA 16 + STACKSIZE(%esp)
+-#define A 24 + STACKSIZE(%esp)
+-#define STACK_LDA 28 + STACKSIZE(%esp)
+-#define STACK_X 32 + STACKSIZE(%esp)
+-#define STACK_INCX 36 + STACKSIZE(%esp)
+-#define Y 40 + STACKSIZE(%esp)
+-#define STACK_INCY 44 + STACKSIZE(%esp)
+-#define BUFFER 48 + STACKSIZE(%esp)
+-
+ #define I %eax
+ #define J %ebx
+
+@@ -101,6 +107,8 @@
+
+ PROLOGUE
+
++ subl $ARGS,%esp
++
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+@@ -108,7 +116,38 @@
+
+ PROFCODE
+
++
+ movl STACK_LDA, LDA
++ movl LDA,LDAX # backup LDA
++ movl N,J
++ movl J,NN # backup N
++ movl A,J
++ movl J,AA # backup A
++ movl M,J
++ movl J,MMM # mov M to MMM
++.L0t:
++ xorl J,J
++ addl $1,J
++ sall $22,J # J=2^22
++ subl J,MMM # MMM=MMM-J
++ movl J,M
++ jge .L00t
++ ALIGN_4
++
++ movl MMM,%eax
++ addl J,%eax
++ jle .L999x
++ movl %eax,M
++
++.L00t:
++ movl AA,%eax
++ movl %eax,A # mov AA to A
++
++ movl NN,%eax
++ movl %eax,N # reset N
++
++
++ movl LDAX, LDA # reset LDA
+ movl STACK_X, X
+ movl STACK_INCX, INCX
+ movl STACK_INCY, INCY
+@@ -117,6 +156,7 @@
+ leal (,INCY, SIZE), INCY
+ leal (,LDA, SIZE), LDA
+
++
+ subl $-16 * SIZE, A
+
+ cmpl $0, N
+@@ -560,10 +600,19 @@
+ ALIGN_4
+
+ .L999:
++ movl M,J
++ leal (,J,SIZE),%eax
++ addl %eax,AA
++ jmp .L0t
++ ALIGN_4
++
++.L999x:
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
++
++ addl $ARGS,%esp
+ ret
+
+ EPILOGUE
+--- a/kernel/x86_64/sgemv_t.S
++++ b/kernel/x86_64/sgemv_t.S
+@@ -47,7 +47,7 @@
+
+ #ifndef WINDOWS_ABI
+
+-#define STACKSIZE 64
++#define STACKSIZE 128
+
+ #define OLD_M %rdi
+ #define OLD_N %rsi
+@@ -57,6 +57,10 @@
+ #define STACK_Y 16 + STACKSIZE(%rsp)
+ #define STACK_INCY 24 + STACKSIZE(%rsp)
+ #define STACK_BUFFER 32 + STACKSIZE(%rsp)
++#define MMM 56(%rsp)
++#define NN 64(%rsp)
++#define AA 72(%rsp)
++#define LDAX 80(%rsp)
+
+ #else
+
+@@ -71,6 +75,10 @@
+ #define STACK_Y 72 + STACKSIZE(%rsp)
+ #define STACK_INCY 80 + STACKSIZE(%rsp)
+ #define STACK_BUFFER 88 + STACKSIZE(%rsp)
+#define MMM 216(%rsp) /* stack slot for M; NOTE(review): xmm15 is stored at 208(%rsp) as 16 bytes, which appears to overlap this slot - confirm */
+#define NN 224(%rsp) /* stack slot for N */
+#define AA 232(%rsp) /* stack slot for A */
+#define LDAX 240(%rsp) /* stack slot for LDA */
+
+ #endif
+
+@@ -127,29 +135,46 @@
+ movups %xmm14, 192(%rsp)
+ movups %xmm15, 208(%rsp)
+
+- movq OLD_M, M
+- movq OLD_N, N
+- movq OLD_A, A
+- movq OLD_LDA, LDA
++ movq OLD_M, MMM
++ movq OLD_N, NN
++ movq OLD_A, AA
++ movq OLD_LDA, LDAX
+ movq OLD_X, X
+ #else
+- movq OLD_M, M
+- movq OLD_N, N
+- movq OLD_A, A
+- movq OLD_LDA, LDA
++ movq OLD_M, MMM
++ movq OLD_N, NN
++ movq OLD_A, AA
++ movq OLD_LDA, LDAX
+ #endif
+-
+- movq STACK_INCX, INCX
+- movq STACK_Y, Y
+- movq STACK_INCY, INCY
+- movq STACK_BUFFER, BUFFER
+-
+ #ifndef WINDOWS_ABI
+ pshufd $0, %xmm0, ALPHA
+ #else
+ pshufd $0, %xmm3, ALPHA
+ #endif
+
++
++.L0t:
++ xorq M,M
++ addq $1,M
++ salq $22,M
++ subq M,MMM
++ jge .L00t
++ ALIGN_4
++
++ movq MMM,%rax
++ addq M,%rax
++ jle .L999x
++ movq %rax,M
++
++.L00t:
++ movq LDAX,LDA
++ movq NN,N
++ movq AA,A
++ movq STACK_INCX, INCX
++ movq STACK_Y, Y
++ movq STACK_INCY, INCY
++ movq STACK_BUFFER, BUFFER
++
+ leaq (,INCX, SIZE), INCX
+ leaq (,INCY, SIZE), INCY
+ leaq (,LDA, SIZE), LDA
+@@ -6341,6 +6366,12 @@
+ ALIGN_4
+
+ .L999:
++ leaq (,M,SIZE),%rax
++ addq %rax,AA
++ jmp .L0t
++ ALIGN_4
++
++.L999x:
+ movq 0(%rsp), %rbx
+ movq 8(%rsp), %rbp
+ movq 16(%rsp), %r12
+--- a/kernel/x86/gemv_n_sse.S
++++ b/kernel/x86/gemv_n_sse.S
+@@ -89,17 +89,22 @@
+ #endif
+
+ #define STACKSIZE 16
++#define ARGS 16
+
+-#define M 4 + STACKSIZE(%esp)
+-#define N 8 + STACKSIZE(%esp)
+-#define ALPHA 16 + STACKSIZE(%esp)
+-#define A 20 + STACKSIZE(%esp)
+-#define STACK_LDA 24 + STACKSIZE(%esp)
+-#define STACK_X 28 + STACKSIZE(%esp)
+-#define STACK_INCX 32 + STACKSIZE(%esp)
+-#define Y 36 + STACKSIZE(%esp)
+-#define STACK_INCY 40 + STACKSIZE(%esp)
+-#define BUFFER 44 + STACKSIZE(%esp)
++#define M 4 + STACKSIZE+ARGS(%esp)
++#define N 8 + STACKSIZE+ARGS(%esp)
++#define ALPHA 16 + STACKSIZE+ARGS(%esp)
++#define A 20 + STACKSIZE+ARGS(%esp)
++#define STACK_LDA 24 + STACKSIZE+ARGS(%esp)
++#define STACK_X 28 + STACKSIZE+ARGS(%esp)
++#define STACK_INCX 32 + STACKSIZE+ARGS(%esp)
++#define Y 36 + STACKSIZE+ARGS(%esp)
++#define STACK_INCY 40 + STACKSIZE+ARGS(%esp)
++#define BUFFER 44 + STACKSIZE+ARGS(%esp)
++#define MMM 0+ARGS(%esp)
++#define YY 4+ARGS(%esp)
++#define AA 8+ARGS(%esp)
++#define LDAX 12+ARGS(%esp)
+
+ #define I %eax
+ #define J %ebx
+@@ -114,6 +119,7 @@
+
+ PROLOGUE
+
++ subl $ARGS,%esp
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+@@ -121,7 +127,34 @@
+
+ PROFCODE
+
++ movl Y,J
++ movl J,YY # backup Y
++ movl A,J
++ movl J,AA # backup A
++ movl M,J
++ movl J,MMM # backup MM
++.L0t:
++ xorl J,J
++ addl $1,J
++ sall $21,J
++ subl J,MMM
++ movl J,M
++ jge .L00t
++ ALIGN_4
++
++ movl MMM,%eax
++ addl J,%eax
++ jle .L999x
++ movl %eax,M
++
++.L00t:
++ movl AA,%eax
++ movl %eax,A
++
++ movl YY,J
++ movl J,Y
+ movl STACK_LDA, LDA
++
+ movl STACK_X, X
+ movl STACK_INCX, INCX
+
+@@ -651,12 +684,22 @@
+ addss 0 * SIZE(X), %xmm0
+ movss %xmm0, (Y1)
+ ALIGN_3
+-
+ .L999:
++ movl M,J
++ leal (,J,SIZE),%eax
++ addl %eax,AA
++ movl YY,J
++ addl %eax,J
++ movl J,YY
++ jmp .L0t
++ ALIGN_4
++
++.L999x:
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
++ addl $ARGS,%esp
+ ret
+
+ EPILOGUE
+--- a/kernel/x86/gemv_n_sse2.S
++++ b/kernel/x86/gemv_n_sse2.S
+@@ -76,17 +76,22 @@
+ #endif
+
+ #define STACKSIZE 16
++#define ARGS 16
+
+-#define M 4 + STACKSIZE(%esp)
+-#define N 8 + STACKSIZE(%esp)
+-#define ALPHA 16 + STACKSIZE(%esp)
+-#define A 24 + STACKSIZE(%esp)
+-#define STACK_LDA 28 + STACKSIZE(%esp)
+-#define STACK_X 32 + STACKSIZE(%esp)
+-#define STACK_INCX 36 + STACKSIZE(%esp)
+-#define Y 40 + STACKSIZE(%esp)
+-#define STACK_INCY 44 + STACKSIZE(%esp)
+-#define BUFFER 48 + STACKSIZE(%esp)
++#define M 4 + STACKSIZE+ARGS(%esp)
++#define N 8 + STACKSIZE+ARGS(%esp)
++#define ALPHA 16 + STACKSIZE+ARGS(%esp)
++#define A 24 + STACKSIZE+ARGS(%esp)
++#define STACK_LDA 28 + STACKSIZE+ARGS(%esp)
++#define STACK_X 32 + STACKSIZE+ARGS(%esp)
++#define STACK_INCX 36 + STACKSIZE+ARGS(%esp)
++#define Y 40 + STACKSIZE+ARGS(%esp)
++#define STACK_INCY 44 + STACKSIZE+ARGS(%esp)
++#define BUFFER 48 + STACKSIZE+ARGS(%esp)
++
++#define MMM 0+ARGS(%esp)
++#define YY 4+ARGS(%esp)
++#define AA 8+ARGS(%esp)
+
+ #define I %eax
+ #define J %ebx
+@@ -101,6 +106,8 @@
+
+ PROLOGUE
+
++
++ subl $ARGS,%esp
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+@@ -108,6 +115,33 @@
+
+ PROFCODE
+
++ movl Y,J
++ movl J,YY # backup Y
++ movl A,J
++ movl J,AA # backup A
++ movl M,J
++ movl J,MMM # backup MM
++.L0t:
++ xorl J,J
++ addl $1,J
++ sall $20,J
++ subl J,MMM
++ movl J,M
++ jge .L00t
++ ALIGN_4
++
++ movl MMM,%eax
++ addl J,%eax
++ jle .L999x
++ movl %eax,M
++
++.L00t:
++ movl AA,%eax
++ movl %eax,A
++
++ movl YY,J
++ movl J,Y
++
+ movl STACK_LDA, LDA
+ movl STACK_X, X
+ movl STACK_INCX, INCX
+@@ -677,10 +711,22 @@
+ ALIGN_3
+
+ .L999:
++ movl M,J
++ leal (,J,SIZE),%eax
++ addl %eax,AA
++ movl YY,J
++ addl %eax,J
++ movl J,YY
++ jmp .L0t
++ ALIGN_4
++
++.L999x:
++
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
++ addl $ARGS,%esp
+ ret
+
+ EPILOGUE
+--- a/kernel/x86_64/dgemv_t.S
++++ b/kernel/x86_64/dgemv_t.S
+@@ -47,7 +47,7 @@
+
+ #ifndef WINDOWS_ABI
+
+-#define STACKSIZE 64
++#define STACKSIZE 128
+
+ #define OLD_M %rdi
+ #define OLD_N %rsi
+@@ -57,7 +57,10 @@
+ #define STACK_Y 16 + STACKSIZE(%rsp)
+ #define STACK_INCY 24 + STACKSIZE(%rsp)
+ #define STACK_BUFFER 32 + STACKSIZE(%rsp)
+-
++#define MMM 56(%rsp)
++#define NN 64(%rsp)
++#define AA 72(%rsp)
++#define LDAX 80(%rsp)
+ #else
+
+ #define STACKSIZE 256
+@@ -71,6 +74,11 @@
+ #define STACK_Y 72 + STACKSIZE(%rsp)
+ #define STACK_INCY 80 + STACKSIZE(%rsp)
+ #define STACK_BUFFER 88 + STACKSIZE(%rsp)
++//Temp variables for M,N,A,LDA
++#define MMM 224(%rsp)
++#define NN 232(%rsp)
++#define AA 240(%rsp)
++#define LDAX 248(%rsp)
+
+ #endif
+
+@@ -131,13 +139,51 @@
+ movq OLD_A, A
+ movq OLD_LDA, LDA
+ movq OLD_X, X
++
++ movq M, MMM
++ movq N, NN
++ movq A, AA
++ movq LDA, LDAX
++
+ #else
+- movq OLD_M, M
+- movq OLD_N, N
+- movq OLD_A, A
+- movq OLD_LDA, LDA
++ movq OLD_M, MMM
++ movq OLD_N, NN
++ movq OLD_A, AA
++ movq OLD_LDA, LDAX
++#endif
++#ifdef HAVE_SSE3
++#ifndef WINDOWS_ABI
++ movddup %xmm0, ALPHA
++#else
++ movddup %xmm3, ALPHA
++#endif
++#else
++#ifndef WINDOWS_ABI
++ movapd %xmm0, ALPHA
++#else
++ movapd %xmm3, ALPHA
++#endif
++ unpcklpd ALPHA, ALPHA
+ #endif
+
++
++
++.L0x:
++ xorq M,M
++ addq $1,M
++ salq $22,M
++ subq M,MMM
++ jge .L00
++
++ movq MMM,%rax
++ addq M,%rax
++ jle .L999x
++ movq %rax,M
++
++.L00:
++ movq LDAX,LDA
++ movq NN,N
++ movq AA,A
+ movq STACK_INCX, INCX
+ movq STACK_Y, Y
+ movq STACK_INCY, INCY
+@@ -153,21 +199,6 @@
+
+ subq $-16 * SIZE, A
+
+-#ifdef HAVE_SSE3
+-#ifndef WINDOWS_ABI
+- movddup %xmm0, ALPHA
+-#else
+- movddup %xmm3, ALPHA
+-#endif
+-#else
+-#ifndef WINDOWS_ABI
+- movapd %xmm0, ALPHA
+-#else
+- movapd %xmm3, ALPHA
+-#endif
+- unpcklpd ALPHA, ALPHA
+-#endif
+-
+ testq M, M
+ jle .L999
+ testq N, N
+@@ -854,7 +885,6 @@
+
+ .L21:
+ #endif
+-
+ subq $4, N
+
+ leaq 16 * SIZE(BUFFER), X1
+@@ -2461,6 +2491,12 @@
+ ALIGN_4
+
+ .L999:
++ leaq (, M, SIZE), %rax
++ addq %rax,AA
++ jmp .L0x;
++ ALIGN_4
++
++.L999x:
+ movq 0(%rsp), %rbx
+ movq 8(%rsp), %rbp
+ movq 16(%rsp), %r12
Modified: packages/openblas/trunk/debian/patches/series
URL: http://svn.debian.org/wsvn/debian-science/packages/openblas/trunk/debian/patches/series?rev=45498&op=diff
==============================================================================
--- packages/openblas/trunk/debian/patches/series (original)
+++ packages/openblas/trunk/debian/patches/series Sat Jan 5 14:07:32 2013
@@ -3,3 +3,6 @@
hurd.diff
generic_profile.diff
kill_threads_at_unload.diff
+32bit_athlon.diff
+sgemv_unitialized_buffer.diff
+gemv_crash_big_data.diff
Added: packages/openblas/trunk/debian/patches/sgemv_unitialized_buffer.diff
URL: http://svn.debian.org/wsvn/debian-science/packages/openblas/trunk/debian/patches/sgemv_unitialized_buffer.diff?rev=45498&op=file
==============================================================================
--- packages/openblas/trunk/debian/patches/sgemv_unitialized_buffer.diff (added)
+++ packages/openblas/trunk/debian/patches/sgemv_unitialized_buffer.diff Sat Jan 5 14:07:32 2013
@@ -1,0 +1,30 @@
+Description: Ensure that vectorized sgemv does not use uninitialized data
+Origin: upstream, https://github.com/xianyi/OpenBLAS/commit/91ed4e4450ceabd71493e0bf80e7455df414bebf
+Bug: https://github.com/xianyi/OpenBLAS/issues/171
+Bug-Debian: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=696000
+Last-Update: 2013-01-04
+---
+This patch header follows DEP-3: http://dep.debian.net/deps/dep3/
+--- a/kernel/x86/gemv_t_sse.S
++++ b/kernel/x86/gemv_t_sse.S
+@@ -198,6 +198,20 @@
+ jg .L06
+ ALIGN_4
+
++//Padding zero to prevent loading the dirty number from buffer.
++ movl M, I
++ movl $8, J
++ andl $7, I
++ xorps %xmm0, %xmm0
++ subl I, J
++ ALIGN_2
++.L07:
++ movss %xmm0, 0 * SIZE(Y1)
++ addl $SIZE, Y1
++ decl J
++ jg .L07
++ ALIGN_4
++
+ .L10:
+ movl Y, Y1
+
More information about the debian-science-commits
mailing list