[clblas] 01/54: Fix kernel crash on nvidia, caused by float4 alignment error, see https://github.com/clMathLibraries/clBLAS/issues/108 for more details

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jan 14 20:07:32 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository clblas.

commit dff63b437da37ffcf23af2f18d27c42889f49a74
Author: Hugh Perkins <hughperkins at gmail.com>
Date:   Thu Jun 18 18:15:53 2015 +0800

    Fix kernel crash on nvidia, caused by float4 alignment error, see https://github.com/clMathLibraries/clBLAS/issues/108 for more details
---
 src/library/blas/gens/clTemplates/ger.cl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/library/blas/gens/clTemplates/ger.cl b/src/library/blas/gens/clTemplates/ger.cl
index 0006087..99bbcbd 100644
--- a/src/library/blas/gens/clTemplates/ger.cl
+++ b/src/library/blas/gens/clTemplates/ger.cl
@@ -52,7 +52,8 @@ __kernel void %PREFIXger_C_kernel( __global %TYPE const* restrict _X, __global %
 	}
 
 	// create local memory
-	__local %TYPE localX[ BH * %V ];
+  __local %TYPE%V localXV[ BH ];
+  __local %TYPE *localX = (__local %TYPE *)localXV;
 	__local %TYPE localY[ BW ];
 
 	uint lID = get_local_id( 0 );
@@ -193,7 +194,8 @@ __kernel void %PREFIXger_R_kernel( __global %TYPE const* restrict _X, __global %
 	}
 
     __local %TYPE localX[ BH ];
-    __local %TYPE localY[ BW * %V ];
+    __local %TYPE%V localYV[ BW ];
+    __local %TYPE *localY = (__local %TYPE *)localYV;
 
     uint lID = get_local_id( 0 );
     uint gID = get_group_id( 0 );

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git



More information about the debian-science-commits mailing list