[clblas] 16/75: Fix barriers in dtrsm specialized kernels

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Tue Jan 24 23:30:31 UTC 2017


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/master
in repository clblas.

commit d32081a26f766500a7786013a1acc2e708c15957
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Tue Jan 19 17:21:35 2016 -0500

    Fix barriers in dtrsm specialized kernels
---
 src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp | 2 ++
 src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp | 2 ++
 src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp | 2 ++
 src/library/blas/xtrsm.cc                           | 7 ++++---
 4 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp b/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp
index d60b482..1f7c19c 100644
--- a/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp
+++ b/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp
@@ -101,6 +101,7 @@ else\n
 	}\n
 }\n
 
+barrier(CLK_LOCAL_MEM_FENCE);\n
 /*
 * the lower case
 */
@@ -135,6 +136,7 @@ for (i = BLOCK_SIZE - 2; i >= 0; i--) {\n
 	workspace[tx] = *(Bs + i*BLOCK_SIZE + tx); \n
 	x = workspace + i + 1; \n
 	y = Bs + i*BLOCK_SIZE; \n
+    barrier(CLK_LOCAL_MEM_FENCE);\n
 
 	txw = (tx - i - 1); \n
 
diff --git a/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp b/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp
index d435d34..bc9c296 100644
--- a/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp
+++ b/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp
@@ -94,6 +94,7 @@ uint na)\n
 	  Bs[tx*BLOCK_SIZE+tx] = ONE / ( Bs[tx*BLOCK_SIZE+tx]) ;\n
 	}\n      
     }\n
+    barrier(CLK_LOCAL_MEM_FENCE);\n
 
 	  /* the upper case */
   for( i=0; i < BLOCK_SIZE; i++ ) {\n
@@ -110,6 +111,7 @@ uint na)\n
     //dtrmv
     workspace[tx] = *(Bs+i*BLOCK_SIZE+tx);\n
     y = Bs+i*BLOCK_SIZE;\n
+    barrier(CLK_LOCAL_MEM_FENCE);\n
 
     _Pragma("unroll")\n
     //for( j=tx; j < i; j++ )
diff --git a/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp b/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp
index 9ea6708..0ffbebf 100644
--- a/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp
+++ b/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp
@@ -94,6 +94,7 @@ else\n
         Bs[tx*BLOCK_SIZE + tx] = ONE / (Bs[tx*BLOCK_SIZE + tx]); \n
     }\n
 }\n
+barrier(CLK_LOCAL_MEM_FENCE);\n
 
 
 /* the upper case */
@@ -111,6 +112,7 @@ for (i = 0; i < BLOCK_SIZE; i++) {\n
 	//dtrmv
 	workspace[tx] = *(Bs + i*BLOCK_SIZE + tx); \n
 	y = Bs + i*BLOCK_SIZE; \n
+    barrier(CLK_LOCAL_MEM_FENCE);\n
 
 _Pragma("unroll")\n
 	//for( j=tx; j < i; j++ )
diff --git a/src/library/blas/xtrsm.cc b/src/library/blas/xtrsm.cc
index b6f553b..c9325a3 100644
--- a/src/library/blas/xtrsm.cc
+++ b/src/library/blas/xtrsm.cc
@@ -1683,7 +1683,7 @@ clblasDtrsm(
     const cl_event *eventWaitList,
     cl_event *events)
 {
-   /*
+#if 0
    CHECK_QUEUES(numCommandQueues, commandQueues);
    CHECK_EVENTS(numEventsInWaitList, eventWaitList);
 
@@ -1718,7 +1718,8 @@ clblasDtrsm(
    functor->release();
 
    return res;
-   */
+
+#else
 	bool specialCaseHandled = false;
 
 	//outer block size = 192
@@ -1780,7 +1781,7 @@ clblasDtrsm(
 	   numEventsInWaitList,
 	   eventWaitList,
 	   events);
-
+#endif
 }
 
 extern "C"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git



More information about the debian-science-commits mailing list