[clblas] 16/75: Fix barriers in dtrsm specialized kernels
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Jan 24 23:30:31 UTC 2017
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/master in repository clblas.
commit d32081a26f766500a7786013a1acc2e708c15957
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Tue Jan 19 17:21:35 2016 -0500
Fix barriers in dtrsm specialized kernels
---
src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp | 2 ++
src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp | 2 ++
src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp | 2 ++
src/library/blas/xtrsm.cc | 7 ++++---
4 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp b/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp
index d60b482..1f7c19c 100644
--- a/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp
+++ b/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp
@@ -101,6 +101,7 @@ else\n
}\n
}\n
+barrier(CLK_LOCAL_MEM_FENCE);\n
/*
* the lower case
*/
@@ -135,6 +136,7 @@ for (i = BLOCK_SIZE - 2; i >= 0; i--) {\n
workspace[tx] = *(Bs + i*BLOCK_SIZE + tx); \n
x = workspace + i + 1; \n
y = Bs + i*BLOCK_SIZE; \n
+ barrier(CLK_LOCAL_MEM_FENCE);\n
txw = (tx - i - 1); \n
diff --git a/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp b/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp
index d435d34..bc9c296 100644
--- a/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp
+++ b/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp
@@ -94,6 +94,7 @@ uint na)\n
Bs[tx*BLOCK_SIZE+tx] = ONE / ( Bs[tx*BLOCK_SIZE+tx]) ;\n
}\n
}\n
+ barrier(CLK_LOCAL_MEM_FENCE);\n
/* the upper case */
for( i=0; i < BLOCK_SIZE; i++ ) {\n
@@ -110,6 +111,7 @@ uint na)\n
//dtrmv
workspace[tx] = *(Bs+i*BLOCK_SIZE+tx);\n
y = Bs+i*BLOCK_SIZE;\n
+ barrier(CLK_LOCAL_MEM_FENCE);\n
_Pragma("unroll")\n
//for( j=tx; j < i; j++ )
diff --git a/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp b/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp
index 9ea6708..0ffbebf 100644
--- a/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp
+++ b/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp
@@ -94,6 +94,7 @@ else\n
Bs[tx*BLOCK_SIZE + tx] = ONE / (Bs[tx*BLOCK_SIZE + tx]); \n
}\n
}\n
+barrier(CLK_LOCAL_MEM_FENCE);\n
/* the upper case */
@@ -111,6 +112,7 @@ for (i = 0; i < BLOCK_SIZE; i++) {\n
//dtrmv
workspace[tx] = *(Bs + i*BLOCK_SIZE + tx); \n
y = Bs + i*BLOCK_SIZE; \n
+ barrier(CLK_LOCAL_MEM_FENCE);\n
_Pragma("unroll")\n
//for( j=tx; j < i; j++ )
diff --git a/src/library/blas/xtrsm.cc b/src/library/blas/xtrsm.cc
index b6f553b..c9325a3 100644
--- a/src/library/blas/xtrsm.cc
+++ b/src/library/blas/xtrsm.cc
@@ -1683,7 +1683,7 @@ clblasDtrsm(
const cl_event *eventWaitList,
cl_event *events)
{
- /*
+#if 0
CHECK_QUEUES(numCommandQueues, commandQueues);
CHECK_EVENTS(numEventsInWaitList, eventWaitList);
@@ -1718,7 +1718,8 @@ clblasDtrsm(
functor->release();
return res;
- */
+
+#else
bool specialCaseHandled = false;
//outer block size = 192
@@ -1780,7 +1781,7 @@ clblasDtrsm(
numEventsInWaitList,
eventWaitList,
events);
-
+#endif
}
extern "C"
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git
More information about the debian-science-commits mailing list