[clblas] 36/67: mod192 dtrsm using dtrtri
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Oct 27 08:02:13 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clblas.
commit 31c9214fd174a6ae3f55662643938418c84f1db3
Author: Timmy <timmy.liu at amd.com>
Date: Mon Sep 14 14:31:55 2015 -0500
mod192 dtrsm using dtrtri
---
src/library/blas/xtrsm.cc | 77 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 77 insertions(+)
diff --git a/src/library/blas/xtrsm.cc b/src/library/blas/xtrsm.cc
index fc3df26..4fc408f 100644
--- a/src/library/blas/xtrsm.cc
+++ b/src/library/blas/xtrsm.cc
@@ -100,6 +100,8 @@ static void force_trsm_column_major(Args & args)
assert(false); \
}
+#define min(x,y) ((x)<(y)?(x):(y))
+
static void makeKernel(
cl_kernel *clKernel,
cl_command_queue clQueue,
@@ -589,6 +591,81 @@ static clblasStatus gpu_dtrsm192(
diag_dtrtri192(commandQueues[0], N, uplo, diag, A, offA, InvA, lda, inner_block_size, outer_block_size, events);
+ if (transA == clblasNoTrans)
+ {
+ /* the non-transpose case */
+ if (uplo == clblasLower)
+ {
+ /* the lower case */
+ /* handle the first block separately with alpha */
+ // lower is not implemented yet
+
+
+ }
+ else
+ {
+ /* the upper case */
+ /* handle the first block separately with alpha */
+ int nn = min(outer_block_size, (int)N);
+ //DGEMM_RIGHT( M, nn, nn, alpha, _(B,0,0), _(InvA,0,0), zero, _(X,0,0) );
+ err = clblasDgemm(clblasColumnMajor, clblasNoTrans, clblasNoTrans, M, nn, nn, alpha, B, offB, ldb, InvA, offInvA, ldInvA, zero, X, offX, ldX, 1, commandQueues, 0, NULL, events);
+ CL_CHECK(err);
+
+ if (outer_block_size < N)
+ {
+
+ //DGEMM_RIGHT( M, N-nb, nb, neg_one, _(X,0,0), _(A,0,nb), alpha, _(B,0,nb) );
+ err = clblasDgemm(clblasColumnMajor, clblasNoTrans, clblasNoTrans, M, N - outer_block_size, outer_block_size, neg_one, X, offX, ldX, A, offA + lda*outer_block_size, lda, alpha, B, offB + outer_block_size*ldb, ldb, 1, commandQueues, 0, NULL, events);
+ assert(err == CL_SUCCESS);
+
+ /* the remaining blocks */
+ for (i = outer_block_size; i < N; i += outer_block_size)
+ {
+ nn = min(outer_block_size, (int)N - i);
+ //DGEMM_RIGHT( M, nn, nn, one, _(B,0,i), _(InvA,0,i), zero, _(X,0,i) );
+ err = clblasDgemm(clblasColumnMajor, clblasNoTrans, clblasNoTrans, M, nn, nn, one, B, offB + i*ldb, ldb, InvA, offInvA + i*outer_block_size, ldInvA, zero, X, offX + i*ldX, ldX, 1, commandQueues, 0, NULL, events);
+ assert(err == CL_SUCCESS);
+
+ if (i + outer_block_size >= N)
+ break;
+
+ //DGEMM_RIGHT( M, N-i-nb, nb, neg_one, _(X,0,i), _(A,i,i+nb), one, _(B,0,i+nb) );
+ err = clblasDgemm(clblasColumnMajor, clblasNoTrans, clblasNoTrans, M, N - i - outer_block_size, outer_block_size, neg_one, X, offX + i*ldX, ldX, A, offA + i + (outer_block_size + i)*lda, lda, one, B, offB + (i + outer_block_size)*ldb, ldb, 1, commandQueues, 0, NULL, events);
+ assert(err == CL_SUCCESS);
+ }
+ }
+ }
+ }
+ else
+ {
+
+ /* the transpose case */
+ // trans is not implemented yet
+ }
+
+ {
+ size_t src_origin[3] = { 0, 0, 0 };
+ size_t dst_origin[3] = { offB*sizeof(double), 0, 0 };
+ size_t region[3] = { M*sizeof(double), N, 1 };
+
+
+ err = clEnqueueCopyBufferRect(commandQueues[0],
+ X,
+ B,
+ src_origin,
+ dst_origin,
+ region,
+ ldX*sizeof(double), 0,
+ ldb*sizeof(double), 0,
+ 0, NULL,
+ events);
+ CL_CHECK(err);
+
+ clReleaseMemObject(InvA);
+ clReleaseMemObject(X);
+
+ }
+
specialCaseHandled = true;
return clblasSuccess;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git
More information about the debian-science-commits
mailing list