[clblas] 40/67: bug fix
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Oct 27 08:02:13 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clblas.
commit c4e79642089fde29497c8647c7b108bb69519cc2
Author: Timmy <timmy.liu at amd.com>
Date: Mon Sep 21 14:35:07 2015 -0500
bug fix
---
src/library/blas/xtrsm.cc | 30 ++++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/src/library/blas/xtrsm.cc b/src/library/blas/xtrsm.cc
index 73f13a1..6610996 100644
--- a/src/library/blas/xtrsm.cc
+++ b/src/library/blas/xtrsm.cc
@@ -165,7 +165,6 @@ static void makeKernel(
CL_CHECK(err)
}
else {
- //std::cout << kernelSource << std::endl;
clProgram = clCreateProgramWithSource(
clContext,
1, &kernelSource,
@@ -749,7 +748,6 @@ cl_int diag_dtrtri128(
int outer_block_size,
cl_event *event)
{
- std::cout << "enter diag_dtrtri128 " << std::endl;
const char *diag_dtrtri_kernel_upper_KernelSource = NULL;
cl_kernel *diag_dtrtri_kernel_upper_ClKernel = NULL;
size_t diag_dtrtri_kernel_upper_KernelBinarySize = 0;
@@ -871,6 +869,9 @@ cl_int diag_dtrtri128(
err = clEnqueueNDRangeKernel(queue, *diag_dtrtri_kernel_upper_ClKernel, 1, NULL,
globalThreads, globalLocal,
0, NULL, NULL);
+ CL_CHECK(err);
+ //err = clFinish(queue);
+ //CL_CHECK(err);
if (err != CL_SUCCESS) {
//printf( "kernel -diag_dtrtri_kernel_upper- failed with %d\n", err );
@@ -899,7 +900,8 @@ cl_int diag_dtrtri128(
queue,
A, offA, d_dinvA, i, lda, M, event);
CL_CHECK(err);
-
+ //err = clFinish(queue);
+ //CL_CHECK(err);
break;
case 32:
@@ -915,6 +917,8 @@ cl_int diag_dtrtri128(
queue,
A, offA, d_dinvA, i, lda, M, event);
CL_CHECK(err);
+ //err = clFinish(queue);
+ //CL_CHECK(err);
err = call_kernel_triple_update128(&triple_dgemm_update_128_32_PART2_R_clKernel,
triple_dgemm_update_128_32_PART2_R_src,
TrtriBuildOptions,
@@ -924,6 +928,8 @@ cl_int diag_dtrtri128(
queue,
A, offA, d_dinvA, i, lda, M, event);
CL_CHECK(err);
+ //err = clFinish(queue);
+ //CL_CHECK(err);
break;
@@ -940,6 +946,8 @@ cl_int diag_dtrtri128(
queue,
A, offA, d_dinvA, i, lda, M, event);
CL_CHECK(err);
+ //err = clFinish(queue);
+ //CL_CHECK(err);
err = call_kernel_triple_update128(&triple_dgemm_update_128_64_PART2_R_clKernel,
triple_dgemm_update_128_64_PART2_R_src,
@@ -950,6 +958,8 @@ cl_int diag_dtrtri128(
queue,
A, offA, d_dinvA, i, lda, M, event);
CL_CHECK(err);
+ //err = clFinish(queue);
+ //CL_CHECK(err);
break;
@@ -1020,8 +1030,6 @@ static clblasStatus gpu_dtrsm128(
{
if (order != clblasColumnMajor)
return clblasNotImplemented;
- if (M < 16 || N < 16)
- return clblasNotImplemented;
//for now
if (side == clblasRight)
@@ -1076,7 +1084,7 @@ static clblasStatus gpu_dtrsm128(
err = clearBuffer(commandQueues[0], InvA, size_InvA);
CL_CHECK(err);
- err = diag_dtrtri128(commandQueues[0], N, uplo, diag, A, offA, InvA, ldA, inner_block_size, outer_block_size, events);
+ err = diag_dtrtri128(commandQueues[0], M, uplo, diag, A, offA, InvA, ldA, inner_block_size, outer_block_size, events);
CL_CHECK(err);
//
@@ -1134,23 +1142,22 @@ static clblasStatus gpu_dtrsm128(
{
/* the upper case */
/* handle the first block seperately with alpha */
- std::cout << "dtrtri trsm " << std::endl;
int mm = (M % outer_block_size == 0) ? outer_block_size : (M % outer_block_size);
i = M - mm;
- //DGEMM_LEFT(mm, N, mm, alpha, _(InvA, 0, i), _(B, i, 0), zero, _(X, i, 0));
+ DGEMM_LEFT(mm, N, mm, alpha, _(InvA, 0, i), _(B, i, 0), zero, _(X, i, 0));
if (i - outer_block_size >= 0)
{
- //DGEMM_LEFT(i, N, mm, neg_one, _(A, 0, i), _(X, i, 0), alpha, _(B, 0, 0));
+ DGEMM_LEFT(i, N, mm, neg_one, _(A, 0, i), _(X, i, 0), alpha, _(B, 0, 0));
/* the rest blocks */
for (i = M - mm - outer_block_size; i >= 0; i -= outer_block_size) {
- //DGEMM_LEFT(outer_block_size, N, outer_block_size, one, _(InvA, 0, i), _(B, i, 0), zero, _(X, i, 0));
+ DGEMM_LEFT(outer_block_size, N, outer_block_size, one, _(InvA, 0, i), _(B, i, 0), zero, _(X, i, 0));
if (i - outer_block_size < 0)
break;
- //DGEMM_LEFT(i, N, outer_block_size, neg_one, _(A, 0, i), _(X, i, 0), one, _(B, 0, 0));
+ DGEMM_LEFT(i, N, outer_block_size, neg_one, _(A, 0, i), _(X, i, 0), one, _(B, 0, 0));
}
}
}
@@ -1191,7 +1198,6 @@ static clblasStatus gpu_dtrsm128(
{
/* the upper case */
/* handle the first block seperately with alpha */
- std::cout << "dtrtri trsm " << std::endl;
int mm = min(outer_block_size, (int)M);
DGEMM_LEFT(mm, N, mm, alpha, _(InvA, 0, 0), _(B, 0, 0), zero, _(X, 0, 0));
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git
More information about the debian-science-commits
mailing list