[clblas] 14/75: Fixing issue with beta == 0 in UserGemm kernels
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Jan 24 23:30:31 UTC 2017
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/master
in repository clblas.
commit 627c6545591d16da159bc6f479070441916f827c
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Fri Jan 15 20:28:13 2016 -0500
Fixing issue with beta == 0 in UserGemm kernels
Related to 1af16a8bdf3ef3b356054a9038afd2f0b94b0627
---
.../sgemm_Col_NN_B0_MX096_NX096_KX16_src.cpp | 72 +++++++++++-----------
.../sgemm_Col_NT_B0_MX096_NX096_KX16_src.cpp | 72 +++++++++++-----------
.../sgemm_Col_TN_B0_MX096_NX096_KX16_src.cpp | 72 +++++++++++-----------
3 files changed, 108 insertions(+), 108 deletions(-)
diff --git a/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NN_B0_MX096_NX096_KX16_src.cpp b/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NN_B0_MX096_NX096_KX16_src.cpp
index 5151bde..c1f9256 100644
--- a/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NN_B0_MX096_NX096_KX16_src.cpp
+++ b/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NN_B0_MX096_NX096_KX16_src.cpp
@@ -161,47 +161,47 @@ __kernel void sgemm_Col_NN_B0_MX096_NX096_KX16 (
C+= gidy*96*ldc;
C+= idy*ldc;
- C[0*ldc] = alpha*rC[0][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[0][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[0][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[0][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[0][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[0][5] + beta*C[80*ldc];
+ C[0 *ldc] = alpha*rC[0][0];
+ C[16*ldc] = alpha*rC[0][1];
+ C[32*ldc] = alpha*rC[0][2];
+ C[48*ldc] = alpha*rC[0][3];
+ C[64*ldc] = alpha*rC[0][4];
+ C[80*ldc] = alpha*rC[0][5];
C+=16;
- C[0*ldc] = alpha*rC[1][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[1][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[1][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[1][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[1][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[1][5] + beta*C[80*ldc];
+ C[0 *ldc] = alpha*rC[1][0];
+ C[16*ldc] = alpha*rC[1][1];
+ C[32*ldc] = alpha*rC[1][2];
+ C[48*ldc] = alpha*rC[1][3];
+ C[64*ldc] = alpha*rC[1][4];
+ C[80*ldc] = alpha*rC[1][5];
C+=16;
- C[0*ldc] = alpha*rC[2][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[2][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[2][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[2][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[2][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[2][5] + beta*C[80*ldc];
+ C[0 *ldc] = alpha*rC[2][0];
+ C[16*ldc] = alpha*rC[2][1];
+ C[32*ldc] = alpha*rC[2][2];
+ C[48*ldc] = alpha*rC[2][3];
+ C[64*ldc] = alpha*rC[2][4];
+ C[80*ldc] = alpha*rC[2][5];
C+=16;
- C[0*ldc] = alpha*rC[3][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[3][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[3][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[3][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[3][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[3][5] + beta*C[80*ldc];
+ C[0 *ldc] = alpha*rC[3][0];
+ C[16*ldc] = alpha*rC[3][1];
+ C[32*ldc] = alpha*rC[3][2];
+ C[48*ldc] = alpha*rC[3][3];
+ C[64*ldc] = alpha*rC[3][4];
+ C[80*ldc] = alpha*rC[3][5];
C+=16;
- C[0*ldc] = alpha*rC[4][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[4][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[4][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[4][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[4][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[4][5] + beta*C[80*ldc];
+ C[0 *ldc] = alpha*rC[4][0];
+ C[16*ldc] = alpha*rC[4][1];
+ C[32*ldc] = alpha*rC[4][2];
+ C[48*ldc] = alpha*rC[4][3];
+ C[64*ldc] = alpha*rC[4][4];
+ C[80*ldc] = alpha*rC[4][5];
C+=16;
- C[0*ldc] = alpha*rC[5][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[5][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[5][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[5][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[5][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[5][5] + beta*C[80*ldc];
+ C[0 *ldc] = alpha*rC[5][0];
+ C[16*ldc] = alpha*rC[5][1];
+ C[32*ldc] = alpha*rC[5][2];
+ C[48*ldc] = alpha*rC[5][3];
+ C[64*ldc] = alpha*rC[5][4];
+ C[80*ldc] = alpha*rC[5][5];
}
);
diff --git a/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NT_B0_MX096_NX096_KX16_src.cpp b/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NT_B0_MX096_NX096_KX16_src.cpp
index d22eca6..a8d0fec 100644
--- a/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NT_B0_MX096_NX096_KX16_src.cpp
+++ b/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_NT_B0_MX096_NX096_KX16_src.cpp
@@ -163,47 +163,47 @@ __kernel void sgemm_Col_NT_B0_MX096_NX096_KX16 (
C+= gidy*96*ldc;
C+= idy*ldc;
- C[0*ldc] = alpha*rC[0][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[0][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[0][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[0][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[0][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[0][5] + beta*C[80*ldc];
+ C[0*ldc] = alpha*rC[0][0];
+ C[16*ldc] = alpha*rC[0][1];
+ C[32*ldc] = alpha*rC[0][2];
+ C[48*ldc] = alpha*rC[0][3];
+ C[64*ldc] = alpha*rC[0][4];
+ C[80*ldc] = alpha*rC[0][5];
C+=16;
- C[0*ldc] = alpha*rC[1][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[1][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[1][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[1][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[1][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[1][5] + beta*C[80*ldc];
+ C[0*ldc] = alpha*rC[1][0];
+ C[16*ldc] = alpha*rC[1][1];
+ C[32*ldc] = alpha*rC[1][2];
+ C[48*ldc] = alpha*rC[1][3];
+ C[64*ldc] = alpha*rC[1][4];
+ C[80*ldc] = alpha*rC[1][5];
C+=16;
- C[0*ldc] = alpha*rC[2][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[2][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[2][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[2][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[2][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[2][5] + beta*C[80*ldc];
+ C[0*ldc] = alpha*rC[2][0];
+ C[16*ldc] = alpha*rC[2][1];
+ C[32*ldc] = alpha*rC[2][2];
+ C[48*ldc] = alpha*rC[2][3];
+ C[64*ldc] = alpha*rC[2][4];
+ C[80*ldc] = alpha*rC[2][5];
C+=16;
- C[0*ldc] = alpha*rC[3][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[3][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[3][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[3][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[3][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[3][5] + beta*C[80*ldc];
+ C[0*ldc] = alpha*rC[3][0];
+ C[16*ldc] = alpha*rC[3][1];
+ C[32*ldc] = alpha*rC[3][2];
+ C[48*ldc] = alpha*rC[3][3];
+ C[64*ldc] = alpha*rC[3][4];
+ C[80*ldc] = alpha*rC[3][5];
C+=16;
- C[0*ldc] = alpha*rC[4][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[4][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[4][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[4][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[4][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[4][5] + beta*C[80*ldc];
+ C[0*ldc] = alpha*rC[4][0];
+ C[16*ldc] = alpha*rC[4][1];
+ C[32*ldc] = alpha*rC[4][2];
+ C[48*ldc] = alpha*rC[4][3];
+ C[64*ldc] = alpha*rC[4][4];
+ C[80*ldc] = alpha*rC[4][5];
C+=16;
- C[0*ldc] = alpha*rC[5][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[5][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[5][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[5][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[5][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[5][5] + beta*C[80*ldc];
+ C[0*ldc] = alpha*rC[5][0];
+ C[16*ldc] = alpha*rC[5][1];
+ C[32*ldc] = alpha*rC[5][2];
+ C[48*ldc] = alpha*rC[5][3];
+ C[64*ldc] = alpha*rC[5][4];
+ C[80*ldc] = alpha*rC[5][5];
}
);
diff --git a/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_TN_B0_MX096_NX096_KX16_src.cpp b/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_TN_B0_MX096_NX096_KX16_src.cpp
index 2668bfa..48323fc 100644
--- a/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_TN_B0_MX096_NX096_KX16_src.cpp
+++ b/src/library/blas/AutoGemm/UserGemmKernelSources/sgemm_Col_TN_B0_MX096_NX096_KX16_src.cpp
@@ -162,47 +162,47 @@ __kernel void sgemm_Col_TN_B0_MX096_NX096_KX16 (
C+= gidy*96*ldc;
C+= idy*ldc;
- C[0*ldc] = alpha*rC[0][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[0][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[0][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[0][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[0][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[0][5] + beta*C[80*ldc];
+ C[0*ldc] = alpha*rC[0][0];
+ C[16*ldc] = alpha*rC[0][1];
+ C[32*ldc] = alpha*rC[0][2];
+ C[48*ldc] = alpha*rC[0][3];
+ C[64*ldc] = alpha*rC[0][4];
+ C[80*ldc] = alpha*rC[0][5];
C+=16;
- C[0*ldc] = alpha*rC[1][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[1][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[1][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[1][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[1][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[1][5] + beta*C[80*ldc];
+ C[0*ldc] = alpha*rC[1][0];
+ C[16*ldc] = alpha*rC[1][1];
+ C[32*ldc] = alpha*rC[1][2];
+ C[48*ldc] = alpha*rC[1][3];
+ C[64*ldc] = alpha*rC[1][4];
+ C[80*ldc] = alpha*rC[1][5];
C+=16;
- C[0*ldc] = alpha*rC[2][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[2][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[2][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[2][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[2][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[2][5] + beta*C[80*ldc];
+ C[0*ldc] = alpha*rC[2][0];
+ C[16*ldc] = alpha*rC[2][1];
+ C[32*ldc] = alpha*rC[2][2];
+ C[48*ldc] = alpha*rC[2][3];
+ C[64*ldc] = alpha*rC[2][4];
+ C[80*ldc] = alpha*rC[2][5];
C+=16;
- C[0*ldc] = alpha*rC[3][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[3][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[3][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[3][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[3][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[3][5] + beta*C[80*ldc];
+ C[0*ldc] = alpha*rC[3][0];
+ C[16*ldc] = alpha*rC[3][1];
+ C[32*ldc] = alpha*rC[3][2];
+ C[48*ldc] = alpha*rC[3][3];
+ C[64*ldc] = alpha*rC[3][4];
+ C[80*ldc] = alpha*rC[3][5];
C+=16;
- C[0*ldc] = alpha*rC[4][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[4][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[4][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[4][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[4][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[4][5] + beta*C[80*ldc];
+ C[0*ldc] = alpha*rC[4][0];
+ C[16*ldc] = alpha*rC[4][1];
+ C[32*ldc] = alpha*rC[4][2];
+ C[48*ldc] = alpha*rC[4][3];
+ C[64*ldc] = alpha*rC[4][4];
+ C[80*ldc] = alpha*rC[4][5];
C+=16;
- C[0*ldc] = alpha*rC[5][0] + beta*C[0*ldc];
- C[16*ldc] = alpha*rC[5][1] + beta*C[16*ldc];
- C[32*ldc] = alpha*rC[5][2] + beta*C[32*ldc];
- C[48*ldc] = alpha*rC[5][3] + beta*C[48*ldc];
- C[64*ldc] = alpha*rC[5][4] + beta*C[64*ldc];
- C[80*ldc] = alpha*rC[5][5] + beta*C[80*ldc];
+ C[0*ldc] = alpha*rC[5][0];
+ C[16*ldc] = alpha*rC[5][1];
+ C[32*ldc] = alpha*rC[5][2];
+ C[48*ldc] = alpha*rC[5][3];
+ C[64*ldc] = alpha*rC[5][4];
+ C[80*ldc] = alpha*rC[5][5];
}
);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git
More information about the debian-science-commits mailing list