[clblas] 86/125: fix correctness bug of csyr2k and zsyr2k when beta == (0,0); also fix bug in complex + for calculating reference results in ktest
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Fri May 29 06:57:25 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clblas.
commit 42284c0641a2d94ee0193e452e3ee66818575ffa
Author: Timmy <timmy.liu at amd.com>
Date: Fri Jun 20 09:30:05 2014 -0500
fix correctness bug of csyr2k and zsyr2k when beta == (0,0); also fix bug in complex + for calculating reference results in ktest
---
src/library/blas/gens/blas_kgen.h | 15 +++++
src/library/blas/gens/syrxk.c | 16 ++++--
src/library/blas/gens/tilemul.c | 85 ++++++++++++++++++++++++++++
src/library/tools/ktest/naive/naive_blas.cpp | 4 +-
4 files changed, 113 insertions(+), 7 deletions(-)
diff --git a/src/library/blas/gens/blas_kgen.h b/src/library/blas/gens/blas_kgen.h
index 694bafd..6fb1410 100644
--- a/src/library/blas/gens/blas_kgen.h
+++ b/src/library/blas/gens/blas_kgen.h
@@ -52,6 +52,7 @@
#include "tile.h"
#include "fetch.h"
+
#define BLAS_KGEN_FORMAT 1
#define genInternalLoopEnd(ctx) kgenEndBranch(ctx, NULL)
@@ -539,6 +540,18 @@ sprintfComplexMulUpdate(
bool conjB,
TileMulCore core);
+void
+sprintfComplexMulUpdate_syr2k_beta0(
+ Kstring *expr,
+ const Kstring *dst,
+ const Kstring *a,
+ const Kstring *b,
+ const Kstring *c,
+ bool isDouble,
+ bool conjA,
+ bool conjB,
+ TileMulCore core);
+
/**
* @brief Sprintf expression of fast scalar mad
*
@@ -892,4 +905,6 @@ checkGenRestoreTailCoords(
UpdateResultFlags
tailStatusToUpresFlags(TailStatus status);
+
+
#endif /* BLAS_KGEN_H_ */
diff --git a/src/library/blas/gens/syrxk.c b/src/library/blas/gens/syrxk.c
index 54574ed..816f9a2 100644
--- a/src/library/blas/gens/syrxk.c
+++ b/src/library/blas/gens/syrxk.c
@@ -876,11 +876,19 @@ genUpdateSingleOptimized(
sprintfComplexMulUpdate(&expr, k3, tempC, &betaStr, NULL,
isDouble, false, false, core);
kgenAddStmtToBatch(batch, MAD_STMT_PRIO, expr.buf);
+
+ sprintfComplexMulUpdate(&expr, tempC, result, &alphaStr, k3,
+ isDouble, false, false, core);
+ kgenAddStmtToBatch(batch, MAD_STMT_PRIO, expr.buf);
}
+ else
+ {
+ //fix correctness bug for c/z syr2k when beta = (0,0)
+ sprintfComplexMulUpdate_syr2k_beta0(&expr, tempC, result, &alphaStr, NULL,
+ isDouble, false, false, core);
+ kgenAddStmtToBatch(batch, MAD_STMT_PRIO, expr.buf);
+ }
- sprintfComplexMulUpdate(&expr, tempC, result, &alphaStr, k3,
- isDouble, false, false, core);
- kgenAddStmtToBatch(batch, MAD_STMT_PRIO, expr.buf);
}
else {
if (betaName != NULL) {
@@ -1171,7 +1179,6 @@ genUpdateIsoscelesDiagTile(
if (nrStored) {
sprintfTileElement(&tempElem, &tileTempC, iter.row % tempRows,
iter.col % tempCols, nrStored);
-
kgenBatchPrintf(batch, STORE_STMT_PRIO,
"*(__global %s*)(&%s[%s]) = %s;\n",
glbType, dstPtr, offExpr.buf, tempElem.buf);
@@ -1720,7 +1727,6 @@ genUpdateResult(
// the function above put a respective code into a conditional path
kgenBeginBranch(ctx, "else");
}
-
ret = genResultUpdateWithFlags( ctx,
funcID,
gset,
diff --git a/src/library/blas/gens/tilemul.c b/src/library/blas/gens/tilemul.c
index 1ff00b7..3baaac5 100644
--- a/src/library/blas/gens/tilemul.c
+++ b/src/library/blas/gens/tilemul.c
@@ -28,6 +28,7 @@
#include "blas_kgen.h"
+
#define MAX_LENGTH 4096
#define BITS_INT (sizeof(int) * 8)
@@ -693,6 +694,90 @@ sprintfComplexMulUpdate(
}
}
+void
+sprintfComplexMulUpdate_syr2k_beta0(
+ Kstring *expr,
+ const Kstring *dst,
+ const Kstring *a,
+ const Kstring *b,
+ const Kstring *c,
+ bool isDouble,
+ bool conjA,
+ bool conjB,
+ TileMulCore core)
+{
+ Kstring swSrc1; // swapped element of the first source
+ // real and imaginary part of the second source
+ Kstring reSrc2, imSrc2;
+ const Kstring *src11, *src12, *src21, *src22;
+ const char *sign1 = "", *sign2 = "", *sign3 = "";
+ const char *baseType;
+
+ baseType = (isDouble) ? "double2" : "float2";
+
+ /*
+ * Prepare components for multiplying. Assuming the helpers below swap and
+ * split (re, im) as their names suggest, we get these vectorized operations:
+ *
+ * c = b * a1 + bsw * (-a2, a2) if both 'a' and 'b' are not conjugated
+ * c = b * a1 + bsw * (a2, -a2) if 'a' is conjugated and 'b' is not
+ * c = a * b1 + asw * (b2, -b2) if 'b' is conjugated and 'a' is not
+ * c = asw * (-b2) + a * (b1, -b1) if both 'a' and 'b' are conjugated
+ *
+ * Where (a1, a2) and (b1, b2) are complex components of 'a' and 'b',
+ * and asw and bsw - swapped elements of 'a' and 'b' respectively.
+ */
+
+ src11 = (conjB) ? a : b;
+ src21 = (conjB) ? b : a;
+
+ kstrcpy(&swSrc1, src11->buf);
+ swapComplexComponents(&swSrc1, 1);
+ takeComplexApart(&reSrc2, &imSrc2, src21);
+
+ if (conjA && conjB) {
+ src12 = src11;
+ src11 = &swSrc1;
+ src21 = &imSrc2;
+ src22 = &reSrc2;
+ sign1 = sign3 = "-";
+ }
+ else {
+ src12 = &swSrc1;
+ src21 = &reSrc2;
+ src22 = &imSrc2;
+ if (conjA || conjB) {
+ sign3 = "-";
+ }
+ else {
+ sign2 = "-";
+ }
+ }
+
+ if (core == TILEMUL_MAD) { // first mad() writes sctmp, second adds it into dst; NOTE(review): sctmp is not declared here - confirm the kernel declares it
+ const char *strC = (c == NULL) ? "0" : c->buf;
+
+ ksprintf(expr, "%s = mad(%s, %s%s, %s);\n"
+ "%s = mad(%s, (%s)(%s%s, %s%s), %s);\n",
+ "sctmp", src11->buf, sign1, src21->buf, strC,
+ dst->buf, src12->buf, baseType, sign2, src22->buf,
+ sign3, src22->buf, "sctmp");
+ }
+ else {
+ const char *op = (dst == c) ? "+=" : "="; // accumulate in place only when dst and c are the same Kstring object
+
+ ksprintf(expr, "%s %s %s * %s%s + %s * (%s)(%s%s, %s%s)",
+ dst->buf, op, src11->buf, sign1,
+ src21->buf, src12->buf, baseType, sign2, src22->buf,
+ sign3, src22->buf);
+ if (!((c == NULL) || (c == dst))) {
+ kstrcatf(expr, " + %s", c->buf);
+ }
+ kstrcatf(expr, "%s", ";\n");
+ }
+}
+
+
int
genMulTiles(
struct KgenContext *ctx,
diff --git a/src/library/tools/ktest/naive/naive_blas.cpp b/src/library/tools/ktest/naive/naive_blas.cpp
index 5c2c608..afd7887 100644
--- a/src/library/tools/ktest/naive/naive_blas.cpp
+++ b/src/library/tools/ktest/naive/naive_blas.cpp
@@ -350,13 +350,13 @@ operator/(FloatComplex a, cl_float b)
static __inline DoubleComplex
operator+(DoubleComplex a, DoubleComplex b)
{
- return doubleComplex(CREAL(a) + CREAL(b), CIMAG(b) + CIMAG(b));
+ return doubleComplex(CREAL(a) + CREAL(b), CIMAG(a) + CIMAG(b));
}
static __inline DoubleComplex
operator-(DoubleComplex a, DoubleComplex b)
{
- return doubleComplex(CREAL(a) - CREAL(b), CIMAG(b) - CIMAG(b));
+ return doubleComplex(CREAL(a) - CREAL(b), CIMAG(a) - CIMAG(b));
}
static __inline DoubleComplex
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git
More information about the debian-science-commits
mailing list