[libclc] 45/79: Add vstore_half_rtz implementation

Mon Mar 19 16:50:59 UTC 2018

This is an automated email from the git hooks/post-receive script.

aboll-guest pushed a commit to branch master
in repository libclc.

commit 88e1b33c6b80bfe9495544cf18bbf492f7a82716
Author: Jan Vesely <jan.vesely at rutgers.edu>
Date:   Tue Feb 6 18:44:43 2018 +0000

    Add vstore_half_rtz implementation
    
    Passes CTS on carrizo
    
    Reviewer: Jeroen Ketema <j.ketema at xs4all.nl>
    Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
    
    git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@324373 91177308-0d34-0410-b5e6-96231b3b80d8
---
 generic/include/clc/shared/vstore.h |  2 ++
 generic/lib/shared/vstore.cl        | 35 ++++++++++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/generic/include/clc/shared/vstore.h b/generic/include/clc/shared/vstore.h
index e479e33..549e6bf 100644
--- a/generic/include/clc/shared/vstore.h
+++ b/generic/include/clc/shared/vstore.h
@@ -37,10 +37,12 @@ _CLC_VECTOR_VSTORE_PRIM1(ulong)
 _CLC_VECTOR_VSTORE_PRIM1(float)
 
 _CLC_VECTOR_VSTORE_HALF_PRIM1(float,)
+_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtz)
 
 #ifdef cl_khr_fp64
   _CLC_VECTOR_VSTORE_PRIM1(double)
   _CLC_VECTOR_VSTORE_HALF_PRIM1(double,)
+  _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtz)
 #endif
 
 #ifdef cl_khr_fp16
diff --git a/generic/lib/shared/vstore.cl b/generic/lib/shared/vstore.cl
index bafd76d..cbddd59 100644
--- a/generic/lib/shared/vstore.cl
+++ b/generic/lib/shared/vstore.cl
@@ -108,15 +108,48 @@ _CLC_DEF _CLC_OVERLOAD float __clc_noop(float x)
 {
 	return x;
 }
+_CLC_DEF _CLC_OVERLOAD float __clc_rtz(float x)
+{
+	/* Clear the low 13 mantissa bits so the value is truncated toward zero */
+	int mask = 0xffffe000;
+	const int exp = (as_uint(x) >> 23 & 0xff) - 127;
+	/* Denormals cannot be flushed; below exponent -14 drop one more bit per step (max 10) */
+	if (exp < -14)
+		mask <<= min(-(exp + 14), 10);
+	/* RTZ does not produce Inf: finite values beyond 65504 clamp to +/-65504 */
+	if (fabs(x) > 65504.0f && !isinf(x))
+		return copysign(65504.0f, x);
+	/* NaN corner case: return it as-is, bypassing the mask */
+	if (isnan(x))
+		return x;
+	return as_float(as_uint(x) & mask);
+}
 #ifdef cl_khr_fp64
 _CLC_DEF _CLC_OVERLOAD double __clc_noop(double x)
 {
 	return x;
 }
+_CLC_DEF _CLC_OVERLOAD double __clc_rtz(double x)
+{
+	/* Clear the low 42 mantissa bits so the value is truncated toward zero */
+	ulong mask = 0xfffffc0000000000UL;
+	const int exp = (as_ulong(x) >> 52 & 0x7ff) - 1023;
+	/* Denormals cannot be flushed; below exponent -14 drop one more bit per step (max 10) */
+	if (exp < -14)
+		mask <<= min(-(exp + 14), 10);
+	/* RTZ does not produce Inf: finite values beyond 65504 clamp to +/-65504 */
+	if (fabs(x) > 65504.0 && !isinf(x))
+		return copysign(65504.0, x);
+	/* NaN corner case: return it as-is, bypassing the mask */
+	if (isnan(x))
+		return x;
+	return as_double(as_ulong(x) & mask);
+}
 #endif
 
 #define __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
-	__FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_noop)
+	__FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_noop) \
+	__FUNC(SUFFIX ## _rtz, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtz)
 
 #define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
 	__XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/libclc.git