[libclc] 48/79: Add vstore_half_rte implementation
Andreas Boll
aboll-guest at moszumanska.debian.org
Mon Mar 19 16:50:59 UTC 2018
This is an automated email from the git hooks/post-receive script.
aboll-guest pushed a commit to branch master
in repository libclc.
commit 81da85d0a8f9121d2858216ec959af8fb8a0ee54
Author: Jan Vesely <jan.vesely at rutgers.edu>
Date: Tue Feb 6 18:44:50 2018 +0000
Add vstore_half_rte implementation
Passes CTS on carrizo
Reviewer: Jeroen Ketema <j.ketema at xs4all.nl>
Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@324376 91177308-0d34-0410-b5e6-96231b3b80d8
---
generic/include/clc/shared/vstore.h | 2 ++
generic/lib/shared/vstore.cl | 45 ++++++++++++++++++++++++++++++++++++-
2 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/generic/include/clc/shared/vstore.h b/generic/include/clc/shared/vstore.h
index b510e0a..ebad330 100644
--- a/generic/include/clc/shared/vstore.h
+++ b/generic/include/clc/shared/vstore.h
@@ -40,6 +40,7 @@ _CLC_VECTOR_VSTORE_HALF_PRIM1(float,)
_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtz)
_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtn)
_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtp)
+_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rte)
#ifdef cl_khr_fp64
_CLC_VECTOR_VSTORE_PRIM1(double)
@@ -47,6 +48,7 @@ _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtp)
_CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtz)
_CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtn)
_CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtp)
+ _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rte)
#endif
#ifdef cl_khr_fp16
diff --git a/generic/lib/shared/vstore.cl b/generic/lib/shared/vstore.cl
index 2bfb369..c035095 100644
--- a/generic/lib/shared/vstore.cl
+++ b/generic/lib/shared/vstore.cl
@@ -147,6 +147,27 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rtp(float x)
{
return ((as_uint(x) & 0x80000000) == 0) ? __clc_rti(x) : __clc_rtz(x);
}
+_CLC_DEF _CLC_OVERLOAD float __clc_rte(float x)
+{
+ /* Mantissa + implicit bit */
+ const uint mantissa = (as_uint(x) & 0x7fffff) | (1u << 23);
+ const int exp = (as_uint(x) >> 23 & 0xff) - 127;
+ int shift = 13;
+ if (exp < -14) {
+ /* The default assumes the lower 13 bits are rounded off,
+ * but it might be more for denormals (exp < -14).
+ * The extra shift is capped at 15 bits; shifting further is not necessary. */
+ shift += min(-(exp + 14), 15);
+ }
+ int mask = (1 << shift) - 1;
+ const uint grs = mantissa & mask;
+ const uint last = mantissa & (1 << shift);
+ /* Round up when grs is above the halfway value (1 << (shift - 1)), or exactly
+ * halfway with the last kept bit set. exp > 15 should round to inf. */
+ bool roundup = (grs > (1 << (shift - 1))) ||
+ (grs == (1 << (shift - 1)) && last != 0) || (exp > 15);
+ return roundup ? __clc_rti(x) : __clc_rtz(x);
+}
#ifdef cl_khr_fp64
_CLC_DEF _CLC_OVERLOAD double __clc_noop(double x)
@@ -192,13 +213,35 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rtp(double x)
{
return ((as_ulong(x) & 0x8000000000000000UL) == 0) ? __clc_rti(x) : __clc_rtz(x);
}
+_CLC_DEF _CLC_OVERLOAD double __clc_rte(double x)
+{
+ /* Mantissa + implicit bit */
+ const ulong mantissa = (as_ulong(x) & 0xfffffffffffff) | (1UL << 52);
+ const int exp = (as_ulong(x) >> 52 & 0x7ff) - 1023;
+ int shift = 42;
+ if (exp < -14) {
+ /* The default assumes the lower 42 bits are rounded off,
+ * but it might be more for denormals (exp < -14).
+ * The extra shift is capped at 15 bits; shifting further is not necessary. */
+ shift += min(-(exp + 14), 15);
+ }
+ ulong mask = (1UL << shift) - 1UL;
+ const ulong grs = mantissa & mask;
+ const ulong last = mantissa & (1UL << shift);
+ /* Round up when grs is above the halfway value (1UL << (shift - 1)), or exactly
+ * halfway with the last kept bit set. exp > 15 should round to inf. */
+ bool roundup = (grs > (1UL << (shift - 1UL))) ||
+ (grs == (1UL << (shift - 1UL)) && last != 0) || (exp > 15);
+ return roundup ? __clc_rti(x) : __clc_rtz(x);
+}
#endif
#define __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
__FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_noop) \
__FUNC(SUFFIX ## _rtz, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtz) \
__FUNC(SUFFIX ## _rtn, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtn) \
- __FUNC(SUFFIX ## _rtp, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtp)
+ __FUNC(SUFFIX ## _rtp, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtp) \
+ __FUNC(SUFFIX ## _rte, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rte)
#define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
__XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/libclc.git
More information about the Pkg-opencl-commits
mailing list