[libclc] 54/92: shared: Implement aligned vector loads (vloada_half)
Andreas Boll
aboll-guest at moszumanska.debian.org
Mon Nov 6 15:12:01 UTC 2017
This is an automated email from the git hooks/post-receive script.
aboll-guest pushed a commit to branch master
in repository libclc.
commit c3e385334ac47ebe296a36a56e72e3416033a52b
Author: Jan Vesely <jan.vesely at rutgers.edu>
Date: Sun Oct 22 14:21:56 2017 +0000
shared: Implement aligned vector loads (vloada_half)
Passes the newly posted piglit tests on Turks and Carrizo GPUs.
v2: add scalar vloada_half
v3: fix typo
Reviewer: Aaron Watry
Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@316290 91177308-0d34-0410-b5e6-96231b3b80d8
---
generic/include/clc/shared/vload.h | 40 ++++++++++++++++++++++----------------
generic/lib/shared/vload.cl | 10 ++++++++--
generic/lib/shared/vload_half.inc | 26 +++++++++++++++++--------
3 files changed, 49 insertions(+), 27 deletions(-)
diff --git a/generic/include/clc/shared/vload.h b/generic/include/clc/shared/vload.h
index 8c262dd..c0b066a 100644
--- a/generic/include/clc/shared/vload.h
+++ b/generic/include/clc/shared/vload.h
@@ -12,22 +12,24 @@
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
_CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
- _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
+ _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global)
#define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
- _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \
-
-#define _CLC_VECTOR_VLOAD_PRIM() \
- _CLC_VECTOR_VLOAD_PRIM1(char) \
- _CLC_VECTOR_VLOAD_PRIM1(uchar) \
- _CLC_VECTOR_VLOAD_PRIM1(short) \
- _CLC_VECTOR_VLOAD_PRIM1(ushort) \
- _CLC_VECTOR_VLOAD_PRIM1(int) \
- _CLC_VECTOR_VLOAD_PRIM1(uint) \
- _CLC_VECTOR_VLOAD_PRIM1(long) \
- _CLC_VECTOR_VLOAD_PRIM1(ulong) \
- _CLC_VECTOR_VLOAD_PRIM1(float) \
- _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
+ _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE)
+
+// Declare vector load prototypes
+_CLC_VECTOR_VLOAD_PRIM1(char)
+_CLC_VECTOR_VLOAD_PRIM1(uchar)
+_CLC_VECTOR_VLOAD_PRIM1(short)
+_CLC_VECTOR_VLOAD_PRIM1(ushort)
+_CLC_VECTOR_VLOAD_PRIM1(int)
+_CLC_VECTOR_VLOAD_PRIM1(uint)
+_CLC_VECTOR_VLOAD_PRIM1(long)
+_CLC_VECTOR_VLOAD_PRIM1(ulong)
+_CLC_VECTOR_VLOAD_PRIM1(float)
+_CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
+// Use suffix to declare aligned vloada_halfN
+_CLC_VECTOR_VLOAD_PRIM3(a_half, half, float)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64: enable
@@ -38,15 +40,19 @@
_CLC_VECTOR_VLOAD_PRIM1(half)
#endif
-_CLC_VECTOR_VLOAD_PRIM()
-// Plain vload_half also needs to be declared
+// Scalar vload_half also needs to be declared
_CLC_VLOAD_DECL(_half, half, float, , __constant)
_CLC_VLOAD_DECL(_half, half, float, , __global)
_CLC_VLOAD_DECL(_half, half, float, , __local)
_CLC_VLOAD_DECL(_half, half, float, , __private)
+// Scalar vloada_half is not part of the specs but CTS expects it
+_CLC_VLOAD_DECL(a_half, half, float, , __constant)
+_CLC_VLOAD_DECL(a_half, half, float, , __global)
+_CLC_VLOAD_DECL(a_half, half, float, , __local)
+_CLC_VLOAD_DECL(a_half, half, float, , __private)
+
#undef _CLC_VLOAD_DECL
#undef _CLC_VECTOR_VLOAD_DECL
#undef _CLC_VECTOR_VLOAD_PRIM3
#undef _CLC_VECTOR_VLOAD_PRIM1
-#undef _CLC_VECTOR_VLOAD_PRIM
diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
index 0892270..9c37fcf 100644
--- a/generic/lib/shared/vload.cl
+++ b/generic/lib/shared/vload.cl
@@ -85,15 +85,21 @@ float __clc_vload_half_float_helper__private(const __private half *);
VEC_LOAD8(val.lo, AS) \
VEC_LOAD8(val.hi, AS)
-#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
+#define __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
_CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS half *mem) { \
offset *= VEC_SIZE; \
TYPE __tmp; \
VEC_LOAD##VEC_SIZE(__tmp, AS) \
return __tmp; \
+ } \
+ _CLC_OVERLOAD _CLC_DEF TYPE vloada_half##SUFFIX(size_t offset, const AS half *mem) { \
+ offset *= OFFSET_SIZE; \
+ TYPE __tmp; \
+ VEC_LOAD##VEC_SIZE(__tmp, AS) \
+ return __tmp; \
}
-#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, AS)
+#define FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS)
#define __CLC_BODY "vload_half.inc"
#include <clc/math/gentype.inc>
diff --git a/generic/lib/shared/vload_half.inc b/generic/lib/shared/vload_half.inc
index 00dae8a..11b2bf7 100644
--- a/generic/lib/shared/vload_half.inc
+++ b/generic/lib/shared/vload_half.inc
@@ -1,13 +1,23 @@
#if __CLC_FPSIZE == 32
+
#ifdef __CLC_VECSIZE
- FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
- FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
- FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
- FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant);
+
+#if __CLC_VECSIZE == 3
+# define __CLC_OFFSET 4
#else
- FUNC(, 1, __CLC_GENTYPE, __private);
- FUNC(, 1, __CLC_GENTYPE, __local);
- FUNC(, 1, __CLC_GENTYPE, __global);
- FUNC(, 1, __CLC_GENTYPE, __constant);
+# define __CLC_OFFSET __CLC_VECSIZE
+#endif
+
+ FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __private);
+ FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __local);
+ FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __global);
+ FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __constant);
+
+#undef __CLC_OFFSET
+#else
+ FUNC(, 1, 1, __CLC_GENTYPE, __private);
+ FUNC(, 1, 1, __CLC_GENTYPE, __local);
+ FUNC(, 1, 1, __CLC_GENTYPE, __global);
+ FUNC(, 1, 1, __CLC_GENTYPE, __constant);
#endif
#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/libclc.git
More information about the Pkg-opencl-commits mailing list