[libclc] 55/92: shared: Implement aligned vector stores (vstorea_half)

Andreas Boll aboll-guest at moszumanska.debian.org
Mon Nov 6 15:12:01 UTC 2017


This is an automated email from the git hooks/post-receive script.

aboll-guest pushed a commit to branch master
in repository libclc.

commit fabea1e9468119b1671fb5fd09df354a7c306658
Author: Jan Vesely <jan.vesely at rutgers.edu>
Date:   Sun Oct 22 14:21:59 2017 +0000

    shared: Implement aligned vector stores (vstorea_half)
    
    The float version passes the newly posted piglit tests on Turks; both the float and double versions pass on Carrizo.
    v2: scalar vstorea_half
    v3: fix typo
    
    Reviewer: Aaron Watry
    Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
    
    git-svn-id: https://llvm.org/svn/llvm-project/libclc/trunk@316291 91177308-0d34-0410-b5e6-96231b3b80d8
---
 generic/include/clc/shared/vstore.h | 41 +++++++++++++++++++++++++------------
 generic/lib/shared/vstore.cl        | 30 ++++++++++++++-------------
 generic/lib/shared/vstore_half.inc  | 21 +++++++++++++------
 3 files changed, 59 insertions(+), 33 deletions(-)

diff --git a/generic/include/clc/shared/vstore.h b/generic/include/clc/shared/vstore.h
index 0e3f694..a246d52 100644
--- a/generic/include/clc/shared/vstore.h
+++ b/generic/include/clc/shared/vstore.h
@@ -16,37 +16,52 @@
 #define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
   _CLC_VECTOR_VSTORE_PRIM3(,PRIM_TYPE, PRIM_TYPE) \
 
-#define _CLC_VECTOR_VSTORE_PRIM() \
-    _CLC_VECTOR_VSTORE_PRIM1(char) \
-    _CLC_VECTOR_VSTORE_PRIM1(uchar) \
-    _CLC_VECTOR_VSTORE_PRIM1(short) \
-    _CLC_VECTOR_VSTORE_PRIM1(ushort) \
-    _CLC_VECTOR_VSTORE_PRIM1(int) \
-    _CLC_VECTOR_VSTORE_PRIM1(uint) \
-    _CLC_VECTOR_VSTORE_PRIM1(long) \
-    _CLC_VECTOR_VSTORE_PRIM1(ulong) \
-    _CLC_VECTOR_VSTORE_PRIM1(float) \
-    _CLC_VECTOR_VSTORE_PRIM3(_half, half, float)
+_CLC_VECTOR_VSTORE_PRIM1(char)
+_CLC_VECTOR_VSTORE_PRIM1(uchar)
+_CLC_VECTOR_VSTORE_PRIM1(short)
+_CLC_VECTOR_VSTORE_PRIM1(ushort)
+_CLC_VECTOR_VSTORE_PRIM1(int)
+_CLC_VECTOR_VSTORE_PRIM1(uint)
+_CLC_VECTOR_VSTORE_PRIM1(long)
+_CLC_VECTOR_VSTORE_PRIM1(ulong)
+_CLC_VECTOR_VSTORE_PRIM1(float)
+_CLC_VECTOR_VSTORE_PRIM3(_half, half, float)
+// Use suffix to declare aligned vstorea_halfN
+_CLC_VECTOR_VSTORE_PRIM3(a_half, half, float)
 
 #ifdef cl_khr_fp64
   _CLC_VECTOR_VSTORE_PRIM1(double)
   _CLC_VECTOR_VSTORE_PRIM3(_half, half, double)
+  // Use suffix to declare aligned vstorea_halfN
+  _CLC_VECTOR_VSTORE_PRIM3(a_half, half, double)
+
+  // Scalar vstore_half also needs to be declared
   _CLC_VSTORE_DECL(_half, half, double, , __private)
   _CLC_VSTORE_DECL(_half, half, double, , __local)
   _CLC_VSTORE_DECL(_half, half, double, , __global)
+
+  // Scalar vstorea_half is not part of the specs but CTS expects it
+  _CLC_VSTORE_DECL(a_half, half, double, , __private)
+  _CLC_VSTORE_DECL(a_half, half, double, , __local)
+  _CLC_VSTORE_DECL(a_half, half, double, , __global)
 #endif
 
 #ifdef cl_khr_fp16
   _CLC_VECTOR_VSTORE_PRIM1(half)
 #endif
 
-_CLC_VECTOR_VSTORE_PRIM()
+// Scalar vstore_half also needs to be declared
 _CLC_VSTORE_DECL(_half, half, float, , __private)
 _CLC_VSTORE_DECL(_half, half, float, , __local)
 _CLC_VSTORE_DECL(_half, half, float, , __global)
 
+// Scalar vstorea_half is not part of the specs but CTS expects it
+_CLC_VSTORE_DECL(a_half, half, float, , __private)
+_CLC_VSTORE_DECL(a_half, half, float, , __local)
+_CLC_VSTORE_DECL(a_half, half, float, , __global)
+
+
 #undef _CLC_VSTORE_DECL
 #undef _CLC_VECTOR_VSTORE_DECL
 #undef _CLC_VECTOR_VSTORE_PRIM3
 #undef _CLC_VECTOR_VSTORE_PRIM1
-#undef _CLC_VECTOR_VSTORE_PRIM
diff --git a/generic/lib/shared/vstore.cl b/generic/lib/shared/vstore.cl
index 3343c16..e5383a8 100644
--- a/generic/lib/shared/vstore.cl
+++ b/generic/lib/shared/vstore.cl
@@ -33,23 +33,22 @@
     VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __local) \
     VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \
 
-#define VSTORE_TYPES() \
-    VSTORE_ADDR_SPACES(char) \
-    VSTORE_ADDR_SPACES(uchar) \
-    VSTORE_ADDR_SPACES(short) \
-    VSTORE_ADDR_SPACES(ushort) \
-    VSTORE_ADDR_SPACES(int) \
-    VSTORE_ADDR_SPACES(uint) \
-    VSTORE_ADDR_SPACES(long) \
-    VSTORE_ADDR_SPACES(ulong) \
-    VSTORE_ADDR_SPACES(float) \
+VSTORE_ADDR_SPACES(char)
+VSTORE_ADDR_SPACES(uchar)
+VSTORE_ADDR_SPACES(short)
+VSTORE_ADDR_SPACES(ushort)
+VSTORE_ADDR_SPACES(int)
+VSTORE_ADDR_SPACES(uint)
+VSTORE_ADDR_SPACES(long)
+VSTORE_ADDR_SPACES(ulong)
+VSTORE_ADDR_SPACES(float)
 
-VSTORE_TYPES()
 
 #ifdef cl_khr_fp64
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
     VSTORE_ADDR_SPACES(double)
 #endif
+
 #ifdef cl_khr_fp16
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
     VSTORE_ADDR_SPACES(half)
@@ -95,13 +94,17 @@ DECLARE_HELPER(double, __local, __builtin_store_half);
 	VEC_STORE8(STYPE, AS, val.lo) \
 	VEC_STORE8(STYPE, AS, val.hi)
 
-#define __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) \
+#define __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
   _CLC_OVERLOAD _CLC_DEF void vstore_half##SUFFIX(TYPE vec, size_t offset, AS half *mem) { \
     offset *= VEC_SIZE; \
     VEC_STORE##VEC_SIZE(STYPE, AS, vec) \
+  } \
+  _CLC_OVERLOAD _CLC_DEF void vstorea_half##SUFFIX(TYPE vec, size_t offset, AS half *mem) { \
+    offset *= OFFSET; \
+    VEC_STORE##VEC_SIZE(STYPE, AS, vec) \
   }
 
-#define FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS)
+#define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS)
 
 #define __CLC_BODY "vstore_half.inc"
 #include <clc/math/gentype.inc>
@@ -115,6 +118,5 @@ DECLARE_HELPER(double, __local, __builtin_store_half);
 #undef VEC_LOAD2
 #undef VEC_LOAD1
 #undef DECLARE_HELPER
-#undef VSTORE_TYPES
 #undef VSTORE_ADDR_SPACES
 #undef VSTORE_VECTORIZE
diff --git a/generic/lib/shared/vstore_half.inc b/generic/lib/shared/vstore_half.inc
index fee52bc..ee4e38b 100644
--- a/generic/lib/shared/vstore_half.inc
+++ b/generic/lib/shared/vstore_half.inc
@@ -1,10 +1,19 @@
 
 #ifdef __CLC_VECSIZE
-  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
-  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
-  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
+
+#if __CLC_VECSIZE == 3
+#  define __CLC_OFFSET 4
+#else
+#  define __CLC_OFFSET __CLC_VECSIZE
+#endif
+
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
+
+#undef __CLC_OFFSET
 #else
-  FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
-  FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
-  FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
+  FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
+  FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
+  FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
 #endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/libclc.git



More information about the Pkg-opencl-commits mailing list