[beignet] 04/04: Add LLVM 3.9 support
Rebecca Palmer
rnpalmer-guest at moszumanska.debian.org
Wed Sep 28 20:27:09 UTC 2016
This is an automated email from the git hooks/post-receive script.
rnpalmer-guest pushed a commit to branch master
in repository beignet.
commit de51a3e9ceed565af2d47eceaea4b6ed635c863c
Author: Rebecca N. Palmer <rebecca_palmer at zoho.com>
Date: Wed Sep 28 21:24:56 2016 +0100
Add LLVM 3.9 support
---
debian/changelog | 1 +
debian/control | 3 +
debian/patches/llvm39-support.patch | 1337 +++++++++++++++++++++++++++++++++++
debian/patches/series | 1 +
4 files changed, 1342 insertions(+)
diff --git a/debian/changelog b/debian/changelog
index b4e067f..9dd42f8 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,7 @@ beignet (1.2.0-2) UNRELEASED; urgency=medium
* Fix FTBFS on kfreebsd-*.
* Make tests work with Python 3 and switch back to it.
* Add pow-powr-tests.patch.
+ * Add LLVM 3.9 support.
-- Rebecca N. Palmer <rebecca_palmer at zoho.com> Sun, 25 Sep 2016 21:58:07 +0100
diff --git a/debian/control b/debian/control
index ae8a383..9dfd827 100644
--- a/debian/control
+++ b/debian/control
@@ -14,6 +14,9 @@ Build-Depends:
clang-3.8,
libclang-3.8-dev,
llvm-3.8-dev,
+ clang-3.9 [linux-any],
+ libclang-3.9-dev [linux-any],
+ llvm-3.9-dev [linux-any],
ocl-icd-dev,
ocl-icd-opencl-dev,
pkg-config,
diff --git a/debian/patches/llvm39-support.patch b/debian/patches/llvm39-support.patch
new file mode 100644
index 0000000..deb2414
--- /dev/null
+++ b/debian/patches/llvm39-support.patch
@@ -0,0 +1,1337 @@
+Description: Support LLVM 3.9
+
+Origin: (mostly) upstream 6ebe485...0056da7
+Author: Pan Xiuli, Rebecca Palmer
+
+--- a/CMake/FindLLVM.cmake
++++ b/CMake/FindLLVM.cmake
+@@ -8,12 +8,12 @@
+ # LLVM_FOUND - True if llvm found.
+ if (LLVM_INSTALL_DIR)
+ find_program(LLVM_CONFIG_EXECUTABLE
+- NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
++ NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config-39 llvm-config-3.9 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
+ DOC "llvm-config executable"
+ PATHS ${LLVM_INSTALL_DIR} NO_DEFAULT_PATH)
+ else (LLVM_INSTALL_DIR)
+ find_program(LLVM_CONFIG_EXECUTABLE
+- NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
++ NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config-39 llvm-config-3.9 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
+ DOC "llvm-config executable")
+ endif (LLVM_INSTALL_DIR)
+
+--- a/backend/src/backend/gen_program.cpp
++++ b/backend/src/backend/gen_program.cpp
+@@ -334,7 +334,11 @@ namespace gbe {
+ //the first byte stands for binary_type.
+ binary_content.assign(binary+1, size-1);
+ llvm::StringRef llvm_bin_str(binary_content);
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++ llvm::LLVMContext& c = GBEGetLLVMContext();
++#else
+ llvm::LLVMContext& c = llvm::getGlobalContext();
++#endif
+ llvm::SMDiagnostic Err;
+ #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
+ std::unique_ptr<llvm::MemoryBuffer> memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+@@ -488,10 +492,17 @@ namespace gbe {
+ #endif
+ errSize = 0;
+ }else{
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++ // LLVMLinkModules2 destroys the source module, so link a clone of it instead.
++ llvm::Module* src = llvm::CloneModule((llvm::Module*)((GenProgram*)src_program)->module).release();
++#else
+ llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module;
++#endif
+ llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module;
+
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++ if (LLVMLinkModules2(wrap(dst), wrap(src))) {
++#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
+ if (LLVMLinkModules(wrap(dst), wrap(src), LLVMLinkerPreserveSource_Removed, &errMsg)) {
+ #else
+ if (LLVMLinkModules(wrap(dst), wrap(src), LLVMLinkerPreserveSource, &errMsg)) {
+--- a/backend/src/backend/program.cpp
++++ b/backend/src/backend/program.cpp
+@@ -133,7 +133,13 @@ namespace gbe {
+ bool strictMath = true;
+ if (fast_relaxed_math || !OCL_STRICT_CONFORMANCE)
+ strictMath = false;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++ llvm::Module * linked_module = module ? llvm::CloneModule((llvm::Module*)module).release() : NULL;
++ // Src now will be removed automatically. So clone it.
++ if (llvmToGen(*unit, fileName, linked_module, optLevel, strictMath, OCL_PROFILING_LOG, error) == false) {
++#else
+ if (llvmToGen(*unit, fileName, module, optLevel, strictMath, OCL_PROFILING_LOG, error) == false) {
++#endif
+ if (fileName)
+ error = std::string(fileName) + " not found";
+ delete unit;
+@@ -1057,7 +1063,11 @@ EXTEND_QUOTE:
+ //FIXME: if use new allocated context to link two modules there would be context mismatch
+ //for some functions, so we use global context now, need switch to new context later.
+ llvm::Module * out_module;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++ llvm::LLVMContext* llvm_ctx = &GBEGetLLVMContext();
++#else
+ llvm::LLVMContext* llvm_ctx = &llvm::getGlobalContext();
++#endif
+
+ if (buildModuleFromSource(source, &out_module, llvm_ctx, dumpLLVMFileName, dumpSPIRBinaryName, clOpt,
+ stringSize, err, errSize)) {
+--- a/backend/src/ir/function.hpp
++++ b/backend/src/ir/function.hpp
+@@ -208,22 +208,22 @@ namespace ir {
+ }
+ #else
+ bool isImage1dT() const {
+- return typeBaseName.compare("image1d_t") == 0;
++ return typeBaseName.find("image1d_t") !=std::string::npos;
+ }
+ bool isImage1dArrayT() const {
+- return typeBaseName.compare("image1d_array_t") == 0;
++ return typeBaseName.find("image1d_array_t") !=std::string::npos;
+ }
+ bool isImage1dBufferT() const {
+- return typeBaseName.compare("image1d_buffer_t") == 0;
++ return typeBaseName.find("image1d_buffer_t") !=std::string::npos;
+ }
+ bool isImage2dT() const {
+- return typeBaseName.compare("image2d_t") == 0;
++ return typeBaseName.find("image2d_t") !=std::string::npos;
+ }
+ bool isImage2dArrayT() const {
+- return typeBaseName.compare("image2d_array_t") == 0;
++ return typeBaseName.find("image2d_array_t") !=std::string::npos;
+ }
+ bool isImage3dT() const {
+- return typeBaseName.compare("image3d_t") == 0;
++ return typeBaseName.find("image3d_t") !=std::string::npos;
+ }
+ bool isSamplerType() const {
+ return typeBaseName.compare("sampler_t") == 0;
+--- a/backend/src/libocl/include/ocl.h
++++ b/backend/src/libocl/include/ocl.h
+@@ -18,6 +18,67 @@
+ #ifndef __OCL_H__
+ #define __OCL_H__
+
++/* LLVM 3.9 predefines these macros; undef them first. */
++#ifdef cl_khr_3d_image_writes
++#undef cl_khr_3d_image_writes
++#endif
++#ifdef cl_khr_byte_addressable_store
++#undef cl_khr_byte_addressable_store
++#endif
++#ifdef cl_khr_fp16
++#undef cl_khr_fp16
++#endif
++#ifdef cl_khr_fp64
++#undef cl_khr_fp64
++#endif
++#ifdef cl_khr_global_int32_base_atomics
++#undef cl_khr_global_int32_base_atomics
++#endif
++#ifdef cl_khr_global_int32_extended_atomics
++#undef cl_khr_global_int32_extended_atomics
++#endif
++#ifdef cl_khr_gl_sharing
++#undef cl_khr_gl_sharing
++#endif
++#ifdef cl_khr_icd
++#undef cl_khr_icd
++#endif
++#ifdef cl_khr_local_int32_base_atomics
++#undef cl_khr_local_int32_base_atomics
++#endif
++#ifdef cl_khr_local_int32_extended_atomics
++#undef cl_khr_local_int32_extended_atomics
++#endif
++
++#ifdef cl_khr_d3d10_sharing
++#undef cl_khr_d3d10_sharing
++#endif
++#ifdef cl_khr_gl_event
++#undef cl_khr_gl_event
++#endif
++#ifdef cl_khr_int64_base_atomics
++#undef cl_khr_int64_base_atomics
++#endif
++#ifdef cl_khr_int64_extended_atomics
++#undef cl_khr_int64_extended_atomics
++#endif
++
++#ifdef cl_khr_d3d11_sharing
++#undef cl_khr_d3d11_sharing
++#endif
++#ifdef cl_khr_depth_images
++#undef cl_khr_depth_images
++#endif
++#ifdef cl_khr_dx9_media_sharing
++#undef cl_khr_dx9_media_sharing
++#endif
++#ifdef cl_khr_gl_depth_images
++#undef cl_khr_gl_depth_images
++#endif
++#ifdef cl_khr_spir
++#undef cl_khr_spir
++#endif
++
+ #include "ocl_defines.h"
+ #include "ocl_types.h"
+ #include "ocl_as.h"
+@@ -40,6 +101,20 @@
+ #include "ocl_workitem.h"
+ #include "ocl_simd.h"
+ #include "ocl_work_group.h"
++
++/* Moved here from ocl_defines.h so that each macro is defined only once. */
++#define cl_khr_global_int32_base_atomics
++#define cl_khr_global_int32_extended_atomics
++#define cl_khr_local_int32_base_atomics
++#define cl_khr_local_int32_extended_atomics
++#define cl_khr_byte_addressable_store
++#define cl_khr_icd
++#define cl_khr_gl_sharing
++#define cl_khr_spir
++#define cl_khr_fp16
++#define cl_khr_3d_image_writes
++#define cl_intel_subgroups
++
+ #pragma OPENCL EXTENSION cl_khr_fp64 : disable
+ #pragma OPENCL EXTENSION cl_khr_fp16 : disable
+ #endif
+--- a/backend/src/libocl/include/ocl_image.h
++++ b/backend/src/libocl/include/ocl_image.h
+@@ -20,152 +20,189 @@
+
+ #include "ocl_types.h"
+
+-OVERLOADABLE int4 read_imagei(image1d_t cl_image, const sampler_t sampler, int coord);
+-OVERLOADABLE int4 read_imagei(image1d_t cl_image, const sampler_t sampler, float coord);
+-OVERLOADABLE int4 read_imagei(image1d_t cl_image, int coord);
+-OVERLOADABLE void write_imagei(image1d_t cl_image, int coord, int4 color);
+-OVERLOADABLE void write_imagei(image1d_t cl_image, float coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image1d_t cl_image, const sampler_t sampler, int coord);
+-OVERLOADABLE uint4 read_imageui(image1d_t cl_image, const sampler_t sampler, float coord);
+-OVERLOADABLE uint4 read_imageui(image1d_t cl_image, int coord);
+-OVERLOADABLE void write_imageui(image1d_t cl_image, int coord, uint4 color);
+-OVERLOADABLE void write_imageui(image1d_t cl_image, float coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image1d_t cl_image, const sampler_t sampler, int coord);
+-OVERLOADABLE float4 read_imagef(image1d_t cl_image, const sampler_t sampler, float coord);
+-OVERLOADABLE float4 read_imagef(image1d_t cl_image, int coord);
+-OVERLOADABLE void write_imagef(image1d_t cl_image, int coord, float4 color);
+-OVERLOADABLE void write_imagef(image1d_t cl_image, float coord, float4 color);
+-OVERLOADABLE int4 read_imagei(image1d_buffer_t cl_image, int coord);
+-OVERLOADABLE void write_imagei(image1d_buffer_t cl_image, int coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image1d_buffer_t cl_image, int coord);
+-OVERLOADABLE void write_imageui(image1d_buffer_t cl_image, int coord, uint4 color);
+-OVERLOADABLE void write_imageui(image1d_buffer_t cl_image, float coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image1d_buffer_t cl_image, int coord);
+-OVERLOADABLE void write_imagef(image1d_buffer_t cl_image, int coord, float4 color);
+-
+-OVERLOADABLE int get_image_channel_data_type(image1d_t image);
+-OVERLOADABLE int get_image_channel_order(image1d_t image);
+-OVERLOADABLE int get_image_width(image1d_t image);
+-OVERLOADABLE int get_image_channel_data_type(image1d_buffer_t image);
+-OVERLOADABLE int get_image_channel_order(image1d_buffer_t image);
+-OVERLOADABLE int get_image_width(image1d_buffer_t image);
+-OVERLOADABLE int4 read_imagei(image2d_t cl_image, const sampler_t sampler, int2 coord);
+-OVERLOADABLE int4 read_imagei(image2d_t cl_image, const sampler_t sampler, float2 coord);
+-OVERLOADABLE int4 read_imagei(image2d_t cl_image, int2 coord);
+-OVERLOADABLE void write_imagei(image2d_t cl_image, int2 coord, int4 color);
+-OVERLOADABLE void write_imagei(image2d_t cl_image, float2 coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image2d_t cl_image, const sampler_t sampler, int2 coord);
+-OVERLOADABLE uint4 read_imageui(image2d_t cl_image, const sampler_t sampler, float2 coord);
+-OVERLOADABLE uint4 read_imageui(image2d_t cl_image, int2 coord);
+-OVERLOADABLE void write_imageui(image2d_t cl_image, int2 coord, uint4 color);
+-OVERLOADABLE void write_imageui(image2d_t cl_image, float2 coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image2d_t cl_image, const sampler_t sampler, int2 coord);
+-OVERLOADABLE float4 read_imagef(image2d_t cl_image, const sampler_t sampler, float2 coord);
+-OVERLOADABLE float4 read_imagef(image2d_t cl_image, int2 coord);
+-OVERLOADABLE void write_imagef(image2d_t cl_image, int2 coord, float4 color);
+-OVERLOADABLE void write_imagef(image2d_t cl_image, float2 coord, float4 color);
+-OVERLOADABLE int4 read_imagei(image1d_array_t cl_image, const sampler_t sampler, int2 coord);
+-OVERLOADABLE int4 read_imagei(image1d_array_t cl_image, const sampler_t sampler, float2 coord);
+-OVERLOADABLE int4 read_imagei(image1d_array_t cl_image, int2 coord);
+-OVERLOADABLE void write_imagei(image1d_array_t cl_image, int2 coord, int4 color);
+-OVERLOADABLE void write_imagei(image1d_array_t cl_image, float2 coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image1d_array_t cl_image, const sampler_t sampler, int2 coord);
+-OVERLOADABLE uint4 read_imageui(image1d_array_t cl_image, const sampler_t sampler, float2 coord);
+-OVERLOADABLE uint4 read_imageui(image1d_array_t cl_image, int2 coord);
+-OVERLOADABLE void write_imageui(image1d_array_t cl_image, int2 coord, uint4 color);
+-OVERLOADABLE void write_imageui(image1d_array_t cl_image, float2 coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image1d_array_t cl_image, const sampler_t sampler, int2 coord);
+-OVERLOADABLE float4 read_imagef(image1d_array_t cl_image, const sampler_t sampler, float2 coord);
+-OVERLOADABLE float4 read_imagef(image1d_array_t cl_image, int2 coord);
+-OVERLOADABLE void write_imagef(image1d_array_t cl_image, int2 coord, float4 color);
+-OVERLOADABLE void write_imagef(image1d_array_t cl_image, float2 coord, float4 color);
+-
+-OVERLOADABLE int get_image_channel_data_type(image2d_t image);
+-OVERLOADABLE int get_image_channel_order(image2d_t image);
+-OVERLOADABLE int get_image_width(image2d_t image);
+-OVERLOADABLE int get_image_height(image2d_t image);
+-OVERLOADABLE int2 get_image_dim(image2d_t image);
+-
+-OVERLOADABLE int get_image_channel_data_type(image1d_array_t image);
+-OVERLOADABLE int get_image_channel_order(image1d_array_t image);
+-OVERLOADABLE int get_image_width(image1d_array_t image);
+-OVERLOADABLE size_t get_image_array_size(image1d_array_t image);
+-OVERLOADABLE int4 read_imagei(image3d_t cl_image, const sampler_t sampler, int4 coord);
+-OVERLOADABLE int4 read_imagei(image3d_t cl_image, const sampler_t sampler, float4 coord);
+-OVERLOADABLE int4 read_imagei(image3d_t cl_image, int4 coord);
+-OVERLOADABLE void write_imagei(image3d_t cl_image, int4 coord, int4 color);
+-OVERLOADABLE void write_imagei(image3d_t cl_image, float4 coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image3d_t cl_image, const sampler_t sampler, int4 coord);
+-OVERLOADABLE uint4 read_imageui(image3d_t cl_image, const sampler_t sampler, float4 coord);
+-OVERLOADABLE uint4 read_imageui(image3d_t cl_image, int4 coord);
+-OVERLOADABLE void write_imageui(image3d_t cl_image, int4 coord, uint4 color);
+-OVERLOADABLE void write_imageui(image3d_t cl_image, float4 coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image3d_t cl_image, const sampler_t sampler, int4 coord);
+-OVERLOADABLE float4 read_imagef(image3d_t cl_image, const sampler_t sampler, float4 coord);
+-OVERLOADABLE float4 read_imagef(image3d_t cl_image, int4 coord);
+-OVERLOADABLE void write_imagef(image3d_t cl_image, int4 coord, float4 color);
+-OVERLOADABLE void write_imagef(image3d_t cl_image, float4 coord, float4 color);
+-
+-OVERLOADABLE int4 read_imagei(image3d_t cl_image, const sampler_t sampler, int3 coord);
+-OVERLOADABLE int4 read_imagei(image3d_t cl_image, const sampler_t sampler, float3 coord);
+-OVERLOADABLE int4 read_imagei(image3d_t cl_image, int3 coord);
+-OVERLOADABLE void write_imagei(image3d_t cl_image, int3 coord, int4 color);
+-OVERLOADABLE void write_imagei(image3d_t cl_image, float3 coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image3d_t cl_image, const sampler_t sampler, int3 coord);
+-OVERLOADABLE uint4 read_imageui(image3d_t cl_image, const sampler_t sampler, float3 coord);
+-OVERLOADABLE uint4 read_imageui(image3d_t cl_image, int3 coord);
+-OVERLOADABLE void write_imageui(image3d_t cl_image, int3 coord, uint4 color);
+-OVERLOADABLE void write_imageui(image3d_t cl_image, float3 coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image3d_t cl_image, const sampler_t sampler, int3 coord);
+-OVERLOADABLE float4 read_imagef(image3d_t cl_image, const sampler_t sampler, float3 coord);
+-OVERLOADABLE float4 read_imagef(image3d_t cl_image, int3 coord);
+-OVERLOADABLE void write_imagef(image3d_t cl_image, int3 coord, float4 color);
+-OVERLOADABLE void write_imagef(image3d_t cl_image, float3 coord, float4 color);
+-OVERLOADABLE int4 read_imagei(image2d_array_t cl_image, const sampler_t sampler, int4 coord);
+-OVERLOADABLE int4 read_imagei(image2d_array_t cl_image, const sampler_t sampler, float4 coord);
+-OVERLOADABLE int4 read_imagei(image2d_array_t cl_image, int4 coord);
+-OVERLOADABLE void write_imagei(image2d_array_t cl_image, int4 coord, int4 color);
+-OVERLOADABLE void write_imagei(image2d_array_t cl_image, float4 coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image2d_array_t cl_image, const sampler_t sampler, int4 coord);
+-OVERLOADABLE uint4 read_imageui(image2d_array_t cl_image, const sampler_t sampler, float4 coord);
+-OVERLOADABLE uint4 read_imageui(image2d_array_t cl_image, int4 coord);
+-OVERLOADABLE void write_imageui(image2d_array_t cl_image, int4 coord, uint4 color);
+-OVERLOADABLE void write_imageui(image2d_array_t cl_image, float4 coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image2d_array_t cl_image, const sampler_t sampler, int4 coord);
+-OVERLOADABLE float4 read_imagef(image2d_array_t cl_image, const sampler_t sampler, float4 coord);
+-OVERLOADABLE float4 read_imagef(image2d_array_t cl_image, int4 coord);
+-OVERLOADABLE void write_imagef(image2d_array_t cl_image, int4 coord, float4 color);
+-OVERLOADABLE void write_imagef(image2d_array_t cl_image, float4 coord, float4 color);
+-
+-OVERLOADABLE int4 read_imagei(image2d_array_t cl_image, const sampler_t sampler, int3 coord);
+-OVERLOADABLE int4 read_imagei(image2d_array_t cl_image, const sampler_t sampler, float3 coord);
+-OVERLOADABLE int4 read_imagei(image2d_array_t cl_image, int3 coord);
+-OVERLOADABLE void write_imagei(image2d_array_t cl_image, int3 coord, int4 color);
+-OVERLOADABLE void write_imagei(image2d_array_t cl_image, float3 coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image2d_array_t cl_image, const sampler_t sampler, int3 coord);
+-OVERLOADABLE uint4 read_imageui(image2d_array_t cl_image, const sampler_t sampler, float3 coord);
+-OVERLOADABLE uint4 read_imageui(image2d_array_t cl_image, int3 coord);
+-OVERLOADABLE void write_imageui(image2d_array_t cl_image, int3 coord, uint4 color);
+-OVERLOADABLE void write_imageui(image2d_array_t cl_image, float3 coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image2d_array_t cl_image, const sampler_t sampler, int3 coord);
+-OVERLOADABLE float4 read_imagef(image2d_array_t cl_image, const sampler_t sampler, float3 coord);
+-OVERLOADABLE float4 read_imagef(image2d_array_t cl_image, int3 coord);
+-OVERLOADABLE void write_imagef(image2d_array_t cl_image, int3 coord, float4 color);
+-OVERLOADABLE void write_imagef(image2d_array_t cl_image, float3 coord, float4 color);
+-
+-OVERLOADABLE int get_image_channel_data_type(image3d_t image);
+-OVERLOADABLE int get_image_channel_order(image3d_t image);
+-OVERLOADABLE int get_image_width(image3d_t image);
+-OVERLOADABLE int get_image_height(image3d_t image);
+-OVERLOADABLE int get_image_depth(image3d_t image);
+-OVERLOADABLE int4 get_image_dim(image3d_t image);
+-
+-
+-OVERLOADABLE int get_image_channel_data_type(image2d_array_t image);
+-OVERLOADABLE int get_image_channel_order(image2d_array_t image);
+-OVERLOADABLE int get_image_width(image2d_array_t image);
+-OVERLOADABLE int get_image_height(image2d_array_t image);
+-OVERLOADABLE int2 get_image_dim(image2d_array_t image);
+-OVERLOADABLE size_t get_image_array_size(image2d_array_t image);
++OVERLOADABLE int4 read_imagei(read_only image1d_t cl_image, const sampler_t sampler, int coord);
++OVERLOADABLE int4 read_imagei(read_only image1d_t cl_image, const sampler_t sampler, float coord);
++OVERLOADABLE int4 read_imagei(read_only image1d_t cl_image, int coord);
++OVERLOADABLE void write_imagei(write_only image1d_t cl_image, int coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image1d_t cl_image, float coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image1d_t cl_image, const sampler_t sampler, int coord);
++OVERLOADABLE uint4 read_imageui(read_only image1d_t cl_image, const sampler_t sampler, float coord);
++OVERLOADABLE uint4 read_imageui(read_only image1d_t cl_image, int coord);
++OVERLOADABLE void write_imageui(write_only image1d_t cl_image, int coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image1d_t cl_image, float coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image1d_t cl_image, const sampler_t sampler, int coord);
++OVERLOADABLE float4 read_imagef(read_only image1d_t cl_image, const sampler_t sampler, float coord);
++OVERLOADABLE float4 read_imagef(read_only image1d_t cl_image, int coord);
++OVERLOADABLE void write_imagef(write_only image1d_t cl_image, int coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image1d_t cl_image, float coord, float4 color);
++OVERLOADABLE int4 read_imagei(read_only image1d_buffer_t cl_image, int coord);
++OVERLOADABLE void write_imagei(write_only image1d_buffer_t cl_image, int coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image1d_buffer_t cl_image, int coord);
++OVERLOADABLE void write_imageui(write_only image1d_buffer_t cl_image, int coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image1d_buffer_t cl_image, float coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image1d_buffer_t cl_image, int coord);
++OVERLOADABLE void write_imagef(write_only image1d_buffer_t cl_image, int coord, float4 color);
++
++OVERLOADABLE int get_image_channel_data_type(read_only image1d_t image);
++OVERLOADABLE int get_image_channel_order(read_only image1d_t image);
++OVERLOADABLE int get_image_width(read_only image1d_t image);
++
++OVERLOADABLE int get_image_channel_data_type(read_only image1d_buffer_t image);
++OVERLOADABLE int get_image_channel_order(read_only image1d_buffer_t image);
++OVERLOADABLE int get_image_width(read_only image1d_buffer_t image);
++
++OVERLOADABLE int4 read_imagei(read_only image2d_t cl_image, const sampler_t sampler, int2 coord);
++OVERLOADABLE int4 read_imagei(read_only image2d_t cl_image, const sampler_t sampler, float2 coord);
++OVERLOADABLE int4 read_imagei(read_only image2d_t cl_image, int2 coord);
++OVERLOADABLE void write_imagei(write_only image2d_t cl_image, int2 coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image2d_t cl_image, float2 coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image2d_t cl_image, const sampler_t sampler, int2 coord);
++OVERLOADABLE uint4 read_imageui(read_only image2d_t cl_image, const sampler_t sampler, float2 coord);
++OVERLOADABLE uint4 read_imageui(read_only image2d_t cl_image, int2 coord);
++OVERLOADABLE void write_imageui(write_only image2d_t cl_image, int2 coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image2d_t cl_image, float2 coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image2d_t cl_image, const sampler_t sampler, int2 coord);
++OVERLOADABLE float4 read_imagef(read_only image2d_t cl_image, const sampler_t sampler, float2 coord);
++OVERLOADABLE float4 read_imagef(read_only image2d_t cl_image, int2 coord);
++OVERLOADABLE void write_imagef(write_only image2d_t cl_image, int2 coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image2d_t cl_image, float2 coord, float4 color);
++OVERLOADABLE int4 read_imagei(read_only image1d_array_t cl_image, const sampler_t sampler, int2 coord);
++OVERLOADABLE int4 read_imagei(read_only image1d_array_t cl_image, const sampler_t sampler, float2 coord);
++OVERLOADABLE int4 read_imagei(read_only image1d_array_t cl_image, int2 coord);
++OVERLOADABLE void write_imagei(write_only image1d_array_t cl_image, int2 coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image1d_array_t cl_image, float2 coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image1d_array_t cl_image, const sampler_t sampler, int2 coord);
++OVERLOADABLE uint4 read_imageui(read_only image1d_array_t cl_image, const sampler_t sampler, float2 coord);
++OVERLOADABLE uint4 read_imageui(read_only image1d_array_t cl_image, int2 coord);
++OVERLOADABLE void write_imageui(write_only image1d_array_t cl_image, int2 coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image1d_array_t cl_image, float2 coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image1d_array_t cl_image, const sampler_t sampler, int2 coord);
++OVERLOADABLE float4 read_imagef(read_only image1d_array_t cl_image, const sampler_t sampler, float2 coord);
++OVERLOADABLE float4 read_imagef(read_only image1d_array_t cl_image, int2 coord);
++OVERLOADABLE void write_imagef(write_only image1d_array_t cl_image, int2 coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image1d_array_t cl_image, float2 coord, float4 color);
++
++OVERLOADABLE int get_image_channel_data_type(read_only image2d_t image);
++OVERLOADABLE int get_image_channel_order(read_only image2d_t image);
++OVERLOADABLE int get_image_width(read_only image2d_t image);
++OVERLOADABLE int get_image_height(read_only image2d_t image);
++OVERLOADABLE int2 get_image_dim(read_only image2d_t image);
++
++OVERLOADABLE int get_image_channel_data_type(read_only image1d_array_t image);
++OVERLOADABLE int get_image_channel_order(read_only image1d_array_t image);
++OVERLOADABLE int get_image_width(read_only image1d_array_t image);
++OVERLOADABLE size_t get_image_array_size(read_only image1d_array_t image);
++
++OVERLOADABLE int4 read_imagei(read_only image3d_t cl_image, const sampler_t sampler, int4 coord);
++OVERLOADABLE int4 read_imagei(read_only image3d_t cl_image, const sampler_t sampler, float4 coord);
++OVERLOADABLE int4 read_imagei(read_only image3d_t cl_image, int4 coord);
++OVERLOADABLE void write_imagei(write_only image3d_t cl_image, int4 coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image3d_t cl_image, float4 coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image3d_t cl_image, const sampler_t sampler, int4 coord);
++OVERLOADABLE uint4 read_imageui(read_only image3d_t cl_image, const sampler_t sampler, float4 coord);
++OVERLOADABLE uint4 read_imageui(read_only image3d_t cl_image, int4 coord);
++OVERLOADABLE void write_imageui(write_only image3d_t cl_image, int4 coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image3d_t cl_image, float4 coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image3d_t cl_image, const sampler_t sampler, int4 coord);
++OVERLOADABLE float4 read_imagef(read_only image3d_t cl_image, const sampler_t sampler, float4 coord);
++OVERLOADABLE float4 read_imagef(read_only image3d_t cl_image, int4 coord);
++OVERLOADABLE void write_imagef(write_only image3d_t cl_image, int4 coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image3d_t cl_image, float4 coord, float4 color);
++
++OVERLOADABLE int4 read_imagei(read_only image3d_t cl_image, const sampler_t sampler, int3 coord);
++OVERLOADABLE int4 read_imagei(read_only image3d_t cl_image, const sampler_t sampler, float3 coord);
++OVERLOADABLE int4 read_imagei(read_only image3d_t cl_image, int3 coord);
++OVERLOADABLE void write_imagei(write_only image3d_t cl_image, int3 coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image3d_t cl_image, float3 coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image3d_t cl_image, const sampler_t sampler, int3 coord);
++OVERLOADABLE uint4 read_imageui(read_only image3d_t cl_image, const sampler_t sampler, float3 coord);
++OVERLOADABLE uint4 read_imageui(read_only image3d_t cl_image, int3 coord);
++OVERLOADABLE void write_imageui(write_only image3d_t cl_image, int3 coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image3d_t cl_image, float3 coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image3d_t cl_image, const sampler_t sampler, int3 coord);
++OVERLOADABLE float4 read_imagef(read_only image3d_t cl_image, const sampler_t sampler, float3 coord);
++OVERLOADABLE float4 read_imagef(read_only image3d_t cl_image, int3 coord);
++OVERLOADABLE void write_imagef(write_only image3d_t cl_image, int3 coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image3d_t cl_image, float3 coord, float4 color);
++OVERLOADABLE int4 read_imagei(read_only image2d_array_t cl_image, const sampler_t sampler, int4 coord);
++OVERLOADABLE int4 read_imagei(read_only image2d_array_t cl_image, const sampler_t sampler, float4 coord);
++OVERLOADABLE int4 read_imagei(read_only image2d_array_t cl_image, int4 coord);
++OVERLOADABLE void write_imagei(write_only image2d_array_t cl_image, int4 coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image2d_array_t cl_image, float4 coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image2d_array_t cl_image, const sampler_t sampler, int4 coord);
++OVERLOADABLE uint4 read_imageui(read_only image2d_array_t cl_image, const sampler_t sampler, float4 coord);
++OVERLOADABLE uint4 read_imageui(read_only image2d_array_t cl_image, int4 coord);
++OVERLOADABLE void write_imageui(write_only image2d_array_t cl_image, int4 coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image2d_array_t cl_image, float4 coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image2d_array_t cl_image, const sampler_t sampler, int4 coord);
++OVERLOADABLE float4 read_imagef(read_only image2d_array_t cl_image, const sampler_t sampler, float4 coord);
++OVERLOADABLE float4 read_imagef(read_only image2d_array_t cl_image, int4 coord);
++OVERLOADABLE void write_imagef(write_only image2d_array_t cl_image, int4 coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image2d_array_t cl_image, float4 coord, float4 color);
++
++OVERLOADABLE int4 read_imagei(read_only image2d_array_t cl_image, const sampler_t sampler, int3 coord);
++OVERLOADABLE int4 read_imagei(read_only image2d_array_t cl_image, const sampler_t sampler, float3 coord);
++OVERLOADABLE int4 read_imagei(read_only image2d_array_t cl_image, int3 coord);
++OVERLOADABLE void write_imagei(write_only image2d_array_t cl_image, int3 coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image2d_array_t cl_image, float3 coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image2d_array_t cl_image, const sampler_t sampler, int3 coord);
++OVERLOADABLE uint4 read_imageui(read_only image2d_array_t cl_image, const sampler_t sampler, float3 coord);
++OVERLOADABLE uint4 read_imageui(read_only image2d_array_t cl_image, int3 coord);
++OVERLOADABLE void write_imageui(write_only image2d_array_t cl_image, int3 coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image2d_array_t cl_image, float3 coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image2d_array_t cl_image, const sampler_t sampler, int3 coord);
++OVERLOADABLE float4 read_imagef(read_only image2d_array_t cl_image, const sampler_t sampler, float3 coord);
++OVERLOADABLE float4 read_imagef(read_only image2d_array_t cl_image, int3 coord);
++OVERLOADABLE void write_imagef(write_only image2d_array_t cl_image, int3 coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image2d_array_t cl_image, float3 coord, float4 color);
++
++OVERLOADABLE int get_image_channel_data_type(read_only image3d_t image);
++OVERLOADABLE int get_image_channel_order(read_only image3d_t image);
++OVERLOADABLE int get_image_width(read_only image3d_t image);
++OVERLOADABLE int get_image_height(read_only image3d_t image);
++OVERLOADABLE int get_image_depth(read_only image3d_t image);
++OVERLOADABLE int4 get_image_dim(read_only image3d_t image);
++
++OVERLOADABLE int get_image_channel_data_type(read_only image2d_array_t image);
++OVERLOADABLE int get_image_channel_order(read_only image2d_array_t image);
++OVERLOADABLE int get_image_width(read_only image2d_array_t image);
++OVERLOADABLE int get_image_height(read_only image2d_array_t image);
++OVERLOADABLE int2 get_image_dim(read_only image2d_array_t image);
++OVERLOADABLE size_t get_image_array_size(read_only image2d_array_t image);
++
++#if __clang_major__*10 + __clang_minor__ >= 39
++OVERLOADABLE int get_image_channel_data_type(write_only image1d_t image);
++OVERLOADABLE int get_image_channel_order(write_only image1d_t image);
++OVERLOADABLE int get_image_width(write_only image1d_t image);
++
++OVERLOADABLE int get_image_channel_data_type(write_only image1d_buffer_t image);
++OVERLOADABLE int get_image_channel_order(write_only image1d_buffer_t image);
++OVERLOADABLE int get_image_width(write_only image1d_buffer_t image);
++
++OVERLOADABLE int get_image_channel_data_type(write_only image2d_t image);
++OVERLOADABLE int get_image_channel_order(write_only image2d_t image);
++OVERLOADABLE int get_image_width(write_only image2d_t image);
++OVERLOADABLE int get_image_height(write_only image2d_t image);
++OVERLOADABLE int2 get_image_dim(write_only image2d_t image);
++
++OVERLOADABLE int get_image_channel_data_type(write_only image1d_array_t image);
++OVERLOADABLE int get_image_channel_order(write_only image1d_array_t image);
++OVERLOADABLE int get_image_width(write_only image1d_array_t image);
++OVERLOADABLE size_t get_image_array_size(write_only image1d_array_t image);
++
++OVERLOADABLE int get_image_channel_data_type(write_only image3d_t image);
++OVERLOADABLE int get_image_channel_order(write_only image3d_t image);
++OVERLOADABLE int get_image_width(write_only image3d_t image);
++OVERLOADABLE int get_image_height(write_only image3d_t image);
++OVERLOADABLE int get_image_depth(write_only image3d_t image);
++OVERLOADABLE int4 get_image_dim(write_only image3d_t image);
++
++OVERLOADABLE int get_image_channel_data_type(write_only image2d_array_t image);
++OVERLOADABLE int get_image_channel_order(write_only image2d_array_t image);
++OVERLOADABLE int get_image_width(write_only image2d_array_t image);
++OVERLOADABLE int get_image_height(write_only image2d_array_t image);
++OVERLOADABLE int2 get_image_dim(write_only image2d_array_t image);
++OVERLOADABLE size_t get_image_array_size(write_only image2d_array_t image);
++#endif
+
+ #endif
+--- a/backend/src/libocl/src/ocl_image.cl
++++ b/backend/src/libocl/src/ocl_image.cl
+@@ -29,21 +29,21 @@
+ ///////////////////////////////////////////////////////////////////////////////
+
+ #define DECL_GEN_OCL_RW_IMAGE(image_type, n) \
+- OVERLOADABLE int4 __gen_ocl_read_imagei(image_type image, sampler_t sampler, \
++ OVERLOADABLE int4 __gen_ocl_read_imagei(read_only image_type image, sampler_t sampler, \
+ float ##n coord, uint sampler_offset); \
+- OVERLOADABLE int4 __gen_ocl_read_imagei(image_type image, sampler_t sampler, \
++ OVERLOADABLE int4 __gen_ocl_read_imagei(read_only image_type image, sampler_t sampler, \
+ int ##n coord, uint sampler_offset); \
+- OVERLOADABLE uint4 __gen_ocl_read_imageui(image_type image, sampler_t sampler, \
++ OVERLOADABLE uint4 __gen_ocl_read_imageui(read_only image_type image, sampler_t sampler, \
+ float ##n coord, uint sampler_offset); \
+- OVERLOADABLE uint4 __gen_ocl_read_imageui(image_type image, sampler_t sampler, \
++ OVERLOADABLE uint4 __gen_ocl_read_imageui(read_only image_type image, sampler_t sampler, \
+ int ##n coord, uint sampler_offset); \
+- OVERLOADABLE float4 __gen_ocl_read_imagef(image_type image, sampler_t sampler, \
++ OVERLOADABLE float4 __gen_ocl_read_imagef(read_only image_type image, sampler_t sampler, \
+ float ##n coord, uint sampler_offset); \
+- OVERLOADABLE float4 __gen_ocl_read_imagef(image_type image, sampler_t sampler, \
++ OVERLOADABLE float4 __gen_ocl_read_imagef(read_only image_type image, sampler_t sampler, \
+ int ##n coord, uint sampler_offset); \
+- OVERLOADABLE void __gen_ocl_write_imagei(image_type image, int ##n coord , int4 color); \
+- OVERLOADABLE void __gen_ocl_write_imageui(image_type image, int ##n coord, uint4 color);\
+- OVERLOADABLE void __gen_ocl_write_imagef(image_type image, int ##n coord, float4 color);
++ OVERLOADABLE void __gen_ocl_write_imagei(write_only image_type image, int ##n coord , int4 color); \
++ OVERLOADABLE void __gen_ocl_write_imageui(write_only image_type image, int ##n coord, uint4 color);\
++ OVERLOADABLE void __gen_ocl_write_imagef(write_only image_type image, int ##n coord, float4 color);
+
+ #define DECL_GEN_OCL_QUERY_IMAGE(image_type) \
+ OVERLOADABLE int __gen_ocl_get_image_width(image_type image); \
+@@ -62,57 +62,104 @@ DECL_GEN_OCL_RW_IMAGE(image3d_t, 3)
+ DECL_GEN_OCL_RW_IMAGE(image2d_array_t, 4)
+ DECL_GEN_OCL_RW_IMAGE(image3d_t, 4)
+
+-DECL_GEN_OCL_QUERY_IMAGE(image1d_t)
+-DECL_GEN_OCL_QUERY_IMAGE(image1d_buffer_t)
+-DECL_GEN_OCL_QUERY_IMAGE(image1d_array_t)
+-DECL_GEN_OCL_QUERY_IMAGE(image2d_t)
+-DECL_GEN_OCL_QUERY_IMAGE(image2d_array_t)
+-DECL_GEN_OCL_QUERY_IMAGE(image3d_t)
++DECL_GEN_OCL_QUERY_IMAGE(read_only image1d_t)
++DECL_GEN_OCL_QUERY_IMAGE(read_only image1d_buffer_t)
++DECL_GEN_OCL_QUERY_IMAGE(read_only image1d_array_t)
++DECL_GEN_OCL_QUERY_IMAGE(read_only image2d_t)
++DECL_GEN_OCL_QUERY_IMAGE(read_only image2d_array_t)
++DECL_GEN_OCL_QUERY_IMAGE(read_only image3d_t)
++
++#if __clang_major__*10 + __clang_minor__ >= 39
++DECL_GEN_OCL_QUERY_IMAGE(write_only image1d_t)
++DECL_GEN_OCL_QUERY_IMAGE(write_only image1d_buffer_t)
++DECL_GEN_OCL_QUERY_IMAGE(write_only image1d_array_t)
++DECL_GEN_OCL_QUERY_IMAGE(write_only image2d_t)
++DECL_GEN_OCL_QUERY_IMAGE(write_only image2d_array_t)
++DECL_GEN_OCL_QUERY_IMAGE(write_only image3d_t)
++#endif
+ ///////////////////////////////////////////////////////////////////////////////
+ // helper functions to validate array index.
+ ///////////////////////////////////////////////////////////////////////////////
+-INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord, image1d_array_t image)
++INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord, read_only image1d_array_t image)
+ {
+ float array_size = __gen_ocl_get_image_depth(image);
+ coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f);
+ return coord;
+ }
+
+-INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord, image2d_array_t image)
++INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord, read_only image2d_array_t image)
+ {
+ float array_size = __gen_ocl_get_image_depth(image);
+ coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
+ return coord;
+ }
+
+-INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord, image2d_array_t image)
++INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord, read_only image2d_array_t image)
+ {
+ float array_size = __gen_ocl_get_image_depth(image);
+ coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
+ return coord;
+ }
+
+-INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord, image1d_array_t image)
++INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord, read_only image1d_array_t image)
+ {
+ int array_size = __gen_ocl_get_image_depth(image);
+ coord.s1 = clamp(coord.s1, 0, array_size - 1);
+ return coord;
+ }
+
+-INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord, image2d_array_t image)
++INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord, read_only image2d_array_t image)
+ {
+ int array_size = __gen_ocl_get_image_depth(image);
+ coord.s2 = clamp(coord.s2, 0, array_size - 1);
+ return coord;
+ }
+
+-INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord, image2d_array_t image)
++INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord, read_only image2d_array_t image)
+ {
+ int array_size = __gen_ocl_get_image_depth(image);
+ coord.s2 = clamp(coord.s2, 0, array_size - 1);
+ return coord;
+ }
+
++#if __clang_major__*10 + __clang_minor__ >= 39 // write_only overloads are compiled only when this clang version check passes
++INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord, write_only image1d_array_t image)
++{
++ float array_size = __gen_ocl_get_image_depth(image); // array size is obtained from the image depth query
++ coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f); // round the array index (s1), then clamp it to [0, array_size - 1]
++ return coord;
++}
++INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord, write_only image2d_array_t image)
++{
++ float array_size = __gen_ocl_get_image_depth(image); // array size is obtained from the image depth query
++ coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f); // for 2D arrays the array index is component s2
++ return coord;
++}
++INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord, write_only image2d_array_t image)
++{
++ float array_size = __gen_ocl_get_image_depth(image); // array size is obtained from the image depth query
++ coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f); // same rounding and clamping as the float4 overload
++ return coord;
++}
++INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord, write_only image1d_array_t image)
++{
++ int array_size = __gen_ocl_get_image_depth(image); // array size is obtained from the image depth query
++ coord.s1 = clamp(coord.s1, 0, array_size - 1); // integer index: clamp only, no rounding
++ return coord;
++}
++INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord, write_only image2d_array_t image)
++{
++ int array_size = __gen_ocl_get_image_depth(image); // array size is obtained from the image depth query
++ coord.s2 = clamp(coord.s2, 0, array_size - 1); // clamp the array index (s2) to [0, array_size - 1]
++ return coord;
++}
++INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord, write_only image2d_array_t image)
++{
++ int array_size = __gen_ocl_get_image_depth(image); // array size is obtained from the image depth query
++ coord.s2 = clamp(coord.s2, 0, array_size - 1); // clamp the array index (s2) to [0, array_size - 1]
++ return coord;
++}
++#endif
+ // For non array image type, we need to do nothing.
+ #define GEN_VALIDATE_ARRAY_INDEX(coord_type, image_type) \
+ INLINE_OVERLOADABLE coord_type __gen_validate_array_index(coord_type coord, image_type image) \
+@@ -120,17 +167,29 @@ INLINE_OVERLOADABLE coord_type __gen_val
+ return coord; \
+ }
+
+-GEN_VALIDATE_ARRAY_INDEX(float, image1d_t)
+-GEN_VALIDATE_ARRAY_INDEX(int, image1d_t)
+-GEN_VALIDATE_ARRAY_INDEX(float2, image2d_t)
+-GEN_VALIDATE_ARRAY_INDEX(int2, image2d_t)
+-GEN_VALIDATE_ARRAY_INDEX(float4, image3d_t)
+-GEN_VALIDATE_ARRAY_INDEX(int4, image3d_t)
+-GEN_VALIDATE_ARRAY_INDEX(float3, image3d_t)
+-GEN_VALIDATE_ARRAY_INDEX(int3, image3d_t)
+-GEN_VALIDATE_ARRAY_INDEX(float, image1d_buffer_t)
+-GEN_VALIDATE_ARRAY_INDEX(int, image1d_buffer_t)
+-
++GEN_VALIDATE_ARRAY_INDEX(float, read_only image1d_t)
++GEN_VALIDATE_ARRAY_INDEX(int, read_only image1d_t)
++GEN_VALIDATE_ARRAY_INDEX(float2, read_only image2d_t)
++GEN_VALIDATE_ARRAY_INDEX(int2, read_only image2d_t)
++GEN_VALIDATE_ARRAY_INDEX(float4, read_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(int4, read_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(float3, read_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(int3, read_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(float, read_only image1d_buffer_t)
++GEN_VALIDATE_ARRAY_INDEX(int, read_only image1d_buffer_t)
++
++#if __clang_major__*10 + __clang_minor__ >= 39
++GEN_VALIDATE_ARRAY_INDEX(float, write_only image1d_t)
++GEN_VALIDATE_ARRAY_INDEX(int, write_only image1d_t)
++GEN_VALIDATE_ARRAY_INDEX(float2, write_only image2d_t)
++GEN_VALIDATE_ARRAY_INDEX(int2, write_only image2d_t)
++GEN_VALIDATE_ARRAY_INDEX(float4, write_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(int4, write_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(float3, write_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(int3, write_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(float, write_only image1d_buffer_t)
++GEN_VALIDATE_ARRAY_INDEX(int, write_only image1d_buffer_t)
++#endif
+ ///////////////////////////////////////////////////////////////////////////////
+ // Helper functions to work around some coordiate boundary issues.
+ // The major issue on Gen7/Gen7.5 are the sample message could not sampling
+@@ -293,7 +352,7 @@ INLINE_OVERLOADABLE float3 __gen_fixup_n
+ // For integer coordinates
+ #define DECL_READ_IMAGE0(int_clamping_fix, image_type, \
+ image_data_type, suffix, coord_type, n) \
+- OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \
++ OVERLOADABLE image_data_type read_image ##suffix(read_only image_type cl_image, \
+ const sampler_t sampler, \
+ coord_type coord) \
+ { \
+@@ -308,7 +367,7 @@ INLINE_OVERLOADABLE float3 __gen_fixup_n
+ // For float coordinates
+ #define DECL_READ_IMAGE1(int_clamping_fix, image_type, \
+ image_data_type, suffix, coord_type, n) \
+- OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \
++ OVERLOADABLE image_data_type read_image ##suffix(read_only image_type cl_image, \
+ const sampler_t sampler, \
+ coord_type coord) \
+ { \
+@@ -333,7 +392,7 @@ INLINE_OVERLOADABLE float3 __gen_fixup_n
+
+ #define DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type, \
+ suffix, coord_type, n) \
+- OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \
++ OVERLOADABLE image_data_type read_image ##suffix(read_only image_type cl_image, \
+ coord_type coord) \
+ { \
+ coord = __gen_validate_array_index(coord, cl_image); \
+@@ -344,7 +403,7 @@ INLINE_OVERLOADABLE float3 __gen_fixup_n
+ }
+
+ #define DECL_WRITE_IMAGE(image_type, image_data_type, suffix, coord_type) \
+- OVERLOADABLE void write_image ##suffix(image_type cl_image, \
++ OVERLOADABLE void write_image ##suffix(write_only image_type cl_image, \
+ coord_type coord, \
+ image_data_type color) \
+ { \
+@@ -375,7 +434,7 @@ DECL_IMAGE_TYPE(image2d_array_t, 3)
+
+ #define DECL_READ_IMAGE1D_BUFFER_NOSAMPLER(image_type, image_data_type, \
+ suffix, coord_type) \
+- OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \
++ OVERLOADABLE image_data_type read_image ##suffix(read_only image_type cl_image, \
+ coord_type coord) \
+ { \
+ sampler_t defaultSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \
+@@ -388,7 +447,7 @@ DECL_IMAGE_TYPE(image2d_array_t, 3)
+ }
+
+ #define DECL_WRITE_IMAGE1D_BUFFER(image_type, image_data_type, suffix, coord_type) \
+- OVERLOADABLE void write_image ##suffix(image_type cl_image, \
++ OVERLOADABLE void write_image ##suffix(write_only image_type cl_image, \
+ coord_type coord, \
+ image_data_type color) \
+ { \
+@@ -493,69 +552,123 @@ DECL_IMAGE_1DArray(0, float4, f)
+ #define DECL_IMAGE_INFO_COMMON(image_type) \
+ OVERLOADABLE int get_image_channel_data_type(image_type image) \
+ { \
+- return __gen_ocl_get_image_channel_data_type(image); \
++ return __gen_ocl_get_image_channel_data_type(image); \
+ } \
+ OVERLOADABLE int get_image_channel_order(image_type image) \
+ { \
+- return __gen_ocl_get_image_channel_order(image); \
++ return __gen_ocl_get_image_channel_order(image); \
+ } \
+ OVERLOADABLE int get_image_width(image_type image) \
+ { \
+- return __gen_ocl_get_image_width(image); \
++ return __gen_ocl_get_image_width(image); \
+ }
+
+-DECL_IMAGE_INFO_COMMON(image1d_t)
+-DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
+-DECL_IMAGE_INFO_COMMON(image1d_array_t)
+-DECL_IMAGE_INFO_COMMON(image2d_t)
+-DECL_IMAGE_INFO_COMMON(image3d_t)
+-DECL_IMAGE_INFO_COMMON(image2d_array_t)
++DECL_IMAGE_INFO_COMMON(read_only image1d_t)
++DECL_IMAGE_INFO_COMMON(read_only image1d_buffer_t)
++DECL_IMAGE_INFO_COMMON(read_only image1d_array_t)
++DECL_IMAGE_INFO_COMMON(read_only image2d_t)
++DECL_IMAGE_INFO_COMMON(read_only image3d_t)
++DECL_IMAGE_INFO_COMMON(read_only image2d_array_t)
++
++#if __clang_major__*10 + __clang_minor__ >= 39
++DECL_IMAGE_INFO_COMMON(write_only image1d_t)
++DECL_IMAGE_INFO_COMMON(write_only image1d_buffer_t)
++DECL_IMAGE_INFO_COMMON(write_only image1d_array_t)
++DECL_IMAGE_INFO_COMMON(write_only image2d_t)
++DECL_IMAGE_INFO_COMMON(write_only image3d_t)
++DECL_IMAGE_INFO_COMMON(write_only image2d_array_t)
++#endif
+
+ // 2D extra Info
+-OVERLOADABLE int get_image_height(image2d_t image)
++OVERLOADABLE int get_image_height(read_only image2d_t image)
+ {
+ return __gen_ocl_get_image_height(image);
+ }
+-OVERLOADABLE int2 get_image_dim(image2d_t image)
++OVERLOADABLE int2 get_image_dim(read_only image2d_t image)
+ {
+ return (int2){get_image_width(image), get_image_height(image)};
+ }
++#if __clang_major__*10 + __clang_minor__ >= 39
++OVERLOADABLE int get_image_height(write_only image2d_t image)
++{
++ return __gen_ocl_get_image_height(image);
++}
++OVERLOADABLE int2 get_image_dim(write_only image2d_t image)
++{
++ return (int2){get_image_width(image), get_image_height(image)};
++}
++#endif
+ // End of 2D
+
+ // 3D extra Info
+-OVERLOADABLE int get_image_height(image3d_t image)
++OVERLOADABLE int get_image_height(read_only image3d_t image)
+ {
+ return __gen_ocl_get_image_height(image);
+ }
+-OVERLOADABLE int get_image_depth(image3d_t image)
++OVERLOADABLE int get_image_depth(read_only image3d_t image)
+ {
+ return __gen_ocl_get_image_depth(image);
+ }
+-OVERLOADABLE int4 get_image_dim(image3d_t image)
++OVERLOADABLE int4 get_image_dim(read_only image3d_t image)
+ {
+ return (int4) (get_image_width(image),
+ get_image_height(image),
+ get_image_depth(image),
+ 0);
+ }
+-
++#if __clang_major__*10 + __clang_minor__ >= 39
++OVERLOADABLE int get_image_height(write_only image3d_t image)
++{
++ return __gen_ocl_get_image_height(image);
++}
++OVERLOADABLE int get_image_depth(write_only image3d_t image)
++{
++ return __gen_ocl_get_image_depth(image);
++}
++OVERLOADABLE int4 get_image_dim(write_only image3d_t image)
++{
++ return (int4) (get_image_width(image),
++ get_image_height(image),
++ get_image_depth(image),
++ 0);
++}
++#endif
+ // 2D Array extra Info
+-OVERLOADABLE int get_image_height(image2d_array_t image)
++OVERLOADABLE int get_image_height(read_only image2d_array_t image)
+ {
+ return __gen_ocl_get_image_height(image);
+ }
+-OVERLOADABLE int2 get_image_dim(image2d_array_t image)
++OVERLOADABLE int2 get_image_dim(read_only image2d_array_t image)
+ {
+ return (int2){get_image_width(image), get_image_height(image)};
+ }
+-OVERLOADABLE size_t get_image_array_size(image2d_array_t image)
++OVERLOADABLE size_t get_image_array_size(read_only image2d_array_t image)
+ {
+ return __gen_ocl_get_image_depth(image);
+ }
+-
++#if __clang_major__*10 + __clang_minor__ >= 39
++OVERLOADABLE int get_image_height(write_only image2d_array_t image)
++{
++ return __gen_ocl_get_image_height(image);
++}
++OVERLOADABLE int2 get_image_dim(write_only image2d_array_t image)
++{
++ return (int2){get_image_width(image), get_image_height(image)};
++}
++OVERLOADABLE size_t get_image_array_size(write_only image2d_array_t image)
++{
++ return __gen_ocl_get_image_depth(image);
++}
++#endif
+ // 1D Array info
+-OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
++OVERLOADABLE size_t get_image_array_size(read_only image1d_array_t image)
+ {
+ return __gen_ocl_get_image_depth(image);
+ }
++#if __clang_major__*10 + __clang_minor__ >= 39
++OVERLOADABLE size_t get_image_array_size(write_only image1d_array_t image)
++{
++ return __gen_ocl_get_image_depth(image);
++}
++#endif
+ // End of 1DArray
+--- a/backend/src/libocl/tmpl/ocl_defines.tmpl.h
++++ b/backend/src/libocl/tmpl/ocl_defines.tmpl.h
+@@ -27,15 +27,5 @@
+ #define __kernel_exec(X, TYPE) __kernel __attribute__((work_group_size_hint(X,1,1))) \
+ __attribute__((vec_type_hint(TYPE)))
+ #define kernel_exec(X, TYPE) __kernel_exec(X, TYPE)
+-#define cl_khr_global_int32_base_atomics
+-#define cl_khr_global_int32_extended_atomics
+-#define cl_khr_local_int32_base_atomics
+-#define cl_khr_local_int32_extended_atomics
+-#define cl_khr_byte_addressable_store
+-#define cl_khr_icd
+-#define cl_khr_gl_sharing
+-#define cl_khr_spir
+-#define cl_khr_fp16
+-#define cl_khr_3d_image_writes
+
+ #endif /* end of __OCL_COMMON_DEF_H__ */
+--- a/backend/src/llvm/llvm_bitcode_link.cpp
++++ b/backend/src/llvm/llvm_bitcode_link.cpp
+@@ -145,6 +145,7 @@ namespace gbe
+ return NULL;
+
+ std::vector<const char *> kernels;
++ std::vector<const char *> kerneltmp;
+ std::vector<const char *> builtinFuncs;
+ /* Add the memset and memcpy functions here. */
+ builtinFuncs.push_back("__gen_memcpy_gg");
+@@ -184,7 +185,12 @@ namespace gbe
+ for (Module::iterator SF = mod->begin(), E = mod->end(); SF != E; ++SF) {
+ if (SF->isDeclaration()) continue;
+ if (!isKernelFunction(*SF)) continue;
+- kernels.push_back(SF->getName().data());
++ // mod will be deleted after link, copy the names.
++ const char *funcName = SF->getName().data();
++ char * tmp = new char[strlen(funcName)+1];
++ strcpy(tmp,funcName);
++ kernels.push_back(tmp);
++ kerneltmp.push_back(tmp);
+
+ if (!materializedFuncCall(*mod, *clonedLib, *SF, materializedFuncs, Gvs)) {
+ delete clonedLib;
+@@ -273,7 +279,11 @@ namespace gbe
+ /* We use beignet's bitcode as dst because it will have a lot of
+ lazy functions which will not be loaded. */
+ char* errorMsg;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++ if(LLVMLinkModules2(wrap(clonedLib), wrap(mod))) {
++#else
+ if(LLVMLinkModules(wrap(clonedLib), wrap(mod), LLVMLinkerDestroySource, &errorMsg)) {
++#endif
+ delete clonedLib;
+ printf("Fatal Error: link the bitcode error:\n%s\n", errorMsg);
+ return NULL;
+@@ -284,11 +294,25 @@ namespace gbe
+ llvm::PassManager passes;
+ #endif
+
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=9
++ auto PreserveKernel = [=](const GlobalValue &GV) { /* internalize-pass predicate: true => GV must be preserved */
++   for(size_t i = 0;i < kernels.size(); ++i)
++     if(strcmp(GV.getName().data(), kernels[i]) == 0) /* strcmp() yields 0 on a match: GV is one of the kernels */
++       return true;
++   return false; /* not a kernel: the pass may internalize it */
++ };
++
++ passes.add(createInternalizePass(PreserveKernel));
++#else
+ passes.add(createInternalizePass(kernels));
++#endif
+ passes.add(createGlobalDCEPass());
+
+ passes.run(*clonedLib);
+
++ for(size_t i = 0;i < kerneltmp.size(); i++)
++ delete[] kerneltmp[i];
++
+ return clonedLib;
+ }
+
+--- a/backend/src/llvm/llvm_gen_backend.cpp
++++ b/backend/src/llvm/llvm_gen_backend.cpp
+@@ -1230,6 +1230,10 @@ namespace gbe
+ }
+ MDNode *typeNameNode = NULL;
+ MDNode *typeBaseNameNode = NULL;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++ typeNameNode = F.getMetadata("kernel_arg_type");
++ typeBaseNameNode = F.getMetadata("kernel_arg_base_type");
++#else
+ MDNode *node = getKernelFunctionMetadata(&F);
+ for(uint j = 0;node && j < node->getNumOperands() - 1; j++) {
+ MDNode *attrNode = dyn_cast_or_null<MDNode>(node->getOperand(1 + j));
+@@ -1243,15 +1247,21 @@ namespace gbe
+ typeBaseNameNode = attrNode;
+ }
+ }
++#endif
+
+ unsigned argID = 0;
+ ir::FunctionArgument::InfoFromLLVM llvmInfo;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, argID++) {
++ unsigned opID = argID;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9
++ opID += 1;
++#endif
++
+ if(typeNameNode) {
+- llvmInfo.typeName= (cast<MDString>(typeNameNode->getOperand(1 + argID)))->getString();
++ llvmInfo.typeName= (cast<MDString>(typeNameNode->getOperand(opID)))->getString();
+ }
+ if(typeBaseNameNode) {
+- llvmInfo.typeBaseName= (cast<MDString>(typeBaseNameNode->getOperand(1 + argID)))->getString();
++ llvmInfo.typeBaseName= (cast<MDString>(typeBaseNameNode->getOperand(opID)))->getString();
+ }
+ bool isImage = llvmInfo.isImageType();
+ if (I->getType()->isPointerTy() || isImage) {
+@@ -1974,6 +1984,92 @@ namespace gbe
+
+ std::string functionAttributes;
+
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++ /* Since LLVM 3.9, kernel argument info is stored as function metadata. */
++ addrSpaceNode = F.getMetadata("kernel_arg_addr_space");
++ accessQualNode = F.getMetadata("kernel_arg_access_qual");
++ typeNameNode = F.getMetadata("kernel_arg_type");
++ typeBaseNameNode = F.getMetadata("kernel_arg_base_type");
++ typeQualNode = F.getMetadata("kernel_arg_type_qual");
++ argNameNode = F.getMetadata("kernel_arg_name");
++ MDNode *attrNode;
++ if ((attrNode = F.getMetadata("vec_type_hint"))) {
++ GBE_ASSERT(attrNode->getNumOperands() == 2);
++ functionAttributes += "vec_type_hint";
++ auto *Op1 = cast<ValueAsMetadata>(attrNode->getOperand(0));
++ Value *V = Op1 ? Op1->getValue() : NULL;
++ ConstantInt *sign =
++ mdconst::extract<ConstantInt>(attrNode->getOperand(1));
++ size_t signValue = sign->getZExtValue();
++ Type *vtype = V->getType();
++ Type *stype = vtype;
++ uint32_t elemNum = 0;
++ if (vtype->isVectorTy()) {
++ VectorType *vectorType = cast<VectorType>(vtype);
++ stype = vectorType->getElementType();
++ elemNum = vectorType->getNumElements();
++ }
++
++ std::string typeName = getTypeName(ctx, stype, signValue);
++
++ std::stringstream param;
++ char buffer[100] = {0};
++ param << "(";
++ param << typeName;
++ if (vtype->isVectorTy())
++ param << elemNum;
++ param << ")";
++ param >> buffer;
++ functionAttributes += buffer;
++ functionAttributes += " ";
++ }
++ if ((attrNode = F.getMetadata("reqd_work_group_size"))) {
++ GBE_ASSERT(attrNode->getNumOperands() == 3);
++ ConstantInt *x = mdconst::extract<ConstantInt>(attrNode->getOperand(0));
++ ConstantInt *y = mdconst::extract<ConstantInt>(attrNode->getOperand(1));
++ ConstantInt *z = mdconst::extract<ConstantInt>(attrNode->getOperand(2));
++ GBE_ASSERT(x && y && z);
++ reqd_wg_sz[0] = x->getZExtValue();
++ reqd_wg_sz[1] = y->getZExtValue();
++ reqd_wg_sz[2] = z->getZExtValue();
++ functionAttributes += "reqd_work_group_size";
++ std::stringstream param;
++ char buffer[100] = {0};
++ param << "(";
++ param << reqd_wg_sz[0];
++ param << ",";
++ param << reqd_wg_sz[1];
++ param << ",";
++ param << reqd_wg_sz[2];
++ param << ")";
++ param >> buffer;
++ functionAttributes += buffer;
++ functionAttributes += " ";
++ }
++ if ((attrNode = F.getMetadata("work_group_size_hint"))) {
++ GBE_ASSERT(attrNode->getNumOperands() == 3);
++ ConstantInt *x = mdconst::extract<ConstantInt>(attrNode->getOperand(0));
++ ConstantInt *y = mdconst::extract<ConstantInt>(attrNode->getOperand(1));
++ ConstantInt *z = mdconst::extract<ConstantInt>(attrNode->getOperand(2));
++ GBE_ASSERT(x && y && z);
++ hint_wg_sz[0] = x->getZExtValue();
++ hint_wg_sz[1] = y->getZExtValue();
++ hint_wg_sz[2] = z->getZExtValue();
++ functionAttributes += "work_group_size_hint";
++ std::stringstream param;
++ char buffer[100] = {0};
++ param << "(";
++ param << hint_wg_sz[0];
++ param << ",";
++ param << hint_wg_sz[1];
++ param << ",";
++ param << hint_wg_sz[2];
++ param << ")";
++ param >> buffer;
++ functionAttributes += buffer;
++ functionAttributes += " ";
++ }
++#else
+ /* First find the meta data belong to this function. */
+ MDNode *node = getKernelFunctionMetadata(&F);
+
+@@ -2095,6 +2191,7 @@ namespace gbe
+ functionAttributes += " ";
+ }
+ }
++#endif /* LLVM 3.9 Function metadata */
+
+ ctx.getFunction().setCompileWorkGroupSize(reqd_wg_sz[0], reqd_wg_sz[1], reqd_wg_sz[2]);
+
+@@ -2110,29 +2207,33 @@ namespace gbe
+ const AttrListPtr &PAL = F.getAttributes();
+ #endif /* LLVM_VERSION_MINOR <= 1 */
+ for (; I != E; ++I, ++argID) {
++ uint32_t opID = argID;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9
++ opID += 1;
++#endif
+ const std::string &argName = I->getName().str();
+ Type *type = I->getType();
+ if(addrSpaceNode) {
+ #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 5
+- llvmInfo.addrSpace = (cast<ConstantInt>(addrSpaceNode->getOperand(1 + argID)))->getZExtValue();
++ llvmInfo.addrSpace = (cast<ConstantInt>(addrSpaceNode->getOperand(opID)))->getZExtValue();
+ #else
+- llvmInfo.addrSpace = (mdconst::extract<ConstantInt>(addrSpaceNode->getOperand(1 + argID)))->getZExtValue();
++ llvmInfo.addrSpace = (mdconst::extract<ConstantInt>(addrSpaceNode->getOperand(opID)))->getZExtValue();
+ #endif
+ }
+ if(typeNameNode) {
+- llvmInfo.typeName = (cast<MDString>(typeNameNode->getOperand(1 + argID)))->getString();
++ llvmInfo.typeName = (cast<MDString>(typeNameNode->getOperand(opID)))->getString();
+ }
+ if(typeBaseNameNode){
+- llvmInfo.typeBaseName = (cast<MDString>(typeBaseNameNode->getOperand(1 + argID)))->getString();
++ llvmInfo.typeBaseName = (cast<MDString>(typeBaseNameNode->getOperand(opID)))->getString();
+ }
+ if(accessQualNode) {
+- llvmInfo.accessQual = (cast<MDString>(accessQualNode->getOperand(1 + argID)))->getString();
++ llvmInfo.accessQual = (cast<MDString>(accessQualNode->getOperand(opID)))->getString();
+ }
+ if(typeQualNode) {
+- llvmInfo.typeQual = (cast<MDString>(typeQualNode->getOperand(1 + argID)))->getString();
++ llvmInfo.typeQual = (cast<MDString>(typeQualNode->getOperand(opID)))->getString();
+ }
+ if(argNameNode){
+- llvmInfo.argName = (cast<MDString>(argNameNode->getOperand(1 + argID)))->getString();
++ llvmInfo.argName = (cast<MDString>(argNameNode->getOperand(opID)))->getString();
+ }
+
+ // function arguments are uniform values.
+--- a/backend/src/llvm/llvm_includes.hpp
++++ b/backend/src/llvm/llvm_includes.hpp
+@@ -127,4 +127,9 @@
+ #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+ #endif
+
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++#include "llvm/Transforms/IPO/FunctionAttrs.h"
++#include "llvm/Transforms/Scalar/GVN.h"
++#endif
++
+ #endif /* __GBE_IR_LLVM_INCLUDES_HPP__ */
+--- a/backend/src/llvm/llvm_passes.cpp
++++ b/backend/src/llvm/llvm_passes.cpp
+@@ -41,9 +41,12 @@ using namespace llvm;
+ namespace gbe
+ {
+ bool isKernelFunction(const llvm::Function &F) {
++ bool bKernel = false;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++ bKernel = F.getMetadata("kernel_arg_name") != NULL;
++#else
+ const Module *module = F.getParent();
+ const Module::NamedMDListType& globalMD = module->getNamedMDList();
+- bool bKernel = false;
+ for(auto i = globalMD.begin(); i != globalMD.end(); i++) {
+ const NamedMDNode &md = *i;
+ if(strcmp(md.getName().data(), "opencl.kernels") != 0) continue;
+@@ -58,6 +61,7 @@ namespace gbe
+ if(op == &F) bKernel = true;
+ }
+ }
++#endif
+ return bKernel;
+ }
+
+--- a/backend/src/llvm/llvm_to_gen.cpp
++++ b/backend/src/llvm/llvm_to_gen.cpp
+@@ -46,6 +46,13 @@ namespace gbe
+ BVAR(OCL_OUTPUT_CFG_GEN_IR, false);
+ using namespace llvm;
+
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++ llvm::LLVMContext& GBEGetLLVMContext() { // returns a reference to one LLVMContext shared by every caller
++ static llvm::LLVMContext GBEContext; // function-local static: constructed on the first call
++ return GBEContext;
++ }
++#endif
++
+ #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
+ #define TARGETLIBRARY TargetLibraryInfoImpl
+ #else
+@@ -142,7 +149,9 @@ namespace gbe
+ MPM.add(createBarrierNodupPass(false)); // remove noduplicate fnAttr before inlining.
+ MPM.add(createFunctionInliningPass(20000));
+ MPM.add(createBarrierNodupPass(true)); // restore noduplicate fnAttr after inlining.
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++ MPM.add(createPostOrderFunctionAttrsLegacyPass());
++#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8
+ MPM.add(createPostOrderFunctionAttrsPass()); // Set readonly/readnone attrs
+ #else
+ MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs
+@@ -294,7 +303,11 @@ namespace gbe
+ if (module) {
+ cl_mod = reinterpret_cast<Module*>(const_cast<void*>(module));
+ } else if (fileName){
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++ llvm::LLVMContext& c = GBEGetLLVMContext();
++#else
+ llvm::LLVMContext& c = llvm::getGlobalContext();
++#endif
+ #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
+ cl_mod = parseIRFile(fileName, Err, c).release();
+ #else
+@@ -349,7 +362,11 @@ namespace gbe
+ passes.add(createIntrinsicLoweringPass());
+ passes.add(createStripAttributesPass()); // Strip unsupported attributes and calling conventions.
+ passes.add(createFunctionInliningPass(20000));
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
++ passes.add(createSROAPass());
++#else
+ passes.add(createScalarReplAggregatesPass(64, true, -1, -1, 64));
++#endif
+ passes.add(createLoadStoreOptimizationPass());
+ passes.add(createConstantPropagationPass());
+ passes.add(createPromoteMemoryToRegisterPass());
+--- a/backend/src/llvm/llvm_to_gen.hpp
++++ b/backend/src/llvm/llvm_to_gen.hpp
+@@ -23,6 +23,9 @@
+ */
+ #ifndef __GBE_IR_LLVM_TO_GEN_HPP__
+ #define __GBE_IR_LLVM_TO_GEN_HPP__
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++#include "llvm/IR/LLVMContext.h"
++#endif
+
+ namespace gbe {
+ namespace ir {
+@@ -34,6 +37,9 @@ namespace gbe {
+ optLevel 0 equal to clang -O1 and 1 equal to clang -O2*/
+ bool llvmToGen(ir::Unit &unit, const char *fileName, const void* module,
+ int optLevel, bool strictMath, int profiling, std::string &errors);
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++ extern llvm::LLVMContext& GBEGetLLVMContext();
++#endif
+
+ } /* namespace gbe */
+
+--- a/kernels/test_get_arg_info.cl
++++ b/kernels/test_get_arg_info.cl
+@@ -3,6 +3,6 @@ typedef struct _test_arg_struct {
+ int b;
+ }test_arg_struct;
+
+-kernel void test_get_arg_info(read_only global float const volatile *src, read_write local int *dst, test_arg_struct extra) {
++kernel void test_get_arg_info(global float const volatile *src, local int *dst, test_arg_struct extra) {
+
+ }
+--- a/src/kernels/cl_internal_copy_buffer_to_image_2d.cl
++++ b/src/kernels/cl_internal_copy_buffer_to_image_2d.cl
+@@ -1,4 +1,4 @@
+-kernel void __cl_copy_buffer_to_image_2d(__read_only image2d_t image, global uchar* buffer,
++kernel void __cl_copy_buffer_to_image_2d(__write_only image2d_t image, global uchar* buffer,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2,
+ unsigned int src_offset)
+--- a/src/kernels/cl_internal_copy_buffer_to_image_3d.cl
++++ b/src/kernels/cl_internal_copy_buffer_to_image_3d.cl
+@@ -1,4 +1,4 @@
+-kernel void __cl_copy_buffer_to_image_3d(__read_only image3d_t image, global uchar* buffer,
++kernel void __cl_copy_buffer_to_image_3d(__write_only image3d_t image, global uchar* buffer,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2,
+ unsigned int src_offset)
+--- a/utests/CMakeLists.txt
++++ b/utests/CMakeLists.txt
+@@ -255,7 +255,6 @@ set (utests_sources
+ compiler_double_div.cpp
+ compiler_double_convert.cpp
+ load_program_from_gen_bin.cpp
+- load_program_from_spir.cpp
+ get_arg_info.cpp
+ profiling_exec.cpp
+ enqueue_copy_buf.cpp
diff --git a/debian/patches/series b/debian/patches/series
index 176aa1a..0fc23b2 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -10,3 +10,4 @@ docs-broken-links.patch
cl_accelerator_intel.patch
support-python3.patch
pow-powr-tests.patch
+llvm39-support.patch
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git
More information about the Pkg-opencl-commits
mailing list