[beignet] 04/04: Add LLVM 3.9 support

Rebecca Palmer rnpalmer-guest at moszumanska.debian.org
Wed Sep 28 20:27:09 UTC 2016


This is an automated email from the git hooks/post-receive script.

rnpalmer-guest pushed a commit to branch master
in repository beignet.

commit de51a3e9ceed565af2d47eceaea4b6ed635c863c
Author: Rebecca N. Palmer <rebecca_palmer at zoho.com>
Date:   Wed Sep 28 21:24:56 2016 +0100

    Add LLVM 3.9 support
---
 debian/changelog                    |    1 +
 debian/control                      |    3 +
 debian/patches/llvm39-support.patch | 1337 +++++++++++++++++++++++++++++++++++
 debian/patches/series               |    1 +
 4 files changed, 1342 insertions(+)

diff --git a/debian/changelog b/debian/changelog
index b4e067f..9dd42f8 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,7 @@ beignet (1.2.0-2) UNRELEASED; urgency=medium
   * Fix FTBFS on kfreebsd-*.
   * Make tests work with Python 3 and switch back to it.
   * Add pow-powr-tests.patch.
+  * Add LLVM 3.9 support.
 
  -- Rebecca N. Palmer <rebecca_palmer at zoho.com>  Sun, 25 Sep 2016 21:58:07 +0100
 
diff --git a/debian/control b/debian/control
index ae8a383..9dfd827 100644
--- a/debian/control
+++ b/debian/control
@@ -14,6 +14,9 @@ Build-Depends:
  clang-3.8,
  libclang-3.8-dev,
  llvm-3.8-dev,
+ clang-3.9 [linux-any],
+ libclang-3.9-dev [linux-any],
+ llvm-3.9-dev [linux-any],
  ocl-icd-dev,
  ocl-icd-opencl-dev,
  pkg-config,
diff --git a/debian/patches/llvm39-support.patch b/debian/patches/llvm39-support.patch
new file mode 100644
index 0000000..deb2414
--- /dev/null
+++ b/debian/patches/llvm39-support.patch
@@ -0,0 +1,1337 @@
+Description: Support LLVM 3.9
+
+Origin: (mostly) upstream 6ebe485...0056da7
+Author: Pan Xiuli, Rebecca Palmer
+
+--- a/CMake/FindLLVM.cmake
++++ b/CMake/FindLLVM.cmake
+@@ -8,12 +8,12 @@
+ # LLVM_FOUND       - True if llvm found.
+ if (LLVM_INSTALL_DIR)
+   find_program(LLVM_CONFIG_EXECUTABLE
+-               NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
++               NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config-39 llvm-config-3.9 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
+                DOC "llvm-config executable"
+                PATHS ${LLVM_INSTALL_DIR} NO_DEFAULT_PATH)
+ else (LLVM_INSTALL_DIR)
+   find_program(LLVM_CONFIG_EXECUTABLE
+-               NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
++               NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config-39 llvm-config-3.9 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
+                DOC "llvm-config executable")
+ endif (LLVM_INSTALL_DIR)
+ 
+--- a/backend/src/backend/gen_program.cpp
++++ b/backend/src/backend/gen_program.cpp
+@@ -334,7 +334,11 @@ namespace gbe {
+     //the first byte stands for binary_type.
+     binary_content.assign(binary+1, size-1);
+     llvm::StringRef llvm_bin_str(binary_content);
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++    llvm::LLVMContext& c = GBEGetLLVMContext();
++#else
+     llvm::LLVMContext& c = llvm::getGlobalContext();
++#endif
+     llvm::SMDiagnostic Err;
+ #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
+     std::unique_ptr<llvm::MemoryBuffer> memory_buffer = llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+@@ -488,10 +492,17 @@ namespace gbe {
+ #endif
+       errSize = 0;
+     }else{
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++      // LLVMLinkModules2 destroys the source module, so link a clone of it.
++      llvm::Module* src = llvm::CloneModule((llvm::Module*)((GenProgram*)src_program)->module).release();
++#else
+       llvm::Module* src = (llvm::Module*)((GenProgram*)src_program)->module;
++#endif
+       llvm::Module* dst = (llvm::Module*)((GenProgram*)dst_program)->module;
+ 
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++      if (LLVMLinkModules2(wrap(dst), wrap(src))) {
++#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
+       if (LLVMLinkModules(wrap(dst), wrap(src), LLVMLinkerPreserveSource_Removed, &errMsg)) {
+ #else
+       if (LLVMLinkModules(wrap(dst), wrap(src), LLVMLinkerPreserveSource, &errMsg)) {
+--- a/backend/src/backend/program.cpp
++++ b/backend/src/backend/program.cpp
+@@ -133,7 +133,13 @@ namespace gbe {
+     bool strictMath = true;
+     if (fast_relaxed_math || !OCL_STRICT_CONFORMANCE)
+       strictMath = false;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++    llvm::Module * linked_module = module ? llvm::CloneModule((llvm::Module*)module).release() : NULL;
++    // With LLVM 3.9 the source module is consumed automatically, so pass a clone.
++    if (llvmToGen(*unit, fileName, linked_module, optLevel, strictMath, OCL_PROFILING_LOG, error) == false) {
++#else
+     if (llvmToGen(*unit, fileName, module, optLevel, strictMath, OCL_PROFILING_LOG, error) == false) {
++#endif
+       if (fileName)
+         error = std::string(fileName) + " not found";
+       delete unit;
+@@ -1057,7 +1063,11 @@ EXTEND_QUOTE:
+     //FIXME: if use new allocated context to link two modules there would be context mismatch
+     //for some functions, so we use global context now, need switch to new context later.
+     llvm::Module * out_module;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++    llvm::LLVMContext* llvm_ctx = &GBEGetLLVMContext();
++#else
+     llvm::LLVMContext* llvm_ctx = &llvm::getGlobalContext();
++#endif
+ 
+     if (buildModuleFromSource(source, &out_module, llvm_ctx, dumpLLVMFileName, dumpSPIRBinaryName, clOpt,
+                               stringSize, err, errSize)) {
+--- a/backend/src/ir/function.hpp
++++ b/backend/src/ir/function.hpp
+@@ -208,22 +208,22 @@ namespace ir {
+       }
+ #else
+       bool isImage1dT() const {
+-        return typeBaseName.compare("image1d_t") == 0;
++        return typeBaseName.find("image1d_t") !=std::string::npos;
+       }
+       bool isImage1dArrayT() const {
+-        return typeBaseName.compare("image1d_array_t") == 0;
++        return typeBaseName.find("image1d_array_t") !=std::string::npos;
+       }
+       bool isImage1dBufferT() const {
+-        return typeBaseName.compare("image1d_buffer_t") == 0;
++        return typeBaseName.find("image1d_buffer_t") !=std::string::npos;
+       }
+       bool isImage2dT() const {
+-        return typeBaseName.compare("image2d_t") == 0;
++        return typeBaseName.find("image2d_t") !=std::string::npos;
+       }
+       bool isImage2dArrayT() const {
+-        return typeBaseName.compare("image2d_array_t") == 0;
++        return typeBaseName.find("image2d_array_t") !=std::string::npos;
+       }
+       bool isImage3dT() const {
+-        return typeBaseName.compare("image3d_t") == 0;
++        return typeBaseName.find("image3d_t") !=std::string::npos;
+       }
+       bool isSamplerType() const {
+         return typeBaseName.compare("sampler_t") == 0;
+--- a/backend/src/libocl/include/ocl.h
++++ b/backend/src/libocl/include/ocl.h
+@@ -18,6 +18,67 @@
+ #ifndef __OCL_H__
+ #define __OCL_H__
+ 
++/* LLVM 3.9 predefines these macros, so undefine them first. */
++#ifdef cl_khr_3d_image_writes
++#undef cl_khr_3d_image_writes
++#endif
++#ifdef cl_khr_byte_addressable_store
++#undef cl_khr_byte_addressable_store
++#endif
++#ifdef cl_khr_fp16
++#undef cl_khr_fp16
++#endif
++#ifdef cl_khr_fp64
++#undef cl_khr_fp64
++#endif
++#ifdef cl_khr_global_int32_base_atomics
++#undef cl_khr_global_int32_base_atomics
++#endif
++#ifdef cl_khr_global_int32_extended_atomics
++#undef cl_khr_global_int32_extended_atomics
++#endif
++#ifdef cl_khr_gl_sharing
++#undef cl_khr_gl_sharing
++#endif
++#ifdef cl_khr_icd
++#undef cl_khr_icd
++#endif
++#ifdef cl_khr_local_int32_base_atomics
++#undef cl_khr_local_int32_base_atomics
++#endif
++#ifdef cl_khr_local_int32_extended_atomics
++#undef cl_khr_local_int32_extended_atomics
++#endif
++
++#ifdef cl_khr_d3d10_sharing
++#undef cl_khr_d3d10_sharing
++#endif
++#ifdef cl_khr_gl_event
++#undef cl_khr_gl_event
++#endif
++#ifdef cl_khr_int64_base_atomics
++#undef cl_khr_int64_base_atomics
++#endif
++#ifdef cl_khr_int64_extended_atomics
++#undef cl_khr_int64_extended_atomics
++#endif
++
++#ifdef cl_khr_d3d11_sharing
++#undef cl_khr_d3d11_sharing
++#endif
++#ifdef cl_khr_depth_images
++#undef cl_khr_depth_images
++#endif
++#ifdef cl_khr_dx9_media_sharing
++#undef cl_khr_dx9_media_sharing
++#endif
++#ifdef cl_khr_gl_depth_images
++#undef cl_khr_gl_depth_images
++#endif
++#ifdef cl_khr_spir
++#undef cl_khr_spir
++#endif
++
+ #include "ocl_defines.h"
+ #include "ocl_types.h"
+ #include "ocl_as.h"
+@@ -40,6 +101,20 @@
+ #include "ocl_workitem.h"
+ #include "ocl_simd.h"
+ #include "ocl_work_group.h"
++
++/* Moved here from ocl_defines.h so that each macro is defined only once. */
++#define cl_khr_global_int32_base_atomics
++#define cl_khr_global_int32_extended_atomics
++#define cl_khr_local_int32_base_atomics
++#define cl_khr_local_int32_extended_atomics
++#define cl_khr_byte_addressable_store
++#define cl_khr_icd
++#define cl_khr_gl_sharing
++#define cl_khr_spir
++#define cl_khr_fp16
++#define cl_khr_3d_image_writes
++#define cl_intel_subgroups
++
+ #pragma OPENCL EXTENSION cl_khr_fp64 : disable
+ #pragma OPENCL EXTENSION cl_khr_fp16 : disable
+ #endif
+--- a/backend/src/libocl/include/ocl_image.h
++++ b/backend/src/libocl/include/ocl_image.h
+@@ -20,152 +20,189 @@
+ 
+ #include "ocl_types.h"
+ 
+-OVERLOADABLE int4 read_imagei(image1d_t cl_image, const sampler_t sampler, int coord);
+-OVERLOADABLE int4 read_imagei(image1d_t cl_image, const sampler_t sampler, float coord);
+-OVERLOADABLE int4 read_imagei(image1d_t cl_image, int coord);
+-OVERLOADABLE void write_imagei(image1d_t cl_image, int coord, int4 color);
+-OVERLOADABLE void write_imagei(image1d_t cl_image, float coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image1d_t cl_image, const sampler_t sampler, int coord);
+-OVERLOADABLE uint4 read_imageui(image1d_t cl_image, const sampler_t sampler, float coord);
+-OVERLOADABLE uint4 read_imageui(image1d_t cl_image, int coord);
+-OVERLOADABLE void write_imageui(image1d_t cl_image, int coord, uint4 color);
+-OVERLOADABLE void write_imageui(image1d_t cl_image, float coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image1d_t cl_image, const sampler_t sampler, int coord);
+-OVERLOADABLE float4 read_imagef(image1d_t cl_image, const sampler_t sampler, float coord);
+-OVERLOADABLE float4 read_imagef(image1d_t cl_image, int coord);
+-OVERLOADABLE void write_imagef(image1d_t cl_image, int coord, float4 color);
+-OVERLOADABLE void write_imagef(image1d_t cl_image, float coord, float4 color);
+-OVERLOADABLE int4 read_imagei(image1d_buffer_t cl_image, int coord);
+-OVERLOADABLE void write_imagei(image1d_buffer_t cl_image, int coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image1d_buffer_t cl_image, int coord);
+-OVERLOADABLE void write_imageui(image1d_buffer_t cl_image, int coord, uint4 color);
+-OVERLOADABLE void write_imageui(image1d_buffer_t cl_image, float coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image1d_buffer_t cl_image, int coord);
+-OVERLOADABLE void write_imagef(image1d_buffer_t cl_image, int coord, float4 color);
+-
+-OVERLOADABLE int get_image_channel_data_type(image1d_t image);
+-OVERLOADABLE int get_image_channel_order(image1d_t image);
+-OVERLOADABLE int get_image_width(image1d_t image);
+-OVERLOADABLE int get_image_channel_data_type(image1d_buffer_t image);
+-OVERLOADABLE int get_image_channel_order(image1d_buffer_t image);
+-OVERLOADABLE int get_image_width(image1d_buffer_t image);
+-OVERLOADABLE int4 read_imagei(image2d_t cl_image, const sampler_t sampler, int2 coord);
+-OVERLOADABLE int4 read_imagei(image2d_t cl_image, const sampler_t sampler, float2 coord);
+-OVERLOADABLE int4 read_imagei(image2d_t cl_image, int2 coord);
+-OVERLOADABLE void write_imagei(image2d_t cl_image, int2 coord, int4 color);
+-OVERLOADABLE void write_imagei(image2d_t cl_image, float2 coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image2d_t cl_image, const sampler_t sampler, int2 coord);
+-OVERLOADABLE uint4 read_imageui(image2d_t cl_image, const sampler_t sampler, float2 coord);
+-OVERLOADABLE uint4 read_imageui(image2d_t cl_image, int2 coord);
+-OVERLOADABLE void write_imageui(image2d_t cl_image, int2 coord, uint4 color);
+-OVERLOADABLE void write_imageui(image2d_t cl_image, float2 coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image2d_t cl_image, const sampler_t sampler, int2 coord);
+-OVERLOADABLE float4 read_imagef(image2d_t cl_image, const sampler_t sampler, float2 coord);
+-OVERLOADABLE float4 read_imagef(image2d_t cl_image, int2 coord);
+-OVERLOADABLE void write_imagef(image2d_t cl_image, int2 coord, float4 color);
+-OVERLOADABLE void write_imagef(image2d_t cl_image, float2 coord, float4 color);
+-OVERLOADABLE int4 read_imagei(image1d_array_t cl_image, const sampler_t sampler, int2 coord);
+-OVERLOADABLE int4 read_imagei(image1d_array_t cl_image, const sampler_t sampler, float2 coord);
+-OVERLOADABLE int4 read_imagei(image1d_array_t cl_image, int2 coord);
+-OVERLOADABLE void write_imagei(image1d_array_t cl_image, int2 coord, int4 color);
+-OVERLOADABLE void write_imagei(image1d_array_t cl_image, float2 coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image1d_array_t cl_image, const sampler_t sampler, int2 coord);
+-OVERLOADABLE uint4 read_imageui(image1d_array_t cl_image, const sampler_t sampler, float2 coord);
+-OVERLOADABLE uint4 read_imageui(image1d_array_t cl_image, int2 coord);
+-OVERLOADABLE void write_imageui(image1d_array_t cl_image, int2 coord, uint4 color);
+-OVERLOADABLE void write_imageui(image1d_array_t cl_image, float2 coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image1d_array_t cl_image, const sampler_t sampler, int2 coord);
+-OVERLOADABLE float4 read_imagef(image1d_array_t cl_image, const sampler_t sampler, float2 coord);
+-OVERLOADABLE float4 read_imagef(image1d_array_t cl_image, int2 coord);
+-OVERLOADABLE void write_imagef(image1d_array_t cl_image, int2 coord, float4 color);
+-OVERLOADABLE void write_imagef(image1d_array_t cl_image, float2 coord, float4 color);
+-
+-OVERLOADABLE int get_image_channel_data_type(image2d_t image);
+-OVERLOADABLE int get_image_channel_order(image2d_t image);
+-OVERLOADABLE int get_image_width(image2d_t image);
+-OVERLOADABLE int get_image_height(image2d_t image);
+-OVERLOADABLE int2 get_image_dim(image2d_t image);
+-
+-OVERLOADABLE int get_image_channel_data_type(image1d_array_t image);
+-OVERLOADABLE int get_image_channel_order(image1d_array_t image);
+-OVERLOADABLE int get_image_width(image1d_array_t image);
+-OVERLOADABLE size_t get_image_array_size(image1d_array_t image);
+-OVERLOADABLE int4 read_imagei(image3d_t cl_image, const sampler_t sampler, int4 coord);
+-OVERLOADABLE int4 read_imagei(image3d_t cl_image, const sampler_t sampler, float4 coord);
+-OVERLOADABLE int4 read_imagei(image3d_t cl_image, int4 coord);
+-OVERLOADABLE void write_imagei(image3d_t cl_image, int4 coord, int4 color);
+-OVERLOADABLE void write_imagei(image3d_t cl_image, float4 coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image3d_t cl_image, const sampler_t sampler, int4 coord);
+-OVERLOADABLE uint4 read_imageui(image3d_t cl_image, const sampler_t sampler, float4 coord);
+-OVERLOADABLE uint4 read_imageui(image3d_t cl_image, int4 coord);
+-OVERLOADABLE void write_imageui(image3d_t cl_image, int4 coord, uint4 color);
+-OVERLOADABLE void write_imageui(image3d_t cl_image, float4 coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image3d_t cl_image, const sampler_t sampler, int4 coord);
+-OVERLOADABLE float4 read_imagef(image3d_t cl_image, const sampler_t sampler, float4 coord);
+-OVERLOADABLE float4 read_imagef(image3d_t cl_image, int4 coord);
+-OVERLOADABLE void write_imagef(image3d_t cl_image, int4 coord, float4 color);
+-OVERLOADABLE void write_imagef(image3d_t cl_image, float4 coord, float4 color);
+-
+-OVERLOADABLE int4 read_imagei(image3d_t cl_image, const sampler_t sampler, int3 coord);
+-OVERLOADABLE int4 read_imagei(image3d_t cl_image, const sampler_t sampler, float3 coord);
+-OVERLOADABLE int4 read_imagei(image3d_t cl_image, int3 coord);
+-OVERLOADABLE void write_imagei(image3d_t cl_image, int3 coord, int4 color);
+-OVERLOADABLE void write_imagei(image3d_t cl_image, float3 coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image3d_t cl_image, const sampler_t sampler, int3 coord);
+-OVERLOADABLE uint4 read_imageui(image3d_t cl_image, const sampler_t sampler, float3 coord);
+-OVERLOADABLE uint4 read_imageui(image3d_t cl_image, int3 coord);
+-OVERLOADABLE void write_imageui(image3d_t cl_image, int3 coord, uint4 color);
+-OVERLOADABLE void write_imageui(image3d_t cl_image, float3 coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image3d_t cl_image, const sampler_t sampler, int3 coord);
+-OVERLOADABLE float4 read_imagef(image3d_t cl_image, const sampler_t sampler, float3 coord);
+-OVERLOADABLE float4 read_imagef(image3d_t cl_image, int3 coord);
+-OVERLOADABLE void write_imagef(image3d_t cl_image, int3 coord, float4 color);
+-OVERLOADABLE void write_imagef(image3d_t cl_image, float3 coord, float4 color);
+-OVERLOADABLE int4 read_imagei(image2d_array_t cl_image, const sampler_t sampler, int4 coord);
+-OVERLOADABLE int4 read_imagei(image2d_array_t cl_image, const sampler_t sampler, float4 coord);
+-OVERLOADABLE int4 read_imagei(image2d_array_t cl_image, int4 coord);
+-OVERLOADABLE void write_imagei(image2d_array_t cl_image, int4 coord, int4 color);
+-OVERLOADABLE void write_imagei(image2d_array_t cl_image, float4 coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image2d_array_t cl_image, const sampler_t sampler, int4 coord);
+-OVERLOADABLE uint4 read_imageui(image2d_array_t cl_image, const sampler_t sampler, float4 coord);
+-OVERLOADABLE uint4 read_imageui(image2d_array_t cl_image, int4 coord);
+-OVERLOADABLE void write_imageui(image2d_array_t cl_image, int4 coord, uint4 color);
+-OVERLOADABLE void write_imageui(image2d_array_t cl_image, float4 coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image2d_array_t cl_image, const sampler_t sampler, int4 coord);
+-OVERLOADABLE float4 read_imagef(image2d_array_t cl_image, const sampler_t sampler, float4 coord);
+-OVERLOADABLE float4 read_imagef(image2d_array_t cl_image, int4 coord);
+-OVERLOADABLE void write_imagef(image2d_array_t cl_image, int4 coord, float4 color);
+-OVERLOADABLE void write_imagef(image2d_array_t cl_image, float4 coord, float4 color);
+-
+-OVERLOADABLE int4 read_imagei(image2d_array_t cl_image, const sampler_t sampler, int3 coord);
+-OVERLOADABLE int4 read_imagei(image2d_array_t cl_image, const sampler_t sampler, float3 coord);
+-OVERLOADABLE int4 read_imagei(image2d_array_t cl_image, int3 coord);
+-OVERLOADABLE void write_imagei(image2d_array_t cl_image, int3 coord, int4 color);
+-OVERLOADABLE void write_imagei(image2d_array_t cl_image, float3 coord, int4 color);
+-OVERLOADABLE uint4 read_imageui(image2d_array_t cl_image, const sampler_t sampler, int3 coord);
+-OVERLOADABLE uint4 read_imageui(image2d_array_t cl_image, const sampler_t sampler, float3 coord);
+-OVERLOADABLE uint4 read_imageui(image2d_array_t cl_image, int3 coord);
+-OVERLOADABLE void write_imageui(image2d_array_t cl_image, int3 coord, uint4 color);
+-OVERLOADABLE void write_imageui(image2d_array_t cl_image, float3 coord, uint4 color);
+-OVERLOADABLE float4 read_imagef(image2d_array_t cl_image, const sampler_t sampler, int3 coord);
+-OVERLOADABLE float4 read_imagef(image2d_array_t cl_image, const sampler_t sampler, float3 coord);
+-OVERLOADABLE float4 read_imagef(image2d_array_t cl_image, int3 coord);
+-OVERLOADABLE void write_imagef(image2d_array_t cl_image, int3 coord, float4 color);
+-OVERLOADABLE void write_imagef(image2d_array_t cl_image, float3 coord, float4 color);
+-
+-OVERLOADABLE int get_image_channel_data_type(image3d_t image);
+-OVERLOADABLE int get_image_channel_order(image3d_t image);
+-OVERLOADABLE int get_image_width(image3d_t image);
+-OVERLOADABLE int get_image_height(image3d_t image);
+-OVERLOADABLE int get_image_depth(image3d_t image);
+-OVERLOADABLE int4 get_image_dim(image3d_t image);
+-
+-
+-OVERLOADABLE int get_image_channel_data_type(image2d_array_t image);
+-OVERLOADABLE int get_image_channel_order(image2d_array_t image);
+-OVERLOADABLE int get_image_width(image2d_array_t image);
+-OVERLOADABLE int get_image_height(image2d_array_t image);
+-OVERLOADABLE int2 get_image_dim(image2d_array_t image);
+-OVERLOADABLE size_t get_image_array_size(image2d_array_t image);
++OVERLOADABLE int4 read_imagei(read_only image1d_t cl_image, const sampler_t sampler, int coord);
++OVERLOADABLE int4 read_imagei(read_only image1d_t cl_image, const sampler_t sampler, float coord);
++OVERLOADABLE int4 read_imagei(read_only image1d_t cl_image, int coord);
++OVERLOADABLE void write_imagei(write_only image1d_t cl_image, int coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image1d_t cl_image, float coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image1d_t cl_image, const sampler_t sampler, int coord);
++OVERLOADABLE uint4 read_imageui(read_only image1d_t cl_image, const sampler_t sampler, float coord);
++OVERLOADABLE uint4 read_imageui(read_only image1d_t cl_image, int coord);
++OVERLOADABLE void write_imageui(write_only image1d_t cl_image, int coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image1d_t cl_image, float coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image1d_t cl_image, const sampler_t sampler, int coord);
++OVERLOADABLE float4 read_imagef(read_only image1d_t cl_image, const sampler_t sampler, float coord);
++OVERLOADABLE float4 read_imagef(read_only image1d_t cl_image, int coord);
++OVERLOADABLE void write_imagef(write_only image1d_t cl_image, int coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image1d_t cl_image, float coord, float4 color);
++OVERLOADABLE int4 read_imagei(read_only image1d_buffer_t cl_image, int coord);
++OVERLOADABLE void write_imagei(write_only image1d_buffer_t cl_image, int coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image1d_buffer_t cl_image, int coord);
++OVERLOADABLE void write_imageui(write_only image1d_buffer_t cl_image, int coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image1d_buffer_t cl_image, float coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image1d_buffer_t cl_image, int coord);
++OVERLOADABLE void write_imagef(write_only image1d_buffer_t cl_image, int coord, float4 color);
++
++OVERLOADABLE int get_image_channel_data_type(read_only image1d_t image);
++OVERLOADABLE int get_image_channel_order(read_only image1d_t image);
++OVERLOADABLE int get_image_width(read_only image1d_t image);
++
++OVERLOADABLE int get_image_channel_data_type(read_only image1d_buffer_t image);
++OVERLOADABLE int get_image_channel_order(read_only image1d_buffer_t image);
++OVERLOADABLE int get_image_width(read_only image1d_buffer_t image);
++
++OVERLOADABLE int4 read_imagei(read_only image2d_t cl_image, const sampler_t sampler, int2 coord);
++OVERLOADABLE int4 read_imagei(read_only image2d_t cl_image, const sampler_t sampler, float2 coord);
++OVERLOADABLE int4 read_imagei(read_only image2d_t cl_image, int2 coord);
++OVERLOADABLE void write_imagei(write_only image2d_t cl_image, int2 coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image2d_t cl_image, float2 coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image2d_t cl_image, const sampler_t sampler, int2 coord);
++OVERLOADABLE uint4 read_imageui(read_only image2d_t cl_image, const sampler_t sampler, float2 coord);
++OVERLOADABLE uint4 read_imageui(read_only image2d_t cl_image, int2 coord);
++OVERLOADABLE void write_imageui(write_only image2d_t cl_image, int2 coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image2d_t cl_image, float2 coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image2d_t cl_image, const sampler_t sampler, int2 coord);
++OVERLOADABLE float4 read_imagef(read_only image2d_t cl_image, const sampler_t sampler, float2 coord);
++OVERLOADABLE float4 read_imagef(read_only image2d_t cl_image, int2 coord);
++OVERLOADABLE void write_imagef(write_only image2d_t cl_image, int2 coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image2d_t cl_image, float2 coord, float4 color);
++OVERLOADABLE int4 read_imagei(read_only image1d_array_t cl_image, const sampler_t sampler, int2 coord);
++OVERLOADABLE int4 read_imagei(read_only image1d_array_t cl_image, const sampler_t sampler, float2 coord);
++OVERLOADABLE int4 read_imagei(read_only image1d_array_t cl_image, int2 coord);
++OVERLOADABLE void write_imagei(write_only image1d_array_t cl_image, int2 coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image1d_array_t cl_image, float2 coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image1d_array_t cl_image, const sampler_t sampler, int2 coord);
++OVERLOADABLE uint4 read_imageui(read_only image1d_array_t cl_image, const sampler_t sampler, float2 coord);
++OVERLOADABLE uint4 read_imageui(read_only image1d_array_t cl_image, int2 coord);
++OVERLOADABLE void write_imageui(write_only image1d_array_t cl_image, int2 coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image1d_array_t cl_image, float2 coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image1d_array_t cl_image, const sampler_t sampler, int2 coord);
++OVERLOADABLE float4 read_imagef(read_only image1d_array_t cl_image, const sampler_t sampler, float2 coord);
++OVERLOADABLE float4 read_imagef(read_only image1d_array_t cl_image, int2 coord);
++OVERLOADABLE void write_imagef(write_only image1d_array_t cl_image, int2 coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image1d_array_t cl_image, float2 coord, float4 color);
++
++OVERLOADABLE int get_image_channel_data_type(read_only image2d_t image);
++OVERLOADABLE int get_image_channel_order(read_only image2d_t image);
++OVERLOADABLE int get_image_width(read_only image2d_t image);
++OVERLOADABLE int get_image_height(read_only image2d_t image);
++OVERLOADABLE int2 get_image_dim(read_only image2d_t image);
++
++OVERLOADABLE int get_image_channel_data_type(read_only image1d_array_t image);
++OVERLOADABLE int get_image_channel_order(read_only image1d_array_t image);
++OVERLOADABLE int get_image_width(read_only image1d_array_t image);
++OVERLOADABLE size_t get_image_array_size(read_only image1d_array_t image);
++
++OVERLOADABLE int4 read_imagei(read_only image3d_t cl_image, const sampler_t sampler, int4 coord);
++OVERLOADABLE int4 read_imagei(read_only image3d_t cl_image, const sampler_t sampler, float4 coord);
++OVERLOADABLE int4 read_imagei(read_only image3d_t cl_image, int4 coord);
++OVERLOADABLE void write_imagei(write_only image3d_t cl_image, int4 coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image3d_t cl_image, float4 coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image3d_t cl_image, const sampler_t sampler, int4 coord);
++OVERLOADABLE uint4 read_imageui(read_only image3d_t cl_image, const sampler_t sampler, float4 coord);
++OVERLOADABLE uint4 read_imageui(read_only image3d_t cl_image, int4 coord);
++OVERLOADABLE void write_imageui(write_only image3d_t cl_image, int4 coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image3d_t cl_image, float4 coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image3d_t cl_image, const sampler_t sampler, int4 coord);
++OVERLOADABLE float4 read_imagef(read_only image3d_t cl_image, const sampler_t sampler, float4 coord);
++OVERLOADABLE float4 read_imagef(read_only image3d_t cl_image, int4 coord);
++OVERLOADABLE void write_imagef(write_only image3d_t cl_image, int4 coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image3d_t cl_image, float4 coord, float4 color);
++
++OVERLOADABLE int4 read_imagei(read_only image3d_t cl_image, const sampler_t sampler, int3 coord);
++OVERLOADABLE int4 read_imagei(read_only image3d_t cl_image, const sampler_t sampler, float3 coord);
++OVERLOADABLE int4 read_imagei(read_only image3d_t cl_image, int3 coord);
++OVERLOADABLE void write_imagei(write_only image3d_t cl_image, int3 coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image3d_t cl_image, float3 coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image3d_t cl_image, const sampler_t sampler, int3 coord);
++OVERLOADABLE uint4 read_imageui(read_only image3d_t cl_image, const sampler_t sampler, float3 coord);
++OVERLOADABLE uint4 read_imageui(read_only image3d_t cl_image, int3 coord);
++OVERLOADABLE void write_imageui(write_only image3d_t cl_image, int3 coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image3d_t cl_image, float3 coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image3d_t cl_image, const sampler_t sampler, int3 coord);
++OVERLOADABLE float4 read_imagef(read_only image3d_t cl_image, const sampler_t sampler, float3 coord);
++OVERLOADABLE float4 read_imagef(read_only image3d_t cl_image, int3 coord);
++OVERLOADABLE void write_imagef(write_only image3d_t cl_image, int3 coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image3d_t cl_image, float3 coord, float4 color);
++OVERLOADABLE int4 read_imagei(read_only image2d_array_t cl_image, const sampler_t sampler, int4 coord);
++OVERLOADABLE int4 read_imagei(read_only image2d_array_t cl_image, const sampler_t sampler, float4 coord);
++OVERLOADABLE int4 read_imagei(read_only image2d_array_t cl_image, int4 coord);
++OVERLOADABLE void write_imagei(write_only image2d_array_t cl_image, int4 coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image2d_array_t cl_image, float4 coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image2d_array_t cl_image, const sampler_t sampler, int4 coord);
++OVERLOADABLE uint4 read_imageui(read_only image2d_array_t cl_image, const sampler_t sampler, float4 coord);
++OVERLOADABLE uint4 read_imageui(read_only image2d_array_t cl_image, int4 coord);
++OVERLOADABLE void write_imageui(write_only image2d_array_t cl_image, int4 coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image2d_array_t cl_image, float4 coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image2d_array_t cl_image, const sampler_t sampler, int4 coord);
++OVERLOADABLE float4 read_imagef(read_only image2d_array_t cl_image, const sampler_t sampler, float4 coord);
++OVERLOADABLE float4 read_imagef(read_only image2d_array_t cl_image, int4 coord);
++OVERLOADABLE void write_imagef(write_only image2d_array_t cl_image, int4 coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image2d_array_t cl_image, float4 coord, float4 color);
++
++OVERLOADABLE int4 read_imagei(read_only image2d_array_t cl_image, const sampler_t sampler, int3 coord);
++OVERLOADABLE int4 read_imagei(read_only image2d_array_t cl_image, const sampler_t sampler, float3 coord);
++OVERLOADABLE int4 read_imagei(read_only image2d_array_t cl_image, int3 coord);
++OVERLOADABLE void write_imagei(write_only image2d_array_t cl_image, int3 coord, int4 color);
++OVERLOADABLE void write_imagei(write_only image2d_array_t cl_image, float3 coord, int4 color);
++OVERLOADABLE uint4 read_imageui(read_only image2d_array_t cl_image, const sampler_t sampler, int3 coord);
++OVERLOADABLE uint4 read_imageui(read_only image2d_array_t cl_image, const sampler_t sampler, float3 coord);
++OVERLOADABLE uint4 read_imageui(read_only image2d_array_t cl_image, int3 coord);
++OVERLOADABLE void write_imageui(write_only image2d_array_t cl_image, int3 coord, uint4 color);
++OVERLOADABLE void write_imageui(write_only image2d_array_t cl_image, float3 coord, uint4 color);
++OVERLOADABLE float4 read_imagef(read_only image2d_array_t cl_image, const sampler_t sampler, int3 coord);
++OVERLOADABLE float4 read_imagef(read_only image2d_array_t cl_image, const sampler_t sampler, float3 coord);
++OVERLOADABLE float4 read_imagef(read_only image2d_array_t cl_image, int3 coord);
++OVERLOADABLE void write_imagef(write_only image2d_array_t cl_image, int3 coord, float4 color);
++OVERLOADABLE void write_imagef(write_only image2d_array_t cl_image, float3 coord, float4 color);
++
++OVERLOADABLE int get_image_channel_data_type(read_only image3d_t image);
++OVERLOADABLE int get_image_channel_order(read_only image3d_t image);
++OVERLOADABLE int get_image_width(read_only image3d_t image);
++OVERLOADABLE int get_image_height(read_only image3d_t image);
++OVERLOADABLE int get_image_depth(read_only image3d_t image);
++OVERLOADABLE int4 get_image_dim(read_only image3d_t image);
++
++OVERLOADABLE int get_image_channel_data_type(read_only image2d_array_t image);
++OVERLOADABLE int get_image_channel_order(read_only image2d_array_t image);
++OVERLOADABLE int get_image_width(read_only image2d_array_t image);
++OVERLOADABLE int get_image_height(read_only image2d_array_t image);
++OVERLOADABLE int2 get_image_dim(read_only image2d_array_t image);
++OVERLOADABLE size_t get_image_array_size(read_only image2d_array_t image);
++
++#if __clang_major__*10 + __clang_minor__ >= 39
++OVERLOADABLE int get_image_channel_data_type(write_only image1d_t image);
++OVERLOADABLE int get_image_channel_order(write_only image1d_t image);
++OVERLOADABLE int get_image_width(write_only image1d_t image);
++
++OVERLOADABLE int get_image_channel_data_type(write_only image1d_buffer_t image);
++OVERLOADABLE int get_image_channel_order(write_only image1d_buffer_t image);
++OVERLOADABLE int get_image_width(write_only image1d_buffer_t image);
++
++OVERLOADABLE int get_image_channel_data_type(write_only image2d_t image);
++OVERLOADABLE int get_image_channel_order(write_only image2d_t image);
++OVERLOADABLE int get_image_width(write_only image2d_t image);
++OVERLOADABLE int get_image_height(write_only image2d_t image);
++OVERLOADABLE int2 get_image_dim(write_only image2d_t image);
++
++OVERLOADABLE int get_image_channel_data_type(write_only image1d_array_t image);
++OVERLOADABLE int get_image_channel_order(write_only image1d_array_t image);
++OVERLOADABLE int get_image_width(write_only image1d_array_t image);
++OVERLOADABLE size_t get_image_array_size(write_only image1d_array_t image);
++
++OVERLOADABLE int get_image_channel_data_type(write_only image3d_t image);
++OVERLOADABLE int get_image_channel_order(write_only image3d_t image);
++OVERLOADABLE int get_image_width(write_only image3d_t image);
++OVERLOADABLE int get_image_height(write_only image3d_t image);
++OVERLOADABLE int get_image_depth(write_only image3d_t image);
++OVERLOADABLE int4 get_image_dim(write_only image3d_t image);
++
++OVERLOADABLE int get_image_channel_data_type(write_only image2d_array_t image);
++OVERLOADABLE int get_image_channel_order(write_only image2d_array_t image);
++OVERLOADABLE int get_image_width(write_only image2d_array_t image);
++OVERLOADABLE int get_image_height(write_only image2d_array_t image);
++OVERLOADABLE int2 get_image_dim(write_only image2d_array_t image);
++OVERLOADABLE size_t get_image_array_size(write_only image2d_array_t image);
++#endif
+ 
+ #endif
+--- a/backend/src/libocl/src/ocl_image.cl
++++ b/backend/src/libocl/src/ocl_image.cl
+@@ -29,21 +29,21 @@
+ ///////////////////////////////////////////////////////////////////////////////
+ 
+ #define DECL_GEN_OCL_RW_IMAGE(image_type, n) \
+-  OVERLOADABLE int4 __gen_ocl_read_imagei(image_type image, sampler_t sampler,            \
++  OVERLOADABLE int4 __gen_ocl_read_imagei(read_only image_type image, sampler_t sampler,            \
+                                           float ##n coord, uint sampler_offset);          \
+-  OVERLOADABLE int4 __gen_ocl_read_imagei(image_type image, sampler_t sampler,            \
++  OVERLOADABLE int4 __gen_ocl_read_imagei(read_only image_type image, sampler_t sampler,            \
+                                           int ##n coord, uint sampler_offset);            \
+-  OVERLOADABLE uint4 __gen_ocl_read_imageui(image_type image, sampler_t sampler,          \
++  OVERLOADABLE uint4 __gen_ocl_read_imageui(read_only image_type image, sampler_t sampler,          \
+                                             float ##n coord, uint sampler_offset);        \
+-  OVERLOADABLE uint4 __gen_ocl_read_imageui(image_type image, sampler_t sampler,          \
++  OVERLOADABLE uint4 __gen_ocl_read_imageui(read_only image_type image, sampler_t sampler,          \
+                                             int ##n coord, uint sampler_offset);          \
+-  OVERLOADABLE float4 __gen_ocl_read_imagef(image_type image, sampler_t sampler,          \
++  OVERLOADABLE float4 __gen_ocl_read_imagef(read_only image_type image, sampler_t sampler,          \
+                                             float ##n coord, uint sampler_offset);        \
+-  OVERLOADABLE float4 __gen_ocl_read_imagef(image_type image, sampler_t sampler,          \
++  OVERLOADABLE float4 __gen_ocl_read_imagef(read_only image_type image, sampler_t sampler,          \
+                                             int ##n coord, uint sampler_offset);          \
+-  OVERLOADABLE void __gen_ocl_write_imagei(image_type image, int ##n coord , int4 color); \
+-  OVERLOADABLE void __gen_ocl_write_imageui(image_type image, int ##n coord, uint4 color);\
+-  OVERLOADABLE void __gen_ocl_write_imagef(image_type image, int ##n coord, float4 color);
++  OVERLOADABLE void __gen_ocl_write_imagei(write_only image_type image, int ##n coord , int4 color); \
++  OVERLOADABLE void __gen_ocl_write_imageui(write_only image_type image, int ##n coord, uint4 color);\
++  OVERLOADABLE void __gen_ocl_write_imagef(write_only image_type image, int ##n coord, float4 color);
+ 
+ #define DECL_GEN_OCL_QUERY_IMAGE(image_type) \
+   OVERLOADABLE int __gen_ocl_get_image_width(image_type image);                           \
+@@ -62,57 +62,104 @@ DECL_GEN_OCL_RW_IMAGE(image3d_t, 3)
+ DECL_GEN_OCL_RW_IMAGE(image2d_array_t, 4)
+ DECL_GEN_OCL_RW_IMAGE(image3d_t, 4)
+ 
+-DECL_GEN_OCL_QUERY_IMAGE(image1d_t)
+-DECL_GEN_OCL_QUERY_IMAGE(image1d_buffer_t)
+-DECL_GEN_OCL_QUERY_IMAGE(image1d_array_t)
+-DECL_GEN_OCL_QUERY_IMAGE(image2d_t)
+-DECL_GEN_OCL_QUERY_IMAGE(image2d_array_t)
+-DECL_GEN_OCL_QUERY_IMAGE(image3d_t)
++DECL_GEN_OCL_QUERY_IMAGE(read_only image1d_t)
++DECL_GEN_OCL_QUERY_IMAGE(read_only image1d_buffer_t)
++DECL_GEN_OCL_QUERY_IMAGE(read_only image1d_array_t)
++DECL_GEN_OCL_QUERY_IMAGE(read_only image2d_t)
++DECL_GEN_OCL_QUERY_IMAGE(read_only image2d_array_t)
++DECL_GEN_OCL_QUERY_IMAGE(read_only image3d_t)
++
++#if __clang_major__*10 + __clang_minor__ >= 39
++DECL_GEN_OCL_QUERY_IMAGE(write_only image1d_t)
++DECL_GEN_OCL_QUERY_IMAGE(write_only image1d_buffer_t)
++DECL_GEN_OCL_QUERY_IMAGE(write_only image1d_array_t)
++DECL_GEN_OCL_QUERY_IMAGE(write_only image2d_t)
++DECL_GEN_OCL_QUERY_IMAGE(write_only image2d_array_t)
++DECL_GEN_OCL_QUERY_IMAGE(write_only image3d_t)
++#endif
+ ///////////////////////////////////////////////////////////////////////////////
+ // helper functions to validate array index.
+ ///////////////////////////////////////////////////////////////////////////////
+-INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord, image1d_array_t image)
++INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord, read_only image1d_array_t image)
+ {
+   float array_size = __gen_ocl_get_image_depth(image);
+   coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f);
+   return coord;
+ }
+ 
+-INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord, image2d_array_t image)
++INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord, read_only image2d_array_t image)
+ {
+   float array_size = __gen_ocl_get_image_depth(image);
+   coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
+   return coord;
+ }
+ 
+-INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord, image2d_array_t image)
++INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord, read_only image2d_array_t image)
+ {
+   float array_size = __gen_ocl_get_image_depth(image);
+   coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
+   return coord;
+ }
+ 
+-INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord, image1d_array_t image)
++INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord, read_only image1d_array_t image)
+ {
+   int array_size = __gen_ocl_get_image_depth(image);
+   coord.s1 = clamp(coord.s1, 0, array_size - 1);
+   return coord;
+ }
+ 
+-INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord, image2d_array_t image)
++INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord, read_only image2d_array_t image)
+ {
+   int array_size = __gen_ocl_get_image_depth(image);
+   coord.s2 = clamp(coord.s2, 0, array_size - 1);
+   return coord;
+ }
+ 
+-INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord, image2d_array_t image)
++INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord, read_only image2d_array_t image)
+ {
+   int array_size = __gen_ocl_get_image_depth(image);
+   coord.s2 = clamp(coord.s2, 0, array_size - 1);
+   return coord;
+ }
+ 
++#if __clang_major__*10 + __clang_minor__ >= 39
++INLINE_OVERLOADABLE float2 __gen_validate_array_index(float2 coord, write_only image1d_array_t image)
++{
++  float array_size = __gen_ocl_get_image_depth(image);
++  coord.s1 = clamp(rint(coord.s1), 0.f, array_size - 1.f);
++  return coord;
++}
++INLINE_OVERLOADABLE float4 __gen_validate_array_index(float4 coord, write_only image2d_array_t image)
++{
++  float array_size = __gen_ocl_get_image_depth(image);
++  coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
++  return coord;
++}
++INLINE_OVERLOADABLE float3 __gen_validate_array_index(float3 coord, write_only image2d_array_t image)
++{
++  float array_size = __gen_ocl_get_image_depth(image);
++  coord.s2 = clamp(rint(coord.s2), 0.f, array_size - 1.f);
++  return coord;
++}
++INLINE_OVERLOADABLE int2 __gen_validate_array_index(int2 coord, write_only image1d_array_t image)
++{
++  int array_size = __gen_ocl_get_image_depth(image);
++  coord.s1 = clamp(coord.s1, 0, array_size - 1);
++  return coord;
++}
++INLINE_OVERLOADABLE int4 __gen_validate_array_index(int4 coord, write_only image2d_array_t image)
++{
++  int array_size = __gen_ocl_get_image_depth(image);
++  coord.s2 = clamp(coord.s2, 0, array_size - 1);
++  return coord;
++}
++INLINE_OVERLOADABLE int3 __gen_validate_array_index(int3 coord, write_only image2d_array_t image)
++{
++  int array_size = __gen_ocl_get_image_depth(image);
++  coord.s2 = clamp(coord.s2, 0, array_size - 1);
++  return coord;
++}
++#endif
+ // For non array image type, we need to do nothing.
+ #define GEN_VALIDATE_ARRAY_INDEX(coord_type, image_type) \
+ INLINE_OVERLOADABLE coord_type __gen_validate_array_index(coord_type coord, image_type image) \
+@@ -120,17 +167,29 @@ INLINE_OVERLOADABLE coord_type __gen_val
+   return coord; \
+ }
+ 
+-GEN_VALIDATE_ARRAY_INDEX(float, image1d_t)
+-GEN_VALIDATE_ARRAY_INDEX(int, image1d_t)
+-GEN_VALIDATE_ARRAY_INDEX(float2, image2d_t)
+-GEN_VALIDATE_ARRAY_INDEX(int2, image2d_t)
+-GEN_VALIDATE_ARRAY_INDEX(float4, image3d_t)
+-GEN_VALIDATE_ARRAY_INDEX(int4, image3d_t)
+-GEN_VALIDATE_ARRAY_INDEX(float3, image3d_t)
+-GEN_VALIDATE_ARRAY_INDEX(int3, image3d_t)
+-GEN_VALIDATE_ARRAY_INDEX(float, image1d_buffer_t)
+-GEN_VALIDATE_ARRAY_INDEX(int, image1d_buffer_t)
+-
++GEN_VALIDATE_ARRAY_INDEX(float, read_only image1d_t)
++GEN_VALIDATE_ARRAY_INDEX(int, read_only image1d_t)
++GEN_VALIDATE_ARRAY_INDEX(float2, read_only image2d_t)
++GEN_VALIDATE_ARRAY_INDEX(int2, read_only image2d_t)
++GEN_VALIDATE_ARRAY_INDEX(float4, read_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(int4, read_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(float3, read_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(int3, read_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(float, read_only image1d_buffer_t)
++GEN_VALIDATE_ARRAY_INDEX(int, read_only image1d_buffer_t)
++
++#if __clang_major__*10 + __clang_minor__ >= 39
++GEN_VALIDATE_ARRAY_INDEX(float, write_only image1d_t)
++GEN_VALIDATE_ARRAY_INDEX(int, write_only image1d_t)
++GEN_VALIDATE_ARRAY_INDEX(float2, write_only image2d_t)
++GEN_VALIDATE_ARRAY_INDEX(int2, write_only image2d_t)
++GEN_VALIDATE_ARRAY_INDEX(float4, write_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(int4, write_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(float3, write_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(int3, write_only image3d_t)
++GEN_VALIDATE_ARRAY_INDEX(float, write_only image1d_buffer_t)
++GEN_VALIDATE_ARRAY_INDEX(int, write_only image1d_buffer_t)
++#endif
+ ///////////////////////////////////////////////////////////////////////////////
+ // Helper functions to work around some coordiate boundary issues.
+ // The major issue on Gen7/Gen7.5 are the sample message could not sampling
+@@ -293,7 +352,7 @@ INLINE_OVERLOADABLE float3 __gen_fixup_n
+ // For integer coordinates
+ #define DECL_READ_IMAGE0(int_clamping_fix, image_type,                        \
+                          image_data_type, suffix, coord_type, n)              \
+-  OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image,       \
++  OVERLOADABLE image_data_type read_image ##suffix(read_only image_type cl_image,       \
+                                         const sampler_t sampler,              \
+                                         coord_type coord)                     \
+   {                                                                           \
+@@ -308,7 +367,7 @@ INLINE_OVERLOADABLE float3 __gen_fixup_n
+ // For float coordinates
+ #define DECL_READ_IMAGE1(int_clamping_fix, image_type,                        \
+                          image_data_type, suffix, coord_type, n)              \
+-  OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image,       \
++  OVERLOADABLE image_data_type read_image ##suffix(read_only image_type cl_image,       \
+                                         const sampler_t sampler,              \
+                                         coord_type coord)                     \
+   {                                                                           \
+@@ -333,7 +392,7 @@ INLINE_OVERLOADABLE float3 __gen_fixup_n
+ 
+ #define DECL_READ_IMAGE_NOSAMPLER(image_type, image_data_type,                \
+                                   suffix, coord_type, n)                      \
+-  OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image,       \
++  OVERLOADABLE image_data_type read_image ##suffix(read_only image_type cl_image,       \
+                                                coord_type coord)              \
+   {                                                                           \
+     coord = __gen_validate_array_index(coord, cl_image);                      \
+@@ -344,7 +403,7 @@ INLINE_OVERLOADABLE float3 __gen_fixup_n
+   }
+ 
+ #define DECL_WRITE_IMAGE(image_type, image_data_type, suffix, coord_type)     \
+-  OVERLOADABLE void write_image ##suffix(image_type cl_image,                 \
++  OVERLOADABLE void write_image ##suffix(write_only image_type cl_image,                 \
+                                          coord_type coord,                    \
+                                          image_data_type color)               \
+   {                                                                           \
+@@ -375,7 +434,7 @@ DECL_IMAGE_TYPE(image2d_array_t, 3)
+ 
+ #define DECL_READ_IMAGE1D_BUFFER_NOSAMPLER(image_type, image_data_type,       \
+                                   suffix, coord_type)                         \
+-  OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image,       \
++  OVERLOADABLE image_data_type read_image ##suffix(read_only image_type cl_image,       \
+                                                coord_type coord)              \
+   {                                                                           \
+     sampler_t defaultSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \
+@@ -388,7 +447,7 @@ DECL_IMAGE_TYPE(image2d_array_t, 3)
+   }
+ 
+ #define DECL_WRITE_IMAGE1D_BUFFER(image_type, image_data_type, suffix, coord_type)     \
+-  OVERLOADABLE void write_image ##suffix(image_type cl_image,                 \
++  OVERLOADABLE void write_image ##suffix(write_only image_type cl_image,                 \
+                                          coord_type coord,                    \
+                                          image_data_type color)               \
+   {                                                                           \
+@@ -493,69 +552,123 @@ DECL_IMAGE_1DArray(0, float4, f)
+ #define DECL_IMAGE_INFO_COMMON(image_type)                                    \
+   OVERLOADABLE  int get_image_channel_data_type(image_type image)             \
+   {                                                                           \
+-    return __gen_ocl_get_image_channel_data_type(image);                 \
++    return __gen_ocl_get_image_channel_data_type(image);                      \
+   }                                                                           \
+   OVERLOADABLE  int get_image_channel_order(image_type image)                 \
+   {                                                                           \
+-    return __gen_ocl_get_image_channel_order(image);                     \
++    return __gen_ocl_get_image_channel_order(image);                          \
+   }                                                                           \
+   OVERLOADABLE int get_image_width(image_type image)                          \
+   {                                                                           \
+-    return __gen_ocl_get_image_width(image);                             \
++    return __gen_ocl_get_image_width(image);                                  \
+   }
+ 
+-DECL_IMAGE_INFO_COMMON(image1d_t)
+-DECL_IMAGE_INFO_COMMON(image1d_buffer_t)
+-DECL_IMAGE_INFO_COMMON(image1d_array_t)
+-DECL_IMAGE_INFO_COMMON(image2d_t)
+-DECL_IMAGE_INFO_COMMON(image3d_t)
+-DECL_IMAGE_INFO_COMMON(image2d_array_t)
++DECL_IMAGE_INFO_COMMON(read_only image1d_t)
++DECL_IMAGE_INFO_COMMON(read_only image1d_buffer_t)
++DECL_IMAGE_INFO_COMMON(read_only image1d_array_t)
++DECL_IMAGE_INFO_COMMON(read_only image2d_t)
++DECL_IMAGE_INFO_COMMON(read_only image3d_t)
++DECL_IMAGE_INFO_COMMON(read_only image2d_array_t)
++
++#if __clang_major__*10 + __clang_minor__ >= 39
++DECL_IMAGE_INFO_COMMON(write_only image1d_t)
++DECL_IMAGE_INFO_COMMON(write_only image1d_buffer_t)
++DECL_IMAGE_INFO_COMMON(write_only image1d_array_t)
++DECL_IMAGE_INFO_COMMON(write_only image2d_t)
++DECL_IMAGE_INFO_COMMON(write_only image3d_t)
++DECL_IMAGE_INFO_COMMON(write_only image2d_array_t)
++#endif
+ 
+ // 2D extra Info
+-OVERLOADABLE int get_image_height(image2d_t image)
++OVERLOADABLE int get_image_height(read_only image2d_t image)
+ {
+   return __gen_ocl_get_image_height(image);
+ }
+-OVERLOADABLE int2 get_image_dim(image2d_t image)
++OVERLOADABLE int2 get_image_dim(read_only image2d_t image)
+ {
+   return (int2){get_image_width(image), get_image_height(image)};
+ }
++#if __clang_major__*10 + __clang_minor__ >= 39
++OVERLOADABLE int get_image_height(write_only image2d_t image)
++{
++  return __gen_ocl_get_image_height(image);
++}
++OVERLOADABLE int2 get_image_dim(write_only image2d_t image)
++{
++  return (int2){get_image_width(image), get_image_height(image)};
++}
++#endif
+ // End of 2D
+ 
+ // 3D extra Info
+-OVERLOADABLE int get_image_height(image3d_t image)
++OVERLOADABLE int get_image_height(read_only image3d_t image)
+ {
+   return __gen_ocl_get_image_height(image);
+ }
+-OVERLOADABLE int get_image_depth(image3d_t image)
++OVERLOADABLE int get_image_depth(read_only image3d_t image)
+ {
+   return __gen_ocl_get_image_depth(image);
+ }
+-OVERLOADABLE int4 get_image_dim(image3d_t image)
++OVERLOADABLE int4 get_image_dim(read_only image3d_t image)
+ {
+   return (int4) (get_image_width(image),
+                  get_image_height(image),
+                  get_image_depth(image),
+                  0);
+ }
+-
++#if __clang_major__*10 + __clang_minor__ >= 39
++OVERLOADABLE int get_image_height(write_only image3d_t image)
++{
++  return __gen_ocl_get_image_height(image);
++}
++OVERLOADABLE int get_image_depth(write_only image3d_t image)
++{
++  return __gen_ocl_get_image_depth(image);
++}
++OVERLOADABLE int4 get_image_dim(write_only image3d_t image)
++{
++  return (int4) (get_image_width(image),
++                 get_image_height(image),
++                 get_image_depth(image),
++                 0);
++}
++#endif
+ // 2D Array extra Info
+-OVERLOADABLE int get_image_height(image2d_array_t image)
++OVERLOADABLE int get_image_height(read_only image2d_array_t image)
+ {
+   return __gen_ocl_get_image_height(image);
+ }
+-OVERLOADABLE int2 get_image_dim(image2d_array_t image)
++OVERLOADABLE int2 get_image_dim(read_only image2d_array_t image)
+ {
+   return (int2){get_image_width(image), get_image_height(image)};
+ }
+-OVERLOADABLE size_t get_image_array_size(image2d_array_t image)
++OVERLOADABLE size_t get_image_array_size(read_only image2d_array_t image)
+ {
+   return __gen_ocl_get_image_depth(image);
+ }
+-
++#if __clang_major__*10 + __clang_minor__ >= 39
++OVERLOADABLE int get_image_height(write_only image2d_array_t image)
++{
++  return __gen_ocl_get_image_height(image);
++}
++OVERLOADABLE int2 get_image_dim(write_only image2d_array_t image)
++{
++  return (int2){get_image_width(image), get_image_height(image)};
++}
++OVERLOADABLE size_t get_image_array_size(write_only image2d_array_t image)
++{
++  return __gen_ocl_get_image_depth(image);
++}
++#endif
+ // 1D Array info
+-OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
++OVERLOADABLE size_t get_image_array_size(read_only image1d_array_t image)
+ {
+   return __gen_ocl_get_image_depth(image);
+ }
++#if __clang_major__*10 + __clang_minor__ >= 39
++OVERLOADABLE size_t get_image_array_size(write_only image1d_array_t image)
++{
++  return __gen_ocl_get_image_depth(image);
++}
++#endif
+ // End of 1DArray
+--- a/backend/src/libocl/tmpl/ocl_defines.tmpl.h
++++ b/backend/src/libocl/tmpl/ocl_defines.tmpl.h
+@@ -27,15 +27,5 @@
+ #define __kernel_exec(X, TYPE) __kernel __attribute__((work_group_size_hint(X,1,1))) \
+                                         __attribute__((vec_type_hint(TYPE)))
+ #define kernel_exec(X, TYPE) __kernel_exec(X, TYPE)
+-#define cl_khr_global_int32_base_atomics
+-#define cl_khr_global_int32_extended_atomics
+-#define cl_khr_local_int32_base_atomics
+-#define cl_khr_local_int32_extended_atomics
+-#define cl_khr_byte_addressable_store
+-#define cl_khr_icd
+-#define cl_khr_gl_sharing
+-#define cl_khr_spir
+-#define cl_khr_fp16
+-#define cl_khr_3d_image_writes
+ 
+ #endif /* end of __OCL_COMMON_DEF_H__ */
+--- a/backend/src/llvm/llvm_bitcode_link.cpp
++++ b/backend/src/llvm/llvm_bitcode_link.cpp
+@@ -145,6 +145,7 @@ namespace gbe
+       return NULL;
+ 
+     std::vector<const char *> kernels;
++    std::vector<const char *> kerneltmp;
+     std::vector<const char *> builtinFuncs;
+     /* Add the memset and memcpy functions here. */
+     builtinFuncs.push_back("__gen_memcpy_gg");
+@@ -184,7 +185,12 @@ namespace gbe
+     for (Module::iterator SF = mod->begin(), E = mod->end(); SF != E; ++SF) {
+       if (SF->isDeclaration()) continue;
+       if (!isKernelFunction(*SF)) continue;
+-      kernels.push_back(SF->getName().data());
++      // mod will be deleted after link, copy the names.
++      const char *funcName = SF->getName().data();
++      char * tmp = new char[strlen(funcName)+1];
++      strcpy(tmp,funcName);
++      kernels.push_back(tmp);
++      kerneltmp.push_back(tmp);
+ 
+       if (!materializedFuncCall(*mod, *clonedLib, *SF, materializedFuncs, Gvs)) {
+         delete clonedLib;
+@@ -273,7 +279,11 @@ namespace gbe
+     /* We use beignet's bitcode as dst because it will have a lot of
+        lazy functions which will not be loaded. */
+     char* errorMsg;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9 /* LLVMLinkModules2 has no message out-parameter */
++    errorMsg = const_cast<char *>("(see LLVM diagnostic output)"); if(LLVMLinkModules2(wrap(clonedLib), wrap(mod))) {
++#else /* older LLVM: LLVMLinkModules fills errorMsg itself on failure */
+     if(LLVMLinkModules(wrap(clonedLib), wrap(mod), LLVMLinkerDestroySource, &errorMsg)) {
++#endif /* errorMsg is a valid C string on either path before the printf below */
+       delete clonedLib;
+       printf("Fatal Error: link the bitcode error:\n%s\n", errorMsg);
+       return NULL;
+@@ -284,11 +294,25 @@ namespace gbe
+     llvm::PassManager passes;
+ #endif
+ 
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=9
++    auto PreserveKernel = [=](const GlobalValue &GV) { /* true only for kernel entry points, which must stay external */
++      for(size_t i = 0;i < kernels.size(); ++i)
++        if(strcmp(GV.getName().data(), kernels[i]) == 0) /* strcmp() returns 0 on a match */
++          return true;
++      return false;
++    };
++
++    passes.add(createInternalizePass(PreserveKernel));
++#else
+     passes.add(createInternalizePass(kernels));
++#endif
+     passes.add(createGlobalDCEPass());
+ 
+     passes.run(*clonedLib);
+ 
++    for(size_t i = 0;i < kerneltmp.size(); i++)
++      delete[] kerneltmp[i];
++
+     return clonedLib;
+   }
+ 
+--- a/backend/src/llvm/llvm_gen_backend.cpp
++++ b/backend/src/llvm/llvm_gen_backend.cpp
+@@ -1230,6 +1230,10 @@ namespace gbe
+     }
+     MDNode *typeNameNode = NULL;
+     MDNode *typeBaseNameNode = NULL;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++    typeNameNode = F.getMetadata("kernel_arg_type");
++    typeBaseNameNode = F.getMetadata("kernel_arg_base_type");
++#else
+     MDNode *node = getKernelFunctionMetadata(&F);
+     for(uint j = 0;node && j < node->getNumOperands() - 1; j++) {
+       MDNode *attrNode = dyn_cast_or_null<MDNode>(node->getOperand(1 + j));
+@@ -1243,15 +1247,21 @@ namespace gbe
+         typeBaseNameNode = attrNode;
+       }
+     }
++#endif
+ 
+     unsigned argID = 0;
+     ir::FunctionArgument::InfoFromLLVM llvmInfo;
+     for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, argID++) {
++      unsigned opID = argID;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9
++      opID += 1;
++#endif
++
+       if(typeNameNode) {
+-        llvmInfo.typeName= (cast<MDString>(typeNameNode->getOperand(1 + argID)))->getString();
++        llvmInfo.typeName= (cast<MDString>(typeNameNode->getOperand(opID)))->getString();
+       }
+       if(typeBaseNameNode) {
+-        llvmInfo.typeBaseName= (cast<MDString>(typeBaseNameNode->getOperand(1 + argID)))->getString();
++        llvmInfo.typeBaseName= (cast<MDString>(typeBaseNameNode->getOperand(opID)))->getString();
+       }
+       bool isImage = llvmInfo.isImageType();
+       if (I->getType()->isPointerTy() || isImage) {
+@@ -1974,6 +1984,92 @@ namespace gbe
+ 
+     std::string functionAttributes;
+ 
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++    /* Since LLVM 3.9, kernel argument info is attached as function-level metadata. */
++    addrSpaceNode = F.getMetadata("kernel_arg_addr_space");
++    accessQualNode = F.getMetadata("kernel_arg_access_qual");
++    typeNameNode = F.getMetadata("kernel_arg_type");
++    typeBaseNameNode = F.getMetadata("kernel_arg_base_type");
++    typeQualNode = F.getMetadata("kernel_arg_type_qual");
++    argNameNode = F.getMetadata("kernel_arg_name");
++    MDNode *attrNode;
++    if ((attrNode = F.getMetadata("vec_type_hint"))) {
++      GBE_ASSERT(attrNode->getNumOperands() == 2);
++      functionAttributes += "vec_type_hint";
++      auto *Op1 = cast<ValueAsMetadata>(attrNode->getOperand(0));
++      Value *V = Op1 ? Op1->getValue() : NULL;
++      ConstantInt *sign =
++          mdconst::extract<ConstantInt>(attrNode->getOperand(1));
++      size_t signValue = sign->getZExtValue();
++      Type *vtype = V->getType();
++      Type *stype = vtype;
++      uint32_t elemNum = 0;
++      if (vtype->isVectorTy()) {
++        VectorType *vectorType = cast<VectorType>(vtype);
++        stype = vectorType->getElementType();
++        elemNum = vectorType->getNumElements();
++      }
++
++      std::string typeName = getTypeName(ctx, stype, signValue);
++
++      std::stringstream param;
++      char buffer[100] = {0};
++      param << "(";
++      param << typeName;
++      if (vtype->isVectorTy())
++        param << elemNum;
++      param << ")";
++      param >> buffer;
++      functionAttributes += buffer;
++      functionAttributes += " ";
++    }
++    if ((attrNode = F.getMetadata("reqd_work_group_size"))) {
++      GBE_ASSERT(attrNode->getNumOperands() == 3);
++      ConstantInt *x = mdconst::extract<ConstantInt>(attrNode->getOperand(0));
++      ConstantInt *y = mdconst::extract<ConstantInt>(attrNode->getOperand(1));
++      ConstantInt *z = mdconst::extract<ConstantInt>(attrNode->getOperand(2));
++      GBE_ASSERT(x && y && z);
++      reqd_wg_sz[0] = x->getZExtValue();
++      reqd_wg_sz[1] = y->getZExtValue();
++      reqd_wg_sz[2] = z->getZExtValue();
++      functionAttributes += "reqd_work_group_size";
++      std::stringstream param;
++      char buffer[100] = {0};
++      param << "(";
++      param << reqd_wg_sz[0];
++      param << ",";
++      param << reqd_wg_sz[1];
++      param << ",";
++      param << reqd_wg_sz[2];
++      param << ")";
++      param >> buffer;
++      functionAttributes += buffer;
++      functionAttributes += " ";
++    }
++    if ((attrNode = F.getMetadata("work_group_size_hint"))) {
++      GBE_ASSERT(attrNode->getNumOperands() == 3);
++      ConstantInt *x = mdconst::extract<ConstantInt>(attrNode->getOperand(0));
++      ConstantInt *y = mdconst::extract<ConstantInt>(attrNode->getOperand(1));
++      ConstantInt *z = mdconst::extract<ConstantInt>(attrNode->getOperand(2));
++      GBE_ASSERT(x && y && z);
++      hint_wg_sz[0] = x->getZExtValue();
++      hint_wg_sz[1] = y->getZExtValue();
++      hint_wg_sz[2] = z->getZExtValue();
++      functionAttributes += "work_group_size_hint";
++      std::stringstream param;
++      char buffer[100] = {0};
++      param << "(";
++      param << hint_wg_sz[0];
++      param << ",";
++      param << hint_wg_sz[1];
++      param << ",";
++      param << hint_wg_sz[2];
++      param << ")";
++      param >> buffer;
++      functionAttributes += buffer;
++      functionAttributes += " ";
++    }
++#else
+     /* First find the meta data belong to this function. */
+     MDNode *node = getKernelFunctionMetadata(&F);
+ 
+@@ -2095,6 +2191,7 @@ namespace gbe
+         functionAttributes += " ";
+       }
+     }
++#endif /* LLVM 3.9 Function metadata */
+ 
+     ctx.getFunction().setCompileWorkGroupSize(reqd_wg_sz[0], reqd_wg_sz[1], reqd_wg_sz[2]);
+ 
+@@ -2110,29 +2207,33 @@ namespace gbe
+       const AttrListPtr &PAL = F.getAttributes();
+ #endif /* LLVM_VERSION_MINOR <= 1 */
+       for (; I != E; ++I, ++argID) {
++        uint32_t opID = argID;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9
++        opID += 1;
++#endif
+         const std::string &argName = I->getName().str();
+         Type *type = I->getType();
+         if(addrSpaceNode) {
+ #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 5
+-          llvmInfo.addrSpace = (cast<ConstantInt>(addrSpaceNode->getOperand(1 + argID)))->getZExtValue();
++          llvmInfo.addrSpace = (cast<ConstantInt>(addrSpaceNode->getOperand(opID)))->getZExtValue();
+ #else
+-          llvmInfo.addrSpace = (mdconst::extract<ConstantInt>(addrSpaceNode->getOperand(1 + argID)))->getZExtValue();
++          llvmInfo.addrSpace = (mdconst::extract<ConstantInt>(addrSpaceNode->getOperand(opID)))->getZExtValue();
+ #endif
+         }
+         if(typeNameNode) {
+-          llvmInfo.typeName = (cast<MDString>(typeNameNode->getOperand(1 + argID)))->getString();
++          llvmInfo.typeName = (cast<MDString>(typeNameNode->getOperand(opID)))->getString();
+         }
+         if(typeBaseNameNode){
+-          llvmInfo.typeBaseName = (cast<MDString>(typeBaseNameNode->getOperand(1 + argID)))->getString();
++          llvmInfo.typeBaseName = (cast<MDString>(typeBaseNameNode->getOperand(opID)))->getString();
+         }
+         if(accessQualNode) {
+-          llvmInfo.accessQual = (cast<MDString>(accessQualNode->getOperand(1 + argID)))->getString();
++          llvmInfo.accessQual = (cast<MDString>(accessQualNode->getOperand(opID)))->getString();
+         }
+         if(typeQualNode) {
+-          llvmInfo.typeQual = (cast<MDString>(typeQualNode->getOperand(1 + argID)))->getString();
++          llvmInfo.typeQual = (cast<MDString>(typeQualNode->getOperand(opID)))->getString();
+         }
+         if(argNameNode){
+-          llvmInfo.argName = (cast<MDString>(argNameNode->getOperand(1 + argID)))->getString();
++          llvmInfo.argName = (cast<MDString>(argNameNode->getOperand(opID)))->getString();
+         }
+ 
+         // function arguments are uniform values.
+--- a/backend/src/llvm/llvm_includes.hpp
++++ b/backend/src/llvm/llvm_includes.hpp
+@@ -127,4 +127,9 @@
+ #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+ #endif
+ 
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++#include "llvm/Transforms/IPO/FunctionAttrs.h"
++#include "llvm/Transforms/Scalar/GVN.h"
++#endif
++
+ #endif /* __GBE_IR_LLVM_INCLUDES_HPP__ */
+--- a/backend/src/llvm/llvm_passes.cpp
++++ b/backend/src/llvm/llvm_passes.cpp
+@@ -41,9 +41,12 @@ using namespace llvm;
+ namespace gbe
+ {
+   bool isKernelFunction(const llvm::Function &F) {
++    bool bKernel = false;
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++    bKernel = F.getMetadata("kernel_arg_name") != NULL;
++#else
+     const Module *module = F.getParent();
+     const Module::NamedMDListType& globalMD = module->getNamedMDList();
+-    bool bKernel = false;
+     for(auto i = globalMD.begin(); i != globalMD.end(); i++) {
+       const NamedMDNode &md = *i;
+       if(strcmp(md.getName().data(), "opencl.kernels") != 0) continue;
+@@ -58,6 +61,7 @@ namespace gbe
+         if(op == &F) bKernel = true;
+       }
+     }
++#endif
+     return bKernel;
+   }
+ 
+--- a/backend/src/llvm/llvm_to_gen.cpp
++++ b/backend/src/llvm/llvm_to_gen.cpp
+@@ -46,6 +46,13 @@ namespace gbe
+   BVAR(OCL_OUTPUT_CFG_GEN_IR, false);
+   using namespace llvm;
+ 
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++  llvm::LLVMContext& GBEGetLLVMContext() {
++    static llvm::LLVMContext GBEContext;
++    return GBEContext;
++  }
++#endif
++
+ #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
+   #define TARGETLIBRARY  TargetLibraryInfoImpl
+ #else
+@@ -142,7 +149,9 @@ namespace gbe
+     MPM.add(createBarrierNodupPass(false));   // remove noduplicate fnAttr before inlining.
+     MPM.add(createFunctionInliningPass(20000));
+     MPM.add(createBarrierNodupPass(true));    // restore noduplicate fnAttr after inlining.
+-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++    MPM.add(createPostOrderFunctionAttrsLegacyPass());
++#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8
+     MPM.add(createPostOrderFunctionAttrsPass());       // Set readonly/readnone attrs
+ #else
+     MPM.add(createFunctionAttrsPass());       // Set readonly/readnone attrs
+@@ -294,7 +303,11 @@ namespace gbe
+     if (module) {
+       cl_mod = reinterpret_cast<Module*>(const_cast<void*>(module));
+     } else if (fileName){
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++      llvm::LLVMContext& c = GBEGetLLVMContext();
++#else
+       llvm::LLVMContext& c = llvm::getGlobalContext();
++#endif
+ #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
+       cl_mod = parseIRFile(fileName, Err, c).release();
+ #else
+@@ -349,7 +362,11 @@ namespace gbe
+     passes.add(createIntrinsicLoweringPass());
+     passes.add(createStripAttributesPass());     // Strip unsupported attributes and calling conventions.
+     passes.add(createFunctionInliningPass(20000));
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
++    passes.add(createSROAPass());
++#else
+     passes.add(createScalarReplAggregatesPass(64, true, -1, -1, 64));
++#endif
+     passes.add(createLoadStoreOptimizationPass());
+     passes.add(createConstantPropagationPass());
+     passes.add(createPromoteMemoryToRegisterPass());
+--- a/backend/src/llvm/llvm_to_gen.hpp
++++ b/backend/src/llvm/llvm_to_gen.hpp
+@@ -23,6 +23,9 @@
+  */
+ #ifndef __GBE_IR_LLVM_TO_GEN_HPP__
+ #define __GBE_IR_LLVM_TO_GEN_HPP__
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++#include "llvm/IR/LLVMContext.h"
++#endif
+ 
+ namespace gbe {
+   namespace ir {
+@@ -34,6 +37,9 @@ namespace gbe {
+ 		  optLevel 0 equal to clang -O1 and 1 equal to clang -O2*/
+   bool llvmToGen(ir::Unit &unit, const char *fileName, const void* module,
+                  int optLevel, bool strictMath, int profiling, std::string &errors);
++#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
++  extern llvm::LLVMContext& GBEGetLLVMContext();
++#endif
+ 
+ } /* namespace gbe */
+ 
+--- a/kernels/test_get_arg_info.cl
++++ b/kernels/test_get_arg_info.cl
+@@ -3,6 +3,6 @@ typedef struct _test_arg_struct {
+     int b;
+ }test_arg_struct;
+ 
+-kernel void test_get_arg_info(read_only global float const volatile *src, read_write local int *dst, test_arg_struct extra) {
++kernel void test_get_arg_info(global float const volatile *src, local int *dst, test_arg_struct extra) {
+ 
+ }
+--- a/src/kernels/cl_internal_copy_buffer_to_image_2d.cl
++++ b/src/kernels/cl_internal_copy_buffer_to_image_2d.cl
+@@ -1,4 +1,4 @@
+-kernel void __cl_copy_buffer_to_image_2d(__read_only image2d_t image, global uchar* buffer,
++kernel void __cl_copy_buffer_to_image_2d(__write_only image2d_t image, global uchar* buffer,
+                                         unsigned int region0, unsigned int region1, unsigned int region2,
+                                         unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2,
+                                         unsigned int src_offset)
+--- a/src/kernels/cl_internal_copy_buffer_to_image_3d.cl
++++ b/src/kernels/cl_internal_copy_buffer_to_image_3d.cl
+@@ -1,4 +1,4 @@
+-kernel void __cl_copy_buffer_to_image_3d(__read_only image3d_t image, global uchar* buffer,
++kernel void __cl_copy_buffer_to_image_3d(__write_only image3d_t image, global uchar* buffer,
+                                         unsigned int region0, unsigned int region1, unsigned int region2,
+                                         unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2,
+                                         unsigned int src_offset)
+--- a/utests/CMakeLists.txt
++++ b/utests/CMakeLists.txt
+@@ -255,7 +255,6 @@ set (utests_sources
+   compiler_double_div.cpp
+   compiler_double_convert.cpp
+   load_program_from_gen_bin.cpp
+-  load_program_from_spir.cpp
+   get_arg_info.cpp
+   profiling_exec.cpp
+   enqueue_copy_buf.cpp
diff --git a/debian/patches/series b/debian/patches/series
index 176aa1a..0fc23b2 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -10,3 +10,4 @@ docs-broken-links.patch
 cl_accelerator_intel.patch
 support-python3.patch
 pow-powr-tests.patch
+llvm39-support.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git



More information about the Pkg-opencl-commits mailing list