[beignet] 01/08: Imported Upstream version 1.3.1

Rebecca Palmer rnpalmer-guest at moszumanska.debian.org
Tue Aug 1 21:38:03 UTC 2017


This is an automated email from the git hooks/post-receive script.

rnpalmer-guest pushed a commit to branch master
in repository beignet.

commit 388d63a6fe07a0f41aa6efa8dbb6bf8934e4df03
Author: Rebecca N. Palmer <rebecca_palmer at zoho.com>
Date:   Sun Jul 30 14:21:30 2017 +0100

    Imported Upstream version 1.3.1
---
 CMake/FindLLVM.cmake                               |   4 +-
 CMakeLists.txt                                     |  88 ++++++++++------
 backend/src/backend/gen_insn_compact.cpp           |   2 +
 .../src/backend/gen_insn_selection_optimize.cpp    |   5 +-
 backend/src/backend/program.cpp                    |  19 ++--
 backend/src/llvm/llvm_gen_backend.cpp              |  34 +++++-
 backend/src/llvm/llvm_passes.cpp                   |  19 ++--
 docs/Beignet.mdwn                                  |  26 +++--
 docs/NEWS.mdwn                                     |   3 +
 docs/howto/gl-buffer-sharing-howto.mdwn            |  82 +++++++++++++++
 examples/CMakeLists.txt                            |  80 ++++++++------
 examples/gl_buffer_sharing/gl_buffer_sharing.cpp   | 115 +++++++++++++++++++++
 kernels/runtime_fill_gl_image.cl                   |  15 +++
 src/cl_context.c                                   |   3 +-
 src/cl_device_data.h                               |   2 +
 src/cl_device_id.c                                 |  10 +-
 src/cl_device_id.h                                 |   2 +-
 src/cl_event.c                                     |   2 +-
 src/cl_gen75_device.h                              |   2 +-
 src/cl_gen7_device.h                               |   2 +-
 src/cl_gen8_device.h                               |   2 +-
 src/cl_gen9_device.h                               |   4 +-
 src/cl_gt_device.h                                 |  14 ++-
 src/cl_image.c                                     |   2 +
 src/cl_khr_icd.c                                   |  28 ++---
 src/cl_platform_id.c                               |   2 +-
 src/cl_platform_id.h                               |   6 +-
 src/cl_program.c                                   |   1 +
 utests/CMakeLists.txt                              |  22 ++--
 utests/compiler_atomic_functions_20.cpp            |   4 +-
 utests/compiler_device_enqueue.cpp                 |   4 +-
 utests/compiler_generic_atomic.cpp                 |   2 +
 utests/compiler_generic_pointer.cpp                |   2 +
 utests/compiler_pipe_builtin.cpp                   |   9 +-
 utests/compiler_program_global.cpp                 |   5 +-
 utests/compiler_sampler.cpp                        |   2 +
 utests/runtime_pipe_query.cpp                      |   2 +
 utests/utest_helper.cpp                            |  22 ++--
 utests/utest_helper.hpp                            |   6 +-
 39 files changed, 499 insertions(+), 155 deletions(-)

diff --git a/CMake/FindLLVM.cmake b/CMake/FindLLVM.cmake
index 6129909..5457f24 100644
--- a/CMake/FindLLVM.cmake
+++ b/CMake/FindLLVM.cmake
@@ -8,12 +8,12 @@
 # LLVM_FOUND       - True if llvm found.
 if (LLVM_INSTALL_DIR)
   find_program(LLVM_CONFIG_EXECUTABLE
-               NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
+               NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config-39 llvm-config-3.9 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
                DOC "llvm-config executable"
                PATHS ${LLVM_INSTALL_DIR} NO_DEFAULT_PATH)
 else (LLVM_INSTALL_DIR)
   find_program(LLVM_CONFIG_EXECUTABLE
-               NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
+               NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config-39 llvm-config-3.9 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
                DOC "llvm-config executable")
 endif (LLVM_INSTALL_DIR)
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 02b5d88..face3ce 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -195,23 +195,21 @@ ELSE(XFIXES_FOUND)
 ENDIF(XFIXES_FOUND)
 ENDIF(X11_FOUND)
 
-OPTION(ENABLE_GL_SHARING "cl_khr_gl_sharing" OFF)
-
-IF(ENABLE_GL_SHARING)
-  pkg_check_modules(OPENGL REQUIRED gl)
-  IF(OPENGL_FOUND)
-    MESSAGE(STATUS "Looking for OpenGL - found at ${OPENGL_PREFIX}")
-  ELSE(OPENGL_FOUND)
-    MESSAGE(STATUS "Looking for OpenGL - not found")
-  ENDIF(OPENGL_FOUND)
-  pkg_check_modules(EGL REQUIRED egl>=11.0.0)
-  IF(EGL_FOUND)
-    MESSAGE(STATUS "Looking for EGL - found at ${EGL_PREFIX}")
-  ELSE(EGL_FOUND)
-    MESSAGE(STATUS "Looking for EGL - not found")
-  ENDIF(EGL_FOUND)
-ENDIF(ENABLE_GL_SHARING)
-
+pkg_check_modules(OPENGL QUIET gl>=13.0.0)
+IF(OPENGL_FOUND)
+  MESSAGE(STATUS "Looking for OpenGL - found at ${OPENGL_PREFIX} ${OPENGL_VERSION}")
+ELSE(OPENGL_FOUND)
+  MESSAGE(STATUS "Looking for OpenGL (>=13.0.0) - not found, cl_khr_gl_sharing will be disabled")
+ENDIF(OPENGL_FOUND)
+pkg_check_modules(EGL QUIET egl>=13.0.0)
+IF(EGL_FOUND)
+  MESSAGE(STATUS "Looking for EGL - found at ${EGL_PREFIX} ${EGL_VERSION}")
+ELSE(EGL_FOUND)
+  MESSAGE(STATUS "Looking for EGL (>=13.0.0) - not found, cl_khr_gl_sharing will be disabled")
+ENDIF(EGL_FOUND)
+
+OPTION(OCLICD_COMPAT "OCL ICD compatibility mode" ON)
+IF(OCLICD_COMPAT)
 Find_Package(OCLIcd)
 IF(OCLIcd_FOUND)
   MESSAGE(STATUS "Looking for OCL ICD header file - found")
@@ -222,7 +220,9 @@ IF(OCLIcd_FOUND)
   install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${ICD_FILE_NAME} DESTINATION /etc/OpenCL/vendors)
 ELSE(OCLIcd_FOUND)
   MESSAGE(STATUS "Looking for OCL ICD header file - not found")
+  MESSAGE(FATAL_ERROR "OCL ICD loader miss. If you really want to disable OCL ICD support, please run cmake with option -DOCLICD_COMPAT=0.")
 ENDIF(OCLIcd_FOUND)
+ENDIF(OCLICD_COMPAT)
 
 Find_Package(PythonInterp)
 
@@ -231,20 +231,18 @@ IF (EXPERIMENTAL_DOUBLE)
   ADD_DEFINITIONS(-DENABLE_FP64)
 ENDIF(EXPERIMENTAL_DOUBLE)
 
-OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support" OFF)
-IF (ENABLE_OPENCL_20)
-  Find_Program(LSPCI lspci)
-  IF (NOT LSPCI)
-    MESSAGE(FATAL_ERROR "Looking for lspci - not found")
-  ENDIF (NOT LSPCI)
-  EXECUTE_PROCESS(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/GetGenID.sh"
-                          RESULT_VARIABLE SUPPORT_OCL20_DEVICE
-                          OUTPUT_VARIABLE PCI_ID_NOT_USED)
-
-  IF (NOT SUPPORT_OCL20_DEVICE EQUAL 1)
-    MESSAGE(FATAL_ERROR "Only SKL and newer devices support OpenCL 2.0 now, your device don't support.")
-  ENDIF (NOT SUPPORT_OCL20_DEVICE EQUAL 1)
+SET(CAN_OPENCL_20 ON)
+IF (CMAKE_SIZEOF_VOID_P EQUAL 4)
+  SET(CAN_OPENCL_20 OFF)
+ENDIF (CMAKE_SIZEOF_VOID_P EQUAL 4)
+IF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN)
+  SET(CAN_OPENCL_20 OFF)
+ENDIF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN)
+IF (LLVM_VERSION_NODOT VERSION_LESS 39)
+  SET(CAN_OPENCL_20 OFF)
+ENDIF (LLVM_VERSION_NODOT VERSION_LESS 39)
 
+IF (ENABLE_OPENCL_20)
   IF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN)
     MESSAGE(FATAL_ERROR "Please update libdrm to version 2.4.66 or later to enable OpenCL 2.0.")
   ENDIF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN)
@@ -253,12 +251,32 @@ IF (ENABLE_OPENCL_20)
     MESSAGE(FATAL_ERROR "Please update LLVM to version 3.9 or later to enable OpenCL 2.0.")
   ENDIF (LLVM_VERSION_NODOT VERSION_LESS 39)
 
-  ADD_DEFINITIONS(-DENABLE_OPENCL_20)
+  IF (CMAKE_SIZEOF_VOID_P EQUAL 4)
+    MESSAGE(FATAL_ERROR "Please use x64 host to enable OpenCL 2.0.")
+  ENDIF (CMAKE_SIZEOF_VOID_P EQUAL 4)
 ENDIF(ENABLE_OPENCL_20)
 
+IF (DEFINED ENABLE_OPENCL_20)
+  IF (ENABLE_OPENCL_20 AND CAN_OPENCL_20)
+    SET(CAN_OPENCL_20 ON)
+  ELSE(ENABLE_OPENCL_20 AND CAN_OPENCL_20)
+    SET(CAN_OPENCL_20 OFF)
+  ENDIF (ENABLE_OPENCL_20 AND CAN_OPENCL_20)
+ENDIF (DEFINED ENABLE_OPENCL_20)
+
+OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support" ${CAN_OPENCL_20})
+
+IF (CAN_OPENCL_20)
+  SET (ENABLE_OPENCL_20 ON)
+  MESSAGE(STATUS "Building with OpenCL 2.0.")
+  ADD_DEFINITIONS(-DENABLE_OPENCL_20)
+ELSE (CAN_OPENCL_20)
+  MESSAGE(STATUS "Building with OpenCL 1.2.")
+ENDIF(CAN_OPENCL_20)
+
 set (LIBCL_DRIVER_VERSION_MAJOR 1)
 set (LIBCL_DRIVER_VERSION_MINOR 3)
-set (LIBCL_DRIVER_VERSION_PATCH 0)
+set (LIBCL_DRIVER_VERSION_PATCH 1)
 if (ENABLE_OPENCL_20)
   set (LIBCL_C_VERSION_MAJOR 2)
   set (LIBCL_C_VERSION_MINOR 0)
@@ -279,7 +297,6 @@ IF(NOT X11_FOUND)
 ENDIF(NOT X11_FOUND)
 
 # libva & libva-x11
-#pkg_check_modules(LIBVA REQUIRED libva>=0.36.0)
 pkg_check_modules(LIBVA REQUIRED libva)
 pkg_check_modules(LIBVA-X11 REQUIRED libva-x11)
 set(LIBVA_BUF_SH_DEP false)
@@ -301,7 +318,12 @@ IF(LIBVA_FOUND AND LIBVA-X11_FOUND)
   ELSE(LIBVA_VERSION VERSION_LESS "0.36.0" OR LIBVA-X11_VERSION VERSION_LESS "0.36.0")
     set(LIBVA_BUF_SH_DEP true)
   ENDIF(LIBVA_VERSION VERSION_LESS "0.36.0" OR LIBVA-X11_VERSION VERSION_LESS "0.36.0")
+ELSE(LIBVA_FOUND AND LIBVA-X11_FOUND)
+  MESSAGE(STATUS "Example libva_buffer_sharing and v4l2_buffer_sharing will not be built")
 ENDIF(LIBVA_FOUND AND LIBVA-X11_FOUND)
+IF(NOT (OPENGL_FOUND AND EGL_FOUND AND X11_FOUND))
+  MESSAGE(STATUS "Example gl_buffer_sharing will not be built")
+ENDIF(NOT (OPENGL_FOUND AND EGL_FOUND AND X11_FOUND))
 ENDIF(BUILD_EXAMPLES)
 
 ADD_SUBDIRECTORY(include)
diff --git a/backend/src/backend/gen_insn_compact.cpp b/backend/src/backend/gen_insn_compact.cpp
index 62fcb61..22305f7 100644
--- a/backend/src/backend/gen_insn_compact.cpp
+++ b/backend/src/backend/gen_insn_compact.cpp
@@ -804,6 +804,8 @@ namespace gbe {
     if( control_index == -1) return false;
     if( src0.negation + src1.negation + src2.negation > 1)
       return false;
+    if( src0.absolute + src1.absolute + src2.absolute > 0)
+      return false;
 
     GenCompactInstruction *insn = p->nextCompact(opcode);
     insn->src3Insn.bits1.control_index = control_index;
diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp b/backend/src/backend/gen_insn_selection_optimize.cpp
index 512a5bd..d2e0fb9 100644
--- a/backend/src/backend/gen_insn_selection_optimize.cpp
+++ b/backend/src/backend/gen_insn_selection_optimize.cpp
@@ -162,7 +162,10 @@ namespace gbe
     assert(insn.opcode == SEL_OP_MOV);
     const GenRegister& src = insn.src(0);
     const GenRegister& dst = insn.dst(0);
-    if (src.type != dst.type || src.file != dst.file || src.hstride != dst.hstride)
+    if (src.type != dst.type || src.file != dst.file)
+      return;
+
+    if (src.hstride != GEN_HORIZONTAL_STRIDE_0 && src.hstride != dst.hstride )
       return;
 
     if (liveout.find(dst.reg()) != liveout.end())
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 85d0aa9..09c79d8 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -31,6 +31,7 @@
 #include "ir/value.hpp"
 #include "ir/unit.hpp"
 #include "ir/printf.hpp"
+#include "src/cl_device_data.h"
 
 #ifdef GBE_COMPILER_AVAILABLE
 #include "llvm/llvm_to_gen.hpp"
@@ -855,6 +856,7 @@ namespace gbe {
                                      size_t *errSize,
                                      uint32_t &oclVersion)
   {
+    uint32_t maxoclVersion = oclVersion;
     std::string pchFileName;
     bool findPCH = false;
 #if defined(__ANDROID__)
@@ -1022,15 +1024,9 @@ EXTEND_QUOTE:
     }
 
     if (useDefaultCLCVersion) {
-#ifdef ENABLE_OPENCL_20
-      clOpt.push_back("-D__OPENCL_C_VERSION__=200");
-      clOpt.push_back("-cl-std=CL2.0");
-      oclVersion = 200;
-#else
       clOpt.push_back("-D__OPENCL_C_VERSION__=120");
       clOpt.push_back("-cl-std=CL1.2");
       oclVersion = 120;
-#endif
     }
     //for clCompilerProgram usage.
     if(temp_header_path){
@@ -1061,7 +1057,12 @@ EXTEND_QUOTE:
       clOpt.push_back("-include-pch");
       clOpt.push_back(pchFileName);
     }
-
+    if (oclVersion > maxoclVersion){
+      if (err && stringSize > 0 && errSize) {
+         *errSize = snprintf(err, stringSize, "Requested OpenCL version %lf is higher than maximum supported version %lf\n", (float)oclVersion/100.0,(float)maxoclVersion/100.0);
+      }
+      return false;
+    }
     return true;
   }
 
@@ -1076,7 +1077,7 @@ EXTEND_QUOTE:
     std::vector<std::string> clOpt;
     std::string dumpLLVMFileName, dumpASMFileName;
     std::string dumpSPIRBinaryName;
-    uint32_t oclVersion = 0;
+    uint32_t oclVersion = MAX_OCLVERSION(deviceID);
     if (!processSourceAndOption(source, options, NULL, clOpt,
                                 dumpLLVMFileName, dumpASMFileName, dumpSPIRBinaryName,
                                 optLevel,
@@ -1139,7 +1140,7 @@ EXTEND_QUOTE:
     std::vector<std::string> clOpt;
     std::string dumpLLVMFileName, dumpASMFileName;
     std::string dumpSPIRBinaryName;
-    uint32_t oclVersion = 0;
+    uint32_t oclVersion = MAX_OCLVERSION(deviceID);
     if (!processSourceAndOption(source, options, temp_header_path, clOpt,
                                 dumpLLVMFileName, dumpASMFileName, dumpSPIRBinaryName,
                                 optLevel, stringSize, err, errSize, oclVersion))
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 664d2ff..3fefa92 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -308,7 +308,7 @@ namespace gbe
         if(StrTy)
           return getTypeByteSize(unit,StrTy);
       }
-      GBE_ASSERTM(false, "Unspported type name");
+      GBE_ASSERTM(false, "Unsupported type name");
       return 0;
   }
 #undef TYPESIZEVEC
@@ -3272,11 +3272,41 @@ namespace gbe
       case Instruction::Sub:
       case Instruction::FSub: ctx.SUB(type, dst, src0, src1); break;
       case Instruction::Mul:
+      {
+        //LLVM always put constant to src1, but also add the src0 constant check.
+        ConstantInt *c = dyn_cast<ConstantInt>(I.getOperand(0));
+        int index = 0;
+        if (c == NULL) {
+          c = dyn_cast<ConstantInt>(I.getOperand(0));
+          index = 1;
+        }
+        if (c != NULL && isPowerOf<2>(c->getSExtValue())) {
+          c = ConstantInt::get(c->getType(), logi2(c->getZExtValue()));
+          if(index == 0)
+            ctx.SHL(type, dst, src1, this->getRegister(c));
+          else
+            ctx.SHL(type, dst, src0, this->getRegister(c));
+        } else {
+          ctx.MUL(type, dst, src0, src1);
+        }
+        break;
+      }
       case Instruction::FMul: ctx.MUL(type, dst, src0, src1); break;
       case Instruction::URem: ctx.REM(getUnsignedType(ctx, I.getType()), dst, src0, src1); break;
       case Instruction::SRem:
       case Instruction::FRem: ctx.REM(type, dst, src0, src1); break;
-      case Instruction::UDiv: ctx.DIV(getUnsignedType(ctx, I.getType()), dst, src0, src1); break;
+      case Instruction::UDiv:
+      {
+        //Only check divisor for DIV
+        ConstantInt *c = dyn_cast<ConstantInt>(I.getOperand(1));
+        if (c != NULL && isPowerOf<2>(c->getZExtValue())) {
+          c = ConstantInt::get(c->getType(), logi2(c->getZExtValue()));
+          ctx.SHR(getUnsignedType(ctx, I.getType()), dst, src0, this->getRegister(c));
+        } else {
+          ctx.DIV(getUnsignedType(ctx, I.getType()), dst, src0, src1);
+        }
+        break;
+      }
       case Instruction::SDiv:
       case Instruction::FDiv: ctx.DIV(type, dst, src0, src1); break;
       case Instruction::And:  ctx.AND(type, dst, src0, src1); break;
diff --git a/backend/src/llvm/llvm_passes.cpp b/backend/src/llvm/llvm_passes.cpp
index 367a2c3..c5f3ffe 100644
--- a/backend/src/llvm/llvm_passes.cpp
+++ b/backend/src/llvm/llvm_passes.cpp
@@ -276,8 +276,6 @@ namespace gbe
         uint32_t align = getAlignmentByte(unit, elementType);
         size += getPadding(size, align);
 
-        Constant* newConstSize = 
-          ConstantInt::get(IntegerType::get(GEPInst->getContext(), ptrSize), size);
 
         Value *operand = GEPInst->getOperand(op); 
 
@@ -308,13 +306,22 @@ namespace gbe
           }
         }
 #endif
-        Value* tmpMul = operand;
+        Value* tmpOffset = operand;
         if (size != 1) {
-          tmpMul = BinaryOperator::Create(Instruction::Mul, newConstSize, operand,
-                                         "", GEPInst);
+          if (isPowerOf<2>(size)) {
+            Constant* shiftAmnt =
+              ConstantInt::get(IntegerType::get(GEPInst->getContext(), ptrSize), logi2(size));
+            tmpOffset = BinaryOperator::Create(Instruction::Shl, operand, shiftAmnt,
+                                           "", GEPInst);
+          } else{
+            Constant* sizeConst =
+              ConstantInt::get(IntegerType::get(GEPInst->getContext(), ptrSize), size);
+            tmpOffset = BinaryOperator::Create(Instruction::Mul, sizeConst, operand,
+                                           "", GEPInst);
+          }
         }
         currentAddrInst = 
-          BinaryOperator::Create(Instruction::Add, currentAddrInst, tmpMul,
+          BinaryOperator::Create(Instruction::Add, currentAddrInst, tmpOffset,
               "", GEPInst);
       }
 
diff --git a/docs/Beignet.mdwn b/docs/Beignet.mdwn
index 5c62b4c..b1019da 100644
--- a/docs/Beignet.mdwn
+++ b/docs/Beignet.mdwn
@@ -29,7 +29,8 @@ two more packages (the following package name is for Ubuntu):
 - ocl-icd-libopencl1
 
 If you don't want to enable ICD, or your system doesn't have ICD OpenCL support,
-you can still link to the beignet OpenCL library. You can find the beignet/libcl.so
+you must explicitly disable ICD support by running cmake with option `-DOCLICD_COMPAT=0`
+then you can still link to the beignet OpenCL library. You can find the beignet/libcl.so
 in your system's library installation directories.
 
 Note that the compiler depends on LLVM (Low-Level Virtual Machine project), and the
@@ -160,15 +161,13 @@ Supported Targets
 
 OpenCL 2.0
 ----------
-From release v1.3.0, beignet support OpenCL 2.0. By default, OpenCL 2.0 support is disabled, you can enable it when cmake with option
--DENABLE_OPENCL_20=1. Please remember that to enable OpenCL 2.0, there are some dependencies. First, OpenCL 2.0 only support the targets
-from Skylake, include Skylake, Kabylake and Apollolake. Then, clang supports all OpenCL 2.0 feature from 3.9. So to enable OpenCL 2.0,
-you must update LLVM/clang to 3.9 or later. And also requeires libdrm at least 2.4.66.
-After enable OpenCL 2.0, beignet complies with OpenCL 2.0 spec, but some OpenCL 2.0 features are simulated by software, there is no performance
-gain, such as pipe and device queues, especially device queues.
-If you build beignet with OpenCL 2.0 enabled and your kernel don't use the OpenCL 2.0 features, please pass a build option -cl-std=CL1.2 for
-performance, the OpenCL 2.0 uses more registers and has lots of int64 operations, which may hurt performance, and beignet will continue to improve
-OpenCL 2.0 performance.
+From release v1.3.0, beignet supports OpenCL 2.0 on Skylake and later hardware.
+This requires LLVM/Clang 3.9 or later, libdrm 2.4.66 or later and x86_64 linux.
+As required by the OpenCL specification, kernels are compiled as OpenCL C 1.2 by default; to use 2.0 they
+must explicitly request it with the -cl-std=CL2.0 build option.  As OpenCL 2.0 is likely to be slower than
+1.2, we recommend that this is used only where needed.  (This is because 2.0 uses more registers and has
+lots of int64 operations, and some of the 2.0 features (pipes and especially device queues) are implemented
+in software so do not provide any performance gain.)  Beignet will continue to improve OpenCL 2.0 performance.
 
 Known Issues
 ------------
@@ -222,10 +221,8 @@ Known Issues
   This loses some precision but gains performance.
 
 * cl\_khr\_gl\_sharing.
-  This extension highly depends on mesa support. It seems that mesa would not provide
-  such type of extensions, we may have to hack with mesa source code to support this
-  extension. This feature used to work with a previous mesa git version. But now, it's
-  simply broken.
+  This extension is partially implemented(the most commonly used part), and we will implement
+  other parts based on requirement.
 
 Project repository
 ------------------
@@ -283,6 +280,7 @@ Documents for OpenCL application developers
 - [[Kernel Optimization Guide|Beignet/optimization-guide]]
 - [[Libva Buffer Sharing|Beignet/howto/libva-buffer-sharing-howto]]
 - [[V4l2 Buffer Sharing|Beignet/howto/v4l2-buffer-sharing-howto]]
+- [[OpenGL Buffer Sharing|Beignet/howto/gl-buffer-sharing-howto]]
 - [[Video Motion Estimation|Beignet/howto/video-motion-estimation-howto]]
 - [[Stand Alone Unit Test|Beignet/howto/stand-alone-utest-howto]]
 - [[Android build|Beignet/android-build-howto]]
diff --git a/docs/NEWS.mdwn b/docs/NEWS.mdwn
index 465f38b..601a07f 100644
--- a/docs/NEWS.mdwn
+++ b/docs/NEWS.mdwn
@@ -1,5 +1,8 @@
 # News
 
+## Mar 13, 2017
+[Beignet 1.3.1](https://01.org/beignet/downloads/beignet-1.3.1-2017-03-13) is released. This is a bug-fix release.
+
 ## Jan 20, 2017
 [Beignet 1.3.0](https://01.org/beignet/downloads/beignet-1.3.0-2017-01-20) is released. This is a major release. Please see the release notes for more information.
 
diff --git a/docs/howto/gl-buffer-sharing-howto.mdwn b/docs/howto/gl-buffer-sharing-howto.mdwn
new file mode 100644
index 0000000..6b3a751
--- /dev/null
+++ b/docs/howto/gl-buffer-sharing-howto.mdwn
@@ -0,0 +1,82 @@
+GL Buffer Sharing HowTo
+=========================
+
+Beignet now support cl_khr_gl_sharing partially(the most commonly used part), which is an offcial
+extension of Khronos OpenCL. With this extension, Beignet can create memory object from OpenGL/OpenGL
+ES buffer, texture or renderbuffer object with zero-copy. Currently, we just support create memory
+object from GL buffer object or 2d texture(the most common target type). We will support creating
+from other GL target type if necessary.
+
+Prerequisite
+------------
+
+Mesa GL library and Mesa EGL libray are required. Both version should be greater or equal than
+13.0.0.
+
+Steps
+-----
+
+A typical procedure of using cl_khr_gl_sharing is as below:
+
+- Basic egl routine(eglGetDisplay, eglInitialize, eglCreateContext...).
+
+- Create GL 2d texture in normal OpenGL way.
+
+- Check whether cl_khr_gl_sharing is supported by Beignet (Whether cl_khr_gl_sharing is present
+  in CL_DEVICE_EXTENSIONS string).
+
+- Create cl context with following cl_context_properties:
+    cl_context_properties *props=new cl_context_properties[7];
+    int i = 0;
+    props[i++] = CL_CONTEXT_PLATFORM;
+    props[i++] = (cl_context_properties)platform; //Valid OpenCL handle
+    props[i++] = CL_EGL_DISPLAY_KHR;  //We only support CL_EGL_DISPLAY_KHR now
+    props[i++] = (cl_context_properties)eglGetCurrentDisplay(); //EGLDisplay handle of the display
+    props[i++] = CL_GL_CONTEXT_KHR; //We only support CL_GL_CONTEXT_KHR now
+    props[i++] = (cl_context_properties)eglGetCurrentContext(); //EGLContext created by above EGLDisplay
+    props[i++] = 0;
+
+- Create cl image object from GL 2d texture by calling clCreateFromGLTexture.
+
+- Ensure any pending GL operations which access this GL 2d texture have completed by glFinish.
+
+- Acquire cl image object by calling clEnqueueAcquireGLObjects.
+
+- Access this cl image object as an usual cl image object.
+
+- Relase cl image object by calling clEnqueueReleaseGLObjects.
+
+- Ensure any pending OpenCL operations which access this cl image object have completed by clFinish.
+
+- Do other operation on GL 2d texture.
+
+Sample code
+-----------
+
+We have developed an example showing how to utilize cl_khr_gl_sharing in examples/gl_buffer_sharing
+directory. A cl image object is created from a gl 2d texutre and processed by OpenCL kernel, then
+is shown on screen.
+
+Steps to build and run this example:
+
+- Install mesa gl and egl library(version >= 13.0.0). X11 is also required.
+
+- Add option -DBUILD_EXAMPLES=ON to enable building examples when running cmake, such as:
+  `> mkdir build`
+  `> cd build`
+  `> cmake -DBUILD_EXAMPLES=ON ../`
+
+- Build source code:
+  `> make`
+
+- Export your X Display (if you login to your machine by ssh):
+  `> export DISPLAY=:0.0`
+
+- Run:
+  `> cd examples`
+  `> . ../utests/setenv.sh`
+  `> ./example-gl_buffer_sharing`
+
+More references
+---------------
+https://www.khronos.org/registry/OpenCL/specs/opencl-1.2-extensions.pdf
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 850b3d9..a9e35c2 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,44 +1,64 @@
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}
                     ${CMAKE_CURRENT_SOURCE_DIR}/../utests
                     ${CMAKE_CURRENT_SOURCE_DIR}/../include
-                    ${X11_INCLUDE_DIR})
+                    ${X11_INCLUDE_DIR}
+                    ${OPENGL_INCLUDE_DIRS}
+                    ${EGL_INCLUDE_DIRS})
+
+set (ocl_example_helper_sources
+  ../utests/utest_error.c
+  ../utests/utest_assert.cpp
+  ../utests/utest_file_map.cpp
+  ../utests/utest_helper.cpp)
+
+IF(OPENGL_FOUND AND EGL_FOUND AND X11_FOUND)
+  ADD_DEFINITIONS(-DHAS_GL_EGL_X11)
+ENDIF(OPENGL_FOUND AND EGL_FOUND AND X11_FOUND)
+
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-deprecated-declarations" )
+
+ADD_LIBRARY(ocl_example_helper SHARED ${ocl_example_helper_sources})
+
+TARGET_LINK_LIBRARIES(ocl_example_helper cl m ${X11_LIBRARIES} ${OPENGL_LIBRARIES} ${EGL_LIBRARIES})
 
 IF(LIBVA_BUF_SH_DEP OR V4L2_BUF_SH_DEP)
-EXECUTE_PROCESS(COMMAND ls "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva" OUTPUT_VARIABLE LS_RESULT)
-IF ("LS_RESULT" STREQUAL "")
-EXECUTE_PROCESS(COMMAND git submodule init WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
-EXECUTE_PROCESS(COMMAND git submodule update WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
-EXECUTE_PROCESS(COMMAND git checkout master WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva)
-ENDIF ("LS_RESULT" STREQUAL "")
+  EXECUTE_PROCESS(COMMAND ls "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva" OUTPUT_VARIABLE LS_RESULT)
+  IF ("LS_RESULT" STREQUAL "")
+    EXECUTE_PROCESS(COMMAND git submodule init WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
+    EXECUTE_PROCESS(COMMAND git submodule update WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
+    EXECUTE_PROCESS(COMMAND git checkout master WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva)
+  ENDIF ("LS_RESULT" STREQUAL "")
 
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva/va
-                    ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva/test/common)
+  INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva/va
+                      ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva/test/common)
 
-link_directories (${LIBVA_LIBDIR}
-                  ${LIBVA-X11_LIBDIR})
+  link_directories (${LIBVA_LIBDIR}
+                    ${LIBVA-X11_LIBDIR})
 
-set (va_ocl_basic_sources
-  ../utests/utest_error.c
-  ../utests/utest_assert.cpp
-  ../utests/utest_file_map.cpp
-  ../utests/utest_helper.cpp
-  ./thirdparty/libva/test/common/va_display.c
-  ./thirdparty/libva/test/common/va_display_x11.c)
+  set (va_display_sources
+    ./thirdparty/libva/test/common/va_display.c
+    ./thirdparty/libva/test/common/va_display_x11.c)
 
-ADD_DEFINITIONS(-DHAVE_VA_X11)
+  ADD_DEFINITIONS(-DHAVE_VA_X11)
 
-ADD_LIBRARY(va_ocl_basic SHARED ${va_ocl_basic_sources})
+  ADD_LIBRARY(va_display SHARED ${va_display_sources})
 
-TARGET_LINK_LIBRARIES(va_ocl_basic cl m va va-x11 ${X11_X11_LIB})
+  TARGET_LINK_LIBRARIES(va_display cl m va va-x11 ${X11_LIBRARIES})
 
-IF(LIBVA_BUF_SH_DEP)
-ADD_DEFINITIONS(-DINPUT_NV12_DEFAULT="${CMAKE_CURRENT_SOURCE_DIR}/libva_buffer_sharing/256_128.nv12")
-ADD_EXECUTABLE(example-libva_buffer_sharing ./libva_buffer_sharing/libva_buffer_sharing.cpp)
-TARGET_LINK_LIBRARIES(example-libva_buffer_sharing va_ocl_basic)
-ENDIF(LIBVA_BUF_SH_DEP)
+  IF(LIBVA_BUF_SH_DEP)
+    ADD_DEFINITIONS(-DINPUT_NV12_DEFAULT="${CMAKE_CURRENT_SOURCE_DIR}/libva_buffer_sharing/256_128.nv12")
+    ADD_EXECUTABLE(example-libva_buffer_sharing ./libva_buffer_sharing/libva_buffer_sharing.cpp)
+    TARGET_LINK_LIBRARIES(example-libva_buffer_sharing ocl_example_helper va_display)
+  ENDIF(LIBVA_BUF_SH_DEP)
 
-IF(V4L2_BUF_SH_DEP)
-ADD_EXECUTABLE(example-v4l2_buffer_sharing ./v4l2_buffer_sharing/v4l2_buffer_sharing.cpp)
-TARGET_LINK_LIBRARIES(example-v4l2_buffer_sharing va_ocl_basic)
-ENDIF(V4L2_BUF_SH_DEP)
+  IF(V4L2_BUF_SH_DEP)
+    ADD_EXECUTABLE(example-v4l2_buffer_sharing ./v4l2_buffer_sharing/v4l2_buffer_sharing.cpp)
+    TARGET_LINK_LIBRARIES(example-v4l2_buffer_sharing ocl_example_helper va_display)
+  ENDIF(V4L2_BUF_SH_DEP)
 ENDIF(LIBVA_BUF_SH_DEP OR V4L2_BUF_SH_DEP)
+
+IF(OPENGL_FOUND AND EGL_FOUND AND X11_FOUND)
+  ADD_EXECUTABLE(example-gl_buffer_sharing ./gl_buffer_sharing/gl_buffer_sharing.cpp)
+  TARGET_LINK_LIBRARIES(example-gl_buffer_sharing ocl_example_helper ${OPENGL_LIBRARIES} ${EGL_LIBRARIES})
+ENDIF(OPENGL_FOUND AND EGL_FOUND AND X11_FOUND)
diff --git a/examples/gl_buffer_sharing/gl_buffer_sharing.cpp b/examples/gl_buffer_sharing/gl_buffer_sharing.cpp
new file mode 100644
index 0000000..2d7e10d
--- /dev/null
+++ b/examples/gl_buffer_sharing/gl_buffer_sharing.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "utest_helper.hpp"
+#include <stdio.h>
+
+static cl_int cl_status;
+
+const size_t w = EGL_WINDOW_WIDTH;
+const size_t h = EGL_WINDOW_HEIGHT;
+static GLuint tex;
+
+static void draw(){
+  XEvent event;
+
+  float vertices[8] = {-1, 1, 1, 1, 1, -1, -1, -1};
+  float tex_coords[8] = {0, 0, 1, 0, 1, 1, 0, 1};
+  uint32_t color0 = 0x0000ff00;
+
+  for (;;)
+  {
+    XNextEvent(xDisplay, &event);
+
+    if (event.type == Expose)
+    {
+      glClearColor(0.0, 1.0, 0.0, 1.0);
+      glClear(GL_COLOR_BUFFER_BIT);
+
+      OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+      OCL_SET_ARG(1, sizeof(color0), &color0);
+      globals[0] = w;
+      globals[1] = h;
+      locals[0] = 16;
+      locals[1] = 16;
+      glFinish();
+      OCL_ENQUEUE_ACQUIRE_GL_OBJECTS(0);
+      OCL_NDRANGE(2);
+      OCL_ENQUEUE_RELEASE_GL_OBJECTS(0);
+      OCL_FINISH();
+
+      glBindTexture(GL_TEXTURE_2D, tex);
+      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
+      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
+      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+      glEnable(GL_TEXTURE_2D);
+      glDisable(GL_BLEND);
+      glVertexPointer(2, GL_FLOAT, sizeof(float) * 2, vertices);
+      glEnableClientState(GL_VERTEX_ARRAY);
+      glClientActiveTexture(GL_TEXTURE0);
+      glTexCoordPointer(2, GL_FLOAT, sizeof(float) * 2, tex_coords);
+      glEnableClientState(GL_TEXTURE_COORD_ARRAY);
+      glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
+      glFinish();
+      eglSwapBuffers(eglDisplay, eglSurface);
+    }
+    if (event.type == KeyPress)
+      break;
+  }
+}
+
+
+static void initialize_ocl_gl(){
+
+  //ocl initialization: basic & create kernel & check extension
+  printf("\n***********************OpenCL info: ***********************\n");
+  if ((cl_status = cl_test_init("runtime_fill_gl_image.cl", "runtime_fill_gl_image", SOURCE)) != 0){
+    fprintf(stderr, "cl_test_init error\n");
+    exit(1);
+  }
+
+  if (eglContext == EGL_NO_CONTEXT) {
+    fprintf(stderr, "There is no valid egl context! Exit!\n");
+    exit(1);
+  }
+
+  XMapWindow(xDisplay, xWindow);
+
+  // Setup kernel and images
+  glGenTextures(1, &tex);
+  glBindTexture(GL_TEXTURE_2D, tex);
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+  glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, w, h, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, NULL);
+
+  //Create cl image from miplevel 0
+  OCL_CREATE_GL_IMAGE(buf[0], 0, GL_TEXTURE_2D, 0, tex);
+}
+
+int main(int argc, char *argv[])
+{
+  initialize_ocl_gl();
+
+  draw();
+
+  //destroy resource of cl & gl
+  cl_test_destroy();
+
+  printf("\nExample run successfully!\n");
+}
diff --git a/kernels/runtime_fill_gl_image.cl b/kernels/runtime_fill_gl_image.cl
new file mode 100644
index 0000000..79d4054
--- /dev/null
+++ b/kernels/runtime_fill_gl_image.cl
@@ -0,0 +1,15 @@
+__kernel void
+runtime_fill_gl_image(image2d_t img, int color)
+{
+  int2 coord;
+  float4 color_v4;
+  int lgid_x = get_group_id(0);
+  int lgid_y = get_group_id(1);
+  int num_groups_x = get_num_groups(0);
+  int num_groups_y = get_num_groups(1);
+
+  coord.x = get_global_id(0);
+  coord.y = get_global_id(1);
+  color_v4 = (float4)( lgid_x/(float)num_groups_x, lgid_y/(float)num_groups_y, 1.0, 1.0);
+  write_imagef(img, coord, color_v4);
+}
diff --git a/src/cl_context.c b/src/cl_context.c
index 3f2e757..1ba2302 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -373,7 +373,7 @@ cl_context_delete(cl_context ctx)
       ctx->internal_prgs[i] = NULL;
     }
 
-    if (ctx->internal_kernels[i]) {
+    if (ctx->built_in_kernels[i]) {
       cl_kernel_delete(ctx->built_in_kernels[i]);
       ctx->built_in_kernels[i] = NULL;
     }
@@ -383,6 +383,7 @@ cl_context_delete(cl_context ctx)
   ctx->built_in_prgs = NULL;
 
   cl_free(ctx->prop_user);
+  cl_free(ctx->devices);
   cl_driver_delete(ctx->drv);
   CL_OBJECT_DESTROY_BASE(ctx);
   cl_free(ctx);
diff --git a/src/cl_device_data.h b/src/cl_device_data.h
index 4ee4ca3..f3c5204 100644
--- a/src/cl_device_data.h
+++ b/src/cl_device_data.h
@@ -363,5 +363,7 @@
 
 #define IS_GEN9(devid)     (IS_SKYLAKE(devid) || IS_BROXTON(devid) || IS_KABYLAKE(devid))
 
+#define MAX_OCLVERSION(devid) (IS_GEN9(devid) ? 200 : 120)
+
 #endif /* __CL_DEVICE_DATA_H__ */
 
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 31f8616..d4f4208 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -255,12 +255,16 @@ static struct _cl_device_id intel_kbl_gt4_device = {
 };
 
 LOCAL cl_device_id
-cl_get_gt_device(void)
+cl_get_gt_device(cl_device_type device_type)
 {
   cl_device_id ret = NULL;
   const int device_id = cl_driver_get_device_id();
   cl_device_id device = NULL;
 
+  //cl_get_gt_device only return GPU type device.
+  if (((CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_DEFAULT) & device_type) == 0)
+    return NULL;
+
 #define DECL_INFO_STRING(BREAK, STRUCT, FIELD, STRING) \
     STRUCT.FIELD = STRING; \
     STRUCT.JOIN(FIELD,_sz) = sizeof(STRING); \
@@ -877,7 +881,7 @@ cl_get_device_ids(cl_platform_id    platform,
   cl_device_id device;
 
   /* Do we have a usable device? */
-  device = cl_get_gt_device();
+  device = cl_get_gt_device(device_type);
   if (device) {
     cl_self_test_res ret = cl_self_test(device, SELF_TEST_PASS);
     if (ret == SELF_TEST_ATOMIC_FAIL) {
@@ -1603,7 +1607,7 @@ cl_devices_list_check(cl_uint num_devices, const cl_device_id *devices)
     }
 
     // TODO: We now just support Gen Device.
-    if (devices[i] != cl_get_gt_device()) {
+    if (devices[i] != cl_get_gt_device(devices[i]->device_type)) {
       return CL_INVALID_DEVICE;
     }
   }
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 9d8b512..6b8f2eb 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -154,7 +154,7 @@ extern cl_int cl_get_device_ids(cl_platform_id    platform,
                                 cl_uint *         num_devices);
 
 /* Get the intel GPU device we currently have in this machine (if any) */
-extern cl_device_id cl_get_gt_device(void);
+extern cl_device_id cl_get_gt_device(cl_device_type device_type);
 
 /* Provide info about the device */
 extern cl_int cl_get_device_info(cl_device_id     device,
diff --git a/src/cl_event.c b/src/cl_event.c
index 3e1dc22..a2b16be 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -579,7 +579,7 @@ cl_event_exec(cl_event event, cl_int exec_to_status, cl_bool ignore_depends)
 
     if (ret != CL_SUCCESS) {
       assert(ret < 0);
-      DEBUGP(DL_WARNING, "Exec event %p error, type is %d, error staus is %d",
+      DEBUGP(DL_WARNING, "Exec event %p error, type is %d, error status is %d",
              event, event->event_type, ret);
       ret = cl_event_set_status(event, ret);
       assert(ret == CL_SUCCESS);
diff --git a/src/cl_gen75_device.h b/src/cl_gen75_device.h
index 7ef2b82..a07583f 100644
--- a/src/cl_gen75_device.h
+++ b/src/cl_gen75_device.h
@@ -21,7 +21,7 @@
 .max_parameter_size = 1024,
 .global_mem_cache_line_size = 64, /* XXX */
 .global_mem_cache_size = 8 << 10, /* XXX */
-.local_mem_type = CL_GLOBAL,
+.local_mem_type = CL_LOCAL,
 .local_mem_size = 64 << 10,
 .scratch_mem_size = 2 << 20,
 .max_mem_alloc_size = 2 * 1024 * 1024 * 1024ul,
diff --git a/src/cl_gen7_device.h b/src/cl_gen7_device.h
index e755cad..01aa0f3 100644
--- a/src/cl_gen7_device.h
+++ b/src/cl_gen7_device.h
@@ -21,7 +21,7 @@
 .max_parameter_size = 1024,
 .global_mem_cache_line_size = 64, /* XXX */
 .global_mem_cache_size = 8 << 10, /* XXX */
-.local_mem_type = CL_GLOBAL,
+.local_mem_type = CL_LOCAL,
 .local_mem_size = 64 << 10,
 .scratch_mem_size = 12 << 10,
 .max_mem_alloc_size = 2 * 1024 * 1024 * 1024ul,
diff --git a/src/cl_gen8_device.h b/src/cl_gen8_device.h
index 08fde48..e596825 100644
--- a/src/cl_gen8_device.h
+++ b/src/cl_gen8_device.h
@@ -21,7 +21,7 @@
 .max_parameter_size = 1024,
 .global_mem_cache_line_size = 64, /* XXX */
 .global_mem_cache_size = 8 << 10, /* XXX */
-.local_mem_type = CL_GLOBAL,
+.local_mem_type = CL_LOCAL,
 .local_mem_size = 64 << 10,
 .scratch_mem_size = 2 << 20,
 .max_mem_alloc_size = 2 * 1024 * 1024 * 1024ul,
diff --git a/src/cl_gen9_device.h b/src/cl_gen9_device.h
index f50f9c7..b0a3ab8 100644
--- a/src/cl_gen9_device.h
+++ b/src/cl_gen9_device.h
@@ -21,11 +21,13 @@
 .max_parameter_size = 1024,
 .global_mem_cache_line_size = 64, /* XXX */
 .global_mem_cache_size = 8 << 10, /* XXX */
-.local_mem_type = CL_GLOBAL,
+.local_mem_type = CL_LOCAL,
 .local_mem_size = 64 << 10,
 .scratch_mem_size = 2 << 20,
 .max_mem_alloc_size = 4 * 1024 * 1024 * 1024ul,
 .global_mem_size = 4 * 1024 * 1024 * 1024ul,
 
+#define GEN9_DEVICE 1
 #include "cl_gt_device.h"
+#undef GEN9_DEVICE
 
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index cf5ad7a..ca4f3c5 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -16,7 +16,15 @@
  *
  * Author: Benjamin Segovia <benjamin.segovia at intel.com>
  */
-
+#undef LIBCL_VERSION_STRING
+#undef LIBCL_C_VERSION_STRING
+#ifdef GEN9_DEVICE
+#define LIBCL_VERSION_STRING GEN9_LIBCL_VERSION_STRING
+#define LIBCL_C_VERSION_STRING GEN9_LIBCL_C_VERSION_STRING
+#else
+#define LIBCL_VERSION_STRING NONGEN9_LIBCL_VERSION_STRING
+#define LIBCL_C_VERSION_STRING NONGEN9_LIBCL_C_VERSION_STRING
+#endif
 /* Common fields for both all GT devices (IVB / SNB) */
 .device_type = CL_DEVICE_TYPE_GPU,
 .device_id=0,/* == device_id (set when requested) */
@@ -39,11 +47,7 @@
 .native_vector_width_float = 4,
 .native_vector_width_double = 2,
 .native_vector_width_half = 8,
-#ifdef ENABLE_OPENCL_20
-.address_bits = 64,
-#else
 .address_bits = 32,
-#endif
 .svm_capabilities = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER,
 .preferred_platform_atomic_alignment = 0,
 .preferred_global_atomic_alignment = 0,
diff --git a/src/cl_image.c b/src/cl_image.c
index d059304..5ff459a 100644
--- a/src/cl_image.c
+++ b/src/cl_image.c
@@ -144,7 +144,9 @@ cl_image_get_intel_format(const cl_image_format *fmt)
     case CL_RG:
       switch (type) {
         case CL_UNORM_INT8:     return I965_SURFACEFORMAT_R8G8_UNORM;
+        case CL_UNORM_INT16:    return I965_SURFACEFORMAT_R16G16_UNORM;
         case CL_UNSIGNED_INT8:  return I965_SURFACEFORMAT_R8G8_UINT;
+        case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16G16_UINT;
         default: return INTEL_UNSUPPORTED_FORMAT;
       };
 #if 0
diff --git a/src/cl_khr_icd.c b/src/cl_khr_icd.c
index 7b3600c..e4daf79 100644
--- a/src/cl_khr_icd.c
+++ b/src/cl_khr_icd.c
@@ -18,10 +18,14 @@
 
 #include "cl_platform_id.h"
 #include "CL/cl_intel.h" // for clGetKernelSubGroupInfoKHR
-/* The interop functions are not implemented in Beignet */
-#define CL_GL_INTEROP(x) NULL
-/* OpenCL 1.2 is not implemented in Beignet */
-#define CL_1_2_NOTYET(x) NULL
+/* The interop functions are only available if sharing is enabled */
+#ifdef HAS_GL_EGL
+#define CL_GL_INTEROP(x) x
+#else
+#define CL_GL_INTEROP(x) (void *) NULL
+#endif
+/* These are not yet implemented in Beignet */
+#define CL_NOTYET(x) (void *) NULL
 
 /** Return platform list through ICD interface
  * This code is used only if a client is linked directly against the library
@@ -114,13 +118,13 @@ struct _cl_icd_dispatch const cl_khr_icd_dispatch = {
   clGetExtensionFunctionAddress,
   CL_GL_INTEROP(clCreateFromGLBuffer),
   CL_GL_INTEROP(clCreateFromGLTexture2D),
-  CL_GL_INTEROP(clCreateFromGLTexture3D),
-  CL_GL_INTEROP(clCreateFromGLRenderbuffer),
-  CL_GL_INTEROP(clGetGLObjectInfo),
-  CL_GL_INTEROP(clGetGLTextureInfo),
+  CL_NOTYET(clCreateFromGLTexture3D),
+  CL_NOTYET(clCreateFromGLRenderbuffer),
+  CL_NOTYET(clGetGLObjectInfo),
+  CL_NOTYET(clGetGLTextureInfo),
   CL_GL_INTEROP(clEnqueueAcquireGLObjects),
   CL_GL_INTEROP(clEnqueueReleaseGLObjects),
-  CL_GL_INTEROP(clGetGLContextInfoKHR),
+  CL_NOTYET(clGetGLContextInfoKHR),
   (void *) NULL,
   (void *) NULL,
   (void *) NULL,
@@ -135,9 +139,9 @@ struct _cl_icd_dispatch const cl_khr_icd_dispatch = {
   clEnqueueReadBufferRect,
   clEnqueueWriteBufferRect,
   clEnqueueCopyBufferRect,
-  CL_1_2_NOTYET(clCreateSubDevicesEXT),
-  CL_1_2_NOTYET(clRetainDeviceEXT),
-  CL_1_2_NOTYET(clReleaseDeviceEXT),
+  CL_NOTYET(clCreateSubDevicesEXT),
+  CL_NOTYET(clRetainDeviceEXT),
+  CL_NOTYET(clReleaseDeviceEXT),
 #ifdef CL_VERSION_1_2
   (void *) NULL,
   clCreateSubDevices,
diff --git a/src/cl_platform_id.c b/src/cl_platform_id.c
index 1f21f5d..2afafb2 100644
--- a/src/cl_platform_id.c
+++ b/src/cl_platform_id.c
@@ -32,7 +32,7 @@
 
 static struct _cl_platform_id intel_platform_data = {
   DECL_INFO_STRING(profile, "FULL_PROFILE")
-  DECL_INFO_STRING(version, LIBCL_VERSION_STRING)
+  DECL_INFO_STRING(version, GEN9_LIBCL_VERSION_STRING)
   DECL_INFO_STRING(name, "Intel Gen OCL Driver")
   DECL_INFO_STRING(vendor, "Intel")
   DECL_INFO_STRING(icd_suffix_khr, "Intel")
diff --git a/src/cl_platform_id.h b/src/cl_platform_id.h
index 3fdb920..89e0857 100644
--- a/src/cl_platform_id.h
+++ b/src/cl_platform_id.h
@@ -72,8 +72,10 @@ extern cl_int cl_get_platform_ids(cl_uint          num_entries,
 #else
 #define LIBCL_DRIVER_VERSION_STRING _JOINT(LIBCL_DRIVER_VERSION_MAJOR, LIBCL_DRIVER_VERSION_MINOR)
 #endif
-#define LIBCL_VERSION_STRING "OpenCL " _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING
-#define LIBCL_C_VERSION_STRING "OpenCL C " _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING
+#define GEN9_LIBCL_VERSION_STRING "OpenCL " _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING
+#define GEN9_LIBCL_C_VERSION_STRING "OpenCL C " _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING
+#define NONGEN9_LIBCL_VERSION_STRING "OpenCL 1.2 beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING
+#define NONGEN9_LIBCL_C_VERSION_STRING "OpenCL C 1.2 beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING
 
 #endif /* __CL_PLATFORM_ID_H__ */
 
diff --git a/src/cl_program.c b/src/cl_program.c
index 0358705..363aed5 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -334,6 +334,7 @@ cl_program_create_from_binary(cl_context             ctx,
   else if (isGenBinary((unsigned char*)program->binary)) {
     program->opaque = interp_program_new_from_binary(program->ctx->devices[0]->device_id, program->binary, program->binary_sz);
     if (UNLIKELY(program->opaque == NULL)) {
+      DEBUGP(DL_ERROR, "Incompatible binary, please delete the binary and generate again.");
       err = CL_INVALID_PROGRAM;
       goto error;
     }
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index db61844..43cf7f3 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -289,7 +289,15 @@ set (utests_sources
   compiler_mix.cpp
   compiler_math_3op.cpp
   compiler_bsort.cpp
-  builtin_kernel_block_motion_estimate_intel.cpp)
+  builtin_kernel_block_motion_estimate_intel.cpp
+  compiler_program_global.cpp
+  compiler_generic_atomic.cpp
+  compiler_atomic_functions_20.cpp
+  compiler_sampler.cpp
+  compiler_generic_pointer.cpp
+  runtime_pipe_query.cpp
+  compiler_pipe_builtin.cpp
+  compiler_device_enqueue.cpp)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
   SET(utests_sources
@@ -297,18 +305,6 @@ if (LLVM_VERSION_NODOT VERSION_GREATER 34)
       compiler_overflow.cpp)
 endif (LLVM_VERSION_NODOT VERSION_GREATER 34)
 
-if (ENABLE_OPENCL_20)
-  SET(utests_sources
-      ${utests_sources}
-      compiler_program_global.cpp
-      compiler_generic_atomic.cpp
-      compiler_atomic_functions_20.cpp
-      compiler_sampler.cpp
-      compiler_generic_pointer.cpp
-      runtime_pipe_query.cpp
-      compiler_pipe_builtin.cpp
-      compiler_device_enqueue.cpp)
-endif (ENABLE_OPENCL_20)
 
 if (NOT_BUILD_STAND_ALONE_UTEST)
   if (X11_FOUND)
diff --git a/utests/compiler_atomic_functions_20.cpp b/utests/compiler_atomic_functions_20.cpp
index ea1ace5..e11d077 100644
--- a/utests/compiler_atomic_functions_20.cpp
+++ b/utests/compiler_atomic_functions_20.cpp
@@ -58,6 +58,8 @@ static void cpu_compiler_atomic(int *dst, int *src)
 
 static void compiler_atomic_functions(const char* kernel_name)
 {
+  if(!cl_check_ocl20(false))
+    return;
   const size_t n = GROUP_NUM * LOCAL_SIZE;
   int cpu_dst[24] = {0}, cpu_src[256];
 
@@ -65,7 +67,7 @@ static void compiler_atomic_functions(const char* kernel_name)
   locals[0] = LOCAL_SIZE;
 
   // Setup kernel and buffers
-  OCL_CREATE_KERNEL_FROM_FILE("compiler_atomic_functions_20", kernel_name);
+  OCL_CALL(cl_kernel_init, "compiler_atomic_functions_20.cl", kernel_name, SOURCE, "-cl-std=CL2.0");
   OCL_CREATE_BUFFER(buf[0], 0, 24 * sizeof(int), NULL);
   OCL_CREATE_BUFFER(buf[1], 0, locals[0] * sizeof(int), NULL);
   OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
diff --git a/utests/compiler_device_enqueue.cpp b/utests/compiler_device_enqueue.cpp
index a9e3e2d..698be4d 100644
--- a/utests/compiler_device_enqueue.cpp
+++ b/utests/compiler_device_enqueue.cpp
@@ -2,12 +2,14 @@
 
 void compiler_device_enqueue(void)
 {
+  if(!cl_check_ocl20(false))
+    return;
   const size_t n = 32;
   const uint32_t global_sz = 3;
   uint32_t result = 0;
 
   // Setup kernel and buffers
-  OCL_CREATE_KERNEL("compiler_device_enqueue");
+  OCL_CALL(cl_kernel_init, "compiler_device_enqueue.cl", "compiler_device_enqueue", SOURCE, "-cl-std=CL2.0");
   OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
   OCL_SET_ARG(0, sizeof(uint32_t), &global_sz);
   OCL_SET_ARG(1, sizeof(cl_mem), &buf[0]);
diff --git a/utests/compiler_generic_atomic.cpp b/utests/compiler_generic_atomic.cpp
index 9ed5f53..e35e994 100644
--- a/utests/compiler_generic_atomic.cpp
+++ b/utests/compiler_generic_atomic.cpp
@@ -3,6 +3,8 @@
 template<typename T>
 void test_atomic(const char* kernelName)
 {
+  if(!cl_check_ocl20(false))
+    return;
   const int n = 16;
   T cpu_src[16];
 
diff --git a/utests/compiler_generic_pointer.cpp b/utests/compiler_generic_pointer.cpp
index 5984694..318312a 100644
--- a/utests/compiler_generic_pointer.cpp
+++ b/utests/compiler_generic_pointer.cpp
@@ -3,6 +3,8 @@
 template<typename T>
 void test(const char* kernelName)
 {
+  if(!cl_check_ocl20(false))
+    return;
   const int n = 16;
   T cpu_src[16];
 
diff --git a/utests/compiler_pipe_builtin.cpp b/utests/compiler_pipe_builtin.cpp
index c8ec077..99270d1 100644
--- a/utests/compiler_pipe_builtin.cpp
+++ b/utests/compiler_pipe_builtin.cpp
@@ -8,11 +8,13 @@ typedef struct{
 #define PIPE_BUILTIN(TYPE,GROUP) \
 static void compiler_pipe_##GROUP##_##TYPE(void) \
 { \
+  if(!cl_check_ocl20(false))\
+    return;\
   const size_t w = 16;  \
   uint32_t ans_host = 0;  \
   uint32_t ans_device = 0;  \
   /* pipe write kernel*/  \
-  OCL_CREATE_KERNEL_FROM_FILE("compiler_pipe_builtin", "compiler_pipe_"#GROUP"_write_"#TYPE);  \
+  OCL_CALL(cl_kernel_init, "compiler_pipe_builtin.cl", "compiler_pipe_"#GROUP"_write_"#TYPE, SOURCE, "-cl-std=CL2.0");\
   OCL_CALL2(clCreatePipe, buf[0], ctx, 0, sizeof(TYPE), w, NULL);\
   OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, w * sizeof(TYPE), NULL);\
   OCL_MAP_BUFFER(1);\
@@ -26,7 +28,7 @@ static void compiler_pipe_##GROUP##_##TYPE(void) \
   OCL_NDRANGE(1);\
   OCL_CALL(clReleaseKernel, kernel);\
   /* pipe read kernel */\
-  OCL_CREATE_KERNEL_FROM_FILE("compiler_pipe_builtin", "compiler_pipe_"#GROUP"_read_"#TYPE);\
+  OCL_CALL(cl_kernel_init, "compiler_pipe_builtin.cl", "compiler_pipe_"#GROUP"_read_"#TYPE, SOURCE, "-cl-std=CL2.0");\
   OCL_CREATE_BUFFER(buf[2], CL_MEM_READ_WRITE, w * sizeof(TYPE), NULL);\
   OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);\
   OCL_SET_ARG(1, sizeof(cl_mem), &buf[2]);\
@@ -52,8 +54,9 @@ PIPE_BUILTIN(mystruct, workgroup)
 static void compiler_pipe_query(void) {
   const size_t w = 32;
   const size_t sz = 16;
+  if(!cl_check_ocl20(false)){return;}
   /* pipe write kernel */
-  OCL_CREATE_KERNEL_FROM_FILE("compiler_pipe_builtin", "compiler_pipe_query");
+  OCL_CALL(cl_kernel_init, "compiler_pipe_builtin.cl", "compiler_pipe_query", SOURCE, "-cl-std=CL2.0");
   OCL_CALL2(clCreatePipe, buf[0], ctx, 0, sizeof(uint32_t), w, NULL);
   OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, sz * sizeof(uint32_t), NULL);
   OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
diff --git a/utests/compiler_program_global.cpp b/utests/compiler_program_global.cpp
index ef7c655..caadf8b 100644
--- a/utests/compiler_program_global.cpp
+++ b/utests/compiler_program_global.cpp
@@ -21,8 +21,9 @@ static int init_program(const char* name, cl_context ctx, cl_program *pg )
 
 void compiler_program_global()
 {
+  if(!cl_check_ocl20(false))
+    return;
   const int n = 16;
-  int cpu_src[16];
   cl_int err;
 
   // Setup kernel and buffers
@@ -50,7 +51,7 @@ void compiler_program_global()
 
   OCL_MAP_BUFFER(0);
   for (int i = 0; i <  n; ++i)
-    cpu_src[i] = ((int*)buf_data[0])[i] = i;
+    ((int*)buf_data[0])[i] = i;
   OCL_UNMAP_BUFFER(0);
 
   // Run the kernel on GPU
diff --git a/utests/compiler_sampler.cpp b/utests/compiler_sampler.cpp
index f8bf622..853e052 100644
--- a/utests/compiler_sampler.cpp
+++ b/utests/compiler_sampler.cpp
@@ -3,6 +3,8 @@
 
 void compiler_sampler(void)
 {
+  if(!cl_check_ocl20(false))
+    return;
   OCL_CREATE_KERNEL("compiler_sampler");
 
   OCL_ASSERT(ctx != 0);
diff --git a/utests/runtime_pipe_query.cpp b/utests/runtime_pipe_query.cpp
index 3ce8258..e46f5ff 100644
--- a/utests/runtime_pipe_query.cpp
+++ b/utests/runtime_pipe_query.cpp
@@ -1,6 +1,8 @@
 #include <string.h>
 #include "utest_helper.hpp"
 static void runtime_pipe_query(void) {
+  if(!cl_check_ocl20(false))
+    return;
   const size_t w = 16;
   const size_t sz = 8;
   cl_uint retnum, retsz;
diff --git a/utests/utest_helper.cpp b/utests/utest_helper.cpp
index b57d2ad..7052a14 100644
--- a/utests/utest_helper.cpp
+++ b/utests/utest_helper.cpp
@@ -939,13 +939,14 @@ int cl_check_subgroups_short(void)
   return 1;
 }
 
-int cl_check_ocl20(void)
+int cl_check_ocl20(bool or_beignet)
 {
   size_t param_value_size;
   size_t ret_sz;
   OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_OPENCL_C_VERSION, 0, 0, &param_value_size);
   if(param_value_size == 0) {
     printf("Not OpenCL 2.0 device, ");
+    if(or_beignet){
     if(cl_check_beignet()) {
       printf("Beignet extension test!");
       return 1;
@@ -953,6 +954,10 @@ int cl_check_ocl20(void)
       printf("Not beignet device , Skip!");
       return 0;
     }
+    }else{
+      printf("Skip!");
+      return 0;
+    }
   }
   char* device_version_str = (char* )malloc(param_value_size * sizeof(char) );
   OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_OPENCL_C_VERSION, param_value_size, (void*)device_version_str, &ret_sz);
@@ -961,11 +966,16 @@ int cl_check_ocl20(void)
   if(!strstr(device_version_str, "2.0")) {
     free(device_version_str);
     printf("Not OpenCL 2.0 device, ");
-    if(cl_check_beignet()) {
-      printf("Beignet extension test!");
-      return 1;
-    } else {
-      printf("Not beignet device , Skip!");
+    if(or_beignet){
+      if(cl_check_beignet()) {
+        printf("Beignet extension test!");
+        return 1;
+      } else {
+        printf("Not beignet device , Skip!");
+        return 0;
+      }
+    }else{
+      printf("Skip!");
       return 0;
     }
   }
diff --git a/utests/utest_helper.hpp b/utests/utest_helper.hpp
index a761325..e2a6a88 100644
--- a/utests/utest_helper.hpp
+++ b/utests/utest_helper.hpp
@@ -52,6 +52,8 @@
 extern EGLDisplay  eglDisplay;
 extern EGLContext  eglContext;
 extern EGLSurface  eglSurface;
+extern Display *xDisplay;
+extern Window xWindow;
 #endif
 
 union uint32_cast {
@@ -310,8 +312,8 @@ extern clGetKernelSubGroupInfoKHR_cb* utestclGetKernelSubGroupInfoKHR;
 /* Check if cl_intel_motion_estimation enabled. */
 extern int cl_check_motion_estimation(void);
 
-/* Check is cl version 2.0. */
-extern int cl_check_ocl20(void);
+/* Check is cl version 2.0 or Beignet extension. */
+extern int cl_check_ocl20(bool or_beignet = true);
 
 /* Check is FP16 enabled. */
 extern int cl_check_half(void);

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git



More information about the Pkg-opencl-commits mailing list