[beignet] 01/08: Imported Upstream version 1.3.1
Rebecca Palmer
rnpalmer-guest at moszumanska.debian.org
Tue Aug 1 21:38:03 UTC 2017
This is an automated email from the git hooks/post-receive script.
rnpalmer-guest pushed a commit to branch master
in repository beignet.
commit 388d63a6fe07a0f41aa6efa8dbb6bf8934e4df03
Author: Rebecca N. Palmer <rebecca_palmer at zoho.com>
Date: Sun Jul 30 14:21:30 2017 +0100
Imported Upstream version 1.3.1
---
CMake/FindLLVM.cmake | 4 +-
CMakeLists.txt | 88 ++++++++++------
backend/src/backend/gen_insn_compact.cpp | 2 +
.../src/backend/gen_insn_selection_optimize.cpp | 5 +-
backend/src/backend/program.cpp | 19 ++--
backend/src/llvm/llvm_gen_backend.cpp | 34 +++++-
backend/src/llvm/llvm_passes.cpp | 19 ++--
docs/Beignet.mdwn | 26 +++--
docs/NEWS.mdwn | 3 +
docs/howto/gl-buffer-sharing-howto.mdwn | 82 +++++++++++++++
examples/CMakeLists.txt | 80 ++++++++------
examples/gl_buffer_sharing/gl_buffer_sharing.cpp | 115 +++++++++++++++++++++
kernels/runtime_fill_gl_image.cl | 15 +++
src/cl_context.c | 3 +-
src/cl_device_data.h | 2 +
src/cl_device_id.c | 10 +-
src/cl_device_id.h | 2 +-
src/cl_event.c | 2 +-
src/cl_gen75_device.h | 2 +-
src/cl_gen7_device.h | 2 +-
src/cl_gen8_device.h | 2 +-
src/cl_gen9_device.h | 4 +-
src/cl_gt_device.h | 14 ++-
src/cl_image.c | 2 +
src/cl_khr_icd.c | 28 ++---
src/cl_platform_id.c | 2 +-
src/cl_platform_id.h | 6 +-
src/cl_program.c | 1 +
utests/CMakeLists.txt | 22 ++--
utests/compiler_atomic_functions_20.cpp | 4 +-
utests/compiler_device_enqueue.cpp | 4 +-
utests/compiler_generic_atomic.cpp | 2 +
utests/compiler_generic_pointer.cpp | 2 +
utests/compiler_pipe_builtin.cpp | 9 +-
utests/compiler_program_global.cpp | 5 +-
utests/compiler_sampler.cpp | 2 +
utests/runtime_pipe_query.cpp | 2 +
utests/utest_helper.cpp | 22 ++--
utests/utest_helper.hpp | 6 +-
39 files changed, 499 insertions(+), 155 deletions(-)
diff --git a/CMake/FindLLVM.cmake b/CMake/FindLLVM.cmake
index 6129909..5457f24 100644
--- a/CMake/FindLLVM.cmake
+++ b/CMake/FindLLVM.cmake
@@ -8,12 +8,12 @@
# LLVM_FOUND - True if llvm found.
if (LLVM_INSTALL_DIR)
find_program(LLVM_CONFIG_EXECUTABLE
- NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
+ NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config-39 llvm-config-3.9 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
DOC "llvm-config executable"
PATHS ${LLVM_INSTALL_DIR} NO_DEFAULT_PATH)
else (LLVM_INSTALL_DIR)
find_program(LLVM_CONFIG_EXECUTABLE
- NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
+ NAMES llvm-config-37 llvm-config-3.7 llvm-config-36 llvm-config-3.6 llvm-config-38 llvm-config-3.8 llvm-config-39 llvm-config-3.9 llvm-config llvm-config-35 llvm-config-3.5 llvm-config-34 llvm-config-3.4
DOC "llvm-config executable")
endif (LLVM_INSTALL_DIR)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 02b5d88..face3ce 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -195,23 +195,21 @@ ELSE(XFIXES_FOUND)
ENDIF(XFIXES_FOUND)
ENDIF(X11_FOUND)
-OPTION(ENABLE_GL_SHARING "cl_khr_gl_sharing" OFF)
-
-IF(ENABLE_GL_SHARING)
- pkg_check_modules(OPENGL REQUIRED gl)
- IF(OPENGL_FOUND)
- MESSAGE(STATUS "Looking for OpenGL - found at ${OPENGL_PREFIX}")
- ELSE(OPENGL_FOUND)
- MESSAGE(STATUS "Looking for OpenGL - not found")
- ENDIF(OPENGL_FOUND)
- pkg_check_modules(EGL REQUIRED egl>=11.0.0)
- IF(EGL_FOUND)
- MESSAGE(STATUS "Looking for EGL - found at ${EGL_PREFIX}")
- ELSE(EGL_FOUND)
- MESSAGE(STATUS "Looking for EGL - not found")
- ENDIF(EGL_FOUND)
-ENDIF(ENABLE_GL_SHARING)
-
+pkg_check_modules(OPENGL QUIET gl>=13.0.0)
+IF(OPENGL_FOUND)
+ MESSAGE(STATUS "Looking for OpenGL - found at ${OPENGL_PREFIX} ${OPENGL_VERSION}")
+ELSE(OPENGL_FOUND)
+ MESSAGE(STATUS "Looking for OpenGL (>=13.0.0) - not found, cl_khr_gl_sharing will be disabled")
+ENDIF(OPENGL_FOUND)
+pkg_check_modules(EGL QUIET egl>=13.0.0)
+IF(EGL_FOUND)
+ MESSAGE(STATUS "Looking for EGL - found at ${EGL_PREFIX} ${EGL_VERSION}")
+ELSE(EGL_FOUND)
+ MESSAGE(STATUS "Looking for EGL (>=13.0.0) - not found, cl_khr_gl_sharing will be disabled")
+ENDIF(EGL_FOUND)
+
+OPTION(OCLICD_COMPAT "OCL ICD compatibility mode" ON)
+IF(OCLICD_COMPAT)
Find_Package(OCLIcd)
IF(OCLIcd_FOUND)
MESSAGE(STATUS "Looking for OCL ICD header file - found")
@@ -222,7 +220,9 @@ IF(OCLIcd_FOUND)
install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${ICD_FILE_NAME} DESTINATION /etc/OpenCL/vendors)
ELSE(OCLIcd_FOUND)
MESSAGE(STATUS "Looking for OCL ICD header file - not found")
+ MESSAGE(FATAL_ERROR "OCL ICD loader miss. If you really want to disable OCL ICD support, please run cmake with option -DOCLICD_COMPAT=0.")
ENDIF(OCLIcd_FOUND)
+ENDIF(OCLICD_COMPAT)
Find_Package(PythonInterp)
@@ -231,20 +231,18 @@ IF (EXPERIMENTAL_DOUBLE)
ADD_DEFINITIONS(-DENABLE_FP64)
ENDIF(EXPERIMENTAL_DOUBLE)
-OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support" OFF)
-IF (ENABLE_OPENCL_20)
- Find_Program(LSPCI lspci)
- IF (NOT LSPCI)
- MESSAGE(FATAL_ERROR "Looking for lspci - not found")
- ENDIF (NOT LSPCI)
- EXECUTE_PROCESS(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/GetGenID.sh"
- RESULT_VARIABLE SUPPORT_OCL20_DEVICE
- OUTPUT_VARIABLE PCI_ID_NOT_USED)
-
- IF (NOT SUPPORT_OCL20_DEVICE EQUAL 1)
- MESSAGE(FATAL_ERROR "Only SKL and newer devices support OpenCL 2.0 now, your device don't support.")
- ENDIF (NOT SUPPORT_OCL20_DEVICE EQUAL 1)
+SET(CAN_OPENCL_20 ON)
+IF (CMAKE_SIZEOF_VOID_P EQUAL 4)
+ SET(CAN_OPENCL_20 OFF)
+ENDIF (CMAKE_SIZEOF_VOID_P EQUAL 4)
+IF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN)
+ SET(CAN_OPENCL_20 OFF)
+ENDIF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN)
+IF (LLVM_VERSION_NODOT VERSION_LESS 39)
+ SET(CAN_OPENCL_20 OFF)
+ENDIF (LLVM_VERSION_NODOT VERSION_LESS 39)
+IF (ENABLE_OPENCL_20)
IF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN)
MESSAGE(FATAL_ERROR "Please update libdrm to version 2.4.66 or later to enable OpenCL 2.0.")
ENDIF (NOT HAVE_DRM_INTEL_BO_SET_SOFTPIN)
@@ -253,12 +251,32 @@ IF (ENABLE_OPENCL_20)
MESSAGE(FATAL_ERROR "Please update LLVM to version 3.9 or later to enable OpenCL 2.0.")
ENDIF (LLVM_VERSION_NODOT VERSION_LESS 39)
- ADD_DEFINITIONS(-DENABLE_OPENCL_20)
+ IF (CMAKE_SIZEOF_VOID_P EQUAL 4)
+ MESSAGE(FATAL_ERROR "Please use x64 host to enable OpenCL 2.0.")
+ ENDIF (CMAKE_SIZEOF_VOID_P EQUAL 4)
ENDIF(ENABLE_OPENCL_20)
+IF (DEFINED ENABLE_OPENCL_20)
+ IF (ENABLE_OPENCL_20 AND CAN_OPENCL_20)
+ SET(CAN_OPENCL_20 ON)
+ ELSE(ENABLE_OPENCL_20 AND CAN_OPENCL_20)
+ SET(CAN_OPENCL_20 OFF)
+ ENDIF (ENABLE_OPENCL_20 AND CAN_OPENCL_20)
+ENDIF (DEFINED ENABLE_OPENCL_20)
+
+OPTION(ENABLE_OPENCL_20 "Enable opencl 2.0 support" ${CAN_OPENCL_20})
+
+IF (CAN_OPENCL_20)
+ SET (ENABLE_OPENCL_20 ON)
+ MESSAGE(STATUS "Building with OpenCL 2.0.")
+ ADD_DEFINITIONS(-DENABLE_OPENCL_20)
+ELSE (CAN_OPENCL_20)
+ MESSAGE(STATUS "Building with OpenCL 1.2.")
+ENDIF(CAN_OPENCL_20)
+
set (LIBCL_DRIVER_VERSION_MAJOR 1)
set (LIBCL_DRIVER_VERSION_MINOR 3)
-set (LIBCL_DRIVER_VERSION_PATCH 0)
+set (LIBCL_DRIVER_VERSION_PATCH 1)
if (ENABLE_OPENCL_20)
set (LIBCL_C_VERSION_MAJOR 2)
set (LIBCL_C_VERSION_MINOR 0)
@@ -279,7 +297,6 @@ IF(NOT X11_FOUND)
ENDIF(NOT X11_FOUND)
# libva & libva-x11
-#pkg_check_modules(LIBVA REQUIRED libva>=0.36.0)
pkg_check_modules(LIBVA REQUIRED libva)
pkg_check_modules(LIBVA-X11 REQUIRED libva-x11)
set(LIBVA_BUF_SH_DEP false)
@@ -301,7 +318,12 @@ IF(LIBVA_FOUND AND LIBVA-X11_FOUND)
ELSE(LIBVA_VERSION VERSION_LESS "0.36.0" OR LIBVA-X11_VERSION VERSION_LESS "0.36.0")
set(LIBVA_BUF_SH_DEP true)
ENDIF(LIBVA_VERSION VERSION_LESS "0.36.0" OR LIBVA-X11_VERSION VERSION_LESS "0.36.0")
+ELSE(LIBVA_FOUND AND LIBVA-X11_FOUND)
+ MESSAGE(STATUS "Example libva_buffer_sharing and v4l2_buffer_sharing will not be built")
ENDIF(LIBVA_FOUND AND LIBVA-X11_FOUND)
+IF(NOT (OPENGL_FOUND AND EGL_FOUND AND X11_FOUND))
+ MESSAGE(STATUS "Example gl_buffer_sharing will not be built")
+ENDIF(NOT (OPENGL_FOUND AND EGL_FOUND AND X11_FOUND))
ENDIF(BUILD_EXAMPLES)
ADD_SUBDIRECTORY(include)
diff --git a/backend/src/backend/gen_insn_compact.cpp b/backend/src/backend/gen_insn_compact.cpp
index 62fcb61..22305f7 100644
--- a/backend/src/backend/gen_insn_compact.cpp
+++ b/backend/src/backend/gen_insn_compact.cpp
@@ -804,6 +804,8 @@ namespace gbe {
if( control_index == -1) return false;
if( src0.negation + src1.negation + src2.negation > 1)
return false;
+ if( src0.absolute + src1.absolute + src2.absolute > 0)
+ return false;
GenCompactInstruction *insn = p->nextCompact(opcode);
insn->src3Insn.bits1.control_index = control_index;
diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp b/backend/src/backend/gen_insn_selection_optimize.cpp
index 512a5bd..d2e0fb9 100644
--- a/backend/src/backend/gen_insn_selection_optimize.cpp
+++ b/backend/src/backend/gen_insn_selection_optimize.cpp
@@ -162,7 +162,10 @@ namespace gbe
assert(insn.opcode == SEL_OP_MOV);
const GenRegister& src = insn.src(0);
const GenRegister& dst = insn.dst(0);
- if (src.type != dst.type || src.file != dst.file || src.hstride != dst.hstride)
+ if (src.type != dst.type || src.file != dst.file)
+ return;
+
+ if (src.hstride != GEN_HORIZONTAL_STRIDE_0 && src.hstride != dst.hstride )
return;
if (liveout.find(dst.reg()) != liveout.end())
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 85d0aa9..09c79d8 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -31,6 +31,7 @@
#include "ir/value.hpp"
#include "ir/unit.hpp"
#include "ir/printf.hpp"
+#include "src/cl_device_data.h"
#ifdef GBE_COMPILER_AVAILABLE
#include "llvm/llvm_to_gen.hpp"
@@ -855,6 +856,7 @@ namespace gbe {
size_t *errSize,
uint32_t &oclVersion)
{
+ uint32_t maxoclVersion = oclVersion;
std::string pchFileName;
bool findPCH = false;
#if defined(__ANDROID__)
@@ -1022,15 +1024,9 @@ EXTEND_QUOTE:
}
if (useDefaultCLCVersion) {
-#ifdef ENABLE_OPENCL_20
- clOpt.push_back("-D__OPENCL_C_VERSION__=200");
- clOpt.push_back("-cl-std=CL2.0");
- oclVersion = 200;
-#else
clOpt.push_back("-D__OPENCL_C_VERSION__=120");
clOpt.push_back("-cl-std=CL1.2");
oclVersion = 120;
-#endif
}
//for clCompilerProgram usage.
if(temp_header_path){
@@ -1061,7 +1057,12 @@ EXTEND_QUOTE:
clOpt.push_back("-include-pch");
clOpt.push_back(pchFileName);
}
-
+ if (oclVersion > maxoclVersion){
+ if (err && stringSize > 0 && errSize) {
+ *errSize = snprintf(err, stringSize, "Requested OpenCL version %lf is higher than maximum supported version %lf\n", (float)oclVersion/100.0,(float)maxoclVersion/100.0);
+ }
+ return false;
+ }
return true;
}
@@ -1076,7 +1077,7 @@ EXTEND_QUOTE:
std::vector<std::string> clOpt;
std::string dumpLLVMFileName, dumpASMFileName;
std::string dumpSPIRBinaryName;
- uint32_t oclVersion = 0;
+ uint32_t oclVersion = MAX_OCLVERSION(deviceID);
if (!processSourceAndOption(source, options, NULL, clOpt,
dumpLLVMFileName, dumpASMFileName, dumpSPIRBinaryName,
optLevel,
@@ -1139,7 +1140,7 @@ EXTEND_QUOTE:
std::vector<std::string> clOpt;
std::string dumpLLVMFileName, dumpASMFileName;
std::string dumpSPIRBinaryName;
- uint32_t oclVersion = 0;
+ uint32_t oclVersion = MAX_OCLVERSION(deviceID);
if (!processSourceAndOption(source, options, temp_header_path, clOpt,
dumpLLVMFileName, dumpASMFileName, dumpSPIRBinaryName,
optLevel, stringSize, err, errSize, oclVersion))
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 664d2ff..3fefa92 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -308,7 +308,7 @@ namespace gbe
if(StrTy)
return getTypeByteSize(unit,StrTy);
}
- GBE_ASSERTM(false, "Unspported type name");
+ GBE_ASSERTM(false, "Unsupported type name");
return 0;
}
#undef TYPESIZEVEC
@@ -3272,11 +3272,41 @@ namespace gbe
case Instruction::Sub:
case Instruction::FSub: ctx.SUB(type, dst, src0, src1); break;
case Instruction::Mul:
+ {
+ //LLVM always put constant to src1, but also add the src0 constant check.
+ ConstantInt *c = dyn_cast<ConstantInt>(I.getOperand(0));
+ int index = 0;
+ if (c == NULL) {
+ c = dyn_cast<ConstantInt>(I.getOperand(0));
+ index = 1;
+ }
+ if (c != NULL && isPowerOf<2>(c->getSExtValue())) {
+ c = ConstantInt::get(c->getType(), logi2(c->getZExtValue()));
+ if(index == 0)
+ ctx.SHL(type, dst, src1, this->getRegister(c));
+ else
+ ctx.SHL(type, dst, src0, this->getRegister(c));
+ } else {
+ ctx.MUL(type, dst, src0, src1);
+ }
+ break;
+ }
case Instruction::FMul: ctx.MUL(type, dst, src0, src1); break;
case Instruction::URem: ctx.REM(getUnsignedType(ctx, I.getType()), dst, src0, src1); break;
case Instruction::SRem:
case Instruction::FRem: ctx.REM(type, dst, src0, src1); break;
- case Instruction::UDiv: ctx.DIV(getUnsignedType(ctx, I.getType()), dst, src0, src1); break;
+ case Instruction::UDiv:
+ {
+ //Only check divisor for DIV
+ ConstantInt *c = dyn_cast<ConstantInt>(I.getOperand(1));
+ if (c != NULL && isPowerOf<2>(c->getZExtValue())) {
+ c = ConstantInt::get(c->getType(), logi2(c->getZExtValue()));
+ ctx.SHR(getUnsignedType(ctx, I.getType()), dst, src0, this->getRegister(c));
+ } else {
+ ctx.DIV(getUnsignedType(ctx, I.getType()), dst, src0, src1);
+ }
+ break;
+ }
case Instruction::SDiv:
case Instruction::FDiv: ctx.DIV(type, dst, src0, src1); break;
case Instruction::And: ctx.AND(type, dst, src0, src1); break;
diff --git a/backend/src/llvm/llvm_passes.cpp b/backend/src/llvm/llvm_passes.cpp
index 367a2c3..c5f3ffe 100644
--- a/backend/src/llvm/llvm_passes.cpp
+++ b/backend/src/llvm/llvm_passes.cpp
@@ -276,8 +276,6 @@ namespace gbe
uint32_t align = getAlignmentByte(unit, elementType);
size += getPadding(size, align);
- Constant* newConstSize =
- ConstantInt::get(IntegerType::get(GEPInst->getContext(), ptrSize), size);
Value *operand = GEPInst->getOperand(op);
@@ -308,13 +306,22 @@ namespace gbe
}
}
#endif
- Value* tmpMul = operand;
+ Value* tmpOffset = operand;
if (size != 1) {
- tmpMul = BinaryOperator::Create(Instruction::Mul, newConstSize, operand,
- "", GEPInst);
+ if (isPowerOf<2>(size)) {
+ Constant* shiftAmnt =
+ ConstantInt::get(IntegerType::get(GEPInst->getContext(), ptrSize), logi2(size));
+ tmpOffset = BinaryOperator::Create(Instruction::Shl, operand, shiftAmnt,
+ "", GEPInst);
+ } else{
+ Constant* sizeConst =
+ ConstantInt::get(IntegerType::get(GEPInst->getContext(), ptrSize), size);
+ tmpOffset = BinaryOperator::Create(Instruction::Mul, sizeConst, operand,
+ "", GEPInst);
+ }
}
currentAddrInst =
- BinaryOperator::Create(Instruction::Add, currentAddrInst, tmpMul,
+ BinaryOperator::Create(Instruction::Add, currentAddrInst, tmpOffset,
"", GEPInst);
}
diff --git a/docs/Beignet.mdwn b/docs/Beignet.mdwn
index 5c62b4c..b1019da 100644
--- a/docs/Beignet.mdwn
+++ b/docs/Beignet.mdwn
@@ -29,7 +29,8 @@ two more packages (the following package name is for Ubuntu):
- ocl-icd-libopencl1
If you don't want to enable ICD, or your system doesn't have ICD OpenCL support,
-you can still link to the beignet OpenCL library. You can find the beignet/libcl.so
+you must explicitly disable ICD support by running cmake with option `-DOCLICD_COMPAT=0`
+then you can still link to the beignet OpenCL library. You can find the beignet/libcl.so
in your system's library installation directories.
Note that the compiler depends on LLVM (Low-Level Virtual Machine project), and the
@@ -160,15 +161,13 @@ Supported Targets
OpenCL 2.0
----------
-From release v1.3.0, beignet support OpenCL 2.0. By default, OpenCL 2.0 support is disabled, you can enable it when cmake with option
--DENABLE_OPENCL_20=1. Please remember that to enable OpenCL 2.0, there are some dependencies. First, OpenCL 2.0 only support the targets
-from Skylake, include Skylake, Kabylake and Apollolake. Then, clang supports all OpenCL 2.0 feature from 3.9. So to enable OpenCL 2.0,
-you must update LLVM/clang to 3.9 or later. And also requeires libdrm at least 2.4.66.
-After enable OpenCL 2.0, beignet complies with OpenCL 2.0 spec, but some OpenCL 2.0 features are simulated by software, there is no performance
-gain, such as pipe and device queues, especially device queues.
-If you build beignet with OpenCL 2.0 enabled and your kernel don't use the OpenCL 2.0 features, please pass a build option -cl-std=CL1.2 for
-performance, the OpenCL 2.0 uses more registers and has lots of int64 operations, which may hurt performance, and beignet will continue to improve
-OpenCL 2.0 performance.
+From release v1.3.0, beignet supports OpenCL 2.0 on Skylake and later hardware.
+This requires LLVM/Clang 3.9 or later, libdrm 2.4.66 or later and x86_64 linux.
+As required by the OpenCL specification, kernels are compiled as OpenCL C 1.2 by default; to use 2.0 they
+must explicitly request it with the -cl-std=CL2.0 build option. As OpenCL 2.0 is likely to be slower than
+1.2, we recommend that this is used only where needed. (This is because 2.0 uses more registers and has
+lots of int64 operations, and some of the 2.0 features (pipes and especially device queues) are implemented
+in software so do not provide any performance gain.) Beignet will continue to improve OpenCL 2.0 performance.
Known Issues
------------
@@ -222,10 +221,8 @@ Known Issues
This loses some precision but gains performance.
* cl\_khr\_gl\_sharing.
- This extension highly depends on mesa support. It seems that mesa would not provide
- such type of extensions, we may have to hack with mesa source code to support this
- extension. This feature used to work with a previous mesa git version. But now, it's
- simply broken.
+ This extension is partially implemented(the most commonly used part), and we will implement
+ other parts based on requirement.
Project repository
------------------
@@ -283,6 +280,7 @@ Documents for OpenCL application developers
- [[Kernel Optimization Guide|Beignet/optimization-guide]]
- [[Libva Buffer Sharing|Beignet/howto/libva-buffer-sharing-howto]]
- [[V4l2 Buffer Sharing|Beignet/howto/v4l2-buffer-sharing-howto]]
+- [[OpenGL Buffer Sharing|Beignet/howto/gl-buffer-sharing-howto]]
- [[Video Motion Estimation|Beignet/howto/video-motion-estimation-howto]]
- [[Stand Alone Unit Test|Beignet/howto/stand-alone-utest-howto]]
- [[Android build|Beignet/android-build-howto]]
diff --git a/docs/NEWS.mdwn b/docs/NEWS.mdwn
index 465f38b..601a07f 100644
--- a/docs/NEWS.mdwn
+++ b/docs/NEWS.mdwn
@@ -1,5 +1,8 @@
# News
+## Mar 13, 2017
+[Beignet 1.3.1](https://01.org/beignet/downloads/beignet-1.3.1-2017-03-13) is released. This is a bug-fix release.
+
## Jan 20, 2017
[Beignet 1.3.0](https://01.org/beignet/downloads/beignet-1.3.0-2017-01-20) is released. This is a major release. Please see the release notes for more information.
diff --git a/docs/howto/gl-buffer-sharing-howto.mdwn b/docs/howto/gl-buffer-sharing-howto.mdwn
new file mode 100644
index 0000000..6b3a751
--- /dev/null
+++ b/docs/howto/gl-buffer-sharing-howto.mdwn
@@ -0,0 +1,82 @@
+GL Buffer Sharing HowTo
+=========================
+
+Beignet now support cl_khr_gl_sharing partially(the most commonly used part), which is an offcial
+extension of Khronos OpenCL. With this extension, Beignet can create memory object from OpenGL/OpenGL
+ES buffer, texture or renderbuffer object with zero-copy. Currently, we just support create memory
+object from GL buffer object or 2d texture(the most common target type). We will support creating
+from other GL target type if necessary.
+
+Prerequisite
+------------
+
+Mesa GL library and Mesa EGL libray are required. Both version should be greater or equal than
+13.0.0.
+
+Steps
+-----
+
+A typical procedure of using cl_khr_gl_sharing is as below:
+
+- Basic egl routine(eglGetDisplay, eglInitialize, eglCreateContext...).
+
+- Create GL 2d texture in normal OpenGL way.
+
+- Check whether cl_khr_gl_sharing is supported by Beignet (Whether cl_khr_gl_sharing is present
+ in CL_DEVICE_EXTENSIONS string).
+
+- Create cl context with following cl_context_properties:
+ cl_context_properties *props=new cl_context_properties[7];
+ int i = 0;
+ props[i++] = CL_CONTEXT_PLATFORM;
+ props[i++] = (cl_context_properties)platform; //Valid OpenCL handle
+ props[i++] = CL_EGL_DISPLAY_KHR; //We only support CL_EGL_DISPLAY_KHR now
+ props[i++] = (cl_context_properties)eglGetCurrentDisplay(); //EGLDisplay handle of the display
+ props[i++] = CL_GL_CONTEXT_KHR; //We only support CL_GL_CONTEXT_KHR now
+ props[i++] = (cl_context_properties)eglGetCurrentContext(); //EGLContext created by above EGLDisplay
+ props[i++] = 0;
+
+- Create cl image object from GL 2d texture by calling clCreateFromGLTexture.
+
+- Ensure any pending GL operations which access this GL 2d texture have completed by glFinish.
+
+- Acquire cl image object by calling clEnqueueAcquireGLObjects.
+
+- Access this cl image object as an usual cl image object.
+
+- Relase cl image object by calling clEnqueueReleaseGLObjects.
+
+- Ensure any pending OpenCL operations which access this cl image object have completed by clFinish.
+
+- Do other operation on GL 2d texture.
+
+Sample code
+-----------
+
+We have developed an example showing how to utilize cl_khr_gl_sharing in examples/gl_buffer_sharing
+directory. A cl image object is created from a gl 2d texutre and processed by OpenCL kernel, then
+is shown on screen.
+
+Steps to build and run this example:
+
+- Install mesa gl and egl library(version >= 13.0.0). X11 is also required.
+
+- Add option -DBUILD_EXAMPLES=ON to enable building examples when running cmake, such as:
+ `> mkdir build`
+ `> cd build`
+ `> cmake -DBUILD_EXAMPLES=ON ../`
+
+- Build source code:
+ `> make`
+
+- Export your X Display (if you login to your machine by ssh):
+ `> export DISPLAY=:0.0`
+
+- Run:
+ `> cd examples`
+ `> . ../utests/setenv.sh`
+ `> ./example-gl_buffer_sharing`
+
+More references
+---------------
+https://www.khronos.org/registry/OpenCL/specs/opencl-1.2-extensions.pdf
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 850b3d9..a9e35c2 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,44 +1,64 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/../utests
${CMAKE_CURRENT_SOURCE_DIR}/../include
- ${X11_INCLUDE_DIR})
+ ${X11_INCLUDE_DIR}
+ ${OPENGL_INCLUDE_DIRS}
+ ${EGL_INCLUDE_DIRS})
+
+set (ocl_example_helper_sources
+ ../utests/utest_error.c
+ ../utests/utest_assert.cpp
+ ../utests/utest_file_map.cpp
+ ../utests/utest_helper.cpp)
+
+IF(OPENGL_FOUND AND EGL_FOUND AND X11_FOUND)
+ ADD_DEFINITIONS(-DHAS_GL_EGL_X11)
+ENDIF(OPENGL_FOUND AND EGL_FOUND AND X11_FOUND)
+
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-deprecated-declarations" )
+
+ADD_LIBRARY(ocl_example_helper SHARED ${ocl_example_helper_sources})
+
+TARGET_LINK_LIBRARIES(ocl_example_helper cl m ${X11_LIBRARIES} ${OPENGL_LIBRARIES} ${EGL_LIBRARIES})
IF(LIBVA_BUF_SH_DEP OR V4L2_BUF_SH_DEP)
-EXECUTE_PROCESS(COMMAND ls "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva" OUTPUT_VARIABLE LS_RESULT)
-IF ("LS_RESULT" STREQUAL "")
-EXECUTE_PROCESS(COMMAND git submodule init WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
-EXECUTE_PROCESS(COMMAND git submodule update WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
-EXECUTE_PROCESS(COMMAND git checkout master WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva)
-ENDIF ("LS_RESULT" STREQUAL "")
+ EXECUTE_PROCESS(COMMAND ls "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva" OUTPUT_VARIABLE LS_RESULT)
+ IF ("LS_RESULT" STREQUAL "")
+ EXECUTE_PROCESS(COMMAND git submodule init WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
+ EXECUTE_PROCESS(COMMAND git submodule update WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
+ EXECUTE_PROCESS(COMMAND git checkout master WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva)
+ ENDIF ("LS_RESULT" STREQUAL "")
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva/va
- ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva/test/common)
+ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva/va
+ ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/libva/test/common)
-link_directories (${LIBVA_LIBDIR}
- ${LIBVA-X11_LIBDIR})
+ link_directories (${LIBVA_LIBDIR}
+ ${LIBVA-X11_LIBDIR})
-set (va_ocl_basic_sources
- ../utests/utest_error.c
- ../utests/utest_assert.cpp
- ../utests/utest_file_map.cpp
- ../utests/utest_helper.cpp
- ./thirdparty/libva/test/common/va_display.c
- ./thirdparty/libva/test/common/va_display_x11.c)
+ set (va_display_sources
+ ./thirdparty/libva/test/common/va_display.c
+ ./thirdparty/libva/test/common/va_display_x11.c)
-ADD_DEFINITIONS(-DHAVE_VA_X11)
+ ADD_DEFINITIONS(-DHAVE_VA_X11)
-ADD_LIBRARY(va_ocl_basic SHARED ${va_ocl_basic_sources})
+ ADD_LIBRARY(va_display SHARED ${va_display_sources})
-TARGET_LINK_LIBRARIES(va_ocl_basic cl m va va-x11 ${X11_X11_LIB})
+ TARGET_LINK_LIBRARIES(va_display cl m va va-x11 ${X11_LIBRARIES})
-IF(LIBVA_BUF_SH_DEP)
-ADD_DEFINITIONS(-DINPUT_NV12_DEFAULT="${CMAKE_CURRENT_SOURCE_DIR}/libva_buffer_sharing/256_128.nv12")
-ADD_EXECUTABLE(example-libva_buffer_sharing ./libva_buffer_sharing/libva_buffer_sharing.cpp)
-TARGET_LINK_LIBRARIES(example-libva_buffer_sharing va_ocl_basic)
-ENDIF(LIBVA_BUF_SH_DEP)
+ IF(LIBVA_BUF_SH_DEP)
+ ADD_DEFINITIONS(-DINPUT_NV12_DEFAULT="${CMAKE_CURRENT_SOURCE_DIR}/libva_buffer_sharing/256_128.nv12")
+ ADD_EXECUTABLE(example-libva_buffer_sharing ./libva_buffer_sharing/libva_buffer_sharing.cpp)
+ TARGET_LINK_LIBRARIES(example-libva_buffer_sharing ocl_example_helper va_display)
+ ENDIF(LIBVA_BUF_SH_DEP)
-IF(V4L2_BUF_SH_DEP)
-ADD_EXECUTABLE(example-v4l2_buffer_sharing ./v4l2_buffer_sharing/v4l2_buffer_sharing.cpp)
-TARGET_LINK_LIBRARIES(example-v4l2_buffer_sharing va_ocl_basic)
-ENDIF(V4L2_BUF_SH_DEP)
+ IF(V4L2_BUF_SH_DEP)
+ ADD_EXECUTABLE(example-v4l2_buffer_sharing ./v4l2_buffer_sharing/v4l2_buffer_sharing.cpp)
+ TARGET_LINK_LIBRARIES(example-v4l2_buffer_sharing ocl_example_helper va_display)
+ ENDIF(V4L2_BUF_SH_DEP)
ENDIF(LIBVA_BUF_SH_DEP OR V4L2_BUF_SH_DEP)
+
+IF(OPENGL_FOUND AND EGL_FOUND AND X11_FOUND)
+ ADD_EXECUTABLE(example-gl_buffer_sharing ./gl_buffer_sharing/gl_buffer_sharing.cpp)
+ TARGET_LINK_LIBRARIES(example-gl_buffer_sharing ocl_example_helper ${OPENGL_LIBRARIES} ${EGL_LIBRARIES})
+ENDIF(OPENGL_FOUND AND EGL_FOUND AND X11_FOUND)
diff --git a/examples/gl_buffer_sharing/gl_buffer_sharing.cpp b/examples/gl_buffer_sharing/gl_buffer_sharing.cpp
new file mode 100644
index 0000000..2d7e10d
--- /dev/null
+++ b/examples/gl_buffer_sharing/gl_buffer_sharing.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "utest_helper.hpp"
+#include <stdio.h>
+
+static cl_int cl_status;
+
+const size_t w = EGL_WINDOW_WIDTH;
+const size_t h = EGL_WINDOW_HEIGHT;
+static GLuint tex;
+
+static void draw(){
+ XEvent event;
+
+ float vertices[8] = {-1, 1, 1, 1, 1, -1, -1, -1};
+ float tex_coords[8] = {0, 0, 1, 0, 1, 1, 0, 1};
+ uint32_t color0 = 0x0000ff00;
+
+ for (;;)
+ {
+ XNextEvent(xDisplay, &event);
+
+ if (event.type == Expose)
+ {
+ glClearColor(0.0, 1.0, 0.0, 1.0);
+ glClear(GL_COLOR_BUFFER_BIT);
+
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(color0), &color0);
+ globals[0] = w;
+ globals[1] = h;
+ locals[0] = 16;
+ locals[1] = 16;
+ glFinish();
+ OCL_ENQUEUE_ACQUIRE_GL_OBJECTS(0);
+ OCL_NDRANGE(2);
+ OCL_ENQUEUE_RELEASE_GL_OBJECTS(0);
+ OCL_FINISH();
+
+ glBindTexture(GL_TEXTURE_2D, tex);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glEnable(GL_TEXTURE_2D);
+ glDisable(GL_BLEND);
+ glVertexPointer(2, GL_FLOAT, sizeof(float) * 2, vertices);
+ glEnableClientState(GL_VERTEX_ARRAY);
+ glClientActiveTexture(GL_TEXTURE0);
+ glTexCoordPointer(2, GL_FLOAT, sizeof(float) * 2, tex_coords);
+ glEnableClientState(GL_TEXTURE_COORD_ARRAY);
+ glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
+ glFinish();
+ eglSwapBuffers(eglDisplay, eglSurface);
+ }
+ if (event.type == KeyPress)
+ break;
+ }
+}
+
+
+static void initialize_ocl_gl(){
+
+ //ocl initialization: basic & create kernel & check extension
+ printf("\n***********************OpenCL info: ***********************\n");
+ if ((cl_status = cl_test_init("runtime_fill_gl_image.cl", "runtime_fill_gl_image", SOURCE)) != 0){
+ fprintf(stderr, "cl_test_init error\n");
+ exit(1);
+ }
+
+ if (eglContext == EGL_NO_CONTEXT) {
+ fprintf(stderr, "There is no valid egl context! Exit!\n");
+ exit(1);
+ }
+
+ XMapWindow(xDisplay, xWindow);
+
+ // Setup kernel and images
+ glGenTextures(1, &tex);
+ glBindTexture(GL_TEXTURE_2D, tex);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, w, h, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, NULL);
+
+ //Create cl image from miplevel 0
+ OCL_CREATE_GL_IMAGE(buf[0], 0, GL_TEXTURE_2D, 0, tex);
+}
+
+int main(int argc, char *argv[])
+{
+ initialize_ocl_gl();
+
+ draw();
+
+ //destroy resource of cl & gl
+ cl_test_destroy();
+
+ printf("\nExample run successfully!\n");
+}
diff --git a/kernels/runtime_fill_gl_image.cl b/kernels/runtime_fill_gl_image.cl
new file mode 100644
index 0000000..79d4054
--- /dev/null
+++ b/kernels/runtime_fill_gl_image.cl
@@ -0,0 +1,15 @@
+__kernel void
+runtime_fill_gl_image(image2d_t img, int color)
+{
+ int2 coord;
+ float4 color_v4;
+ int lgid_x = get_group_id(0);
+ int lgid_y = get_group_id(1);
+ int num_groups_x = get_num_groups(0);
+ int num_groups_y = get_num_groups(1);
+
+ coord.x = get_global_id(0);
+ coord.y = get_global_id(1);
+ color_v4 = (float4)( lgid_x/(float)num_groups_x, lgid_y/(float)num_groups_y, 1.0, 1.0);
+ write_imagef(img, coord, color_v4);
+}
diff --git a/src/cl_context.c b/src/cl_context.c
index 3f2e757..1ba2302 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -373,7 +373,7 @@ cl_context_delete(cl_context ctx)
ctx->internal_prgs[i] = NULL;
}
- if (ctx->internal_kernels[i]) {
+ if (ctx->built_in_kernels[i]) {
cl_kernel_delete(ctx->built_in_kernels[i]);
ctx->built_in_kernels[i] = NULL;
}
@@ -383,6 +383,7 @@ cl_context_delete(cl_context ctx)
ctx->built_in_prgs = NULL;
cl_free(ctx->prop_user);
+ cl_free(ctx->devices);
cl_driver_delete(ctx->drv);
CL_OBJECT_DESTROY_BASE(ctx);
cl_free(ctx);
diff --git a/src/cl_device_data.h b/src/cl_device_data.h
index 4ee4ca3..f3c5204 100644
--- a/src/cl_device_data.h
+++ b/src/cl_device_data.h
@@ -363,5 +363,7 @@
#define IS_GEN9(devid) (IS_SKYLAKE(devid) || IS_BROXTON(devid) || IS_KABYLAKE(devid))
+#define MAX_OCLVERSION(devid) (IS_GEN9(devid) ? 200 : 120)
+
#endif /* __CL_DEVICE_DATA_H__ */
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 31f8616..d4f4208 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -255,12 +255,16 @@ static struct _cl_device_id intel_kbl_gt4_device = {
};
LOCAL cl_device_id
-cl_get_gt_device(void)
+cl_get_gt_device(cl_device_type device_type)
{
cl_device_id ret = NULL;
const int device_id = cl_driver_get_device_id();
cl_device_id device = NULL;
+ //cl_get_gt_device only return GPU type device.
+ if (((CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_DEFAULT) & device_type) == 0)
+ return NULL;
+
#define DECL_INFO_STRING(BREAK, STRUCT, FIELD, STRING) \
STRUCT.FIELD = STRING; \
STRUCT.JOIN(FIELD,_sz) = sizeof(STRING); \
@@ -877,7 +881,7 @@ cl_get_device_ids(cl_platform_id platform,
cl_device_id device;
/* Do we have a usable device? */
- device = cl_get_gt_device();
+ device = cl_get_gt_device(device_type);
if (device) {
cl_self_test_res ret = cl_self_test(device, SELF_TEST_PASS);
if (ret == SELF_TEST_ATOMIC_FAIL) {
@@ -1603,7 +1607,7 @@ cl_devices_list_check(cl_uint num_devices, const cl_device_id *devices)
}
// TODO: We now just support Gen Device.
- if (devices[i] != cl_get_gt_device()) {
+ if (devices[i] != cl_get_gt_device(devices[i]->device_type)) {
return CL_INVALID_DEVICE;
}
}
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 9d8b512..6b8f2eb 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -154,7 +154,7 @@ extern cl_int cl_get_device_ids(cl_platform_id platform,
cl_uint * num_devices);
/* Get the intel GPU device we currently have in this machine (if any) */
-extern cl_device_id cl_get_gt_device(void);
+extern cl_device_id cl_get_gt_device(cl_device_type device_type);
/* Provide info about the device */
extern cl_int cl_get_device_info(cl_device_id device,
diff --git a/src/cl_event.c b/src/cl_event.c
index 3e1dc22..a2b16be 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -579,7 +579,7 @@ cl_event_exec(cl_event event, cl_int exec_to_status, cl_bool ignore_depends)
if (ret != CL_SUCCESS) {
assert(ret < 0);
- DEBUGP(DL_WARNING, "Exec event %p error, type is %d, error staus is %d",
+ DEBUGP(DL_WARNING, "Exec event %p error, type is %d, error status is %d",
event, event->event_type, ret);
ret = cl_event_set_status(event, ret);
assert(ret == CL_SUCCESS);
diff --git a/src/cl_gen75_device.h b/src/cl_gen75_device.h
index 7ef2b82..a07583f 100644
--- a/src/cl_gen75_device.h
+++ b/src/cl_gen75_device.h
@@ -21,7 +21,7 @@
.max_parameter_size = 1024,
.global_mem_cache_line_size = 64, /* XXX */
.global_mem_cache_size = 8 << 10, /* XXX */
-.local_mem_type = CL_GLOBAL,
+.local_mem_type = CL_LOCAL,
.local_mem_size = 64 << 10,
.scratch_mem_size = 2 << 20,
.max_mem_alloc_size = 2 * 1024 * 1024 * 1024ul,
diff --git a/src/cl_gen7_device.h b/src/cl_gen7_device.h
index e755cad..01aa0f3 100644
--- a/src/cl_gen7_device.h
+++ b/src/cl_gen7_device.h
@@ -21,7 +21,7 @@
.max_parameter_size = 1024,
.global_mem_cache_line_size = 64, /* XXX */
.global_mem_cache_size = 8 << 10, /* XXX */
-.local_mem_type = CL_GLOBAL,
+.local_mem_type = CL_LOCAL,
.local_mem_size = 64 << 10,
.scratch_mem_size = 12 << 10,
.max_mem_alloc_size = 2 * 1024 * 1024 * 1024ul,
diff --git a/src/cl_gen8_device.h b/src/cl_gen8_device.h
index 08fde48..e596825 100644
--- a/src/cl_gen8_device.h
+++ b/src/cl_gen8_device.h
@@ -21,7 +21,7 @@
.max_parameter_size = 1024,
.global_mem_cache_line_size = 64, /* XXX */
.global_mem_cache_size = 8 << 10, /* XXX */
-.local_mem_type = CL_GLOBAL,
+.local_mem_type = CL_LOCAL,
.local_mem_size = 64 << 10,
.scratch_mem_size = 2 << 20,
.max_mem_alloc_size = 2 * 1024 * 1024 * 1024ul,
diff --git a/src/cl_gen9_device.h b/src/cl_gen9_device.h
index f50f9c7..b0a3ab8 100644
--- a/src/cl_gen9_device.h
+++ b/src/cl_gen9_device.h
@@ -21,11 +21,13 @@
.max_parameter_size = 1024,
.global_mem_cache_line_size = 64, /* XXX */
.global_mem_cache_size = 8 << 10, /* XXX */
-.local_mem_type = CL_GLOBAL,
+.local_mem_type = CL_LOCAL,
.local_mem_size = 64 << 10,
.scratch_mem_size = 2 << 20,
.max_mem_alloc_size = 4 * 1024 * 1024 * 1024ul,
.global_mem_size = 4 * 1024 * 1024 * 1024ul,
+#define GEN9_DEVICE 1
#include "cl_gt_device.h"
+#undef GEN9_DEVICE
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index cf5ad7a..ca4f3c5 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -16,7 +16,15 @@
*
* Author: Benjamin Segovia <benjamin.segovia at intel.com>
*/
-
+#undef LIBCL_VERSION_STRING
+#undef LIBCL_C_VERSION_STRING
+#ifdef GEN9_DEVICE
+#define LIBCL_VERSION_STRING GEN9_LIBCL_VERSION_STRING
+#define LIBCL_C_VERSION_STRING GEN9_LIBCL_C_VERSION_STRING
+#else
+#define LIBCL_VERSION_STRING NONGEN9_LIBCL_VERSION_STRING
+#define LIBCL_C_VERSION_STRING NONGEN9_LIBCL_C_VERSION_STRING
+#endif
/* Common fields for both all GT devices (IVB / SNB) */
.device_type = CL_DEVICE_TYPE_GPU,
.device_id=0,/* == device_id (set when requested) */
@@ -39,11 +47,7 @@
.native_vector_width_float = 4,
.native_vector_width_double = 2,
.native_vector_width_half = 8,
-#ifdef ENABLE_OPENCL_20
-.address_bits = 64,
-#else
.address_bits = 32,
-#endif
.svm_capabilities = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER,
.preferred_platform_atomic_alignment = 0,
.preferred_global_atomic_alignment = 0,
diff --git a/src/cl_image.c b/src/cl_image.c
index d059304..5ff459a 100644
--- a/src/cl_image.c
+++ b/src/cl_image.c
@@ -144,7 +144,9 @@ cl_image_get_intel_format(const cl_image_format *fmt)
case CL_RG:
switch (type) {
case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8G8_UNORM;
+ case CL_UNORM_INT16: return I965_SURFACEFORMAT_R16G16_UNORM;
case CL_UNSIGNED_INT8: return I965_SURFACEFORMAT_R8G8_UINT;
+ case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16G16_UINT;
default: return INTEL_UNSUPPORTED_FORMAT;
};
#if 0
diff --git a/src/cl_khr_icd.c b/src/cl_khr_icd.c
index 7b3600c..e4daf79 100644
--- a/src/cl_khr_icd.c
+++ b/src/cl_khr_icd.c
@@ -18,10 +18,14 @@
#include "cl_platform_id.h"
#include "CL/cl_intel.h" // for clGetKernelSubGroupInfoKHR
-/* The interop functions are not implemented in Beignet */
-#define CL_GL_INTEROP(x) NULL
-/* OpenCL 1.2 is not implemented in Beignet */
-#define CL_1_2_NOTYET(x) NULL
+/* The interop functions are only available if sharing is enabled */
+#ifdef HAS_GL_EGL
+#define CL_GL_INTEROP(x) x
+#else
+#define CL_GL_INTEROP(x) (void *) NULL
+#endif
+/* These are not yet implemented in Beignet */
+#define CL_NOTYET(x) (void *) NULL
/** Return platform list through ICD interface
* This code is used only if a client is linked directly against the library
@@ -114,13 +118,13 @@ struct _cl_icd_dispatch const cl_khr_icd_dispatch = {
clGetExtensionFunctionAddress,
CL_GL_INTEROP(clCreateFromGLBuffer),
CL_GL_INTEROP(clCreateFromGLTexture2D),
- CL_GL_INTEROP(clCreateFromGLTexture3D),
- CL_GL_INTEROP(clCreateFromGLRenderbuffer),
- CL_GL_INTEROP(clGetGLObjectInfo),
- CL_GL_INTEROP(clGetGLTextureInfo),
+ CL_NOTYET(clCreateFromGLTexture3D),
+ CL_NOTYET(clCreateFromGLRenderbuffer),
+ CL_NOTYET(clGetGLObjectInfo),
+ CL_NOTYET(clGetGLTextureInfo),
CL_GL_INTEROP(clEnqueueAcquireGLObjects),
CL_GL_INTEROP(clEnqueueReleaseGLObjects),
- CL_GL_INTEROP(clGetGLContextInfoKHR),
+ CL_NOTYET(clGetGLContextInfoKHR),
(void *) NULL,
(void *) NULL,
(void *) NULL,
@@ -135,9 +139,9 @@ struct _cl_icd_dispatch const cl_khr_icd_dispatch = {
clEnqueueReadBufferRect,
clEnqueueWriteBufferRect,
clEnqueueCopyBufferRect,
- CL_1_2_NOTYET(clCreateSubDevicesEXT),
- CL_1_2_NOTYET(clRetainDeviceEXT),
- CL_1_2_NOTYET(clReleaseDeviceEXT),
+ CL_NOTYET(clCreateSubDevicesEXT),
+ CL_NOTYET(clRetainDeviceEXT),
+ CL_NOTYET(clReleaseDeviceEXT),
#ifdef CL_VERSION_1_2
(void *) NULL,
clCreateSubDevices,
diff --git a/src/cl_platform_id.c b/src/cl_platform_id.c
index 1f21f5d..2afafb2 100644
--- a/src/cl_platform_id.c
+++ b/src/cl_platform_id.c
@@ -32,7 +32,7 @@
static struct _cl_platform_id intel_platform_data = {
DECL_INFO_STRING(profile, "FULL_PROFILE")
- DECL_INFO_STRING(version, LIBCL_VERSION_STRING)
+ DECL_INFO_STRING(version, GEN9_LIBCL_VERSION_STRING)
DECL_INFO_STRING(name, "Intel Gen OCL Driver")
DECL_INFO_STRING(vendor, "Intel")
DECL_INFO_STRING(icd_suffix_khr, "Intel")
diff --git a/src/cl_platform_id.h b/src/cl_platform_id.h
index 3fdb920..89e0857 100644
--- a/src/cl_platform_id.h
+++ b/src/cl_platform_id.h
@@ -72,8 +72,10 @@ extern cl_int cl_get_platform_ids(cl_uint num_entries,
#else
#define LIBCL_DRIVER_VERSION_STRING _JOINT(LIBCL_DRIVER_VERSION_MAJOR, LIBCL_DRIVER_VERSION_MINOR)
#endif
-#define LIBCL_VERSION_STRING "OpenCL " _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING
-#define LIBCL_C_VERSION_STRING "OpenCL C " _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING
+#define GEN9_LIBCL_VERSION_STRING "OpenCL " _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING
+#define GEN9_LIBCL_C_VERSION_STRING "OpenCL C " _JOINT(LIBCL_C_VERSION_MAJOR, LIBCL_C_VERSION_MINOR) " beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING
+#define NONGEN9_LIBCL_VERSION_STRING "OpenCL 1.2 beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING
+#define NONGEN9_LIBCL_C_VERSION_STRING "OpenCL C 1.2 beignet " LIBCL_DRIVER_VERSION_STRING BEIGNET_GIT_SHA1_STRING
#endif /* __CL_PLATFORM_ID_H__ */
diff --git a/src/cl_program.c b/src/cl_program.c
index 0358705..363aed5 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -334,6 +334,7 @@ cl_program_create_from_binary(cl_context ctx,
else if (isGenBinary((unsigned char*)program->binary)) {
program->opaque = interp_program_new_from_binary(program->ctx->devices[0]->device_id, program->binary, program->binary_sz);
if (UNLIKELY(program->opaque == NULL)) {
+ DEBUGP(DL_ERROR, "Incompatible binary, please delete the binary and generate again.");
err = CL_INVALID_PROGRAM;
goto error;
}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index db61844..43cf7f3 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -289,7 +289,15 @@ set (utests_sources
compiler_mix.cpp
compiler_math_3op.cpp
compiler_bsort.cpp
- builtin_kernel_block_motion_estimate_intel.cpp)
+ builtin_kernel_block_motion_estimate_intel.cpp
+ compiler_program_global.cpp
+ compiler_generic_atomic.cpp
+ compiler_atomic_functions_20.cpp
+ compiler_sampler.cpp
+ compiler_generic_pointer.cpp
+ runtime_pipe_query.cpp
+ compiler_pipe_builtin.cpp
+ compiler_device_enqueue.cpp)
if (LLVM_VERSION_NODOT VERSION_GREATER 34)
SET(utests_sources
@@ -297,18 +305,6 @@ if (LLVM_VERSION_NODOT VERSION_GREATER 34)
compiler_overflow.cpp)
endif (LLVM_VERSION_NODOT VERSION_GREATER 34)
-if (ENABLE_OPENCL_20)
- SET(utests_sources
- ${utests_sources}
- compiler_program_global.cpp
- compiler_generic_atomic.cpp
- compiler_atomic_functions_20.cpp
- compiler_sampler.cpp
- compiler_generic_pointer.cpp
- runtime_pipe_query.cpp
- compiler_pipe_builtin.cpp
- compiler_device_enqueue.cpp)
-endif (ENABLE_OPENCL_20)
if (NOT_BUILD_STAND_ALONE_UTEST)
if (X11_FOUND)
diff --git a/utests/compiler_atomic_functions_20.cpp b/utests/compiler_atomic_functions_20.cpp
index ea1ace5..e11d077 100644
--- a/utests/compiler_atomic_functions_20.cpp
+++ b/utests/compiler_atomic_functions_20.cpp
@@ -58,6 +58,8 @@ static void cpu_compiler_atomic(int *dst, int *src)
static void compiler_atomic_functions(const char* kernel_name)
{
+ if(!cl_check_ocl20(false))
+ return;
const size_t n = GROUP_NUM * LOCAL_SIZE;
int cpu_dst[24] = {0}, cpu_src[256];
@@ -65,7 +67,7 @@ static void compiler_atomic_functions(const char* kernel_name)
locals[0] = LOCAL_SIZE;
// Setup kernel and buffers
- OCL_CREATE_KERNEL_FROM_FILE("compiler_atomic_functions_20", kernel_name);
+ OCL_CALL(cl_kernel_init, "compiler_atomic_functions_20.cl", kernel_name, SOURCE, "-cl-std=CL2.0");
OCL_CREATE_BUFFER(buf[0], 0, 24 * sizeof(int), NULL);
OCL_CREATE_BUFFER(buf[1], 0, locals[0] * sizeof(int), NULL);
OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
diff --git a/utests/compiler_device_enqueue.cpp b/utests/compiler_device_enqueue.cpp
index a9e3e2d..698be4d 100644
--- a/utests/compiler_device_enqueue.cpp
+++ b/utests/compiler_device_enqueue.cpp
@@ -2,12 +2,14 @@
void compiler_device_enqueue(void)
{
+ if(!cl_check_ocl20(false))
+ return;
const size_t n = 32;
const uint32_t global_sz = 3;
uint32_t result = 0;
// Setup kernel and buffers
- OCL_CREATE_KERNEL("compiler_device_enqueue");
+ OCL_CALL(cl_kernel_init, "compiler_device_enqueue.cl", "compiler_device_enqueue", SOURCE, "-cl-std=CL2.0");
OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
OCL_SET_ARG(0, sizeof(uint32_t), &global_sz);
OCL_SET_ARG(1, sizeof(cl_mem), &buf[0]);
diff --git a/utests/compiler_generic_atomic.cpp b/utests/compiler_generic_atomic.cpp
index 9ed5f53..e35e994 100644
--- a/utests/compiler_generic_atomic.cpp
+++ b/utests/compiler_generic_atomic.cpp
@@ -3,6 +3,8 @@
template<typename T>
void test_atomic(const char* kernelName)
{
+ if(!cl_check_ocl20(false))
+ return;
const int n = 16;
T cpu_src[16];
diff --git a/utests/compiler_generic_pointer.cpp b/utests/compiler_generic_pointer.cpp
index 5984694..318312a 100644
--- a/utests/compiler_generic_pointer.cpp
+++ b/utests/compiler_generic_pointer.cpp
@@ -3,6 +3,8 @@
template<typename T>
void test(const char* kernelName)
{
+ if(!cl_check_ocl20(false))
+ return;
const int n = 16;
T cpu_src[16];
diff --git a/utests/compiler_pipe_builtin.cpp b/utests/compiler_pipe_builtin.cpp
index c8ec077..99270d1 100644
--- a/utests/compiler_pipe_builtin.cpp
+++ b/utests/compiler_pipe_builtin.cpp
@@ -8,11 +8,13 @@ typedef struct{
#define PIPE_BUILTIN(TYPE,GROUP) \
static void compiler_pipe_##GROUP##_##TYPE(void) \
{ \
+ if(!cl_check_ocl20(false))\
+ return;\
const size_t w = 16; \
uint32_t ans_host = 0; \
uint32_t ans_device = 0; \
/* pipe write kernel*/ \
- OCL_CREATE_KERNEL_FROM_FILE("compiler_pipe_builtin", "compiler_pipe_"#GROUP"_write_"#TYPE); \
+ OCL_CALL(cl_kernel_init, "compiler_pipe_builtin.cl", "compiler_pipe_"#GROUP"_write_"#TYPE, SOURCE, "-cl-std=CL2.0");\
OCL_CALL2(clCreatePipe, buf[0], ctx, 0, sizeof(TYPE), w, NULL);\
OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, w * sizeof(TYPE), NULL);\
OCL_MAP_BUFFER(1);\
@@ -26,7 +28,7 @@ static void compiler_pipe_##GROUP##_##TYPE(void) \
OCL_NDRANGE(1);\
OCL_CALL(clReleaseKernel, kernel);\
/* pipe read kernel */\
- OCL_CREATE_KERNEL_FROM_FILE("compiler_pipe_builtin", "compiler_pipe_"#GROUP"_read_"#TYPE);\
+ OCL_CALL(cl_kernel_init, "compiler_pipe_builtin.cl", "compiler_pipe_"#GROUP"_read_"#TYPE, SOURCE, "-cl-std=CL2.0");\
OCL_CREATE_BUFFER(buf[2], CL_MEM_READ_WRITE, w * sizeof(TYPE), NULL);\
OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);\
OCL_SET_ARG(1, sizeof(cl_mem), &buf[2]);\
@@ -52,8 +54,9 @@ PIPE_BUILTIN(mystruct, workgroup)
static void compiler_pipe_query(void) {
const size_t w = 32;
const size_t sz = 16;
+ if(!cl_check_ocl20(false)){return;}
/* pipe write kernel */
- OCL_CREATE_KERNEL_FROM_FILE("compiler_pipe_builtin", "compiler_pipe_query");
+ OCL_CALL(cl_kernel_init, "compiler_pipe_builtin.cl", "compiler_pipe_query", SOURCE, "-cl-std=CL2.0");
OCL_CALL2(clCreatePipe, buf[0], ctx, 0, sizeof(uint32_t), w, NULL);
OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, sz * sizeof(uint32_t), NULL);
OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
diff --git a/utests/compiler_program_global.cpp b/utests/compiler_program_global.cpp
index ef7c655..caadf8b 100644
--- a/utests/compiler_program_global.cpp
+++ b/utests/compiler_program_global.cpp
@@ -21,8 +21,9 @@ static int init_program(const char* name, cl_context ctx, cl_program *pg )
void compiler_program_global()
{
+ if(!cl_check_ocl20(false))
+ return;
const int n = 16;
- int cpu_src[16];
cl_int err;
// Setup kernel and buffers
@@ -50,7 +51,7 @@ void compiler_program_global()
OCL_MAP_BUFFER(0);
for (int i = 0; i < n; ++i)
- cpu_src[i] = ((int*)buf_data[0])[i] = i;
+ ((int*)buf_data[0])[i] = i;
OCL_UNMAP_BUFFER(0);
// Run the kernel on GPU
diff --git a/utests/compiler_sampler.cpp b/utests/compiler_sampler.cpp
index f8bf622..853e052 100644
--- a/utests/compiler_sampler.cpp
+++ b/utests/compiler_sampler.cpp
@@ -3,6 +3,8 @@
void compiler_sampler(void)
{
+ if(!cl_check_ocl20(false))
+ return;
OCL_CREATE_KERNEL("compiler_sampler");
OCL_ASSERT(ctx != 0);
diff --git a/utests/runtime_pipe_query.cpp b/utests/runtime_pipe_query.cpp
index 3ce8258..e46f5ff 100644
--- a/utests/runtime_pipe_query.cpp
+++ b/utests/runtime_pipe_query.cpp
@@ -1,6 +1,8 @@
#include <string.h>
#include "utest_helper.hpp"
static void runtime_pipe_query(void) {
+ if(!cl_check_ocl20(false))
+ return;
const size_t w = 16;
const size_t sz = 8;
cl_uint retnum, retsz;
diff --git a/utests/utest_helper.cpp b/utests/utest_helper.cpp
index b57d2ad..7052a14 100644
--- a/utests/utest_helper.cpp
+++ b/utests/utest_helper.cpp
@@ -939,13 +939,14 @@ int cl_check_subgroups_short(void)
return 1;
}
-int cl_check_ocl20(void)
+int cl_check_ocl20(bool or_beignet)
{
size_t param_value_size;
size_t ret_sz;
OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_OPENCL_C_VERSION, 0, 0, ¶m_value_size);
if(param_value_size == 0) {
printf("Not OpenCL 2.0 device, ");
+ if(or_beignet){
if(cl_check_beignet()) {
printf("Beignet extension test!");
return 1;
@@ -953,6 +954,10 @@ int cl_check_ocl20(void)
printf("Not beignet device , Skip!");
return 0;
}
+ }else{
+ printf("Skip!");
+ return 0;
+ }
}
char* device_version_str = (char* )malloc(param_value_size * sizeof(char) );
OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_OPENCL_C_VERSION, param_value_size, (void*)device_version_str, &ret_sz);
@@ -961,11 +966,16 @@ int cl_check_ocl20(void)
if(!strstr(device_version_str, "2.0")) {
free(device_version_str);
printf("Not OpenCL 2.0 device, ");
- if(cl_check_beignet()) {
- printf("Beignet extension test!");
- return 1;
- } else {
- printf("Not beignet device , Skip!");
+ if(or_beignet){
+ if(cl_check_beignet()) {
+ printf("Beignet extension test!");
+ return 1;
+ } else {
+ printf("Not beignet device , Skip!");
+ return 0;
+ }
+ }else{
+ printf("Skip!");
return 0;
}
}
diff --git a/utests/utest_helper.hpp b/utests/utest_helper.hpp
index a761325..e2a6a88 100644
--- a/utests/utest_helper.hpp
+++ b/utests/utest_helper.hpp
@@ -52,6 +52,8 @@
extern EGLDisplay eglDisplay;
extern EGLContext eglContext;
extern EGLSurface eglSurface;
+extern Display *xDisplay;
+extern Window xWindow;
#endif
union uint32_cast {
@@ -310,8 +312,8 @@ extern clGetKernelSubGroupInfoKHR_cb* utestclGetKernelSubGroupInfoKHR;
/* Check if cl_intel_motion_estimation enabled. */
extern int cl_check_motion_estimation(void);
-/* Check is cl version 2.0. */
-extern int cl_check_ocl20(void);
+/* Check is cl version 2.0 or Beignet extension. */
+extern int cl_check_ocl20(bool or_beignet = true);
/* Check is FP16 enabled. */
extern int cl_check_half(void);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git
More information about the Pkg-opencl-commits
mailing list