[beignet] 01/03: Imported Upstream version 1.1.1
Rebecca Palmer
rnpalmer-guest at moszumanska.debian.org
Tue Oct 20 07:17:05 UTC 2015
This is an automated email from the git hooks/post-receive script.
rnpalmer-guest pushed a commit to branch master
in repository beignet.
commit ecc6c75b4884c1dc2a70d088ace8448c4addff3c
Author: Rebecca N. Palmer <rebecca_palmer at zoho.com>
Date: Mon Oct 19 22:03:21 2015 +0100
Imported Upstream version 1.1.1
---
CMakeLists.txt | 2 +-
backend/src/backend/gen_context.cpp | 71 ++++++++-----
backend/src/backend/gen_context.hpp | 6 ++
backend/src/backend/gen_insn_scheduling.cpp | 6 +-
backend/src/backend/gen_insn_selection.cpp | 17 +++-
backend/src/backend/gen_program.cpp | 9 +-
backend/src/backend/gen_program.hpp | 4 +-
backend/src/backend/gen_register.hpp | 16 ++-
backend/src/backend/program.cpp | 151 +++++++++++++++++++++-------
backend/src/backend/program.h | 11 +-
backend/src/ir/printf.cpp | 21 +++-
backend/src/ir/printf.hpp | 17 ++--
backend/src/ir/structurizer.cpp | 24 ++++-
backend/src/ir/structurizer.hpp | 4 +-
backend/src/libocl/include/ocl_float.h | 1 +
backend/src/libocl/src/ocl_image.cl | 14 ++-
backend/src/libocl/tmpl/ocl_common.tmpl.cl | 4 +-
backend/src/llvm/llvm_printf_parser.cpp | 2 +-
docs/NEWS.mdwn | 3 +
kernels/image_1D_buffer.cl | 12 +--
src/cl_api.c | 18 +++-
src/cl_device_data.h | 5 +-
src/cl_device_id.c | 42 ++++----
src/cl_device_id.h | 1 +
src/cl_enqueue.c | 5 +-
src/cl_event.c | 55 ++++++++++
src/cl_event.h | 11 ++
src/cl_gbe_loader.cpp | 5 +
src/cl_gbe_loader.h | 1 +
src/cl_gt_device.h | 3 +-
src/cl_kernel.c | 18 ++--
src/cl_program.c | 75 +++++++++++---
src/intel/intel_driver.c | 8 +-
utests/compiler_degrees.cpp | 4 +-
utests/get_arg_info.cpp | 2 +-
utests/get_cl_info.cpp | 107 ++++++++++++++++++++
utests/image_1D_buffer.cpp | 73 ++++++--------
utests/utest_generator.py | 15 +--
38 files changed, 629 insertions(+), 214 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9713a32..2ed8429 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,7 +18,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0)
PROJECT(OCL)
set (LIBCL_DRIVER_VERSION_MAJOR 1)
set (LIBCL_DRIVER_VERSION_MINOR 1)
-set (LIBCL_DRIVER_VERSION_PATCH 0)
+set (LIBCL_DRIVER_VERSION_PATCH 1)
set (LIBCL_C_VERSION_MAJOR 1)
set (LIBCL_C_VERSION_MINOR 2)
if( ${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index e16b0a9..2983d52 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -49,6 +49,7 @@ namespace gbe
this->p = NULL;
this->sel = NULL;
this->ra = NULL;
+ this->asmFileName = NULL;
this->ifEndifFix = false;
this->regSpillTick = 0;
}
@@ -76,6 +77,10 @@ namespace gbe
this->regSpillTick = 0;
}
+ void GenContext::setASMFileName(const char* asmFname) {
+ this->asmFileName = asmFname;
+ }
+
void GenContext::newSelection(void) {
this->sel = GBE_NEW(Selection, *this);
}
@@ -2291,35 +2296,17 @@ namespace gbe
genKernel->insnNum = p->store.size();
genKernel->insns = GBE_NEW_ARRAY_NO_ARG(GenInstruction, genKernel->insnNum);
std::memcpy(genKernel->insns, &p->store[0], genKernel->insnNum * sizeof(GenInstruction));
- if (OCL_OUTPUT_ASM) {
- std::cout << genKernel->getName() << "'s disassemble begin:" << std::endl;
- ir::LabelIndex curLabel = (ir::LabelIndex)0;
- GenCompactInstruction * pCom = NULL;
- GenInstruction insn[2];
- std::cout << " L0:" << std::endl;
- for (uint32_t insnID = 0; insnID < genKernel->insnNum; ) {
- if (labelPos.find((ir::LabelIndex)(curLabel + 1))->second == insnID &&
- curLabel < this->getFunction().labelNum()) {
- std::cout << " L" << curLabel + 1 << ":" << std::endl;
- curLabel = (ir::LabelIndex)(curLabel + 1);
- while(labelPos.find((ir::LabelIndex)(curLabel + 1))->second == insnID) {
- std::cout << " L" << curLabel + 1 << ":" << std::endl;
- curLabel = (ir::LabelIndex)(curLabel + 1);
- }
- }
- std::cout << " (" << std::setw(8) << insnID << ") ";
- pCom = (GenCompactInstruction*)&p->store[insnID];
- if(pCom->bits1.cmpt_control == 1) {
- decompactInstruction(pCom, &insn);
- gen_disasm(stdout, &insn, deviceID, 1);
- insnID++;
- } else {
- gen_disasm(stdout, &p->store[insnID], deviceID, 0);
- insnID = insnID + 2;
- }
+ if (OCL_OUTPUT_ASM)
+ outputAssembly(stdout, genKernel);
+
+ if (this->asmFileName) {
+ FILE *asmDumpStream = fopen(this->asmFileName, "a");
+ if (asmDumpStream) {
+ outputAssembly(asmDumpStream, genKernel);
+ fclose(asmDumpStream);
}
- std::cout << genKernel->getName() << "'s disassemble end." << std::endl;
}
+
return true;
}
@@ -2327,5 +2314,35 @@ namespace gbe
return GBE_NEW(GenKernel, name, deviceID);
}
+ void GenContext::outputAssembly(FILE *file, GenKernel* genKernel) {
+ fprintf(file, "%s's disassemble begin:\n", genKernel->getName());
+ ir::LabelIndex curLabel = (ir::LabelIndex)0;
+ GenCompactInstruction * pCom = NULL;
+ GenInstruction insn[2];
+ fprintf(file, " L0:\n");
+ for (uint32_t insnID = 0; insnID < genKernel->insnNum; ) {
+ if (labelPos.find((ir::LabelIndex)(curLabel + 1))->second == insnID &&
+ curLabel < this->getFunction().labelNum()) {
+ fprintf(file, " L%i:\n", curLabel + 1);
+ curLabel = (ir::LabelIndex)(curLabel + 1);
+ while(labelPos.find((ir::LabelIndex)(curLabel + 1))->second == insnID) {
+ fprintf(file, " L%i:\n", curLabel + 1);
+ curLabel = (ir::LabelIndex)(curLabel + 1);
+ }
+ }
+ fprintf(file, " (%8i) ", insnID);
+ pCom = (GenCompactInstruction*)&p->store[insnID];
+ if(pCom->bits1.cmpt_control == 1) {
+ decompactInstruction(pCom, &insn);
+ gen_disasm(file, &insn, deviceID, 1);
+ insnID++;
+ } else {
+ gen_disasm(file, &p->store[insnID], deviceID, 0);
+ insnID = insnID + 2;
+ }
+ }
+ fprintf(file, "%s's disassemble end.\n", genKernel->getName());
+ }
+
} /* namespace gbe */
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 69fe513..8ef725f 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -43,6 +43,7 @@ namespace gbe
class SelectionInstruction; // Pre-RA Gen instruction
class SelectionReg; // Pre-RA Gen register
class GenRegister;
+ class GenKernel;
typedef enum {
NO_ERROR,
REGISTER_ALLOCATION_FAIL,
@@ -68,6 +69,8 @@ namespace gbe
#define GEN7_SCRATCH_SIZE (12 * KB)
/*! Start new code generation with specific parameters */
void startNewCG(uint32_t simdWidth, uint32_t reservedSpillRegs, bool limitRegisterPressure);
+ /*! Set the file name for the ASM dump */
+ void setASMFileName(const char* asmFname);
/*! Target device ID*/
uint32_t deviceID;
/*! Implements base class */
@@ -217,8 +220,11 @@ namespace gbe
CompileErrorCode errCode;
bool ifEndifFix;
uint32_t regSpillTick;
+ const char* asmFileName;
/*! Build the curbe patch list for the given kernel */
void buildPatchList(void);
+ /* Helper for printing the assembly */
+ void outputAssembly(FILE *file, GenKernel* genKernel);
/*! Calc the group's slm offset from R0.0, to work around HSW SLM bug*/
virtual void emitSLMOffset(void) { };
/*! new selection of device */
diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
index b3b7042..358a2ce 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -583,7 +583,11 @@ namespace gbe
for (int32_t insnID = 0; insnID < insnNum; ++insnID) {
ScheduleDAGNode *node = tracker.insnNodes[insnID];
if (node->insn.isBranch() || node->insn.isLabel()
- || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF || node->insn.opcode == SEL_OP_WHILE
+ || node->insn.opcode == SEL_OP_EOT
+ || node->insn.opcode == SEL_OP_IF
+ || node->insn.opcode == SEL_OP_ELSE
+ || node->insn.opcode == SEL_OP_ENDIF
+ || node->insn.opcode == SEL_OP_WHILE
|| node->insn.opcode == SEL_OP_READ_ARF
|| node->insn.opcode == SEL_OP_BARRIER)
tracker.makeBarrier(insnID, insnNum);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index b0ba9e3..520aede 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2290,10 +2290,16 @@ namespace gbe
GenRegister dst = sel.selReg(insn.getDst(0), type);
sel.push();
+ if (sel.isScalarReg(insn.getDst(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+
switch (opcode) {
case ir::OP_SIMD_SIZE:
{
- const GenRegister src = GenRegister::immud(sel.curr.execWidth);
+ const GenRegister src = GenRegister::immud(sel.ctx.getSimdWidth());
sel.MOV(dst, src);
}
break;
@@ -3205,8 +3211,8 @@ namespace gbe
}
sel.MOV(dst, imm.getIntegerValue() ? GenRegister::immuw(0xffff) : GenRegister::immuw(0));
break;
- case TYPE_U32:
- case TYPE_S32:
+ case TYPE_U32: sel.MOV(dst, GenRegister::immud(imm.getIntegerValue())); break;
+ case TYPE_S32: sel.MOV(dst, GenRegister::immd(imm.getIntegerValue())); break;
case TYPE_FLOAT:
sel.MOV(GenRegister::retype(dst, GEN_TYPE_F),
GenRegister::immf(imm.asFloatValue()));
@@ -4998,6 +5004,11 @@ namespace gbe
}
sel.push();
+ if (sel.isScalarReg(insn.getDst(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
if (src1.file == GEN_IMMEDIATE_VALUE)
sel.SIMD_SHUFFLE(dst, src0, src1);
else {
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index c761a2f..04da692 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -172,6 +172,7 @@ namespace gbe {
ctx = GBE_NEW(Gen9Context, unit, name, deviceID, relaxMath);
}
GBE_ASSERTM(ctx != NULL, "Fail to create the gen context\n");
+ ctx->setASMFileName(this->asm_file_name);
for (; codeGen < codeGenNum; ++codeGen) {
const uint32_t simdWidth = codeGenStrategy[codeGen].simdWidth;
@@ -352,13 +353,14 @@ namespace gbe {
const char *fileName,
const void* module,
const void* llvm_ctx,
+ const char* asm_file_name,
size_t stringSize,
char *err,
size_t *errSize,
int optLevel)
{
using namespace gbe;
- GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx);
+ GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx, asm_file_name);
#ifdef GBE_COMPILER_AVAILABLE
std::string error;
// Try to compile the program
@@ -384,7 +386,7 @@ namespace gbe {
return (gbe_program) program;
}
- static void genProgramLinkFromLLVM(gbe_program dst_program,
+ static bool genProgramLinkFromLLVM(gbe_program dst_program,
gbe_program src_program,
size_t stringSize,
char * err,
@@ -406,10 +408,12 @@ namespace gbe {
err[stringSize-1] = '\0';
*errSize = strlen(err);
}
+ return true;
}
}
// Everything run fine
#endif
+ return false;
}
static void genProgramBuildFromLLVM(gbe_program program,
@@ -442,7 +446,6 @@ namespace gbe {
std::memcpy(err, error.c_str(), msgSize);
*errSize = error.size();
}
- GBE_DELETE(p);
}
releaseLLVMContextLock();
#endif
diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp
index af1a9fa..75d77ba 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -60,7 +60,8 @@ namespace gbe
{
public:
/*! Create an empty program */
- GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = NULL) : deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx) {}
+ GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = NULL, const char* asm_fname = NULL) :
+ deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx), asm_file_name(asm_fname) {}
/*! Current device ID*/
uint32_t deviceID;
/*! Destroy the program */
@@ -75,6 +76,7 @@ namespace gbe
}
void* module;
void* llvm_ctx;
+ const char* asm_file_name;
/*! Use custom allocators */
GBE_CLASS(GenProgram);
};
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 4f37e30..a15fd60 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -995,7 +995,13 @@ namespace gbe
}
static INLINE GenRegister uw1(uint32_t file, uint32_t nr, uint32_t subnr) {
- return offset(retype(vec1(file, nr, 0), GEN_TYPE_UW), 0, typeSize(GEN_TYPE_UW)*subnr);
+ return GenRegister(file,
+ nr,
+ subnr,
+ GEN_TYPE_UW,
+ GEN_VERTICAL_STRIDE_0,
+ GEN_WIDTH_1,
+ GEN_HORIZONTAL_STRIDE_0);
}
static INLINE GenRegister ub16(uint32_t file, uint32_t nr, uint32_t subnr) {
@@ -1019,7 +1025,13 @@ namespace gbe
}
static INLINE GenRegister ub1(uint32_t file, uint32_t nr, uint32_t subnr) {
- return suboffset(retype(vec1(file, nr, 0), GEN_TYPE_UB), subnr);
+ return GenRegister(file,
+ nr,
+ subnr,
+ GEN_TYPE_UB,
+ GEN_VERTICAL_STRIDE_0,
+ GEN_WIDTH_1,
+ GEN_HORIZONTAL_STRIDE_0);
}
static INLINE GenRegister f1grf(uint32_t nr, uint32_t subnr) {
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index e4cdeaa..0ee76fc 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -518,8 +518,8 @@ namespace gbe {
#ifdef GBE_COMPILER_AVAILABLE
BVAR(OCL_OUTPUT_BUILD_LOG, false);
- static bool buildModuleFromSource(const char* input, llvm::Module** out_module, llvm::LLVMContext* llvm_ctx,
- std::vector<std::string>& options, size_t stringSize, char *err,
+ static bool buildModuleFromSource(const char *source, llvm::Module** out_module, llvm::LLVMContext* llvm_ctx,
+ std::string dumpLLVMFileName, std::vector<std::string>& options, size_t stringSize, char *err,
size_t *errSize) {
// Arguments to pass to the clang frontend
vector<const char *> args;
@@ -551,8 +551,7 @@ namespace gbe {
args.push_back("-triple");
args.push_back("spir");
#endif /* LLVM_VERSION_MINOR <= 2 */
- args.push_back(input);
-
+ args.push_back("stringInput.cl");
args.push_back("-ffp-contract=off");
// The compiler invocation needs a DiagnosticsEngine so it can report problems
@@ -574,6 +573,14 @@ namespace gbe {
&args[0],
&args[0] + args.size(),
Diags);
+ llvm::StringRef srcString(source);
+ (*CI).getPreprocessorOpts().addRemappedFile("stringInput.cl",
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 5
+ llvm::MemoryBuffer::getMemBuffer(srcString)
+#else
+ llvm::MemoryBuffer::getMemBuffer(srcString).release()
+#endif
+ );
// Create the compiler instance
clang::CompilerInstance Clang;
@@ -628,6 +635,34 @@ namespace gbe {
#endif
*out_module = module;
+
+// Dump the LLVM if requested.
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR < 6)
+ if (!dumpLLVMFileName.empty()) {
+ std::string err;
+ llvm::raw_fd_ostream ostream (dumpLLVMFileName.c_str(),
+ err,
+ #if LLVM_VERSION_MINOR == 3
+ 0
+ #else
+ llvm::sys::fs::F_None
+ #endif
+ );
+
+ if (err.empty()) {
+ (*out_module)->print(ostream, 0);
+ } //Otherwise, you'll have to make do without the dump.
+ }
+#else
+ if (!dumpLLVMFileName.empty()) {
+ std::error_code err;
+ llvm::raw_fd_ostream ostream (dumpLLVMFileName.c_str(),
+ err, llvm::sys::fs::F_None);
+ if (!err) {
+ (*out_module)->print(ostream, 0);
+ } //Otherwise, you'll have to make do without the dump.
+ }
+#endif
return true;
}
@@ -640,7 +675,8 @@ namespace gbe {
const char *options,
const char *temp_header_path,
std::vector<std::string>& clOpt,
- std::string& clName,
+ std::string& dumpLLVMFileName,
+ std::string& dumpASMFileName,
int& optLevel,
size_t stringSize,
char *err,
@@ -719,6 +755,16 @@ namespace gbe {
clOpt.push_back("__FAST_RELAXED_MATH__=1");
}
+ if(str.find("-dump-opt-llvm=") != std::string::npos) {
+ dumpLLVMFileName = str.substr(str.find("=") + 1);
+ continue; // Don't push this str back; ignore it.
+ }
+
+ if(str.find("-dump-opt-asm=") != std::string::npos) {
+ dumpASMFileName = str.substr(str.find("=") + 1);
+ continue; // Don't push this str back; ignore it.
+ }
+
clOpt.push_back(str);
}
free(str);
@@ -741,21 +787,6 @@ namespace gbe {
}
}
- char clStr[] = "/tmp/XXXXXX.cl";
- int clFd = mkstemps(clStr, 3);
- clName = std::string(clStr);
-
- FILE *clFile = fdopen(clFd, "w");
- FATAL_IF(clFile == NULL, "Failed to open temporary file");
- // XXX enable cl_khr_fp64 may cause some potential bugs.
- // we may need to revisit here latter when we want to support fp64 completely.
- // For now, as we don't support fp64 actually, just disable it by default.
-#if 0
- #define ENABLE_CL_KHR_FP64_STR "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
- if (options && !strstr(const_cast<char *>(options), "-cl-std=CL1.1"))
- fwrite(ENABLE_CL_KHR_FP64_STR, strlen(ENABLE_CL_KHR_FP64_STR), 1, clFile);
-#endif
-
if (!findPCH || invalidPCH) {
clOpt.push_back("-include");
clOpt.push_back("ocl.h");
@@ -765,9 +796,6 @@ namespace gbe {
clOpt.push_back(pchFileName);
}
- // Write the source to the cl file
- fwrite(source, strlen(source), 1, clFile);
- fclose(clFile);
return true;
}
@@ -780,9 +808,11 @@ namespace gbe {
{
int optLevel = 1;
std::vector<std::string> clOpt;
- std::string clName;
- if (!processSourceAndOption(source, options, NULL, clOpt, clName,
- optLevel, stringSize, err, errSize))
+ std::string dumpLLVMFileName, dumpASMFileName;
+ if (!processSourceAndOption(source, options, NULL, clOpt,
+ dumpLLVMFileName, dumpASMFileName,
+ optLevel,
+ stringSize, err, errSize))
return NULL;
gbe_program p;
@@ -793,7 +823,7 @@ namespace gbe {
if (!llvm::llvm_is_multithreaded())
llvm_mutex.lock();
- if (buildModuleFromSource(clName.c_str(), &out_module, llvm_ctx, clOpt,
+ if (buildModuleFromSource(source, &out_module, llvm_ctx, dumpLLVMFileName, clOpt,
stringSize, err, errSize)) {
// Now build the program from llvm
size_t clangErrSize = 0;
@@ -804,8 +834,14 @@ namespace gbe {
clangErrSize = *errSize;
}
- p = gbe_program_new_from_llvm(deviceID, NULL, out_module, llvm_ctx, stringSize,
- err, errSize, optLevel);
+ if (!dumpASMFileName.empty()) {
+ FILE *asmDumpStream = fopen(dumpASMFileName.c_str(), "w");
+ if (asmDumpStream)
+ fclose(asmDumpStream);
+ }
+ p = gbe_program_new_from_llvm(deviceID, NULL, out_module, llvm_ctx,
+ dumpASMFileName.empty() ? NULL : dumpASMFileName.c_str(),
+ stringSize, err, errSize, optLevel);
if (err != NULL)
*errSize += clangErrSize;
if (OCL_OUTPUT_BUILD_LOG && options)
@@ -816,7 +852,6 @@ namespace gbe {
if (!llvm::llvm_is_multithreaded())
llvm_mutex.unlock();
- remove(clName.c_str());
return p;
}
#endif
@@ -833,8 +868,9 @@ namespace gbe {
{
int optLevel = 1;
std::vector<std::string> clOpt;
- std::string clName;
- if (!processSourceAndOption(source, options, temp_header_path, clOpt, clName,
+ std::string dumpLLVMFileName, dumpASMFileName;
+ if (!processSourceAndOption(source, options, temp_header_path, clOpt,
+ dumpLLVMFileName, dumpASMFileName,
optLevel, stringSize, err, errSize))
return NULL;
@@ -844,7 +880,8 @@ namespace gbe {
//for some functions, so we use global context now, need switch to new context later.
llvm::Module * out_module;
llvm::LLVMContext* llvm_ctx = &llvm::getGlobalContext();
- if (buildModuleFromSource(clName.c_str(), &out_module, llvm_ctx, clOpt,
+
+ if (buildModuleFromSource(source, &out_module, llvm_ctx, dumpLLVMFileName, clOpt,
stringSize, err, errSize)) {
// Now build the program from llvm
if (err != NULL) {
@@ -859,30 +896,70 @@ namespace gbe {
llvm::errs() << options;
} else
p = NULL;
- remove(clName.c_str());
releaseLLVMContextLock();
return p;
}
#endif
#ifdef GBE_COMPILER_AVAILABLE
- static void programLinkProgram(gbe_program dst_program,
+ static bool programLinkProgram(gbe_program dst_program,
gbe_program src_program,
size_t stringSize,
char * err,
size_t * errSize)
{
+ bool ret = 0;
acquireLLVMContextLock();
- gbe_program_link_from_llvm(dst_program, src_program, stringSize, err, errSize);
+ ret = gbe_program_link_from_llvm(dst_program, src_program, stringSize, err, errSize);
releaseLLVMContextLock();
if (OCL_OUTPUT_BUILD_LOG && err)
llvm::errs() << err;
+ return ret;
}
#endif
+#ifdef GBE_COMPILER_AVAILABLE
+ static bool programCheckOption(const char * option)
+ {
+ vector<const char *> args;
+ if (option == NULL) return 1; //if NULL, return ok
+ std::string s(option);
+ size_t pos = s.find("-create-library");
+ //clang don't accept -create-library and -enable-link-options, erase them
+ if(pos != std::string::npos) {
+ s.erase(pos, strlen("-create-library"));
+ }
+ pos = s.find("-enable-link-options");
+ if(pos != std::string::npos) {
+ s.erase(pos, strlen("-enable-link-options"));
+ }
+ args.push_back(s.c_str());
+
+ // The compiler invocation needs a DiagnosticsEngine so it can report problems
+ std::string ErrorString;
+ llvm::raw_string_ostream ErrorInfo(ErrorString);
+ llvm::IntrusiveRefCntPtr<clang::DiagnosticOptions> DiagOpts = new clang::DiagnosticOptions();
+ DiagOpts->ShowCarets = false;
+ DiagOpts->ShowPresumedLoc = true;
+
+ clang::TextDiagnosticPrinter *DiagClient =
+ new clang::TextDiagnosticPrinter(ErrorInfo, &*DiagOpts);
+ llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> DiagID(new clang::DiagnosticIDs());
+ clang::DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient);
+
+ // Create the compiler invocation
+ std::unique_ptr<clang::CompilerInvocation> CI(new clang::CompilerInvocation);
+ return clang::CompilerInvocation::CreateFromArgs(*CI,
+ &args[0],
+ &args[0] + args.size(),
+ Diags);
+ }
+#endif
+
+
static size_t programGetGlobalConstantSize(gbe_program gbeProgram) {
if (gbeProgram == NULL) return 0;
const gbe::Program *program = (const gbe::Program*) gbeProgram;
@@ -1127,6 +1204,7 @@ void releaseLLVMContextLock()
GBE_EXPORT_SYMBOL gbe_program_new_from_source_cb *gbe_program_new_from_source = NULL;
GBE_EXPORT_SYMBOL gbe_program_compile_from_source_cb *gbe_program_compile_from_source = NULL;
GBE_EXPORT_SYMBOL gbe_program_link_program_cb *gbe_program_link_program = NULL;
+GBE_EXPORT_SYMBOL gbe_program_check_opt_cb *gbe_program_check_opt = NULL;
GBE_EXPORT_SYMBOL gbe_program_new_from_binary_cb *gbe_program_new_from_binary = NULL;
GBE_EXPORT_SYMBOL gbe_program_new_from_llvm_binary_cb *gbe_program_new_from_llvm_binary = NULL;
GBE_EXPORT_SYMBOL gbe_program_serialize_to_binary_cb *gbe_program_serialize_to_binary = NULL;
@@ -1182,6 +1260,7 @@ namespace gbe
gbe_program_new_from_source = gbe::programNewFromSource;
gbe_program_compile_from_source = gbe::programCompileFromSource;
gbe_program_link_program = gbe::programLinkProgram;
+ gbe_program_check_opt = gbe::programCheckOption;
gbe_program_get_global_constant_size = gbe::programGetGlobalConstantSize;
gbe_program_get_global_constant_data = gbe::programGetGlobalConstantData;
gbe_program_clean_llvm_resource = gbe::programCleanLlvmResource;
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 3637ebb..84ce333 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -30,6 +30,7 @@
#include <stdint.h>
#include <stdlib.h>
+#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
@@ -181,14 +182,19 @@ typedef gbe_program (gbe_program_compile_from_source_cb)(uint32_t deviceID,
char *err,
size_t *err_size);
extern gbe_program_compile_from_source_cb *gbe_program_compile_from_source;
+
/*! link the programs. */
-typedef void (gbe_program_link_program_cb)(gbe_program dst_program,
+typedef bool (gbe_program_link_program_cb)(gbe_program dst_program,
gbe_program src_program,
size_t stringSize,
char * err,
size_t * errSize);
extern gbe_program_link_program_cb *gbe_program_link_program;
+/*! check link option. */
+typedef bool (gbe_program_check_opt_cb)(const char *option);
+extern gbe_program_check_opt_cb *gbe_program_check_opt;
+
/*! create s new genprogram for link. */
typedef gbe_program (gbe_program_new_gen_program_cb)(uint32_t deviceID,
const void *module,
@@ -212,6 +218,7 @@ typedef gbe_program (gbe_program_new_from_llvm_cb)(uint32_t deviceID,
const char *fileName,
const void *module,
const void *llvm_ctx,
+ const char *asm_file_name,
size_t string_size,
char *err,
size_t *err_size,
@@ -219,7 +226,7 @@ typedef gbe_program (gbe_program_new_from_llvm_cb)(uint32_t deviceID,
extern gbe_program_new_from_llvm_cb *gbe_program_new_from_llvm;
/*! link the programs from llvm level. */
-typedef void (gbe_program_link_from_llvm_cb)(gbe_program dst_program,
+typedef bool (gbe_program_link_from_llvm_cb)(gbe_program dst_program,
gbe_program src_program,
size_t stringSize,
char * err,
diff --git a/backend/src/ir/printf.cpp b/backend/src/ir/printf.cpp
index 3d9b2fd..eb1c199 100644
--- a/backend/src/ir/printf.cpp
+++ b/backend/src/ir/printf.cpp
@@ -31,6 +31,23 @@ namespace gbe
pthread_mutex_t PrintfSet::lock = PTHREAD_MUTEX_INITIALIZER;
+ PrintfSlot::~PrintfSlot(void)
+ {
+ if (ptr)
+ {
+ if (type == PRINTF_SLOT_TYPE_STRING) {
+ free(ptr);
+ ptr = NULL;
+ } else if (type == PRINTF_SLOT_TYPE_STATE) {
+ delete state;
+ state = NULL;
+ } else {
+ type = PRINTF_SLOT_TYPE_NONE;
+ ptr = NULL;
+ }
+ }
+ }
+
uint32_t PrintfSet::append(PrintfFmt* fmt, Unit& unit)
{
fmts.push_back(*fmt);
@@ -40,12 +57,12 @@ namespace gbe
if (f->type == PRINTF_SLOT_TYPE_STRING)
continue;
- slots.push_back(&(*f));
+ slots.push_back(*f);
}
/* Update the total size of size. */
if (slots.size() > 0)
- sizeOfSize = slots.back()->state->out_buf_sizeof_offset
+ sizeOfSize = slots.back().state->out_buf_sizeof_offset
+ getPrintfBufferElementSize(slots.size() - 1);
return (uint32_t)fmts.size();
diff --git a/backend/src/ir/printf.hpp b/backend/src/ir/printf.hpp
index cbab759..df58437 100644
--- a/backend/src/ir/printf.hpp
+++ b/backend/src/ir/printf.hpp
@@ -159,10 +159,7 @@ namespace gbe
ptr = p;
}
- ~PrintfSlot(void) {
- if (ptr)
- free(ptr);
- }
+ ~PrintfSlot(void);
};
class Context;
@@ -177,7 +174,7 @@ namespace gbe
}
for (size_t i = 0; i < other.slots.size(); ++i) {
- PrintfSlot* s = other.slots[i];
+ PrintfSlot s = other.slots[i];
slots.push_back(s);
}
@@ -215,15 +212,15 @@ namespace gbe
uint8_t getIndexBufBTI() const { return btiIndexBuf; }
uint32_t getPrintfBufferElementSize(uint32_t i) {
- PrintfSlot* slot = slots[i];
+ PrintfSlot& slot = slots[i];
int vec_num = 1;
- if (slot->state->vector_n > 0) {
- vec_num = slot->state->vector_n;
+ if (slot.state->vector_n > 0) {
+ vec_num = slot.state->vector_n;
}
assert(vec_num > 0 && vec_num <= 16);
- switch (slot->state->conversion_specifier) {
+ switch (slot.state->conversion_specifier) {
case PRINTF_CONVERSION_I:
case PRINTF_CONVERSION_D:
case PRINTF_CONVERSION_O:
@@ -257,7 +254,7 @@ namespace gbe
private:
vector<PrintfFmt> fmts;
- vector<PrintfSlot*> slots;
+ vector<PrintfSlot> slots;
uint32_t sizeOfSize; // Total sizeof size.
friend struct LockOutput;
uint8_t btiBuf;
diff --git a/backend/src/ir/structurizer.cpp b/backend/src/ir/structurizer.cpp
index 6c4e455..38d3dd1 100644
--- a/backend/src/ir/structurizer.cpp
+++ b/backend/src/ir/structurizer.cpp
@@ -458,6 +458,17 @@ namespace ir {
return p_block;
}
+ void CFGStructurizer::collectInsnNum(Block* block, const BasicBlock* bb)
+ {
+ BasicBlock::const_iterator iter = bb->begin();
+ BasicBlock::const_iterator iter_end = bb->end();
+ while(iter != iter_end)
+ {
+ block->insnNum++;
+ iter++;
+ }
+ }
+
bool CFGStructurizer::checkForBarrier(const BasicBlock* bb)
{
BasicBlock::const_iterator iter = bb->begin();
@@ -600,6 +611,7 @@ namespace ir {
loops = fn->getLoops();
fn->foreachBlock([&](ir::BasicBlock &bb){
orderedBlks.push_back(bbmap[&bb]);
+ collectInsnNum(bbmap[&bb], &bb);
});
}
@@ -721,6 +733,7 @@ namespace ir {
p->canBeHandled = false;
break;
}
+ p->insnNum += (*iter)->insnNum;
iter++;
}
return insertBlock(p);
@@ -770,6 +783,7 @@ namespace ir {
if(loopSets.size() == 1)
{
Block* p = new SelfLoopBlock(*loopSets.begin());
+ p->insnNum = (*loopSets.begin())->insnNum;
p->canBeHandled = true;
(*loopSets.begin())->getExit()->isLoopExit = true;
return insertBlock(p);
@@ -881,7 +895,8 @@ namespace ir {
if (TrueBB->succ_size() == 1 && FalseBB->succ_size() == 1
&& TrueBB->pred_size() == 1 && FalseBB->pred_size() == 1
&& *TrueBB->succ_begin() == *FalseBB->succ_begin()
- && !TrueBB->hasBarrier() && !FalseBB->hasBarrier() ) {
+ && !TrueBB->hasBarrier() && !FalseBB->hasBarrier()
+ && TrueBB->insnNum < 1000 && FalseBB->insnNum < 1000) {
// if-else pattern
ifSets.insert(block);
if(block->fallthrough() == TrueBB) {
@@ -895,17 +910,19 @@ namespace ir {
}else{
GBE_ASSERT(0);
}
+ mergedBB->insnNum = block->insnNum + TrueBB->insnNum + FalseBB->insnNum;
if(block->canBeHandled == false || TrueBB->canBeHandled == false || FalseBB->canBeHandled == false)
block->canBeHandled = false;
insertBlock(mergedBB);
} else if (TrueBB->succ_size() == 1 && TrueBB->pred_size() == 1 &&
- *TrueBB->succ_begin() == FalseBB && !TrueBB->hasBarrier() ) {
+ *TrueBB->succ_begin() == FalseBB && !TrueBB->hasBarrier() && TrueBB->insnNum < 1000 ) {
// if-then pattern, false is empty
ifSets.insert(block);
ifSets.insert(TrueBB);
mergedBB = new IfThenBlock(block, TrueBB);
+ mergedBB->insnNum = block->insnNum + TrueBB->insnNum;
if(block->fallthrough() == FalseBB)
block->inversePredicate = false;
@@ -914,11 +931,12 @@ namespace ir {
insertBlock(mergedBB);
} else if (FalseBB->succ_size() == 1 && FalseBB->pred_size() == 1 &&
- *FalseBB->succ_begin() == TrueBB && !FalseBB->hasBarrier() ) {
+ *FalseBB->succ_begin() == TrueBB && !FalseBB->hasBarrier() && FalseBB->insnNum < 1000 ) {
// if-then pattern, true is empty
ifSets.insert(block);
ifSets.insert(FalseBB);
mergedBB = new IfThenBlock(block, FalseBB);
+ mergedBB->insnNum = block->insnNum + FalseBB->insnNum;
if(block->fallthrough() == TrueBB)
block->inversePredicate = false;
diff --git a/backend/src/ir/structurizer.hpp b/backend/src/ir/structurizer.hpp
index 8207644..09b2a7f 100644
--- a/backend/src/ir/structurizer.hpp
+++ b/backend/src/ir/structurizer.hpp
@@ -53,7 +53,7 @@ namespace ir {
class Block
{
public:
- Block(BlockType type, const BlockList& children): has_barrier(false), mark(false), canBeHandled(true), inversePredicate(true)
+ Block(BlockType type, const BlockList& children): has_barrier(false), mark(false), canBeHandled(true), inversePredicate(true), insnNum(0)
{
this->btype = type;
this->children = children;
@@ -105,6 +105,7 @@ namespace ir {
* m-->n
* */
bool inversePredicate;
+ int insnNum;
};
/* represents basic block */
@@ -211,6 +212,7 @@ namespace ir {
Block* mergeLoopBlock(BlockList& loopSets);
int ifPatternMatch(Block *block);
int patternMatch(Block *block);
+ void collectInsnNum(Block* block, const BasicBlock* bb);
private:
void handleSelfLoopBlock(Block *loopblock, LabelIndex& whileLabel);
diff --git a/backend/src/libocl/include/ocl_float.h b/backend/src/libocl/include/ocl_float.h
index 916233b..e63eaf9 100644
--- a/backend/src/libocl/include/ocl_float.h
+++ b/backend/src/libocl/include/ocl_float.h
@@ -88,6 +88,7 @@ INLINE_OVERLOADABLE int __ocl_finitef (float x){
#define M_PI_4_F 0.7853981633974483F
#define M_1_PI_F 0.3183098861837907F
#define M_2_PI_F 0.6366197723675814F
+#define M_180_PI_F 57.295779513082321F
#define M_2_SQRTPI_F 1.1283791670955126F
#define M_SQRT2_F 1.4142135623730951F
#define M_SQRT1_2_F 0.7071067811865476F
diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
index a8dbc92..eb1a2ff 100644
--- a/backend/src/libocl/src/ocl_image.cl
+++ b/backend/src/libocl/src/ocl_image.cl
@@ -387,10 +387,22 @@ DECL_IMAGE_TYPE(image2d_array_t, 3)
cl_image, defaultSampler, convert_float2(effectCoord), 0); \
}
+#define DECL_WRITE_IMAGE1D_BUFFER(image_type, image_data_type, suffix, coord_type) \
+ OVERLOADABLE void write_image ##suffix(image_type cl_image, \
+ coord_type coord, \
+ image_data_type color) \
+ { \
+ int2 effectCoord; \
+ effectCoord.s0 = coord %8192; \
+ effectCoord.s1 = coord / 8192; \
+ __gen_ocl_write_image ##suffix(cl_image, effectCoord, color); \
+ }
+
+
#define DECL_IMAGE_1DBuffer(int_clamping_fix, image_data_type, suffix) \
DECL_READ_IMAGE1D_BUFFER_NOSAMPLER(image1d_buffer_t, image_data_type, \
suffix, int) \
- DECL_WRITE_IMAGE(image1d_buffer_t, image_data_type, suffix, int)
+ DECL_WRITE_IMAGE1D_BUFFER(image1d_buffer_t, image_data_type, suffix, int)
DECL_IMAGE_1DBuffer(GEN_FIX_INT_CLAMPING, int4, i)
DECL_IMAGE_1DBuffer(GEN_FIX_INT_CLAMPING, uint4, ui)
diff --git a/backend/src/libocl/tmpl/ocl_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_common.tmpl.cl
index 76aca2b..b6b09b5 100644
--- a/backend/src/libocl/tmpl/ocl_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_common.tmpl.cl
@@ -44,7 +44,7 @@ OVERLOADABLE float clamp(float v, float l, float u) {
OVERLOADABLE float degrees(float radians) {
- return (180 / M_PI_F) * radians;
+ return M_180_PI_F * radians;
}
OVERLOADABLE float radians(float degrees) {
return (M_PI_F / 180) * degrees;
@@ -96,7 +96,7 @@ OVERLOADABLE half clamp(half v, half l, half u) {
return max(min(v, u), l);
}
OVERLOADABLE half degrees(half radians) {
- return ((half)(180 / M_PI_F)) * radians;
+ return ((half)(M_180_PI_F)) * radians;
}
OVERLOADABLE half radians(half degrees) {
return ((half)(M_PI_F / 180)) * degrees;
diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp
index 2f85443..3d84457 100644
--- a/backend/src/llvm/llvm_printf_parser.cpp
+++ b/backend/src/llvm/llvm_printf_parser.cpp
@@ -291,7 +291,7 @@ again:
#if 0
{
int j = 0;
- for (auto &s : *printf_fmt) {
+ for (auto &s : printf_fmt->first) {
j++;
if (s.type == PRINTF_SLOT_TYPE_STATE) {
fprintf(stderr, "---- %d ---: state : \n", j);
diff --git a/docs/NEWS.mdwn b/docs/NEWS.mdwn
index 7aa8e94..c9aef28 100644
--- a/docs/NEWS.mdwn
+++ b/docs/NEWS.mdwn
@@ -1,5 +1,8 @@
# News
+## Oct 08, 2015
+[Beignet 1.1.1](https://01.org/beignet/downloads/beignet-1.1.1-2015-10-08) is released. This is a bug-fix release.
+
## Jul 21, 2015
[Beignet 1.1.0](https://01.org/beignet/downloads/beignet-1.1.0-2015-07-31) is released. This is a major release. Please see the release notes for more information.
diff --git a/kernels/image_1D_buffer.cl b/kernels/image_1D_buffer.cl
index e8e0a86..2c1da69 100644
--- a/kernels/image_1D_buffer.cl
+++ b/kernels/image_1D_buffer.cl
@@ -1,13 +1,7 @@
-__kernel void image_1D_buffer(image1d_buffer_t image1, image1d_t image2, sampler_t sampler, __global int *results)
+__kernel void image_1D_buffer(image1d_buffer_t image1, image1d_buffer_t image2)
{
int x = get_global_id(0);
- int offset = x;
- int4 col = read_imagei(image1, x);
- int4 test = (col != read_imagei(image2, sampler, x));
-
- if (test.x || test.y || test.z || test.w)
- results[offset] = 0;
- else
- results[offset] = 1;
+ uint4 color = read_imageui(image1, x);
+ write_imageui(image2, x, color);
}
diff --git a/src/cl_api.c b/src/cl_api.c
index 69eb0bc..cef5bbb 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -77,6 +77,7 @@ handle_events(cl_command_queue queue, cl_int num, const cl_event *wait_list,
if (e->type != CL_COMMAND_USER &&
e->queue->props & CL_QUEUE_PROFILING_ENABLE) {
cl_event_get_timestamp(e, CL_PROFILING_COMMAND_QUEUED);
+ cl_event_get_queued_cpu_timestamp(e);
}
if(event != NULL)
@@ -1015,10 +1016,11 @@ clLinkProgram(cl_context context,
INVALID_VALUE_IF (pfn_notify == 0 && user_data != NULL);
INVALID_VALUE_IF (num_input_programs == 0 && input_programs != NULL);
INVALID_VALUE_IF (num_input_programs != 0 && input_programs == NULL);
+ INVALID_VALUE_IF (num_input_programs == 0 && input_programs == NULL);
program = cl_program_link(context, num_input_programs, input_programs, options, &err);
- program->is_built = CL_TRUE;
+ if(program) program->is_built = CL_TRUE;
if (pfn_notify) pfn_notify(program, user_data);
@@ -1251,6 +1253,11 @@ cl_int clGetKernelArgInfo(cl_kernel kernel, cl_uint arg_index, cl_kernel_arg_inf
cl_int err = CL_SUCCESS;
CHECK_KERNEL(kernel);
+ if(kernel->program->build_opts == NULL ||
+ strstr(kernel->program->build_opts,"-cl-kernel-arg-info") == NULL ) {
+ err = CL_KERNEL_ARG_INFO_NOT_AVAILABLE;
+ goto error;
+ }
if (param_name != CL_KERNEL_ARG_ADDRESS_QUALIFIER
&& param_name != CL_KERNEL_ARG_ACCESS_QUALIFIER
&& param_name != CL_KERNEL_ARG_TYPE_NAME
@@ -1468,6 +1475,7 @@ clGetEventProfilingInfo(cl_event event,
cl_ulong ret_val;
CHECK_EVENT(event);
+ cl_event_update_status(event, 0);
if (event->type == CL_COMMAND_USER ||
!(event->queue->props & CL_QUEUE_PROFILING_ENABLE) ||
@@ -1482,15 +1490,15 @@ clGetEventProfilingInfo(cl_event event,
}
if (param_name == CL_PROFILING_COMMAND_QUEUED) {
- ret_val = event->timestamp[0];
+ ret_val = event->queued_timestamp;
} else if (param_name == CL_PROFILING_COMMAND_SUBMIT) {
- ret_val = event->timestamp[1];
+ ret_val= event->queued_timestamp + cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[1]);
} else if (param_name == CL_PROFILING_COMMAND_START) {
err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_START);
- ret_val = event->timestamp[2];
+ ret_val = event->queued_timestamp + cl_event_get_start_timestamp(event);
} else if (param_name == CL_PROFILING_COMMAND_END) {
err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_END);
- ret_val = event->timestamp[3];
+ ret_val = event->queued_timestamp + cl_event_get_end_timestamp(event);
} else {
err = CL_INVALID_VALUE;
goto error;
diff --git a/src/cl_device_data.h b/src/cl_device_data.h
index b7b64c0..3552a16 100644
--- a/src/cl_device_data.h
+++ b/src/cl_device_data.h
@@ -21,6 +21,7 @@
#define __CL_DEVICE_DATA_H__
#define INVALID_CHIP_ID -1 //returned by intel_get_device_id if no device found
+#define INTEL_VENDOR_ID 0x8086 // Vendor ID for Intel
#define PCI_CHIP_GM45_GM 0x2A42
#define PCI_CHIP_IGD_E_G 0x2E02
@@ -201,7 +202,8 @@
#define PCI_CHIP_BROADWLL_W_GT2 0x161D /* Intel(R) Broadwell Workstation - GT2 */
#define PCI_CHIP_BROADWLL_U_GT2 0x161E /* Intel(R) Broadwell ULX - GT2 */
#define PCI_CHIP_BROADWLL_M_GT3 0x1622 /* Intel(R) Broadwell Mobile - Halo (EDRAM) - GT3 */
-#define PCI_CHIP_BROADWLL_D_GT3 0x1626 /* Intel(R) Broadwell U-Processor - GT3 */
+#define PCI_CHIP_BROADWLL_D_GT3 0x1626 /* Intel(R) Broadwell U-Processor HD 6000 - GT3 */
+#define PCI_CHIP_BROADWLL_UI_GT3 0x162B /* Intel(R) Broadwell U-Process Iris 6100 - GT3 */
#define PCI_CHIP_BROADWLL_S_GT3 0x162A /* Intel(R) Broadwell Server - GT3 */
#define PCI_CHIP_BROADWLL_W_GT3 0x162D /* Intel(R) Broadwell Workstation - GT3 */
#define PCI_CHIP_BROADWLL_U_GT3 0x162E /* Intel(R) Broadwell ULX - GT3 */
@@ -225,6 +227,7 @@
devid == PCI_CHIP_BROADWLL_D_GT3 || \
devid == PCI_CHIP_BROADWLL_S_GT3 || \
devid == PCI_CHIP_BROADWLL_W_GT3 || \
+ devid == PCI_CHIP_BROADWLL_UI_GT3 || \
devid == PCI_CHIP_BROADWLL_U_GT3)
#define IS_BROADWELL(devid) (IS_BRW_GT1(devid) || IS_BRW_GT2(devid) || IS_BRW_GT3(devid))
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index e9e2c16..1778292 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -350,7 +350,7 @@ cl_get_gt_device(void)
case PCI_CHIP_HASWELL_CRW_E3:
DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell CRW GT3 reserved");
has_break:
- device->vendor_id = device_id;
+ device->device_id = device_id;
device->platform = cl_get_platform_default();
ret = device;
cl_intel_platform_get_default_extension(ret);
@@ -363,7 +363,7 @@ has_break:
case PCI_CHIP_IVYBRIDGE_S_GT1:
DECL_INFO_STRING(ivb_gt1_break, intel_ivb_gt1_device, name, "Intel(R) HD Graphics IvyBridge S GT1");
ivb_gt1_break:
- intel_ivb_gt1_device.vendor_id = device_id;
+ intel_ivb_gt1_device.device_id = device_id;
intel_ivb_gt1_device.platform = cl_get_platform_default();
ret = &intel_ivb_gt1_device;
cl_intel_platform_get_default_extension(ret);
@@ -376,7 +376,7 @@ ivb_gt1_break:
case PCI_CHIP_IVYBRIDGE_S_GT2:
DECL_INFO_STRING(ivb_gt2_break, intel_ivb_gt2_device, name, "Intel(R) HD Graphics IvyBridge S GT2");
ivb_gt2_break:
- intel_ivb_gt2_device.vendor_id = device_id;
+ intel_ivb_gt2_device.device_id = device_id;
intel_ivb_gt2_device.platform = cl_get_platform_default();
ret = &intel_ivb_gt2_device;
cl_intel_platform_get_default_extension(ret);
@@ -385,7 +385,7 @@ ivb_gt2_break:
case PCI_CHIP_BAYTRAIL_T:
DECL_INFO_STRING(baytrail_t_device_break, intel_baytrail_t_device, name, "Intel(R) HD Graphics Bay Trail-T");
baytrail_t_device_break:
- intel_baytrail_t_device.vendor_id = device_id;
+ intel_baytrail_t_device.device_id = device_id;
intel_baytrail_t_device.platform = cl_get_platform_default();
ret = &intel_baytrail_t_device;
cl_intel_platform_get_default_extension(ret);
@@ -403,41 +403,43 @@ baytrail_t_device_break:
DECL_INFO_STRING(brw_gt1_break, intel_brw_gt1_device, name, "Intel(R) HD Graphics BroadWell ULX GT1");
brw_gt1_break:
/* For Gen8 and later, half float is suppported and we will enable cl_khr_fp16. */
- intel_brw_gt1_device.vendor_id = device_id;
+ intel_brw_gt1_device.device_id = device_id;
intel_brw_gt1_device.platform = cl_get_platform_default();
ret = &intel_brw_gt1_device;
cl_intel_platform_enable_fp16_extension(ret);
break;
case PCI_CHIP_BROADWLL_M_GT2:
- DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell Mobile GT2");
+ DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics 5600 BroadWell Mobile GT2");
case PCI_CHIP_BROADWLL_D_GT2:
- DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell U-Processor GT2");
+ DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2");
case PCI_CHIP_BROADWLL_S_GT2:
DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell Server GT2");
case PCI_CHIP_BROADWLL_W_GT2:
DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell Workstation GT2");
case PCI_CHIP_BROADWLL_U_GT2:
- DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell ULX GT2");
+ DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics 5300 BroadWell ULX GT2");
brw_gt2_break:
- intel_brw_gt2_device.vendor_id = device_id;
+ intel_brw_gt2_device.device_id = device_id;
intel_brw_gt2_device.platform = cl_get_platform_default();
ret = &intel_brw_gt2_device;
cl_intel_platform_enable_fp16_extension(ret);
break;
case PCI_CHIP_BROADWLL_M_GT3:
- DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell Mobile GT3");
+ DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) Iris Pro Graphics 6200 BroadWell Mobile GT3");
case PCI_CHIP_BROADWLL_D_GT3:
- DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell U-Processor GT3");
+ DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics 6000 BroadWell U-Processor GT3");
+ case PCI_CHIP_BROADWLL_UI_GT3:
+ DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) Iris Graphics 6100 BroadWell U-Processor GT3");
case PCI_CHIP_BROADWLL_S_GT3:
- DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell Server GT3");
+ DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) Iris Pro Graphics P6300 BroadWell Server GT3");
case PCI_CHIP_BROADWLL_W_GT3:
DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell Workstation GT3");
case PCI_CHIP_BROADWLL_U_GT3:
DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell ULX GT3");
brw_gt3_break:
- intel_brw_gt3_device.vendor_id = device_id;
+ intel_brw_gt3_device.device_id = device_id;
intel_brw_gt3_device.platform = cl_get_platform_default();
ret = &intel_brw_gt3_device;
cl_intel_platform_enable_fp16_extension(ret);
@@ -449,7 +451,7 @@ brw_gt3_break:
case PCI_CHIP_CHV_3:
DECL_INFO_STRING(chv_break, intel_chv_device, name, "Intel(R) HD Graphics Cherryview");
chv_break:
- intel_chv_device.vendor_id = device_id;
+ intel_chv_device.device_id = device_id;
intel_chv_device.platform = cl_get_platform_default();
ret = &intel_chv_device;
cl_intel_platform_enable_fp16_extension(ret);
@@ -467,7 +469,7 @@ chv_break:
case PCI_CHIP_SKYLAKE_SRV_GT1:
DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Server GT1");
skl_gt1_break:
- intel_skl_gt1_device.vendor_id = device_id;
+ intel_skl_gt1_device.device_id = device_id;
intel_skl_gt1_device.platform = cl_get_platform_default();
ret = &intel_skl_gt1_device;
cl_intel_platform_enable_fp16_extension(ret);
@@ -486,7 +488,7 @@ skl_gt1_break:
case PCI_CHIP_SKYLAKE_SRV_GT2:
DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Server GT2");
skl_gt2_break:
- intel_skl_gt2_device.vendor_id = device_id;
+ intel_skl_gt2_device.device_id = device_id;
intel_skl_gt2_device.platform = cl_get_platform_default();
ret = &intel_skl_gt2_device;
cl_intel_platform_enable_fp16_extension(ret);
@@ -499,7 +501,7 @@ skl_gt2_break:
case PCI_CHIP_SKYLAKE_SRV_GT3:
DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Server GT3");
skl_gt3_break:
- intel_skl_gt3_device.vendor_id = device_id;
+ intel_skl_gt3_device.device_id = device_id;
intel_skl_gt3_device.platform = cl_get_platform_default();
ret = &intel_skl_gt3_device;
cl_intel_platform_enable_fp16_extension(ret);
@@ -510,7 +512,7 @@ skl_gt3_break:
case PCI_CHIP_SKYLAKE_SRV_GT4:
DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Server GT4");
skl_gt4_break:
- intel_skl_gt4_device.vendor_id = device_id;
+ intel_skl_gt4_device.device_id = device_id;
intel_skl_gt4_device.platform = cl_get_platform_default();
ret = &intel_skl_gt4_device;
cl_intel_platform_enable_fp16_extension(ret);
@@ -901,9 +903,9 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel)
{
size_t work_group_size, thread_cnt;
int simd_width = interp_kernel_get_simd_width(kernel->opaque);
- int vendor_id = kernel->program->ctx->device->vendor_id;
+ int device_id = kernel->program->ctx->device->device_id;
if (!interp_kernel_use_slm(kernel->opaque)) {
- if (!IS_BAYTRAIL_T(vendor_id) || simd_width == 16)
+ if (!IS_BAYTRAIL_T(device_id) || simd_width == 16)
work_group_size = simd_width * 64;
else
work_group_size = kernel->program->ctx->device->max_compute_unit *
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 6daa31c..b5db91c 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -24,6 +24,7 @@
struct _cl_device_id {
DEFINE_ICD(dispatch)
cl_device_type device_type;
+ cl_uint device_id;
cl_uint vendor_id;
cl_uint max_compute_unit; // maximum EU number
cl_uint max_thread_per_unit; // maximum EU threads per EU.
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index e858d5e..9e34bb8 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -36,7 +36,10 @@ cl_int cl_enqueue_read_buffer(enqueue_data* data)
assert(mem->type == CL_MEM_BUFFER_TYPE ||
mem->type == CL_MEM_SUBBUFFER_TYPE);
struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
- if (!mem->is_userptr) {
+ //cl_buffer_get_subdata sometime is very very very slow in linux kernel, in skl and chv,
+ //and it is randomly. So temporary disable it, use map/copy/unmap to read.
+ //Should re-enable it after find root cause.
+ if (0 && !mem->is_userptr) {
if (cl_buffer_get_subdata(mem->bo, data->offset + buffer->sub_offset,
data->size, data->ptr) != 0)
err = CL_MAP_FAILURE;
diff --git a/src/cl_event.c b/src/cl_event.c
index bbc1776..bf44197 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -613,6 +613,61 @@ cl_int cl_event_barrier_with_wait_list(cl_command_queue queue,
return CL_SUCCESS;
}
+cl_ulong cl_event_get_cpu_timestamp(cl_ulong *cpu_time)
+{
+ struct timespec ts;
+
+ if(clock_gettime(CLOCK_MONOTONIC_RAW,&ts) != 0){
+ printf("CPU Timmer error\n");
+ return CL_FALSE;
+ }
+ *cpu_time = (1000000000.0) * (cl_ulong) ts.tv_sec + (cl_ulong) ts.tv_nsec;
+
+ return CL_SUCCESS;
+}
+
+cl_int cl_event_get_queued_cpu_timestamp(cl_event event)
+{
+ cl_int ret_val;
+
+ ret_val = cl_event_get_cpu_timestamp(&event->queued_timestamp);
+
+ return ret_val;
+}
+
+cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp,cl_ulong end_timestamp)
+{
+ cl_ulong ret_val;
+
+ if(end_timestamp > start_timestamp){
+ ret_val = end_timestamp - start_timestamp;
+ }
+ else {
+ /*if start time stamp is greater than end timstamp then set ret value to max*/
+ ret_val = ((cl_ulong) 1 << 32);
+ }
+
+ return ret_val;
+}
+
+cl_ulong cl_event_get_start_timestamp(cl_event event)
+{
+ cl_ulong ret_val;
+
+ ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[2]);
+
+ return ret_val;
+}
+
+cl_ulong cl_event_get_end_timestamp(cl_event event)
+{
+ cl_ulong ret_val;
+
+ ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[3]);
+
+ return ret_val;
+}
+
cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name)
{
cl_ulong ret_val = 0;
diff --git a/src/cl_event.h b/src/cl_event.h
index e3cd2b2..f7bf09f 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -70,6 +70,7 @@ struct _cl_event {
enqueue_callback* waits_head; /* The head of enqueues list wait on this event */
cl_bool emplict; /* Identify this event whether created by api emplict*/
cl_ulong timestamp[4];/* The time stamps for profiling. */
+ cl_ulong queued_timestamp;
};
/* Create a new event object */
@@ -96,6 +97,16 @@ void cl_event_update_status(cl_event, cl_int);
cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*);
/* Create the barrier event */
cl_int cl_event_barrier_with_wait_list(cl_command_queue, cl_uint, const cl_event *, cl_event*);
+/* Get the cpu time */
+cl_ulong cl_event_get_cpu_timestamp(cl_ulong *cpu_time);
+/*Get the cpu time for queued*/
+cl_int cl_event_get_queued_cpu_timestamp(cl_event event);
+/*get timestamp delate between end and start*/
+cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp,cl_ulong end_timestamp);
+/*Get start time stamp*/
+cl_ulong cl_event_get_start_timestamp(cl_event event);
+/*Get end time stamp*/
+cl_ulong cl_event_get_end_timestamp(cl_event event);
/* Do the event profiling */
cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
/* insert the user event */
diff --git a/src/cl_gbe_loader.cpp b/src/cl_gbe_loader.cpp
index c3454e8..e832a53 100644
--- a/src/cl_gbe_loader.cpp
+++ b/src/cl_gbe_loader.cpp
@@ -27,6 +27,7 @@ gbe_program_new_from_source_cb *compiler_program_new_from_source = NULL;
gbe_program_compile_from_source_cb *compiler_program_compile_from_source = NULL;
gbe_program_new_gen_program_cb *compiler_program_new_gen_program = NULL;
gbe_program_link_program_cb *compiler_program_link_program = NULL;
+gbe_program_check_opt_cb *compiler_program_check_opt = NULL;
gbe_program_build_from_llvm_cb *compiler_program_build_from_llvm = NULL;
gbe_program_new_from_llvm_binary_cb *compiler_program_new_from_llvm_binary = NULL;
gbe_program_serialize_to_binary_cb *compiler_program_serialize_to_binary = NULL;
@@ -279,6 +280,10 @@ struct GbeLoaderInitializer
if (compiler_program_link_program == NULL)
return;
+ compiler_program_check_opt = *(gbe_program_check_opt_cb **)dlsym(dlhCompiler, "gbe_program_check_opt");
+ if (compiler_program_check_opt == NULL)
+ return;
+
compiler_program_build_from_llvm = *(gbe_program_build_from_llvm_cb **)dlsym(dlhCompiler, "gbe_program_build_from_llvm");
if (compiler_program_build_from_llvm == NULL)
return;
diff --git a/src/cl_gbe_loader.h b/src/cl_gbe_loader.h
index 6fa4c98..de91c85 100644
--- a/src/cl_gbe_loader.h
+++ b/src/cl_gbe_loader.h
@@ -28,6 +28,7 @@ extern gbe_program_new_from_source_cb *compiler_program_new_from_source;
extern gbe_program_compile_from_source_cb *compiler_program_compile_from_source;
extern gbe_program_new_gen_program_cb *compiler_program_new_gen_program;
extern gbe_program_link_program_cb *compiler_program_link_program;
+extern gbe_program_check_opt_cb *compiler_program_check_opt;
extern gbe_program_build_from_llvm_cb *compiler_program_build_from_llvm;
extern gbe_program_new_from_llvm_binary_cb *compiler_program_new_from_llvm_binary;
extern gbe_program_serialize_to_binary_cb *compiler_program_serialize_to_binary;
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index 4b43c20..bd87cc4 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -19,7 +19,8 @@
/* Common fields for both all GT devices (IVB / SNB) */
.device_type = CL_DEVICE_TYPE_GPU,
-.vendor_id = 0, /* == device_id (set when requested) */
+.device_id=0,/* == device_id (set when requested) */
+.vendor_id = INTEL_VENDOR_ID,
.max_work_item_dimensions = 3,
.max_1d_global_work_sizes = {1024 * 1024 * 256, 1, 1},
.max_2d_global_work_sizes = {8192, 8192, 1},
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index 28d88b6..286e57c 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -227,11 +227,11 @@ cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, cl_kernel_arg_info param_
switch (param_name) {
case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
- if (param_value_size < sizeof(cl_kernel_arg_address_qualifier))
- return CL_INVALID_VALUE;
if (param_value_size_ret)
*param_value_size_ret = sizeof(cl_kernel_arg_address_qualifier);
if (!param_value) return CL_SUCCESS;
+ if (param_value_size < sizeof(cl_kernel_arg_address_qualifier))
+ return CL_INVALID_VALUE;
if ((cl_ulong)ret_info == 0) {
*(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_PRIVATE;
} else if ((cl_ulong)ret_info == 1 || (cl_ulong)ret_info == 4) {
@@ -243,16 +243,16 @@ cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, cl_kernel_arg_info param_
} else {
/* If no address qualifier is specified, the default address qualifier
which is CL_KERNEL_ARG_ADDRESS_PRIVATE is returned. */
- *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_LOCAL;
+ *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_PRIVATE;
}
return CL_SUCCESS;
case CL_KERNEL_ARG_ACCESS_QUALIFIER:
- if (param_value_size < sizeof(cl_kernel_arg_access_qualifier))
- return CL_INVALID_VALUE;
if (param_value_size_ret)
*param_value_size_ret = sizeof(cl_kernel_arg_access_qualifier);
if (!param_value) return CL_SUCCESS;
+ if (param_value_size < sizeof(cl_kernel_arg_access_qualifier))
+ return CL_INVALID_VALUE;
if (!strcmp((char*)ret_info, "write_only")) {
*(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
} else if (!strcmp((char*)ret_info, "read_only")) {
@@ -267,21 +267,21 @@ cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, cl_kernel_arg_info param_
case CL_KERNEL_ARG_TYPE_NAME:
case CL_KERNEL_ARG_NAME:
str_len = strlen(ret_info);
- if (param_value_size < str_len + 1)
- return CL_INVALID_VALUE;
if (param_value_size_ret)
*param_value_size_ret = str_len + 1;
if (!param_value) return CL_SUCCESS;
+ if (param_value_size < str_len + 1)
+ return CL_INVALID_VALUE;
memcpy(param_value, ret_info, str_len);
((char *)param_value)[str_len] = 0;
return CL_SUCCESS;
case CL_KERNEL_ARG_TYPE_QUALIFIER:
- if (param_value_size < sizeof(cl_kernel_arg_type_qualifier))
- return CL_INVALID_VALUE;
if (param_value_size_ret)
*param_value_size_ret = sizeof(cl_kernel_arg_type_qualifier);
if (!param_value) return CL_SUCCESS;
+ if (param_value_size < sizeof(cl_kernel_arg_type_qualifier))
+ return CL_INVALID_VALUE;
if (strstr((char*)ret_info, "const") &&
(arg_type == GBE_ARG_GLOBAL_PTR ||
arg_type == GBE_ARG_CONSTANT_PTR ||
diff --git a/src/cl_program.c b/src/cl_program.c
index db53757..82dd3e3 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -224,6 +224,10 @@ cl_program_create_from_binary(cl_context ctx,
}
program = cl_program_new(ctx);
+ if (UNLIKELY(program == NULL)) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ goto error;
+ }
// TODO: Need to check the binary format here to return CL_INVALID_BINARY.
TRY_ALLOC(program->binary, cl_calloc(lengths[0], sizeof(char)));
@@ -237,7 +241,7 @@ cl_program_create_from_binary(cl_context ctx,
TRY_ALLOC(typed_binary, cl_calloc(lengths[0]+1, sizeof(char)));
memcpy(typed_binary+1, binaries[0], lengths[0]);
*typed_binary = 1;
- program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->vendor_id, typed_binary, program->binary_sz+1);
+ program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->device_id, typed_binary, program->binary_sz+1);
cl_free(typed_binary);
if (UNLIKELY(program->opaque == NULL)) {
err = CL_INVALID_PROGRAM;
@@ -254,7 +258,7 @@ cl_program_create_from_binary(cl_context ctx,
err= CL_INVALID_BINARY;
goto error;
}
- program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->vendor_id, program->binary, program->binary_sz);
+ program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->device_id, program->binary, program->binary_sz);
if (UNLIKELY(program->opaque == NULL)) {
err = CL_INVALID_PROGRAM;
@@ -263,7 +267,7 @@ cl_program_create_from_binary(cl_context ctx,
program->source_type = FROM_LLVM;
}
else if (*program->binary == 0) {
- program->opaque = interp_program_new_from_binary(program->ctx->device->vendor_id, program->binary, program->binary_sz);
+ program->opaque = interp_program_new_from_binary(program->ctx->device->device_id, program->binary, program->binary_sz);
if (UNLIKELY(program->opaque == NULL)) {
err = CL_INVALID_PROGRAM;
goto error;
@@ -379,7 +383,12 @@ cl_program_create_from_llvm(cl_context ctx,
INVALID_VALUE_IF (file_name == NULL);
program = cl_program_new(ctx);
- program->opaque = compiler_program_new_from_llvm(ctx->device->vendor_id, file_name, NULL, NULL, program->build_log_max_sz, program->build_log, &program->build_log_sz, 1);
+ if (UNLIKELY(program == NULL)) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ goto error;
+ }
+
+ program->opaque = compiler_program_new_from_llvm(ctx->device->device_id, file_name, NULL, NULL, NULL, program->build_log_max_sz, program->build_log, &program->build_log_sz, 1);
if (UNLIKELY(program->opaque == NULL)) {
err = CL_INVALID_PROGRAM;
goto error;
@@ -417,6 +426,11 @@ cl_program_create_from_source(cl_context ctx,
// the real compilation step will be done at build time since we do not have
// yet the compilation options
program = cl_program_new(ctx);
+ if (UNLIKELY(program == NULL)) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ goto error;
+ }
+
TRY_ALLOC (lens, cl_calloc(count, sizeof(int32_t)));
for (i = 0; i < (int) count; ++i) {
size_t len;
@@ -532,7 +546,7 @@ cl_program_build(cl_program p, const char *options)
goto error;
}
- p->opaque = compiler_program_new_from_source(p->ctx->device->vendor_id, p->source, p->build_log_max_sz, options, p->build_log, &p->build_log_sz);
+ p->opaque = compiler_program_new_from_source(p->ctx->device->device_id, p->source, p->build_log_max_sz, options, p->build_log, &p->build_log_sz);
if (UNLIKELY(p->opaque == NULL)) {
if (p->build_log_sz > 0 && strstr(p->build_log, "error: error reading 'options'"))
err = CL_INVALID_BUILD_OPTIONS;
@@ -560,7 +574,7 @@ cl_program_build(cl_program p, const char *options)
/* Create all the kernels */
TRY (cl_program_load_gen_program, p);
} else if (p->source_type == FROM_BINARY) {
- p->opaque = interp_program_new_from_binary(p->ctx->device->vendor_id, p->binary, p->binary_sz);
+ p->opaque = interp_program_new_from_binary(p->ctx->device->device_id, p->binary, p->binary_sz);
if (UNLIKELY(p->opaque == NULL)) {
err = CL_BUILD_PROGRAM_FAILURE;
goto error;
@@ -604,21 +618,52 @@ cl_program_link(cl_context context,
cl_int err = CL_SUCCESS;
cl_int i = 0;
int copyed = 0;
+ cl_bool ret = 0;
+ int avialable_program = 0;
+
+ //Although we don't use options, but still need check options
+ if(!compiler_program_check_opt(options)) {
+ err = CL_INVALID_LINKER_OPTIONS;
+ goto error;
+ }
+
+ for(i = 0; i < num_input_programs; i++) {
+ //num_input_programs >0 and input_programs MUST not NULL, so compare with input_programs[0] directly.
+ if(input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_LIBRARY ||
+ input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) {
+ avialable_program++;
+ }
+ }
+
+ //None of program contain a compilerd binary or library.
+ if(avialable_program == 0) {
+ goto done;
+ }
+
+ //Must all of program contain a compilerd binary or library.
+ if(avialable_program < num_input_programs) {
+ err = CL_INVALID_OPERATION;
+ goto error;
+ }
+
p = cl_program_new(context);
+ if (UNLIKELY(p == NULL)) {
+ err = CL_OUT_OF_HOST_MEMORY;
+ goto error;
+ }
if (!check_cl_version_option(p, options)) {
err = CL_BUILD_PROGRAM_FAILURE;
goto error;
}
- p->opaque = compiler_program_new_gen_program(context->device->vendor_id, NULL, NULL);
-
+ p->opaque = compiler_program_new_gen_program(context->device->device_id, NULL, NULL);
for(i = 0; i < num_input_programs; i++) {
// if program create with llvm binary, need deserilize first to get module.
if(input_programs[i])
- compiler_program_link_program(p->opaque, input_programs[i]->opaque,
- p->build_log_max_sz, p->build_log, &p->build_log_sz);
- if (UNLIKELY(p->opaque == NULL)) {
+ ret = compiler_program_link_program(p->opaque, input_programs[i]->opaque,
+ p->build_log_max_sz, p->build_log, &p->build_log_sz);
+ if (UNLIKELY(ret)) {
err = CL_LINK_PROGRAM_FAILURE;
goto error;
}
@@ -650,14 +695,14 @@ cl_program_link(cl_context context,
copyed += sz;
}
done:
- p->is_built = 1;
- p->build_status = CL_BUILD_SUCCESS;
+ if(p) p->is_built = 1;
+ if(p) p->build_status = CL_BUILD_SUCCESS;
if (errcode_ret)
*errcode_ret = err;
return p;
error:
- p->build_status = CL_BUILD_ERROR;
+ if(p) p->build_status = CL_BUILD_ERROR;
if (errcode_ret)
*errcode_ret = err;
return p;
@@ -741,7 +786,7 @@ cl_program_compile(cl_program p,
}
}
- p->opaque = compiler_program_compile_from_source(p->ctx->device->vendor_id, p->source, temp_header_path,
+ p->opaque = compiler_program_compile_from_source(p->ctx->device->device_id, p->source, temp_header_path,
p->build_log_max_sz, options, p->build_log, &p->build_log_sz);
char rm_path[255]="rm ";
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 9c72777..507c910 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -808,10 +808,10 @@ intel_update_device_info(cl_device_id device)
/* Prefer driver-queried max compute units if supported */
if (!drm_intel_get_eu_total(driver->fd, &eu_total))
device->max_compute_unit = eu_total;
- else if (IS_CHERRYVIEW(device->vendor_id))
+ else if (IS_CHERRYVIEW(device->device_id))
printf(CHV_CONFIG_WARNING);
#else
- if (IS_CHERRYVIEW(device->vendor_id))
+ if (IS_CHERRYVIEW(device->device_id))
printf(CHV_CONFIG_WARNING);
#endif
@@ -821,10 +821,10 @@ intel_update_device_info(cl_device_id device)
/* Prefer driver-queried subslice count if supported */
if (!drm_intel_get_subslice_total(driver->fd, &subslice_total))
device->sub_slice_count = subslice_total;
- else if (IS_CHERRYVIEW(device->vendor_id))
+ else if (IS_CHERRYVIEW(device->device_id))
printf(CHV_CONFIG_WARNING);
#else
- if (IS_CHERRYVIEW(device->vendor_id))
+ if (IS_CHERRYVIEW(device->device_id))
printf(CHV_CONFIG_WARNING);
#endif
diff --git a/utests/compiler_degrees.cpp b/utests/compiler_degrees.cpp
index 7a17ca7..2079267 100644
--- a/utests/compiler_degrees.cpp
+++ b/utests/compiler_degrees.cpp
@@ -1,5 +1,7 @@
#include "utest_helper.hpp"
+#define M_180_PI_F 57.295779513082321F
+
void compiler_degrees(void)
{
const int n = 32;
@@ -24,7 +26,7 @@ void compiler_degrees(void)
OCL_MAP_BUFFER(1);
for (int i = 0; i < n; ++i) {
- OCL_ASSERT(((float *)buf_data[1])[i] == src[i] * (180 / 3.141592653589793F));
+ OCL_ASSERT(((float *)buf_data[1])[i] == src[i] * M_180_PI_F);
}
OCL_UNMAP_BUFFER(1);
}
diff --git a/utests/get_arg_info.cpp b/utests/get_arg_info.cpp
index c1ea1ef..effeb64 100644
--- a/utests/get_arg_info.cpp
+++ b/utests/get_arg_info.cpp
@@ -10,7 +10,7 @@ void test_get_arg_info(void)
char name[64];
// Setup kernel and buffers
- OCL_CREATE_KERNEL("test_get_arg_info");
+ OCL_CALL (cl_kernel_init, "test_get_arg_info.cl", "test_get_arg_info", SOURCE, "-cl-kernel-arg-info");
//Arg 0
ret = clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_ADDRESS_QUALIFIER,
diff --git a/utests/get_cl_info.cpp b/utests/get_cl_info.cpp
index e2dc0d7..7c03d95 100644
--- a/utests/get_cl_info.cpp
+++ b/utests/get_cl_info.cpp
@@ -364,6 +364,113 @@ void get_program_build_info(void)
MAKE_UTEST_FROM_FUNCTION(get_program_build_info);
+
+// This method uses clGetProgramBuildInfo to check the llvm dump build options sent
+// and verifies that the llvm dump file is actually generated in the backend.
+void get_build_llvm_info(void)
+{
+ map<cl_program_info, void *> maps;
+ cl_build_status expect_status;
+ char llvm_file[] = "test_llvm_dump.txt";
+ char build_opt[] = "-dump-opt-llvm=test_llvm_dump.txt";
+ FILE *fp = NULL;
+ int sz;
+
+ //Remove any pre-existing file
+ if( (fp = fopen(llvm_file, "r")) != NULL) {
+ fclose(fp);
+ std::remove(llvm_file);
+ }
+
+ OCL_CALL (cl_kernel_init, "compiler_if_else.cl", "compiler_if_else", SOURCE, build_opt);
+
+ /* Do our test.*/
+ expect_status = CL_BUILD_SUCCESS;
+ maps.insert(make_pair(CL_PROGRAM_BUILD_STATUS,
+ (void *)(new Info_Result<cl_build_status>(expect_status))));
+ sz = strlen(build_opt) + 1;
+ maps.insert(make_pair(CL_PROGRAM_BUILD_OPTIONS,
+ (void *)(new Info_Result<char *>(build_opt, sz))));
+
+ for (map<cl_program_info, void *>::iterator x = maps.begin(); x != maps.end(); ++x) {
+ switch (x->first) {
+ case CL_PROGRAM_BUILD_STATUS:
+ CALL_PROG_BUILD_INFO_AND_RET(cl_build_status);
+ break;
+ case CL_PROGRAM_BUILD_OPTIONS:
+ CALL_PROG_BUILD_INFO_AND_RET(char *);
+ break;
+ default:
+ break;
+ }
+ }
+
+ //Test is successful if the backend created the file
+ if( (fp = fopen(llvm_file, "r")) == NULL) {
+ std::cout << "LLVM file creation.. FAILED";
+ OCL_ASSERT(0);
+ } else {
+ fclose(fp);
+ std::cout << "LLVM file created.. SUCCESS";
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(get_build_llvm_info);
+
+
+// This method uses clGetProgramBuildInfo to check the asm dump build options sent
+// And verifies that the asm dump file is actually generated in the backend.
+void get_build_asm_info(void)
+{
+ map<cl_program_info, void *> maps;
+ cl_build_status expect_status;
+ char asm_file[] = "test_asm_dump.txt";
+ char build_opt[] ="-dump-opt-asm=test_asm_dump.txt";
+ FILE *fp = NULL;
+ int sz;
+
+ //Remove any pre-existing file
+ if( (fp = fopen(asm_file, "r")) != NULL) {
+ fclose(fp);
+ std::remove(asm_file);
+ }
+
+ OCL_CALL (cl_kernel_init, "compiler_if_else.cl", "compiler_if_else", SOURCE, build_opt);
+
+ /* Do our test.*/
+ expect_status = CL_BUILD_SUCCESS;
+ maps.insert(make_pair(CL_PROGRAM_BUILD_STATUS,
+ (void *)(new Info_Result<cl_build_status>(expect_status))));
+ sz = strlen(build_opt) + 1;
+ maps.insert(make_pair(CL_PROGRAM_BUILD_OPTIONS,
+ (void *)(new Info_Result<char *>(build_opt, sz))));
+
+ for (map<cl_program_info, void *>::iterator x = maps.begin(); x != maps.end(); ++x) {
+ switch (x->first) {
+ case CL_PROGRAM_BUILD_STATUS:
+ CALL_PROG_BUILD_INFO_AND_RET(cl_build_status);
+ break;
+ case CL_PROGRAM_BUILD_OPTIONS:
+ CALL_PROG_BUILD_INFO_AND_RET(char *);
+ break;
+ default:
+ break;
+ }
+ }
+
+ //Test is successful if the backend created the file
+ if( (fp = fopen(asm_file, "r")) == NULL) {
+ std::cout << "ASM file creation.. FAILED";
+ OCL_ASSERT(0);
+ } else {
+ fclose(fp);
+ std::cout << "ASM file created.. SUCCESS";
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(get_build_asm_info);
+
+
/* ***************************************************** *
* clGetContextInfo *
* ***************************************************** */
diff --git a/utests/image_1D_buffer.cpp b/utests/image_1D_buffer.cpp
index d8d761f..e2cfcde 100644
--- a/utests/image_1D_buffer.cpp
+++ b/utests/image_1D_buffer.cpp
@@ -3,78 +3,63 @@
void image_1D_buffer(void)
{
- size_t buffer_sz = 1024;
- char *buf_content = (char *)malloc(buffer_sz * sizeof(char));
+ size_t buffer_sz = 65536;
+ char *buf_content = (char *)malloc(buffer_sz * sizeof(int));
int error;
cl_image_desc image_desc;
cl_image_format image_format;
- cl_sampler sampler;
- cl_mem image1, image2;
cl_mem ret_mem = NULL;
OCL_CREATE_KERNEL("image_1D_buffer");
for (int32_t i = 0; i < (int32_t)buffer_sz; ++i)
- buf_content[i] = (rand() & 127);
+ buf_content[i] = (rand() & 0xFFFFFFFF);
- cl_mem buff = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- buffer_sz, buf_content, &error);
- OCL_ASSERT(error == CL_SUCCESS);
+ OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, buffer_sz * sizeof(int), buf_content);
+ OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, buffer_sz * sizeof(int), NULL);
memset(&image_desc, 0x0, sizeof(cl_image_desc));
memset(&image_format, 0x0, sizeof(cl_image_format));
image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
- image_desc.image_row_pitch = buffer_sz;
- image_desc.image_width = buffer_sz / sizeof(uint32_t); //assume rgba32
- image_desc.buffer = buff;
-
- image_format.image_channel_order = CL_RGBA;
- image_format.image_channel_data_type = CL_UNSIGNED_INT8;
-
- image1 = clCreateImage(ctx, CL_MEM_READ_ONLY, &image_format,
- &image_desc, NULL, &error );
- OCL_ASSERT(error == CL_SUCCESS);
-
- error = clGetImageInfo(image1, CL_IMAGE_BUFFER, sizeof(ret_mem), &ret_mem, NULL);
- OCL_ASSERT(error == CL_SUCCESS);
- OCL_ASSERT(ret_mem == buff);
+ image_desc.image_row_pitch = buffer_sz * sizeof(int);
+ image_desc.image_width = buffer_sz; //assume r32
+ image_desc.buffer = buf[0];
+ image_format.image_channel_order = CL_R;
+ image_format.image_channel_data_type = CL_UNSIGNED_INT32;
- memset(&image_desc, 0x0, sizeof(cl_image_desc));
- image_desc.image_type = CL_MEM_OBJECT_IMAGE1D;
- image_desc.image_width = buffer_sz / sizeof(uint32_t);
- image2 = clCreateImage(ctx, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
- &image_format, &image_desc, buf_content, &error);
+ // Create the source image1d_buffer.
+ OCL_CREATE_IMAGE(buf[2], CL_MEM_READ_ONLY, &image_format, &image_desc, NULL);
+ error = clGetImageInfo(buf[2], CL_IMAGE_BUFFER, sizeof(ret_mem), &ret_mem, NULL);
OCL_ASSERT(error == CL_SUCCESS);
+ OCL_ASSERT(ret_mem == buf[0]);
- // Create sampler to use
- sampler = clCreateSampler(ctx, false, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error );
+ // Create the destination image1d_buffer.
+ image_desc.buffer = buf[1];
+ OCL_CREATE_IMAGE(buf[3], CL_MEM_READ_ONLY, &image_format, &image_desc, NULL);
+ error = clGetImageInfo(buf[3], CL_IMAGE_BUFFER, sizeof(ret_mem), &ret_mem, NULL);
OCL_ASSERT(error == CL_SUCCESS);
+ OCL_ASSERT(ret_mem == buf[1]);
- cl_mem result_buf = buf[0] = clCreateBuffer(ctx, 0, buffer_sz, NULL, &error);
- OCL_ASSERT(error == CL_SUCCESS);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[2]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[3]);
- OCL_SET_ARG(0, sizeof(cl_mem), &image1);
- OCL_SET_ARG(1, sizeof(cl_mem), &image2);
- OCL_SET_ARG(2, sizeof(sampler), &sampler);
- OCL_SET_ARG(3, sizeof(cl_mem), &result_buf);
-
- globals[0] = buffer_sz/sizeof(int32_t);
+ globals[0] = buffer_sz;
locals[0] = 16;
OCL_NDRANGE(1);
/* Now check the result. */
OCL_MAP_BUFFER(0);
- for (uint32_t i = 0; i < buffer_sz/sizeof(int32_t); i++)
- OCL_ASSERT(((uint32_t*)buf_data[0])[i] == 1);
+ OCL_MAP_BUFFER(1);
+ for (uint32_t i = 0; i < buffer_sz; i++) {
+ if (((uint32_t*)buf_data[1])[i] != ((uint32_t*)buf_data[0])[i])
+ printf("i %d expected %x got %x \n", i, ((uint32_t*)buf_data[0])[i], ((uint32_t*)buf_data[1])[i]);
+ OCL_ASSERT(((uint32_t*)buf_data[1])[i] == ((uint32_t*)buf_data[0])[i]);
+ }
OCL_UNMAP_BUFFER(0);
-
- clReleaseSampler(sampler);
- clReleaseMemObject(image1);
- clReleaseMemObject(image2);
- clReleaseMemObject(buff);
+ OCL_UNMAP_BUFFER(1);
}
MAKE_UTEST_FROM_FUNCTION(image_1D_buffer);
diff --git a/utests/utest_generator.py b/utests/utest_generator.py
index c220575..91cc938 100644
--- a/utests/utest_generator.py
+++ b/utests/utest_generator.py
@@ -110,7 +110,8 @@ def udebug(ulpSize,returnType,function):
float ULPSIZE_NO_FAST_MATH = %s;
ULPSIZE_FACTOR = select_ulpsize(ULPSIZE_FAST_MATH,ULPSIZE_NO_FAST_MATH);
-
+ bool fast_math = ULPSIZE_FACTOR == ULPSIZE_FAST_MATH;
+
if (isinf(cpu_data[index])){
INFORNAN="INF";
}
@@ -137,25 +138,25 @@ def udebug(ulpSize,returnType,function):
}
else if ((ULPSIZE >= 0 && diff <= ULPSIZE) || (ULPSIZE < 0 && diff == 0)){
printf("%s expect:%s\\n", log, ULPSIZE);
- }
+ }
else
printf_c("%s expect:%s\\n", log, ULPSIZE);
#else
if (isinf(cpu_data[index])){
sprintf(log, "%s expect:%s\\n", log, INFORNAN);
- OCL_ASSERTM(isinf(gpu_data[index]),log);
- }
+ OCL_ASSERTM(isinf(gpu_data[index]) || fast_math,log);
+ }
else if (isnan(cpu_data[index])){
sprintf(log, "%s expect:%s\\n", log, INFORNAN);
- OCL_ASSERTM(isnan(gpu_data[index]),log);
- }
+ OCL_ASSERTM(isnan(gpu_data[index]) || fast_math,log);
+ }
else{
sprintf(log, "%s expect:%s\\n", log, ULPSIZE);
if (ULPSIZE < 0)
OCL_ASSERTM(gpu_data[index] == cpu_data[index], log);
else
OCL_ASSERTM(fabs(gpu_data[index]-cpu_data[index]) <= ULPSIZE, log);
- }
+ }
#endif
}
}\n'''%(returnType,Min_ulp(function),\
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git
More information about the Pkg-opencl-commits
mailing list