[beignet] 01/03: Imported Upstream version 1.1.1

Rebecca Palmer rnpalmer-guest at moszumanska.debian.org
Tue Oct 20 07:17:05 UTC 2015


This is an automated email from the git hooks/post-receive script.

rnpalmer-guest pushed a commit to branch master
in repository beignet.

commit ecc6c75b4884c1dc2a70d088ace8448c4addff3c
Author: Rebecca N. Palmer <rebecca_palmer at zoho.com>
Date:   Mon Oct 19 22:03:21 2015 +0100

    Imported Upstream version 1.1.1
---
 CMakeLists.txt                              |   2 +-
 backend/src/backend/gen_context.cpp         |  71 ++++++++-----
 backend/src/backend/gen_context.hpp         |   6 ++
 backend/src/backend/gen_insn_scheduling.cpp |   6 +-
 backend/src/backend/gen_insn_selection.cpp  |  17 +++-
 backend/src/backend/gen_program.cpp         |   9 +-
 backend/src/backend/gen_program.hpp         |   4 +-
 backend/src/backend/gen_register.hpp        |  16 ++-
 backend/src/backend/program.cpp             | 151 +++++++++++++++++++++-------
 backend/src/backend/program.h               |  11 +-
 backend/src/ir/printf.cpp                   |  21 +++-
 backend/src/ir/printf.hpp                   |  17 ++--
 backend/src/ir/structurizer.cpp             |  24 ++++-
 backend/src/ir/structurizer.hpp             |   4 +-
 backend/src/libocl/include/ocl_float.h      |   1 +
 backend/src/libocl/src/ocl_image.cl         |  14 ++-
 backend/src/libocl/tmpl/ocl_common.tmpl.cl  |   4 +-
 backend/src/llvm/llvm_printf_parser.cpp     |   2 +-
 docs/NEWS.mdwn                              |   3 +
 kernels/image_1D_buffer.cl                  |  12 +--
 src/cl_api.c                                |  18 +++-
 src/cl_device_data.h                        |   5 +-
 src/cl_device_id.c                          |  42 ++++----
 src/cl_device_id.h                          |   1 +
 src/cl_enqueue.c                            |   5 +-
 src/cl_event.c                              |  55 ++++++++++
 src/cl_event.h                              |  11 ++
 src/cl_gbe_loader.cpp                       |   5 +
 src/cl_gbe_loader.h                         |   1 +
 src/cl_gt_device.h                          |   3 +-
 src/cl_kernel.c                             |  18 ++--
 src/cl_program.c                            |  75 +++++++++++---
 src/intel/intel_driver.c                    |   8 +-
 utests/compiler_degrees.cpp                 |   4 +-
 utests/get_arg_info.cpp                     |   2 +-
 utests/get_cl_info.cpp                      | 107 ++++++++++++++++++++
 utests/image_1D_buffer.cpp                  |  73 ++++++--------
 utests/utest_generator.py                   |  15 +--
 38 files changed, 629 insertions(+), 214 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9713a32..2ed8429 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,7 +18,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0)
 PROJECT(OCL)
 set (LIBCL_DRIVER_VERSION_MAJOR 1)
 set (LIBCL_DRIVER_VERSION_MINOR 1)
-set (LIBCL_DRIVER_VERSION_PATCH 0)
+set (LIBCL_DRIVER_VERSION_PATCH 1)
 set (LIBCL_C_VERSION_MAJOR 1)
 set (LIBCL_C_VERSION_MINOR 2)
 if( ${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index e16b0a9..2983d52 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -49,6 +49,7 @@ namespace gbe
     this->p = NULL;
     this->sel = NULL;
     this->ra = NULL;
+    this->asmFileName = NULL;
     this->ifEndifFix = false;
     this->regSpillTick = 0;
   }
@@ -76,6 +77,10 @@ namespace gbe
     this->regSpillTick = 0;
   }
 
+  void GenContext::setASMFileName(const char* asmFname) {
+    this->asmFileName = asmFname;
+  }
+
   void GenContext::newSelection(void) {
     this->sel = GBE_NEW(Selection, *this);
   }
@@ -2291,35 +2296,17 @@ namespace gbe
     genKernel->insnNum = p->store.size();
     genKernel->insns = GBE_NEW_ARRAY_NO_ARG(GenInstruction, genKernel->insnNum);
     std::memcpy(genKernel->insns, &p->store[0], genKernel->insnNum * sizeof(GenInstruction));
-    if (OCL_OUTPUT_ASM) {
-      std::cout << genKernel->getName() << "'s disassemble begin:" << std::endl;
-      ir::LabelIndex curLabel = (ir::LabelIndex)0;
-      GenCompactInstruction * pCom = NULL;
-      GenInstruction insn[2];
-      std::cout << "  L0:" << std::endl;
-      for (uint32_t insnID = 0; insnID < genKernel->insnNum; ) {
-        if (labelPos.find((ir::LabelIndex)(curLabel + 1))->second == insnID &&
-            curLabel < this->getFunction().labelNum()) {
-          std::cout << "  L" << curLabel + 1 << ":" << std::endl;
-          curLabel = (ir::LabelIndex)(curLabel + 1);
-          while(labelPos.find((ir::LabelIndex)(curLabel + 1))->second == insnID) {
-            std::cout << "  L" << curLabel + 1 << ":" << std::endl;
-            curLabel = (ir::LabelIndex)(curLabel + 1);
-          }
-        }
-        std::cout << "    (" << std::setw(8) << insnID << ")  ";
-        pCom = (GenCompactInstruction*)&p->store[insnID];
-        if(pCom->bits1.cmpt_control == 1) {
-          decompactInstruction(pCom, &insn);
-          gen_disasm(stdout, &insn, deviceID, 1);
-          insnID++;
-        } else {
-          gen_disasm(stdout, &p->store[insnID], deviceID, 0);
-          insnID = insnID + 2;
-        }
+    if (OCL_OUTPUT_ASM)
+      outputAssembly(stdout, genKernel);
+
+    if (this->asmFileName) {
+      FILE *asmDumpStream = fopen(this->asmFileName, "a");
+      if (asmDumpStream) {
+        outputAssembly(asmDumpStream, genKernel);
+        fclose(asmDumpStream);
       }
-      std::cout << genKernel->getName() << "'s disassemble end." << std::endl;
     }
+
     return true;
   }
 
@@ -2327,5 +2314,35 @@ namespace gbe
     return GBE_NEW(GenKernel, name, deviceID);
   }
 
+  void GenContext::outputAssembly(FILE *file, GenKernel* genKernel) {
+    fprintf(file, "%s's disassemble begin:\n", genKernel->getName());
+    ir::LabelIndex curLabel = (ir::LabelIndex)0;
+    GenCompactInstruction * pCom = NULL;
+    GenInstruction insn[2];
+    fprintf(file, "  L0:\n");
+    for (uint32_t insnID = 0; insnID < genKernel->insnNum; ) {
+      if (labelPos.find((ir::LabelIndex)(curLabel + 1))->second == insnID &&
+          curLabel < this->getFunction().labelNum()) {
+        fprintf(file, "  L%i:\n", curLabel + 1);
+        curLabel = (ir::LabelIndex)(curLabel + 1);
+        while(labelPos.find((ir::LabelIndex)(curLabel + 1))->second == insnID) {
+          fprintf(file, "  L%i:\n", curLabel + 1);
+          curLabel = (ir::LabelIndex)(curLabel + 1);
+        }
+      }
+      fprintf(file, "    (%8i)  ", insnID);
+      pCom = (GenCompactInstruction*)&p->store[insnID];
+      if(pCom->bits1.cmpt_control == 1) {
+        decompactInstruction(pCom, &insn);
+        gen_disasm(file, &insn, deviceID, 1);
+        insnID++;
+      } else {
+        gen_disasm(file, &p->store[insnID], deviceID, 0);
+        insnID = insnID + 2;
+      }
+    }
+    fprintf(file, "%s's disassemble end.\n", genKernel->getName());
+  }
+
 } /* namespace gbe */
 
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 69fe513..8ef725f 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -43,6 +43,7 @@ namespace gbe
   class SelectionInstruction; // Pre-RA Gen instruction
   class SelectionReg;         // Pre-RA Gen register
   class GenRegister;
+  class GenKernel;
   typedef enum {
     NO_ERROR,
     REGISTER_ALLOCATION_FAIL,
@@ -68,6 +69,8 @@ namespace gbe
     #define GEN7_SCRATCH_SIZE  (12 * KB)
     /*! Start new code generation with specific parameters */
     void startNewCG(uint32_t simdWidth, uint32_t reservedSpillRegs, bool limitRegisterPressure);
+    /*! Set the file name for the ASM dump */
+    void setASMFileName(const char* asmFname);
     /*! Target device ID*/
     uint32_t deviceID;
     /*! Implements base class */
@@ -217,8 +220,11 @@ namespace gbe
     CompileErrorCode errCode;
     bool ifEndifFix;
     uint32_t regSpillTick;
+    const char* asmFileName;
     /*! Build the curbe patch list for the given kernel */
     void buildPatchList(void);
+    /* Helper for printing the assembly */
+    void outputAssembly(FILE *file, GenKernel* genKernel);
     /*! Calc the group's slm offset from R0.0, to work around HSW SLM bug*/
     virtual void emitSLMOffset(void) { };
     /*! new selection of device */
diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
index b3b7042..358a2ce 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -583,7 +583,11 @@ namespace gbe
     for (int32_t insnID = 0; insnID < insnNum; ++insnID) {
       ScheduleDAGNode *node = tracker.insnNodes[insnID];
       if (node->insn.isBranch() || node->insn.isLabel()
-          || node->insn.opcode == SEL_OP_EOT || node->insn.opcode == SEL_OP_IF || node->insn.opcode == SEL_OP_WHILE
+          || node->insn.opcode == SEL_OP_EOT
+          || node->insn.opcode == SEL_OP_IF
+          || node->insn.opcode == SEL_OP_ELSE
+          || node->insn.opcode == SEL_OP_ENDIF
+          || node->insn.opcode == SEL_OP_WHILE
           || node->insn.opcode == SEL_OP_READ_ARF
           || node->insn.opcode == SEL_OP_BARRIER)
         tracker.makeBarrier(insnID, insnNum);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index b0ba9e3..520aede 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2290,10 +2290,16 @@ namespace gbe
       GenRegister dst = sel.selReg(insn.getDst(0), type);
 
       sel.push();
+      if (sel.isScalarReg(insn.getDst(0))) {
+        sel.curr.execWidth = 1;
+        sel.curr.predicate = GEN_PREDICATE_NONE;
+        sel.curr.noMask = 1;
+      }
+
       switch (opcode) {
         case ir::OP_SIMD_SIZE:
           {
-            const GenRegister src = GenRegister::immud(sel.curr.execWidth);
+            const GenRegister src = GenRegister::immud(sel.ctx.getSimdWidth());
             sel.MOV(dst, src);
           }
           break;
@@ -3205,8 +3211,8 @@ namespace gbe
           }
           sel.MOV(dst, imm.getIntegerValue() ? GenRegister::immuw(0xffff) : GenRegister::immuw(0));
         break;
-        case TYPE_U32:
-        case TYPE_S32:
+        case TYPE_U32: sel.MOV(dst, GenRegister::immud(imm.getIntegerValue())); break;
+        case TYPE_S32: sel.MOV(dst, GenRegister::immd(imm.getIntegerValue())); break;
         case TYPE_FLOAT:
           sel.MOV(GenRegister::retype(dst, GEN_TYPE_F),
                   GenRegister::immf(imm.asFloatValue()));
@@ -4998,6 +5004,11 @@ namespace gbe
       }
 
       sel.push();
+      if (sel.isScalarReg(insn.getDst(0))) {
+        sel.curr.execWidth = 1;
+        sel.curr.predicate = GEN_PREDICATE_NONE;
+        sel.curr.noMask = 1;
+      }
       if (src1.file == GEN_IMMEDIATE_VALUE)
         sel.SIMD_SHUFFLE(dst, src0, src1);
       else {
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index c761a2f..04da692 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -172,6 +172,7 @@ namespace gbe {
       ctx = GBE_NEW(Gen9Context, unit, name, deviceID, relaxMath);
     }
     GBE_ASSERTM(ctx != NULL, "Fail to create the gen context\n");
+    ctx->setASMFileName(this->asm_file_name);
 
     for (; codeGen < codeGenNum; ++codeGen) {
       const uint32_t simdWidth = codeGenStrategy[codeGen].simdWidth;
@@ -352,13 +353,14 @@ namespace gbe {
                                            const char *fileName,
                                            const void* module,
                                            const void* llvm_ctx,
+                                           const char* asm_file_name,
                                            size_t stringSize,
                                            char *err,
                                            size_t *errSize,
                                            int optLevel)
   {
     using namespace gbe;
-    GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx);
+    GenProgram *program = GBE_NEW(GenProgram, deviceID, module, llvm_ctx, asm_file_name);
 #ifdef GBE_COMPILER_AVAILABLE
     std::string error;
     // Try to compile the program
@@ -384,7 +386,7 @@ namespace gbe {
     return (gbe_program) program;
   }
 
-  static void genProgramLinkFromLLVM(gbe_program           dst_program,
+  static bool genProgramLinkFromLLVM(gbe_program           dst_program,
                                      gbe_program           src_program,
                                      size_t                stringSize,
                                      char *                err,
@@ -406,10 +408,12 @@ namespace gbe {
           err[stringSize-1] = '\0';
           *errSize = strlen(err);
         }
+        return true;
       }
     }
     // Everything run fine
 #endif
+    return false;
   }
 
   static void genProgramBuildFromLLVM(gbe_program program,
@@ -442,7 +446,6 @@ namespace gbe {
         std::memcpy(err, error.c_str(), msgSize);
         *errSize = error.size();
       }
-      GBE_DELETE(p);
     }
     releaseLLVMContextLock();
 #endif
diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp
index af1a9fa..75d77ba 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -60,7 +60,8 @@ namespace gbe
   {
   public:
     /*! Create an empty program */
-    GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = NULL) : deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx) {}
+    GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = NULL, const char* asm_fname = NULL) :
+      deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx), asm_file_name(asm_fname) {}
     /*! Current device ID*/
     uint32_t deviceID;
     /*! Destroy the program */
@@ -75,6 +76,7 @@ namespace gbe
     }
     void* module;
     void* llvm_ctx;
+    const char* asm_file_name;
     /*! Use custom allocators */
     GBE_CLASS(GenProgram);
   };
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 4f37e30..a15fd60 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -995,7 +995,13 @@ namespace gbe
     }
 
     static INLINE GenRegister uw1(uint32_t file, uint32_t nr, uint32_t subnr) {
-      return offset(retype(vec1(file, nr, 0), GEN_TYPE_UW), 0, typeSize(GEN_TYPE_UW)*subnr);
+      return GenRegister(file,
+                         nr,
+                         subnr,
+                         GEN_TYPE_UW,
+                         GEN_VERTICAL_STRIDE_0,
+                         GEN_WIDTH_1,
+                         GEN_HORIZONTAL_STRIDE_0);
     }
 
     static INLINE GenRegister ub16(uint32_t file, uint32_t nr, uint32_t subnr) {
@@ -1019,7 +1025,13 @@ namespace gbe
     }
 
     static INLINE GenRegister ub1(uint32_t file, uint32_t nr, uint32_t subnr) {
-      return suboffset(retype(vec1(file, nr, 0), GEN_TYPE_UB), subnr);
+      return GenRegister(file,
+                         nr,
+                         subnr,
+                         GEN_TYPE_UB,
+                         GEN_VERTICAL_STRIDE_0,
+                         GEN_WIDTH_1,
+                         GEN_HORIZONTAL_STRIDE_0);
     }
 
     static INLINE GenRegister f1grf(uint32_t nr, uint32_t subnr) {
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index e4cdeaa..0ee76fc 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -518,8 +518,8 @@ namespace gbe {
 #ifdef GBE_COMPILER_AVAILABLE
   BVAR(OCL_OUTPUT_BUILD_LOG, false);
 
-  static bool buildModuleFromSource(const char* input, llvm::Module** out_module, llvm::LLVMContext* llvm_ctx,
-                                    std::vector<std::string>& options, size_t stringSize, char *err,
+  static bool buildModuleFromSource(const char *source, llvm::Module** out_module, llvm::LLVMContext* llvm_ctx,
+                                    std::string dumpLLVMFileName, std::vector<std::string>& options, size_t stringSize, char *err,
                                     size_t *errSize) {
     // Arguments to pass to the clang frontend
     vector<const char *> args;
@@ -551,8 +551,7 @@ namespace gbe {
     args.push_back("-triple");
     args.push_back("spir");
 #endif /* LLVM_VERSION_MINOR <= 2 */
-    args.push_back(input);
-
+    args.push_back("stringInput.cl");
     args.push_back("-ffp-contract=off");
 
     // The compiler invocation needs a DiagnosticsEngine so it can report problems
@@ -574,6 +573,14 @@ namespace gbe {
                                               &args[0],
                                               &args[0] + args.size(),
                                               Diags);
+    llvm::StringRef srcString(source);
+    (*CI).getPreprocessorOpts().addRemappedFile("stringInput.cl",
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 5
+                llvm::MemoryBuffer::getMemBuffer(srcString)
+#else
+                llvm::MemoryBuffer::getMemBuffer(srcString).release()
+#endif
+                );
 
     // Create the compiler instance
     clang::CompilerInstance Clang;
@@ -628,6 +635,34 @@ namespace gbe {
 #endif
 
     *out_module = module;
+
+// Dump the LLVM if requested.
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR < 6)
+    if (!dumpLLVMFileName.empty()) {
+      std::string err;
+      llvm::raw_fd_ostream ostream (dumpLLVMFileName.c_str(),
+                                    err,
+      #if LLVM_VERSION_MINOR == 3
+                                    0
+      #else
+                                    llvm::sys::fs::F_None
+      #endif
+                                    );
+
+      if (err.empty()) {
+        (*out_module)->print(ostream, 0);
+      } //Otherwise, you'll have to make do without the dump.
+    }
+#else
+    if (!dumpLLVMFileName.empty()) {
+      std::error_code err;
+      llvm::raw_fd_ostream ostream (dumpLLVMFileName.c_str(),
+                                    err, llvm::sys::fs::F_None);
+      if (!err) {
+        (*out_module)->print(ostream, 0);
+      } //Otherwise, you'll have to make do without the dump.
+    }
+#endif
     return true;
   }
 
@@ -640,7 +675,8 @@ namespace gbe {
                                      const char *options,
                                      const char *temp_header_path,
                                      std::vector<std::string>& clOpt,
-                                     std::string& clName,
+                                     std::string& dumpLLVMFileName,
+                                     std::string& dumpASMFileName,
                                      int& optLevel,
                                      size_t stringSize,
                                      char *err,
@@ -719,6 +755,16 @@ namespace gbe {
           clOpt.push_back("__FAST_RELAXED_MATH__=1");
         }
 
+        if(str.find("-dump-opt-llvm=") != std::string::npos) {
+          dumpLLVMFileName = str.substr(str.find("=") + 1);
+          continue; // Don't push this str back; ignore it.
+        }
+
+        if(str.find("-dump-opt-asm=") != std::string::npos) {
+          dumpASMFileName = str.substr(str.find("=") + 1);
+          continue; // Don't push this str back; ignore it.
+        }
+
         clOpt.push_back(str);
       }
       free(str);
@@ -741,21 +787,6 @@ namespace gbe {
       }
     }
 
-    char clStr[] = "/tmp/XXXXXX.cl";
-    int clFd = mkstemps(clStr, 3);
-    clName = std::string(clStr);
-
-    FILE *clFile = fdopen(clFd, "w");
-    FATAL_IF(clFile == NULL, "Failed to open temporary file");
-    // XXX enable cl_khr_fp64 may cause some potential bugs.
-    // we may need to revisit here latter when we want to support fp64 completely.
-    // For now, as we don't support fp64 actually, just disable it by default.
-#if 0
-    #define ENABLE_CL_KHR_FP64_STR "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-    if (options && !strstr(const_cast<char *>(options), "-cl-std=CL1.1"))
-      fwrite(ENABLE_CL_KHR_FP64_STR, strlen(ENABLE_CL_KHR_FP64_STR), 1, clFile);
-#endif
-
     if (!findPCH || invalidPCH) {
       clOpt.push_back("-include");
       clOpt.push_back("ocl.h");
@@ -765,9 +796,6 @@ namespace gbe {
       clOpt.push_back(pchFileName);
     }
 
-    // Write the source to the cl file
-    fwrite(source, strlen(source), 1, clFile);
-    fclose(clFile);
     return true;
   }
 
@@ -780,9 +808,11 @@ namespace gbe {
   {
     int optLevel = 1;
     std::vector<std::string> clOpt;
-    std::string clName;
-    if (!processSourceAndOption(source, options, NULL, clOpt, clName,
-                                optLevel, stringSize, err, errSize))
+    std::string dumpLLVMFileName, dumpASMFileName;
+    if (!processSourceAndOption(source, options, NULL, clOpt,
+                                dumpLLVMFileName, dumpASMFileName,
+                                optLevel,
+                                stringSize, err, errSize))
       return NULL;
 
     gbe_program p;
@@ -793,7 +823,7 @@ namespace gbe {
     if (!llvm::llvm_is_multithreaded())
       llvm_mutex.lock();
 
-    if (buildModuleFromSource(clName.c_str(), &out_module, llvm_ctx, clOpt,
+    if (buildModuleFromSource(source, &out_module, llvm_ctx, dumpLLVMFileName, clOpt,
                               stringSize, err, errSize)) {
     // Now build the program from llvm
       size_t clangErrSize = 0;
@@ -804,8 +834,14 @@ namespace gbe {
         clangErrSize = *errSize;
       }
 
-      p = gbe_program_new_from_llvm(deviceID, NULL, out_module, llvm_ctx, stringSize,
-                                    err, errSize, optLevel);
+      if (!dumpASMFileName.empty()) {
+        FILE *asmDumpStream = fopen(dumpASMFileName.c_str(), "w");
+        if (asmDumpStream)
+          fclose(asmDumpStream);
+      }
+      p = gbe_program_new_from_llvm(deviceID, NULL, out_module, llvm_ctx,
+                                    dumpASMFileName.empty() ? NULL : dumpASMFileName.c_str(),
+                                    stringSize, err, errSize, optLevel);
       if (err != NULL)
         *errSize += clangErrSize;
       if (OCL_OUTPUT_BUILD_LOG && options)
@@ -816,7 +852,6 @@ namespace gbe {
     if (!llvm::llvm_is_multithreaded())
       llvm_mutex.unlock();
 
-    remove(clName.c_str());
     return p;
   }
 #endif
@@ -833,8 +868,9 @@ namespace gbe {
   {
     int optLevel = 1;
     std::vector<std::string> clOpt;
-    std::string clName;
-    if (!processSourceAndOption(source, options, temp_header_path, clOpt, clName,
+    std::string dumpLLVMFileName, dumpASMFileName;
+    if (!processSourceAndOption(source, options, temp_header_path, clOpt,
+                                dumpLLVMFileName, dumpASMFileName,
                                 optLevel, stringSize, err, errSize))
       return NULL;
 
@@ -844,7 +880,8 @@ namespace gbe {
     //for some functions, so we use global context now, need switch to new context later.
     llvm::Module * out_module;
     llvm::LLVMContext* llvm_ctx = &llvm::getGlobalContext();
-    if (buildModuleFromSource(clName.c_str(), &out_module, llvm_ctx, clOpt,
+
+    if (buildModuleFromSource(source, &out_module, llvm_ctx, dumpLLVMFileName, clOpt,
                               stringSize, err, errSize)) {
     // Now build the program from llvm
       if (err != NULL) {
@@ -859,30 +896,70 @@ namespace gbe {
         llvm::errs() << options;
     } else
       p = NULL;
-    remove(clName.c_str());
     releaseLLVMContextLock();
     return p;
   }
 #endif
 
 #ifdef GBE_COMPILER_AVAILABLE
-  static void programLinkProgram(gbe_program           dst_program,
+  static bool programLinkProgram(gbe_program           dst_program,
                                  gbe_program           src_program,
                                  size_t                stringSize,
                                  char *                err,
                                  size_t *              errSize)
   {
+    bool ret = 0;
     acquireLLVMContextLock();
 
-    gbe_program_link_from_llvm(dst_program, src_program, stringSize, err, errSize);
+    ret = gbe_program_link_from_llvm(dst_program, src_program, stringSize, err, errSize);
 
     releaseLLVMContextLock();
 
     if (OCL_OUTPUT_BUILD_LOG && err)
       llvm::errs() << err;
+    return ret;
   }
 #endif
 
+#ifdef GBE_COMPILER_AVAILABLE
+    static bool programCheckOption(const char * option)
+    {
+      vector<const char *> args;
+      if (option == NULL) return 1;   //if NULL, return ok
+      std::string s(option);
+      size_t pos = s.find("-create-library");
+      //clang don't accept -create-library and -enable-link-options, erase them
+      if(pos != std::string::npos) {
+        s.erase(pos, strlen("-create-library"));
+      }
+      pos = s.find("-enable-link-options");
+      if(pos != std::string::npos) {
+        s.erase(pos, strlen("-enable-link-options"));
+      }
+      args.push_back(s.c_str());
+
+      // The compiler invocation needs a DiagnosticsEngine so it can report problems
+      std::string ErrorString;
+      llvm::raw_string_ostream ErrorInfo(ErrorString);
+      llvm::IntrusiveRefCntPtr<clang::DiagnosticOptions> DiagOpts = new clang::DiagnosticOptions();
+      DiagOpts->ShowCarets = false;
+      DiagOpts->ShowPresumedLoc = true;
+
+      clang::TextDiagnosticPrinter *DiagClient =
+                               new clang::TextDiagnosticPrinter(ErrorInfo, &*DiagOpts);
+      llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> DiagID(new clang::DiagnosticIDs());
+      clang::DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient);
+
+      // Create the compiler invocation
+      std::unique_ptr<clang::CompilerInvocation> CI(new clang::CompilerInvocation);
+      return clang::CompilerInvocation::CreateFromArgs(*CI,
+                                                       &args[0],
+                                                       &args[0] + args.size(),
+                                                       Diags);
+    }
+#endif
+
+
   static size_t programGetGlobalConstantSize(gbe_program gbeProgram) {
     if (gbeProgram == NULL) return 0;
     const gbe::Program *program = (const gbe::Program*) gbeProgram;
@@ -1127,6 +1204,7 @@ void releaseLLVMContextLock()
 GBE_EXPORT_SYMBOL gbe_program_new_from_source_cb *gbe_program_new_from_source = NULL;
 GBE_EXPORT_SYMBOL gbe_program_compile_from_source_cb *gbe_program_compile_from_source = NULL;
 GBE_EXPORT_SYMBOL gbe_program_link_program_cb *gbe_program_link_program = NULL;
+GBE_EXPORT_SYMBOL gbe_program_check_opt_cb *gbe_program_check_opt = NULL;
 GBE_EXPORT_SYMBOL gbe_program_new_from_binary_cb *gbe_program_new_from_binary = NULL;
 GBE_EXPORT_SYMBOL gbe_program_new_from_llvm_binary_cb *gbe_program_new_from_llvm_binary = NULL;
 GBE_EXPORT_SYMBOL gbe_program_serialize_to_binary_cb *gbe_program_serialize_to_binary = NULL;
@@ -1182,6 +1260,7 @@ namespace gbe
       gbe_program_new_from_source = gbe::programNewFromSource;
       gbe_program_compile_from_source = gbe::programCompileFromSource;
       gbe_program_link_program = gbe::programLinkProgram;
+      gbe_program_check_opt = gbe::programCheckOption;
       gbe_program_get_global_constant_size = gbe::programGetGlobalConstantSize;
       gbe_program_get_global_constant_data = gbe::programGetGlobalConstantData;
       gbe_program_clean_llvm_resource = gbe::programCleanLlvmResource;
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 3637ebb..84ce333 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -30,6 +30,7 @@
 
 #include <stdint.h>
 #include <stdlib.h>
+#include <stdbool.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -181,14 +182,19 @@ typedef gbe_program (gbe_program_compile_from_source_cb)(uint32_t deviceID,
                                                          char *err,
                                                          size_t *err_size);
 extern gbe_program_compile_from_source_cb *gbe_program_compile_from_source;
+
 /*! link the programs. */
-typedef void (gbe_program_link_program_cb)(gbe_program           dst_program,
+typedef bool (gbe_program_link_program_cb)(gbe_program           dst_program,
                                            gbe_program           src_program,
                                            size_t                stringSize,
                                            char *                err,
                                            size_t *              errSize);
 extern gbe_program_link_program_cb *gbe_program_link_program;
 
+/*! check link option. */
+typedef bool (gbe_program_check_opt_cb)(const char *option);
+extern gbe_program_check_opt_cb *gbe_program_check_opt;
+
 /*! create s new genprogram for link. */
 typedef gbe_program (gbe_program_new_gen_program_cb)(uint32_t deviceID,
                                                      const void *module,
@@ -212,6 +218,7 @@ typedef gbe_program (gbe_program_new_from_llvm_cb)(uint32_t deviceID,
                                                    const char *fileName,
                                                    const void *module,
                                                    const void *llvm_ctx,
+                                                   const char *asm_file_name,
                                                    size_t string_size,
                                                    char *err,
                                                    size_t *err_size,
@@ -219,7 +226,7 @@ typedef gbe_program (gbe_program_new_from_llvm_cb)(uint32_t deviceID,
 extern gbe_program_new_from_llvm_cb *gbe_program_new_from_llvm;
 
 /*! link the programs from llvm level. */
-typedef void (gbe_program_link_from_llvm_cb)(gbe_program dst_program,
+typedef bool (gbe_program_link_from_llvm_cb)(gbe_program dst_program,
                                              gbe_program src_program,
                                              size_t      stringSize,
                                              char *      err,
diff --git a/backend/src/ir/printf.cpp b/backend/src/ir/printf.cpp
index 3d9b2fd..eb1c199 100644
--- a/backend/src/ir/printf.cpp
+++ b/backend/src/ir/printf.cpp
@@ -31,6 +31,23 @@ namespace gbe
 
     pthread_mutex_t PrintfSet::lock = PTHREAD_MUTEX_INITIALIZER;
 
+    PrintfSlot::~PrintfSlot(void)
+    {
+        if (ptr)
+        {
+          if (type == PRINTF_SLOT_TYPE_STRING) {
+            free(ptr);
+            ptr = NULL;
+          } else if (type == PRINTF_SLOT_TYPE_STATE) {
+            delete state;
+            state = NULL;
+          } else {
+            type = PRINTF_SLOT_TYPE_NONE;
+            ptr = NULL;
+          }
+        }
+    }
+
     uint32_t PrintfSet::append(PrintfFmt* fmt, Unit& unit)
     {
       fmts.push_back(*fmt);
@@ -40,12 +57,12 @@ namespace gbe
         if (f->type == PRINTF_SLOT_TYPE_STRING)
           continue;
 
-        slots.push_back(&(*f));
+        slots.push_back(*f);
       }
 
       /* Update the total size of size. */
       if (slots.size() > 0)
-        sizeOfSize = slots.back()->state->out_buf_sizeof_offset
+        sizeOfSize = slots.back().state->out_buf_sizeof_offset
                      + getPrintfBufferElementSize(slots.size() - 1);
 
       return (uint32_t)fmts.size();
diff --git a/backend/src/ir/printf.hpp b/backend/src/ir/printf.hpp
index cbab759..df58437 100644
--- a/backend/src/ir/printf.hpp
+++ b/backend/src/ir/printf.hpp
@@ -159,10 +159,7 @@ namespace gbe
         ptr = p;
       }
 
-      ~PrintfSlot(void) {
-        if (ptr)
-          free(ptr);
-      }
+      ~PrintfSlot(void);
     };
 
     class Context;
@@ -177,7 +174,7 @@ namespace gbe
         }
 
         for (size_t i = 0; i < other.slots.size(); ++i) {
-          PrintfSlot* s = other.slots[i];
+          PrintfSlot s = other.slots[i];
           slots.push_back(s);
         }
 
@@ -215,15 +212,15 @@ namespace gbe
       uint8_t getIndexBufBTI() const { return btiIndexBuf; }
 
       uint32_t getPrintfBufferElementSize(uint32_t i) {
-        PrintfSlot* slot = slots[i];
+        PrintfSlot& slot = slots[i];
         int vec_num = 1;
-        if (slot->state->vector_n > 0) {
-          vec_num = slot->state->vector_n;
+        if (slot.state->vector_n > 0) {
+          vec_num = slot.state->vector_n;
         }
 
         assert(vec_num > 0 && vec_num <= 16);
 
-        switch (slot->state->conversion_specifier) {
+        switch (slot.state->conversion_specifier) {
           case PRINTF_CONVERSION_I:
           case PRINTF_CONVERSION_D:
           case PRINTF_CONVERSION_O:
@@ -257,7 +254,7 @@ namespace gbe
 
     private:
       vector<PrintfFmt> fmts;
-      vector<PrintfSlot*> slots;
+      vector<PrintfSlot> slots;
       uint32_t sizeOfSize; // Total sizeof size.
       friend struct LockOutput;
       uint8_t btiBuf;
diff --git a/backend/src/ir/structurizer.cpp b/backend/src/ir/structurizer.cpp
index 6c4e455..38d3dd1 100644
--- a/backend/src/ir/structurizer.cpp
+++ b/backend/src/ir/structurizer.cpp
@@ -458,6 +458,17 @@ namespace ir {
     return p_block;
   }
 
+  void CFGStructurizer::collectInsnNum(Block* block, const BasicBlock* bb)
+  {
+    BasicBlock::const_iterator iter = bb->begin();
+    BasicBlock::const_iterator iter_end = bb->end();
+    while(iter != iter_end)
+    {
+      block->insnNum++;
+      iter++;
+    }
+  }
+
   bool CFGStructurizer::checkForBarrier(const BasicBlock* bb)
   {
     BasicBlock::const_iterator iter = bb->begin();
@@ -600,6 +611,7 @@ namespace ir {
     loops = fn->getLoops();
     fn->foreachBlock([&](ir::BasicBlock &bb){
         orderedBlks.push_back(bbmap[&bb]);
+        collectInsnNum(bbmap[&bb], &bb);
         });
   }
 
@@ -721,6 +733,7 @@ namespace ir {
           p->canBeHandled = false;
           break;
         }
+        p->insnNum += (*iter)->insnNum;
         iter++;
       }
       return insertBlock(p);
@@ -770,6 +783,7 @@ namespace ir {
     if(loopSets.size() == 1)
     {
       Block* p = new SelfLoopBlock(*loopSets.begin());
+      p->insnNum = (*loopSets.begin())->insnNum;
       p->canBeHandled = true;
       (*loopSets.begin())->getExit()->isLoopExit = true;
       return insertBlock(p);
@@ -881,7 +895,8 @@ namespace ir {
     if (TrueBB->succ_size() == 1 && FalseBB->succ_size() == 1
         && TrueBB->pred_size() == 1 && FalseBB->pred_size() == 1
         && *TrueBB->succ_begin() == *FalseBB->succ_begin()
-        && !TrueBB->hasBarrier() && !FalseBB->hasBarrier() ) {
+        && !TrueBB->hasBarrier() && !FalseBB->hasBarrier()
+        && TrueBB->insnNum < 1000 && FalseBB->insnNum < 1000) {
       // if-else pattern
       ifSets.insert(block);
       if(block->fallthrough() == TrueBB) {
@@ -895,17 +910,19 @@ namespace ir {
       }else{
         GBE_ASSERT(0);
       }
+      mergedBB->insnNum = block->insnNum + TrueBB->insnNum + FalseBB->insnNum;
 
       if(block->canBeHandled == false || TrueBB->canBeHandled == false || FalseBB->canBeHandled == false)
         block->canBeHandled = false;
 
       insertBlock(mergedBB);
     } else if (TrueBB->succ_size() == 1 && TrueBB->pred_size() == 1 &&
-        *TrueBB->succ_begin() == FalseBB && !TrueBB->hasBarrier() ) {
+        *TrueBB->succ_begin() == FalseBB && !TrueBB->hasBarrier() && TrueBB->insnNum < 1000 ) {
       // if-then pattern, false is empty
       ifSets.insert(block);
       ifSets.insert(TrueBB);
       mergedBB = new IfThenBlock(block, TrueBB);
+      mergedBB->insnNum = block->insnNum + TrueBB->insnNum;
       if(block->fallthrough() == FalseBB)
         block->inversePredicate = false;
 
@@ -914,11 +931,12 @@ namespace ir {
 
       insertBlock(mergedBB);
     } else if (FalseBB->succ_size() == 1 && FalseBB->pred_size() == 1 &&
-        *FalseBB->succ_begin() == TrueBB && !FalseBB->hasBarrier() ) {
+        *FalseBB->succ_begin() == TrueBB && !FalseBB->hasBarrier() && FalseBB->insnNum < 1000 ) {
       // if-then pattern, true is empty
       ifSets.insert(block);
       ifSets.insert(FalseBB);
       mergedBB = new IfThenBlock(block, FalseBB);
+      mergedBB->insnNum = block->insnNum + FalseBB->insnNum;
       if(block->fallthrough() == TrueBB)
         block->inversePredicate = false;
 
diff --git a/backend/src/ir/structurizer.hpp b/backend/src/ir/structurizer.hpp
index 8207644..09b2a7f 100644
--- a/backend/src/ir/structurizer.hpp
+++ b/backend/src/ir/structurizer.hpp
@@ -53,7 +53,7 @@ namespace ir {
   class Block
   {
   public:
-    Block(BlockType type, const BlockList& children): has_barrier(false), mark(false), canBeHandled(true), inversePredicate(true)
+    Block(BlockType type, const BlockList& children): has_barrier(false), mark(false), canBeHandled(true), inversePredicate(true), insnNum(0)
     {
       this->btype = type;
       this->children = children;
@@ -105,6 +105,7 @@ namespace ir {
      * m-->n
      * */
     bool inversePredicate;
+    int insnNum;
   };
 
   /* represents basic block */
@@ -211,6 +212,7 @@ namespace ir {
       Block* mergeLoopBlock(BlockList& loopSets);
       int  ifPatternMatch(Block *block);
       int  patternMatch(Block *block);
+      void collectInsnNum(Block* block, const BasicBlock* bb);
 
     private:
       void handleSelfLoopBlock(Block *loopblock, LabelIndex& whileLabel);
diff --git a/backend/src/libocl/include/ocl_float.h b/backend/src/libocl/include/ocl_float.h
index 916233b..e63eaf9 100644
--- a/backend/src/libocl/include/ocl_float.h
+++ b/backend/src/libocl/include/ocl_float.h
@@ -88,6 +88,7 @@ INLINE_OVERLOADABLE int __ocl_finitef (float x){
 #define M_PI_4_F     0.7853981633974483F
 #define M_1_PI_F     0.3183098861837907F
 #define M_2_PI_F     0.6366197723675814F
+#define M_180_PI_F   57.295779513082321F
 #define M_2_SQRTPI_F 1.1283791670955126F
 #define M_SQRT2_F    1.4142135623730951F
 #define M_SQRT1_2_F  0.7071067811865476F
diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
index a8dbc92..eb1a2ff 100644
--- a/backend/src/libocl/src/ocl_image.cl
+++ b/backend/src/libocl/src/ocl_image.cl
@@ -387,10 +387,22 @@ DECL_IMAGE_TYPE(image2d_array_t, 3)
              cl_image, defaultSampler, convert_float2(effectCoord), 0);       \
   }
 
+#define DECL_WRITE_IMAGE1D_BUFFER(image_type, image_data_type, suffix, coord_type)     \
+  OVERLOADABLE void write_image ##suffix(image_type cl_image,                 \
+                                         coord_type coord,                    \
+                                         image_data_type color)               \
+  {                                                                           \
+    int2 effectCoord;                                                         \
+    effectCoord.s0 = coord %8192;                                             \
+    effectCoord.s1 = coord / 8192;                                            \
+    __gen_ocl_write_image ##suffix(cl_image, effectCoord, color);              \
+  }
+
+
 #define DECL_IMAGE_1DBuffer(int_clamping_fix, image_data_type, suffix)        \
   DECL_READ_IMAGE1D_BUFFER_NOSAMPLER(image1d_buffer_t, image_data_type,       \
                                      suffix, int)                             \
-  DECL_WRITE_IMAGE(image1d_buffer_t, image_data_type, suffix, int)
+  DECL_WRITE_IMAGE1D_BUFFER(image1d_buffer_t, image_data_type, suffix, int)
 
 DECL_IMAGE_1DBuffer(GEN_FIX_INT_CLAMPING, int4, i)
 DECL_IMAGE_1DBuffer(GEN_FIX_INT_CLAMPING, uint4, ui)
diff --git a/backend/src/libocl/tmpl/ocl_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_common.tmpl.cl
index 76aca2b..b6b09b5 100644
--- a/backend/src/libocl/tmpl/ocl_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_common.tmpl.cl
@@ -44,7 +44,7 @@ OVERLOADABLE float clamp(float v, float l, float u) {
 
 
 OVERLOADABLE float degrees(float radians) {
-  return (180 / M_PI_F) * radians;
+  return M_180_PI_F * radians;
 }
 OVERLOADABLE float radians(float degrees) {
   return (M_PI_F / 180) * degrees;
@@ -96,7 +96,7 @@ OVERLOADABLE half clamp(half v, half l, half u) {
   return max(min(v, u), l);
 }
 OVERLOADABLE half degrees(half radians) {
-  return ((half)(180 / M_PI_F)) * radians;
+  return ((half)(M_180_PI_F)) * radians;
 }
 OVERLOADABLE half radians(half degrees) {
   return ((half)(M_PI_F / 180)) * degrees;
diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp
index 2f85443..3d84457 100644
--- a/backend/src/llvm/llvm_printf_parser.cpp
+++ b/backend/src/llvm/llvm_printf_parser.cpp
@@ -291,7 +291,7 @@ again:
 #if 0
     {
       int j = 0;
-      for (auto &s : *printf_fmt) {
+      for (auto &s : printf_fmt->first) {
         j++;
         if (s.type == PRINTF_SLOT_TYPE_STATE) {
           fprintf(stderr, "---- %d ---: state : \n", j);
diff --git a/docs/NEWS.mdwn b/docs/NEWS.mdwn
index 7aa8e94..c9aef28 100644
--- a/docs/NEWS.mdwn
+++ b/docs/NEWS.mdwn
@@ -1,5 +1,8 @@
 # News
 
+## Oct 08, 2015
+[Beignet 1.1.1](https://01.org/beignet/downloads/beignet-1.1.1-2015-10-08) is released. This is a bug-fix release.
+
 ## Jul 21, 2015
 [Beignet 1.1.0](https://01.org/beignet/downloads/beignet-1.1.0-2015-07-31) is released. This is a major release. Please see the release notes for more information.
 
diff --git a/kernels/image_1D_buffer.cl b/kernels/image_1D_buffer.cl
index e8e0a86..2c1da69 100644
--- a/kernels/image_1D_buffer.cl
+++ b/kernels/image_1D_buffer.cl
@@ -1,13 +1,7 @@
-__kernel void image_1D_buffer(image1d_buffer_t image1, image1d_t image2, sampler_t sampler, __global int *results)
+__kernel void image_1D_buffer(image1d_buffer_t image1, image1d_buffer_t image2)
 {
    int x = get_global_id(0);
-   int offset = x;
 
-   int4 col = read_imagei(image1, x);
-   int4 test = (col != read_imagei(image2, sampler, x));
-
-   if (test.x || test.y || test.z || test.w)
-      results[offset] = 0;
-   else
-      results[offset] = 1;
+   uint4 color = read_imageui(image1, x);
+   write_imageui(image2, x, color);
 }
diff --git a/src/cl_api.c b/src/cl_api.c
index 69eb0bc..cef5bbb 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -77,6 +77,7 @@ handle_events(cl_command_queue queue, cl_int num, const cl_event *wait_list,
     if (e->type != CL_COMMAND_USER &&
 	    e->queue->props & CL_QUEUE_PROFILING_ENABLE) {
 	cl_event_get_timestamp(e, CL_PROFILING_COMMAND_QUEUED);
+	cl_event_get_queued_cpu_timestamp(e);
     }
 
     if(event != NULL)
@@ -1015,10 +1016,11 @@ clLinkProgram(cl_context            context,
   INVALID_VALUE_IF (pfn_notify  == 0 && user_data   != NULL);
   INVALID_VALUE_IF (num_input_programs == 0 && input_programs != NULL);
   INVALID_VALUE_IF (num_input_programs != 0 && input_programs == NULL);
+  INVALID_VALUE_IF (num_input_programs == 0 && input_programs == NULL);
 
   program = cl_program_link(context, num_input_programs, input_programs, options, &err);
 
-  program->is_built = CL_TRUE;
+  if(program) program->is_built = CL_TRUE;
 
   if (pfn_notify) pfn_notify(program, user_data);
 
@@ -1251,6 +1253,11 @@ cl_int clGetKernelArgInfo(cl_kernel kernel, cl_uint arg_index, cl_kernel_arg_inf
   cl_int err = CL_SUCCESS;
   CHECK_KERNEL(kernel);
 
+  if(kernel->program->build_opts == NULL ||
+        strstr(kernel->program->build_opts,"-cl-kernel-arg-info") == NULL ) {
+    err = CL_KERNEL_ARG_INFO_NOT_AVAILABLE;
+    goto error;
+  }
   if (param_name != CL_KERNEL_ARG_ADDRESS_QUALIFIER
           && param_name != CL_KERNEL_ARG_ACCESS_QUALIFIER
           && param_name != CL_KERNEL_ARG_TYPE_NAME
@@ -1468,6 +1475,7 @@ clGetEventProfilingInfo(cl_event             event,
   cl_ulong ret_val;
 
   CHECK_EVENT(event);
+  cl_event_update_status(event, 0);
 
   if (event->type == CL_COMMAND_USER ||
       !(event->queue->props & CL_QUEUE_PROFILING_ENABLE) ||
@@ -1482,15 +1490,15 @@ clGetEventProfilingInfo(cl_event             event,
   }
 
   if (param_name == CL_PROFILING_COMMAND_QUEUED) {
-    ret_val = event->timestamp[0];
+    ret_val = event->queued_timestamp;
   } else if (param_name == CL_PROFILING_COMMAND_SUBMIT) {
-    ret_val = event->timestamp[1];
+    ret_val= event->queued_timestamp + cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[1]);
   } else if (param_name == CL_PROFILING_COMMAND_START) {
     err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_START);
-    ret_val = event->timestamp[2];
+    ret_val = event->queued_timestamp + cl_event_get_start_timestamp(event);
   } else if (param_name == CL_PROFILING_COMMAND_END) {
     err = cl_event_get_timestamp(event, CL_PROFILING_COMMAND_END);
-    ret_val = event->timestamp[3];
+    ret_val =  event->queued_timestamp + cl_event_get_end_timestamp(event);
   } else {
     err = CL_INVALID_VALUE;
     goto error;
diff --git a/src/cl_device_data.h b/src/cl_device_data.h
index b7b64c0..3552a16 100644
--- a/src/cl_device_data.h
+++ b/src/cl_device_data.h
@@ -21,6 +21,7 @@
 #define __CL_DEVICE_DATA_H__
 
 #define INVALID_CHIP_ID -1 //returned by intel_get_device_id if no device found
+#define INTEL_VENDOR_ID                 0x8086   // Vendor ID for Intel
 
 #define PCI_CHIP_GM45_GM                0x2A42
 #define PCI_CHIP_IGD_E_G                0x2E02
@@ -201,7 +202,8 @@
 #define PCI_CHIP_BROADWLL_W_GT2       0x161D /* Intel(R) Broadwell Workstation - GT2 */
 #define PCI_CHIP_BROADWLL_U_GT2       0x161E /* Intel(R) Broadwell ULX - GT2 */
 #define PCI_CHIP_BROADWLL_M_GT3       0x1622 /* Intel(R) Broadwell Mobile - Halo (EDRAM) - GT3 */
-#define PCI_CHIP_BROADWLL_D_GT3       0x1626 /* Intel(R) Broadwell U-Processor - GT3 */
+#define PCI_CHIP_BROADWLL_D_GT3       0x1626 /* Intel(R) Broadwell U-Processor HD 6000 - GT3 */
+#define PCI_CHIP_BROADWLL_UI_GT3      0x162B /* Intel(R) Broadwell U-Process Iris 6100 - GT3 */
 #define PCI_CHIP_BROADWLL_S_GT3       0x162A /* Intel(R) Broadwell Server - GT3 */
 #define PCI_CHIP_BROADWLL_W_GT3       0x162D /* Intel(R) Broadwell Workstation - GT3 */
 #define PCI_CHIP_BROADWLL_U_GT3       0x162E /* Intel(R) Broadwell ULX - GT3 */
@@ -225,6 +227,7 @@
    devid == PCI_CHIP_BROADWLL_D_GT3 || \
    devid == PCI_CHIP_BROADWLL_S_GT3 || \
    devid == PCI_CHIP_BROADWLL_W_GT3 || \
+   devid == PCI_CHIP_BROADWLL_UI_GT3 || \
    devid == PCI_CHIP_BROADWLL_U_GT3)
 
 #define IS_BROADWELL(devid) (IS_BRW_GT1(devid) || IS_BRW_GT2(devid) || IS_BRW_GT3(devid))
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index e9e2c16..1778292 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -350,7 +350,7 @@ cl_get_gt_device(void)
     case PCI_CHIP_HASWELL_CRW_E3:
       DECL_INFO_STRING(has_break, intel_hsw_gt3_device, name, "Intel(R) HD Graphics Haswell CRW GT3 reserved");
 has_break:
-      device->vendor_id = device_id;
+      device->device_id = device_id;
       device->platform = cl_get_platform_default();
       ret = device;
       cl_intel_platform_get_default_extension(ret);
@@ -363,7 +363,7 @@ has_break:
     case PCI_CHIP_IVYBRIDGE_S_GT1:
       DECL_INFO_STRING(ivb_gt1_break, intel_ivb_gt1_device, name, "Intel(R) HD Graphics IvyBridge S GT1");
 ivb_gt1_break:
-      intel_ivb_gt1_device.vendor_id = device_id;
+      intel_ivb_gt1_device.device_id = device_id;
       intel_ivb_gt1_device.platform = cl_get_platform_default();
       ret = &intel_ivb_gt1_device;
       cl_intel_platform_get_default_extension(ret);
@@ -376,7 +376,7 @@ ivb_gt1_break:
     case PCI_CHIP_IVYBRIDGE_S_GT2:
       DECL_INFO_STRING(ivb_gt2_break, intel_ivb_gt2_device, name, "Intel(R) HD Graphics IvyBridge S GT2");
 ivb_gt2_break:
-      intel_ivb_gt2_device.vendor_id = device_id;
+      intel_ivb_gt2_device.device_id = device_id;
       intel_ivb_gt2_device.platform = cl_get_platform_default();
       ret = &intel_ivb_gt2_device;
       cl_intel_platform_get_default_extension(ret);
@@ -385,7 +385,7 @@ ivb_gt2_break:
     case PCI_CHIP_BAYTRAIL_T:
       DECL_INFO_STRING(baytrail_t_device_break, intel_baytrail_t_device, name, "Intel(R) HD Graphics Bay Trail-T");
 baytrail_t_device_break:
-      intel_baytrail_t_device.vendor_id = device_id;
+      intel_baytrail_t_device.device_id = device_id;
       intel_baytrail_t_device.platform = cl_get_platform_default();
       ret = &intel_baytrail_t_device;
       cl_intel_platform_get_default_extension(ret);
@@ -403,41 +403,43 @@ baytrail_t_device_break:
       DECL_INFO_STRING(brw_gt1_break, intel_brw_gt1_device, name, "Intel(R) HD Graphics BroadWell ULX GT1");
 brw_gt1_break:
       /* For Gen8 and later, half float is suppported and we will enable cl_khr_fp16. */
-      intel_brw_gt1_device.vendor_id = device_id;
+      intel_brw_gt1_device.device_id = device_id;
       intel_brw_gt1_device.platform = cl_get_platform_default();
       ret = &intel_brw_gt1_device;
       cl_intel_platform_enable_fp16_extension(ret);
       break;
 
     case PCI_CHIP_BROADWLL_M_GT2:
-      DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell Mobile GT2");
+      DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics 5600 BroadWell Mobile GT2");
     case PCI_CHIP_BROADWLL_D_GT2:
-      DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell U-Processor GT2");
+      DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2");
     case PCI_CHIP_BROADWLL_S_GT2:
       DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell Server GT2");
     case PCI_CHIP_BROADWLL_W_GT2:
       DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell Workstation GT2");
     case PCI_CHIP_BROADWLL_U_GT2:
-      DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell ULX GT2");
+      DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics 5300 BroadWell ULX GT2");
 brw_gt2_break:
-      intel_brw_gt2_device.vendor_id = device_id;
+      intel_brw_gt2_device.device_id = device_id;
       intel_brw_gt2_device.platform = cl_get_platform_default();
       ret = &intel_brw_gt2_device;
       cl_intel_platform_enable_fp16_extension(ret);
       break;
 
     case PCI_CHIP_BROADWLL_M_GT3:
-      DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell Mobile GT3");
+      DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) Iris Pro Graphics 6200 BroadWell Mobile GT3");
     case PCI_CHIP_BROADWLL_D_GT3:
-      DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell U-Processor GT3");
+      DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics 6000 BroadWell U-Processor GT3");
+    case PCI_CHIP_BROADWLL_UI_GT3:
+      DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) Iris Graphics 6100 BroadWell U-Processor GT3");
     case PCI_CHIP_BROADWLL_S_GT3:
-      DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell Server GT3");
+      DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) Iris Pro Graphics P6300 BroadWell Server GT3");
     case PCI_CHIP_BROADWLL_W_GT3:
       DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell Workstation GT3");
     case PCI_CHIP_BROADWLL_U_GT3:
       DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell ULX GT3");
 brw_gt3_break:
-      intel_brw_gt3_device.vendor_id = device_id;
+      intel_brw_gt3_device.device_id = device_id;
       intel_brw_gt3_device.platform = cl_get_platform_default();
       ret = &intel_brw_gt3_device;
       cl_intel_platform_enable_fp16_extension(ret);
@@ -449,7 +451,7 @@ brw_gt3_break:
     case PCI_CHIP_CHV_3:
       DECL_INFO_STRING(chv_break, intel_chv_device, name, "Intel(R) HD Graphics Cherryview");
 chv_break:
-      intel_chv_device.vendor_id = device_id;
+      intel_chv_device.device_id = device_id;
       intel_chv_device.platform = cl_get_platform_default();
       ret = &intel_chv_device;
       cl_intel_platform_enable_fp16_extension(ret);
@@ -467,7 +469,7 @@ chv_break:
     case PCI_CHIP_SKYLAKE_SRV_GT1:
       DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Server GT1");
 skl_gt1_break:
-      intel_skl_gt1_device.vendor_id = device_id;
+      intel_skl_gt1_device.device_id = device_id;
       intel_skl_gt1_device.platform = cl_get_platform_default();
       ret = &intel_skl_gt1_device;
       cl_intel_platform_enable_fp16_extension(ret);
@@ -486,7 +488,7 @@ skl_gt1_break:
     case PCI_CHIP_SKYLAKE_SRV_GT2:
       DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Server GT2");
 skl_gt2_break:
-      intel_skl_gt2_device.vendor_id = device_id;
+      intel_skl_gt2_device.device_id = device_id;
       intel_skl_gt2_device.platform = cl_get_platform_default();
       ret = &intel_skl_gt2_device;
       cl_intel_platform_enable_fp16_extension(ret);
@@ -499,7 +501,7 @@ skl_gt2_break:
     case PCI_CHIP_SKYLAKE_SRV_GT3:
       DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Server GT3");
 skl_gt3_break:
-      intel_skl_gt3_device.vendor_id = device_id;
+      intel_skl_gt3_device.device_id = device_id;
       intel_skl_gt3_device.platform = cl_get_platform_default();
       ret = &intel_skl_gt3_device;
       cl_intel_platform_enable_fp16_extension(ret);
@@ -510,7 +512,7 @@ skl_gt3_break:
     case PCI_CHIP_SKYLAKE_SRV_GT4:
       DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Server GT4");
 skl_gt4_break:
-      intel_skl_gt4_device.vendor_id = device_id;
+      intel_skl_gt4_device.device_id = device_id;
       intel_skl_gt4_device.platform = cl_get_platform_default();
       ret = &intel_skl_gt4_device;
       cl_intel_platform_enable_fp16_extension(ret);
@@ -901,9 +903,9 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel)
 {
   size_t work_group_size, thread_cnt;
   int simd_width = interp_kernel_get_simd_width(kernel->opaque);
-  int vendor_id = kernel->program->ctx->device->vendor_id;
+  int device_id = kernel->program->ctx->device->device_id;
   if (!interp_kernel_use_slm(kernel->opaque)) {
-    if (!IS_BAYTRAIL_T(vendor_id) || simd_width == 16)
+    if (!IS_BAYTRAIL_T(device_id) || simd_width == 16)
       work_group_size = simd_width * 64;
     else
       work_group_size = kernel->program->ctx->device->max_compute_unit *
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 6daa31c..b5db91c 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -24,6 +24,7 @@
 struct _cl_device_id {
   DEFINE_ICD(dispatch)
   cl_device_type device_type;
+  cl_uint  device_id;
   cl_uint  vendor_id;
   cl_uint  max_compute_unit;               // maximum EU number
   cl_uint  max_thread_per_unit;            // maximum EU threads per EU.
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index e858d5e..9e34bb8 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -36,7 +36,10 @@ cl_int cl_enqueue_read_buffer(enqueue_data* data)
   assert(mem->type == CL_MEM_BUFFER_TYPE ||
          mem->type == CL_MEM_SUBBUFFER_TYPE);
   struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
-  if (!mem->is_userptr) {
+  //cl_buffer_get_subdata sometime is very very very slow in linux kernel, in skl and chv,
+  //and it is randomly. So temporary disable it, use map/copy/unmap to read.
+  //Should re-enable it after find root cause.
+  if (0 && !mem->is_userptr) {
     if (cl_buffer_get_subdata(mem->bo, data->offset + buffer->sub_offset,
 			       data->size, data->ptr) != 0)
       err = CL_MAP_FAILURE;
diff --git a/src/cl_event.c b/src/cl_event.c
index bbc1776..bf44197 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -613,6 +613,61 @@ cl_int cl_event_barrier_with_wait_list(cl_command_queue queue,
   return CL_SUCCESS;
 }
 
+cl_ulong cl_event_get_cpu_timestamp(cl_ulong *cpu_time)
+{
+  struct timespec ts;
+
+ if(clock_gettime(CLOCK_MONOTONIC_RAW,&ts) != 0){
+  printf("CPU Timmer error\n");
+  return CL_FALSE;
+  }
+  *cpu_time = (1000000000.0) * (cl_ulong) ts.tv_sec + (cl_ulong) ts.tv_nsec;
+
+  return CL_SUCCESS;
+}
+
+cl_int cl_event_get_queued_cpu_timestamp(cl_event event)
+{
+  cl_int ret_val;
+
+  ret_val = cl_event_get_cpu_timestamp(&event->queued_timestamp);
+
+  return ret_val;
+}
+
+cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp,cl_ulong end_timestamp)
+{
+  cl_ulong ret_val;
+
+  if(end_timestamp > start_timestamp){
+   ret_val = end_timestamp - start_timestamp;
+   }
+  else {
+   /*if start time stamp is greater than end timstamp then set ret value to max*/
+   ret_val = ((cl_ulong) 1 << 32);
+  }
+
+  return ret_val;
+}
+
+cl_ulong cl_event_get_start_timestamp(cl_event event)
+{
+  cl_ulong ret_val;
+
+   ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[2]);
+
+  return ret_val;
+}
+
+cl_ulong cl_event_get_end_timestamp(cl_event event)
+{
+ cl_ulong ret_val;
+
+  ret_val = cl_event_get_timestamp_delta(event->timestamp[0],event->timestamp[3]);
+
+  return ret_val;
+}
+
 cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name)
 {
   cl_ulong ret_val = 0;
diff --git a/src/cl_event.h b/src/cl_event.h
index e3cd2b2..f7bf09f 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -70,6 +70,7 @@ struct _cl_event {
   enqueue_callback*  waits_head;  /* The head of enqueues list wait on this event */
   cl_bool            emplict;     /* Identify this event whether created by api emplict*/
   cl_ulong           timestamp[4];/* The time stamps for profiling. */
+  cl_ulong	     queued_timestamp;
 };
 
 /* Create a new event object */
@@ -96,6 +97,16 @@ void cl_event_update_status(cl_event, cl_int);
 cl_int cl_event_marker_with_wait_list(cl_command_queue, cl_uint, const cl_event *,  cl_event*);
 /* Create the barrier event */
 cl_int cl_event_barrier_with_wait_list(cl_command_queue, cl_uint, const cl_event *,  cl_event*);
+/* Get the cpu time */
+cl_ulong cl_event_get_cpu_timestamp(cl_ulong *cpu_time);
+/*Get the cpu time for queued*/
+cl_int cl_event_get_queued_cpu_timestamp(cl_event event);
+/*get timestamp delate between end and start*/
+cl_ulong cl_event_get_timestamp_delta(cl_ulong start_timestamp,cl_ulong end_timestamp);
+/*Get start time stamp*/
+cl_ulong cl_event_get_start_timestamp(cl_event event);
+/*Get end time stamp*/
+cl_ulong cl_event_get_end_timestamp(cl_event event);
 /* Do the event profiling */
 cl_int cl_event_get_timestamp(cl_event event, cl_profiling_info param_name);
 /* insert the user event */
diff --git a/src/cl_gbe_loader.cpp b/src/cl_gbe_loader.cpp
index c3454e8..e832a53 100644
--- a/src/cl_gbe_loader.cpp
+++ b/src/cl_gbe_loader.cpp
@@ -27,6 +27,7 @@ gbe_program_new_from_source_cb *compiler_program_new_from_source = NULL;
 gbe_program_compile_from_source_cb *compiler_program_compile_from_source = NULL;
 gbe_program_new_gen_program_cb *compiler_program_new_gen_program = NULL;
 gbe_program_link_program_cb *compiler_program_link_program = NULL;
+gbe_program_check_opt_cb *compiler_program_check_opt = NULL;
 gbe_program_build_from_llvm_cb *compiler_program_build_from_llvm = NULL;
 gbe_program_new_from_llvm_binary_cb *compiler_program_new_from_llvm_binary = NULL;
 gbe_program_serialize_to_binary_cb *compiler_program_serialize_to_binary = NULL;
@@ -279,6 +280,10 @@ struct GbeLoaderInitializer
       if (compiler_program_link_program == NULL)
         return;
 
+      compiler_program_check_opt = *(gbe_program_check_opt_cb **)dlsym(dlhCompiler, "gbe_program_check_opt");
+      if (compiler_program_check_opt == NULL)
+        return;
+
       compiler_program_build_from_llvm = *(gbe_program_build_from_llvm_cb **)dlsym(dlhCompiler, "gbe_program_build_from_llvm");
       if (compiler_program_build_from_llvm == NULL)
         return;
diff --git a/src/cl_gbe_loader.h b/src/cl_gbe_loader.h
index 6fa4c98..de91c85 100644
--- a/src/cl_gbe_loader.h
+++ b/src/cl_gbe_loader.h
@@ -28,6 +28,7 @@ extern gbe_program_new_from_source_cb *compiler_program_new_from_source;
 extern gbe_program_compile_from_source_cb *compiler_program_compile_from_source;
 extern gbe_program_new_gen_program_cb *compiler_program_new_gen_program;
 extern gbe_program_link_program_cb *compiler_program_link_program;
+extern gbe_program_check_opt_cb *compiler_program_check_opt;
 extern gbe_program_build_from_llvm_cb *compiler_program_build_from_llvm;
 extern gbe_program_new_from_llvm_binary_cb *compiler_program_new_from_llvm_binary;
 extern gbe_program_serialize_to_binary_cb *compiler_program_serialize_to_binary;
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index 4b43c20..bd87cc4 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -19,7 +19,8 @@
 
 /* Common fields for both all GT devices (IVB / SNB) */
 .device_type = CL_DEVICE_TYPE_GPU,
-.vendor_id = 0, /* == device_id (set when requested) */
+.device_id=0,/* == device_id (set when requested) */
+.vendor_id = INTEL_VENDOR_ID,
 .max_work_item_dimensions = 3,
 .max_1d_global_work_sizes = {1024 * 1024 * 256, 1, 1},
 .max_2d_global_work_sizes = {8192, 8192, 1},
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index 28d88b6..286e57c 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -227,11 +227,11 @@ cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, cl_kernel_arg_info param_
 
   switch (param_name) {
   case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
-    if (param_value_size < sizeof(cl_kernel_arg_address_qualifier))
-      return CL_INVALID_VALUE;
     if (param_value_size_ret)
       *param_value_size_ret = sizeof(cl_kernel_arg_address_qualifier);
     if (!param_value) return CL_SUCCESS;
+    if (param_value_size < sizeof(cl_kernel_arg_address_qualifier))
+      return CL_INVALID_VALUE;
     if ((cl_ulong)ret_info == 0) {
       *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_PRIVATE;
     } else if ((cl_ulong)ret_info == 1 || (cl_ulong)ret_info == 4) {
@@ -243,16 +243,16 @@ cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, cl_kernel_arg_info param_
     } else {
       /* If no address qualifier is specified, the default address qualifier
          which is CL_KERNEL_ARG_ADDRESS_PRIVATE is returned. */
-      *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_LOCAL;
+      *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_PRIVATE;
     }
     return CL_SUCCESS;
 
   case CL_KERNEL_ARG_ACCESS_QUALIFIER:
-    if (param_value_size < sizeof(cl_kernel_arg_access_qualifier))
-      return CL_INVALID_VALUE;
     if (param_value_size_ret)
       *param_value_size_ret = sizeof(cl_kernel_arg_access_qualifier);
     if (!param_value) return CL_SUCCESS;
+    if (param_value_size < sizeof(cl_kernel_arg_access_qualifier))
+      return CL_INVALID_VALUE;
     if (!strcmp((char*)ret_info, "write_only")) {
       *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
     } else if (!strcmp((char*)ret_info, "read_only")) {
@@ -267,21 +267,21 @@ cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, cl_kernel_arg_info param_
   case CL_KERNEL_ARG_TYPE_NAME:
   case CL_KERNEL_ARG_NAME:
     str_len = strlen(ret_info);
-    if (param_value_size < str_len + 1)
-      return CL_INVALID_VALUE;
     if (param_value_size_ret)
       *param_value_size_ret = str_len + 1;
     if (!param_value) return CL_SUCCESS;
+    if (param_value_size < str_len + 1)
+      return CL_INVALID_VALUE;
     memcpy(param_value, ret_info, str_len);
     ((char *)param_value)[str_len] = 0;
     return CL_SUCCESS;
 
   case CL_KERNEL_ARG_TYPE_QUALIFIER:
-    if (param_value_size < sizeof(cl_kernel_arg_type_qualifier))
-      return CL_INVALID_VALUE;
     if (param_value_size_ret)
       *param_value_size_ret = sizeof(cl_kernel_arg_type_qualifier);
     if (!param_value) return CL_SUCCESS;
+    if (param_value_size < sizeof(cl_kernel_arg_type_qualifier))
+      return CL_INVALID_VALUE;
     if (strstr((char*)ret_info, "const") &&
          (arg_type == GBE_ARG_GLOBAL_PTR   ||
           arg_type == GBE_ARG_CONSTANT_PTR ||
diff --git a/src/cl_program.c b/src/cl_program.c
index db53757..82dd3e3 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -224,6 +224,10 @@ cl_program_create_from_binary(cl_context             ctx,
   }
 
   program = cl_program_new(ctx);
+  if (UNLIKELY(program == NULL)) {
+      err = CL_OUT_OF_HOST_MEMORY;
+      goto error;
+  }
 
   // TODO:  Need to check the binary format here to return CL_INVALID_BINARY.
   TRY_ALLOC(program->binary, cl_calloc(lengths[0], sizeof(char)));
@@ -237,7 +241,7 @@ cl_program_create_from_binary(cl_context             ctx,
     TRY_ALLOC(typed_binary, cl_calloc(lengths[0]+1, sizeof(char)));
     memcpy(typed_binary+1, binaries[0], lengths[0]);
     *typed_binary = 1;
-    program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->vendor_id, typed_binary, program->binary_sz+1);
+    program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->device_id, typed_binary, program->binary_sz+1);
     cl_free(typed_binary);
     if (UNLIKELY(program->opaque == NULL)) {
       err = CL_INVALID_PROGRAM;
@@ -254,7 +258,7 @@ cl_program_create_from_binary(cl_context             ctx,
       err= CL_INVALID_BINARY;
       goto error;
     }
-    program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->vendor_id, program->binary, program->binary_sz);
+    program->opaque = compiler_program_new_from_llvm_binary(program->ctx->device->device_id, program->binary, program->binary_sz);
 
     if (UNLIKELY(program->opaque == NULL)) {
       err = CL_INVALID_PROGRAM;
@@ -263,7 +267,7 @@ cl_program_create_from_binary(cl_context             ctx,
     program->source_type = FROM_LLVM;
   }
   else if (*program->binary == 0) {
-    program->opaque = interp_program_new_from_binary(program->ctx->device->vendor_id, program->binary, program->binary_sz);
+    program->opaque = interp_program_new_from_binary(program->ctx->device->device_id, program->binary, program->binary_sz);
     if (UNLIKELY(program->opaque == NULL)) {
       err = CL_INVALID_PROGRAM;
       goto error;
@@ -379,7 +383,12 @@ cl_program_create_from_llvm(cl_context ctx,
   INVALID_VALUE_IF (file_name == NULL);
 
   program = cl_program_new(ctx);
-  program->opaque = compiler_program_new_from_llvm(ctx->device->vendor_id, file_name, NULL, NULL, program->build_log_max_sz, program->build_log, &program->build_log_sz, 1);
+  if (UNLIKELY(program == NULL)) {
+      err = CL_OUT_OF_HOST_MEMORY;
+      goto error;
+  }
+
+  program->opaque = compiler_program_new_from_llvm(ctx->device->device_id, file_name, NULL, NULL, NULL, program->build_log_max_sz, program->build_log, &program->build_log_sz, 1);
   if (UNLIKELY(program->opaque == NULL)) {
     err = CL_INVALID_PROGRAM;
     goto error;
@@ -417,6 +426,11 @@ cl_program_create_from_source(cl_context ctx,
   // the real compilation step will be done at build time since we do not have
   // yet the compilation options
   program = cl_program_new(ctx);
+  if (UNLIKELY(program == NULL)) {
+      err = CL_OUT_OF_HOST_MEMORY;
+      goto error;
+  }
+
   TRY_ALLOC (lens, cl_calloc(count, sizeof(int32_t)));
   for (i = 0; i < (int) count; ++i) {
     size_t len;
@@ -532,7 +546,7 @@ cl_program_build(cl_program p, const char *options)
       goto error;
     }
 
-    p->opaque = compiler_program_new_from_source(p->ctx->device->vendor_id, p->source, p->build_log_max_sz, options, p->build_log, &p->build_log_sz);
+    p->opaque = compiler_program_new_from_source(p->ctx->device->device_id, p->source, p->build_log_max_sz, options, p->build_log, &p->build_log_sz);
     if (UNLIKELY(p->opaque == NULL)) {
       if (p->build_log_sz > 0 && strstr(p->build_log, "error: error reading 'options'"))
         err = CL_INVALID_BUILD_OPTIONS;
@@ -560,7 +574,7 @@ cl_program_build(cl_program p, const char *options)
     /* Create all the kernels */
     TRY (cl_program_load_gen_program, p);
   } else if (p->source_type == FROM_BINARY) {
-    p->opaque = interp_program_new_from_binary(p->ctx->device->vendor_id, p->binary, p->binary_sz);
+    p->opaque = interp_program_new_from_binary(p->ctx->device->device_id, p->binary, p->binary_sz);
     if (UNLIKELY(p->opaque == NULL)) {
       err = CL_BUILD_PROGRAM_FAILURE;
       goto error;
@@ -604,21 +618,52 @@ cl_program_link(cl_context            context,
   cl_int err = CL_SUCCESS;
   cl_int i = 0;
   int copyed = 0;
+  cl_bool ret = 0;
+  int avialable_program = 0;
+
+  //Although we don't use options, but still need check options
+  if(!compiler_program_check_opt(options)) {
+    err = CL_INVALID_LINKER_OPTIONS;
+    goto error;
+  }
+
+  for(i = 0; i < num_input_programs; i++) {
+    //num_input_programs >0 and input_programs MUST not NULL, so compare with input_programs[0] directly.
+    if(input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_LIBRARY ||
+       input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) {
+      avialable_program++;
+    }
+  }
+
+  //None of program contain a compilerd binary or library.
+  if(avialable_program == 0) {
+    goto done;
+  }
+
+  //Must all of program contain a compilerd binary or library.
+  if(avialable_program < num_input_programs) {
+    err = CL_INVALID_OPERATION;
+    goto error;
+  }
+
   p = cl_program_new(context);
+  if (UNLIKELY(p == NULL)) {
+      err = CL_OUT_OF_HOST_MEMORY;
+      goto error;
+  }
 
   if (!check_cl_version_option(p, options)) {
     err = CL_BUILD_PROGRAM_FAILURE;
     goto error;
   }
 
-  p->opaque = compiler_program_new_gen_program(context->device->vendor_id, NULL, NULL);
-
+  p->opaque = compiler_program_new_gen_program(context->device->device_id, NULL, NULL);
   for(i = 0; i < num_input_programs; i++) {
     // if program create with llvm binary, need deserilize first to get module.
     if(input_programs[i])
-      compiler_program_link_program(p->opaque, input_programs[i]->opaque,
-        p->build_log_max_sz, p->build_log, &p->build_log_sz);
-    if (UNLIKELY(p->opaque == NULL)) {
+      ret = compiler_program_link_program(p->opaque, input_programs[i]->opaque,
+                                          p->build_log_max_sz, p->build_log, &p->build_log_sz);
+    if (UNLIKELY(ret)) {
       err = CL_LINK_PROGRAM_FAILURE;
       goto error;
     }
@@ -650,14 +695,14 @@ cl_program_link(cl_context            context,
     copyed += sz;
   }
 done:
-  p->is_built = 1;
-  p->build_status = CL_BUILD_SUCCESS;
+  if(p) p->is_built = 1;
+  if(p) p->build_status = CL_BUILD_SUCCESS;
   if (errcode_ret)
     *errcode_ret = err;
   return p;
 
 error:
-  p->build_status = CL_BUILD_ERROR;
+  if(p) p->build_status = CL_BUILD_ERROR;
   if (errcode_ret)
     *errcode_ret = err;
   return p;
@@ -741,7 +786,7 @@ cl_program_compile(cl_program            p,
       }
     }
 
-    p->opaque = compiler_program_compile_from_source(p->ctx->device->vendor_id, p->source, temp_header_path,
+    p->opaque = compiler_program_compile_from_source(p->ctx->device->device_id, p->source, temp_header_path,
         p->build_log_max_sz, options, p->build_log, &p->build_log_sz);
 
     char rm_path[255]="rm ";
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 9c72777..507c910 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -808,10 +808,10 @@ intel_update_device_info(cl_device_id device)
   /* Prefer driver-queried max compute units if supported */
   if (!drm_intel_get_eu_total(driver->fd, &eu_total))
     device->max_compute_unit = eu_total;
-  else if (IS_CHERRYVIEW(device->vendor_id))
+  else if (IS_CHERRYVIEW(device->device_id))
     printf(CHV_CONFIG_WARNING);
 #else
-  if (IS_CHERRYVIEW(device->vendor_id))
+  if (IS_CHERRYVIEW(device->device_id))
     printf(CHV_CONFIG_WARNING);
 #endif
 
@@ -821,10 +821,10 @@ intel_update_device_info(cl_device_id device)
   /* Prefer driver-queried subslice count if supported */
   if (!drm_intel_get_subslice_total(driver->fd, &subslice_total))
     device->sub_slice_count = subslice_total;
-  else if (IS_CHERRYVIEW(device->vendor_id))
+  else if (IS_CHERRYVIEW(device->device_id))
     printf(CHV_CONFIG_WARNING);
 #else
-  if (IS_CHERRYVIEW(device->vendor_id))
+  if (IS_CHERRYVIEW(device->device_id))
     printf(CHV_CONFIG_WARNING);
 #endif
 
diff --git a/utests/compiler_degrees.cpp b/utests/compiler_degrees.cpp
index 7a17ca7..2079267 100644
--- a/utests/compiler_degrees.cpp
+++ b/utests/compiler_degrees.cpp
@@ -1,5 +1,7 @@
 #include "utest_helper.hpp"
 
+#define M_180_PI_F   57.295779513082321F
+
 void compiler_degrees(void)
 {
   const int n = 32;
@@ -24,7 +26,7 @@ void compiler_degrees(void)
 
   OCL_MAP_BUFFER(1);
   for (int i = 0; i < n; ++i) {
-    OCL_ASSERT(((float *)buf_data[1])[i] == src[i] * (180 / 3.141592653589793F));
+    OCL_ASSERT(((float *)buf_data[1])[i] == src[i] * M_180_PI_F);
   }
   OCL_UNMAP_BUFFER(1);
 }
diff --git a/utests/get_arg_info.cpp b/utests/get_arg_info.cpp
index c1ea1ef..effeb64 100644
--- a/utests/get_arg_info.cpp
+++ b/utests/get_arg_info.cpp
@@ -10,7 +10,7 @@ void test_get_arg_info(void)
   char name[64];
 
   // Setup kernel and buffers
-  OCL_CREATE_KERNEL("test_get_arg_info");
+  OCL_CALL (cl_kernel_init, "test_get_arg_info.cl", "test_get_arg_info", SOURCE, "-cl-kernel-arg-info");
 
   //Arg 0
   ret = clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_ADDRESS_QUALIFIER,
diff --git a/utests/get_cl_info.cpp b/utests/get_cl_info.cpp
index e2dc0d7..7c03d95 100644
--- a/utests/get_cl_info.cpp
+++ b/utests/get_cl_info.cpp
@@ -364,6 +364,113 @@ void get_program_build_info(void)
 
 MAKE_UTEST_FROM_FUNCTION(get_program_build_info);
 
+
+// This method uses clGetProgramBuildInfo to check the llvm dump build options sent
+// and verifies that the llvm dump file is actually generated in the backend.
+void get_build_llvm_info(void)
+{
+    map<cl_program_info, void *> maps;
+    cl_build_status expect_status;
+    char llvm_file[] = "test_llvm_dump.txt";
+    char build_opt[] = "-dump-opt-llvm=test_llvm_dump.txt";
+    FILE *fp = NULL;
+    int sz;
+
+    //Remove any pre-existing file
+    if( (fp = fopen(llvm_file, "r")) != NULL) {
+        fclose(fp);
+        std::remove(llvm_file);
+    }
+
+    OCL_CALL (cl_kernel_init, "compiler_if_else.cl", "compiler_if_else", SOURCE, build_opt);
+
+    /* Do our test.*/
+    expect_status = CL_BUILD_SUCCESS;
+    maps.insert(make_pair(CL_PROGRAM_BUILD_STATUS,
+                          (void *)(new Info_Result<cl_build_status>(expect_status))));
+    sz = strlen(build_opt) + 1;
+    maps.insert(make_pair(CL_PROGRAM_BUILD_OPTIONS,
+                          (void *)(new Info_Result<char *>(build_opt, sz))));
+
+    for (map<cl_program_info, void *>::iterator x = maps.begin(); x != maps.end(); ++x) {
+        switch (x->first) {
+        case CL_PROGRAM_BUILD_STATUS:
+            CALL_PROG_BUILD_INFO_AND_RET(cl_build_status);
+            break;
+        case CL_PROGRAM_BUILD_OPTIONS:
+            CALL_PROG_BUILD_INFO_AND_RET(char *);
+            break;
+        default:
+            break;
+        }
+    }
+
+    //Test is successful if the backend created the file
+    if( (fp = fopen(llvm_file, "r")) == NULL) {
+        std::cout << "LLVM file creation.. FAILED";
+        OCL_ASSERT(0);
+    } else {
+        fclose(fp);
+        std::cout << "LLVM file created.. SUCCESS";
+    }
+}
+
+MAKE_UTEST_FROM_FUNCTION(get_build_llvm_info);
+
+
+// This method uses clGetProgramBuildInfo to check the asm dump build options sent
+// And verifies that the asm dump file is actually generated in the backend.
+void get_build_asm_info(void)
+{
+    map<cl_program_info, void *> maps;
+    cl_build_status expect_status;
+    char asm_file[] = "test_asm_dump.txt";
+    char build_opt[] ="-dump-opt-asm=test_asm_dump.txt";
+    FILE *fp = NULL;
+    int sz;
+
+    //Remove any pre-existing file
+    if( (fp = fopen(asm_file, "r")) != NULL) {
+        fclose(fp);
+        std::remove(asm_file);
+    }
+
+    OCL_CALL (cl_kernel_init, "compiler_if_else.cl", "compiler_if_else", SOURCE, build_opt);
+
+    /* Do our test.*/
+    expect_status = CL_BUILD_SUCCESS;
+    maps.insert(make_pair(CL_PROGRAM_BUILD_STATUS,
+                          (void *)(new Info_Result<cl_build_status>(expect_status))));
+    sz = strlen(build_opt) + 1;
+    maps.insert(make_pair(CL_PROGRAM_BUILD_OPTIONS,
+                          (void *)(new Info_Result<char *>(build_opt, sz))));
+
+    for (map<cl_program_info, void *>::iterator x = maps.begin(); x != maps.end(); ++x) {
+        switch (x->first) {
+        case CL_PROGRAM_BUILD_STATUS:
+            CALL_PROG_BUILD_INFO_AND_RET(cl_build_status);
+            break;
+        case CL_PROGRAM_BUILD_OPTIONS:
+            CALL_PROG_BUILD_INFO_AND_RET(char *);
+            break;
+        default:
+            break;
+        }
+    }
+
+    //Test is successful if the backend created the file
+    if( (fp = fopen(asm_file, "r")) == NULL) {
+        std::cout << "ASM file creation.. FAILED";
+        OCL_ASSERT(0);
+    } else {
+        fclose(fp);
+        std::cout << "ASM file created.. SUCCESS";
+    }
+}
+
+MAKE_UTEST_FROM_FUNCTION(get_build_asm_info);
+
+
 /* ***************************************************** *
  * clGetContextInfo                                      *
  * ***************************************************** */
diff --git a/utests/image_1D_buffer.cpp b/utests/image_1D_buffer.cpp
index d8d761f..e2cfcde 100644
--- a/utests/image_1D_buffer.cpp
+++ b/utests/image_1D_buffer.cpp
@@ -3,78 +3,63 @@
 
 void image_1D_buffer(void)
 {
-  size_t buffer_sz = 1024;
-  char *buf_content = (char *)malloc(buffer_sz * sizeof(char));
+  size_t buffer_sz = 65536;
+  char *buf_content = (char *)malloc(buffer_sz * sizeof(int));
   int error;
   cl_image_desc image_desc;
   cl_image_format image_format;
-  cl_sampler sampler;
-  cl_mem image1, image2;
   cl_mem ret_mem = NULL;
 
   OCL_CREATE_KERNEL("image_1D_buffer");
 
   for (int32_t i = 0; i < (int32_t)buffer_sz; ++i)
-    buf_content[i] = (rand() & 127);
+    buf_content[i] = (rand() & 0xFFFFFFFF);
 
-  cl_mem buff = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                                      buffer_sz, buf_content, &error);
-  OCL_ASSERT(error == CL_SUCCESS);
+  OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, buffer_sz * sizeof(int), buf_content);
+  OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, buffer_sz * sizeof(int), NULL);
 
   memset(&image_desc, 0x0, sizeof(cl_image_desc));
   memset(&image_format, 0x0, sizeof(cl_image_format));
 
   image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
-  image_desc.image_row_pitch = buffer_sz;
-  image_desc.image_width = buffer_sz / sizeof(uint32_t); //assume rgba32
-  image_desc.buffer = buff;
-
-  image_format.image_channel_order = CL_RGBA;
-  image_format.image_channel_data_type = CL_UNSIGNED_INT8;
-
-  image1 = clCreateImage(ctx, CL_MEM_READ_ONLY, &image_format,
-                        &image_desc, NULL, &error );
-  OCL_ASSERT(error == CL_SUCCESS);
-
-  error = clGetImageInfo(image1, CL_IMAGE_BUFFER, sizeof(ret_mem), &ret_mem, NULL);
-  OCL_ASSERT(error == CL_SUCCESS);
-  OCL_ASSERT(ret_mem == buff);
+  image_desc.image_row_pitch = buffer_sz * sizeof(int);
+  image_desc.image_width = buffer_sz; //assume r32
+  image_desc.buffer = buf[0];
 
+  image_format.image_channel_order = CL_R;
+  image_format.image_channel_data_type = CL_UNSIGNED_INT32;
 
-  memset(&image_desc, 0x0, sizeof(cl_image_desc));
-  image_desc.image_type = CL_MEM_OBJECT_IMAGE1D;
-  image_desc.image_width = buffer_sz / sizeof(uint32_t);
-  image2 = clCreateImage(ctx, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
-                         &image_format, &image_desc, buf_content, &error);
+  // Create the source image1d_buffer.
+  OCL_CREATE_IMAGE(buf[2], CL_MEM_READ_ONLY, &image_format, &image_desc, NULL);
+  error = clGetImageInfo(buf[2], CL_IMAGE_BUFFER, sizeof(ret_mem), &ret_mem, NULL);
   OCL_ASSERT(error == CL_SUCCESS);
+  OCL_ASSERT(ret_mem == buf[0]);
 
-  // Create sampler to use
-  sampler = clCreateSampler(ctx, false, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error );
+  // Create the destination image1d_buffer.
+  image_desc.buffer = buf[1];
+  OCL_CREATE_IMAGE(buf[3], CL_MEM_READ_ONLY, &image_format, &image_desc, NULL);
+  error = clGetImageInfo(buf[3], CL_IMAGE_BUFFER, sizeof(ret_mem), &ret_mem, NULL);
   OCL_ASSERT(error == CL_SUCCESS);
+  OCL_ASSERT(ret_mem == buf[1]);
 
-  cl_mem result_buf = buf[0] = clCreateBuffer(ctx, 0, buffer_sz, NULL, &error);
-  OCL_ASSERT(error == CL_SUCCESS);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[2]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[3]);
 
-  OCL_SET_ARG(0, sizeof(cl_mem), &image1);
-  OCL_SET_ARG(1, sizeof(cl_mem), &image2);
-  OCL_SET_ARG(2, sizeof(sampler), &sampler);
-  OCL_SET_ARG(3, sizeof(cl_mem), &result_buf);
-
-  globals[0] = buffer_sz/sizeof(int32_t);
+  globals[0] = buffer_sz;
   locals[0] = 16;
 
   OCL_NDRANGE(1);
 
   /* Now check the result. */
   OCL_MAP_BUFFER(0);
-  for (uint32_t i = 0; i < buffer_sz/sizeof(int32_t); i++)
-    OCL_ASSERT(((uint32_t*)buf_data[0])[i] == 1);
+  OCL_MAP_BUFFER(1);
+  for (uint32_t i = 0; i < buffer_sz; i++) {
+    if (((uint32_t*)buf_data[1])[i] != ((uint32_t*)buf_data[0])[i])
+      printf("i %d expected %x got %x \n", i, ((uint32_t*)buf_data[0])[i], ((uint32_t*)buf_data[1])[i]);
+    OCL_ASSERT(((uint32_t*)buf_data[1])[i] == ((uint32_t*)buf_data[0])[i]);
+  }
   OCL_UNMAP_BUFFER(0);
-
-  clReleaseSampler(sampler);
-  clReleaseMemObject(image1);
-  clReleaseMemObject(image2);
-  clReleaseMemObject(buff);
+  OCL_UNMAP_BUFFER(1);
 }
 
 MAKE_UTEST_FROM_FUNCTION(image_1D_buffer);
diff --git a/utests/utest_generator.py b/utests/utest_generator.py
index c220575..91cc938 100644
--- a/utests/utest_generator.py
+++ b/utests/utest_generator.py
@@ -110,7 +110,8 @@ def udebug(ulpSize,returnType,function):
 
     float ULPSIZE_NO_FAST_MATH = %s;
     ULPSIZE_FACTOR = select_ulpsize(ULPSIZE_FAST_MATH,ULPSIZE_NO_FAST_MATH);
-    
+    bool fast_math = ULPSIZE_FACTOR == ULPSIZE_FAST_MATH;
+
     if (isinf(cpu_data[index])){
       INFORNAN="INF";
     }
@@ -137,25 +138,25 @@ def udebug(ulpSize,returnType,function):
       }
     else if ((ULPSIZE >= 0 && diff <= ULPSIZE) || (ULPSIZE < 0 && diff == 0)){
       printf("%s expect:%s\\n", log, ULPSIZE);
-      }
+    }
     else
       printf_c("%s expect:%s\\n", log, ULPSIZE);
 #else
     if (isinf(cpu_data[index])){
       sprintf(log, "%s expect:%s\\n", log, INFORNAN);
-      OCL_ASSERTM(isinf(gpu_data[index]),log);
-      }
+      OCL_ASSERTM(isinf(gpu_data[index]) || fast_math,log);
+    }
     else if (isnan(cpu_data[index])){
       sprintf(log, "%s expect:%s\\n", log, INFORNAN);
-      OCL_ASSERTM(isnan(gpu_data[index]),log);
-      }
+      OCL_ASSERTM(isnan(gpu_data[index]) || fast_math,log);
+    }
     else{
       sprintf(log, "%s expect:%s\\n", log, ULPSIZE);
       if (ULPSIZE < 0)
             OCL_ASSERTM(gpu_data[index] == cpu_data[index], log);
       else
             OCL_ASSERTM(fabs(gpu_data[index]-cpu_data[index]) <= ULPSIZE, log);
-      }
+    }
 #endif
   }
 }\n'''%(returnType,Min_ulp(function),\

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git



More information about the Pkg-opencl-commits mailing list