[oclgrind] 02/10: New upstream version 16.10
James Price
jprice-guest at moszumanska.debian.org
Sun Oct 23 20:01:40 UTC 2016
This is an automated email from the git hooks/post-receive script.
jprice-guest pushed a commit to branch master
in repository oclgrind.
commit e20cc9099c31783fdf1ef04ec33092e591ae9c76
Author: James Price <j.price at bristol.ac.uk>
Date: Sun Oct 23 19:46:53 2016 +0100
New upstream version 16.10
---
.gitignore | 1 +
.travis-deps.sh | 26 +
.travis.yml | 26 +
CMakeLists.txt | 203 +-
LICENSE | 2 +-
Makefile.am | 106 +-
NEWS | 17 +
README | 138 -
README.md | 196 ++
configure.ac | 58 +-
src/core/Context.cpp | 39 +-
src/core/Context.h | 17 +-
src/core/Kernel.cpp | 259 +-
src/core/Kernel.h | 19 +-
src/core/KernelInvocation.cpp | 18 +-
src/core/KernelInvocation.h | 2 +-
src/core/Memory.cpp | 127 +-
src/core/Memory.h | 34 +-
src/core/Plugin.cpp | 2 +-
src/core/Plugin.h | 9 +-
src/core/Program.cpp | 441 ++-
src/core/Program.h | 8 +-
src/core/Queue.cpp | 21 +-
src/core/Queue.h | 28 +-
src/core/WorkGroup.cpp | 24 +-
src/core/WorkGroup.h | 14 +-
src/core/WorkItem.cpp | 244 +-
src/core/WorkItem.h | 32 +-
src/core/WorkItemBuiltins.cpp | 439 ++-
src/core/clc.h | 102 +-
src/core/common.cpp | 195 +-
src/core/common.h | 128 +-
src/core/half.cpp | 259 ++
src/core/half.h | 165 +-
src/install/INSTALL.darwin | 10 +-
src/install/INSTALL.linux | 10 +-
src/install/INSTALL.windows | 14 +-
src/install/cpack-description | 11 +
src/install/install.bat | 13 +
src/kernel/Simulation.cpp | 43 +-
src/kernel/Simulation.h | 7 +-
src/kernel/oclgrind-kernel.cpp | 13 +-
src/plugins/InstructionCounter.cpp | 2 +-
src/plugins/InstructionCounter.h | 2 +-
src/plugins/InteractiveDebugger.cpp | 26 +-
src/plugins/InteractiveDebugger.h | 2 +-
src/plugins/Logger.cpp | 2 +-
src/plugins/Logger.h | 2 +-
src/plugins/MemCheck.cpp | 134 +-
src/plugins/MemCheck.h | 27 +-
src/plugins/RaceDetector.cpp | 582 ++--
src/plugins/RaceDetector.h | 130 +-
src/plugins/Uninitialized.cpp | 2811 ++++++++++++++++++++
src/plugins/Uninitialized.h | 314 +++
src/runtime/async_queue.cpp | 2 +-
src/runtime/async_queue.h | 2 +-
src/runtime/icd.h | 2 +-
src/runtime/oclgrind | 145 -
src/runtime/oclgrind.cpp | 483 ++++
src/runtime/runtime.cpp | 253 +-
src/runtime/runtime.def | 2 +
tests/Makefile.am | 50 +
tests/apps/CMakeLists.txt | 17 +-
tests/apps/image/image.c | 133 +
tests/apps/vecadd/vecadd.c | 102 +-
tests/common/common.c | 66 +
tests/common/common.h | 15 +
tests/kernels/CMakeLists.txt | 27 +
tests/kernels/TESTS | 27 +
tests/kernels/alignment/packed.ref | 6 +-
tests/kernels/alignment/packed.sim | 2 +-
tests/kernels/alignment/unaligned.ref | 7 +-
tests/kernels/async_copy/async_copy.ref | 12 +-
tests/kernels/async_copy/async_copy_divergent.ref | 13 +-
.../kernels/async_copy/async_copy_global_race.ref | 15 +-
tests/kernels/async_copy/async_copy_local_race.ref | 15 +-
tests/kernels/async_copy/async_copy_loop.ref | 12 +-
.../async_copy/async_copy_loop_divergent.ref | 13 +-
tests/kernels/async_copy/async_copy_single_wi.ref | 13 +-
tests/kernels/async_copy/async_copy_unwaited.ref | 13 +-
.../kernels/atomics/atomic_cmpxchg_false_race.ref | 14 +-
tests/kernels/atomics/atomic_cmpxchg_read_race.ref | 8 +-
.../kernels/atomics/atomic_cmpxchg_write_race.ref | 7 +-
tests/kernels/atomics/atomic_global_fence.ref | 8 +-
tests/kernels/atomics/atomic_global_fence_race.ref | 10 +-
tests/kernels/atomics/atomic_increment.ref | 6 +-
tests/kernels/atomics/atomic_intergroup_race.ref | 8 +-
tests/kernels/atomics/atomic_local_fence.ref | 8 +-
tests/kernels/atomics/atomic_race_after.ref | 9 +-
tests/kernels/atomics/atomic_race_before.ref | 12 +-
tests/kernels/atomics/atomic_same_workitem.ref | 12 +-
.../barrier/barrier_different_instructions.ref | 15 +-
tests/kernels/barrier/barrier_divergence.ref | 13 +-
tests/kernels/bugs/byval_function_argument.cl | 19 +
tests/kernels/bugs/byval_function_argument.ref | 3 +
tests/kernels/bugs/byval_function_argument.sim | 6 +
tests/kernels/bugs/const_gep_expr_pointee_type.cl | 9 +
tests/kernels/bugs/const_gep_expr_pointee_type.sim | 4 +
.../kernels/bugs/false_warning_vector_argument.cl | 8 +
.../kernels/bugs/false_warning_vector_argument.ref | 9 +
.../kernels/bugs/false_warning_vector_argument.sim | 7 +
tests/kernels/bugs/gvn_arbitrary_integers.ref | 10 +-
tests/kernels/bugs/kernel_struct_argument.ref | 6 +-
tests/kernels/bugs/llvm_bswap.cl | 8 +
tests/kernels/bugs/llvm_bswap.ref | 7 +
tests/kernels/bugs/llvm_bswap.sim | 12 +
tests/kernels/bugs/many_alloca.ref | 6 +-
tests/kernels/bugs/multidim_array_in_struct.ref | 6 +-
tests/kernels/bugs/null_argument.ref | 6 +-
tests/kernels/bugs/rhadd_overflow.cl | 4 +
tests/kernels/bugs/rhadd_overflow.ref | 3 +
tests/kernels/bugs/rhadd_overflow.sim | 6 +
tests/kernels/bugs/sroa_addrspace_cast.ref | 6 +-
tests/kernels/bugs/write_vector_write_only_fp.cl | 5 +
tests/kernels/bugs/write_vector_write_only_fp.ref | 17 +
tests/kernels/bugs/write_vector_write_only_fp.sim | 6 +
tests/kernels/data-race/broadcast.ref | 12 +-
tests/kernels/data-race/global_fence.ref | 12 +-
tests/kernels/data-race/global_only_fence.ref | 15 +-
tests/kernels/data-race/global_read_write_race.ref | 14 +-
tests/kernels/data-race/global_read_write_race.sim | 2 +-
.../kernels/data-race/global_write_write_race.ref | 9 +-
tests/kernels/data-race/increment.ref | 12 +-
tests/kernels/data-race/intergroup_hidden_race.ref | 9 +-
tests/kernels/data-race/intergroup_race.ref | 11 +-
tests/kernels/data-race/intergroup_race.sim | 4 +-
tests/kernels/data-race/intragroup_hidden_race.ref | 9 +-
tests/kernels/data-race/local_only_fence.ref | 24 +-
tests/kernels/data-race/local_read_write_race.cl | 3 +
tests/kernels/data-race/local_read_write_race.ref | 9 +-
tests/kernels/data-race/local_write_write_race.ref | 15 +-
tests/kernels/data-race/uniform_write_race.ref | 6 +-
.../kernels/memcheck/async_copy_out_of_bounds.ref | 13 +-
tests/kernels/memcheck/atomic_out_of_bounds.ref | 15 +-
tests/kernels/memcheck/casted_static_array.cl | 31 +
tests/kernels/memcheck/casted_static_array.ref | 7 +
tests/kernels/memcheck/casted_static_array.sim | 6 +
tests/kernels/memcheck/dereference_null.ref | 8 +-
tests/kernels/memcheck/fake_out_of_bounds.cl | 12 +
tests/kernels/memcheck/fake_out_of_bounds.ref | 2 +
tests/kernels/memcheck/fake_out_of_bounds.sim | 6 +
tests/kernels/memcheck/read_out_of_bounds.ref | 17 +-
tests/kernels/memcheck/read_write_only_memory.ref | 16 +-
tests/kernels/memcheck/static_array.cl | 13 +
tests/kernels/memcheck/static_array.ref | 10 +
tests/kernels/memcheck/static_array.sim | 6 +
.../kernels/memcheck/static_array_padded_struct.cl | 12 +
.../memcheck/static_array_padded_struct.ref | 8 +
.../memcheck/static_array_padded_struct.sim | 6 +
tests/kernels/memcheck/write_out_of_bounds.ref | 13 +-
tests/kernels/memcheck/write_read_only_memory.ref | 16 +-
tests/kernels/misc/array.ref | 260 +-
tests/kernels/misc/lvalue_loads.cl | 119 +
tests/kernels/misc/lvalue_loads.ref | 75 +
tests/kernels/misc/lvalue_loads.sim | 29 +
tests/kernels/misc/program_scope_constant_array.cl | 7 +
.../kernels/misc/program_scope_constant_array.ref | 5 +
.../kernels/misc/program_scope_constant_array.sim | 6 +
tests/kernels/misc/reduce.ref | 6 +-
tests/kernels/misc/vecadd.ref | 2052 +++++++-------
tests/kernels/misc/vector_argument.cl | 4 +
tests/kernels/misc/vector_argument.ref | 5 +
tests/kernels/misc/vector_argument.sim | 9 +
tests/kernels/run_kernel_test.py | 93 -
.../uninitialized/padded_nested_struct_memcpy.cl | 27 +
.../uninitialized/padded_nested_struct_memcpy.ref | 25 +
.../uninitialized/padded_nested_struct_memcpy.sim | 6 +
.../uninitialized/padded_struct_alloca_fp.cl | 16 +
.../uninitialized/padded_struct_alloca_fp.ref | 4 +
.../uninitialized/padded_struct_alloca_fp.sim | 6 +
.../uninitialized/padded_struct_memcpy_fp.cl | 27 +
.../uninitialized/padded_struct_memcpy_fp.ref | 4 +
.../uninitialized/padded_struct_memcpy_fp.sim | 8 +
.../uninitialized/partially_uninitialized_fract.cl | 6 +
.../partially_uninitialized_fract.ref | 12 +
.../partially_uninitialized_fract.sim | 6 +
.../private_array_initializer_list.cl | 9 +
.../private_array_initializer_list.ref | 5 +
.../private_array_initializer_list.sim | 6 +
.../kernels/uninitialized/uninitialized_address.cl | 9 +
.../uninitialized/uninitialized_address.ref | 5 +
.../uninitialized/uninitialized_address.sim | 6 +
.../uninitialized/uninitialized_global_buffer.cl | 5 +
.../uninitialized/uninitialized_global_buffer.ref | 4 +
.../uninitialized/uninitialized_global_buffer.sim | 8 +
.../uninitialized/uninitialized_local_array.cl | 11 +
.../uninitialized/uninitialized_local_array.ref | 19 +
.../uninitialized/uninitialized_local_array.sim | 6 +
.../uninitialized/uninitialized_local_ptr.cl | 9 +
.../uninitialized/uninitialized_local_ptr.ref | 19 +
.../uninitialized/uninitialized_local_ptr.sim | 8 +
.../uninitialized/uninitialized_local_variable.cl | 7 +
.../uninitialized/uninitialized_local_variable.ref | 4 +
.../uninitialized/uninitialized_local_variable.sim | 6 +
.../uninitialized_packed_struct_memcpy.cl | 12 +
.../uninitialized_packed_struct_memcpy.ref | 9 +
.../uninitialized_packed_struct_memcpy.sim | 8 +
.../uninitialized_padded_nested_struct_memcpy.cl | 20 +
.../uninitialized_padded_nested_struct_memcpy.ref | 27 +
.../uninitialized_padded_nested_struct_memcpy.sim | 8 +
.../uninitialized_padded_struct_memcpy.cl | 12 +
.../uninitialized_padded_struct_memcpy.ref | 6 +
.../uninitialized_padded_struct_memcpy.sim | 8 +
.../uninitialized/uninitialized_private_array.cl | 16 +
.../uninitialized/uninitialized_private_array.ref | 7 +
.../uninitialized/uninitialized_private_array.sim | 14 +
tests/kernels/wait_event/wait_event_chained.ref | 12 +-
tests/kernels/wait_event/wait_event_divergent.cl | 3 +
tests/kernels/wait_event/wait_event_divergent.ref | 10 +-
tests/kernels/wait_event/wait_event_duplicates.ref | 12 +-
tests/kernels/wait_event/wait_event_invalid.ref | 13 +-
tests/run_test.py | 145 +
tests/{apps => runtime}/CMakeLists.txt | 25 +-
tests/runtime/map_buffer.c | 327 +++
tests/runtime/map_buffer.ref | 4 +
215 files changed, 10120 insertions(+), 3228 deletions(-)
diff --git a/.gitignore b/.gitignore
index 14830ae..34ed68f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,6 +49,7 @@ tests/apps/vecadd/vecadd
# Misc
oclgrind-*.tar.gz
oclgrind-*.zip
+.clang_complete
.DS_Store
*.kdev4
*.sublime-*
diff --git a/.travis-deps.sh b/.travis-deps.sh
new file mode 100644
index 0000000..628d1f8
--- /dev/null
+++ b/.travis-deps.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+if [ "$TRAVIS_OS_NAME" == "linux" ]
+then
+ # Add repositories
+ sudo add-apt-repository -y 'deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty-3.9 main'
+ wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
+ sudo apt-get update -qq
+
+ # Remove existing LLVM
+ sudo apt-get remove llvm
+
+ # Install Clang + LLVM
+ sudo apt-get install -y llvm-3.9-dev libclang-3.9-dev clang-3.9
+ sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-3.9 20
+ sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-3.9 20
+ sudo rm -f /usr/local/clang-3.5.0/bin/clang
+ sudo rm -f /usr/local/clang-3.5.0/bin/clang++
+
+ # Other dependencies
+ sudo apt-get install -y libedit-dev
+elif [ "$TRAVIS_OS_NAME" == "osx" ]
+then
+ brew update
+ brew install -v llvm --with-clang
+fi
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..8a94414
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,26 @@
+os:
+ - linux
+ - osx
+
+sudo: required
+dist: trusty
+osx_image: xcode7.3
+
+language: cpp
+compiler:
+ - gcc
+ - clang
+
+matrix:
+ exclude:
+ - os: osx
+ compiler: gcc
+
+before_install:
+ - bash ./.travis-deps.sh
+
+script:
+ - if [ "${TRAVIS_OS_NAME}" = "linux" ]; then cmake . -DLLVM_DIR=/usr/lib/llvm-3.9/lib/cmake/llvm ; fi
+ - if [ "${TRAVIS_OS_NAME}" = "osx" ]; then cmake . -DLLVM_DIR=/usr/local/Cellar/llvm/3.8.1/share/llvm/cmake ; fi
+ - make -j 2
+ - ctest --output-on-failure
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a35af1e..4838fd5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
# CMakeLists.txt (Oclgrind)
-# Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+# Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
# University of Bristol. All rights reserved.
#
# This program is provided under a three-clause BSD license. For full
@@ -8,23 +8,26 @@
cmake_minimum_required(VERSION 2.8.12)
project(Oclgrind)
-set(Oclgrind_VERSION_MAJOR 15)
-set(Oclgrind_VERSION_MINOR 5)
+set(Oclgrind_VERSION_MAJOR 16)
+set(Oclgrind_VERSION_MINOR 10)
include(CheckIncludeFiles)
+include(CheckIncludeFileCXX)
include(CheckLibraryExists)
# Enable C99 for GCC (required for tests)
if (CMAKE_COMPILER_IS_GNUCC)
- set(CMAKE_C_FLAGS "-std=c99")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
endif()
# Enable rpath on OS X
set(CMAKE_MACOSX_RPATH 1)
-# Enable C++11 for Clang/GCC
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
- set(CMAKE_CXX_FLAGS "-std=c++11")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-strict-aliasing")
endif()
# Disable min/max macros on Windows
@@ -34,7 +37,9 @@ endif()
# Suppress warnings from OpenCL runtime API headers
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-ignored-attributes -Wno-gcc-compat -Wno-availability")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-ignored-attributes")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-gcc-compat")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-availability")
endif()
@@ -53,12 +58,65 @@ set(LLVM_VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR})
add_definitions(${LLVM_DEFINITIONS})
include_directories(${LLVM_INCLUDE_DIRS})
link_directories(${LLVM_LIBRARY_DIRS})
-set(CLANG ${LLVM_TOOLS_BINARY_DIR}/clang)
# Get LLVM libraries for linking
llvm_map_components_to_libnames(LLVM_LIBS
bitreader bitwriter core instrumentation ipo irreader
- linker mcparser objcarcopts option)
+ linker lto mcparser objcarcopts option target)
+
+if (NOT (${LLVM_PACKAGE_VERSION} VERSION_LESS "3.9"))
+ llvm_map_components_to_libnames(LLVM_COVERAGE coverage)
+ list(APPEND LLVM_LIBS ${LLVM_COVERAGE})
+endif()
+
+if (NOT (${LLVM_PACKAGE_VERSION} VERSION_LESS "4.0"))
+ llvm_map_components_to_libnames(LLVM_COROUTINES coroutines)
+ list(APPEND LLVM_LIBS ${LLVM_COROUTINES})
+endif()
+
+
+# Allow user to set path to Clang installation via CLANG_ROOT
+set (CLANG_ROOT " " CACHE PATH "Root of Clang installation")
+if (NOT ${CLANG_ROOT} STREQUAL " ")
+ include_directories("${CLANG_ROOT}/include")
+ link_directories("${CLANG_ROOT}/lib")
+ set(CMAKE_REQUIRED_INCLUDES
+ "${CMAKE_REQUIRED_INCLUDES};${CLANG_ROOT}/include")
+endif()
+
+set(CMAKE_REQUIRED_INCLUDES
+ "${CMAKE_REQUIRED_INCLUDES};${LLVM_INCLUDE_DIRS}")
+set(CMAKE_REQUIRED_DEFINITIONS
+ "${CMAKE_REQUIRED_DEFINITIONS};${LLVM_DEFINITIONS}")
+
+# Check for Clang headers
+unset(CLANG_HEADER CACHE)
+find_path(CLANG_HEADER "clang/CodeGen/CodeGenAction.h"
+ PATHS "${CLANG_ROOT}/include" "${LLVM_INCLUDE_DIRS}"
+ NO_DEFAULT_PATH)
+find_path(CLANG_HEADER "clang/CodeGen/CodeGenAction.h")
+if ("${CLANG_HEADER}" STREQUAL "CLANG_HEADER-NOTFOUND")
+ message(FATAL_ERROR "Clang headers not found (set CLANG_ROOT)")
+endif()
+
+# Check for Clang libraries
+unset(CLANG_LIB CACHE)
+find_library(CLANG_LIB "clangFrontend"
+ PATHS "${CLANG_ROOT}/lib" "${LLVM_LIBRARY_DIRS}"
+ NO_DEFAULT_PATH)
+find_library(CLANG_LIB "clangFrontend")
+if ("${CLANG_LIB}" STREQUAL "CLANG_LIB-NOTFOUND")
+ message(FATAL_ERROR "Clang libraries not found (set CLANG_ROOT)")
+endif()
+
+# Check for clang
+find_program(CLANG clang
+ PATHS "${CLANG_ROOT}/bin" "${LLVM_TOOLS_BINARY_DIR}"
+ NO_DEFAULT_PATH)
+find_program(CLANG clang)
+if ("${CLANG}" STREQUAL "CLANG-NOTFOUND")
+ message(FATAL_ERROR "Could not find clang binary")
+endif()
# Check for GNU readline library
@@ -73,8 +131,10 @@ if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
check_include_files("stdio.h;readline/readline.h" HAVE_READLINE_H)
check_include_files("stdio.h;readline/history.h" HAVE_HISTORY_H)
- check_library_exists(readline readline "${READLINE_DIR}/lib" HAVE_READLINE_LIB)
- check_library_exists(readline add_history "${READLINE_DIR}/lib" HAVE_HISTORY_LIB)
+ check_library_exists(readline readline
+ "${READLINE_DIR}/lib" HAVE_READLINE_LIB)
+ check_library_exists(readline add_history
+ "${READLINE_DIR}/lib" HAVE_HISTORY_LIB)
if (HAVE_READLINE_H AND HAVE_HISTORY_H AND
HAVE_READLINE_LIB AND HAVE_HISTORY_LIB)
set(HAVE_READLINE 1)
@@ -88,6 +148,15 @@ else()
set(HAVE_READLINE 0)
endif()
+# Check for library directory suffixes
+set(_LIBDIR_SUFFIX "")
+get_property(USING_LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS)
+if (USING_LIB64 AND NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
+ set(_LIBDIR_SUFFIX "64")
+endif()
+set(LIBDIR_SUFFIX "${_LIBDIR_SUFFIX}"
+ CACHE STRING "Suffix for installed library directory")
+
# Generate stringified clc.h
add_custom_command(
OUTPUT src/core/clc_h.cpp
@@ -120,6 +189,7 @@ add_library(oclgrind ${CORE_LIB_TYPE}
src/core/clc_h.cpp
src/core/common.cpp
src/core/Context.cpp
+ src/core/half.cpp
src/core/Kernel.cpp
src/core/KernelInvocation.cpp
src/core/Memory.cpp
@@ -138,12 +208,18 @@ add_library(oclgrind ${CORE_LIB_TYPE}
src/plugins/MemCheck.h
src/plugins/MemCheck.cpp
src/plugins/RaceDetector.h
- src/plugins/RaceDetector.cpp)
-target_link_libraries(oclgrind ${CORE_EXTRA_LIBS}
- clangAnalysis clangAST clangBasic clangCodeGen clangDriver clangEdit
- clangFrontend clangLex clangParse clangSema clangSerialization
+ src/plugins/RaceDetector.cpp
+ src/plugins/Uninitialized.h
+ src/plugins/Uninitialized.cpp)
+target_link_libraries(oclgrind PRIVATE ${CORE_EXTRA_LIBS}
+ clangFrontend clangSerialization clangDriver clangCodeGen
+ clangParse clangSema clangAnalysis clangEdit clangAST clangLex clangBasic
${LLVM_LIBS})
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
+ target_link_libraries(oclgrind PRIVATE Version)
+endif()
+
# Sources for OpenCL runtime API frontend
set(RUNTIME_SOURCES
src/runtime/async_queue.h
@@ -153,21 +229,26 @@ set(RUNTIME_SOURCES
# Add ICD exports on Windows
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
- list(APPEND RUNTIME_SOURCES src/runtime/icd.def)
+ set(DLL_EXPORTS src/runtime/icd.def)
endif()
-add_library(oclgrind-rt-icd SHARED ${RUNTIME_SOURCES})
+add_library(oclgrind-rt-icd SHARED ${RUNTIME_SOURCES} ${DLL_EXPORTS})
set_target_properties(oclgrind-rt-icd PROPERTIES COMPILE_FLAGS -DOCLGRIND_ICD)
target_link_libraries(oclgrind-rt-icd ${CMAKE_DL_LIBS} oclgrind)
# Add runtime exports on Windows
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
- list(APPEND RUNTIME_SOURCES src/runtime/runtime.def)
+ set(DLL_EXPORTS src/runtime/runtime.def)
endif()
-add_library(oclgrind-rt SHARED ${RUNTIME_SOURCES})
+add_library(oclgrind-rt SHARED ${RUNTIME_SOURCES} ${DLL_EXPORTS})
target_link_libraries(oclgrind-rt ${CMAKE_DL_LIBS} oclgrind)
+add_executable(oclgrind-exe src/runtime/oclgrind.cpp)
+set_target_properties(oclgrind-exe PROPERTIES OUTPUT_NAME oclgrind)
+target_compile_definitions(oclgrind-exe PRIVATE
+ "-DLIBDIR_SUFFIX=\"${LIBDIR_SUFFIX}\"")
+
add_executable(oclgrind-kernel
src/kernel/oclgrind-kernel.cpp
src/kernel/Simulation.h
@@ -190,15 +271,19 @@ add_custom_command(
DEPENDS src/core/clc.h)
# Generate precompiled headers for clc.h
+set(CLC_SYSROOT "${CMAKE_BINARY_DIR}/include/oclgrind/")
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
+ string(REPLACE "/" "\\" CLC_SYSROOT "${CLC_SYSROOT}")
+endif()
add_custom_command(
OUTPUT include/oclgrind/clc32.pch
POST_BUILD
COMMAND
${CLANG}
- -cc1 -x cl -cl-std=CL1.2 -O0 -g -fno-builtin
+ -cc1 -x cl -cl-std=CL1.2 -O0 -fno-builtin
-emit-pch -triple spir-unknown-unknown
- -relocatable-pch -isysroot ${CMAKE_BINARY_DIR}/include/oclgrind/
- ${CMAKE_BINARY_DIR}/include/oclgrind/clc.h
+ -relocatable-pch -isysroot "${CLC_SYSROOT}"
+ include/oclgrind/clc.h
-o include/oclgrind/clc32.pch
DEPENDS include/oclgrind/clc.h
)
@@ -207,10 +292,10 @@ add_custom_command(
POST_BUILD
COMMAND
${CLANG}
- -cc1 -x cl -cl-std=CL1.2 -O0 -g -fno-builtin
+ -cc1 -x cl -cl-std=CL1.2 -O0 -fno-builtin
-emit-pch -triple spir64-unknown-unknown
- -relocatable-pch -isysroot ${CMAKE_BINARY_DIR}/include/oclgrind/
- ${CMAKE_BINARY_DIR}/include/oclgrind/clc.h
+ -relocatable-pch -isysroot "${CLC_SYSROOT}"
+ include/oclgrind/clc.h
-o include/oclgrind/clc64.pch
DEPENDS include/oclgrind/clc.h
)
@@ -220,31 +305,20 @@ add_custom_command(
configure_file("cmake_config.h.in" "config.h")
-# Install oclgrind script if not on Windows
+# Generate ICD loader if not on Windows
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
- file(READ src/runtime/oclgrind OCLGRIND_SCRIPT)
- string(REGEX REPLACE
- "__VERSION__" "${Oclgrind_VERSION_MAJOR}.${Oclgrind_VERSION_MINOR}"
- OCLGRIND_SCRIPT "${OCLGRIND_SCRIPT}")
- file(WRITE ${CMAKE_BINARY_DIR}/oclgrind "${OCLGRIND_SCRIPT}")
-
- # Generate ICD loader
- get_property(OCLGRIND_RT_FILENAME TARGET oclgrind-rt-icd PROPERTY LOCATION)
- file(WRITE ${CMAKE_BINARY_DIR}/oclgrind.icd "${OCLGRIND_RT_FILENAME}\n")
-
- install(PROGRAMS
- ${CMAKE_BINARY_DIR}/oclgrind
- DESTINATION bin)
+ file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/oclgrind.icd
+ CONTENT "$<TARGET_FILE:oclgrind-rt-icd>\n")
endif()
install(TARGETS
- oclgrind-kernel
+ oclgrind-exe oclgrind-kernel
DESTINATION bin)
install(TARGETS
oclgrind oclgrind-rt oclgrind-rt-icd
- DESTINATION lib)
+ DESTINATION "lib${LIBDIR_SUFFIX}")
install(FILES
- ${CORE_HEADERS} ${CMAKE_BINARY_DIR}/config.h ${CLC_HEADERS} LICENSE
+ ${CORE_HEADERS} ${CLC_HEADERS}
DESTINATION include/oclgrind)
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
install(FILES
@@ -269,31 +343,28 @@ enable_testing()
find_package(PythonInterp)
if (PYTHONINTERP_FOUND)
- # Add kernel tests
- file(READ tests/kernels/TESTS KERNEL_TESTS)
- string(REPLACE "\n" ";" KERNEL_TESTS ${KERNEL_TESTS})
- foreach(test ${KERNEL_TESTS})
- add_test(
- NAME ${test}
- COMMAND
- ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tests/kernels/run_kernel_test.py
- $<TARGET_FILE:oclgrind-kernel>
- ${CMAKE_SOURCE_DIR}/tests/kernels/${test}.sim)
- endforeach(${test})
-
- # Set PCH directory
- set_tests_properties(${KERNEL_TESTS} PROPERTIES
- ENVIRONMENT "OCLGRIND_PCH_DIR=${CMAKE_BINARY_DIR}/include/oclgrind")
-
- # Expected failures
- set_tests_properties(
- atomics/atomic_intergroup_race
- data-race/intragroup_hidden_race
- PROPERTIES WILL_FAIL TRUE)
+ # Add test directories
+ add_subdirectory(tests/apps)
+ add_subdirectory(tests/kernels)
+ add_subdirectory(tests/runtime)
else()
- message(WARNING "Kernel tests will not be run (Python required)")
+ message(WARNING "Tests will not be run (Python required)")
endif()
-# Add app tests
-add_subdirectory(tests/apps)
+
+# CPack config
+set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "OpenCL device simulator")
+set(CPACK_PACKAGE_DESCRIPTION_FILE
+ "${CMAKE_SOURCE_DIR}/src/install/cpack-description")
+set(CPACK_PACKAGE_VENDOR "University of Bristol")
+set(CPACK_PACKAGE_VERSION_MAJOR ${Oclgrind_VERSION_MAJOR})
+set(CPACK_PACKAGE_VERSION_MINOR ${Oclgrind_VERSION_MINOR})
+set(CPACK_PACKAGE_VERSION "${Oclgrind_VERSION_MAJOR}.${Oclgrind_VERSION_MINOR}")
+set(CPACK_PACKAGE_VERSION_PATCH "0")
+
+# CPack RPM config
+set(CPACK_RPM_PACKAGE_GROUP "Development/Tools")
+set(CPACK_RPM_PACKAGE_LICENSE "BSD")
+
+include(CPack)
diff --git a/LICENSE b/LICENSE
index f91a2f2..bec9311 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
University of Bristol. All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/Makefile.am b/Makefile.am
index 8fcd00f..144a027 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,5 +1,5 @@
# Makefile.am (Oclgrind)
-# Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+# Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
# University of Bristol. All rights reserved.
#
# This program is provided under a three-clause BSD license. For full
@@ -9,8 +9,9 @@
AUTOMAKE_OPTIONS = subdir-objects
ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} -I m4
-AM_CFLAGS = -std=c99
-AM_CPPFLAGS = -I$(top_srcdir)/src/ -Wall
+SUBDIRS = . tests
+
+AM_CPPFLAGS = -I$(top_srcdir)/src/ -Wall -fno-rtti
# Suppress warnings from OpenCL runtime API headers
if USING_CLANG
@@ -19,23 +20,35 @@ endif USING_CLANG
lib_LTLIBRARIES = liboclgrind.la liboclgrind-rt.la liboclgrind-rt-icd.la
-LLVM_LIBS = `$(llvm_config) --system-libs --libs bitreader bitwriter \
- core instrumentation ipo irreader linker mcparser objcarcopts option`
+
+LLVM_COMPONENTS = bitreader bitwriter core instrumentation ipo \
+irreader linker mcparser objcarcopts option target
+
+if LLVM_39_OR_NEWER
+LLVM_COMPONENTS += coverage
+endif LLVM_39_OR_NEWER
+
+if LLVM_40_OR_NEWER
+LLVM_COMPONENTS += coroutines
+endif LLVM_40_OR_NEWER
+
+LLVM_LIBS = `$(llvm_config) --system-libs --libs $(LLVM_COMPONENTS)`
liboclgrind_la_SOURCES = src/core/common.h src/core/common.cpp \
src/core/Context.h src/core/Context.cpp src/core/half.h \
- src/core/Kernel.h src/core/Kernel.cpp src/core/KernelInvocation.h \
- src/core/KernelInvocation.cpp src/core/Memory.h src/core/Memory.cpp \
- src/core/Plugin.h src/core/Plugin.cpp src/core/Program.h \
- src/core/Program.cpp src/core/Queue.h src/core/Queue.cpp \
- src/core/WorkItem.h src/core/WorkItem.cpp \
- src/core/WorkItemBuiltins.cpp src/core/WorkGroup.h \
- src/core/WorkGroup.cpp src/plugins/InstructionCounter.h \
- src/plugins/InstructionCounter.cpp src/plugins/InteractiveDebugger.h \
- src/plugins/InteractiveDebugger.cpp src/plugins/Logger.h \
- src/plugins/Logger.cpp src/plugins/MemCheck.h \
+ src/core/half.cpp src/core/Kernel.h src/core/Kernel.cpp \
+ src/core/KernelInvocation.h src/core/KernelInvocation.cpp \
+ src/core/Memory.h src/core/Memory.cpp src/core/Plugin.h \
+ src/core/Plugin.cpp src/core/Program.h src/core/Program.cpp \
+ src/core/Queue.h src/core/Queue.cpp src/core/WorkItem.h \
+ src/core/WorkItem.cpp src/core/WorkItemBuiltins.cpp \
+ src/core/WorkGroup.h src/core/WorkGroup.cpp \
+ src/plugins/InstructionCounter.h src/plugins/InstructionCounter.cpp \
+ src/plugins/InteractiveDebugger.h src/plugins/InteractiveDebugger.cpp \
+ src/plugins/Logger.h src/plugins/Logger.cpp src/plugins/MemCheck.h \
src/plugins/MemCheck.cpp src/plugins/RaceDetector.h \
- src/plugins/RaceDetector.cpp
+ src/plugins/RaceDetector.cpp src/plugins/Uninitialized.h \
+ src/plugins/Uninitialized.cpp
nodist_liboclgrind_la_SOURCES = src/core/clc_h.cpp config.h
liboclgrind_la_LDFLAGS = -lclangFrontend -lclangDriver \
-lclangSerialization -lclangCodeGen -lclangParse -lclangSema \
@@ -45,7 +58,7 @@ oclgrind_includedir = $(includedir)/oclgrind
oclgrind_include_HEADERS = src/core/common.h src/core/Context.h \
src/core/half.h src/core/Kernel.h src/core/KernelInvocation.h \
src/core/Memory.h src/core/Plugin.h src/core/Program.h \
- src/core/Queue.h src/core/WorkItem.h src/core/WorkGroup.h config.h LICENSE
+ src/core/Queue.h src/core/WorkItem.h src/core/WorkGroup.h
src/core/clc_h.cpp: src/core/gen_clc_h.sh src/core/clc.h
$(top_srcdir)/src/core/gen_clc_h.sh $(top_srcdir)/src/core/clc.h $@
@@ -71,16 +84,15 @@ liboclgrind_rt_icd_la_SOURCES = $(RUNTIME_SOURCES)
liboclgrind_rt_icd_la_LIBADD = liboclgrind.la
liboclgrind_rt_icd_la_LDFLAGS = -shared
-bin_PROGRAMS = oclgrind-kernel
+bin_PROGRAMS = oclgrind oclgrind-kernel
+
+oclgrind_SOURCES = src/runtime/oclgrind.cpp
+oclgrind_CPPFLAGS = -DLIBDIR_SUFFIX=""
+
oclgrind_kernel_SOURCES = src/kernel/oclgrind-kernel.cpp \
src/kernel/Simulation.h src/kernel/Simulation.cpp
oclgrind_kernel_LDADD = liboclgrind.la
-bin_SCRIPTS = oclgrind
-oclgrind: $(top_srcdir)/src/runtime/oclgrind
- cat $(top_srcdir)/src/runtime/oclgrind \
- | $(SED) 's|__VERSION__|'$(VERSION)'|g' \
- >$@
noinst_SCRIPTS = oclgrind.icd \
src/include/oclgrind/clc.h \
src/include/oclgrind/clc32.pch \
@@ -95,53 +107,27 @@ src/include/oclgrind/clc.h: $(top_srcdir)/src/core/clc.h
src/include/oclgrind/clc32.pch: src/include/oclgrind/clc.h
$(clang) \
- -cc1 -x cl -cl-std=CL1.2 -O0 -g -fno-builtin \
+ -cc1 -x cl -cl-std=CL1.2 -O0 -fno-builtin \
-emit-pch -triple spir-unknown-unknown \
-relocatable-pch \
- -isysroot $(abs_builddir)/src/include/oclgrind \
+ -isysroot $(abs_builddir)/src/include/oclgrind \
$< -o $@
src/include/oclgrind/clc64.pch: src/include/oclgrind/clc.h
$(clang) \
- -cc1 -x cl -cl-std=CL1.2 -O0 -g -fno-builtin \
+ -cc1 -x cl -cl-std=CL1.2 -O0 -fno-builtin \
-emit-pch -triple spir64-unknown-unknown \
-relocatable-pch \
- -isysroot $(abs_builddir)/src/include/oclgrind \
+ -isysroot $(abs_builddir)/src/include/oclgrind \
$< -o $@
-check_PROGRAMS = tests/apps/vecadd/vecadd
-tests_apps_vecadd_vecadd_LDADD = liboclgrind-rt.la
-TESTS = $(check_PROGRAMS)
-
-TEST_EXTENSIONS = .sim
-SIM_LOG_COMPILER = $(PYTHON) \
- $(top_srcdir)/tests/kernels/run_kernel_test.py \
- ${abs_top_builddir}/oclgrind-kernel
-AM_TESTS_ENVIRONMENT = \
- export AM_TESTS=1; \
- export OCLGRIND_PCH_DIR=$(abs_builddir)/src/include/oclgrind;
-
-if HAVE_PYTHON
-TESTS += $(KERNEL_TESTS)
-XFAIL_TESTS = \
- tests/kernels/atomics/atomic_intergroup_race.sim \
- tests/kernels/data-race/intragroup_hidden_race.sim
-else
-check-local:
- @echo
- @echo "WARNING: Kernel tests skipped (Python required)."
- @echo
-endif
-
-EXTRA_DIST = NEWS src/core/gen_clc_h.sh src/core/clc.h \
- src/runtime/oclgrind src/CL/cl.h src/CL/cl_gl.h src/CL/cl_platform.h \
- src/CL/cl_ext.h src/CL/cl_gl_ext.h src/CL/cl_egl.h src/CL/cl_d3d10.h \
+EXTRA_DIST = NEWS src/core/gen_clc_h.sh src/core/clc.h src/CL/cl.h \
+ src/CL/cl_gl.h src/CL/cl_platform.h src/CL/cl_ext.h \
+ src/CL/cl_gl_ext.h src/CL/cl_egl.h src/CL/cl_d3d10.h \
src/CL/cl_d3d11.h src/CL/cl_dx9_media_sharing.h src/CL/opencl.h \
- CMakeLists.txt tests/apps/CMakeLists.txt cmake_config.h.in \
+ CMakeLists.txt tests/apps/CMakeLists.txt tests/kernels/CMakeLists.txt \
+ tests/runtime/CMakeLists.txt cmake_config.h.in \
src/core/gen_clc_h.cmake src/runtime/icd.def src/runtime/runtime.def \
src/install/INSTALL.darwin src/install/INSTALL.linux \
src/install/INSTALL.windows src/install/install.bat \
- src/install/uninstall.bat src/install/oclgrind-icd.reg \
- tests/kernels/run_kernel_test.py tests/kernels/TESTS \
- $(KERNEL_TEST_INPUTS)
-CLEANFILES = src/core/clc_h.cpp $(bin_SCRIPTS) $(noinst_SCRIPTS) \
- $(KERNEL_TEST_OUTPUTS)
+ src/install/uninstall.bat src/install/oclgrind-icd.reg
+CLEANFILES = src/core/clc_h.cpp $(noinst_SCRIPTS)
diff --git a/NEWS b/NEWS
index 16766ab..c9c996b 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,23 @@
For more information, please visit the Oclgrind Wiki:
https://github.com/jrprice/Oclgrind/wiki
+
+Oclgrind 16.10
+==============
+This release incorporates the following changes:
+
+- Added plugin to detect use of uninitialized values (from Moritz Pflanzer)
+- Added memoryMap and memoryUnmap plugin callbacks
+- Added support for LLVM 3.7, 3.8, and 3.9
+- Added oclgrind.exe command on Windows
+- Report invalid uses of mapped buffers inside kernels
+- Report invalid indices when accessing statically sized arrays
+- Improved coverage of race detection plugin
+- Fixed memcheck false-positive when writing to a write-only vector array
+- Oclgrind will now appear with device type (CPU | GPU | ACCELERATOR | DEFAULT)
+- Various minor bug fixes
+
+
Oclgrind 15.5
=============
This release updates to LLVM 3.6, which improves the OpenCL C compiler
diff --git a/README b/README
deleted file mode 100644
index 6a65f57..0000000
--- a/README
+++ /dev/null
@@ -1,138 +0,0 @@
-========
-Oclgrind
-========
-
-About
------
-This project implements a virtual OpenCL device simulator, including
-an OpenCL runtime with ICD support. The goal is to provide a platform
-for creating tools to aid OpenCL development. In particular, this
-project currently implements utilities for debugging memory access
-errors, detecting data-races and barrier divergence, collecting
-instruction histograms, and for interactive OpenCL kernel debugging.
-The simulator is built on an interpreter for LLVM IR. This project is
-being developed by James Price and Simon McIntosh-Smith at the
-University of Bristol.
-
-Binary releases can be found on the GitHub releases page:
-
- https://github.com/jrprice/Oclgrind/releases
-
-
-Building
---------
-To build this project, you will require the LLVM and Clang 3.6
-development libraries and headers. With some modifications, it may
-also be possible to use other (recent) versions of LLVM. If building
-LLVM from source, it is recommended to enable optimizations to improve
-the performance of Oclgrind (configure with --enable-optimized, or set
-CMAKE_BUILD_TYPE to RelWithDebInfo).
-
-You will also need to use a compiler that supports C++11.
-
-
-Building on Linux and OS X
---------------------------
-If you are building directly from the GitHub repository, you need to
-run 'autoreconf -i' to generate the necessary build files. This is not
-required if you are using a released source package.
-
-Run ./configure to generate the Makefile, optionally using
---prefix=PATH to specify the target installation directory. If you
-don't have the LLVM/Clang includes and libraries on your search path,
-you can specify the location of your LLVM installation using the
---with-llvm=PATH option. For example:
-
-./configure --prefix=$PWD/build/ --with-llvm=PATH/TO/LLVM/INSTALL
-
-This path should be the directory in which LLVM is installed (e.g. the
-path specified to --prefix or CMAKE_INSTALL_PATH when LLVM was built).
-
-Next, build and install with make:
-
-make
-make check
-make install
-
-If installing to a non-default location, you should add the bin/
-directory to the PATH environment variable in order to make use of the
-oclgrind command. If you wish to use Oclgrind via the OpenCL ICD
-(optional), then you should create an ICD loading point by copying the
-oclgrind.icd file from the build directory to /etc/OpenCL/vendors/.
-
-Building on Windows
--------------------
-A CMake build system is provided for building Oclgrind on Windows. At
-present, this only works with Visual Studio 2013 (or newer), and
-Windows 7.
-
-When configuring the CMake build, you may be prompted to supply a
-value for the LLVM_DIR parameter. This should be set to the directory
-containing your LLVM installations's LLVMConfig.cmake file, (for
-example C:\Program Files\LLVM\share\llvm\cmake\).
-
-If you wish to use Oclgrind via the OpenCL ICD (optional), then you
-should also create an ICD loading point. To do this, you should add a
-REG_DWORD value to the Windows Registry under one or both of the
-registry keys below, with the name set to the absolute path of the
-oclgrind-rt-icd.dll library and the value set to 0.
-
-Key for 32-bit machines or 64-bit apps on a 64-bit machine:
-HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors
-
-Key for 32-bit apps on a 64-bit machine:
-HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Khronos\OpenCL\Vendors
-
-
-Usage
------
-The recommended method of running an application with Oclgrind is to
-use the oclgrind command, for example:
-
-oclgrind ./application
-
-This command will make it such the only OpenCL platform and device
-available to your application is Oclgrind. If you need more control
-over platform selection then installing an ICD loading point for
-Oclgrind will cause it to appear when an application calls
-clGetPlatformIDs(), alongside any other OpenCL platforms installed on
-your system.
-
-If it encounters any invalid memory accesses, Oclgrind will
-report the details to stderr, for example:
-
-> Invalid write of size 4 at global memory address 0x1000000000040
-> Kernel: vecadd
-> Entity: Global(16,0,0) Local(0,0,0) Group(16,0,0)
-> store i32 %tmp9, i32 addrspace(1)* %tmp15, align 4
-> At line 4 of input.cl
-> c[i] = a[i] + b[i]
-
-Since it is interpreting an abstract intermediate representation and
-bounds-checking each memory access, Oclgrind will run quite slowly
-(typically a couple of orders of magnitude slower than a regular CPU
-implementation). Therefore, it is recommended to run your application
-with a small problem if possible.
-
-To enable an interactive, GDB-style debugging session, supply the -i
-flag to the oclgrind command, or export the environment variable
-OCLGRIND_INTERACTIVE=1. This will cause Oclgrind to automatically
-break at the beginning of each kernel invocation, and upon
-encountering an invalid memory access. Type 'help' for details of
-available commands.
-
-For more detailed information about using Oclgrind please visit the
-GitHub Wiki:
-
- https://github.com/jrprice/Oclgrind/wiki/
-
-
-Contact
--------
-If you encounter any issues or have any questions, please use the
-GitHub issues page:
-
- https://github.com/jrprice/Oclgrind/issues
-
-You can also contact the primary developer via email:
-James Price <j.price at bristol.ac.uk>
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..986c1bd
--- /dev/null
+++ b/README.md
@@ -0,0 +1,196 @@
+Oclgrind
+========
+
+
+About
+-----
+This project implements a virtual OpenCL device simulator, including
+an OpenCL runtime with ICD support. The goal is to provide a platform
+for creating tools to aid OpenCL development. In particular, this
+project currently implements utilities for debugging memory access
+errors, detecting data-races and barrier divergence, collecting
+instruction histograms, and for interactive OpenCL kernel debugging.
+The simulator is built on an interpreter for LLVM IR. This project is
+being developed by James Price and Simon McIntosh-Smith at the
+University of Bristol.
+
+Binary releases can be found on the GitHub releases page:
+
+ https://github.com/jrprice/Oclgrind/releases
+
+
+Build dependencies
+------------------
+To build this project, you will need LLVM and Clang 3.6 (or newer)
+development libraries and headers. If you build LLVM from source, it
+is recommended to enable optimizations to significantly improve the
+performance of Oclgrind (set `CMAKE_BUILD_TYPE` to `Release` or
+`RelWithDebInfo`).
+
+You will need to use a compiler that supports C++11. Python should
+also be available in order to run the test suite.
+
+
+Building on Linux and OS X (CMake)
+----------------------------------
+The recommended method of building Oclgrind is via CMake.
+
+When configuring the CMake build, you may be prompted to supply a
+value for the `LLVM_DIR` parameter (this shouldn't be necessary if
+LLVM is installed in a standard system location). This should be set
+to the directory containing your LLVM installation's
+`LLVMConfig.cmake` file (typically either
+`${LLVM_ROOT}/lib/cmake/llvm` or `${LLVM_ROOT}/share/llvm/cmake/`).
+If Clang is installed separately to LLVM, then you may also be
+prompted to supply a path for the `CLANG_ROOT` parameter, which should
+be the root of your Clang installation (containing the `bin/`, `lib/`
+and `include/` directories).
+
+A typical CMake command-line might look like this:
+
+ cmake ${OCLGRIND_SOURCE} \
+ -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+ -DCMAKE_INSTALL_PREFIX=${INSTALL_ROOT} \
+ -DLLVM_DIR=${LLVM_ROOT}/lib/cmake/llvm
+
+where `${OCLGRIND_SOURCE}` is the path to the root directory
+containing the Oclgrind source code, `${LLVM_ROOT}` is the path to the
+LLVM installation, and `${INSTALL_ROOT}` is the desired installation
+root directory (this can be omitted if installing to system
+directories).
+
+Next, build and install with make:
+
+ make
+ make test
+ make install
+
+If installing to a non-system location, you should add the `bin/`
+directory to the `PATH` environment variable in order to make use of
+the `oclgrind` command. If you wish to use Oclgrind via the OpenCL ICD
+loader (optional), then you should create an ICD loading point by
+copying the `oclgrind.icd` file from the build directory to
+`/etc/OpenCL/vendors/`.
+
+
+Building on Linux and OS X (autotools)
+--------------------------------------
+An autotools build system is also provided as an alternative to
+CMake. This will likely be removed in a future version of Oclgrind.
+
+If you are building directly from the GitHub repository, you will need
+to run `autoreconf -i` to generate the necessary build files. This is
+not required if you are using a released source package.
+
+Run `configure` to generate the Makefile, optionally using
+`--prefix=PATH` to specify the target installation directory. If you
+don't have the LLVM includes and libraries on your search path, you
+can specify the location of your LLVM installation using the
+`--with-llvm=PATH` option. For example:
+
+ ./configure --prefix=$PWD/build/ --with-llvm=PATH/TO/LLVM/INSTALL
+
+This path should be the directory in which LLVM is installed (e.g. the
+path specified to `--prefix` or `CMAKE_INSTALL_PREFIX` when LLVM was
+built). If the Clang includes and libraries are not on your search
+path or in the same location as LLVM, you can use the
+`--with-clang=PATH` option to specify its root directory.
+
+Next, build and install with make:
+
+ make
+ make check
+ make install
+
+If installing to a non-default location, you should add the `bin/`
+directory to the `PATH` environment variable in order to make use of
+the `oclgrind` command. If you wish to use Oclgrind via the OpenCL ICD
+loader (optional), then you should create an ICD loading point by
+copying the `oclgrind.icd` file from the build directory to
+`/etc/OpenCL/vendors/`.
+
+
+Building on Windows
+-------------------
+Building Oclgrind on Windows requires Visual Studio 2013 (or newer),
+and Windows 7 (or newer). Compiling against recent versions of LLVM
+may require Visual Studio 2015.
+
+When configuring the CMake build, you may be prompted to supply a
+value for the `LLVM_DIR` parameter. This should be set to the
+directory containing your LLVM installation's `LLVMConfig.cmake` file
+(for example `C:\Program Files\LLVM\lib\cmake\llvm`). If Clang is
+installed separately to LLVM, then you may also be prompted to supply
+a path for the `CLANG_ROOT` parameter, which should be the root of your
+Clang installation (containing the `bin/`, `lib/` and `include/`
+directories).
+
+You should add the `bin` directory of the Oclgrind installation to the
+`PATH` environment variable in order to make use of the `oclgrind`
+command. If you wish to use Oclgrind via the OpenCL ICD loader
+(optional), then you should also create an ICD loading point. To do
+this, you should add a `REG_DWORD` value to the Windows Registry under
+one or both of the registry keys below, with the name set to the
+absolute path of the `oclgrind-rt-icd.dll` library and the value set
+to 0.
+
+Key for 32-bit machines or 64-bit apps on a 64-bit machine:
+`HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors`
+
+Key for 32-bit apps on a 64-bit machine:
+`HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Khronos\OpenCL\Vendors`
+
+
+Usage
+-----
+The recommended method of running an application with Oclgrind is to
+use the `oclgrind` command, for example:
+
+ oclgrind ./application
+
+This command will make it such that the only OpenCL platform and device
+available to your application is Oclgrind. If you need more control
+over platform selection then installing an ICD loading point for
+Oclgrind will cause it to appear when an application calls
+`clGetPlatformIDs()`, alongside any other OpenCL platforms installed
+on your system.
+
+If it encounters any invalid memory accesses, Oclgrind will
+report the details to stderr, for example:
+
+ Invalid write of size 4 at global memory address 0x1000000000040
+ Kernel: vecadd
+ Entity: Global(16,0,0) Local(0,0,0) Group(16,0,0)
+ store i32 %tmp9, i32 addrspace(1)* %tmp15, align 4
+ At line 4 of input.cl
+ c[i] = a[i] + b[i]
+
+Since it is interpreting an abstract intermediate representation and
+bounds-checking each memory access, Oclgrind will run quite slowly
+(typically a couple of orders of magnitude slower than a regular CPU
+implementation). Therefore, it is recommended to run your application
+with a small problem size if possible.
+
+To enable an interactive, GDB-style debugging session, supply the `-i`
+flag to the `oclgrind` command, or export the environment variable
+`OCLGRIND_INTERACTIVE=1`. This will cause Oclgrind to automatically
+break at the beginning of each kernel invocation, and upon
+encountering an invalid memory access. Type `help` for details of
+available commands.
+
+For more detailed information about using Oclgrind please visit the
+GitHub Wiki:
+
+ https://github.com/jrprice/Oclgrind/wiki/
+
+
+Contact
+-------
+If you encounter any issues or have any questions, please use the
+GitHub issues page:
+
+ https://github.com/jrprice/Oclgrind/issues
+
+You can also contact the primary developer via email:
+
+ James Price `<j.price at bristol.ac.uk>`
diff --git a/configure.ac b/configure.ac
index 4b4c793..3da5e46 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,12 +1,13 @@
# configure.ac (Oclgrind)
-# Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+# Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
# University of Bristol. All rights reserved.
#
# This program is provided under a three-clause BSD license. For full
# license terms please see the LICENSE file distributed with this
# source code.
-AC_INIT([Oclgrind], [15.5], , [oclgrind], [https://github.com/jrprice/Oclgrind])
+AC_INIT([Oclgrind], [16.10], ,
+ [oclgrind], [https://github.com/jrprice/Oclgrind])
AC_PREREQ([2.63])
AC_CONFIG_SRCDIR([src/])
AM_INIT_AUTOMAKE([foreign 1.12])
@@ -14,7 +15,7 @@ AC_LANG(C++)
AC_PROG_CXX
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_HEADERS([config.h])
-AC_CONFIG_FILES([Makefile])
+AC_CONFIG_FILES([Makefile tests/Makefile])
LT_INIT
@@ -28,18 +29,16 @@ oclgrind_extra_libs=
AX_CHECK_COMPILE_FLAG([-std=c++11], [],
[AC_MSG_ERROR([C++11 support is required])])
CXXFLAGS="$CXXFLAGS -std=c++11"
-CPPFLAGS="$CPPFLAGS -std=c++11"
-# --with-llvm option to specify root of LLVM/Clang installation
+
+# --with-llvm option to specify root of LLVM installation
AC_ARG_WITH(
llvm,
[AS_HELP_STRING([--with-llvm],
- [directory containing LLVM/Clang installation])],
- [AC_SUBST(clang, $withval/bin/clang)
- AC_SUBST(llvm_config, $withval/bin/llvm-config)])
+ [directory containing LLVM installation])],
+ [export PATH="$withval/bin:$PATH"])
-# Find LLVM/Clang binaries (assume on PATH if --with-llvm not used)
-AC_CHECK_PROG(clang, [clang], `which clang`)
+# Get path to llvm-config
AC_CHECK_PROG(llvm_config, [llvm-config], `which llvm-config`)
if test -z $llvm_config; then
AC_MSG_ERROR([llvm-config not found (use --with-llvm=)])
@@ -53,6 +52,8 @@ AC_MSG_RESULT($llvm_full_version)
if test $llvm_version -lt 36; then
AC_MSG_ERROR([LLVM version must be >= 3.6])
fi
+AM_CONDITIONAL([LLVM_39_OR_NEWER], [test $llvm_version -ge 39])
+AM_CONDITIONAL([LLVM_40_OR_NEWER], [test $llvm_version -ge 40])
AC_DEFINE_UNQUOTED([LLVM_VERSION],
[$llvm_version],
[Version of LLVM we are building against])
@@ -61,17 +62,33 @@ AC_DEFINE_UNQUOTED([LLVM_VERSION],
CPPFLAGS="$CPPFLAGS `$llvm_config --cppflags`"
LDFLAGS="$LDFLAGS `$llvm_config --ldflags`"
-# Check for LLVM/Clang headers/libraries
+
+# --with-clang option to specify root of Clang installation
+AC_ARG_WITH(
+ clang,
+ [AS_HELP_STRING([--with-clang],
+ [directory containing Clang installation])],
+ [export PATH="$withval/bin:$PATH"
+ CPPFLAGS="$CPPFLAGS -I$withval/include/"
+ LDFLAGS="$LDFLAGS -L$withval/lib/"])
+
+CPPFLAGS_old="$CPPFLAGS"
+CPPFLAGS="$CPPFLAGS -std=c++11"
+
+# Check for Clang binaries, headers and libraries
+AC_CHECK_PROG(clang, [clang], `which clang`)
AC_CHECK_HEADERS(
- [llvm/IR/Instruction.h clang/CodeGen/CodeGenAction.h],
+ [clang/CodeGen/CodeGenAction.h],
[:],
- [AC_MSG_ERROR([LLVM/Clang includes not found (use --with-llvm=)])])
-
+ [AC_MSG_ERROR([Clang headers not found (use --with-clang=)])])
AC_CHECK_LIB(
[clangFrontend],
[main],
[:],
- [AC_MSG_ERROR([Clang library not found (use --with-llvm)])])
+ [AC_MSG_ERROR([Clang libraries not found (use --with-clang)])])
+
+CPPFLAGS="$CPPFLAGS_old"
+
# GNU readline library (for interactive debugger)
AC_ARG_WITH(
@@ -118,17 +135,14 @@ AM_CONDITIONAL([HAVE_PYTHON], [test "$PYTHON" != :])
# Kernel tests
KERNEL_TESTS=""
KERNEL_TEST_INPUTS=""
-KERNEL_TEST_OUTPUTS=""
m4_foreach([name], m4_split(m4_include(tests/kernels/TESTS), m4_newline),
[
- KERNEL_TESTS="$KERNEL_TESTS tests/kernels/"name".sim"
- KERNEL_TEST_INPUTS="$KERNEL_TEST_INPUTS tests/kernels/"name".sim"
- KERNEL_TEST_INPUTS="$KERNEL_TEST_INPUTS tests/kernels/"name".cl"
- KERNEL_TEST_INPUTS="$KERNEL_TEST_INPUTS tests/kernels/"name".ref"
- KERNEL_TEST_OUTPUTS="$KERNEL_TEST_OUTPUTS tests/kernels/"name".out"
+ KERNEL_TESTS="$KERNEL_TESTS kernels/"name".sim"
+ KERNEL_TEST_INPUTS="$KERNEL_TEST_INPUTS kernels/"name".sim"
+ KERNEL_TEST_INPUTS="$KERNEL_TEST_INPUTS kernels/"name".cl"
+ KERNEL_TEST_INPUTS="$KERNEL_TEST_INPUTS kernels/"name".ref"
])
AC_SUBST(KERNEL_TESTS, $KERNEL_TESTS)
AC_SUBST(KERNEL_TEST_INPUTS, $KERNEL_TEST_INPUTS)
-AC_SUBST(KERNEL_TEST_OUTPUTS, $KERNEL_TEST_OUTPUTS)
AC_OUTPUT
diff --git a/src/core/Context.cpp b/src/core/Context.cpp
index 6a8b4ff..e6fc415 100644
--- a/src/core/Context.cpp
+++ b/src/core/Context.cpp
@@ -1,11 +1,12 @@
// Context.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
+#include "config.h"
#include "common.h"
#if defined(_WIN32) && !defined(__MINGW32__)
@@ -17,6 +18,7 @@
#include <mutex>
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Instruction.h"
@@ -33,13 +35,17 @@
#include "plugins/Logger.h"
#include "plugins/MemCheck.h"
#include "plugins/RaceDetector.h"
+#include "plugins/Uninitialized.h"
using namespace oclgrind;
using namespace std;
Context::Context()
{
- m_globalMemory = new Memory(AddrSpaceGlobal, this);
+ m_llvmContext = new llvm::LLVMContext;
+
+ m_globalMemory = new Memory(AddrSpaceGlobal, sizeof(size_t)==8 ? 16 : 8,
+ this);
m_kernelInvocation = NULL;
loadPlugins();
@@ -47,6 +53,7 @@ Context::Context()
Context::~Context()
{
+ delete m_llvmContext;
delete m_globalMemory;
unloadPlugins();
@@ -67,6 +74,11 @@ Memory* Context::getGlobalMemory() const
return m_globalMemory;
}
+llvm::LLVMContext* Context::getLLVMContext() const
+{
+ return m_llvmContext;
+}
+
void Context::loadPlugins()
{
// Create core plugins
@@ -79,6 +91,9 @@ void Context::loadPlugins()
if (checkEnv("OCLGRIND_DATA_RACES"))
m_plugins.push_back(make_pair(new RaceDetector(this), true));
+ if (checkEnv("OCLGRIND_UNINITIALIZED"))
+ m_plugins.push_back(make_pair(new Uninitialized(this), true));
+
if (checkEnv("OCLGRIND_INTERACTIVE"))
m_plugins.push_back(make_pair(new InteractiveDebugger(this), true));
@@ -92,7 +107,7 @@ void Context::loadPlugins()
while(std::getline(ss, libpath, ':'))
{
#if defined(_WIN32) && !defined(__MINGW32__)
- HMODULE library = LoadLibrary(libpath.c_str());
+ HMODULE library = LoadLibraryA(libpath.c_str());
if (!library)
{
cerr << "Loading Oclgrind plugin failed (LoadLibrary): "
@@ -221,9 +236,10 @@ void Context::notifyKernelEnd(const KernelInvocation *kernelInvocation) const
}
void Context::notifyMemoryAllocated(const Memory *memory, size_t address,
- size_t size, cl_mem_flags flags) const
+ size_t size, cl_mem_flags flags,
+ const uint8_t *initData) const
{
- NOTIFY(memoryAllocated, memory, address, size, flags);
+ NOTIFY(memoryAllocated, memory, address, size, flags, initData);
}
void Context::notifyMemoryAtomicLoad(const Memory *memory, AtomicOp op,
@@ -274,6 +290,13 @@ void Context::notifyMemoryLoad(const Memory *memory, size_t address,
}
}
+void Context::notifyMemoryMap(const Memory *memory, size_t address,
+ size_t offset, size_t size,
+ cl_mem_flags flags) const
+{
+ NOTIFY(memoryMap, memory, address, offset, size, flags);
+}
+
void Context::notifyMemoryStore(const Memory *memory, size_t address,
size_t size, const uint8_t *storeData) const
{
@@ -301,6 +324,12 @@ void Context::notifyMessage(MessageType type, const char *message) const
NOTIFY(log, type, message);
}
+void Context::notifyMemoryUnmap(const Memory *memory, size_t address,
+ const void *ptr) const
+{
+ NOTIFY(memoryUnmap, memory, address, ptr);
+}
+
void Context::notifyWorkGroupBarrier(const WorkGroup *workGroup,
uint32_t flags) const
{
diff --git a/src/core/Context.h b/src/core/Context.h
index 41be6c7..131caab 100644
--- a/src/core/Context.h
+++ b/src/core/Context.h
@@ -1,5 +1,5 @@
// Context.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -8,6 +8,11 @@
#include "common.h"
+namespace llvm
+{
+ class LLVMContext;
+}
+
namespace oclgrind
{
class KernelInvocation;
@@ -26,6 +31,7 @@ namespace oclgrind
virtual ~Context();
Memory* getGlobalMemory() const;
+ llvm::LLVMContext* getLLVMContext() const;
bool isThreadSafe() const;
void logError(const char* error) const;
@@ -36,7 +42,8 @@ namespace oclgrind
void notifyKernelBegin(const KernelInvocation *kernelInvocation) const;
void notifyKernelEnd(const KernelInvocation *kernelInvocation) const;
void notifyMemoryAllocated(const Memory *memory, size_t address,
- size_t size, cl_mem_flags flags) const;
+ size_t size, cl_mem_flags flags,
+ const uint8_t *initData) const;
void notifyMemoryAtomicLoad(const Memory *memory, AtomicOp op,
size_t address, size_t size) const;
void notifyMemoryAtomicStore(const Memory *memory, AtomicOp op,
@@ -44,9 +51,13 @@ namespace oclgrind
void notifyMemoryDeallocated(const Memory *memory, size_t address) const;
void notifyMemoryLoad(const Memory *memory, size_t address,
size_t size) const;
+ void notifyMemoryMap(const Memory *memory, size_t address,
+ size_t offset, size_t size, cl_map_flags flags) const;
void notifyMemoryStore(const Memory *memory, size_t address, size_t size,
const uint8_t *storeData) const;
void notifyMessage(MessageType type, const char *message) const;
+ void notifyMemoryUnmap(const Memory *memory, size_t address,
+ const void *ptr) const;
void notifyWorkGroupBarrier(const WorkGroup *workGroup,
uint32_t flags) const;
void notifyWorkGroupBegin(const WorkGroup *workGroup) const;
@@ -68,6 +79,8 @@ namespace oclgrind
void loadPlugins();
void unloadPlugins();
+ llvm::LLVMContext *m_llvmContext;
+
public:
class Message
{
diff --git a/src/core/Kernel.cpp b/src/core/Kernel.cpp
index ab2741e..2ca8393 100644
--- a/src/core/Kernel.cpp
+++ b/src/core/Kernel.cpp
@@ -1,12 +1,14 @@
// Kernel.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
+#include "config.h"
#include "common.h"
+
#include <sstream>
#include "llvm/IR/Constants.h"
@@ -25,9 +27,6 @@ Kernel::Kernel(const Program *program,
const llvm::Function *function, const llvm::Module *module)
: m_program(program), m_function(function), m_name(function->getName())
{
- m_localMemory = new Memory(AddrSpaceLocal, program->getContext());
- m_privateMemory = new Memory(AddrSpacePrivate, program->getContext());
-
// Set-up global variables
llvm::Module::const_global_iterator itr;
for (itr = module->global_begin(); itr != module->global_end(); itr++)
@@ -37,41 +36,25 @@ Kernel::Kernel(const Program *program,
{
case AddrSpacePrivate:
{
+ // Get initializer data
const llvm::Constant *init = itr->getInitializer();
-
- // Allocate private memory for variable
unsigned size = getTypeSize(init->getType());
- size_t address = m_privateMemory->allocateBuffer(size);
-
- // Initialize variable
- void *ptr = m_privateMemory->getPointer(address);
- getConstantData((unsigned char*)ptr, init);
-
- TypedValue value =
- {
- sizeof(size_t),
- 1,
- new unsigned char[sizeof(size_t)]
- };
- value.setPointer(address);
- m_arguments[itr] = value;
+ TypedValue value = {size, 1, new uint8_t[size]};
+ getConstantData(value.data, init);
+ m_values[&*itr] = value;
break;
}
case AddrSpaceConstant:
- m_constants.push_back(itr);
+ m_constants.push_back(&*itr);
break;
case AddrSpaceLocal:
{
- // Allocate buffer
- unsigned size = getTypeSize(itr->getInitializer()->getType());
- TypedValue v = {
- sizeof(size_t),
- 1,
- new unsigned char[sizeof(size_t)]
+ // Get size of allocation
+ TypedValue allocSize = {
+ getTypeSize(itr->getInitializer()->getType()), 1, NULL
};
- v.setPointer(m_localMemory->allocateBuffer(size));
- m_arguments[itr] = v;
+ m_values[&*itr] = allocSize;
break;
}
@@ -111,26 +94,19 @@ Kernel::Kernel(const Kernel& kernel)
m_function = kernel.m_function;
m_constants = kernel.m_constants;
m_constantBuffers = kernel.m_constantBuffers;
- m_localMemory = kernel.m_localMemory->clone();
- m_privateMemory = kernel.m_privateMemory->clone();
m_name = kernel.m_name;
m_metadata = kernel.m_metadata;
- TypedValueMap::const_iterator itr;
- for (itr = kernel.m_arguments.begin();
- itr != kernel.m_arguments.end(); itr++)
+ for (auto itr = kernel.m_values.begin(); itr != kernel.m_values.end(); itr++)
{
- m_arguments[itr->first] = itr->second.clone();
+ m_values[itr->first] = itr->second.clone();
}
}
Kernel::~Kernel()
{
- delete m_localMemory;
- delete m_privateMemory;
-
TypedValueMap::iterator itr;
- for (itr = m_arguments.begin(); itr != m_arguments.end(); itr++)
+ for (itr = m_values.begin(); itr != m_values.end(); itr++)
{
delete[] itr->second.data;
}
@@ -141,7 +117,7 @@ bool Kernel::allArgumentsSet() const
llvm::Function::const_arg_iterator itr;
for (itr = m_function->arg_begin(); itr != m_function->arg_end(); itr++)
{
- if (!m_arguments.count(itr))
+ if (!m_values.count(&*itr))
{
return false;
}
@@ -157,22 +133,29 @@ void Kernel::allocateConstants(Memory *memory)
const llvm::Constant *initializer = (*itr)->getInitializer();
const llvm::Type *type = initializer->getType();
- // Allocate buffer
+ // Deallocate existing pointer
+ if (m_values.count(*itr))
+ {
+ delete[] m_values[*itr].data;
+ }
+
+ // Get initializer data
unsigned size = getTypeSize(type);
- TypedValue v = {
+ unsigned char *data = new unsigned char[size];
+ getConstantData(data, (const llvm::Constant*)initializer);
+
+ // Allocate buffer
+ TypedValue address = {
sizeof(size_t),
1,
new unsigned char[sizeof(size_t)]
};
- size_t address = memory->allocateBuffer(size);
- v.setPointer(address);
- m_constantBuffers.push_back(address);
- m_arguments[*itr] = v;
+ size_t ptr = memory->allocateBuffer(size, 0, data);
+ address.setPointer(ptr);
+
+ m_values[*itr] = address;
+ m_constantBuffers.push_back(ptr);
- // Initialise buffer contents
- unsigned char *data = new unsigned char[size];
- getConstantData(data, (const llvm::Constant*)initializer);
- memory->store(data, address, size);
delete[] data;
}
}
@@ -196,23 +179,23 @@ const llvm::Argument* Kernel::getArgument(unsigned int index) const
{
argItr++;
}
- return argItr;
+ return &*argItr;
}
unsigned int Kernel::getArgumentAccessQualifier(unsigned int index) const
{
assert(index < getNumArguments());
- // Get metadata node
- const llvm::MDNode *node = getArgumentMetadata("kernel_arg_access_qual");
- if (!node)
+ // Get metadata
+ const llvm::Metadata *md =
+ getArgumentMetadata("kernel_arg_access_qual", index);
+ if (!md)
{
return -1;
}
// Get qualifier string
- llvm::MDString *str
- = llvm::dyn_cast<llvm::MDString>(node->getOperand(index+1));
+ const llvm::MDString *str = llvm::dyn_cast<llvm::MDString>(md);
string access = str->getString();
if (access == "read_only")
{
@@ -233,15 +216,15 @@ unsigned int Kernel::getArgumentAddressQualifier(unsigned int index) const
{
assert(index < getNumArguments());
- // Get metadata node
- const llvm::MDNode *node = getArgumentMetadata("kernel_arg_addr_space");
- if (!node)
+ // Get metadata
+ const llvm::Metadata *md =
+ getArgumentMetadata("kernel_arg_addr_space", index);
+ if (!md)
{
return -1;
}
- // Get address space
- switch(getMDOpAsConstInt(node->getOperand(index+1))->getZExtValue())
+ switch(getMDAsConstInt(md)->getZExtValue())
{
case AddrSpacePrivate:
return CL_KERNEL_ARG_ADDRESS_PRIVATE;
@@ -256,8 +239,10 @@ unsigned int Kernel::getArgumentAddressQualifier(unsigned int index) const
}
}
-const llvm::MDNode* Kernel::getArgumentMetadata(string name) const
+const llvm::Metadata* Kernel::getArgumentMetadata(string name,
+ unsigned int index) const
{
+#if LLVM_VERSION < 39
if (!m_metadata)
{
return NULL;
@@ -273,11 +258,17 @@ const llvm::MDNode* Kernel::getArgumentMetadata(string name) const
if (node->getNumOperands() > 0 &&
((llvm::MDString*)(node->getOperand(0).get()))->getString() == name)
{
- return node;
+ return node->getOperand(index+1).get();
}
}
}
return NULL;
+#else
+ llvm::MDNode *node = m_function->getMetadata(name);
+ if (!node)
+ return NULL;
+ return node->getOperand(index);
+#endif
}
const llvm::StringRef Kernel::getArgumentName(unsigned int index) const
@@ -289,30 +280,37 @@ const llvm::StringRef Kernel::getArgumentTypeName(unsigned int index) const
{
assert(index < getNumArguments());
- // Get metadata node
- const llvm::MDNode *node = getArgumentMetadata("kernel_arg_type");
- if (!node)
+ // Get metadata
+ const llvm::Metadata *md = getArgumentMetadata("kernel_arg_type", index);
+ if (!md)
{
return "";
}
- return llvm::dyn_cast<llvm::MDString>(node->getOperand(index+1))->getString();
+ llvm::StringRef name = llvm::dyn_cast<llvm::MDString>(md)->getString();
+#if LLVM_VERSION >= 39
+ size_t imgStart = name.find(" image");
+ if (imgStart != llvm::StringRef::npos)
+ {
+ name = name.substr(imgStart+1);
+ }
+#endif
+ return name;
}
unsigned int Kernel::getArgumentTypeQualifier(unsigned int index) const
{
assert(index < getNumArguments());
- // Get metadata node
- const llvm::MDNode *node = getArgumentMetadata("kernel_arg_type_qual");
- if (!node)
+ // Get metadata
+ const llvm::Metadata *md = getArgumentMetadata("kernel_arg_type_qual", index);
+ if (!md)
{
return -1;
}
// Get qualifiers
- llvm::MDString *str =
- llvm::dyn_cast<llvm::MDString>(node->getOperand(index+1));
+ const llvm::MDString *str = llvm::dyn_cast<llvm::MDString>(md);
istringstream iss(str->getString().str());
unsigned int result = CL_KERNEL_ARG_TYPE_NONE;
@@ -368,12 +366,9 @@ string Kernel::getAttributes() const
name == "work_group_size_hint")
{
attributes << name << "("
- <<
- getMDOpAsConstInt(val->getOperand(1))->getZExtValue()
- << "," <<
- getMDOpAsConstInt(val->getOperand(2))->getZExtValue()
- << "," <<
- getMDOpAsConstInt(val->getOperand(3))->getZExtValue()
+ << getMDAsConstInt(val->getOperand(1))->getZExtValue()
+ << "," << getMDAsConstInt(val->getOperand(2))->getZExtValue()
+ << "," << getMDAsConstInt(val->getOperand(3))->getZExtValue()
<< ") ";
}
else if (name == "vec_type_hint")
@@ -406,14 +401,18 @@ const llvm::Function* Kernel::getFunction() const
return m_function;
}
-const Memory* Kernel::getLocalMemory() const
-{
- return m_localMemory;
-}
-
size_t Kernel::getLocalMemorySize() const
{
- return m_localMemory->getTotalAllocated();
+ size_t sz = 0;
+ for (auto value = m_values.begin(); value != m_values.end(); value++)
+ {
+ const llvm::Type *type = value->first->getType();
+ if (type->isPointerTy() && type->getPointerAddressSpace() == AddrSpaceLocal)
+ {
+ sz += value->second.size;
+ }
+ }
+ return sz;
}
const std::string& Kernel::getName() const
@@ -426,11 +425,6 @@ unsigned int Kernel::getNumArguments() const
return m_function->arg_size();
}
-const Memory* Kernel::getPrivateMemory() const
-{
- return m_privateMemory;
-}
-
const Program* Kernel::getProgram() const
{
return m_program;
@@ -439,22 +433,11 @@ const Program* Kernel::getProgram() const
void Kernel::getRequiredWorkGroupSize(size_t reqdWorkGroupSize[3]) const
{
memset(reqdWorkGroupSize, 0, 3*sizeof(size_t));
- for (unsigned i = 0; i < m_metadata->getNumOperands(); i++)
+ for (int j = 0; j < 3; j++)
{
- const llvm::MDOperand& op = m_metadata->getOperand(i);
- if (llvm::MDNode *val = llvm::dyn_cast<llvm::MDNode>(op.get()))
- {
- llvm::MDString *str =
- llvm::dyn_cast<llvm::MDString>(val->getOperand(0).get());
- if (str->getString() == "reqd_work_group_size")
- {
- for (int j = 0; j < 3; j++)
- {
- reqdWorkGroupSize[j] =
- getMDOpAsConstInt(val->getOperand(j+1))->getZExtValue();
- }
- }
- }
+ const llvm::Metadata *md = getArgumentMetadata("reqd_work_group_size", j);
+ if (md)
+ reqdWorkGroupSize[j] = getMDAsConstInt(md)->getZExtValue();
}
}
@@ -463,72 +446,22 @@ void Kernel::setArgument(unsigned int index, TypedValue value)
assert(index < m_function->arg_size());
const llvm::Value *argument = getArgument(index);
- unsigned int type = getArgumentAddressQualifier(index);
- if (type == CL_KERNEL_ARG_ADDRESS_LOCAL)
- {
- // Deallocate existing argument
- if (m_arguments.count(argument))
- {
- m_localMemory->deallocateBuffer(m_arguments[argument].getPointer());
- delete[] m_arguments[argument].data;
- }
- // Allocate local memory buffer
- TypedValue v = {
- sizeof(size_t),
- 1,
- new unsigned char[sizeof(size_t)]
- };
- v.setPointer(m_localMemory->allocateBuffer(value.size));
- m_arguments[argument] = v;
- }
- else
+ // Deallocate existing argument
+ if (m_values.count(argument))
{
- if (((const llvm::Argument*)argument)->hasByValAttr())
- {
- // Deallocate existing argument
- if (m_arguments.count(argument))
- {
- m_privateMemory->deallocateBuffer(m_arguments[argument].getPointer());
- delete[] m_arguments[argument].data;
- }
-
- TypedValue address =
- {
- sizeof(size_t),
- 1,
- new unsigned char[sizeof(size_t)]
- };
- size_t size = value.size*value.num;
- address.setPointer(m_privateMemory->allocateBuffer(size));
- m_privateMemory->store(value.data, address.getPointer(), size);
- m_arguments[argument] = address;
- }
- else
- {
- // Deallocate existing argument
- if (m_arguments.count(argument))
- {
- delete[] m_arguments[argument].data;
- }
-
- const llvm::Type *type = argument->getType();
- if (type->isVectorTy())
- {
- value.num = type->getVectorNumElements();
- value.size = getTypeSize(type->getVectorElementType());
- }
- m_arguments[argument] = value.clone();
- }
+ delete[] m_values[argument].data;
}
+
+ m_values[argument] = value.clone();
}
-TypedValueMap::const_iterator Kernel::args_begin() const
+TypedValueMap::const_iterator Kernel::values_begin() const
{
- return m_arguments.begin();
+ return m_values.begin();
}
-TypedValueMap::const_iterator Kernel::args_end() const
+TypedValueMap::const_iterator Kernel::values_end() const
{
- return m_arguments.end();
+ return m_values.end();
}
diff --git a/src/core/Kernel.h b/src/core/Kernel.h
index cf94e90..34755a1 100644
--- a/src/core/Kernel.h
+++ b/src/core/Kernel.h
@@ -1,5 +1,5 @@
// Kernel.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -17,6 +17,7 @@ namespace llvm
class Function;
class GlobalVariable;
class MDNode;
+ class Metadata;
class Module;
}
@@ -33,24 +34,22 @@ namespace oclgrind
Kernel(const Kernel& kernel);
virtual ~Kernel();
- TypedValueMap::const_iterator args_begin() const;
- TypedValueMap::const_iterator args_end() const;
+ TypedValueMap::const_iterator values_begin() const;
+ TypedValueMap::const_iterator values_end() const;
bool allArgumentsSet() const;
void allocateConstants(Memory *memory);
void deallocateConstants(Memory *memory);
- size_t getArgumentSize(unsigned int index) const;
unsigned int getArgumentAccessQualifier(unsigned int index) const;
unsigned int getArgumentAddressQualifier(unsigned int index) const;
const llvm::StringRef getArgumentName(unsigned int index) const;
+ size_t getArgumentSize(unsigned int index) const;
const llvm::StringRef getArgumentTypeName(unsigned int index) const;
unsigned int getArgumentTypeQualifier(unsigned int index) const;
std::string getAttributes() const;
const llvm::Function* getFunction() const;
- const Memory* getLocalMemory() const;
size_t getLocalMemorySize() const;
const std::string& getName() const;
unsigned int getNumArguments() const;
- const Memory* getPrivateMemory() const;
const Program* getProgram() const;
void getRequiredWorkGroupSize(size_t reqdWorkGroupSize[3]) const;
void setArgument(unsigned int index, TypedValue value);
@@ -58,15 +57,15 @@ namespace oclgrind
private:
const Program *m_program;
const llvm::Function *m_function;
- TypedValueMap m_arguments;
std::list<const llvm::GlobalVariable*> m_constants;
std::list<size_t> m_constantBuffers;
- Memory *m_localMemory;
const llvm::MDNode *m_metadata;
std::string m_name;
- Memory *m_privateMemory;
+
+ TypedValueMap m_values;
const llvm::Argument* getArgument(unsigned int index) const;
- const llvm::MDNode* getArgumentMetadata(std::string name) const;
+ const llvm::Metadata* getArgumentMetadata(std::string name,
+ unsigned int index) const;
};
}
diff --git a/src/core/KernelInvocation.cpp b/src/core/KernelInvocation.cpp
index 3d50031..a54f865 100644
--- a/src/core/KernelInvocation.cpp
+++ b/src/core/KernelInvocation.cpp
@@ -1,5 +1,5 @@
// KernelInvocation.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -22,14 +22,6 @@
using namespace oclgrind;
using namespace std;
-// TODO: Remove this when thread_local fixed on OS X
-#ifdef __APPLE__
-#define THREAD_LOCAL __thread
-#elif defined(_WIN32) && !defined(__MINGW32__)
-#define THREAD_LOCAL __declspec(thread)
-#else
-#define THREAD_LOCAL thread_local
-#endif
struct
{
WorkGroup *workGroup;
@@ -80,7 +72,8 @@ KernelInvocation::KernelInvocation(const Context *context, const Kernel *kernel,
Size3 firstGroup(0, 0, 0);
Size3 lastGroup(m_numGroups.x-1, m_numGroups.y-1, m_numGroups.z-1);
m_workGroups.push_back(firstGroup);
- m_workGroups.push_back(lastGroup);
+ if (lastGroup != firstGroup)
+ m_workGroups.push_back(lastGroup);
}
else
{
@@ -166,7 +159,7 @@ void KernelInvocation::run(const Context *context, Kernel *kernel,
catch (FatalError& err)
{
ostringstream info;
- info << endl << "OCLGRIND FATAL ERROR "
+ info << "OCLGRIND FATAL ERROR "
<< "(" << err.getFile() << ":" << err.getLine() << ")"
<< endl << err.what()
<< endl << "When allocating kernel constants for '"
@@ -272,7 +265,7 @@ void KernelInvocation::runWorker()
catch (FatalError& err)
{
ostringstream info;
- info << endl << "OCLGRIND FATAL ERROR "
+ info << "OCLGRIND FATAL ERROR "
<< "(" << err.getFile() << ":" << err.getLine() << ")"
<< endl << err.what();
m_context->logError(info.str().c_str());
@@ -324,6 +317,7 @@ bool KernelInvocation::switchWorkItem(const Size3 gid)
if (group == *pItr)
{
workerState.workGroup = new WorkGroup(this, group);
+ m_context->notifyWorkGroupBegin(workerState.workGroup);
found = true;
// Re-order list of groups accordingly
diff --git a/src/core/KernelInvocation.h b/src/core/KernelInvocation.h
index 4f02447..edca291 100644
--- a/src/core/KernelInvocation.h
+++ b/src/core/KernelInvocation.h
@@ -1,5 +1,5 @@
// KernelInvocation.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
diff --git a/src/core/Memory.cpp b/src/core/Memory.cpp
index cd33bc4..289badc 100644
--- a/src/core/Memory.cpp
+++ b/src/core/Memory.cpp
@@ -1,5 +1,5 @@
// Memory.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -7,6 +7,7 @@
// source code.
#include "common.h"
+
#include <cassert>
#include <cmath>
#include <cstring>
@@ -26,11 +27,16 @@ mutex atomicMutex[NUM_ATOMIC_MUTEXES];
#define ATOMIC_MUTEX(offset) \
atomicMutex[(((offset)>>2) & (NUM_ATOMIC_MUTEXES-1))]
-Memory::Memory(unsigned int addrSpace, const Context *context)
+Memory::Memory(unsigned addrSpace, unsigned bufferBits, const Context *context)
{
m_context = context;
m_addressSpace = addrSpace;
+ m_numBitsBuffer = bufferBits;
+ m_numBitsAddress = ((sizeof(size_t)<<3) - m_numBitsBuffer);
+ m_maxNumBuffers = ((size_t)1 << m_numBitsBuffer) - 1; // 0 reserved for NULL
+ m_maxBufferSize = ((size_t)1 << m_numBitsAddress);
+
clear();
}
@@ -39,17 +45,18 @@ Memory::~Memory()
clear();
}
-size_t Memory::allocateBuffer(size_t size, cl_mem_flags flags)
+size_t Memory::allocateBuffer(size_t size, cl_mem_flags flags,
+ const uint8_t *initData)
{
// Check requested size doesn't exceed maximum
- if (size > MAX_BUFFER_SIZE)
+ if (size > m_maxBufferSize)
{
return 0;
}
// Find first unallocated buffer slot
unsigned b = getNextBuffer();
- if (b >= MAX_NUM_BUFFERS)
+ if (b >= m_maxNumBuffers)
{
return 0;
}
@@ -60,9 +67,6 @@ size_t Memory::allocateBuffer(size_t size, cl_mem_flags flags)
buffer->flags = flags;
buffer->data = new unsigned char[size];
- // Initialize contents to 0
- memset(buffer->data, 0, size);
-
if (b >= m_memory.size())
{
m_memory.push_back(buffer);
@@ -74,9 +78,15 @@ size_t Memory::allocateBuffer(size_t size, cl_mem_flags flags)
m_totalAllocated += size;
- size_t address = ((size_t)b) << NUM_ADDRESS_BITS;
+ // Initialize contents of buffer
+ if (initData)
+ memcpy(buffer->data, initData, size);
+ else
+ memset(buffer->data, 0, size);
+
+ size_t address = ((size_t)b) << m_numBitsAddress;
- m_context->notifyMemoryAllocated(this, address, size, flags);
+ m_context->notifyMemoryAllocated(this, address, size, flags, initData);
return address;
}
@@ -93,8 +103,8 @@ uint32_t Memory::atomic(AtomicOp op, size_t address, uint32_t value)
}
// Get buffer
- size_t offset = EXTRACT_OFFSET(address);
- Buffer *buffer = m_memory[EXTRACT_BUFFER(address)];
+ size_t offset = extractOffset(address);
+ Buffer *buffer = m_memory[extractBuffer(address)];
uint32_t *ptr = (uint32_t*)(buffer->data + offset);
if (m_addressSpace == AddrSpaceGlobal)
@@ -155,8 +165,8 @@ uint32_t Memory::atomicCmpxchg(size_t address, uint32_t cmp, uint32_t value)
}
// Get buffer
- size_t offset = EXTRACT_OFFSET(address);
- Buffer *buffer = m_memory[EXTRACT_BUFFER(address)];
+ size_t offset = extractOffset(address);
+ Buffer *buffer = m_memory[extractBuffer(address)];
uint32_t *ptr = (uint32_t*)(buffer->data + offset);
if (m_addressSpace == AddrSpaceGlobal)
@@ -190,7 +200,7 @@ void Memory::clear()
}
delete *itr;
- size_t address = (itr-m_memory.begin())<<NUM_ADDRESS_BITS;
+ size_t address = (itr-m_memory.begin())<<m_numBitsAddress;
m_context->notifyMemoryDeallocated(this, address);
}
}
@@ -200,46 +210,17 @@ void Memory::clear()
m_totalAllocated = 0;
}
-Memory* Memory::clone() const
-{
- Memory *mem = new Memory(m_addressSpace, m_context);
-
- // Clone buffers
- mem->m_memory.resize(m_memory.size());
- mem->m_memory[0] = NULL;
- for (unsigned i = 1; i < m_memory.size(); i++)
- {
- Buffer *src = m_memory[i];
- Buffer *dst = new Buffer;
- dst->size = src->size;
- dst->flags = src->flags,
- dst->data =
- (src->flags&CL_MEM_USE_HOST_PTR) ?
- src->data : new unsigned char[src->size],
- memcpy(dst->data, src->data, src->size);
- mem->m_memory[i] = dst;
- m_context->notifyMemoryAllocated(mem, ((size_t)i<<NUM_ADDRESS_BITS),
- src->size, src->flags);
- }
-
- // Clone state
- mem->m_freeBuffers = m_freeBuffers;
- mem->m_totalAllocated = m_totalAllocated;
-
- return mem;
-}
-
size_t Memory::createHostBuffer(size_t size, void *ptr, cl_mem_flags flags)
{
// Check requested size doesn't exceed maximum
- if (size > MAX_BUFFER_SIZE)
+ if (size > m_maxBufferSize)
{
return 0;
}
// Find first unallocated buffer slot
unsigned b = getNextBuffer();
- if (b >= MAX_NUM_BUFFERS)
+ if (b >= m_maxNumBuffers)
{
return 0;
}
@@ -261,9 +242,9 @@ size_t Memory::createHostBuffer(size_t size, void *ptr, cl_mem_flags flags)
m_totalAllocated += size;
- size_t address = ((size_t)b) << NUM_ADDRESS_BITS;
+ size_t address = ((size_t)b) << m_numBitsAddress;
- m_context->notifyMemoryAllocated(this, address, size, flags);
+ m_context->notifyMemoryAllocated(this, address, size, flags, (uint8_t*)ptr);
return address;
}
@@ -277,8 +258,8 @@ bool Memory::copy(size_t dst, size_t src, size_t size)
{
return false;
}
- size_t src_offset = EXTRACT_OFFSET(src);
- Buffer *src_buffer = m_memory.at(EXTRACT_BUFFER(src));
+ size_t src_offset = extractOffset(src);
+ Buffer *src_buffer = m_memory.at(extractBuffer(src));
m_context->notifyMemoryStore(this, dst, size, src_buffer->data + src_offset);
@@ -288,8 +269,8 @@ bool Memory::copy(size_t dst, size_t src, size_t size)
{
return false;
}
- size_t dst_offset = EXTRACT_OFFSET(dst);
- Buffer *dst_buffer = m_memory.at(EXTRACT_BUFFER(dst));
+ size_t dst_offset = extractOffset(dst);
+ Buffer *dst_buffer = m_memory.at(extractBuffer(dst));
// Copy data
@@ -302,7 +283,7 @@ bool Memory::copy(size_t dst, size_t src, size_t size)
void Memory::deallocateBuffer(size_t address)
{
- unsigned buffer = EXTRACT_BUFFER(address);
+ unsigned buffer = extractBuffer(address);
assert(buffer < m_memory.size() && m_memory[buffer]);
if (!(m_memory[buffer]->flags & CL_MEM_USE_HOST_PTR))
@@ -323,7 +304,7 @@ void Memory::dump() const
{
for (unsigned b = 1; b < m_memory.size(); b++)
{
- if (!m_memory[b]->data)
+ if (!m_memory[b] || !m_memory[b]->data)
{
continue;
}
@@ -334,7 +315,7 @@ void Memory::dump() const
{
cout << endl << hex << uppercase
<< setw(16) << setfill(' ') << right
- << ((((size_t)b)<<NUM_ADDRESS_BITS) | i) << ":";
+ << ((((size_t)b)<<m_numBitsAddress) | i) << ":";
}
cout << " " << hex << uppercase << setw(2) << setfill('0')
<< (int)m_memory[b]->data[i];
@@ -343,6 +324,16 @@ void Memory::dump() const
cout << endl;
}
+size_t Memory::extractBuffer(size_t address) const
+{
+ return (address >> m_numBitsAddress);
+}
+
+size_t Memory::extractOffset(size_t address) const
+{
+ return (address & (((size_t)-1) >> m_numBitsBuffer));
+}
+
unsigned int Memory::getAddressSpace() const
{
return m_addressSpace;
@@ -350,7 +341,7 @@ unsigned int Memory::getAddressSpace() const
const Memory::Buffer* Memory::getBuffer(size_t address) const
{
- size_t buf = EXTRACT_BUFFER(address);
+ size_t buf = extractBuffer(address);
if (buf == 0 || buf >= m_memory.size() || !m_memory[buf]->data)
{
return NULL;
@@ -361,7 +352,7 @@ const Memory::Buffer* Memory::getBuffer(size_t address) const
size_t Memory::getMaxAllocSize()
{
- return MAX_BUFFER_SIZE;
+ return m_maxBufferSize;
}
unsigned Memory::getNextBuffer()
@@ -380,7 +371,7 @@ unsigned Memory::getNextBuffer()
void* Memory::getPointer(size_t address) const
{
- size_t buffer = EXTRACT_BUFFER(address);
+ size_t buffer = extractBuffer(address);
// Bounds check
if (!isAddressValid(address))
@@ -388,7 +379,7 @@ void* Memory::getPointer(size_t address) const
return NULL;
}
- return m_memory[buffer]->data + EXTRACT_OFFSET(address);
+ return m_memory[buffer]->data + extractOffset(address);
}
size_t Memory::getTotalAllocated() const
@@ -398,8 +389,8 @@ size_t Memory::getTotalAllocated() const
bool Memory::isAddressValid(size_t address, size_t size) const
{
- size_t buffer = EXTRACT_BUFFER(address);
- size_t offset = EXTRACT_OFFSET(address);
+ size_t buffer = extractBuffer(address);
+ size_t offset = extractOffset(address);
if (buffer == 0 ||
buffer >= m_memory.size() ||
!m_memory[buffer] ||
@@ -421,8 +412,8 @@ bool Memory::load(unsigned char *dest, size_t address, size_t size) const
}
// Get buffer
- size_t offset = EXTRACT_OFFSET(address);
- Buffer *src = m_memory[EXTRACT_BUFFER(address)];
+ size_t offset = extractOffset(address);
+ Buffer *src = m_memory[extractBuffer(address)];
// Load data
memcpy(dest, src->data + offset, size);
@@ -430,9 +421,9 @@ bool Memory::load(unsigned char *dest, size_t address, size_t size) const
return true;
}
-unsigned char* Memory::mapBuffer(size_t address, size_t offset, size_t size)
+void* Memory::mapBuffer(size_t address, size_t offset, size_t size)
{
- size_t buffer = EXTRACT_BUFFER(address);
+ size_t buffer = extractBuffer(address);
// Bounds check
if (!isAddressValid(address, size))
@@ -440,7 +431,7 @@ unsigned char* Memory::mapBuffer(size_t address, size_t offset, size_t size)
return NULL;
}
- return m_memory[buffer]->data + offset + EXTRACT_OFFSET(address);
+ return m_memory[buffer]->data + offset + extractOffset(address);
}
bool Memory::store(const unsigned char *source, size_t address, size_t size)
@@ -454,8 +445,8 @@ bool Memory::store(const unsigned char *source, size_t address, size_t size)
}
// Get buffer
- size_t offset = EXTRACT_OFFSET(address);
- Buffer *dst = m_memory[EXTRACT_BUFFER(address)];
+ size_t offset = extractOffset(address);
+ Buffer *dst = m_memory[extractBuffer(address)];
// Store data
memcpy(dst->data + offset, source, size);
diff --git a/src/core/Memory.h b/src/core/Memory.h
index 42eb63a..71f0c45 100644
--- a/src/core/Memory.h
+++ b/src/core/Memory.h
@@ -1,5 +1,5 @@
// Memory.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -8,16 +8,6 @@
#include "common.h"
-#define NUM_BUFFER_BITS ( (sizeof(size_t)==4) ? 8 : 16)
-#define MAX_NUM_BUFFERS ((size_t)1 << NUM_BUFFER_BITS)
-#define NUM_ADDRESS_BITS ((sizeof(size_t)<<3) - NUM_BUFFER_BITS)
-#define MAX_BUFFER_SIZE ((size_t)1 << NUM_ADDRESS_BITS)
-
-#define EXTRACT_BUFFER(address) \
- (address >> NUM_ADDRESS_BITS)
-#define EXTRACT_OFFSET(address) \
- (address & (((size_t)-1) >> NUM_BUFFER_BITS))
-
namespace oclgrind
{
class Context;
@@ -25,22 +15,22 @@ namespace oclgrind
class Memory
{
public:
- typedef struct
+ struct Buffer
{
size_t size;
cl_mem_flags flags;
unsigned char *data;
- } Buffer;
+ };
public:
- Memory(unsigned int addrSpace, const Context *context);
+ Memory(unsigned addrSpace, unsigned bufferBits, const Context *context);
virtual ~Memory();
- size_t allocateBuffer(size_t size, cl_mem_flags flags=0);
+ size_t allocateBuffer(size_t size, cl_mem_flags flags=0,
+ const uint8_t *initData = NULL);
uint32_t atomic(AtomicOp op, size_t address, uint32_t value = 0);
uint32_t atomicCmpxchg(size_t address, uint32_t cmp, uint32_t value);
void clear();
- Memory *clone() const;
size_t createHostBuffer(size_t size, void *ptr, cl_mem_flags flags=0);
bool copy(size_t dest, size_t src, size_t size);
void deallocateBuffer(size_t address);
@@ -51,10 +41,13 @@ namespace oclgrind
size_t getTotalAllocated() const;
bool isAddressValid(size_t address, size_t size=1) const;
bool load(unsigned char *dst, size_t address, size_t size=1) const;
- unsigned char* mapBuffer(size_t address, size_t offset, size_t size);
+ void* mapBuffer(size_t address, size_t offset, size_t size);
bool store(const unsigned char *source, size_t address, size_t size=1);
- static size_t getMaxAllocSize();
+ size_t extractBuffer(size_t address) const;
+ size_t extractOffset(size_t address) const;
+
+ size_t getMaxAllocSize();
private:
const Context *m_context;
@@ -63,6 +56,11 @@ namespace oclgrind
unsigned int m_addressSpace;
size_t m_totalAllocated;
+ unsigned m_numBitsBuffer;
+ unsigned m_numBitsAddress;
+ size_t m_maxNumBuffers;
+ size_t m_maxBufferSize;
+
unsigned getNextBuffer();
};
}
diff --git a/src/core/Plugin.cpp b/src/core/Plugin.cpp
index 8880f2d..bab80db 100644
--- a/src/core/Plugin.cpp
+++ b/src/core/Plugin.cpp
@@ -1,5 +1,5 @@
// Plugin.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
diff --git a/src/core/Plugin.h b/src/core/Plugin.h
index d4a8ea7..df7c863 100644
--- a/src/core/Plugin.h
+++ b/src/core/Plugin.h
@@ -1,5 +1,5 @@
// Plugin.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -37,7 +37,8 @@ namespace oclgrind
virtual void kernelEnd(const KernelInvocation *kernelInvocation){}
virtual void log(MessageType type, const char *message){}
virtual void memoryAllocated(const Memory *memory, size_t address,
- size_t size, cl_mem_flags flags){}
+ size_t size, cl_mem_flags flags,
+ const uint8_t *initData){}
virtual void memoryAtomicLoad(const Memory *memory,
const WorkItem *workItem,
AtomicOp op, size_t address, size_t size){}
@@ -49,12 +50,16 @@ namespace oclgrind
size_t address, size_t size){}
virtual void memoryLoad(const Memory *memory, const WorkGroup *workGroup,
size_t address, size_t size){}
+ virtual void memoryMap(const Memory *memory, size_t address,
+ size_t offset, size_t size, cl_map_flags flags){}
virtual void memoryStore(const Memory *memory, const WorkItem *workItem,
size_t address, size_t size,
const uint8_t *storeData){}
virtual void memoryStore(const Memory *memory, const WorkGroup *workGroup,
size_t address, size_t size,
const uint8_t *storeData){}
+ virtual void memoryUnmap(const Memory *memory, size_t address,
+ const void *ptr){}
virtual void workGroupBarrier(const WorkGroup *workGroup, uint32_t flags){}
virtual void workGroupBegin(const WorkGroup *workGroup){}
virtual void workGroupComplete(const WorkGroup *workGroup){}
diff --git a/src/core/Program.cpp b/src/core/Program.cpp
index 31fdc5b..6480ed7 100644
--- a/src/core/Program.cpp
+++ b/src/core/Program.cpp
@@ -1,12 +1,14 @@
// Program.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
+#include "config.h"
#include "common.h"
+
#include <fstream>
#if defined(_WIN32) && !defined(__MINGW32__)
@@ -29,7 +31,9 @@
#include "clang/CodeGen/CodeGenAction.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "Context.h"
#include "Kernel.h"
#include "Program.h"
#include "WorkItem.h"
@@ -129,7 +133,11 @@ bool Program::build(const char *options, list<Header> headers)
args.push_back("-cl-std=CL1.2");
args.push_back("-cl-kernel-arg-info");
args.push_back("-fno-builtin");
+#if LLVM_VERSION >= 38
+ args.push_back("-debug-info-kind=standalone");
+#else
args.push_back("-g");
+#endif
args.push_back("-triple");
if (sizeof(size_t) == 4)
args.push_back("spir-unknown-unknown");
@@ -150,6 +158,10 @@ bool Program::build(const char *options, list<Header> headers)
bool optimize = true;
bool cl12 = true;
+ // Disable optimizations by default if in interactive mode
+ if (checkEnv("OCLGRIND_INTERACTIVE"))
+ optimize = false;
+
// Add OpenCL build options
const char *mainOptions = options;
const char *extraOptions = getenv("OCLGRIND_BUILD_OPTIONS");
@@ -164,7 +176,8 @@ bool Program::build(const char *options, list<Header> headers)
// Ignore options that break PCH
if (strcmp(opt, "-cl-fast-relaxed-math") != 0 &&
strcmp(opt, "-cl-finite-math-only") != 0 &&
- strcmp(opt, "-cl-single-precision-constant") != 0)
+ strcmp(opt, "-cl-single-precision-constant") &&
+ strcmp(opt, "-cl-unsafe-math-optimizations") != 0)
{
// Check for optimization flags
if (strcmp(opt, "-O0") == 0 || strcmp(opt, "-cl-opt-disable") == 0)
@@ -178,6 +191,12 @@ bool Program::build(const char *options, list<Header> headers)
continue;
}
+#if LLVM_VERSION >= 37
+ // Clang no longer supports -cl-no-signed-zeros
+ if (strcmp(opt, "-cl-no-signed-zeros") == 0)
+ continue;
+#endif
+
// Check for -cl-std flag
if (strncmp(opt, "-cl-std=", 8) == 0)
{
@@ -214,11 +233,11 @@ bool Program::build(const char *options, list<Header> headers)
#if defined(_WIN32) && !defined(__MINGW32__)
char libpath[4096];
HMODULE dll;
- if (GetModuleHandleEx(
+ if (GetModuleHandleExA(
GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
(LPCSTR)&Program::createFromBitcode, &dll) &&
- GetModuleFileName(dll, libpath, sizeof(libpath)))
+ GetModuleFileNameA(dll, libpath, sizeof(libpath)))
{
#else
Dl_info dlinfo;
@@ -274,6 +293,7 @@ bool Program::build(const char *options, list<Header> headers)
args.push_back("-include-pch");
args.push_back(pch);
+ args.push_back("-fno-validate-pch");
}
else
{
@@ -325,8 +345,7 @@ bool Program::build(const char *options, list<Header> headers)
compiler.getPreprocessorOpts().addRemappedFile(REMAP_INPUT, buffer.release());
// Compile
- llvm::LLVMContext& context = llvm::getGlobalContext();
- clang::EmitLLVMOnlyAction action(&context);
+ clang::EmitLLVMOnlyAction action(m_context->getLLVMContext());
if (compiler.ExecuteAction(action))
{
// Retrieve module
@@ -338,32 +357,34 @@ bool Program::build(const char *options, list<Header> headers)
stripDebugIntrinsics();
}
- // Initialize pass managers
- llvm::legacy::PassManager modulePasses;
- llvm::legacy::FunctionPassManager functionPasses(m_module.get());
-#if LLVM_VERSION < 37
- modulePasses.add(new llvm::DataLayoutPass());
- functionPasses.add(new llvm::DataLayoutPass());
-#endif
-
// Run optimizations on module
if (optimize)
{
+ // Initialize pass managers
+ llvm::legacy::PassManager modulePasses;
+ llvm::legacy::FunctionPassManager functionPasses(m_module.get());
+#if LLVM_VERSION < 37
+ modulePasses.add(new llvm::DataLayoutPass());
+ functionPasses.add(new llvm::DataLayoutPass());
+#endif
+
// Populate pass managers with -Oz
llvm::PassManagerBuilder builder;
builder.OptLevel = 2;
builder.SizeLevel = 2;
builder.populateModulePassManager(modulePasses);
builder.populateFunctionPassManager(functionPasses);
+
+ // Run passes
+ functionPasses.doInitialization();
+ llvm::Module::iterator fItr;
+ for (fItr = m_module->begin(); fItr != m_module->end(); fItr++)
+ functionPasses.run(*fItr);
+ functionPasses.doFinalization();
+ modulePasses.run(*m_module);
}
- // Run passes
- functionPasses.doInitialization();
- llvm::Module::iterator fItr;
- for (fItr = m_module->begin(); fItr != m_module->end(); fItr++)
- functionPasses.run(*fItr);
- functionPasses.doFinalization();
- modulePasses.run(*m_module);
+ removeLValueLoads();
m_buildStatus = CL_BUILD_SUCCESS;
}
@@ -448,14 +469,22 @@ Program* Program::createFromBitcode(const Context *context,
}
// Parse bitcode into IR module
+#if LLVM_VERSION < 37
llvm::ErrorOr<llvm::Module*> module =
- parseBitcodeFile(buffer->getMemBufferRef(), llvm::getGlobalContext());
+#else
+ llvm::ErrorOr<unique_ptr<llvm::Module>> module =
+#endif
+ parseBitcodeFile(buffer->getMemBufferRef(), *context->getLLVMContext());
if (!module)
{
return NULL;
}
+#if LLVM_VERSION < 37
return new Program(context, module.get());
+#else
+ return new Program(context, module.get().release());
+#endif
}
Program* Program::createFromBitcodeFile(const Context *context,
@@ -470,35 +499,52 @@ Program* Program::createFromBitcodeFile(const Context *context,
}
// Parse bitcode into IR module
+#if LLVM_VERSION < 37
llvm::ErrorOr<llvm::Module*> module =
+#else
+ llvm::ErrorOr<unique_ptr<llvm::Module>> module =
+#endif
parseBitcodeFile(buffer->get()->getMemBufferRef(),
- llvm::getGlobalContext());
+ *context->getLLVMContext());
if (!module)
{
return NULL;
}
+#if LLVM_VERSION < 37
return new Program(context, module.get());
+#else
+ return new Program(context, module.get().release());
+#endif
}
Program* Program::createFromPrograms(const Context *context,
list<const Program*> programs)
{
llvm::Module *module = new llvm::Module("oclgrind_linked",
- llvm::getGlobalContext());
+ *context->getLLVMContext());
+#if LLVM_VERSION < 38
llvm::Linker linker(module);
+#else
+ llvm::Linker linker(*module);
+#endif
// Link modules
list<const Program*>::iterator itr;
for (itr = programs.begin(); itr != programs.end(); itr++)
{
- if (linker.linkInModule(CloneModule((*itr)->m_module.get())))
+#if LLVM_VERSION < 38
+ llvm::Module *m = llvm::CloneModule((*itr)->m_module.get());
+#else
+ unique_ptr<llvm::Module> m = llvm::CloneModule((*itr)->m_module.get());
+#endif
+ if (linker.linkInModule(std::move(m)))
{
return NULL;
}
}
- return new Program(context, linker.getModule());
+ return new Program(context, module);
}
Kernel* Program::createKernel(const string name)
@@ -508,7 +554,7 @@ Kernel* Program::createKernel(const string name)
// Iterate over functions in module to find kernel
llvm::Function *function = NULL;
-
+#if LLVM_VERSION < 37
// Query the SPIR kernel list
llvm::NamedMDNode* tuple = m_module->getNamedMetadata("opencl.kernels");
// No kernels in module
@@ -538,6 +584,17 @@ Kernel* Program::createKernel(const string name)
break;
}
}
+#else
+ for (auto F = m_module->begin(); F != m_module->end(); F++)
+ {
+ if (F->getCallingConv() == llvm::CallingConv::SPIR_KERNEL &&
+ F->getName() == name)
+ {
+ function = &*F;
+ break;
+ }
+ }
+#endif
if (function == NULL)
{
@@ -566,20 +623,17 @@ Kernel* Program::createKernel(const string name)
}
}
-unsigned char* Program::getBinary() const
+void Program::getBinary(unsigned char *binary) const
{
if (!m_module)
- {
- return NULL;
- }
+ return;
std::string str;
llvm::raw_string_ostream stream(str);
llvm::WriteBitcodeToFile(m_module.get(), stream);
stream.str();
- unsigned char *bitcode = new unsigned char[str.length()];
- memcpy(bitcode, str.c_str(), str.length());
- return bitcode;
+
+ memcpy(binary, str.c_str(), str.length());
}
size_t Program::getBinarySize() const
@@ -632,6 +686,7 @@ list<string> Program::getKernelNames() const
{
list<string> names;
+#if LLVM_VERSION < 37
// Query the SPIR kernel list
llvm::NamedMDNode* tuple = m_module->getNamedMetadata("opencl.kernels");
@@ -656,6 +711,15 @@ list<string> Program::getKernelNames() const
names.push_back(kernelFunction->getName());
}
}
+#else
+ for (auto F = m_module->begin(); F != m_module->end(); F++)
+ {
+ if (F->getCallingConv() == llvm::CallingConv::SPIR_KERNEL)
+ {
+ names.push_back(F->getName());
+ }
+ }
+#endif
return names;
}
@@ -664,6 +728,7 @@ unsigned int Program::getNumKernels() const
{
assert(m_module);
+#if LLVM_VERSION < 37
// Extract kernels from metadata
llvm::NamedMDNode* tuple = m_module->getNamedMetadata("opencl.kernels");
@@ -672,6 +737,19 @@ unsigned int Program::getNumKernels() const
return 0;
return tuple->getNumOperands();
+#else
+ unsigned int num = 0;
+
+ for (auto F = m_module->begin(); F != m_module->end(); F++)
+ {
+ if (F->getCallingConv() == llvm::CallingConv::SPIR_KERNEL)
+ {
+ num++;
+ }
+ }
+
+ return num;
+#endif
}
const string& Program::getSource() const
@@ -697,13 +775,303 @@ unsigned long Program::getUID() const
return m_uid;
}
+void Program::pruneDeadCode(llvm::Instruction *instruction)
+{
+ // Remove instructions that have no uses
+ if (instruction->getNumUses() == 0)
+ {
+ // Get list of operands
+ set<llvm::Value*> operands;
+ {
+ llvm::Instruction::op_iterator op;
+ for (op = instruction->op_begin(); op != instruction->op_end(); op++)
+ {
+ operands.insert(*op);
+ }
+ }
+
+ // Remove instruction
+ instruction->eraseFromParent();
+
+ // Prune operands
+ set<llvm::Value*>::iterator op;
+ for (op = operands.begin(); op != operands.end(); op++)
+ {
+ if (auto inst = llvm::dyn_cast<llvm::Instruction>(*op))
+ pruneDeadCode(inst);
+ }
+ }
+}
+
+void Program::removeLValueLoads()
+{
+ // Get list of all store instructions (candidates for scalarization)
+ set<llvm::StoreInst*> aggStores;
+ for (llvm::Module::iterator F = m_module->begin(); F != m_module->end(); F++)
+ {
+ llvm::Function *f = &*F;
+ for (llvm::inst_iterator I = inst_begin(f), E = inst_end(f); I != E; I++)
+ {
+ if (auto store = llvm::dyn_cast<llvm::StoreInst>(&*I))
+ aggStores.insert(store);
+ }
+ }
+
+ // Replace aggregate modify-write sequences with direct scalar writes
+ set<llvm::StoreInst*>::iterator itr;
+ for (itr = aggStores.begin(); itr != aggStores.end(); itr++)
+ {
+ scalarizeAggregateStore(*itr);
+ }
+}
+
+void Program::scalarizeAggregateStore(llvm::StoreInst *store)
+{
+ llvm::IntegerType *gepIndexType = (sizeof(size_t)==8) ?
+ llvm::Type::getInt64Ty(m_module.get()->getContext()) :
+ llvm::Type::getInt32Ty(m_module.get()->getContext());
+
+ llvm::Value *storeValue = store->getValueOperand();
+ llvm::Value *vectorPtr = store->getPointerOperand();
+
+ if (auto insert = llvm::dyn_cast<llvm::InsertElementInst>(storeValue))
+ {
+ llvm::Value *vector = insert->getOperand(0);
+ llvm::Value *value = insert->getOperand(1);
+ llvm::Value *index = insert->getOperand(2);
+
+ // Create GEP for scalar value
+ llvm::GetElementPtrInst *scalarPtr = NULL;
+ if (auto gep = llvm::dyn_cast<llvm::GetElementPtrInst>(vectorPtr))
+ {
+ // Create GEP from existing GEP
+ std::vector<llvm::Value*> indices;
+ for (auto idx = gep->idx_begin(); idx != gep->idx_end(); idx++)
+ {
+ indices.push_back(*idx);
+ }
+ indices.push_back(index);
+ scalarPtr = llvm::GetElementPtrInst::Create(
+#if LLVM_VERSION > 36
+ gep->getPointerOperandType()->getPointerElementType(),
+#endif
+ gep->getPointerOperand(), indices);
+ }
+ else
+ {
+ // Create GEP from non-GEP pointer
+ std::vector<llvm::Value*> indices;
+ indices.push_back(llvm::ConstantInt::getSigned(gepIndexType, 0));
+ indices.push_back(index);
+ scalarPtr = llvm::GetElementPtrInst::Create(
+#if LLVM_VERSION > 36
+ vectorPtr->getType()->getPointerElementType(),
+#endif
+ vectorPtr, indices);
+ }
+ scalarPtr->setDebugLoc(store->getDebugLoc());
+ scalarPtr->insertAfter(store);
+
+ // Create direct scalar store
+ llvm::StoreInst *scalarStore = new llvm::StoreInst(
+ value, scalarPtr, store->isVolatile(),
+ getTypeAlignment(value->getType()));
+ scalarStore->setDebugLoc(store->getDebugLoc());
+ scalarStore->insertAfter(scalarPtr);
+
+ // Check if the input to the insertelement instruction came from something
+ // other than a load to the same address as the store
+ llvm::LoadInst *load = llvm::dyn_cast<llvm::LoadInst>(vector);
+ if (!(load && load->getPointerOperand() == store->getPointerOperand()))
+ {
+ // Replace value in store with the input to the insertelement instruction
+ llvm::StoreInst *_store = new llvm::StoreInst(
+ vector, store->getPointerOperand(),
+ store->isVolatile(), store->getAlignment());
+ _store->setDebugLoc(store->getDebugLoc());
+ _store->insertAfter(store);
+
+ // Repeat process with new store
+ if (_store)
+ scalarizeAggregateStore(_store);
+ }
+
+ // Remove vector store and any dead code
+ store->eraseFromParent();
+ pruneDeadCode(insert);
+ }
+ else if (auto shuffle = llvm::dyn_cast<llvm::ShuffleVectorInst>(storeValue))
+ {
+ llvm::Value *v1 = shuffle->getOperand(0);
+ llvm::Value *v2 = shuffle->getOperand(1);
+ llvm::Constant *mask = shuffle->getMask();
+ unsigned maskSize = mask->getType()->getVectorNumElements();
+
+ // Check if shuffle sources came from a load with same address as the store
+ llvm::LoadInst *load;
+ bool v1SourceIsDest = false, v2SourceIsDest = false;
+ if ((load = llvm::dyn_cast<llvm::LoadInst>(v1)) &&
+ load->getPointerOperand() == vectorPtr)
+ v1SourceIsDest = true;
+ if ((load = llvm::dyn_cast<llvm::LoadInst>(v2)) &&
+ load->getPointerOperand() == vectorPtr)
+ v2SourceIsDest = true;
+
+ // Get mask indices that don't correspond to the destination vector
+ stack<unsigned> indices;
+ for (unsigned i = 0; i < maskSize; i++)
+ {
+ int idx = shuffle->getMaskValue(i);
+
+ // Skip undef indices
+ if (idx == -1)
+ continue;
+
+ // Check if source is the store destination
+ bool sourceIsDest =
+ ((unsigned)idx < v1->getType()->getVectorNumElements() ?
+ v1SourceIsDest : v2SourceIsDest);
+
+ // If destination is used in non-identity position, leave shuffle as is
+ if (sourceIsDest && (unsigned)idx != i)
+ return;
+
+ // Add non-destination index
+ if (!sourceIsDest)
+ indices.push(i);
+ }
+
+ // Check if destination is actually used as a source in the mask
+ if (indices.size() == maskSize)
+ {
+ // Check for any unused loads with the same address as the store
+ // These would usually be caught by DCE, but if optimisations are
+ // disabled we need to prune these manually
+ list<llvm::LoadInst*> lvalueloads;
+ for (auto user = vectorPtr->user_begin();
+ user != vectorPtr->user_end() ;
+ user++)
+ {
+ if (auto load = llvm::dyn_cast<llvm::LoadInst>(*user))
+ {
+ if (load->getNumUses() == 0)
+ lvalueloads.push_back(load);
+ }
+ }
+ for (auto load = lvalueloads.begin(); load != lvalueloads.end(); load++)
+ {
+ (*load)->eraseFromParent();
+ }
+
+ return;
+ }
+
+ // Create a scalar store for each shuffle index
+ while (!indices.empty())
+ {
+ unsigned index = indices.top();
+ indices.pop();
+
+ // Create GEP for scalar value
+ llvm::GetElementPtrInst *scalarPtr = NULL;
+ if (auto gep = llvm::dyn_cast<llvm::GetElementPtrInst>(vectorPtr))
+ {
+ // Create GEP from existing GEP
+ std::vector<llvm::Value*> gepIndices;
+ for (auto idx = gep->idx_begin(); idx != gep->idx_end(); idx++)
+ {
+ gepIndices.push_back(*idx);
+ }
+ gepIndices.push_back(llvm::ConstantInt::getSigned(gepIndexType, index));
+ scalarPtr = llvm::GetElementPtrInst::Create(
+#if LLVM_VERSION > 36
+ gep->getPointerOperandType()->getPointerElementType(),
+#endif
+ gep->getPointerOperand(), gepIndices);
+ }
+ else
+ {
+ // Create GEP from non-GEP pointer
+ std::vector<llvm::Value*> gepIndices;
+ gepIndices.push_back(llvm::ConstantInt::getSigned(gepIndexType, 0));
+ gepIndices.push_back(llvm::ConstantInt::getSigned(gepIndexType, index));
+ scalarPtr = llvm::GetElementPtrInst::Create(
+#if LLVM_VERSION > 36
+ vectorPtr->getType()->getPointerElementType(),
+#endif
+ vectorPtr, gepIndices);
+ }
+ scalarPtr->setDebugLoc(store->getDebugLoc());
+ scalarPtr->insertAfter(store);
+
+ // Get source vector and index
+ unsigned idx = shuffle->getMaskValue(index);
+ unsigned v1num = v1->getType()->getVectorNumElements();
+ llvm::Value *src = v1;
+ if (idx >= v1num)
+ {
+ idx -= v1num;
+ src = v2;
+ }
+
+ // Create direct scalar store
+ if (auto cnst = llvm::dyn_cast<llvm::ConstantVector>(src))
+ {
+ // If source is a constant, extract scalar constant
+ src = cnst->getAggregateElement(idx);
+
+ llvm::StoreInst *scalarStore = new llvm::StoreInst(
+ src, scalarPtr, store->isVolatile(),
+ getTypeAlignment(src->getType()));
+ scalarStore->setDebugLoc(store->getDebugLoc());
+ scalarStore->insertAfter(scalarPtr);
+ }
+ else
+ {
+ // If extracting from a shuffle, trace back to last non-shuffle
+ while (auto shfl = llvm::dyn_cast<llvm::ShuffleVectorInst>(src))
+ {
+ llvm::Value *v1 = shfl->getOperand(0);
+ llvm::Value *v2 = shfl->getOperand(1);
+ unsigned v1num = v1->getType()->getVectorNumElements();
+
+ // Get source vector and index
+ idx = shfl->getMaskValue(idx);
+ src = v1;
+ if (idx >= v1num)
+ {
+ idx -= v1num;
+ src = v2;
+ }
+ }
+
+ llvm::ExtractElementInst *extract = llvm::ExtractElementInst::Create(
+ src, llvm::ConstantInt::getSigned(gepIndexType, idx));
+ extract->setDebugLoc(shuffle->getDebugLoc());
+ extract->insertAfter(scalarPtr);
+
+ llvm::StoreInst *scalarStore = new llvm::StoreInst(
+ extract, scalarPtr, store->isVolatile(),
+ getTypeAlignment(extract->getType()));
+ scalarStore->setDebugLoc(store->getDebugLoc());
+ scalarStore->insertAfter(extract);
+ }
+ }
+
+ // Prune old store and any dead code
+ store->eraseFromParent();
+ pruneDeadCode(shuffle);
+ }
+}
+
void Program::stripDebugIntrinsics()
{
// Get list of llvm.dbg intrinsics
set<llvm::Instruction*> intrinsics;
for (llvm::Module::iterator F = m_module->begin(); F != m_module->end(); F++)
{
- for (llvm::inst_iterator I = inst_begin(F), E = inst_end(F); I != E; I++)
+ llvm::Function *f = &*F;
+ for (llvm::inst_iterator I = inst_begin(f), E = inst_end(f); I != E; I++)
{
if (I->getOpcode() == llvm::Instruction::Call)
{
@@ -722,7 +1090,6 @@ void Program::stripDebugIntrinsics()
set<llvm::Instruction*>::iterator itr;
for (itr = intrinsics.begin(); itr != intrinsics.end(); itr++)
{
- (*itr)->removeFromParent();
- delete *itr;
+ (*itr)->eraseFromParent();
}
}
diff --git a/src/core/Program.h b/src/core/Program.h
index f888746..8b901c9 100644
--- a/src/core/Program.h
+++ b/src/core/Program.h
@@ -1,5 +1,5 @@
// Program.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -12,6 +12,7 @@ namespace llvm
{
class Function;
class Module;
+ class StoreInst;
}
namespace oclgrind
@@ -42,7 +43,7 @@ namespace oclgrind
Kernel* createKernel(const std::string name);
const std::string& getBuildLog() const;
const std::string& getBuildOptions() const;
- unsigned char* getBinary() const;
+ void getBinary(unsigned char *binary) const;
size_t getBinarySize() const;
unsigned int getBuildStatus() const;
const Context *getContext() const;
@@ -69,6 +70,9 @@ namespace oclgrind
unsigned long m_uid;
unsigned long generateUID() const;
+ void pruneDeadCode(llvm::Instruction*);
+ void removeLValueLoads();
+ void scalarizeAggregateStore(llvm::StoreInst *store);
void stripDebugIntrinsics();
typedef std::map<const llvm::Function*, InterpreterCache*>
diff --git a/src/core/Queue.cpp b/src/core/Queue.cpp
index e9e082f..a3510e6 100644
--- a/src/core/Queue.cpp
+++ b/src/core/Queue.cpp
@@ -1,5 +1,5 @@
// Queue.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -7,6 +7,7 @@
// source code.
#include "common.h"
+
#include <cassert>
#include "Context.h"
@@ -114,6 +115,12 @@ void Queue::executeKernel(KernelCommand *cmd)
cmd->localSize);
}
+void Queue::executeMap(MapCommand *cmd)
+{
+ m_context->notifyMemoryMap(m_context->getGlobalMemory(),
+ cmd->address, cmd->offset, cmd->size, cmd->flags);
+}
+
void Queue::executeNativeKernel(NativeKernelCommand *cmd)
{
// Run kernel
@@ -147,6 +154,12 @@ void Queue::executeReadBufferRect(BufferRectCommand *cmd)
}
}
+void Queue::executeUnmap(UnmapCommand *cmd)
+{
+ m_context->notifyMemoryUnmap(m_context->getGlobalMemory(),
+ cmd->address, cmd->ptr);
+}
+
void Queue::executeWriteBuffer(BufferCommand *cmd)
{
m_context->getGlobalMemory()->store(cmd->ptr, cmd->address, cmd->size);
@@ -238,9 +251,15 @@ Queue::Command* Queue::update()
case KERNEL:
executeKernel((KernelCommand*)cmd);
break;
+ case MAP:
+ executeMap((MapCommand*)cmd);
+ break;
case NATIVE_KERNEL:
executeNativeKernel((NativeKernelCommand*)cmd);
break;
+ case UNMAP:
+ executeUnmap((UnmapCommand*)cmd);
+ break;
case WRITE:
executeWriteBuffer((BufferCommand*)cmd);
break;
diff --git a/src/core/Queue.h b/src/core/Queue.h
index 7736d47..7879dbd 100644
--- a/src/core/Queue.h
+++ b/src/core/Queue.h
@@ -1,5 +1,5 @@
// Queue.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -25,7 +25,8 @@ namespace oclgrind
{
public:
enum CommandType {EMPTY, COPY, COPY_RECT, FILL_BUFFER, FILL_IMAGE, KERNEL,
- NATIVE_KERNEL, READ, READ_RECT, WRITE, WRITE_RECT};
+ MAP, NATIVE_KERNEL, READ, READ_RECT, UNMAP, WRITE,
+ WRITE_RECT};
struct Command
{
CommandType type;
@@ -155,6 +156,27 @@ namespace oclgrind
}
}
};
+ struct MapCommand : Command
+ {
+ void *ptr;
+ size_t address;
+ size_t offset;
+ size_t size;
+ cl_map_flags flags;
+ MapCommand()
+ {
+ type = MAP;
+ }
+ };
+ struct UnmapCommand : Command
+ {
+ const void *ptr;
+ size_t address;
+ UnmapCommand()
+ {
+ type = UNMAP;
+ }
+ };
public:
Queue(const Context *context);
@@ -167,9 +189,11 @@ namespace oclgrind
void executeFillBuffer(FillBufferCommand *cmd);
void executeFillImage(FillImageCommand *cmd);
void executeKernel(KernelCommand *cmd);
+ void executeMap(MapCommand *cmd);
void executeNativeKernel(NativeKernelCommand *cmd);
void executeReadBuffer(BufferCommand *cmd);
void executeReadBufferRect(BufferRectCommand *cmd);
+ void executeUnmap(UnmapCommand *cmd);
void executeWriteBuffer(BufferCommand *cmd);
void executeWriteBufferRect(BufferRectCommand *cmd);
diff --git a/src/core/WorkGroup.cpp b/src/core/WorkGroup.cpp
index 23daf9d..2c891c6 100644
--- a/src/core/WorkGroup.cpp
+++ b/src/core/WorkGroup.cpp
@@ -1,5 +1,5 @@
// WorkGroup.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -7,6 +7,7 @@
// source code.
#include "common.h"
+
#include <sstream>
#include "llvm/IR/Module.h"
@@ -33,7 +34,20 @@ WorkGroup::WorkGroup(const KernelInvocation *kernelInvocation, Size3 wgid)
kernelInvocation->getNumGroups().x));
// Allocate local memory
- m_localMemory = kernelInvocation->getKernel()->getLocalMemory()->clone();
+ m_localMemory = new Memory(AddrSpaceLocal, sizeof(size_t)==8 ? 16 : 8,
+ m_context);
+ const Kernel *kernel = kernelInvocation->getKernel();
+ for (auto value = kernel->values_begin();
+ value != kernel->values_end();
+ value++)
+ {
+ const llvm::Type *type = value->first->getType();
+ if (type->isPointerTy() && type->getPointerAddressSpace() == AddrSpaceLocal)
+ {
+ size_t ptr = m_localMemory->allocateBuffer(value->second.size);
+ m_localAddresses[value->first] = ptr;
+ }
+ }
// Initialise work-items
for (size_t k = 0; k < m_groupSize.z; k++)
@@ -46,7 +60,6 @@ WorkGroup::WorkGroup(const KernelInvocation *kernelInvocation, Size3 wgid)
Size3(i, j, k));
m_workItems.push_back(workItem);
m_running.insert(workItem);
- m_context->notifyWorkItemBegin(workItem);
}
}
}
@@ -287,6 +300,11 @@ Memory* WorkGroup::getLocalMemory() const
return m_localMemory;
}
+size_t WorkGroup::getLocalMemoryAddress(const llvm::Value *value) const
+{
+ return m_localAddresses.at(value);
+}
+
WorkItem* WorkGroup::getNextWorkItem() const
{
if (m_running.empty())
diff --git a/src/core/WorkGroup.h b/src/core/WorkGroup.h
index 88319cf..73cb2b7 100644
--- a/src/core/WorkGroup.h
+++ b/src/core/WorkGroup.h
@@ -1,5 +1,5 @@
// WorkGroup.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -32,7 +32,7 @@ namespace oclgrind
};
std::set<WorkItem*, WorkItemCmp> m_running;
- typedef struct
+ struct AsyncCopy
{
const llvm::Instruction *instruction;
AsyncCopyType type;
@@ -44,16 +44,16 @@ namespace oclgrind
size_t destStride;
size_t event;
- } AsyncCopy;
+ };
- typedef struct
+ struct Barrier
{
const llvm::Instruction *instruction;
std::set<WorkItem*, WorkItemCmp> workItems;
uint64_t fence;
std::list<size_t> events;
- } Barrier;
+ };
public:
WorkGroup(const KernelInvocation *kernelInvocation, Size3 wgid);
@@ -76,6 +76,7 @@ namespace oclgrind
size_t getGroupIndex() const;
Size3 getGroupSize() const;
Memory* getLocalMemory() const;
+ size_t getLocalMemoryAddress(const llvm::Value *value) const;
WorkItem *getNextWorkItem() const;
WorkItem *getWorkItem(Size3 localID) const;
bool hasBarrier() const;
@@ -89,7 +90,10 @@ namespace oclgrind
Size3 m_groupID;
Size3 m_groupSize;
const Context *m_context;
+
Memory *m_localMemory;
+ std::map<const llvm::Value*,size_t> m_localAddresses;
+
std::vector<WorkItem*> m_workItems;
Barrier *m_barrier;
diff --git a/src/core/WorkItem.cpp b/src/core/WorkItem.cpp
index 9d37ade..4441a33 100644
--- a/src/core/WorkItem.cpp
+++ b/src/core/WorkItem.cpp
@@ -1,13 +1,16 @@
// WorkItem.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.`
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
+#include "config.h"
#include "common.h"
+#include <math.h>
+
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
@@ -30,9 +33,10 @@ using namespace std;
struct WorkItem::Position
{
- llvm::Function::const_iterator prevBlock;
- llvm::Function::const_iterator currBlock;
- llvm::Function::const_iterator nextBlock;
+ bool hasBegun;
+ const llvm::BasicBlock * prevBlock;
+ const llvm::BasicBlock * currBlock;
+ const llvm::BasicBlock * nextBlock;
llvm::BasicBlock::const_iterator currInst;
std::stack<const llvm::Instruction*> callStack;
std::stack< std::list<size_t> > allocations;
@@ -67,21 +71,48 @@ WorkItem::WorkItem(const KernelInvocation *kernelInvocation,
// Set initial number of values to store based on cache
m_values.resize(m_cache->getNumValues());
- m_privateMemory = kernel->getPrivateMemory()->clone();
+ m_privateMemory = new Memory(AddrSpacePrivate, sizeof(size_t)==8 ? 32 : 16,
+ m_context);
- // Initialise kernel arguments
- TypedValueMap::const_iterator argItr;
- for (argItr = kernel->args_begin(); argItr != kernel->args_end(); argItr++)
+ // Initialise kernel arguments and global variables
+ for (auto value = kernel->values_begin();
+ value != kernel->values_end();
+ value++)
{
- setValue(argItr->first, m_pool.clone(argItr->second));
+ pair<unsigned,unsigned> size = getValueSize(value->first);
+ TypedValue v = {
+ size.first,
+ size.second,
+ m_pool.alloc(size.first*size.second)
+ };
+
+ const llvm::Type *type = value->first->getType();
+ if (type->isPointerTy() &&
+ type->getPointerAddressSpace() == AddrSpacePrivate)
+ {
+ size_t sz = value->second.size*value->second.num;
+ v.setPointer(m_privateMemory->allocateBuffer(sz, 0, value->second.data));
+ }
+ else if (type->isPointerTy() &&
+ type->getPointerAddressSpace() == AddrSpaceLocal)
+ {
+ v.setPointer(m_workGroup->getLocalMemoryAddress(value->first));
+ }
+ else
+ {
+ memcpy(v.data, value->second.data, v.size*v.num);
+ }
+
+ setValue(value->first, v);
}
// Initialize interpreter state
m_state = READY;
m_position = new Position;
+ m_position->hasBegun = false;
m_position->prevBlock = NULL;
m_position->nextBlock = NULL;
- m_position->currBlock = kernel->getFunction()->begin();
+ m_position->currBlock = &*kernel->getFunction()->begin();
m_position->currInst = m_position->currBlock->begin();
}
@@ -298,19 +329,19 @@ void WorkItem::execute(const llvm::Instruction *instruction)
m_context->notifyInstructionExecuted(this, instruction, result);
}
-TypedValue WorkItem::getValue(const llvm::Value *key) const
+const stack<const llvm::Instruction*>& WorkItem::getCallStack() const
{
- return m_values[m_cache->getValueID(key)];
+ return m_position->callStack;
}
-const stack<const llvm::Instruction*>& WorkItem::getCallStack() const
+const llvm::BasicBlock* WorkItem::getCurrentBlock() const
{
- return m_position->callStack;
+ return m_position->currBlock;
}
const llvm::Instruction* WorkItem::getCurrentInstruction() const
{
- return m_position->currInst;
+ return &*m_position->currInst;
}
Size3 WorkItem::getGlobalID() const
@@ -416,6 +447,11 @@ TypedValue WorkItem::getOperand(const llvm::Value *operand) const
assert(false);
}
+const llvm::BasicBlock* WorkItem::getPreviousBlock() const
+{
+ return m_position->prevBlock;
+}
+
Memory* WorkItem::getPrivateMemory() const
{
return m_privateMemory;
@@ -426,6 +462,11 @@ WorkItem::State WorkItem::getState() const
return m_state;
}
+TypedValue WorkItem::getValue(const llvm::Value *key) const
+{
+ return m_values[m_cache->getValueID(key)];
+}
+
const unsigned char* WorkItem::getValueData(const llvm::Value *value) const
{
if (!hasValue(value))
@@ -437,13 +478,27 @@ const unsigned char* WorkItem::getValueData(const llvm::Value *value) const
const llvm::Value* WorkItem::getVariable(std::string name) const
{
+ // Check private variables
VariableMap::const_iterator itr;
itr = m_variables.find(name);
- if (itr == m_variables.end())
+ if (itr != m_variables.end())
+ return itr->second;
+
+ // Check global variables
+ string globalName = m_position->currBlock->getParent()->getName();
+ globalName += ".";
+ globalName += name;
+ const llvm::Module *module =
+ m_kernelInvocation->getKernel()->getFunction()->getParent();
+ for (auto global = module->global_begin();
+ global != module->global_end();
+ global++)
{
- return NULL;
+ if (global->getName() == globalName)
+ return &*global;
}
- return itr->second;
+
+ return NULL;
}
const WorkGroup* WorkItem::getWorkGroup() const
@@ -478,18 +533,18 @@ bool WorkItem::printVariable(string name) const
}
// Get variable value
- TypedValue result = getValue(value);
+ TypedValue result = getOperand(value);
const llvm::Type *type = value->getType();
- if (((const llvm::Instruction*)value)->getOpcode()
- == llvm::Instruction::Alloca)
+ if (value->getValueID() == llvm::Value::GlobalVariableVal ||
+ ((const llvm::Instruction*)value)->getOpcode()
+ == llvm::Instruction::Alloca)
{
- // If value is alloca result, look-up data at address
- const llvm::Type *elemType = value->getType()->getPointerElementType();
+ // If value is alloca or global variable, look-up data at address
size_t address = result.getPointer();
-
- unsigned char *data = (unsigned char*)m_privateMemory->getPointer(address);
- printTypedData(elemType, data);
+ Memory *memory = getMemory(value->getType()->getPointerAddressSpace());
+ unsigned char *data = (unsigned char*)memory->getPointer(address);
+ printTypedData(value->getType()->getPointerElementType(), data);
}
else
{
@@ -508,8 +563,14 @@ WorkItem::State WorkItem::step()
{
assert(m_state == READY);
+ if (!m_position->hasBegun)
+ {
+ m_position->hasBegun = true;
+ m_context->notifyWorkItemBegin(this);
+ }
+
// Execute the next instruction
- execute(m_position->currInst);
+ execute(&*m_position->currInst);
// Check if we've reached the end of the block
if (++m_position->currInst == m_position->currBlock->end() ||
@@ -525,6 +586,9 @@ WorkItem::State WorkItem::step()
}
}
+ if (m_state == FINISHED)
+ m_context->notifyWorkItemComplete(this);
+
return m_state;
}
@@ -663,9 +727,9 @@ INSTRUCTION(call)
// Check if function has definition
if (!function->isDeclaration())
{
- m_position->callStack.push(m_position->currInst);
+ m_position->callStack.push(&*m_position->currInst);
m_position->allocations.push(list<size_t>());
- m_position->nextBlock = function->begin();
+ m_position->nextBlock = &*function->begin();
// Set function arguments
llvm::Function::const_arg_iterator argItr;
@@ -673,7 +737,30 @@ INSTRUCTION(call)
argItr != function->arg_end(); argItr++)
{
const llvm::Value *arg = callInst->getArgOperand(argItr->getArgNo());
- setValue(argItr, m_pool.clone(getOperand(arg)));
+ TypedValue value = getOperand(arg);
+
+ if (argItr->hasByValAttr())
+ {
+ // Make new copy of value in private memory
+ void *data = m_privateMemory->getPointer(value.getPointer());
+ size_t size = getTypeSize(argItr->getType()->getPointerElementType());
+ size_t ptr = m_privateMemory->allocateBuffer(size, 0, (uint8_t*)data);
+ m_position->allocations.top().push_back(ptr);
+
+ // Pass new allocation to function
+ TypedValue address =
+ {
+ sizeof(size_t),
+ 1,
+ m_pool.alloc(sizeof(size_t))
+ };
+ address.setPointer(ptr);
+ setValue(&*argItr, address);
+ }
+ else
+ {
+ setValue(&*argItr, m_pool.clone(value));
+ }
}
return;
@@ -780,20 +867,19 @@ INSTRUCTION(fcmp)
r = a <= b;
break;
case llvm::CmpInst::FCMP_FALSE:
+ case llvm::CmpInst::FCMP_UNO:
r = false;
break;
case llvm::CmpInst::FCMP_TRUE:
- r = true;
- break;
case llvm::CmpInst::FCMP_ORD:
- case llvm::CmpInst::FCMP_UNO:
+ r = true;
break;
default:
FATAL_ERROR("Unsupported FCmp predicate: %d", pred);
}
// Deal with NaN operands
- if (::isnan(a) || ::isnan(b))
+ if (std::isnan(a) || std::isnan(b))
{
r = !llvm::CmpInst::isOrdered(pred);
}
@@ -1057,10 +1143,14 @@ INSTRUCTION(load)
{
const llvm::LoadInst *loadInst = (const llvm::LoadInst*)instruction;
unsigned addressSpace = loadInst->getPointerAddressSpace();
- size_t address = getOperand(loadInst->getPointerOperand()).getPointer();
+ const llvm::Value *opPtr = loadInst->getPointerOperand();
+ size_t address = getOperand(opPtr).getPointer();
// Check address is correctly aligned
- if (address & (loadInst->getAlignment()-1))
+ unsigned alignment = loadInst->getAlignment();
+ if (!alignment)
+ alignment = getTypeAlignment(opPtr->getType()->getPointerElementType());
+ if (address & (alignment-1))
{
m_context->logError("Invalid memory load - source pointer is "
"not aligned to the pointed type");
@@ -1116,7 +1206,8 @@ INSTRUCTION(ret)
if (!m_position->callStack.empty())
{
- m_position->currInst = m_position->callStack.top();
+ m_position->currInst =
+ llvm::BasicBlock::const_iterator(m_position->callStack.top());
m_position->currBlock = m_position->currInst->getParent();
m_position->callStack.pop();
@@ -1124,7 +1215,7 @@ INSTRUCTION(ret)
const llvm::Value *returnVal = retInst->getReturnValue();
if (returnVal)
{
- setValue(m_position->currInst, m_pool.clone(getOperand(returnVal)));
+ setValue(&*m_position->currInst, m_pool.clone(getOperand(returnVal)));
}
// Clear stack allocations
@@ -1141,7 +1232,6 @@ INSTRUCTION(ret)
m_position->nextBlock = NULL;
m_state = FINISHED;
m_workGroup->notifyFinished(this);
- m_context->notifyWorkItemComplete(this);
}
}
@@ -1270,10 +1360,14 @@ INSTRUCTION(store)
{
const llvm::StoreInst *storeInst = (const llvm::StoreInst*)instruction;
unsigned addressSpace = storeInst->getPointerAddressSpace();
- size_t address = getOperand(storeInst->getPointerOperand()).getPointer();
+ const llvm::Value *opPtr = storeInst->getPointerOperand();
+ size_t address = getOperand(opPtr).getPointer();
// Check address is correctly aligned
- if (address & (storeInst->getAlignment()-1))
+ unsigned alignment = storeInst->getAlignment();
+ if (!alignment)
+ alignment = getTypeAlignment(opPtr->getType()->getPointerElementType());
+ if (address & (alignment-1))
{
m_context->logError("Invalid memory store - source pointer is "
"not aligned to the pointed type");
@@ -1322,7 +1416,11 @@ INSTRUCTION(uitofp)
TypedValue op = getOperand(instruction->getOperand(0));
for (unsigned i = 0; i < result.num; i++)
{
- result.setFloat(op.getUInt(i), i);
+ uint64_t in = op.getUInt(i);
+ if (result.size == 4)
+ result.setFloat(in ? (float)in : 0.f, i);
+ else
+ result.setFloat(in ? (double)in : 0.0, i);
}
}
@@ -1365,7 +1463,7 @@ InterpreterCache::InterpreterCache(llvm::Function *kernel)
llvm::Module::const_global_iterator G;
for (G = module->global_begin(); G != module->global_end(); G++)
{
- addValueID(G);
+ addValueID(&*G);
}
@@ -1385,7 +1483,7 @@ InterpreterCache::InterpreterCache(llvm::Function *kernel)
llvm::Function::arg_iterator A;
for (A = function->arg_begin(); A != function->arg_end(); A++)
{
- addValueID(A);
+ addValueID(&*A);
}
// Iterate through instructions in function
@@ -1573,8 +1671,6 @@ bool InterpreterCache::hasValue(const llvm::Value *value) const
void InterpreterCache::addOperand(const llvm::Value *operand)
{
- addValueID(operand);
-
// Resolve constants
if (operand->getValueID() == llvm::Value::UndefValueVal ||
operand->getValueID() == llvm::Value::ConstantAggregateZeroVal ||
@@ -1595,8 +1691,7 @@ void InterpreterCache::addOperand(const llvm::Value *operand)
const llvm::ConstantExpr *expr = (const llvm::ConstantExpr*)operand;
if (!m_constExpressions.count(expr))
{
- for (llvm::User::const_op_iterator O = expr->op_begin();
- O != expr->op_end(); O++)
+ for (auto O = expr->op_begin(); O != expr->op_end(); O++)
{
addOperand(*O);
}
@@ -1604,57 +1699,8 @@ void InterpreterCache::addOperand(const llvm::Value *operand)
// TODO: Resolve actual value?
}
}
-}
-
-
-//////////////////////////
-// WorkItem::MemoryPool //
-//////////////////////////
-
-WorkItem::MemoryPool::MemoryPool(size_t blockSize) : m_blockSize(blockSize)
-{
- // Force first allocation to create new block
- m_offset = m_blockSize;
-}
-
-WorkItem::MemoryPool::~MemoryPool()
-{
- list<unsigned char*>::iterator itr;
- for (itr = m_blocks.begin(); itr != m_blocks.end(); itr++)
- {
- delete[] *itr;
- }
-}
-
-unsigned char* WorkItem::MemoryPool::alloc(size_t size)
-{
- // Check if requested size larger than block size
- if (size > m_blockSize)
- {
- // Oversized buffers allocated separately from main pool
- unsigned char *buffer = new unsigned char[size];
- m_blocks.push_back(buffer);
- return buffer;
- }
-
- // Check if enough space in current block
- if (m_offset + size > m_blockSize)
+ else
{
- // Allocate new block
- m_blocks.push_front(new unsigned char[m_blockSize]);
- m_offset = 0;
+ addValueID(operand);
}
- unsigned char *buffer = m_blocks.front() + m_offset;
- m_offset += size;
- return buffer;
-}
-
-TypedValue WorkItem::MemoryPool::clone(const TypedValue& source)
-{
- TypedValue dest;
- dest.size = source.size;
- dest.num = source.num;
- dest.data = alloc(dest.size*dest.num);
- memcpy(dest.data, source.data, dest.size*dest.num);
- return dest;
}
diff --git a/src/core/WorkItem.h b/src/core/WorkItem.h
index ae8380c..738df37 100644
--- a/src/core/WorkItem.h
+++ b/src/core/WorkItem.h
@@ -1,5 +1,5 @@
// WorkItem.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -10,6 +10,7 @@
namespace llvm
{
+ class BasicBlock;
class CallInst;
class ConstExpr;
class DbgValueInst;
@@ -28,17 +29,17 @@ namespace oclgrind
class WorkItemBuiltins;
// Data structures for builtin functions
- typedef struct _BuiltinFunction
+ struct BuiltinFunction
{
void (*func)(WorkItem*, const llvm::CallInst*,
const std::string&, const std::string&, TypedValue&, void*);
void *op;
- _BuiltinFunction(){};
- _BuiltinFunction(void (*f)(WorkItem*, const llvm::CallInst*,
+ BuiltinFunction(){};
+ BuiltinFunction(void (*f)(WorkItem*, const llvm::CallInst*,
const std::string&, const std::string&, TypedValue&,
void*),
void *o) : func(f), op(o) {};
- } BuiltinFunction;
+ };
typedef std::unordered_map<std::string,BuiltinFunction> BuiltinFunctionMap;
typedef std::list< std::pair<std::string, BuiltinFunction> >
BuiltinFunctionPrefixList;
@@ -50,11 +51,11 @@ namespace oclgrind
class InterpreterCache
{
public:
- typedef struct
+ struct Builtin
{
BuiltinFunction function;
std::string name, overload;
- } Builtin;
+ };
InterpreterCache(llvm::Function *kernel);
~InterpreterCache();
@@ -93,20 +94,6 @@ namespace oclgrind
public:
enum State {READY, BARRIER, FINISHED};
- private:
- class MemoryPool
- {
- public:
- MemoryPool(size_t blockSize = 1024);
- ~MemoryPool();
- unsigned char* alloc(size_t size);
- TypedValue clone(const TypedValue& source);
- private:
- size_t m_blockSize;
- size_t m_offset;
- std::list<unsigned char *> m_blocks;
- } mutable m_pool;
-
public:
WorkItem(const KernelInvocation *kernelInvocation,
WorkGroup *workGroup, Size3 lid);
@@ -116,11 +103,13 @@ namespace oclgrind
void dispatch(const llvm::Instruction *instruction, TypedValue& result);
void execute(const llvm::Instruction *instruction);
const std::stack<const llvm::Instruction*>& getCallStack() const;
+ const llvm::BasicBlock* getCurrentBlock() const;
const llvm::Instruction* getCurrentInstruction() const;
Size3 getGlobalID() const;
size_t getGlobalIndex() const;
Size3 getLocalID() const;
TypedValue getOperand(const llvm::Value *operand) const;
+ const llvm::BasicBlock* getPreviousBlock() const;
Memory* getPrivateMemory() const;
State getState() const;
const unsigned char* getValueData(const llvm::Value *value) const;
@@ -195,6 +184,7 @@ namespace oclgrind
const KernelInvocation *m_kernelInvocation;
Memory *m_privateMemory;
WorkGroup *m_workGroup;
+ mutable MemoryPool m_pool;
State m_state;
struct Position;
diff --git a/src/core/WorkItemBuiltins.cpp b/src/core/WorkItemBuiltins.cpp
index cce6da6..0bd7837 100644
--- a/src/core/WorkItemBuiltins.cpp
+++ b/src/core/WorkItemBuiltins.cpp
@@ -1,14 +1,18 @@
// WorkItemBuiltins.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
+#include "config.h"
#include "common.h"
+
#include <algorithm>
+#include <float.h>
#include <fenv.h>
+#include <math.h>
#include <mutex>
#include "llvm/IR/Instructions.h"
@@ -258,8 +262,8 @@ namespace oclgrind
address += sizeof(size_t);
}
workItem->m_state = WorkItem::BARRIER;
- workItem->m_workGroup->notifyBarrier(workItem, callInst,
- CLK_LOCAL_MEM_FENCE, events);
+ workItem->m_workGroup->notifyBarrier(
+ workItem, callInst, CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, events);
}
DEFINE_BUILTIN(prefetch)
@@ -450,7 +454,7 @@ namespace oclgrind
static double _sign_(double x)
{
- if (::isnan(x)) return 0.0;
+ if (std::isnan(x)) return 0.0;
if (x > 0.0) return 1.0;
if (x == -0.0) return -0.0;
if (x == 0.0) return 0.0;
@@ -646,6 +650,39 @@ namespace oclgrind
result.setFloat(r);
}
+ static double geometric_length(double *values, unsigned num)
+ {
+ double lengthSq = 0.0;
+ for (unsigned i = 0; i < num; i++)
+ {
+ lengthSq += values[i] * values[i];
+ }
+
+ // Check for overflow/underflow
+ double rescale = 1.0;
+ if (lengthSq == INFINITY)
+ {
+ rescale = ldexp(1.0, -512);
+ }
+ else if (lengthSq < num*DBL_MIN/DBL_EPSILON)
+ {
+ rescale = ldexp(1.0, 640);
+ }
+
+ if (rescale != 1.0)
+ {
+ // Re-do calculations with a rescaling multiplier
+ lengthSq = 0.0;
+ for (unsigned i = 0; i < num; i++)
+ {
+ double f = values[i] * rescale;
+ lengthSq += f*f;
+ }
+ }
+
+ return sqrt(lengthSq) * (1.0/rescale);
+ }
+
DEFINE_BUILTIN(distance)
{
unsigned num = 1;
@@ -654,13 +691,12 @@ namespace oclgrind
num = ARG(0)->getType()->getVectorNumElements();
}
- double distSq = 0.0;
+ double values[4];
for (unsigned i = 0; i < num; i++)
{
- double diff = FARGV(0,i) - FARGV(1,i);
- distSq += diff*diff;
+ values[i] = FARGV(0, i) - FARGV(1, i);
}
- result.setFloat(sqrt(distSq));
+ result.setFloat(geometric_length(values, num));
}
DEFINE_BUILTIN(length)
@@ -671,26 +707,79 @@ namespace oclgrind
num = ARG(0)->getType()->getVectorNumElements();
}
- double lengthSq = 0.0;
+ double values[4];
for (unsigned i = 0; i < num; i++)
{
- lengthSq += FARGV(0, i) * FARGV(0, i);
+ values[i] = FARGV(0, i);
}
- result.setFloat(sqrt(lengthSq));
+ result.setFloat(geometric_length(values, num));
}
DEFINE_BUILTIN(normalize)
{
+ double values[4];
double lengthSq = 0.0;
for (unsigned i = 0; i < result.num; i++)
{
- lengthSq += FARGV(0, i) * FARGV(0, i);
+ values[i] = FARGV(0, i);
+ lengthSq += values[i] * values[i];
}
- double length = sqrt(lengthSq);
+ if (lengthSq == INFINITY)
+ {
+ // Re-do calculations with a rescaling multiplier
+ lengthSq = 0.0;
+ double rescale = ldexp(1.0, -512);
+ for (unsigned i = 0; i < result.num; i++)
+ {
+ values[i] = values[i] * rescale;
+ lengthSq += values[i] * values[i];
+ }
+
+ if (lengthSq == INFINITY)
+ {
+ // Infinities in input, set all other values to 0
+ lengthSq = 0.0;
+ for (unsigned i = 0; i < result.num; i++)
+ {
+ if (std::isinf(values[i]))
+ {
+ values[i] = copysign(1.0, FARGV(0, i));
+ lengthSq += 1.0;
+ }
+ else
+ {
+ values[i] = copysign(0.0, FARGV(0, i));
+ }
+ }
+ }
+ }
+ else if (lengthSq < result.num*DBL_MIN/DBL_EPSILON)
+ {
+ // Re-do calculations with a rescaling multiplier
+ lengthSq = 0.0;
+ double rescale = ldexp(1.0, 640);
+ for (unsigned i = 0; i < result.num; i++)
+ {
+ values[i] = values[i] * rescale;
+ lengthSq += values[i] * values[i];
+ }
+
+ if (lengthSq == 0.0)
+ {
+ // Zeros in input, copy vector unchanged
+ for (unsigned i = 0; i < result.num; i++)
+ {
+ result.setFloat(FARGV(0, i), i);
+ }
+ return;
+ }
+ }
+
+ double length = sqrt(lengthSq);
for (unsigned i = 0; i < result.num; i++)
{
- result.setFloat(FARGV(0, i)/length, i);
+ result.setFloat(values[i]/length, i);
}
}
@@ -992,7 +1081,7 @@ namespace oclgrind
}
// Remap channels
- float ret;
+ float ret = 0.f;
int channel = getInputChannel(image->format, c, &ret);
if (channel < 0)
{
@@ -1066,7 +1155,7 @@ namespace oclgrind
}
// Remap channels
- float ret;
+ float ret = 0.f;
int channel = getInputChannel(image->format, c, &ret);
if (channel < 0)
{
@@ -1131,7 +1220,7 @@ namespace oclgrind
}
// Remap channels
- float ret;
+ float ret = 0.f;
int channel = getInputChannel(image->format, c, &ret);
if (channel < 0)
{
@@ -1198,6 +1287,12 @@ namespace oclgrind
+ a * b * c * v111;
}
+ DEFINE_BUILTIN(translate_sampler_initializer)
+ {
+ // A sampler initializer is just a pointer to its ConstantInt object
+ result.setPointer((size_t)ARG(0));
+ }
+
DEFINE_BUILTIN(read_imagef)
{
const Image *image = *(Image**)(workItem->getValue(ARG(0)).data);
@@ -1208,7 +1303,11 @@ namespace oclgrind
// Check for sampler version
if (callInst->getNumArgOperands() > 2)
{
+#if LLVM_VERSION < 40
sampler = UARG(1);
+#else
+ sampler = ((llvm::ConstantInt*)PARG(1))->getZExtValue();
+#endif
coordIndex = 2;
}
@@ -1326,7 +1425,11 @@ namespace oclgrind
// Check for sampler version
if (callInst->getNumArgOperands() > 2)
{
+#if LLVM_VERSION < 40
sampler = UARG(1);
+#else
+ sampler = ((llvm::ConstantInt*)PARG(1))->getZExtValue();
+#endif
coordIndex = 2;
}
@@ -1399,7 +1502,11 @@ namespace oclgrind
// Check for sampler version
if (callInst->getNumArgOperands() > 2)
{
+#if LLVM_VERSION < 40
sampler = UARG(1);
+#else
+ sampler = ((llvm::ConstantInt*)PARG(1))->getZExtValue();
+#endif
coordIndex = 2;
}
@@ -1888,8 +1995,8 @@ namespace oclgrind
{
uint64_t a = UARGV(0, i);
uint64_t b = UARGV(1, i);
- uint64_t c = (a > UINT64_MAX-b) ? (1L<<63) : 0;
- result.setUInt(((a + b) >> 1) | c, i);
+ uint64_t c = (a & b) & 1;
+ result.setUInt((a>>1) + (b>>1) + c, i);
break;
}
case 'c':
@@ -2128,8 +2235,8 @@ namespace oclgrind
{
uint64_t a = UARGV(0, i);
uint64_t b = UARGV(1, i);
- uint64_t c = (a > UINT64_MAX-(b+1)) ? (1L<<63) : 0;
- result.setUInt(((a + b + 1) >> 1) | c, i);
+ uint64_t c = (a | b) & 1;
+ result.setUInt((a>>1) + (b>>1) + c, i);
break;
}
case 'c':
@@ -2239,9 +2346,15 @@ namespace oclgrind
static double _sinpi_(double x){ return (sin(x * M_PI)); }
static double _tanpi_(double x){ return (tan(x * M_PI)); }
- static double _fma_(double a, double b, double c)
+ DEFINE_BUILTIN(fma_builtin)
{
- return a*b + c;
+ for (unsigned i = 0; i < result.num; i++)
+ {
+ if (result.size == 4)
+ result.setFloat(fmaf(FARGV(0, i), FARGV(1, i), FARGV(2, i)), i);
+ else
+ result.setFloat(fma(FARGV(0, i), FARGV(1, i), FARGV(2, i)), i);
+ }
}
static double _maxmag_(double x, double y)
@@ -2289,12 +2402,25 @@ namespace oclgrind
for (unsigned i = 0; i < result.num; i++)
{
double x = FARGV(0, i);
- double fl = floor(x);
-#if defined(_WIN32) && !defined(__MINGW32__)
- double r = fmin(x - fl, nextafter(1, 0));
-#else
- double r = fmin(x - fl, 0x1.fffffep-1f);
-#endif
+ double fl, r;
+ if (std::isnan(x))
+ {
+ r = nan("");
+ fl = nan("");
+ }
+ else
+ {
+ if (result.size == 4)
+ {
+ fl = floorf(x);
+ r = fmin(x - fl, nextafterf(1, 0));
+ }
+ else
+ {
+ fl = floor(x);
+ r = fmin(x - fl, nextafter(1, 0));
+ }
+ }
size_t offset = i*result.size;
result.setFloat(fl, i);
@@ -2359,7 +2485,7 @@ namespace oclgrind
{
double x = FARGV(0, i);
double integral = trunc(x);
- double fractional = copysign(::isinf(x) ? 0.0 : x - integral, x);
+ double fractional = copysign(std::isinf(x) ? 0.0 : x - integral, x);
size_t offset = i*result.size;
result.setFloat(integral, i);
@@ -2397,6 +2523,59 @@ namespace oclgrind
}
}
+ DEFINE_BUILTIN(powr)
+ {
+ for (unsigned i = 0; i < result.num; i++)
+ {
+ double x = FARGV(0, i);
+ double y = FARGV(1, i);
+
+ double r;
+ if (x < 0.0)
+ {
+ r = nan("");
+ }
+ else if (std::isnan(x) || std::isnan(y))
+ {
+ r = nan("");
+ }
+ else if (x == 1.0)
+ {
+ if (std::isinf(y))
+ r = nan("");
+ else
+ r = 1.0;
+ }
+ else if (y == 0.0)
+ {
+ if (x == 0.0 || x == INFINITY)
+ r = nan("");
+ else
+ r = 1.0;
+ }
+ else if (x == 0.0)
+ {
+ if (y < 0.0)
+ r = INFINITY;
+ else
+ r = 0.0;
+ }
+ else if (x == INFINITY)
+ {
+ if (y < 0.0)
+ r = 0.0;
+ else
+ r = INFINITY;
+ }
+ else
+ {
+ r = pow(x, y);
+ }
+
+ result.setFloat(r, i);
+ }
+ }
+
DEFINE_BUILTIN(remquo_builtin)
{
Memory *memory =
@@ -2419,9 +2598,43 @@ namespace oclgrind
{
for (unsigned i = 0; i < result.num; i++)
{
- double x = FARGV(0, i);
- int y = SARGV(1, i);
- result.setFloat(pow(x, (double)(1.0/y)), i);
+ long double x = FARGV(0, i);
+ int n = SARGV(1, i);
+
+ long double r;
+ if (n == 0)
+ {
+ r = nan("");
+ }
+ else if (x == 0)
+ {
+ if (n < 0)
+ {
+ if (n&1)
+ r = copysign(INFINITY, x);
+ else
+ r = INFINITY;
+ }
+ else
+ {
+ if (n&1)
+ r = x;
+ else
+ r = 0.0;
+ }
+ }
+ else if (x < 0 && !(n&1))
+ {
+ r = nan("");
+ }
+ else
+ {
+ r = pow(fabs(x), 1.0L/n);
+ if (x < 0 && n&1)
+ r = -r;
+ }
+
+ result.setFloat(r, i);
}
}
@@ -2488,8 +2701,8 @@ namespace oclgrind
static int64_t _isle_(double x, double y){ return islessequal(x, y); }
static int64_t _islg_(double x, double y){ return islessgreater(x, y); }
static int64_t _isfin_(double x){ return isfinite(x); }
- static int64_t _isinf_(double x){ return ::isinf(x); }
- static int64_t _isnan_(double x){ return ::isnan(x); }
+ static int64_t _isinf_(double x){ return std::isinf(x); }
+ static int64_t _isnan_(double x){ return std::isnan(x); }
static int64_t _isnorm_(double x){ return isnormal(x); }
static int64_t _isord_(double x, double y){ return !isunordered(x, y); }
static int64_t _isuord_(double x, double y){ return isunordered(x, y); }
@@ -2697,11 +2910,13 @@ namespace oclgrind
uint64_t offset = UARG(1);
// Convert to halfs
- unsigned char *data = workItem->getOperand(value).data;
- size_t num = size / sizeof(float);
- size = num*sizeof(cl_half);
- uint16_t *halfData = (uint16_t*)workItem->m_pool.alloc(2*num);
- HalfRoundMode rmode = Half_RTE; // The Oclgrind device's round mode
+ TypedValue op = workItem->getOperand(value);
+ unsigned char *data = op.data;
+ size = op.num*sizeof(cl_half);
+ uint16_t *halfData = (uint16_t*)workItem->m_pool.alloc(2*op.num);
+
+ // Parse rounding mode (RTE is the default)
+ HalfRoundMode rmode = Half_RTE;
if (fnName.find("_rtz") != std::string::npos)
rmode = Half_RTZ;
else if (fnName.find("_rtn") != std::string::npos)
@@ -2709,19 +2924,22 @@ namespace oclgrind
else if (fnName.find("_rtp") != std::string::npos)
rmode = Half_RTP;
- for (unsigned i = 0; i < num; i++)
+ for (unsigned i = 0; i < op.num; i++)
{
- halfData[i] = floatToHalf(((float*)data)[i], rmode);
+ if (op.size == 4)
+ halfData[i] = floatToHalf(((float*)data)[i], rmode);
+ else
+ halfData[i] = doubleToHalf(((double*)data)[i], rmode);
}
size_t address;
- if (fnName.compare(0, 7, "vstorea") == 0 && num == 3)
+ if (fnName.compare(0, 7, "vstorea") == 0 && op.num == 3)
{
address = base + offset*sizeof(cl_half)*4;
}
else
{
- address = base + offset*sizeof(cl_half)*num;
+ address = base + offset*sizeof(cl_half)*op.num;
}
workItem->getMemory(addressSpace)->store((unsigned char*)halfData,
@@ -2798,8 +3016,41 @@ namespace oclgrind
// Other Functions //
/////////////////////
+ static void setConvertRoundingMode(const string& name, int def)
+ {
+ size_t rpos = name.find("_rt");
+ if (rpos != string::npos)
+ {
+ switch (name[rpos+3])
+ {
+ case 'e':
+ fesetround(FE_TONEAREST);
+ break;
+ case 'z':
+ fesetround(FE_TOWARDZERO);
+ break;
+ case 'p':
+ fesetround(FE_UPWARD);
+ break;
+ case 'n':
+ fesetround(FE_DOWNWARD);
+ break;
+ default:
+ FATAL_ERROR("Unsupported rounding mode: %c", name[rpos+3]);
+ }
+ }
+ else
+ {
+ fesetround(def);
+ }
+ }
+
DEFINE_BUILTIN(convert_float)
{
+ // Use rounding mode
+ const int origRnd = fegetround();
+ setConvertRoundingMode(fnName, FE_TONEAREST);
+
for (unsigned i = 0; i < result.num; i++)
{
switch (getOverloadArgType(overload))
@@ -2808,13 +3059,19 @@ namespace oclgrind
case 't':
case 'j':
case 'm':
- result.setFloat((float)UARGV(0, i), i);
+ {
+ uint64_t in = UARGV(0, i);
+ if (result.size == 4)
+ result.setFloat(in ? (float)in : 0.f, i);
+ else
+ result.setFloat(in ? (double)in : 0.0, i);
break;
+ }
case 'c':
case 's':
case 'i':
case 'l':
- result.setFloat((float)SARGV(0, i), i);
+ result.setFloat(SARGV(0, i), i);
break;
case 'f':
case 'd':
@@ -2825,6 +3082,7 @@ namespace oclgrind
getOverloadArgType(overload));
}
}
+ fesetround(origRnd);
}
DEFINE_BUILTIN(convert_half)
@@ -2865,44 +3123,32 @@ namespace oclgrind
}
}
- static void setConvertRoundingMode(const string& name)
- {
- size_t rpos = name.find("_rt");
- if (rpos != string::npos)
- {
- switch (name[rpos+3])
- {
- case 'e':
- fesetround(FE_TONEAREST);
- break;
- case 'z':
- fesetround(FE_TOWARDZERO);
- break;
- case 'p':
- fesetround(FE_UPWARD);
- break;
- case 'n':
- fesetround(FE_DOWNWARD);
- break;
- default:
- FATAL_ERROR("Unsupported rounding mode: %c", name[rpos=3]);
- }
- }
- else
- {
- fesetround(FE_TOWARDZERO);
- }
- }
-
DEFINE_BUILTIN(convert_uint)
{
// Check for saturation modifier
bool sat = fnName.find("_sat") != string::npos;
- uint64_t max = (1UL<<(result.size*8)) - 1;
+ uint64_t max;
+ switch (result.size)
+ {
+ case 1:
+ max = UINT8_MAX;
+ break;
+ case 2:
+ max = UINT16_MAX;
+ break;
+ case 4:
+ max = UINT32_MAX;
+ break;
+ case 8:
+ max = UINT64_MAX;
+ break;
+ default:
+ FATAL_ERROR("Unsupported integer size %d", result.size);
+ }
// Use rounding mode
const int origRnd = fegetround();
- setConvertRoundingMode(fnName);
+ setConvertRoundingMode(fnName, FE_TOWARDZERO);
for (unsigned i = 0; i < result.num; i++)
{
@@ -2943,7 +3189,8 @@ namespace oclgrind
case 'd':
if (sat)
{
- r = rint(_clamp_(FARGV(0, i), 0.0, (double)max));
+ r = rint(_clamp_((long double)FARGV(0, i),
+ 0.0L, (long double)max));
}
else
{
@@ -2983,11 +3230,13 @@ namespace oclgrind
min = INT64_MIN;
max = INT64_MAX;
break;
+ default:
+ FATAL_ERROR("Unsupported integer size %d", result.size);
}
// Use rounding mode
const int origRnd = fegetround();
- setConvertRoundingMode(fnName);
+ setConvertRoundingMode(fnName, FE_TOWARDZERO);
for (unsigned i = 0; i < result.num; i++)
{
@@ -3018,7 +3267,8 @@ namespace oclgrind
case 'd':
if (sat)
{
- r = rint(_clamp_(FARGV(0, i), (double)min, (double)max));
+ r = rint(_clamp_((long double)FARGV(0, i),
+ (long double)min, (long double)max));
}
else
{
@@ -3185,6 +3435,17 @@ namespace oclgrind
// LLVM Intrinsics //
/////////////////////
+ DEFINE_BUILTIN(llvm_bswap)
+ {
+ uint64_t r = 0;
+ uint64_t value = UARG(0);
+ for (unsigned i = 0; i < result.size; i++)
+ {
+ r |= ((value>>(i*8)) & 0xFF) << ((result.size - i - 1)*8);
+ }
+ result.setUInt(r);
+ }
+
DEFINE_BUILTIN(llvm_dbg_declare)
{
const llvm::DbgDeclareInst *dbgInst =
@@ -3371,6 +3632,10 @@ namespace oclgrind
ADD_BUILTIN("write_imagef", write_imagef, NULL);
ADD_BUILTIN("write_imagei", write_imagei, NULL);
ADD_BUILTIN("write_imageui", write_imageui, NULL);
+#if LLVM_VERSION >= 40
+ ADD_BUILTIN("__translate_sampler_initializer",
+ translate_sampler_initializer, NULL);
+#endif
// Integer Functions
ADD_BUILTIN("abs", abs_builtin, NULL);
@@ -3416,7 +3681,7 @@ namespace oclgrind
ADD_BUILTIN("fabs", f1arg, F1ARG(fabs));
ADD_BUILTIN("fdim", f2arg, F2ARG(fdim));
ADD_BUILTIN("floor", f1arg, F1ARG(floor));
- ADD_BUILTIN("fma", f3arg, F3ARG(_fma_));
+ ADD_BUILTIN("fma", fma_builtin, NULL);
ADD_BUILTIN("fmax", f2arg, F2ARG(fmax));
ADD_BUILTIN("fmin", f2arg, F2ARG(fmin));
ADD_BUILTIN("fmod", f2arg, F2ARG(fmod));
@@ -3432,7 +3697,7 @@ namespace oclgrind
ADD_BUILTIN("log10", f1arg, F1ARG(log10));
ADD_BUILTIN("log1p", f1arg, F1ARG(log1p));
ADD_BUILTIN("logb", f1arg, F1ARG(logb));
- ADD_BUILTIN("mad", f3arg, F3ARG(_fma_));
+ ADD_BUILTIN("mad", fma_builtin, NULL);
ADD_BUILTIN("maxmag", f2arg, _maxmag_);
ADD_BUILTIN("minmag", f2arg, _minmag_);
ADD_BUILTIN("modf", modf_builtin, NULL);
@@ -3441,7 +3706,7 @@ namespace oclgrind
ADD_BUILTIN("nextafter", nextafter_builtin, NULL);
ADD_BUILTIN("pow", f2arg, F2ARG(pow));
ADD_BUILTIN("pown", pown, NULL);
- ADD_BUILTIN("powr", f2arg, F2ARG(pow));
+ ADD_BUILTIN("powr", powr, NULL);
ADD_BUILTIN("remainder", f2arg, F2ARG(remainder));
ADD_BUILTIN("remquo", remquo_builtin, NULL);
ADD_BUILTIN("rint", f1arg, F1ARG(rint));
@@ -3476,8 +3741,8 @@ namespace oclgrind
ADD_BUILTIN("native_log2", f1arg, F1ARG(log2));
ADD_BUILTIN("half_log10", f1arg, F1ARG(log10));
ADD_BUILTIN("native_log10", f1arg, F1ARG(log10));
- ADD_BUILTIN("half_powr", f2arg, F2ARG(pow));
- ADD_BUILTIN("native_powr", f2arg, F2ARG(pow));
+ ADD_BUILTIN("half_powr", powr, NULL);
+ ADD_BUILTIN("native_powr", powr, NULL);
ADD_BUILTIN("half_recip", f1arg, _frecip_);
ADD_BUILTIN("native_recip", f1arg, _frecip_);
ADD_BUILTIN("half_rsqrt", f1arg, _rsqrt_);
@@ -3546,14 +3811,16 @@ namespace oclgrind
ADD_BUILTIN("printf", printf_builtin, NULL);
// LLVM Intrinsics
+ ADD_PREFIX_BUILTIN("llvm.bswap.", llvm_bswap, NULL);
ADD_BUILTIN("llvm.dbg.declare", llvm_dbg_declare, NULL);
ADD_BUILTIN("llvm.dbg.value", llvm_dbg_value, NULL);
+ ADD_PREFIX_BUILTIN("llvm.fabs.f", f1arg, F1ARG(fabs));
ADD_BUILTIN("llvm.lifetime.start", llvm_lifetime_start, NULL);
ADD_BUILTIN("llvm.lifetime.end", llvm_lifetime_end, NULL);
ADD_PREFIX_BUILTIN("llvm.memcpy", llvm_memcpy, NULL);
ADD_PREFIX_BUILTIN("llvm.memmove", llvm_memcpy, NULL);
ADD_PREFIX_BUILTIN("llvm.memset", llvm_memset, NULL);
- ADD_PREFIX_BUILTIN("llvm.fmuladd", f3arg, F3ARG(_fma_));
+ ADD_PREFIX_BUILTIN("llvm.fmuladd", fma_builtin, NULL);
ADD_BUILTIN("llvm.trap", llvm_trap, NULL);
return builtins;
diff --git a/src/core/clc.h b/src/core/clc.h
index 320ddce..71fe306 100644
--- a/src/core/clc.h
+++ b/src/core/clc.h
@@ -1,5 +1,5 @@
// clc.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -421,37 +421,41 @@ BUILTIN_1ARG_FLOATS(fast_normalize);
// Image Functions //
/////////////////////
-size_t __OVERLOAD__ get_image_array_size(image1d_array_t image);
-size_t __OVERLOAD__ get_image_array_size(image2d_array_t image);
-
-int __OVERLOAD__ get_image_channel_data_type(image1d_t image);
-int __OVERLOAD__ get_image_channel_data_type(image1d_buffer_t image);
-int __OVERLOAD__ get_image_channel_data_type(image1d_array_t image);
-int __OVERLOAD__ get_image_channel_data_type(image2d_t image);
-int __OVERLOAD__ get_image_channel_data_type(image2d_array_t image);
-int __OVERLOAD__ get_image_channel_data_type(image3d_t image);
-
-int __OVERLOAD__ get_image_channel_order(image1d_t image);
-int __OVERLOAD__ get_image_channel_order(image1d_buffer_t image);
-int __OVERLOAD__ get_image_channel_order(image1d_array_t image);
-int __OVERLOAD__ get_image_channel_order(image2d_t image);
-int __OVERLOAD__ get_image_channel_order(image2d_array_t image);
-int __OVERLOAD__ get_image_channel_order(image3d_t image);
-
-int2 __OVERLOAD__ get_image_dim(image2d_t image);
-int2 __OVERLOAD__ get_image_dim(image2d_array_t image);
-int4 __OVERLOAD__ get_image_dim(image3d_t image);
-
-int __OVERLOAD__ get_image_depth(image3d_t image);
-int __OVERLOAD__ get_image_height(image2d_t image);
-int __OVERLOAD__ get_image_height(image2d_array_t image);
-int __OVERLOAD__ get_image_height(image3d_t image);
-int __OVERLOAD__ get_image_width(image1d_t image);
-int __OVERLOAD__ get_image_width(image1d_buffer_t image);
-int __OVERLOAD__ get_image_width(image1d_array_t image);
-int __OVERLOAD__ get_image_width(image2d_t image);
-int __OVERLOAD__ get_image_width(image2d_array_t image);
-int __OVERLOAD__ get_image_width(image3d_t image);
+#define IMAGE_QUERY(ret, name, type) \
+ ret __OVERLOAD__ name(read_only type image); \
+ ret __OVERLOAD__ name(write_only type image)
+
+IMAGE_QUERY(size_t, get_image_array_size, image1d_array_t);
+IMAGE_QUERY(size_t, get_image_array_size, image2d_array_t);
+
+IMAGE_QUERY(int, get_image_channel_data_type, image1d_t);
+IMAGE_QUERY(int, get_image_channel_data_type, image1d_buffer_t);
+IMAGE_QUERY(int, get_image_channel_data_type, image1d_array_t);
+IMAGE_QUERY(int, get_image_channel_data_type, image2d_t);
+IMAGE_QUERY(int, get_image_channel_data_type, image2d_array_t);
+IMAGE_QUERY(int, get_image_channel_data_type, image3d_t);
+
+IMAGE_QUERY(int, get_image_channel_order, image1d_t);
+IMAGE_QUERY(int, get_image_channel_order, image1d_buffer_t);
+IMAGE_QUERY(int, get_image_channel_order, image1d_array_t);
+IMAGE_QUERY(int, get_image_channel_order, image2d_t);
+IMAGE_QUERY(int, get_image_channel_order, image2d_array_t);
+IMAGE_QUERY(int, get_image_channel_order, image3d_t);
+
+IMAGE_QUERY(int2, get_image_dim, image2d_t);
+IMAGE_QUERY(int2, get_image_dim, image2d_array_t);
+IMAGE_QUERY(int4, get_image_dim, image3d_t);
+
+IMAGE_QUERY(int, get_image_depth, image3d_t);
+IMAGE_QUERY(int, get_image_height, image2d_t);
+IMAGE_QUERY(int, get_image_height, image2d_array_t);
+IMAGE_QUERY(int, get_image_height, image3d_t);
+IMAGE_QUERY(int, get_image_width, image1d_t);
+IMAGE_QUERY(int, get_image_width, image1d_buffer_t);
+IMAGE_QUERY(int, get_image_width, image1d_array_t);
+IMAGE_QUERY(int, get_image_width, image2d_t);
+IMAGE_QUERY(int, get_image_width, image2d_array_t);
+IMAGE_QUERY(int, get_image_width, image3d_t);
float4 __OVERLOAD__ read_imagef(image1d_t, int);
float4 __OVERLOAD__ read_imagef(image1d_buffer_t, int);
@@ -507,21 +511,21 @@ uint4 __OVERLOAD__ read_imageui(image2d_array_t, sampler_t, float4);
uint4 __OVERLOAD__ read_imageui(image3d_t, sampler_t, int4);
uint4 __OVERLOAD__ read_imageui(image3d_t, sampler_t, float4);
-void __OVERLOAD__ write_imagef(image1d_t, int, float4);
-void __OVERLOAD__ write_imagef(image1d_array_t, int2, float4);
-void __OVERLOAD__ write_imagef(image2d_t, int2, float4);
-void __OVERLOAD__ write_imagef(image2d_array_t, int4, float4);
-void __OVERLOAD__ write_imagef(image3d_t, int4, float4);
-void __OVERLOAD__ write_imagei(image1d_t, int, int4);
-void __OVERLOAD__ write_imagei(image1d_array_t, int2, int4);
-void __OVERLOAD__ write_imagei(image2d_t, int2, int4);
-void __OVERLOAD__ write_imagei(image2d_array_t, int4, int4);
-void __OVERLOAD__ write_imagei(image3d_t, int4, int4);
-void __OVERLOAD__ write_imageui(image1d_t, int, uint4);
-void __OVERLOAD__ write_imageui(image1d_array_t, int2, uint4);
-void __OVERLOAD__ write_imageui(image2d_t, int2, uint4);
-void __OVERLOAD__ write_imageui(image2d_array_t, int4, uint4);
-void __OVERLOAD__ write_imageui(image3d_t, int4, uint4);
+void __OVERLOAD__ write_imagef(write_only image1d_t, int, float4);
+void __OVERLOAD__ write_imagef(write_only image1d_array_t, int2, float4);
+void __OVERLOAD__ write_imagef(write_only image2d_t, int2, float4);
+void __OVERLOAD__ write_imagef(write_only image2d_array_t, int4, float4);
+void __OVERLOAD__ write_imagef(write_only image3d_t, int4, float4);
+void __OVERLOAD__ write_imagei(write_only image1d_t, int, int4);
+void __OVERLOAD__ write_imagei(write_only image1d_array_t, int2, int4);
+void __OVERLOAD__ write_imagei(write_only image2d_t, int2, int4);
+void __OVERLOAD__ write_imagei(write_only image2d_array_t, int4, int4);
+void __OVERLOAD__ write_imagei(write_only image3d_t, int4, int4);
+void __OVERLOAD__ write_imageui(write_only image1d_t, int, uint4);
+void __OVERLOAD__ write_imageui(write_only image1d_array_t, int2, uint4);
+void __OVERLOAD__ write_imageui(write_only image2d_t, int2, uint4);
+void __OVERLOAD__ write_imageui(write_only image2d_array_t, int4, uint4);
+void __OVERLOAD__ write_imageui(write_only image3d_t, int4, uint4);
///////////////////////
@@ -890,7 +894,9 @@ VLOADSTORE(double);
VSTORE_HALF_ADDRSPACE(func##_rtn, type);
#define VSTORE_HALF_WIDTH(n) \
VSTORE_HALF_ROUND(vstore_half##n, float##n); \
- VSTORE_HALF_ROUND(vstorea_half##n, float##n);
+ VSTORE_HALF_ROUND(vstorea_half##n, float##n); \
+ VSTORE_HALF_ROUND(vstore_half##n, double##n); \
+ VSTORE_HALF_ROUND(vstorea_half##n, double##n);
#define VLOADSTORE_HALF_WIDTH(n) \
VLOAD_HALF_WIDTH(n); \
VSTORE_HALF_WIDTH(n);
diff --git a/src/core/common.cpp b/src/core/common.cpp
index 3f849fa..a4975c7 100644
--- a/src/core/common.cpp
+++ b/src/core/common.cpp
@@ -1,11 +1,12 @@
// common.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
+#include "config.h"
#include "common.h"
#if defined(_WIN32) && !defined(__MINGW32__)
@@ -24,19 +25,19 @@ using namespace std;
namespace oclgrind
{
- _Size3_::_Size3_()
+ Size3::Size3()
{
x = y = z = 0;
}
- _Size3_::_Size3_(size_t _x, size_t _y, size_t _z)
+ Size3::Size3(size_t _x, size_t _y, size_t _z)
{
x = _x;
y = _y;
z = _z;
}
- _Size3_::_Size3_(size_t linear, _Size3_ dimensions)
+ Size3::Size3(size_t linear, Size3 dimensions)
{
x = linear % dimensions.x;
y = (linear / dimensions.x) % dimensions.y;
@@ -55,6 +56,7 @@ namespace oclgrind
return z;
default:
assert(false && "Size3 index out of range");
+ abort();
}
}
@@ -70,6 +72,7 @@ namespace oclgrind
return z;
default:
assert(false && "Size3 index out of range");
+ abort();
}
}
@@ -78,6 +81,11 @@ namespace oclgrind
return x == rhs.x && y == rhs.y && z == rhs.z;
}
+ bool Size3::operator!=(const Size3& rhs) const
+ {
+ return x != rhs.x || y != rhs.y || z != rhs.z;
+ }
+
ostream& operator<<(ostream& stream, const Size3& size)
{
stream << dec << "("
@@ -209,13 +217,68 @@ namespace oclgrind
}
}
+ ostream& operator<<(ostream& stream, const TypedValue& tv)
+ {
+ if(tv.data)
+ {
+ if(tv.num > 1)
+ {
+ stream << "(";
+ }
+
+ for(unsigned n = 0; n < tv.num; ++n)
+ {
+ for(int i = tv.size - 1; i >= 0; --i)
+ {
+ stream << hex << uppercase << setw(2) << setfill('0')
+ << (int)*(tv.data + tv.size * n + i);
+ }
+
+ if(n != tv.num - 1)
+ {
+ stream << ",";
+ }
+ }
+
+ if(tv.num > 1)
+ {
+ stream << ")";
+ }
+ }
+ else
+ {
+ stream << "NULL";
+ }
+
+ return stream;
+ }
+
+ bool TypedValue::operator==(const TypedValue& rhs) const
+ {
+ return (size == rhs.size) && (num == rhs.num) &&
+ (memcmp(data, rhs.data, size*num) == 0);
+ }
+
+ bool TypedValue::operator!=(const TypedValue& rhs) const
+ {
+ return (size != rhs.size) || (num != rhs.num) ||
+ (memcmp(data, rhs.data, size*num) != 0);
+ }
+
TypedValue TypedValue::clone() const
{
TypedValue result;
result.size = size;
result.num = num;
- result.data = new unsigned char[size*num];
- memcpy(result.data, data, size*num);
+ if (data)
+ {
+ result.data = new unsigned char[size*num];
+ memcpy(result.data, data, size*num);
+ }
+ else
+ {
+ result.data = NULL;
+ }
return result;
}
@@ -260,10 +323,27 @@ namespace oclgrind
switch (type->getTypeID())
{
case llvm::Type::IntegerTyID:
- memcpy(data,
- ((llvm::ConstantInt*)constant)->getValue().getRawData(),
- size);
+ {
+ uint64_t ui = ((llvm::ConstantInt*)constant)->getZExtValue();
+ switch (size)
+ {
+ case 1:
+ *((uint8_t*)data) = ui;
+ break;
+ case 2:
+ *((uint16_t*)data) = ui;
+ break;
+ case 4:
+ *((uint32_t*)data) = ui;
+ break;
+ case 8:
+ *((uint64_t*)data) = ui;
+ break;
+ default:
+ FATAL_ERROR("Unsupported constant int size: %u bytes", size);
+ }
break;
+ }
case llvm::Type::FloatTyID:
{
*(float*)data =
@@ -328,13 +408,8 @@ namespace oclgrind
const llvm::ConstantExpr *expr)
{
// Get operands
- unsigned numOperands = expr->getNumOperands();
- llvm::Value **valueOperands = new llvm::Value*[numOperands];
- for (unsigned i = 0; i < numOperands; i++)
- {
- valueOperands[i] = expr->getOperand(i);
- }
- llvm::ArrayRef<llvm::Value*> operands(valueOperands, numOperands);
+ vector<llvm::Value*> valueOperands(expr->op_begin(), expr->op_end());
+ llvm::ArrayRef<llvm::Value*> operands(valueOperands);
// Create instruction
unsigned opcode = expr->getOpcode();
@@ -378,7 +453,7 @@ namespace oclgrind
else
{
#if LLVM_VERSION > 36
- return llvm::GetElementPtrInst::Create(expr->getType(),
+ return llvm::GetElementPtrInst::Create(nullptr,
operands[0], operands.slice(1));
#else
return llvm::GetElementPtrInst::Create(operands[0], operands.slice(1));
@@ -387,9 +462,10 @@ namespace oclgrind
}
case llvm::Instruction::ICmp:
case llvm::Instruction::FCmp:
- return llvm::CmpInst::Create((llvm::Instruction::OtherOps)opcode,
- expr->getPredicate(),
- operands[0], operands[1]);
+ return llvm::CmpInst::Create(
+ (llvm::Instruction::OtherOps)opcode,
+ (llvm::CmpInst::Predicate)expr->getPredicate(),
+ operands[0], operands[1]);
default:
assert(expr->getNumOperands() == 2 && "Must be binary operator?");
@@ -425,10 +501,9 @@ namespace oclgrind
}
}
- const llvm::ConstantInt* getMDOpAsConstInt(const llvm::MDOperand& op)
+ const llvm::ConstantInt* getMDAsConstInt(const llvm::Metadata *md)
{
- llvm::Metadata *md = op.get();
- llvm::ConstantAsMetadata *cam =
+ const llvm::ConstantAsMetadata *cam =
llvm::dyn_cast<llvm::ConstantAsMetadata>(md);
if (!cam)
return NULL;
@@ -461,7 +536,7 @@ namespace oclgrind
}
// Unreachable
- assert(false);
+ abort();
}
unsigned getTypeSize(const llvm::Type *type)
@@ -517,16 +592,9 @@ namespace oclgrind
}
else
{
- // For some reason, getScalarSizeInBits is not const
- llvm::Type* nonConstTy = const_cast<llvm::Type*>(type);
-
// Round up for types that have a bit size not multiple of 8
// like "bool".
- unsigned ret = nonConstTy->getScalarSizeInBits() / 8;
- if (nonConstTy->getScalarSizeInBits() % 8)
- ret++;
-
- return ret;
+ return (type->getScalarSizeInBits() + 7) >> 3;
}
}
@@ -584,7 +652,7 @@ namespace oclgrind
numElements = 1;
}
- unsigned elemSize = bits >> 3;
+ unsigned elemSize = (bits+7) >> 3;
// Special case for pointer types
if (type->isPointerTy())
@@ -675,6 +743,20 @@ namespace oclgrind
case llvm::Type::PointerTyID:
cout << "0x" << hex << *(size_t*)data;
break;
+ case llvm::Type::ArrayTyID:
+ {
+ const llvm::Type *elemType = type->getArrayElementType();
+ unsigned elemSize = getTypeSize(elemType);
+ cout << "{";
+ for (unsigned i = 0; i < type->getArrayNumElements(); i++)
+ {
+ if (i > 0)
+ cout << ",";
+ printTypedData(elemType, data+i*elemSize);
+ }
+ cout << "}";
+ break;
+ }
default:
cout << "(raw) 0x" << hex << uppercase << setfill('0');
for (unsigned i = 0; i < size; i++)
@@ -709,4 +791,51 @@ namespace oclgrind
{
return runtime_error::what();
}
+
+ // Create an empty pool that serves requests from blocks of blockSize
+ // bytes. No block is allocated here: the offset starts at the end of
+ // a block so that the first request that fits in a block makes
+ // alloc() create one.
+ MemoryPool::MemoryPool(size_t blockSize)
+   : m_blockSize(blockSize), m_offset(blockSize)
+ {
+ }
+
+ // Release every buffer the pool owns, i.e. all regular blocks and all
+ // separately allocated oversized buffers.
+ MemoryPool::~MemoryPool()
+ {
+   for (uint8_t *block : m_blocks)
+     delete[] block;
+ }
+
+ // Return a pointer to size bytes owned by the pool. The memory stays
+ // valid until the pool is destroyed; there is no way to free it
+ // individually.
+ //
+ // Requests larger than the block size get a dedicated buffer. All
+ // other requests are carved sequentially out of the current block
+ // (the front of m_blocks), starting a new block when the current one
+ // cannot hold the request.
+ //
+ // NOTE(review): offsets advance by exactly size bytes, so returned
+ // pointers are not rounded up to any alignment - confirm this is
+ // acceptable for the object types placed here via PoolAllocator.
+ uint8_t* MemoryPool::alloc(size_t size)
+ {
+   // Check if requested size larger than block size
+   if (size > m_blockSize)
+   {
+     // Oversized buffers are allocated separately from the main pool and
+     // appended at the back so they do not become the current block
+     uint8_t *buffer = new uint8_t[size];
+     m_blocks.push_back(buffer);
+     return buffer;
+   }
+
+   // Start a new block if there is none yet, or if the current block
+   // cannot hold the request. The emptiness check matters for zero-sized
+   // requests on a fresh pool, which would otherwise call front() on an
+   // empty list.
+   if (m_blocks.empty() || m_offset + size > m_blockSize)
+   {
+     m_blocks.push_front(new uint8_t[m_blockSize]);
+     m_offset = 0;
+   }
+
+   uint8_t *buffer = m_blocks.front() + m_offset;
+   m_offset += size;
+   return buffer;
+ }
+
+ // Return a copy of source with the same size and num whose data
+ // buffer is allocated from this pool and filled with source's bytes.
+ TypedValue MemoryPool::clone(const TypedValue& source)
+ {
+   // Total payload: num elements of size bytes each
+   const unsigned numBytes = source.size*source.num;
+
+   TypedValue copy;
+   copy.size = source.size;
+   copy.num = source.num;
+   copy.data = alloc(numBytes);
+   memcpy(copy.data, source.data, numBytes);
+   return copy;
+ }
}
diff --git a/src/core/common.h b/src/core/common.h
index d908ffa..b015cab 100644
--- a/src/core/common.h
+++ b/src/core/common.h
@@ -1,5 +1,5 @@
// common.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -9,7 +9,6 @@
#ifndef __common_h_
#define __common_h_
-#include "config.h"
#include "CL/cl.h"
#include <cassert>
#include <cstdio>
@@ -37,13 +36,23 @@
#undef ERROR
#endif
+#ifdef __APPLE__
+// TODO: Remove this when thread_local fixed on OS X
+#define THREAD_LOCAL __thread
+#elif defined(_WIN32) && !defined(__MINGW32__)
+// TODO: Remove this when thread_local fixed on Windows
+#define THREAD_LOCAL __declspec(thread)
+#else
+#define THREAD_LOCAL thread_local
+#endif
+
namespace llvm
{
class Constant;
class ConstantExpr;
class ConstantInt;
class Instruction;
- class MDOperand;
+ class Metadata;
class StructType;
class Type;
class Value;
@@ -87,26 +96,32 @@ namespace oclgrind
};
// 3-dimensional size
- typedef struct _Size3_
+ struct Size3
{
size_t x, y, z;
- _Size3_();
- _Size3_(size_t x, size_t y, size_t z);
- _Size3_(size_t linear, _Size3_ dimensions);
+ Size3();
+ Size3(size_t x, size_t y, size_t z);
+ Size3(size_t linear, Size3 dimensions);
size_t& operator[](unsigned i);
const size_t& operator[](unsigned i) const;
- bool operator==(const _Size3_& rhs) const;
- friend std::ostream& operator<<(std::ostream& stream, const _Size3_& sz);
- } Size3;
+ bool operator==(const Size3& rhs) const;
+ bool operator!=(const Size3& rhs) const;
+ friend std::ostream& operator<<(std::ostream& stream, const Size3& sz);
+ };
// Structure for a value with a size/type
- struct _TypedValue_
+ struct TypedValue
{
unsigned size;
unsigned num;
unsigned char *data;
- struct _TypedValue_ clone() const;
+ bool operator==(const TypedValue& rhs) const;
+ bool operator!=(const TypedValue& rhs) const;
+
+ friend std::ostream& operator<<(std::ostream& stream, const TypedValue& tv);
+
+ struct TypedValue clone() const;
double getFloat(unsigned index = 0) const;
size_t getPointer(unsigned index = 0) const;
@@ -118,7 +133,6 @@ namespace oclgrind
void setUInt(uint64_t value, unsigned index = 0);
};
- typedef _TypedValue_ TypedValue;
// Private memory map type
typedef std::map<const llvm::Value*,TypedValue> TypedValueMap;
@@ -147,8 +161,8 @@ namespace oclgrind
const llvm::Instruction* getConstExprAsInstruction(
const llvm::ConstantExpr *expr);
- // Get the ConstantInt object for an MDOperand
- const llvm::ConstantInt* getMDOpAsConstInt(const llvm::MDOperand& op);
+ // Get the ConstantInt object for a Metadata node
+ const llvm::ConstantInt* getMDAsConstInt(const llvm::Metadata *md);
// Get the byte offset of a struct member
unsigned getStructMemberOffset(const llvm::StructType *type, unsigned index);
@@ -198,6 +212,90 @@ namespace oclgrind
delete[] str; \
throw FatalError(msg, __FILE__, __LINE__); \
}
+
+ class MemoryPool // Pool allocator that carves requests out of larger blocks; memory is only released when the pool is destroyed
+ {
+ public:
+ MemoryPool(size_t blockSize = 1024); // blockSize: size in bytes of each regular block
+ ~MemoryPool(); // Frees every buffer owned by the pool
+ uint8_t* alloc(size_t size); // Returns size bytes owned by the pool; requests larger than a block get a buffer of their own
+ TypedValue clone(const TypedValue& source); // Copies source, placing the copied data in this pool
+ private:
+ size_t m_blockSize; // Size in bytes of each regular block
+ size_t m_offset; // Offset of the next free byte within the front block
+ std::list<uint8_t*> m_blocks; // All owned buffers: new blocks are pushed to the front, oversized buffers to the back
+ };
+
+ // Allocator for STL containers that takes its memory from a
+ // MemoryPool with blocks of BLOCKSIZE bytes. Copies of an allocator,
+ // including copies rebound to another element type, share the pool of
+ // the allocator they were copied from; two allocators compare equal
+ // exactly when they share a pool. deallocate() does nothing, so memory
+ // is only reclaimed when the pool itself is destroyed.
+ template <class T,size_t BLOCKSIZE>
+ class PoolAllocator
+ {
+   // Allocators for other element types must be able to read our pool
+   template <typename U,size_t BS> friend class PoolAllocator;
+
+ public:
+   using value_type = T;
+   using pointer = T*;
+   using reference = T&;
+   using const_pointer = const T*;
+   using const_reference = const T&;
+   using size_type = size_t;
+   using difference_type = ptrdiff_t;
+
+   template<typename U>
+   struct rebind
+   {
+     using other = PoolAllocator<U,BLOCKSIZE>;
+   };
+
+   // Create an allocator with a new pool of its own
+   PoolAllocator() : pool(new MemoryPool(BLOCKSIZE))
+   {
+   }
+
+   // Share the pool of an allocator for the same element type
+   PoolAllocator(const PoolAllocator& p) : pool(p.pool)
+   {
+   }
+
+   // Share the pool of an allocator for a different element type
+   template<typename U>
+   PoolAllocator(const PoolAllocator<U,BLOCKSIZE>& p) : pool(p.pool)
+   {
+   }
+
+   // Obtain storage for n objects from the pool; the hint is ignored
+   pointer allocate(size_type n, const_pointer hint=0)
+   {
+     uint8_t *raw = pool->alloc(n*sizeof(value_type));
+     return (pointer)raw;
+   }
+
+   // Intentionally empty: the pool never releases individual allocations
+   void deallocate(pointer p, size_type n)
+   {
+   }
+
+   // Construct a U in place at p from the forwarded arguments
+   template<class U, class... Args>
+   void construct(U *p, Args&&... args)
+   {
+     new ((void*)p) U(std::forward<Args>(args)...);
+   }
+
+   // Run the destructor of the object at p without freeing its storage
+   template<class U>
+   void destroy(U *p)
+   {
+     p->~U();
+   }
+
+   bool operator==(const PoolAllocator& p) const
+   {
+     return this->pool == p.pool;
+   }
+
+   bool operator!=(const PoolAllocator& p) const
+   {
+     return !(*this == p);
+   }
+
+ private:
+   std::shared_ptr<MemoryPool> pool;
+ };
+
}
#endif // __common_h_
diff --git a/src/core/half.cpp b/src/core/half.cpp
new file mode 100644
index 0000000..c7cf59a
--- /dev/null
+++ b/src/core/half.cpp
@@ -0,0 +1,259 @@
+// half.cpp (Oclgrind)
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
+// University of Bristol. All rights reserved.
+//
+// This program is provided under a three-clause BSD license. For full
+// license terms please see the LICENSE file distributed with this
+// source code.
+
+#include "half.h"
+
+namespace oclgrind
+{
+ // Convert the IEEE 754 half-precision bit pattern 'half' to the float
+ // with the same value. Zeros, denormals, infinities and NaNs are
+ // handled explicitly; the sign bit is always carried over.
+ float halfToFloat(uint16_t half)
+ {
+   uint16_t h_sign, h_exponent, h_mantissa;
+   uint32_t f_sign, f_exponent, f_mantissa;
+
+   h_sign = half & 0x8000; // 1000 0000 0000 0000
+   h_exponent = half & 0x7C00; // 0111 1100 0000 0000
+   h_mantissa = half & 0x03FF; // 0000 0011 1111 1111
+
+   // Move the sign from bit 15 to bit 31
+   f_sign = ((uint32_t)h_sign) << 16;
+
+   if (h_exponent == 0)
+   {
+     if (h_mantissa == 0)
+     {
+       // Zero
+       f_exponent = 0;
+       f_mantissa = 0;
+     }
+     else
+     {
+       // Denorm - convert to normalized float by shifting the mantissa
+       // left until its leading one reaches the implicit-bit position
+       // (0x0400); e counts the extra shifts beyond the first
+       int e = -1;
+       do
+       {
+         e++;
+         h_mantissa <<= 1;
+       }
+       while((h_mantissa & 0x0400) == 0);
+
+       f_exponent = (-15 + 127 - e) << 23;
+       f_mantissa = ((uint32_t)(h_mantissa & 0x03FF)) << 13;
+     }
+   }
+   else if (h_exponent == 0x7C00)
+   {
+     // Inf or NaN. The mantissa is shifted like any other so that the
+     // top (quiet) bit of a half NaN lands on the top mantissa bit of
+     // the float instead of ending up in its low bits.
+     f_exponent = 0xFF << 23;
+     f_mantissa = ((uint32_t)h_mantissa) << 13;
+   }
+   else
+   {
+     // Normalized - rebias the exponent from 15 to 127
+     f_exponent = (((int32_t)(h_exponent >> 10)) - 15 + 127) << 23;
+     f_mantissa = ((uint32_t)h_mantissa) << 13;
+   }
+
+   // Reinterpret the assembled bits through a union, as the other
+   // conversion routines in this file do, rather than through a
+   // pointer cast that violates the aliasing rules
+   union
+   {
+     uint32_t ui;
+     float f;
+   } UItoF;
+   UItoF.ui = f_sign | f_exponent | f_mantissa;
+   return UItoF.f;
+ }
+
+ uint16_t floatToHalf(float sp, HalfRoundMode round) // Convert sp to a half-precision bit pattern; 'round' is consulted on the overflow and normalized paths
+ {
+ uint16_t h_sign, h_exponent, h_mantissa;
+ uint32_t f_sign, f_exponent, f_mantissa;
+
+ union
+ {
+ float f;
+ uint32_t ui;
+ } FtoUI;
+ FtoUI.f = sp;
+ uint32_t f = FtoUI.ui; // Raw bits of sp
+ f_sign = f & 0x80000000; // 1000 0000 0000 0000 0000 0000 0000 0000
+ f_exponent = f & 0x7F800000; // 0111 1111 1000 0000 0000 0000 0000 0000
+ f_mantissa = f & 0x007FFFFF; // 0000 0000 0111 1111 1111 1111 1111 1111
+
+ h_sign = f_sign >> 16; // Move the sign from bit 31 to bit 15
+
+ if (f_exponent == 0)
+ {
+ // Zero (a float denormal also has all exponent bits clear and
+ // therefore becomes a signed zero here as well)
+ h_exponent = 0;
+ h_mantissa = 0;
+ }
+ else if (f_exponent == 0x7F800000)
+ {
+ // Inf or NaN - every NaN is given the fixed mantissa 0x1FF
+ h_exponent = 0x7C00;
+ if (f_mantissa)
+ h_mantissa = 0x1FF;
+ else
+ h_mantissa = 0;
+ }
+ else
+ {
+ int e = (((int32_t)(f_exponent >> 23)) - 127 + 15); // Exponent rebiased from 127 to 15
+ if (e >= 0x1F)
+ {
+ // Value will overflow - infinity by default
+ h_exponent = 0x7C00;
+ h_mantissa = 0;
+
+ // A mantissa of -1 (0xFFFF) makes the final 16-bit sum wrap from
+ // 0x7C00 to 0x7BFF, the largest finite magnitude; this is chosen
+ // whenever the rounding mode points away from the infinity
+ if (round == Half_RTZ)
+ h_mantissa = -1;
+ if (round == Half_RTP && h_sign)
+ h_mantissa = -1;
+ if (round == Half_RTN && !h_sign)
+ h_mantissa = -1;
+ }
+ else if (e <= 0)
+ {
+ // Value will underflow
+ h_exponent = 0;
+ if (14 - e > 24)
+ {
+ // Too small - flush to zero
+ h_mantissa = 0;
+ }
+ else
+ {
+ // Convert to denorm: restore the implicit leading one, then
+ // shift the 24-bit significand down by 14-e. 'round' is not
+ // consulted on this path; the result is incremented whenever
+ // the first discarded bit is set
+ f_mantissa |= 0x800000;
+ h_mantissa = (f_mantissa >> (14-e));
+ if ((f_mantissa >> (13 - e)) & 0x1)
+ {
+ h_mantissa += 0x1;
+ }
+ }
+ }
+ else
+ {
+ // Normalized
+ h_exponent = e << 10;
+ h_mantissa = f_mantissa >> 13;
+ // The truncation above already gives the RTZ result; the other
+ // modes decide below whether to step up by one
+ if (round == Half_RTE && (f & 0x00001000) != 0) // First discarded bit set
+ {
+ if ((f & 0x00002FFF) != 0) // Above the halfway point, or an exact tie with an odd kept mantissa
+ h_mantissa += 1;
+ }
+ else if (round == Half_RTP)
+ {
+ FtoUI.ui &= 0xFFFFE000; // Truncated value, for comparison with sp
+ if (FtoUI.f < sp)
+ h_mantissa += 1;
+ }
+ else if (round == Half_RTN)
+ {
+ FtoUI.ui &= 0xFFFFE000; // Truncated value, for comparison with sp
+ if (sp < FtoUI.f)
+ h_mantissa += 1;
+ }
+ }
+ }
+
+ return h_sign + h_exponent + h_mantissa; // Added rather than OR-ed so that a mantissa carry (0x400) moves into the exponent
+ }
+
+ // Convert the double dp to a half-precision bit pattern.
+ //
+ // 'round' selects the rounding mode used when dp overflows the half
+ // range and when it maps to a normalized half. Doubles with a zero
+ // exponent field (zero and double denormals) become a signed zero,
+ // and every NaN is given the fixed mantissa 0x1FF.
+ //
+ // NOTE(review): as in floatToHalf, values that become half denormals
+ // are rounded on the first discarded bit regardless of 'round' -
+ // confirm whether that is intended.
+ uint16_t doubleToHalf(double dp, HalfRoundMode round)
+ {
+   uint16_t h_sign, h_exponent, h_mantissa;
+   uint64_t d_sign, d_exponent, d_mantissa;
+
+   union
+   {
+     double d;
+     uint64_t ui;
+   } DtoUI;
+   DtoUI.d = dp;
+   uint64_t d = DtoUI.ui;
+   d_sign = d & 0x8000000000000000; // bit 63
+   d_exponent = d & 0x7FF0000000000000; // bits 62-52
+   d_mantissa = d & 0x000FFFFFFFFFFFFF; // bits 51-0
+
+   // Move the sign from bit 63 to bit 15
+   h_sign = d_sign >> 48;
+
+   if (d_exponent == 0)
+   {
+     // Zero
+     h_exponent = 0;
+     h_mantissa = 0;
+   }
+   else if (d_exponent == 0x7FF0000000000000)
+   {
+     // Inf or NaN
+     h_exponent = 0x7C00;
+     if (d_mantissa)
+       h_mantissa = 0x1FF;
+     else
+       h_mantissa = 0;
+   }
+   else
+   {
+     // Exponent rebiased from 1023 to 15
+     int e = (((int64_t)(d_exponent >> 52)) - 1023 + 15);
+     if (e >= 0x1F)
+     {
+       // Value will overflow - infinity by default
+       h_exponent = 0x7C00;
+       h_mantissa = 0;
+
+       // A mantissa of -1 (0xFFFF) makes the final 16-bit sum wrap from
+       // 0x7C00 to 0x7BFF, the largest finite magnitude
+       if (round == Half_RTZ)
+         h_mantissa = -1;
+       if (round == Half_RTP && h_sign)
+         h_mantissa = -1;
+       if (round == Half_RTN && !h_sign)
+         h_mantissa = -1;
+     }
+     else if (e <= 0)
+     {
+       // Value will underflow
+       h_exponent = 0;
+       if (43 - e > 53)
+       {
+         // Too small - flush to zero
+         h_mantissa = 0;
+       }
+       else
+       {
+         // Convert to denorm. The 53-bit significand (implicit one at
+         // bit 52) has to be expressed in units of 2^-24, which takes a
+         // right shift of 52 - 10 + 1 - e = 43 - e. The float routine's
+         // 14 - e is too small here and would leave high significand
+         // bits to be truncated away by the 16-bit assignment.
+         d_mantissa |= 0x0010000000000000;
+         h_mantissa = (d_mantissa >> (43 - e));
+         if ((d_mantissa >> (42 - e)) & 0x1)
+         {
+           h_mantissa += 0x1;
+         }
+       }
+     }
+     else
+     {
+       // Normalized
+       h_exponent = e << 10;
+       h_mantissa = d_mantissa >> 42;
+       // The truncation of d_mantissa above already gives the RTZ result
+       if (round == Half_RTE && (d & 0x20000000000) != 0)
+       {
+         // First discarded bit is set: round up unless this is an exact
+         // tie and the kept mantissa is already even
+         if ((d & 0x5FFFFFFFFFF) != 0)
+           h_mantissa += 1;
+       }
+       else if (round == Half_RTP)
+       {
+         // Compare dp with its truncated value
+         DtoUI.ui &= 0xFFFFFC0000000000;
+         if (DtoUI.d < dp)
+           h_mantissa += 1;
+       }
+       else if (round == Half_RTN)
+       {
+         // Compare dp with its truncated value
+         DtoUI.ui &= 0xFFFFFC0000000000;
+         if (dp < DtoUI.d)
+           h_mantissa += 1;
+       }
+     }
+   }
+
+   // Added rather than OR-ed so that a mantissa carry (0x400) moves
+   // into the exponent
+   return h_sign + h_exponent + h_mantissa;
+ }
+}
diff --git a/src/core/half.h b/src/core/half.h
index 58afcf1..120fa36 100644
--- a/src/core/half.h
+++ b/src/core/half.h
@@ -1,5 +1,5 @@
// half.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -8,153 +8,22 @@
#include "common.h"
-static float halfToFloat(uint16_t half)
+namespace oclgrind
{
- uint16_t h_sign, h_exponent, h_mantissa;
- uint32_t f_sign, f_exponent, f_mantissa;
-
- h_sign = half & 0x8000; // 1000 0000 0000 0000
- h_exponent = half & 0x7C00; // 0111 1100 0000 0000
- h_mantissa = half & 0x03FF; // 0000 0011 1111 1111
-
- f_sign = ((uint32_t)h_sign) << 16;
-
- if (h_exponent == 0)
- {
- if (h_mantissa == 0)
- {
- // Zero
- f_exponent = 0;
- f_mantissa = 0;
- }
- else
- {
- // Denorm - convert to normalized float
- int e = -1;
- do
- {
- e++;
- h_mantissa <<= 1;
- }
- while((h_mantissa & 0x0400) == 0);
-
- f_exponent = (-15 + 127 - e) << 23;
- f_mantissa = ((uint32_t)(h_mantissa & 0x03FF)) << 13;
- }
- }
- else if (h_exponent == 0x7C00)
- {
- // Inf or NaN
- f_exponent = 0xFF << 23;
- f_mantissa = h_mantissa;
- }
- else
- {
- // Normalized
- f_exponent = (((int32_t)(h_exponent >> 10)) - 15 + 127) << 23;
- f_mantissa = ((uint32_t)h_mantissa) << 13;
- }
-
- uint32_t result = f_sign | f_exponent | f_mantissa;
- return *(float*)&result;
-}
-
-enum HalfRoundMode
-{
- // Towards negative infinity
- Half_RTN,
- // Towards zero
- Half_RTZ,
- // Towards positive infinity
- Half_RTP,
- // Towards nearest even
- Half_RTE
-};
-
-static uint16_t floatToHalf(float sp, HalfRoundMode round = Half_RTZ)
-{
- uint16_t h_sign, h_exponent, h_mantissa;
- uint32_t f_sign, f_exponent, f_mantissa;
-
- union
- {
- float f;
- uint32_t ui;
- } FtoUI;
- FtoUI.f = sp;
- uint32_t f = FtoUI.ui;
- f_sign = f & 0x80000000; // 1000 0000 0000 0000 0000 0000 0000 0000
- f_exponent = f & 0x7F800000; // 0111 1111 1000 0000 0000 0000 0000 0000
- f_mantissa = f & 0x007FFFFF; // 0000 0000 0111 1111 1111 1111 1111 1111
-
- h_sign = f_sign >> 16;
-
- if (f_exponent == 0)
- {
- // Zero
- h_exponent = 0;
- h_mantissa = 0;
- }
- else if (f_exponent == 0x7F800000)
+ enum HalfRoundMode
{
- // Inf or NaN
- h_exponent = 0x7C00;
- h_mantissa = f_mantissa;
- }
- else
- {
- int e = (((int32_t)(f_exponent >> 23)) - 127 + 15);
- if (e >= 0x1F)
- {
- // Value will overflow
- h_exponent = 0x7C00;
- h_mantissa = 0;
- }
- else if (e <= 0)
- {
- // Value will underflow
- h_exponent = 0;
- if (14 - e > 24)
- {
- // Too small - flush to zero
- h_mantissa = 0;
- }
- else
- {
- // Convert to denorm
- f_mantissa |= 0x800000;
- h_mantissa = (f_mantissa >> (14-e));
- if ((f_mantissa >> (13 - e)) & 0x1)
- {
- h_mantissa += 0x1;
- }
- }
- }
- else
- {
- // Normalized
- h_exponent = e << 10;
- h_mantissa = f_mantissa >> 13;
- // The current f_mantissa is done in RTZ
- if (round == Half_RTE && (f & 0x00001000) != 0)
- {
- if ((f & 0x00002FFF) != 0)
- h_mantissa += 1;
- }
- else if (round == Half_RTP)
- {
- FtoUI.ui &= 0xFFFFE000;
- if (FtoUI.f < sp)
- h_mantissa += 1;
- }
- else if (round == Half_RTN)
- {
- FtoUI.ui &= 0xFFFFE000;
- if (sp < FtoUI.f)
- h_mantissa += 1;
- }
- }
- }
-
- return h_sign + h_exponent + h_mantissa;
+ // Towards negative infinity
+ Half_RTN,
+ // Towards zero
+ Half_RTZ,
+ // Towards positive infinity
+ Half_RTP,
+ // Towards nearest even
+ Half_RTE
+ };
+
+ float halfToFloat(uint16_t half);
+
+ uint16_t floatToHalf(float sp, HalfRoundMode round = Half_RTZ);
+ uint16_t doubleToHalf(double dp, HalfRoundMode round = Half_RTZ);
}
diff --git a/src/install/INSTALL.darwin b/src/install/INSTALL.darwin
index b3292d5..222d9e1 100644
--- a/src/install/INSTALL.darwin
+++ b/src/install/INSTALL.darwin
@@ -1,16 +1,12 @@
To install Oclgrind, simply copy the bin, lib and include directories
-to (for example) /usr/local/, ensuring that file modification times
-are preserved. The easiest way to do this is with the following
-command:
+to (for example) /usr/local/:
- sudo cp -rp {bin,lib,include} /usr/local
+ sudo cp -r {bin,lib,include} /usr/local
Alternatively, Oclgrind can be used from a non-system directory. To do
so, add $OCLGRIND_ROOT/bin to your PATH environment variable, and
$OCLGRIND_ROOT/lib to your DYLD_LIBRARY_PATH environment variable
-(where $OCLGRIND_ROOT is the directory containing this file). If
-copying Oclgrind to a new location, ensure that the -p flag is passed
-to cp, to ensure that file modification times are preserved.
+(where $OCLGRIND_ROOT is the directory containing this file).
Information about using Oclgrind can be found on the GitHub wiki page:
diff --git a/src/install/INSTALL.linux b/src/install/INSTALL.linux
index cf81cf9..30dbbe3 100644
--- a/src/install/INSTALL.linux
+++ b/src/install/INSTALL.linux
@@ -1,16 +1,12 @@
To install Oclgrind, simply copy the bin, lib and include directories
-to (for example) /usr/local/, ensuring that file modification times
-are preserved. The easiest way to do this is with the following
-command:
+to (for example) /usr/local/:
- sudo cp -rp {bin,lib,include} /usr/local
+ sudo cp -r {bin,lib,include} /usr/local
Alternatively, Oclgrind can be used from a non-system directory. To do
so, add $OCLGRIND_ROOT/bin to your PATH environment variable, and
$OCLGRIND_ROOT/lib to your LD_LIBRARY_PATH environment variable (where
-$OCLGRIND_ROOT is the directory containing this file). If copying
-Oclgrind to a new location, ensure that the -p flag is passed to cp,
-to ensure that file modification times are preserved.
+$OCLGRIND_ROOT is the directory containing this file).
To use Oclgrind with the OpenCL ICD loader (optional), copy
oclgrind.icd to /etc/OpenCL/vendors/.
diff --git a/src/install/INSTALL.windows b/src/install/INSTALL.windows
index 2b02636..716e8c2 100644
--- a/src/install/INSTALL.windows
+++ b/src/install/INSTALL.windows
@@ -5,4 +5,16 @@ running 'uninstall.bat' as an Administrator.
Alternatively, Oclgrind can be run from any other directory. You will
need to manually create OpenCL ICD loading points by editing the
-registry (see oclgrind-icd.reg).
+registry (see oclgrind-icd.reg), and/or add $OCLGRIND_ROOT/bin to your
+PATH environment variable to make use of the oclgrind.exe command.
+
+You may be warned about a missing MSVCP140.dll during the installation
+process, which can cause Oclgrind to fail to run properly. This can be
+fixed by installing the Microsoft Visual C++ Redistributable from
+here:
+
+ https://www.microsoft.com/en-us/download/details.aspx?id=48145
+
+Information about using Oclgrind can be found on the GitHub wiki page:
+
+ http://github.com/jrprice/Oclgrind/wiki
diff --git a/src/install/cpack-description b/src/install/cpack-description
new file mode 100644
index 0000000..3fe78cf
--- /dev/null
+++ b/src/install/cpack-description
@@ -0,0 +1,11 @@
+Oclgrind is an extensible OpenCL device simulator that provides a
+plugin interface to facilitate the creation of tools to aid analysis
+and development of OpenCL programs. Among the tools that Oclgrind
+provides are various debugging aids, such as out-of-bounds memory
+access checking, data-race detection, and an interactive debugger.
+
+Oclgrind implements the OpenCL 1.2 runtime API, which makes simulating
+an existing OpenCL program very straightforward - simply prefix your
+usual application command-line with 'oclgrind'. There is also a simple
+interface for simulating individual kernels in isolation via the
+'oclgrind-kernel' command.
diff --git a/src/install/install.bat b/src/install/install.bat
index cea2457..3fa3d21 100644
--- a/src/install/install.bat
+++ b/src/install/install.bat
@@ -13,6 +13,19 @@ xcopy uninstall.bat "%ROOT%\" /Y || goto :error
regedit /S oclgrind-icd.reg || goto :error
+echo.
+echo Installation completed.
+echo.
+
+rem Warn if the Visual C++ runtime DLL is missing. The Windows directory
+rem is taken from %SystemRoot% instead of assuming C:\Windows, so the
+rem check also works when Windows is installed elsewhere.
+if not exist "%SystemRoot%\System32\msvcp140.dll" (
+ echo WARNING: MSVCP140.dll not found - Oclgrind may fail to work correctly
+ echo Download the Microsoft Visual C++ Redistributable from here:
+ echo.
+ echo https://www.microsoft.com/en-us/download/details.aspx?id=48145
+ echo.
+ pause
+)
+
goto :EOF
diff --git a/src/kernel/Simulation.cpp b/src/kernel/Simulation.cpp
index 208ed77..efa20a2 100644
--- a/src/kernel/Simulation.cpp
+++ b/src/kernel/Simulation.cpp
@@ -1,12 +1,11 @@
// Simulation.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
-#include "config.h"
#include <cassert>
#include <cmath>
#include <iostream>
@@ -59,10 +58,13 @@ void Simulation::dumpArgument(DumpArg& arg)
for (size_t i = 0; i < num; i++)
{
cout << " " << arg.name << "[" << i << "] = ";
+ if (arg.hex)
+ cout << "0x" << setfill('0') << setw(sizeof(T)*2) << hex;
if (sizeof(T) == 1)
cout << (int)data[i];
else
cout << data[i];
+ cout << dec;
cout << endl;
}
cout << endl;
@@ -267,6 +269,8 @@ void Simulation::parseArgument(size_t index)
size_t typeSize = 0;
bool null = false;
bool dump = false;
+ bool hex = false;
+ bool noinit = false;
string fill = "";
string range = "";
string name = m_kernel->getArgumentName(index).str();
@@ -357,9 +361,19 @@ void Simulation::parseArgument(size_t index)
}
else if (token == "hex")
{
+ hex = true;
m_lineBuffer.setf(ios_base::hex);
m_lineBuffer.unsetf(ios_base::dec | ios_base::oct);
}
+ else if (token == "noinit")
+ {
+ if (addrSpace != CL_KERNEL_ARG_ADDRESS_GLOBAL &&
+ addrSpace != CL_KERNEL_ARG_ADDRESS_CONSTANT)
+ {
+ throw "'noinit' only valid for buffer arguments";
+ }
+ noinit = true;
+ }
else if (token == "null")
{
if (addrSpace != CL_KERNEL_ARG_ADDRESS_GLOBAL &&
@@ -429,7 +443,7 @@ void Simulation::parseArgument(size_t index)
// Ensure size given
if (null)
{
- if (size != -1 || !fill.empty() || !range.empty())
+ if (size != -1 || !fill.empty() || !range.empty() || noinit || dump)
{
throw "'null' not valid with other argument descriptors";
}
@@ -482,10 +496,16 @@ void Simulation::parseArgument(size_t index)
{
throw "'dump' only valid for memory objects";
}
- if (null)
- {
- throw "'dump' not valid with 'null' specifier";
- }
+ }
+
+ // Ensure only one initializer given
+ unsigned numInitializers = 0;
+ if (noinit) numInitializers++;
+ if (!fill.empty()) numInitializers++;
+ if (!range.empty()) numInitializers++;
+ if (numInitializers > 1)
+ {
+ throw "Multiple initializers present";
}
// Generate argument data
@@ -506,7 +526,8 @@ void Simulation::parseArgument(size_t index)
{
// Parse argument data
unsigned char *data = new unsigned char[size];
- if (!fill.empty())
+ if (noinit){}
+ else if (!fill.empty())
{
istringstream fillStream(fill);
fillStream.copyfmt(m_lineBuffer);
@@ -591,7 +612,10 @@ void Simulation::parseArgument(size_t index)
// Allocate buffer and store content
Memory *globalMemory = m_context->getGlobalMemory();
size_t address = globalMemory->allocateBuffer(size, flags);
- globalMemory->store((unsigned char*)&data[0], address, size);
+ if (!address)
+ throw "Failed to allocate global memory";
+ if (!noinit)
+ globalMemory->store((unsigned char*)&data[0], address, size);
value.data = new unsigned char[value.size];
value.setPointer(address);
delete[] data;
@@ -604,6 +628,7 @@ void Simulation::parseArgument(size_t index)
size,
type,
name,
+ hex
};
m_dumpArguments.push_back(dump);
}
diff --git a/src/kernel/Simulation.h b/src/kernel/Simulation.h
index 19b6e9b..63edb7e 100644
--- a/src/kernel/Simulation.h
+++ b/src/kernel/Simulation.h
@@ -1,5 +1,5 @@
// Simulation.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -57,13 +57,14 @@ class Simulation
size_t m_lineNumber;
std::istringstream m_lineBuffer;
- typedef struct
+ struct DumpArg
{
size_t address;
size_t size;
ArgDataType type;
std::string name;
- } DumpArg;
+ bool hex;
+ };
std::list<DumpArg> m_dumpArguments;
template<typename T>
diff --git a/src/kernel/oclgrind-kernel.cpp b/src/kernel/oclgrind-kernel.cpp
index 4ac1d31..1ce599d 100644
--- a/src/kernel/oclgrind-kernel.cpp
+++ b/src/kernel/oclgrind-kernel.cpp
@@ -1,5 +1,5 @@
// main.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -7,6 +7,7 @@
// source code.
#include "config.h"
+
#include <cstdlib>
#include <cstring>
#include <iostream>
@@ -138,12 +139,16 @@ static bool parseArguments(int argc, char *argv[])
{
setEnvironment("OCLGRIND_UNIFORM_WRITES", "1");
}
+ else if (!strcmp(argv[i], "--uninitialized"))
+ {
+ setEnvironment("OCLGRIND_UNINITIALIZED", "1");
+ }
else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version"))
{
cout << endl;
cout << "Oclgrind " PACKAGE_VERSION << endl;
cout << endl;
- cout << "Copyright (c) 2013-2015" << endl;
+ cout << "Copyright (c) 2013-2016" << endl;
cout << "James Price and Simon McIntosh-Smith, University of Bristol"
<< endl;
cout << "https://github.com/jrprice/Oclgrind" << endl;
@@ -210,11 +215,13 @@ static void printUsage()
<< " --pch-dir DIR "
"Override directory containing precompiled headers" << endl
<< " --plugins PLUGINS "
- "Load colon seperated list of plugin libraries" << endl
+ "Load colon separated list of plugin libraries" << endl
<< " -q --quick "
"Only run first and last work-group" << endl
<< " --uniform-writes "
"Don't suppress uniform write-write data-races" << endl
+ << " --uninitialized "
+ "Report usage of uninitialized values" << endl
<< " -v --version "
"Display version information" << endl
<< endl
diff --git a/src/plugins/InstructionCounter.cpp b/src/plugins/InstructionCounter.cpp
index ce680f4..ed977ec 100644
--- a/src/plugins/InstructionCounter.cpp
+++ b/src/plugins/InstructionCounter.cpp
@@ -1,5 +1,5 @@
// InstructionCounter.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
diff --git a/src/plugins/InstructionCounter.h b/src/plugins/InstructionCounter.h
index f12c33a..e6f3646 100644
--- a/src/plugins/InstructionCounter.h
+++ b/src/plugins/InstructionCounter.h
@@ -1,5 +1,5 @@
// InstructionCounter.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
diff --git a/src/plugins/InteractiveDebugger.cpp b/src/plugins/InteractiveDebugger.cpp
index a088338..012e009 100644
--- a/src/plugins/InteractiveDebugger.cpp
+++ b/src/plugins/InteractiveDebugger.cpp
@@ -1,11 +1,12 @@
// InteractiveDebugger.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
+#include "config.h"
#include "core/common.h"
#include <iterator>
@@ -308,7 +309,7 @@ void InteractiveDebugger::printFunction(
cout << ", ";
}
cout << argItr->getName().str() << "=";
- m_kernelInvocation->getCurrentWorkItem()->printValue(argItr);
+ m_kernelInvocation->getCurrentWorkItem()->printValue(&*argItr);
}
cout << ") at line " << dec << getLineNumber(instruction) << endl;
@@ -867,19 +868,27 @@ bool InteractiveDebugger::print(vector<string> args)
cout << "not found" << endl;
return false;
}
+
const llvm::Type *ptrType = ptr->getType();
+ unsigned addrSpace = ptrType->getPointerAddressSpace();
// Check for alloca instruction, in which case look at allocated type
bool alloca = false;
+ if (ptr->getValueID() == llvm::Value::GlobalVariableVal)
+ {
+ ptrType = ptrType->getPointerElementType();
+ }
if (ptr->getValueID() >= llvm::Value::InstructionVal &&
((llvm::Instruction*)ptr)->getOpcode() == llvm::Instruction::Alloca)
{
ptrType = ((const llvm::AllocaInst*)ptr)->getAllocatedType();
+ if (ptrType->isPointerTy())
+ addrSpace = ptrType->getPointerAddressSpace();
alloca = true;
}
// Ensure type is a pointer
- if (!ptrType->isPointerTy())
+ if (!ptrType->isPointerTy() && !ptrType->isArrayTy())
{
cout << "not a pointer" << endl;
return false;
@@ -891,12 +900,12 @@ bool InteractiveDebugger::print(vector<string> args)
{
// Load base address from private memory
workItem->getPrivateMemory()->load((unsigned char*)&base,
- base, sizeof(size_t));
+ base, sizeof(size_t));
}
// Get target memory object
Memory *memory = NULL;
- switch (ptrType->getPointerAddressSpace())
+ switch (addrSpace)
{
case AddrSpacePrivate:
memory = workItem->getPrivateMemory();
@@ -932,7 +941,12 @@ bool InteractiveDebugger::print(vector<string> args)
}
else
{
- if (!workItem->printVariable(args[i]))
+ try
+ {
+ if (!workItem->printVariable(args[i]))
+ cout << "not found";
+ }
+ catch (FatalError err)
{
cout << "not found";
}
diff --git a/src/plugins/InteractiveDebugger.h b/src/plugins/InteractiveDebugger.h
index 2b5db65..d6c7775 100644
--- a/src/plugins/InteractiveDebugger.h
+++ b/src/plugins/InteractiveDebugger.h
@@ -1,5 +1,5 @@
// InteractiveDebugger.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
diff --git a/src/plugins/Logger.cpp b/src/plugins/Logger.cpp
index 7b73296..acc8896 100644
--- a/src/plugins/Logger.cpp
+++ b/src/plugins/Logger.cpp
@@ -1,5 +1,5 @@
// Logger.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
diff --git a/src/plugins/Logger.h b/src/plugins/Logger.h
index 294bc67..62bbcc8 100644
--- a/src/plugins/Logger.h
+++ b/src/plugins/Logger.h
@@ -1,5 +1,5 @@
// Logger.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
diff --git a/src/plugins/MemCheck.cpp b/src/plugins/MemCheck.cpp
index fb04e57..c4d7474 100644
--- a/src/plugins/MemCheck.cpp
+++ b/src/plugins/MemCheck.cpp
@@ -1,5 +1,5 @@
// MemCheck.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -10,6 +10,10 @@
#include "core/Context.h"
#include "core/Memory.h"
+#include "core/WorkItem.h"
+
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
#include "MemCheck.h"
@@ -21,6 +25,36 @@ MemCheck::MemCheck(const Context *context)
{
}
+// Called after a work-item has executed an instruction. For loads and
+// stores, every GetElementPtr instruction in the chain that produced
+// the accessed pointer is passed to checkArrayAccess(); all other
+// instructions are ignored.
+void MemCheck::instructionExecuted(const WorkItem *workItem,
+                                   const llvm::Instruction *instruction,
+                                   const TypedValue& result)
+{
+  // Find the pointer being accessed, if this is a load or a store
+  const llvm::Value *pointer = nullptr;
+  if (auto load = llvm::dyn_cast<llvm::LoadInst>(instruction))
+  {
+    pointer = load->getPointerOperand();
+  }
+  else if (auto store = llvm::dyn_cast<llvm::StoreInst>(instruction))
+  {
+    pointer = store->getPointerOperand();
+  }
+  else
+  {
+    return;
+  }
+
+  // Follow the pointer back through its GEP instructions (looking
+  // through pointer casts) and check each one in turn
+  for (auto gep =
+         llvm::dyn_cast<llvm::GetElementPtrInst>(pointer->stripPointerCasts());
+       gep;
+       gep = llvm::dyn_cast<llvm::GetElementPtrInst>(
+         pointer->stripPointerCasts()))
+  {
+    checkArrayAccess(workItem, gep);
+    pointer = gep->getPointerOperand();
+  }
+}
+
void MemCheck::memoryAtomicLoad(const Memory *memory,
const WorkItem *workItem,
AtomicOp op, size_t address, size_t size)
@@ -47,6 +81,17 @@ void MemCheck::memoryLoad(const Memory *memory, const WorkGroup *workGroup,
checkLoad(memory, address, size);
}
+// Record a newly mapped region of a buffer so that later accesses can
+// be checked against it. The region is classified as READ only when
+// flags is exactly CL_MAP_READ; anything else is recorded as WRITE.
+void MemCheck::memoryMap(const Memory *memory, size_t address,
+                         size_t offset, size_t size, cl_map_flags flags)
+{
+  // Host pointer for the start of the mapped range; memoryUnmap()
+  // identifies the region by this pointer
+  auto hostPtr = memory->getPointer(address + offset);
+  auto mode = (flags == CL_MAP_READ ? MapRegion::READ : MapRegion::WRITE);
+
+  MapRegion region = { address, offset, size, hostPtr, mode };
+  m_mapRegions.push_back(region);
+}
+
void MemCheck::memoryStore(const Memory *memory, const WorkItem *workItem,
size_t address, size_t size,
const uint8_t *storeData)
@@ -61,6 +106,62 @@ void MemCheck::memoryStore(const Memory *memory, const WorkGroup *workGroup,
checkStore(memory, address, size);
}
+// Remove the first recorded mapping whose host pointer equals `ptr`. Nothing
+// happens if no recorded mapping has that pointer.
+void MemCheck::memoryUnmap(const Memory *memory, size_t address,
+                           const void *ptr)
+{
+  // Advance to the first region with a matching host pointer
+  auto itr = m_mapRegions.begin();
+  while (itr != m_mapRegions.end() && itr->ptr != ptr)
+    ++itr;
+
+  if (itr != m_mapRegions.end())
+    m_mapRegions.erase(itr);
+}
+
+// Check each index of a GEP instruction against the statically sized array
+// type it indexes into (if any), logging an error for every index that is
+// out of range.
+void MemCheck::checkArrayAccess(const WorkItem *workItem,
+                                const llvm::GetElementPtrInst *GEPI) const
+{
+  // Type that the next index applies to, starting at the GEP's pointer type
+  const llvm::Type *indexedType = GEPI->getPointerOperandType();
+
+  for (auto operand = GEPI->idx_begin();
+       operand != GEPI->idx_end();
+       ++operand)
+  {
+    // Runtime value of this index for the executing work-item
+    int64_t index = workItem->getOperand(operand->get()).getSInt();
+
+    if (indexedType->isArrayTy())
+    {
+      uint64_t numElements = indexedType->getArrayNumElements();
+
+      // The comparison is unsigned, so a negative index is reported too
+      if ((uint64_t)index >= numElements)
+      {
+        ostringstream info;
+        info << "Index (" << index << ") exceeds static array size ("
+             << numElements << ")";
+        m_context->logError(info.str().c_str());
+      }
+
+      indexedType = indexedType->getArrayElementType();
+    }
+    else if (indexedType->isPointerTy())
+    {
+      indexedType = indexedType->getPointerElementType();
+    }
+    else if (indexedType->isVectorTy())
+    {
+      indexedType = indexedType->getVectorElementType();
+    }
+    else if (indexedType->isStructTy())
+    {
+      // For structs the index selects which member type to step into
+      indexedType = indexedType->getStructElementType(index);
+    }
+  }
+}
+
void MemCheck::checkLoad(const Memory *memory,
size_t address, size_t size) const
{
@@ -74,6 +175,21 @@ void MemCheck::checkLoad(const Memory *memory,
{
m_context->logError("Invalid read from write-only buffer");
}
+
+ if (memory->getAddressSpace() == AddrSpaceLocal || memory->getAddressSpace() == AddrSpacePrivate) return;
+
+  // Check if memory location is currently mapped for writing.
+  // The access covers [address, address+size) and the region covers
+  // [region->address, region->address+region->size). Two half-open ranges
+  // overlap only if each starts strictly before the other ends, so an access
+  // that ends exactly where the region begins must not be reported.
+  // NOTE(review): region->offset is not used in this test - confirm whether
+  // region->address already refers to the start of the mapped range.
+  for (auto region = m_mapRegions.begin();
+       region != m_mapRegions.end();
+       region++)
+  {
+    if (region->type == MapRegion::WRITE &&
+        address < region->address + region->size &&
+        address + size > region->address)
+    {
+      m_context->logError("Invalid read from buffer mapped for writing");
+    }
+  }
}
void MemCheck::checkStore(const Memory *memory,
@@ -89,6 +205,20 @@ void MemCheck::checkStore(const Memory *memory,
{
m_context->logError("Invalid write to read-only buffer");
}
+
+ if (memory->getAddressSpace() == AddrSpaceLocal || memory->getAddressSpace() == AddrSpacePrivate) return;
+
+  // Check if memory location is currently mapped (for reading or writing).
+  // The access covers [address, address+size) and the region covers
+  // [region->address, region->address+region->size). Two half-open ranges
+  // overlap only if each starts strictly before the other ends, so an access
+  // that ends exactly where the region begins must not be reported.
+  // NOTE(review): region->offset is not used in this test - confirm whether
+  // region->address already refers to the start of the mapped range.
+  for (auto region = m_mapRegions.begin();
+       region != m_mapRegions.end();
+       region++)
+  {
+    if (address < region->address + region->size &&
+        address + size > region->address)
+    {
+      m_context->logError("Invalid write to mapped buffer");
+    }
+  }
}
void MemCheck::logInvalidAccess(bool read, unsigned addrSpace,
@@ -104,4 +234,4 @@ void MemCheck::logInvalidAccess(bool read, unsigned addrSpace,
<< "Entity: " << msg.CURRENT_ENTITY << endl
<< msg.CURRENT_LOCATION << endl;
msg.send();
-}
\ No newline at end of file
+}
diff --git a/src/plugins/MemCheck.h b/src/plugins/MemCheck.h
index 9e685bf..7e72d8a 100644
--- a/src/plugins/MemCheck.h
+++ b/src/plugins/MemCheck.h
@@ -1,5 +1,5 @@
// MemCheck.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -8,6 +8,11 @@
#include "core/Plugin.h"
+namespace llvm
+{
+ class GetElementPtrInst;
+}
+
namespace oclgrind
{
class MemCheck : public Plugin
@@ -15,6 +20,9 @@ namespace oclgrind
public:
MemCheck(const Context *context);
+ virtual void instructionExecuted(const WorkItem *workItem,
+ const llvm::Instruction *instruction,
+ const TypedValue& result) override;
virtual void memoryAtomicLoad(const Memory *memory,
const WorkItem *workItem,
AtomicOp op,
@@ -27,17 +35,34 @@ namespace oclgrind
size_t address, size_t size) override;
virtual void memoryLoad(const Memory *memory, const WorkGroup *workGroup,
size_t address, size_t size) override;
+ virtual void memoryMap(const Memory *memory, size_t address,
+ size_t offset, size_t size,
+ cl_map_flags flags) override;
virtual void memoryStore(const Memory *memory, const WorkItem *workItem,
size_t address, size_t size,
const uint8_t *storeData) override;
virtual void memoryStore(const Memory *memory, const WorkGroup *workGroup,
size_t address, size_t size,
const uint8_t *storeData) override;
+ virtual void memoryUnmap(const Memory *memory, size_t address,
+ const void *ptr) override;
private:
+ void checkArrayAccess(const WorkItem *workItem,
+ const llvm::GetElementPtrInst *GEPI) const;
void checkLoad(const Memory *memory, size_t address, size_t size) const;
void checkStore(const Memory *memory, size_t address, size_t size) const;
void logInvalidAccess(bool read, unsigned addrSpace,
size_t address, size_t size) const;
+
+ // Description of one mapped region, recorded by memoryMap() and removed
+ // again by memoryUnmap(). memoryMap() fills the fields with a brace
+ // initialiser, so their declaration order must not change.
+ struct MapRegion
+ {
+ // Address, offset and size exactly as passed to memoryMap()
+ size_t address;
+ size_t offset;
+ size_t size;
+ // Host pointer for (address + offset); memoryUnmap() matches on this
+ const void *ptr;
+ // READ only if the map flags were exactly CL_MAP_READ, otherwise WRITE
+ enum {READ, WRITE} type;
+ };
+ // All regions that are currently recorded as mapped
+ std::list<MapRegion> m_mapRegions;
};
}
diff --git a/src/plugins/RaceDetector.cpp b/src/plugins/RaceDetector.cpp
index 10f417e..8c38907 100644
--- a/src/plugins/RaceDetector.cpp
+++ b/src/plugins/RaceDetector.cpp
@@ -1,5 +1,5 @@
// RaceDetector.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -19,7 +19,14 @@
using namespace oclgrind;
using namespace std;
-#define KEY(memory,address) make_pair(memory, EXTRACT_BUFFER(address))
+THREAD_LOCAL RaceDetector::WorkerState RaceDetector::m_state = {NULL};
+
+#define STATE(workgroup) (m_state.groups->at(workgroup))
+
+// Use a bank of mutexes to reduce unnecessary synchronisation.
+// GLOBAL_MUTEX selects the mutex for a byte offset within a buffer by masking
+// the offset with (NUM_GLOBAL_MUTEXES-1). The macro arguments are
+// parenthesised so that expression arguments are evaluated as a whole.
+#define NUM_GLOBAL_MUTEXES 4096 // Must be power of two
+#define GLOBAL_MUTEX(buffer,offset) \
+  m_globalMutexes[(buffer)][(offset) & (NUM_GLOBAL_MUTEXES-1)]
RaceDetector::RaceDetector(const Context *context)
: Plugin(context)
@@ -29,12 +36,6 @@ RaceDetector::RaceDetector(const Context *context)
m_allowUniformWrites = !checkEnv("OCLGRIND_UNIFORM_WRITES");
}
-bool RaceDetector::isThreadSafe() const
-{
- // TODO: Improve DRD efficiency for multi-threaded case instead.
- return false;
-}
-
void RaceDetector::kernelBegin(const KernelInvocation *kernelInvocation)
{
m_kernelInvocation = kernelInvocation;
@@ -42,295 +43,486 @@ void RaceDetector::kernelBegin(const KernelInvocation *kernelInvocation)
void RaceDetector::kernelEnd(const KernelInvocation *kernelInvocation)
{
- synchronize(m_context->getGlobalMemory(), false);
+ // Log races
+ for (auto race : kernelRaces)
+ logRace(race);
+ kernelRaces.clear();
+
+ // Clear all global memory accesses
+ for (auto &buffer : m_globalAccesses)
+ {
+ size_t sz = buffer.second.size();
+ buffer.second.clear();
+ buffer.second.resize(sz);
+ }
m_kernelInvocation = NULL;
}
void RaceDetector::memoryAllocated(const Memory *memory, size_t address,
- size_t size, cl_mem_flags flags)
+ size_t size, cl_mem_flags flags,
+ const uint8_t *initData)
{
- if (memory->getAddressSpace() == AddrSpacePrivate ||
- memory->getAddressSpace() == AddrSpaceConstant)
- return;
-
- m_state[KEY(memory,address)] = make_pair(new State[size], size);
+ size_t buffer = memory->extractBuffer(address);
+ if (memory->getAddressSpace() == AddrSpaceGlobal)
+ {
+ m_globalAccesses[buffer].resize(size);
+ m_globalMutexes[buffer] = new mutex[NUM_GLOBAL_MUTEXES];
+ }
}
void RaceDetector::memoryAtomicLoad(const Memory *memory,
const WorkItem *workItem,
AtomicOp op, size_t address, size_t size)
{
- registerAtomic(memory, workItem, address, size, false);
+ registerAccess(memory, workItem->getWorkGroup(), workItem,
+ address, size, true);
}
void RaceDetector::memoryAtomicStore(const Memory *memory,
const WorkItem *workItem,
AtomicOp op, size_t address, size_t size)
{
- registerAtomic(memory, workItem, address, size, true);
+ registerAccess(memory, workItem->getWorkGroup(), workItem,
+ address, size, true,
+ (const uint8_t*)memory->getPointer(address));
}
void RaceDetector::memoryDeallocated(const Memory *memory, size_t address)
{
- if (memory->getAddressSpace() == AddrSpacePrivate ||
- memory->getAddressSpace() == AddrSpaceConstant)
- return;
+ size_t buffer = memory->extractBuffer(address);
+ if (memory->getAddressSpace() == AddrSpaceGlobal)
+ {
+ m_globalAccesses.erase(buffer);
- delete[] m_state[KEY(memory,address)].first;
- m_state.erase(KEY(memory,address));
+ delete[] m_globalMutexes.at(buffer);
+ m_globalMutexes.erase(buffer);
+ }
}
void RaceDetector::memoryLoad(const Memory *memory, const WorkItem *workItem,
size_t address, size_t size)
{
- registerLoadStore(memory, workItem, workItem->getWorkGroup(),
- address, size, NULL);
+ registerAccess(memory, workItem->getWorkGroup(), workItem,
+ address, size, false, NULL);
}
void RaceDetector::memoryLoad(const Memory *memory, const WorkGroup *workGroup,
size_t address, size_t size)
{
- registerLoadStore(memory, NULL, workGroup, address, size, NULL);
+ registerAccess(memory, workGroup, NULL, address, size, false);
}
void RaceDetector::memoryStore(const Memory *memory, const WorkItem *workItem,
size_t address, size_t size,
const uint8_t *storeData)
{
- registerLoadStore(memory, workItem, workItem->getWorkGroup(),
- address, size, storeData);
+ registerAccess(memory, workItem->getWorkGroup(), workItem,
+ address, size, false, storeData);
}
void RaceDetector::memoryStore(const Memory *memory, const WorkGroup *workGroup,
size_t address, size_t size,
const uint8_t *storeData)
{
- registerLoadStore(memory, NULL, workGroup, address, size, storeData);
+ registerAccess(memory, workGroup, NULL,
+ address, size, false, storeData);
+}
+
+// Barrier callback: synchronise the recorded work-item accesses for each
+// memory space that the barrier's fence flags cover.
+void RaceDetector::workGroupBarrier(const WorkGroup *workGroup, uint32_t flags)
+{
+  const bool localFence = (flags & CLK_LOCAL_MEM_FENCE) != 0;
+  const bool globalFence = (flags & CLK_GLOBAL_MEM_FENCE) != 0;
+
+  // The work-group state is only looked up when a fence flag is set
+  if (localFence)
+  {
+    WorkGroupState& state = STATE(workGroup);
+    syncWorkItems(workGroup->getLocalMemory(), state, state.wiLocal);
+  }
+
+  if (globalFence)
+  {
+    WorkGroupState& state = STATE(workGroup);
+    syncWorkItems(m_context->getGlobalMemory(), state, state.wiGlobal);
+  }
+}
+
+// Set up the race detector state for a work-group that is about to start
+// executing on the calling thread.
+void RaceDetector::workGroupBegin(const WorkGroup *workGroup)
+{
+  // m_state is thread-local; create its table of work-groups on first use
+  if (m_state.groups == NULL)
+    m_state.groups = new unordered_map<const WorkGroup*,WorkGroupState>;
+
+  // Create the entry for this work-group and record its work-item count
+  WorkGroupState& wgState = (*m_state.groups)[workGroup];
+  const Size3 groupSize = workGroup->getGroupSize();
+  wgState.numWorkItems = groupSize.x*groupSize.y*groupSize.z;
+
+  // One access map per work-item, plus a final map that registerAccess()
+  // uses for accesses made without a work-item. Every map is copied from a
+  // prototype built with wgGlobal's allocator so that the pool is re-used.
+  const size_t numMaps = wgState.numWorkItems + 1;
+  AccessMap prototype(0, AccessMap::hasher(), AccessMap::key_equal(),
+                      wgState.wgGlobal.get_allocator());
+  wgState.wiGlobal.resize(numMaps, prototype);
+  wgState.wiLocal.resize(numMaps, prototype);
+}
+
+// Called when a work-group has finished. Performs a final synchronisation of
+// the work-item accesses for local and global memory, merges the group's
+// global accesses into the kernel-wide records (recording races against
+// accesses from other work-groups), and then releases the group's state.
+void RaceDetector::workGroupComplete(const WorkGroup *workGroup)
+{
+ WorkGroupState& state = STATE(workGroup);
+
+ syncWorkItems(workGroup->getLocalMemory(), state, state.wiLocal);
+ syncWorkItems(m_context->getGlobalMemory(), state, state.wiGlobal);
+
+ // Merge global accesses across kernel invocation
+ size_t group = workGroup->getGroupIndex();
+ for (auto &record : state.wgGlobal)
+ {
+ size_t address = record.first;
+ size_t buffer = m_context->getGlobalMemory()->extractBuffer(address);
+ size_t offset = m_context->getGlobalMemory()->extractOffset(address);
+
+ // Hold the mutex selected for this buffer/offset for the rest of the
+ // iteration, covering both the checks and the update of the shared record
+ lock_guard<mutex> lock(GLOBAL_MUTEX(buffer, offset));
+
+ // a: this work-group's accesses; b: accesses merged so far for the kernel
+ AccessRecord& a = record.second;
+ AccessRecord& b = m_globalAccesses.at(buffer)[offset];
+
+ // Check for races with previous accesses
+ // (only accesses attributed to a different work-group are reported here)
+ if (check(a.load, b.store) && getAccessWorkGroup(b.store) != group)
+ insertKernelRace({AddrSpaceGlobal, address, a.load, b.store});
+ if (check(a.store, b.load) && getAccessWorkGroup(b.load) != group)
+ insertKernelRace({AddrSpaceGlobal, address, a.store, b.load});
+ if (check(a.store, b.store) && getAccessWorkGroup(b.store) != group)
+ insertKernelRace({AddrSpaceGlobal, address, a.store, b.store});
+
+ // Insert accesses
+ // (done after the checks so that they compare against the previous record)
+ if (a.load.isSet())
+ insert(b, a.load);
+ if (a.store.isSet())
+ insert(b, a.store);
+ }
+ state.wgGlobal.clear();
+
+ // Clean-up work-group state
+ // (the thread-local table itself is deleted once its last group is erased)
+ m_state.groups->erase(workGroup);
+ if (m_state.groups->empty())
+ {
+ delete m_state.groups;
+ m_state.groups = NULL;
+ }
+}
+
+// Decide whether two recorded accesses to the same location form a data
+// race. Returns true for a read-write or (non-uniform) write-write race.
+bool RaceDetector::check(const MemoryAccess& a,
+                         const MemoryAccess& b) const
+{
+  // Nothing to compare unless both accesses have been recorded
+  if (!(a.isSet() && b.isSet()))
+    return false;
+
+  // A work-item cannot race with itself
+  const bool sameWorkItem =
+    a.isWorkItem() && b.isWorkItem() && (a.getEntity() == b.getEntity());
+  if (sameWorkItem)
+    return false;
+
+  // Two atomic operations do not race with each other
+  if (a.isAtomic() && b.isAtomic())
+    return false;
+
+  // Two loads never race
+  if (!a.isStore() && !b.isStore())
+    return false;
+
+  // One load and one store: read-write race
+  if (a.isLoad() || b.isLoad())
+    return true;
+
+  // Two stores: a race unless uniform writes are allowed and both stores
+  // wrote the same byte value
+  return !m_allowUniformWrites || (a.getStoreData() != b.getStoreData());
+}
+
+// Return the linear index of the work-group that performed an access.
+//
+// Work-group accesses already store the group index as their entity.
+// Work-item accesses store the work-item's linear global index, which is
+// decoded to a 3D global ID, divided per dimension by the work-group size and
+// then re-linearised over the number of work-groups. Dividing the linear
+// global index by the work-group volume only gives the right answer for
+// one-dimensional launches.
+// NOTE(review): assumes WorkGroup::getGroupIndex() linearises with x varying
+// fastest, matching the Size3(index, dimensions) decoding used by logRace().
+size_t RaceDetector::getAccessWorkGroup(const MemoryAccess& access) const
+{
+  if (!access.isWorkItem())
+    return access.getEntity();
+
+  const Size3 wgsize = m_kernelInvocation->getLocalSize();
+  const Size3 numGroups = m_kernelInvocation->getNumGroups();
+
+  // 3D global ID of the work-item, then the 3D ID of its work-group
+  Size3 global(access.getEntity(), m_kernelInvocation->getGlobalSize());
+  Size3 group(global.x/wgsize.x, global.y/wgsize.y, global.z/wgsize.z);
+
+  return group.x + (group.y + group.z*numGroups.y)*numGroups.x;
+}
+
+// Store an access in the load or store slot of a record. An existing entry
+// is only replaced if it is unset or atomic, so the first non-atomic access
+// recorded in a slot is kept.
+void RaceDetector::insert(AccessRecord& record,
+                          const MemoryAccess& access) const
+{
+  // Every access is either a load or a store, so this picks exactly one slot
+  MemoryAccess& slot = access.isLoad() ? record.load : record.store;
+
+  if (!slot.isSet() || slot.isAtomic())
+    slot = access;
+}
-void RaceDetector::logRace(DataRaceType type,
- unsigned int addrSpace,
- size_t address,
- size_t lastWorkGroup,
- size_t lastWorkItem,
- const llvm::Instruction *lastInstruction) const
+void RaceDetector::insertKernelRace(const Race& race)
{
- const char *raceType = NULL;
- switch (type)
+ lock_guard<mutex> lock(kernelRacesMutex);
+ insertRace(kernelRaces, race);
+}
+
+void RaceDetector::insertRace(RaceList& races, const Race& race) const
+{
+ // Check list for duplicates
+ for (auto x = races.begin(); x != races.end(); x++)
{
- case ReadWriteRace:
- raceType = "Read-write";
- break;
- case WriteWriteRace:
- raceType = "Write-write";
- break;
+ // Check if races are equal modulo address
+ if ((race.a == x->a && race.b == x->b) ||
+ (race.a == x->b && race.b == x->a))
+ {
+ // If they match, keep the one with the lowest address
+ if (race.address < x->address)
+ {
+ races.erase(x);
+ races.push_back(race);
+ return;
+ }
+ else
+ return;
+ }
}
+ races.push_back(race);
+}
+
+// Report a data race as an error message that names the race type, the
+// address and both entities involved. Work-item entities are printed with
+// their global, local and group IDs. Work-group entities store a linear
+// group index, which is decoded over the number of work-groups in the
+// launch (not over the work-group size).
+void RaceDetector::logRace(const Race& race) const
+{
+ const char *raceType;
+ if (race.a.isLoad() || race.b.isLoad())
+ raceType = "Read-write";
+ else
+ raceType = "Write-write";
+
Context::Message msg(ERROR, m_context);
msg << raceType << " data race at "
- << getAddressSpaceName(addrSpace)
- << " memory address 0x" << hex << address << endl
+ << getAddressSpaceName(race.addrspace)
+ << " memory address 0x" << hex << race.address << endl
<< msg.INDENT
<< "Kernel: " << msg.CURRENT_KERNEL << endl
<< endl
- << "First entity: " << msg.CURRENT_ENTITY << endl
- << msg.CURRENT_LOCATION << endl
- << endl
- << "Second entity: ";
+ << "First entity: ";
- // Show details of other entity involved in race
- if (lastWorkItem != -1)
+ if (race.a.isWorkItem())
{
- Size3 global(lastWorkItem, m_kernelInvocation->getGlobalSize());
- Size3 local, group;
- local.x = global.x % m_kernelInvocation->getLocalSize().x;
- local.y = global.y % m_kernelInvocation->getLocalSize().y;
- local.z = global.z % m_kernelInvocation->getLocalSize().z;
- group.x = global.x / m_kernelInvocation->getLocalSize().x;
- group.y = global.y / m_kernelInvocation->getLocalSize().y;
- group.z = global.z / m_kernelInvocation->getLocalSize().z;
+ Size3 wgsize = m_kernelInvocation->getLocalSize();
+ Size3 global(race.a.getEntity(), m_kernelInvocation->getGlobalSize());
+ Size3 local(global.x%wgsize.x, global.y%wgsize.y, global.z%wgsize.z);
+ Size3 group(global.x/wgsize.x, global.y/wgsize.y, global.z/wgsize.z);
msg << "Global" << global << " Local" << local << " Group" << group;
}
- else if (lastWorkGroup != -1)
+ else
{
msg << "Group"
- << Size3(lastWorkGroup, m_kernelInvocation->getNumGroups());
+ << Size3(race.a.getEntity(), m_kernelInvocation->getNumGroups());
+ }
+
+ msg << endl << race.a.getInstruction() << endl
+ << endl
+ << "Second entity: ";
+
+ // Show details of other entity involved in race
+ if (race.b.isWorkItem())
+ {
+ Size3 wgsize = m_kernelInvocation->getLocalSize();
+ Size3 global(race.b.getEntity(), m_kernelInvocation->getGlobalSize());
+ Size3 local(global.x%wgsize.x, global.y%wgsize.y, global.z%wgsize.z);
+ Size3 group(global.x/wgsize.x, global.y/wgsize.y, global.z/wgsize.z);
+ msg << "Global" << global << " Local" << local << " Group" << group;
}
else
{
- msg << "(unknown)";
+ msg << "Group"
+ << Size3(race.b.getEntity(), m_kernelInvocation->getNumGroups());
}
- msg << endl
- << lastInstruction << endl;
+ msg << endl << race.b.getInstruction() << endl;
msg.send();
}
-void RaceDetector::registerAtomic(const Memory *memory,
+void RaceDetector::registerAccess(const Memory *memory,
+ const WorkGroup *workGroup,
const WorkItem *workItem,
- size_t address, size_t size,
- bool store)
+ size_t address, size_t size, bool atomic,
+ const uint8_t *storeData)
{
+ unsigned addrSpace = memory->getAddressSpace();
+ if (addrSpace == AddrSpacePrivate ||
+ addrSpace == AddrSpaceConstant)
+ return;
if (!memory->isAddressValid(address, size))
return;
- State *state = m_state[KEY(memory,address)].first + EXTRACT_OFFSET(address);
+ // Construct access
+ MemoryAccess access(workGroup, workItem, storeData != NULL, atomic);
- // Get work-item index
- size_t workItemIndex = workItem->getGlobalIndex();
+ size_t index;
+ if (workItem)
+ {
+ Size3 wgsize = workGroup->getGroupSize();
+ Size3 lid = workItem->getLocalID();
+ index = lid.x + (lid.y + lid.z*wgsize.y)*wgsize.x;
+ }
+ else
+ {
+ index = STATE(workGroup).wiLocal.size() - 1;
+ }
+
+ AccessMap& accesess = (addrSpace == AddrSpaceGlobal) ?
+ STATE(workGroup).wiGlobal[index] :
+ STATE(workGroup).wiLocal[index];
- bool race = false;
- for (size_t offset = 0; offset < size; offset++, state++)
+ for (size_t i = 0; i < size; i++)
{
- // Check for races with non-atomic operations
- bool conflict = store ? !state->canAtomicStore : !state->canAtomicLoad;
- if (!race && conflict && workItemIndex != state->workItem)
- {
- logRace(ReadWriteRace,
- memory->getAddressSpace(),
- address,
- state->workItem,
- state->workGroup,
- state->instruction);
- race = true;
- }
+ if (storeData)
+ access.setStoreData(storeData[i]);
+
+ insert(accesess[address+i], access);
+ }
+}
- // Update state
- if (store)
- state->canLoad = false;
- state->canStore = false;
- if (!state->wasWorkItem)
+void RaceDetector::syncWorkItems(const Memory *memory,
+ WorkGroupState& state,
+ vector<AccessMap>& accesses)
+{
+ AccessMap wgAccesses(0, AccessMap::hasher(), AccessMap::key_equal(),
+ state.wgGlobal.get_allocator());
+
+ for (size_t i = 0; i < state.numWorkItems + 1; i++)
+ {
+ RaceList races;
+ for (auto &record : accesses[i])
{
- state->instruction = workItem->getCurrentInstruction();
- state->workItem = workItemIndex;
- state->wasWorkItem = true;
+ size_t address = record.first;
+
+ AccessRecord& a = record.second;
+ AccessRecord& b = wgAccesses[address];
+
+ if (check(a.load, b.store))
+ insertRace(races, {memory->getAddressSpace(),address,a.load,b.store});
+ if (check(a.store, b.load))
+ insertRace(races, {memory->getAddressSpace(),address,a.store,b.load});
+ if (check(a.store, b.store))
+ insertRace(races, {memory->getAddressSpace(),address,a.store,b.store});
+
+ if (a.load.isSet())
+ {
+ insert(b, a.load);
+ if (memory->getAddressSpace() == AddrSpaceGlobal)
+ insert(state.wgGlobal[address], a.load);
+ }
+ if (a.store.isSet())
+ {
+ insert(b, a.store);
+ if (memory->getAddressSpace() == AddrSpaceGlobal)
+ insert(state.wgGlobal[address], a.store);
+ }
}
+
+ accesses[i].clear();
+
+ // Log races
+ for (auto race : races)
+ logRace(race);
}
}
-void RaceDetector::registerLoadStore(const Memory *memory,
- const WorkItem *workItem,
- const WorkGroup *workGroup,
- size_t address, size_t size,
- const uint8_t *storeData)
+RaceDetector::MemoryAccess::MemoryAccess()
{
- if (!m_kernelInvocation)
- return;
- if (memory->getAddressSpace() == AddrSpacePrivate ||
- memory->getAddressSpace() == AddrSpaceConstant)
- return;
- if (!memory->isAddressValid(address, size))
- return;
+ this->info = 0;
+ this->instruction = NULL;
+}
- bool load = !storeData;
- bool store = storeData;
+RaceDetector::MemoryAccess::MemoryAccess(const WorkGroup *workGroup,
+ const WorkItem *workItem,
+ bool store, bool atomic)
+{
+ this->info = 0;
+
+ this->info |= 1 << SET_BIT;
+ this->info |= store << STORE_BIT;
+ this->info |= atomic << ATOMIC_BIT;
- // Get index of work-item and work-group performing access
- size_t workItemIndex = -1, workGroupIndex = -1;
if (workItem)
{
- workItemIndex = workItem->getGlobalIndex();
+ this->entity = workItem->getGlobalIndex();
+ this->instruction = workItem->getCurrentInstruction();
}
- if (workGroup)
+ else
{
- workGroupIndex = workGroup->getGroupIndex();
+ this->info |= (1<<WG_BIT);
+ this->entity = workGroup->getGroupIndex();
+ this->instruction = NULL; // TODO?
}
+}
- bool race = false;
- size_t base = EXTRACT_OFFSET(address);
- State *state = m_state[KEY(memory, address)].first + base;
+void RaceDetector::MemoryAccess::clear()
+{
+ this->info = 0;
+ this->instruction = NULL;
+}
- for (size_t offset = 0; offset < size; offset++, state++)
- {
- bool conflict = store ? !state->canStore : !state->canLoad;
- if (m_allowUniformWrites && storeData)
- {
- uint8_t *ptr = (uint8_t*)(memory->getPointer(address));
- conflict &= (ptr[offset] != storeData[offset]);
- }
+bool RaceDetector::MemoryAccess::isSet() const
+{
+ return this->info & (1<<SET_BIT);
+}
- if (!race && conflict &&
- (state->wasWorkItem ? // If state set by work-item,
- state->workItem != workItemIndex : // must be same work-item,
- state->workGroup != workGroupIndex) // otherwise must be same group
- )
- {
- // Report data-race
- DataRaceType type = load|state->canLoad ? ReadWriteRace : WriteWriteRace;
- logRace(type, memory->getAddressSpace(),
- address + offset,
- state->workItem,
- state->workGroup,
- state->instruction);
- race = true;
- }
- else
- {
- // Only update WI info if this operation is stronger than previous one
- bool updateWI = store || (load && state->canStore);
-
- // Update state
- if (store)
- state->canAtomicLoad = false;
- state->canAtomicStore = false;
- state->canLoad &= load;
- state->canStore = false;
- if (updateWI)
- {
- state->workGroup = workGroupIndex;
- if (workItem)
- {
- state->instruction = workItem->getCurrentInstruction();
- state->workItem = workItemIndex;
- state->wasWorkItem = true;
- }
- }
- }
- }
+bool RaceDetector::MemoryAccess::isAtomic() const
+{
+ return this->info & (1<<ATOMIC_BIT);
}
-void RaceDetector::synchronize(const Memory *memory, bool workGroup)
+bool RaceDetector::MemoryAccess::isLoad() const
{
- StateMap::iterator itr;
- for (itr = m_state.begin(); itr != m_state.end(); itr++)
- {
- if (itr->first.first != memory)
- continue;
+ return !isStore();
+}
- pair<State*,size_t> obj = itr->second;
- for (State *state = obj.first; state < obj.first+obj.second; state++)
- {
- // TODO: atomic_intergroup_race test failure
- state->canAtomicLoad = true;
- state->canAtomicStore = true;
- state->workItem = -1;
- state->wasWorkItem = false;
- if (!workGroup)
- {
- state->workGroup = -1;
- state->canLoad = true;
- state->canStore = true;
- }
- }
- }
+bool RaceDetector::MemoryAccess::isStore() const
+{
+ return this->info & (1<<STORE_BIT);
}
-void RaceDetector::workGroupBarrier(const WorkGroup *workGroup, uint32_t flags)
+bool RaceDetector::MemoryAccess::isWorkGroup() const
{
- if (flags & CLK_LOCAL_MEM_FENCE)
- synchronize(workGroup->getLocalMemory(), false);
- if (flags & CLK_GLOBAL_MEM_FENCE)
- synchronize(m_context->getGlobalMemory(), true);
+ return this->info & (1<<WG_BIT);
+}
+
+bool RaceDetector::MemoryAccess::isWorkItem() const
+{
+ return !isWorkGroup();
+}
+
+size_t RaceDetector::MemoryAccess::getEntity() const
+{
+ return this->entity;
+}
+
+const llvm::Instruction* RaceDetector::MemoryAccess::getInstruction() const
+{
+ return this->instruction;
+}
+
+uint8_t RaceDetector::MemoryAccess::getStoreData() const
+{
+ return this->storeData;
+}
+
+void RaceDetector::MemoryAccess::setStoreData(uint8_t data)
+{
+ this->storeData = data;
}
-RaceDetector::State::State()
+bool RaceDetector::MemoryAccess::operator==(
+ const RaceDetector::MemoryAccess& other) const
{
- instruction = NULL;
- workItem = -1;
- workGroup = -1;
- canAtomicLoad = true;
- canAtomicStore = true;
- canLoad = true;
- canStore = true;
- wasWorkItem = false;
+ return this->entity == other.entity &&
+ this->instruction == other.instruction &&
+ this->info == other.info;
}
diff --git a/src/plugins/RaceDetector.h b/src/plugins/RaceDetector.h
index 2442b56..c65cbd3 100644
--- a/src/plugins/RaceDetector.h
+++ b/src/plugins/RaceDetector.h
@@ -1,5 +1,5 @@
// RaceDetector.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
@@ -8,6 +8,8 @@
#include "core/Plugin.h"
+#include <mutex>
+
namespace oclgrind
{
class RaceDetector : public Plugin
@@ -18,7 +20,8 @@ namespace oclgrind
virtual void kernelBegin(const KernelInvocation *kernelInvocation) override;
virtual void kernelEnd(const KernelInvocation *kernelInvocation) override;
virtual void memoryAllocated(const Memory *memory, size_t address,
- size_t size, cl_mem_flags flags) override;
+ size_t size, cl_mem_flags flags,
+ const uint8_t *initData) override;
virtual void memoryAtomicLoad(const Memory *memory,
const WorkItem *workItem,
AtomicOp op,
@@ -27,7 +30,8 @@ namespace oclgrind
const WorkItem *workItem,
AtomicOp op,
size_t address, size_t size) override;
- virtual void memoryDeallocated(const Memory *memory, size_t address);
+ virtual void memoryDeallocated(const Memory *memory,
+ size_t address) override;
virtual void memoryLoad(const Memory *memory, const WorkItem *workItem,
size_t address, size_t size) override;
virtual void memoryLoad(const Memory *memory, const WorkGroup *workGroup,
@@ -40,55 +44,103 @@ namespace oclgrind
const uint8_t *storeData) override;
virtual void workGroupBarrier(const WorkGroup *workGroup,
uint32_t flags) override;
-
- virtual bool isThreadSafe() const override;
+ virtual void workGroupBegin(const WorkGroup *workGroup) override;
+ virtual void workGroupComplete(const WorkGroup *workGroup) override;
private:
- struct State
+ struct MemoryAccess
{
+ private:
+ size_t entity;
const llvm::Instruction *instruction;
- size_t workItem;
- size_t workGroup;
- bool canAtomicLoad;
- bool canAtomicStore;
- bool canLoad;
- bool canStore;
- bool wasWorkItem;
-
- State();
+
+ uint8_t info;
+ static const unsigned SET_BIT = 0;
+ static const unsigned STORE_BIT = 1;
+ static const unsigned ATOMIC_BIT = 2;
+ static const unsigned WG_BIT = 3;
+ uint8_t storeData;
+
+ public:
+ void clear();
+
+ bool isSet() const;
+
+ bool isAtomic() const;
+ bool isLoad() const;
+ bool isStore() const;
+ bool isWorkGroup() const;
+ bool isWorkItem() const;
+
+ size_t getEntity() const;
+ const llvm::Instruction* getInstruction() const;
+
+ uint8_t getStoreData() const;
+ void setStoreData(uint8_t);
+
+ MemoryAccess();
+ MemoryAccess(const WorkGroup *workGroup, const WorkItem *workItem,
+ bool store, bool atomic);
+
+ bool operator==(const MemoryAccess& other) const;
};
+ struct AccessRecord
+ {
+ MemoryAccess load;
+ MemoryAccess store;
+ };
+ typedef std::vector<MemoryAccess> AccessList;
+ typedef std::unordered_map<
+ size_t,AccessRecord,
+ std::hash<size_t>,
+ std::equal_to<size_t>,
+ PoolAllocator<std::pair<const size_t,AccessRecord>,8192>
+ > AccessMap;
+
+ std::unordered_map<size_t,std::vector<AccessRecord>> m_globalAccesses;
+ std::map< size_t,std::mutex* > m_globalMutexes;
- // Enumeration for types of data-race
- enum DataRaceType
+ struct WorkGroupState
+ {
+ size_t numWorkItems;
+ std::vector<AccessMap> wiLocal;
+ std::vector<AccessMap> wiGlobal;
+ AccessMap wgGlobal;
+ };
+ struct WorkerState
{
- ReadWriteRace,
- WriteWriteRace
+ std::unordered_map<const WorkGroup*,WorkGroupState> *groups;
};
+ static THREAD_LOCAL WorkerState m_state;
- typedef std::map<
- std::pair<const Memory*, size_t>,
- std::pair<State*, size_t>
- > StateMap;
- StateMap m_state;
+ struct Race
+ {
+ unsigned addrspace;
+ size_t address;
+ MemoryAccess a, b;
+ };
+ typedef std::list<Race> RaceList;
bool m_allowUniformWrites;
const KernelInvocation *m_kernelInvocation;
- void logRace(DataRaceType type,
- unsigned int addrSpace,
- size_t address,
- size_t lastWorkGroup,
- size_t lastWorkItem,
- const llvm::Instruction *lastInstruction) const;
- void registerAtomic(const Memory *memory,
+ std::mutex kernelRacesMutex;
+ RaceList kernelRaces;
+
+ size_t getAccessWorkGroup(const MemoryAccess& access) const;
+
+ bool check(const MemoryAccess& a, const MemoryAccess& b) const;
+ void insert(AccessRecord& record, const MemoryAccess& access) const;
+ void insertKernelRace(const Race& race);
+ void insertRace(RaceList& races, const Race& race) const;
+ void logRace(const Race& race) const;
+ void registerAccess(const Memory *memory,
+ const WorkGroup *workGroup,
const WorkItem *workItem,
- size_t address, size_t size,
- bool store);
- void registerLoadStore(const Memory *memory,
- const WorkItem *workItem,
- const WorkGroup *workGroup,
- size_t address, size_t size,
- const uint8_t *storeData);
- void synchronize(const Memory *memory, bool workGroup);
+ size_t address, size_t size, bool atomic,
+ const uint8_t *storeData = NULL);
+ void syncWorkItems(const Memory *memory,
+ WorkGroupState& state,
+ std::vector<AccessMap>& accesses);
};
}
diff --git a/src/plugins/Uninitialized.cpp b/src/plugins/Uninitialized.cpp
new file mode 100644
index 0000000..fb9fd77
--- /dev/null
+++ b/src/plugins/Uninitialized.cpp
@@ -0,0 +1,2811 @@
+ // Uninitialized.cpp (Oclgrind)
+// Copyright (c) 2015, Moritz Pflanzer
+// Imperial College London. All rights reserved.
+//
+// This program is provided under a three-clause BSD license. For full
+// license terms please see the LICENSE file distributed with this
+// source code.
+
+#include "core/common.h"
+
+#include "core/Context.h"
+#include "core/Memory.h"
+#include "core/WorkItem.h"
+#include "core/WorkGroup.h"
+#include "core/Kernel.h"
+#include "core/KernelInvocation.h"
+
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Type.h"
+
+#include "Uninitialized.h"
+#include <mutex>
+
+using namespace oclgrind;
+using namespace std;
+
+//void Uninitialized::memoryAllocated(const Memory *memory, size_t address,
+// size_t size, cl_mem_flags flags,
+// const uint8_t *initData)
+//{
+// cout << "Memory: " << memory << ", address: " << hex << address << dec << ", size: " << size << endl;
+//}
+
+ // Multiple mutexes to mitigate risk of unnecessary synchronisation in atomics
+ // The pool size is used as a bit mask below, hence the power-of-two requirement.
+ #define NUM_ATOMIC_MUTEXES 64 // Must be power of two
+ static std::mutex atomicShadowMutex[NUM_ATOMIC_MUTEXES];
+ // Select a mutex from the pool for a given offset: the offset is divided by
+ // four (>>2) and wrapped into the pool with the power-of-two mask.
+ #define ATOMIC_MUTEX(offset) \
+ atomicShadowMutex[(((offset)>>2) & (NUM_ATOMIC_MUTEXES-1))]
+
+ // Definition of the per-thread (THREAD_LOCAL) static workspace of
+ // ShadowContext; all pointer members start as NULL and the last member as 0.
+ THREAD_LOCAL ShadowContext::WorkSpace ShadowContext::m_workSpace = {NULL, NULL, NULL, 0};
+
+ // Construct the plugin for the given context and set up the memory pool of
+ // its shadow context.
+ // NOTE(review): the shadow context argument is 32 when size_t is 8 bytes wide
+ // and 16 otherwise; its exact meaning is defined by ShadowContext - confirm.
+ Uninitialized::Uninitialized(const Context *context)
+   : Plugin(context),
+     shadowContext((sizeof(size_t) == 8) ? 32 : 16)
+ {
+   shadowContext.createMemoryPool();
+ }
+
+ // Tear down the plugin: releases the memory pool created in the constructor.
+ Uninitialized::~Uninitialized()
+ {
+   shadowContext.destroyMemoryPool();
+ }
+
+ // Allocate shadow memory for the region starting at 'address' (SM.size*SM.num
+ // bytes) in the given address space and store the shadow value SM into it.
+ // Requests for the constant address space are ignored.
+ void Uninitialized::allocAndStoreShadowMemory(unsigned addrSpace, size_t address, TypedValue SM,
+                                               const WorkItem *workItem, const WorkGroup *workGroup, bool unchecked)
+ {
+   //TODO: Eventually store value for the constant address space as well
+   if(addrSpace != AddrSpaceConstant)
+   {
+     ShadowMemory *shadowMem = getShadowMemory(addrSpace, workItem, workGroup);
+     shadowMem->allocate(address, SM.size*SM.num);
+
+     storeShadowMemory(addrSpace, address, SM, workItem, workGroup, unchecked);
+   }
+ }
+
+ // Check the shadow of every operand of instruction I.
+ // Returns true when all operand shadows are clean. On the first operand whose
+ // shadow is not clean, an uninitialized control-flow error is logged and
+ // false is returned without looking at the remaining operands.
+ bool Uninitialized::checkAllOperandsDefined(const WorkItem *workItem, const llvm::Instruction *I)
+ {
+   llvm::Instruction::const_op_iterator operand = I->op_begin();
+
+   while(operand != I->op_end())
+   {
+     const llvm::Value *value = operand->get();
+
+     if(!ShadowContext::isCleanValue(shadowContext.getValue(workItem, value)))
+     {
+ #ifdef DUMP_SHADOW
+       value->dump();
+       cout << "Shadow value: " << shadowContext.getValue(workItem, value) << endl;
+ #endif
+       logUninitializedCF();
+ #ifdef DUMP_SHADOW
+       shadowContext.dump(workItem);
+ #endif
+       return false;
+     }
+
+     ++operand;
+   }
+
+   return true;
+ }
+
+ // Check whether the struct pointed to by 'src' has a clean shadow and log an
+ // uninitialized write if it does not. Sources in the constant address space
+ // are not checked.
+ // The pointer type and its struct element type are obtained with dyn_cast and
+ // used without a null check, so 'src' must be a pointer to a struct.
+ void Uninitialized::checkStructMemcpy(const WorkItem *workItem, const llvm::Value *src)
+ {
+   const llvm::PointerType *ptrTy = llvm::dyn_cast<llvm::PointerType>(src->getType());
+   const llvm::StructType *structTy = llvm::dyn_cast<llvm::StructType>(ptrTy->getElementType());
+   size_t srcAddr = workItem->getOperand(src).getPointer();
+   unsigned srcAddrSpace = ptrTy->getPointerAddressSpace();
+
+   //TODO: Constants should always be clean?!
+   if(srcAddrSpace == AddrSpaceConstant)
+   {
+     return;
+   }
+
+   // Private, local and global memory are resolved by the common lookup, which
+   // also raises the fatal error for unsupported address spaces.
+   ShadowMemory *shadowMemory = getShadowMemory(srcAddrSpace, workItem, NULL);
+
+   if(ShadowContext::isCleanStruct(shadowMemory, srcAddr, structTy))
+   {
+     return;
+   }
+
+   logUninitializedWrite(srcAddrSpace, srcAddr);
+ }
+
+ // Copy 'size' bytes of shadow memory from 'src' to 'dst'.
+ // Implemented as a strided copy of a single element with stride 1.
+ void Uninitialized::copyShadowMemory(unsigned dstAddrSpace, size_t dst,
+                                      unsigned srcAddrSpace, size_t src,
+                                      unsigned size,
+                                      const WorkItem *workItem, const WorkGroup *workGroup,
+                                      bool unchecked)
+ {
+   const size_t numElements = 1;
+   const size_t elementStride = 1;
+
+   copyShadowMemoryStrided(dstAddrSpace, dst, srcAddrSpace, src,
+                           numElements, elementStride, size,
+                           workItem, workGroup, unchecked);
+ }
+
+ // Copy 'num' elements of 'size' bytes each from shadow memory at 'src' to
+ // shadow memory at 'dst'. After every element BOTH addresses are advanced by
+ // stride*size bytes.
+ // NOTE(review): OpenCL's async_work_group_strided_copy applies the stride to
+ // one side of the copy only; confirm whether striding both sides is intended.
+ void Uninitialized::copyShadowMemoryStrided(unsigned dstAddrSpace, size_t dst, unsigned srcAddrSpace, size_t src, size_t num, size_t stride, unsigned size, const WorkItem *workItem, const WorkGroup *workGroup, bool unchecked)
+ {
+   // Temporary buffer holding the shadow of one element at a time
+   TypedValue v = {
+     size,
+     1,
+     new unsigned char[size]
+   };
+
+   // The counter has the same type as 'num' so that it cannot wrap around
+   // before reaching the element count.
+   for (size_t i = 0; i < num; i++)
+   {
+     loadShadowMemory(srcAddrSpace, src, v, workItem, workGroup);
+     storeShadowMemory(dstAddrSpace, dst, v, workItem, workGroup, unchecked);
+     src += stride * size;
+     dst += stride * size;
+   }
+
+   delete[] v.data;
+ }
+
+ // Extract the plain function name from a mangled symbol of the form
+ // "_Z<length><name>...". Names that do not start with "_Z" are returned
+ // unchanged. A symbol consisting only of "_Z" followed by digits has no name
+ // part and is also returned unchanged.
+ std::string Uninitialized::extractUnmangledName(const std::string fullname)
+ {
+   if(fullname.compare(0, 2, "_Z") != 0)
+   {
+     return fullname;
+   }
+
+   // Position of the first character after the decimal length prefix.
+   // Kept as size_t so that npos is not truncated.
+   size_t start = fullname.find_first_not_of("0123456789", 2);
+
+   if(start == std::string::npos)
+   {
+     // Nothing follows the length prefix; substr() would throw here
+     return fullname;
+   }
+
+   int len = atoi(fullname.c_str() + 2);
+   return fullname.substr(start, len);
+ }
+
+ // Look up the shadow memory object backing an address space.
+ //  - global:  the global shadow memory of the shadow context
+ //  - private: the private shadow memory of 'workItem' (work item required)
+ //  - local:   the local shadow memory of 'workGroup'; when no work group is
+ //             given, the group of 'workItem' is used instead
+ // Any other address space raises a fatal error.
+ ShadowMemory* Uninitialized::getShadowMemory(unsigned addrSpace,
+                                              const WorkItem *workItem, const WorkGroup *workGroup) const
+ {
+   switch(addrSpace)
+   {
+     case AddrSpaceGlobal:
+       return shadowContext.getGlobalMemory();
+     case AddrSpacePrivate:
+     {
+       if(!workItem)
+       {
+         FATAL_ERROR("Work item needed to access private memory!");
+       }
+
+       ShadowWorkItem *shadowItem = shadowContext.getShadowWorkItem(workItem);
+       return shadowItem->getPrivateMemory();
+     }
+     case AddrSpaceLocal:
+     {
+       const WorkGroup *group = workGroup;
+
+       if(!group)
+       {
+         // Fall back to the work group that owns the work item
+         if(!workItem)
+         {
+           FATAL_ERROR("Work item or work group needed to access local memory!");
+         }
+
+         group = workItem->getWorkGroup();
+       }
+
+       return shadowContext.getShadowWorkGroup(group)->getLocalMemory();
+     }
+     // The constant address space (AddrSpaceConstant) is not handled here
+     default:
+       FATAL_ERROR("Unsupported addressspace %d", addrSpace);
+   }
+ }
+
+ // Propagate shadow (definedness) information for a call to a builtin function.
+ //
+ //   workItem  work item executing the call
+ //   name      (possibly mangled) name of the called function
+ //   CI        the call instruction
+ //   result    value produced by the call; used for the size/shape of the
+ //             result shadow
+ //
+ // Returns true if the builtin is recognised and its shadow has been handled
+ // here, false if the caller has to apply its generic handling.
+ bool Uninitialized::handleBuiltinFunction(const WorkItem *workItem, string name,
+                                           const llvm::CallInst *CI, const TypedValue result)
+ {
+   name = extractUnmangledName(name);
+   ShadowValues *shadowValues = shadowContext.getShadowWorkItem(workItem)->getValues();
+
+   if(name == "async_work_group_copy" ||
+      name == "async_work_group_strided_copy")
+   {
+     int arg = 0;
+
+     // Get src/dest addresses
+     const llvm::Value *dstOp = CI->getArgOperand(arg++);
+     const llvm::Value *srcOp = CI->getArgOperand(arg++);
+     size_t dst = workItem->getOperand(dstOp).getPointer();
+     size_t src = workItem->getOperand(srcOp).getPointer();
+
+     // Get size of copy
+     unsigned elemSize = getTypeSize(dstOp->getType()->getPointerElementType());
+
+     const llvm::Value *numOp = CI->getArgOperand(arg++);
+     uint64_t num = workItem->getOperand(numOp).getUInt();
+     TypedValue numShadow = shadowContext.getValue(workItem, numOp);
+
+     if(!ShadowContext::isCleanValue(numShadow))
+     {
+       logUninitializedIndex();
+     }
+
+     // Get stride
+     size_t stride = 1;
+
+     if(name == "async_work_group_strided_copy")
+     {
+       const llvm::Value *strideOp = CI->getArgOperand(arg++);
+       stride = workItem->getOperand(strideOp).getUInt();
+       TypedValue strideShadow = shadowContext.getValue(workItem, strideOp);
+
+       if(!ShadowContext::isCleanValue(strideShadow))
+       {
+         logUninitializedIndex();
+       }
+     }
+
+     const llvm::Value *eventOp = CI->getArgOperand(arg++);
+     TypedValue eventShadow = shadowContext.getValue(workItem, eventOp);
+
+     // Get type of copy: one side is local, the other one global
+     AddressSpace dstAddrSpace = AddrSpaceLocal;
+     AddressSpace srcAddrSpace = AddrSpaceLocal;
+
+     if(dstOp->getType()->getPointerAddressSpace() == AddrSpaceLocal)
+     {
+       srcAddrSpace = AddrSpaceGlobal;
+     }
+     else
+     {
+       dstAddrSpace = AddrSpaceGlobal;
+     }
+
+     copyShadowMemoryStrided(dstAddrSpace, dst, srcAddrSpace, src, num, stride, elemSize, workItem);
+     // The returned event inherits the shadow of the event argument
+     shadowValues->setValue(CI, eventShadow);
+
+     // Check shadow of src address
+     TypedValue srcShadow = shadowContext.getValue(workItem, srcOp);
+
+     if(!ShadowContext::isCleanValue(srcShadow))
+     {
+       logUninitializedAddress(srcAddrSpace, src, false);
+     }
+
+     // Check shadow of dst address
+     TypedValue dstShadow = shadowContext.getValue(workItem, dstOp);
+
+     if(!ShadowContext::isCleanValue(dstShadow))
+     {
+       logUninitializedAddress(dstAddrSpace, dst);
+     }
+
+     return true;
+   }
+   else if(name == "wait_group_events")
+   {
+     const llvm::Value *Addr = CI->getArgOperand(1);
+     const llvm::Value *Num = CI->getArgOperand(0);
+     uint64_t num = workItem->getOperand(Num).getUInt();
+     size_t address = workItem->getOperand(Addr).getPointer();
+
+     TypedValue numShadow = shadowContext.getValue(workItem, Num);
+     TypedValue eventShadow = {
+       sizeof(size_t),
+       1,
+       new unsigned char[sizeof(size_t)]
+     };
+
+     // Check shadow for the number of events
+     if(!ShadowContext::isCleanValue(numShadow))
+     {
+       logUninitializedCF();
+     }
+
+     // Walk the event list with a separate cursor so that 'address' keeps
+     // pointing at the start of the list for the address check below.
+     size_t eventAddress = address;
+
+     for(unsigned i = 0; i < num; ++i)
+     {
+       loadShadowMemory(AddrSpacePrivate, eventAddress, eventShadow, workItem);
+
+       if(!ShadowContext::isCleanValue(eventShadow))
+       {
+         logUninitializedCF();
+         delete[] eventShadow.data;
+         return true;
+       }
+
+       eventAddress += sizeof(size_t);
+     }
+
+     delete[] eventShadow.data;
+
+     // Check shadow of address
+     TypedValue addrShadow = shadowContext.getValue(workItem, Addr);
+
+     if(!ShadowContext::isCleanValue(addrShadow))
+     {
+       logUninitializedAddress(AddrSpacePrivate, address, false);
+     }
+
+     return true;
+   }
+   else if(name.compare(0, 6, "atomic") == 0)
+   {
+     // NOTE(review): for a name such as "atomic_cmpxchg" the text from index 6
+     // onwards is "_cmpxchg", so this comparison looks like it can never
+     // succeed and such calls end up in SimpleOrAtomic - confirm the intent.
+     if(name.compare(6, string::npos, "cmpxchg") == 0)
+     {
+       const llvm::Value *Addr = CI->getArgOperand(0);
+       unsigned addrSpace = Addr->getType()->getPointerAddressSpace();
+       size_t address = workItem->getOperand(Addr).getPointer();
+       uint32_t cmp = workItem->getOperand(CI->getArgOperand(1)).getUInt();
+       uint32_t old = workItem->getOperand(CI).getUInt();
+       TypedValue argShadow = shadowContext.getValue(workItem, CI->getArgOperand(2));
+       TypedValue cmpShadow = shadowContext.getValue(workItem, CI->getArgOperand(1));
+       TypedValue oldShadow = {
+         4,
+         1,
+         shadowContext.getMemoryPool()->alloc(4)
+       };
+
+       // Check shadow of the condition
+       if(!ShadowContext::isCleanValue(cmpShadow))
+       {
+         logUninitializedCF();
+       }
+
+       // Perform cmpxchg
+       if(addrSpace == AddrSpaceGlobal)
+       {
+         shadowContext.getGlobalMemory()->lock(address);
+       }
+
+       loadShadowMemory(addrSpace, address, oldShadow, workItem);
+
+       // Mirror the exchange on the shadow only if the real exchange happened
+       if(old == cmp)
+       {
+         storeShadowMemory(addrSpace, address, argShadow, workItem);
+       }
+
+       if(addrSpace == AddrSpaceGlobal)
+       {
+         shadowContext.getGlobalMemory()->unlock(address);
+       }
+
+       shadowValues->setValue(CI, oldShadow);
+
+       // Check shadow of address
+       TypedValue addrShadow = shadowContext.getValue(workItem, Addr);
+
+       if(!ShadowContext::isCleanValue(addrShadow))
+       {
+         logUninitializedAddress(addrSpace, address);
+       }
+
+       return true;
+     }
+     else
+     {
+       SimpleOrAtomic(workItem, CI);
+       return true;
+     }
+   }
+   else if(name == "fract" ||
+           name == "modf" ||
+           name == "sincos")
+   {
+     // The second argument is a pointer that receives a value of the same
+     // shape as the result; both get a per-element shadow derived from the
+     // first argument.
+     const llvm::Value *Addr = CI->getArgOperand(1);
+     unsigned addrSpace = Addr->getType()->getPointerAddressSpace();
+     size_t iptr = workItem->getOperand(Addr).getPointer();
+     TypedValue argShadow = shadowContext.getValue(workItem, CI->getArgOperand(0));
+     TypedValue newElemShadow;
+     TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+     for(unsigned i = 0; i < result.num; ++i)
+     {
+       if(!ShadowContext::isCleanValue(argShadow, i))
+       {
+         newElemShadow = ShadowContext::getPoisonedValue(result.size);
+       }
+       else
+       {
+         newElemShadow = ShadowContext::getCleanValue(result.size);
+       }
+
+       // Write the shadow of element i to its own slot
+       memcpy(newShadow.data + i*result.size, newElemShadow.data, result.size);
+     }
+
+     // Pass the work item so that private/local destinations can be resolved
+     storeShadowMemory(addrSpace, iptr, newShadow, workItem);
+     shadowValues->setValue(CI, newShadow);
+
+     // Check shadow of address
+     TypedValue addrShadow = shadowContext.getValue(workItem, Addr);
+
+     if(!ShadowContext::isCleanValue(addrShadow))
+     {
+       logUninitializedAddress(addrSpace, iptr);
+     }
+
+     return true;
+   }
+   else if(name == "frexp" ||
+           name == "lgamma_r")
+   {
+     // The second argument is a pointer that receives one 4-byte integer per
+     // result element.
+     const llvm::Value *Addr = CI->getArgOperand(1);
+     unsigned addrSpace = Addr->getType()->getPointerAddressSpace();
+     size_t iptr = workItem->getOperand(Addr).getPointer();
+     TypedValue argShadow = shadowContext.getValue(workItem, CI->getArgOperand(0));
+     TypedValue newElemShadow;
+     TypedValue newElemIntShadow;
+     TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+     // Integer shadow: 4 bytes per element, with a buffer that really holds
+     // all elements
+     TypedValue newIntShadow = {
+       4,
+       newShadow.num,
+       shadowContext.getMemoryPool()->alloc(4 * newShadow.num)
+     };
+
+     for(unsigned i = 0; i < result.num; ++i)
+     {
+       if(!ShadowContext::isCleanValue(argShadow, i))
+       {
+         newElemShadow = ShadowContext::getPoisonedValue(result.size);
+         newElemIntShadow = ShadowContext::getPoisonedValue(4);
+       }
+       else
+       {
+         newElemShadow = ShadowContext::getCleanValue(result.size);
+         newElemIntShadow = ShadowContext::getCleanValue(4);
+       }
+
+       // Write the shadows of element i to their own slots
+       memcpy(newIntShadow.data + i*4, newElemIntShadow.data, 4);
+       memcpy(newShadow.data + i*result.size, newElemShadow.data, result.size);
+     }
+
+     // Pass the work item so that private/local destinations can be resolved
+     storeShadowMemory(addrSpace, iptr, newIntShadow, workItem);
+     shadowValues->setValue(CI, newShadow);
+
+     // Check shadow of address
+     TypedValue addrShadow = shadowContext.getValue(workItem, Addr);
+
+     if(!ShadowContext::isCleanValue(addrShadow))
+     {
+       logUninitializedAddress(addrSpace, iptr);
+     }
+
+     return true;
+   }
+   else if(name == "remquo")
+   {
+     // The third argument is a pointer that receives one 4-byte integer per
+     // result element; an element is poisoned if either input element is.
+     const llvm::Value *Addr = CI->getArgOperand(2);
+     unsigned addrSpace = Addr->getType()->getPointerAddressSpace();
+     size_t iptr = workItem->getOperand(Addr).getPointer();
+     TypedValue arg0Shadow = shadowContext.getValue(workItem, CI->getArgOperand(0));
+     TypedValue arg1Shadow = shadowContext.getValue(workItem, CI->getArgOperand(1));
+     TypedValue newElemShadow;
+     TypedValue newElemIntShadow;
+     TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+     for(unsigned i = 0; i < result.num; ++i)
+     {
+       if(!ShadowContext::isCleanValue(arg0Shadow, i) || !ShadowContext::isCleanValue(arg1Shadow, i))
+       {
+         newElemShadow = ShadowContext::getPoisonedValue(result.size);
+         newElemIntShadow = ShadowContext::getPoisonedValue(4);
+       }
+       else
+       {
+         newElemShadow = ShadowContext::getCleanValue(result.size);
+         newElemIntShadow = ShadowContext::getCleanValue(4);
+       }
+
+       // Store a 4-byte shadow for the 4-byte integer slot of element i
+       storeShadowMemory(addrSpace, iptr + i*4, newElemIntShadow, workItem);
+       // Write the result shadow of element i to its own slot
+       memcpy(newShadow.data + i*result.size, newElemShadow.data, result.size);
+     }
+
+     shadowValues->setValue(CI, newShadow);
+
+     // Check shadow of address
+     TypedValue addrShadow = shadowContext.getValue(workItem, Addr);
+
+     if(!ShadowContext::isCleanValue(addrShadow))
+     {
+       logUninitializedAddress(addrSpace, iptr);
+     }
+
+     return true;
+   }
+   else if(name == "shuffle")
+   {
+     TypedValue mask = workItem->getOperand(CI->getArgOperand(1));
+     TypedValue maskShadow = shadowContext.getValue(workItem, CI->getArgOperand(1));
+     TypedValue shadow = shadowContext.getValue(workItem, CI->getArgOperand(0));
+     TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+     for(unsigned i = 0; i < newShadow.num; ++i)
+     {
+       if(!ShadowContext::isCleanValue(maskShadow, i))
+       {
+         // An undefined mask element poisons the selected element
+         TypedValue v = ShadowContext::getPoisonedValue(newShadow.size);
+         memcpy(newShadow.data + i*newShadow.size, v.data, newShadow.size);
+       }
+       else
+       {
+         // Take the shadow of the source element chosen by the mask
+         size_t srcOffset = mask.getUInt(i) * shadow.size;
+         memcpy(newShadow.data + i*newShadow.size, shadow.data + srcOffset, newShadow.size);
+       }
+     }
+
+     shadowValues->setValue(CI, newShadow);
+     return true;
+   }
+   else if(name == "shuffle2")
+   {
+     TypedValue mask = workItem->getOperand(CI->getArgOperand(2));
+     TypedValue maskShadow = shadowContext.getValue(workItem, CI->getArgOperand(2));
+     TypedValue shadow[] = {shadowContext.getValue(workItem, CI->getArgOperand(0)),
+                            shadowContext.getValue(workItem, CI->getArgOperand(1))};
+     TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+     for (unsigned i = 0; i < newShadow.num; ++i)
+     {
+       // Number of elements in each of the two source operands
+       uint64_t m = 1;
+
+       if(CI->getArgOperand(0)->getType()->isVectorTy())
+       {
+         m = CI->getArgOperand(0)->getType()->getVectorNumElements();
+       }
+
+       // Indices >= m select from the second source operand
+       uint64_t src = 0;
+       uint64_t index = mask.getUInt(i);
+
+       if(index >= m)
+       {
+         index -= m;
+         src = 1;
+       }
+
+       if(!ShadowContext::isCleanValue(maskShadow, i))
+       {
+         TypedValue v = ShadowContext::getPoisonedValue(newShadow.size);
+         memcpy(newShadow.data + i*newShadow.size, v.data, newShadow.size);
+       }
+       else
+       {
+         size_t srcOffset = index * shadow[src].size;
+         memcpy(newShadow.data + i*newShadow.size, shadow[src].data + srcOffset, newShadow.size);
+       }
+     }
+
+     shadowValues->setValue(CI, newShadow);
+     return true;
+   }
+   else if(name == "any")
+   {
+     const llvm::Value *argOp = CI->getArgOperand(0);
+     TypedValue shadow = shadowContext.getValue(workItem, argOp);
+
+     unsigned num = 1;
+     if(argOp->getType()->isVectorTy())
+     {
+       num = argOp->getType()->getVectorNumElements();
+     }
+
+     // The result is treated as clean as soon as one element is clean
+     for(unsigned i = 0; i < num; ++i)
+     {
+       if(ShadowContext::isCleanValue(shadow, i))
+       {
+         shadowValues->setValue(CI, ShadowContext::getCleanValue(result.size));
+         return true;
+       }
+     }
+
+     shadowValues->setValue(CI, ShadowContext::getPoisonedValue(result.size));
+     return true;
+   }
+   else if(name == "select")
+   {
+     TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+     TypedValue shadow[] = {shadowContext.getValue(workItem, CI->getArgOperand(0)),
+                            shadowContext.getValue(workItem, CI->getArgOperand(1))};
+     TypedValue selectShadow = shadowContext.getValue(workItem, CI->getArgOperand(2));
+
+     for(unsigned i = 0; i < newShadow.num; ++i)
+     {
+       // Vectors select on the sign bit of the condition element, scalars on
+       // any non-zero value
+       int64_t c = workItem->getOperand(CI->getArgOperand(2)).getSInt(i);
+       uint64_t src = ((newShadow.num > 1) ? c & INT64_MIN : c) ? 1 : 0;
+
+       if(!ShadowContext::isCleanValue(selectShadow, i))
+       {
+         TypedValue v = ShadowContext::getPoisonedValue(newShadow.size);
+         memcpy(newShadow.data + i*newShadow.size, v.data, newShadow.size);
+       }
+       else
+       {
+         size_t srcOffset = i * shadow[src].size;
+         memcpy(newShadow.data + i*newShadow.size, shadow[src].data + srcOffset, newShadow.size);
+       }
+     }
+
+     shadowValues->setValue(CI, newShadow);
+     return true;
+   }
+   else if(name.compare(0, 10, "vload_half") == 0 ||
+           name.compare(0, 11, "vloada_half") == 0)
+   {
+     const llvm::Value *BaseOp = CI->getArgOperand(1);
+     const llvm::Value *OffsetOp = CI->getArgOperand(0);
+     size_t base = workItem->getOperand(BaseOp).getPointer();
+     unsigned int addressSpace = BaseOp->getType()->getPointerAddressSpace();
+     uint64_t offset = workItem->getOperand(OffsetOp).getUInt();
+
+     size_t address;
+
+     // vloada with three elements addresses memory in units of four halfs
+     if(name.compare(0, 6, "vloada") == 0 && result.num == 3)
+     {
+       address = base + offset * sizeof(cl_half) * 4;
+     }
+     else
+     {
+       address = base + offset * sizeof(cl_half) * result.num;
+     }
+
+     TypedValue halfShadow = {
+       sizeof(cl_half),
+       result.num,
+       shadowContext.getMemoryPool()->alloc(2 * result.num)
+     };
+     TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+     loadShadowMemory(addressSpace, address, halfShadow, workItem);
+
+     TypedValue pv = ShadowContext::getPoisonedValue(newShadow.size);
+     TypedValue cv = ShadowContext::getCleanValue(newShadow.size);
+
+     // Convert to float shadows
+     for(unsigned i = 0; i < newShadow.num; ++i)
+     {
+       if(!ShadowContext::isCleanValue(halfShadow, i))
+       {
+         memcpy(newShadow.data + i*newShadow.size, pv.data, newShadow.size);
+       }
+       else
+       {
+         memcpy(newShadow.data + i*newShadow.size, cv.data, newShadow.size);
+       }
+     }
+
+     shadowValues->setValue(CI, newShadow);
+
+     // Check shadow of address
+     TypedValue baseShadow = shadowContext.getValue(workItem, BaseOp);
+     TypedValue offsetShadow = shadowContext.getValue(workItem, OffsetOp);
+
+     if(!ShadowContext::isCleanValue(baseShadow) ||
+        !ShadowContext::isCleanValue(offsetShadow))
+     {
+       logUninitializedAddress(addressSpace, address, false);
+     }
+
+     return true;
+   }
+   else if(name.compare(0, 11, "vstore_half") == 0 ||
+           name.compare(0, 12, "vstorea_half") == 0)
+   {
+     const llvm::Value *value = CI->getArgOperand(0);
+     unsigned size = getTypeSize(value->getType());
+
+     if(isVector3(value))
+     {
+       // 3-element vectors are same size as 4-element vectors,
+       // but vstore address offset shouldn't use this.
+       size = (size / 4) * 3;
+     }
+
+     const llvm::Value *BaseOp = CI->getArgOperand(2);
+     const llvm::Value *OffsetOp = CI->getArgOperand(1);
+     size_t base = workItem->getOperand(BaseOp).getPointer();
+     unsigned int addressSpace = BaseOp->getType()->getPointerAddressSpace();
+     uint64_t offset = workItem->getOperand(OffsetOp).getUInt();
+
+     // Convert to halfs
+     TypedValue shadow = shadowContext.getValue(workItem, value);
+     unsigned num = size / sizeof(float);
+     size = num * sizeof(cl_half);
+     TypedValue halfShadow = {
+       sizeof(cl_half),
+       num,
+       shadowContext.getMemoryPool()->alloc(2 * num)
+     };
+
+     TypedValue pv = ShadowContext::getPoisonedValue(halfShadow.size);
+     TypedValue cv = ShadowContext::getCleanValue(halfShadow.size);
+
+     for(unsigned i = 0; i < num; i++)
+     {
+       if(!ShadowContext::isCleanValue(shadow, i))
+       {
+         memcpy(halfShadow.data + i*halfShadow.size, pv.data, halfShadow.size);
+       }
+       else
+       {
+         memcpy(halfShadow.data + i*halfShadow.size, cv.data, halfShadow.size);
+       }
+     }
+
+     size_t address;
+     // vstorea with three elements addresses memory in units of four halfs
+     if(name.compare(0, 7, "vstorea") == 0 && num == 3)
+     {
+       address = base + offset * sizeof(cl_half) * 4;
+     }
+     else
+     {
+       address = base + offset * sizeof(cl_half) * num;
+     }
+
+     storeShadowMemory(addressSpace, address, halfShadow, workItem);
+
+     // Check shadow of address
+     TypedValue baseShadow = shadowContext.getValue(workItem, BaseOp);
+     TypedValue offsetShadow = shadowContext.getValue(workItem, OffsetOp);
+
+     if(!ShadowContext::isCleanValue(baseShadow) ||
+        !ShadowContext::isCleanValue(offsetShadow))
+     {
+       logUninitializedAddress(addressSpace, address);
+     }
+     return true;
+   }
+   else if(name.compare(0, 5, "vload") == 0)
+   {
+     TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+     const llvm::Value *BaseOp = CI->getArgOperand(1);
+     const llvm::Value *OffsetOp = CI->getArgOperand(0);
+     unsigned int addressSpace = BaseOp->getType()->getPointerAddressSpace();
+     size_t base = workItem->getOperand(BaseOp).getPointer();
+     uint64_t offset = workItem->getOperand(OffsetOp).getUInt();
+
+     // The offset counts whole vectors
+     size_t size = newShadow.size*newShadow.num;
+     size_t address = base + offset*size;
+     loadShadowMemory(addressSpace, address, newShadow, workItem);
+
+     shadowValues->setValue(CI, newShadow);
+
+     // Check shadow of address
+     TypedValue baseShadow = shadowContext.getValue(workItem, BaseOp);
+     TypedValue offsetShadow = shadowContext.getValue(workItem, OffsetOp);
+
+     if(!ShadowContext::isCleanValue(baseShadow) ||
+        !ShadowContext::isCleanValue(offsetShadow))
+     {
+       logUninitializedAddress(addressSpace, address, false);
+     }
+
+     return true;
+   }
+   else if(name.compare(0, 6, "vstore") == 0)
+   {
+     const llvm::Value *value = CI->getArgOperand(0);
+     unsigned size = getTypeSize(value->getType());
+
+     if(isVector3(value))
+     {
+       // 3-element vectors are same size as 4-element vectors,
+       // but vstore address offset shouldn't use this.
+       size = (size/4) * 3;
+     }
+
+     const llvm::Value *BaseOp = CI->getArgOperand(2);
+     const llvm::Value *OffsetOp = CI->getArgOperand(1);
+     unsigned int addressSpace = BaseOp->getType()->getPointerAddressSpace();
+     size_t base = workItem->getOperand(BaseOp).getPointer();
+     uint64_t offset = workItem->getOperand(OffsetOp).getUInt();
+
+     size_t address = base + offset*size;
+     TypedValue shadow = shadowContext.getValue(workItem, value);
+     storeShadowMemory(addressSpace, address, shadow, workItem);
+
+     // Check shadow of address
+     TypedValue baseShadow = shadowContext.getValue(workItem, BaseOp);
+     TypedValue offsetShadow = shadowContext.getValue(workItem, OffsetOp);
+
+     if(!ShadowContext::isCleanValue(baseShadow) ||
+        !ShadowContext::isCleanValue(offsetShadow))
+     {
+       logUninitializedAddress(addressSpace, address);
+     }
+
+     return true;
+   }
+   else if(name == "read_imagef" ||
+           name == "read_imagei" ||
+           name == "read_imageui")
+   {
+     Image *image = *(Image**)(workItem->getOperand(CI->getArgOperand(0)).data);
+     TypedValue shadowImage = shadowContext.getValue(workItem, CI->getArgOperand(0));
+     TypedValue newShadow;
+
+     //FIXME: The new shadow should be loaded from memory
+     //and not generated based on the image description
+     //However, this currently requires to duplicate all functionality
+     //in WorkItemBuiltins.cpp for the image function
+     //Has to be changed in combination with the write functions
+     size_t address = image->address;
+
+     if(!ShadowContext::isCleanImage(shadowImage))
+     {
+       newShadow = ShadowContext::getPoisonedValue(result);
+     }
+     else
+     {
+       newShadow = ShadowContext::getCleanValue(result);
+     }
+
+     shadowValues->setValue(CI, newShadow);
+
+     // Check image
+     if(!ShadowContext::isCleanImageAddress(shadowImage))
+     {
+       logUninitializedAddress(AddrSpaceGlobal, address, false);
+     }
+
+     return true;
+   }
+   else if(name == "write_imagef" ||
+           name == "write_imagei" ||
+           name == "write_imageui")
+   {
+     Image *image = *(Image**)(workItem->getOperand(CI->getArgOperand(0)).data);
+     TypedValue shadowImage = shadowContext.getValue(workItem, CI->getArgOperand(0));
+
+     //FIXME: The actual shadow of the image should be stored to memory
+     //However, this currently requires to duplicate all functionality
+     //in WorkItemBuiltins.cpp for the image function
+     //Has to be changed in combination with the read functions
+     size_t address = image->address;
+
+     // Check image
+     if(!ShadowContext::isCleanImageAddress(shadowImage))
+     {
+       logUninitializedAddress(AddrSpaceGlobal, address);
+     }
+
+     return true;
+   }
+   else if(name.compare(0, 10, "get_image_") == 0)
+   {
+     // The result shadow is filled from the corresponding field of the image
+     // structure held in the shadow of the image argument.
+     TypedValue shadowImage = shadowContext.getValue(workItem, CI->getArgOperand(0));
+     TypedValue newShadow = {
+       result.size,
+       result.num,
+       shadowContext.getMemoryPool()->alloc(result.size * result.num)
+     };
+
+     if(name == "get_image_array_size")
+     {
+       newShadow.setUInt(((Image*)shadowImage.data)->desc.image_array_size);
+     }
+     else if(name == "get_image_dim")
+     {
+       newShadow.setUInt(((Image*)shadowImage.data)->desc.image_width, 0);
+       newShadow.setUInt(((Image*)shadowImage.data)->desc.image_height, 1);
+
+       if(newShadow.num > 2)
+       {
+         newShadow.setUInt(((Image*)shadowImage.data)->desc.image_depth, 2);
+         newShadow.setUInt(0, 3);
+       }
+     }
+     else if(name == "get_image_depth")
+     {
+       newShadow.setUInt(((Image*)shadowImage.data)->desc.image_depth);
+     }
+     else if(name == "get_image_height")
+     {
+       newShadow.setUInt(((Image*)shadowImage.data)->desc.image_height);
+     }
+     else if(name == "get_image_width")
+     {
+       newShadow.setUInt(((Image*)shadowImage.data)->desc.image_width);
+     }
+     else if(name == "get_image_channel_order")
+     {
+       newShadow.setUInt(((Image*)shadowImage.data)->format.image_channel_order);
+     }
+     else if(name == "get_image_channel_data_type")
+     {
+       newShadow.setUInt(((Image*)shadowImage.data)->format.image_channel_data_type);
+     }
+
+     shadowValues->setValue(CI, newShadow);
+     return true;
+   }
+
+   // Not a builtin with dedicated shadow handling
+   return false;
+ }
+
+ // Propagate shadow information for a call to an LLVM intrinsic.
+ //  - fmuladd, bswap:  handled by SimpleOr
+ //  - memcpy:          shadow memory is copied from source to destination
+ //  - memset:          destination shadow is filled from the shadow of the
+ //                     value operand
+ //  - dbg_*/lifetime_*: ignored
+ // Any other intrinsic raises a fatal error.
+ void Uninitialized::handleIntrinsicInstruction(const WorkItem *workItem, const llvm::IntrinsicInst *I)
+ {
+   switch (I->getIntrinsicID())
+   {
+     case llvm::Intrinsic::fmuladd:
+     case llvm::Intrinsic::bswap: // TODO: byte-level accuracy for bswap
+     {
+       SimpleOr(workItem, I);
+       break;
+     }
+     case llvm::Intrinsic::memcpy:
+     {
+       const llvm::MemCpyInst *copyInst = static_cast<const llvm::MemCpyInst*>(I);
+       const llvm::Value *dstOp = copyInst->getDest();
+       const llvm::Value *srcOp = copyInst->getSource();
+       size_t dstAddr = workItem->getOperand(dstOp).getPointer();
+       size_t srcAddr = workItem->getOperand(srcOp).getPointer();
+       size_t numBytes = workItem->getOperand(copyInst->getLength()).getUInt();
+       unsigned dstSpace = copyInst->getDestAddressSpace();
+       unsigned srcSpace = copyInst->getSourceAddressSpace();
+       const llvm::PointerType *srcPtrTy = llvm::dyn_cast<llvm::PointerType>(srcOp->getType());
+
+       // Struct copies out of private memory get an extra check of the source
+       bool structSource = srcPtrTy->getElementType()->isStructTy();
+       if(dstSpace != AddrSpacePrivate && structSource)
+       {
+         checkStructMemcpy(workItem, srcOp);
+       }
+
+       copyShadowMemory(dstSpace, dstAddr, srcSpace, srcAddr, numBytes, workItem, NULL, true);
+
+       // Check shadow of src address
+       if(!ShadowContext::isCleanValue(shadowContext.getValue(workItem, srcOp)))
+       {
+         logUninitializedAddress(srcSpace, srcAddr, false);
+       }
+
+       // Check shadow of dst address
+       if(!ShadowContext::isCleanValue(shadowContext.getValue(workItem, dstOp)))
+       {
+         logUninitializedAddress(dstSpace, dstAddr);
+       }
+       break;
+     }
+     case llvm::Intrinsic::memset:
+     {
+       const llvm::MemSetInst *setInst = static_cast<const llvm::MemSetInst*>(I);
+       const llvm::Value *dstOp = setInst->getDest();
+       size_t dstAddr = workItem->getOperand(dstOp).getPointer();
+       unsigned numBytes = workItem->getOperand(setInst->getLength()).getUInt();
+       unsigned dstSpace = setInst->getDestAddressSpace();
+
+       // Temporary shadow buffer covering the whole destination region
+       TypedValue fillShadow = {
+         numBytes,
+         1,
+         new unsigned char[numBytes]
+       };
+
+       // Every shadow byte is set from the shadow of the value operand
+       TypedValue valueShadow = shadowContext.getValue(workItem, setInst->getArgOperand(1));
+       memset(fillShadow.data, valueShadow.getUInt(), numBytes);
+       storeShadowMemory(dstSpace, dstAddr, fillShadow, workItem, NULL, true);
+
+       delete[] fillShadow.data;
+
+       // Check shadow of address
+       if(!ShadowContext::isCleanValue(shadowContext.getValue(workItem, dstOp)))
+       {
+         logUninitializedAddress(dstSpace, dstAddr);
+       }
+       break;
+     }
+     case llvm::Intrinsic::dbg_declare:
+     case llvm::Intrinsic::dbg_value:
+     case llvm::Intrinsic::lifetime_end:
+     case llvm::Intrinsic::lifetime_start:
+       // Nothing to track for debug and lifetime markers
+       break;
+     default:
+       FATAL_ERROR("Unsupported intrinsic %s", llvm::Intrinsic::getName(I->getIntrinsicID()).data());
+   }
+ }
+
+ // Callback for a store performed by the host.
+ // A host store to global memory marks the written range [address,
+ // address+size) as clean in the global shadow memory; stores to any other
+ // address space are ignored. The stored data itself is not inspected.
+ void Uninitialized::hostMemoryStore(const Memory *memory,
+                                     size_t address, size_t size,
+                                     const uint8_t *storeData)
+ {
+   if(memory->getAddressSpace() != AddrSpaceGlobal)
+   {
+     return;
+   }
+
+   TypedValue cleanShadow = ShadowContext::getCleanValue(size);
+   allocAndStoreShadowMemory(AddrSpaceGlobal, address, cleanShadow);
+ }
+
+void Uninitialized::instructionExecuted(const WorkItem *workItem,
+ const llvm::Instruction *instruction,
+ const TypedValue& result)
+{
+#ifdef DUMP_SHADOW
+ cout << "++++++++++++++++++++++++++++++++++++++++++++" << endl;
+ instruction->dump();
+#endif
+
+ ShadowWorkItem *shadowWorkItem = shadowContext.getShadowWorkItem(workItem);
+ ShadowValues *shadowValues = shadowWorkItem->getValues();
+
+ switch(instruction->getOpcode())
+ {
+ case llvm::Instruction::Add:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::Alloca:
+ {
+ const llvm::AllocaInst *allocaInst = ((const llvm::AllocaInst*)instruction);
+
+ size_t address = result.getPointer();
+
+ shadowValues->setValue(instruction, ShadowContext::getCleanValue(instruction));
+
+ TypedValue v = ShadowContext::getPoisonedValue(allocaInst->getAllocatedType());
+ allocAndStoreShadowMemory(AddrSpacePrivate, address, v, workItem);
+ break;
+ }
+ case llvm::Instruction::And:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::AShr:
+ {
+ TypedValue S0 = shadowContext.getValue(workItem, instruction->getOperand(0));
+ TypedValue S1 = shadowContext.getValue(workItem, instruction->getOperand(1));
+
+ if(!ShadowContext::isCleanValue(S1))
+ {
+ shadowValues->setValue(instruction, ShadowContext::getPoisonedValue(instruction));
+ }
+ else
+ {
+ TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+ TypedValue Shift = workItem->getOperand(instruction->getOperand(1));
+ uint64_t shiftMask = (S0.num > 1 ? S0.size : max((size_t)S0.size, sizeof(uint32_t))) * 8 - 1;
+
+ for (unsigned i = 0; i < S0.num; i++)
+ {
+ newShadow.setUInt(S0.getSInt(i) >> (Shift.getUInt(i) & shiftMask), i);
+ }
+
+ shadowValues->setValue(instruction, newShadow);
+ }
+
+ break;
+ }
+ case llvm::Instruction::BitCast:
+ {
+ TypedValue shadow = shadowContext.getValue(workItem, instruction->getOperand(0));
+ TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+ memcpy(newShadow.data, shadow.data, newShadow.size*newShadow.num);
+ shadowValues->setValue(instruction, newShadow);
+ break;
+ }
+ case llvm::Instruction::Br:
+ {
+ checkAllOperandsDefined(workItem, instruction);
+#ifdef DUMP_SHADOW
+ // Insert pseudo value to keep numbering
+ shadowValues->setValue(instruction, ShadowContext::getCleanValue(3));
+#endif
+ break;
+ }
+ case llvm::Instruction::Call:
+ {
+ const llvm::CallInst *callInst = ((const llvm::CallInst*)instruction);
+ const llvm::Function *function = callInst->getCalledFunction();
+
+ // Check for indirect function calls
+ if (!function)
+ {
+ // Resolve indirect function pointer
+ const llvm::Value *func = callInst->getCalledValue();
+ const llvm::Value *funcPtr = ((const llvm::User*)func)->getOperand(0);
+ function = (const llvm::Function*)funcPtr;
+ }
+
+ // For inline asm, do the usual thing: check argument shadow and mark all
+ // outputs as clean. Note that any side effects of the inline asm that are
+ // not immediately visible in its constraints are not handled.
+ if (callInst->isInlineAsm())
+ {
+ checkAllOperandsDefined(workItem, instruction);
+ shadowValues->setValue(instruction, ShadowContext::getCleanValue(instruction));
+ break;
+ }
+
+ if(const llvm::IntrinsicInst *II = llvm::dyn_cast<const llvm::IntrinsicInst>(instruction))
+ {
+ handleIntrinsicInstruction(workItem, II);
+ break;
+ }
+
+ if(function->isDeclaration())
+ {
+ if(!handleBuiltinFunction(workItem, function->getName().str(), callInst, result))
+ {
+ // Handle external function calls
+ checkAllOperandsDefined(workItem, instruction);
+
+ if(callInst->getType()->isSized())
+ {
+ // Set return value only if function is non-void
+ shadowValues->setValue(instruction, ShadowContext::getCleanValue(instruction));
+ }
+ }
+ break;
+ }
+
+ assert(!function->isVarArg() && "Variadic functions are not supported!");
+
+ assert(!llvm::isa<const llvm::IntrinsicInst>(instruction) && "intrinsics are handled elsewhere");
+
+ // Fresh values for function
+ ShadowFrame *values = shadowValues->createCleanShadowFrame();
+
+ llvm::Function::const_arg_iterator argItr;
+ for (argItr = function->arg_begin(); argItr != function->arg_end(); argItr++)
+ {
+ const llvm::Value *Val = callInst->getArgOperand(argItr->getArgNo());
+
+ if (!Val->getType()->isSized())
+ {
+ continue;
+ }
+
+ if(argItr->hasByValAttr())
+ {
+ assert(Val->getType()->isPointerTy() && "ByVal argument is not a pointer!");
+ // Make new copy of shadow in private memory
+ size_t origShadowAddress = workItem->getOperand(Val).getPointer();
+ size_t newShadowAddress = workItem->getOperand(&*argItr).getPointer();
+ ShadowMemory *mem = shadowWorkItem->getPrivateMemory();
+ unsigned char *origShadowData = (unsigned char*)mem->getPointer(origShadowAddress);
+ size_t size = getTypeSize(argItr->getType()->getPointerElementType());
+
+ // Set new shadow memory
+ TypedValue v = ShadowContext::getCleanValue(size);
+ memcpy(v.data, origShadowData, size);
+ allocAndStoreShadowMemory(AddrSpacePrivate, newShadowAddress, v, workItem);
+ values->setValue(&*argItr, ShadowContext::getCleanValue(&*argItr));
+ }
+ else
+ {
+ TypedValue newShadow = shadowContext.getMemoryPool()->clone(shadowContext.getValue(workItem, Val));
+ values->setValue(&*argItr, newShadow);
+ }
+ }
+
+ // Now, get the shadow for the RetVal.
+ if(callInst->getType()->isSized())
+ {
+ values->setCall(callInst);
+ }
+
+ shadowValues->pushFrame(values);
+
+ break;
+ }
+ case llvm::Instruction::ExtractElement:
+ {
+ const llvm::ExtractElementInst *extractInst = ((const llvm::ExtractElementInst*)instruction);
+
+ TypedValue indexShadow = shadowContext.getValue(workItem, extractInst->getIndexOperand());
+
+ if(!ShadowContext::isCleanValue(indexShadow))
+ {
+ logUninitializedIndex();
+ }
+
+ TypedValue vectorShadow = shadowContext.getValue(workItem, extractInst->getVectorOperand());
+ TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+ unsigned index = workItem->getOperand(extractInst->getIndexOperand()).getUInt();
+ memcpy(newShadow.data, vectorShadow.data + newShadow.size*index, newShadow.size);
+
+ shadowValues->setValue(instruction, newShadow);
+ break;
+ }
+ case llvm::Instruction::ExtractValue:
+ {
+ const llvm::ExtractValueInst *extractInst = ((const llvm::ExtractValueInst*)instruction);
+
+ const llvm::Value *Agg = extractInst->getAggregateOperand();
+ TypedValue ResShadow = shadowContext.getMemoryPool()->clone(result);
+
+ llvm::ArrayRef<unsigned int> indices = extractInst->getIndices();
+
+ // Compute offset for target value
+ int offset = 0;
+ const llvm::Type *type = Agg->getType();
+ for (unsigned i = 0; i < indices.size(); i++)
+ {
+ if (type->isArrayTy())
+ {
+ type = type->getArrayElementType();
+ offset += getTypeSize(type) * indices[i];
+ }
+ else if (type->isStructTy())
+ {
+ offset += getStructMemberOffset((const llvm::StructType*)type, indices[i]);
+ type = type->getStructElementType(indices[i]);
+ }
+ else
+ {
+ FATAL_ERROR("Unsupported aggregate type: %d", type->getTypeID())
+ }
+ }
+
+ // Copy target value to result
+ memcpy(ResShadow.data, shadowContext.getValue(workItem, Agg).data + offset, getTypeSize(type));
+
+ shadowValues->setValue(instruction, ResShadow);
+ break;
+ }
+ case llvm::Instruction::FAdd:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::FCmp:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::FDiv:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::FMul:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::FPExt:
+ {
+ SimpleOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::FPToSI:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::FPToUI:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::FPTrunc:
+ {
+ SimpleOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::FRem:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::FSub:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::GetElementPtr:
+ {
+ SimpleOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::ICmp:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::InsertElement:
+ {
+ TypedValue indexShadow = shadowContext.getValue(workItem, instruction->getOperand(2));
+
+ if(!ShadowContext::isCleanValue(indexShadow))
+ {
+ logUninitializedIndex();
+ }
+
+ TypedValue vectorShadow = shadowContext.getValue(workItem, instruction->getOperand(0));
+ TypedValue elementShadow = shadowContext.getValue(workItem, instruction->getOperand(1));
+ TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+ unsigned index = workItem->getOperand(instruction->getOperand(2)).getUInt();
+ memcpy(newShadow.data, vectorShadow.data, newShadow.size*newShadow.num);
+ memcpy(newShadow.data + index*newShadow.size, elementShadow.data, newShadow.size);
+
+ shadowValues->setValue(instruction, newShadow);
+ break;
+ }
+ case llvm::Instruction::InsertValue:
+ {
+ const llvm::InsertValueInst *insertInst = (const llvm::InsertValueInst*)instruction;
+
+ TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+ // Load original aggregate data
+ const llvm::Value *agg = insertInst->getAggregateOperand();
+ memcpy(newShadow.data, shadowContext.getValue(workItem, agg).data, newShadow.size*newShadow.num);
+
+ // Compute offset for inserted value
+ int offset = 0;
+ llvm::ArrayRef<unsigned int> indices = insertInst->getIndices();
+ const llvm::Type *type = agg->getType();
+ for (unsigned i = 0; i < indices.size(); i++)
+ {
+ if (type->isArrayTy())
+ {
+ type = type->getArrayElementType();
+ offset += getTypeSize(type) * indices[i];
+ }
+ else if (type->isStructTy())
+ {
+ offset += getStructMemberOffset((const llvm::StructType*)type, indices[i]);
+ type = type->getStructElementType(indices[i]);
+ }
+ else
+ {
+ FATAL_ERROR("Unsupported aggregate type: %d", type->getTypeID())
+ }
+ }
+
+ // Copy inserted value into result
+ const llvm::Value *value = insertInst->getInsertedValueOperand();
+ memcpy(newShadow.data + offset, shadowContext.getValue(workItem, value).data, getTypeSize(value->getType()));
+
+ shadowValues->setValue(instruction, newShadow);
+ break;
+ }
+ case llvm::Instruction::IntToPtr:
+ {
+ TypedValue shadow = shadowContext.getValue(workItem, instruction->getOperand(0));
+ TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+ for (unsigned i = 0; i < newShadow.num; i++)
+ {
+ newShadow.setPointer(shadow.getUInt(i), i);
+ }
+
+ shadowValues->setValue(instruction, newShadow);
+ break;
+ }
+ case llvm::Instruction::Load:
+ {
+ assert(instruction->getType()->isSized() && "Load type must have size");
+ const llvm::LoadInst *loadInst = ((const llvm::LoadInst*)instruction);
+ const llvm::Value *Addr = loadInst->getPointerOperand();
+
+ size_t address = workItem->getOperand(Addr).getPointer();
+ unsigned addrSpace = loadInst->getPointerAddressSpace();
+
+ TypedValue v = shadowContext.getMemoryPool()->clone(result);
+ loadShadowMemory(addrSpace, address, v, workItem);
+ shadowValues->setValue(instruction, v);
+
+ // Check shadow of address
+ TypedValue addrShadow = shadowContext.getValue(workItem, Addr);
+
+ if(!ShadowContext::isCleanValue(addrShadow))
+ {
+ logUninitializedAddress(addrSpace, address, false);
+ }
+
+// if (I.isAtomic())
+// I.setOrdering(addAcquireOrdering(I.getOrdering()));
+
+ break;
+ }
+ case llvm::Instruction::LShr:
+ {
+ TypedValue S0 = shadowContext.getValue(workItem, instruction->getOperand(0));
+ TypedValue S1 = shadowContext.getValue(workItem, instruction->getOperand(1));
+
+ if(!ShadowContext::isCleanValue(S1))
+ {
+ shadowValues->setValue(instruction, ShadowContext::getPoisonedValue(instruction));
+ }
+ else
+ {
+ TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+ TypedValue Shift = workItem->getOperand(instruction->getOperand(1));
+ uint64_t shiftMask = (S0.num > 1 ? S0.size : max((size_t)S0.size, sizeof(uint32_t))) * 8 - 1;
+
+ for (unsigned i = 0; i < S0.num; i++)
+ {
+ newShadow.setUInt(S0.getUInt(i) >> (Shift.getUInt(i) & shiftMask), i);
+ }
+
+ shadowValues->setValue(instruction, newShadow);
+ }
+
+ break;
+ }
+ case llvm::Instruction::Mul:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::Or:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::PHI:
+ {
+ const llvm::PHINode *phiNode = (const llvm::PHINode*)instruction;
+ const llvm::Value *value = phiNode->getIncomingValueForBlock(workItem->getPreviousBlock());
+ TypedValue shadowValue = shadowContext.getValue(workItem, value);
+
+ shadowValues->setValue(instruction, shadowValue);
+ break;
+ }
+ case llvm::Instruction::PtrToInt:
+ {
+ TypedValue shadow = shadowContext.getValue(workItem, instruction->getOperand(0));
+ TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+ for (unsigned i = 0; i < newShadow.num; i++)
+ {
+ newShadow.setUInt(shadow.getPointer(i), i);
+ }
+
+ shadowValues->setValue(instruction, newShadow);
+ break;
+ }
+ case llvm::Instruction::Ret:
+ {
+ const llvm::ReturnInst *retInst = ((const llvm::ReturnInst*)instruction);
+ const llvm::Value *RetVal = retInst->getReturnValue();
+
+ if(RetVal)
+ {
+ //Value *ShadowPtr = getValuePtrForRetval(RetVal, IRB);
+ //if (CheckReturnValue) {
+ // insertShadowCheck(RetVal, &I);
+ // Value *Shadow = getCleanValue(RetVal);
+ // IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+ //} else {
+ TypedValue retValShadow = shadowContext.getMemoryPool()->clone(shadowContext.getValue(workItem, RetVal));
+ const llvm::CallInst *callInst = shadowValues->getCall();
+ shadowValues->popFrame();
+ shadowValues->setValue(callInst, retValShadow);
+ //}
+ }
+ else
+ {
+#ifdef DUMP_SHADOW
+ // Insert pseudo value to keep numbering
+ shadowValues->setValue(instruction, ShadowContext::getCleanValue(3));
+#endif
+ shadowValues->popFrame();
+ }
+
+ break;
+ }
+ case llvm::Instruction::SDiv:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::Select:
+ {
+ const llvm::SelectInst *selectInst = (const llvm::SelectInst*)instruction;
+
+ TypedValue opCondition = workItem->getOperand(selectInst->getCondition());
+ TypedValue conditionShadow = shadowContext.getValue(workItem, selectInst->getCondition());
+ TypedValue newShadow;
+
+ if(!ShadowContext::isCleanValue(conditionShadow))
+ {
+ newShadow = ShadowContext::getPoisonedValue(instruction);
+ }
+ else
+ {
+ newShadow = shadowContext.getMemoryPool()->clone(result);
+
+ for(unsigned i = 0; i < result.num; i++)
+ {
+ const bool cond = selectInst->getCondition()->getType()->isVectorTy() ?
+ opCondition.getUInt(i) :
+ opCondition.getUInt();
+ const llvm::Value *op = cond ?
+ selectInst->getTrueValue() :
+ selectInst->getFalseValue();
+
+ memcpy(newShadow.data + i*newShadow.size,
+ shadowContext.getValue(workItem, op).data + i*newShadow.size,
+ newShadow.size);
+ }
+ }
+
+ shadowValues->setValue(instruction, newShadow);
+ break;
+ }
+ case llvm::Instruction::SExt:
+ {
+ const llvm::Value *operand = instruction->getOperand(0);
+ TypedValue shadow = shadowContext.getValue(workItem, operand);
+ TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+ for (unsigned i = 0; i < newShadow.num; i++)
+ {
+ int64_t val = shadow.getSInt(i);
+ if (operand->getType()->getPrimitiveSizeInBits() == 1)
+ {
+ val = val ? -1 : 0;
+ }
+ newShadow.setSInt(val, i);
+ }
+
+ shadowValues->setValue(instruction, newShadow);
+
+ break;
+ }
+ case llvm::Instruction::Shl:
+ {
+ TypedValue S0 = shadowContext.getValue(workItem, instruction->getOperand(0));
+ TypedValue S1 = shadowContext.getValue(workItem, instruction->getOperand(1));
+
+ if(!ShadowContext::isCleanValue(S1))
+ {
+ shadowValues->setValue(instruction, ShadowContext::getPoisonedValue(instruction));
+ }
+ else
+ {
+ TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+ TypedValue Shift = workItem->getOperand(instruction->getOperand(1));
+ uint64_t shiftMask = (S0.num > 1 ? S0.size : max((size_t)S0.size, sizeof(uint32_t))) * 8 - 1;
+
+ for (unsigned i = 0; i < S0.num; i++)
+ {
+ newShadow.setUInt(S0.getUInt(i) << (Shift.getUInt(i) & shiftMask), i);
+ }
+
+ shadowValues->setValue(instruction, newShadow);
+ }
+
+ break;
+ }
+ case llvm::Instruction::ShuffleVector:
+ {
+ const llvm::ShuffleVectorInst *shuffleInst = (const llvm::ShuffleVectorInst*)instruction;
+ const llvm::Value *v1 = shuffleInst->getOperand(0);
+ const llvm::Value *v2 = shuffleInst->getOperand(1);
+ TypedValue mask = workItem->getOperand(shuffleInst->getMask());
+ TypedValue maskShadow = shadowContext.getValue(workItem, shuffleInst->getMask());
+ TypedValue newShadow = ShadowContext::getCleanValue(result);
+ TypedValue pv = ShadowContext::getPoisonedValue(newShadow.size);
+
+ unsigned num = v1->getType()->getVectorNumElements();
+ for(unsigned i = 0; i < newShadow.num; i++)
+ {
+ if(shuffleInst->getMask()->getAggregateElement(i)->getValueID() == llvm::Value::UndefValueVal || !ShadowContext::isCleanValue(maskShadow, i))
+ {
+ // Undef value are poisoned
+ memcpy(newShadow.data + i*newShadow.size, pv.data, newShadow.size);
+ continue;
+ }
+
+ const llvm::Value *src = v1;
+ unsigned int index = mask.getUInt(i);
+ if(index >= num)
+ {
+ index -= num;
+ src = v2;
+ }
+
+ TypedValue v = shadowContext.getValue(workItem, src);
+ size_t srcOffset = index*newShadow.size;
+ memcpy(newShadow.data + i*newShadow.size, v.data + srcOffset, newShadow.size);
+ }
+
+ shadowValues->setValue(instruction, newShadow);
+ break;
+ }
+ case llvm::Instruction::SIToFP:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::SRem:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::Store:
+ {
+ PARANOID_CHECK(workItem, instruction);
+ const llvm::StoreInst *storeInst = ((const llvm::StoreInst*)instruction);
+ const llvm::Value *Val = storeInst->getValueOperand();
+ const llvm::Value *Addr = storeInst->getPointerOperand();
+
+ size_t address = workItem->getOperand(Addr).getPointer();
+ unsigned addrSpace = storeInst->getPointerAddressSpace();
+
+ TypedValue shadowVal = storeInst->isAtomic() ? ShadowContext::getCleanValue(Val) :
+ shadowContext.getValue(workItem, Val);
+ storeShadowMemory(addrSpace, address, shadowVal, workItem);
+
+ // Check shadow of address
+ TypedValue addrShadow = shadowContext.getValue(workItem, Addr);
+
+ if(!ShadowContext::isCleanValue(addrShadow))
+ {
+ logUninitializedAddress(addrSpace, address);
+ }
+ break;
+ }
+ case llvm::Instruction::Sub:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::Switch:
+ {
+ checkAllOperandsDefined(workItem, instruction);
+#ifdef DUMP_SHADOW
+ // Insert pseudo value to keep numbering
+ shadowValues->setValue(instruction, ShadowContext::getCleanValue(3));
+#endif
+ break;
+ }
+ case llvm::Instruction::Trunc:
+ {
+ TypedValue shadow = shadowContext.getValue(workItem, instruction->getOperand(0));
+ TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+ for (unsigned i = 0; i < newShadow.num; i++)
+ {
+ memcpy(newShadow.data+i*newShadow.size, shadow.data+i*shadow.size, newShadow.size);
+ }
+
+ shadowValues->setValue(instruction, newShadow);
+ break;
+ }
+ case llvm::Instruction::UDiv:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::UIToFP:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::URem:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::Unreachable:
+ FATAL_ERROR("Encountered unreachable instruction");
+ case llvm::Instruction::Xor:
+ {
+ VectorOr(workItem, instruction);
+ break;
+ }
+ case llvm::Instruction::ZExt:
+ {
+ TypedValue shadow = shadowContext.getValue(workItem, instruction->getOperand(0));
+ TypedValue newShadow = shadowContext.getMemoryPool()->clone(result);
+
+ for (unsigned i = 0; i < newShadow.num; i++)
+ {
+ newShadow.setUInt(shadow.getUInt(i), i);
+ }
+
+ shadowValues->setValue(instruction, newShadow);
+ break;
+ }
+ default:
+ FATAL_ERROR("Unsupported instruction: %s", instruction->getOpcodeName());
+ }
+
+#ifdef DUMP_SHADOW
+ if(shadowContext.hasValue(workItem, instruction))
+ {
+ cout << shadowContext.getValue(workItem, instruction) << endl;
+ }
+#endif
+}
+
+// Kernel-launch hook: walks every value exposed by the kernel (arguments and
+// program-scope variables) and prepares its shadow state. Shadows for
+// constant and global memory are created here; values that need a work-item
+// or work-group are queued in m_deferredInit / m_deferredInitGroup, which
+// workItemBegin() and workGroupBegin() consume.
+void Uninitialized::kernelBegin(const KernelInvocation *kernelInvocation)
+{
+  const Kernel *kernel = kernelInvocation->getKernel();
+
+  // Initialise kernel arguments and global variables
+  for (auto itr = kernel->values_begin(); itr != kernel->values_end(); ++itr)
+  {
+    const auto &entry = *itr;
+    const llvm::Type *type = entry.first->getType();
+
+    // Unsized values are skipped entirely
+    if (!type->isSized())
+    {
+      continue;
+    }
+
+    if (!type->isPointerTy())
+    {
+      // Non pointer type kernel arguments
+      // entry.second.data == val
+      // entry.second.size == val size
+      m_deferredInit.push_back(entry);
+      continue;
+    }
+
+    switch (type->getPointerAddressSpace())
+    {
+      case AddrSpaceConstant:
+      {
+        // Constants
+        // entry.second.data == ptr
+        // entry.second.size == ptr size
+        TypedValue pointerShadow = m_pool.clone(ShadowContext::getCleanValue(entry.first));
+        shadowContext.setGlobalValue(entry.first, pointerShadow);
+
+        // The pointee gets a clean shadow as well
+        const llvm::Type *pointeeTy = type->getPointerElementType();
+        allocAndStoreShadowMemory(AddrSpaceConstant, entry.second.getPointer(),
+                                  ShadowContext::getCleanValue(pointeeTy));
+        break;
+      }
+      case AddrSpaceGlobal:
+      {
+        // Global pointer kernel arguments
+        // entry.second.data == ptr
+        // entry.second.size == ptr size
+        const size_t address = entry.second.getPointer();
+
+        if (m_context->getGlobalMemory()->isAddressValid(address))
+        {
+          if (!shadowContext.getGlobalMemory()->isAddressValid(address))
+          {
+            // Allocate poisoned global memory if there was no host store
+            const size_t bufferSize = m_context->getGlobalMemory()->getBuffer(address)->size;
+            allocAndStoreShadowMemory(AddrSpaceGlobal, address,
+                                      ShadowContext::getPoisonedValue(bufferSize), NULL, NULL, true);
+          }
+        }
+
+        m_deferredInit.push_back(entry);
+        break;
+      }
+      case AddrSpaceLocal:
+      {
+        // Local pointer kernel arguments and local data variables
+        // entry.second.data == NULL
+        // entry.second.size == val size
+        if (llvm::isa<llvm::Argument>(entry.first))
+        {
+          // Arguments have a private pointer
+          m_deferredInit.push_back(entry);
+        }
+        else
+        {
+          // Variables have a global pointer
+          TypedValue pointerShadow = m_pool.clone(ShadowContext::getCleanValue(entry.first));
+          shadowContext.setGlobalValue(entry.first, pointerShadow);
+        }
+
+        // Either way the local memory itself is set up per work-group
+        m_deferredInitGroup.push_back(entry);
+        break;
+      }
+      case AddrSpacePrivate:
+      {
+        // ByVal kernel arguments and private struct/union definitions
+        // entry.second.data == val
+        // entry.second.size == val size
+        // Both kinds are deferred until a work-item exists.
+        m_deferredInit.push_back(entry);
+
+        const llvm::Argument *A = llvm::dyn_cast<llvm::Argument>(entry.first);
+        if (!(A && A->hasByValAttr()))
+        {
+          // Private struct/Union definitions with global type
+          TypedValue pointerShadow = m_pool.clone(ShadowContext::getCleanValue(entry.first));
+          //TODO: Structs can have poisoned padding bytes. Is this important?
+          shadowContext.setGlobalValue(entry.first, pointerShadow);
+        }
+        break;
+      }
+      default:
+        FATAL_ERROR("Unsupported addressspace %d", type->getPointerAddressSpace());
+    }
+  }
+}
+
+// Kernel-completion hook: drops the per-launch state collected by
+// kernelBegin() -- both deferred-initialisation lists and the global shadow
+// values registered on the shadow context. The kernelInvocation argument is
+// not used.
+void Uninitialized::kernelEnd(const KernelInvocation *kernelInvocation)
+{
+ m_deferredInit.clear();
+ m_deferredInitGroup.clear();
+ shadowContext.clearGlobalValues();
+}
+
+// Fills SM with the shadow bytes stored at `address` in the given address
+// space. SM must already be sized (SM.size * SM.num bytes are written into
+// SM.data). Constant memory is not tracked yet, so loads from it always
+// produce an all-zero shadow.
+void Uninitialized::loadShadowMemory(unsigned addrSpace, size_t address, TypedValue &SM, const WorkItem *workItem, const WorkGroup *workGroup)
+{
+  const size_t numBytes = SM.size*SM.num;
+
+  if(addrSpace != AddrSpaceConstant)
+  {
+    getShadowMemory(addrSpace, workItem, workGroup)->load(SM.data, address, numBytes);
+
+#ifdef DUMP_SHADOW
+    cout << "Loaded " << hex << SM << " from space " << dec << addrSpace << " at address " << hex << address << endl;
+#endif
+    return;
+  }
+
+  //TODO: Eventually load value
+  memset(SM.data, 0, numBytes);
+}
+
+// Emits a warning that a memory access used an address whose shadow is not
+// clean. `write` selects the wording ("write to" / "read from"); the message
+// also names the address space, the address, and the current kernel, entity
+// and source location.
+void Uninitialized::logUninitializedAddress(unsigned int addrSpace, size_t address, bool write) const
+{
+  const char *direction = write ? "write to " : "read from ";
+
+  Context::Message msg(WARNING, m_context);
+  msg << "Uninitialized address used to " << direction;
+  msg << getAddressSpaceName(addrSpace);
+  msg << " memory address 0x" << hex << address << endl;
+  msg << msg.INDENT;
+  msg << "Kernel: " << msg.CURRENT_KERNEL << endl;
+  msg << "Entity: " << msg.CURRENT_ENTITY << endl;
+  msg << msg.CURRENT_LOCATION << endl;
+  msg.send();
+}
+
+// Emits a warning that control flow depends on an uninitialized value,
+// tagged with the current kernel, entity and source location.
+void Uninitialized::logUninitializedCF() const
+{
+  Context::Message msg(WARNING, m_context);
+  msg << "Controlflow depends on uninitialized value" << endl;
+  msg << msg.INDENT;
+  msg << "Kernel: " << msg.CURRENT_KERNEL << endl;
+  msg << "Entity: " << msg.CURRENT_ENTITY << endl;
+  msg << msg.CURRENT_LOCATION << endl;
+  msg.send();
+}
+
+// Emits a warning that an instruction used an uninitialized index value,
+// tagged with the current kernel, entity and source location.
+void Uninitialized::logUninitializedIndex() const
+{
+  Context::Message msg(WARNING, m_context);
+  msg << "Instruction depends on an uninitialized index value" << endl;
+  msg << msg.INDENT;
+  msg << "Kernel: " << msg.CURRENT_KERNEL << endl;
+  msg << "Entity: " << msg.CURRENT_ENTITY << endl;
+  msg << msg.CURRENT_LOCATION << endl;
+  msg.send();
+}
+
+// Emits a warning that an uninitialized value was written to memory, naming
+// the address space and address plus the current kernel, entity and source
+// location.
+void Uninitialized::logUninitializedWrite(unsigned int addrSpace, size_t address) const
+{
+  Context::Message msg(WARNING, m_context);
+  msg << "Uninitialized value written to ";
+  msg << getAddressSpaceName(addrSpace);
+  msg << " memory address 0x" << hex << address << endl;
+  msg << msg.INDENT;
+  msg << "Kernel: " << msg.CURRENT_KERNEL << endl;
+  msg << "Entity: " << msg.CURRENT_ENTITY << endl;
+  msg << msg.CURRENT_LOCATION << endl;
+  msg.send();
+}
+
+// Host map hook. When the mapping does not request read access, the mapped
+// range [address + offset, address + offset + size) is given a clean shadow.
+// Mappings that include CL_MAP_READ leave the shadow untouched.
+void Uninitialized::memoryMap(const Memory *memory, size_t address,
+                              size_t offset, size_t size, cl_map_flags flags)
+{
+  if(flags & CL_MAP_READ)
+  {
+    return;
+  }
+
+  allocAndStoreShadowMemory(memory->getAddressSpace(), address + offset,
+                            ShadowContext::getCleanValue(size));
+}
+
+// Shadow propagation rule: the shadow of I is the combination (via
+// ShadowContext::shadowOr) of the shadows of all its operands, starting from
+// a clean value of I's type.
+void Uninitialized::VectorOr(const WorkItem *workItem, const llvm::Instruction *I)
+{
+  PARANOID_CHECK(workItem, I);
+
+  TypedValue combined = ShadowContext::getCleanValue(I);
+
+  const unsigned numOperands = I->getNumOperands();
+  for(unsigned op = 0; op < numOperands; ++op)
+  {
+    ShadowContext::shadowOr(combined, shadowContext.getValue(workItem, I->getOperand(op)));
+  }
+
+  shadowContext.getShadowWorkItem(workItem)->getValues()->setValue(I, combined);
+}
+
+// Shadow propagation rule: if any operand of I has a non-clean shadow the
+// whole result is poisoned, otherwise the result is clean.
+void Uninitialized::SimpleOr(const WorkItem *workItem, const llvm::Instruction *I)
+{
+  PARANOID_CHECK(workItem, I);
+  ShadowValues *frame = shadowContext.getShadowWorkItem(workItem)->getValues();
+
+  // Stop at the first operand that is not clean
+  bool anyPoisoned = false;
+  const unsigned numOperands = I->getNumOperands();
+  for(unsigned op = 0; op < numOperands && !anyPoisoned; ++op)
+  {
+    anyPoisoned = !ShadowContext::isCleanValue(shadowContext.getValue(workItem, I->getOperand(op)));
+  }
+
+  if(anyPoisoned)
+  {
+    frame->setValue(I, ShadowContext::getPoisonedValue(I));
+  }
+  else
+  {
+    frame->setValue(I, ShadowContext::getCleanValue(I));
+  }
+}
+
+// Shadow propagation for a call that reads and then overwrites the memory
+// pointed to by its first argument. The previous shadow of that memory
+// becomes the shadow of the call's result; the memory's new shadow is
+// poisoned if either the previous shadow or the shadow of the second
+// argument (when present) is not clean, and clean otherwise. A warning is
+// logged if the address operand itself has a non-clean shadow.
+// NOTE(review): the shadow width is hard-coded to 4 bytes -- confirm that
+// wider operands are never routed through this function.
+void Uninitialized::SimpleOrAtomic(const WorkItem *workItem, const llvm::CallInst *CI)
+{
+ const llvm::Value *Addr = CI->getArgOperand(0);
+ unsigned addrSpace = Addr->getType()->getPointerAddressSpace();
+ size_t address = workItem->getOperand(Addr).getPointer();
+
+ // Receives the shadow currently stored at the address: one 4-byte element
+ // backed by storage from the shadow context's memory pool
+ TypedValue oldShadow = {
+ 4,
+ 1,
+ shadowContext.getMemoryPool()->alloc(4)
+ };
+
+ // Shadow to store back; stays clean unless an input turns out poisoned
+ TypedValue newShadow = ShadowContext::getCleanValue(4);
+
+ // For global memory the load/store pair below runs under the shadow
+ // memory's lock for this address
+ if(addrSpace == AddrSpaceGlobal)
+ {
+ shadowContext.getGlobalMemory()->lock(address);
+ }
+
+ loadShadowMemory(addrSpace, address, oldShadow, workItem);
+
+ if (!ShadowContext::isCleanValue(oldShadow))
+ {
+ newShadow = ShadowContext::getPoisonedValue(4);
+ }
+
+ // Only the second argument is inspected; any further arguments are ignored
+ if (CI->getNumArgOperands() > 1)
+ {
+ TypedValue argShadow = shadowContext.getValue(workItem, CI->getArgOperand(1));
+ if(!ShadowContext::isCleanValue(argShadow))
+ {
+ newShadow = ShadowContext::getPoisonedValue(4);
+ }
+ }
+
+ storeShadowMemory(addrSpace, address, newShadow, workItem);
+
+ if(addrSpace == AddrSpaceGlobal)
+ {
+ shadowContext.getGlobalMemory()->unlock(address);
+ }
+
+ // The call's result carries the shadow that was in memory before the store
+ ShadowValues *shadowValues = shadowContext.getShadowWorkItem(workItem)->getValues();
+ shadowValues->setValue(CI, oldShadow);
+
+ // Check shadow of address
+ TypedValue addrShadow = shadowContext.getValue(workItem, Addr);
+
+ if(!ShadowContext::isCleanValue(addrShadow))
+ {
+ logUninitializedAddress(addrSpace, address);
+ }
+}
+
+// Writes the shadow SM (SM.size * SM.num bytes) to `address` in the given
+// address space. Unless `unchecked` is set, storing a non-clean shadow to
+// any address space other than private memory logs an "uninitialized value
+// written" warning first. Stores to constant memory are checked but not
+// recorded, because constant shadow memory is not tracked yet.
+void Uninitialized::storeShadowMemory(unsigned addrSpace, size_t address, TypedValue SM, const WorkItem *workItem, const WorkGroup *workGroup, bool unchecked)
+{
+#ifdef DUMP_SHADOW
+  cout << "Store " << hex << SM << " to space " << dec << addrSpace << " at address " << hex << address << endl;
+#endif
+
+  const bool needsCheck = !unchecked && addrSpace != AddrSpacePrivate;
+  if(needsCheck && !ShadowContext::isCleanValue(SM))
+  {
+#ifdef DUMP_SHADOW
+    shadowContext.dump(workItem);
+#endif
+    logUninitializedWrite(addrSpace, address);
+  }
+
+  //TODO: Eventually store value (constant memory is skipped for now)
+  if(addrSpace != AddrSpaceConstant)
+  {
+    getShadowMemory(addrSpace, workItem, workGroup)->store(SM.data, address, SM.size*SM.num);
+  }
+}
+
+// Work-item start hook: creates the shadow work-item and gives every value
+// queued in m_deferredInit a clean shadow. Private-memory values
+// additionally get a clean shadow for the memory they point to.
+void Uninitialized::workItemBegin(const WorkItem *workItem)
+{
+  shadowContext.createMemoryPool();
+  shadowContext.allocateWorkItems();
+  ShadowValues *frame = shadowContext.createShadowWorkItem(workItem)->getValues();
+
+  for(const auto &entry : m_deferredInit)
+  {
+    const llvm::Type *type = entry.first->getType();
+
+    if(!type->isPointerTy())
+    {
+      // Non pointer type kernel arguments
+      // entry.second.data == val
+      // entry.second.size == val size
+      frame->setValue(entry.first, ShadowContext::getCleanValue(entry.first));
+      continue;
+    }
+
+    switch(type->getPointerAddressSpace())
+    {
+      case AddrSpaceGlobal:
+      {
+        // Global pointer kernel arguments
+        // entry.second.data == ptr
+        // entry.second.size == ptr size
+        frame->setValue(entry.first, ShadowContext::getCleanValue(type));
+        break;
+      }
+      case AddrSpaceLocal:
+      {
+        // Local pointer kernel arguments
+        // entry.second.data == NULL
+        // entry.second.size == val size
+        frame->setValue(entry.first, ShadowContext::getCleanValue(entry.first));
+        break;
+      }
+      case AddrSpacePrivate:
+      {
+        // ByVal kernel arguments and private struct/union definitions:
+        // both get clean shadow memory of entry.second.size bytes at the
+        // address this work-item holds for the value.
+        const size_t address = workItem->getOperand(entry.first).getPointer();
+        TypedValue cleanMemory = ShadowContext::getCleanValue(entry.second.size);
+        allocAndStoreShadowMemory(AddrSpacePrivate, address, cleanMemory, workItem);
+
+        // Only ByVal arguments also need a shadow for the pointer itself
+        const llvm::Argument *A = llvm::dyn_cast<llvm::Argument>(entry.first);
+        if(A && A->hasByValAttr())
+        {
+          frame->setValue(entry.first, ShadowContext::getCleanValue(entry.first));
+        }
+        break;
+      }
+    }
+  }
+}
+
+// Work-item completion hook: releases what workItemBegin() set up, in the
+// reverse order -- the shadow work-item, then the work-item bookkeeping,
+// then the memory pool.
+void Uninitialized::workItemComplete(const WorkItem *workItem)
+{
+ shadowContext.destroyShadowWorkItem(workItem);
+ shadowContext.freeWorkItems();
+ shadowContext.destroyMemoryPool();
+}
+
+// Work-group start hook: creates the shadow work-group and allocates
+// poisoned local shadow memory for every value queued in
+// m_deferredInitGroup.
+void Uninitialized::workGroupBegin(const WorkGroup *workGroup)
+{
+  shadowContext.createMemoryPool();
+  shadowContext.allocateWorkGroups();
+  shadowContext.createShadowWorkGroup(workGroup);
+
+  for(const auto &entry : m_deferredInitGroup)
+  {
+    // Local data variables
+    // entry.second.data == NULL
+    // entry.second.size == val size
+    const size_t address = workGroup->getLocalMemoryAddress(entry.first);
+
+    //TODO: Local memory clean or poisoned? May need to differentiate
+    // between kernel argument (?) and variable (poisoned).
+    // For now both kinds are poisoned.
+    TypedValue poisoned = ShadowContext::getPoisonedValue(entry.second.size);
+
+    allocAndStoreShadowMemory(AddrSpaceLocal, address, poisoned, NULL, workGroup, true);
+  }
+}
+
+// Work-group completion hook: releases what workGroupBegin() set up, in the
+// reverse order -- the shadow work-group, then the work-group bookkeeping,
+// then the memory pool.
+void Uninitialized::workGroupComplete(const WorkGroup *workGroup)
+{
+ shadowContext.destroyShadowWorkGroup(workGroup);
+ shadowContext.freeWorkGroups();
+ shadowContext.destroyMemoryPool();
+}
+
+// Creates an empty frame. The value map is heap-allocated and owned by the
+// frame; the list recording the order in which values were first set only
+// exists when DUMP_SHADOW is defined.
+ShadowFrame::ShadowFrame() :
+ m_values(new UnorderedTypedValueMap())
+{
+#ifdef DUMP_SHADOW
+ m_valuesList = new ValuesList();
+#endif
+}
+
+// Frees the containers allocated by the constructor. The data buffers of
+// the stored TypedValues are not released here.
+ShadowFrame::~ShadowFrame()
+{
+ delete m_values;
+#ifdef DUMP_SHADOW
+ delete m_valuesList;
+#endif
+}
+
+// Prints the frame's shadow values to stdout in the order they were first
+// set. Named values are shown by name, unnamed ones get a running number.
+// Without DUMP_SHADOW only a notice is printed.
+void ShadowFrame::dump() const
+{
+  cout << "==== ShadowMap (private) =======" << endl;
+
+#ifdef DUMP_SHADOW
+  unsigned nextNumber = 1;
+
+  for(const auto &V : *m_valuesList)
+  {
+    cout << "%";
+    if(V->hasName())
+    {
+      cout << V->getName().str();
+    }
+    else
+    {
+      cout << dec << nextNumber++;
+    }
+    cout << ": " << m_values->at(V) << endl;
+  }
+#else
+  cout << endl << "Dump not activated!" << endl;
+#endif
+
+  cout << "=======================" << endl;
+}
+
+// Returns the shadow of V as seen from this frame:
+//  - instructions and arguments: the shadow stored in the frame's map
+//    (asserts that one exists),
+//  - undef values: a poisoned shadow,
+//  - constant vectors: a shadow assembled element by element,
+//  - anything else: a clean shadow.
+TypedValue ShadowFrame::getValue(const llvm::Value *V) const
+{
+  if(llvm::isa<llvm::Instruction>(V))
+  {
+    // For instructions the shadow is already stored in the map.
+    assert(m_values->count(V) && "No shadow for instruction value");
+    return m_values->at(V);
+  }
+
+  if(llvm::isa<llvm::UndefValue>(V))
+  {
+    return ShadowContext::getPoisonedValue(V);
+  }
+
+  if(llvm::isa<llvm::Argument>(V))
+  {
+    // For arguments the shadow is already stored in the map.
+    assert(m_values->count(V) && "No shadow for argument value");
+    return m_values->at(V);
+  }
+
+  const llvm::ConstantVector *VC = llvm::dyn_cast<llvm::ConstantVector>(V);
+  if(!VC)
+  {
+    // For everything else the shadow is zero.
+    return ShadowContext::getCleanValue(V);
+  }
+
+  // Constant vector: copy each element's shadow into its slot
+  TypedValue vecShadow = ShadowContext::getCleanValue(V);
+  for(unsigned lane = 0; lane < vecShadow.num; ++lane)
+  {
+    TypedValue laneShadow = getValue(VC->getAggregateElement(lane));
+    memcpy(vecShadow.data + lane*vecShadow.size, laneShadow.data, vecShadow.size);
+  }
+
+  return vecShadow;
+}
+
+// Records SV as the shadow of V in this frame, replacing any previous
+// entry. With DUMP_SHADOW the first assignment is appended to the ordered
+// value list and any re-assignment is reported on stdout.
+void ShadowFrame::setValue(const llvm::Value *V, TypedValue SV)
+{
+#ifdef DUMP_SHADOW
+  if(m_values->count(V))
+  {
+    cout << "Shadow for value " << V->getName().str() << " reset!" << endl;
+  }
+  else
+  {
+    m_valuesList->push_back(V);
+  }
+#endif
+  (*m_values)[V] = SV;
+}
+
+// Creates the frame stack and pushes one initial, empty frame onto it.
+ShadowValues::ShadowValues() :
+ m_stack(new ShadowValuesStack())
+{
+ pushFrame(createCleanShadowFrame());
+}
+
+// Pops every frame still on the stack, then frees the stack container.
+// NOTE(review): presumably popFrame() also deletes the popped frame --
+// confirm, otherwise the frames leak here.
+ShadowValues::~ShadowValues()
+{
+ while(!m_stack->empty())
+ {
+ popFrame();
+ }
+
+ delete m_stack;
+}
+
+// Heap-allocates a new, empty ShadowFrame. The frame is not pushed here;
+// frames handed to pushFrame() are deleted by popFrame().
+ShadowFrame* ShadowValues::createCleanShadowFrame()
+{
+ return new ShadowFrame();
+}
+
+// Creates the per-work-item shadow state: a private-address-space shadow
+// memory using bufferBits buffer-index bits, and a fresh shadow value stack.
+ShadowWorkItem::ShadowWorkItem(unsigned bufferBits) :
+ m_memory(new ShadowMemory(AddrSpacePrivate, bufferBits)), m_values(new ShadowValues())
+{
+}
+
+// Releases the shadow memory and shadow values created by the constructor.
+ShadowWorkItem::~ShadowWorkItem()
+{
+ delete m_memory;
+ delete m_values;
+}
+
+// Creates the local-address-space shadow memory for a work-group.
+// NOTE: the bufferBits argument is currently ignored; the number of buffer
+// bits is hard coded to 16 when size_t is 8 bytes wide and to 8 otherwise
+// (see FIXME below).
+ShadowWorkGroup::ShadowWorkGroup(unsigned bufferBits) :
+ //FIXME: Hard coded values
+ m_memory(new ShadowMemory(AddrSpaceLocal, sizeof(size_t) == 8 ? 16 : 8))
+{
+}
+
+// Releases the local shadow memory created by the constructor.
+ShadowWorkGroup::~ShadowWorkGroup()
+{
+ delete m_memory;
+}
+
+// Creates an empty shadow memory for the given address space.
+// An address is split into a buffer index (the top bufferBits bits) and an
+// offset (the remaining bits): m_numBitsAddress is the width of size_t in
+// bits minus bufferBits, m_numBitsBuffer is bufferBits itself.
+ShadowMemory::ShadowMemory(AddressSpace addrSpace, unsigned bufferBits) :
+ m_addrSpace(addrSpace), m_map(), m_numBitsAddress((sizeof(size_t)<<3) - bufferBits), m_numBitsBuffer(bufferBits)
+{
+}
+
+// Frees every shadow buffer still held in the map (via clear()).
+ShadowMemory::~ShadowMemory()
+{
+ clear();
+}
+
+// Creates a shadow buffer of `size` bytes for the buffer index encoded in
+// `address`. If that index already has a buffer it is released first.
+// The new buffer's flags are zero; its data bytes are not initialised here.
+void ShadowMemory::allocate(size_t address, size_t size)
+{
+  const size_t bufferIndex = extractBuffer(address);
+
+  // Re-allocation of an index replaces the buffer currently registered there.
+  if(m_map.count(bufferIndex) != 0)
+  {
+    deallocate(address);
+  }
+
+  Buffer *fresh = new Buffer();
+  fresh->size = size;
+  fresh->flags = 0;
+  fresh->data = new unsigned char[size];
+
+  m_map[bufferIndex] = fresh;
+}
+
+// Frees every shadow buffer and empties the map.
+// Entries whose buffer pointer is NULL are skipped, and the map is cleared
+// afterwards so that no dangling Buffer pointers remain reachable through it.
+void ShadowMemory::clear()
+{
+  for(MemoryMap::iterator mItr = m_map.begin(); mItr != m_map.end(); ++mItr)
+  {
+    Buffer *buffer = mItr->second;
+    if(!buffer)
+    {
+      continue;
+    }
+
+    delete[] buffer->data;
+    delete buffer;
+  }
+
+  m_map.clear();
+}
+
+// Releases the shadow buffer registered for the buffer index encoded in
+// `address` and removes its map entry.
+// The entry is erased rather than left behind as a NULL pointer, so later
+// lookups (count()/at()) never see a dead buffer. Deallocating an index that
+// has no entry trips the assertion in debug builds and is a no-op otherwise.
+void ShadowMemory::deallocate(size_t address)
+{
+  MemoryMap::iterator entry = m_map.find(extractBuffer(address));
+
+  assert(entry != m_map.end() && "Cannot deallocate non existing memory!");
+  if(entry == m_map.end())
+  {
+    return;
+  }
+
+  if(entry->second)
+  {
+    delete[] entry->second->data;
+    delete entry->second;
+  }
+
+  m_map.erase(entry);
+}
+
+// Prints a hex dump of every shadow buffer, four bytes per line, each line
+// prefixed with the address formed from the buffer index and the offset.
+// Buffers are printed in ascending index order. The map is walked directly
+// instead of probing consecutive indices, so every buffer is printed exactly
+// once even when the indices are not contiguous (the probing loop re-printed
+// a buffer after a gap and could stop before reaching the last ones).
+// Note: unlike the probing loop, a buffer with index 0 is printed if present.
+void ShadowMemory::dump() const
+{
+  cout << "====== ShadowMem (" << getAddressSpaceName(m_addrSpace) << ") ======";
+
+  // Collect the live buffers sorted by index; NULL entries are ignored.
+  std::map<size_t, const Buffer*> ordered;
+  for(MemoryMap::const_iterator mItr = m_map.begin(); mItr != m_map.end(); ++mItr)
+  {
+    if(mItr->second)
+    {
+      ordered[mItr->first] = mItr->second;
+    }
+  }
+
+  std::map<size_t, const Buffer*>::const_iterator bItr;
+  for(bItr = ordered.begin(); bItr != ordered.end(); ++bItr)
+  {
+    const size_t base = bItr->first << m_numBitsAddress;
+    const Buffer *buffer = bItr->second;
+
+    for(size_t i = 0; i < buffer->size; i++)
+    {
+      if(i%4 == 0)
+      {
+        cout << endl << hex << uppercase
+             << setw(16) << setfill(' ') << right
+             << (base | i) << ":";
+      }
+      cout << " " << hex << uppercase << setw(2) << setfill('0')
+           << (int)buffer->data[i];
+    }
+  }
+  cout << endl;
+
+  cout << "=======================" << endl;
+}
+
+// Returns the buffer index of `address`: the bits above the lowest
+// m_numBitsAddress bits.
+size_t ShadowMemory::extractBuffer(size_t address) const
+{
+ return (address >> m_numBitsAddress);
+}
+
+// Returns the offset part of `address`: the address with its top
+// m_numBitsBuffer bits masked off.
+size_t ShadowMemory::extractOffset(size_t address) const
+{
+ return (address & (((size_t)-1) >> m_numBitsBuffer));
+}
+
+// Returns a raw pointer to the shadow byte for `address`.
+// The buffer must exist (asserted); the offset is not range checked.
+void* ShadowMemory::getPointer(size_t address) const
+{
+  const size_t bufferIndex = extractBuffer(address);
+
+  assert(m_map.count(bufferIndex) && "No shadow memory found!");
+
+  unsigned char *base = m_map.at(bufferIndex)->data;
+  return base + extractOffset(address);
+}
+
+// Returns true if [address, address+size) lies entirely inside an existing
+// shadow buffer.
+// The range check is written so that it cannot wrap around for large
+// `size` values (the naive `offset + size <= bufferSize` can overflow), and
+// a missing or NULL map entry is reported as invalid.
+bool ShadowMemory::isAddressValid(size_t address, size_t size) const
+{
+  MemoryMap::const_iterator entry = m_map.find(extractBuffer(address));
+  if(entry == m_map.end() || !entry->second)
+  {
+    return false;
+  }
+
+  const size_t bufferSize = entry->second->size;
+  const size_t offset = extractOffset(address);
+
+  return size <= bufferSize && offset <= bufferSize - size;
+}
+
+// Copies `size` shadow bytes starting at `address` into dst.
+// If the range is not backed by shadow memory, dst is filled with 0xFF
+// bytes, i.e. every bit is reported as poisoned.
+// The poisoned fill is written directly into dst: this avoids narrowing
+// `size` to unsigned on the way through getPoisonedValue() and does not
+// require the thread-local memory pool to exist.
+void ShadowMemory::load(unsigned char *dst, size_t address, size_t size) const
+{
+  if(!isAddressValid(address, size))
+  {
+    memset(dst, 0xff, size);
+    return;
+  }
+
+  const size_t index = extractBuffer(address);
+  const size_t offset = extractOffset(address);
+
+  memcpy(dst, m_map.at(index)->data + offset, size);
+}
+
+// Locks the mutex that ATOMIC_MUTEX (defined elsewhere) selects for the
+// offset part of `address`. Only the offset is used, not the buffer index.
+void ShadowMemory::lock(size_t address) const
+{
+ size_t offset = extractOffset(address);
+ ATOMIC_MUTEX(offset).lock();
+}
+
+// Writes `size` shadow bytes from src to the shadow memory at `address`.
+// Stores to ranges that are not backed by shadow memory are silently dropped.
+void ShadowMemory::store(const unsigned char *src, size_t address, size_t size)
+{
+  if(!isAddressValid(address, size))
+  {
+    return;
+  }
+
+  const size_t bufferIndex = extractBuffer(address);
+  const size_t byteOffset = extractOffset(address);
+
+  assert(m_map.count(bufferIndex) && "Cannot store to unallocated memory!");
+  unsigned char *target = m_map.at(bufferIndex)->data + byteOffset;
+  memcpy(target, src, size);
+}
+
+// Unlocks the mutex that ATOMIC_MUTEX (defined elsewhere) selects for the
+// offset part of `address`; counterpart of lock().
+void ShadowMemory::unlock(size_t address) const
+{
+ size_t offset = extractOffset(address);
+ ATOMIC_MUTEX(offset).unlock();
+}
+
+// Creates the global-address-space shadow memory and an empty global value
+// map; bufferBits is remembered for shadow work-items/groups created later.
+ShadowContext::ShadowContext(unsigned bufferBits) :
+ m_globalMemory(new ShadowMemory(AddrSpaceGlobal, bufferBits)), m_globalValues(), m_numBitsBuffer(bufferBits)
+{
+}
+
+// Releases the global shadow memory. The thread-local work space is not
+// touched here.
+ShadowContext::~ShadowContext()
+{
+ delete m_globalMemory;
+}
+
+// Lazily creates this thread's work-item shadow map; does nothing if the
+// map already exists.
+void ShadowContext::allocateWorkItems()
+{
+  if(m_workSpace.workItems)
+  {
+    return;
+  }
+
+  m_workSpace.workItems = new ShadowItemMap();
+}
+
+// Lazily creates this thread's work-group shadow map; does nothing if the
+// map already exists.
+void ShadowContext::allocateWorkGroups()
+{
+  if(m_workSpace.workGroups)
+  {
+    return;
+  }
+
+  m_workSpace.workGroups = new ShadowGroupMap();
+}
+
+// Removes every entry from the global shadow value map.
+void ShadowContext::clearGlobalValues()
+{
+ m_globalValues.clear();
+}
+
+// Registers one more user of this thread's memory pool, creating the pool
+// when the first user arrives. Pair with destroyMemoryPool().
+void ShadowContext::createMemoryPool()
+{
+  const bool firstUser = (m_workSpace.poolUsers == 0);
+  if(firstUser)
+  {
+    m_workSpace.memoryPool = new MemoryPool();
+  }
+
+  m_workSpace.poolUsers += 1;
+}
+
+// Creates the shadow for workItem, registers it in this thread's work-item
+// map and returns it. A work-item must not already have a shadow (asserted).
+ShadowWorkItem* ShadowContext::createShadowWorkItem(const WorkItem *workItem)
+{
+  assert(!m_workSpace.workItems->count(workItem) && "Workitems may only have one shadow");
+
+  ShadowWorkItem *shadow = new ShadowWorkItem(m_numBitsBuffer);
+  ShadowItemMap &items = *m_workSpace.workItems;
+  items[workItem] = shadow;
+
+  return shadow;
+}
+
+// Creates the shadow for workGroup, registers it in this thread's work-group
+// map and returns it. A work-group must not already have a shadow (asserted).
+ShadowWorkGroup* ShadowContext::createShadowWorkGroup(const WorkGroup *workGroup)
+{
+  assert(!m_workSpace.workGroups->count(workGroup) && "Workgroups may only have one shadow");
+
+  ShadowWorkGroup *shadow = new ShadowWorkGroup(m_numBitsBuffer);
+  ShadowGroupMap &groups = *m_workSpace.workGroups;
+  groups[workGroup] = shadow;
+
+  return shadow;
+}
+
+// Unregisters one user of this thread's memory pool and deletes the pool
+// when the last user leaves.
+// The pool pointer is reset after deletion so getMemoryPool() never hands
+// out a dangling pointer, and an unbalanced call (no registered users) is
+// rejected instead of wrapping the unsigned user counter around.
+void ShadowContext::destroyMemoryPool()
+{
+  assert(m_workSpace.poolUsers > 0 && "No memory pool user registered!");
+  if(m_workSpace.poolUsers == 0)
+  {
+    return;
+  }
+
+  --m_workSpace.poolUsers;
+
+  if(m_workSpace.poolUsers == 0)
+  {
+    delete m_workSpace.memoryPool;
+    m_workSpace.memoryPool = NULL;
+  }
+}
+
+// Deletes the shadow registered for workItem and removes its map entry.
+// The work-item is expected to have a shadow (asserted).
+void ShadowContext::destroyShadowWorkItem(const WorkItem *workItem)
+{
+  ShadowItemMap &items = *m_workSpace.workItems;
+  assert(items.count(workItem) && "No shadow for workitem found!");
+
+  ShadowWorkItem *doomed = items[workItem];
+  delete doomed;
+  items.erase(workItem);
+}
+
+// Deletes the shadow registered for workGroup and removes its map entry.
+// The work-group is expected to have a shadow (asserted).
+void ShadowContext::destroyShadowWorkGroup(const WorkGroup *workGroup)
+{
+  ShadowGroupMap &groups = *m_workSpace.workGroups;
+  assert(groups.count(workGroup) && "No shadow for workgroup found!");
+
+  ShadowWorkGroup *doomed = groups[workGroup];
+  delete doomed;
+  groups.erase(workGroup);
+}
+
+// Dumps the complete shadow state visible from this thread: global values,
+// global memory, the first registered work-group (if any) and then either
+// the given work-item or, when workItem is NULL, every registered work-item.
+void ShadowContext::dump(const WorkItem *workItem) const
+{
+  dumpGlobalValues();
+  m_globalMemory->dump();
+
+  const ShadowGroupMap *groups = m_workSpace.workGroups;
+  if(groups && groups->size())
+  {
+    groups->begin()->second->dump();
+  }
+
+  const ShadowItemMap *items = m_workSpace.workItems;
+  if(!items || !items->size())
+  {
+    return;
+  }
+
+  if(workItem)
+  {
+    cout << "Item " << workItem->getGlobalID() << endl;
+    getShadowWorkItem(workItem)->dump();
+    return;
+  }
+
+  for(ShadowItemMap::const_iterator entry = items->begin(); entry != items->end(); ++entry)
+  {
+    cout << "Item " << entry->first->getGlobalID() << endl;
+    entry->second->dump();
+  }
+}
+
+// Prints every global shadow value, one per line. Named values are printed
+// under their name; unnamed ones get a running decimal number starting at 1.
+void ShadowContext::dumpGlobalValues() const
+{
+  cout << "==== ShadowMap (global) =======" << endl;
+
+  unsigned anonymousIndex = 1;
+  for(UnorderedTypedValueMap::const_iterator entry = m_globalValues.begin();
+      entry != m_globalValues.end(); ++entry)
+  {
+    const llvm::Value *value = entry->first;
+
+    if(!value->hasName())
+    {
+      cout << "%" << dec << anonymousIndex++ << ": " << entry->second << endl;
+      continue;
+    }
+
+    cout << "%" << value->getName().str() << ": " << entry->second << endl;
+  }
+
+  cout << "=======================" << endl;
+}
+
+// Deletes this thread's work-item shadow map, but only once it is empty;
+// a missing or still populated map is left untouched.
+void ShadowContext::freeWorkItems()
+{
+  ShadowItemMap *items = m_workSpace.workItems;
+  if(!items || !items->empty())
+  {
+    return;
+  }
+
+  delete items;
+  m_workSpace.workItems = NULL;
+}
+
+// Deletes this thread's work-group shadow map, but only once it is empty;
+// a missing or still populated map is left untouched.
+void ShadowContext::freeWorkGroups()
+{
+  ShadowGroupMap *groups = m_workSpace.workGroups;
+  if(!groups || !groups->empty())
+  {
+    return;
+  }
+
+  delete groups;
+  m_workSpace.workGroups = NULL;
+}
+
+// Returns a single-element, all-zero ("clean") shadow of `size` bytes.
+// The storage comes from this thread's memory pool.
+TypedValue ShadowContext::getCleanValue(unsigned size)
+{
+  TypedValue clean;
+  clean.size = size;
+  clean.num = 1;
+  clean.data = m_workSpace.memoryPool->alloc(size);
+
+  memset(clean.data, 0, size);
+
+  return clean;
+}
+
+// Returns an all-zero ("clean") shadow with the same element size and
+// element count as v. The storage comes from this thread's memory pool.
+TypedValue ShadowContext::getCleanValue(TypedValue v)
+{
+  TypedValue clean;
+  clean.size = v.size;
+  clean.num = v.num;
+  clean.data = m_workSpace.memoryPool->alloc(v.size*v.num);
+
+  memset(clean.data, 0, v.size*v.num);
+
+  return clean;
+}
+
+// Returns an all-zero ("clean") shadow shaped like V, using the
+// (element size, element count) pair reported by getValueSize(V).
+// The storage comes from this thread's memory pool.
+TypedValue ShadowContext::getCleanValue(const llvm::Value *V)
+{
+  const pair<unsigned,unsigned> shape = getValueSize(V);
+
+  TypedValue clean;
+  clean.size = shape.first;
+  clean.num = shape.second;
+  clean.data = m_workSpace.memoryPool->alloc(shape.first*shape.second);
+
+  memset(clean.data, 0, clean.size*clean.num);
+
+  return clean;
+}
+
+// Returns a single-element, all-zero ("clean") shadow whose size is
+// getTypeSize(Ty). The storage comes from this thread's memory pool.
+TypedValue ShadowContext::getCleanValue(const llvm::Type *Ty)
+{
+  const unsigned bytes = getTypeSize(Ty);
+
+  TypedValue clean;
+  clean.size = bytes;
+  clean.num = 1;
+  clean.data = m_workSpace.memoryPool->alloc(bytes);
+
+  memset(clean.data, 0, clean.size);
+
+  return clean;
+}
+
+// Returns a single-element shadow of `size` bytes with every bit set
+// ("poisoned"). The storage comes from this thread's memory pool.
+TypedValue ShadowContext::getPoisonedValue(unsigned size)
+{
+  TypedValue poisoned;
+  poisoned.size = size;
+  poisoned.num = 1;
+  poisoned.data = m_workSpace.memoryPool->alloc(size);
+
+  memset(poisoned.data, -1, size);
+
+  return poisoned;
+}
+
+// Returns a shadow with every bit set ("poisoned") and the same element
+// size and element count as v. The storage comes from this thread's pool.
+TypedValue ShadowContext::getPoisonedValue(TypedValue v)
+{
+  TypedValue poisoned;
+  poisoned.size = v.size;
+  poisoned.num = v.num;
+  poisoned.data = m_workSpace.memoryPool->alloc(v.size*v.num);
+
+  memset(poisoned.data, -1, v.size*v.num);
+
+  return poisoned;
+}
+
+// Returns a shadow with every bit set ("poisoned") shaped like V, using the
+// (element size, element count) pair reported by getValueSize(V).
+// The storage comes from this thread's memory pool.
+TypedValue ShadowContext::getPoisonedValue(const llvm::Value *V)
+{
+  const pair<unsigned,unsigned> shape = getValueSize(V);
+
+  TypedValue poisoned;
+  poisoned.size = shape.first;
+  poisoned.num = shape.second;
+  poisoned.data = m_workSpace.memoryPool->alloc(shape.first*shape.second);
+
+  memset(poisoned.data, -1, poisoned.size*poisoned.num);
+
+  return poisoned;
+}
+
+// Returns a single-element shadow with every bit set ("poisoned") whose
+// size is getTypeSize(Ty). The storage comes from this thread's memory pool.
+TypedValue ShadowContext::getPoisonedValue(const llvm::Type *Ty)
+{
+  const unsigned bytes = getTypeSize(Ty);
+
+  TypedValue poisoned;
+  poisoned.size = bytes;
+  poisoned.num = 1;
+  poisoned.data = m_workSpace.memoryPool->alloc(bytes);
+
+  memset(poisoned.data, -1, poisoned.size);
+
+  return poisoned;
+}
+
+// Returns the shadow of V: the global shadow if one is registered,
+// otherwise whatever the work-item's shadow value stack reports for V.
+TypedValue ShadowContext::getValue(const WorkItem *workItem, const llvm::Value *V) const
+{
+  UnorderedTypedValueMap::const_iterator global = m_globalValues.find(V);
+  if(global != m_globalValues.end())
+  {
+    return global->second;
+  }
+
+  return getShadowWorkItem(workItem)->getValues()->getValue(V);
+}
+
+// An image shadow is clean only if its address, its description and its
+// format are all clean; checks stop at the first poisoned part.
+bool ShadowContext::isCleanImage(const TypedValue shadowImage)
+{
+  if(!isCleanImageAddress(shadowImage))
+  {
+    return false;
+  }
+
+  if(!isCleanImageDescription(shadowImage))
+  {
+    return false;
+  }
+
+  return isCleanImageFormat(shadowImage);
+}
+
+// Interprets the shadow's data as an Image and reports whether the shadow
+// of its address field is clean.
+bool ShadowContext::isCleanImageAddress(const TypedValue shadowImage)
+{
+ Image *image = (Image*)shadowImage.data;
+
+ return ShadowContext::isCleanValue(image->address);
+}
+
+// Interprets the shadow's data as an Image and reports whether every checked
+// field of its description is clean. Evaluation stops at the first poisoned
+// field.
+bool ShadowContext::isCleanImageDescription(const TypedValue shadowImage)
+{
+  Image *image = (Image*)shadowImage.data;
+  const auto &desc = image->desc;
+
+  //TODO: image->desc.buffer is currently not checked
+  return ShadowContext::isCleanValue(desc.image_type)
+      && ShadowContext::isCleanValue(desc.image_width)
+      && ShadowContext::isCleanValue(desc.image_height)
+      && ShadowContext::isCleanValue(desc.image_depth)
+      && ShadowContext::isCleanValue(desc.image_array_size)
+      && ShadowContext::isCleanValue(desc.image_row_pitch)
+      && ShadowContext::isCleanValue(desc.image_slice_pitch)
+      && ShadowContext::isCleanValue(desc.num_mip_levels)
+      && ShadowContext::isCleanValue(desc.num_samples);
+}
+
+// Interprets the shadow's data as an Image and reports whether both the
+// channel order and the channel data type of its format are clean.
+bool ShadowContext::isCleanImageFormat(const TypedValue shadowImage)
+{
+  Image *image = (Image*)shadowImage.data;
+  const auto &format = image->format;
+
+  if(!ShadowContext::isCleanValue(format.image_channel_order))
+  {
+    return false;
+  }
+
+  return ShadowContext::isCleanValue(format.image_channel_data_type);
+}
+
+// Reports whether the shadow of a struct stored at `address` is clean.
+// Packed structs are checked as one contiguous block. For all other structs
+// each member is checked separately at its own offset, recursing into nested
+// structs, so bytes between members are not inspected.
+bool ShadowContext::isCleanStruct(ShadowMemory *shadowMemory, size_t address, const llvm::StructType *structTy)
+{
+  if(structTy->isPacked())
+  {
+    const unsigned bytes = getTypeSize(structTy);
+
+    TypedValue whole;
+    whole.size = bytes;
+    whole.num = 1;
+    whole.data = m_workSpace.memoryPool->alloc(bytes);
+
+    shadowMemory->load(whole.data, address, bytes);
+
+    return isCleanValue(whole);
+  }
+
+  for(unsigned idx = 0; idx < structTy->getStructNumElements(); ++idx)
+  {
+    const size_t memberOffset = getStructMemberOffset(structTy, idx);
+    const unsigned memberSize = getTypeSize(structTy->getElementType(idx));
+
+    bool memberClean;
+    if(const llvm::StructType *nested = llvm::dyn_cast<llvm::StructType>(structTy->getElementType(idx)))
+    {
+      memberClean = isCleanStruct(shadowMemory, address + memberOffset, nested);
+    }
+    else
+    {
+      TypedValue member;
+      member.size = memberSize;
+      member.num = 1;
+      member.data = m_workSpace.memoryPool->alloc(memberSize);
+
+      shadowMemory->load(member.data, address + memberOffset, memberSize);
+
+      memberClean = isCleanValue(member);
+    }
+
+    if(!memberClean)
+    {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// A scalar shadow is clean when no bit is set.
+bool ShadowContext::isCleanValue(unsigned long v)
+{
+ return v == 0UL;
+}
+
+// Compares v against a freshly created clean shadow of the same shape.
+// NOTE(review): relies on TypedValue's operator== (defined elsewhere);
+// each call also takes a new block from the thread-local memory pool.
+bool ShadowContext::isCleanValue(TypedValue v)
+{
+ return (ShadowContext::getCleanValue(v) == v);
+}
+
+// Reports whether element number `offset` of v is clean, i.e. whether all
+// v.size bytes of that element are zero.
+// The bytes are scanned in place; no temporary clean value is taken from the
+// memory pool, so calling this in a loop does not grow the pool.
+bool ShadowContext::isCleanValue(TypedValue v, unsigned offset)
+{
+  const unsigned char *element = v.data + offset*v.size;
+
+  for(unsigned byte = 0; byte < v.size; ++byte)
+  {
+    if(element[byte] != 0)
+    {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Registers SV as the global shadow of V. A value may only be given a
+// global shadow once (asserted); without assertions an existing entry is
+// overwritten.
+void ShadowContext::setGlobalValue(const llvm::Value *V, TypedValue SV)
+{
+ assert(!m_globalValues.count(V) && "Values may only have one shadow");
+ m_globalValues[V] = SV;
+}
+
+// Element-wise poison propagation from v2 into v1 (modified in place through
+// its data pointer): every element of v1 whose counterpart in v2 is not
+// clean becomes fully poisoned; all other elements of v1 are left as is.
+// Both values must have the same number of elements (asserted).
+void ShadowContext::shadowOr(TypedValue v1, TypedValue v2)
+{
+  assert(v1.num == v2.num && "Cannot create shadow for vectors of different lengths!");
+
+  for(unsigned int lane = 0; lane < v1.num; ++lane)
+  {
+    if(ShadowContext::isCleanValue(v2, lane))
+    {
+      continue;
+    }
+
+    memset(v1.data + lane * v1.size, 0xff, v1.size);
+  }
+}
diff --git a/src/plugins/Uninitialized.h b/src/plugins/Uninitialized.h
new file mode 100644
index 0000000..eca4957
--- /dev/null
+++ b/src/plugins/Uninitialized.h
@@ -0,0 +1,314 @@
+// Uninitialized.h (Oclgrind)
+// Copyright (c) 2015, Moritz Pflanzer
+// Imperial College London. All rights reserved.
+//
+// This program is provided under a three-clause BSD license. For full
+// license terms please see the LICENSE file distributed with this
+// source code.
+
+#include "core/Plugin.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+
+// Debugging switches.
+// DUMP_SHADOW: when defined, ShadowFrame additionally keeps an ordered list
+// of shadowed values so that its contents can be dumped.
+// PARANOID_CHECK(W, I): exactly one variant is active. The two commented-out
+// variants call checkAllOperandsDefined(W, I), the first one inside an
+// assert; the active variant expands to a no-op expression.
+//#define DUMP_SHADOW
+//#define PARANOID_CHECK(W, I) assert(checkAllOperandsDefined(W, I) && "Not all operands defined")
+//#define PARANOID_CHECK(W, I) checkAllOperandsDefined(W, I)
+#define PARANOID_CHECK(W, I) (void*)0
+
+namespace oclgrind
+{
+ typedef std::unordered_map<const llvm::Value*, TypedValue> UnorderedTypedValueMap;
+
+ // Shadow values for one frame of the shadow value stack: a map from LLVM
+ // values to their shadows, plus the call instruction associated with the
+ // frame.
+ class ShadowFrame
+ {
+ public:
+ ShadowFrame();
+ virtual ~ShadowFrame();
+
+ // Prints the frame's shadow values (only populated with DUMP_SHADOW).
+ void dump() const;
+ // Returns the call instruction stored with setCall().
+ inline const llvm::CallInst* getCall() const
+ {
+ return m_call;
+ }
+ // Returns the shadow for V; see the definition for how values without a
+ // stored shadow are handled.
+ TypedValue getValue(const llvm::Value *V) const;
+ // True for every constant, and for any value with a stored shadow.
+ inline bool hasValue(const llvm::Value* V) const
+ {
+ return llvm::isa<llvm::Constant>(V) || m_values->count(V);
+ }
+ // Remembers the call instruction associated with this frame.
+ inline void setCall(const llvm::CallInst *CI)
+ {
+ m_call = CI;
+ }
+ // Stores SV as the shadow of V, replacing any previous shadow.
+ void setValue(const llvm::Value *V, TypedValue SV);
+
+ private:
+ typedef std::list<const llvm::Value*> ValuesList;
+
+ const llvm::CallInst *m_call;
+ UnorderedTypedValueMap *m_values;
+#ifdef DUMP_SHADOW
+ // Values in the order they first received a shadow; used by dump().
+ ValuesList *m_valuesList;
+#endif
+ };
+
+ // Stack of ShadowFrames. Every accessor forwards to the frame on top of
+ // the stack, so the stack must not be empty when they are called.
+ class ShadowValues
+ {
+ public:
+ ShadowValues();
+ virtual ~ShadowValues();
+
+ // Allocates a new empty frame; it is not pushed automatically.
+ ShadowFrame* createCleanShadowFrame();
+ inline void dump() const
+ {
+ m_stack->top()->dump();
+ }
+ inline const llvm::CallInst* getCall() const
+ {
+ return m_stack->top()->getCall();
+ }
+ inline TypedValue getValue(const llvm::Value *V) const
+ {
+ return m_stack->top()->getValue(V);
+ }
+ // True for every constant, and for any value shadowed in the top frame.
+ inline bool hasValue(const llvm::Value* V) const
+ {
+ return llvm::isa<llvm::Constant>(V) || m_stack->top()->hasValue(V);
+ }
+ // Removes the top frame from the stack and deletes it.
+ inline void popFrame()
+ {
+ ShadowFrame *frame = m_stack->top();
+ m_stack->pop();
+ delete frame;
+ }
+ // Pushes frame; since popFrame() deletes frames, the stack takes
+ // ownership of it.
+ inline void pushFrame(ShadowFrame *frame)
+ {
+ m_stack->push(frame);
+ }
+ inline void setCall(const llvm::CallInst *CI)
+ {
+ m_stack->top()->setCall(CI);
+ }
+ inline void setValue(const llvm::Value *V, TypedValue SV)
+ {
+ m_stack->top()->setValue(V, SV);
+ }
+
+ private:
+ typedef std::stack<ShadowFrame*> ShadowValuesStack;
+
+ ShadowValuesStack *m_stack;
+ };
+
+ // Shadow memory for one address space. Addresses are split into a buffer
+ // index and an offset; each buffer index maps to a separately allocated
+ // byte array holding the shadow bytes.
+ class ShadowMemory
+ {
+ public:
+ // One shadow buffer: its size in bytes, flags and the shadow bytes.
+ struct Buffer
+ {
+ size_t size;
+ cl_mem_flags flags;
+ unsigned char *data;
+ };
+
+ // bufferBits: number of address bits used for the buffer index.
+ ShadowMemory(AddressSpace addrSpace, unsigned bufferBits);
+ virtual ~ShadowMemory();
+
+ // Creates (or replaces) the shadow buffer addressed by `address`.
+ void allocate(size_t address, size_t size);
+ void dump() const;
+ // Raw pointer to the shadow byte for `address`; the buffer must exist.
+ void* getPointer(size_t address) const;
+ // True if [address, address+size) lies inside an existing buffer.
+ bool isAddressValid(size_t address, size_t size=1) const;
+ // Reads shadow bytes; invalid ranges read as fully poisoned.
+ void load(unsigned char *dst, size_t address, size_t size=1) const;
+ void lock(size_t address) const;
+ // Writes shadow bytes; stores to invalid ranges are ignored.
+ void store(const unsigned char *src, size_t address, size_t size=1);
+ void unlock(size_t address) const;
+
+ private:
+ typedef std::unordered_map<size_t, Buffer*> MemoryMap;
+
+ AddressSpace m_addrSpace;
+ MemoryMap m_map;
+ // Number of low address bits that form the offset within a buffer.
+ unsigned m_numBitsAddress;
+ // Number of high address bits that form the buffer index.
+ unsigned m_numBitsBuffer;
+
+ void clear();
+ void deallocate(size_t address);
+ size_t extractBuffer(size_t address) const;
+ size_t extractOffset(size_t address) const;
+ };
+
+ // Shadow state of a single work-item: its private shadow memory and its
+ // stack of shadow values. Both are owned by this object.
+ class ShadowWorkItem
+ {
+ public:
+ ShadowWorkItem(unsigned bufferBits);
+ virtual ~ShadowWorkItem();
+
+ // Dumps the shadow values followed by the private shadow memory.
+ inline void dump() const
+ {
+ m_values->dump();
+ m_memory->dump();
+ }
+ inline ShadowMemory* getPrivateMemory()
+ {
+ return m_memory;
+ }
+ inline ShadowValues* getValues() const
+ {
+ return m_values;
+ }
+
+ private:
+ ShadowMemory *m_memory;
+ ShadowValues *m_values;
+ };
+
+ // Shadow state of a single work-group: its local shadow memory, which is
+ // owned by this object.
+ class ShadowWorkGroup
+ {
+ public:
+ ShadowWorkGroup(unsigned bufferBits);
+ virtual ~ShadowWorkGroup();
+
+ inline void dump() const
+ {
+ m_memory->dump();
+ }
+ inline ShadowMemory* getLocalMemory()
+ {
+ return m_memory;
+ }
+
+ private:
+ ShadowMemory *m_memory;
+ };
+
+ // Central bookkeeping for shadow state: global shadow memory and global
+ // shadow values (per instance), plus a thread-local work space holding the
+ // shadow work-items, shadow work-groups and the memory pool that backs the
+ // TypedValues created by the static get*Value() helpers.
+ class ShadowContext
+ {
+ public:
+ ShadowContext(unsigned bufferBits);
+ virtual ~ShadowContext();
+
+ // Lazily create the thread-local work-item / work-group maps.
+ void allocateWorkItems();
+ void allocateWorkGroups();
+ void clearGlobalValues();
+ // Reference-counted creation of the thread-local memory pool; each call
+ // should be matched by destroyMemoryPool().
+ void createMemoryPool();
+ ShadowWorkItem* createShadowWorkItem(const WorkItem *workItem);
+ ShadowWorkGroup* createShadowWorkGroup(const WorkGroup *workGroup);
+ void destroyMemoryPool();
+ void destroyShadowWorkItem(const WorkItem *workItem);
+ void destroyShadowWorkGroup(const WorkGroup *workGroup);
+ void dump(const WorkItem *workItem) const;
+ void dumpGlobalValues() const;
+ // Delete the thread-local maps once they are empty.
+ void freeWorkItems();
+ void freeWorkGroups();
+ // Clean shadows are all zero bytes; storage comes from the memory pool.
+ static TypedValue getCleanValue(unsigned size);
+ static TypedValue getCleanValue(TypedValue v);
+ static TypedValue getCleanValue(const llvm::Type *Ty);
+ static TypedValue getCleanValue(const llvm::Value *V);
+ inline ShadowMemory* getGlobalMemory() const
+ {
+ return m_globalMemory;
+ }
+ TypedValue getGlobalValue(const llvm::Value *V) const;
+ MemoryPool* getMemoryPool() const
+ {
+ return m_workSpace.memoryPool;
+ }
+ // Poisoned shadows have every bit set; storage comes from the memory pool.
+ static TypedValue getPoisonedValue(unsigned size);
+ static TypedValue getPoisonedValue(TypedValue v);
+ static TypedValue getPoisonedValue(const llvm::Type *Ty);
+ static TypedValue getPoisonedValue(const llvm::Value *V);
+ // The lookups below use at() on the thread-local maps, which must have
+ // been allocated and must contain the requested key.
+ inline ShadowWorkItem* getShadowWorkItem(const WorkItem *workItem) const
+ {
+ return m_workSpace.workItems->at(workItem);
+ }
+ inline ShadowWorkGroup* getShadowWorkGroup(const WorkGroup *workGroup) const
+ {
+ return m_workSpace.workGroups->at(workGroup);
+ }
+ TypedValue getValue(const WorkItem *workItem, const llvm::Value *V) const;
+ // True for constants, globally shadowed values, and values shadowed in
+ // the work-item's top frame.
+ inline bool hasValue(const WorkItem *workItem, const llvm::Value* V) const
+ {
+ return llvm::isa<llvm::Constant>(V) || m_globalValues.count(V) || m_workSpace.workItems->at(workItem)->getValues()->hasValue(V);
+ }
+ static bool isCleanImage(const TypedValue shadowImage);
+ static bool isCleanImageAddress(const TypedValue shadowImage);
+ static bool isCleanImageDescription(const TypedValue shadowImage);
+ static bool isCleanImageFormat(const TypedValue shadowImage);
+ static bool isCleanStruct(ShadowMemory *shadowMemory, size_t address, const llvm::StructType *structTy);
+ static bool isCleanValue(unsigned long v);
+ static bool isCleanValue(TypedValue v);
+ // Checks only element number `offset` of v.
+ static bool isCleanValue(TypedValue v, unsigned offset);
+ void setGlobalValue(const llvm::Value *V, TypedValue SV);
+ // Poisons each element of v1 whose counterpart in v2 is not clean.
+ static void shadowOr(TypedValue v1, TypedValue v2);
+
+ private:
+ ShadowMemory *m_globalMemory;
+ UnorderedTypedValueMap m_globalValues;
+ unsigned m_numBitsBuffer;
+ typedef std::map<const WorkItem*, ShadowWorkItem*> ShadowItemMap;
+ typedef std::map<const WorkGroup*, ShadowWorkGroup*> ShadowGroupMap;
+ // Per-thread state shared by all ShadowContext instances (static member).
+ struct WorkSpace
+ {
+ ShadowItemMap *workItems;
+ ShadowGroupMap *workGroups;
+ MemoryPool *memoryPool;
+ // Number of outstanding createMemoryPool() calls on this thread.
+ unsigned poolUsers;
+ };
+ static THREAD_LOCAL WorkSpace m_workSpace;
+ };
+
+ // Plugin that tracks shadow state for values and memory through a
+ // ShadowContext. The public methods are the Plugin callbacks it overrides;
+ // the private helpers are declared here and defined in Uninitialized.cpp.
+ class Uninitialized : public Plugin
+ {
+ public:
+ Uninitialized(const Context *context);
+ virtual ~Uninitialized();
+
+ // Overridden Plugin callbacks.
+ virtual void hostMemoryStore(const Memory *memory,
+ size_t address, size_t size,
+ const uint8_t *storeData) override;
+ virtual void instructionExecuted(const WorkItem *workItem,
+ const llvm::Instruction *instruction,
+ const TypedValue& result) override;
+ virtual void kernelBegin(const KernelInvocation *kernelInvocation) override;
+ virtual void kernelEnd(const KernelInvocation *kernelInvocation) override;
+ virtual void memoryMap(const Memory *memory, size_t address,
+ size_t offset, size_t size, cl_map_flags flags) override;
+ virtual void workItemBegin(const WorkItem *workItem) override;
+ virtual void workItemComplete(const WorkItem *workItem) override;
+ virtual void workGroupBegin(const WorkGroup *workGroup) override;
+ virtual void workGroupComplete(const WorkGroup *workGroup) override;
+ // NOTE: the memoryAllocated callback is intentionally left disabled here.
+ //virtual void memoryAllocated(const Memory *memory, size_t address,
+ // size_t size, cl_mem_flags flags,
+ // const uint8_t *initData);
+ private:
+ // (value, shadow) pairs queued for later initialisation
+ // NOTE(review): presumably applied per work-item / per work-group when
+ // those begin - confirm against Uninitialized.cpp.
+ std::list<std::pair<const llvm::Value*, TypedValue> > m_deferredInit;
+ std::list<std::pair<const llvm::Value*, TypedValue> > m_deferredInitGroup;
+ ShadowContext shadowContext;
+ MemoryPool m_pool;
+
+ // Shadow memory helpers. workItem / workGroup default to NULL.
+ void allocAndStoreShadowMemory(unsigned addrSpace, size_t address, TypedValue SM,
+ const WorkItem *workItem = NULL, const WorkGroup *workGroup = NULL, bool unchecked = false);
+ bool checkAllOperandsDefined(const WorkItem *workItem, const llvm::Instruction *I);
+ void checkStructMemcpy(const WorkItem *workItem, const llvm::Value *src);
+ void copyShadowMemory(unsigned dstAddrSpace, size_t dst,
+ unsigned srcAddrSpace, size_t src, unsigned size,
+ const WorkItem *workItem = NULL, const WorkGroup *workGroup = NULL, bool unchecked = false);
+ void copyShadowMemoryStrided(unsigned dstAddrSpace, size_t dst,
+ unsigned srcAddrSpace, size_t src,
+ size_t num, size_t stride, unsigned size,
+ const WorkItem *workItem = NULL, const WorkGroup *workGroup = NULL, bool unchecked = false);
+ static std::string extractUnmangledName(const std::string fullname);
+ ShadowMemory* getShadowMemory(unsigned addrSpace, const WorkItem *workItem = NULL, const WorkGroup *workGroup = NULL) const;
+ bool handleBuiltinFunction(const WorkItem *workItem, std::string name, const llvm::CallInst *CI, const TypedValue result);
+ void handleIntrinsicInstruction(const WorkItem *workItem, const llvm::IntrinsicInst *I);
+ void loadShadowMemory(unsigned addrSpace, size_t address, TypedValue &SM,
+ const WorkItem *workItem = NULL, const WorkGroup *workGroup = NULL);
+ // Diagnostics.
+ void logUninitializedAddress(unsigned int addrSpace, size_t address, bool write = true) const;
+ void logUninitializedCF() const;
+ void logUninitializedIndex() const;
+ void logUninitializedWrite(unsigned int addrSpace, size_t address) const;
+ // Shadow propagation helpers.
+ void SimpleOr(const WorkItem *workItem, const llvm::Instruction *I);
+ void SimpleOrAtomic(const WorkItem *workItem, const llvm::CallInst *CI);
+ void storeShadowMemory(unsigned addrSpace, size_t address, TypedValue SM,
+ const WorkItem *workItem = NULL, const WorkGroup *workGroup = NULL, bool unchecked = false);
+ void VectorOr(const WorkItem *workItem, const llvm::Instruction *I);
+ };
+}
diff --git a/src/runtime/async_queue.cpp b/src/runtime/async_queue.cpp
index cc5f41c..28789df 100644
--- a/src/runtime/async_queue.cpp
+++ b/src/runtime/async_queue.cpp
@@ -1,5 +1,5 @@
// async_queue.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
diff --git a/src/runtime/async_queue.h b/src/runtime/async_queue.h
index 5ff4f4a..39bdb50 100644
--- a/src/runtime/async_queue.h
+++ b/src/runtime/async_queue.h
@@ -1,5 +1,5 @@
// async_queue.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
diff --git a/src/runtime/icd.h b/src/runtime/icd.h
index 7059cf9..6a2d207 100644
--- a/src/runtime/icd.h
+++ b/src/runtime/icd.h
@@ -1,5 +1,5 @@
// icd.h (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
diff --git a/src/runtime/oclgrind b/src/runtime/oclgrind
deleted file mode 100755
index 4925be4..0000000
--- a/src/runtime/oclgrind
+++ /dev/null
@@ -1,145 +0,0 @@
-#!/bin/bash
-# oclgrind (Oclgrind)
-# Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
-# University of Bristol. All rights reserved.
-#
-# This program is provided under a three-clause BSD license. For full
-# license terms please see the LICENSE file distributed with this
-# source code.
-
-function usage
-{
- echo "Usage: "
- echo " oclgrind [OPTIONS] COMMAND"
- echo " oclgrind [--help | --version]"
- echo
- echo "Options:"
- echo -n " --build-options OPTIONS "
- echo "Additional options to pass to the OpenCL compiler"
- echo -n " --check-api "
- echo "Reports errors on API calls"
- echo -n " --data-races "
- echo "Enable data-race detection"
- echo -n " --disable-pch "
- echo "Don't use precompiled headers"
- echo -n " --dump-spir "
- echo "Dump SPIR to /tmp/oclgrind_*.{ll,bc}"
- echo -n " -h --help "
- echo "Display usage information"
- echo -n " --inst-counts "
- echo "Output histograms of instructions executed"
- echo -n " -i --interactive "
- echo "Enable interactive mode"
- echo -n " --log LOGFILE "
- echo "Redirect log/error messages to a file"
- echo -n " --max-errors NUM "
- echo "Limit the number of error/warning messages"
- echo -n " --num-threads NUM "
- echo "Set the number of worker threads to use"
- echo -n " --pch-dir DIR "
- echo "Override directory containing precompiled headers"
- echo -n " --plugins PLUGINS "
- echo "Load colon seperated list of plugin libraries"
- echo -n " -q --quick "
- echo "Only run first and last work-group"
- echo -n " --uniform-writes "
- echo "Don't suppress uniform write-write data-races"
- echo -n " -v --version "
- echo "Display version information"
- echo
- echo "For more information, please visit the Oclgrind wiki page:"
- echo "-> https://github.com/jrprice/Oclgrind/wiki"
- echo
-}
-
-# Parse arguments
-while [ $# -gt 0 -a "${1:0:1}" == "-" ]
-do
- if [ "$1" == "--build-options" ]
- then
- shift
- export OCLGRIND_BUILD_OPTIONS="$1"
- elif [ "$1" == "--check-api" ]
- then
- export OCLGRIND_CHECK_API=1
- elif [ "$1" == "--data-races" ]
- then
- export OCLGRIND_DATA_RACES=1
- elif [ "$1" == "--disable-pch" ]
- then
- export OCLGRIND_DISABLE_PCH=1
- elif [ "$1" == "--dump-spir" ]
- then
- export OCLGRIND_DUMP_SPIR=1
- elif [ "$1" == "-h" -o "$1" == "--help" ]
- then
- usage
- exit 0
- elif [ "$1" == "--inst-counts" ]
- then
- export OCLGRIND_INST_COUNTS=1
- elif [ "$1" == "-i" -o "$1" == "--interactive" ]
- then
- export OCLGRIND_INTERACTIVE=1
- elif [ "$1" == "--log" ]
- then
- shift
- export OCLGRIND_LOG="$1"
- elif [ "$1" == "--max-errors" ]
- then
- shift
- export OCLGRIND_MAX_ERRORS="$1"
- elif [ "$1" == "--num-threads" ]
- then
- shift
- export OCLGRIND_NUM_THREADS="$1"
- elif [ "$1" == "--pch-dir" ]
- then
- shift
- export OCLGRIND_PCH_DIR="$1"
- elif [ "$1" == "--plugins" ]
- then
- shift
- export OCLGRIND_PLUGINS="$1"
- elif [ "$1" == "-q" -o "$1" == "--quick" ]
- then
- export OCLGRIND_QUICK=1
- elif [ "$1" == "--uniform-writes" ]
- then
- export OCLGRIND_UNIFORM_WRITES=1
- elif [ "$1" == "-v" -o "$1" == "--version" ]
- then
- echo
- echo "Oclgrind __VERSION__"
- echo
- echo "Copyright (c) 2013-2015"
- echo "James Price and Simon McIntosh-Smith, University of Bristol"
- echo "https://github.com/jrprice/Oclgrind"
- echo
- exit 0
- else
- echo "Unrecognized argument '$1'"
- usage
- exit 1
- fi
- shift
-done
-
-# Ensure target command supplied
-if [ $# -lt 1 ]
-then
- usage
- exit 1
-fi
-
-# Inject liboclgrind.{so,dylib} and run command
-LIBDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/../lib"
-if [ "$(uname -s)" == "Darwin" ]
-then
- DYLD_LIBRARY_PATH=$LIBDIR:$DYLD_LIBRARY_PATH \
- DYLD_INSERT_LIBRARIES=$LIBDIR/liboclgrind-rt.dylib \
- DYLD_FORCE_FLAT_NAMESPACE=1 "$@"
-else
- LD_LIBRARY_PATH=$LIBDIR:$LD_LIBRARY_PATH \
- LD_PRELOAD=$LIBDIR/liboclgrind-rt.so "$@"
-fi
diff --git a/src/runtime/oclgrind.cpp b/src/runtime/oclgrind.cpp
new file mode 100644
index 0000000..e547bb2
--- /dev/null
+++ b/src/runtime/oclgrind.cpp
@@ -0,0 +1,483 @@
+// oclgrind.cpp (Oclgrind)
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
+// University of Bristol. All rights reserved.
+//
+// This program is provided under a three-clause BSD license. For full
+// license terms please see the LICENSE file distributed with this
+// source code.
+
+#include "config.h"
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+
+#if defined(_WIN32) && !defined(__MINGW32__)
+#include <windows.h>
+#else
+#include <limits.h>
+#include <unistd.h>
+#ifdef __APPLE__
+#include <mach-o/dyld.h>
+#endif
+#endif
+
+using namespace std;
+
+#if defined(_WIN32) && !defined(__MINGW32__)
+
+static string appCmd;
+static void checkWow64(HANDLE parent, HANDLE child);
+static void die(const char *op);
+
+#else // not Windows
+
+static char **appArgs = NULL;
+#ifdef __APPLE__
+#define LIB_EXTENSION "dylib"
+#define LD_LIBRARY_PATH_ENV "DYLD_LIBRARY_PATH"
+#define LD_PRELOAD_ENV "DYLD_INSERT_LIBRARIES"
+#else
+#define LIB_EXTENSION "so"
+#define LD_LIBRARY_PATH_ENV "LD_LIBRARY_PATH"
+#define LD_PRELOAD_ENV "LD_PRELOAD"
+#endif
+
+#endif
+
+static string getLibDirPath();
+static bool parseArguments(int argc, char *argv[]);
+static void printUsage();
+static void setEnvironment(const char *name, const char *value);
+
+// Entry point for the oclgrind launcher.
+//
+// Parses the command-line options (which exports the matching OCLGRIND_*
+// environment variables), then launches the target command with the
+// Oclgrind runtime library injected:
+//  - Windows: the target is created suspended, oclgrind-rt.dll is loaded
+//    into it via remote threads, initOclgrind is run, and the target is
+//    resumed. The target's exit code is forwarded.
+//  - Elsewhere: the library and preload environment variables are extended
+//    and the target replaces this process via execvp().
+//
+// Returns 1 if the arguments are invalid.
+int main(int argc, char *argv[])
+{
+  // Parse arguments
+  if (!parseArguments(argc, argv))
+  {
+    return 1;
+  }
+
+#if defined(_WIN32) && !defined(__MINGW32__)
+
+  // Get full path to oclgrind-rt.dll
+  string dllpath = getLibDirPath();
+  dllpath += "\\oclgrind-rt.dll";
+
+  PROCESS_INFORMATION pinfo = { 0 };
+  STARTUPINFOA sinfo = { 0 };
+  sinfo.cb = sizeof(sinfo);
+
+  // Create child process in suspended state
+  if (!CreateProcessA(NULL, (LPSTR)appCmd.c_str(), NULL, NULL, FALSE,
+                      CREATE_SUSPENDED, NULL, NULL, &sinfo, &pinfo))
+    die("creating child process");
+
+  // Check that we are running as 64-bit if and only if we need to be
+  checkWow64(GetCurrentProcess(), pinfo.hProcess);
+
+  // Allocate memory for DLL path (including the terminating null)
+  void *childPath = VirtualAllocEx(pinfo.hProcess, NULL, dllpath.size()+1,
+                                   MEM_COMMIT, PAGE_READWRITE);
+  if (!childPath)
+    die("allocating child memory");
+
+  // Write DLL path to child
+  if (!WriteProcessMemory(pinfo.hProcess, childPath,
+                          (void*)dllpath.c_str(), dllpath.size()+1, NULL))
+    die("writing child memory");
+
+  // Create thread to load DLL in child process. The address of LoadLibraryA
+  // is looked up in this process and used as the start routine in the child.
+  HANDLE childThread =
+    CreateRemoteThread(pinfo.hProcess, NULL, 0,
+                       (LPTHREAD_START_ROUTINE)GetProcAddress(
+                         GetModuleHandleA("kernel32.dll"), "LoadLibraryA"),
+                       childPath, 0, NULL);
+  if (!childThread)
+    die("loading DLL in child thread");
+
+  // Wait for child thread to complete
+  if (WaitForSingleObject(childThread, INFINITE) != WAIT_OBJECT_0)
+    die("waiting for load thread");
+
+  CloseHandle(childThread);
+  VirtualFreeEx(pinfo.hProcess, childPath, dllpath.size()+1, MEM_RELEASE);
+
+  // Load DLL in this process as well to get function pointers
+  HMODULE dll = LoadLibraryA(dllpath.c_str());
+  if (!dll)
+    die("loading DLL");
+
+  // Get handle to initOclgrind function in DLL
+  HANDLE initFunction = GetProcAddress(dll, "initOclgrind");
+  if (!initFunction)
+    die("getting init function address");
+
+  // Launch init function in child process
+  childThread = CreateRemoteThread(pinfo.hProcess, NULL, 0,
+                                   (LPTHREAD_START_ROUTINE)initFunction,
+                                   NULL, 0, NULL);
+  if (!childThread)
+    die("launching init in child thread");
+
+  // Wait for init to finish
+  if (WaitForSingleObject(childThread, INFINITE) != WAIT_OBJECT_0)
+    die("waiting for init thread");
+
+  // Check return value. initOclgrind is declared as returning bool, so only
+  // the low byte of the thread exit code is assumed to be meaningful.
+  DWORD retval = 0;
+  if (!GetExitCodeThread(childThread, &retval))
+    die("getting init exit code");
+  if (!(retval & 0xFF))
+  {
+    cerr << "[Oclgrind] initialization failed: " << retval << endl;
+    // Report failure to the caller; the tested value is zero on this path,
+    // so forwarding it would signal success.
+    exit(1);
+  }
+
+  CloseHandle(childThread);
+
+  // Resume child process
+  if (ResumeThread(pinfo.hThread) == -1)
+    die("resuming thread");
+
+  // Wait for child process to finish
+  if (WaitForSingleObject(pinfo.hProcess, INFINITE) != WAIT_OBJECT_0)
+    die("waiting for child process failed");
+
+  // Get return code and forward it
+  if (!GetExitCodeProcess(pinfo.hProcess, &retval))
+    die("getting child process exit code");
+
+  return retval;
+
+#else // not Windows
+
+  // Get path to Oclgrind library directory
+  string libdir = getLibDirPath();
+
+  // Construct new LD_LIBRARY_PATH, keeping any existing entries after ours
+  string ldLibraryPath = libdir;
+  const char *oldLdLibraryPath = getenv(LD_LIBRARY_PATH_ENV);
+  if (oldLdLibraryPath)
+  {
+    ldLibraryPath += ":";
+    ldLibraryPath += oldLdLibraryPath;
+  }
+
+  // Add oclgrind-rt library to LD_PRELOAD, keeping any existing entries
+  string ldPreload = libdir;
+  ldPreload += "/liboclgrind-rt.";
+  ldPreload += LIB_EXTENSION;
+  const char *oldLdPreload = getenv(LD_PRELOAD_ENV);
+  if (oldLdPreload)
+  {
+    ldPreload += ":";
+    ldPreload += oldLdPreload;
+  }
+
+  setEnvironment(LD_LIBRARY_PATH_ENV, ldLibraryPath.c_str());
+  setEnvironment(LD_PRELOAD_ENV, ldPreload.c_str());
+#ifdef __APPLE__
+  setEnvironment("DYLD_FORCE_FLAT_NAMESPACE", "1");
+#endif
+
+  // Launch target application; execvp() only returns if the launch failed
+  if (execvp(appArgs[0], appArgs) == -1)
+  {
+    cerr << "[Oclgrind] Failed to launch target application" << endl;
+    exit(1);
+  }
+
+#endif
+}
+
+// Processes the launcher's command line.
+//
+// Each recognised option is exported as an OCLGRIND_* environment variable.
+// The first argument that does not start with '-' begins the target
+// command, which is stored in appCmd (Windows) or appArgs (elsewhere).
+// --help and --version print their text and exit the process directly.
+//
+// Returns false (after printing a diagnostic or the usage text) if an
+// option is unknown, an option value is missing, or no command was given.
+static bool parseArguments(int argc, char *argv[])
+{
+  // Options that are followed by a value, which is exported verbatim
+  struct ValueOption
+  {
+    const char *name;
+    const char *variable;
+  };
+  static const ValueOption valueOptions[] =
+  {
+    {"--build-options", "OCLGRIND_BUILD_OPTIONS"},
+    {"--log", "OCLGRIND_LOG"},
+    {"--max-errors", "OCLGRIND_MAX_ERRORS"},
+    {"--num-threads", "OCLGRIND_NUM_THREADS"},
+    {"--pch-dir", "OCLGRIND_PCH_DIR"},
+    {"--plugins", "OCLGRIND_PLUGINS"},
+  };
+  const size_t numValueOptions = sizeof(valueOptions)/sizeof(valueOptions[0]);
+
+  // Options that take no value; their presence exports "1"
+  struct FlagOption
+  {
+    const char *shortName; // NULL when there is no short form
+    const char *longName;
+    const char *variable;
+  };
+  static const FlagOption flagOptions[] =
+  {
+    {NULL, "--check-api", "OCLGRIND_CHECK_API"},
+    {NULL, "--data-races", "OCLGRIND_DATA_RACES"},
+    {NULL, "--disable-pch", "OCLGRIND_DISABLE_PCH"},
+    {NULL, "--dump-spir", "OCLGRIND_DUMP_SPIR"},
+    {NULL, "--inst-counts", "OCLGRIND_INST_COUNTS"},
+    {"-i", "--interactive", "OCLGRIND_INTERACTIVE"},
+    {"-q", "--quick", "OCLGRIND_QUICK"},
+    {NULL, "--uniform-writes", "OCLGRIND_UNIFORM_WRITES"},
+    {NULL, "--uninitialized", "OCLGRIND_UNINITIALIZED"},
+  };
+  const size_t numFlagOptions = sizeof(flagOptions)/sizeof(flagOptions[0]);
+
+  for (int i = 1; i < argc; i++)
+  {
+    const char *arg = argv[i];
+
+    // Informational options end the program immediately
+    if (!strcmp(arg, "-h") || !strcmp(arg, "--help"))
+    {
+      printUsage();
+      exit(0);
+    }
+    if (!strcmp(arg, "-v") || !strcmp(arg, "--version"))
+    {
+      cout << endl;
+      cout << "Oclgrind " PACKAGE_VERSION << endl;
+      cout << endl;
+      cout << "Copyright (c) 2013-2016" << endl;
+      cout << "James Price and Simon McIntosh-Smith, University of Bristol"
+           << endl;
+      cout << "https://github.com/jrprice/Oclgrind" << endl;
+      cout << endl;
+      exit(0);
+    }
+
+    // Options that consume the next argument as their value
+    const ValueOption *valueOption = NULL;
+    for (size_t v = 0; v < numValueOptions && !valueOption; v++)
+    {
+      if (!strcmp(arg, valueOptions[v].name))
+        valueOption = &valueOptions[v];
+    }
+    if (valueOption)
+    {
+      if (++i >= argc)
+      {
+        cerr << "Missing argument to " << valueOption->name << endl;
+        return false;
+      }
+      setEnvironment(valueOption->variable, argv[i]);
+      continue;
+    }
+
+    // Simple on/off switches
+    const FlagOption *flagOption = NULL;
+    for (size_t f = 0; f < numFlagOptions && !flagOption; f++)
+    {
+      const FlagOption &candidate = flagOptions[f];
+      if ((candidate.shortName && !strcmp(arg, candidate.shortName)) ||
+          !strcmp(arg, candidate.longName))
+        flagOption = &candidate;
+    }
+    if (flagOption)
+    {
+      setEnvironment(flagOption->variable, "1");
+      continue;
+    }
+
+    // Anything else that looks like an option is an error
+    if (arg[0] == '-')
+    {
+      cerr << "Unrecognised option '" << arg << "'" << endl;
+      return false;
+    }
+
+    // First non-option argument: the rest is the target command
+#if defined(_WIN32) && !defined(__MINGW32__)
+    // Build command-line for target application
+    while (i < argc)
+    {
+      appCmd += argv[i++];
+      appCmd += " ";
+    }
+#else // not Windows
+    // Build a NULL-terminated argument vector for the target application
+    const int numArgs = argc - i;
+    appArgs = (char**)malloc((numArgs+1) * sizeof(char*));
+    for (int a = 0; a < numArgs; a++)
+    {
+      appArgs[a] = argv[i+a];
+    }
+    appArgs[numArgs] = NULL;
+#endif
+    break;
+  }
+
+  // A target command is mandatory
+#if defined(_WIN32) && !defined(__MINGW32__)
+  const bool haveCommand = (appCmd.size() != 0);
+#else
+  const bool haveCommand = (appArgs != NULL);
+#endif
+  if (!haveCommand)
+  {
+    printUsage();
+    return false;
+  }
+
+  return true;
+}
+
+// Returns the path of the Oclgrind library directory.
+//
+// The directory is derived from the location of the running executable by
+// stripping the executable name and its containing directory, and then
+// appending "/lib" LIBDIR_SUFFIX.
+//
+// Terminates the process if the executable path cannot be determined or
+// does not contain at least two path separators.
+static string getLibDirPath()
+{
+  string libdir;
+
+  // Get full path to executable
+#if defined(_WIN32) && !defined(__MINGW32__)
+  char path[MAX_PATH];
+  // Judge success by the return value rather than GetLastError(), which may
+  // hold a stale code: zero means failure, and a length that fills the
+  // buffer means the path was truncated.
+  DWORD length = GetModuleFileNameA(GetModuleHandle(NULL), path, MAX_PATH);
+  if (length == 0 || length >= MAX_PATH)
+    die("getting path to Oclgrind installation");
+  libdir = path;
+#else
+  char exepath[PATH_MAX];
+  char path[PATH_MAX];
+  bool found;
+  // Get path to executable
+#if defined(__APPLE__)
+  uint32_t sz = PATH_MAX;
+  found = (_NSGetExecutablePath(exepath, &sz) == 0);
+#else // not apple
+  // readlink() does not append a terminating null, so reserve one byte and
+  // terminate the string explicitly
+  ssize_t length = readlink("/proc/self/exe", exepath, PATH_MAX - 1);
+  found = (length != -1);
+  if (found)
+    exepath[length] = '\0';
+#endif
+  // Resolve symbolic links and normalise path
+  if (!found || !realpath(exepath, path))
+  {
+    cerr << "[Oclgrind] Unable to get path to Oclgrind installation" << endl;
+    exit(1);
+  }
+  libdir = path;
+#endif
+
+  // Remove executable filename and containing directory
+  for (int i = 0; i < 2; i++)
+  {
+#if defined(_WIN32) && !defined(__MINGW32__)
+    size_t slash = libdir.find_last_of('\\');
+#else
+    size_t slash = libdir.find_last_of('/');
+#endif
+    if (slash == string::npos)
+    {
+      // Stop here: resizing to npos would throw instead of reporting cleanly
+      cerr << "[Oclgrind] Failed to get path to library directory" << endl;
+      exit(1);
+    }
+
+    libdir.resize(slash);
+  }
+
+  // Append library directory
+  libdir += "/lib" LIBDIR_SUFFIX;
+
+  return libdir;
+}
+
+// Writes the command-line synopsis, the option summary and a pointer to
+// the project wiki to standard output.
+static void printUsage()
+{
+  // One row per option: the option column followed by its description
+  struct UsageRow
+  {
+    const char *option;
+    const char *description;
+  };
+  static const UsageRow rows[] =
+  {
+    {" --build-options OPTIONS ",
+     "Additional options to pass to the OpenCL compiler"},
+    {" --check-api ",
+     "Report errors on API calls"},
+    {" --data-races ",
+     "Enable data-race detection"},
+    {" --disable-pch ",
+     "Don't use precompiled headers"},
+    {" --dump-spir ",
+     "Dump SPIR to /tmp/oclgrind_*.{ll,bc}"},
+    {" -h --help ",
+     "Display usage information"},
+    {" --inst-counts ",
+     "Output histograms of instructions executed"},
+    {" -i --interactive ",
+     "Enable interactive mode"},
+    {" --log LOGFILE ",
+     "Redirect log/error messages to a file"},
+    {" --max-errors NUM ",
+     "Limit the number of error/warning messages"},
+    {" --num-threads NUM ",
+     "Set the number of worker threads to use"},
+    {" --pch-dir DIR ",
+     "Override directory containing precompiled headers"},
+    {" --plugins PLUGINS ",
+     "Load colon separated list of plugin libraries"},
+    {" -q --quick ",
+     "Only run first and last work-group"},
+    {" --uniform-writes ",
+     "Don't suppress uniform write-write data-races"},
+    {" --uninitialized ",
+     "Report usage of uninitialized values"},
+    {" -v --version ",
+     "Display version information"},
+  };
+  const size_t numRows = sizeof(rows)/sizeof(rows[0]);
+
+  // Synopsis
+  cout << "Usage: oclgrind [OPTIONS] COMMAND" << endl;
+  cout << " oclgrind [--help | --version]" << endl;
+  cout << endl;
+
+  // Option summary
+  cout << "Options:" << endl;
+  for (size_t r = 0; r < numRows; r++)
+  {
+    cout << rows[r].option << rows[r].description << endl;
+  }
+  cout << endl;
+
+  // Further reading
+  cout << "For more information, please visit the Oclgrind wiki page:" << endl;
+  cout << "-> https://github.com/jrprice/Oclgrind/wiki" << endl;
+  cout << endl;
+}
+
+// Sets environment variable `name` to `value` for this process (and so for
+// any process it subsequently launches), replacing any existing value.
+//
+// A failure is reported on standard error rather than silently ignored,
+// since a variable that was not exported means the corresponding option
+// would otherwise be dropped without notice.
+static void setEnvironment(const char *name, const char *value)
+{
+#if defined(_WIN32) && !defined(__MINGW32__)
+  const bool failed = (_putenv_s(name, value) != 0);
+#else
+  const bool failed = (setenv(name, value, 1) != 0);
+#endif
+  if (failed)
+  {
+    cerr << "[Oclgrind] Warning: failed to set environment variable '"
+         << name << "'" << endl;
+  }
+}
+
+#if defined(_WIN32) && !defined(__MINGW32__)
+
+// Verifies that the launcher and the target process have the same WOW64
+// status, i.e. that both are 32-bit or both are 64-bit.
+//
+// parent: handle to this process
+// child:  handle to the target process
+//
+// Terminates the process with a message naming the required oclgrind.exe
+// variant on a mismatch, or via die() if the status cannot be queried.
+void checkWow64(HANDLE parent, HANDLE child)
+{
+  BOOL parentWow64 = FALSE;
+  BOOL childWow64 = FALSE;
+
+  // Check the query results so that unset values are never compared
+  if (!IsWow64Process(parent, &parentWow64))
+    die("querying parent process architecture");
+  if (!IsWow64Process(child, &childWow64))
+    die("querying child process architecture");
+
+  if (parentWow64 != childWow64)
+  {
+    const char *bits = childWow64 ? "32" : "64";
+    cerr << "[Oclgrind] target application is " << bits << "-bit" << endl
+         << "Use the " << bits << "-bit version of oclgrind.exe" << endl;
+    exit(1);
+  }
+}
+
+// Reports the calling thread's last Windows error and terminates.
+//
+// op: short description of the operation that failed; it is echoed in the
+//     message written to standard error.
+//
+// Never returns; exits the process with status 1.
+void die(const char *op)
+{
+  // Capture the error code first, before any other call can overwrite it
+  DWORD err = GetLastError();
+
+  // IGNORE_INSERTS: no argument array is supplied, so insert sequences in
+  // the system message must not be expanded
+  char buffer[1024] = "";
+  DWORD length = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM |
+                                FORMAT_MESSAGE_IGNORE_INSERTS, NULL, err,
+                                MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+                                buffer, sizeof(buffer), NULL);
+
+  cerr << "[Oclgrind] Error while '" << op << "':" << endl;
+  if (length)
+  {
+    cerr << buffer << endl;
+  }
+  else
+  {
+    // No message text could be produced; fall back to the raw code
+    cerr << "Unknown error (code " << err << ")" << endl;
+  }
+  exit(1);
+}
+
+#endif
diff --git a/src/runtime/runtime.cpp b/src/runtime/runtime.cpp
index 1cf7338..55759c5 100644
--- a/src/runtime/runtime.cpp
+++ b/src/runtime/runtime.cpp
@@ -1,11 +1,13 @@
// runtime.cpp (Oclgrind)
-// Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+// Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.
+#include "config.h"
+
#include <cassert>
#include <cmath>
#include <cstring>
@@ -27,7 +29,7 @@ using namespace std;
#define MAX_GLOBAL_MEM_SIZE (128 * 1048576)
#define MAX_CONSTANT_BUFFER_SIZE (1048576)
#define MAX_LOCAL_MEM_SIZE (32768)
-#define MAX_WI_SIZE (65536)
+#define MAX_WI_SIZE (1024)
#define PLATFORM_NAME "Oclgrind"
#define PLATFORM_VENDOR "University of Bristol"
@@ -53,6 +55,10 @@ using namespace std;
cl_khr_local_int32_extended_atomics \
cl_khr_byte_addressable_store \
cl_khr_fp64"
+#define DEVICE_TYPE (CL_DEVICE_TYPE_CPU | \
+ CL_DEVICE_TYPE_GPU | \
+ CL_DEVICE_TYPE_ACCELERATOR | \
+ CL_DEVICE_TYPE_DEFAULT)
namespace
@@ -347,9 +353,7 @@ clGetDeviceIDs
ReturnError(NULL, CL_INVALID_VALUE);
}
- if (device_type != CL_DEVICE_TYPE_CPU &&
- device_type != CL_DEVICE_TYPE_DEFAULT &&
- device_type != CL_DEVICE_TYPE_ALL)
+ if (!(device_type & DEVICE_TYPE))
{
ReturnError(NULL, CL_DEVICE_NOT_FOUND);
}
@@ -411,7 +415,7 @@ clGetDeviceInfo
{
case CL_DEVICE_TYPE:
result_size = sizeof(cl_device_type);
- result_data.cldevicetype = CL_DEVICE_TYPE_CPU;
+ result_data.cldevicetype = DEVICE_TYPE;
break;
case CL_DEVICE_VENDOR_ID:
result_size = sizeof(cl_uint);
@@ -682,8 +686,10 @@ clGetDeviceInfo
}
else
{
- const void* src = str ? (const void*)str : (const void*)&result_data;
- memcpy(param_value, src, result_size);
+ if (str)
+ memcpy(param_value, str, result_size);
+ else
+ memcpy(param_value, &result_data, result_size);
}
}
@@ -805,9 +811,7 @@ clCreateContextFromType
"pfn_notify NULL but user_data non-NULL");
return NULL;
}
- if (device_type != CL_DEVICE_TYPE_CPU &&
- device_type != CL_DEVICE_TYPE_DEFAULT &&
- device_type != CL_DEVICE_TYPE_ALL)
+ if (!(device_type & DEVICE_TYPE))
{
SetErrorArg(NULL, CL_DEVICE_NOT_FOUND, device_type);
return NULL;
@@ -1634,7 +1638,7 @@ clGetSupportedImageFormats
sizeof(ordersAll) / sizeof(cl_channel_order),
sizeof(ordersNormalized) / sizeof(cl_channel_order),
sizeof(ordersByte) / sizeof(cl_channel_order),
- //sizeof(ordersPacked) / sizeof(cl_channel_order),
+ sizeof(ordersPacked) / sizeof(cl_channel_order),
};
// Channel types
@@ -1670,7 +1674,7 @@ clGetSupportedImageFormats
sizeof(typesAll) / sizeof(cl_channel_order),
sizeof(typesNormalized) / sizeof(cl_channel_order),
sizeof(typesByte) / sizeof(cl_channel_order),
- //sizeof(typesPacked) / sizeof(cl_channel_order),
+ sizeof(typesPacked) / sizeof(cl_channel_order),
};
// Calculate total number of formats
@@ -2490,9 +2494,6 @@ clGetProgramInfo
size_t * param_value_size_ret
) CL_API_SUFFIX__VERSION_1_0
{
- size_t result_size = 0;
- void *result_data = NULL;
-
// Check program is valid
if (!program)
{
@@ -2506,100 +2507,92 @@ clGetProgramInfo
"Program not successfully built");
}
+ size_t dummy;
+ size_t& result_size = param_value_size_ret ? *param_value_size_ret : dummy;
+ union
+ {
+ cl_uint cluint;
+ cl_device_id device;
+ cl_context context;
+ size_t sizet;
+ } result_data;
+ const char* str = 0;
+ string kernelNames;
+
switch (param_name)
{
case CL_PROGRAM_REFERENCE_COUNT:
result_size = sizeof(cl_uint);
- result_data = malloc(result_size);
- *(cl_uint*)result_data = program->refCount;
+ result_data.cluint = program->refCount;
break;
case CL_PROGRAM_CONTEXT:
result_size = sizeof(cl_context);
- result_data = malloc(result_size);
- *(cl_context*)result_data = program->context;
+ result_data.context = program->context;
break;
case CL_PROGRAM_NUM_DEVICES:
result_size = sizeof(cl_uint);
- result_data = malloc(result_size);
- *(cl_uint*)result_data = 1;
+ result_data.cluint = 1;
break;
case CL_PROGRAM_DEVICES:
result_size = sizeof(cl_device_id);
- result_data = malloc(result_size);
- *(cl_device_id*)result_data = m_device;
+ result_data.device = m_device;
break;
case CL_PROGRAM_SOURCE:
- result_size = strlen(program->program->getSource().c_str()) + 1;
- result_data = malloc(result_size);
- strcpy((char*)result_data, program->program->getSource().c_str());
+ str = program->program->getSource().c_str();
+ result_size = strlen(str) + 1;
break;
case CL_PROGRAM_BINARY_SIZES:
result_size = sizeof(size_t);
- result_data = malloc(result_size);
- *(size_t*)result_data = program->program->getBinarySize();
+ result_data.sizet = program->program->getBinarySize();
break;
case CL_PROGRAM_BINARIES:
result_size = sizeof(unsigned char*);
- result_data = program->program->getBinary();
break;
case CL_PROGRAM_NUM_KERNELS:
result_size = sizeof(size_t);
- result_data = malloc(result_size);
- *(size_t*)result_data = program->program->getNumKernels();
+ result_data.sizet = program->program->getNumKernels();
break;
case CL_PROGRAM_KERNEL_NAMES:
{
list<string> names = program->program->getKernelNames();
- string ret;
for (list<string>::iterator itr = names.begin(); itr != names.end(); itr++)
{
- ret += *itr;
- ret += ";";
+ kernelNames += *itr;
+ kernelNames += ";";
}
- if (!ret.empty())
+ if (!kernelNames.empty())
{
- ret.erase(ret.length()-1);
+ kernelNames.erase(kernelNames.length()-1);
}
- result_size = strlen(ret.c_str()) + 1;
- result_data = malloc(result_size);
- strcpy((char*)result_data, ret.c_str());
+ str = kernelNames.c_str();
+ result_size = strlen(str) + 1;
break;
}
default:
ReturnErrorArg(program->context, CL_INVALID_VALUE, param_name);
}
- cl_int return_value = CL_SUCCESS;
if (param_value)
{
- if (param_name == CL_PROGRAM_BINARIES)
+ // Check destination is large enough
+ if (param_value_size < result_size)
+ {
+ ReturnErrorInfo(NULL, CL_INVALID_VALUE, ParamValueSizeTooSmall);
+ }
+ else if (param_name == CL_PROGRAM_BINARIES)
{
- memcpy(((unsigned char**)param_value)[0],
- result_data, program->program->getBinarySize());
+ program->program->getBinary(((unsigned char**)param_value)[0]);
}
else
{
- // Check destination is large enough
- if (param_value_size < result_size)
- {
- // TODO: Use API error reporting mechanism
- return_value = CL_INVALID_VALUE;
- }
+ if (str)
+ memcpy(param_value, str, result_size);
else
- {
- memcpy(param_value, result_data, result_size);
- }
+ memcpy(param_value, &result_data, result_size);
}
}
- if (param_value_size_ret)
- {
- *param_value_size_ret = result_size;
- }
-
- free(result_data);
-
- return return_value;
+ return CL_SUCCESS;
}
CL_API_ENTRY cl_int CL_API_CALL
@@ -4186,7 +4179,8 @@ clEnqueueFillImage
((float*)color)[output] = ((float*)fill_color)[input];
break;
case CL_HALF_FLOAT:
- ((uint16_t*)color)[output] = floatToHalf(((float*)fill_color)[input]);
+ ((uint16_t*)color)[output] =
+ oclgrind::floatToHalf(((float*)fill_color)[input]);
break;
case CL_SIGNED_INT8:
((int8_t*)color)[output] = ((int32_t*)fill_color)[input];
@@ -4568,7 +4562,11 @@ clEnqueueMapBuffer
}
// Enqueue command
- oclgrind::Queue::Command *cmd = new oclgrind::Queue::Command();
+ oclgrind::Queue::MapCommand *cmd = new oclgrind::Queue::MapCommand();
+ cmd->address = buffer->address;
+ cmd->offset = offset;
+ cmd->size = cb;
+ cmd->flags = map_flags;
asyncQueueRetain(cmd, buffer);
asyncEnqueue(command_queue, CL_COMMAND_MAP_BUFFER, cmd,
num_events_in_wait_list, event_wait_list, event);
@@ -4697,7 +4695,11 @@ clEnqueueMapImage
}
// Enqueue command
- oclgrind::Queue::Command *cmd = new oclgrind::Queue::Command();
+ oclgrind::Queue::MapCommand *cmd = new oclgrind::Queue::MapCommand();
+ cmd->address = image->address;
+ cmd->offset = offset;
+ cmd->size = size;
+ cmd->flags = map_flags;
asyncQueueRetain(cmd, image);
asyncEnqueue(command_queue, CL_COMMAND_MAP_IMAGE, cmd,
num_events_in_wait_list, event_wait_list, event);
@@ -4733,7 +4735,9 @@ clEnqueueUnmapMemObject
}
// Enqueue command
- oclgrind::Queue::Command *cmd = new oclgrind::Queue::Command();
+ oclgrind::Queue::UnmapCommand *cmd = new oclgrind::Queue::UnmapCommand();
+ cmd->address = memobj->address;
+ cmd->ptr = mapped_ptr;
asyncQueueRetain(cmd, memobj);
asyncEnqueue(command_queue, CL_COMMAND_UNMAP_MEM_OBJECT, cmd,
num_events_in_wait_list, event_wait_list, event);
@@ -4799,6 +4803,8 @@ clEnqueueNDRangeKernel
}
// Check global and local sizes are valid
+ size_t reqdWorkGroupSize[3];
+ kernel->kernel->getRequiredWorkGroupSize(reqdWorkGroupSize);
for (unsigned i = 0; i < work_dim; i++)
{
if (!global_work_size[i])
@@ -4809,10 +4815,17 @@ clEnqueueNDRangeKernel
if (local_work_size && global_work_size[i] % local_work_size[i])
{
ReturnErrorInfo(command_queue->context, CL_INVALID_WORK_GROUP_SIZE,
- "Dimension " << i <<
- ": local_work_size (" << local_work_size[i] <<
- ") does not divide global_work_size (" <<
- global_work_size[i] << ")");
+ "local_work_size[" << i << "]=" << local_work_size[i] <<
+ " does not divide global_work_size[" << i << "]=" <<
+ global_work_size[i]);
+ }
+ if (local_work_size && reqdWorkGroupSize[i] &&
+ local_work_size[i] != reqdWorkGroupSize[i])
+ {
+ ReturnErrorInfo(command_queue->context, CL_INVALID_WORK_GROUP_SIZE,
+ "local_work_size[" << i << "]=" << local_work_size[i] <<
+ " does not match reqd_work_group_size[" << i << "]=" <<
+ reqdWorkGroupSize[i])
}
}
@@ -5592,3 +5605,105 @@ void *m_dispatchTable[] =
DISPATCH_TABLE_ENTRY(NULL),
#endif
};
+
+#if defined(_WIN32) && !defined(OCLGRIND_ICD)
+
+#include <Psapi.h>
+
+// Function to replace calls to clGetPlatformIDs with
+// the Oclgrind implementation.
+//
+// This is invoked by oclgrind.exe after this DLL is
+// injected into the child process.
+//
+// Returns true on success, false on failure.
+bool initOclgrind()
+{
+  // Get base address of process
+  char *base = (char*)GetModuleHandle(NULL);
+
+  // Get pointer to NT headers
+  PIMAGE_DOS_HEADER dosHeader = (PIMAGE_DOS_HEADER)(base);
+  PIMAGE_NT_HEADERS ntHeaders = (PIMAGE_NT_HEADERS)(base + dosHeader->e_lfanew);
+  if (ntHeaders->Signature != IMAGE_NT_SIGNATURE)
+  {
+    std::cerr << "[Oclgrind] Invalid NT signature: "
+              << ntHeaders->Signature << std::endl;
+    return false;
+  }
+
+  // Get pointer to import directory
+  DWORD importOffset =
+    ntHeaders->OptionalHeader.
+      DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress;
+  PIMAGE_IMPORT_DESCRIPTOR importDesc =
+    (PIMAGE_IMPORT_DESCRIPTOR)(base + importOffset);
+
+  // Loop over directory entries (the list ends at an entry with no name)
+  while (importDesc->Name)
+  {
+    // Look for OpenCL.dll
+    const char *modname = (const char*)(base + importDesc->Name);
+    if (!stricmp(modname, "opencl.dll"))
+    {
+      // We use the OriginalFirstThunk to match the name,
+      // and then replace the function pointer in FirstThunk
+      PIMAGE_THUNK_DATA origThunk =
+        (PIMAGE_THUNK_DATA)(base + importDesc->OriginalFirstThunk);
+      PIMAGE_THUNK_DATA firstThunk =
+        (PIMAGE_THUNK_DATA)(base + importDesc->FirstThunk);
+
+      // Loop over functions; both thunk arrays are walked in step
+      while (origThunk->u1.AddressOfData)
+      {
+        // Skip unnamed functions (those imported by ordinal)
+        if (!(origThunk->u1.Ordinal & IMAGE_ORDINAL_FLAG))
+        {
+          // Get function name and check for clGetPlatformIDs
+          PIMAGE_IMPORT_BY_NAME import =
+            (PIMAGE_IMPORT_BY_NAME)(base + origThunk->u1.AddressOfData);
+          if (!stricmp((char*)import->Name, "clGetPlatformIDs"))
+          {
+            // Find the memory region that contains the thunk
+            MEMORY_BASIC_INFORMATION mbinfo;
+            if (!VirtualQuery(firstThunk, &mbinfo, sizeof(mbinfo)))
+            {
+              std::cerr << "[Oclgrind] Failed to query page information: "
+                        << GetLastError() << std::endl;
+              return false;
+            }
+
+            // Make page writable temporarily. The previous protection is
+            // written back into mbinfo.Protect for the restore below.
+            if (!VirtualProtect(mbinfo.BaseAddress, mbinfo.RegionSize,
+                                PAGE_EXECUTE_READWRITE, &mbinfo.Protect))
+            {
+              std::cerr << "[Oclgrind] Failed to make page writeable: "
+                        << GetLastError() << std::endl;
+              return false;
+            }
+
+            // Replace function pointer with our implementation.
+            // ULONG_PTR is pointer-sized on both 32-bit and 64-bit builds.
+            firstThunk->u1.Function = (ULONG_PTR)clGetPlatformIDs;
+
+            // Restore page protection
+            DWORD zero = 0;
+            if (!VirtualProtect(mbinfo.BaseAddress, mbinfo.RegionSize,
+                                mbinfo.Protect, &zero))
+            {
+              std::cerr << "[Oclgrind] Failed to restore page protection: "
+                        << GetLastError() << std::endl;
+              return false;
+            }
+
+            return true;
+          }
+        }
+
+        origThunk++;
+        firstThunk++;
+      }
+    }
+    importDesc++;
+  }
+
+  // We didn't find the function, so just warn user
+  std::cerr << "[Oclgrind] Warning: unable to patch clGetPlatformIDs"
+            << std::endl;
+
+  return true;
+}
+
+#endif
diff --git a/src/runtime/runtime.def b/src/runtime/runtime.def
index 77992cf..59b94c7 100644
--- a/src/runtime/runtime.def
+++ b/src/runtime/runtime.def
@@ -117,3 +117,5 @@ clEnqueueReleaseD3D11ObjectsKHR
clGetDeviceIDsFromDX9MediaAdapterKHR
clEnqueueAcquireDX9MediaSurfacesKHR
clEnqueueReleaseDX9MediaSurfacesKHR
+
+initOclgrind
\ No newline at end of file
diff --git a/tests/Makefile.am b/tests/Makefile.am
new file mode 100644
index 0000000..fea836f
--- /dev/null
+++ b/tests/Makefile.am
@@ -0,0 +1,50 @@
+# Makefile.am (Oclgrind)
+# Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
+# University of Bristol. All rights reserved.
+#
+# This program is provided under a three-clause BSD license. For full
+# license terms please see the LICENSE file distributed with this
+# source code.
+
+# Build objects in the subdirectory of their source file
+AUTOMAKE_OPTIONS = subdir-objects
+ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS} -I m4
+
+# Test programs are C99 and include headers from src/ and tests/common
+AM_CFLAGS = -std=c99 -I$(top_srcdir)/src/ -I${srcdir}/common -Wall
+# Every test program links against the Oclgrind runtime and the shared
+# test helper library
+LDADD = ../liboclgrind-rt.la libcommon.la
+
+# Helper code shared by the test programs (not installed)
+noinst_LTLIBRARIES = libcommon.la
+libcommon_la_SOURCES = common/common.c common/common.h
+
+# Programs that are built and run by `make check`
+check_PROGRAMS = \
+ apps/image/image \
+ apps/vecadd/vecadd \
+ runtime/map_buffer
+TESTS = $(check_PROGRAMS)
+
+if HAVE_PYTHON
+
+# Tests are launched through run_test.py, which is given the path to
+# oclgrind-kernel followed by the test itself. The same command is used for
+# plain tests (LOG_COMPILER) and for tests with the .sim extension
+# (SIM_LOG_COMPILER).
+TEST_EXTENSIONS = .sim
+LOG_COMPILER = $(PYTHON) \
+ $(srcdir)/run_test.py \
+ ${abs_top_builddir}/oclgrind-kernel
+SIM_LOG_COMPILER = $(PYTHON) \
+ $(srcdir)/run_test.py \
+ ${abs_top_builddir}/oclgrind-kernel
+# Point the tests at the precompiled headers in the build tree
+AM_TESTS_ENVIRONMENT = \
+ export OCLGRIND_PCH_DIR=$(abs_top_builddir)/src/include/oclgrind;
+
+# NOTE(review): KERNEL_TESTS and KERNEL_TEST_INPUTS are not defined in this
+# file; presumably they are provided elsewhere in the build - confirm.
+TESTS += $(KERNEL_TESTS)
+#XFAIL_TESTS =
+
+# Remove the *.out files left behind in the test tree
+clean-local:
+ find . -name '*.out' -exec rm -f {} \;
+
+else
+# Without Python there is no test runner, so only print a warning
+check-local:
+ @echo
+ @echo "WARNING: Tests will not be run (Python required)."
+ @echo
+endif
+
+# Files shipped in the distribution tarball alongside the test sources
+EXTRA_DIST = run_test.py kernels/TESTS $(KERNEL_TEST_INPUTS) \
+ runtime/map_buffer.ref
diff --git a/tests/apps/CMakeLists.txt b/tests/apps/CMakeLists.txt
index 0dff241..b8fb14a 100644
--- a/tests/apps/CMakeLists.txt
+++ b/tests/apps/CMakeLists.txt
@@ -1,29 +1,38 @@
# CMakeLists.txt (Oclgrind)
-# Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+# Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
# University of Bristol. All rights reserved.
#
# This program is provided under a three-clause BSD license. For full
# license terms please see the LICENSE file distributed with this
# source code.
+set(COMMON_SOURCES ../common/common.c ../common/common.h)
+include_directories(../common)
+
# Add app tests
foreach(test
+ image
vecadd)
- add_executable(${test} ${test}/${test}.c)
+ add_executable(${test} ${test}/${test}.c ${COMMON_SOURCES})
target_link_libraries(${test} oclgrind-rt)
# Generate test binaries in same dir as Oclgrind libraries on Windows
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
- add_test(app_${test} "${CMAKE_BINARY_DIR}/${test}")
set_target_properties(${test} PROPERTIES
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
else()
- add_test(app_${test} "${test}/${test}")
set_target_properties(${test} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${test}")
set_target_properties(${test} PROPERTIES LINKER_LANGUAGE CXX)
endif()
+ add_test(
+ NAME app_${test}
+ COMMAND
+ ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tests/run_test.py
+ $<TARGET_FILE:oclgrind-kernel>
+ $<TARGET_FILE:${test}>)
+
set_tests_properties(app_${test} PROPERTIES DEPENDS ${test})
# Set PCH directory
diff --git a/tests/apps/image/image.c b/tests/apps/image/image.c
new file mode 100644
index 0000000..101dd4f
--- /dev/null
+++ b/tests/apps/image/image.c
@@ -0,0 +1,133 @@
+#include "common.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#define IMG_SIZE 100
+#define TOL 1e-8
+#define MAX_ERRORS 8
+
+// OpenCL C source for the image_copy kernel. Each work-item reads the pixel
+// at its (x, y) global ID from `src` with an unnormalised, clamped,
+// nearest-filter sampler and writes it to the same coordinates in `dst`.
+// The queried array size is stored in `size` but not used afterwards.
+// NOTE(review): the kernel takes image2d_array_t arguments while the host
+// code in main() creates CL_MEM_OBJECT_IMAGE2D images - confirm this
+// mismatch is intended.
+const char *KERNEL_SOURCE =
+"__kernel void image_copy(__read_only image2d_array_t src, \n"
+" __write_only image2d_array_t dst) \n"
+"{ \n"
+" size_t size = get_image_array_size(src); \n"
+" const int x = get_global_id(0); \n"
+" const int y = get_global_id(1); \n"
+" const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | \n"
+" CLK_ADDRESS_CLAMP | \n"
+" CLK_FILTER_NEAREST; \n"
+" float4 pixel = read_imagef(src, sampler, (int4)(x, y, 0, 0)); \n"
+" write_imagef(dst, (int4)(x, y, 0, 0), pixel); \n"
+"} \n"
+;
+
+// Image copy test: fills a 10x10 three-channel float image with known
+// values, copies it to a second image with the image_copy kernel, reads
+// the result back and compares it with the input.
+//
+// Returns 0 if every value matches within TOL, and 1 if any value differs
+// or the host buffers cannot be allocated.
+int main()
+{
+  cl_int err;
+  cl_kernel kernel;
+  cl_image_format img_fmt;
+  cl_image_desc img_desc;
+  cl_mem src, dst;
+  float *input, *output;
+  size_t width, height;
+  width = height = 10;
+  size_t origin[] = {0, 0, 0};
+  size_t region[] = {width, height, 1};
+  size_t GWSize[] = {width, height, 1};
+
+  // Host buffers hold three floats for each of the IMG_SIZE pixels
+  input = (float*)malloc(IMG_SIZE * 3 * sizeof(float));
+  output = (float*)malloc(IMG_SIZE * 3 * sizeof(float));
+  if (!input || !output)
+  {
+    fprintf(stderr, "Failed to allocate host buffers\n");
+    free(input);
+    free(output);
+    return 1;
+  }
+
+  // Create Input data
+  for(int i = 0; i < 3; ++i)
+  {
+    for(int j = 0; j < IMG_SIZE; ++j)
+    {
+      input[i * IMG_SIZE + j] = j + 1.0;
+    }
+  }
+
+  Context cl = createContext(KERNEL_SOURCE);
+  kernel = clCreateKernel(cl.program, "image_copy", &err);
+  checkError(err, "creating kernel");
+
+  img_fmt.image_channel_order = CL_RGB;
+  img_fmt.image_channel_data_type = CL_FLOAT;
+
+  img_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+  img_desc.image_width = width;
+  img_desc.image_height = height;
+  img_desc.image_depth = 0;
+  img_desc.image_array_size = 0;
+  img_desc.image_row_pitch = 0;
+  img_desc.image_slice_pitch = 0;
+  img_desc.num_mip_levels = 0;
+  img_desc.num_samples = 0;
+  img_desc.buffer = NULL;
+
+  src = clCreateImage(cl.context, CL_MEM_READ_ONLY,
+                      &img_fmt, &img_desc, NULL, &err);
+  checkError(err, "creating source image");
+
+  dst = clCreateImage(cl.context, CL_MEM_READ_WRITE,
+                      &img_fmt, &img_desc, NULL, &err);
+  checkError(err, "creating destination image");
+
+  err = clEnqueueWriteImage(cl.queue, src, CL_TRUE, origin, region,
+                            0, 0, input, 0, NULL, NULL);
+  checkError(err,"enqueuing write image");
+
+  err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &src);
+  err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &dst);
+  checkError(err, "setting kernel args");
+
+  err = clEnqueueNDRangeKernel(cl.queue, kernel, 2, NULL, GWSize, NULL,
+                               0, NULL, NULL);
+  checkError(err, "enqueuing kernel");
+
+  err = clFinish(cl.queue);
+  checkError(err, "running kernel");
+
+  err = clEnqueueReadImage(cl.queue, dst, CL_TRUE, origin, region,
+                           0, 0, output, 0, NULL, NULL);
+  checkError(err, "reading image data");
+
+  // Check results
+  unsigned errors = 0;
+  for(int i = 0; i < 3; ++i)
+  {
+    for(int j = 0; j < IMG_SIZE; ++j)
+    {
+      const int index = i * IMG_SIZE + j;
+      float ref = input[index];
+      float val = output[index];
+
+      if(fabs(ref - val) > TOL)
+      {
+        // Only the first MAX_ERRORS mismatches are printed; all are counted
+        if(errors < MAX_ERRORS)
+        {
+          // Report the element index so the failing value can be located
+          fprintf(stderr, "%4d: %.4f != %.4f\n", index, val, ref);
+        }
+        errors++;
+      }
+    }
+  }
+
+  free(input);
+  free(output);
+  clReleaseMemObject(src);
+  clReleaseMemObject(dst);
+  clReleaseKernel(kernel);
+  releaseContext(cl);
+
+  return (errors != 0);
+}
+
+//cl_mem image3;
+//
+//image3 = clCreateImage2D(context, CL_MEM_READ_WRITE, &img_fmt, width, height, 0, 0, &err);
+//
+//// copy Image1 to Image3
+//err = clEnqueueCopyImage(command_queue, image1, image3, origin, origin, region, 1, event, &event[3]);
+//err_check(err, "clEnqueueCopyImage");
+
+
+//clReleaseMemObject(image3);
diff --git a/tests/apps/vecadd/vecadd.c b/tests/apps/vecadd/vecadd.c
index 22d55ed..49f59e5 100644
--- a/tests/apps/vecadd/vecadd.c
+++ b/tests/apps/vecadd/vecadd.c
@@ -1,4 +1,5 @@
-#include <CL/cl.h>
+#include "common.h"
+
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
@@ -18,16 +19,9 @@ const char *KERNEL_SOURCE =
"} \n"
;
-void checkError(cl_int err, const char *operation);
-
int main(int argc, char *argv[])
{
cl_int err;
- cl_platform_id platform;
- cl_device_id device;
- cl_context context;
- cl_command_queue queue;
- cl_program program;
cl_kernel kernel;
cl_mem d_a, d_b, d_c;
float *h_a, *h_b, *h_c;
@@ -50,57 +44,9 @@ int main(int argc, char *argv[])
exit(1);
}
- // Get list of platforms
- cl_uint numPlatforms = 0;
- cl_platform_id platforms[MAX_PLATFORMS];
- err = clGetPlatformIDs(MAX_PLATFORMS, platforms, &numPlatforms);
- checkError(err, "getting platforms");
-
- // Find Oclgrind
- platform = NULL;
- for (int i = 0; i < numPlatforms; i++)
- {
- char name[256];
- err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 256, name, NULL);
- checkError(err, "getting platform name");
- if (!strcmp(name, "Oclgrind"))
- {
- platform = platforms[i];
- break;
- }
- }
- if (!platform)
- {
- fprintf(stderr, "Unable to find Oclgrind platform\n");
- exit(1);
- }
-
- err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
- checkError(err, "getting device");
-
- context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
- checkError(err, "creating context");
+ Context cl = createContext(KERNEL_SOURCE);
- queue = clCreateCommandQueue(context, device, 0, &err);
- checkError(err, "creating command queue");
-
- program = clCreateProgramWithSource(context, 1, &KERNEL_SOURCE, NULL, &err);
- checkError(err, "creating program");
-
- err = clBuildProgram(program, 1, &device, "", NULL, NULL);
- if (err == CL_BUILD_PROGRAM_FAILURE)
- {
- size_t sz;
- clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
- sizeof(size_t), NULL, &sz);
- char *buildLog = malloc(++sz);
- clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
- sz, buildLog, NULL);
- fprintf(stderr, "%s\n", buildLog);
- }
- checkError(err, "building program");
-
- kernel = clCreateKernel(program, "vecadd", &err);
+ kernel = clCreateKernel(cl.program, "vecadd", &err);
checkError(err, "creating kernel");
size_t dataSize = N*sizeof(cl_float);
@@ -110,27 +56,27 @@ int main(int argc, char *argv[])
h_a = malloc(dataSize);
h_b = malloc(dataSize);
h_c = malloc(dataSize);
- for (int i = 0; i < N; i++)
+ for (unsigned i = 0; i < N; i++)
{
h_a[i] = rand()/(float)RAND_MAX;
h_b[i] = rand()/(float)RAND_MAX;
h_c[i] = 0;
}
- d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, dataSize, NULL, &err);
+ d_a = clCreateBuffer(cl.context, CL_MEM_READ_ONLY, dataSize, NULL, &err);
checkError(err, "creating d_a buffer");
- d_b = clCreateBuffer(context, CL_MEM_READ_ONLY, dataSize, NULL, &err);
+ d_b = clCreateBuffer(cl.context, CL_MEM_READ_ONLY, dataSize, NULL, &err);
checkError(err, "creating d_b buffer");
- d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize, NULL, &err);
+ d_c = clCreateBuffer(cl.context, CL_MEM_WRITE_ONLY, dataSize, NULL, &err);
checkError(err, "creating d_c buffer");
- err = clEnqueueWriteBuffer(queue, d_a, CL_FALSE,
+ err = clEnqueueWriteBuffer(cl.queue, d_a, CL_FALSE,
0, dataSize, h_a, 0, NULL, NULL);
checkError(err, "writing d_a data");
- err = clEnqueueWriteBuffer(queue, d_b, CL_FALSE,
+ err = clEnqueueWriteBuffer(cl.queue, d_b, CL_FALSE,
0, dataSize, h_b, 0, NULL, NULL);
checkError(err, "writing d_b data");
- err = clEnqueueWriteBuffer(queue, d_c, CL_FALSE,
+ err = clEnqueueWriteBuffer(cl.queue, d_c, CL_FALSE,
0, dataSize, h_c, 0, NULL, NULL);
checkError(err, "writing d_c data");
@@ -139,20 +85,20 @@ int main(int argc, char *argv[])
err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c);
checkError(err, "setting kernel args");
- err = clEnqueueNDRangeKernel(queue, kernel,
+ err = clEnqueueNDRangeKernel(cl.queue, kernel,
1, NULL, &global, NULL, 0, NULL, NULL);
checkError(err, "enqueuing kernel");
- err = clFinish(queue);
+ err = clFinish(cl.queue);
checkError(err, "running kernel");
- err = clEnqueueReadBuffer(queue, d_c, CL_TRUE,
+ err = clEnqueueReadBuffer(cl.queue, d_c, CL_TRUE,
0, dataSize, h_c, 0, NULL, NULL);
checkError(err, "reading d_c data");
// Check results
- int errors = 0;
- for (int i = 0; i < N; i++)
+ unsigned errors = 0;
+ for (unsigned i = 0; i < N; i++)
{
float ref = h_a[i] + h_b[i];
if (fabs(ref - h_c[i]) > TOL)
@@ -164,7 +110,8 @@ int main(int argc, char *argv[])
errors++;
}
}
- printf("%d errors detected\n", errors);
+ if (errors)
+ printf("%d errors detected\n", errors);
free(h_a);
free(h_b);
@@ -173,18 +120,7 @@ int main(int argc, char *argv[])
clReleaseMemObject(d_b);
clReleaseMemObject(d_c);
clReleaseKernel(kernel);
- clReleaseProgram(program);
- clReleaseCommandQueue(queue);
- clReleaseContext(context);
+ releaseContext(cl);
return (errors != 0);
}
-
-void checkError(cl_int err, const char *operation)
-{
- if (err != CL_SUCCESS)
- {
- fprintf(stderr, "Error during operation '%s': %d\n", operation, err);
- exit(1);
- }
-}
diff --git a/tests/common/common.c b/tests/common/common.c
new file mode 100644
index 0000000..1923048
--- /dev/null
+++ b/tests/common/common.c
@@ -0,0 +1,84 @@
+#include "common.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Abort the test program if an OpenCL call failed.
+// When err is not CL_SUCCESS, prints the description in operation and the
+// error code to stderr and exits with status 1; otherwise does nothing.
+void checkError(cl_int err, const char *operation)
+{
+  if (err != CL_SUCCESS)
+  {
+    fprintf(stderr, "Error during operation '%s': %d\n", operation, err);
+    exit(1);
+  }
+}
+
+// Create the OpenCL objects needed by a test application: the first
+// platform (which must be Oclgrind), one device, a context, a command queue
+// and a program built from source. Every failure is reported on stderr and
+// terminates the process, so a returned Context is fully populated.
+Context createContext(const char *source)
+{
+  Context cl;
+  cl_int err;
+
+  err = clGetPlatformIDs(1, &cl.platform, NULL);
+  checkError(err, "getting platform");
+
+  // Check platform is Oclgrind
+  char name[256];
+  err = clGetPlatformInfo(cl.platform, CL_PLATFORM_NAME,
+                          sizeof(name), name, NULL);
+  checkError(err, "getting platform name");
+  if (strcmp(name, "Oclgrind") != 0)
+  {
+    fprintf(stderr, "Unable to find Oclgrind platform\n");
+    exit(1);
+  }
+
+  err = clGetDeviceIDs(cl.platform, CL_DEVICE_TYPE_ALL, 1, &cl.device, NULL);
+  checkError(err, "getting device");
+
+  cl.context = clCreateContext(NULL, 1, &cl.device, NULL, NULL, &err);
+  checkError(err, "creating context");
+
+  cl.queue = clCreateCommandQueue(cl.context, cl.device, 0, &err);
+  checkError(err, "creating command queue");
+
+  cl.program = clCreateProgramWithSource(cl.context, 1, &source, NULL, &err);
+  checkError(err, "creating program");
+
+  err = clBuildProgram(cl.program, 1, &cl.device, "", NULL, NULL);
+  if (err == CL_BUILD_PROGRAM_FAILURE)
+  {
+    // Best-effort dump of the build log before checkError() exits below.
+    // The buffer is zero-filled and one byte larger than the reported size
+    // so it is NUL-terminated whatever the implementation writes.
+    size_t sz = 0;
+    cl_int logErr = clGetProgramBuildInfo(cl.program, cl.device,
+                                          CL_PROGRAM_BUILD_LOG, 0, NULL, &sz);
+    char *buildLog = (logErr == CL_SUCCESS) ? calloc(sz + 1, 1) : NULL;
+    if (buildLog)
+    {
+      logErr = clGetProgramBuildInfo(cl.program, cl.device,
+                                     CL_PROGRAM_BUILD_LOG, sz, buildLog, NULL);
+      if (logErr == CL_SUCCESS)
+        fprintf(stderr, "%s\n", buildLog);
+      free(buildLog);
+    }
+  }
+  checkError(err, "building program");
+
+  return cl;
+}
+
+// Release the program, command queue and context held by cl.
+void releaseContext(Context cl)
+{
+  clReleaseProgram(cl.program);
+  clReleaseCommandQueue(cl.queue);
+  clReleaseContext(cl.context);
+}
diff --git a/tests/common/common.h b/tests/common/common.h
new file mode 100644
index 0000000..92b44db
--- /dev/null
+++ b/tests/common/common.h
@@ -0,0 +1,24 @@
+#ifndef OCLGRIND_TESTS_COMMON_H
+#define OCLGRIND_TESTS_COMMON_H
+
+#include <CL/cl.h>
+
+// Handles for the OpenCL objects shared by a test application.
+typedef struct
+{
+  cl_platform_id platform;
+  cl_device_id device;
+  cl_context context;
+  cl_command_queue queue;
+  cl_program program;
+} Context;
+
+// Print an error naming operation and exit if err is not CL_SUCCESS.
+void checkError(cl_int err, const char *operation);
+
+// Create a platform/device/context/queue and build a program from source.
+Context createContext(const char *source);
+// Release the program, command queue and context held by cl.
+void releaseContext(Context cl);
+
+#endif // OCLGRIND_TESTS_COMMON_H
diff --git a/tests/kernels/CMakeLists.txt b/tests/kernels/CMakeLists.txt
new file mode 100644
index 0000000..017acb2
--- /dev/null
+++ b/tests/kernels/CMakeLists.txt
@@ -0,0 +1,27 @@
+# CMakeLists.txt (Oclgrind)
+# Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
+# University of Bristol. All rights reserved.
+#
+# This program is provided under a three-clause BSD license. For full
+# license terms please see the LICENSE file distributed with this
+# source code.
+
+# Add kernel tests: one CTest entry per line of the TESTS file
+file(READ TESTS KERNEL_TESTS)
+string(REPLACE "\n" ";" KERNEL_TESTS "${KERNEL_TESTS}")
+foreach(test ${KERNEL_TESTS})
+  add_test(
+    NAME ${test}
+    COMMAND
+    ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tests/run_test.py
+    $<TARGET_FILE:oclgrind-kernel>
+    ${CMAKE_SOURCE_DIR}/tests/kernels/${test}.sim)
+endforeach()
+
+# Set PCH directory
+set_tests_properties(${KERNEL_TESTS} PROPERTIES
+  ENVIRONMENT "OCLGRIND_PCH_DIR=${CMAKE_BINARY_DIR}/include/oclgrind")
+
+# Expected failures (no tests are currently listed here)
+set_tests_properties(
+  PROPERTIES WILL_FAIL TRUE)
diff --git a/tests/kernels/TESTS b/tests/kernels/TESTS
index 2ac8723..fd95269 100644
--- a/tests/kernels/TESTS
+++ b/tests/kernels/TESTS
@@ -21,12 +21,18 @@ atomics/atomic_race_before
atomics/atomic_same_workitem
barrier/barrier_different_instructions
barrier/barrier_divergence
+bugs/byval_function_argument
+bugs/const_gep_expr_pointee_type
+bugs/false_warning_vector_argument
bugs/gvn_arbitrary_integers
bugs/kernel_struct_argument
+bugs/llvm_bswap
bugs/many_alloca
bugs/multidim_array_in_struct
bugs/null_argument
+bugs/rhadd_overflow
bugs/sroa_addrspace_cast
+bugs/write_vector_write_only_fp
data-race/broadcast
data-race/global_fence
data-race/global_only_fence
@@ -42,14 +48,35 @@ data-race/local_write_write_race
data-race/uniform_write_race
memcheck/async_copy_out_of_bounds
memcheck/atomic_out_of_bounds
+memcheck/casted_static_array
memcheck/dereference_null
+memcheck/fake_out_of_bounds
memcheck/read_out_of_bounds
memcheck/read_write_only_memory
+memcheck/static_array
+memcheck/static_array_padded_struct
memcheck/write_out_of_bounds
memcheck/write_read_only_memory
misc/array
+misc/lvalue_loads
+misc/program_scope_constant_array
misc/reduce
misc/vecadd
+misc/vector_argument
+uninitialized/padded_nested_struct_memcpy
+uninitialized/padded_struct_alloca_fp
+uninitialized/padded_struct_memcpy_fp
+uninitialized/partially_uninitialized_fract
+uninitialized/private_array_initializer_list
+uninitialized/uninitialized_global_buffer
+uninitialized/uninitialized_address
+uninitialized/uninitialized_local_array
+uninitialized/uninitialized_local_ptr
+uninitialized/uninitialized_local_variable
+uninitialized/uninitialized_packed_struct_memcpy
+uninitialized/uninitialized_padded_struct_memcpy
+uninitialized/uninitialized_padded_nested_struct_memcpy
+uninitialized/uninitialized_private_array
wait_event/wait_event_chained
wait_event/wait_event_divergent
wait_event/wait_event_duplicates
diff --git a/tests/kernels/alignment/packed.ref b/tests/kernels/alignment/packed.ref
index df23fc9..a2d80b4 100644
--- a/tests/kernels/alignment/packed.ref
+++ b/tests/kernels/alignment/packed.ref
@@ -1,4 +1,2 @@
-
-Argument 'out': 4 bytes
- out[0] = 2
-
+EXACT Argument 'out': 4 bytes
+EXACT out[0] = 33554434
diff --git a/tests/kernels/alignment/packed.sim b/tests/kernels/alignment/packed.sim
index 46e9090..a92b922 100644
--- a/tests/kernels/alignment/packed.sim
+++ b/tests/kernels/alignment/packed.sim
@@ -5,6 +5,6 @@ packed
<size=5 char hex>
0x01
-0x02 0x00 0x0 0x00
+0x02 0x00 0x00 0x02
<size=4 fill=0 dump>
diff --git a/tests/kernels/alignment/unaligned.ref b/tests/kernels/alignment/unaligned.ref
index 1114e03..d12f6d1 100644
--- a/tests/kernels/alignment/unaligned.ref
+++ b/tests/kernels/alignment/unaligned.ref
@@ -1,5 +1,4 @@
-ERROR EXPECTED
-
-Argument 'out': 4 bytes
- out[0] = 2752512
+ERROR Invalid memory load - source pointer is not aligned to the pointed type
+EXACT Argument 'out': 4 bytes
+EXACT out[0] = 2752512
diff --git a/tests/kernels/async_copy/async_copy.ref b/tests/kernels/async_copy/async_copy.ref
index cf0b04f..690f35e 100644
--- a/tests/kernels/async_copy/async_copy.ref
+++ b/tests/kernels/async_copy/async_copy.ref
@@ -1,7 +1,5 @@
-
-Argument 'data': 16 bytes
- data[0] = 3
- data[1] = 2
- data[2] = 1
- data[3] = 0
-
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 3
+EXACT data[1] = 2
+EXACT data[2] = 1
+EXACT data[3] = 0
diff --git a/tests/kernels/async_copy/async_copy_divergent.ref b/tests/kernels/async_copy/async_copy_divergent.ref
index 8ce4dbb..9004865 100644
--- a/tests/kernels/async_copy/async_copy_divergent.ref
+++ b/tests/kernels/async_copy/async_copy_divergent.ref
@@ -1,8 +1,7 @@
-ERROR EXPECTED
-
-Argument 'data': 16 bytes
- data[0] = 3
- data[1] = 2
- data[2] = 1
- data[3] = 0
+ERROR Work-group divergence detected (async copy)
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 3
+EXACT data[1] = 2
+EXACT data[2] = 1
+EXACT data[3] = 0
diff --git a/tests/kernels/async_copy/async_copy_global_race.ref b/tests/kernels/async_copy/async_copy_global_race.ref
index 4da13c4..aee3e3d 100644
--- a/tests/kernels/async_copy/async_copy_global_race.ref
+++ b/tests/kernels/async_copy/async_copy_global_race.ref
@@ -1,8 +1,9 @@
-ERROR EXPECTED
-
-Argument 'data': 16 bytes
- data[0] = 0
- data[1] = 1
- data[2] = 2
- data[3] = 3
+ERROR Write-write data race at global memory
+ERROR Write-write data race at global memory
+ERROR Write-write data race at global memory
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 0
+EXACT data[1] = 1
+EXACT data[2] = 2
+EXACT data[3] = 3
diff --git a/tests/kernels/async_copy/async_copy_local_race.ref b/tests/kernels/async_copy/async_copy_local_race.ref
index 8ce4dbb..73340db 100644
--- a/tests/kernels/async_copy/async_copy_local_race.ref
+++ b/tests/kernels/async_copy/async_copy_local_race.ref
@@ -1,8 +1,9 @@
-ERROR EXPECTED
-
-Argument 'data': 16 bytes
- data[0] = 3
- data[1] = 2
- data[2] = 1
- data[3] = 0
+ERROR Write-write data race at local memory
+ERROR Write-write data race at local memory
+ERROR Write-write data race at local memory
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 3
+EXACT data[1] = 2
+EXACT data[2] = 1
+EXACT data[3] = 0
diff --git a/tests/kernels/async_copy/async_copy_loop.ref b/tests/kernels/async_copy/async_copy_loop.ref
index cf0b04f..690f35e 100644
--- a/tests/kernels/async_copy/async_copy_loop.ref
+++ b/tests/kernels/async_copy/async_copy_loop.ref
@@ -1,7 +1,5 @@
-
-Argument 'data': 16 bytes
- data[0] = 3
- data[1] = 2
- data[2] = 1
- data[3] = 0
-
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 3
+EXACT data[1] = 2
+EXACT data[2] = 1
+EXACT data[3] = 0
diff --git a/tests/kernels/async_copy/async_copy_loop_divergent.ref b/tests/kernels/async_copy/async_copy_loop_divergent.ref
index 8ce4dbb..9004865 100644
--- a/tests/kernels/async_copy/async_copy_loop_divergent.ref
+++ b/tests/kernels/async_copy/async_copy_loop_divergent.ref
@@ -1,8 +1,7 @@
-ERROR EXPECTED
-
-Argument 'data': 16 bytes
- data[0] = 3
- data[1] = 2
- data[2] = 1
- data[3] = 0
+ERROR Work-group divergence detected (async copy)
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 3
+EXACT data[1] = 2
+EXACT data[2] = 1
+EXACT data[3] = 0
diff --git a/tests/kernels/async_copy/async_copy_single_wi.ref b/tests/kernels/async_copy/async_copy_single_wi.ref
index 8ce4dbb..9004865 100644
--- a/tests/kernels/async_copy/async_copy_single_wi.ref
+++ b/tests/kernels/async_copy/async_copy_single_wi.ref
@@ -1,8 +1,7 @@
-ERROR EXPECTED
-
-Argument 'data': 16 bytes
- data[0] = 3
- data[1] = 2
- data[2] = 1
- data[3] = 0
+ERROR Work-group divergence detected (async copy)
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 3
+EXACT data[1] = 2
+EXACT data[2] = 1
+EXACT data[3] = 0
diff --git a/tests/kernels/async_copy/async_copy_unwaited.ref b/tests/kernels/async_copy/async_copy_unwaited.ref
index 8ce4dbb..b890ad8 100644
--- a/tests/kernels/async_copy/async_copy_unwaited.ref
+++ b/tests/kernels/async_copy/async_copy_unwaited.ref
@@ -1,8 +1,7 @@
-ERROR EXPECTED
-
-Argument 'data': 16 bytes
- data[0] = 3
- data[1] = 2
- data[2] = 1
- data[3] = 0
+ERROR Work-item finished without waiting for events
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 3
+EXACT data[1] = 2
+EXACT data[2] = 1
+EXACT data[3] = 0
diff --git a/tests/kernels/atomics/atomic_cmpxchg_false_race.ref b/tests/kernels/atomics/atomic_cmpxchg_false_race.ref
index fe14281..4014209 100644
--- a/tests/kernels/atomics/atomic_cmpxchg_false_race.ref
+++ b/tests/kernels/atomics/atomic_cmpxchg_false_race.ref
@@ -1,8 +1,6 @@
-
-Argument 'data': 20 bytes
- data[0] = 4
- data[1] = 1
- data[2] = 2
- data[3] = 3
- data[4] = 4
-
+EXACT Argument 'data': 20 bytes
+EXACT data[0] = 4
+EXACT data[1] = 1
+EXACT data[2] = 2
+EXACT data[3] = 3
+EXACT data[4] = 4
diff --git a/tests/kernels/atomics/atomic_cmpxchg_read_race.ref b/tests/kernels/atomics/atomic_cmpxchg_read_race.ref
index b398c6c..d01adc0 100644
--- a/tests/kernels/atomics/atomic_cmpxchg_read_race.ref
+++ b/tests/kernels/atomics/atomic_cmpxchg_read_race.ref
@@ -1,5 +1,5 @@
-ERROR EXPECTED
-
-Argument 'data': 4 bytes
- data[0] = 1
+ERROR Read-write data race at global memory
+ERROR Write-write data race at global memory
+EXACT Argument 'data': 4 bytes
+EXACT data[0] = 1
diff --git a/tests/kernels/atomics/atomic_cmpxchg_write_race.ref b/tests/kernels/atomics/atomic_cmpxchg_write_race.ref
index af96d9b..ca64ee3 100644
--- a/tests/kernels/atomics/atomic_cmpxchg_write_race.ref
+++ b/tests/kernels/atomics/atomic_cmpxchg_write_race.ref
@@ -1,5 +1,4 @@
-ERROR EXPECTED
-
-Argument 'data': 4 bytes
- data[0] = 42
+ERROR Read-write data race at global memory
+EXACT Argument 'data': 4 bytes
+EXACT data[0] = 42
diff --git a/tests/kernels/atomics/atomic_global_fence.ref b/tests/kernels/atomics/atomic_global_fence.ref
index a7bf48a..dab956f 100644
--- a/tests/kernels/atomics/atomic_global_fence.ref
+++ b/tests/kernels/atomics/atomic_global_fence.ref
@@ -1,5 +1,3 @@
-
-Argument 'data': 8 bytes
- data[0] = 6
- data[1] = 22
-
+EXACT Argument 'data': 8 bytes
+EXACT data[0] = 6
+EXACT data[1] = 22
diff --git a/tests/kernels/atomics/atomic_global_fence_race.ref b/tests/kernels/atomics/atomic_global_fence_race.ref
index 4920bcf..71c83c2 100644
--- a/tests/kernels/atomics/atomic_global_fence_race.ref
+++ b/tests/kernels/atomics/atomic_global_fence_race.ref
@@ -1,6 +1,6 @@
-ERROR EXPECTED
-
-Argument 'data': 8 bytes
- data[0] = 6
- data[1] = 28
+ERROR Read-write data race at global memory
+ERROR Read-write data race at global memory
+EXACT Argument 'data': 8 bytes
+MATCH data[0] =
+MATCH data[1] =
diff --git a/tests/kernels/atomics/atomic_increment.ref b/tests/kernels/atomics/atomic_increment.ref
index f61189d..fc93c7c 100644
--- a/tests/kernels/atomics/atomic_increment.ref
+++ b/tests/kernels/atomics/atomic_increment.ref
@@ -1,4 +1,2 @@
-
-Argument 'data': 4 bytes
- data[0] = 4
-
+EXACT Argument 'data': 4 bytes
+EXACT data[0] = 4
diff --git a/tests/kernels/atomics/atomic_intergroup_race.ref b/tests/kernels/atomics/atomic_intergroup_race.ref
index cab3430..43c056c 100644
--- a/tests/kernels/atomics/atomic_intergroup_race.ref
+++ b/tests/kernels/atomics/atomic_intergroup_race.ref
@@ -1,5 +1,5 @@
-ERROR EXPECTED
-
-Argument 'data': 4 bytes
- data[0] = 8
+ERROR Read-write data race at global memory
+ERROR Write-write data race at global memory
+EXACT Argument 'data': 4 bytes
+MATCH data[0] =
diff --git a/tests/kernels/atomics/atomic_local_fence.ref b/tests/kernels/atomics/atomic_local_fence.ref
index a7bf48a..dab956f 100644
--- a/tests/kernels/atomics/atomic_local_fence.ref
+++ b/tests/kernels/atomics/atomic_local_fence.ref
@@ -1,5 +1,3 @@
-
-Argument 'data': 8 bytes
- data[0] = 6
- data[1] = 22
-
+EXACT Argument 'data': 8 bytes
+EXACT data[0] = 6
+EXACT data[1] = 22
diff --git a/tests/kernels/atomics/atomic_race_after.ref b/tests/kernels/atomics/atomic_race_after.ref
index bc902a8..8c97680 100644
--- a/tests/kernels/atomics/atomic_race_after.ref
+++ b/tests/kernels/atomics/atomic_race_after.ref
@@ -1,5 +1,6 @@
-ERROR EXPECTED
-
-Argument 'data': 4 bytes
- data[0] = 5
+ERROR Read-write data race at global memory
+ERROR Read-write data race at global memory
+ERROR Write-write data race at global memory
+EXACT Argument 'data': 4 bytes
+EXACT data[0] = 5
diff --git a/tests/kernels/atomics/atomic_race_before.ref b/tests/kernels/atomics/atomic_race_before.ref
index 6ecedc3..65484a7 100644
--- a/tests/kernels/atomics/atomic_race_before.ref
+++ b/tests/kernels/atomics/atomic_race_before.ref
@@ -1,5 +1,9 @@
-ERROR EXPECTED
-
-Argument 'data': 4 bytes
- data[0] = 4
+ERROR Read-write data race at global memory address
+ERROR Write-write data race at global memory address
+ERROR Read-write data race at global memory address
+ERROR Write-write data race at global memory address
+ERROR Read-write data race at global memory address
+ERROR Write-write data race at global memory address
+EXACT Argument 'data': 4 bytes
+EXACT data[0] = 4
diff --git a/tests/kernels/atomics/atomic_same_workitem.ref b/tests/kernels/atomics/atomic_same_workitem.ref
index 3ef3ca7..63eb583 100644
--- a/tests/kernels/atomics/atomic_same_workitem.ref
+++ b/tests/kernels/atomics/atomic_same_workitem.ref
@@ -1,7 +1,5 @@
-
-Argument 'data': 16 bytes
- data[0] = 1
- data[1] = 2
- data[2] = 1
- data[3] = 2
-
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 1
+EXACT data[1] = 2
+EXACT data[2] = 1
+EXACT data[3] = 2
diff --git a/tests/kernels/barrier/barrier_different_instructions.ref b/tests/kernels/barrier/barrier_different_instructions.ref
index 3ffaa5a..ab5b1f0 100644
--- a/tests/kernels/barrier/barrier_different_instructions.ref
+++ b/tests/kernels/barrier/barrier_different_instructions.ref
@@ -1,8 +1,9 @@
-ERROR EXPECTED
-
-Argument 'data': 16 bytes
- data[0] = 42
- data[1] = 43
- data[2] = 44
- data[3] = 45
+ERROR Work-group divergence detected (barrier)
+ERROR Work-group divergence detected (barrier)
+ERROR Work-group divergence detected (barrier)
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 42
+EXACT data[1] = 43
+EXACT data[2] = 44
+EXACT data[3] = 45
diff --git a/tests/kernels/barrier/barrier_divergence.ref b/tests/kernels/barrier/barrier_divergence.ref
index 4da13c4..028c077 100644
--- a/tests/kernels/barrier/barrier_divergence.ref
+++ b/tests/kernels/barrier/barrier_divergence.ref
@@ -1,8 +1,7 @@
-ERROR EXPECTED
-
-Argument 'data': 16 bytes
- data[0] = 0
- data[1] = 1
- data[2] = 2
- data[3] = 3
+ERROR Work-group divergence detected (barrier)
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 0
+EXACT data[1] = 1
+EXACT data[2] = 2
+EXACT data[3] = 3
diff --git a/tests/kernels/bugs/byval_function_argument.cl b/tests/kernels/bugs/byval_function_argument.cl
new file mode 100644
index 0000000..de36492
--- /dev/null
+++ b/tests/kernels/bugs/byval_function_argument.cl
@@ -0,0 +1,19 @@
+union U // Two uint members that share the same storage.
+{
+ uint a;
+ uint b;
+};
+
+uint func(union U value) // Receives the union by value and returns its original contents.
+{
+ uint ret = value.a;
+ value.b = 777; // Overwrites the parameter only; the .ref file expects the caller's union to still hold 42.
+ return ret;
+}
+
+kernel void byval_function_argument(global uint *output) // Regression test: passing a union by value to func and then reading it back in the caller.
+{
+ union U u = {42};
+ output[0] = func(u); // func returns value.a, which is 42 here
+ output[1] = u.b; // expected to be 42 (see .ref) even though func wrote 777 to its parameter
+}
diff --git a/tests/kernels/bugs/byval_function_argument.ref b/tests/kernels/bugs/byval_function_argument.ref
new file mode 100644
index 0000000..f6eec51
--- /dev/null
+++ b/tests/kernels/bugs/byval_function_argument.ref
@@ -0,0 +1,3 @@
+EXACT Argument 'output': 8 bytes
+EXACT output[0] = 42
+EXACT output[1] = 42
diff --git a/tests/kernels/bugs/byval_function_argument.sim b/tests/kernels/bugs/byval_function_argument.sim
new file mode 100644
index 0000000..f7abb44
--- /dev/null
+++ b/tests/kernels/bugs/byval_function_argument.sim
@@ -0,0 +1,6 @@
+byval_function_argument.cl
+byval_function_argument
+1 1 1
+1 1 1
+
+<size=8 fill=0 dump>
diff --git a/tests/kernels/bugs/const_gep_expr_pointee_type.cl b/tests/kernels/bugs/const_gep_expr_pointee_type.cl
new file mode 100644
index 0000000..63d9780
--- /dev/null
+++ b/tests/kernels/bugs/const_gep_expr_pointee_type.cl
@@ -0,0 +1,9 @@
+struct S0 { // Struct type declared inline as the return type of fn1.
+ int d;
+ long b;
+} fn1() { // Returns a struct S0 whose first member d is initialised to 3.
+ struct S0 a = {3};
+ a.d; // Expression statement with no effect; presumably kept because this is a reduced reproducer - confirm before removing.
+ return a;
+ }
+__kernel void entry() { fn1(); } // Kernel entry point: calls fn1 and discards the returned struct; the kernel has no arguments.
diff --git a/tests/kernels/bugs/const_gep_expr_pointee_type.sim b/tests/kernels/bugs/const_gep_expr_pointee_type.sim
new file mode 100644
index 0000000..59ad90d
--- /dev/null
+++ b/tests/kernels/bugs/const_gep_expr_pointee_type.sim
@@ -0,0 +1,4 @@
+const_gep_expr_pointee_type.cl
+entry
+1 1 1
+1 1 1
diff --git a/tests/kernels/bugs/false_warning_vector_argument.cl b/tests/kernels/bugs/false_warning_vector_argument.cl
new file mode 100644
index 0000000..b05740f
--- /dev/null
+++ b/tests/kernels/bugs/false_warning_vector_argument.cl
@@ -0,0 +1,8 @@
+kernel void false_warning_vector_argument(int16 arg, global int8 *res) // Adds a swizzled constant vector to a by-value int16 argument and stores the lower half of the sum.
+{
+ int8 v = (int8)(1,2,3,4,5,6,7,8);
+
+ int16 add = arg + v.s0011223344556677; // swizzle repeats each component of v twice, giving 16 elements
+
+ *res = add.lo; // lower 8 components; the .ref file expects 1,1,2,2,3,3,4,4 with the zero-filled arg from the .sim file
+}
diff --git a/tests/kernels/bugs/false_warning_vector_argument.ref b/tests/kernels/bugs/false_warning_vector_argument.ref
new file mode 100644
index 0000000..ccac9e6
--- /dev/null
+++ b/tests/kernels/bugs/false_warning_vector_argument.ref
@@ -0,0 +1,9 @@
+EXACT Argument 'res': 32 bytes
+EXACT res[0] = 1
+EXACT res[1] = 1
+EXACT res[2] = 2
+EXACT res[3] = 2
+EXACT res[4] = 3
+EXACT res[5] = 3
+EXACT res[6] = 4
+EXACT res[7] = 4
diff --git a/tests/kernels/bugs/false_warning_vector_argument.sim b/tests/kernels/bugs/false_warning_vector_argument.sim
new file mode 100644
index 0000000..7a7df14
--- /dev/null
+++ b/tests/kernels/bugs/false_warning_vector_argument.sim
@@ -0,0 +1,7 @@
+false_warning_vector_argument.cl
+false_warning_vector_argument
+1 1 1
+1 1 1
+
+<size=64 fill=0>
+<size=32 fill=0 dump>
diff --git a/tests/kernels/bugs/gvn_arbitrary_integers.ref b/tests/kernels/bugs/gvn_arbitrary_integers.ref
index fafe2ec..217cbcf 100644
--- a/tests/kernels/bugs/gvn_arbitrary_integers.ref
+++ b/tests/kernels/bugs/gvn_arbitrary_integers.ref
@@ -1,6 +1,4 @@
-
-Argument 'dest': 12 bytes
- dest[0] = 0
- dest[1] = 0
- dest[2] = 42
-
+EXACT Argument 'dest': 12 bytes
+EXACT dest[0] = 0
+EXACT dest[1] = 0
+EXACT dest[2] = 42
diff --git a/tests/kernels/bugs/kernel_struct_argument.ref b/tests/kernels/bugs/kernel_struct_argument.ref
index b8c7e51..7a4426d 100644
--- a/tests/kernels/bugs/kernel_struct_argument.ref
+++ b/tests/kernels/bugs/kernel_struct_argument.ref
@@ -1,4 +1,2 @@
-
-Argument 'out': 4 bytes
- out[0] = 144
-
+EXACT Argument 'out': 4 bytes
+EXACT out[0] = 144
diff --git a/tests/kernels/bugs/llvm_bswap.cl b/tests/kernels/bugs/llvm_bswap.cl
new file mode 100644
index 0000000..c9636eb
--- /dev/null
+++ b/tests/kernels/bugs/llvm_bswap.cl
@@ -0,0 +1,8 @@
+kernel void test(global uint *input, global uint *output) // Writes the byte-reversed form of each of the first four input words to output.
+{
+ for (unsigned int i = 0; i < 4; i++)
+ {
+ uint word = input[i];
+ output[i] = ((word & 0xff) << 24) | ((word & 0xff00) << 8) | ((word & 0xff0000) >> 8) | ((word & 0xff000000) >> 24); // manual mask-and-shift byte swap; presumably meant to be recognised by LLVM as a bswap (per the test name) - confirm
+ }
+}
diff --git a/tests/kernels/bugs/llvm_bswap.ref b/tests/kernels/bugs/llvm_bswap.ref
new file mode 100644
index 0000000..e217f93
--- /dev/null
+++ b/tests/kernels/bugs/llvm_bswap.ref
@@ -0,0 +1,7 @@
+
+EXACT Argument 'output': 16 bytes
+EXACT output[0] = 0x01000000
+EXACT output[1] = 0x00000010
+EXACT output[2] = 0x78563412
+EXACT output[3] = 0x45342312
+
diff --git a/tests/kernels/bugs/llvm_bswap.sim b/tests/kernels/bugs/llvm_bswap.sim
new file mode 100644
index 0000000..de720a0
--- /dev/null
+++ b/tests/kernels/bugs/llvm_bswap.sim
@@ -0,0 +1,12 @@
+llvm_bswap.cl
+test
+1 1 1
+1 1 1
+
+<size=16 hex>
+0x00000001
+0x10000000
+0x12345678
+0x12233445
+
+<size=16 fill=0 hex dump>
diff --git a/tests/kernels/bugs/many_alloca.ref b/tests/kernels/bugs/many_alloca.ref
index 201d55d..a7a484c 100644
--- a/tests/kernels/bugs/many_alloca.ref
+++ b/tests/kernels/bugs/many_alloca.ref
@@ -1,4 +1,2 @@
-
-Argument 'data': 4 bytes
- data[0] = 100000
-
+EXACT Argument 'data': 4 bytes
+EXACT data[0] = 100000
diff --git a/tests/kernels/bugs/multidim_array_in_struct.ref b/tests/kernels/bugs/multidim_array_in_struct.ref
index f9606f2..e7fa7eb 100644
--- a/tests/kernels/bugs/multidim_array_in_struct.ref
+++ b/tests/kernels/bugs/multidim_array_in_struct.ref
@@ -1,4 +1,2 @@
-
-Argument 'output': 8 bytes
- output[0] = 6
-
+EXACT Argument 'output': 8 bytes
+EXACT output[0] = 6
diff --git a/tests/kernels/bugs/null_argument.ref b/tests/kernels/bugs/null_argument.ref
index dcf81cb..87107f8 100644
--- a/tests/kernels/bugs/null_argument.ref
+++ b/tests/kernels/bugs/null_argument.ref
@@ -1,4 +1,2 @@
-
-Argument 'output': 8 bytes
- output[0] = 1
-
+EXACT Argument 'output': 8 bytes
+EXACT output[0] = 1
diff --git a/tests/kernels/bugs/rhadd_overflow.cl b/tests/kernels/bugs/rhadd_overflow.cl
new file mode 100644
index 0000000..42a2960
--- /dev/null
+++ b/tests/kernels/bugs/rhadd_overflow.cl
@@ -0,0 +1,4 @@
+kernel void rhadd_overflow(global ulong *output) // Stores rhadd of 0 and the maximum ulong value in output[0].
+{
+ output[0] = rhadd(0UL, 0xFFFFFFFFFFFFFFFFUL); // the .ref file expects 0x8000000000000000; 0 + 0xFFFFFFFFFFFFFFFF + 1 does not fit in 64 bits
+}
diff --git a/tests/kernels/bugs/rhadd_overflow.ref b/tests/kernels/bugs/rhadd_overflow.ref
new file mode 100644
index 0000000..3bb215c
--- /dev/null
+++ b/tests/kernels/bugs/rhadd_overflow.ref
@@ -0,0 +1,3 @@
+
+EXACT Argument 'output': 8 bytes
+EXACT output[0] = 0x8000000000000000
diff --git a/tests/kernels/bugs/rhadd_overflow.sim b/tests/kernels/bugs/rhadd_overflow.sim
new file mode 100644
index 0000000..ddb5e14
--- /dev/null
+++ b/tests/kernels/bugs/rhadd_overflow.sim
@@ -0,0 +1,6 @@
+rhadd_overflow.cl
+rhadd_overflow
+1 1 1
+1 1 1
+
+<size=8 fill=0 hex dump>
diff --git a/tests/kernels/bugs/sroa_addrspace_cast.ref b/tests/kernels/bugs/sroa_addrspace_cast.ref
index 2fff44c..0492407 100644
--- a/tests/kernels/bugs/sroa_addrspace_cast.ref
+++ b/tests/kernels/bugs/sroa_addrspace_cast.ref
@@ -1,4 +1,2 @@
-
-Argument 'output': 4 bytes
- output[0] = 42.24
-
+EXACT Argument 'output': 4 bytes
+EXACT output[0] = 42.24
diff --git a/tests/kernels/bugs/write_vector_write_only_fp.cl b/tests/kernels/bugs/write_vector_write_only_fp.cl
new file mode 100644
index 0000000..7426b84
--- /dev/null
+++ b/tests/kernels/bugs/write_vector_write_only_fp.cl
@@ -0,0 +1,5 @@
+kernel void write_vector_write_only_fp(global int4 *output) // Each work-item writes 42 to the x component of its own int4 element.
+{
+ int i = get_global_id(0);
+ output[i].x = 42; // only .x is assigned; the .ref file expects the other components to keep the fill value 7 from the .sim file
+}
diff --git a/tests/kernels/bugs/write_vector_write_only_fp.ref b/tests/kernels/bugs/write_vector_write_only_fp.ref
new file mode 100644
index 0000000..dd0ed01
--- /dev/null
+++ b/tests/kernels/bugs/write_vector_write_only_fp.ref
@@ -0,0 +1,17 @@
+EXACT Argument 'output': 64 bytes
+EXACT output[0] = 42
+EXACT output[1] = 7
+EXACT output[2] = 7
+EXACT output[3] = 7
+EXACT output[4] = 42
+EXACT output[5] = 7
+EXACT output[6] = 7
+EXACT output[7] = 7
+EXACT output[8] = 42
+EXACT output[9] = 7
+EXACT output[10] = 7
+EXACT output[11] = 7
+EXACT output[12] = 42
+EXACT output[13] = 7
+EXACT output[14] = 7
+EXACT output[15] = 7
diff --git a/tests/kernels/bugs/write_vector_write_only_fp.sim b/tests/kernels/bugs/write_vector_write_only_fp.sim
new file mode 100644
index 0000000..296d298
--- /dev/null
+++ b/tests/kernels/bugs/write_vector_write_only_fp.sim
@@ -0,0 +1,6 @@
+write_vector_write_only_fp.cl
+write_vector_write_only_fp
+4 1 1
+1 1 1
+
+<size=64 wo fill=7 dump>
diff --git a/tests/kernels/data-race/broadcast.ref b/tests/kernels/data-race/broadcast.ref
index 69790f7..c5d628b 100644
--- a/tests/kernels/data-race/broadcast.ref
+++ b/tests/kernels/data-race/broadcast.ref
@@ -1,7 +1,5 @@
-
-Argument 'output': 16 bytes
- output[0] = 42
- output[1] = 42
- output[2] = 42
- output[3] = 42
-
+EXACT Argument 'output': 16 bytes
+EXACT output[0] = 42
+EXACT output[1] = 42
+EXACT output[2] = 42
+EXACT output[3] = 42
diff --git a/tests/kernels/data-race/global_fence.ref b/tests/kernels/data-race/global_fence.ref
index 342c29a..caae30a 100644
--- a/tests/kernels/data-race/global_fence.ref
+++ b/tests/kernels/data-race/global_fence.ref
@@ -1,7 +1,5 @@
-
-Argument 'output': 16 bytes
- output[0] = 6
- output[1] = 22
- output[2] = 38
- output[3] = 54
-
+EXACT Argument 'output': 16 bytes
+EXACT output[0] = 6
+EXACT output[1] = 22
+EXACT output[2] = 38
+EXACT output[3] = 54
diff --git a/tests/kernels/data-race/global_only_fence.ref b/tests/kernels/data-race/global_only_fence.ref
index 5b62861..faf17e0 100644
--- a/tests/kernels/data-race/global_only_fence.ref
+++ b/tests/kernels/data-race/global_only_fence.ref
@@ -1,8 +1,9 @@
-ERROR EXPECTED
-
-Argument 'output': 16 bytes
- output[0] = 6
- output[1] = 0
- output[2] = 0
- output[3] = 0
+ERROR Read-write data race at local memory
+ERROR Read-write data race at local memory
+ERROR Read-write data race at local memory
+EXACT Argument 'output': 16 bytes
+EXACT output[0] = 6
+EXACT output[1] = 0
+EXACT output[2] = 0
+EXACT output[3] = 0
diff --git a/tests/kernels/data-race/global_read_write_race.ref b/tests/kernels/data-race/global_read_write_race.ref
index 7e1c317..7f5a780 100644
--- a/tests/kernels/data-race/global_read_write_race.ref
+++ b/tests/kernels/data-race/global_read_write_race.ref
@@ -1,8 +1,8 @@
-ERROR EXPECTED
-
-Argument 'data': 16 bytes
- data[0] = 0
- data[1] = 0
- data[2] = 0
- data[3] = 0
+ERROR Read-write data race at global memory
+ERROR Read-write data race at global memory
+EXACT Argument 'data': 16 bytes
+MATCH data[0] =
+MATCH data[1] =
+MATCH data[2] =
+MATCH data[3] =
diff --git a/tests/kernels/data-race/global_read_write_race.sim b/tests/kernels/data-race/global_read_write_race.sim
index 11077ab..fd262ec 100644
--- a/tests/kernels/data-race/global_read_write_race.sim
+++ b/tests/kernels/data-race/global_read_write_race.sim
@@ -1,6 +1,6 @@
global_read_write_race.cl
global_read_write_race
4 1 1
-1 1 1
+4 1 1
<size=16 range=0:1:3 dump>
diff --git a/tests/kernels/data-race/global_write_write_race.ref b/tests/kernels/data-race/global_write_write_race.ref
index 0b31b65..475e301 100644
--- a/tests/kernels/data-race/global_write_write_race.ref
+++ b/tests/kernels/data-race/global_write_write_race.ref
@@ -1,5 +1,6 @@
-ERROR EXPECTED
-
-Argument 'data': 4 bytes
- data[0] = 3
+ERROR Write-write data race at global memory
+ERROR Write-write data race at global memory
+ERROR Write-write data race at global memory
+EXACT Argument 'data': 4 bytes
+MATCH data[0] =
diff --git a/tests/kernels/data-race/increment.ref b/tests/kernels/data-race/increment.ref
index 11a20e6..3c1a851 100644
--- a/tests/kernels/data-race/increment.ref
+++ b/tests/kernels/data-race/increment.ref
@@ -1,7 +1,5 @@
-
-Argument 'data': 16 bytes
- data[0] = 1
- data[1] = 2
- data[2] = 3
- data[3] = 4
-
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 1
+EXACT data[1] = 2
+EXACT data[2] = 3
+EXACT data[3] = 4
diff --git a/tests/kernels/data-race/intergroup_hidden_race.ref b/tests/kernels/data-race/intergroup_hidden_race.ref
index 9390c4c..e210dc0 100644
--- a/tests/kernels/data-race/intergroup_hidden_race.ref
+++ b/tests/kernels/data-race/intergroup_hidden_race.ref
@@ -1,6 +1,5 @@
-ERROR EXPECTED
-
-Argument 'output': 8 bytes
- output[0] = 0
- output[1] = 0
+ERROR Read-write data race at global memory
+EXACT Argument 'output': 8 bytes
+MATCH output[0] =
+MATCH output[1] =
diff --git a/tests/kernels/data-race/intergroup_race.ref b/tests/kernels/data-race/intergroup_race.ref
index 4da13c4..7078eef 100644
--- a/tests/kernels/data-race/intergroup_race.ref
+++ b/tests/kernels/data-race/intergroup_race.ref
@@ -1,8 +1,5 @@
-ERROR EXPECTED
-
-Argument 'data': 16 bytes
- data[0] = 0
- data[1] = 1
- data[2] = 2
- data[3] = 3
+ERROR Read-write data race at global memory
+EXACT Argument 'data': 8 bytes
+MATCH data[0] =
+MATCH data[1] =
diff --git a/tests/kernels/data-race/intergroup_race.sim b/tests/kernels/data-race/intergroup_race.sim
index 4e60c87..6a10dd8 100644
--- a/tests/kernels/data-race/intergroup_race.sim
+++ b/tests/kernels/data-race/intergroup_race.sim
@@ -1,6 +1,6 @@
intergroup_race.cl
intergroup_race
-16 1 1
+8 1 1
4 1 1
-<size=16 fill=0 dump>
+<size=8 fill=0 dump>
diff --git a/tests/kernels/data-race/intragroup_hidden_race.ref b/tests/kernels/data-race/intragroup_hidden_race.ref
index 7ff022b..102070c 100644
--- a/tests/kernels/data-race/intragroup_hidden_race.ref
+++ b/tests/kernels/data-race/intragroup_hidden_race.ref
@@ -1,6 +1,5 @@
-ERROR EXPECTED
-
-Argument 'output': 8 bytes
- output[0] = 42
- output[1] = 42
+ERROR Read-write data race at global memory
+EXACT Argument 'output': 8 bytes
+EXACT output[0] = 42
+EXACT output[1] = 42
diff --git a/tests/kernels/data-race/local_only_fence.ref b/tests/kernels/data-race/local_only_fence.ref
index b6b7f00..8f99004 100644
--- a/tests/kernels/data-race/local_only_fence.ref
+++ b/tests/kernels/data-race/local_only_fence.ref
@@ -1,8 +1,18 @@
-ERROR EXPECTED
-
-Argument 'output': 16 bytes
- output[0] = 6
- output[1] = 22
- output[2] = 38
- output[3] = 54
+ERROR Read-write data race at global memory address
+ERROR Read-write data race at global memory address
+ERROR Read-write data race at global memory address
+ERROR Read-write data race at global memory address
+ERROR Read-write data race at global memory address
+ERROR Read-write data race at global memory address
+ERROR Read-write data race at global memory address
+ERROR Read-write data race at global memory address
+ERROR Read-write data race at global memory address
+ERROR Read-write data race at global memory address
+ERROR Read-write data race at global memory address
+ERROR Read-write data race at global memory address
+EXACT Argument 'output': 16 bytes
+EXACT output[0] = 6
+EXACT output[1] = 22
+EXACT output[2] = 38
+EXACT output[3] = 54
diff --git a/tests/kernels/data-race/local_read_write_race.cl b/tests/kernels/data-race/local_read_write_race.cl
index bcc3ff8..3834fd4 100644
--- a/tests/kernels/data-race/local_read_write_race.cl
+++ b/tests/kernels/data-race/local_read_write_race.cl
@@ -1,6 +1,9 @@
kernel void local_read_write_race(global int *data, local int *scratch)
{
int l = get_local_id(0);
+ scratch[l] = 0;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
scratch[l] = l;
if (l == 0)
{
diff --git a/tests/kernels/data-race/local_read_write_race.ref b/tests/kernels/data-race/local_read_write_race.ref
index 0943b15..f4792d4 100644
--- a/tests/kernels/data-race/local_read_write_race.ref
+++ b/tests/kernels/data-race/local_read_write_race.ref
@@ -1,5 +1,6 @@
-ERROR EXPECETD
-
-Argument 'data': 4 bytes
- data[0] = 0
+ERROR Read-write data race at local memory
+ERROR Read-write data race at local memory
+ERROR Read-write data race at local memory
+EXACT Argument 'data': 4 bytes
+EXACT data[0] = 0
diff --git a/tests/kernels/data-race/local_write_write_race.ref b/tests/kernels/data-race/local_write_write_race.ref
index 3fe4e95..f094bf0 100644
--- a/tests/kernels/data-race/local_write_write_race.ref
+++ b/tests/kernels/data-race/local_write_write_race.ref
@@ -1,8 +1,9 @@
-ERROR EXPECTED
-
-Argument 'data': 16 bytes
- data[0] = 3
- data[1] = 3
- data[2] = 3
- data[3] = 3
+ERROR Write-write data race at local memory
+ERROR Write-write data race at local memory
+ERROR Write-write data race at local memory
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 3
+EXACT data[1] = 3
+EXACT data[2] = 3
+EXACT data[3] = 3
diff --git a/tests/kernels/data-race/uniform_write_race.ref b/tests/kernels/data-race/uniform_write_race.ref
index b688113..3cdbf81 100644
--- a/tests/kernels/data-race/uniform_write_race.ref
+++ b/tests/kernels/data-race/uniform_write_race.ref
@@ -1,4 +1,2 @@
-
-Argument 'data': 4 bytes
- data[0] = 0
-
+EXACT Argument 'data': 4 bytes
+EXACT data[0] = 0
diff --git a/tests/kernels/memcheck/async_copy_out_of_bounds.ref b/tests/kernels/memcheck/async_copy_out_of_bounds.ref
index 9a8cb35..82c85c5 100644
--- a/tests/kernels/memcheck/async_copy_out_of_bounds.ref
+++ b/tests/kernels/memcheck/async_copy_out_of_bounds.ref
@@ -1,8 +1,7 @@
-ERROR EXPECTED
-
-Argument 'dst': 16 bytes
- dst[0] = 0
- dst[1] = 0
- dst[2] = 1
- dst[3] = 2
+ERROR Invalid write of size 4 at global memory
+EXACT Argument 'dst': 16 bytes
+EXACT dst[0] = 0
+EXACT dst[1] = 0
+EXACT dst[2] = 1
+EXACT dst[3] = 2
diff --git a/tests/kernels/memcheck/atomic_out_of_bounds.ref b/tests/kernels/memcheck/atomic_out_of_bounds.ref
index cfcff7d..3a26898 100644
--- a/tests/kernels/memcheck/atomic_out_of_bounds.ref
+++ b/tests/kernels/memcheck/atomic_out_of_bounds.ref
@@ -1,8 +1,9 @@
-ERROR EXPECTED
-
-Argument 'counters': 16 bytes
- counters[0] = 1
- counters[1] = 1
- counters[2] = 1
- counters[3] = 1
+ERROR Invalid read of size 4 at global memory
+ERROR Invalid write of size 4 at global memory
+ERROR Uninitialized value
+EXACT Argument 'counters': 16 bytes
+EXACT counters[0] = 1
+EXACT counters[1] = 1
+EXACT counters[2] = 1
+EXACT counters[3] = 1
diff --git a/tests/kernels/memcheck/casted_static_array.cl b/tests/kernels/memcheck/casted_static_array.cl
new file mode 100644
index 0000000..2519acb
--- /dev/null
+++ b/tests/kernels/memcheck/casted_static_array.cl
@@ -0,0 +1,31 @@
+void transparent_crc_no_string(ulong *p1, ulong p2) { *p1 += p2; }
+int get_linear_global_id() {
+ return (get_global_id(2) * get_global_size(1) + get_global_id(1)) *
+ get_global_size(0) +
+ get_global_id(0);
+}
+union U5 {
+ short f0;
+ int f3;
+};
+struct S6 {
+ union U5 g_75[5][7][2];
+ union U5 **g_91[78];
+};
+__kernel void casted_static_array(__global ulong *p1) {
+ int i, j, k;
+ struct S6 c_864;
+ struct S6 *p_863 = &c_864;
+ union U5 *p_863_6;
+ struct S6 c_865 = {{{{{0xD54EL}}}}, {&p_863_6}};
+ c_864 = c_865;
+ ulong crc64_context = i = 0;
+ for (; i < 9; i++) {
+ j = 0;
+ {
+ k = 0;
+ { transparent_crc_no_string(&crc64_context, p_863->g_75[i][j][k].f0); }
+ }
+ }
+ p1[get_linear_global_id()] = crc64_context;
+}
diff --git a/tests/kernels/memcheck/casted_static_array.ref b/tests/kernels/memcheck/casted_static_array.ref
new file mode 100644
index 0000000..b96430d
--- /dev/null
+++ b/tests/kernels/memcheck/casted_static_array.ref
@@ -0,0 +1,7 @@
+ERROR exceeds static array size
+ERROR exceeds static array size
+ERROR exceeds static array size
+ERROR exceeds static array size
+
+EXACT Argument 'p1': 8 bytes
+MATCH p1[0] =
diff --git a/tests/kernels/memcheck/casted_static_array.sim b/tests/kernels/memcheck/casted_static_array.sim
new file mode 100644
index 0000000..4edcd72
--- /dev/null
+++ b/tests/kernels/memcheck/casted_static_array.sim
@@ -0,0 +1,6 @@
+casted_static_array.cl
+casted_static_array
+1 1 1
+1 1 1
+
+<size=8 fill=0 dump>
diff --git a/tests/kernels/memcheck/dereference_null.ref b/tests/kernels/memcheck/dereference_null.ref
index 5a01471..3f8d021 100644
--- a/tests/kernels/memcheck/dereference_null.ref
+++ b/tests/kernels/memcheck/dereference_null.ref
@@ -1,5 +1,5 @@
-ERROR EXPECTED
-
-Argument 'output': 4 bytes
- output[0] = 0
+ERROR Invalid read of size 4 at global memory address 0x0
+ERROR Uninitialized value
+EXACT Argument 'output': 4 bytes
+EXACT output[0] = 0
diff --git a/tests/kernels/memcheck/fake_out_of_bounds.cl b/tests/kernels/memcheck/fake_out_of_bounds.cl
new file mode 100644
index 0000000..90fce03
--- /dev/null
+++ b/tests/kernels/memcheck/fake_out_of_bounds.cl
@@ -0,0 +1,12 @@
+struct S0 {
+ uchar f[1];
+ ulong g[4];
+};
+
+__kernel void entry(__global ulong *result) {
+ struct S0 s = {{1}, {2,3,4,5}};
+ struct S0 t = s;
+
+ volatile int i = 0;
+ *result = t.g[i];
+}
diff --git a/tests/kernels/memcheck/fake_out_of_bounds.ref b/tests/kernels/memcheck/fake_out_of_bounds.ref
new file mode 100644
index 0000000..d932a4a
--- /dev/null
+++ b/tests/kernels/memcheck/fake_out_of_bounds.ref
@@ -0,0 +1,2 @@
+EXACT Argument 'result': 8 bytes
+EXACT result[0] = 2
diff --git a/tests/kernels/memcheck/fake_out_of_bounds.sim b/tests/kernels/memcheck/fake_out_of_bounds.sim
new file mode 100644
index 0000000..23799e4
--- /dev/null
+++ b/tests/kernels/memcheck/fake_out_of_bounds.sim
@@ -0,0 +1,6 @@
+fake_out_of_bounds.cl
+entry
+1 1 1
+1 1 1
+
+<size=8 fill=0 dump>
diff --git a/tests/kernels/memcheck/read_out_of_bounds.ref b/tests/kernels/memcheck/read_out_of_bounds.ref
index 539c267..75a0f05 100644
--- a/tests/kernels/memcheck/read_out_of_bounds.ref
+++ b/tests/kernels/memcheck/read_out_of_bounds.ref
@@ -1,9 +1,10 @@
-ERROR EXPECTED
-
-Argument 'c': 20 bytes
- c[0] = 0
- c[1] = 2
- c[2] = 4
- c[3] = 6
- c[4] = 0
+ERROR Invalid read of size 4 at global memory
+ERROR Invalid read of size 4 at global memory
+ERROR Uninitialized value
+EXACT Argument 'c': 20 bytes
+EXACT c[0] = 0
+EXACT c[1] = 2
+EXACT c[2] = 4
+EXACT c[3] = 6
+EXACT c[4] = 0
diff --git a/tests/kernels/memcheck/read_write_only_memory.ref b/tests/kernels/memcheck/read_write_only_memory.ref
index cb933ab..90270d4 100644
--- a/tests/kernels/memcheck/read_write_only_memory.ref
+++ b/tests/kernels/memcheck/read_write_only_memory.ref
@@ -1,8 +1,10 @@
-ERROR EXPECTED
-
-Argument 'output': 16 bytes
- output[0] = 0
- output[1] = 1
- output[2] = 2
- output[3] = 3
+ERROR Invalid read from write-only buffer
+ERROR Invalid read from write-only buffer
+ERROR Invalid read from write-only buffer
+ERROR Invalid read from write-only buffer
+EXACT Argument 'output': 16 bytes
+EXACT output[0] = 0
+EXACT output[1] = 1
+EXACT output[2] = 2
+EXACT output[3] = 3
diff --git a/tests/kernels/memcheck/static_array.cl b/tests/kernels/memcheck/static_array.cl
new file mode 100644
index 0000000..973f86b
--- /dev/null
+++ b/tests/kernels/memcheck/static_array.cl
@@ -0,0 +1,13 @@
+struct S
+{
+ int a;
+ char b[2];
+};
+
+kernel void static_array(global char *output)
+{
+ volatile struct S s = {-1, {42, 7}};
+ int i = get_global_id(0);
+ s.b[i] = i;
+ output[i] = s.b[i];
+}
diff --git a/tests/kernels/memcheck/static_array.ref b/tests/kernels/memcheck/static_array.ref
new file mode 100644
index 0000000..a0a4901
--- /dev/null
+++ b/tests/kernels/memcheck/static_array.ref
@@ -0,0 +1,10 @@
+ERROR exceeds static array size
+ERROR exceeds static array size
+ERROR exceeds static array size
+ERROR exceeds static array size
+
+EXACT Argument 'output': 4 bytes
+EXACT output[0] = 0
+EXACT output[1] = 1
+MATCH output[2] =
+MATCH output[3] =
diff --git a/tests/kernels/memcheck/static_array.sim b/tests/kernels/memcheck/static_array.sim
new file mode 100644
index 0000000..cbfda15
--- /dev/null
+++ b/tests/kernels/memcheck/static_array.sim
@@ -0,0 +1,6 @@
+static_array.cl
+static_array
+4 1 1
+4 1 1
+
+<size=4 fill=0 dump>
diff --git a/tests/kernels/memcheck/static_array_padded_struct.cl b/tests/kernels/memcheck/static_array_padded_struct.cl
new file mode 100644
index 0000000..30afccc
--- /dev/null
+++ b/tests/kernels/memcheck/static_array_padded_struct.cl
@@ -0,0 +1,12 @@
+struct S
+{
+ int a;
+ char b[2];
+};
+
+kernel void static_array_padded_struct(global char *output)
+{
+ struct S s = {-1, {42, 7}};
+ int i = get_global_id(0);
+ output[i] = s.b[i];
+}
diff --git a/tests/kernels/memcheck/static_array_padded_struct.ref b/tests/kernels/memcheck/static_array_padded_struct.ref
new file mode 100644
index 0000000..d38dbb7
--- /dev/null
+++ b/tests/kernels/memcheck/static_array_padded_struct.ref
@@ -0,0 +1,8 @@
+ERROR exceeds static array size
+ERROR exceeds static array size
+
+EXACT Argument 'output': 4 bytes
+EXACT output[0] = 42
+EXACT output[1] = 7
+MATCH output[2] =
+MATCH output[3] =
diff --git a/tests/kernels/memcheck/static_array_padded_struct.sim b/tests/kernels/memcheck/static_array_padded_struct.sim
new file mode 100644
index 0000000..a3bd18e
--- /dev/null
+++ b/tests/kernels/memcheck/static_array_padded_struct.sim
@@ -0,0 +1,6 @@
+static_array_padded_struct.cl
+static_array_padded_struct
+4 1 1
+4 1 1
+
+<size=4 fill=0 dump>
diff --git a/tests/kernels/memcheck/write_out_of_bounds.ref b/tests/kernels/memcheck/write_out_of_bounds.ref
index 6412f26..c79217d 100644
--- a/tests/kernels/memcheck/write_out_of_bounds.ref
+++ b/tests/kernels/memcheck/write_out_of_bounds.ref
@@ -1,8 +1,7 @@
-ERROR EXPECTED
-
-Argument 'c': 16 bytes
- c[0] = 0
- c[1] = 2
- c[2] = 4
- c[3] = 6
+ERROR Invalid write of size 4 at global memory address
+EXACT Argument 'c': 16 bytes
+EXACT c[0] = 0
+EXACT c[1] = 2
+EXACT c[2] = 4
+EXACT c[3] = 6
diff --git a/tests/kernels/memcheck/write_read_only_memory.ref b/tests/kernels/memcheck/write_read_only_memory.ref
index cb933ab..c1b469c 100644
--- a/tests/kernels/memcheck/write_read_only_memory.ref
+++ b/tests/kernels/memcheck/write_read_only_memory.ref
@@ -1,8 +1,10 @@
-ERROR EXPECTED
-
-Argument 'output': 16 bytes
- output[0] = 0
- output[1] = 1
- output[2] = 2
- output[3] = 3
+ERROR Invalid write to read-only buffer
+ERROR Invalid write to read-only buffer
+ERROR Invalid write to read-only buffer
+ERROR Invalid write to read-only buffer
+EXACT Argument 'output': 16 bytes
+EXACT output[0] = 0
+EXACT output[1] = 1
+EXACT output[2] = 2
+EXACT output[3] = 3
diff --git a/tests/kernels/misc/array.ref b/tests/kernels/misc/array.ref
index 1a1d2d0..f999ca8 100644
--- a/tests/kernels/misc/array.ref
+++ b/tests/kernels/misc/array.ref
@@ -1,131 +1,129 @@
-
-Argument 'output': 1024 bytes
- output[0] = 0
- output[1] = 0
- output[2] = 0
- output[3] = 0
- output[4] = 0
- output[5] = 0
- output[6] = 0
- output[7] = 0
- output[8] = 0
- output[9] = 0
- output[10] = 0
- output[11] = 0
- output[12] = 0
- output[13] = 0
- output[14] = 0
- output[15] = 0
- output[16] = 1
- output[17] = 1
- output[18] = 1
- output[19] = 1
- output[20] = 1
- output[21] = 1
- output[22] = 1
- output[23] = 1
- output[24] = 1
- output[25] = 1
- output[26] = 1
- output[27] = 1
- output[28] = 1
- output[29] = 1
- output[30] = 1
- output[31] = 1
- output[32] = 2
- output[33] = 2
- output[34] = 2
- output[35] = 2
- output[36] = 2
- output[37] = 2
- output[38] = 2
- output[39] = 2
- output[40] = 2
- output[41] = 2
- output[42] = 2
- output[43] = 2
- output[44] = 2
- output[45] = 2
- output[46] = 2
- output[47] = 2
- output[48] = 3
- output[49] = 3
- output[50] = 3
- output[51] = 3
- output[52] = 3
- output[53] = 3
- output[54] = 3
- output[55] = 3
- output[56] = 3
- output[57] = 3
- output[58] = 3
- output[59] = 3
- output[60] = 3
- output[61] = 3
- output[62] = 3
- output[63] = 3
- output[64] = 4
- output[65] = 4
- output[66] = 4
- output[67] = 4
- output[68] = 4
- output[69] = 4
- output[70] = 4
- output[71] = 4
- output[72] = 4
- output[73] = 4
- output[74] = 4
- output[75] = 4
- output[76] = 4
- output[77] = 4
- output[78] = 4
- output[79] = 4
- output[80] = 5
- output[81] = 5
- output[82] = 5
- output[83] = 5
- output[84] = 5
- output[85] = 5
- output[86] = 5
- output[87] = 5
- output[88] = 5
- output[89] = 5
- output[90] = 5
- output[91] = 5
- output[92] = 5
- output[93] = 5
- output[94] = 5
- output[95] = 5
- output[96] = 6
- output[97] = 6
- output[98] = 6
- output[99] = 6
- output[100] = 6
- output[101] = 6
- output[102] = 6
- output[103] = 6
- output[104] = 6
- output[105] = 6
- output[106] = 6
- output[107] = 6
- output[108] = 6
- output[109] = 6
- output[110] = 6
- output[111] = 6
- output[112] = 7
- output[113] = 7
- output[114] = 7
- output[115] = 7
- output[116] = 7
- output[117] = 7
- output[118] = 7
- output[119] = 7
- output[120] = 7
- output[121] = 7
- output[122] = 7
- output[123] = 7
- output[124] = 7
- output[125] = 7
- output[126] = 7
- output[127] = 7
-
+EXACT Argument 'output': 1024 bytes
+EXACT output[0] = 0
+EXACT output[1] = 0
+EXACT output[2] = 0
+EXACT output[3] = 0
+EXACT output[4] = 0
+EXACT output[5] = 0
+EXACT output[6] = 0
+EXACT output[7] = 0
+EXACT output[8] = 0
+EXACT output[9] = 0
+EXACT output[10] = 0
+EXACT output[11] = 0
+EXACT output[12] = 0
+EXACT output[13] = 0
+EXACT output[14] = 0
+EXACT output[15] = 0
+EXACT output[16] = 1
+EXACT output[17] = 1
+EXACT output[18] = 1
+EXACT output[19] = 1
+EXACT output[20] = 1
+EXACT output[21] = 1
+EXACT output[22] = 1
+EXACT output[23] = 1
+EXACT output[24] = 1
+EXACT output[25] = 1
+EXACT output[26] = 1
+EXACT output[27] = 1
+EXACT output[28] = 1
+EXACT output[29] = 1
+EXACT output[30] = 1
+EXACT output[31] = 1
+EXACT output[32] = 2
+EXACT output[33] = 2
+EXACT output[34] = 2
+EXACT output[35] = 2
+EXACT output[36] = 2
+EXACT output[37] = 2
+EXACT output[38] = 2
+EXACT output[39] = 2
+EXACT output[40] = 2
+EXACT output[41] = 2
+EXACT output[42] = 2
+EXACT output[43] = 2
+EXACT output[44] = 2
+EXACT output[45] = 2
+EXACT output[46] = 2
+EXACT output[47] = 2
+EXACT output[48] = 3
+EXACT output[49] = 3
+EXACT output[50] = 3
+EXACT output[51] = 3
+EXACT output[52] = 3
+EXACT output[53] = 3
+EXACT output[54] = 3
+EXACT output[55] = 3
+EXACT output[56] = 3
+EXACT output[57] = 3
+EXACT output[58] = 3
+EXACT output[59] = 3
+EXACT output[60] = 3
+EXACT output[61] = 3
+EXACT output[62] = 3
+EXACT output[63] = 3
+EXACT output[64] = 4
+EXACT output[65] = 4
+EXACT output[66] = 4
+EXACT output[67] = 4
+EXACT output[68] = 4
+EXACT output[69] = 4
+EXACT output[70] = 4
+EXACT output[71] = 4
+EXACT output[72] = 4
+EXACT output[73] = 4
+EXACT output[74] = 4
+EXACT output[75] = 4
+EXACT output[76] = 4
+EXACT output[77] = 4
+EXACT output[78] = 4
+EXACT output[79] = 4
+EXACT output[80] = 5
+EXACT output[81] = 5
+EXACT output[82] = 5
+EXACT output[83] = 5
+EXACT output[84] = 5
+EXACT output[85] = 5
+EXACT output[86] = 5
+EXACT output[87] = 5
+EXACT output[88] = 5
+EXACT output[89] = 5
+EXACT output[90] = 5
+EXACT output[91] = 5
+EXACT output[92] = 5
+EXACT output[93] = 5
+EXACT output[94] = 5
+EXACT output[95] = 5
+EXACT output[96] = 6
+EXACT output[97] = 6
+EXACT output[98] = 6
+EXACT output[99] = 6
+EXACT output[100] = 6
+EXACT output[101] = 6
+EXACT output[102] = 6
+EXACT output[103] = 6
+EXACT output[104] = 6
+EXACT output[105] = 6
+EXACT output[106] = 6
+EXACT output[107] = 6
+EXACT output[108] = 6
+EXACT output[109] = 6
+EXACT output[110] = 6
+EXACT output[111] = 6
+EXACT output[112] = 7
+EXACT output[113] = 7
+EXACT output[114] = 7
+EXACT output[115] = 7
+EXACT output[116] = 7
+EXACT output[117] = 7
+EXACT output[118] = 7
+EXACT output[119] = 7
+EXACT output[120] = 7
+EXACT output[121] = 7
+EXACT output[122] = 7
+EXACT output[123] = 7
+EXACT output[124] = 7
+EXACT output[125] = 7
+EXACT output[126] = 7
+EXACT output[127] = 7
diff --git a/tests/kernels/misc/lvalue_loads.cl b/tests/kernels/misc/lvalue_loads.cl
new file mode 100644
index 0000000..eed8331
--- /dev/null
+++ b/tests/kernels/misc/lvalue_loads.cl
@@ -0,0 +1,119 @@
+typedef struct
+{
+ char a;
+ int b;
+ int c;
+ char d;
+} S;
+
+void va(global float4 *input, global float4 *output)
+{
+ int i = get_global_id(0);
+ output[i].z = 42.f;
+}
+
+void vb(global float4 *input, global float4 *output)
+{
+ int i = get_global_id(0);
+ output[i].z = 7.f;
+ output[i].y = 42.f;
+}
+
+void vc(global float4 *input, global float4 *output)
+{
+ int i = get_global_id(0);
+ output[i].zy = (float2)(7.f,42.f);
+}
+
+void vd(global float4 *input, global float4 *output)
+{
+ int i = get_global_id(0);
+ output[i].y = output[i].z;
+}
+
+void ve(global float4 *input, global float4 *output)
+{
+ int i = get_global_id(0);
+ output[i].wzyx = output[i];
+}
+
+void vf(global float4 *input, global float4 *output)
+{
+ int i = get_global_id(0);
+ output[i].zy = output[i].yz;
+}
+
+void vg(global float4 *input, global float4 *output)
+{
+ int i = get_global_id(0);
+ output[i].wzyx = input[i];
+}
+
+void vh(global float4 *input, global float4 *output)
+{
+ int i = get_global_id(0);
+ output[i].zy = input[i].yz;
+}
+
+void vi(global float4 *input, global float4 *output)
+{
+ int i = get_global_id(0);
+
+ float4 x = output[i];
+ x.z = 42.f;
+ output[i] = x;
+ output[i+1] = x;
+}
+
+void sa(global S *input, global S *output)
+{
+ int i = get_global_id(0);
+ output[i].c = 42;
+}
+
+void sb(global S *input, global S *output)
+{
+ int i = get_global_id(0);
+ output[i].c = output[i].b;
+}
+
+void sc(global S *input, global S *output)
+{
+ int i = get_global_id(0);
+ output[i].c = input[i].b;
+}
+
+kernel void lvalue_loads(
+ global float4 *vIn,
+ global float4 *vA,
+ global float4 *vB,
+ global float4 *vC,
+ global float4 *vD,
+ global float4 *vE,
+ global float4 *vF,
+ global float4 *vG,
+ global float4 *vH,
+ global float4 *vI,
+
+ global S *sIn,
+ global S *sA,
+ global S *sB,
+ global S *sC,
+
+ global float *nop
+ )
+{
+ va(vIn, vA);
+ vb(vIn, vB);
+ vc(vIn, vC);
+ vd(vIn, vD);
+ ve(vIn, vE);
+ vf(vIn, vF);
+ vg(vIn, vG);
+ vh(vIn, vH);
+ vi(vIn, vI);
+
+ sa(sIn, sA);
+ sb(sIn, sB);
+ sc(sIn, sC);
+}
diff --git a/tests/kernels/misc/lvalue_loads.ref b/tests/kernels/misc/lvalue_loads.ref
new file mode 100644
index 0000000..a2c7852
--- /dev/null
+++ b/tests/kernels/misc/lvalue_loads.ref
@@ -0,0 +1,75 @@
+EXACT Argument 'vA': 16 bytes
+EXACT vA[0] = 1
+EXACT vA[1] = 2
+EXACT vA[2] = 42
+EXACT vA[3] = 4
+
+EXACT Argument 'vB': 16 bytes
+EXACT vB[0] = 1
+EXACT vB[1] = 42
+EXACT vB[2] = 7
+EXACT vB[3] = 4
+
+EXACT Argument 'vC': 16 bytes
+EXACT vC[0] = 1
+EXACT vC[1] = 42
+EXACT vC[2] = 7
+EXACT vC[3] = 4
+
+EXACT Argument 'vD': 16 bytes
+EXACT vD[0] = 1
+EXACT vD[1] = 3
+EXACT vD[2] = 3
+EXACT vD[3] = 4
+
+EXACT Argument 'vE': 16 bytes
+EXACT vE[0] = 4
+EXACT vE[1] = 3
+EXACT vE[2] = 2
+EXACT vE[3] = 1
+
+EXACT Argument 'vF': 16 bytes
+EXACT vF[0] = 1
+EXACT vF[1] = 3
+EXACT vF[2] = 2
+EXACT vF[3] = 4
+
+EXACT Argument 'vG': 16 bytes
+EXACT vG[0] = 19
+EXACT vG[1] = 18
+EXACT vG[2] = 17
+EXACT vG[3] = 16
+
+EXACT Argument 'vH': 16 bytes
+EXACT vH[0] = 1
+EXACT vH[1] = 18
+EXACT vH[2] = 17
+EXACT vH[3] = 4
+
+EXACT Argument 'vI': 32 bytes
+EXACT vI[0] = 1
+EXACT vI[1] = 2
+EXACT vI[2] = 42
+EXACT vI[3] = 4
+EXACT vI[4] = 1
+EXACT vI[5] = 2
+EXACT vI[6] = 42
+EXACT vI[7] = 4
+
+EXACT Argument 'sA': 16 bytes
+EXACT sA[0] = 1
+EXACT sA[1] = 2
+EXACT sA[2] = 42
+EXACT sA[3] = 4
+
+EXACT Argument 'sB': 16 bytes
+EXACT sB[0] = 1
+EXACT sB[1] = 2
+EXACT sB[2] = 2
+EXACT sB[3] = 4
+
+EXACT Argument 'sC': 16 bytes
+EXACT sC[0] = 1
+EXACT sC[1] = 2
+EXACT sC[2] = 17
+EXACT sC[3] = 4
diff --git a/tests/kernels/misc/lvalue_loads.sim b/tests/kernels/misc/lvalue_loads.sim
new file mode 100644
index 0000000..66823ee
--- /dev/null
+++ b/tests/kernels/misc/lvalue_loads.sim
@@ -0,0 +1,29 @@
+lvalue_loads.cl
+lvalue_loads
+1 1 1
+1 1 1
+
+# Vector input
+<size=16 range=16:1:19>
+
+# Vector outputs
+<size=16 range=1:1:4 dump wo>
+<size=16 range=1:1:4 dump wo>
+<size=16 range=1:1:4 dump wo>
+<size=16 range=1:1:4 dump>
+<size=16 range=1:1:4 dump>
+<size=16 range=1:1:4 dump>
+<size=16 range=1:1:4 dump wo>
+<size=16 range=1:1:4 dump wo>
+<size=32 range=1:1:8 dump>
+
+# Structure input
+<size=16 int range=16:1:19>
+
+# Structure outputs
+<size=16 int range=1:1:4 dump wo>
+<size=16 int range=1:1:4 dump>
+<size=16 int range=1:1:4 dump wo>
+
+# NOP
+<size=0 fill=0>
diff --git a/tests/kernels/misc/program_scope_constant_array.cl b/tests/kernels/misc/program_scope_constant_array.cl
new file mode 100644
index 0000000..786b0b2
--- /dev/null
+++ b/tests/kernels/misc/program_scope_constant_array.cl
@@ -0,0 +1,7 @@
+constant int data[4] = {7, 42, 0, -1};
+
+kernel void program_scope_constant_array(global int *output)
+{
+ int i = get_global_id(0);
+ output[i] = data[i];
+}
diff --git a/tests/kernels/misc/program_scope_constant_array.ref b/tests/kernels/misc/program_scope_constant_array.ref
new file mode 100644
index 0000000..ef2df1c
--- /dev/null
+++ b/tests/kernels/misc/program_scope_constant_array.ref
@@ -0,0 +1,5 @@
+EXACT Argument 'output': 16 bytes
+EXACT output[0] = 7
+EXACT output[1] = 42
+EXACT output[2] = 0
+EXACT output[3] = -1
diff --git a/tests/kernels/misc/program_scope_constant_array.sim b/tests/kernels/misc/program_scope_constant_array.sim
new file mode 100644
index 0000000..1f6eecd
--- /dev/null
+++ b/tests/kernels/misc/program_scope_constant_array.sim
@@ -0,0 +1,6 @@
+program_scope_constant_array.cl
+program_scope_constant_array
+4 1 1
+1 1 1
+
+<size=16 fill=0 dump>
diff --git a/tests/kernels/misc/reduce.ref b/tests/kernels/misc/reduce.ref
index fa92b4e..3ebb5ea 100644
--- a/tests/kernels/misc/reduce.ref
+++ b/tests/kernels/misc/reduce.ref
@@ -1,4 +1,2 @@
-
-Argument 'result': 4 bytes
- result[0] = 120
-
+EXACT Argument 'result': 4 bytes
+EXACT result[0] = 120
diff --git a/tests/kernels/misc/vecadd.ref b/tests/kernels/misc/vecadd.ref
index 9fa7b4c..841371f 100644
--- a/tests/kernels/misc/vecadd.ref
+++ b/tests/kernels/misc/vecadd.ref
@@ -1,1027 +1,1025 @@
-
-Argument 'c': 4096 bytes
- c[0] = 0
- c[1] = 2
- c[2] = 4
- c[3] = 6
- c[4] = 8
- c[5] = 10
- c[6] = 12
- c[7] = 14
- c[8] = 16
- c[9] = 18
- c[10] = 20
- c[11] = 22
- c[12] = 24
- c[13] = 26
- c[14] = 28
- c[15] = 30
- c[16] = 32
- c[17] = 34
- c[18] = 36
- c[19] = 38
- c[20] = 40
- c[21] = 42
- c[22] = 44
- c[23] = 46
- c[24] = 48
- c[25] = 50
- c[26] = 52
- c[27] = 54
- c[28] = 56
- c[29] = 58
- c[30] = 60
- c[31] = 62
- c[32] = 64
- c[33] = 66
- c[34] = 68
- c[35] = 70
- c[36] = 72
- c[37] = 74
- c[38] = 76
- c[39] = 78
- c[40] = 80
- c[41] = 82
- c[42] = 84
- c[43] = 86
- c[44] = 88
- c[45] = 90
- c[46] = 92
- c[47] = 94
- c[48] = 96
- c[49] = 98
- c[50] = 100
- c[51] = 102
- c[52] = 104
- c[53] = 106
- c[54] = 108
- c[55] = 110
- c[56] = 112
- c[57] = 114
- c[58] = 116
- c[59] = 118
- c[60] = 120
- c[61] = 122
- c[62] = 124
- c[63] = 126
- c[64] = 128
- c[65] = 130
- c[66] = 132
- c[67] = 134
- c[68] = 136
- c[69] = 138
- c[70] = 140
- c[71] = 142
- c[72] = 144
- c[73] = 146
- c[74] = 148
- c[75] = 150
- c[76] = 152
- c[77] = 154
- c[78] = 156
- c[79] = 158
- c[80] = 160
- c[81] = 162
- c[82] = 164
- c[83] = 166
- c[84] = 168
- c[85] = 170
- c[86] = 172
- c[87] = 174
- c[88] = 176
- c[89] = 178
- c[90] = 180
- c[91] = 182
- c[92] = 184
- c[93] = 186
- c[94] = 188
- c[95] = 190
- c[96] = 192
- c[97] = 194
- c[98] = 196
- c[99] = 198
- c[100] = 200
- c[101] = 202
- c[102] = 204
- c[103] = 206
- c[104] = 208
- c[105] = 210
- c[106] = 212
- c[107] = 214
- c[108] = 216
- c[109] = 218
- c[110] = 220
- c[111] = 222
- c[112] = 224
- c[113] = 226
- c[114] = 228
- c[115] = 230
- c[116] = 232
- c[117] = 234
- c[118] = 236
- c[119] = 238
- c[120] = 240
- c[121] = 242
- c[122] = 244
- c[123] = 246
- c[124] = 248
- c[125] = 250
- c[126] = 252
- c[127] = 254
- c[128] = 256
- c[129] = 258
- c[130] = 260
- c[131] = 262
- c[132] = 264
- c[133] = 266
- c[134] = 268
- c[135] = 270
- c[136] = 272
- c[137] = 274
- c[138] = 276
- c[139] = 278
- c[140] = 280
- c[141] = 282
- c[142] = 284
- c[143] = 286
- c[144] = 288
- c[145] = 290
- c[146] = 292
- c[147] = 294
- c[148] = 296
- c[149] = 298
- c[150] = 300
- c[151] = 302
- c[152] = 304
- c[153] = 306
- c[154] = 308
- c[155] = 310
- c[156] = 312
- c[157] = 314
- c[158] = 316
- c[159] = 318
- c[160] = 320
- c[161] = 322
- c[162] = 324
- c[163] = 326
- c[164] = 328
- c[165] = 330
- c[166] = 332
- c[167] = 334
- c[168] = 336
- c[169] = 338
- c[170] = 340
- c[171] = 342
- c[172] = 344
- c[173] = 346
- c[174] = 348
- c[175] = 350
- c[176] = 352
- c[177] = 354
- c[178] = 356
- c[179] = 358
- c[180] = 360
- c[181] = 362
- c[182] = 364
- c[183] = 366
- c[184] = 368
- c[185] = 370
- c[186] = 372
- c[187] = 374
- c[188] = 376
- c[189] = 378
- c[190] = 380
- c[191] = 382
- c[192] = 384
- c[193] = 386
- c[194] = 388
- c[195] = 390
- c[196] = 392
- c[197] = 394
- c[198] = 396
- c[199] = 398
- c[200] = 400
- c[201] = 402
- c[202] = 404
- c[203] = 406
- c[204] = 408
- c[205] = 410
- c[206] = 412
- c[207] = 414
- c[208] = 416
- c[209] = 418
- c[210] = 420
- c[211] = 422
- c[212] = 424
- c[213] = 426
- c[214] = 428
- c[215] = 430
- c[216] = 432
- c[217] = 434
- c[218] = 436
- c[219] = 438
- c[220] = 440
- c[221] = 442
- c[222] = 444
- c[223] = 446
- c[224] = 448
- c[225] = 450
- c[226] = 452
- c[227] = 454
- c[228] = 456
- c[229] = 458
- c[230] = 460
- c[231] = 462
- c[232] = 464
- c[233] = 466
- c[234] = 468
- c[235] = 470
- c[236] = 472
- c[237] = 474
- c[238] = 476
- c[239] = 478
- c[240] = 480
- c[241] = 482
- c[242] = 484
- c[243] = 486
- c[244] = 488
- c[245] = 490
- c[246] = 492
- c[247] = 494
- c[248] = 496
- c[249] = 498
- c[250] = 500
- c[251] = 502
- c[252] = 504
- c[253] = 506
- c[254] = 508
- c[255] = 510
- c[256] = 512
- c[257] = 514
- c[258] = 516
- c[259] = 518
- c[260] = 520
- c[261] = 522
- c[262] = 524
- c[263] = 526
- c[264] = 528
- c[265] = 530
- c[266] = 532
- c[267] = 534
- c[268] = 536
- c[269] = 538
- c[270] = 540
- c[271] = 542
- c[272] = 544
- c[273] = 546
- c[274] = 548
- c[275] = 550
- c[276] = 552
- c[277] = 554
- c[278] = 556
- c[279] = 558
- c[280] = 560
- c[281] = 562
- c[282] = 564
- c[283] = 566
- c[284] = 568
- c[285] = 570
- c[286] = 572
- c[287] = 574
- c[288] = 576
- c[289] = 578
- c[290] = 580
- c[291] = 582
- c[292] = 584
- c[293] = 586
- c[294] = 588
- c[295] = 590
- c[296] = 592
- c[297] = 594
- c[298] = 596
- c[299] = 598
- c[300] = 600
- c[301] = 602
- c[302] = 604
- c[303] = 606
- c[304] = 608
- c[305] = 610
- c[306] = 612
- c[307] = 614
- c[308] = 616
- c[309] = 618
- c[310] = 620
- c[311] = 622
- c[312] = 624
- c[313] = 626
- c[314] = 628
- c[315] = 630
- c[316] = 632
- c[317] = 634
- c[318] = 636
- c[319] = 638
- c[320] = 640
- c[321] = 642
- c[322] = 644
- c[323] = 646
- c[324] = 648
- c[325] = 650
- c[326] = 652
- c[327] = 654
- c[328] = 656
- c[329] = 658
- c[330] = 660
- c[331] = 662
- c[332] = 664
- c[333] = 666
- c[334] = 668
- c[335] = 670
- c[336] = 672
- c[337] = 674
- c[338] = 676
- c[339] = 678
- c[340] = 680
- c[341] = 682
- c[342] = 684
- c[343] = 686
- c[344] = 688
- c[345] = 690
- c[346] = 692
- c[347] = 694
- c[348] = 696
- c[349] = 698
- c[350] = 700
- c[351] = 702
- c[352] = 704
- c[353] = 706
- c[354] = 708
- c[355] = 710
- c[356] = 712
- c[357] = 714
- c[358] = 716
- c[359] = 718
- c[360] = 720
- c[361] = 722
- c[362] = 724
- c[363] = 726
- c[364] = 728
- c[365] = 730
- c[366] = 732
- c[367] = 734
- c[368] = 736
- c[369] = 738
- c[370] = 740
- c[371] = 742
- c[372] = 744
- c[373] = 746
- c[374] = 748
- c[375] = 750
- c[376] = 752
- c[377] = 754
- c[378] = 756
- c[379] = 758
- c[380] = 760
- c[381] = 762
- c[382] = 764
- c[383] = 766
- c[384] = 768
- c[385] = 770
- c[386] = 772
- c[387] = 774
- c[388] = 776
- c[389] = 778
- c[390] = 780
- c[391] = 782
- c[392] = 784
- c[393] = 786
- c[394] = 788
- c[395] = 790
- c[396] = 792
- c[397] = 794
- c[398] = 796
- c[399] = 798
- c[400] = 800
- c[401] = 802
- c[402] = 804
- c[403] = 806
- c[404] = 808
- c[405] = 810
- c[406] = 812
- c[407] = 814
- c[408] = 816
- c[409] = 818
- c[410] = 820
- c[411] = 822
- c[412] = 824
- c[413] = 826
- c[414] = 828
- c[415] = 830
- c[416] = 832
- c[417] = 834
- c[418] = 836
- c[419] = 838
- c[420] = 840
- c[421] = 842
- c[422] = 844
- c[423] = 846
- c[424] = 848
- c[425] = 850
- c[426] = 852
- c[427] = 854
- c[428] = 856
- c[429] = 858
- c[430] = 860
- c[431] = 862
- c[432] = 864
- c[433] = 866
- c[434] = 868
- c[435] = 870
- c[436] = 872
- c[437] = 874
- c[438] = 876
- c[439] = 878
- c[440] = 880
- c[441] = 882
- c[442] = 884
- c[443] = 886
- c[444] = 888
- c[445] = 890
- c[446] = 892
- c[447] = 894
- c[448] = 896
- c[449] = 898
- c[450] = 900
- c[451] = 902
- c[452] = 904
- c[453] = 906
- c[454] = 908
- c[455] = 910
- c[456] = 912
- c[457] = 914
- c[458] = 916
- c[459] = 918
- c[460] = 920
- c[461] = 922
- c[462] = 924
- c[463] = 926
- c[464] = 928
- c[465] = 930
- c[466] = 932
- c[467] = 934
- c[468] = 936
- c[469] = 938
- c[470] = 940
- c[471] = 942
- c[472] = 944
- c[473] = 946
- c[474] = 948
- c[475] = 950
- c[476] = 952
- c[477] = 954
- c[478] = 956
- c[479] = 958
- c[480] = 960
- c[481] = 962
- c[482] = 964
- c[483] = 966
- c[484] = 968
- c[485] = 970
- c[486] = 972
- c[487] = 974
- c[488] = 976
- c[489] = 978
- c[490] = 980
- c[491] = 982
- c[492] = 984
- c[493] = 986
- c[494] = 988
- c[495] = 990
- c[496] = 992
- c[497] = 994
- c[498] = 996
- c[499] = 998
- c[500] = 1000
- c[501] = 1002
- c[502] = 1004
- c[503] = 1006
- c[504] = 1008
- c[505] = 1010
- c[506] = 1012
- c[507] = 1014
- c[508] = 1016
- c[509] = 1018
- c[510] = 1020
- c[511] = 1022
- c[512] = 1024
- c[513] = 1026
- c[514] = 1028
- c[515] = 1030
- c[516] = 1032
- c[517] = 1034
- c[518] = 1036
- c[519] = 1038
- c[520] = 1040
- c[521] = 1042
- c[522] = 1044
- c[523] = 1046
- c[524] = 1048
- c[525] = 1050
- c[526] = 1052
- c[527] = 1054
- c[528] = 1056
- c[529] = 1058
- c[530] = 1060
- c[531] = 1062
- c[532] = 1064
- c[533] = 1066
- c[534] = 1068
- c[535] = 1070
- c[536] = 1072
- c[537] = 1074
- c[538] = 1076
- c[539] = 1078
- c[540] = 1080
- c[541] = 1082
- c[542] = 1084
- c[543] = 1086
- c[544] = 1088
- c[545] = 1090
- c[546] = 1092
- c[547] = 1094
- c[548] = 1096
- c[549] = 1098
- c[550] = 1100
- c[551] = 1102
- c[552] = 1104
- c[553] = 1106
- c[554] = 1108
- c[555] = 1110
- c[556] = 1112
- c[557] = 1114
- c[558] = 1116
- c[559] = 1118
- c[560] = 1120
- c[561] = 1122
- c[562] = 1124
- c[563] = 1126
- c[564] = 1128
- c[565] = 1130
- c[566] = 1132
- c[567] = 1134
- c[568] = 1136
- c[569] = 1138
- c[570] = 1140
- c[571] = 1142
- c[572] = 1144
- c[573] = 1146
- c[574] = 1148
- c[575] = 1150
- c[576] = 1152
- c[577] = 1154
- c[578] = 1156
- c[579] = 1158
- c[580] = 1160
- c[581] = 1162
- c[582] = 1164
- c[583] = 1166
- c[584] = 1168
- c[585] = 1170
- c[586] = 1172
- c[587] = 1174
- c[588] = 1176
- c[589] = 1178
- c[590] = 1180
- c[591] = 1182
- c[592] = 1184
- c[593] = 1186
- c[594] = 1188
- c[595] = 1190
- c[596] = 1192
- c[597] = 1194
- c[598] = 1196
- c[599] = 1198
- c[600] = 1200
- c[601] = 1202
- c[602] = 1204
- c[603] = 1206
- c[604] = 1208
- c[605] = 1210
- c[606] = 1212
- c[607] = 1214
- c[608] = 1216
- c[609] = 1218
- c[610] = 1220
- c[611] = 1222
- c[612] = 1224
- c[613] = 1226
- c[614] = 1228
- c[615] = 1230
- c[616] = 1232
- c[617] = 1234
- c[618] = 1236
- c[619] = 1238
- c[620] = 1240
- c[621] = 1242
- c[622] = 1244
- c[623] = 1246
- c[624] = 1248
- c[625] = 1250
- c[626] = 1252
- c[627] = 1254
- c[628] = 1256
- c[629] = 1258
- c[630] = 1260
- c[631] = 1262
- c[632] = 1264
- c[633] = 1266
- c[634] = 1268
- c[635] = 1270
- c[636] = 1272
- c[637] = 1274
- c[638] = 1276
- c[639] = 1278
- c[640] = 1280
- c[641] = 1282
- c[642] = 1284
- c[643] = 1286
- c[644] = 1288
- c[645] = 1290
- c[646] = 1292
- c[647] = 1294
- c[648] = 1296
- c[649] = 1298
- c[650] = 1300
- c[651] = 1302
- c[652] = 1304
- c[653] = 1306
- c[654] = 1308
- c[655] = 1310
- c[656] = 1312
- c[657] = 1314
- c[658] = 1316
- c[659] = 1318
- c[660] = 1320
- c[661] = 1322
- c[662] = 1324
- c[663] = 1326
- c[664] = 1328
- c[665] = 1330
- c[666] = 1332
- c[667] = 1334
- c[668] = 1336
- c[669] = 1338
- c[670] = 1340
- c[671] = 1342
- c[672] = 1344
- c[673] = 1346
- c[674] = 1348
- c[675] = 1350
- c[676] = 1352
- c[677] = 1354
- c[678] = 1356
- c[679] = 1358
- c[680] = 1360
- c[681] = 1362
- c[682] = 1364
- c[683] = 1366
- c[684] = 1368
- c[685] = 1370
- c[686] = 1372
- c[687] = 1374
- c[688] = 1376
- c[689] = 1378
- c[690] = 1380
- c[691] = 1382
- c[692] = 1384
- c[693] = 1386
- c[694] = 1388
- c[695] = 1390
- c[696] = 1392
- c[697] = 1394
- c[698] = 1396
- c[699] = 1398
- c[700] = 1400
- c[701] = 1402
- c[702] = 1404
- c[703] = 1406
- c[704] = 1408
- c[705] = 1410
- c[706] = 1412
- c[707] = 1414
- c[708] = 1416
- c[709] = 1418
- c[710] = 1420
- c[711] = 1422
- c[712] = 1424
- c[713] = 1426
- c[714] = 1428
- c[715] = 1430
- c[716] = 1432
- c[717] = 1434
- c[718] = 1436
- c[719] = 1438
- c[720] = 1440
- c[721] = 1442
- c[722] = 1444
- c[723] = 1446
- c[724] = 1448
- c[725] = 1450
- c[726] = 1452
- c[727] = 1454
- c[728] = 1456
- c[729] = 1458
- c[730] = 1460
- c[731] = 1462
- c[732] = 1464
- c[733] = 1466
- c[734] = 1468
- c[735] = 1470
- c[736] = 1472
- c[737] = 1474
- c[738] = 1476
- c[739] = 1478
- c[740] = 1480
- c[741] = 1482
- c[742] = 1484
- c[743] = 1486
- c[744] = 1488
- c[745] = 1490
- c[746] = 1492
- c[747] = 1494
- c[748] = 1496
- c[749] = 1498
- c[750] = 1500
- c[751] = 1502
- c[752] = 1504
- c[753] = 1506
- c[754] = 1508
- c[755] = 1510
- c[756] = 1512
- c[757] = 1514
- c[758] = 1516
- c[759] = 1518
- c[760] = 1520
- c[761] = 1522
- c[762] = 1524
- c[763] = 1526
- c[764] = 1528
- c[765] = 1530
- c[766] = 1532
- c[767] = 1534
- c[768] = 1536
- c[769] = 1538
- c[770] = 1540
- c[771] = 1542
- c[772] = 1544
- c[773] = 1546
- c[774] = 1548
- c[775] = 1550
- c[776] = 1552
- c[777] = 1554
- c[778] = 1556
- c[779] = 1558
- c[780] = 1560
- c[781] = 1562
- c[782] = 1564
- c[783] = 1566
- c[784] = 1568
- c[785] = 1570
- c[786] = 1572
- c[787] = 1574
- c[788] = 1576
- c[789] = 1578
- c[790] = 1580
- c[791] = 1582
- c[792] = 1584
- c[793] = 1586
- c[794] = 1588
- c[795] = 1590
- c[796] = 1592
- c[797] = 1594
- c[798] = 1596
- c[799] = 1598
- c[800] = 1600
- c[801] = 1602
- c[802] = 1604
- c[803] = 1606
- c[804] = 1608
- c[805] = 1610
- c[806] = 1612
- c[807] = 1614
- c[808] = 1616
- c[809] = 1618
- c[810] = 1620
- c[811] = 1622
- c[812] = 1624
- c[813] = 1626
- c[814] = 1628
- c[815] = 1630
- c[816] = 1632
- c[817] = 1634
- c[818] = 1636
- c[819] = 1638
- c[820] = 1640
- c[821] = 1642
- c[822] = 1644
- c[823] = 1646
- c[824] = 1648
- c[825] = 1650
- c[826] = 1652
- c[827] = 1654
- c[828] = 1656
- c[829] = 1658
- c[830] = 1660
- c[831] = 1662
- c[832] = 1664
- c[833] = 1666
- c[834] = 1668
- c[835] = 1670
- c[836] = 1672
- c[837] = 1674
- c[838] = 1676
- c[839] = 1678
- c[840] = 1680
- c[841] = 1682
- c[842] = 1684
- c[843] = 1686
- c[844] = 1688
- c[845] = 1690
- c[846] = 1692
- c[847] = 1694
- c[848] = 1696
- c[849] = 1698
- c[850] = 1700
- c[851] = 1702
- c[852] = 1704
- c[853] = 1706
- c[854] = 1708
- c[855] = 1710
- c[856] = 1712
- c[857] = 1714
- c[858] = 1716
- c[859] = 1718
- c[860] = 1720
- c[861] = 1722
- c[862] = 1724
- c[863] = 1726
- c[864] = 1728
- c[865] = 1730
- c[866] = 1732
- c[867] = 1734
- c[868] = 1736
- c[869] = 1738
- c[870] = 1740
- c[871] = 1742
- c[872] = 1744
- c[873] = 1746
- c[874] = 1748
- c[875] = 1750
- c[876] = 1752
- c[877] = 1754
- c[878] = 1756
- c[879] = 1758
- c[880] = 1760
- c[881] = 1762
- c[882] = 1764
- c[883] = 1766
- c[884] = 1768
- c[885] = 1770
- c[886] = 1772
- c[887] = 1774
- c[888] = 1776
- c[889] = 1778
- c[890] = 1780
- c[891] = 1782
- c[892] = 1784
- c[893] = 1786
- c[894] = 1788
- c[895] = 1790
- c[896] = 1792
- c[897] = 1794
- c[898] = 1796
- c[899] = 1798
- c[900] = 1800
- c[901] = 1802
- c[902] = 1804
- c[903] = 1806
- c[904] = 1808
- c[905] = 1810
- c[906] = 1812
- c[907] = 1814
- c[908] = 1816
- c[909] = 1818
- c[910] = 1820
- c[911] = 1822
- c[912] = 1824
- c[913] = 1826
- c[914] = 1828
- c[915] = 1830
- c[916] = 1832
- c[917] = 1834
- c[918] = 1836
- c[919] = 1838
- c[920] = 1840
- c[921] = 1842
- c[922] = 1844
- c[923] = 1846
- c[924] = 1848
- c[925] = 1850
- c[926] = 1852
- c[927] = 1854
- c[928] = 1856
- c[929] = 1858
- c[930] = 1860
- c[931] = 1862
- c[932] = 1864
- c[933] = 1866
- c[934] = 1868
- c[935] = 1870
- c[936] = 1872
- c[937] = 1874
- c[938] = 1876
- c[939] = 1878
- c[940] = 1880
- c[941] = 1882
- c[942] = 1884
- c[943] = 1886
- c[944] = 1888
- c[945] = 1890
- c[946] = 1892
- c[947] = 1894
- c[948] = 1896
- c[949] = 1898
- c[950] = 1900
- c[951] = 1902
- c[952] = 1904
- c[953] = 1906
- c[954] = 1908
- c[955] = 1910
- c[956] = 1912
- c[957] = 1914
- c[958] = 1916
- c[959] = 1918
- c[960] = 1920
- c[961] = 1922
- c[962] = 1924
- c[963] = 1926
- c[964] = 1928
- c[965] = 1930
- c[966] = 1932
- c[967] = 1934
- c[968] = 1936
- c[969] = 1938
- c[970] = 1940
- c[971] = 1942
- c[972] = 1944
- c[973] = 1946
- c[974] = 1948
- c[975] = 1950
- c[976] = 1952
- c[977] = 1954
- c[978] = 1956
- c[979] = 1958
- c[980] = 1960
- c[981] = 1962
- c[982] = 1964
- c[983] = 1966
- c[984] = 1968
- c[985] = 1970
- c[986] = 1972
- c[987] = 1974
- c[988] = 1976
- c[989] = 1978
- c[990] = 1980
- c[991] = 1982
- c[992] = 1984
- c[993] = 1986
- c[994] = 1988
- c[995] = 1990
- c[996] = 1992
- c[997] = 1994
- c[998] = 1996
- c[999] = 1998
- c[1000] = 2000
- c[1001] = 2002
- c[1002] = 2004
- c[1003] = 2006
- c[1004] = 2008
- c[1005] = 2010
- c[1006] = 2012
- c[1007] = 2014
- c[1008] = 2016
- c[1009] = 2018
- c[1010] = 2020
- c[1011] = 2022
- c[1012] = 2024
- c[1013] = 2026
- c[1014] = 2028
- c[1015] = 2030
- c[1016] = 2032
- c[1017] = 2034
- c[1018] = 2036
- c[1019] = 2038
- c[1020] = 2040
- c[1021] = 2042
- c[1022] = 2044
- c[1023] = 2046
-
+EXACT Argument 'c': 4096 bytes
+EXACT c[0] = 0
+EXACT c[1] = 2
+EXACT c[2] = 4
+EXACT c[3] = 6
+EXACT c[4] = 8
+EXACT c[5] = 10
+EXACT c[6] = 12
+EXACT c[7] = 14
+EXACT c[8] = 16
+EXACT c[9] = 18
+EXACT c[10] = 20
+EXACT c[11] = 22
+EXACT c[12] = 24
+EXACT c[13] = 26
+EXACT c[14] = 28
+EXACT c[15] = 30
+EXACT c[16] = 32
+EXACT c[17] = 34
+EXACT c[18] = 36
+EXACT c[19] = 38
+EXACT c[20] = 40
+EXACT c[21] = 42
+EXACT c[22] = 44
+EXACT c[23] = 46
+EXACT c[24] = 48
+EXACT c[25] = 50
+EXACT c[26] = 52
+EXACT c[27] = 54
+EXACT c[28] = 56
+EXACT c[29] = 58
+EXACT c[30] = 60
+EXACT c[31] = 62
+EXACT c[32] = 64
+EXACT c[33] = 66
+EXACT c[34] = 68
+EXACT c[35] = 70
+EXACT c[36] = 72
+EXACT c[37] = 74
+EXACT c[38] = 76
+EXACT c[39] = 78
+EXACT c[40] = 80
+EXACT c[41] = 82
+EXACT c[42] = 84
+EXACT c[43] = 86
+EXACT c[44] = 88
+EXACT c[45] = 90
+EXACT c[46] = 92
+EXACT c[47] = 94
+EXACT c[48] = 96
+EXACT c[49] = 98
+EXACT c[50] = 100
+EXACT c[51] = 102
+EXACT c[52] = 104
+EXACT c[53] = 106
+EXACT c[54] = 108
+EXACT c[55] = 110
+EXACT c[56] = 112
+EXACT c[57] = 114
+EXACT c[58] = 116
+EXACT c[59] = 118
+EXACT c[60] = 120
+EXACT c[61] = 122
+EXACT c[62] = 124
+EXACT c[63] = 126
+EXACT c[64] = 128
+EXACT c[65] = 130
+EXACT c[66] = 132
+EXACT c[67] = 134
+EXACT c[68] = 136
+EXACT c[69] = 138
+EXACT c[70] = 140
+EXACT c[71] = 142
+EXACT c[72] = 144
+EXACT c[73] = 146
+EXACT c[74] = 148
+EXACT c[75] = 150
+EXACT c[76] = 152
+EXACT c[77] = 154
+EXACT c[78] = 156
+EXACT c[79] = 158
+EXACT c[80] = 160
+EXACT c[81] = 162
+EXACT c[82] = 164
+EXACT c[83] = 166
+EXACT c[84] = 168
+EXACT c[85] = 170
+EXACT c[86] = 172
+EXACT c[87] = 174
+EXACT c[88] = 176
+EXACT c[89] = 178
+EXACT c[90] = 180
+EXACT c[91] = 182
+EXACT c[92] = 184
+EXACT c[93] = 186
+EXACT c[94] = 188
+EXACT c[95] = 190
+EXACT c[96] = 192
+EXACT c[97] = 194
+EXACT c[98] = 196
+EXACT c[99] = 198
+EXACT c[100] = 200
+EXACT c[101] = 202
+EXACT c[102] = 204
+EXACT c[103] = 206
+EXACT c[104] = 208
+EXACT c[105] = 210
+EXACT c[106] = 212
+EXACT c[107] = 214
+EXACT c[108] = 216
+EXACT c[109] = 218
+EXACT c[110] = 220
+EXACT c[111] = 222
+EXACT c[112] = 224
+EXACT c[113] = 226
+EXACT c[114] = 228
+EXACT c[115] = 230
+EXACT c[116] = 232
+EXACT c[117] = 234
+EXACT c[118] = 236
+EXACT c[119] = 238
+EXACT c[120] = 240
+EXACT c[121] = 242
+EXACT c[122] = 244
+EXACT c[123] = 246
+EXACT c[124] = 248
+EXACT c[125] = 250
+EXACT c[126] = 252
+EXACT c[127] = 254
+EXACT c[128] = 256
+EXACT c[129] = 258
+EXACT c[130] = 260
+EXACT c[131] = 262
+EXACT c[132] = 264
+EXACT c[133] = 266
+EXACT c[134] = 268
+EXACT c[135] = 270
+EXACT c[136] = 272
+EXACT c[137] = 274
+EXACT c[138] = 276
+EXACT c[139] = 278
+EXACT c[140] = 280
+EXACT c[141] = 282
+EXACT c[142] = 284
+EXACT c[143] = 286
+EXACT c[144] = 288
+EXACT c[145] = 290
+EXACT c[146] = 292
+EXACT c[147] = 294
+EXACT c[148] = 296
+EXACT c[149] = 298
+EXACT c[150] = 300
+EXACT c[151] = 302
+EXACT c[152] = 304
+EXACT c[153] = 306
+EXACT c[154] = 308
+EXACT c[155] = 310
+EXACT c[156] = 312
+EXACT c[157] = 314
+EXACT c[158] = 316
+EXACT c[159] = 318
+EXACT c[160] = 320
+EXACT c[161] = 322
+EXACT c[162] = 324
+EXACT c[163] = 326
+EXACT c[164] = 328
+EXACT c[165] = 330
+EXACT c[166] = 332
+EXACT c[167] = 334
+EXACT c[168] = 336
+EXACT c[169] = 338
+EXACT c[170] = 340
+EXACT c[171] = 342
+EXACT c[172] = 344
+EXACT c[173] = 346
+EXACT c[174] = 348
+EXACT c[175] = 350
+EXACT c[176] = 352
+EXACT c[177] = 354
+EXACT c[178] = 356
+EXACT c[179] = 358
+EXACT c[180] = 360
+EXACT c[181] = 362
+EXACT c[182] = 364
+EXACT c[183] = 366
+EXACT c[184] = 368
+EXACT c[185] = 370
+EXACT c[186] = 372
+EXACT c[187] = 374
+EXACT c[188] = 376
+EXACT c[189] = 378
+EXACT c[190] = 380
+EXACT c[191] = 382
+EXACT c[192] = 384
+EXACT c[193] = 386
+EXACT c[194] = 388
+EXACT c[195] = 390
+EXACT c[196] = 392
+EXACT c[197] = 394
+EXACT c[198] = 396
+EXACT c[199] = 398
+EXACT c[200] = 400
+EXACT c[201] = 402
+EXACT c[202] = 404
+EXACT c[203] = 406
+EXACT c[204] = 408
+EXACT c[205] = 410
+EXACT c[206] = 412
+EXACT c[207] = 414
+EXACT c[208] = 416
+EXACT c[209] = 418
+EXACT c[210] = 420
+EXACT c[211] = 422
+EXACT c[212] = 424
+EXACT c[213] = 426
+EXACT c[214] = 428
+EXACT c[215] = 430
+EXACT c[216] = 432
+EXACT c[217] = 434
+EXACT c[218] = 436
+EXACT c[219] = 438
+EXACT c[220] = 440
+EXACT c[221] = 442
+EXACT c[222] = 444
+EXACT c[223] = 446
+EXACT c[224] = 448
+EXACT c[225] = 450
+EXACT c[226] = 452
+EXACT c[227] = 454
+EXACT c[228] = 456
+EXACT c[229] = 458
+EXACT c[230] = 460
+EXACT c[231] = 462
+EXACT c[232] = 464
+EXACT c[233] = 466
+EXACT c[234] = 468
+EXACT c[235] = 470
+EXACT c[236] = 472
+EXACT c[237] = 474
+EXACT c[238] = 476
+EXACT c[239] = 478
+EXACT c[240] = 480
+EXACT c[241] = 482
+EXACT c[242] = 484
+EXACT c[243] = 486
+EXACT c[244] = 488
+EXACT c[245] = 490
+EXACT c[246] = 492
+EXACT c[247] = 494
+EXACT c[248] = 496
+EXACT c[249] = 498
+EXACT c[250] = 500
+EXACT c[251] = 502
+EXACT c[252] = 504
+EXACT c[253] = 506
+EXACT c[254] = 508
+EXACT c[255] = 510
+EXACT c[256] = 512
+EXACT c[257] = 514
+EXACT c[258] = 516
+EXACT c[259] = 518
+EXACT c[260] = 520
+EXACT c[261] = 522
+EXACT c[262] = 524
+EXACT c[263] = 526
+EXACT c[264] = 528
+EXACT c[265] = 530
+EXACT c[266] = 532
+EXACT c[267] = 534
+EXACT c[268] = 536
+EXACT c[269] = 538
+EXACT c[270] = 540
+EXACT c[271] = 542
+EXACT c[272] = 544
+EXACT c[273] = 546
+EXACT c[274] = 548
+EXACT c[275] = 550
+EXACT c[276] = 552
+EXACT c[277] = 554
+EXACT c[278] = 556
+EXACT c[279] = 558
+EXACT c[280] = 560
+EXACT c[281] = 562
+EXACT c[282] = 564
+EXACT c[283] = 566
+EXACT c[284] = 568
+EXACT c[285] = 570
+EXACT c[286] = 572
+EXACT c[287] = 574
+EXACT c[288] = 576
+EXACT c[289] = 578
+EXACT c[290] = 580
+EXACT c[291] = 582
+EXACT c[292] = 584
+EXACT c[293] = 586
+EXACT c[294] = 588
+EXACT c[295] = 590
+EXACT c[296] = 592
+EXACT c[297] = 594
+EXACT c[298] = 596
+EXACT c[299] = 598
+EXACT c[300] = 600
+EXACT c[301] = 602
+EXACT c[302] = 604
+EXACT c[303] = 606
+EXACT c[304] = 608
+EXACT c[305] = 610
+EXACT c[306] = 612
+EXACT c[307] = 614
+EXACT c[308] = 616
+EXACT c[309] = 618
+EXACT c[310] = 620
+EXACT c[311] = 622
+EXACT c[312] = 624
+EXACT c[313] = 626
+EXACT c[314] = 628
+EXACT c[315] = 630
+EXACT c[316] = 632
+EXACT c[317] = 634
+EXACT c[318] = 636
+EXACT c[319] = 638
+EXACT c[320] = 640
+EXACT c[321] = 642
+EXACT c[322] = 644
+EXACT c[323] = 646
+EXACT c[324] = 648
+EXACT c[325] = 650
+EXACT c[326] = 652
+EXACT c[327] = 654
+EXACT c[328] = 656
+EXACT c[329] = 658
+EXACT c[330] = 660
+EXACT c[331] = 662
+EXACT c[332] = 664
+EXACT c[333] = 666
+EXACT c[334] = 668
+EXACT c[335] = 670
+EXACT c[336] = 672
+EXACT c[337] = 674
+EXACT c[338] = 676
+EXACT c[339] = 678
+EXACT c[340] = 680
+EXACT c[341] = 682
+EXACT c[342] = 684
+EXACT c[343] = 686
+EXACT c[344] = 688
+EXACT c[345] = 690
+EXACT c[346] = 692
+EXACT c[347] = 694
+EXACT c[348] = 696
+EXACT c[349] = 698
+EXACT c[350] = 700
+EXACT c[351] = 702
+EXACT c[352] = 704
+EXACT c[353] = 706
+EXACT c[354] = 708
+EXACT c[355] = 710
+EXACT c[356] = 712
+EXACT c[357] = 714
+EXACT c[358] = 716
+EXACT c[359] = 718
+EXACT c[360] = 720
+EXACT c[361] = 722
+EXACT c[362] = 724
+EXACT c[363] = 726
+EXACT c[364] = 728
+EXACT c[365] = 730
+EXACT c[366] = 732
+EXACT c[367] = 734
+EXACT c[368] = 736
+EXACT c[369] = 738
+EXACT c[370] = 740
+EXACT c[371] = 742
+EXACT c[372] = 744
+EXACT c[373] = 746
+EXACT c[374] = 748
+EXACT c[375] = 750
+EXACT c[376] = 752
+EXACT c[377] = 754
+EXACT c[378] = 756
+EXACT c[379] = 758
+EXACT c[380] = 760
+EXACT c[381] = 762
+EXACT c[382] = 764
+EXACT c[383] = 766
+EXACT c[384] = 768
+EXACT c[385] = 770
+EXACT c[386] = 772
+EXACT c[387] = 774
+EXACT c[388] = 776
+EXACT c[389] = 778
+EXACT c[390] = 780
+EXACT c[391] = 782
+EXACT c[392] = 784
+EXACT c[393] = 786
+EXACT c[394] = 788
+EXACT c[395] = 790
+EXACT c[396] = 792
+EXACT c[397] = 794
+EXACT c[398] = 796
+EXACT c[399] = 798
+EXACT c[400] = 800
+EXACT c[401] = 802
+EXACT c[402] = 804
+EXACT c[403] = 806
+EXACT c[404] = 808
+EXACT c[405] = 810
+EXACT c[406] = 812
+EXACT c[407] = 814
+EXACT c[408] = 816
+EXACT c[409] = 818
+EXACT c[410] = 820
+EXACT c[411] = 822
+EXACT c[412] = 824
+EXACT c[413] = 826
+EXACT c[414] = 828
+EXACT c[415] = 830
+EXACT c[416] = 832
+EXACT c[417] = 834
+EXACT c[418] = 836
+EXACT c[419] = 838
+EXACT c[420] = 840
+EXACT c[421] = 842
+EXACT c[422] = 844
+EXACT c[423] = 846
+EXACT c[424] = 848
+EXACT c[425] = 850
+EXACT c[426] = 852
+EXACT c[427] = 854
+EXACT c[428] = 856
+EXACT c[429] = 858
+EXACT c[430] = 860
+EXACT c[431] = 862
+EXACT c[432] = 864
+EXACT c[433] = 866
+EXACT c[434] = 868
+EXACT c[435] = 870
+EXACT c[436] = 872
+EXACT c[437] = 874
+EXACT c[438] = 876
+EXACT c[439] = 878
+EXACT c[440] = 880
+EXACT c[441] = 882
+EXACT c[442] = 884
+EXACT c[443] = 886
+EXACT c[444] = 888
+EXACT c[445] = 890
+EXACT c[446] = 892
+EXACT c[447] = 894
+EXACT c[448] = 896
+EXACT c[449] = 898
+EXACT c[450] = 900
+EXACT c[451] = 902
+EXACT c[452] = 904
+EXACT c[453] = 906
+EXACT c[454] = 908
+EXACT c[455] = 910
+EXACT c[456] = 912
+EXACT c[457] = 914
+EXACT c[458] = 916
+EXACT c[459] = 918
+EXACT c[460] = 920
+EXACT c[461] = 922
+EXACT c[462] = 924
+EXACT c[463] = 926
+EXACT c[464] = 928
+EXACT c[465] = 930
+EXACT c[466] = 932
+EXACT c[467] = 934
+EXACT c[468] = 936
+EXACT c[469] = 938
+EXACT c[470] = 940
+EXACT c[471] = 942
+EXACT c[472] = 944
+EXACT c[473] = 946
+EXACT c[474] = 948
+EXACT c[475] = 950
+EXACT c[476] = 952
+EXACT c[477] = 954
+EXACT c[478] = 956
+EXACT c[479] = 958
+EXACT c[480] = 960
+EXACT c[481] = 962
+EXACT c[482] = 964
+EXACT c[483] = 966
+EXACT c[484] = 968
+EXACT c[485] = 970
+EXACT c[486] = 972
+EXACT c[487] = 974
+EXACT c[488] = 976
+EXACT c[489] = 978
+EXACT c[490] = 980
+EXACT c[491] = 982
+EXACT c[492] = 984
+EXACT c[493] = 986
+EXACT c[494] = 988
+EXACT c[495] = 990
+EXACT c[496] = 992
+EXACT c[497] = 994
+EXACT c[498] = 996
+EXACT c[499] = 998
+EXACT c[500] = 1000
+EXACT c[501] = 1002
+EXACT c[502] = 1004
+EXACT c[503] = 1006
+EXACT c[504] = 1008
+EXACT c[505] = 1010
+EXACT c[506] = 1012
+EXACT c[507] = 1014
+EXACT c[508] = 1016
+EXACT c[509] = 1018
+EXACT c[510] = 1020
+EXACT c[511] = 1022
+EXACT c[512] = 1024
+EXACT c[513] = 1026
+EXACT c[514] = 1028
+EXACT c[515] = 1030
+EXACT c[516] = 1032
+EXACT c[517] = 1034
+EXACT c[518] = 1036
+EXACT c[519] = 1038
+EXACT c[520] = 1040
+EXACT c[521] = 1042
+EXACT c[522] = 1044
+EXACT c[523] = 1046
+EXACT c[524] = 1048
+EXACT c[525] = 1050
+EXACT c[526] = 1052
+EXACT c[527] = 1054
+EXACT c[528] = 1056
+EXACT c[529] = 1058
+EXACT c[530] = 1060
+EXACT c[531] = 1062
+EXACT c[532] = 1064
+EXACT c[533] = 1066
+EXACT c[534] = 1068
+EXACT c[535] = 1070
+EXACT c[536] = 1072
+EXACT c[537] = 1074
+EXACT c[538] = 1076
+EXACT c[539] = 1078
+EXACT c[540] = 1080
+EXACT c[541] = 1082
+EXACT c[542] = 1084
+EXACT c[543] = 1086
+EXACT c[544] = 1088
+EXACT c[545] = 1090
+EXACT c[546] = 1092
+EXACT c[547] = 1094
+EXACT c[548] = 1096
+EXACT c[549] = 1098
+EXACT c[550] = 1100
+EXACT c[551] = 1102
+EXACT c[552] = 1104
+EXACT c[553] = 1106
+EXACT c[554] = 1108
+EXACT c[555] = 1110
+EXACT c[556] = 1112
+EXACT c[557] = 1114
+EXACT c[558] = 1116
+EXACT c[559] = 1118
+EXACT c[560] = 1120
+EXACT c[561] = 1122
+EXACT c[562] = 1124
+EXACT c[563] = 1126
+EXACT c[564] = 1128
+EXACT c[565] = 1130
+EXACT c[566] = 1132
+EXACT c[567] = 1134
+EXACT c[568] = 1136
+EXACT c[569] = 1138
+EXACT c[570] = 1140
+EXACT c[571] = 1142
+EXACT c[572] = 1144
+EXACT c[573] = 1146
+EXACT c[574] = 1148
+EXACT c[575] = 1150
+EXACT c[576] = 1152
+EXACT c[577] = 1154
+EXACT c[578] = 1156
+EXACT c[579] = 1158
+EXACT c[580] = 1160
+EXACT c[581] = 1162
+EXACT c[582] = 1164
+EXACT c[583] = 1166
+EXACT c[584] = 1168
+EXACT c[585] = 1170
+EXACT c[586] = 1172
+EXACT c[587] = 1174
+EXACT c[588] = 1176
+EXACT c[589] = 1178
+EXACT c[590] = 1180
+EXACT c[591] = 1182
+EXACT c[592] = 1184
+EXACT c[593] = 1186
+EXACT c[594] = 1188
+EXACT c[595] = 1190
+EXACT c[596] = 1192
+EXACT c[597] = 1194
+EXACT c[598] = 1196
+EXACT c[599] = 1198
+EXACT c[600] = 1200
+EXACT c[601] = 1202
+EXACT c[602] = 1204
+EXACT c[603] = 1206
+EXACT c[604] = 1208
+EXACT c[605] = 1210
+EXACT c[606] = 1212
+EXACT c[607] = 1214
+EXACT c[608] = 1216
+EXACT c[609] = 1218
+EXACT c[610] = 1220
+EXACT c[611] = 1222
+EXACT c[612] = 1224
+EXACT c[613] = 1226
+EXACT c[614] = 1228
+EXACT c[615] = 1230
+EXACT c[616] = 1232
+EXACT c[617] = 1234
+EXACT c[618] = 1236
+EXACT c[619] = 1238
+EXACT c[620] = 1240
+EXACT c[621] = 1242
+EXACT c[622] = 1244
+EXACT c[623] = 1246
+EXACT c[624] = 1248
+EXACT c[625] = 1250
+EXACT c[626] = 1252
+EXACT c[627] = 1254
+EXACT c[628] = 1256
+EXACT c[629] = 1258
+EXACT c[630] = 1260
+EXACT c[631] = 1262
+EXACT c[632] = 1264
+EXACT c[633] = 1266
+EXACT c[634] = 1268
+EXACT c[635] = 1270
+EXACT c[636] = 1272
+EXACT c[637] = 1274
+EXACT c[638] = 1276
+EXACT c[639] = 1278
+EXACT c[640] = 1280
+EXACT c[641] = 1282
+EXACT c[642] = 1284
+EXACT c[643] = 1286
+EXACT c[644] = 1288
+EXACT c[645] = 1290
+EXACT c[646] = 1292
+EXACT c[647] = 1294
+EXACT c[648] = 1296
+EXACT c[649] = 1298
+EXACT c[650] = 1300
+EXACT c[651] = 1302
+EXACT c[652] = 1304
+EXACT c[653] = 1306
+EXACT c[654] = 1308
+EXACT c[655] = 1310
+EXACT c[656] = 1312
+EXACT c[657] = 1314
+EXACT c[658] = 1316
+EXACT c[659] = 1318
+EXACT c[660] = 1320
+EXACT c[661] = 1322
+EXACT c[662] = 1324
+EXACT c[663] = 1326
+EXACT c[664] = 1328
+EXACT c[665] = 1330
+EXACT c[666] = 1332
+EXACT c[667] = 1334
+EXACT c[668] = 1336
+EXACT c[669] = 1338
+EXACT c[670] = 1340
+EXACT c[671] = 1342
+EXACT c[672] = 1344
+EXACT c[673] = 1346
+EXACT c[674] = 1348
+EXACT c[675] = 1350
+EXACT c[676] = 1352
+EXACT c[677] = 1354
+EXACT c[678] = 1356
+EXACT c[679] = 1358
+EXACT c[680] = 1360
+EXACT c[681] = 1362
+EXACT c[682] = 1364
+EXACT c[683] = 1366
+EXACT c[684] = 1368
+EXACT c[685] = 1370
+EXACT c[686] = 1372
+EXACT c[687] = 1374
+EXACT c[688] = 1376
+EXACT c[689] = 1378
+EXACT c[690] = 1380
+EXACT c[691] = 1382
+EXACT c[692] = 1384
+EXACT c[693] = 1386
+EXACT c[694] = 1388
+EXACT c[695] = 1390
+EXACT c[696] = 1392
+EXACT c[697] = 1394
+EXACT c[698] = 1396
+EXACT c[699] = 1398
+EXACT c[700] = 1400
+EXACT c[701] = 1402
+EXACT c[702] = 1404
+EXACT c[703] = 1406
+EXACT c[704] = 1408
+EXACT c[705] = 1410
+EXACT c[706] = 1412
+EXACT c[707] = 1414
+EXACT c[708] = 1416
+EXACT c[709] = 1418
+EXACT c[710] = 1420
+EXACT c[711] = 1422
+EXACT c[712] = 1424
+EXACT c[713] = 1426
+EXACT c[714] = 1428
+EXACT c[715] = 1430
+EXACT c[716] = 1432
+EXACT c[717] = 1434
+EXACT c[718] = 1436
+EXACT c[719] = 1438
+EXACT c[720] = 1440
+EXACT c[721] = 1442
+EXACT c[722] = 1444
+EXACT c[723] = 1446
+EXACT c[724] = 1448
+EXACT c[725] = 1450
+EXACT c[726] = 1452
+EXACT c[727] = 1454
+EXACT c[728] = 1456
+EXACT c[729] = 1458
+EXACT c[730] = 1460
+EXACT c[731] = 1462
+EXACT c[732] = 1464
+EXACT c[733] = 1466
+EXACT c[734] = 1468
+EXACT c[735] = 1470
+EXACT c[736] = 1472
+EXACT c[737] = 1474
+EXACT c[738] = 1476
+EXACT c[739] = 1478
+EXACT c[740] = 1480
+EXACT c[741] = 1482
+EXACT c[742] = 1484
+EXACT c[743] = 1486
+EXACT c[744] = 1488
+EXACT c[745] = 1490
+EXACT c[746] = 1492
+EXACT c[747] = 1494
+EXACT c[748] = 1496
+EXACT c[749] = 1498
+EXACT c[750] = 1500
+EXACT c[751] = 1502
+EXACT c[752] = 1504
+EXACT c[753] = 1506
+EXACT c[754] = 1508
+EXACT c[755] = 1510
+EXACT c[756] = 1512
+EXACT c[757] = 1514
+EXACT c[758] = 1516
+EXACT c[759] = 1518
+EXACT c[760] = 1520
+EXACT c[761] = 1522
+EXACT c[762] = 1524
+EXACT c[763] = 1526
+EXACT c[764] = 1528
+EXACT c[765] = 1530
+EXACT c[766] = 1532
+EXACT c[767] = 1534
+EXACT c[768] = 1536
+EXACT c[769] = 1538
+EXACT c[770] = 1540
+EXACT c[771] = 1542
+EXACT c[772] = 1544
+EXACT c[773] = 1546
+EXACT c[774] = 1548
+EXACT c[775] = 1550
+EXACT c[776] = 1552
+EXACT c[777] = 1554
+EXACT c[778] = 1556
+EXACT c[779] = 1558
+EXACT c[780] = 1560
+EXACT c[781] = 1562
+EXACT c[782] = 1564
+EXACT c[783] = 1566
+EXACT c[784] = 1568
+EXACT c[785] = 1570
+EXACT c[786] = 1572
+EXACT c[787] = 1574
+EXACT c[788] = 1576
+EXACT c[789] = 1578
+EXACT c[790] = 1580
+EXACT c[791] = 1582
+EXACT c[792] = 1584
+EXACT c[793] = 1586
+EXACT c[794] = 1588
+EXACT c[795] = 1590
+EXACT c[796] = 1592
+EXACT c[797] = 1594
+EXACT c[798] = 1596
+EXACT c[799] = 1598
+EXACT c[800] = 1600
+EXACT c[801] = 1602
+EXACT c[802] = 1604
+EXACT c[803] = 1606
+EXACT c[804] = 1608
+EXACT c[805] = 1610
+EXACT c[806] = 1612
+EXACT c[807] = 1614
+EXACT c[808] = 1616
+EXACT c[809] = 1618
+EXACT c[810] = 1620
+EXACT c[811] = 1622
+EXACT c[812] = 1624
+EXACT c[813] = 1626
+EXACT c[814] = 1628
+EXACT c[815] = 1630
+EXACT c[816] = 1632
+EXACT c[817] = 1634
+EXACT c[818] = 1636
+EXACT c[819] = 1638
+EXACT c[820] = 1640
+EXACT c[821] = 1642
+EXACT c[822] = 1644
+EXACT c[823] = 1646
+EXACT c[824] = 1648
+EXACT c[825] = 1650
+EXACT c[826] = 1652
+EXACT c[827] = 1654
+EXACT c[828] = 1656
+EXACT c[829] = 1658
+EXACT c[830] = 1660
+EXACT c[831] = 1662
+EXACT c[832] = 1664
+EXACT c[833] = 1666
+EXACT c[834] = 1668
+EXACT c[835] = 1670
+EXACT c[836] = 1672
+EXACT c[837] = 1674
+EXACT c[838] = 1676
+EXACT c[839] = 1678
+EXACT c[840] = 1680
+EXACT c[841] = 1682
+EXACT c[842] = 1684
+EXACT c[843] = 1686
+EXACT c[844] = 1688
+EXACT c[845] = 1690
+EXACT c[846] = 1692
+EXACT c[847] = 1694
+EXACT c[848] = 1696
+EXACT c[849] = 1698
+EXACT c[850] = 1700
+EXACT c[851] = 1702
+EXACT c[852] = 1704
+EXACT c[853] = 1706
+EXACT c[854] = 1708
+EXACT c[855] = 1710
+EXACT c[856] = 1712
+EXACT c[857] = 1714
+EXACT c[858] = 1716
+EXACT c[859] = 1718
+EXACT c[860] = 1720
+EXACT c[861] = 1722
+EXACT c[862] = 1724
+EXACT c[863] = 1726
+EXACT c[864] = 1728
+EXACT c[865] = 1730
+EXACT c[866] = 1732
+EXACT c[867] = 1734
+EXACT c[868] = 1736
+EXACT c[869] = 1738
+EXACT c[870] = 1740
+EXACT c[871] = 1742
+EXACT c[872] = 1744
+EXACT c[873] = 1746
+EXACT c[874] = 1748
+EXACT c[875] = 1750
+EXACT c[876] = 1752
+EXACT c[877] = 1754
+EXACT c[878] = 1756
+EXACT c[879] = 1758
+EXACT c[880] = 1760
+EXACT c[881] = 1762
+EXACT c[882] = 1764
+EXACT c[883] = 1766
+EXACT c[884] = 1768
+EXACT c[885] = 1770
+EXACT c[886] = 1772
+EXACT c[887] = 1774
+EXACT c[888] = 1776
+EXACT c[889] = 1778
+EXACT c[890] = 1780
+EXACT c[891] = 1782
+EXACT c[892] = 1784
+EXACT c[893] = 1786
+EXACT c[894] = 1788
+EXACT c[895] = 1790
+EXACT c[896] = 1792
+EXACT c[897] = 1794
+EXACT c[898] = 1796
+EXACT c[899] = 1798
+EXACT c[900] = 1800
+EXACT c[901] = 1802
+EXACT c[902] = 1804
+EXACT c[903] = 1806
+EXACT c[904] = 1808
+EXACT c[905] = 1810
+EXACT c[906] = 1812
+EXACT c[907] = 1814
+EXACT c[908] = 1816
+EXACT c[909] = 1818
+EXACT c[910] = 1820
+EXACT c[911] = 1822
+EXACT c[912] = 1824
+EXACT c[913] = 1826
+EXACT c[914] = 1828
+EXACT c[915] = 1830
+EXACT c[916] = 1832
+EXACT c[917] = 1834
+EXACT c[918] = 1836
+EXACT c[919] = 1838
+EXACT c[920] = 1840
+EXACT c[921] = 1842
+EXACT c[922] = 1844
+EXACT c[923] = 1846
+EXACT c[924] = 1848
+EXACT c[925] = 1850
+EXACT c[926] = 1852
+EXACT c[927] = 1854
+EXACT c[928] = 1856
+EXACT c[929] = 1858
+EXACT c[930] = 1860
+EXACT c[931] = 1862
+EXACT c[932] = 1864
+EXACT c[933] = 1866
+EXACT c[934] = 1868
+EXACT c[935] = 1870
+EXACT c[936] = 1872
+EXACT c[937] = 1874
+EXACT c[938] = 1876
+EXACT c[939] = 1878
+EXACT c[940] = 1880
+EXACT c[941] = 1882
+EXACT c[942] = 1884
+EXACT c[943] = 1886
+EXACT c[944] = 1888
+EXACT c[945] = 1890
+EXACT c[946] = 1892
+EXACT c[947] = 1894
+EXACT c[948] = 1896
+EXACT c[949] = 1898
+EXACT c[950] = 1900
+EXACT c[951] = 1902
+EXACT c[952] = 1904
+EXACT c[953] = 1906
+EXACT c[954] = 1908
+EXACT c[955] = 1910
+EXACT c[956] = 1912
+EXACT c[957] = 1914
+EXACT c[958] = 1916
+EXACT c[959] = 1918
+EXACT c[960] = 1920
+EXACT c[961] = 1922
+EXACT c[962] = 1924
+EXACT c[963] = 1926
+EXACT c[964] = 1928
+EXACT c[965] = 1930
+EXACT c[966] = 1932
+EXACT c[967] = 1934
+EXACT c[968] = 1936
+EXACT c[969] = 1938
+EXACT c[970] = 1940
+EXACT c[971] = 1942
+EXACT c[972] = 1944
+EXACT c[973] = 1946
+EXACT c[974] = 1948
+EXACT c[975] = 1950
+EXACT c[976] = 1952
+EXACT c[977] = 1954
+EXACT c[978] = 1956
+EXACT c[979] = 1958
+EXACT c[980] = 1960
+EXACT c[981] = 1962
+EXACT c[982] = 1964
+EXACT c[983] = 1966
+EXACT c[984] = 1968
+EXACT c[985] = 1970
+EXACT c[986] = 1972
+EXACT c[987] = 1974
+EXACT c[988] = 1976
+EXACT c[989] = 1978
+EXACT c[990] = 1980
+EXACT c[991] = 1982
+EXACT c[992] = 1984
+EXACT c[993] = 1986
+EXACT c[994] = 1988
+EXACT c[995] = 1990
+EXACT c[996] = 1992
+EXACT c[997] = 1994
+EXACT c[998] = 1996
+EXACT c[999] = 1998
+EXACT c[1000] = 2000
+EXACT c[1001] = 2002
+EXACT c[1002] = 2004
+EXACT c[1003] = 2006
+EXACT c[1004] = 2008
+EXACT c[1005] = 2010
+EXACT c[1006] = 2012
+EXACT c[1007] = 2014
+EXACT c[1008] = 2016
+EXACT c[1009] = 2018
+EXACT c[1010] = 2020
+EXACT c[1011] = 2022
+EXACT c[1012] = 2024
+EXACT c[1013] = 2026
+EXACT c[1014] = 2028
+EXACT c[1015] = 2030
+EXACT c[1016] = 2032
+EXACT c[1017] = 2034
+EXACT c[1018] = 2036
+EXACT c[1019] = 2038
+EXACT c[1020] = 2040
+EXACT c[1021] = 2042
+EXACT c[1022] = 2044
+EXACT c[1023] = 2046
diff --git a/tests/kernels/misc/vector_argument.cl b/tests/kernels/misc/vector_argument.cl
new file mode 100644
index 0000000..9ad03ba
--- /dev/null
+++ b/tests/kernels/misc/vector_argument.cl
@@ -0,0 +1,4 @@
+kernel void vector_argument(int4 vector, global int4 *output)
+{
+ *output = vector + 42;
+}
diff --git a/tests/kernels/misc/vector_argument.ref b/tests/kernels/misc/vector_argument.ref
new file mode 100644
index 0000000..ea2d29f
--- /dev/null
+++ b/tests/kernels/misc/vector_argument.ref
@@ -0,0 +1,5 @@
+EXACT Argument 'output': 16 bytes
+EXACT output[0] = 49
+EXACT output[1] = 84
+EXACT output[2] = 42
+EXACT output[3] = 41
diff --git a/tests/kernels/misc/vector_argument.sim b/tests/kernels/misc/vector_argument.sim
new file mode 100644
index 0000000..d54a636
--- /dev/null
+++ b/tests/kernels/misc/vector_argument.sim
@@ -0,0 +1,9 @@
+vector_argument.cl
+vector_argument
+4 1 1
+1 1 1
+
+<size=16>
+7 42 0 -1
+
+<size=16 fill=0 dump>
diff --git a/tests/kernels/run_kernel_test.py b/tests/kernels/run_kernel_test.py
deleted file mode 100644
index 9387b9c..0000000
--- a/tests/kernels/run_kernel_test.py
+++ /dev/null
@@ -1,93 +0,0 @@
-# run_kernel_test.py (Oclgrind)
-# Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
-# University of Bristol. All rights reserved.
-#
-# This program is provided under a three-clause BSD license. For full
-# license terms please see the LICENSE file distributed with this
-# source code.
-
-import os
-import re
-import subprocess
-import sys
-
-# Check arguments
-if len(sys.argv) != 3:
- print 'Usage: python run_kernel_test.py EXE SIMFILE'
- sys.exit(1)
-if not os.path.isfile(sys.argv[2]):
- print 'Test file not found'
- sys.exit(1)
-
-# Construct paths to test inputs/outputs
-test_exe = sys.argv[1]
-test_file = sys.argv[2]
-test_dir = os.path.dirname(os.path.realpath(test_file))
-test_file = os.path.basename(test_file)
-test_name = os.path.splitext(test_file)[0]
-test_out = test_name + '.out'
-test_ref = test_dir + os.path.sep + test_name + '.ref'
-current_dir = os.getcwd()
-
-if os.environ.get('AM_TESTS') == '1':
- # If running via automake, use build directory for output file
- test_out = 'tests' + os.path.sep + 'kernels' + os.path.sep + \
- test_dir.split(os.path.sep)[-1] + os.path.sep + test_out
-else:
- # Otherwise, use test directory for output file
- test_out = test_dir + os.path.sep + test_out
-
-# Run oclgrind-kernel
-out = open(test_out, 'w')
-os.chdir(test_dir)
-retval = subprocess.call([test_exe, '--data-races', test_file],
- stdout=out, stderr=out)
-out.close()
-if retval != 0:
- print 'oclgrind-kernel returned non-zero value (' + str(retval) + ')'
- sys.exit(retval)
-
-# Open output and reference files
-os.chdir(current_dir)
-out = open(test_out).read().splitlines()
-ref = open(test_ref).read().splitlines()
-
-# Scan through file to reach argument data
-oi = 0
-ri = 0
-try:
- while re.match('Argument \'.*\': [0-9]+ *bytes', out[oi]) == None:
- oi += 1
- while re.match('Argument \'.*\': [0-9]+ *bytes', ref[ri]) == None:
- ri += 1
-except:
- print 'Error searching for argument data'
- sys.exit(1)
-
-# Check that an error was produced iff an error was expected
-# An error occured if global memory dump isn't at start of file
-# TODO: Improve this so that more details about the error are checked
-should_error = ri > 1
-if should_error and oi < 2:
- print 'Error expected, but no error reported'
- sys.exit(1)
-if not should_error and oi > 1:
- print 'Error reported, but no error expected'
- sys.exit(1)
-
-# Check that the global memory dump matches the reference
-# TODO: 32-bit machines will fail this due to memory address comparisons
-match = 1
-while oi < len(out):
- if out[oi] != ref[ri]:
- print '[%d:%d] "%s" vs "%s"' % (oi, ri, out[oi], ref[ri])
- match = 0
- oi += 1
- ri += 1
-if not match:
- print
- print 'Output didn\'t match reference'
- sys.exit(1)
-
-# Test passed
-sys.exit(0)
diff --git a/tests/kernels/uninitialized/padded_nested_struct_memcpy.cl b/tests/kernels/uninitialized/padded_nested_struct_memcpy.cl
new file mode 100644
index 0000000..20656ea
--- /dev/null
+++ b/tests/kernels/uninitialized/padded_nested_struct_memcpy.cl
@@ -0,0 +1,27 @@
+struct T
+{
+ char a;
+ int b;
+ char c;
+};
+
+struct S
+{
+ char a;
+ int b;
+ char c;
+ struct T d;
+};
+
+kernel void padded_nested_struct_memcpy(global struct S *output)
+{
+ struct S s;
+ s.a = 1;
+ s.b = 2;
+ s.c = 3;
+ s.d.a = 4;
+ s.d.b = 5;
+ s.d.c = 6;
+
+ *output = s;
+}
diff --git a/tests/kernels/uninitialized/padded_nested_struct_memcpy.ref b/tests/kernels/uninitialized/padded_nested_struct_memcpy.ref
new file mode 100644
index 0000000..a55c060
--- /dev/null
+++ b/tests/kernels/uninitialized/padded_nested_struct_memcpy.ref
@@ -0,0 +1,25 @@
+EXACT Argument 'output': 24 bytes
+EXACT output[0] = 1
+MATCH output[1] =
+MATCH output[2] =
+MATCH output[3] =
+EXACT output[4] = 2
+EXACT output[5] = 0
+EXACT output[6] = 0
+EXACT output[7] = 0
+EXACT output[8] = 3
+MATCH output[9] =
+MATCH output[10] =
+MATCH output[11] =
+EXACT output[12] = 4
+MATCH output[13] =
+MATCH output[14] =
+MATCH output[15] =
+EXACT output[16] = 5
+EXACT output[17] = 0
+EXACT output[18] = 0
+EXACT output[19] = 0
+EXACT output[20] = 6
+MATCH output[21] =
+MATCH output[22] =
+MATCH output[23] =
diff --git a/tests/kernels/uninitialized/padded_nested_struct_memcpy.sim b/tests/kernels/uninitialized/padded_nested_struct_memcpy.sim
new file mode 100644
index 0000000..4c9d374
--- /dev/null
+++ b/tests/kernels/uninitialized/padded_nested_struct_memcpy.sim
@@ -0,0 +1,6 @@
+padded_nested_struct_memcpy.cl
+padded_nested_struct_memcpy
+1 1 1
+1 1 1
+
+<size=24 char fill=0 dump>
diff --git a/tests/kernels/uninitialized/padded_struct_alloca_fp.cl b/tests/kernels/uninitialized/padded_struct_alloca_fp.cl
new file mode 100644
index 0000000..7f878a8
--- /dev/null
+++ b/tests/kernels/uninitialized/padded_struct_alloca_fp.cl
@@ -0,0 +1,16 @@
+struct S
+{
+ char a;
+ int b;
+ char c;
+};
+
+kernel void padded_struct_alloca_fp(global struct S *output)
+{
+ struct S s;
+ s.a = 42;
+ s.b = -7;
+ s.c = 127;
+
+ *output = s;
+}
diff --git a/tests/kernels/uninitialized/padded_struct_alloca_fp.ref b/tests/kernels/uninitialized/padded_struct_alloca_fp.ref
new file mode 100644
index 0000000..609ed8f
--- /dev/null
+++ b/tests/kernels/uninitialized/padded_struct_alloca_fp.ref
@@ -0,0 +1,4 @@
+EXACT Argument 'output': 12 bytes
+EXACT output[0] = 42
+EXACT output[1] = -7
+EXACT output[2] = 127
diff --git a/tests/kernels/uninitialized/padded_struct_alloca_fp.sim b/tests/kernels/uninitialized/padded_struct_alloca_fp.sim
new file mode 100644
index 0000000..74f1b0a
--- /dev/null
+++ b/tests/kernels/uninitialized/padded_struct_alloca_fp.sim
@@ -0,0 +1,6 @@
+padded_struct_alloca_fp.cl
+padded_struct_alloca_fp
+1 1 1
+1 1 1
+
+<size=12 int fill=0 dump>
diff --git a/tests/kernels/uninitialized/padded_struct_memcpy_fp.cl b/tests/kernels/uninitialized/padded_struct_memcpy_fp.cl
new file mode 100644
index 0000000..f1a449b
--- /dev/null
+++ b/tests/kernels/uninitialized/padded_struct_memcpy_fp.cl
@@ -0,0 +1,27 @@
+struct S
+{
+ char a;
+ int b;
+ char c;
+};
+
+kernel void padded_struct_memcpy_fp(local struct S *scratch,
+ global struct S *output)
+{
+ int lid = get_local_id(0);
+
+ struct S s;
+ s.a = 42;
+ s.b = -7;
+ s.c = 127;
+
+ if (lid == 0)
+ {
+ *scratch = s;
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+ if (lid == 1)
+ {
+ *output = *scratch;
+ }
+}
diff --git a/tests/kernels/uninitialized/padded_struct_memcpy_fp.ref b/tests/kernels/uninitialized/padded_struct_memcpy_fp.ref
new file mode 100644
index 0000000..609ed8f
--- /dev/null
+++ b/tests/kernels/uninitialized/padded_struct_memcpy_fp.ref
@@ -0,0 +1,4 @@
+EXACT Argument 'output': 12 bytes
+EXACT output[0] = 42
+EXACT output[1] = -7
+EXACT output[2] = 127
diff --git a/tests/kernels/uninitialized/padded_struct_memcpy_fp.sim b/tests/kernels/uninitialized/padded_struct_memcpy_fp.sim
new file mode 100644
index 0000000..8ed2ae9
--- /dev/null
+++ b/tests/kernels/uninitialized/padded_struct_memcpy_fp.sim
@@ -0,0 +1,8 @@
+padded_struct_memcpy_fp.cl
+padded_struct_memcpy_fp
+2 1 1
+2 1 1
+
+<size=12 char>
+
+<size=12 int fill=0 dump>
diff --git a/tests/kernels/uninitialized/partially_uninitialized_fract.cl b/tests/kernels/uninitialized/partially_uninitialized_fract.cl
new file mode 100644
index 0000000..c277e01
--- /dev/null
+++ b/tests/kernels/uninitialized/partially_uninitialized_fract.cl
@@ -0,0 +1,6 @@
+__kernel void partially_uninitialized_fract(__global float4 *output)
+{
+ float4 f;
+ f.xzw = 4.2;
+ *(output + 1) = fract(f, output);
+}
diff --git a/tests/kernels/uninitialized/partially_uninitialized_fract.ref b/tests/kernels/uninitialized/partially_uninitialized_fract.ref
new file mode 100644
index 0000000..7a74bbb
--- /dev/null
+++ b/tests/kernels/uninitialized/partially_uninitialized_fract.ref
@@ -0,0 +1,12 @@
+ERROR Uninitialized value
+ERROR Uninitialized value
+
+EXACT Argument 'output': 32 bytes
+EXACT output[0] = 4
+MATCH output[1] =
+EXACT output[2] = 4
+EXACT output[3] = 4
+EXACT output[4] = 0.2
+MATCH output[5] =
+EXACT output[6] = 0.2
+EXACT output[7] = 0.2
diff --git a/tests/kernels/uninitialized/partially_uninitialized_fract.sim b/tests/kernels/uninitialized/partially_uninitialized_fract.sim
new file mode 100644
index 0000000..699fb0e
--- /dev/null
+++ b/tests/kernels/uninitialized/partially_uninitialized_fract.sim
@@ -0,0 +1,6 @@
+partially_uninitialized_fract.cl
+partially_uninitialized_fract
+1 1 1
+1 1 1
+
+<size=32 fill=0 dump>
diff --git a/tests/kernels/uninitialized/private_array_initializer_list.cl b/tests/kernels/uninitialized/private_array_initializer_list.cl
new file mode 100644
index 0000000..20bfdab
--- /dev/null
+++ b/tests/kernels/uninitialized/private_array_initializer_list.cl
@@ -0,0 +1,9 @@
+kernel void private_array_initializer_list(global float *output)
+{
+ float scratch[4] = {7.f, 42.f, -1.f, 0.f};
+
+ for (int i = 0; i < 4; i++)
+ {
+ output[i] = scratch[i];
+ }
+}
diff --git a/tests/kernels/uninitialized/private_array_initializer_list.ref b/tests/kernels/uninitialized/private_array_initializer_list.ref
new file mode 100644
index 0000000..7de7145
--- /dev/null
+++ b/tests/kernels/uninitialized/private_array_initializer_list.ref
@@ -0,0 +1,5 @@
+EXACT Argument 'output': 16 bytes
+EXACT output[0] = 7
+EXACT output[1] = 42
+EXACT output[2] = -1
+EXACT output[3] = 0
diff --git a/tests/kernels/uninitialized/private_array_initializer_list.sim b/tests/kernels/uninitialized/private_array_initializer_list.sim
new file mode 100644
index 0000000..b672712
--- /dev/null
+++ b/tests/kernels/uninitialized/private_array_initializer_list.sim
@@ -0,0 +1,6 @@
+private_array_initializer_list.cl
+private_array_initializer_list
+1 1 1
+1 1 1
+
+<size=16 fill=0 dump>
diff --git a/tests/kernels/uninitialized/uninitialized_address.cl b/tests/kernels/uninitialized/uninitialized_address.cl
new file mode 100644
index 0000000..5e39a8c
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_address.cl
@@ -0,0 +1,9 @@
+__kernel void uninitialized_address(__global ulong *output)
+{
+ int a[] = {1, 2, 3};
+ volatile int i, j;
+
+ a[i] = 4;
+
+ output[0] = a[j];
+}
diff --git a/tests/kernels/uninitialized/uninitialized_address.ref b/tests/kernels/uninitialized/uninitialized_address.ref
new file mode 100644
index 0000000..b9045ab
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_address.ref
@@ -0,0 +1,5 @@
+ERROR Uninitialized address
+ERROR Uninitialized address
+
+EXACT Argument 'output': 8 bytes
+MATCH output[0] =
diff --git a/tests/kernels/uninitialized/uninitialized_address.sim b/tests/kernels/uninitialized/uninitialized_address.sim
new file mode 100644
index 0000000..6fc429c
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_address.sim
@@ -0,0 +1,6 @@
+uninitialized_address.cl
+uninitialized_address
+1 1 1
+1 1 1
+
+<size=8 fill=0 dump>
diff --git a/tests/kernels/uninitialized/uninitialized_global_buffer.cl b/tests/kernels/uninitialized/uninitialized_global_buffer.cl
new file mode 100644
index 0000000..59038f4
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_global_buffer.cl
@@ -0,0 +1,5 @@
+kernel void uninitialized_global_buffer(global float *input,
+ global float *output)
+{
+ output[get_global_id(0)] = *input;
+}
diff --git a/tests/kernels/uninitialized/uninitialized_global_buffer.ref b/tests/kernels/uninitialized/uninitialized_global_buffer.ref
new file mode 100644
index 0000000..34caf30
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_global_buffer.ref
@@ -0,0 +1,4 @@
+ERROR Uninitialized value
+
+EXACT Argument 'output': 4 bytes
+EXACT output[0] = 0
diff --git a/tests/kernels/uninitialized/uninitialized_global_buffer.sim b/tests/kernels/uninitialized/uninitialized_global_buffer.sim
new file mode 100644
index 0000000..184ea3d
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_global_buffer.sim
@@ -0,0 +1,8 @@
+uninitialized_global_buffer.cl
+uninitialized_global_buffer
+1 1 1
+1 1 1
+
+<size=4 noinit>
+
+<size=4 fill=0 dump>
diff --git a/tests/kernels/uninitialized/uninitialized_local_array.cl b/tests/kernels/uninitialized/uninitialized_local_array.cl
new file mode 100644
index 0000000..0c95007
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_local_array.cl
@@ -0,0 +1,11 @@
+kernel void uninitialized_local_array(global float *output)
+{
+ local float scratch[16];
+
+ int i = get_local_id(0);
+ if (i != get_local_size(0)/2)
+ {
+ scratch[i] = i;
+ }
+ output[i] = scratch[i];
+}
diff --git a/tests/kernels/uninitialized/uninitialized_local_array.ref b/tests/kernels/uninitialized/uninitialized_local_array.ref
new file mode 100644
index 0000000..85ba40e
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_local_array.ref
@@ -0,0 +1,19 @@
+ERROR Uninitialized value
+
+EXACT Argument 'output': 64 bytes
+EXACT output[0] = 0
+EXACT output[1] = 1
+EXACT output[2] = 2
+EXACT output[3] = 3
+EXACT output[4] = 4
+EXACT output[5] = 5
+EXACT output[6] = 6
+EXACT output[7] = 7
+EXACT output[8] = 0
+EXACT output[9] = 9
+EXACT output[10] = 10
+EXACT output[11] = 11
+EXACT output[12] = 12
+EXACT output[13] = 13
+EXACT output[14] = 14
+EXACT output[15] = 15
diff --git a/tests/kernels/uninitialized/uninitialized_local_array.sim b/tests/kernels/uninitialized/uninitialized_local_array.sim
new file mode 100644
index 0000000..3dc20c5
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_local_array.sim
@@ -0,0 +1,6 @@
+uninitialized_local_array.cl
+uninitialized_local_array
+16 1 1
+16 1 1
+
+<size=64 fill=0 dump>
diff --git a/tests/kernels/uninitialized/uninitialized_local_ptr.cl b/tests/kernels/uninitialized/uninitialized_local_ptr.cl
new file mode 100644
index 0000000..76631b8
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_local_ptr.cl
@@ -0,0 +1,9 @@
+kernel void uninitialized_local_ptr(local float *scratch, global float *output)
+{
+ int i = get_local_id(0);
+ if (i != get_local_size(0)/2)
+ {
+ scratch[i] = i;
+ }
+ output[i] = scratch[i];
+}
diff --git a/tests/kernels/uninitialized/uninitialized_local_ptr.ref b/tests/kernels/uninitialized/uninitialized_local_ptr.ref
new file mode 100644
index 0000000..85ba40e
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_local_ptr.ref
@@ -0,0 +1,19 @@
+ERROR Uninitialized value
+
+EXACT Argument 'output': 64 bytes
+EXACT output[0] = 0
+EXACT output[1] = 1
+EXACT output[2] = 2
+EXACT output[3] = 3
+EXACT output[4] = 4
+EXACT output[5] = 5
+EXACT output[6] = 6
+EXACT output[7] = 7
+EXACT output[8] = 0
+EXACT output[9] = 9
+EXACT output[10] = 10
+EXACT output[11] = 11
+EXACT output[12] = 12
+EXACT output[13] = 13
+EXACT output[14] = 14
+EXACT output[15] = 15
diff --git a/tests/kernels/uninitialized/uninitialized_local_ptr.sim b/tests/kernels/uninitialized/uninitialized_local_ptr.sim
new file mode 100644
index 0000000..47c711b
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_local_ptr.sim
@@ -0,0 +1,8 @@
+uninitialized_local_ptr.cl
+uninitialized_local_ptr
+16 1 1
+16 1 1
+
+<size=64>
+
+<size=64 fill=0 dump>
diff --git a/tests/kernels/uninitialized/uninitialized_local_variable.cl b/tests/kernels/uninitialized/uninitialized_local_variable.cl
new file mode 100644
index 0000000..1cf8685
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_local_variable.cl
@@ -0,0 +1,7 @@
+kernel void uninitialized_local_variable(global int *output)
+{
+ local int x;
+ if (*output > 0)
+ x = *output;
+ *output = x;
+}
diff --git a/tests/kernels/uninitialized/uninitialized_local_variable.ref b/tests/kernels/uninitialized/uninitialized_local_variable.ref
new file mode 100644
index 0000000..34caf30
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_local_variable.ref
@@ -0,0 +1,4 @@
+ERROR Uninitialized value
+
+EXACT Argument 'output': 4 bytes
+EXACT output[0] = 0
diff --git a/tests/kernels/uninitialized/uninitialized_local_variable.sim b/tests/kernels/uninitialized/uninitialized_local_variable.sim
new file mode 100644
index 0000000..df2838e
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_local_variable.sim
@@ -0,0 +1,6 @@
+uninitialized_local_variable.cl
+uninitialized_local_variable
+1 1 1
+1 1 1
+
+<size=4 dump fill=0>
diff --git a/tests/kernels/uninitialized/uninitialized_packed_struct_memcpy.cl b/tests/kernels/uninitialized/uninitialized_packed_struct_memcpy.cl
new file mode 100644
index 0000000..fbc888e
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_packed_struct_memcpy.cl
@@ -0,0 +1,12 @@
+struct __attribute__ ((packed)) S
+{
+ char a;
+ int b __attribute__ ((packed));
+ char c;
+};
+
+kernel void uninitialized_packed_struct_memcpy(local int *scratch, global struct S *output)
+{
+ struct S s = {1, *scratch, 2};
+ *output = s;
+}
diff --git a/tests/kernels/uninitialized/uninitialized_packed_struct_memcpy.ref b/tests/kernels/uninitialized/uninitialized_packed_struct_memcpy.ref
new file mode 100644
index 0000000..a0d374f
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_packed_struct_memcpy.ref
@@ -0,0 +1,9 @@
+ERROR Uninitialized value
+
+EXACT Argument 'output': 6 bytes
+EXACT output[0] = 1
+MATCH output[1] =
+MATCH output[2] =
+MATCH output[3] =
+MATCH output[4] =
+EXACT output[5] = 2
diff --git a/tests/kernels/uninitialized/uninitialized_packed_struct_memcpy.sim b/tests/kernels/uninitialized/uninitialized_packed_struct_memcpy.sim
new file mode 100644
index 0000000..236ae5b
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_packed_struct_memcpy.sim
@@ -0,0 +1,8 @@
+uninitialized_packed_struct_memcpy.cl
+uninitialized_packed_struct_memcpy
+1 1 1
+1 1 1
+
+<size=8>
+
+<size=6 char fill=0 dump>
diff --git a/tests/kernels/uninitialized/uninitialized_padded_nested_struct_memcpy.cl b/tests/kernels/uninitialized/uninitialized_padded_nested_struct_memcpy.cl
new file mode 100644
index 0000000..4558ff6
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_padded_nested_struct_memcpy.cl
@@ -0,0 +1,20 @@
+struct T
+{
+ char a;
+ int b;
+ char c;
+};
+
+struct S
+{
+ char a;
+ int b;
+ char c;
+ struct T d;
+};
+
+kernel void uninitialized_padded_nested_struct_memcpy(local int *scratch, global struct S *output)
+{
+ struct S s = {1, 2, 3, {4, *scratch, 5}};
+ *output = s;
+}
diff --git a/tests/kernels/uninitialized/uninitialized_padded_nested_struct_memcpy.ref b/tests/kernels/uninitialized/uninitialized_padded_nested_struct_memcpy.ref
new file mode 100644
index 0000000..ad64661
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_padded_nested_struct_memcpy.ref
@@ -0,0 +1,27 @@
+ERROR Uninitialized value
+
+EXACT Argument 'output': 24 bytes
+EXACT output[0] = 1
+MATCH output[1] =
+MATCH output[2] =
+MATCH output[3] =
+EXACT output[4] = 2
+EXACT output[5] = 0
+EXACT output[6] = 0
+EXACT output[7] = 0
+EXACT output[8] = 3
+MATCH output[9] =
+MATCH output[10] =
+MATCH output[11] =
+EXACT output[12] = 4
+MATCH output[13] =
+MATCH output[14] =
+MATCH output[15] =
+MATCH output[16] =
+MATCH output[17] =
+MATCH output[18] =
+MATCH output[19] =
+EXACT output[20] = 5
+MATCH output[21] =
+MATCH output[22] =
+MATCH output[23] =
diff --git a/tests/kernels/uninitialized/uninitialized_padded_nested_struct_memcpy.sim b/tests/kernels/uninitialized/uninitialized_padded_nested_struct_memcpy.sim
new file mode 100644
index 0000000..fde5c05
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_padded_nested_struct_memcpy.sim
@@ -0,0 +1,8 @@
+uninitialized_padded_nested_struct_memcpy.cl
+uninitialized_padded_nested_struct_memcpy
+1 1 1
+1 1 1
+
+<size=8>
+
+<size=24 char fill=0 dump>
diff --git a/tests/kernels/uninitialized/uninitialized_padded_struct_memcpy.cl b/tests/kernels/uninitialized/uninitialized_padded_struct_memcpy.cl
new file mode 100644
index 0000000..3b96f3a
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_padded_struct_memcpy.cl
@@ -0,0 +1,12 @@
+struct S
+{
+ char a;
+ int b;
+ char c;
+};
+
+kernel void uninitialized_padded_struct_memcpy(local int *scratch, global struct S *output)
+{
+ struct S s = {1, *scratch, 2};
+ *output = s;
+}
diff --git a/tests/kernels/uninitialized/uninitialized_padded_struct_memcpy.ref b/tests/kernels/uninitialized/uninitialized_padded_struct_memcpy.ref
new file mode 100644
index 0000000..3fffd1d
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_padded_struct_memcpy.ref
@@ -0,0 +1,6 @@
+ERROR Uninitialized value
+
+EXACT Argument 'output': 12 bytes
+EXACT output[0] = 1
+MATCH output[1] =
+EXACT output[2] = 2
diff --git a/tests/kernels/uninitialized/uninitialized_padded_struct_memcpy.sim b/tests/kernels/uninitialized/uninitialized_padded_struct_memcpy.sim
new file mode 100644
index 0000000..cdf5827
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_padded_struct_memcpy.sim
@@ -0,0 +1,8 @@
+uninitialized_padded_struct_memcpy.cl
+uninitialized_padded_struct_memcpy
+1 1 1
+1 1 1
+
+<size=8>
+
+<size=12 int fill=0 dump>
diff --git a/tests/kernels/uninitialized/uninitialized_private_array.cl b/tests/kernels/uninitialized/uninitialized_private_array.cl
new file mode 100644
index 0000000..2f46248
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_private_array.cl
@@ -0,0 +1,16 @@
+kernel void uninitialized_private_array(global uint *indices,
+ global float *input,
+ global float *output)
+{
+ float scratch[4];
+
+ for (int i = 0; i < 4; i++)
+ {
+ scratch[indices[i]] = i;
+ }
+
+ for (int i = 0; i < 4; i++)
+ {
+ output[i] = scratch[i];
+ }
+}
diff --git a/tests/kernels/uninitialized/uninitialized_private_array.ref b/tests/kernels/uninitialized/uninitialized_private_array.ref
new file mode 100644
index 0000000..21fef4f
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_private_array.ref
@@ -0,0 +1,7 @@
+ERROR Uninitialized value
+
+EXACT Argument 'output': 16 bytes
+EXACT output[0] = 0
+EXACT output[1] = 2
+EXACT output[2] = 0
+EXACT output[3] = 3
diff --git a/tests/kernels/uninitialized/uninitialized_private_array.sim b/tests/kernels/uninitialized/uninitialized_private_array.sim
new file mode 100644
index 0000000..c03f63c
--- /dev/null
+++ b/tests/kernels/uninitialized/uninitialized_private_array.sim
@@ -0,0 +1,14 @@
+uninitialized_private_array.cl
+uninitialized_private_array
+1 1 1
+1 1 1
+
+<size=16>
+0
+1
+1
+3
+
+<size=16 range=1:1:4>
+
+<size=16 fill=0 dump>
diff --git a/tests/kernels/wait_event/wait_event_chained.ref b/tests/kernels/wait_event/wait_event_chained.ref
index cf0b04f..690f35e 100644
--- a/tests/kernels/wait_event/wait_event_chained.ref
+++ b/tests/kernels/wait_event/wait_event_chained.ref
@@ -1,7 +1,5 @@
-
-Argument 'data': 16 bytes
- data[0] = 3
- data[1] = 2
- data[2] = 1
- data[3] = 0
-
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 3
+EXACT data[1] = 2
+EXACT data[2] = 1
+EXACT data[3] = 0
diff --git a/tests/kernels/wait_event/wait_event_divergent.cl b/tests/kernels/wait_event/wait_event_divergent.cl
index d88f3f3..6d56d10 100644
--- a/tests/kernels/wait_event/wait_event_divergent.cl
+++ b/tests/kernels/wait_event/wait_event_divergent.cl
@@ -1,6 +1,9 @@
kernel void wait_event_divergent(global int *data, local int *scratch)
{
int i = get_local_id(0);
+ scratch[i] = 0;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
event_t events[2];
events[0] = async_work_group_copy(scratch, data, 1, 0);
events[1] = async_work_group_copy(scratch+1, data+1, 1, 0);
diff --git a/tests/kernels/wait_event/wait_event_divergent.ref b/tests/kernels/wait_event/wait_event_divergent.ref
index 56f64ac..1f7cc25 100644
--- a/tests/kernels/wait_event/wait_event_divergent.ref
+++ b/tests/kernels/wait_event/wait_event_divergent.ref
@@ -1,6 +1,6 @@
-ERROR EXPECTED
-
-Argument 'data': 8 bytes
- data[0] = 0
- data[1] = 0
+ERROR Work-group divergence detected (barrier)
+ERROR Work-item finished without waiting for events
+EXACT Argument 'data': 8 bytes
+EXACT data[0] = 0
+EXACT data[1] = 0
diff --git a/tests/kernels/wait_event/wait_event_duplicates.ref b/tests/kernels/wait_event/wait_event_duplicates.ref
index cf0b04f..690f35e 100644
--- a/tests/kernels/wait_event/wait_event_duplicates.ref
+++ b/tests/kernels/wait_event/wait_event_duplicates.ref
@@ -1,7 +1,5 @@
-
-Argument 'data': 16 bytes
- data[0] = 3
- data[1] = 2
- data[2] = 1
- data[3] = 0
-
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 3
+EXACT data[1] = 2
+EXACT data[2] = 1
+EXACT data[3] = 0
diff --git a/tests/kernels/wait_event/wait_event_invalid.ref b/tests/kernels/wait_event/wait_event_invalid.ref
index 4da13c4..1c2467d 100644
--- a/tests/kernels/wait_event/wait_event_invalid.ref
+++ b/tests/kernels/wait_event/wait_event_invalid.ref
@@ -1,8 +1,7 @@
-ERROR EXPECTED
-
-Argument 'data': 16 bytes
- data[0] = 0
- data[1] = 1
- data[2] = 2
- data[3] = 3
+ERROR Invalid wait event
+EXACT Argument 'data': 16 bytes
+EXACT data[0] = 0
+EXACT data[1] = 1
+EXACT data[2] = 2
+EXACT data[3] = 3
diff --git a/tests/run_test.py b/tests/run_test.py
new file mode 100644
index 0000000..330ca7e
--- /dev/null
+++ b/tests/run_test.py
@@ -0,0 +1,145 @@
+# run_test.py (Oclgrind)
+# Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
+# University of Bristol. All rights reserved.
+#
+# This program is provided under a three-clause BSD license. For full
+# license terms please see the LICENSE file distributed with this
+# source code.
+
+import errno
+import os
+import re
+import subprocess
+import sys
+
+# Check arguments
+if len(sys.argv) != 3:
+ print('Usage: python run_test.py OCLGRIND-KERNEL TEST_EXE|TEST.sim')
+ sys.exit(1)
+if not os.path.isfile(sys.argv[2]):
+ print('Test file not found')
+ sys.exit(1)
+
+# Construct paths to test inputs/outputs
+oclgrind_kernel = sys.argv[1]
+test_full_path = sys.argv[2]
+test_dir = os.path.dirname(os.path.realpath(test_full_path))
+test_file = os.path.basename(test_full_path)
+test_name = os.path.splitext(test_file)[0]
+current_dir = os.getcwd()
+
+if test_file.endswith('.sim'):
+ test_ref = test_full_path[:-4] + '.ref'
+else:
+ if test_full_path[0] == '/':
+ rel_path = test_full_path[test_full_path.find('/tests/') + 7:]
+ else:
+ rel_path = test_full_path
+
+ test_ref = os.path.dirname(os.path.abspath(__file__)) + os.path.sep \
+ + rel_path + '.ref'
+
+# Enable race detection and uninitialized memory plugins
+os.environ["OCLGRIND_CHECK_API"] = "1"
+os.environ["OCLGRIND_DATA_RACES"] = "1"
+os.environ["OCLGRIND_UNINITIALIZED"] = "1"
+
+def fail(ret=1):
+ print('FAILED')
+ sys.exit(ret)
+
+def run(output_suffix):
+
+ # Get filename for test output
+ if test_file.endswith('.sim'):
+ test_out = test_dir.split(os.path.sep)[-1] + os.path.sep + \
+ test_name + output_suffix + '.out'
+ else:
+ test_out = test_dir + os.path.sep + \
+ test_name + output_suffix + '.out'
+
+
+ output_dir = os.path.dirname(test_out)
+ try:
+ os.makedirs(output_dir)
+ except OSError as exc:
+ if exc.errno == errno.EEXIST and os.path.isdir(output_dir):
+ pass
+ else:
+ raise
+
+ out = open(test_out, 'w')
+
+ # Run test
+ if test_file.endswith('.sim'):
+ os.chdir(test_dir)
+ retval = subprocess.call([oclgrind_kernel, test_file],
+ stdout=out, stderr=out)
+ os.chdir(current_dir)
+ else:
+ retval = subprocess.call([test_full_path], stdout=out, stderr=out)
+
+ out.close()
+ if retval != 0:
+ print('Test returned non-zero value (' + str(retval) + ')')
+ fail(retval)
+
+ # Compare output to reference file (if provided)
+ if os.path.isfile(test_ref):
+
+ # Open output and reference files
+ out = open(test_out).read().splitlines()
+ ref = open(test_ref).read().splitlines()
+
+ # Check output matches references
+ oi = 0
+ for line in ref:
+ if len(line) == 0:
+ continue
+
+ type = line.split()[0]
+ text = line[6:]
+
+ # Find next non-blank line in output file
+ while not len(out[oi]):
+ oi += 1
+
+ if type == 'ERROR':
+ # Check first line of error contains reference message
+ if not text in out[oi]:
+ print('Expected ' + line)
+ print('Found "' + out[oi] + '"')
+ fail()
+ # Skip remaining lines of error
+ while oi < len(out) and len(out[oi]):
+ oi += 1
+ elif type == 'EXACT':
+ # Check line of output matches reference exactly
+ if not text == out[oi]:
+ print('Expected ' + line)
+ print('Found "' + out[oi] + '"')
+ fail()
+ oi += 1
+ elif type == 'MATCH':
+ # Check line of output contains reference text
+ if not text in out[oi]:
+ print('Expected ' + line)
+ print('Found "' + out[oi] + '"')
+ fail()
+ oi += 1
+ else:
+ print('Invalid match type in reference file')
+ fail()
+
+print('Running test with optimisations')
+run('')
+print('PASSED')
+
+print('')
+print('Running test without optimisations')
+os.environ["OCLGRIND_BUILD_OPTIONS"] = "-cl-opt-disable"
+run('_noopt')
+print('PASSED')
+
+# Test passed
+sys.exit(0)
diff --git a/tests/apps/CMakeLists.txt b/tests/runtime/CMakeLists.txt
similarity index 57%
copy from tests/apps/CMakeLists.txt
copy to tests/runtime/CMakeLists.txt
index 0dff241..c700f96 100644
--- a/tests/apps/CMakeLists.txt
+++ b/tests/runtime/CMakeLists.txt
@@ -1,33 +1,40 @@
# CMakeLists.txt (Oclgrind)
-# Copyright (c) 2013-2015, James Price and Simon McIntosh-Smith,
+# Copyright (c) 2013-2016, James Price and Simon McIntosh-Smith,
# University of Bristol. All rights reserved.
#
# This program is provided under a three-clause BSD license. For full
# license terms please see the LICENSE file distributed with this
# source code.
-# Add app tests
+set(COMMON_SOURCES ../common/common.c ../common/common.h)
+include_directories(../common)
+
+# Add runtime tests
foreach(test
- vecadd)
+ map_buffer)
- add_executable(${test} ${test}/${test}.c)
+ add_executable(${test} ${test}.c ${COMMON_SOURCES})
target_link_libraries(${test} oclgrind-rt)
# Generate test binaries in same dir as Oclgrind libraries on Windows
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
- add_test(app_${test} "${CMAKE_BINARY_DIR}/${test}")
set_target_properties(${test} PROPERTIES
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
else()
- add_test(app_${test} "${test}/${test}")
- set_target_properties(${test} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${test}")
set_target_properties(${test} PROPERTIES LINKER_LANGUAGE CXX)
endif()
- set_tests_properties(app_${test} PROPERTIES DEPENDS ${test})
+ add_test(
+ NAME rt_${test}
+ COMMAND
+ ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tests/run_test.py
+ $<TARGET_FILE:oclgrind-kernel>
+ $<TARGET_FILE:${test}>)
+
+ set_tests_properties(rt_${test} PROPERTIES DEPENDS ${test})
# Set PCH directory
- set_tests_properties(app_${test} PROPERTIES
+ set_tests_properties(rt_${test} PROPERTIES
ENVIRONMENT "OCLGRIND_PCH_DIR=${CMAKE_BINARY_DIR}/include/oclgrind")
endforeach(${test})
diff --git a/tests/runtime/map_buffer.c b/tests/runtime/map_buffer.c
new file mode 100644
index 0000000..059b261
--- /dev/null
+++ b/tests/runtime/map_buffer.c
@@ -0,0 +1,327 @@
+#include "common.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define TOL 1e-8 // checkResults() flags |ref - result| above this
+#define MAX_ERRORS 8 // checkResults() prints at most this many mismatches
+// OpenCL C source of the vecadd kernel; main() passes it to createContext()
+const char *KERNEL_SOURCE =
+"kernel void vecadd(global float *a, \n"
+" global float *b, \n"
+" global float *c) \n"
+"{ \n"
+" int i = get_global_id(0); \n"
+" c[i] = a[i] + b[i]; \n"
+"} \n"
+;
+// Defined after main(); counts results[i] not within TOL of a[i] + b[i]
+unsigned checkResults(size_t N, float *a, float *b, float *results);
+
+// run1: run everything as normal - all buffers are unmapped before the kernel
+unsigned run1(Context cl, cl_kernel kernel,
+ cl_mem d_a, cl_mem d_b, cl_mem d_c, size_t N)
+{
+ cl_int err;
+ float *h_a, *h_b, *h_c;
+ size_t dataSize = N*sizeof(cl_float);
+
+ // Map the three buffers and fill them on the host
+ srand(0); // fixed seed
+ h_a = clEnqueueMapBuffer(cl.queue, d_a, CL_TRUE,
+ CL_MAP_WRITE_INVALIDATE_REGION,
+ 0, dataSize, 0, NULL, NULL, &err);
+ checkError(err, "mapping d_a buffer");
+ h_b = clEnqueueMapBuffer(cl.queue, d_b, CL_TRUE,
+ CL_MAP_WRITE_INVALIDATE_REGION,
+ 0, dataSize, 0, NULL, NULL, &err);
+ checkError(err, "mapping d_b buffer");
+ h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize,
+ 0, NULL, NULL, &err);
+ checkError(err, "mapping d_c buffer");
+ for (unsigned i = 0; i < N; i++)
+ {
+ h_a[i] = rand()/(float)RAND_MAX;
+ h_b[i] = rand()/(float)RAND_MAX;
+ h_c[i] = 0; // NOTE(review): h_c was mapped with CL_MAP_READ - confirm
+ }
+ // Release every mapping before the kernel is enqueued
+ err = clEnqueueUnmapMemObject(cl.queue, d_a, h_a, 0, NULL, NULL);
+ checkError(err, "unmapping d_a");
+ err = clEnqueueUnmapMemObject(cl.queue, d_b, h_b, 0, NULL, NULL);
+ checkError(err, "unmapping d_b");
+ err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL);
+ checkError(err, "unmapping d_c");
+
+ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
+ err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b);
+ err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c);
+ checkError(err, "setting kernel args");
+ // One-dimensional launch with N as the global size
+ err = clEnqueueNDRangeKernel(cl.queue, kernel,
+ 1, NULL, &N, NULL, 0, NULL, NULL);
+ checkError(err, "enqueuing kernel");
+ // Non-blocking map of the output; the clFinish() below waits for it
+ h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize,
+ 0, NULL, NULL, &err);
+ checkError(err, "mapping d_c buffer");
+
+ err = clFinish(cl.queue);
+ checkError(err, "running kernel");
+ // NOTE(review): h_a and h_b were unmapped above yet are read here - confirm
+ unsigned errors = checkResults(N, h_a, h_b, h_c);
+
+ err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL);
+ checkError(err, "unmapping d_c");
+
+ return errors;
+}
+
+// run2: don't unmap the input buffers before running the kernel
+// Should result in "Invalid read from buffer mapped for writing" error
+unsigned run2(Context cl, cl_kernel kernel,
+ cl_mem d_a, cl_mem d_b, cl_mem d_c, size_t N)
+{
+ cl_int err;
+ float *h_a, *h_b, *h_c;
+ size_t dataSize = N*sizeof(cl_float);
+
+ // Map the three buffers and fill them on the host
+ srand(0); // fixed seed
+ h_a = clEnqueueMapBuffer(cl.queue, d_a, CL_TRUE,
+ CL_MAP_WRITE_INVALIDATE_REGION,
+ 0, dataSize, 0, NULL, NULL, &err);
+ checkError(err, "mapping d_a buffer");
+ h_b = clEnqueueMapBuffer(cl.queue, d_b, CL_TRUE,
+ CL_MAP_WRITE_INVALIDATE_REGION,
+ 0, dataSize, 0, NULL, NULL, &err);
+ checkError(err, "mapping d_b buffer");
+ h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize,
+ 0, NULL, NULL, &err);
+ checkError(err, "mapping d_c buffer");
+ for (unsigned i = 0; i < N; i++)
+ {
+ h_a[i] = rand()/(float)RAND_MAX;
+ h_b[i] = rand()/(float)RAND_MAX;
+ h_c[i] = 0; // NOTE(review): h_c was mapped with CL_MAP_READ - confirm
+ }
+ // Only d_c is unmapped; d_a and d_b deliberately stay mapped for writing
+ err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL);
+ checkError(err, "unmapping d_c");
+
+ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
+ err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b);
+ err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c);
+ checkError(err, "setting kernel args");
+ // The kernel reads d_a and d_b while both are still mapped for writing
+ err = clEnqueueNDRangeKernel(cl.queue, kernel,
+ 1, NULL, &N, NULL, 0, NULL, NULL);
+ checkError(err, "enqueuing kernel");
+ // Non-blocking map of the output; the clFinish() below waits for it
+ h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize,
+ 0, NULL, NULL, &err);
+ checkError(err, "mapping d_c buffer");
+
+ err = clFinish(cl.queue);
+ checkError(err, "running kernel");
+
+ unsigned errors = checkResults(N, h_a, h_b, h_c);
+ // The input mappings are only released now, after the results were checked
+ err = clEnqueueUnmapMemObject(cl.queue, d_a, h_a, 0, NULL, NULL);
+ checkError(err, "unmapping d_a");
+ err = clEnqueueUnmapMemObject(cl.queue, d_b, h_b, 0, NULL, NULL);
+ checkError(err, "unmapping d_b");
+ err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL);
+ checkError(err, "unmapping d_c");
+
+ return errors;
+}
+
+// run3: don't unmap the output buffer before running the kernel
+// Should result in "Invalid write to mapped buffer" error
+unsigned run3(Context cl, cl_kernel kernel,
+ cl_mem d_a, cl_mem d_b, cl_mem d_c, size_t N)
+{
+ cl_int err;
+ float *h_a, *h_b, *h_c;
+ size_t dataSize = N*sizeof(cl_float);
+
+ // Map the three buffers and fill them on the host
+ srand(0); // fixed seed
+ h_a = clEnqueueMapBuffer(cl.queue, d_a, CL_TRUE,
+ CL_MAP_WRITE_INVALIDATE_REGION,
+ 0, dataSize, 0, NULL, NULL, &err);
+ checkError(err, "mapping d_a buffer");
+ h_b = clEnqueueMapBuffer(cl.queue, d_b, CL_TRUE,
+ CL_MAP_WRITE_INVALIDATE_REGION,
+ 0, dataSize, 0, NULL, NULL, &err);
+ checkError(err, "mapping d_b buffer");
+ h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize,
+ 0, NULL, NULL, &err);
+ checkError(err, "mapping d_c buffer");
+ for (unsigned i = 0; i < N; i++)
+ {
+ h_a[i] = rand()/(float)RAND_MAX;
+ h_b[i] = rand()/(float)RAND_MAX;
+ h_c[i] = 0; // NOTE(review): h_c was mapped with CL_MAP_READ - confirm
+ }
+ // Unmap the inputs only; d_c deliberately stays mapped
+ err = clEnqueueUnmapMemObject(cl.queue, d_a, h_a, 0, NULL, NULL);
+ checkError(err, "unmapping d_a");
+ err = clEnqueueUnmapMemObject(cl.queue, d_b, h_b, 0, NULL, NULL);
+ checkError(err, "unmapping d_b");
+
+ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
+ err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b);
+ err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c);
+ checkError(err, "setting kernel args");
+ // The kernel writes d_c while it is still mapped on the host
+ err = clEnqueueNDRangeKernel(cl.queue, kernel,
+ 1, NULL, &N, NULL, 0, NULL, NULL);
+ checkError(err, "enqueuing kernel");
+
+ err = clFinish(cl.queue);
+ checkError(err, "running kernel");
+ // h_c is still the pre-kernel mapping; NOTE(review): h_a/h_b are unmapped
+ unsigned errors = checkResults(N, h_a, h_b, h_c);
+
+ err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL);
+ checkError(err, "unmapping d_c");
+
+ return errors;
+}
+
+// run4: re-map the input buffers for reading before unmapping them
+// Should not result in any error
+unsigned run4(Context cl, cl_kernel kernel,
+ cl_mem d_a, cl_mem d_b, cl_mem d_c, size_t N)
+{
+ cl_int err;
+ float *h_a, *h_b, *h_c;
+ size_t dataSize = N*sizeof(cl_float);
+
+ // Map the three buffers and fill them on the host
+ srand(0); // fixed seed
+ h_a = clEnqueueMapBuffer(cl.queue, d_a, CL_TRUE,
+ CL_MAP_WRITE_INVALIDATE_REGION,
+ 0, dataSize, 0, NULL, NULL, &err);
+ checkError(err, "mapping d_a buffer");
+ h_b = clEnqueueMapBuffer(cl.queue, d_b, CL_TRUE,
+ CL_MAP_WRITE_INVALIDATE_REGION,
+ 0, dataSize, 0, NULL, NULL, &err);
+ checkError(err, "mapping d_b buffer");
+ h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize,
+ 0, NULL, NULL, &err);
+ checkError(err, "mapping d_c buffer");
+ for (unsigned i = 0; i < N; i++)
+ {
+ h_a[i] = rand()/(float)RAND_MAX;
+ h_b[i] = rand()/(float)RAND_MAX;
+ h_c[i] = 0; // NOTE(review): h_c was mapped with CL_MAP_READ - confirm
+ }
+ // Map the inputs again, read-only; h_a/h_b now hold the new pointers
+ h_a = clEnqueueMapBuffer(cl.queue, d_a, CL_TRUE,
+ CL_MAP_READ,
+ 0, dataSize, 0, NULL, NULL, &err);
+ checkError(err, "mapping d_a buffer");
+ h_b = clEnqueueMapBuffer(cl.queue, d_b, CL_TRUE,
+ CL_MAP_READ,
+ 0, dataSize, 0, NULL, NULL, &err);
+ checkError(err, "mapping d_b buffer");
+ // NOTE(review): d_a/d_b were each mapped twice but are unmapped once - confirm
+ err = clEnqueueUnmapMemObject(cl.queue, d_a, h_a, 0, NULL, NULL);
+ checkError(err, "unmapping d_a");
+ err = clEnqueueUnmapMemObject(cl.queue, d_b, h_b, 0, NULL, NULL);
+ checkError(err, "unmapping d_b");
+ err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL);
+ checkError(err, "unmapping d_c");
+
+ err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
+ err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b);
+ err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c);
+ checkError(err, "setting kernel args");
+ // One-dimensional launch with N as the global size
+ err = clEnqueueNDRangeKernel(cl.queue, kernel,
+ 1, NULL, &N, NULL, 0, NULL, NULL);
+ checkError(err, "enqueuing kernel");
+ // Non-blocking map of the output; the clFinish() below waits for it
+ h_c = clEnqueueMapBuffer(cl.queue, d_c, CL_FALSE, CL_MAP_READ, 0, dataSize,
+ 0, NULL, NULL, &err);
+ checkError(err, "mapping d_c buffer");
+
+ err = clFinish(cl.queue);
+ checkError(err, "running kernel");
+ // NOTE(review): h_a and h_b were unmapped above yet are read here - confirm
+ unsigned errors = checkResults(N, h_a, h_b, h_c);
+
+ err = clEnqueueUnmapMemObject(cl.queue, d_c, h_c, 0, NULL, NULL);
+ checkError(err, "unmapping d_c");
+
+ return errors;
+}
+// Entry point: creates the vecadd kernel and runs run1..run4 on shared buffers
+int main(int argc, char *argv[])
+{
+ cl_int err;
+ cl_kernel kernel;
+ cl_mem d_a, d_b, d_c;
+ // Element count: 1 unless overridden by the first command-line argument
+ size_t N = 1;
+ if (argc > 1)
+ {
+ N = atoi(argv[1]); // NOTE(review): argv[1] is not validated
+ }
+
+ Context cl = createContext(KERNEL_SOURCE);
+
+ kernel = clCreateKernel(cl.program, "vecadd", &err);
+ checkError(err, "creating kernel");
+
+ size_t dataSize = N*sizeof(cl_float);
+ // Three buffers of N floats each, created without initial host data
+ d_a = clCreateBuffer(cl.context, CL_MEM_READ_ONLY, dataSize, NULL, &err);
+ checkError(err, "creating d_a buffer");
+ d_b = clCreateBuffer(cl.context, CL_MEM_READ_ONLY, dataSize, NULL, &err);
+ checkError(err, "creating d_b buffer");
+ d_c = clCreateBuffer(cl.context, CL_MEM_WRITE_ONLY, dataSize, NULL, &err);
+ checkError(err, "creating d_c buffer");
+
+ unsigned errors = 0;
+ // Accumulate the mismatch counts reported by the four scenarios
+ errors += run1(cl, kernel, d_a, d_b, d_c, N);
+ errors += run2(cl, kernel, d_a, d_b, d_c, N);
+ errors += run3(cl, kernel, d_a, d_b, d_c, N);
+ errors += run4(cl, kernel, d_a, d_b, d_c, N);
+
+ clReleaseMemObject(d_a);
+ clReleaseMemObject(d_b);
+ clReleaseMemObject(d_c);
+ clReleaseKernel(kernel);
+ releaseContext(cl);
+ // Exit status is 1 if any mismatch was counted, 0 otherwise
+ return (errors != 0);
+}
+// Returns how many results[i] are not within TOL of a[i] + b[i], for i < N
+unsigned checkResults(size_t N, float *a, float *b, float *results)
+{
+ // Every mismatch is counted, but only the first MAX_ERRORS go to stderr
+ unsigned errors = 0;
+ for (unsigned i = 0; i < N; i++)
+ {
+ float ref = a[i] + b[i];
+ if (fabs(ref - results[i]) > TOL)
+ {
+ if (errors < MAX_ERRORS)
+ {
+ fprintf(stderr, "%4d: %.4f != %.4f\n", i, results[i], ref);
+ }
+ errors++;
+ }
+ }
+ if (errors)
+ printf("%d errors detected\n", errors); // total goes to stdout
+ // NOTE(review): both format strings use %d for unsigned values (i, errors)
+ return errors;
+}
diff --git a/tests/runtime/map_buffer.ref b/tests/runtime/map_buffer.ref
new file mode 100644
index 0000000..a64878d
--- /dev/null
+++ b/tests/runtime/map_buffer.ref
@@ -0,0 +1,4 @@
+ERROR Invalid read from buffer mapped for writing
+ERROR Invalid read from buffer mapped for writing
+
+ERROR Invalid write to mapped buffer
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/oclgrind.git
More information about the Pkg-opencl-commits
mailing list