[clblas] 32/67: Integrating new travis and appveyor build yaml scripts
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Oct 27 08:02:12 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clblas.
commit 0a08f16c9f223171669e4a6918135980aa970890
Author: Kent Knox <kent.knox at amd>
Date: Wed Sep 23 10:06:06 2015 -0500
Integrating new travis and appveyor build yaml scripts
---
.travis.yml | 168 +++++++++++++++------
README.md | 13 +-
appveyor.yml | 105 +++++++++++++
.../AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp | 53 ++++---
src/library/blas/AutoGemm/Includes.py | 13 +-
.../UserGemmKernelSources/UserGemmClKernels.h | 9 +-
6 files changed, 282 insertions(+), 79 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index f6289e9..366b2a8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,54 +1,134 @@
+# Ubuntu name decoder ring; https://en.wikipedia.org/wiki/List_of_Ubuntu_releases
+# Ubuntu 12.04 LTS (Precise Pangolin) <== Travis CI VM image
+# Ubuntu 12.10 (Quantal Quetzal)
+# Ubuntu 13.04 (Raring Ringtail)
+# Ubuntu 13.10 (Saucy Salamander)
+# Ubuntu 14.04 LTS (Trusty Tahr)
+# Ubuntu 14.10 (Utopic Unicorn)
+# Ubuntu 15.04 (Vivid Vervet)
+# Ubuntu 15.10 (Wily Werewolf)
+# Ubuntu 16.04 LTS (Xenial Xerus)
+
+# language: instructs travis what compilers && environment to set up in build matrix
language: cpp
+# sudo: false instructs travis to build our project in a docker VM (faster)
+# Can not yet install fglrx packages with 'false'
+sudo: required # false
+
+# os: expands the build matrix to include multiple os's
+# linux may need to be disabled: we get sporadic failures building boost there (needs investigation)
+os:
+ - linux
+ - osx
+
+# compiler: expands the build matrix to include multiple compilers (per os)
compiler:
- gcc
+ - clang
+
+addons:
+ # apt: is disabled on osx builds
+ # apt: needed by docker framework to install project dependencies without
+ # sudo. Apt uses published Ubuntu PPAs from https://launchpad.net/
+ # https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json
+ apt:
+ sources:
+ # ubuntu-toolchain-r-test contains newer versions of gcc to install
+ # - ubuntu-toolchain-r-test
+ # llvm-toolchain-precise-3.6 contains newer versions of clang to install
+ # - llvm-toolchain-precise-3.6
+ # kubuntu-backports contains newer versions of cmake to install
+ - kubuntu-backports
+ # boost-latest contains boost v1.55
+ - boost-latest
+ packages:
+ # g++-4.8 is minimum version considered to be the first good c++11 gnu compiler
+ # - g++-4.8
+ # - clang-3.6
+ # We require v2.8.12 minimum
+ - cmake
+ # I'm finding problems between pre-compiled versions of boost ublas, with gtest
+ # stl_algobase.h: error: no matching function for call to swap()
+ - libboost-program-options1.55-dev
+ # - libboost-serialization1.55-dev
+ # - libboost-filesystem1.55-dev
+ # - libboost-system1.55-dev
+ # - libboost-regex1.55-dev
+ # The package opencl-headers on 'precise' only installs v1.1 cl headers; uncomment for 'trusty' or greater
+# - opencl-headers
+ # Uncomment one of the following when fglrx modules are added to the apt whitelist
+# - fglrx
+# - fglrx=2:8.960-0ubuntu1
+# - fglrx=2:13.350.1-0ubuntu0.0.1
+
+# env: specifies additional global variables to define per row in build matrix
+env:
+ global:
+ - CLBLAS_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release
+
+# The following filters our build matrix; we are interested in linux-gcc & osx-clang
+matrix:
+ exclude:
+ - os: linux
+ compiler: clang
+ - os: osx
+ compiler: gcc
before_install:
- - sudo apt-get update -qq
- - sudo apt-get install -qq fglrx libboost-program-options-dev
-# Uncomment below to help verify the installs above work
-# - ls -la /usr/lib/libboost*
-# - ls -la /usr/include/boost
+ # Remove the following linux clause when fglrx can be installed with sudo: false
+ - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
+ sudo apt-get update -qq &&
+ sudo apt-get install -qq fglrx=2:13.350.1-0ubuntu0.0.1;
+ fi
+ - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
+ export OPENCL_ROOT="${TRAVIS_BUILD_DIR}/opencl-headers";
+ fi
+ - if [ ${TRAVIS_OS_NAME} == "osx" ]; then
+ brew update;
+ brew outdated boost || brew upgrade boost;
+ brew outdated cmake || brew upgrade cmake;
+ fi
+ # - if [ ${CXX} = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi
+ - cmake --version;
+ - ${CC} --version;
+ - ${CXX} --version;
+install:
+ # 'Precise' only distributes v1.1 opencl headers; download 1.2 headers from khronos website
+ # Remove when the travis VM upgrades to 'trusty' or beyond
+ - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
+ mkdir -p ${OPENCL_ROOT}/include/CL;
+ pushd ${OPENCL_ROOT}/include/CL;
+ wget -w 1 -r -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/;
+ popd;
+ fi
+ # osx image does not contain cl.hpp file; download from Khronos
+ # - if [ ${TRAVIS_OS_NAME} == "osx" ]; then
+ # pushd /System/Library/Frameworks/OpenCL.framework/Versions/A/Headers/;
+ # sudo wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/cl.hpp;
+ # popd;
+ # fi
+
+# Use before_script: to run configure steps
before_script:
- - cd ${TRAVIS_BUILD_DIR}
-# download OpenCL 1.2 header files since Travis CI only provides 1.1
- - mkdir -p OpenCLInclude/CL
- - cd OpenCLInclude/CL
- #- wget -r --no-parent -nH --cut-dirs=4 --reject="index.html*" https://www.khronos.org/registry/cl/api/1.2/
- - wget https://www.khronos.org/registry/cl/api/1.2/cl.h
- - wget https://www.khronos.org/registry/cl/api/1.2/cl.hpp
- - wget https://www.khronos.org/registry/cl/api/1.2/cl_d3d10.h
- - wget https://www.khronos.org/registry/cl/api/1.2/cl_d3d11.h
- - wget https://www.khronos.org/registry/cl/api/1.2/cl_dx9_media_sharing.h
- - wget https://www.khronos.org/registry/cl/api/1.2/cl_egl.h
- - wget https://www.khronos.org/registry/cl/api/1.2/cl_ext.h
- - wget https://www.khronos.org/registry/cl/api/1.2/cl_gl.h
- - wget https://www.khronos.org/registry/cl/api/1.2/cl_gl_ext.h
- - wget https://www.khronos.org/registry/cl/api/1.2/cl_platform.h
- - wget https://www.khronos.org/registry/cl/api/1.2/opencl.h
- - ls
- - pwd
- - cd ../..
- - mkdir -p bin/clBLAS
- - cd bin/clBLAS
- - cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOPENCL_INCLUDE_DIRS:PATH=$PWD/../../OpenCLInclude -DCMAKE_INSTALL_PREFIX:PATH=$PWD/package ../../src
-
-script:
- - make install
-# - ls -Rla package
-# Run a simple test to validate that the build works; CPU device in a VM
- - cd package/bin
- - export LD_LIBRARY_PATH=${TRAVIS_BUILD_DIR}/bin/clBLAS/package/lib64:${LD_LIBRARY_PATH}
- - ./clBLAS-client --cpu
-
-after_success:
- - cd ${TRAVIS_BUILD_DIR}/bin/clBLAS
+ - mkdir -p ${CLBLAS_ROOT}
+ - pushd ${CLBLAS_ROOT}
+ - cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOCL_VERSION=2.0 -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}/src
+
+# use script: to execute build steps
+script:
- make package
-notifications:
- email:
- - clmath-developers at googlegroups.com
- on_success: change
- on_failure: always
-
\ No newline at end of file
+deploy:
+ provider: releases
+ prerelease: true
+ draft: true
+ skip_cleanup: true
+ api_key:
+ secure: MBkxtcfSk+4UvGRO+WRhmS86vIVzAs0LIF2sAtr/S+Ed+OdUAuhZypUsDXGWtK3mL55v9c8BZXefFfHfJqElcNmyHKwCptbCR/JiM8YBtjoy2/RW1NcJUZp+QuRlk23xPADj7QkPjv7dfrQUMitkLUXAD+uTmMe2l8gmlbhMrQqPBKhb+31FNv6Lmo6oa6GjbiGi7qjsrJc7uQjhppLam+M7BZbBALGbIqMIrb2BMDMMhBoDbb4zSKrSg3+krd3kKiCClJlK7xjIlyFXZ527ETQ+PMtIeQb0eJ3aQwa4caBRCm5BDzt8GnJ48S88EkynbQioCEE87ebcyOM7M+wfslW/Fm1Y86X5odIljkOmTNKoDvgLxc9vUCBtMyVHNIgZcToPdsrMsGxcHV+JtU3yVQVm6dnA5P/zG5bA+aBjsd7p7BdOE4fdhvZV5XRAk/wmiyWalF7hKJxHIiWAKknL+tpPDDUF+fHm [...]
+ file: ${CLBLAS_ROOT}/clBLAS-build/*.tar.gz
+ file_glob: true
+ on:
+ all_branches: true
+ tags: true
diff --git a/README.md b/README.md
index c7add19..eefc68b 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,11 @@
+## Build Status
+| Build branch | master | develop |
+|-----|-----|-----|
+| GCC/Clang x64 | [](https://travis-ci.org/clMathLibraries/clBLAS/branches) | [](https://travis-ci.org/clMathLibraries/clBLAS/branches) |
+| Visual Studio x64 | |[](https://ci.appveyor.com/project/clMathLibraries/clblas/branch/develop) |
+
clBLAS
=====
-[](https://travis-ci.org/clMathLibraries/clBLAS)
-
-
This repository houses the code for the OpenCL™ BLAS portion of clMath.
The complete set of BLAS level 1, 2 & 3 routines is implemented. Please
see Netlib BLAS for the list of supported routines. In addition to GPU
@@ -102,7 +105,7 @@ The simple example below shows how to use clBLAS to compute an OpenCL accelerate
11, 12, 13,
21, 22, 23,
31, 32, 33,
- 41, 42, 43,
+ 41, 42, 43,
};
static const size_t ldc = N; /* i.e. ldc = N */
@@ -147,7 +150,7 @@ The simple example below shows how to use clBLAS to compute an OpenCL accelerate
M * N * sizeof( *C ), C, 0, NULL, NULL );
/* Call clBLAS extended function. Perform gemm for the lower right sub-matrices */
- err = clblasSgemm( clblasRowMajor, clblasNoTrans, clblasNoTrans,
+ err = clblasSgemm( clblasRowMajor, clblasNoTrans, clblasNoTrans,
M, N, K,
alpha, bufA, 0, lda,
bufB, 0, ldb, beta,
diff --git a/appveyor.yml b/appveyor.yml
new file mode 100644
index 0000000..5e1462c
--- /dev/null
+++ b/appveyor.yml
@@ -0,0 +1,105 @@
+# Appveyor OS list
+# Windows Server 2012 R2 (x64) <== Appveyor default image
+# Visual Studio 2015
+
+# os: expands the build matrix to include multiple os's
+os:
+ - Windows Server 2012
+
+# platform: expands the build matrix to include multiple target platforms (per os)
+platform:
+ - x64
+
+configuration:
+ - Release
+
+# Only clone the top level commit; don't bother with history
+shallow_clone: true
+
+# environment: specifies additional global variables to define per row in build matrix
+environment:
+ global:
+ CLBLAS_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\nmake\\release"
+ OPENCL_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\opencl"
+ # BOOST_ROOT: "C:/Libraries/boost" # boost 1.56, 32-bit only
+ BOOST_ROOT: "C:\\Libraries\\boost_1_58_0"
+ OPENCL_REGISTRY: "https://www.khronos.org/registry/cl"
+
+init:
+ - echo init step
+ - cmake --version
+ - C:\"Program Files (x86)"\"Microsoft Visual Studio 12.0"\VC\vcvarsall.bat %PLATFORM%
+ # Uncomment the following to display Remote Desktop connection details
+ # - ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
+
+# We need to create an opencl import library that clblas can link against
+# Vendor based OpenCL packages are hard to use because of download size, registration requirements
+# and unattended installs not well supported
+install:
+ - echo install step
+ - ps: mkdir $env:OPENCL_ROOT
+ - ps: pushd $env:OPENCL_ROOT
+ - ps: $opencl_registry = $env:OPENCL_REGISTRY
+ # This downloads the source to the example/demo icd library
+ - ps: wget $opencl_registry/specs/opencl-icd-1.2.11.0.tgz -OutFile opencl-icd-1.2.11.0.tgz
+ - ps: 7z x opencl-icd-1.2.11.0.tgz
+ - ps: 7z x opencl-icd-1.2.11.0.tar
+ - ps: mv .\icd\* .
+ # This downloads all the opencl header files
+ # The cmake build files expect a directory called inc
+ - ps: mkdir inc/CL
+ - ps: wget $opencl_registry/api/1.2/ | select -ExpandProperty links | where {$_.href -like "*.h*"} | select -ExpandProperty outerText | foreach{ wget $opencl_registry/api/1.2/$_ -OutFile inc/CL/$_ }
+ # - ps: dir; if( $lastexitcode -eq 0 ){ dir include/CL } else { Write-Output boom }
+ # Create the static import lib in a directory called lib, so findopencl() will find it
+ - ps: mkdir lib
+ - ps: pushd lib
+ - cmake -G "NMake Makefiles" ..
+ - nmake
+ - ps: popd
+ # Rename the inc directory to include, so FindOpencl() will find it
+ - ps: ren inc include
+ - ps: popd
+ - ps: popd
+
+# before_build is used to run configure steps
+before_build:
+ - echo before_build step
+ # Boost 1.58 is not installed in typical fashion, help FindBoost() find binary libs with BOOST_LIBRARYDIR
+ - ps: $env:BOOST_LIBRARYDIR = "$env:BOOST_ROOT/lib64-msvc-12.0"
+ - ps: mkdir $env:CLBLAS_ROOT
+ - ps: pushd $env:CLBLAS_ROOT
+ - cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=%CONFIGURATION% -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOCL_VERSION=2.0 -DOPENCL_ROOT=%OPENCL_ROOT% %APPVEYOR_BUILD_FOLDER%/src
+
+# build_script invokes the compiler
+build_script:
+ - echo build_script step
+ - nmake package
+
+after_build:
+ - echo after_build step
+ - ps: ls $env:CLBLAS_ROOT
+ - ps: mv $env:CLBLAS_ROOT\*.zip $env:APPVEYOR_BUILD_FOLDER
+
+# Appveyor will save a copy of the package in its personal storage
+artifacts:
+ - path: '*.zip'
+ name: binary_zip
+ type: zip
+
+# on_finish always executes regardless of passed or failed builds
+on_finish:
+ - echo on_finish step
+
+# Appveyor will push the artifacts it has saved to GitHub 'releases' tab
+deploy:
+ provider: GitHub
+ auth_token:
+ secure: dRXIWJKpU7h2RsHX7RqmyYCtCw+Q9O3X5MArloY6p34GZC1w7bp+jQYTZqbdO7bw
+ artifact: binary_zip
+ draft: true
+ prerelease: true
+ on:
+ appveyor_repo_tag: true
+
+ # Uncomment the following to pause the VM and wait for RDP connection to debug
+ # - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
diff --git a/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp b/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp
index f638efb..32add3b 100644
--- a/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp
+++ b/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp
@@ -9,7 +9,11 @@
#include <iomanip>
#include <fstream>
//#include <Windows.h>
+#if defined( __APPLE__ ) || defined( __MACOSX )
+#include <OpenCL/cl.h>
+#else
#include <CL/cl.h>
+#endif
//#include "library/tools/ktest/naive/naive_blas.cpp"
//using namespace NaiveBlas;
#include "AutoGemmTools/AutoGemmUtil.h"
@@ -169,7 +173,7 @@ public:
}
printf("; fallback = %ux%u\n", tiles[fallbackTileIndex][0], tiles[fallbackTileIndex][1]);
-
+
printf("add(%4u,%4u) rule::valid =", rule.startSize, rule.startSize, rule.validTileIndices[0]);
for (unsigned int i = 0; i < rule.numValidTiles; i++) {
printf("%ux%u, ", tiles[rule.validTileIndices[i]][0], tiles[rule.validTileIndices[i]][1]);
@@ -381,7 +385,7 @@ void makeGemmKernel(
1, clKernel,
NULL );
CL_CHECK(err)
-
+
#if 0
// get kernel name
size_t kernelNameLength;
@@ -406,7 +410,7 @@ void makeGemmKernel(
}
}
-
+
/****************************************************************************
* Compare Matrices
***************************************************************************/
@@ -436,7 +440,7 @@ compareMatrices(
if (blasVal != naiveVal) {
equal = false;
}
-
+
if (blasVal != naiveVal) {
if (numPrint-- > 0) {
#if CGEMM || ZGEMM
@@ -530,7 +534,7 @@ float benchmarkKernel(
size_t K
) {
-
+
DATA_TYPE beta;
if (betaNonZero) {
beta = DATA_TYPE_CONSTRUCTOR(1, 0);
@@ -543,7 +547,7 @@ float benchmarkKernel(
bool needColKernel = N%macroTileNumCols > 0 && M/macroTileNumRows > 0;
bool needCornerKernel = M%macroTileNumRows > 0 && N%macroTileNumCols > 0;
-
+
#if 1
printf("Testing: %sgemm_%s_%s%s_%s_%03u_%03u_%02u\n",
#if SGEMM
@@ -631,7 +635,7 @@ float benchmarkKernel(
unsigned int microTileNumCols;
//printf("Creating kernel.\n");
- bool kernelFound =
+ bool kernelFound =
gemmSelectKernelSpecific<DATA_TYPE>(
order,
transA,
@@ -703,7 +707,7 @@ float benchmarkKernel(
size_t rowKernelGlobalWorkSize[2] = { 1*workGroupNumRows, (N/(macroTileNumCols))*workGroupNumCols };
size_t colKernelGlobalWorkSize[2] = { (M/(macroTileNumRows))*workGroupNumRows, 1*workGroupNumCols };
size_t cornerKernelGlobalWorkSize[2] = { 1*workGroupNumRows, 1*workGroupNumCols };
-
+
/****************************************************************************
* Row Kernel (along bottom of matrix)
***************************************************************************/
@@ -725,7 +729,7 @@ float benchmarkKernel(
totalEnqueues++;
// kernel dimensions
}
-
+
/****************************************************************************
* Col Kernel (along side of kernel)
***************************************************************************/
@@ -747,7 +751,7 @@ float benchmarkKernel(
totalEnqueues++;
// kernel dimensions
}
-
+
/****************************************************************************
* Corner Kernel (lower left corder of kernel)
***************************************************************************/
@@ -851,7 +855,7 @@ int main(void) {
// load tiles for precision
tiles = new unsigned int*[numTiles];
for (unsigned int i = 0; i < numTiles; i++) {
- tiles[i] =
+ tiles[i] =
#if SGEMM
sgemmTileEnumeration[i];
#elif DGEMM
@@ -873,7 +877,7 @@ int main(void) {
file << tile[0] << "x" << tile[1] << ", ";
}
file << "fallback, fastest, would-be valid tiles\n";
-
+
int *fallbackBegin = new int[numTiles]; // size at which tile starts being fallback
int *fallbackEnd = new int[numTiles]; // size at which tile stops being fallback
@@ -888,7 +892,7 @@ int main(void) {
validBegin[i] = -1;
validEnd[i] = -1;
}
-
+
platform = getPlatform(PLATFORM_NAME);
assert(platform != NULL);
device = getDevice(platform, DEVICE_NAME);
@@ -899,7 +903,7 @@ int main(void) {
queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
assert(queue != NULL);
-
+
clblasOrder order = clblasColumnMajor;
clblasTranspose transA = clblasNoTrans;
clblasTranspose transB = clblasTrans;
@@ -907,7 +911,7 @@ int main(void) {
unsigned int systemSizeMin = 16;
unsigned int systemSizeStep = 16;
-
+
//unsigned int kValues[] = {64, 512, 2048};
//unsigned int numKValues = 3;
unsigned int kValues[] = {0};
@@ -921,7 +925,7 @@ int main(void) {
kMax = systemSizeMax;
}
-
+
/******************************************************************
* Largest Matrix Dimension
*****************************************************************/
@@ -1010,7 +1014,7 @@ int main(void) {
ksrFile.open( ksrFileName, std::ios_base::out); // or ::app for append
KernelSelectionRules ksr(ksrFile);
for (unsigned int systemSize = systemSizeMin; systemSize <= systemSizeMax; systemSize += systemSizeStep) {
-
+
unsigned int M = systemSize;
unsigned int N = systemSize;
file << M << ", " << N << ", ";
@@ -1025,7 +1029,7 @@ int main(void) {
for (unsigned int kIdx = 0; kIdx < numKValues; kIdx++) {
unsigned int K = kValues[kIdx];
if (K == 0) K = systemSize;
-
+
// (3) for each tile
for (unsigned int tileIdx = 0; tileIdx < numTiles; tileIdx++) {
unsigned int *tile = tiles[tileIdx];
@@ -1047,7 +1051,7 @@ int main(void) {
M-1, N-1, K
);
fallbackScore[tileIdx] += fallbackSpeed;
-
+
/******************************************************************
* (5) tile speed
*****************************************************************/
@@ -1067,7 +1071,7 @@ int main(void) {
if (printDetails) printf("fs=%8.3f, ts=%8.3f\n", fallbackSpeed, tileSpeed );
} // tile sizes
-
+
} // for k
/**************************************************************
@@ -1082,7 +1086,7 @@ int main(void) {
tileScore[tileIdx] /= numKValues;
file << tileScore[tileIdx] << ", ";
}
-
+
/**************************************************************
* (7) get fastest fallback speed for this system size
@@ -1096,7 +1100,7 @@ int main(void) {
}
}
file << tiles[fastestFallbackIdx][0] << "x" << tiles[fastestFallbackIdx][1] << ", ";
-
+
/**************************************************************
* (8) ensure fallback tile has begun/ended
*************************************************************/
@@ -1105,7 +1109,7 @@ int main(void) {
//}
//fallbackEnd[fastestFallbackIdx] = static_cast<int>(systemSize); // push the end back farther
-
+
/**************************************************************
* (9) which tiles are valid for this system size
* - tile must be faster than fallback
@@ -1199,7 +1203,7 @@ int main(void) {
//free(C);
//free(naiveC);
//free(source);
-
+
//system("PAUSE");
//Sleep(5000); // ms
exit(EXIT_SUCCESS);
@@ -1375,4 +1379,3 @@ createKernel(
}
return kernel;
}
-
diff --git a/src/library/blas/AutoGemm/Includes.py b/src/library/blas/AutoGemm/Includes.py
index d656592..bb1969c 100644
--- a/src/library/blas/AutoGemm/Includes.py
+++ b/src/library/blas/AutoGemm/Includes.py
@@ -173,13 +173,21 @@ class ClKernelIncludes:
self.incFile.write( Common.getAutoGemmHeader() )
self.incStr = "#ifndef AUTOGEMM_CL_KERNELS_H\n"
self.incStr += "#define AUTOGEMM_CL_KERNELS_H\n"
- self.incStr += "#include \"CL/cl.h\"\n"
+ self.incStr += "#if defined( __APPLE__ ) || defined( __MACOSX )\n"
+ self.incStr += "#include <OpenCL/cl.h>\n"
+ self.incStr += "#else\n"
+ self.incStr += "#include <CL/cl.h>\n"
+ self.incStr += "#endif\n"
self.incStr += "\n"
self.cppName = Common.getIncludePath() + "AutoGemmClKernels.cpp"
self.cppFile = open(self.cppName, "w")
self.cppFile.write( Common.getAutoGemmHeader() )
- self.cppStr = "#include \"CL/cl.h\"\n"
+ self.cppStr = "#if defined( __APPLE__ ) || defined( __MACOSX )\n"
+ self.cppStr += "#include <OpenCL/cl.h>\n"
+ self.cppStr += "#else\n"
+ self.cppStr += "#include <CL/cl.h>\n"
+ self.cppStr += "#endif\n"
self.cppStr += "\n"
def addKernel(self, kernel):
@@ -455,4 +463,3 @@ if __name__ == "__main__":
else:
print "Warning: No output path specified; default is working directory."
writeIncludes()
-
diff --git a/src/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.h b/src/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.h
index 908bcf0..a98c0ad 100644
--- a/src/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.h
+++ b/src/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.h
@@ -1,7 +1,12 @@
#ifndef USERGEMM_CL_KERNELS_H
#define USERGEMM_CL_KERNELS_H
-#include "CL/cl.h"
+
+#if defined( __APPLE__ ) || defined( __MACOSX )
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
static cl_kernel sgemm_Col_NT_B1_MX128_NX128_KX16_clKernel = NULL;
@@ -15,4 +20,4 @@ static cl_kernel sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL;
static const int user_kernel_count = 7;
-#endif
\ No newline at end of file
+#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git
More information about the debian-science-commits
mailing list