[clblas] 32/67: Integrating new travis and appveyor build yaml scripts

Tue Oct 27 08:02:12 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clblas.

commit 0a08f16c9f223171669e4a6918135980aa970890
Author: Kent Knox <kent.knox at amd>
Date:   Wed Sep 23 10:06:06 2015 -0500

    Integrating new travis and appveyor build yaml scripts
---
 .travis.yml                                        | 168 +++++++++++++++------
 README.md                                          |  13 +-
 appveyor.yml                                       | 105 +++++++++++++
 .../AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp     |  53 ++++---
 src/library/blas/AutoGemm/Includes.py              |  13 +-
 .../UserGemmKernelSources/UserGemmClKernels.h      |   9 +-
 6 files changed, 282 insertions(+), 79 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index f6289e9..366b2a8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,54 +1,134 @@
+# Ubuntu name decoder ring; https://en.wikipedia.org/wiki/List_of_Ubuntu_releases
+# Ubuntu 12.04 LTS (Precise Pangolin) <== Travis CI VM image
+# Ubuntu 12.10 (Quantal Quetzal)
+# Ubuntu 13.04 (Raring Ringtail)
+# Ubuntu 13.10 (Saucy Salamander)
+# Ubuntu 14.04 LTS (Trusty Tahr)
+# Ubuntu 14.10 (Utopic Unicorn)
+# Ubuntu 15.04 (Vivid Vervet)
+# Ubuntu 15.10 (Wily Werewolf)
+# Ubuntu 16.04 LTS (Xenial Xerus)
+
+# language: instructs travis what compilers && environment to set up in build matrix
 language: cpp
 
+# sudo: false instructs travis to build our project in a docker VM (faster)
+# Can not yet install fglrx packages with 'false'
+sudo: required # false
+
+# os: expands the build matrix to include multiple os's
+# NOTE: linux builds have shown sporadic failures when building boost (needs investigation); linux is currently left enabled
+os:
+  - linux
+  - osx
+
+# compiler: expands the build matrix to include multiple compilers (per os)
 compiler:
   - gcc
+  - clang
+
+addons:
+  # apt: is disabled on osx builds
+  # apt: needed by docker framework to install project dependencies without
+  # sudo.  Apt uses published Ubuntu PPAs from https://launchpad.net/
+  # https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json
+  apt:
+    sources:
+      # ubuntu-toolchain-r-test contains newer versions of gcc to install
+      # - ubuntu-toolchain-r-test
+      # llvm-toolchain-precise-3.6 contains newer versions of clang to install
+      # - llvm-toolchain-precise-3.6
+      # kubuntu-backports contains newer versions of cmake to install
+      - kubuntu-backports
+      # boost-latest contains boost v1.55
+      - boost-latest
+    packages:
+      # g++-4.8 is minimum version considered to be the first good c++11 gnu compiler
+      # - g++-4.8
+      # - clang-3.6
+      # We require v2.8.12 minimum
+      - cmake
+      # I'm finding problems between pre-compiled versions of boost ublas, with gtest
+      # stl_algobase.h: error: no matching function for call to swap()
+      - libboost-program-options1.55-dev
+      # - libboost-serialization1.55-dev
+      # - libboost-filesystem1.55-dev
+      # - libboost-system1.55-dev
+      # - libboost-regex1.55-dev
+      # The package opencl-headers on 'precise' only installs v1.1 cl headers; uncomment for 'trusty' or greater
+#      - opencl-headers
+      # Uncomment one of the following when fglrx modules are added to the apt whitelist
+#      - fglrx
+#      - fglrx=2:8.960-0ubuntu1
+#      - fglrx=2:13.350.1-0ubuntu0.0.1
+
+# env: specifies additional global variables to define per row in build matrix
+env:
+  global:
+    - CLBLAS_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release
+
+# The following filters our build matrix; we are interested in linux-gcc & osx-clang
+matrix:
+  exclude:
+    - os: linux
+      compiler: clang
+    - os: osx
+      compiler: gcc
 
 before_install:
-  - sudo apt-get update -qq
-  - sudo apt-get install -qq fglrx libboost-program-options-dev
-# Uncomment below to help verify the installs above work
-#  - ls -la /usr/lib/libboost*
-#  - ls -la /usr/include/boost
+  # Remove the following linux clause when fglrx can be installed with sudo: false
+  - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
+      sudo apt-get update -qq &&
+      sudo apt-get install -qq fglrx=2:13.350.1-0ubuntu0.0.1;
+    fi
+  - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
+      export OPENCL_ROOT="${TRAVIS_BUILD_DIR}/opencl-headers";
+    fi
+  - if [ ${TRAVIS_OS_NAME} == "osx" ]; then
+      brew update;
+      brew outdated boost || brew upgrade boost;
+      brew outdated cmake || brew upgrade cmake;
+    fi
+  # - if [ ${CXX} = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi
+  - cmake --version;
+  - ${CC} --version;
+  - ${CXX} --version;
 
+install:
+  # 'Precise' only distributes v1.1 opencl headers; download 1.2 headers from khronos website
+  # Remove when the travis VM upgrades to 'trusty' or beyond
+  - if [ ${TRAVIS_OS_NAME} == "linux" ]; then
+      mkdir -p ${OPENCL_ROOT}/include/CL;
+      pushd ${OPENCL_ROOT}/include/CL;
+      wget -w 1 -r -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/;
+      popd;
+    fi
+  # osx image does not contain cl.hpp file; download from Khronos
+  # - if [ ${TRAVIS_OS_NAME} == "osx" ]; then
+  #     pushd /System/Library/Frameworks/OpenCL.framework/Versions/A/Headers/;
+  #     sudo wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/cl.hpp;
+  #     popd;
+  #   fi
+
+# Use before_script: to run configure steps
 before_script:
-  - cd ${TRAVIS_BUILD_DIR}
-# download OpenCL 1.2 header files since Travis CI only provides 1.1
-  - mkdir -p OpenCLInclude/CL
-  - cd OpenCLInclude/CL
-  #- wget -r --no-parent -nH --cut-dirs=4 --reject="index.html*" https://www.khronos.org/registry/cl/api/1.2/
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl.hpp
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_d3d10.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_d3d11.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_dx9_media_sharing.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_egl.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_ext.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_gl.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_gl_ext.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/cl_platform.h
-  - wget https://www.khronos.org/registry/cl/api/1.2/opencl.h
-  - ls
-  - pwd
-  - cd ../..
-  - mkdir -p bin/clBLAS
-  - cd bin/clBLAS
-  - cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOPENCL_INCLUDE_DIRS:PATH=$PWD/../../OpenCLInclude -DCMAKE_INSTALL_PREFIX:PATH=$PWD/package ../../src
-
-script: 
-  - make install
-#  - ls -Rla package
-# Run a simple test to validate that the build works; CPU device in a VM
-  - cd package/bin
-  - export LD_LIBRARY_PATH=${TRAVIS_BUILD_DIR}/bin/clBLAS/package/lib64:${LD_LIBRARY_PATH}
-  - ./clBLAS-client --cpu
-
-after_success:
-  - cd ${TRAVIS_BUILD_DIR}/bin/clBLAS
+  - mkdir -p ${CLBLAS_ROOT}
+  - pushd ${CLBLAS_ROOT}
+  - cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOCL_VERSION=2.0 -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}/src
+
+# use script: to execute build steps
+script:
   - make package
 
-notifications:
-   email:
-     - clmath-developers at googlegroups.com
-   on_success: change
-   on_failure: always
-   
\ No newline at end of file
+deploy:
+  provider: releases
+  prerelease: true
+  draft: true
+  skip_cleanup: true
+  api_key:
+    secure: MBkxtcfSk+4UvGRO+WRhmS86vIVzAs0LIF2sAtr/S+Ed+OdUAuhZypUsDXGWtK3mL55v9c8BZXefFfHfJqElcNmyHKwCptbCR/JiM8YBtjoy2/RW1NcJUZp+QuRlk23xPADj7QkPjv7dfrQUMitkLUXAD+uTmMe2l8gmlbhMrQqPBKhb+31FNv6Lmo6oa6GjbiGi7qjsrJc7uQjhppLam+M7BZbBALGbIqMIrb2BMDMMhBoDbb4zSKrSg3+krd3kKiCClJlK7xjIlyFXZ527ETQ+PMtIeQb0eJ3aQwa4caBRCm5BDzt8GnJ48S88EkynbQioCEE87ebcyOM7M+wfslW/Fm1Y86X5odIljkOmTNKoDvgLxc9vUCBtMyVHNIgZcToPdsrMsGxcHV+JtU3yVQVm6dnA5P/zG5bA+aBjsd7p7BdOE4fdhvZV5XRAk/wmiyWalF7hKJxHIiWAKknL+tpPDDUF+fHm [...]
+  file: ${CLBLAS_ROOT}/clBLAS-build/*.tar.gz
+  file_glob: true
+  on:
+    all_branches: true
+    tags: true
diff --git a/README.md b/README.md
index c7add19..eefc68b 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,11 @@
+## Build Status
+| Build branch | master | develop |
+|-----|-----|-----|
+| GCC/Clang x64 | [![Build Status](https://travis-ci.org/clMathLibraries/clBLAS.svg?branch=master)](https://travis-ci.org/clMathLibraries/clBLAS/branches) | [![Build Status](https://travis-ci.org/clMathLibraries/clBLAS.svg?branch=develop)](https://travis-ci.org/clMathLibraries/clBLAS/branches) |
+| Visual Studio x64 |  |[![Build status](https://ci.appveyor.com/api/projects/status/9yfwi3n31pj7a2og/branch/develop?svg=true)](https://ci.appveyor.com/project/clMathLibraries/clblas/branch/develop) |
+
 clBLAS
 =====
-[![Build Status](https://travis-ci.org/clMathLibraries/clBLAS.png)](https://travis-ci.org/clMathLibraries/clBLAS)
-
-
 This repository houses the code for the OpenCL™ BLAS portion of clMath.
 The complete set of BLAS level 1, 2 & 3 routines is implemented. Please
 see Netlib BLAS for the list of supported routines. In addition to GPU
@@ -102,7 +105,7 @@ The simple example below shows how to use clBLAS to compute an OpenCL accelerate
         11, 12, 13,
         21, 22, 23,
         31, 32, 33,
-        41, 42, 43, 
+        41, 42, 43,
     };
     static const size_t ldc = N;        /* i.e. ldc = N */
 
@@ -147,7 +150,7 @@ The simple example below shows how to use clBLAS to compute an OpenCL accelerate
             M * N * sizeof( *C ), C, 0, NULL, NULL );
 
         /* Call clBLAS extended function. Perform gemm for the lower right sub-matrices */
-        err = clblasSgemm( clblasRowMajor, clblasNoTrans, clblasNoTrans, 
+        err = clblasSgemm( clblasRowMajor, clblasNoTrans, clblasNoTrans,
                                 M, N, K,
                                 alpha, bufA, 0, lda,
                                 bufB, 0, ldb, beta,
diff --git a/appveyor.yml b/appveyor.yml
new file mode 100644
index 0000000..5e1462c
--- /dev/null
+++ b/appveyor.yml
@@ -0,0 +1,105 @@
+# Appveyor OS list
+# Windows Server 2012 R2 (x64) <== Appveyor default image
+# Visual Studio 2015
+
+# os: expands the build matrix to include multiple os's
+os:
+  - Windows Server 2012
+
+# platform: expands the build matrix to include multiple target architectures (per os)
+platform:
+  - x64
+
+configuration:
+  - Release
+
+# Only clone the top level commit; don't bother with history
+shallow_clone: true
+
+# environment: specifies additional global variables to define per row in build matrix
+environment:
+  global:
+    CLBLAS_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\nmake\\release"
+    OPENCL_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\opencl"
+    # BOOST_ROOT: "C:/Libraries/boost"   # boost 1.56, 32-bit only
+    BOOST_ROOT: "C:\\Libraries\\boost_1_58_0"
+    OPENCL_REGISTRY: "https://www.khronos.org/registry/cl"
+
+init:
+  - echo init step
+  - cmake --version
+  - C:\"Program Files (x86)"\"Microsoft Visual Studio 12.0"\VC\vcvarsall.bat %PLATFORM%
+  # Uncomment the following to display Remote Desktop connection details
+  # - ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
+
+# We need to create an opencl import library that clblas can link against
+# Vendor based OpenCL packages are hard to use because of download size, registration requirements
+# and unattended installs not well supported
+install:
+  - echo install step
+  - ps: mkdir $env:OPENCL_ROOT
+  - ps: pushd $env:OPENCL_ROOT
+  - ps: $opencl_registry = $env:OPENCL_REGISTRY
+  # This downloads the source to the example/demo icd library
+  - ps: wget $opencl_registry/specs/opencl-icd-1.2.11.0.tgz -OutFile opencl-icd-1.2.11.0.tgz
+  - ps: 7z x opencl-icd-1.2.11.0.tgz
+  - ps: 7z x opencl-icd-1.2.11.0.tar
+  - ps: mv .\icd\* .
+  # This downloads all the opencl header files
+  # The cmake build files expect a directory called inc
+  - ps: mkdir inc/CL
+  - ps: wget $opencl_registry/api/1.2/ | select -ExpandProperty links | where {$_.href -like "*.h*"} | select -ExpandProperty outerText | foreach{ wget $opencl_registry/api/1.2/$_ -OutFile inc/CL/$_ }
+  # - ps: dir; if( $lastexitcode -eq 0 ){ dir include/CL } else { Write-Output boom }
+  # Create the static import lib in a directory called lib, so findopencl() will find it
+  - ps: mkdir lib
+  - ps: pushd lib
+  - cmake -G "NMake Makefiles" ..
+  - nmake
+  - ps: popd
+  # Rename the inc directory to include, so FindOpencl() will find it
+  - ps: ren inc include
+  - ps: popd
+  - ps: popd
+
+# before_build is used to run configure steps
+before_build:
+  - echo before_build step
+  # Boost 1.58 is not installed in typical fashion, help FindBoost() find binary libs with BOOST_LIBRARYDIR
+  - ps: $env:BOOST_LIBRARYDIR = "$env:BOOST_ROOT/lib64-msvc-12.0"
+  - ps: mkdir $env:CLBLAS_ROOT
+  - ps: pushd $env:CLBLAS_ROOT
+  - cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=%CONFIGURATION% -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOCL_VERSION=2.0 -DOPENCL_ROOT=%OPENCL_ROOT% %APPVEYOR_BUILD_FOLDER%/src
+
+# build_script invokes the compiler
+build_script:
+  - echo build_script step
+  - nmake package
+
+after_build:
+  - echo after_build step
+  - ps: ls $env:CLBLAS_ROOT
+  - ps: mv $env:CLBLAS_ROOT\*.zip $env:APPVEYOR_BUILD_FOLDER
+
+# Appveyor will save a copy of the package in its own storage
+artifacts:
+  - path: '*.zip'
+    name: binary_zip
+    type: zip
+
+# on_finish always executes regardless of passed or failed builds
+on_finish:
+  - echo on_finish step
+
+# Appveyor will push the artifacts it has saved to GitHub 'releases' tab
+deploy:
+  provider: GitHub
+  auth_token:
+    secure: dRXIWJKpU7h2RsHX7RqmyYCtCw+Q9O3X5MArloY6p34GZC1w7bp+jQYTZqbdO7bw
+  artifact: binary_zip
+  draft: true
+  prerelease: true
+  on:
+    appveyor_repo_tag: true
+
+  # Uncomment the following to pause the VM and wait for RDP connection to debug
+  # - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
diff --git a/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp b/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp
index f638efb..32add3b 100644
--- a/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp
+++ b/src/library/blas/AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp
@@ -9,7 +9,11 @@
 #include <iomanip>
 #include <fstream>
 //#include <Windows.h>
+#if defined( __APPLE__ ) || defined( __MACOSX )
+#include <OpenCL/cl.h>
+#else
 #include <CL/cl.h>
+#endif
 //#include "library/tools/ktest/naive/naive_blas.cpp"
 //using namespace NaiveBlas;
 #include "AutoGemmTools/AutoGemmUtil.h"
@@ -169,7 +173,7 @@ public:
     }
     printf("; fallback = %ux%u\n", tiles[fallbackTileIndex][0], tiles[fallbackTileIndex][1]);
 
-    
+
     printf("add(%4u,%4u)  rule::valid =", rule.startSize, rule.startSize, rule.validTileIndices[0]);
     for (unsigned int i = 0; i < rule.numValidTiles; i++) {
       printf("%ux%u, ", tiles[rule.validTileIndices[i]][0], tiles[rule.validTileIndices[i]][1]);
@@ -381,7 +385,7 @@ void makeGemmKernel(
       1, clKernel,
       NULL );
     CL_CHECK(err)
-    
+
 #if 0
     // get kernel name
     size_t kernelNameLength;
@@ -406,7 +410,7 @@ void makeGemmKernel(
   }
 }
 
- 
+
 /****************************************************************************
  * Compare Matrices
  ***************************************************************************/
@@ -436,7 +440,7 @@ compareMatrices(
             if (blasVal != naiveVal) {
               equal = false;
             }
-            
+
             if (blasVal != naiveVal) {
               if (numPrint-- > 0) {
 #if CGEMM || ZGEMM
@@ -530,7 +534,7 @@ float benchmarkKernel(
   size_t K
   ) {
 
-    
+
   DATA_TYPE beta;
   if (betaNonZero) {
     beta = DATA_TYPE_CONSTRUCTOR(1, 0);
@@ -543,7 +547,7 @@ float benchmarkKernel(
   bool needColKernel = N%macroTileNumCols > 0 && M/macroTileNumRows > 0;
   bool needCornerKernel = M%macroTileNumRows > 0 && N%macroTileNumCols > 0;
 
-    
+
 #if 1
   printf("Testing: %sgemm_%s_%s%s_%s_%03u_%03u_%02u\n",
 #if SGEMM
@@ -631,7 +635,7 @@ float benchmarkKernel(
   unsigned int microTileNumCols;
 
     //printf("Creating kernel.\n");
-  bool kernelFound = 
+  bool kernelFound =
   gemmSelectKernelSpecific<DATA_TYPE>(
     order,
     transA,
@@ -703,7 +707,7 @@ float benchmarkKernel(
   size_t rowKernelGlobalWorkSize[2] = { 1*workGroupNumRows, (N/(macroTileNumCols))*workGroupNumCols };
   size_t colKernelGlobalWorkSize[2] = { (M/(macroTileNumRows))*workGroupNumRows, 1*workGroupNumCols };
   size_t cornerKernelGlobalWorkSize[2] = { 1*workGroupNumRows, 1*workGroupNumCols };
-  
+
   /****************************************************************************
    * Row Kernel (along bottom of matrix)
    ***************************************************************************/
@@ -725,7 +729,7 @@ float benchmarkKernel(
     totalEnqueues++;
     // kernel dimensions
   }
-  
+
   /****************************************************************************
    * Col Kernel (along side of kernel)
    ***************************************************************************/
@@ -747,7 +751,7 @@ float benchmarkKernel(
     totalEnqueues++;
     // kernel dimensions
   }
-  
+
   /****************************************************************************
    * Corner Kernel (lower left corder of kernel)
    ***************************************************************************/
@@ -851,7 +855,7 @@ int main(void) {
   // load tiles for precision
   tiles = new unsigned int*[numTiles];
   for (unsigned int i = 0; i < numTiles; i++) {
-    tiles[i] = 
+    tiles[i] =
 #if SGEMM
           sgemmTileEnumeration[i];
 #elif DGEMM
@@ -873,7 +877,7 @@ int main(void) {
     file << tile[0] << "x" << tile[1] << ", ";
   }
   file << "fallback, fastest, would-be valid tiles\n";
-      
+
 
   int *fallbackBegin = new int[numTiles]; // size at which tile starts being fallback
   int   *fallbackEnd = new int[numTiles]; // size at which tile stops being fallback
@@ -888,7 +892,7 @@ int main(void) {
        validBegin[i] = -1;
          validEnd[i] = -1;
   }
-  
+
   platform = getPlatform(PLATFORM_NAME);
   assert(platform != NULL);
   device = getDevice(platform, DEVICE_NAME);
@@ -899,7 +903,7 @@ int main(void) {
   queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
   assert(queue != NULL);
 
-  
+
   clblasOrder order = clblasColumnMajor;
   clblasTranspose transA = clblasNoTrans;
   clblasTranspose transB = clblasTrans;
@@ -907,7 +911,7 @@ int main(void) {
 
   unsigned int systemSizeMin = 16;
   unsigned int systemSizeStep = 16;
-    
+
   //unsigned int kValues[] = {64, 512, 2048};
   //unsigned int numKValues = 3;
   unsigned int kValues[] = {0};
@@ -921,7 +925,7 @@ int main(void) {
     kMax = systemSizeMax;
   }
 
-  
+
   /******************************************************************
    * Largest Matrix Dimension
    *****************************************************************/
@@ -1010,7 +1014,7 @@ int main(void) {
   ksrFile.open( ksrFileName, std::ios_base::out); // or ::app for append
   KernelSelectionRules ksr(ksrFile);
   for (unsigned int systemSize = systemSizeMin; systemSize <= systemSizeMax; systemSize += systemSizeStep) {
-          
+
     unsigned int M = systemSize;
     unsigned int N = systemSize;
     file << M << ", " << N << ", ";
@@ -1025,7 +1029,7 @@ int main(void) {
     for (unsigned int kIdx = 0; kIdx < numKValues; kIdx++) {
       unsigned int K = kValues[kIdx];
       if (K == 0) K = systemSize;
-      
+
       // (3) for each tile
       for (unsigned int tileIdx = 0; tileIdx < numTiles; tileIdx++) {
         unsigned int *tile = tiles[tileIdx];
@@ -1047,7 +1051,7 @@ int main(void) {
             M-1, N-1, K
             );
         fallbackScore[tileIdx] += fallbackSpeed;
-        
+
         /******************************************************************
          * (5) tile speed
          *****************************************************************/
@@ -1067,7 +1071,7 @@ int main(void) {
 
         if (printDetails) printf("fs=%8.3f, ts=%8.3f\n", fallbackSpeed, tileSpeed );
       } // tile sizes
-      
+
     } // for k
 
       /**************************************************************
@@ -1082,7 +1086,7 @@ int main(void) {
         tileScore[tileIdx] /= numKValues;
         file << tileScore[tileIdx] << ", ";
       }
-      
+
 
       /**************************************************************
        * (7) get fastest fallback speed for this system size
@@ -1096,7 +1100,7 @@ int main(void) {
         }
       }
       file << tiles[fastestFallbackIdx][0] << "x" << tiles[fastestFallbackIdx][1] << ", ";
-      
+
       /**************************************************************
        * (8) ensure fallback tile has begun/ended
        *************************************************************/
@@ -1105,7 +1109,7 @@ int main(void) {
       //}
       //fallbackEnd[fastestFallbackIdx] = static_cast<int>(systemSize); // push the end back farther
 
-      
+
       /**************************************************************
        * (9) which tiles are valid for this system size
        * - tile must be faster than fallback
@@ -1199,7 +1203,7 @@ int main(void) {
     //free(C);
     //free(naiveC);
     //free(source);
-  
+
     //system("PAUSE");
     //Sleep(5000); // ms
     exit(EXIT_SUCCESS);
@@ -1375,4 +1379,3 @@ createKernel(
     }
     return kernel;
 }
-
diff --git a/src/library/blas/AutoGemm/Includes.py b/src/library/blas/AutoGemm/Includes.py
index d656592..bb1969c 100644
--- a/src/library/blas/AutoGemm/Includes.py
+++ b/src/library/blas/AutoGemm/Includes.py
@@ -173,13 +173,21 @@ class ClKernelIncludes:
     self.incFile.write( Common.getAutoGemmHeader() )
     self.incStr = "#ifndef AUTOGEMM_CL_KERNELS_H\n"
     self.incStr += "#define AUTOGEMM_CL_KERNELS_H\n"
-    self.incStr += "#include \"CL/cl.h\"\n"
+    self.incStr += "#if defined( __APPLE__ ) || defined( __MACOSX )\n"
+    self.incStr += "#include <OpenCL/cl.h>\n"
+    self.incStr += "#else\n"
+    self.incStr += "#include <CL/cl.h>\n"
+    self.incStr += "#endif\n"
     self.incStr += "\n"
 
     self.cppName = Common.getIncludePath() + "AutoGemmClKernels.cpp"
     self.cppFile = open(self.cppName, "w")
     self.cppFile.write( Common.getAutoGemmHeader() )
-    self.cppStr  = "#include \"CL/cl.h\"\n"
+    self.cppStr = "#if defined( __APPLE__ ) || defined( __MACOSX )\n"
+    self.cppStr += "#include <OpenCL/cl.h>\n"
+    self.cppStr += "#else\n"
+    self.cppStr += "#include <CL/cl.h>\n"
+    self.cppStr += "#endif\n"
     self.cppStr += "\n"
 
   def addKernel(self, kernel):
@@ -455,4 +463,3 @@ if __name__ == "__main__":
   else:
     print "Warning: No output path specified; default is working directory."
   writeIncludes()
-
diff --git a/src/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.h b/src/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.h
index 908bcf0..a98c0ad 100644
--- a/src/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.h
+++ b/src/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.h
@@ -1,7 +1,12 @@
 
 #ifndef USERGEMM_CL_KERNELS_H
 #define USERGEMM_CL_KERNELS_H
-#include "CL/cl.h"
+
+#if defined( __APPLE__ ) || defined( __MACOSX )
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
 
 static cl_kernel sgemm_Col_NT_B1_MX128_NX128_KX16_clKernel = NULL;
 
@@ -15,4 +20,4 @@ static cl_kernel sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_clKernel = NULL;
 
 static const int user_kernel_count = 7;
 
-#endif
\ No newline at end of file
+#endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git