[clblas] 49/75: Removing the pedantic flag from gcc compiles

Tue Jan 24 23:30:36 UTC 2017

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/master
in repository clblas.

commit e0df18b178ca36531ca4288c6469f0593c9ebea8
Author: Kent Knox <kent.knox at amd>
Date:   Wed Apr 20 16:57:58 2016 -0500

    Removing the pedantic flag from gcc compiles
    
    The library was not developed with the pedantic warning flag enabled, so
    every build emits a large volume of verbose warning messages.
    Having the flag enabled is not currently helpful.  It should be re-enabled
    once the warnings are fixed, to make the library more robust.
    
    Fixed various remaining warnings and #pragmas.
    Changed the allocation of a temp buffer in corr-trmv.cpp.
---
 .gitignore                                         |   3 -
 src/CMakeLists.txt                                 |   4 +-
 src/FindOpenCL.cmake                               | 143 +++++++++++++++------
 src/library/blas/AutoGemm/Includes.py              |   8 +-
 src/library/blas/AutoGemm/KernelOpenCL.py          |   2 +-
 src/library/blas/gens/trmm.c                       |   2 +-
 .../blas/trtri/diag_dtrtri_lower_128_16.cpp        |   1 -
 .../blas/trtri/diag_dtrtri_upper_128_16.cpp        |  15 +--
 .../blas/trtri/diag_dtrtri_upper_192_12.cpp        |   9 +-
 .../trtri/triple_dgemm_update_128_16_PART1_L.cpp   |   9 +-
 .../trtri/triple_dgemm_update_128_16_PART2_L.cpp   |   1 -
 .../blas/trtri/triple_dgemm_update_128_16_R.cpp    |  11 +-
 .../trtri/triple_dgemm_update_128_32_PART1_L.cpp   |   7 +-
 .../trtri/triple_dgemm_update_128_32_PART1_R.cpp   |   7 +-
 .../trtri/triple_dgemm_update_128_32_PART2_L.cpp   |   1 -
 .../trtri/triple_dgemm_update_128_32_PART2_R.cpp   |   1 -
 .../trtri/triple_dgemm_update_128_64_PART1_L.cpp   |   7 +-
 .../trtri/triple_dgemm_update_128_64_PART1_R.cpp   |   5 +-
 .../trtri/triple_dgemm_update_128_64_PART2_L.cpp   |   1 -
 .../trtri/triple_dgemm_update_128_64_PART2_R.cpp   |   1 -
 .../triple_dgemm_update_128_ABOVE64_PART1_L.cpp    |   7 +-
 .../triple_dgemm_update_128_ABOVE64_PART1_R.cpp    |   5 +-
 .../triple_dgemm_update_128_ABOVE64_PART2_L.cpp    |   1 -
 .../triple_dgemm_update_128_ABOVE64_PART2_R.cpp    |   1 -
 .../triple_dgemm_update_128_ABOVE64_PART3_L.cpp    |   1 -
 .../triple_dgemm_update_128_ABOVE64_PART3_R.cpp    |   1 -
 .../blas/trtri/triple_dgemm_update_192_12_R.cpp    |   5 +-
 .../trtri/triple_dgemm_update_192_24_PART1_R.cpp   |   1 -
 .../trtri/triple_dgemm_update_192_24_PART2_R.cpp   |   1 -
 .../trtri/triple_dgemm_update_192_48_PART1_R.cpp   |   3 +-
 .../trtri/triple_dgemm_update_192_48_PART2_R.cpp   |   1 -
 .../trtri/triple_dgemm_update_192_96_PART1_R.cpp   |   3 +-
 .../trtri/triple_dgemm_update_192_96_PART2_R.cpp   |   1 -
 src/library/blas/xgemm.cc                          |  33 +++--
 src/tests/CMakeLists.txt                           |  26 ++--
 src/tests/correctness/corr-trmv.cpp                |   2 +-
 src/tests/include/cmdline.h                        |   2 +-
 src/tests/include/matrix.h                         | 118 ++++++++---------
 38 files changed, 240 insertions(+), 210 deletions(-)

diff --git a/.gitignore b/.gitignore
index bb36278..d25acd5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,8 +24,5 @@
 # vim temp files
 .*.swp
 
-src/build/
-
 # python compiled files
 *.pyc
-
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7d90f28..33a91ee 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -266,7 +266,7 @@ if( BUILD_TEST )
 endif( )
 
 # This will define OPENCL_FOUND
-find_package( OpenCL )
+find_package( OpenCL ${OPENCL_VERSION} )
 
 # Find Boost on the system, and configure the type of boost build we want
 set( Boost_USE_MULTITHREADED ON )
@@ -288,7 +288,7 @@ endif()
 
 # Turn on maximum compiler verbosity
 if(CMAKE_COMPILER_IS_GNUCXX)
-    add_definitions(-pedantic -Wall -Wextra
+    add_definitions(# -pedantic -Wall -Wextra
         -D_POSIX_C_SOURCE=199309L -D_XOPEN_SOURCE=500
     )
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -Wstrict-prototypes" CACHE STRING
diff --git a/src/FindOpenCL.cmake b/src/FindOpenCL.cmake
index 746fbe6..9810dd2 100644
--- a/src/FindOpenCL.cmake
+++ b/src/FindOpenCL.cmake
@@ -1,5 +1,5 @@
 # ########################################################################
-# Copyright 2013 Advanced Micro Devices, Inc.
+# Copyright 2015 Advanced Micro Devices, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +14,6 @@
 # limitations under the License.
 # ########################################################################
 
-
 # Locate an OpenCL implementation.
 # Currently supports AMD APP SDK (http://developer.amd.com/sdks/AMDAPPSDK/Pages/default.aspx/)
 #
@@ -46,60 +45,122 @@
 #    target_link_libraries(foo ${OPENCL_LIBRARIES})
 #
 #-----------------------
+include( CheckSymbolExists )
+include( CMakePushCheckState )
+
+if( DEFINED OPENCL_ROOT OR DEFINED ENV{OPENCL_ROOT})
+  message( STATUS "Defined OPENCL_ROOT: ${OPENCL_ROOT}, ENV{OPENCL_ROOT}: $ENV{OPENCL_ROOT}" )
+endif( )
 
 find_path(OPENCL_INCLUDE_DIRS
-    NAMES OpenCL/cl.h CL/cl.h
-    HINTS
-        ${OPENCL_ROOT}/include
-        $ENV{AMDAPPSDKROOT}/include
-        $ENV{CUDA_PATH}/include
-    PATHS
-        /usr/include
-        /usr/local/include
-        /usr/local/cuda/include
-        /opt/cuda/include
-    DOC "OpenCL header file path"
+  NAMES OpenCL/cl.h CL/cl.h
+  HINTS
+    ${OPENCL_ROOT}/include
+    $ENV{OPENCL_ROOT}/include
+    $ENV{AMDAPPSDKROOT}/include
+    $ENV{CUDA_PATH}/include
+  PATHS
+    /usr/include
+    /usr/local/include
+    /usr/local/cuda/include
+  DOC "OpenCL header file path"
 )
 mark_as_advanced( OPENCL_INCLUDE_DIRS )
+message( STATUS "OPENCL_INCLUDE_DIRS: ${OPENCL_INCLUDE_DIRS}" )
+
+set( OpenCL_VERSION "0.0" )
+
+cmake_push_check_state( RESET )
+set( CMAKE_REQUIRED_INCLUDES "${OPENCL_INCLUDE_DIRS}" )
+
+# Bug in check_symbol_exists prevents us from specifying a list of files, so we select a single header per platform
+# Only one of OpenCL/cl.h (Apple) or CL/cl.h (everywhere else) is checked, so the output variables are not clobbered
+if( APPLE )
+   set( CL_HEADER_FILE "OpenCL/cl.h" )
+else( )
+   set( CL_HEADER_FILE "CL/cl.h" )
+endif( )
+
+check_symbol_exists( CL_VERSION_2_0 ${CL_HEADER_FILE} HAVE_CL_2_0 )
+check_symbol_exists( CL_VERSION_1_2 ${CL_HEADER_FILE} HAVE_CL_1_2 )
+check_symbol_exists( CL_VERSION_1_1 ${CL_HEADER_FILE} HAVE_CL_1_1 )
+# message( STATUS "HAVE_CL_2_0: ${HAVE_CL_2_0}" )
+# message( STATUS "HAVE_CL_1_2: ${HAVE_CL_1_2}" )
+# message( STATUS "HAVE_CL_1_1: ${HAVE_CL_1_1}" )
+
+# set OpenCL_VERSION to the highest detected version
+if( HAVE_CL_2_0 )
+  set( OpenCL_VERSION "2.0" )
+elseif( HAVE_CL_1_2 )
+  set( OpenCL_VERSION "1.2" )
+elseif( HAVE_CL_1_1 )
+  set( OpenCL_VERSION "1.1" )
+endif( )
+
+cmake_pop_check_state( )
 
 # Search for 64bit libs if FIND_LIBRARY_USE_LIB64_PATHS is set to true in the global environment, 32bit libs else
 get_property( LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS )
+if( LIB64 )
+  message( STATUS "FindOpenCL searching for 64-bit libraries" )
+else( )
+  message( STATUS "FindOpenCL searching for 32-bit libraries" )
+endif( )
 
 if( LIB64 )
-    find_library( OPENCL_LIBRARIES
-        NAMES OpenCL
-        HINTS
-            ${OPENCL_ROOT}/lib
-            $ENV{AMDAPPSDKROOT}/lib
-            $ENV{CUDA_PATH}/lib
-        DOC "OpenCL dynamic library path"
-        PATH_SUFFIXES x86_64 x64 x86_64/sdk
-        PATHS
-            /usr/lib
-            /usr/local/cuda/lib
-            /opt/cuda/lib
-    )
+  find_library( OPENCL_LIBRARIES
+    NAMES OpenCL
+    HINTS
+      ${OPENCL_ROOT}/lib
+      $ENV{OPENCL_ROOT}/lib
+      $ENV{AMDAPPSDKROOT}/lib
+      $ENV{CUDA_PATH}/lib
+    DOC "OpenCL dynamic library path"
+    PATH_SUFFIXES x86_64 x64 x86_64/sdk
+    PATHS
+    /usr/lib
+    /usr/local/cuda/lib
+  )
 else( )
-    find_library( OPENCL_LIBRARIES
-        NAMES OpenCL
-        HINTS
-            ${OPENCL_ROOT}/lib
-            $ENV{AMDAPPSDKROOT}/lib
-            $ENV{CUDA_PATH}/lib
-        DOC "OpenCL dynamic library path"
-        PATH_SUFFIXES x86 Win32
-        
-        PATHS
-            /usr/lib
-            /usr/local/cuda/lib
-            /opt/cuda/lib
-    )
+  find_library( OPENCL_LIBRARIES
+    NAMES OpenCL
+    HINTS
+      ${OPENCL_ROOT}/lib
+      $ENV{OPENCL_ROOT}/lib
+      $ENV{AMDAPPSDKROOT}/lib
+      $ENV{CUDA_PATH}/lib
+    DOC "OpenCL dynamic library path"
+    PATH_SUFFIXES x86 Win32
+    PATHS
+    /usr/lib
+    /usr/local/cuda/lib
+  )
 endif( )
 mark_as_advanced( OPENCL_LIBRARIES )
 
+# message( STATUS "OpenCL_FIND_VERSION: ${OpenCL_FIND_VERSION}" )
+if( OpenCL_VERSION VERSION_LESS OpenCL_FIND_VERSION )
+    message( FATAL_ERROR "Requested OpenCL version: ${OpenCL_FIND_VERSION}, Found OpenCL version: ${OpenCL_VERSION}" )
+endif( )
+
+# If we asked for OpenCL 1.2, and we found a version installed greater than that, pass the 'use deprecated' flag
+if( (OpenCL_FIND_VERSION VERSION_LESS "2.0") AND (OpenCL_VERSION VERSION_GREATER OpenCL_FIND_VERSION) )
+    add_definitions( -DCL_USE_DEPRECATED_OPENCL_2_0_APIS )
+
+    # If we asked for OpenCL 1.1, and we found a version installed greater than that, pass the 'use deprecated' flag
+    if( (OpenCL_FIND_VERSION VERSION_LESS "1.2") AND (OpenCL_VERSION VERSION_GREATER OpenCL_FIND_VERSION) )
+        add_definitions( -DCL_USE_DEPRECATED_OPENCL_1_1_APIS )
+    endif( )
+endif( )
+
 include( FindPackageHandleStandardArgs )
-FIND_PACKAGE_HANDLE_STANDARD_ARGS( OPENCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS )
+FIND_PACKAGE_HANDLE_STANDARD_ARGS( OPENCL
+    REQUIRED_VARS OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS
+    VERSION_VAR OpenCL_VERSION
+    )
 
 if( NOT OPENCL_FOUND )
     message( STATUS "FindOpenCL looked for libraries named: OpenCL" )
+else( )
+    message(STATUS "FindOpenCL ${OPENCL_LIBRARIES}, ${OPENCL_INCLUDE_DIRS}")
 endif()
diff --git a/src/library/blas/AutoGemm/Includes.py b/src/library/blas/AutoGemm/Includes.py
index 3c8435f..0f61695 100644
--- a/src/library/blas/AutoGemm/Includes.py
+++ b/src/library/blas/AutoGemm/Includes.py
@@ -113,7 +113,7 @@ class KernelBinaryIncludes:
     self.cppStr += "unsigned char *%s_bin = 0;\n" % kernelName
     self.cppStr += "        size_t %s_binSize = 0;\n" % kernelName
     self.cppStr += "#else\n"
-    self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
+    # self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
     self.cppStr += "#endif\n"
 
     kernelName = kernel.getRowName()
@@ -123,7 +123,7 @@ class KernelBinaryIncludes:
     self.cppStr += "unsigned char *%s_bin = 0;\n" % kernelName
     self.cppStr += "        size_t %s_binSize = 0;\n" % kernelName
     self.cppStr += "#else\n"
-    self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
+    # self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
     self.cppStr += "#endif\n"
 
     kernelName = kernel.getColName()
@@ -133,7 +133,7 @@ class KernelBinaryIncludes:
     self.cppStr += "unsigned char *%s_bin = 0;\n" % kernelName
     self.cppStr += "        size_t %s_binSize = 0;\n" % kernelName
     self.cppStr += "#else\n"
-    self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
+    # self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
     self.cppStr += "#endif\n"
 
     kernelName = kernel.getCornerName()
@@ -143,7 +143,7 @@ class KernelBinaryIncludes:
     self.cppStr += "unsigned char *%s_bin = 0;\n" % kernelName
     self.cppStr += "        size_t %s_binSize = 0;\n" % kernelName
     self.cppStr += "#else\n"
-    self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
+    # self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
     self.cppStr += "#endif\n"
 
     self.incFile.write( self.incStr )
diff --git a/src/library/blas/AutoGemm/KernelOpenCL.py b/src/library/blas/AutoGemm/KernelOpenCL.py
index d7835d5..87a5676 100644
--- a/src/library/blas/AutoGemm/KernelOpenCL.py
+++ b/src/library/blas/AutoGemm/KernelOpenCL.py
@@ -482,7 +482,7 @@ def writeOpenCLKernelToFile(kernel):
   kernelFile.write("\";\n")
   kernelFile.write("\n")
   kernelFile.write("#else\n")
-  kernelFile.write("#pragma message(\"AutoGemmKernelSources.cpp: %s was overriden by user kernel.\")\n" % kernel.getName() )
+  # kernelFile.write("#pragma message(\"AutoGemmKernelSources.cpp: %s was overriden by user kernel.\")\n" % kernel.getName() )
   kernelFile.write("#endif\n")
   kernelFile.close()
 
diff --git a/src/library/blas/gens/trmm.c b/src/library/blas/gens/trmm.c
index 7655af3..0c8f8b4 100644
--- a/src/library/blas/gens/trmm.c
+++ b/src/library/blas/gens/trmm.c
@@ -1245,7 +1245,7 @@ static int trmmGetDefaultDecomp( PGranularity *pgran,
     unsigned int subdimsNum,
     void *pArgs)
 {
-    (void*)subdimsNum;
+    DUMMY_ARG_USAGE(subdimsNum);
 
     if ( NULL == pArgs ) {
         return -EINVAL;
diff --git a/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp b/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp
index 1f7c19c..f3d6ca5 100644
--- a/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp
+++ b/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp
@@ -4,7 +4,6 @@
 
 #ifndef KERNEL_DIAG_DTRTRI_LOWER_128_16_SRC_CPP
 #define KERNEL_DIAG_DTRTRI_LOWER_128_16_SRC_CPP
-#pragma message("#define KERNEL_DIAG_DTRTRI_UPPER_128_16_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp b/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp
index bc9c296..f039b27 100644
--- a/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp
+++ b/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp
@@ -4,7 +4,6 @@
 
 #ifndef KERNEL_DIAG_DTRTRI_UPPER_128_16_SRC_CPP
 #define KERNEL_DIAG_DTRTRI_UPPER_128_16_SRC_CPP
-#pragma message("#define KERNEL_DIAG_DTRTRI_UPPER_128_16_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
@@ -64,17 +63,17 @@ uint na)\n
     {\n
       if(tx <= i && i+bx*BLOCK_SIZE < na )\n
         {\n
-	  Bs[i*BLOCK_SIZE+tx] = *(Aoff+i*lda+tx);\n    
+	  Bs[i*BLOCK_SIZE+tx] = *(Aoff+i*lda+tx);\n
         }\n
       else\n
         {\n
 	  Bs[i*BLOCK_SIZE+tx] = ZERO;\n
         }\n
-    }\n 
+    }\n
   // read in the whole square block of my A and zero out the non data triangular
- 
+
   // Synchronize to make sure the matrices are loaded
-  //__syncthreads(); 
+  //__syncthreads();
   barrier(CLK_LOCAL_MEM_FENCE);\n
 
   // solve the diagonals
@@ -92,7 +91,7 @@ uint na)\n
       else \n
 	{\n
 	  Bs[tx*BLOCK_SIZE+tx] = ONE / ( Bs[tx*BLOCK_SIZE+tx]) ;\n
-	}\n      
+	}\n
     }\n
     barrier(CLK_LOCAL_MEM_FENCE);\n
 
@@ -139,14 +138,14 @@ uint na)\n
     // __syncthreads();
     barrier(CLK_LOCAL_MEM_FENCE);\n
   }\n
-  
+
     // write back A
   _Pragma("unroll")\n
   for( i=0; i < BLOCK_SIZE; i++ )\n
   {\n
     *(d_dinvA+i*NB+tx) = Bs[i*BLOCK_SIZE+tx];\n
   }\n
-  
+
 }\n
 // end of kernel
 );
diff --git a/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp b/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp
index 0ffbebf..0d81ee2 100644
--- a/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp
+++ b/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp
@@ -4,7 +4,6 @@
 
 #ifndef KERNEL_DIAG_DTRTRI_UPPER_192_12_SRC_CPP
 #define KERNEL_DIAG_DTRTRI_UPPER_192_12_SRC_CPP
-#pragma message("#define KERNEL_DIAG_DTRTRI_UPPER_192_12_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
@@ -43,10 +42,10 @@ double neg_switcher; \n
 // Thread index
 int tx = get_local_id(0); \n
 
-// Thread index 
+// Thread index
 int gx = get_global_id(0); \n
 
-// Block index 
+// Block index
 int bx = get_group_id(0); \n
 
 A = A + offA; \n
@@ -56,7 +55,7 @@ int NumBLperNB = NB / BLOCK_SIZE; \n
 d_dinvA += bx / NumBLperNB*NB*NB + (bx % NumBLperNB)*(NB*BLOCK_SIZE + BLOCK_SIZE); \n
 
 __local double Bs[BLOCK_SIZE*BLOCK_SIZE]; \n
-__local double workspace[BLOCK_SIZE];\n    // workspace used to store the current working column 
+__local double workspace[BLOCK_SIZE];\n    // workspace used to store the current working column
 
 // load A \n
 _Pragma("unroll")\n
@@ -74,7 +73,7 @@ for (i = 0; i < BLOCK_SIZE; i++)\n
 // read in the whole square block of my A and zero out the non data triangular
 
 // Synchronize to make sure the matrices are loaded
-//__syncthreads(); 
+//__syncthreads();
 barrier(CLK_LOCAL_MEM_FENCE); \n
 
 // solve the diagonals
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_16_PART1_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_16_PART1_L.cpp
index c0e3b4c..f0c041f 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_16_PART1_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_16_PART1_L.cpp
@@ -5,7 +5,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART1_L_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART1_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART1_L_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
@@ -74,13 +73,13 @@ Ain = Ain + offAin; \n
 	int ya = page*blk * 2; \n
 	int incA = ya * lda + xa; \n
 
-	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???) 
+	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
 
 	int maxA; \n
 	if (xa < na)\n
-		maxA = lda*na; \n // macro READA will detect overflow on y dimension 
+		maxA = lda*na; \n // macro READA will detect overflow on y dimension
 	else\n
-	    maxA = 0; \n  // there is already an overflow on xa 
+	    maxA = 0; \n  // there is already an overflow on xa
 
 #define READA ( (incA < maxA ) ? Ain[incA] : 0 )  \n
 
@@ -139,7 +138,7 @@ Ain = Ain + offAin; \n
 		daxpy(a[1], &bs[13][0], c); \n
 		daxpy(a[2], &bs[14][0], c); \n
 		daxpy(a[3], &bs[15][0], c); \n
-	
+
 		B += 16; \n
 		//__syncthreads();
 		barrier(CLK_LOCAL_MEM_FENCE); \n
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_16_PART2_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_16_PART2_L.cpp
index 71c13dc..dbffeb9 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_16_PART2_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_16_PART2_L.cpp
@@ -5,7 +5,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART2_L_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART2_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART2_L_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_16_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_16_R.cpp
index 237d3fe..fd410a9 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_16_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_16_R.cpp
@@ -1,14 +1,13 @@
 /*******************************************************************************
  * Hand-tuned kernel
- 
+
  * B21 = -inv(A11)*A12*inv(A22)
  * 16 to 32
- 
+
  ******************************************************************************/
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_16_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
@@ -76,13 +75,13 @@ int PagesPerNB = NB / (blk * 2); \n
 	int ya = page*blk * 2 + blk; \n
 	int incA = ya * lda + xa; \n
 
-	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???) 
+	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
 
 	int maxA; \n
 	if (xa < na)\n
-		maxA = lda*na; \n // macro READA will detect overflow on y dimension 
+		maxA = lda*na; \n // macro READA will detect overflow on y dimension
 	else
-	    maxA = 0; \n // there is already an overflow on xa 
+	    maxA = 0; \n // there is already an overflow on xa
 
 #define READA ( (incA < maxA ) ? Ain[incA] : 0 )  \n
 
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_L.cpp
index 46b7e97..e4bde33 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_L.cpp
@@ -6,7 +6,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART1_L_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART1_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART1_L_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
@@ -73,13 +72,13 @@ int PagesPerNB = NB / (blk * 2); \n
 	int ya = page*blk * 2;\n
 	int incA = ya * lda + xa; \n
 
-	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???) 
+	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
 
 	int maxA; \n
 	if (xa < na)\n
-		maxA = lda*na; \n // macro READA will detect overflow on y dimension 
+		maxA = lda*na; \n // macro READA will detect overflow on y dimension
 	else\n
-		maxA = 0; \n // there is already an overflow on xa 
+		maxA = 0; \n // there is already an overflow on xa
 
 #define READA ( (incA < maxA ) ? Ain[incA] : 0 )  \n
 
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_R.cpp
index 3358af6..43760b6 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_R.cpp
@@ -7,7 +7,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART1_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART1_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART1_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
@@ -74,13 +73,13 @@ int PagesPerNB = NB / (blk * 2); \n
 	int ya = page*blk * 2 + blk; \n
 	int incA = ya * lda + xa; \n
 
-	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???) 
+	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
 
 	int maxA; \n
 	if (xa < na)\n
-		maxA = lda*na; \n  // macro READA will detect overflow on y dimension  
+		maxA = lda*na; \n  // macro READA will detect overflow on y dimension
 	else\n
-	    maxA = 0; \n // there is already an overflow on xa 
+	    maxA = 0; \n // there is already an overflow on xa
 
 #define READA ( (incA < maxA ) ? Ain[incA] : 0 )  \n
 
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_L.cpp
index e480d6b..12efa1a 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_L.cpp
@@ -6,7 +6,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART2_L_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART2_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART2_L_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_R.cpp
index 6c04dee..f0df069 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_R.cpp
@@ -7,7 +7,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART2_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART2_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART2_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_L.cpp
index eef824c..11fa10b 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_L.cpp
@@ -7,7 +7,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART1_L_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART1_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART1_L_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
@@ -73,13 +72,13 @@ int PagesPerNB = NB / (blk * 2); \n
 	int ya = page*blk * 2; \n
 	int incA = ya * lda + xa; \n
 
-	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???) 
+	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
 
 	int maxA; \n
 	if (xa < na)\n
-		maxA = lda*na; \n  // macro READA will detect overflow on y dimension 
+		maxA = lda*na; \n  // macro READA will detect overflow on y dimension
 	else\n
-		maxA = 0;\n  // there is already an overflow on xa 
+		maxA = 0;\n  // there is already an overflow on xa
 
 #define READA ( (incA < maxA ) ? Ain[incA] : 0 )  \n
 
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_R.cpp
index 0f64809..ad5d548 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_R.cpp
@@ -7,7 +7,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART1_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART1_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART1_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
@@ -74,13 +73,13 @@ int PagesPerNB = NB / (blk * 2); \n
 	int ya = page*blk * 2 + blk; \n
 	int incA = ya * lda + xa; \n
 
-	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???) 
+	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
 
 	int maxA; \n
 	if (xa < na)\n
 		maxA = lda*na; \n  // macro READA will detect overflow on y dimension
 	else\n
-	    maxA = 0; \n  // there is already an overflow on xa 
+	    maxA = 0; \n  // there is already an overflow on xa
 
 #define READA ( (incA < maxA ) ? Ain[incA] : 0 )  \n
 
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_L.cpp
index 83e0c7e..cf38785 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_L.cpp
@@ -7,7 +7,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART2_L_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART2_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART2_L_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_R.cpp
index 5ce3e42..923f476 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_R.cpp
@@ -7,7 +7,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART2_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART2_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART2_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_L.cpp
index af7f518..31a97fa 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_L.cpp
@@ -7,7 +7,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART1_L_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART1_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART1_L_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
@@ -74,13 +73,13 @@ int PagesPerNB = NB / (blk * 2); \n
 	int ya = page*blk * 2; \n
 	int incA = ya * lda + xa; \n
 
-	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???) 
+	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
 
 	int maxA; \n
 	if (xa < na)\n
-		maxA = lda*na; \n // macro READA will detect overflow on y dimension 
+		maxA = lda*na; \n // macro READA will detect overflow on y dimension
 	else\n
-		maxA = 0; \n // there is already an overflow on xa 
+		maxA = 0; \n // there is already an overflow on xa
 
 #define READA ( (incA < maxA ) ? Ain[incA] : 0 )  \n
 
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_R.cpp
index 51a3e87..315908e 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_R.cpp
@@ -7,7 +7,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART1_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART1_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART1_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
@@ -74,13 +73,13 @@ int PagesPerNB = NB / (blk * 2); \n
 	int ya = page*blk * 2 + blk; \n
 	int incA = ya * lda + xa; \n
 
-	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???) 
+	// maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
 
 	int maxA; \n
 	if (xa < na)\n
 		maxA = lda*na; \n   // macro READA will detect overflow on y dimension
 	else\n
-	    maxA = 0; \n // there is already an overflow on xa 
+	    maxA = 0; \n // there is already an overflow on xa
 
 #define READA ( (incA < maxA ) ? Ain[incA] : 0 )  \n
 
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_L.cpp
index 674fdd5..a4cd85c 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_L.cpp
@@ -7,7 +7,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART2_L_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART2_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART2_L_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_R.cpp
index a45494b..f13e19b 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_R.cpp
@@ -7,7 +7,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART2_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART2_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART2_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_L.cpp
index d2077bf..b576114 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_L.cpp
@@ -6,7 +6,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART3_L_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART3_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART3_L_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_R.cpp
index 004a8d2..adb22d7 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_R.cpp
@@ -8,7 +8,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART3_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART3_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART3_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_12_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_12_R.cpp
index 79bc4c0..4d645bc 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_12_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_12_R.cpp
@@ -1,14 +1,13 @@
 /*******************************************************************************
  * Hand-tuned kernel
- 
+
  * B21 = -inv(A11)*A12*inv(A22)
  * 12 to 24
- 
+
  ******************************************************************************/
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_12_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_192_12_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_12_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_24_PART1_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_24_PART1_R.cpp
index 6b62eb4..f6465d3 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_24_PART1_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_24_PART1_R.cpp
@@ -4,7 +4,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_24_PART1_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_192_24_PART1_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_24_PART1_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_24_PART2_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_24_PART2_R.cpp
index d8c2f99..1e46a8d 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_24_PART2_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_24_PART2_R.cpp
@@ -4,7 +4,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_24_PART2_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_192_24_PART2_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_24_PART2_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_48_PART1_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_48_PART1_R.cpp
index dafa65b..3dc0516 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_48_PART1_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_48_PART1_R.cpp
@@ -4,7 +4,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_48_PART1_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_192_48_PART1_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_48_PART1_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
@@ -54,7 +53,7 @@ const char * const triple_dgemm_update_192_48_PART1_R_src = STRINGIFY(
 	//each workgroup loads half of B (up or down)
 	B = d_dinvA + page_block*NB*NB + page*blk * 2 * NB + page*blk * 2 + blk*NB + blk + gidy*(blk / 2)*NB; \n
 
-	//decide invA12 location for each page; 
+	//decide invA12 location for each page;
 	//Actually this will be stored in invA21 temporarily
 	//each workgroup writes 1/4 of C
 	C = d_dinvA + page_block*NB*NB + page*blk * 2 * NB + page*blk * 2 + blk*NB + gidx % 2 * (blk / 2) + gidy*(blk / 2)*NB; \n
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_48_PART2_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_48_PART2_R.cpp
index 4571112..37ea0a2 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_48_PART2_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_48_PART2_R.cpp
@@ -4,7 +4,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_48_PART2_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_192_48_PART2_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_48_PART2_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_96_PART1_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_96_PART1_R.cpp
index 8e58ab6..1416ff3 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_96_PART1_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_96_PART1_R.cpp
@@ -4,7 +4,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_96_PART1_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_192_96_PART1_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_96_PART1_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
@@ -55,7 +54,7 @@ const char * const triple_dgemm_update_192_96_PART1_R_src = STRINGIFY(
 	//each workgroup loads 1/4 of B (up or down)
 	B = d_dinvA + page_block*NB*NB + blk*NB + blk + gidy*(blk / 4)*NB; \n
 
-	//decide invA12 location for each page; 
+	//decide invA12 location for each page;
 	//Actually this will be stored in invA21 temporarily
 	//each workgroup writes 1/4*1/4 of C
 	C = d_dinvA + page_block*NB*NB + blk*NB + gidx % 4 * (blk / 4) + gidy*(blk / 4)*NB; \n
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_96_PART2_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_96_PART2_R.cpp
index 2ff217d..9e961ff 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_96_PART2_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_96_PART2_R.cpp
@@ -4,7 +4,6 @@
 
 #ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_96_PART2_R_SRC_CPP
 #define KERNEL_TRIPLE_DGEMM_UPDATE_192_96_PART2_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_96_PART2_R_SRC_CPP.")
 
 #ifndef STRINGIFY
 #define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/xgemm.cc b/src/library/blas/xgemm.cc
index eb78112..a2c6cb0 100644
--- a/src/library/blas/xgemm.cc
+++ b/src/library/blas/xgemm.cc
@@ -170,7 +170,7 @@ void makeGemmKernel(
   #if defined( _WIN32 )
   __declspec( thread ) static kernel_map_t *kernel_map = 0;
 #else
-  __thread static kernel_map_t *kernel_map = 0;
+  static __thread kernel_map_t *kernel_map = 0;
 #endif
   if (!kernel_map) {
     kernel_map = new kernel_map_t();
@@ -317,11 +317,11 @@ void makeGemmKernel(
  * get precision string
  *****************************************************************************/
 template<typename Precision>
-char * getPrecision();
-template<> char * getPrecision<float>() { return "s"; }
-template<> char * getPrecision<double>() { return "d"; }
-template<> char * getPrecision<FloatComplex>()  { return "c"; }
-template<> char * getPrecision<DoubleComplex>() { return "z"; }
+const char * getPrecision();
+template<> const char * getPrecision<float>() { return "s"; }
+template<> const char * getPrecision<double>() { return "d"; }
+template<> const char * getPrecision<FloatComplex>()  { return "c"; }
+template<> const char * getPrecision<DoubleComplex>() { return "z"; }
 
 
 /******************************************************************************
@@ -500,7 +500,7 @@ clblasGemm(
     &unroll);
   // make sure gemmSelectKernel found a valid kernel
   if (!tileKernelSource) {
-    printf("ERROR: gemmSelectKernel() couldn't find kernel(s) for { order=%s, transA=%s, transB=%s, M=%llu, N=%llu, K=%llu, beta=%u, onept=%f }\n",
+    printf("ERROR: gemmSelectKernel() couldn't find kernel(s) for { order=%s, transA=%s, transB=%s, M=%u, N=%u, K=%u, beta=%u, onept=%f }\n",
       order==clblasColumnMajor ? "ColMajor" : "RowMajor",
       transA==clblasNoTrans ? "N" : transA==clblasTrans ? "T" : "C",
       transB==clblasNoTrans ? "N" : transB==clblasTrans ? "T" : "C",
@@ -566,8 +566,7 @@ clblasGemm(
 /******************************************************************************
  * Build kernels
  *****************************************************************************/
-  
-  
+
   cl_kernel  tileClKernel       = NULL;
   cl_kernel  rowClKernel        = NULL;
   cl_kernel  colClKernel        = NULL;
@@ -688,14 +687,14 @@ clblasSgemm(
   clblasErr = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET);
   if (clblasErr != clblasSuccess)
   	  return clblasErr;
-  
+
   if (K != 0)
   {
   	//check matrix A
   	clblasErr = checkMatrixSizes(TYPE_FLOAT, order, transA, M, K, A, offA, lda, A_MAT_ERRSET);
   	if (clblasErr != clblasSuccess)
   		return clblasErr;
-  
+
   	//check matrix B
 	clblasErr = checkMatrixSizes(TYPE_FLOAT, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET);
 	if (clblasErr != clblasSuccess)
@@ -748,14 +747,14 @@ clblasDgemm( clblasOrder order,
   clblasErr = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET);
   if (clblasErr != clblasSuccess)
   	  return clblasErr;
-  
+
   if (K != 0)
   {
   	//check matrix A
   	clblasErr = checkMatrixSizes(TYPE_DOUBLE, order, transA, M, K, A, offA, lda, A_MAT_ERRSET);
   	if (clblasErr != clblasSuccess)
   		return clblasErr;
-  
+
   	//check matrix B
   	clblasErr = checkMatrixSizes(TYPE_DOUBLE, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET);
   	if (clblasErr != clblasSuccess)
@@ -809,14 +808,14 @@ clblasCgemm(
   clblasErr = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET);
   if (clblasErr != clblasSuccess)
   	return clblasErr;
-  
+
   if (K != 0)
   {
   	//check matrix A
   	clblasErr = checkMatrixSizes(TYPE_COMPLEX_FLOAT, order, transA, M, K, A, offA, lda, A_MAT_ERRSET);
   	if (clblasErr != clblasSuccess)
   		return clblasErr;
-  
+
   	//check matrix B
   	clblasErr = checkMatrixSizes(TYPE_COMPLEX_FLOAT, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET);
   	if (clblasErr != clblasSuccess)
@@ -870,14 +869,14 @@ clblasZgemm(
   clblasErr = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET);
   if (clblasErr != clblasSuccess)
   	  return clblasErr;
-  
+
   if (K != 0)
   {
   	//check matrix A
   	clblasErr = checkMatrixSizes(TYPE_COMPLEX_DOUBLE, order, transA, M, K, A, offA, lda, A_MAT_ERRSET);
   	if (clblasErr != clblasSuccess)
   		return clblasErr;
-  
+
   	//check matrix B
   	clblasErr = checkMatrixSizes(TYPE_COMPLEX_DOUBLE, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET);
   	if (clblasErr != clblasSuccess)
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 9ecfd13..b3944aa 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,12 +1,12 @@
 # ########################################################################
 # Copyright 2013 Advanced Micro Devices, Inc.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 # http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -251,7 +251,7 @@ endif()
 # Having problems on build server, compiling gtest headers with -pedantic; disabling detection of long long
 # http://code.google.com/p/googletest/issues/detail?id=334
 if( CMAKE_COMPILER_IS_GNUCXX )
-	add_definitions( -Wno-long-long )
+	add_definitions( -Wno-long-long -Wno-variadic-macros )
 endif( )
 
 if( CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" )
@@ -259,7 +259,7 @@ if( CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" )
 
 	# By default, -Mipa=fast is used, and this does not mix well with the cl compiler
 	string( REPLACE "-Mipa=fast" "" CMAKE_Fortran_FLAGS_RELEASE ${CMAKE_Fortran_FLAGS_RELEASE} )
-	
+
 	# In windows, dynamically link to the C runtime, and tell fortran linker to not include default main subroutine
 	if( WIN32 )
 		set( CMAKE_EXE_LINKER_FLAGS "-Bdynamic -Mnostartup ${CMAKE_EXE_LINKER_FLAGS}" )
@@ -296,7 +296,7 @@ if( GTEST_FOUND )
 			${clBLAS_SOURCE_DIR}/tests/include  ${clBLAS_SOURCE_DIR}/include)
 
 		add_definitions(-DCORR_TEST_WITH_ACML)
-	    
+
 	    add_executable(test-correctness ${SRC_CORR} ${SRC_COMMON} ${SRC_COMMON_REFIMPL}
 									    ${CORR_HEADERS} ${TESTS_HEADERS})
         set_target_properties( test-correctness PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
@@ -311,8 +311,8 @@ if( GTEST_FOUND )
 	    set_target_properties(test-short PROPERTIES COMPILE_DEFINITIONS SHORT_TESTS)
         set_target_properties( test-short PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
 
-	    # The build server builds the library with gcc 4.1.2 to support Red Hat 5.5, but the test programs must be built with 
-	    # gcc > 4.3.2 to support ACML.  
+	    # The build server builds the library with gcc 4.1.2 to support Red Hat 5.5, but the test programs must be built with
+	    # gcc > 4.3.2 to support ACML.
 	    # If the runtime is being built by the project, use it, otherwise link to a runtime library specified in the install prefix
 	    if( BUILD_RUNTIME )
 		    target_link_libraries(test-correctness ${ACML_LIBRARIES} ${GTEST_LIBRARIES} ${THREAD_LIBRARY} clBLAS)
@@ -350,7 +350,7 @@ if( GTEST_FOUND )
 			set_target_properties( test-medium PROPERTIES LINKER_LANGUAGE Fortran )
 			set_target_properties( test-short PROPERTIES LINKER_LANGUAGE Fortran )
 		endif( )
-		
+
 	    if( BUILD_RUNTIME )
 			if( NETLIB_FOUND )
 				target_link_libraries(test-correctness ${Netlib_LIBRARIES} ${GTEST_LIBRARIES} ${THREAD_LIBRARY} clBLAS)
@@ -373,7 +373,7 @@ if( GTEST_FOUND )
 			endif( )
 		endif( )
     endif( )
-    
+
     set_property( TARGET test-correctness PROPERTY FOLDER "Test")
     set_property( TARGET test-medium PROPERTY FOLDER "Test")
     set_property( TARGET test-short PROPERTY FOLDER "Test")
@@ -384,7 +384,7 @@ if( GTEST_FOUND )
             LIBRARY DESTINATION lib${SUFFIX_LIB}
             ARCHIVE DESTINATION lib${SUFFIX_LIB}/import
             )
-    
+
     get_target_property( testLocation test-correctness LOCATION )
 
     configure_file(
@@ -395,7 +395,7 @@ if( GTEST_FOUND )
 
     # Register script at run at install time to analyze the executable and copy dependencies into package
     install( SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/copyTestDependencies.cmake")
- 
+
 	if( ACML_FOUND )
 		include_directories(${OPENCL_INCLUDE_DIRS} ${GTEST_INCLUDE_DIRS}
 			${clBLAS_SOURCE_DIR} ${clBLAS_SOURCE_DIR}/tests/include  ${clBLAS_SOURCE_DIR}/include)
@@ -430,7 +430,7 @@ if( GTEST_FOUND )
 
 	add_executable(test-functional ${SRC_FUNC} ${SRC_COMMON} ${SRC_COMMON_TIMER}
 								  ${FUNC_HEADERS} ${TESTS_HEADERS})
-								  
+
     set_target_properties( test-functional PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
 	if( BUILD_RUNTIME )
 		target_link_libraries(test-functional ${GTEST_LIBRARIES} ${TIME_LIBRARY} ${THREAD_LIBRARY} clBLAS )
diff --git a/src/tests/correctness/corr-trmv.cpp b/src/tests/correctness/corr-trmv.cpp
index 7e97d6c..a8c7151 100644
--- a/src/tests/correctness/corr-trmv.cpp
+++ b/src/tests/correctness/corr-trmv.cpp
@@ -127,7 +127,7 @@ trmvCorrectnessTest(TestParams *params)
 	// Allocate buffers
     bufA = base->createEnqueueBuffer(A, (lengthA + params->offa)* sizeof(*A), 0, CL_MEM_READ_ONLY);
     bufX = base->createEnqueueBuffer(clblasX, (lengthX + params->offBX)* sizeof(*clblasX), 0, CL_MEM_WRITE_ONLY);
-    bufXTemp = base->createEnqueueBuffer(NULL, lengthX * sizeof(*clblasX), 0, CL_MEM_READ_ONLY);
+    bufXTemp = base->createEnqueueBuffer(NULL, lengthX * sizeof(*clblasX), 0, CL_MEM_READ_WRITE);
 
 	//printData( "bufX", blasX, lengthX, 1, lengthX);
 	//printData( "clblasX", clblasX, lengthX, 1, lengthX);
diff --git a/src/tests/include/cmdline.h b/src/tests/include/cmdline.h
index 68ddfba..b767973 100644
--- a/src/tests/include/cmdline.h
+++ b/src/tests/include/cmdline.h
@@ -44,7 +44,7 @@ typedef enum SetoptFlags {
     SET_INCY  = (1 << 9),
     SET_NUM_COMMAND_QUEUES = (1 << 10),
     SET_DEVICE_ORD = (1 << 11),
-    SET_PLATFORM_ORD = (1 << 12),
+    SET_PLATFORM_ORD = (1 << 12)
 } SetoptFlags;
 
 typedef struct TestParams {
diff --git a/src/tests/include/matrix.h b/src/tests/include/matrix.h
index 65757ad..8794f0b 100644
--- a/src/tests/include/matrix.h
+++ b/src/tests/include/matrix.h
@@ -310,21 +310,21 @@ compareMatrices(
     const cl_double *absDelta = NULL)
 {
     size_t m = 0, n = 0;
-    T a, b;
+    T ref, clresult;
     cl_double delta;
 
     if( lda > 0 ) // General case
     {
     for (m = 0; m < M; m++) {
         for (n = 0; n < N; n++) {
-            a = getElement<T>(order, clblasNoTrans, m, n, A, lda);
-            b = getElement<T>(order, clblasNoTrans, m, n, B, lda);
+            ref = getElement<T>(order, clblasNoTrans, m, n, A, lda);
+            clresult = getElement<T>(order, clblasNoTrans, m, n, B, lda);
             delta = 0.0;
             if (absDelta != NULL) {
                 delta = absDelta[m * N + n];
             }
-			if( module(a-b) > delta )		printf("m : %d\t n: %d\n", (int)m, (int)n);
-            ASSERT_NEAR(a, b, delta);
+			if( module(ref-clresult) > delta )		printf("m : %d\t n: %d\n", (int)m, (int)n);
+            ASSERT_NEAR(ref, clresult, delta);
         }
     }
     }
@@ -336,14 +336,14 @@ compareMatrices(
 		{
 			for( m=n; m < M; m++)
 			{
-            			a = getElement<T>(order, clblasNoTrans, m, n, A, lda);
-			        b = getElement<T>(order, clblasNoTrans, m, n, B, lda);
+            			ref = getElement<T>(order, clblasNoTrans, m, n, A, lda);
+			        clresult = getElement<T>(order, clblasNoTrans, m, n, B, lda);
             			delta = 0.0;
             			if (absDelta != NULL) {
                 			//delta = absDelta[m * N + n];
             			}
-						if( module(a-b) > delta )		printf("m : %d\t n: %d\n", (int)m, (int)n);
-            			ASSERT_NEAR(a, b, delta);
+						if( module(ref-clresult) > delta )		printf("m : %d\t n: %d\n", (int)m, (int)n);
+            			ASSERT_NEAR(ref, clresult, delta);
 			}
 		}
 	}
@@ -353,14 +353,14 @@ compareMatrices(
 		{
 			for( n = 0; n <= m; n++)
 			{
-            			a = getElement<T>(order, clblasNoTrans, m, n, A, lda);
-			        b = getElement<T>(order, clblasNoTrans, m, n, B, lda);
+            			ref = getElement<T>(order, clblasNoTrans, m, n, A, lda);
+			        clresult = getElement<T>(order, clblasNoTrans, m, n, B, lda);
             			delta = 0.0;
             			if (absDelta != NULL) {
                 			//delta = absDelta[m * N + n];
             			}
-						if( module(a-b) > delta )		printf("m : %d\t n: %d\n", (int)m, (int)n);
-            			ASSERT_NEAR(a, b, delta);
+						if( module(ref-clresult) > delta )		printf("m : %d\t n: %d\n", (int)m, (int)n);
+            			ASSERT_NEAR(ref, clresult, delta);
 			}
 		}
 	}
@@ -379,23 +379,23 @@ compareMatrices<FloatComplex>(
     const cl_double *absDelta)
 {
     size_t m = 0, n = 0;
-    FloatComplex a, b;
+    FloatComplex ref, clresult;
     cl_double delta;
 
 if ( lda > 0 )
 {
     for (m = 0; m < M; m++) {
         for (n = 0; n < N; n++) {
-            a = getElement<FloatComplex>(order, clblasNoTrans, m, n, A, lda);
-            b = getElement<FloatComplex>(order, clblasNoTrans, m, n, B, lda);
+            ref = getElement<FloatComplex>(order, clblasNoTrans, m, n, A, lda);
+            clresult = getElement<FloatComplex>(order, clblasNoTrans, m, n, B, lda);
             delta = 0.0;
             if (absDelta != NULL) {
                 delta = absDelta[m * N + n];
             }
-			if( (module(CREAL(a) - CREAL(b)) > delta) || (module(CIMAG(a) - CIMAG(b)) > delta) )
+			if( (module(CREAL(ref) - CREAL(clresult)) > delta) || (module(CIMAG(ref) - CIMAG(clresult)) > delta) )
 					printf("m : %d\t n: %d\n", (int)m, (int)n);
-            ASSERT_NEAR(CREAL(a), CREAL(b), delta);
-            ASSERT_NEAR(CIMAG(a), CIMAG(b), delta);
+            ASSERT_NEAR(CREAL(ref), CREAL(clresult), delta);
+            ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), delta);
         }
     }
 }
@@ -407,16 +407,16 @@ if ( lda > 0 )
 		{
 			for( m=n; m < M; m++)
 			{
-            			a = getElement<FloatComplex>(order, clblasNoTrans, m, n, A, lda);
-				        b = getElement<FloatComplex>(order, clblasNoTrans, m, n, B, lda);
+            			ref = getElement<FloatComplex>(order, clblasNoTrans, m, n, A, lda);
+				        clresult = getElement<FloatComplex>(order, clblasNoTrans, m, n, B, lda);
             			delta = 0.0;
             			if (absDelta != NULL) {
                 			//delta = absDelta[m * N + n];
             			}
-            			if( (module(CREAL(a) - CREAL(b)) > delta) || (module(CIMAG(a) - CIMAG(b)) > delta) )
+            			if( (module(CREAL(ref) - CREAL(clresult)) > delta) || (module(CIMAG(ref) - CIMAG(clresult)) > delta) )
 							printf("m : %d\t n: %d\n", (int)m, (int)n);
-            			ASSERT_NEAR(CREAL(a), CREAL(b), delta);
-		            	ASSERT_NEAR(CIMAG(a), CIMAG(b), delta);
+            			ASSERT_NEAR(CREAL(ref), CREAL(clresult), delta);
+		            	ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), delta);
 			}
 		}
 	}
@@ -426,16 +426,16 @@ if ( lda > 0 )
 		{
 			for( n = 0; n <= m; n++)
 			{
-            			a = getElement<FloatComplex>(order, clblasNoTrans, m, n, A, lda);
-			        b = getElement<FloatComplex>(order, clblasNoTrans, m, n, B, lda);
+            			ref = getElement<FloatComplex>(order, clblasNoTrans, m, n, A, lda);
+			        clresult = getElement<FloatComplex>(order, clblasNoTrans, m, n, B, lda);
             			delta = 0.0;
             			if (absDelta != NULL) {
                 			//delta = absDelta[m * N + n];
             			}
-						if( (module(CREAL(a) - CREAL(b)) > delta) || (module(CIMAG(a) - CIMAG(b)) > delta) )
+						if( (module(CREAL(ref) - CREAL(clresult)) > delta) || (module(CIMAG(ref) - CIMAG(clresult)) > delta) )
 							printf("m : %d\t n: %d\n", (int)m, (int)n);
-            			ASSERT_NEAR(CREAL(a), CREAL(b), delta);
-		            	ASSERT_NEAR(CIMAG(a), CIMAG(b), delta);
+            			ASSERT_NEAR(CREAL(ref), CREAL(clresult), delta);
+		            	ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), delta);
 			}
 		}
 	}
@@ -455,22 +455,22 @@ compareMatrices<DoubleComplex>(
     const cl_double *absDelta)
 {
     size_t m = 0, n = 0;
-    DoubleComplex a, b;
+    DoubleComplex ref, clresult;
     cl_double delta;
 if( lda > 0 )
 {
     for (m = 0; m < M; m++) {
         for (n = 0; n < N; n++) {
-            a = getElement<DoubleComplex>(order, clblasNoTrans, m, n, A, lda);
-            b = getElement<DoubleComplex>(order, clblasNoTrans, m, n, B, lda);
+            ref = getElement<DoubleComplex>(order, clblasNoTrans, m, n, A, lda);
+            clresult = getElement<DoubleComplex>(order, clblasNoTrans, m, n, B, lda);
             delta = 0.0;
             if (absDelta != NULL) {
                 delta = absDelta[m * N + n];
             }
-			if( (module(CREAL(a) - CREAL(b)) > delta) || (module(CIMAG(a) - CIMAG(b)) > delta) )
+			if( (module(CREAL(ref) - CREAL(clresult)) > delta) || (module(CIMAG(ref) - CIMAG(clresult)) > delta) )
 					printf("m : %d\t n: %d\n", (int)m, (int)n);
-            ASSERT_NEAR(CREAL(a), CREAL(b), delta);
-            ASSERT_NEAR(CIMAG(a), CIMAG(b), delta);
+            ASSERT_NEAR(CREAL(ref), CREAL(clresult), delta);
+            ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), delta);
         }
     }
 }
@@ -482,16 +482,16 @@ if( lda > 0 )
 		{
 			for( m=n; m < M; m++)
 			{
-            			a = getElement<DoubleComplex>(order, clblasNoTrans, m, n, A, lda);
-			        b = getElement<DoubleComplex>(order, clblasNoTrans, m, n, B, lda);
+            			ref = getElement<DoubleComplex>(order, clblasNoTrans, m, n, A, lda);
+			        clresult = getElement<DoubleComplex>(order, clblasNoTrans, m, n, B, lda);
             			delta = 0.0;
             			if (absDelta != NULL) {
                 			//delta = absDelta[m * N + n];
             			}
-						if( (module(CREAL(a) - CREAL(b)) > delta) || (module(CIMAG(a) - CIMAG(b)) > delta) )
+						if( (module(CREAL(ref) - CREAL(clresult)) > delta) || (module(CIMAG(ref) - CIMAG(clresult)) > delta) )
 							printf("m : %d\t n: %d\n", (int)m, (int)n);
-            			ASSERT_NEAR(CREAL(a), CREAL(b), delta);
-		            	ASSERT_NEAR(CIMAG(a), CIMAG(b), delta);
+            			ASSERT_NEAR(CREAL(ref), CREAL(clresult), delta);
+		            	ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), delta);
 			}
 		}
 	}
@@ -501,16 +501,16 @@ if( lda > 0 )
 		{
 			for( n = 0; n <= m; n++)
 			{
-            			a = getElement<DoubleComplex>(order, clblasNoTrans, m, n, A, lda);
-			        b = getElement<DoubleComplex>(order, clblasNoTrans, m, n, B, lda);
+            			ref = getElement<DoubleComplex>(order, clblasNoTrans, m, n, A, lda);
+			        clresult = getElement<DoubleComplex>(order, clblasNoTrans, m, n, B, lda);
             			delta = 0.0;
             			if (absDelta != NULL) {
                 			//delta = absDelta[m * N + n];
             			}
-						if( (module(CREAL(a) - CREAL(b)) > delta) || (module(CIMAG(a) - CIMAG(b)) > delta) )
+						if( (module(CREAL(ref) - CREAL(clresult)) > delta) || (module(CIMAG(ref) - CIMAG(clresult)) > delta) )
 							printf("m : %d\t n: %d\n", (int)m, (int)n);
-            			ASSERT_NEAR(CREAL(a), CREAL(b), delta);
-		            	ASSERT_NEAR(CIMAG(a), CIMAG(b), delta);
+            			ASSERT_NEAR(CREAL(ref), CREAL(clresult), delta);
+		            	ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), delta);
 			}
 		}
 	}
@@ -764,10 +764,10 @@ template <typename T>
 static void compareValues(
     const T *A, const T *B, const cl_double absDelta=0.0 )
 {
-    T a, b;
-    a = *A;
-    b = *B;
-    ASSERT_NEAR(a, b, absDelta);
+    T ref, clresult;
+    ref = *A;
+    clresult = *B;
+    ASSERT_NEAR(ref, clresult, absDelta);
 }
 
  template<>
@@ -775,12 +775,12 @@ __template_static void
 compareValues<FloatComplex> (
     const FloatComplex *A, const FloatComplex *B, const cl_double absDelta )
 {
-    FloatComplex a, b;
+    FloatComplex ref, clresult;
 
-    a = *A;
-    b = *B;
-    ASSERT_NEAR(CREAL(a), CREAL(b), absDelta);
-    ASSERT_NEAR(CIMAG(a), CIMAG(b), absDelta);
+    ref = *A;
+    clresult = *B;
+    ASSERT_NEAR(CREAL(ref), CREAL(clresult), absDelta);
+    ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), absDelta);
 }
 
  template<>
@@ -788,11 +788,11 @@ __template_static void
 compareValues<DoubleComplex> (
     const DoubleComplex *A, const DoubleComplex *B, const cl_double absDelta )
 {
-    DoubleComplex a, b;
+    DoubleComplex ref, clresult;
 
-    a = *A;
-    b = *B;
-    ASSERT_NEAR(CREAL(a), CREAL(b), absDelta);
-    ASSERT_NEAR(CIMAG(a), CIMAG(b), absDelta);
+    ref = *A;
+    clresult = *B;
+    ASSERT_NEAR(CREAL(ref), CREAL(clresult), absDelta);
+    ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), absDelta);
 }
 #endif  // MATRIX_H_

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git