[clblas] 49/75: Removing the pedantic flag from gcc compiles
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Jan 24 23:30:36 UTC 2017
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/master
in repository clblas.
commit e0df18b178ca36531ca4288c6469f0593c9ebea8
Author: Kent Knox <kent.knox at amd>
Date: Wed Apr 20 16:57:58 2016 -0500
Removing the pedantic flag from gcc compiles
The library was not developed with the pedantic warning flag enabled, and
the build outputs a volume of verbose warning messages on every build.
It's not currently helpful to have this enabled. This flag should be enabled
again as the warnings get fixed, to enable a more robust library.
Fixed various remaining warnings and #pragmas.
Changed the allocation of a temp buffer in corr-trmv.cpp
---
.gitignore | 3 -
src/CMakeLists.txt | 4 +-
src/FindOpenCL.cmake | 143 +++++++++++++++------
src/library/blas/AutoGemm/Includes.py | 8 +-
src/library/blas/AutoGemm/KernelOpenCL.py | 2 +-
src/library/blas/gens/trmm.c | 2 +-
.../blas/trtri/diag_dtrtri_lower_128_16.cpp | 1 -
.../blas/trtri/diag_dtrtri_upper_128_16.cpp | 15 +--
.../blas/trtri/diag_dtrtri_upper_192_12.cpp | 9 +-
.../trtri/triple_dgemm_update_128_16_PART1_L.cpp | 9 +-
.../trtri/triple_dgemm_update_128_16_PART2_L.cpp | 1 -
.../blas/trtri/triple_dgemm_update_128_16_R.cpp | 11 +-
.../trtri/triple_dgemm_update_128_32_PART1_L.cpp | 7 +-
.../trtri/triple_dgemm_update_128_32_PART1_R.cpp | 7 +-
.../trtri/triple_dgemm_update_128_32_PART2_L.cpp | 1 -
.../trtri/triple_dgemm_update_128_32_PART2_R.cpp | 1 -
.../trtri/triple_dgemm_update_128_64_PART1_L.cpp | 7 +-
.../trtri/triple_dgemm_update_128_64_PART1_R.cpp | 5 +-
.../trtri/triple_dgemm_update_128_64_PART2_L.cpp | 1 -
.../trtri/triple_dgemm_update_128_64_PART2_R.cpp | 1 -
.../triple_dgemm_update_128_ABOVE64_PART1_L.cpp | 7 +-
.../triple_dgemm_update_128_ABOVE64_PART1_R.cpp | 5 +-
.../triple_dgemm_update_128_ABOVE64_PART2_L.cpp | 1 -
.../triple_dgemm_update_128_ABOVE64_PART2_R.cpp | 1 -
.../triple_dgemm_update_128_ABOVE64_PART3_L.cpp | 1 -
.../triple_dgemm_update_128_ABOVE64_PART3_R.cpp | 1 -
.../blas/trtri/triple_dgemm_update_192_12_R.cpp | 5 +-
.../trtri/triple_dgemm_update_192_24_PART1_R.cpp | 1 -
.../trtri/triple_dgemm_update_192_24_PART2_R.cpp | 1 -
.../trtri/triple_dgemm_update_192_48_PART1_R.cpp | 3 +-
.../trtri/triple_dgemm_update_192_48_PART2_R.cpp | 1 -
.../trtri/triple_dgemm_update_192_96_PART1_R.cpp | 3 +-
.../trtri/triple_dgemm_update_192_96_PART2_R.cpp | 1 -
src/library/blas/xgemm.cc | 33 +++--
src/tests/CMakeLists.txt | 26 ++--
src/tests/correctness/corr-trmv.cpp | 2 +-
src/tests/include/cmdline.h | 2 +-
src/tests/include/matrix.h | 118 ++++++++---------
38 files changed, 240 insertions(+), 210 deletions(-)
diff --git a/.gitignore b/.gitignore
index bb36278..d25acd5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,8 +24,5 @@
# vim temp files
.*.swp
-src/build/
-
# python compiled files
*.pyc
-
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7d90f28..33a91ee 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -266,7 +266,7 @@ if( BUILD_TEST )
endif( )
# This will define OPENCL_FOUND
-find_package( OpenCL )
+find_package( OpenCL ${OPENCL_VERSION} )
# Find Boost on the system, and configure the type of boost build we want
set( Boost_USE_MULTITHREADED ON )
@@ -288,7 +288,7 @@ endif()
# Turn on maximum compiler verbosity
if(CMAKE_COMPILER_IS_GNUCXX)
- add_definitions(-pedantic -Wall -Wextra
+ add_definitions(# -pedantic -Wall -Wextra
-D_POSIX_C_SOURCE=199309L -D_XOPEN_SOURCE=500
)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -Wstrict-prototypes" CACHE STRING
diff --git a/src/FindOpenCL.cmake b/src/FindOpenCL.cmake
index 746fbe6..9810dd2 100644
--- a/src/FindOpenCL.cmake
+++ b/src/FindOpenCL.cmake
@@ -1,5 +1,5 @@
# ########################################################################
-# Copyright 2013 Advanced Micro Devices, Inc.
+# Copyright 2015 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,7 +14,6 @@
# limitations under the License.
# ########################################################################
-
# Locate an OpenCL implementation.
# Currently supports AMD APP SDK (http://developer.amd.com/sdks/AMDAPPSDK/Pages/default.aspx/)
#
@@ -46,60 +45,122 @@
# target_link_libraries(foo ${OPENCL_LIBRARIES})
#
#-----------------------
+include( CheckSymbolExists )
+include( CMakePushCheckState )
+
+if( DEFINED OPENCL_ROOT OR DEFINED ENV{OPENCL_ROOT})
+ message( STATUS "Defined OPENCL_ROOT: ${OPENCL_ROOT}, ENV{OPENCL_ROOT}: $ENV{OPENCL_ROOT}" )
+endif( )
find_path(OPENCL_INCLUDE_DIRS
- NAMES OpenCL/cl.h CL/cl.h
- HINTS
- ${OPENCL_ROOT}/include
- $ENV{AMDAPPSDKROOT}/include
- $ENV{CUDA_PATH}/include
- PATHS
- /usr/include
- /usr/local/include
- /usr/local/cuda/include
- /opt/cuda/include
- DOC "OpenCL header file path"
+ NAMES OpenCL/cl.h CL/cl.h
+ HINTS
+ ${OPENCL_ROOT}/include
+ $ENV{OPENCL_ROOT}/include
+ $ENV{AMDAPPSDKROOT}/include
+ $ENV{CUDA_PATH}/include
+ PATHS
+ /usr/include
+ /usr/local/include
+ /usr/local/cuda/include
+ DOC "OpenCL header file path"
)
mark_as_advanced( OPENCL_INCLUDE_DIRS )
+message( STATUS "OPENCL_INCLUDE_DIRS: ${OPENCL_INCLUDE_DIRS}" )
+
+set( OpenCL_VERSION "0.0" )
+
+cmake_push_check_state( RESET )
+set( CMAKE_REQUIRED_INCLUDES "${OPENCL_INCLUDE_DIRS}" )
+
+# Bug in check_symbol_exists prevents us from specifying a list of files, so we select a single header per platform
+# Only 1 of these files will exist on a system, so only that platform's header is checked
+if( APPLE )
+ set( CL_HEADER_FILE "OpenCL/cl.h" )
+else( )
+ set( CL_HEADER_FILE "CL/cl.h" )
+endif( )
+
+check_symbol_exists( CL_VERSION_2_0 ${CL_HEADER_FILE} HAVE_CL_2_0 )
+check_symbol_exists( CL_VERSION_1_2 ${CL_HEADER_FILE} HAVE_CL_1_2 )
+check_symbol_exists( CL_VERSION_1_1 ${CL_HEADER_FILE} HAVE_CL_1_1 )
+# message( STATUS "HAVE_CL_2_0: ${HAVE_CL_2_0}" )
+# message( STATUS "HAVE_CL_1_2: ${HAVE_CL_1_2}" )
+# message( STATUS "HAVE_CL_1_1: ${HAVE_CL_1_1}" )
+
+# set OpenCL_VERSION to the highest detected version
+if( HAVE_CL_2_0 )
+ set( OpenCL_VERSION "2.0" )
+elseif( HAVE_CL_1_2 )
+ set( OpenCL_VERSION "1.2" )
+elseif( HAVE_CL_1_1 )
+ set( OpenCL_VERSION "1.1" )
+endif( )
+
+cmake_pop_check_state( )
# Search for 64bit libs if FIND_LIBRARY_USE_LIB64_PATHS is set to true in the global environment, 32bit libs else
get_property( LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS )
+if( LIB64 )
+ message( STATUS "FindOpenCL searching for 64-bit libraries" )
+else( )
+ message( STATUS "FindOpenCL searching for 32-bit libraries" )
+endif( )
if( LIB64 )
- find_library( OPENCL_LIBRARIES
- NAMES OpenCL
- HINTS
- ${OPENCL_ROOT}/lib
- $ENV{AMDAPPSDKROOT}/lib
- $ENV{CUDA_PATH}/lib
- DOC "OpenCL dynamic library path"
- PATH_SUFFIXES x86_64 x64 x86_64/sdk
- PATHS
- /usr/lib
- /usr/local/cuda/lib
- /opt/cuda/lib
- )
+ find_library( OPENCL_LIBRARIES
+ NAMES OpenCL
+ HINTS
+ ${OPENCL_ROOT}/lib
+ $ENV{OPENCL_ROOT}/lib
+ $ENV{AMDAPPSDKROOT}/lib
+ $ENV{CUDA_PATH}/lib
+ DOC "OpenCL dynamic library path"
+ PATH_SUFFIXES x86_64 x64 x86_64/sdk
+ PATHS
+ /usr/lib
+ /usr/local/cuda/lib
+ )
else( )
- find_library( OPENCL_LIBRARIES
- NAMES OpenCL
- HINTS
- ${OPENCL_ROOT}/lib
- $ENV{AMDAPPSDKROOT}/lib
- $ENV{CUDA_PATH}/lib
- DOC "OpenCL dynamic library path"
- PATH_SUFFIXES x86 Win32
-
- PATHS
- /usr/lib
- /usr/local/cuda/lib
- /opt/cuda/lib
- )
+ find_library( OPENCL_LIBRARIES
+ NAMES OpenCL
+ HINTS
+ ${OPENCL_ROOT}/lib
+ $ENV{OPENCL_ROOT}/lib
+ $ENV{AMDAPPSDKROOT}/lib
+ $ENV{CUDA_PATH}/lib
+ DOC "OpenCL dynamic library path"
+ PATH_SUFFIXES x86 Win32
+ PATHS
+ /usr/lib
+ /usr/local/cuda/lib
+ )
endif( )
mark_as_advanced( OPENCL_LIBRARIES )
+# message( STATUS "OpenCL_FIND_VERSION: ${OpenCL_FIND_VERSION}" )
+if( OpenCL_VERSION VERSION_LESS OpenCL_FIND_VERSION )
+ message( FATAL_ERROR "Requested OpenCL version: ${OpenCL_FIND_VERSION}, Found OpenCL version: ${OpenCL_VERSION}" )
+endif( )
+
+# If we asked for OpenCL 1.2, and we found a version installed greater than that, pass the 'use deprecated' flag
+if( (OpenCL_FIND_VERSION VERSION_LESS "2.0") AND (OpenCL_VERSION VERSION_GREATER OpenCL_FIND_VERSION) )
+ add_definitions( -DCL_USE_DEPRECATED_OPENCL_2_0_APIS )
+
+ # If we asked for OpenCL 1.1, and we found a version installed greater than that, pass the 'use deprecated' flag
+ if( (OpenCL_FIND_VERSION VERSION_LESS "1.2") AND (OpenCL_VERSION VERSION_GREATER OpenCL_FIND_VERSION) )
+ add_definitions( -DCL_USE_DEPRECATED_OPENCL_1_1_APIS )
+ endif( )
+endif( )
+
include( FindPackageHandleStandardArgs )
-FIND_PACKAGE_HANDLE_STANDARD_ARGS( OPENCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS )
+FIND_PACKAGE_HANDLE_STANDARD_ARGS( OPENCL
+ REQUIRED_VARS OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS
+ VERSION_VAR OpenCL_VERSION
+ )
if( NOT OPENCL_FOUND )
message( STATUS "FindOpenCL looked for libraries named: OpenCL" )
+else( )
+ message(STATUS "FindOpenCL ${OPENCL_LIBRARIES}, ${OPENCL_INCLUDE_DIRS}")
endif()
diff --git a/src/library/blas/AutoGemm/Includes.py b/src/library/blas/AutoGemm/Includes.py
index 3c8435f..0f61695 100644
--- a/src/library/blas/AutoGemm/Includes.py
+++ b/src/library/blas/AutoGemm/Includes.py
@@ -113,7 +113,7 @@ class KernelBinaryIncludes:
self.cppStr += "unsigned char *%s_bin = 0;\n" % kernelName
self.cppStr += " size_t %s_binSize = 0;\n" % kernelName
self.cppStr += "#else\n"
- self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
+ # self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
self.cppStr += "#endif\n"
kernelName = kernel.getRowName()
@@ -123,7 +123,7 @@ class KernelBinaryIncludes:
self.cppStr += "unsigned char *%s_bin = 0;\n" % kernelName
self.cppStr += " size_t %s_binSize = 0;\n" % kernelName
self.cppStr += "#else\n"
- self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
+ # self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
self.cppStr += "#endif\n"
kernelName = kernel.getColName()
@@ -133,7 +133,7 @@ class KernelBinaryIncludes:
self.cppStr += "unsigned char *%s_bin = 0;\n" % kernelName
self.cppStr += " size_t %s_binSize = 0;\n" % kernelName
self.cppStr += "#else\n"
- self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
+ # self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
self.cppStr += "#endif\n"
kernelName = kernel.getCornerName()
@@ -143,7 +143,7 @@ class KernelBinaryIncludes:
self.cppStr += "unsigned char *%s_bin = 0;\n" % kernelName
self.cppStr += " size_t %s_binSize = 0;\n" % kernelName
self.cppStr += "#else\n"
- self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
+ # self.cppStr += "#pragma message(\"AutoGemmKernelBinaries.cpp: %s was pre-compiled.\")\n" % kernelName
self.cppStr += "#endif\n"
self.incFile.write( self.incStr )
diff --git a/src/library/blas/AutoGemm/KernelOpenCL.py b/src/library/blas/AutoGemm/KernelOpenCL.py
index d7835d5..87a5676 100644
--- a/src/library/blas/AutoGemm/KernelOpenCL.py
+++ b/src/library/blas/AutoGemm/KernelOpenCL.py
@@ -482,7 +482,7 @@ def writeOpenCLKernelToFile(kernel):
kernelFile.write("\";\n")
kernelFile.write("\n")
kernelFile.write("#else\n")
- kernelFile.write("#pragma message(\"AutoGemmKernelSources.cpp: %s was overriden by user kernel.\")\n" % kernel.getName() )
+ # kernelFile.write("#pragma message(\"AutoGemmKernelSources.cpp: %s was overriden by user kernel.\")\n" % kernel.getName() )
kernelFile.write("#endif\n")
kernelFile.close()
diff --git a/src/library/blas/gens/trmm.c b/src/library/blas/gens/trmm.c
index 7655af3..0c8f8b4 100644
--- a/src/library/blas/gens/trmm.c
+++ b/src/library/blas/gens/trmm.c
@@ -1245,7 +1245,7 @@ static int trmmGetDefaultDecomp( PGranularity *pgran,
unsigned int subdimsNum,
void *pArgs)
{
- (void*)subdimsNum;
+ DUMMY_ARG_USAGE(subdimsNum);
if ( NULL == pArgs ) {
return -EINVAL;
diff --git a/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp b/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp
index 1f7c19c..f3d6ca5 100644
--- a/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp
+++ b/src/library/blas/trtri/diag_dtrtri_lower_128_16.cpp
@@ -4,7 +4,6 @@
#ifndef KERNEL_DIAG_DTRTRI_LOWER_128_16_SRC_CPP
#define KERNEL_DIAG_DTRTRI_LOWER_128_16_SRC_CPP
-#pragma message("#define KERNEL_DIAG_DTRTRI_UPPER_128_16_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp b/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp
index bc9c296..f039b27 100644
--- a/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp
+++ b/src/library/blas/trtri/diag_dtrtri_upper_128_16.cpp
@@ -4,7 +4,6 @@
#ifndef KERNEL_DIAG_DTRTRI_UPPER_128_16_SRC_CPP
#define KERNEL_DIAG_DTRTRI_UPPER_128_16_SRC_CPP
-#pragma message("#define KERNEL_DIAG_DTRTRI_UPPER_128_16_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
@@ -64,17 +63,17 @@ uint na)\n
{\n
if(tx <= i && i+bx*BLOCK_SIZE < na )\n
{\n
- Bs[i*BLOCK_SIZE+tx] = *(Aoff+i*lda+tx);\n
+ Bs[i*BLOCK_SIZE+tx] = *(Aoff+i*lda+tx);\n
}\n
else\n
{\n
Bs[i*BLOCK_SIZE+tx] = ZERO;\n
}\n
- }\n
+ }\n
// read in the whole square block of my A and zero out the non data triangular
-
+
// Synchronize to make sure the matrices are loaded
- //__syncthreads();
+ //__syncthreads();
barrier(CLK_LOCAL_MEM_FENCE);\n
// solve the diagonals
@@ -92,7 +91,7 @@ uint na)\n
else \n
{\n
Bs[tx*BLOCK_SIZE+tx] = ONE / ( Bs[tx*BLOCK_SIZE+tx]) ;\n
- }\n
+ }\n
}\n
barrier(CLK_LOCAL_MEM_FENCE);\n
@@ -139,14 +138,14 @@ uint na)\n
// __syncthreads();
barrier(CLK_LOCAL_MEM_FENCE);\n
}\n
-
+
// write back A
_Pragma("unroll")\n
for( i=0; i < BLOCK_SIZE; i++ )\n
{\n
*(d_dinvA+i*NB+tx) = Bs[i*BLOCK_SIZE+tx];\n
}\n
-
+
}\n
// end of kernel
);
diff --git a/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp b/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp
index 0ffbebf..0d81ee2 100644
--- a/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp
+++ b/src/library/blas/trtri/diag_dtrtri_upper_192_12.cpp
@@ -4,7 +4,6 @@
#ifndef KERNEL_DIAG_DTRTRI_UPPER_192_12_SRC_CPP
#define KERNEL_DIAG_DTRTRI_UPPER_192_12_SRC_CPP
-#pragma message("#define KERNEL_DIAG_DTRTRI_UPPER_192_12_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
@@ -43,10 +42,10 @@ double neg_switcher; \n
// Thread index
int tx = get_local_id(0); \n
-// Thread index
+// Thread index
int gx = get_global_id(0); \n
-// Block index
+// Block index
int bx = get_group_id(0); \n
A = A + offA; \n
@@ -56,7 +55,7 @@ int NumBLperNB = NB / BLOCK_SIZE; \n
d_dinvA += bx / NumBLperNB*NB*NB + (bx % NumBLperNB)*(NB*BLOCK_SIZE + BLOCK_SIZE); \n
__local double Bs[BLOCK_SIZE*BLOCK_SIZE]; \n
-__local double workspace[BLOCK_SIZE];\n // workspace used to store the current working column
+__local double workspace[BLOCK_SIZE];\n // workspace used to store the current working column
// load A \n
_Pragma("unroll")\n
@@ -74,7 +73,7 @@ for (i = 0; i < BLOCK_SIZE; i++)\n
// read in the whole square block of my A and zero out the non data triangular
// Synchronize to make sure the matrices are loaded
-//__syncthreads();
+//__syncthreads();
barrier(CLK_LOCAL_MEM_FENCE); \n
// solve the diagonals
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_16_PART1_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_16_PART1_L.cpp
index c0e3b4c..f0c041f 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_16_PART1_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_16_PART1_L.cpp
@@ -5,7 +5,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART1_L_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART1_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART1_L_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
@@ -74,13 +73,13 @@ Ain = Ain + offAin; \n
int ya = page*blk * 2; \n
int incA = ya * lda + xa; \n
- // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
+ // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
int maxA; \n
if (xa < na)\n
- maxA = lda*na; \n // macro READA will detect overflow on y dimension
+ maxA = lda*na; \n // macro READA will detect overflow on y dimension
else\n
- maxA = 0; \n // there is already an overflow on xa
+ maxA = 0; \n // there is already an overflow on xa
#define READA ( (incA < maxA ) ? Ain[incA] : 0 ) \n
@@ -139,7 +138,7 @@ Ain = Ain + offAin; \n
daxpy(a[1], &bs[13][0], c); \n
daxpy(a[2], &bs[14][0], c); \n
daxpy(a[3], &bs[15][0], c); \n
-
+
B += 16; \n
//__syncthreads();
barrier(CLK_LOCAL_MEM_FENCE); \n
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_16_PART2_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_16_PART2_L.cpp
index 71c13dc..dbffeb9 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_16_PART2_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_16_PART2_L.cpp
@@ -5,7 +5,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART2_L_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART2_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_PART2_L_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_16_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_16_R.cpp
index 237d3fe..fd410a9 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_16_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_16_R.cpp
@@ -1,14 +1,13 @@
/*******************************************************************************
* Hand-tuned kernel
-
+
* B21 = -inv(A11)*A12*inv(A22)
* 16 to 32
-
+
******************************************************************************/
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_16_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_16_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
@@ -76,13 +75,13 @@ int PagesPerNB = NB / (blk * 2); \n
int ya = page*blk * 2 + blk; \n
int incA = ya * lda + xa; \n
- // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
+ // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
int maxA; \n
if (xa < na)\n
- maxA = lda*na; \n // macro READA will detect overflow on y dimension
+ maxA = lda*na; \n // macro READA will detect overflow on y dimension
else
- maxA = 0; \n // there is already an overflow on xa
+ maxA = 0; \n // there is already an overflow on xa
#define READA ( (incA < maxA ) ? Ain[incA] : 0 ) \n
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_L.cpp
index 46b7e97..e4bde33 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_L.cpp
@@ -6,7 +6,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART1_L_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART1_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART1_L_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
@@ -73,13 +72,13 @@ int PagesPerNB = NB / (blk * 2); \n
int ya = page*blk * 2;\n
int incA = ya * lda + xa; \n
- // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
+ // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
int maxA; \n
if (xa < na)\n
- maxA = lda*na; \n // macro READA will detect overflow on y dimension
+ maxA = lda*na; \n // macro READA will detect overflow on y dimension
else\n
- maxA = 0; \n // there is already an overflow on xa
+ maxA = 0; \n // there is already an overflow on xa
#define READA ( (incA < maxA ) ? Ain[incA] : 0 ) \n
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_R.cpp
index 3358af6..43760b6 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_32_PART1_R.cpp
@@ -7,7 +7,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART1_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART1_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART1_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
@@ -74,13 +73,13 @@ int PagesPerNB = NB / (blk * 2); \n
int ya = page*blk * 2 + blk; \n
int incA = ya * lda + xa; \n
- // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
+ // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
int maxA; \n
if (xa < na)\n
- maxA = lda*na; \n // macro READA will detect overflow on y dimension
+ maxA = lda*na; \n // macro READA will detect overflow on y dimension
else\n
- maxA = 0; \n // there is already an overflow on xa
+ maxA = 0; \n // there is already an overflow on xa
#define READA ( (incA < maxA ) ? Ain[incA] : 0 ) \n
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_L.cpp
index e480d6b..12efa1a 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_L.cpp
@@ -6,7 +6,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART2_L_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART2_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART2_L_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_R.cpp
index 6c04dee..f0df069 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_32_PART2_R.cpp
@@ -7,7 +7,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART2_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART2_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_32_PART2_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_L.cpp
index eef824c..11fa10b 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_L.cpp
@@ -7,7 +7,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART1_L_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART1_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART1_L_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
@@ -73,13 +72,13 @@ int PagesPerNB = NB / (blk * 2); \n
int ya = page*blk * 2; \n
int incA = ya * lda + xa; \n
- // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
+ // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
int maxA; \n
if (xa < na)\n
- maxA = lda*na; \n // macro READA will detect overflow on y dimension
+ maxA = lda*na; \n // macro READA will detect overflow on y dimension
else\n
- maxA = 0;\n // there is already an overflow on xa
+ maxA = 0;\n // there is already an overflow on xa
#define READA ( (incA < maxA ) ? Ain[incA] : 0 ) \n
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_R.cpp
index 0f64809..ad5d548 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_64_PART1_R.cpp
@@ -7,7 +7,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART1_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART1_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART1_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
@@ -74,13 +73,13 @@ int PagesPerNB = NB / (blk * 2); \n
int ya = page*blk * 2 + blk; \n
int incA = ya * lda + xa; \n
- // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
+ // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
int maxA; \n
if (xa < na)\n
maxA = lda*na; \n // macro READA will detect overflow on y dimension
else\n
- maxA = 0; \n // there is already an overflow on xa
+ maxA = 0; \n // there is already an overflow on xa
#define READA ( (incA < maxA ) ? Ain[incA] : 0 ) \n
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_L.cpp
index 83e0c7e..cf38785 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_L.cpp
@@ -7,7 +7,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART2_L_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART2_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART2_L_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_R.cpp
index 5ce3e42..923f476 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_64_PART2_R.cpp
@@ -7,7 +7,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART2_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART2_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_64_PART2_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_L.cpp
index af7f518..31a97fa 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_L.cpp
@@ -7,7 +7,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART1_L_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART1_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART1_L_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
@@ -74,13 +73,13 @@ int PagesPerNB = NB / (blk * 2); \n
int ya = page*blk * 2; \n
int incA = ya * lda + xa; \n
- // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
+ // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
int maxA; \n
if (xa < na)\n
- maxA = lda*na; \n // macro READA will detect overflow on y dimension
+ maxA = lda*na; \n // macro READA will detect overflow on y dimension
else\n
- maxA = 0; \n // there is already an overflow on xa
+ maxA = 0; \n // there is already an overflow on xa
#define READA ( (incA < maxA ) ? Ain[incA] : 0 ) \n
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_R.cpp
index 51a3e87..315908e 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART1_R.cpp
@@ -7,7 +7,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART1_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART1_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART1_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
@@ -74,13 +73,13 @@ int PagesPerNB = NB / (blk * 2); \n
int ya = page*blk * 2 + blk; \n
int incA = ya * lda + xa; \n
- // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
+ // maxA will be used to detect overflow on all subsequent accesses on A(xa, ya:ya+???)
int maxA; \n
if (xa < na)\n
maxA = lda*na; \n // macro READA will detect overflow on y dimension
else\n
- maxA = 0; \n // there is already an overflow on xa
+ maxA = 0; \n // there is already an overflow on xa
#define READA ( (incA < maxA ) ? Ain[incA] : 0 ) \n
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_L.cpp
index 674fdd5..a4cd85c 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_L.cpp
@@ -7,7 +7,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART2_L_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART2_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART2_L_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_R.cpp
index a45494b..f13e19b 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART2_R.cpp
@@ -7,7 +7,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART2_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART2_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART2_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_L.cpp b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_L.cpp
index d2077bf..b576114 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_L.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_L.cpp
@@ -6,7 +6,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART3_L_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART3_L_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART3_L_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_R.cpp b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_R.cpp
index 004a8d2..adb22d7 100644
--- a/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_128_ABOVE64_PART3_R.cpp
@@ -8,7 +8,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART3_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART3_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_128_ABOVE64_PART3_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_12_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_12_R.cpp
index 79bc4c0..4d645bc 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_12_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_12_R.cpp
@@ -1,14 +1,13 @@
/*******************************************************************************
* Hand-tuned kernel
-
+
* B21 = -inv(A11)*A12*inv(A22)
* 12 to 24
-
+
******************************************************************************/
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_12_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_192_12_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_12_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_24_PART1_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_24_PART1_R.cpp
index 6b62eb4..f6465d3 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_24_PART1_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_24_PART1_R.cpp
@@ -4,7 +4,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_24_PART1_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_192_24_PART1_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_24_PART1_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_24_PART2_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_24_PART2_R.cpp
index d8c2f99..1e46a8d 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_24_PART2_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_24_PART2_R.cpp
@@ -4,7 +4,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_24_PART2_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_192_24_PART2_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_24_PART2_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_48_PART1_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_48_PART1_R.cpp
index dafa65b..3dc0516 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_48_PART1_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_48_PART1_R.cpp
@@ -4,7 +4,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_48_PART1_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_192_48_PART1_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_48_PART1_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
@@ -54,7 +53,7 @@ const char * const triple_dgemm_update_192_48_PART1_R_src = STRINGIFY(
//each workgroup loads half of B (up or down)
B = d_dinvA + page_block*NB*NB + page*blk * 2 * NB + page*blk * 2 + blk*NB + blk + gidy*(blk / 2)*NB; \n
- //decide invA12 location for each page;
+ //decide invA12 location for each page;
//Actually this will be stored in invA21 temporarily
//each workgroup writes 1/4 of C
C = d_dinvA + page_block*NB*NB + page*blk * 2 * NB + page*blk * 2 + blk*NB + gidx % 2 * (blk / 2) + gidy*(blk / 2)*NB; \n
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_48_PART2_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_48_PART2_R.cpp
index 4571112..37ea0a2 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_48_PART2_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_48_PART2_R.cpp
@@ -4,7 +4,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_48_PART2_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_192_48_PART2_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_48_PART2_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_96_PART1_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_96_PART1_R.cpp
index 8e58ab6..1416ff3 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_96_PART1_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_96_PART1_R.cpp
@@ -4,7 +4,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_96_PART1_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_192_96_PART1_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_96_PART1_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
@@ -55,7 +54,7 @@ const char * const triple_dgemm_update_192_96_PART1_R_src = STRINGIFY(
//each workgroup loads 1/4 of B (up or down)
B = d_dinvA + page_block*NB*NB + blk*NB + blk + gidy*(blk / 4)*NB; \n
- //decide invA12 location for each page;
+ //decide invA12 location for each page;
//Actually this will be stored in invA21 temporarily
//each workgroup writes 1/4*1/4 of C
C = d_dinvA + page_block*NB*NB + blk*NB + gidx % 4 * (blk / 4) + gidy*(blk / 4)*NB; \n
diff --git a/src/library/blas/trtri/triple_dgemm_update_192_96_PART2_R.cpp b/src/library/blas/trtri/triple_dgemm_update_192_96_PART2_R.cpp
index 2ff217d..9e961ff 100644
--- a/src/library/blas/trtri/triple_dgemm_update_192_96_PART2_R.cpp
+++ b/src/library/blas/trtri/triple_dgemm_update_192_96_PART2_R.cpp
@@ -4,7 +4,6 @@
#ifndef KERNEL_TRIPLE_DGEMM_UPDATE_192_96_PART2_R_SRC_CPP
#define KERNEL_TRIPLE_DGEMM_UPDATE_192_96_PART2_R_SRC_CPP
-#pragma message("#define KERNEL_TRIPLE_DGEMM_UPDATE_192_96_PART2_R_SRC_CPP.")
#ifndef STRINGIFY
#define STRINGIFY2(...) #__VA_ARGS__
diff --git a/src/library/blas/xgemm.cc b/src/library/blas/xgemm.cc
index eb78112..a2c6cb0 100644
--- a/src/library/blas/xgemm.cc
+++ b/src/library/blas/xgemm.cc
@@ -170,7 +170,7 @@ void makeGemmKernel(
#if defined( _WIN32 )
__declspec( thread ) static kernel_map_t *kernel_map = 0;
#else
- __thread static kernel_map_t *kernel_map = 0;
+ static __thread kernel_map_t *kernel_map = 0;
#endif
if (!kernel_map) {
kernel_map = new kernel_map_t();
@@ -317,11 +317,11 @@ void makeGemmKernel(
* get precision string
*****************************************************************************/
template<typename Precision>
-char * getPrecision();
-template<> char * getPrecision<float>() { return "s"; }
-template<> char * getPrecision<double>() { return "d"; }
-template<> char * getPrecision<FloatComplex>() { return "c"; }
-template<> char * getPrecision<DoubleComplex>() { return "z"; }
+const char * getPrecision();
+template<> const char * getPrecision<float>() { return "s"; }
+template<> const char * getPrecision<double>() { return "d"; }
+template<> const char * getPrecision<FloatComplex>() { return "c"; }
+template<> const char * getPrecision<DoubleComplex>() { return "z"; }
/******************************************************************************
@@ -500,7 +500,7 @@ clblasGemm(
&unroll);
// make sure gemmSelectKernel found a valid kernel
if (!tileKernelSource) {
- printf("ERROR: gemmSelectKernel() couldn't find kernel(s) for { order=%s, transA=%s, transB=%s, M=%llu, N=%llu, K=%llu, beta=%u, onept=%f }\n",
+ printf("ERROR: gemmSelectKernel() couldn't find kernel(s) for { order=%s, transA=%s, transB=%s, M=%u, N=%u, K=%u, beta=%u, onept=%f }\n",
order==clblasColumnMajor ? "ColMajor" : "RowMajor",
transA==clblasNoTrans ? "N" : transA==clblasTrans ? "T" : "C",
transB==clblasNoTrans ? "N" : transB==clblasTrans ? "T" : "C",
@@ -566,8 +566,7 @@ clblasGemm(
/******************************************************************************
* Build kernels
*****************************************************************************/
-
-
+
cl_kernel tileClKernel = NULL;
cl_kernel rowClKernel = NULL;
cl_kernel colClKernel = NULL;
@@ -688,14 +687,14 @@ clblasSgemm(
clblasErr = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET);
if (clblasErr != clblasSuccess)
return clblasErr;
-
+
if (K != 0)
{
//check matrix A
clblasErr = checkMatrixSizes(TYPE_FLOAT, order, transA, M, K, A, offA, lda, A_MAT_ERRSET);
if (clblasErr != clblasSuccess)
return clblasErr;
-
+
//check matrix B
clblasErr = checkMatrixSizes(TYPE_FLOAT, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET);
if (clblasErr != clblasSuccess)
@@ -748,14 +747,14 @@ clblasDgemm( clblasOrder order,
clblasErr = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET);
if (clblasErr != clblasSuccess)
return clblasErr;
-
+
if (K != 0)
{
//check matrix A
clblasErr = checkMatrixSizes(TYPE_DOUBLE, order, transA, M, K, A, offA, lda, A_MAT_ERRSET);
if (clblasErr != clblasSuccess)
return clblasErr;
-
+
//check matrix B
clblasErr = checkMatrixSizes(TYPE_DOUBLE, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET);
if (clblasErr != clblasSuccess)
@@ -809,14 +808,14 @@ clblasCgemm(
clblasErr = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET);
if (clblasErr != clblasSuccess)
return clblasErr;
-
+
if (K != 0)
{
//check matrix A
clblasErr = checkMatrixSizes(TYPE_COMPLEX_FLOAT, order, transA, M, K, A, offA, lda, A_MAT_ERRSET);
if (clblasErr != clblasSuccess)
return clblasErr;
-
+
//check matrix B
clblasErr = checkMatrixSizes(TYPE_COMPLEX_FLOAT, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET);
if (clblasErr != clblasSuccess)
@@ -870,14 +869,14 @@ clblasZgemm(
clblasErr = checkMemObjects(A, B, C, true, A_MAT_ERRSET, B_MAT_ERRSET, C_MAT_ERRSET);
if (clblasErr != clblasSuccess)
return clblasErr;
-
+
if (K != 0)
{
//check matrix A
clblasErr = checkMatrixSizes(TYPE_COMPLEX_DOUBLE, order, transA, M, K, A, offA, lda, A_MAT_ERRSET);
if (clblasErr != clblasSuccess)
return clblasErr;
-
+
//check matrix B
clblasErr = checkMatrixSizes(TYPE_COMPLEX_DOUBLE, order, transB, K, N, B, offB, ldb, B_MAT_ERRSET);
if (clblasErr != clblasSuccess)
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 9ecfd13..b3944aa 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,12 +1,12 @@
# ########################################################################
# Copyright 2013 Advanced Micro Devices, Inc.
-#
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -251,7 +251,7 @@ endif()
# Having problems on build server, compiling gtest headers with -pedantic; disabling detection of long long
# http://code.google.com/p/googletest/issues/detail?id=334
if( CMAKE_COMPILER_IS_GNUCXX )
- add_definitions( -Wno-long-long )
+ add_definitions( -Wno-long-long -Wno-variadic-macros )
endif( )
if( CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" )
@@ -259,7 +259,7 @@ if( CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" )
# By default, -Mipa=fast is used, and this does not mix well with the cl compiler
string( REPLACE "-Mipa=fast" "" CMAKE_Fortran_FLAGS_RELEASE ${CMAKE_Fortran_FLAGS_RELEASE} )
-
+
# In windows, dynamically link to the C runtime, and tell fortran linker to not include default main subroutine
if( WIN32 )
set( CMAKE_EXE_LINKER_FLAGS "-Bdynamic -Mnostartup ${CMAKE_EXE_LINKER_FLAGS}" )
@@ -296,7 +296,7 @@ if( GTEST_FOUND )
${clBLAS_SOURCE_DIR}/tests/include ${clBLAS_SOURCE_DIR}/include)
add_definitions(-DCORR_TEST_WITH_ACML)
-
+
add_executable(test-correctness ${SRC_CORR} ${SRC_COMMON} ${SRC_COMMON_REFIMPL}
${CORR_HEADERS} ${TESTS_HEADERS})
set_target_properties( test-correctness PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
@@ -311,8 +311,8 @@ if( GTEST_FOUND )
set_target_properties(test-short PROPERTIES COMPILE_DEFINITIONS SHORT_TESTS)
set_target_properties( test-short PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
- # The build server builds the library with gcc 4.1.2 to support Red Hat 5.5, but the test programs must be built with
- # gcc > 4.3.2 to support ACML.
+ # The build server builds the library with gcc 4.1.2 to support Red Hat 5.5, but the test programs must be built with
+ # gcc > 4.3.2 to support ACML.
# If the runtime is being built by the project, use it, otherwise link to a runtime library specified in the install prefix
if( BUILD_RUNTIME )
target_link_libraries(test-correctness ${ACML_LIBRARIES} ${GTEST_LIBRARIES} ${THREAD_LIBRARY} clBLAS)
@@ -350,7 +350,7 @@ if( GTEST_FOUND )
set_target_properties( test-medium PROPERTIES LINKER_LANGUAGE Fortran )
set_target_properties( test-short PROPERTIES LINKER_LANGUAGE Fortran )
endif( )
-
+
if( BUILD_RUNTIME )
if( NETLIB_FOUND )
target_link_libraries(test-correctness ${Netlib_LIBRARIES} ${GTEST_LIBRARIES} ${THREAD_LIBRARY} clBLAS)
@@ -373,7 +373,7 @@ if( GTEST_FOUND )
endif( )
endif( )
endif( )
-
+
set_property( TARGET test-correctness PROPERTY FOLDER "Test")
set_property( TARGET test-medium PROPERTY FOLDER "Test")
set_property( TARGET test-short PROPERTY FOLDER "Test")
@@ -384,7 +384,7 @@ if( GTEST_FOUND )
LIBRARY DESTINATION lib${SUFFIX_LIB}
ARCHIVE DESTINATION lib${SUFFIX_LIB}/import
)
-
+
get_target_property( testLocation test-correctness LOCATION )
configure_file(
@@ -395,7 +395,7 @@ if( GTEST_FOUND )
# Register script at run at install time to analyze the executable and copy dependencies into package
install( SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/copyTestDependencies.cmake")
-
+
if( ACML_FOUND )
include_directories(${OPENCL_INCLUDE_DIRS} ${GTEST_INCLUDE_DIRS}
${clBLAS_SOURCE_DIR} ${clBLAS_SOURCE_DIR}/tests/include ${clBLAS_SOURCE_DIR}/include)
@@ -430,7 +430,7 @@ if( GTEST_FOUND )
add_executable(test-functional ${SRC_FUNC} ${SRC_COMMON} ${SRC_COMMON_TIMER}
${FUNC_HEADERS} ${TESTS_HEADERS})
-
+
set_target_properties( test-functional PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
if( BUILD_RUNTIME )
target_link_libraries(test-functional ${GTEST_LIBRARIES} ${TIME_LIBRARY} ${THREAD_LIBRARY} clBLAS )
diff --git a/src/tests/correctness/corr-trmv.cpp b/src/tests/correctness/corr-trmv.cpp
index 7e97d6c..a8c7151 100644
--- a/src/tests/correctness/corr-trmv.cpp
+++ b/src/tests/correctness/corr-trmv.cpp
@@ -127,7 +127,7 @@ trmvCorrectnessTest(TestParams *params)
// Allocate buffers
bufA = base->createEnqueueBuffer(A, (lengthA + params->offa)* sizeof(*A), 0, CL_MEM_READ_ONLY);
bufX = base->createEnqueueBuffer(clblasX, (lengthX + params->offBX)* sizeof(*clblasX), 0, CL_MEM_WRITE_ONLY);
- bufXTemp = base->createEnqueueBuffer(NULL, lengthX * sizeof(*clblasX), 0, CL_MEM_READ_ONLY);
+ bufXTemp = base->createEnqueueBuffer(NULL, lengthX * sizeof(*clblasX), 0, CL_MEM_READ_WRITE);
//printData( "bufX", blasX, lengthX, 1, lengthX);
//printData( "clblasX", clblasX, lengthX, 1, lengthX);
diff --git a/src/tests/include/cmdline.h b/src/tests/include/cmdline.h
index 68ddfba..b767973 100644
--- a/src/tests/include/cmdline.h
+++ b/src/tests/include/cmdline.h
@@ -44,7 +44,7 @@ typedef enum SetoptFlags {
SET_INCY = (1 << 9),
SET_NUM_COMMAND_QUEUES = (1 << 10),
SET_DEVICE_ORD = (1 << 11),
- SET_PLATFORM_ORD = (1 << 12),
+ SET_PLATFORM_ORD = (1 << 12)
} SetoptFlags;
typedef struct TestParams {
diff --git a/src/tests/include/matrix.h b/src/tests/include/matrix.h
index 65757ad..8794f0b 100644
--- a/src/tests/include/matrix.h
+++ b/src/tests/include/matrix.h
@@ -310,21 +310,21 @@ compareMatrices(
const cl_double *absDelta = NULL)
{
size_t m = 0, n = 0;
- T a, b;
+ T ref, clresult;
cl_double delta;
if( lda > 0 ) // General case
{
for (m = 0; m < M; m++) {
for (n = 0; n < N; n++) {
- a = getElement<T>(order, clblasNoTrans, m, n, A, lda);
- b = getElement<T>(order, clblasNoTrans, m, n, B, lda);
+ ref = getElement<T>(order, clblasNoTrans, m, n, A, lda);
+ clresult = getElement<T>(order, clblasNoTrans, m, n, B, lda);
delta = 0.0;
if (absDelta != NULL) {
delta = absDelta[m * N + n];
}
- if( module(a-b) > delta ) printf("m : %d\t n: %d\n", (int)m, (int)n);
- ASSERT_NEAR(a, b, delta);
+ if( module(ref-clresult) > delta ) printf("m : %d\t n: %d\n", (int)m, (int)n);
+ ASSERT_NEAR(ref, clresult, delta);
}
}
}
@@ -336,14 +336,14 @@ compareMatrices(
{
for( m=n; m < M; m++)
{
- a = getElement<T>(order, clblasNoTrans, m, n, A, lda);
- b = getElement<T>(order, clblasNoTrans, m, n, B, lda);
+ ref = getElement<T>(order, clblasNoTrans, m, n, A, lda);
+ clresult = getElement<T>(order, clblasNoTrans, m, n, B, lda);
delta = 0.0;
if (absDelta != NULL) {
//delta = absDelta[m * N + n];
}
- if( module(a-b) > delta ) printf("m : %d\t n: %d\n", (int)m, (int)n);
- ASSERT_NEAR(a, b, delta);
+ if( module(ref-clresult) > delta ) printf("m : %d\t n: %d\n", (int)m, (int)n);
+ ASSERT_NEAR(ref, clresult, delta);
}
}
}
@@ -353,14 +353,14 @@ compareMatrices(
{
for( n = 0; n <= m; n++)
{
- a = getElement<T>(order, clblasNoTrans, m, n, A, lda);
- b = getElement<T>(order, clblasNoTrans, m, n, B, lda);
+ ref = getElement<T>(order, clblasNoTrans, m, n, A, lda);
+ clresult = getElement<T>(order, clblasNoTrans, m, n, B, lda);
delta = 0.0;
if (absDelta != NULL) {
//delta = absDelta[m * N + n];
}
- if( module(a-b) > delta ) printf("m : %d\t n: %d\n", (int)m, (int)n);
- ASSERT_NEAR(a, b, delta);
+ if( module(ref-clresult) > delta ) printf("m : %d\t n: %d\n", (int)m, (int)n);
+ ASSERT_NEAR(ref, clresult, delta);
}
}
}
@@ -379,23 +379,23 @@ compareMatrices<FloatComplex>(
const cl_double *absDelta)
{
size_t m = 0, n = 0;
- FloatComplex a, b;
+ FloatComplex ref, clresult;
cl_double delta;
if ( lda > 0 )
{
for (m = 0; m < M; m++) {
for (n = 0; n < N; n++) {
- a = getElement<FloatComplex>(order, clblasNoTrans, m, n, A, lda);
- b = getElement<FloatComplex>(order, clblasNoTrans, m, n, B, lda);
+ ref = getElement<FloatComplex>(order, clblasNoTrans, m, n, A, lda);
+ clresult = getElement<FloatComplex>(order, clblasNoTrans, m, n, B, lda);
delta = 0.0;
if (absDelta != NULL) {
delta = absDelta[m * N + n];
}
- if( (module(CREAL(a) - CREAL(b)) > delta) || (module(CIMAG(a) - CIMAG(b)) > delta) )
+ if( (module(CREAL(ref) - CREAL(clresult)) > delta) || (module(CIMAG(ref) - CIMAG(clresult)) > delta) )
printf("m : %d\t n: %d\n", (int)m, (int)n);
- ASSERT_NEAR(CREAL(a), CREAL(b), delta);
- ASSERT_NEAR(CIMAG(a), CIMAG(b), delta);
+ ASSERT_NEAR(CREAL(ref), CREAL(clresult), delta);
+ ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), delta);
}
}
}
@@ -407,16 +407,16 @@ if ( lda > 0 )
{
for( m=n; m < M; m++)
{
- a = getElement<FloatComplex>(order, clblasNoTrans, m, n, A, lda);
- b = getElement<FloatComplex>(order, clblasNoTrans, m, n, B, lda);
+ ref = getElement<FloatComplex>(order, clblasNoTrans, m, n, A, lda);
+ clresult = getElement<FloatComplex>(order, clblasNoTrans, m, n, B, lda);
delta = 0.0;
if (absDelta != NULL) {
//delta = absDelta[m * N + n];
}
- if( (module(CREAL(a) - CREAL(b)) > delta) || (module(CIMAG(a) - CIMAG(b)) > delta) )
+ if( (module(CREAL(ref) - CREAL(clresult)) > delta) || (module(CIMAG(ref) - CIMAG(clresult)) > delta) )
printf("m : %d\t n: %d\n", (int)m, (int)n);
- ASSERT_NEAR(CREAL(a), CREAL(b), delta);
- ASSERT_NEAR(CIMAG(a), CIMAG(b), delta);
+ ASSERT_NEAR(CREAL(ref), CREAL(clresult), delta);
+ ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), delta);
}
}
}
@@ -426,16 +426,16 @@ if ( lda > 0 )
{
for( n = 0; n <= m; n++)
{
- a = getElement<FloatComplex>(order, clblasNoTrans, m, n, A, lda);
- b = getElement<FloatComplex>(order, clblasNoTrans, m, n, B, lda);
+ ref = getElement<FloatComplex>(order, clblasNoTrans, m, n, A, lda);
+ clresult = getElement<FloatComplex>(order, clblasNoTrans, m, n, B, lda);
delta = 0.0;
if (absDelta != NULL) {
//delta = absDelta[m * N + n];
}
- if( (module(CREAL(a) - CREAL(b)) > delta) || (module(CIMAG(a) - CIMAG(b)) > delta) )
+ if( (module(CREAL(ref) - CREAL(clresult)) > delta) || (module(CIMAG(ref) - CIMAG(clresult)) > delta) )
printf("m : %d\t n: %d\n", (int)m, (int)n);
- ASSERT_NEAR(CREAL(a), CREAL(b), delta);
- ASSERT_NEAR(CIMAG(a), CIMAG(b), delta);
+ ASSERT_NEAR(CREAL(ref), CREAL(clresult), delta);
+ ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), delta);
}
}
}
@@ -455,22 +455,22 @@ compareMatrices<DoubleComplex>(
const cl_double *absDelta)
{
size_t m = 0, n = 0;
- DoubleComplex a, b;
+ DoubleComplex ref, clresult;
cl_double delta;
if( lda > 0 )
{
for (m = 0; m < M; m++) {
for (n = 0; n < N; n++) {
- a = getElement<DoubleComplex>(order, clblasNoTrans, m, n, A, lda);
- b = getElement<DoubleComplex>(order, clblasNoTrans, m, n, B, lda);
+ ref = getElement<DoubleComplex>(order, clblasNoTrans, m, n, A, lda);
+ clresult = getElement<DoubleComplex>(order, clblasNoTrans, m, n, B, lda);
delta = 0.0;
if (absDelta != NULL) {
delta = absDelta[m * N + n];
}
- if( (module(CREAL(a) - CREAL(b)) > delta) || (module(CIMAG(a) - CIMAG(b)) > delta) )
+ if( (module(CREAL(ref) - CREAL(clresult)) > delta) || (module(CIMAG(ref) - CIMAG(clresult)) > delta) )
printf("m : %d\t n: %d\n", (int)m, (int)n);
- ASSERT_NEAR(CREAL(a), CREAL(b), delta);
- ASSERT_NEAR(CIMAG(a), CIMAG(b), delta);
+ ASSERT_NEAR(CREAL(ref), CREAL(clresult), delta);
+ ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), delta);
}
}
}
@@ -482,16 +482,16 @@ if( lda > 0 )
{
for( m=n; m < M; m++)
{
- a = getElement<DoubleComplex>(order, clblasNoTrans, m, n, A, lda);
- b = getElement<DoubleComplex>(order, clblasNoTrans, m, n, B, lda);
+ ref = getElement<DoubleComplex>(order, clblasNoTrans, m, n, A, lda);
+ clresult = getElement<DoubleComplex>(order, clblasNoTrans, m, n, B, lda);
delta = 0.0;
if (absDelta != NULL) {
//delta = absDelta[m * N + n];
}
- if( (module(CREAL(a) - CREAL(b)) > delta) || (module(CIMAG(a) - CIMAG(b)) > delta) )
+ if( (module(CREAL(ref) - CREAL(clresult)) > delta) || (module(CIMAG(ref) - CIMAG(clresult)) > delta) )
printf("m : %d\t n: %d\n", (int)m, (int)n);
- ASSERT_NEAR(CREAL(a), CREAL(b), delta);
- ASSERT_NEAR(CIMAG(a), CIMAG(b), delta);
+ ASSERT_NEAR(CREAL(ref), CREAL(clresult), delta);
+ ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), delta);
}
}
}
@@ -501,16 +501,16 @@ if( lda > 0 )
{
for( n = 0; n <= m; n++)
{
- a = getElement<DoubleComplex>(order, clblasNoTrans, m, n, A, lda);
- b = getElement<DoubleComplex>(order, clblasNoTrans, m, n, B, lda);
+ ref = getElement<DoubleComplex>(order, clblasNoTrans, m, n, A, lda);
+ clresult = getElement<DoubleComplex>(order, clblasNoTrans, m, n, B, lda);
delta = 0.0;
if (absDelta != NULL) {
//delta = absDelta[m * N + n];
}
- if( (module(CREAL(a) - CREAL(b)) > delta) || (module(CIMAG(a) - CIMAG(b)) > delta) )
+ if( (module(CREAL(ref) - CREAL(clresult)) > delta) || (module(CIMAG(ref) - CIMAG(clresult)) > delta) )
printf("m : %d\t n: %d\n", (int)m, (int)n);
- ASSERT_NEAR(CREAL(a), CREAL(b), delta);
- ASSERT_NEAR(CIMAG(a), CIMAG(b), delta);
+ ASSERT_NEAR(CREAL(ref), CREAL(clresult), delta);
+ ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), delta);
}
}
}
@@ -764,10 +764,10 @@ template <typename T>
static void compareValues(
const T *A, const T *B, const cl_double absDelta=0.0 )
{
- T a, b;
- a = *A;
- b = *B;
- ASSERT_NEAR(a, b, absDelta);
+ T ref, clresult;
+ ref = *A;
+ clresult = *B;
+ ASSERT_NEAR(ref, clresult, absDelta);
}
template<>
@@ -775,12 +775,12 @@ __template_static void
compareValues<FloatComplex> (
const FloatComplex *A, const FloatComplex *B, const cl_double absDelta )
{
- FloatComplex a, b;
+ FloatComplex ref, clresult;
- a = *A;
- b = *B;
- ASSERT_NEAR(CREAL(a), CREAL(b), absDelta);
- ASSERT_NEAR(CIMAG(a), CIMAG(b), absDelta);
+ ref = *A;
+ clresult = *B;
+ ASSERT_NEAR(CREAL(ref), CREAL(clresult), absDelta);
+ ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), absDelta);
}
template<>
@@ -788,11 +788,11 @@ __template_static void
compareValues<DoubleComplex> (
const DoubleComplex *A, const DoubleComplex *B, const cl_double absDelta )
{
- DoubleComplex a, b;
+ DoubleComplex ref, clresult;
- a = *A;
- b = *B;
- ASSERT_NEAR(CREAL(a), CREAL(b), absDelta);
- ASSERT_NEAR(CIMAG(a), CIMAG(b), absDelta);
+ ref = *A;
+ clresult = *B;
+ ASSERT_NEAR(CREAL(ref), CREAL(clresult), absDelta);
+ ASSERT_NEAR(CIMAG(ref), CIMAG(clresult), absDelta);
}
#endif // MATRIX_H_
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git
More information about the debian-science-commits
mailing list