[clblas] 62/67: Merge pull request #151 from guacamoleo/master
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Oct 27 08:02:16 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clblas.
commit 8b5f7a0e6f800b9597319d71f70fbf67e410b004
Merge: 9731ea2 0482e1c
Author: Timmy <timmy.liu at amd.com>
Date: Fri Oct 16 08:09:17 2015 -0700
Merge pull request #151 from guacamoleo/master
promoting develop to master
.gitignore | 3 +
.travis.yml | 168 ++-
README.md | 57 +-
appveyor.yml | 105 ++
.../S9150/cgemmNT_S9150_14.50.2_2.6.0_8.csv | 721 +++++++++
.../S9150/dgemmNT_S9150_14.50.2_2.6.0_8.csv | 721 +++++++++
doc/performance/clBLAS_2.6.0/S9150/dtrsm_192.csv | 60 +-
.../S9150/sgemmNT_S9150_14.50.2_2.6.0_8.csv | 721 +++++++++
doc/performance/clBLAS_2.6.0/S9150/sgemm_32.csv | 360 ++---
.../S9150/zgemmNT_S9150_14.50.2_2.6.0_8.csv | 721 +++++++++
doc/performance/clBLAS_2.6.0/W9100/README.txt | 35 +
.../W9100/clblas_sgemmNT_w9100_14502.csv | 181 +++
.../{S9150/sgemm_32.csv => W9100/dgemm_32.csv} | 360 ++---
doc/performance/clBLAS_2.6.0/W9100/dgemm_96.csv | 61 +
.../W9100/dtrsm_w9100_14502.csv} | 60 +-
doc/performance/clBLAS_2.6.0/W9100/peak_dp.csv | 181 +++
doc/performance/clBLAS_2.6.0/W9100/peak_sp.csv | 181 +++
.../{S9150/sgemm_32.csv => W9100/zgemm_32.csv} | 360 ++---
doc/performance/clBLAS_2.6.0/W9100/zgemm_64.csv | 91 ++
.../S9150/cgemmNT_S9150_14.50.2_2.7.1_8.csv | 721 +++++++++
.../S9150/dgemmNT_S9150_14.50.2_2.7.1_8.csv | 721 +++++++++
.../S9150/sgemmNT_S9150_14.50.2_2.7.1_8.csv | 721 +++++++++
.../S9150/zgemmNT_S9150_14.50.2_2.7.1_8.csv | 721 +++++++++
...as271_w9100_dtrsm_col_left_lower_unit_14502.csv | 31 +
...as271_w9100_dtrsm_col_left_upper_unit_14502.csv | 31 +
...s271_w9100_dtrsm_col_right_lower_unit_14502.csv | 31 +
...s271_w9100_dtrsm_col_right_upper_unit_14502.csv | 31 +
doc/performance/cuBLAS_7.0/Tesla_K40/dtrsm.csv | 60 +-
doc/performance/cuBLAS_7.0/Tesla_K40/sgemm.csv | 360 ++---
.../cublas75_k40_dtrsm_col_left_lower_unit.csv | 31 +
.../cublas75_k40_dtrsm_col_left_upper_unit.csv | 31 +
.../cublas75_k40_dtrsm_col_right_lower_unit.csv | 31 +
.../cublas75_k40_dtrsm_col_right_upper_unit.csv | 31 +
.../cuBLAS_7.5/Tesla_K40/cublas_cgemm_8.csv | 721 +++++++++
.../cuBLAS_7.5/Tesla_K40/cublas_dgemm_8.csv | 721 +++++++++
.../cuBLAS_7.5/Tesla_K40/cublas_sgemm_8.csv | 721 +++++++++
.../cuBLAS_7.5/Tesla_K40/cublas_zgemm_8.csv | 721 +++++++++
doc/performance/cuBLAS_7.5/Tesla_K40/peak_dp.csv | 181 +++
doc/performance/cuBLAS_7.5/Tesla_K40/peak_sp.csv | 181 +++
src/CMakeLists.txt | 87 +-
src/client/clfunc_common.hpp | 18 +-
src/client/clfunc_xgemm.hpp | 192 ++-
src/client/clfunc_xgemv.hpp | 22 +-
src/client/clfunc_xger.hpp | 16 +-
src/client/clfunc_xgerc.hpp | 12 +-
src/client/clfunc_xgeru.hpp | 12 +-
src/client/clfunc_xhemm.hpp | 34 +-
src/client/clfunc_xhemv.hpp | 12 +-
src/client/clfunc_xher.hpp | 10 +-
src/client/clfunc_xher2.hpp | 12 +-
src/client/clfunc_xher2k.hpp | 20 +-
src/client/clfunc_xherk.hpp | 20 +-
src/client/clfunc_xsymm.hpp | 58 +-
src/client/clfunc_xsymv.hpp | 12 +-
src/client/clfunc_xsyr.hpp | 10 +-
src/client/clfunc_xsyr2.hpp | 12 +-
src/client/clfunc_xsyr2k.hpp | 34 +-
src/client/clfunc_xsyrk.hpp | 32 +-
src/client/clfunc_xtrmm.hpp | 48 +-
src/client/clfunc_xtrmv.hpp | 14 +-
src/client/clfunc_xtrsm.hpp | 50 +-
src/client/clfunc_xtrsv.hpp | 14 +-
src/client/client.cpp | 12 +-
src/include/msvc.h | 2 +
src/library/CMakeLists.txt | 464 +++++-
src/library/OCLBinaryGenerator.cmake | 86 ++
src/library/bingen.cmake | 1 +
src/library/blas/AutoGemm/.gitignore | 4 +
src/library/blas/AutoGemm/AutoGemm.py | 47 +
src/library/blas/AutoGemm/AutoGemmParameters.py | 149 ++
.../AutoGemmTools/AutoGemmPreCompileKernels.cpp | 925 ++++++++++++
.../blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h | 793 ++++++++++
.../AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp | 1392 ++++++++++++++++++
.../blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp | 995 +++++++++++++
src/library/blas/AutoGemm/Common.py | 60 +
src/library/blas/AutoGemm/Includes.py | 465 ++++++
src/library/blas/AutoGemm/KernelOpenCL.py | 587 ++++++++
src/library/blas/AutoGemm/KernelParameters.py | 253 ++++
src/library/blas/AutoGemm/KernelSelection.py | 683 +++++++++
src/library/blas/AutoGemm/KernelsToPreCompile.py | 91 ++
src/library/blas/AutoGemm/README.txt | 0
.../UserGemmKernelSources/UserGemmClKernels.h | 23 +
.../UserGemmKernelSourceIncludes.cpp | 57 +
.../UserGemmKernelSourceIncludes.h | 80 +
.../dgemm_Col_NN_B0_MX048_NX048_KX08_src.cpp | 203 +++
.../dgemm_Col_NN_B1_MX048_NX048_KX08_src.cpp | 203 +++
.../dgemm_Col_NT_B0_MX048_NX048_KX08_src.cpp | 196 +++
.../dgemm_Col_NT_B1_MX048_NX048_KX08_src.cpp | 193 +++
.../dgemm_Col_TN_B0_MX048_NX048_KX08_src.cpp | 195 +++
.../dgemm_Col_TN_B1_MX048_NX048_KX08_src.cpp | 195 +++
.../sgemm_Col_NN_B0_MX032_NX032_KX16_src.cpp | 129 ++
.../sgemm_Col_NN_B0_MX064_NX064_KX16_src.cpp | 160 ++
.../sgemm_Col_NN_B0_MX096_NX096_KX16_src.cpp | 208 +++
...sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_src.cpp | 149 ++
.../sgemm_Col_NN_B1_MX032_NX032_KX16_src.cpp | 129 ++
.../sgemm_Col_NN_B1_MX064_NX064_KX16_src.cpp | 161 +++
.../sgemm_Col_NN_B1_MX096_NX096_KX16_src.cpp | 207 +++
.../sgemm_Col_NT_B0_MX032_NX032_KX16_src.cpp | 126 ++
.../sgemm_Col_NT_B0_MX064_NX064_KX16_src.cpp | 165 +++
.../sgemm_Col_NT_B0_MX096_NX096_KX16_src.cpp | 210 +++
...sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_src.cpp | 148 ++
...sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_src.cpp | 158 ++
.../sgemm_Col_NT_B1_MX032_NX032_KX16_src.cpp | 126 ++
.../sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_src.cpp | 161 +++
.../sgemm_Col_NT_B1_MX064_NX032_KX16_COL_src.cpp | 157 ++
.../sgemm_Col_NT_B1_MX064_NX064_KX16_src.cpp | 160 ++
.../sgemm_Col_NT_B1_MX096_NX096_KX16_src.cpp | 208 +++
.../sgemm_Col_NT_B1_MX128_NX128_KX16_src.cpp | 290 ++++
.../sgemm_Col_TN_B0_MX032_NX032_KX16_src.cpp | 128 ++
.../sgemm_Col_TN_B0_MX064_NX064_KX16_src.cpp | 165 +++
.../sgemm_Col_TN_B0_MX096_NX096_KX16_src.cpp | 209 +++
...sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_src.cpp | 148 ++
.../sgemm_Col_TN_B1_MX032_NX032_KX16_src.cpp | 127 ++
.../sgemm_Col_TN_B1_MX064_NX064_KX16_src.cpp | 165 +++
.../sgemm_Col_TN_B1_MX096_NX096_KX16_src.cpp | 209 +++
src/library/blas/functor/functor.cc | 3 +-
src/library/blas/functor/hawaii.cc | 19 +
.../blas/functor/hawaii_sgemmBig1024Kernel.cc | 506 +++++++
.../blas/functor/hawaii_sgemmSplitKernel.cc | 147 ++
.../functor/include/hawaii_sgemmBig1024Kernel.h | 48 +
src/library/blas/generic/binary_lookup.cc | 6 +-
src/library/blas/generic/common.c | 16 +-
.../blas/gens/clTemplates/sgemm_gcn_bigMatrices.cl | 264 ++++
src/library/blas/include/xgemm.h | 39 +
src/library/blas/ixamax.c | 16 +-
src/library/blas/specialCases/GemmSpecialCases.cpp | 994 +++++++++++++
.../blas/specialCases/include/GemmSpecialCases.h | 42 +
src/library/blas/trtri/TrtriClKernels.h | 44 +
.../blas/trtri/TrtriKernelSourceIncludes.cpp | 81 ++
src/library/blas/trtri/TrtriKernelSourceIncludes.h | 124 ++
.../blas/trtri/diag_dtrtri_lower_128_16.cpp | 172 +++
.../blas/trtri/diag_dtrtri_upper_128_16.cpp | 151 ++
.../blas/trtri/diag_dtrtri_upper_192_12.cpp | 149 ++
.../trtri/triple_dgemm_update_128_16_PART1_L.cpp | 161 +++
.../trtri/triple_dgemm_update_128_16_PART2_L.cpp | 143 ++
.../blas/trtri/triple_dgemm_update_128_16_R.cpp | 239 +++
.../trtri/triple_dgemm_update_128_32_PART1_L.cpp | 150 ++
.../trtri/triple_dgemm_update_128_32_PART1_R.cpp | 151 ++
.../trtri/triple_dgemm_update_128_32_PART2_L.cpp | 135 ++
.../trtri/triple_dgemm_update_128_32_PART2_R.cpp | 136 ++
.../trtri/triple_dgemm_update_128_64_PART1_L.cpp | 145 ++
.../trtri/triple_dgemm_update_128_64_PART1_R.cpp | 145 ++
.../trtri/triple_dgemm_update_128_64_PART2_L.cpp | 133 ++
.../trtri/triple_dgemm_update_128_64_PART2_R.cpp | 134 ++
.../triple_dgemm_update_128_ABOVE64_PART1_L.cpp | 146 ++
.../triple_dgemm_update_128_ABOVE64_PART1_R.cpp | 144 ++
.../triple_dgemm_update_128_ABOVE64_PART2_L.cpp | 134 ++
.../triple_dgemm_update_128_ABOVE64_PART2_R.cpp | 135 ++
.../triple_dgemm_update_128_ABOVE64_PART3_L.cpp | 91 ++
.../triple_dgemm_update_128_ABOVE64_PART3_R.cpp | 94 ++
.../blas/trtri/triple_dgemm_update_192_12_R.cpp | 194 +++
.../trtri/triple_dgemm_update_192_24_PART1_R.cpp | 117 ++
.../trtri/triple_dgemm_update_192_24_PART2_R.cpp | 112 ++
.../trtri/triple_dgemm_update_192_48_PART1_R.cpp | 144 ++
.../trtri/triple_dgemm_update_192_48_PART2_R.cpp | 145 ++
.../trtri/triple_dgemm_update_192_96_PART1_R.cpp | 156 ++
.../trtri/triple_dgemm_update_192_96_PART2_R.cpp | 157 ++
src/library/blas/xasum.c | 16 +-
src/library/blas/xaxpy.c | 6 +
src/library/blas/xcopy.c | 6 +
src/library/blas/xdot.c | 20 +-
src/library/blas/xgemm.cc | 872 ++++++++---
src/library/blas/xger.c | 8 +
src/library/blas/xher.c | 8 +-
src/library/blas/xher2.c | 8 +
src/library/blas/xrot.c | 12 +-
src/library/blas/xrotg.c | 24 +-
src/library/blas/xrotm.c | 8 +
src/library/blas/xrotmg.c | 14 +
src/library/blas/xscal.c | 8 +-
src/library/blas/xswap.c | 6 +
src/library/blas/xsymm.c | 19 +-
src/library/blas/xsyr.c | 8 +-
src/library/blas/xsyr2.c | 8 +
src/library/blas/xtbmv.c | 16 +-
src/library/blas/xtrmv.c | 16 +-
src/library/blas/xtrsm.cc | 1525 ++++++++++++++++++++
.../tools/OCLBinaryGenerator/CMakeLists.txt | 33 +
.../OCLBinaryGenerator/OCLBinaryGenerator.cpp | 347 +++++
src/scripts/perf/blasPerformanceTesting.py | 14 +-
src/tests/common.cpp | 29 +-
src/tests/correctness/corr-gemm.cpp | 12 +-
src/tests/include/gemm.h | 6 +-
183 files changed, 32982 insertions(+), 1582 deletions(-)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git
More information about the debian-science-commits mailing list