[clblas] 01/10: Merge tag 'upstream/v2.8' into debian/sid

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Wed Oct 28 11:52:33 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository clblas.

commit c432c8e3c0e6dae2c81490887ac5fbb3bf20a368
Merge: 763f39c 8b5f7a0
Author: Ghislain Antony Vaillant <ghisvail at gmail.com>
Date:   Tue Oct 27 08:10:31 2015 +0000

    Merge tag 'upstream/v2.8' into debian/sid

 .gitignore                                         |    3 +
 .travis.yml                                        |  168 ++-
 README.md                                          |   57 +-
 appveyor.yml                                       |  105 ++
 .../S9150/cgemmNT_S9150_14.50.2_2.6.0_8.csv        |  721 +++++++++
 .../S9150/dgemmNT_S9150_14.50.2_2.6.0_8.csv        |  721 +++++++++
 doc/performance/clBLAS_2.6.0/S9150/dtrsm_192.csv   |   60 +-
 .../S9150/sgemmNT_S9150_14.50.2_2.6.0_8.csv        |  721 +++++++++
 doc/performance/clBLAS_2.6.0/S9150/sgemm_32.csv    |  360 ++---
 .../S9150/zgemmNT_S9150_14.50.2_2.6.0_8.csv        |  721 +++++++++
 doc/performance/clBLAS_2.6.0/W9100/README.txt      |   35 +
 .../W9100/clblas_sgemmNT_w9100_14502.csv           |  181 +++
 .../{S9150/sgemm_32.csv => W9100/dgemm_32.csv}     |  360 ++---
 doc/performance/clBLAS_2.6.0/W9100/dgemm_96.csv    |   61 +
 .../W9100/dtrsm_w9100_14502.csv}                   |   60 +-
 doc/performance/clBLAS_2.6.0/W9100/peak_dp.csv     |  181 +++
 doc/performance/clBLAS_2.6.0/W9100/peak_sp.csv     |  181 +++
 .../{S9150/sgemm_32.csv => W9100/zgemm_32.csv}     |  360 ++---
 doc/performance/clBLAS_2.6.0/W9100/zgemm_64.csv    |   91 ++
 .../S9150/cgemmNT_S9150_14.50.2_2.7.1_8.csv        |  721 +++++++++
 .../S9150/dgemmNT_S9150_14.50.2_2.7.1_8.csv        |  721 +++++++++
 .../S9150/sgemmNT_S9150_14.50.2_2.7.1_8.csv        |  721 +++++++++
 .../S9150/zgemmNT_S9150_14.50.2_2.7.1_8.csv        |  721 +++++++++
 ...as271_w9100_dtrsm_col_left_lower_unit_14502.csv |   31 +
 ...as271_w9100_dtrsm_col_left_upper_unit_14502.csv |   31 +
 ...s271_w9100_dtrsm_col_right_lower_unit_14502.csv |   31 +
 ...s271_w9100_dtrsm_col_right_upper_unit_14502.csv |   31 +
 doc/performance/cuBLAS_7.0/Tesla_K40/dtrsm.csv     |   60 +-
 doc/performance/cuBLAS_7.0/Tesla_K40/sgemm.csv     |  360 ++---
 .../cublas75_k40_dtrsm_col_left_lower_unit.csv     |   31 +
 .../cublas75_k40_dtrsm_col_left_upper_unit.csv     |   31 +
 .../cublas75_k40_dtrsm_col_right_lower_unit.csv    |   31 +
 .../cublas75_k40_dtrsm_col_right_upper_unit.csv    |   31 +
 .../cuBLAS_7.5/Tesla_K40/cublas_cgemm_8.csv        |  721 +++++++++
 .../cuBLAS_7.5/Tesla_K40/cublas_dgemm_8.csv        |  721 +++++++++
 .../cuBLAS_7.5/Tesla_K40/cublas_sgemm_8.csv        |  721 +++++++++
 .../cuBLAS_7.5/Tesla_K40/cublas_zgemm_8.csv        |  721 +++++++++
 doc/performance/cuBLAS_7.5/Tesla_K40/peak_dp.csv   |  181 +++
 doc/performance/cuBLAS_7.5/Tesla_K40/peak_sp.csv   |  181 +++
 src/CMakeLists.txt                                 |   87 +-
 src/client/clfunc_common.hpp                       |   18 +-
 src/client/clfunc_xgemm.hpp                        |  192 ++-
 src/client/clfunc_xgemv.hpp                        |   22 +-
 src/client/clfunc_xger.hpp                         |   16 +-
 src/client/clfunc_xgerc.hpp                        |   12 +-
 src/client/clfunc_xgeru.hpp                        |   12 +-
 src/client/clfunc_xhemm.hpp                        |   34 +-
 src/client/clfunc_xhemv.hpp                        |   12 +-
 src/client/clfunc_xher.hpp                         |   10 +-
 src/client/clfunc_xher2.hpp                        |   12 +-
 src/client/clfunc_xher2k.hpp                       |   20 +-
 src/client/clfunc_xherk.hpp                        |   20 +-
 src/client/clfunc_xsymm.hpp                        |   58 +-
 src/client/clfunc_xsymv.hpp                        |   12 +-
 src/client/clfunc_xsyr.hpp                         |   10 +-
 src/client/clfunc_xsyr2.hpp                        |   12 +-
 src/client/clfunc_xsyr2k.hpp                       |   34 +-
 src/client/clfunc_xsyrk.hpp                        |   32 +-
 src/client/clfunc_xtrmm.hpp                        |   48 +-
 src/client/clfunc_xtrmv.hpp                        |   14 +-
 src/client/clfunc_xtrsm.hpp                        |   50 +-
 src/client/clfunc_xtrsv.hpp                        |   14 +-
 src/client/client.cpp                              |   12 +-
 src/include/msvc.h                                 |    2 +
 src/library/CMakeLists.txt                         |  464 +++++-
 src/library/OCLBinaryGenerator.cmake               |   86 ++
 src/library/bingen.cmake                           |    1 +
 src/library/blas/AutoGemm/.gitignore               |    4 +
 src/library/blas/AutoGemm/AutoGemm.py              |   47 +
 src/library/blas/AutoGemm/AutoGemmParameters.py    |  149 ++
 .../AutoGemmTools/AutoGemmPreCompileKernels.cpp    |  925 ++++++++++++
 .../blas/AutoGemm/AutoGemmTools/AutoGemmUtil.h     |  793 ++++++++++
 .../AutoGemm/AutoGemmTools/ProfileAutoGemm.cpp     | 1392 ++++++++++++++++++
 .../blas/AutoGemm/AutoGemmTools/TestAutoGemm.cpp   |  995 +++++++++++++
 src/library/blas/AutoGemm/Common.py                |   60 +
 src/library/blas/AutoGemm/Includes.py              |  465 ++++++
 src/library/blas/AutoGemm/KernelOpenCL.py          |  587 ++++++++
 src/library/blas/AutoGemm/KernelParameters.py      |  253 ++++
 src/library/blas/AutoGemm/KernelSelection.py       |  683 +++++++++
 src/library/blas/AutoGemm/KernelsToPreCompile.py   |   91 ++
 src/library/blas/AutoGemm/README.txt               |    0
 .../UserGemmKernelSources/UserGemmClKernels.h      |   23 +
 .../UserGemmKernelSourceIncludes.cpp               |   57 +
 .../UserGemmKernelSourceIncludes.h                 |   80 +
 .../dgemm_Col_NN_B0_MX048_NX048_KX08_src.cpp       |  203 +++
 .../dgemm_Col_NN_B1_MX048_NX048_KX08_src.cpp       |  203 +++
 .../dgemm_Col_NT_B0_MX048_NX048_KX08_src.cpp       |  196 +++
 .../dgemm_Col_NT_B1_MX048_NX048_KX08_src.cpp       |  193 +++
 .../dgemm_Col_TN_B0_MX048_NX048_KX08_src.cpp       |  195 +++
 .../dgemm_Col_TN_B1_MX048_NX048_KX08_src.cpp       |  195 +++
 .../sgemm_Col_NN_B0_MX032_NX032_KX16_src.cpp       |  129 ++
 .../sgemm_Col_NN_B0_MX064_NX064_KX16_src.cpp       |  160 ++
 .../sgemm_Col_NN_B0_MX096_NX096_KX16_src.cpp       |  208 +++
 ...sgemm_Col_NN_B1_MX032_NX032_KX16_BRANCH_src.cpp |  149 ++
 .../sgemm_Col_NN_B1_MX032_NX032_KX16_src.cpp       |  129 ++
 .../sgemm_Col_NN_B1_MX064_NX064_KX16_src.cpp       |  161 +++
 .../sgemm_Col_NN_B1_MX096_NX096_KX16_src.cpp       |  207 +++
 .../sgemm_Col_NT_B0_MX032_NX032_KX16_src.cpp       |  126 ++
 .../sgemm_Col_NT_B0_MX064_NX064_KX16_src.cpp       |  165 +++
 .../sgemm_Col_NT_B0_MX096_NX096_KX16_src.cpp       |  210 +++
 ...sgemm_Col_NT_B1_MX032_NX032_KX16_BRANCH_src.cpp |  148 ++
 ...sgemm_Col_NT_B1_MX032_NX032_KX16_SINGLE_src.cpp |  158 ++
 .../sgemm_Col_NT_B1_MX032_NX032_KX16_src.cpp       |  126 ++
 .../sgemm_Col_NT_B1_MX032_NX064_KX16_ROW_src.cpp   |  161 +++
 .../sgemm_Col_NT_B1_MX064_NX032_KX16_COL_src.cpp   |  157 ++
 .../sgemm_Col_NT_B1_MX064_NX064_KX16_src.cpp       |  160 ++
 .../sgemm_Col_NT_B1_MX096_NX096_KX16_src.cpp       |  208 +++
 .../sgemm_Col_NT_B1_MX128_NX128_KX16_src.cpp       |  290 ++++
 .../sgemm_Col_TN_B0_MX032_NX032_KX16_src.cpp       |  128 ++
 .../sgemm_Col_TN_B0_MX064_NX064_KX16_src.cpp       |  165 +++
 .../sgemm_Col_TN_B0_MX096_NX096_KX16_src.cpp       |  209 +++
 ...sgemm_Col_TN_B1_MX032_NX032_KX16_BRANCH_src.cpp |  148 ++
 .../sgemm_Col_TN_B1_MX032_NX032_KX16_src.cpp       |  127 ++
 .../sgemm_Col_TN_B1_MX064_NX064_KX16_src.cpp       |  165 +++
 .../sgemm_Col_TN_B1_MX096_NX096_KX16_src.cpp       |  209 +++
 src/library/blas/functor/functor.cc                |    3 +-
 src/library/blas/functor/hawaii.cc                 |   19 +
 .../blas/functor/hawaii_sgemmBig1024Kernel.cc      |  506 +++++++
 .../blas/functor/hawaii_sgemmSplitKernel.cc        |  147 ++
 .../functor/include/hawaii_sgemmBig1024Kernel.h    |   48 +
 src/library/blas/generic/binary_lookup.cc          |    6 +-
 src/library/blas/generic/common.c                  |   16 +-
 .../blas/gens/clTemplates/sgemm_gcn_bigMatrices.cl |  264 ++++
 src/library/blas/include/xgemm.h                   |   39 +
 src/library/blas/ixamax.c                          |   16 +-
 src/library/blas/specialCases/GemmSpecialCases.cpp |  994 +++++++++++++
 .../blas/specialCases/include/GemmSpecialCases.h   |   42 +
 src/library/blas/trtri/TrtriClKernels.h            |   44 +
 .../blas/trtri/TrtriKernelSourceIncludes.cpp       |   81 ++
 src/library/blas/trtri/TrtriKernelSourceIncludes.h |  124 ++
 .../blas/trtri/diag_dtrtri_lower_128_16.cpp        |  172 +++
 .../blas/trtri/diag_dtrtri_upper_128_16.cpp        |  151 ++
 .../blas/trtri/diag_dtrtri_upper_192_12.cpp        |  149 ++
 .../trtri/triple_dgemm_update_128_16_PART1_L.cpp   |  161 +++
 .../trtri/triple_dgemm_update_128_16_PART2_L.cpp   |  143 ++
 .../blas/trtri/triple_dgemm_update_128_16_R.cpp    |  239 +++
 .../trtri/triple_dgemm_update_128_32_PART1_L.cpp   |  150 ++
 .../trtri/triple_dgemm_update_128_32_PART1_R.cpp   |  151 ++
 .../trtri/triple_dgemm_update_128_32_PART2_L.cpp   |  135 ++
 .../trtri/triple_dgemm_update_128_32_PART2_R.cpp   |  136 ++
 .../trtri/triple_dgemm_update_128_64_PART1_L.cpp   |  145 ++
 .../trtri/triple_dgemm_update_128_64_PART1_R.cpp   |  145 ++
 .../trtri/triple_dgemm_update_128_64_PART2_L.cpp   |  133 ++
 .../trtri/triple_dgemm_update_128_64_PART2_R.cpp   |  134 ++
 .../triple_dgemm_update_128_ABOVE64_PART1_L.cpp    |  146 ++
 .../triple_dgemm_update_128_ABOVE64_PART1_R.cpp    |  144 ++
 .../triple_dgemm_update_128_ABOVE64_PART2_L.cpp    |  134 ++
 .../triple_dgemm_update_128_ABOVE64_PART2_R.cpp    |  135 ++
 .../triple_dgemm_update_128_ABOVE64_PART3_L.cpp    |   91 ++
 .../triple_dgemm_update_128_ABOVE64_PART3_R.cpp    |   94 ++
 .../blas/trtri/triple_dgemm_update_192_12_R.cpp    |  194 +++
 .../trtri/triple_dgemm_update_192_24_PART1_R.cpp   |  117 ++
 .../trtri/triple_dgemm_update_192_24_PART2_R.cpp   |  112 ++
 .../trtri/triple_dgemm_update_192_48_PART1_R.cpp   |  144 ++
 .../trtri/triple_dgemm_update_192_48_PART2_R.cpp   |  145 ++
 .../trtri/triple_dgemm_update_192_96_PART1_R.cpp   |  156 ++
 .../trtri/triple_dgemm_update_192_96_PART2_R.cpp   |  157 ++
 src/library/blas/xasum.c                           |   16 +-
 src/library/blas/xaxpy.c                           |    6 +
 src/library/blas/xcopy.c                           |    6 +
 src/library/blas/xdot.c                            |   20 +-
 src/library/blas/xgemm.cc                          |  872 ++++++++---
 src/library/blas/xger.c                            |    8 +
 src/library/blas/xher.c                            |    8 +-
 src/library/blas/xher2.c                           |    8 +
 src/library/blas/xrot.c                            |   12 +-
 src/library/blas/xrotg.c                           |   24 +-
 src/library/blas/xrotm.c                           |    8 +
 src/library/blas/xrotmg.c                          |   14 +
 src/library/blas/xscal.c                           |    8 +-
 src/library/blas/xswap.c                           |    6 +
 src/library/blas/xsymm.c                           |   19 +-
 src/library/blas/xsyr.c                            |    8 +-
 src/library/blas/xsyr2.c                           |    8 +
 src/library/blas/xtbmv.c                           |   16 +-
 src/library/blas/xtrmv.c                           |   16 +-
 src/library/blas/xtrsm.cc                          | 1525 ++++++++++++++++++++
 .../tools/OCLBinaryGenerator/CMakeLists.txt        |   33 +
 .../OCLBinaryGenerator/OCLBinaryGenerator.cpp      |  347 +++++
 src/scripts/perf/blasPerformanceTesting.py         |   14 +-
 src/tests/common.cpp                               |   29 +-
 src/tests/correctness/corr-gemm.cpp                |   12 +-
 src/tests/include/gemm.h                           |    6 +-
 183 files changed, 32982 insertions(+), 1582 deletions(-)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git



More information about the debian-science-commits mailing list