[fflas-ffpack] 02/21: Imported Upstream version 2.2.1
Doug Torrance
dtorrance-guest at moszumanska.debian.org
Tue Jul 19 18:45:53 UTC 2016
This is an automated email from the git hooks/post-receive script.
dtorrance-guest pushed a commit to branch master
in repository fflas-ffpack.
commit a47ccdd3974483298f075d478e3a3287f05c72ca
Author: Doug Torrance <dtorrance at piedmont.edu>
Date: Mon Jul 18 10:53:22 2016 -0400
Imported Upstream version 2.2.1
---
.gitignore | 143 +
AUTHORS | 13 +-
ChangeLog | 92 +-
INSTALL | 380 +-
Makefile.am | 54 +-
Makefile.in | 931 -
NEWS | 1 -
README | 17 +-
TODO | 54 +-
_clang-format | 52 +
aclocal.m4 | 990 -
autogen.sh | 202 +
benchmark/Makefile.in | 642 -
benchmark/graph/Makefile.am | 26 -
benchmark/graph/Makefile.in | 444 -
benchmark/graph/graph_report.sh | 111 -
benchmark/graph/make_graph.sh | 50 -
benchmark/graph/make_graph_file.pl | 35 -
benchmark/html/Makefile.in | 445 -
benchmark/html/fflas.css | 89 -
benchmark/html/html_report.sh | 68 -
benchmark/html/html_report.xsl | 72 -
benchmark/html/process.sh | 62 -
benchmark/run.sh | 150 -
benchmark/src/BLAS_LAPACK/Makefile.am | 54 -
benchmark/src/BLAS_LAPACK/Makefile.in | 743 -
benchmark/src/BLAS_LAPACK/check-dgemm.C | 96 -
benchmark/src/BLOCKING/Makefile | 450 -
benchmark/src/BLOCKING/Makefile.am | 38 -
benchmark/src/BLOCKING/Makefile.in | 450 -
benchmark/src/BLOCKING/gnucommand | 49 -
benchmark/src/BLOCKING/mesure.sh | 32 -
benchmark/src/BLOCKING/mulMM.C | 185 -
benchmark/src/BLOCKING/plot1-mulMM | 21 -
benchmark/src/BLOCKING/tblockmat.C | 230 -
benchmark/src/FFLAS_FFPACK/Makefile.am | 56 -
benchmark/src/FFLAS_FFPACK/Makefile.in | 754 -
benchmark/src/FFLAS_FFPACK/check-fgemm.C | 98 -
benchmark/src/FFLAS_FFPACK/check-ftrsm.C | 93 -
benchmark/src/FFLAS_FFPACK/check-wino.C | 132 -
benchmark/src/Makefile.in | 640 -
benchmark/test-src/Makefile.in | 446 -
benchmark/test-src/mesure-BLAS_LAPACK.sh | 31 -
benchmark/test-src/mesure-FFLAS_FFPACK.sh | 31 -
benchmark/test-src/mesure.sh | 29 -
benchmark/test-src/parameter.in | 7 -
benchmarks/Makefile.am | 90 +
benchmarks/Makefile.tests | 28 +
benchmarks/benchmark-charpoly.C | 108 +
benchmarks/benchmark-dgemm.C | 166 +
.../benchmark-dgetrf.C | 81 +-
.../benchmark-dgetri.C | 85 +-
.../check-dtrsm.C => benchmarks/benchmark-dtrsm.C | 67 +-
.../benchmark-dtrtri.C | 57 +-
benchmarks/benchmark-echelon.C | 326 +
benchmarks/benchmark-fgemm-mp.C | 265 +
benchmarks/benchmark-fgemm.C | 248 +
benchmarks/benchmark-fgemv-mp.C | 189 +
benchmarks/benchmark-fspmm.C | 199 +
benchmarks/benchmark-fspmv.C | 194 +
benchmarks/benchmark-ftrsm-mp.C | 105 +
benchmarks/benchmark-ftrsm.C | 151 +
.../benchmark-ftrtri.C | 48 +-
.../benchmark-inverse.C | 51 +-
benchmarks/benchmark-lqup-mp.C | 102 +
.../check-lqup.C => benchmarks/benchmark-lqup.C | 61 +-
benchmarks/benchmark-pfspmv.C | 169 +
benchmarks/benchmark-pluq.C | 259 +
benchmarks/benchmark-wino.C | 178 +
benchmarks/files/mat1916-1916x1916-195985.smf.gz | Bin 0 -> 541753 bytes
benchmarks/files/mat1916-1916x1916-195985.sms.gz | Bin 0 -> 541762 bytes
benchmarks/perfpublisher.sh | 169 +
build-aux/config.guess | 1526 --
build-aux/config.sub | 1757 --
build-aux/install-sh | 520 -
build-aux/ltmain.sh | 9655 ---------
build-aux/missing | 376 -
config.h.in | 134 -
configure | 20727 -------------------
configure.ac | 263 +-
doc/Doxyfile | 2 +-
doc/DoxyfileDev | 0
doc/Makefile.am | 2 +-
doc/Makefile.in | 493 -
doc/fflas-ffpack.html | 4 +-
doc/mainpage.doxy | 15 +-
examples/2x2-fgemm.C | 77 +
examples/Makefile.am | 58 +
fflas-ffpack-config.in | 35 +-
fflas-ffpack.pc.in | 14 +
fflas-ffpack/Makefile.am | 8 +-
fflas-ffpack/Makefile.in | 702 -
fflas-ffpack/config-blas.h | 161 +-
fflas-ffpack/fflas-ffpack-config.h | 327 +-
fflas-ffpack/fflas-ffpack-configuration.h | 45 -
fflas-ffpack/fflas-ffpack-optimise.h | 12 -
fflas-ffpack/fflas-ffpack.doxy | 34 +
fflas-ffpack/fflas-ffpack.h | 4 +-
fflas-ffpack/fflas/Makefile.am | 39 +-
fflas-ffpack/fflas/Makefile.in | 563 -
fflas-ffpack/fflas/fflas.doxy | 41 +
fflas-ffpack/fflas/fflas.h | 1239 +-
fflas-ffpack/fflas/fflas_bounds.inl | 521 +-
fflas-ffpack/fflas/fflas_enum.h | 103 +
fflas-ffpack/fflas/fflas_fadd.h | 310 +
fflas-ffpack/fflas/fflas_fadd.inl | 345 +
fflas-ffpack/fflas/fflas_faddm.inl | 288 -
fflas-ffpack/fflas/fflas_faddm_src.inl | 240 -
fflas-ffpack/fflas/fflas_faddmin_src.inl | 211 -
.../{fflas-ffpack.h => fflas/fflas_fassign.h} | 26 +-
fflas-ffpack/fflas/fflas_fassign.inl | 171 +
fflas-ffpack/fflas/fflas_faxpy.inl | 74 +-
fflas-ffpack/fflas/fflas_fcopy.inl | 82 -
fflas-ffpack/fflas/fflas_fdot.inl | 36 +-
fflas-ffpack/fflas/fflas_fgemm.inl | 2086 +-
.../fflas/fflas_fgemm}/Makefile.am | 25 +-
fflas-ffpack/fflas/fflas_fgemm/fgemm_classical.inl | 312 +
.../fflas/fflas_fgemm/fgemm_classical_mp.inl | 480 +
fflas-ffpack/fflas/fflas_fgemm/fgemm_winograd.inl | 546 +
fflas-ffpack/fflas/fflas_fgemm/matmul.doxy | 34 +
fflas-ffpack/fflas/fflas_fgemm/schedule_bini.inl | 110 +
.../fflas/fflas_fgemm/schedule_winograd.inl | 549 +
.../fflas/fflas_fgemm/schedule_winograd_acc.inl | 644 +
.../fflas/fflas_fgemm/schedule_winograd_acc_ip.inl | 425 +
.../fflas/fflas_fgemm/schedule_winograd_ip.inl | 366 +
fflas-ffpack/fflas/fflas_fgemv.inl | 677 +-
fflas-ffpack/fflas/fflas_fgemv_mp.inl | 125 +
fflas-ffpack/fflas/fflas_fger.inl | 269 +-
fflas-ffpack/fflas/fflas_fger_mp.inl | 97 +
fflas-ffpack/fflas/fflas_freduce.h | 182 +
fflas-ffpack/fflas/fflas_freduce.inl | 794 +
fflas-ffpack/fflas/fflas_freduce_mp.inl | 67 +
fflas-ffpack/fflas/fflas_freivalds.inl | 121 +
.../{fflas-ffpack.h => fflas/fflas_fscal.h} | 26 +-
fflas-ffpack/fflas/fflas_fscal.inl | 418 +
fflas-ffpack/fflas/fflas_fscal_mp.inl | 131 +
fflas-ffpack/fflas/fflas_ftrmm.inl | 17 +-
fflas-ffpack/fflas/fflas_ftrmm_src.inl | 50 +-
fflas-ffpack/fflas/fflas_ftrsm.inl | 120 +-
fflas-ffpack/fflas/fflas_ftrsm_mp.inl | 357 +
fflas-ffpack/fflas/fflas_ftrsm_src.inl | 271 +-
fflas-ffpack/fflas/fflas_ftrsv.inl | 24 +-
fflas-ffpack/fflas/fflas_helpers.inl | 368 +
.../fflas/fflas_igemm}/Makefile.am | 22 +-
fflas-ffpack/fflas/fflas_igemm/igemm.doxy | 36 +
fflas-ffpack/fflas/fflas_igemm/igemm.h | 96 +
fflas-ffpack/fflas/fflas_igemm/igemm.inl | 194 +
fflas-ffpack/fflas/fflas_igemm/igemm_kernels.h | 100 +
fflas-ffpack/fflas/fflas_igemm/igemm_kernels.inl | 578 +
fflas-ffpack/fflas/fflas_igemm/igemm_tools.h | 62 +
fflas-ffpack/fflas/fflas_igemm/igemm_tools.inl | 167 +
fflas-ffpack/fflas/fflas_level1.inl | 431 +
fflas-ffpack/fflas/fflas_level2.inl | 516 +
fflas-ffpack/fflas/fflas_level3.inl | 399 +
fflas-ffpack/fflas/fflas_pfgemm.inl | 93 +
fflas-ffpack/fflas/fflas_pftrsm.inl | 164 +
fflas-ffpack/fflas/fflas_simd.h | 357 +
.../fflas/fflas_simd}/Makefile.am | 35 +-
fflas-ffpack/fflas/fflas_simd/simd.doxy | 35 +
.../fflas_simd/simd128.inl} | 51 +-
fflas-ffpack/fflas/fflas_simd/simd128_double.inl | 354 +
fflas-ffpack/fflas/fflas_simd/simd128_float.inl | 373 +
fflas-ffpack/fflas/fflas_simd/simd128_int16.inl | 425 +
fflas-ffpack/fflas/fflas_simd/simd128_int32.inl | 455 +
fflas-ffpack/fflas/fflas_simd/simd128_int64.inl | 493 +
.../fflas_simd/simd256.inl} | 53 +-
fflas-ffpack/fflas/fflas_simd/simd256_double.inl | 384 +
fflas-ffpack/fflas/fflas_simd/simd256_float.inl | 406 +
fflas-ffpack/fflas/fflas_simd/simd256_int16.inl | 516 +
fflas-ffpack/fflas/fflas_simd/simd256_int32.inl | 484 +
fflas-ffpack/fflas/fflas_simd/simd256_int64.inl | 520 +
fflas-ffpack/fflas/fflas_simd/simd_modular.inl | 179 +
fflas-ffpack/fflas/fflas_sparse.h | 455 +
fflas-ffpack/fflas/fflas_sparse.inl | 892 +
.../fflas/fflas_sparse}/Makefile.am | 27 +-
fflas-ffpack/fflas/fflas_sparse/coo.h | 83 +
.../fflas/fflas_sparse/coo}/Makefile.am | 16 +-
fflas-ffpack/fflas/fflas_sparse/coo/coo_spmm.inl | 338 +
fflas-ffpack/fflas/fflas_sparse/coo/coo_spmv.inl | 231 +
fflas-ffpack/fflas/fflas_sparse/coo/coo_utils.inl | 95 +
fflas-ffpack/fflas/fflas_sparse/csr.h | 93 +
.../fflas/fflas_sparse/csr}/Makefile.am | 18 +-
fflas-ffpack/fflas/fflas_sparse/csr/csr_pspmm.inl | 939 +
fflas-ffpack/fflas/fflas_sparse/csr/csr_pspmv.inl | 429 +
fflas-ffpack/fflas/fflas_sparse/csr/csr_spmm.inl | 611 +
fflas-ffpack/fflas/fflas_sparse/csr/csr_spmv.inl | 330 +
fflas-ffpack/fflas/fflas_sparse/csr/csr_utils.inl | 251 +
fflas-ffpack/fflas/fflas_sparse/csr_hyb.h | 73 +
.../fflas/fflas_sparse/csr_hyb}/Makefile.am | 18 +-
.../fflas/fflas_sparse/csr_hyb/csr_hyb_pspmm.inl | 703 +
.../fflas/fflas_sparse/csr_hyb/csr_hyb_pspmv.inl | 217 +
.../fflas/fflas_sparse/csr_hyb/csr_hyb_spmm.inl | 317 +
.../fflas/fflas_sparse/csr_hyb/csr_hyb_spmv.inl | 131 +
.../fflas/fflas_sparse/csr_hyb/csr_hyb_utils.inl | 214 +
fflas-ffpack/fflas/fflas_sparse/ell.h | 90 +
.../fflas/fflas_sparse/ell}/Makefile.am | 18 +-
fflas-ffpack/fflas/fflas_sparse/ell/ell_pspmm.inl | 697 +
fflas-ffpack/fflas/fflas_sparse/ell/ell_pspmv.inl | 401 +
fflas-ffpack/fflas/fflas_sparse/ell/ell_spmm.inl | 567 +
fflas-ffpack/fflas/fflas_sparse/ell/ell_spmv.inl | 264 +
fflas-ffpack/fflas/fflas_sparse/ell/ell_utils.inl | 116 +
fflas-ffpack/fflas/fflas_sparse/ell_r.h | 84 +
.../fflas/fflas_sparse/ell_r}/Makefile.am | 18 +-
.../fflas/fflas_sparse/ell_r/ell_r_spmv.inl | 319 +
fflas-ffpack/fflas/fflas_sparse/ell_simd.h | 87 +
.../fflas/fflas_sparse/ell_simd}/Makefile.am | 16 +-
.../fflas/fflas_sparse/ell_simd/ell_simd_pspmv.inl | 628 +
.../fflas/fflas_sparse/ell_simd/ell_simd_spmv.inl | 368 +
.../fflas/fflas_sparse/ell_simd/ell_simd_utils.inl | 160 +
fflas-ffpack/fflas/fflas_sparse/hyb_zo.h | 65 +
.../fflas/fflas_sparse/hyb_zo}/Makefile.am | 19 +-
.../fflas/fflas_sparse/hyb_zo/hyb_zo_pspmm.inl | 141 +
.../fflas/fflas_sparse/hyb_zo/hyb_zo_pspmv.inl | 72 +
.../fflas/fflas_sparse/hyb_zo/hyb_zo_spmm.inl | 140 +
.../fflas/fflas_sparse/hyb_zo/hyb_zo_spmv.inl | 72 +
.../fflas/fflas_sparse/hyb_zo/hyb_zo_utils.inl | 136 +
fflas-ffpack/fflas/fflas_sparse/read_sparse.h | 460 +
fflas-ffpack/fflas/fflas_sparse/sell.h | 74 +
.../fflas/fflas_sparse/sell}/Makefile.am | 16 +-
.../fflas/fflas_sparse/sell/sell_pspmv.inl | 681 +
fflas-ffpack/fflas/fflas_sparse/sell/sell_spmv.inl | 396 +
.../fflas/fflas_sparse/sell/sell_utils.inl | 281 +
.../fflas/fflas_sparse/sparse_matrix_traits.h | 338 +
fflas-ffpack/fflas/fflas_sparse/utils.h | 130 +
fflas-ffpack/ffpack/Makefile.am | 18 +-
fflas-ffpack/ffpack/Makefile.in | 559 -
fflas-ffpack/ffpack/ffpack.doxy | 31 +
fflas-ffpack/ffpack/ffpack.h | 2338 +--
fflas-ffpack/ffpack/ffpack.inl | 401 +
fflas-ffpack/ffpack/ffpack_charpoly.inl | 259 +-
fflas-ffpack/ffpack/ffpack_charpoly_danilevski.inl | 29 +-
fflas-ffpack/ffpack/ffpack_charpoly_kgfast.inl | 126 +-
.../ffpack/ffpack_charpoly_kgfastgeneralized.inl | 168 +-
fflas-ffpack/ffpack/ffpack_charpoly_kglu.inl | 70 +-
fflas-ffpack/ffpack/ffpack_echelonforms.inl | 606 +-
fflas-ffpack/ffpack/ffpack_fgesv.inl | 93 +
fflas-ffpack/ffpack/ffpack_fgetrs.inl | 271 +
fflas-ffpack/ffpack/ffpack_frobenius.inl | 309 +-
fflas-ffpack/ffpack/ffpack_ftrtr.inl | 105 +
fflas-ffpack/ffpack/ffpack_invert.inl | 139 +
fflas-ffpack/ffpack/ffpack_krylovelim.inl | 57 +-
fflas-ffpack/ffpack/ffpack_ludivine.inl | 1051 +-
fflas-ffpack/ffpack/ffpack_ludivine_mp.inl | 136 +
fflas-ffpack/ffpack/ffpack_minpoly.inl | 28 +-
fflas-ffpack/ffpack/ffpack_minpoly_construct.inl | 29 +-
fflas-ffpack/ffpack/ffpack_permutation.inl | 541 +
fflas-ffpack/ffpack/ffpack_pluq.inl | 632 +
fflas-ffpack/ffpack/ffpack_pluq_mp.inl | 130 +
fflas-ffpack/ffpack/ffpack_ppluq.inl | 407 +
fflas-ffpack/ffpack/ffpack_rankprofiles.inl | 278 +
fflas-ffpack/field/Makefile.am | 30 +-
fflas-ffpack/field/Makefile.in | 561 -
fflas-ffpack/field/field-traits.h | 348 +
fflas-ffpack/field/field.doxy | 36 +
fflas-ffpack/field/modular-balanced-double.h | 447 -
fflas-ffpack/field/modular-balanced-float.h | 486 -
fflas-ffpack/field/modular-balanced-int32.h | 473 -
fflas-ffpack/field/modular-balanced-int64.h | 486 -
fflas-ffpack/field/modular-balanced.h | 61 -
fflas-ffpack/field/modular-double.h | 416 -
fflas-ffpack/field/modular-extended.h | 333 +
fflas-ffpack/field/modular-float.h | 421 -
fflas-ffpack/field/modular-int32.h | 546 -
fflas-ffpack/field/modular-int64.h | 482 -
fflas-ffpack/field/modular-randiter.h | 91 -
fflas-ffpack/field/nonzero-randiter.h | 92 -
fflas-ffpack/field/rns-double-elt.h | 143 +
fflas-ffpack/field/rns-double.h | 421 +
fflas-ffpack/field/rns-double.inl | 568 +
fflas-ffpack/field/rns-integer-mod.h | 862 +
fflas-ffpack/field/rns-integer.h | 179 +
fflas-ffpack/{fflas-ffpack.h => field/rns.h} | 30 +-
fflas-ffpack/{fflas-ffpack.h => field/rns.inl} | 25 +-
fflas-ffpack/field/unparametric.h | 362 -
.../html => fflas-ffpack/interfaces}/Makefile.am | 18 +-
fflas-ffpack/interfaces/interfaces.doxy | 32 +
fflas-ffpack/interfaces/libs/Makefile.am | 88 +
fflas-ffpack/interfaces/libs/c_libs.doxy | 38 +
fflas-ffpack/interfaces/libs/fflas_L1_inst.C | 66 +
fflas-ffpack/interfaces/libs/fflas_L1_inst.h | 64 +
.../interfaces/libs/fflas_L1_inst_implem.inl | 369 +
fflas-ffpack/interfaces/libs/fflas_L2_inst.C | 66 +
fflas-ffpack/interfaces/libs/fflas_L2_inst.h | 64 +
.../interfaces/libs/fflas_L2_inst_implem.inl | 464 +
fflas-ffpack/interfaces/libs/fflas_L3_inst.C | 65 +
fflas-ffpack/interfaces/libs/fflas_L3_inst.h | 64 +
.../interfaces/libs/fflas_L3_inst_implem.inl | 199 +
fflas-ffpack/interfaces/libs/fflas_c.h | 461 +
fflas-ffpack/interfaces/libs/fflas_lvl1.C | 338 +
fflas-ffpack/interfaces/libs/fflas_lvl2.C | 382 +
fflas-ffpack/interfaces/libs/fflas_lvl3.C | 143 +
.../libs/fflas_sparse.C} | 32 +-
fflas-ffpack/interfaces/libs/ffpack.C | 1209 ++
fflas-ffpack/interfaces/libs/ffpack_c.h | 707 +
.../interfaces/libs/ffpack_compiled_spec.inl | 49 +
fflas-ffpack/interfaces/libs/ffpack_inst.C | 72 +
fflas-ffpack/interfaces/libs/ffpack_inst.h | 71 +
.../interfaces/libs/ffpack_inst_implem.inl | 396 +
.../src => fflas-ffpack/paladin}/Makefile.am | 14 +-
fflas-ffpack/paladin/blockcuts.inl | 481 +
fflas-ffpack/paladin/fflas_pfinit.h | 87 +
fflas-ffpack/paladin/kaapi_routines.inl | 192 +
fflas-ffpack/paladin/parallel.h | 566 +
fflas-ffpack/paladin/pfgemm_variants.inl | 486 +
fflas-ffpack/utils/Makefile.am | 27 +-
fflas-ffpack/utils/Makefile.in | 549 -
fflas-ffpack/utils/Matio.h | 262 +-
fflas-ffpack/utils/align-allocator.h | 255 +
fflas-ffpack/utils/args-parser.h | 110 +-
fflas-ffpack/utils/bit_manipulation.h | 173 +
fflas-ffpack/{fflas-ffpack.h => utils/cast.h} | 33 +-
fflas-ffpack/utils/debug.h | 106 +-
fflas-ffpack/utils/fflas_intrinsic.h | 49 +
fflas-ffpack/utils/fflas_memory.h | 383 +
fflas-ffpack/utils/fflas_randommatrix.h | 330 +
fflas-ffpack/utils/flimits.h | 193 +
fflas-ffpack/utils/print-utils.h | 73 +-
fflas-ffpack/utils/timer.C | 218 -
fflas-ffpack/utils/timer.h | 164 +-
incremente-versions | 135 +
{fflas-ffpack => macros/CodeChunk}/Makefile.am | 21 +-
macros/CodeChunk/avx.C | 11 +
.../fflas-ffpack.h => macros/CodeChunk/cblas.C | 36 +-
.../fflas-ffpack.h => macros/CodeChunk/clapack.C | 36 +-
macros/CodeChunk/cuda.C | 9 +
.../fflas-ffpack.h => macros/CodeChunk/givaro.C | 29 +-
macros/CodeChunk/gmp.C | 6 +
.../fflas-ffpack.h => macros/CodeChunk/lapack.C | 36 +-
macros/CodeChunk/sse.C | 12 +
macros/Makefile.am | 15 +-
macros/Makefile.in | 454 -
macros/aclocal-include.m4 | 2 +-
macros/avx-check.m4 | 123 +
macros/ax_cxx_compile_stdcxx_11.m4 | 133 +
macros/blas-check.m4 | 176 -
macros/blasATLAS-check.m4 | 268 -
macros/blasGOTO-check.m4 | 145 -
macros/blasGSL-check.m4 | 141 -
macros/blasOTHER-check.m4 | 165 -
macros/cuda-check.m4 | 123 +
macros/debug.m4 | 59 +-
macros/fflas-ffpack-blas.m4 | 181 +
macros/fflas-ffpack-doc.m4 | 2 +-
macros/fflas-ffpack-misc.m4 | 2 +-
macros/fflas-ffpack-opt.m4 | 78 +-
...local-include.m4 => fflas-ffpack-precompile.m4} | 35 +-
macros/givaro-check.m4 | 62 +-
macros/gmp-check.m4 | 155 -
macros/lapack-check.m4 | 318 -
macros/libtool.m4 | 7995 -------
macros/ltoptions.m4 | 384 -
macros/ltsugar.m4 | 123 -
macros/ltversion.m4 | 23 -
macros/lt~obsolete.m4 | 98 -
macros/{aclocal-include.m4 => mkl-check.m4} | 40 +-
macros/omp-check.m4 | 73 +
macros/sse2-check.m4 | 67 +
optimiser/Makefile.am | 2 +-
optimiser/Makefile.in | 439 -
optimiser/winograd.C | 137 +-
tests/Makefile.am | 100 +-
tests/Makefile.in | 869 -
tests/Makefile.template | 48 +
tests/benchfgemm.C | 100 +
tests/benchlqup.C | 87 +
{benchmark => tests/data}/Makefile.am | 12 +-
tests/data/mat11.sms | 35 +
.../field-general.h => tests/dense_generator.C | 76 +-
tests/perfpublisher.sh | 158 +
tests/regression-check.C | 88 +
tests/test-bini-p.C | 2457 +++
tests/test-charpoly.C | 159 +
tests/test-colechelon.C | 203 +
tests/test-compressQ.C | 50 +-
tests/test-det.C | 138 +
tests/test-echelon.C | 433 +
tests/test-echelon_old.C | 204 +
tests/test-fadd.C | 507 +
tests/test-fgemm.C | 423 +
tests/test-fgemv.C | 153 +
tests/test-fger.C | 300 +
tests/test-fgesv.C | 196 +
tests/test-finit.C | 229 +
tests/{test-compressQ.C => test-frobenius.C} | 115 +-
tests/test-fscal.C | 332 +
tests/test-fspmm-dlp.C | 353 +
tests/test-fspmm-recint.C | 177 +
tests/test-fsquare.C | 102 +
tests/test-ftrsm.C | 231 +
tests/test-ftrtri.C | 131 +
tests/test-fullranksubmatrix.C | 82 +
tests/test-igemm.C | 328 +
tests/test-interfaces-c.c | 20 +
tests/test-invert.C | 120 +
tests/{test-compressQ.C => test-krylov-elim.C} | 135 +-
tests/test-lqup.C | 1004 -
tests/test-lu.C | 1062 +
.../fflas-ffpack.h => tests/test-matrix-io.h | 25 +-
tests/test-multifile1.C | 5 +
tests/test-multifile2.C | 7 +
tests/test-nullspace.C | 130 +
tests/test-paladin-splitter.C | 268 +
tests/test-paladin-task.C | 160 +
tests/test-pfgemm-DSL.C | 209 +
tests/test-pluq.C | 272 +
tests/test-ppluq.C | 313 +
tests/test-rank.C | 91 +
tests/test-rankprofiles.C | 213 +
tests/test-redcolechelon.C | 207 +
tests/test-redechelon.C | 207 +
tests/test-redrowechelon.C | 210 +
tests/test-rowechelon.C | 206 +
tests/test-simd.C | 370 +
tests/test-sparse.C | 393 +
tests/test-utils.h | 165 +-
tests/testeur_fgemm.C | 269 +
tests/testeur_ftrsm.C | 233 +
tests/testeur_lqup.C | 326 +
utils/Makefile.am | 30 -
utils/Makefile.in | 549 -
utils/Matio.h | 291 -
utils/args-parser.h | 336 -
utils/debug.h | 220 -
utils/print-utils.h | 138 -
utils/timer.C | 218 -
utils/timer.h | 194 -
426 files changed, 66655 insertions(+), 75512 deletions(-)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1eff7bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,143 @@
+Makefile
+Makefile.in
+aclocal.m4
+autogen.status
+autom4te.cache
+benchmarks/Makefile
+benchmarks/Makefile.in
+build-aux
+config.h
+config.h.in
+config.log
+config.status
+configure
+doc/Makefile
+doc/Makefile.in
+fflas-ffpack-config
+fflas-ffpack.pc
+fflas-ffpack/Makefile
+fflas-ffpack/Makefile.in
+fflas-ffpack/config.h
+fflas-ffpack/fflas-ffpack-optimise.h
+fflas-ffpack/fflas/Makefile
+fflas-ffpack/fflas/Makefile.in
+fflas-ffpack/fflas/fflas_fgemm/Makefile
+fflas-ffpack/fflas/fflas_fgemm/Makefile.in
+fflas-ffpack/fflas/fflas_igemm/Makefile
+fflas-ffpack/fflas/fflas_igemm/Makefile.in
+fflas-ffpack/fflas/fflas_simd/Makefile
+fflas-ffpack/fflas/fflas_simd/Makefile.in
+fflas-ffpack/fflas/fflas_sparse/Makefile
+fflas-ffpack/fflas/fflas_sparse/Makefile.in
+fflas-ffpack/fflas/fflas_sparse/coo/Makefile
+fflas-ffpack/fflas/fflas_sparse/coo/Makefile.in
+fflas-ffpack/fflas/fflas_sparse/csr/Makefile
+fflas-ffpack/fflas/fflas_sparse/csr/Makefile.in
+fflas-ffpack/fflas/fflas_sparse/csr_hyb/Makefile
+fflas-ffpack/fflas/fflas_sparse/csr_hyb/Makefile.in
+fflas-ffpack/fflas/fflas_sparse/ell/Makefile
+fflas-ffpack/fflas/fflas_sparse/ell/Makefile.in
+fflas-ffpack/fflas/fflas_sparse/ell_simd/Makefile
+fflas-ffpack/fflas/fflas_sparse/ell_simd/Makefile.in
+fflas-ffpack/fflas/fflas_sparse/hyb_zo/Makefile
+fflas-ffpack/fflas/fflas_sparse/hyb_zo/Makefile.in
+fflas-ffpack/fflas/fflas_sparse/sell/Makefile
+fflas-ffpack/fflas/fflas_sparse/sell/Makefile.in
+fflas-ffpack/ffpack/Makefile
+fflas-ffpack/ffpack/Makefile.in
+fflas-ffpack/field/Makefile
+fflas-ffpack/field/Makefile.in
+fflas-ffpack/interfaces/Makefile
+fflas-ffpack/interfaces/Makefile.in
+fflas-ffpack/interfaces/libs/Makefile
+fflas-ffpack/interfaces/libs/Makefile.in
+fflas-ffpack/paladin/Makefile
+fflas-ffpack/paladin/Makefile.in
+fflas-ffpack/utils/Makefile
+fflas-ffpack/utils/Makefile.in
+interfaces/
+libtool
+macros/CodeChunk/Makefile
+macros/CodeChunk/Makefile.in
+macros/Makefile
+macros/Makefile.in
+macros/libtool.m4
+macros/ltoptions.m4
+macros/ltsugar.m4
+macros/ltversion.m4
+macros/lt~obsolete.m4
+optim.log
+optimiser/Makefile
+optimiser/Makefile.in
+stamp-h1
+tests/Makefile
+tests/Makefile.in
+tests/data/Makefile
+tests/data/Makefile.in
+benchmarks/benchmark-fgemm
+benchmarks/benchmark-fgemm.o
+benchmarks/benchmark-pluq
+benchmarks/benchmark-pluq.o
+tests/regression-check
+tests/regression-check.log
+tests/regression-check.o
+tests/regression-check.trs
+tests/test-compressQ
+tests/test-compressQ.log
+tests/test-compressQ.o
+tests/test-compressQ.trs
+tests/test-det
+tests/test-det.log
+tests/test-det.o
+tests/test-det.trs
+tests/test-echelon
+tests/test-echelon.o
+tests/test-fadd
+tests/test-fadd.log
+tests/test-fadd.o
+tests/test-fadd.trs
+tests/test-fgemm
+tests/test-fgemm.log
+tests/test-fgemm.o
+tests/test-fgemm.trs
+tests/test-fger
+tests/test-fger.log
+tests/test-fger.o
+tests/test-fger.trs
+tests/test-finit
+tests/test-finit.log
+tests/test-finit.o
+tests/test-finit.trs
+tests/test-fscal
+tests/test-fscal.log
+tests/test-fscal.o
+tests/test-fscal.trs
+tests/test-ftrsm
+tests/test-ftrsm.o
+tests/test-lu
+tests/test-lu.o
+tests/test-multifile
+tests/test-multifile.log
+tests/test-multifile.trs
+tests/test-multifile1.o
+tests/test-multifile2.o
+tests/test-rankprofiles
+tests/test-rankprofiles.log
+tests/test-rankprofiles.o
+tests/test-rankprofiles.trs
+benchmarks/benchmark-charpoly.o
+benchmarks/benchmark-dgemm.o
+benchmarks/benchmark-dgetrf.o
+benchmarks/benchmark-dgetri.o
+benchmarks/benchmark-dtrsm.o
+benchmarks/benchmark-dtrtri.o
+benchmarks/benchmark-fgemm-mp.o
+benchmarks/benchmark-ftrsm
+benchmarks/benchmark-ftrsm-mp.o
+benchmarks/benchmark-ftrsm.o
+benchmarks/benchmark-ftrtri.o
+benchmarks/benchmark-inverse.o
+benchmarks/benchmark-lqup-mp.o
+benchmarks/benchmark-lqup.o
+benchmarks/benchmark-wino.o
+benchmarks/benchmark_sgemm-benchmark-dgemm.o
diff --git a/AUTHORS b/AUTHORS
index 9ef2bb1..fd87c66 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,4 +1,9 @@
-Jean-Guillaume Dumas <Jean-Guillaume.Dumas at imag.fr>
-Pascal Giorgi <pascal.giorgi at univ-perp.fr>
-Cl�ment Pernet <Clement.Pernet at imag.fr>
-
+François Bissey
+Brice Boyer <boyer.brice at gmail.com>
+Alexis Breust <alexis.breust at imag.fr>
+Jean-Guillaume Dumas <jean-guillaume.dumas at imag.fr>
+Pascal Giorgi <pascal.giorgi at lirmm.fr>
+Gavin Harisson
+Clément Pernet <clement.pernet at imag.fr>
+Ziad Sultan <ziad.sultan at imag.fr>
+Bastien Vialla <bastien.vialla at lirmm.fr>
diff --git a/ChangeLog b/ChangeLog
index 1393001..4f35a1b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,93 @@
-2011-15-04 v1.4.0
- * Convert project to autotools (à la LinBox et Givaro)
+2016-04-08 v2.2.1
+ * many fixes to the build system
+ * more consistent use of flags and dependency to precompiled code
+ * fixes all remaining issues for the integration in SageMath
+ * numerous minor fixes to the parallel code
+
+2016-02-23 v2.2.0
+ * new precompiled interface
+ * improvements and API change for the parallel code
+ * new random matrix generators
+ * fix many bugs
+
+2015-06-11 v2.1.0
+
+ Test suite and benchmark improvement :
+ * much larger coverage
+ * run most tests over a wide range of fields
+ * systematic interface and options
+
+ New features:
+ * parallel PLUQ
+ * computation of rank profiles and rank profile matrices
+ * echelon and reduced echelon forms form both LUdivine and PLUQ
+ * getters to the forms and the transformation matrices
+ * igemm routine for BLAS like gemm on 64bits ints
+ * support of Modular<int64_t> and ModularBalanced<int64_t> using igemm,
+ to support fields of bitsize between 25 and 31
+ * support of Modular<rint<K> > for Z/pZ with p of size > 32bits (based
+ on Givaro's RecInt multiprecision integers)
+ * support of RNS based gaussian elimination on multiprecision fields
+ * Paladin: DSL for parallel programming adressing OMP, TBB and Kaapi
+
+ Improvements:
+ * a lot of new sparse mat-vec product improvements
+ * faster parallel and sequential fgemm
+ * many bugs found and removed (no known bugs at release time)
+ * improved helper system, with mode of operations
+
+2014-08-08 v2.0.0
+ code update :
+ * rank profile
+ * clean namespaces
+ * use field one, zero, etc
+ * fix clang warnings
+ * more blas wrappers (sger, sdot, copy, etc)
+ * simplification of fgemm
+ * simplify blas detection (+cflags)
+ * easier permutation handling
+ * improve testers
+ * use std::min, max
+ * many functions have API change to use last pointer argument for return
+ * some more doc
+ * and probably many more in 2+ years !
+
+ bugs :
+ * correct permutations
+ * fix fgemm, fgemv, ftrmm, ftrsm bugs
+ * mem leaks
+ * bugs for degenerate cases
+ * fix bounds
+ * and probably many more in 2+ years !
+
+ new features :
+ * new pluq 2x2 recursive alg
+ * leftlooking
+ * parallel OMP fgemm, ftrmm, ftrsm
+ * parallel KAAPI fgemm, ftrmm, ftrsm
+ * new testers for pluq, fgemm, etc
+ * new tester for Bini approximate formula
+ * fadd, fsub, finit, fscal, etc
+ * vectorisation using AVX(2)
+ * in place schedules
+ * new Echelon code
+ * helper design for fgemm, fgemv, etc
+ * template factorisation for modular/multiprecision fields
+ * helper traits
+ * automatic matrix field conversion (ie double -> float)
+ * add spmv kernels
+ * enable use of sparse MKL
+ * parallel.h, avx and simd files
+ * new DSL for parallelism
+ * RNS and multiprecision fields
+ * new const_cast, fflas_new etc functions
+ * element_ptr in fields
+ * use Givaro dependency (compulsory now)
+ * new test for regressions (with tickets)
+ * and probably many more in 2+ years !
+
+2011-04-15 v1.4.0
+ * Convert project to autotools (à la LinBox et Givaro)
2008-06-05 v1.3.3
* fix the design of specializations to modular<double> modular<float>
* give a proper name to ModularBalanced
diff --git a/INSTALL b/INSTALL
index c9bcdfb..f62c89d 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,11 +1,377 @@
-If you have blas available :
- make ; make install ;
-should install FFLAS-FFPACK.
+Installation Instructions
+*************************
-If you need to compile tests, you'll probably need Givaro too.
+Copyright (C) 1994-1996, 1999-2002, 2004-2016 Free Software Foundation,
+Inc.
-The test directory is under big reconstruction, and so is the benchmark/.
+ Copying and distribution of this file, with or without modification,
+are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved. This file is offered as-is,
+without warranty of any kind.
-Any help in documenting, coding is wellcome !
+Requirements
+============
- -- The FFLAS-FFPACK team.
+ * The FFLAS-FFPACK library requires a version of GNU C++ compiler great or equal to 4.7. Other recent compilers such as clang++ or pathcc are also supported.
+ * the Givaro library v4.0.1 or later: https://github.com/linbox-team/givaro (itself depending on GNU GMP)
+ * a BLAS library: OpenBLAS or ATLAS (recommended) or any other implementation of the Fortran or C blas interface.
+
+Basic Installation
+==================
+
+ Briefly, the shell command `./configure && make && make install'
+should configure, build, and install this package. The following
+more-detailed instructions are generic; see the `README' file for
+instructions specific to this package. Some packages provide this
+`INSTALL' file but do not implement all of the features documented
+below. The lack of an optional feature in a given package is not
+necessarily a bug. More recommendations for GNU packages can be found
+in *note Makefile Conventions: (standards)Makefile Conventions.
+
+ The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation. It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions. Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, and a
+file `config.log' containing compiler output (useful mainly for
+debugging `configure').
+
+ It can also use an optional file (typically called `config.cache'
+and enabled with `--cache-file=config.cache' or simply `-C') that saves
+the results of its tests to speed up reconfiguring. Caching is
+disabled by default to prevent problems with accidental use of stale
+cache files.
+
+ If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release. If you are using the cache, and at
+some point `config.cache' contains results you don't want to keep, you
+may remove or edit it.
+
+ The file `configure.ac' (or `configure.in') is used to create
+`configure' by a program called `autoconf'. You need `configure.ac' if
+you want to change it or regenerate `configure' using a newer version
+of `autoconf'.
+
+ The simplest way to compile this package is:
+
+ 1. `cd' to the directory containing the package's source code and type
+ `./configure' to configure the package for your system.
+
+ Running `configure' might take a while. While running, it prints
+ some messages telling which features it is checking for.
+
+ 2. Type `make ; make install' to compile the package.
+
+ 3. Optionally, type `make check' to run any self-tests that come with
+ the package, generally using the just-built uninstalled binaries.
+
+ 4. Type `make install' to install the programs and any data files and
+ documentation. When installing into a prefix owned by root, it is
+ recommended that the package be configured and built as a regular
+ user, and only the `make install' phase executed with root
+ privileges.
+
+ 5. Optionally, type `make installcheck' to repeat any self-tests, but
+ this time using the binaries in their final installed location.
+ This target does not install anything. Running this target as a
+ regular user, particularly if the prior `make install' required
+ root privileges, verifies that the installation completed
+ correctly.
+
+ 6. You can remove the program binaries and object files from the
+ source code directory by typing `make clean'. To also remove the
+ files that `configure' created (so you can compile the package for
+ a different kind of computer), type `make distclean'. There is
+ also a `make maintainer-clean' target, but that is intended mainly
+ for the package's developers. If you use it, you may have to get
+ all sorts of other programs in order to regenerate files that came
+ with the distribution.
+
+ 7. Often, you can also type `make uninstall' to remove the installed
+ files again. In practice, not all packages have tested that
+ uninstallation works correctly, even though it is required by the
+ GNU Coding Standards.
+
+ 8. Some packages, particularly those that use Automake, provide `make
+ distcheck', which can by used by developers to test that all other
+ targets like `make install' and `make uninstall' work correctly.
+ This target is generally not run by end users.
+
+Compilers and Options
+=====================
+
+ Some systems require unusual options for compilation or linking that
+the `configure' script does not know about. Run `./configure --help'
+for details on some of the pertinent environment variables.
+
+ You can give `configure' initial values for configuration parameters
+by setting variables in the command line or in the environment. Here
+is an example:
+
+ ./configure CC=c99 CFLAGS=-g LIBS=-lposix
+
+ *Note Defining Variables::, for more details.
+
+Compiling For Multiple Architectures
+====================================
+
+ You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory. To do this, you can use GNU `make'. `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script. `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'. This
+is known as a "VPATH" build.
+
+ With a non-GNU `make', it is safer to compile the package for one
+architecture at a time in the source code directory. After you have
+installed the package for one architecture, use `make distclean' before
+reconfiguring for another architecture.
+
+ On MacOS X 10.5 and later systems, you can create libraries and
+executables that work on multiple system types--known as "fat" or
+"universal" binaries--by specifying multiple `-arch' options to the
+compiler but only a single `-arch' option to the preprocessor. Like
+this:
+
+ ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
+ CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
+ CPP="gcc -E" CXXCPP="g++ -E"
+
+ This is not guaranteed to produce working output in all cases, you
+may have to build one architecture at a time and combine the results
+using the `lipo' tool if you have problems.
+
+Installation Names
+==================
+
+ By default, `make install' installs the package's commands under
+`/usr/local/bin', include files under `/usr/local/include', etc. You
+can specify an installation prefix other than `/usr/local' by giving
+`configure' the option `--prefix=PREFIX', where PREFIX must be an
+absolute file name.
+
+ You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files. If you
+pass the option `--exec-prefix=PREFIX' to `configure', the package uses
+PREFIX as the prefix for installing programs and libraries.
+Documentation and other data files still use the regular prefix.
+
+ In addition, if you use an unusual directory layout you can give
+options like `--bindir=DIR' to specify different values for particular
+kinds of files. Run `configure --help' for a list of the directories
+you can set and what kinds of files go in them. In general, the
+default for these options is expressed in terms of `${prefix}', so that
+specifying just `--prefix' will affect all of the other directory
+specifications that were not explicitly provided.
+
+ The most portable way to affect installation locations is to pass the
+correct locations to `configure'; however, many packages provide one or
+both of the following shortcuts of passing variable assignments to the
+`make install' command line to change installation locations without
+having to reconfigure or recompile.
+
+ The first method involves providing an override variable for each
+affected directory. For example, `make install
+prefix=/alternate/directory' will choose an alternate location for all
+directory configuration variables that were expressed in terms of
+`${prefix}'. Any directories that were specified during `configure',
+but not in terms of `${prefix}', must each be overridden at install
+time for the entire installation to be relocated. The approach of
+makefile variable overrides for each directory variable is required by
+the GNU Coding Standards, and ideally causes no recompilation.
+However, some platforms have known limitations with the semantics of
+shared libraries that end up requiring recompilation when using this
+method, particularly noticeable in packages that use GNU Libtool.
+
+ The second method involves providing the `DESTDIR' variable. For
+example, `make install DESTDIR=/alternate/directory' will prepend
+`/alternate/directory' before all installation names. The approach of
+`DESTDIR' overrides is not required by the GNU Coding Standards, and
+does not work on platforms that have drive letters. On the other hand,
+it does better at avoiding recompilation issues, and works well even
+when some directory options were not specified in terms of `${prefix}'
+at `configure' time.
+
+Optional Features
+=================
+
+ If the package supports it, you can cause programs to be installed
+with an extra prefix or suffix on their names by giving `configure' the
+option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
+
+ Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System). The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+
+ For packages that use the X Window System, `configure' can usually
+find the X include and library files automatically, but if it doesn't,
+you can use the `configure' options `--x-includes=DIR' and
+`--x-libraries=DIR' to specify their locations.
+
+ Some packages offer the ability to configure how verbose the
+execution of `make' will be. For these packages, running `./configure
+--enable-silent-rules' sets the default to minimal output, which can be
+overridden with `make V=1'; while running `./configure
+--disable-silent-rules' sets the default to verbose, which can be
+overridden with `make V=0'.
+
+Particular systems
+==================
+
+ On HP-UX, the default C compiler is not ANSI C compatible. If GNU
+CC is not installed, it is recommended to use the following options in
+order to use an ANSI C compiler:
+
+ ./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
+
+and if that doesn't work, install pre-built binaries of GCC for HP-UX.
+
+ HP-UX `make' updates targets which have the same time stamps as
+their prerequisites, which makes it generally unusable when shipped
+generated files such as `configure' are involved. Use GNU `make'
+instead.
+
+ On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
+parse its `<wchar.h>' header file. The option `-nodtk' can be used as
+a workaround. If GNU CC is not installed, it is therefore recommended
+to try
+
+ ./configure CC="cc"
+
+and if that doesn't work, try
+
+ ./configure CC="cc -nodtk"
+
+ On Solaris, don't put `/usr/ucb' early in your `PATH'. This
+directory contains several dysfunctional programs; working variants of
+these programs are available in `/usr/bin'. So, if you need `/usr/ucb'
+in your `PATH', put it _after_ `/usr/bin'.
+
+ On Haiku, software installed for all users goes in `/boot/common',
+not `/usr/local'. It is recommended to use the following options:
+
+ ./configure --prefix=/boot/common
+
+Specifying the System Type
+==========================
+
+ There may be some features `configure' cannot figure out
+automatically, but needs to determine by the type of machine the package
+will run on. Usually, assuming the package is built to be run on the
+_same_ architectures, `configure' can figure that out, but if it prints
+a message saying it cannot guess the machine type, give it the
+`--build=TYPE' option. TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name which has the form:
+
+ CPU-COMPANY-SYSTEM
+
+where SYSTEM can have one of these forms:
+
+ OS
+ KERNEL-OS
+
+ See the file `config.sub' for the possible values of each field. If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the machine type.
+
+ If you are _building_ compiler tools for cross-compiling, you should
+use the option `--target=TYPE' to select the type of system they will
+produce code for.
+
+ If you want to _use_ a cross compiler, that generates code for a
+platform different from the build platform, you should specify the
+"host" platform (i.e., that on which the generated programs will
+eventually be run) with `--host=TYPE'.
+
+Sharing Defaults
+================
+
+ If you want to set default values for `configure' scripts to share,
+you can create a site shell script called `config.site' that gives
+default values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists. Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Defining Variables
+==================
+
+ Variables not defined in a site shell script can be set in the
+environment passed to `configure'. However, some packages may run
+configure again during the build, and the customized values of these
+variables may be lost. In order to avoid this problem, you should set
+them in the `configure' command line, using `VAR=value'. For example:
+
+ ./configure CC=/usr/local2/bin/gcc
+
+causes the specified `gcc' to be used as the C compiler (unless it is
+overridden in the site shell script).
+
+Unfortunately, this technique does not work for `CONFIG_SHELL' due to
+an Autoconf limitation. Until the limitation is lifted, you can use
+this workaround:
+
+ CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
+
+`configure' Invocation
+======================
+
+ `configure' recognizes the following options to control how it
+operates.
+
+`--help'
+`-h'
+ Print a summary of all of the options to `configure', and exit.
+
+`--help=short'
+`--help=recursive'
+ Print a summary of the options unique to this package's
+ `configure', and exit. The `short' variant lists options used
+ only in the top level, while the `recursive' variant lists options
+ also present in any nested packages.
+
+`--version'
+`-V'
+ Print the version of Autoconf used to generate the `configure'
+ script, and exit.
+
+`--cache-file=FILE'
+ Enable the cache: use and save the results of the tests in FILE,
+ traditionally `config.cache'. FILE defaults to `/dev/null' to
+ disable caching.
+
+`--config-cache'
+`-C'
+ Alias for `--cache-file=config.cache'.
+
+`--quiet'
+`--silent'
+`-q'
+ Do not print messages saying which checks are being made. To
+ suppress all normal output, redirect it to `/dev/null' (any error
+ messages will still be shown).
+
+`--srcdir=DIR'
+ Look for the package's source code in directory DIR. Usually
+ `configure' can determine that directory automatically.
+
+`--prefix=DIR'
+ Use DIR as the installation prefix. *note Installation Names::
+ for more details, including other options available for fine-tuning
+ the installation locations.
+
+`--no-create'
+`-n'
+ Run the configure checks, but stop before creating any output
+ files.
+
+`configure' also accepts some other, not widely useful, options. Run
+`configure --help' for more details.
diff --git a/Makefile.am b/Makefile.am
index 68b88bb..81653d2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,5 +1,5 @@
# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# adapted from LinBox configuration
#
# ========LICENCE========
@@ -24,44 +24,60 @@
ACLOCAL_AMFLAGS = -I macros
-SUBDIRS=fflas-ffpack tests doc benchmark macros utils optimiser
+SUBDIRS=fflas-ffpack tests doc benchmarks macros optimiser examples
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = fflas-ffpack.pc
+# include_HEADERS=fflas-ffpack-config.h
#!@todo add examples dir XXX
docs:doc/fflas-ffpack-html/index.html
doc/fflas-ffpack-html/index.html:
- (cd doc; make docs)
+ (cd doc; ${MAKE} docs)
docs_dev:doc/fflas-ffpack-dev-html/index.html
doc/fflas-ffpack-dev-html/index.html:
- (cd doc; make docs_dev)
+ (cd doc; ${MAKE} docs_dev)
-# examples:
-# (cd examples; make examples)
+perfpublisher: benchmarks/perfpublisher tests/perfpublisher
+
+benchmarks/perfpublisher:
+ (cd benchmarks; ${MAKE} perfpublisher)
+
+tests/perfpublisher:
+ (cd tests; ${MAKE} perfpublisher)
+
+examples:
+ (cd examples; ${MAKE} examples)
benchmarks:
- (cd benchmarks; make benchmarks)
+ (cd benchmarks; ${MAKE} benchmarks)
uninstall-hook:
- (test -d "$(includedir)/fflas-ffpack" && rmdir "$(bindir)" \
- "$(libdir)" \
- "$(mandir)/man1" \
- "$(mandir)" \
- "$(includedir)/fflas-ffpack/fflas" \
- "$(includedir)/fflas-ffpack/ffpack" \
- "$(includedir)/fflas-ffpack/field" \
- "$(includedir)/fflas-ffpack/utils" \
- "$(includedir)/fflas-ffpack/" \
- "$(datarootdir)/" \
- "$(includedir)") || true
+ (test -d "$(includedir)/fflas-ffpack" && rm -rf \
+ "$(mandir)/man1" "$(mandir)" \
+ "$(includedir)/fflas-ffpack/fflas" \
+ "$(includedir)/fflas-ffpack/fflas/fflas_fgemm" \
+ "$(includedir)/fflas-ffpack/ffpack" \
+ "$(includedir)/fflas-ffpack/field" \
+ "$(includedir)/fflas-ffpack/utils" \
+ "$(includedir)/fflas-ffpack/paladin" \
+ "$(includedir)/fflas-ffpack/interfaces" \
+ "$(includedir)/fflas-ffpack/interfaces/libs" \
+ "$(includedir)/fflas-ffpack/" "$(datarootdir)/" ) || \
+ true
.PHONY:examples benchmarks
bin_SCRIPTS=fflas-ffpack-config
-VERSION=1.6.0
+git:
+ git commit -a; git pull; git push
+
+VERSION=2.2.1
# EXTRA_DIST=incremente-versions
+
diff --git a/Makefile.in b/Makefile.in
deleted file mode 100644
index 83057c0..0000000
--- a/Makefile.in
+++ /dev/null
@@ -1,931 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# adapted from LinBox configuration
-#
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = .
-DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
- $(srcdir)/Makefile.in $(srcdir)/config.h.in \
- $(srcdir)/fflas-ffpack-config.in $(top_srcdir)/configure \
- AUTHORS COPYING COPYING.LESSER ChangeLog INSTALL NEWS TODO \
- build-aux/config.guess build-aux/config.sub \
- build-aux/install-sh build-aux/ltmain.sh build-aux/missing
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
- configure.lineno config.status.lineno
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = config.h
-CONFIG_CLEAN_FILES = fflas-ffpack-config
-CONFIG_CLEAN_VPATH_FILES =
-am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-am__vpath_adj = case $$p in \
- $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
- *) f=$$p;; \
- esac;
-am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
-am__install_max = 40
-am__nobase_strip_setup = \
- srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
-am__nobase_strip = \
- for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
-am__nobase_list = $(am__nobase_strip_setup); \
- for p in $$list; do echo "$$p $$p"; done | \
- sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
- $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
- if (++n[$$2] == $(am__install_max)) \
- { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
- END { for (dir in files) print dir, files[dir] }'
-am__base_list = \
- sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
- sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-am__uninstall_files_from_dir = { \
- test -z "$$files" \
- || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
- || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
- $(am__cd) "$$dir" && rm -f $$files; }; \
- }
-am__installdirs = "$(DESTDIR)$(bindir)"
-SCRIPTS = $(bin_SCRIPTS)
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
- html-recursive info-recursive install-data-recursive \
- install-dvi-recursive install-exec-recursive \
- install-html-recursive install-info-recursive \
- install-pdf-recursive install-ps-recursive install-recursive \
- installcheck-recursive installdirs-recursive pdf-recursive \
- ps-recursive uninstall-recursive
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
- distclean-recursive maintainer-clean-recursive
-AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
- $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
- distdir dist dist-all distcheck
-ETAGS = etags
-CTAGS = ctags
-DIST_SUBDIRS = $(SUBDIRS)
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-distdir = $(PACKAGE)-$(VERSION)
-top_distdir = $(distdir)
-am__remove_distdir = \
- if test -d "$(distdir)"; then \
- find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
- && rm -rf "$(distdir)" \
- || { sleep 5 && rm -rf "$(distdir)"; }; \
- else :; fi
-am__relativize = \
- dir0=`pwd`; \
- sed_first='s,^\([^/]*\)/.*$$,\1,'; \
- sed_rest='s,^[^/]*/*,,'; \
- sed_last='s,^.*/\([^/]*\)$$,\1,'; \
- sed_butlast='s,/*[^/]*$$,,'; \
- while test -n "$$dir1"; do \
- first=`echo "$$dir1" | sed -e "$$sed_first"`; \
- if test "$$first" != "."; then \
- if test "$$first" = ".."; then \
- dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
- dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
- else \
- first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
- if test "$$first2" = "$$first"; then \
- dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
- else \
- dir2="../$$dir2"; \
- fi; \
- dir0="$$dir0"/"$$first"; \
- fi; \
- fi; \
- dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
- done; \
- reldir="$$dir2"
-DIST_ARCHIVES = $(distdir).tar.gz
-GZIP_ENV = --best
-distuninstallcheck_listfiles = find . -type f -print
-am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
- | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
-distcleancheck_listfiles = find . -type f -print
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = 1.6.0
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-ACLOCAL_AMFLAGS = -I macros
-SUBDIRS = fflas-ffpack tests doc benchmark macros utils optimiser
-bin_SCRIPTS = fflas-ffpack-config
-all: config.h
- $(MAKE) $(AM_MAKEFLAGS) all-recursive
-
-.SUFFIXES:
-am--refresh: Makefile
- @:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- echo ' cd $(srcdir) && $(AUTOMAKE) --gnu --ignore-deps'; \
- $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu --ignore-deps \
- && exit 0; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- echo ' $(SHELL) ./config.status'; \
- $(SHELL) ./config.status;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- $(SHELL) ./config.status --recheck
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- $(am__cd) $(srcdir) && $(AUTOCONF)
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
-$(am__aclocal_m4_deps):
-
-config.h: stamp-h1
- @if test ! -f $@; then rm -f stamp-h1; else :; fi
- @if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi
-
-stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
- @rm -f stamp-h1
- cd $(top_builddir) && $(SHELL) ./config.status config.h
-$(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- ($(am__cd) $(top_srcdir) && $(AUTOHEADER))
- rm -f stamp-h1
- touch $@
-
-distclean-hdr:
- -rm -f config.h stamp-h1
-fflas-ffpack-config: $(top_builddir)/config.status $(srcdir)/fflas-ffpack-config.in
- cd $(top_builddir) && $(SHELL) ./config.status $@
-install-binSCRIPTS: $(bin_SCRIPTS)
- @$(NORMAL_INSTALL)
- @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || list=; \
- if test -n "$$list"; then \
- echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
- $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
- fi; \
- for p in $$list; do \
- if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
- if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \
- done | \
- sed -e 'p;s,.*/,,;n' \
- -e 'h;s|.*|.|' \
- -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \
- $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \
- { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
- if ($$2 == $$4) { files[d] = files[d] " " $$1; \
- if (++n[d] == $(am__install_max)) { \
- print "f", d, files[d]; n[d] = 0; files[d] = "" } } \
- else { print "f", d "/" $$4, $$1 } } \
- END { for (d in files) print "f", d, files[d] }' | \
- while read type dir files; do \
- if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
- test -z "$$files" || { \
- echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \
- $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
- } \
- ; done
-
-uninstall-binSCRIPTS:
- @$(NORMAL_UNINSTALL)
- @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \
- files=`for p in $$list; do echo "$$p"; done | \
- sed -e 's,.*/,,;$(transform)'`; \
- dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir)
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-
-distclean-libtool:
- -rm -f libtool config.lt
-
-# This directory's subdirectories are mostly independent; you can cd
-# into them and run `make' without going through this Makefile.
-# To change the values of `make' variables: instead of editing Makefiles,
-# (1) if the variable is set in `config.status', edit `config.status'
-# (which will cause the Makefiles to be regenerated when you run `make');
-# (2) otherwise, pass the desired values on the `make' command line.
-$(RECURSIVE_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- target=`echo $@ | sed s/-recursive//`; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- dot_seen=yes; \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done; \
- if test "$$dot_seen" = "no"; then \
- $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
- fi; test -z "$$fail"
-
-$(RECURSIVE_CLEAN_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- case "$@" in \
- distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
- *) list='$(SUBDIRS)' ;; \
- esac; \
- rev=''; for subdir in $$list; do \
- if test "$$subdir" = "."; then :; else \
- rev="$$subdir $$rev"; \
- fi; \
- done; \
- rev="$$rev ."; \
- target=`echo $@ | sed s/-recursive//`; \
- for subdir in $$rev; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done && test -z "$$fail"
-tags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
- done
-ctags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
- done
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
- include_option=--etags-include; \
- empty_fix=.; \
- else \
- include_option=--include; \
- empty_fix=; \
- fi; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- test ! -f $$subdir/TAGS || \
- set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
- fi; \
- done; \
- list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: ctags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- $(am__remove_distdir)
- test -d "$(distdir)" || mkdir "$(distdir)"
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
- @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- $(am__make_dryrun) \
- || test -d "$(distdir)/$$subdir" \
- || $(MKDIR_P) "$(distdir)/$$subdir" \
- || exit 1; \
- dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
- $(am__relativize); \
- new_distdir=$$reldir; \
- dir1=$$subdir; dir2="$(top_distdir)"; \
- $(am__relativize); \
- new_top_distdir=$$reldir; \
- echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
- echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
- ($(am__cd) $$subdir && \
- $(MAKE) $(AM_MAKEFLAGS) \
- top_distdir="$$new_top_distdir" \
- distdir="$$new_distdir" \
- am__remove_distdir=: \
- am__skip_length_check=: \
- am__skip_mode_fix=: \
- distdir) \
- || exit 1; \
- fi; \
- done
- -test -n "$(am__skip_mode_fix)" \
- || find "$(distdir)" -type d ! -perm -755 \
- -exec chmod u+rwx,go+rx {} \; -o \
- ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
- ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
- ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
- || chmod -R a+r "$(distdir)"
-dist-gzip: distdir
- tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
- $(am__remove_distdir)
-
-dist-bzip2: distdir
- tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
- $(am__remove_distdir)
-
-dist-lzip: distdir
- tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
- $(am__remove_distdir)
-
-dist-lzma: distdir
- tardir=$(distdir) && $(am__tar) | lzma -9 -c >$(distdir).tar.lzma
- $(am__remove_distdir)
-
-dist-xz: distdir
- tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
- $(am__remove_distdir)
-
-dist-tarZ: distdir
- tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
- $(am__remove_distdir)
-
-dist-shar: distdir
- shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
- $(am__remove_distdir)
-
-dist-zip: distdir
- -rm -f $(distdir).zip
- zip -rq $(distdir).zip $(distdir)
- $(am__remove_distdir)
-
-dist dist-all: distdir
- tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
- $(am__remove_distdir)
-
-# This target untars the dist file and tries a VPATH configuration. Then
-# it guarantees that the distribution is self-contained by making another
-# tarfile.
-distcheck: dist
- case '$(DIST_ARCHIVES)' in \
- *.tar.gz*) \
- GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\
- *.tar.bz2*) \
- bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
- *.tar.lzma*) \
- lzma -dc $(distdir).tar.lzma | $(am__untar) ;;\
- *.tar.lz*) \
- lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
- *.tar.xz*) \
- xz -dc $(distdir).tar.xz | $(am__untar) ;;\
- *.tar.Z*) \
- uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
- *.shar.gz*) \
- GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\
- *.zip*) \
- unzip $(distdir).zip ;;\
- esac
- chmod -R a-w $(distdir); chmod a+w $(distdir)
- mkdir $(distdir)/_build
- mkdir $(distdir)/_inst
- chmod a-w $(distdir)
- test -d $(distdir)/_build || exit 0; \
- dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
- && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
- && am__cwd=`pwd` \
- && $(am__cd) $(distdir)/_build \
- && ../configure --srcdir=.. --prefix="$$dc_install_base" \
- $(AM_DISTCHECK_CONFIGURE_FLAGS) \
- $(DISTCHECK_CONFIGURE_FLAGS) \
- && $(MAKE) $(AM_MAKEFLAGS) \
- && $(MAKE) $(AM_MAKEFLAGS) dvi \
- && $(MAKE) $(AM_MAKEFLAGS) check \
- && $(MAKE) $(AM_MAKEFLAGS) install \
- && $(MAKE) $(AM_MAKEFLAGS) installcheck \
- && $(MAKE) $(AM_MAKEFLAGS) uninstall \
- && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
- distuninstallcheck \
- && chmod -R a-w "$$dc_install_base" \
- && ({ \
- (cd ../.. && umask 077 && mkdir "$$dc_destdir") \
- && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
- && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
- && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
- distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
- } || { rm -rf "$$dc_destdir"; exit 1; }) \
- && rm -rf "$$dc_destdir" \
- && $(MAKE) $(AM_MAKEFLAGS) dist \
- && rm -rf $(DIST_ARCHIVES) \
- && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \
- && cd "$$am__cwd" \
- || exit 1
- $(am__remove_distdir)
- @(echo "$(distdir) archives ready for distribution: "; \
- list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
- sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
-distuninstallcheck:
- @test -n '$(distuninstallcheck_dir)' || { \
- echo 'ERROR: trying to run $@ with an empty' \
- '$$(distuninstallcheck_dir)' >&2; \
- exit 1; \
- }; \
- $(am__cd) '$(distuninstallcheck_dir)' || { \
- echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
- exit 1; \
- }; \
- test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
- || { echo "ERROR: files left after uninstall:" ; \
- if test -n "$(DESTDIR)"; then \
- echo " (check DESTDIR support)"; \
- fi ; \
- $(distuninstallcheck_listfiles) ; \
- exit 1; } >&2
-distcleancheck: distclean
- @if test '$(srcdir)' = . ; then \
- echo "ERROR: distcleancheck can only run from a VPATH build" ; \
- exit 1 ; \
- fi
- @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
- || { echo "ERROR: files left in build directory after distclean:" ; \
- $(distcleancheck_listfiles) ; \
- exit 1; } >&2
-check-am: all-am
-check: check-recursive
-all-am: Makefile $(SCRIPTS) config.h
-installdirs: installdirs-recursive
-installdirs-am:
- for dir in "$(DESTDIR)$(bindir)"; do \
- test -z "$$dir" || $(MKDIR_P) "$$dir"; \
- done
-install: install-recursive
-install-exec: install-exec-recursive
-install-data: install-data-recursive
-uninstall: uninstall-recursive
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-recursive
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-recursive
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-recursive
- -rm -f $(am__CONFIG_DISTCLEAN_FILES)
- -rm -f Makefile
-distclean-am: clean-am distclean-generic distclean-hdr \
- distclean-libtool distclean-tags
-
-dvi: dvi-recursive
-
-dvi-am:
-
-html: html-recursive
-
-html-am:
-
-info: info-recursive
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-recursive
-
-install-dvi-am:
-
-install-exec-am: install-binSCRIPTS
-
-install-html: install-html-recursive
-
-install-html-am:
-
-install-info: install-info-recursive
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-recursive
-
-install-pdf-am:
-
-install-ps: install-ps-recursive
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-recursive
- -rm -f $(am__CONFIG_DISTCLEAN_FILES)
- -rm -rf $(top_srcdir)/autom4te.cache
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-recursive
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-recursive
-
-pdf-am:
-
-ps: ps-recursive
-
-ps-am:
-
-uninstall-am: uninstall-binSCRIPTS
- @$(NORMAL_INSTALL)
- $(MAKE) $(AM_MAKEFLAGS) uninstall-hook
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) all \
- ctags-recursive install-am install-strip tags-recursive \
- uninstall-am
-
-.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
- all all-am am--refresh check check-am clean clean-generic \
- clean-libtool ctags ctags-recursive dist dist-all dist-bzip2 \
- dist-gzip dist-lzip dist-lzma dist-shar dist-tarZ dist-xz \
- dist-zip distcheck distclean distclean-generic distclean-hdr \
- distclean-libtool distclean-tags distcleancheck distdir \
- distuninstallcheck dvi dvi-am html html-am info info-am \
- install install-am install-binSCRIPTS install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-pdf install-pdf-am \
- install-ps install-ps-am install-strip installcheck \
- installcheck-am installdirs installdirs-am maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
- uninstall uninstall-am uninstall-binSCRIPTS uninstall-hook
-
-
-#!@todo add examples dir XXX
-
-docs:doc/fflas-ffpack-html/index.html
-
-doc/fflas-ffpack-html/index.html:
- (cd doc; make docs)
-
-docs_dev:doc/fflas-ffpack-dev-html/index.html
-
-doc/fflas-ffpack-dev-html/index.html:
- (cd doc; make docs_dev)
-
-# examples:
-# (cd examples; make examples)
-
-benchmarks:
- (cd benchmarks; make benchmarks)
-
-uninstall-hook:
- (test -d "$(includedir)/fflas-ffpack" && rmdir "$(bindir)" \
- "$(libdir)" \
- "$(mandir)/man1" \
- "$(mandir)" \
- "$(includedir)/fflas-ffpack/fflas" \
- "$(includedir)/fflas-ffpack/ffpack" \
- "$(includedir)/fflas-ffpack/field" \
- "$(includedir)/fflas-ffpack/utils" \
- "$(includedir)/fflas-ffpack/" \
- "$(datarootdir)/" \
- "$(includedir)") || true
-
-.PHONY:examples benchmarks
-
-# EXTRA_DIST=incremente-versions
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/NEWS b/NEWS
index a3b4786..e69de29 100644
--- a/NEWS
+++ b/NEWS
@@ -1 +0,0 @@
-no news is good news
diff --git a/README b/README
index a589ced..bd3e0a6 100644
--- a/README
+++ b/README
@@ -1,29 +1,30 @@
****** FFLAS-FFPACK : Finite Field Linear Algebra Subroutines/Package ******
-Version 1.3.3
+Version 2.2.1
PURPOSE:
-The FFLAS-FFPACK library provides functionalities for dense linear algebra
-over word size prime finite field.
+The FFLAS-FFPACK library provides a set of basic routines for dense and some sparse linear algebra over a finite field or the ring of integers.
INSTALLATION:
see INSTALL
-AVAILABILITY: from www-ljk.imag.fr/membres/Jean-Guillaume.Dumas/FFLAS/
+AVAILABILITY: from https://github.com/linbox-team/fflas-ffpack
-REQUIREMENTS: A BLAS library: for ex. ATLAS (http://math-atlas.sourceforge.net/)
+REQUIREMENTS:
+ * A BLAS library: for ex. OpenBLAS or ATLAS
+ * Givaro version at least 4.0.1 (https://github.com/linbox-team/givaro)
-This library requires the GNU C++ compiler (gcc-3.0 or newer) or any
+This library requires the GNU C++ compiler (gcc-4.7 or newer) or any
compiler supporting advanced template features.
==========================================================
-The FFLAS-FFPACK website is www-ljk.imag.fr/membres/Jean-Guillaume.Dumas/FFLAS/
+The FFLAS-FFPACK website is http://linbox-team.github.io/fflas-ffpack/
Please address your bug reports, suggestions and comments to
the discussion group http://groups.google.com/group/ffpack-devel
-Last update : June 2008
+Last update : March 2016
diff --git a/TODO b/TODO
index 3f4c3a1..01422cc 100644
--- a/TODO
+++ b/TODO
@@ -1,36 +1,28 @@
-Repenser la structure de fgemm:
- * plus de template, moins de tests
- * templater DoubleDomain/FLoatDomain?
- * plus rapide avec des petites matrices
- * Meilleure strategie de calcul des parametres
- * Automatic tuning des thresholds Float/Double
-
-LUdivine
- * Automatic tuning des thresholds gauss/LUdivine
- * Plus de localite?
-
-TRSM/TRMM
- * Traitement automatique float/double depuis int
- * Securiser les bornes t_update: quand winograd intervient
- * remplacer BLAS-trsm par le code de reference de ATLAS
+LUdivine-PLUQ
+ * Clean up of all base cases
+ * Only one routine, and automated switch to all implementations
FTRTRI/FTRTRM
- * generation automatique du code
- * traitement des cas de base (seuil > 1)
+ * Optimize base cases
-Idee:
-1/ Pourquoi templater FFLAS?
- --> integration au sein de ATLAS (C et corps definitif)
-2/ ameliorer les cas terminaux de ftrsm ftrmm: copier les ATL_reftrsm sur double et float
- --> introduire un nouveau seuil dans trsm: celui ou on fait des boucles et pas de la recursivite
-2/ Conversion des le debut vers modular double/float (evite les conv multiples)
-3/ Implantations non template de fgemm, trsm, .... sur double
-4/ Compilation des noyaux
-
-A discuter en fonction du besoin d'un FFLAS generique
-
+Conversion double -> float for small moduli:
+ * should be done in each routine, not only gemm
-Verifier la validite avec modular<int> (sage r�vele des det faux)
-Revoir la structure des bornes dans winograd: trop de reductions
- modulaires quand il y a des etapes de wino dans le corps fini.
\ No newline at end of file
+Simplification of helpers:
+* currently all mmhelpers have Amax,Amin,Bmax,Bmin, Cmax,Cmin,Outmax,
+ Outmin, and all related features for delayed reductions.
+* this is not suited for other FieldTraits (say Generic,
+ Multiprec,...)
+ TODO:
+ - write a by-default minimal mmhelper
+ - specialize the mmhelper with delayedModular trait with all the
+ machinery
+* The NeedPreaddreduction system is error-prone and ugly:
+==> introduce AddHelpers
+- carry max min outmax outmin info when used with a DelayedModular
+ FieldTraits
+- decide when a mod is required in this case
+- empty otherwise.
+- Two bool params: add/sub switch, and inplace switch.
+
\ No newline at end of file
diff --git a/_clang-format b/_clang-format
new file mode 100644
index 0000000..3aa2047
--- /dev/null
+++ b/_clang-format
@@ -0,0 +1,52 @@
+---
+Language: Cpp
+# BasedOnStyle: LLVM
+AccessModifierOffset: -2
+ConstructorInitializerIndentWidth: 4
+AlignEscapedNewlinesLeft: false
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: true
+AlwaysBreakTemplateDeclarations: false
+AlwaysBreakBeforeMultilineStrings: false
+BreakBeforeBinaryOperators: false
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BinPackParameters: true
+ColumnLimit: 120
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+DerivePointerBinding: false
+ExperimentalAutoDetectBinPacking: false
+IndentCaseLabels: false
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakString: 1000
+PenaltyBreakFirstLessLess: 120
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerBindsToType: false
+SpacesBeforeTrailingComments: 1
+Cpp11BracedListStyle: false
+Standard: Cpp11
+IndentWidth: 4
+TabWidth: 4
+UseTab: Never
+BreakBeforeBraces: Attach
+IndentFunctionDeclarationAfterType: false
+SpacesInParentheses: false
+SpacesInAngles: false
+SpaceInEmptyParentheses: false
+SpacesInCStyleCastParentheses: false
+SpacesInContainerLiterals: true
+SpaceBeforeAssignmentOperators: true
+ContinuationIndentWidth: 4
+CommentPragmas: '^ IWYU pragma:'
+SpaceBeforeParens: ControlStatements
+...
+
diff --git a/aclocal.m4 b/aclocal.m4
deleted file mode 100644
index 606ff1a..0000000
--- a/aclocal.m4
+++ /dev/null
@@ -1,990 +0,0 @@
-# generated automatically by aclocal 1.11.5 -*- Autoconf -*-
-
-# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-# 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
-# Inc.
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-m4_ifndef([AC_AUTOCONF_VERSION],
- [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
-m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],,
-[m4_warning([this file was generated for autoconf 2.69.
-You have another version of autoconf. It may work, but is not guaranteed to.
-If you have problems, you may need to regenerate the build system entirely.
-To do so, use the procedure documented by the package, typically `autoreconf'.])])
-
-# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008, 2011 Free Software
-# Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 1
-
-# AM_AUTOMAKE_VERSION(VERSION)
-# ----------------------------
-# Automake X.Y traces this macro to ensure aclocal.m4 has been
-# generated from the m4 files accompanying Automake X.Y.
-# (This private macro should not be called outside this file.)
-AC_DEFUN([AM_AUTOMAKE_VERSION],
-[am__api_version='1.11'
-dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
-dnl require some minimum version. Point them to the right macro.
-m4_if([$1], [1.11.5], [],
- [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
-])
-
-# _AM_AUTOCONF_VERSION(VERSION)
-# -----------------------------
-# aclocal traces this macro to find the Autoconf version.
-# This is a private macro too. Using m4_define simplifies
-# the logic in aclocal, which can simply ignore this definition.
-m4_define([_AM_AUTOCONF_VERSION], [])
-
-# AM_SET_CURRENT_AUTOMAKE_VERSION
-# -------------------------------
-# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
-# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
-AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.11.5])dnl
-m4_ifndef([AC_AUTOCONF_VERSION],
- [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
-_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
-
-# AM_AUX_DIR_EXPAND -*- Autoconf -*-
-
-# Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 1
-
-# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
-# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to
-# `$srcdir', `$srcdir/..', or `$srcdir/../..'.
-#
-# Of course, Automake must honor this variable whenever it calls a
-# tool from the auxiliary directory. The problem is that $srcdir (and
-# therefore $ac_aux_dir as well) can be either absolute or relative,
-# depending on how configure is run. This is pretty annoying, since
-# it makes $ac_aux_dir quite unusable in subdirectories: in the top
-# source directory, any form will work fine, but in subdirectories a
-# relative path needs to be adjusted first.
-#
-# $ac_aux_dir/missing
-# fails when called from a subdirectory if $ac_aux_dir is relative
-# $top_srcdir/$ac_aux_dir/missing
-# fails if $ac_aux_dir is absolute,
-# fails when called from a subdirectory in a VPATH build with
-# a relative $ac_aux_dir
-#
-# The reason of the latter failure is that $top_srcdir and $ac_aux_dir
-# are both prefixed by $srcdir. In an in-source build this is usually
-# harmless because $srcdir is `.', but things will broke when you
-# start a VPATH build or use an absolute $srcdir.
-#
-# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
-# iff we strip the leading $srcdir from $ac_aux_dir. That would be:
-# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
-# and then we would define $MISSING as
-# MISSING="\${SHELL} $am_aux_dir/missing"
-# This will work as long as MISSING is not called from configure, because
-# unfortunately $(top_srcdir) has no meaning in configure.
-# However there are other variables, like CC, which are often used in
-# configure, and could therefore not use this "fixed" $ac_aux_dir.
-#
-# Another solution, used here, is to always expand $ac_aux_dir to an
-# absolute PATH. The drawback is that using absolute paths prevent a
-# configured tree to be moved without reconfiguration.
-
-AC_DEFUN([AM_AUX_DIR_EXPAND],
-[dnl Rely on autoconf to set up CDPATH properly.
-AC_PREREQ([2.50])dnl
-# expand $ac_aux_dir to an absolute path
-am_aux_dir=`cd $ac_aux_dir && pwd`
-])
-
-# AM_CONDITIONAL -*- Autoconf -*-
-
-# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006, 2008
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 9
-
-# AM_CONDITIONAL(NAME, SHELL-CONDITION)
-# -------------------------------------
-# Define a conditional.
-AC_DEFUN([AM_CONDITIONAL],
-[AC_PREREQ(2.52)dnl
- ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])],
- [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
-AC_SUBST([$1_TRUE])dnl
-AC_SUBST([$1_FALSE])dnl
-_AM_SUBST_NOTMAKE([$1_TRUE])dnl
-_AM_SUBST_NOTMAKE([$1_FALSE])dnl
-m4_define([_AM_COND_VALUE_$1], [$2])dnl
-if $2; then
- $1_TRUE=
- $1_FALSE='#'
-else
- $1_TRUE='#'
- $1_FALSE=
-fi
-AC_CONFIG_COMMANDS_PRE(
-[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
- AC_MSG_ERROR([[conditional "$1" was never defined.
-Usually this means the macro was only invoked conditionally.]])
-fi])])
-
-# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009,
-# 2010, 2011 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 12
-
-# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be
-# written in clear, in which case automake, when reading aclocal.m4,
-# will think it sees a *use*, and therefore will trigger all it's
-# C support machinery. Also note that it means that autoscan, seeing
-# CC etc. in the Makefile, will ask for an AC_PROG_CC use...
-
-
-# _AM_DEPENDENCIES(NAME)
-# ----------------------
-# See how the compiler implements dependency checking.
-# NAME is "CC", "CXX", "GCJ", or "OBJC".
-# We try a few techniques and use that to set a single cache variable.
-#
-# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
-# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
-# dependency, and given that the user is not expected to run this macro,
-# just rely on AC_PROG_CC.
-AC_DEFUN([_AM_DEPENDENCIES],
-[AC_REQUIRE([AM_SET_DEPDIR])dnl
-AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
-AC_REQUIRE([AM_MAKE_INCLUDE])dnl
-AC_REQUIRE([AM_DEP_TRACK])dnl
-
-ifelse([$1], CC, [depcc="$CC" am_compiler_list=],
- [$1], CXX, [depcc="$CXX" am_compiler_list=],
- [$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
- [$1], UPC, [depcc="$UPC" am_compiler_list=],
- [$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'],
- [depcc="$$1" am_compiler_list=])
-
-AC_CACHE_CHECK([dependency style of $depcc],
- [am_cv_$1_dependencies_compiler_type],
-[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
- # We make a subdir and do the tests there. Otherwise we can end up
- # making bogus files that we don't know about and never remove. For
- # instance it was reported that on HP-UX the gcc test will end up
- # making a dummy file named `D' -- because `-MD' means `put the output
- # in D'.
- rm -rf conftest.dir
- mkdir conftest.dir
- # Copy depcomp to subdir because otherwise we won't find it if we're
- # using a relative directory.
- cp "$am_depcomp" conftest.dir
- cd conftest.dir
- # We will build objects and dependencies in a subdirectory because
- # it helps to detect inapplicable dependency modes. For instance
- # both Tru64's cc and ICC support -MD to output dependencies as a
- # side effect of compilation, but ICC will put the dependencies in
- # the current directory while Tru64 will put them in the object
- # directory.
- mkdir sub
-
- am_cv_$1_dependencies_compiler_type=none
- if test "$am_compiler_list" = ""; then
- am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
- fi
- am__universal=false
- m4_case([$1], [CC],
- [case " $depcc " in #(
- *\ -arch\ *\ -arch\ *) am__universal=true ;;
- esac],
- [CXX],
- [case " $depcc " in #(
- *\ -arch\ *\ -arch\ *) am__universal=true ;;
- esac])
-
- for depmode in $am_compiler_list; do
- # Setup a source with many dependencies, because some compilers
- # like to wrap large dependency lists on column 80 (with \), and
- # we should not choose a depcomp mode which is confused by this.
- #
- # We need to recreate these files for each test, as the compiler may
- # overwrite some of them when testing with obscure command lines.
- # This happens at least with the AIX C compiler.
- : > sub/conftest.c
- for i in 1 2 3 4 5 6; do
- echo '#include "conftst'$i'.h"' >> sub/conftest.c
- # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with
- # Solaris 8's {/usr,}/bin/sh.
- touch sub/conftst$i.h
- done
- echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
-
- # We check with `-c' and `-o' for the sake of the "dashmstdout"
- # mode. It turns out that the SunPro C++ compiler does not properly
- # handle `-M -o', and we need to detect this. Also, some Intel
- # versions had trouble with output in subdirs
- am__obj=sub/conftest.${OBJEXT-o}
- am__minus_obj="-o $am__obj"
- case $depmode in
- gcc)
- # This depmode causes a compiler race in universal mode.
- test "$am__universal" = false || continue
- ;;
- nosideeffect)
- # after this tag, mechanisms are not by side-effect, so they'll
- # only be used when explicitly requested
- if test "x$enable_dependency_tracking" = xyes; then
- continue
- else
- break
- fi
- ;;
- msvc7 | msvc7msys | msvisualcpp | msvcmsys)
- # This compiler won't grok `-c -o', but also, the minuso test has
- # not run yet. These depmodes are late enough in the game, and
- # so weak that their functioning should not be impacted.
- am__obj=conftest.${OBJEXT-o}
- am__minus_obj=
- ;;
- none) break ;;
- esac
- if depmode=$depmode \
- source=sub/conftest.c object=$am__obj \
- depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
- $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
- >/dev/null 2>conftest.err &&
- grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
- grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
- grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
- ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
- # icc doesn't choke on unknown options, it will just issue warnings
- # or remarks (even with -Werror). So we grep stderr for any message
- # that says an option was ignored or not supported.
- # When given -MP, icc 7.0 and 7.1 complain thusly:
- # icc: Command line warning: ignoring option '-M'; no argument required
- # The diagnosis changed in icc 8.0:
- # icc: Command line remark: option '-MP' not supported
- if (grep 'ignoring option' conftest.err ||
- grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
- am_cv_$1_dependencies_compiler_type=$depmode
- break
- fi
- fi
- done
-
- cd ..
- rm -rf conftest.dir
-else
- am_cv_$1_dependencies_compiler_type=none
-fi
-])
-AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
-AM_CONDITIONAL([am__fastdep$1], [
- test "x$enable_dependency_tracking" != xno \
- && test "$am_cv_$1_dependencies_compiler_type" = gcc3])
-])
-
-
-# AM_SET_DEPDIR
-# -------------
-# Choose a directory name for dependency files.
-# This macro is AC_REQUIREd in _AM_DEPENDENCIES
-AC_DEFUN([AM_SET_DEPDIR],
-[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
-AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
-])
-
-
-# AM_DEP_TRACK
-# ------------
-AC_DEFUN([AM_DEP_TRACK],
-[AC_ARG_ENABLE(dependency-tracking,
-[ --disable-dependency-tracking speeds up one-time build
- --enable-dependency-tracking do not reject slow dependency extractors])
-if test "x$enable_dependency_tracking" != xno; then
- am_depcomp="$ac_aux_dir/depcomp"
- AMDEPBACKSLASH='\'
- am__nodep='_no'
-fi
-AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
-AC_SUBST([AMDEPBACKSLASH])dnl
-_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
-AC_SUBST([am__nodep])dnl
-_AM_SUBST_NOTMAKE([am__nodep])dnl
-])
-
-# Generate code to set up dependency tracking. -*- Autoconf -*-
-
-# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-#serial 5
-
-# _AM_OUTPUT_DEPENDENCY_COMMANDS
-# ------------------------------
-AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
-[{
- # Autoconf 2.62 quotes --file arguments for eval, but not when files
- # are listed without --file. Let's play safe and only enable the eval
- # if we detect the quoting.
- case $CONFIG_FILES in
- *\'*) eval set x "$CONFIG_FILES" ;;
- *) set x $CONFIG_FILES ;;
- esac
- shift
- for mf
- do
- # Strip MF so we end up with the name of the file.
- mf=`echo "$mf" | sed -e 's/:.*$//'`
- # Check whether this is an Automake generated Makefile or not.
- # We used to match only the files named `Makefile.in', but
- # some people rename them; so instead we look at the file content.
- # Grep'ing the first line is not enough: some people post-process
- # each Makefile.in and add a new line on top of each file to say so.
- # Grep'ing the whole file is not good either: AIX grep has a line
- # limit of 2048, but all sed's we know have understand at least 4000.
- if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
- dirpart=`AS_DIRNAME("$mf")`
- else
- continue
- fi
- # Extract the definition of DEPDIR, am__include, and am__quote
- # from the Makefile without running `make'.
- DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
- test -z "$DEPDIR" && continue
- am__include=`sed -n 's/^am__include = //p' < "$mf"`
- test -z "am__include" && continue
- am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
- # When using ansi2knr, U may be empty or an underscore; expand it
- U=`sed -n 's/^U = //p' < "$mf"`
- # Find all dependency output files, they are included files with
- # $(DEPDIR) in their names. We invoke sed twice because it is the
- # simplest approach to changing $(DEPDIR) to its actual value in the
- # expansion.
- for file in `sed -n "
- s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
- sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
- # Make sure the directory exists.
- test -f "$dirpart/$file" && continue
- fdir=`AS_DIRNAME(["$file"])`
- AS_MKDIR_P([$dirpart/$fdir])
- # echo "creating $dirpart/$file"
- echo '# dummy' > "$dirpart/$file"
- done
- done
-}
-])# _AM_OUTPUT_DEPENDENCY_COMMANDS
-
-
-# AM_OUTPUT_DEPENDENCY_COMMANDS
-# -----------------------------
-# This macro should only be invoked once -- use via AC_REQUIRE.
-#
-# This code is only required when automatic dependency tracking
-# is enabled. FIXME. This creates each `.P' file that we will
-# need in order to bootstrap the dependency handling code.
-AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
-[AC_CONFIG_COMMANDS([depfiles],
- [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
- [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
-])
-
-# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 8
-
-# AM_CONFIG_HEADER is obsolete. It has been replaced by AC_CONFIG_HEADERS.
-AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)])
-
-# Do all the work for Automake. -*- Autoconf -*-
-
-# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-# 2005, 2006, 2008, 2009 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 16
-
-# This macro actually does too much. Some checks are only needed if
-# your package does certain things. But this isn't really a big deal.
-
-# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
-# AM_INIT_AUTOMAKE([OPTIONS])
-# -----------------------------------------------
-# The call with PACKAGE and VERSION arguments is the old style
-# call (pre autoconf-2.50), which is being phased out. PACKAGE
-# and VERSION should now be passed to AC_INIT and removed from
-# the call to AM_INIT_AUTOMAKE.
-# We support both call styles for the transition. After
-# the next Automake release, Autoconf can make the AC_INIT
-# arguments mandatory, and then we can depend on a new Autoconf
-# release and drop the old call support.
-AC_DEFUN([AM_INIT_AUTOMAKE],
-[AC_PREREQ([2.62])dnl
-dnl Autoconf wants to disallow AM_ names. We explicitly allow
-dnl the ones we care about.
-m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
-AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
-AC_REQUIRE([AC_PROG_INSTALL])dnl
-if test "`cd $srcdir && pwd`" != "`pwd`"; then
- # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
- # is not polluted with repeated "-I."
- AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
- # test to see if srcdir already configured
- if test -f $srcdir/config.status; then
- AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
- fi
-fi
-
-# test whether we have cygpath
-if test -z "$CYGPATH_W"; then
- if (cygpath --version) >/dev/null 2>/dev/null; then
- CYGPATH_W='cygpath -w'
- else
- CYGPATH_W=echo
- fi
-fi
-AC_SUBST([CYGPATH_W])
-
-# Define the identity of the package.
-dnl Distinguish between old-style and new-style calls.
-m4_ifval([$2],
-[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
- AC_SUBST([PACKAGE], [$1])dnl
- AC_SUBST([VERSION], [$2])],
-[_AM_SET_OPTIONS([$1])dnl
-dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT.
-m4_if(m4_ifdef([AC_PACKAGE_NAME], 1)m4_ifdef([AC_PACKAGE_VERSION], 1), 11,,
- [m4_fatal([AC_INIT should be called with package and version arguments])])dnl
- AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
- AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
-
-_AM_IF_OPTION([no-define],,
-[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
- AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl
-
-# Some tools Automake needs.
-AC_REQUIRE([AM_SANITY_CHECK])dnl
-AC_REQUIRE([AC_ARG_PROGRAM])dnl
-AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version})
-AM_MISSING_PROG(AUTOCONF, autoconf)
-AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version})
-AM_MISSING_PROG(AUTOHEADER, autoheader)
-AM_MISSING_PROG(MAKEINFO, makeinfo)
-AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
-AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl
-AC_REQUIRE([AM_PROG_MKDIR_P])dnl
-# We need awk for the "check" target. The system "awk" is bad on
-# some platforms.
-AC_REQUIRE([AC_PROG_AWK])dnl
-AC_REQUIRE([AC_PROG_MAKE_SET])dnl
-AC_REQUIRE([AM_SET_LEADING_DOT])dnl
-_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
- [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
- [_AM_PROG_TAR([v7])])])
-_AM_IF_OPTION([no-dependencies],,
-[AC_PROVIDE_IFELSE([AC_PROG_CC],
- [_AM_DEPENDENCIES(CC)],
- [define([AC_PROG_CC],
- defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl
-AC_PROVIDE_IFELSE([AC_PROG_CXX],
- [_AM_DEPENDENCIES(CXX)],
- [define([AC_PROG_CXX],
- defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl
-AC_PROVIDE_IFELSE([AC_PROG_OBJC],
- [_AM_DEPENDENCIES(OBJC)],
- [define([AC_PROG_OBJC],
- defn([AC_PROG_OBJC])[_AM_DEPENDENCIES(OBJC)])])dnl
-])
-_AM_IF_OPTION([silent-rules], [AC_REQUIRE([AM_SILENT_RULES])])dnl
-dnl The `parallel-tests' driver may need to know about EXEEXT, so add the
-dnl `am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This macro
-dnl is hooked onto _AC_COMPILER_EXEEXT early, see below.
-AC_CONFIG_COMMANDS_PRE(dnl
-[m4_provide_if([_AM_COMPILER_EXEEXT],
- [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl
-])
-
-dnl Hook into `_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
-dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
-dnl mangled by Autoconf and run in a shell conditional statement.
-m4_define([_AC_COMPILER_EXEEXT],
-m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])])
-
-
-# When config.status generates a header, we must update the stamp-h file.
-# This file resides in the same directory as the config header
-# that is generated. The stamp files are numbered to have different names.
-
-# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
-# loop where config.status creates the headers, so we can generate
-# our stamp files there.
-AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
-[# Compute $1's index in $config_headers.
-_am_arg=$1
-_am_stamp_count=1
-for _am_header in $config_headers :; do
- case $_am_header in
- $_am_arg | $_am_arg:* )
- break ;;
- * )
- _am_stamp_count=`expr $_am_stamp_count + 1` ;;
- esac
-done
-echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
-
-# Copyright (C) 2001, 2003, 2005, 2008, 2011 Free Software Foundation,
-# Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 1
-
-# AM_PROG_INSTALL_SH
-# ------------------
-# Define $install_sh.
-AC_DEFUN([AM_PROG_INSTALL_SH],
-[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
-if test x"${install_sh}" != xset; then
- case $am_aux_dir in
- *\ * | *\ *)
- install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
- *)
- install_sh="\${SHELL} $am_aux_dir/install-sh"
- esac
-fi
-AC_SUBST(install_sh)])
-
-# Copyright (C) 2003, 2005 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 2
-
-# Check whether the underlying file-system supports filenames
-# with a leading dot. For instance MS-DOS doesn't.
-AC_DEFUN([AM_SET_LEADING_DOT],
-[rm -rf .tst 2>/dev/null
-mkdir .tst 2>/dev/null
-if test -d .tst; then
- am__leading_dot=.
-else
- am__leading_dot=_
-fi
-rmdir .tst 2>/dev/null
-AC_SUBST([am__leading_dot])])
-
-# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
-# From Jim Meyering
-
-# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008,
-# 2011 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 5
-
-# AM_MAINTAINER_MODE([DEFAULT-MODE])
-# ----------------------------------
-# Control maintainer-specific portions of Makefiles.
-# Default is to disable them, unless `enable' is passed literally.
-# For symmetry, `disable' may be passed as well. Anyway, the user
-# can override the default with the --enable/--disable switch.
-AC_DEFUN([AM_MAINTAINER_MODE],
-[m4_case(m4_default([$1], [disable]),
- [enable], [m4_define([am_maintainer_other], [disable])],
- [disable], [m4_define([am_maintainer_other], [enable])],
- [m4_define([am_maintainer_other], [enable])
- m4_warn([syntax], [unexpected argument to AM@&t at _MAINTAINER_MODE: $1])])
-AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
- dnl maintainer-mode's default is 'disable' unless 'enable' is passed
- AC_ARG_ENABLE([maintainer-mode],
-[ --][am_maintainer_other][-maintainer-mode am_maintainer_other make rules and dependencies not useful
- (and sometimes confusing) to the casual installer],
- [USE_MAINTAINER_MODE=$enableval],
- [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes]))
- AC_MSG_RESULT([$USE_MAINTAINER_MODE])
- AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes])
- MAINT=$MAINTAINER_MODE_TRUE
- AC_SUBST([MAINT])dnl
-]
-)
-
-AU_DEFUN([jm_MAINTAINER_MODE], [AM_MAINTAINER_MODE])
-
-# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
-
-# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2004, 2005, 2008
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 6
-
-# AM_MISSING_PROG(NAME, PROGRAM)
-# ------------------------------
-AC_DEFUN([AM_MISSING_PROG],
-[AC_REQUIRE([AM_MISSING_HAS_RUN])
-$1=${$1-"${am_missing_run}$2"}
-AC_SUBST($1)])
-
-
-# AM_MISSING_HAS_RUN
-# ------------------
-# Define MISSING if not defined so far and test if it supports --run.
-# If it does, set am_missing_run to use it, otherwise, to nothing.
-AC_DEFUN([AM_MISSING_HAS_RUN],
-[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
-AC_REQUIRE_AUX_FILE([missing])dnl
-if test x"${MISSING+set}" != xset; then
- case $am_aux_dir in
- *\ * | *\ *)
- MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
- *)
- MISSING="\${SHELL} $am_aux_dir/missing" ;;
- esac
-fi
-# Use eval to expand $SHELL
-if eval "$MISSING --run true"; then
- am_missing_run="$MISSING --run "
-else
- am_missing_run=
- AC_MSG_WARN([`missing' script is too old or missing])
-fi
-])
-
-# Copyright (C) 2003, 2004, 2005, 2006, 2011 Free Software Foundation,
-# Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 1
-
-# AM_PROG_MKDIR_P
-# ---------------
-# Check for `mkdir -p'.
-AC_DEFUN([AM_PROG_MKDIR_P],
-[AC_PREREQ([2.60])dnl
-AC_REQUIRE([AC_PROG_MKDIR_P])dnl
-dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
-dnl while keeping a definition of mkdir_p for backward compatibility.
-dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
-dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
-dnl Makefile.ins that do not define MKDIR_P, so we do our own
-dnl adjustment using top_builddir (which is defined more often than
-dnl MKDIR_P).
-AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
-case $mkdir_p in
- [[\\/$]]* | ?:[[\\/]]*) ;;
- */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
-esac
-])
-
-# Helper functions for option handling. -*- Autoconf -*-
-
-# Copyright (C) 2001, 2002, 2003, 2005, 2008, 2010 Free Software
-# Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 5
-
-# _AM_MANGLE_OPTION(NAME)
-# -----------------------
-AC_DEFUN([_AM_MANGLE_OPTION],
-[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
-
-# _AM_SET_OPTION(NAME)
-# --------------------
-# Set option NAME. Presently that only means defining a flag for this option.
-AC_DEFUN([_AM_SET_OPTION],
-[m4_define(_AM_MANGLE_OPTION([$1]), 1)])
-
-# _AM_SET_OPTIONS(OPTIONS)
-# ------------------------
-# OPTIONS is a space-separated list of Automake options.
-AC_DEFUN([_AM_SET_OPTIONS],
-[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
-
-# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
-# -------------------------------------------
-# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
-AC_DEFUN([_AM_IF_OPTION],
-[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
-
-# Check to make sure that the build environment is sane. -*- Autoconf -*-
-
-# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005, 2008
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 5
-
-# AM_SANITY_CHECK
-# ---------------
-AC_DEFUN([AM_SANITY_CHECK],
-[AC_MSG_CHECKING([whether build environment is sane])
-# Just in case
-sleep 1
-echo timestamp > conftest.file
-# Reject unsafe characters in $srcdir or the absolute working directory
-# name. Accept space and tab only in the latter.
-am_lf='
-'
-case `pwd` in
- *[[\\\"\#\$\&\'\`$am_lf]]*)
- AC_MSG_ERROR([unsafe absolute working directory name]);;
-esac
-case $srcdir in
- *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*)
- AC_MSG_ERROR([unsafe srcdir value: `$srcdir']);;
-esac
-
-# Do `set' in a subshell so we don't clobber the current shell's
-# arguments. Must try -L first in case configure is actually a
-# symlink; some systems play weird games with the mod time of symlinks
-# (eg FreeBSD returns the mod time of the symlink's containing
-# directory).
-if (
- set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
- if test "$[*]" = "X"; then
- # -L didn't work.
- set X `ls -t "$srcdir/configure" conftest.file`
- fi
- rm -f conftest.file
- if test "$[*]" != "X $srcdir/configure conftest.file" \
- && test "$[*]" != "X conftest.file $srcdir/configure"; then
-
- # If neither matched, then we have a broken ls. This can happen
- # if, for instance, CONFIG_SHELL is bash and it inherits a
- # broken ls alias from the environment. This has actually
- # happened. Such a system could not be considered "sane".
- AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken
-alias in your environment])
- fi
-
- test "$[2]" = conftest.file
- )
-then
- # Ok.
- :
-else
- AC_MSG_ERROR([newly created file is older than distributed files!
-Check your system clock])
-fi
-AC_MSG_RESULT(yes)])
-
-# Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 1
-
-# AM_PROG_INSTALL_STRIP
-# ---------------------
-# One issue with vendor `install' (even GNU) is that you can't
-# specify the program used to strip binaries. This is especially
-# annoying in cross-compiling environments, where the build's strip
-# is unlikely to handle the host's binaries.
-# Fortunately install-sh will honor a STRIPPROG variable, so we
-# always use install-sh in `make install-strip', and initialize
-# STRIPPROG with the value of the STRIP variable (set by the user).
-AC_DEFUN([AM_PROG_INSTALL_STRIP],
-[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
-# Installed binaries are usually stripped using `strip' when the user
-# run `make install-strip'. However `strip' might not be the right
-# tool to use in cross-compilation environments, therefore Automake
-# will honor the `STRIP' environment variable to overrule this program.
-dnl Don't test for $cross_compiling = yes, because it might be `maybe'.
-if test "$cross_compiling" != no; then
- AC_CHECK_TOOL([STRIP], [strip], :)
-fi
-INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
-AC_SUBST([INSTALL_STRIP_PROGRAM])])
-
-# Copyright (C) 2006, 2008, 2010 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 3
-
-# _AM_SUBST_NOTMAKE(VARIABLE)
-# ---------------------------
-# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
-# This macro is traced by Automake.
-AC_DEFUN([_AM_SUBST_NOTMAKE])
-
-# AM_SUBST_NOTMAKE(VARIABLE)
-# --------------------------
-# Public sister of _AM_SUBST_NOTMAKE.
-AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
-
-# Check how to create a tarball. -*- Autoconf -*-
-
-# Copyright (C) 2004, 2005, 2012 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 2
-
-# _AM_PROG_TAR(FORMAT)
-# --------------------
-# Check how to create a tarball in format FORMAT.
-# FORMAT should be one of `v7', `ustar', or `pax'.
-#
-# Substitute a variable $(am__tar) that is a command
-# writing to stdout a FORMAT-tarball containing the directory
-# $tardir.
-# tardir=directory && $(am__tar) > result.tar
-#
-# Substitute a variable $(am__untar) that extract such
-# a tarball read from stdin.
-# $(am__untar) < result.tar
-AC_DEFUN([_AM_PROG_TAR],
-[# Always define AMTAR for backward compatibility. Yes, it's still used
-# in the wild :-( We should find a proper way to deprecate it ...
-AC_SUBST([AMTAR], ['$${TAR-tar}'])
-m4_if([$1], [v7],
- [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
- [m4_case([$1], [ustar],, [pax],,
- [m4_fatal([Unknown tar format])])
-AC_MSG_CHECKING([how to create a $1 tar archive])
-# Loop over all known methods to create a tar archive until one works.
-_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
-_am_tools=${am_cv_prog_tar_$1-$_am_tools}
-# Do not fold the above two line into one, because Tru64 sh and
-# Solaris sh will not grok spaces in the rhs of `-'.
-for _am_tool in $_am_tools
-do
- case $_am_tool in
- gnutar)
- for _am_tar in tar gnutar gtar;
- do
- AM_RUN_LOG([$_am_tar --version]) && break
- done
- am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
- am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
- am__untar="$_am_tar -xf -"
- ;;
- plaintar)
- # Must skip GNU tar: if it does not support --format= it doesn't create
- # ustar tarball either.
- (tar --version) >/dev/null 2>&1 && continue
- am__tar='tar chf - "$$tardir"'
- am__tar_='tar chf - "$tardir"'
- am__untar='tar xf -'
- ;;
- pax)
- am__tar='pax -L -x $1 -w "$$tardir"'
- am__tar_='pax -L -x $1 -w "$tardir"'
- am__untar='pax -r'
- ;;
- cpio)
- am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
- am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
- am__untar='cpio -i -H $1 -d'
- ;;
- none)
- am__tar=false
- am__tar_=false
- am__untar=false
- ;;
- esac
-
- # If the value was cached, stop now. We just wanted to have am__tar
- # and am__untar set.
- test -n "${am_cv_prog_tar_$1}" && break
-
- # tar/untar a dummy directory, and stop if the command works
- rm -rf conftest.dir
- mkdir conftest.dir
- echo GrepMe > conftest.dir/file
- AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
- rm -rf conftest.dir
- if test -s conftest.tar; then
- AM_RUN_LOG([$am__untar <conftest.tar])
- grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
- fi
-done
-rm -rf conftest.dir
-
-AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
-AC_MSG_RESULT([$am_cv_prog_tar_$1])])
-AC_SUBST([am__tar])
-AC_SUBST([am__untar])
-]) # _AM_PROG_TAR
-
-m4_include([macros/aclocal-include.m4])
-m4_include([macros/blas-check.m4])
-m4_include([macros/blasATLAS-check.m4])
-m4_include([macros/blasGOTO-check.m4])
-m4_include([macros/blasGSL-check.m4])
-m4_include([macros/blasOTHER-check.m4])
-m4_include([macros/config-header.m4])
-m4_include([macros/debug.m4])
-m4_include([macros/fflas-ffpack-doc.m4])
-m4_include([macros/fflas-ffpack-misc.m4])
-m4_include([macros/fflas-ffpack-opt.m4])
-m4_include([macros/givaro-check.m4])
-m4_include([macros/gmp-check.m4])
-m4_include([macros/lapack-check.m4])
-m4_include([macros/libtool.m4])
-m4_include([macros/ltoptions.m4])
-m4_include([macros/ltsugar.m4])
-m4_include([macros/ltversion.m4])
-m4_include([macros/lt~obsolete.m4])
diff --git a/autogen.sh b/autogen.sh
new file mode 100755
index 0000000..1b06ba5
--- /dev/null
+++ b/autogen.sh
@@ -0,0 +1,202 @@
+#!/bin/sh
+# Coypright (c) 2011 FFLAS-FFPACK
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+# adapted from LinBox configuration
+#
+# ========LICENCE========
+# This file is part of the library FFLAS-FFPACK.
+#
+# FFLAS-FFPACK is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+# ========LICENCE========
+#/
+
+# Run this to generate all the initial makefiles, etc.
+
+# Recover command line, with double-quotes
+CMDLINE=""
+for arg in "$@"
+do
+ WHO="`echo $arg | cut -d'=' -f1`"
+ WHAT="`echo $arg | cut -s -d'=' -f2`"
+ if test "x$WHAT" = "x"; then
+ CMDLINE="$CMDLINE $WHO"
+ else
+ CMDLINE="$CMDLINE $WHO=\"$WHAT\""
+ fi
+done
+
+echo "$0 $CMDLINE" > autogen.status
+chmod +x autogen.status
+
+# Starts configuring
+srcdir=`dirname $0`
+test -z "$srcdir" && srcdir=.
+
+PKG_NAME="FFLAS-FFPACK"
+
+(test -f $srcdir/configure.ac \
+ && test -f $srcdir/fflas-ffpack/fflas-ffpack.doxy ) || {
+ echo -n "**Error**: Directory "\`$srcdir\'" does not look like the"
+ echo " top-level "\`$PKG_NAME\'" directory"
+ exit 1
+}
+
+ORIGDIR=`pwd`
+cd $srcdir
+PROJECT=fflasffpack
+TEST_TYPE=-f
+
+DIE=0
+
+# Defaults
+LIBTOOL=libtool
+LIBTOOLIZE=libtoolize
+
+# Fix OSx problem with GNU libtool
+(uname -a|grep -v Darwin) < /dev/null > /dev/null 2>&1 ||
+{
+echo "....Adding fix for OSX"
+LIBTOOL=glibtool
+LIBTOOLIZE=glibtoolize
+}
+
+
+(autoconf --version) < /dev/null > /dev/null 2>&1 || {
+ echo
+ echo "You must have autoconf installed to compile $PROJECT."
+ echo "Download the appropriate package for your distribution,"
+ echo "or get the source tarball at ftp://ftp.gnu.org/pub/gnu/"
+ DIE=1
+}
+
+(automake --version) < /dev/null > /dev/null 2>&1 || {
+ echo
+ echo "You must have automake installed to compile $PROJECT."
+ echo "Download the appropriate package for your distribution,"
+ echo "or get the source tarball at ftp://ftp.gnu.org/pub/gnu/"
+ DIE=1
+}
+
+(automake --version) < /dev/null > /dev/null 2>&1 || {
+ echo
+ echo "You must have automake installed to compile $PROJECT."
+ echo "Get ftp://sourceware.cygnus.com/pub/automake/automake-1.4.tar.gz"
+ echo "(or a newer version if it is available)"
+ DIE=1
+}
+
+(grep "^AC_PROG_LIBTOOL" configure.ac >/dev/null) && {
+ ($LIBTOOLIZE --version) < /dev/null > /dev/null 2>&1 || {
+ echo
+ echo "**Error**: You must have \`libtool' installed to compile $PROJECT."
+ echo "Download the appropriate package for your distribution,"
+ echo "or get the source tarball at ftp://ftp.gnu.org/pub/gnu/"
+ DIE=1
+ }
+}
+
+grep "^AM_GNU_GETTEXT" configure.ac >/dev/null && {
+ grep "sed.*POTFILES" $srcdir/configure.ac >/dev/null || \
+ (gettext --version) < /dev/null > /dev/null 2>&1 || {
+ echo
+ echo "**Error**: You must have \`gettext' installed to compile $PROJECT."
+ echo "Download the appropriate package for your distribution,"
+ echo "or get the source tarball at ftp://ftp.gnu.org/pub/gnu/"
+ DIE=1
+ }
+}
+
+if test "$DIE" -eq 1; then
+ exit 1
+fi
+
+
+if test -z "$*"; then
+ echo "I am going to run ./configure with no arguments - if you wish "
+ echo "to pass any to it, please specify them on the $0 command line."
+fi
+
+case $CC in
+ *xlc | *xlc\ * | *lcc | *lcc\ *) am_opt=--include-deps;;
+esac
+
+for coin in `find . -name configure.ac -print`
+do
+ dr=`dirname $coin`
+ if test -f $dr/NO-AUTO-GEN; then
+ echo skipping $dr -- flagged as no auto-gen
+ else
+ echo processing $dr
+ macrodirs=`sed -n -e 's,AM_ACLOCAL_INCLUDE(\(.*\)),\1,gp' < $coin`
+ ( cd $dr
+ aclocalinclude="$ACLOCAL_FLAGS"
+ for k in $macrodirs; do
+ if test -d $k; then
+ aclocalinclude="$aclocalinclude -I $k"
+ ##else
+ ## echo "**Warning**: No such directory \`$k'. Ignored."
+ fi
+ done
+ if grep "^AM_GNU_GETTEXT" configure.ac >/dev/null; then
+ if grep "sed.*POTFILES" configure.ac >/dev/null; then
+ : do nothing -- we still have an old unmodified configure.ac
+ else
+ echo "Creating $dr/aclocal.m4 ..."
+ test -r $dr/aclocal.m4 || touch $dr/aclocal.m4
+ echo "Running gettextize... Ignore non-fatal messages."
+ echo "no" | gettextize --force --copy
+ echo "Making $dr/aclocal.m4 writable ..."
+ test -r $dr/aclocal.m4 && chmod u+w $dr/aclocal.m4
+ fi
+ fi
+ if grep "^AM_GNOME_GETTEXT" configure.ac >/dev/null; then
+ echo "Creating $dr/aclocal.m4 ..."
+ test -r $dr/aclocal.m4 || touch $dr/aclocal.m4
+ echo "Running gettextize... Ignore non-fatal messages."
+ echo "no" | gettextize --force --copy
+ echo "Making $dr/aclocal.m4 writable ..."
+ test -r $dr/aclocal.m4 && chmod u+w $dr/aclocal.m4
+ fi
+ if grep "^AC_PROG_LIBTOOL" configure.ac >/dev/null; then
+ echo "Running libtoolize..."
+ $LIBTOOLIZE --force --copy
+ fi
+ echo "Running aclocal $aclocalinclude ..."
+ aclocal $aclocalinclude
+ if grep "^AC_CONFIG_HEADERS" configure.ac >/dev/null; then
+ echo "Running autoheader..."
+ autoheader
+ fi
+ echo "Running automake --gnu $am_opt ..."
+ automake -c --add-missing --gnu $am_opt
+ echo "Running autoconf ..."
+ autoconf
+ )
+ fi
+done
+
+conf_flags="--enable-maintainer-mode"
+#--enable-iso-c
+
+cd "$ORIGDIR"
+
+if test x$NOCONFIGURE = x; then
+ echo Running $srcdir/configure $conf_flags "$@" ...
+ $srcdir/configure $conf_flags "$@" \
+ && echo "Now type \`make install' to compile $PROJECT" || exit 1
+else
+ echo Skipping configure process.
+fi
+
diff --git a/benchmark/Makefile.in b/benchmark/Makefile.in
deleted file mode 100644
index f4d421b..0000000
--- a/benchmark/Makefile.in
+++ /dev/null
@@ -1,642 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-#
-# Nothing yet
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = benchmark
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
- html-recursive info-recursive install-data-recursive \
- install-dvi-recursive install-exec-recursive \
- install-html-recursive install-info-recursive \
- install-pdf-recursive install-ps-recursive install-recursive \
- installcheck-recursive installdirs-recursive pdf-recursive \
- ps-recursive uninstall-recursive
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
- distclean-recursive maintainer-clean-recursive
-AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
- $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
- distdir
-ETAGS = etags
-CTAGS = ctags
-DIST_SUBDIRS = $(SUBDIRS)
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-am__relativize = \
- dir0=`pwd`; \
- sed_first='s,^\([^/]*\)/.*$$,\1,'; \
- sed_rest='s,^[^/]*/*,,'; \
- sed_last='s,^.*/\([^/]*\)$$,\1,'; \
- sed_butlast='s,/*[^/]*$$,,'; \
- while test -n "$$dir1"; do \
- first=`echo "$$dir1" | sed -e "$$sed_first"`; \
- if test "$$first" != "."; then \
- if test "$$first" = ".."; then \
- dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
- dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
- else \
- first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
- if test "$$first2" = "$$first"; then \
- dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
- else \
- dir2="../$$dir2"; \
- fi; \
- dir0="$$dir0"/"$$first"; \
- fi; \
- fi; \
- dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
- done; \
- reldir="$$dir2"
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-SUBDIRS = graph src html test-src
-#
-EXTRA_DIST = run.sh
-all: all-recursive
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps benchmark/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps benchmark/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-
-# This directory's subdirectories are mostly independent; you can cd
-# into them and run `make' without going through this Makefile.
-# To change the values of `make' variables: instead of editing Makefiles,
-# (1) if the variable is set in `config.status', edit `config.status'
-# (which will cause the Makefiles to be regenerated when you run `make');
-# (2) otherwise, pass the desired values on the `make' command line.
-$(RECURSIVE_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- target=`echo $@ | sed s/-recursive//`; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- dot_seen=yes; \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done; \
- if test "$$dot_seen" = "no"; then \
- $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
- fi; test -z "$$fail"
-
-$(RECURSIVE_CLEAN_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- case "$@" in \
- distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
- *) list='$(SUBDIRS)' ;; \
- esac; \
- rev=''; for subdir in $$list; do \
- if test "$$subdir" = "."; then :; else \
- rev="$$subdir $$rev"; \
- fi; \
- done; \
- rev="$$rev ."; \
- target=`echo $@ | sed s/-recursive//`; \
- for subdir in $$rev; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done && test -z "$$fail"
-tags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
- done
-ctags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
- done
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
- include_option=--etags-include; \
- empty_fix=.; \
- else \
- include_option=--include; \
- empty_fix=; \
- fi; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- test ! -f $$subdir/TAGS || \
- set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
- fi; \
- done; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
- @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- $(am__make_dryrun) \
- || test -d "$(distdir)/$$subdir" \
- || $(MKDIR_P) "$(distdir)/$$subdir" \
- || exit 1; \
- dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
- $(am__relativize); \
- new_distdir=$$reldir; \
- dir1=$$subdir; dir2="$(top_distdir)"; \
- $(am__relativize); \
- new_top_distdir=$$reldir; \
- echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
- echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
- ($(am__cd) $$subdir && \
- $(MAKE) $(AM_MAKEFLAGS) \
- top_distdir="$$new_top_distdir" \
- distdir="$$new_distdir" \
- am__remove_distdir=: \
- am__skip_length_check=: \
- am__skip_mode_fix=: \
- distdir) \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-recursive
-all-am: Makefile
-installdirs: installdirs-recursive
-installdirs-am:
-install: install-recursive
-install-exec: install-exec-recursive
-install-data: install-data-recursive
-uninstall: uninstall-recursive
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-recursive
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-recursive
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-recursive
- -rm -f Makefile
-distclean-am: clean-am distclean-generic distclean-tags
-
-dvi: dvi-recursive
-
-dvi-am:
-
-html: html-recursive
-
-html-am:
-
-info: info-recursive
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-recursive
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-recursive
-
-install-html-am:
-
-install-info: install-info-recursive
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-recursive
-
-install-pdf-am:
-
-install-ps: install-ps-recursive
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-recursive
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-recursive
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-recursive
-
-pdf-am:
-
-ps: ps-recursive
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \
- install-am install-strip tags-recursive
-
-.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
- all all-am check check-am clean clean-generic clean-libtool \
- ctags ctags-recursive distclean distclean-generic \
- distclean-libtool distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-pdf install-pdf-am \
- install-ps install-ps-am install-strip installcheck \
- installcheck-am installdirs installdirs-am maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
- uninstall uninstall-am
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/benchmark/graph/Makefile.am b/benchmark/graph/Makefile.am
deleted file mode 100644
index a3e8f83..0000000
--- a/benchmark/graph/Makefile.am
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-#
-# Nothing yet
-EXTRA_DIST=graph_report.sh \
- make_graph_file.pl \
- make_graph.sh
diff --git a/benchmark/graph/Makefile.in b/benchmark/graph/Makefile.in
deleted file mode 100644
index 584b87c..0000000
--- a/benchmark/graph/Makefile.in
+++ /dev/null
@@ -1,444 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = benchmark/graph
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-
-#
-# Nothing yet
-EXTRA_DIST = graph_report.sh \
- make_graph_file.pl \
- make_graph.sh
-
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps benchmark/graph/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps benchmark/graph/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-tags: TAGS
-TAGS:
-
-ctags: CTAGS
-CTAGS:
-
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: install-am install-strip
-
-.PHONY: all all-am check check-am clean clean-generic clean-libtool \
- distclean distclean-generic distclean-libtool distdir dvi \
- dvi-am html html-am info info-am install install-am \
- install-data install-data-am install-dvi install-dvi-am \
- install-exec install-exec-am install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/benchmark/graph/graph_report.sh b/benchmark/graph/graph_report.sh
deleted file mode 100755
index bbcf029..0000000
--- a/benchmark/graph/graph_report.sh
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/bin/bash
-
-#* Copyright (c) FFLAS-FFPACK
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-CURRENT_PATH=`pwd`
-HOME_PATH="${CURRENT_PATH}/.."
-
-
-echo "Choose the architecture to process (`ls $HOME_PATH/Target -I CVS`):"
-read ARCHI
-
-TEST_DIR=`ls $HOME_PATH/Target/$ARCHI -I CVS -I GOTO -I ATLAS -I compilation.log | tail -n 1`
-echo "Processing testing directory [$TEST_DIR]"
-
-TEST_PATH="$HOME_PATH/Target/$ARCHI/$TEST_DIR"
-PRIME=65521
-
-
-if test -d ${ARCHI}; then
- echo "";
-else
- mkdir ${ARCHI}
-fi
-
-f_base=fgemm
-n_base=dgemm
-
-## triangular system
-f_funct=ftrsm
-n_funct=dtrsm
-perl make_graph_file.pl ${TEST_PATH}/ATLAS/timing-check-${f_base}-${PRIME}.txt \
- ${TEST_PATH}/ATLAS/timing-check-${f_funct}-${PRIME}.txt > /tmp/${f_funct}-ATLAS.txt
-perl make_graph_file.pl ${TEST_PATH}/GOTO/timing-check-${f_base}-${PRIME}.txt \
- ${TEST_PATH}/GOTO/timing-check-${f_funct}-${PRIME}.txt > /tmp/${f_funct}-GOTO.txt
-
-perl make_graph_file.pl ${TEST_PATH}/ATLAS/timing-check-${n_base}-${PRIME}.txt \
- ${TEST_PATH}/ATLAS/timing-check-${n_funct}-${PRIME}.txt > /tmp/${n_funct}-ATLAS.txt
-perl make_graph_file.pl ${TEST_PATH}/GOTO/timing-check-${n_base}-${PRIME}.txt \
- ${TEST_PATH}/GOTO/timing-check-${n_funct}-${PRIME}.txt > /tmp/${n_funct}-GOTO.txt
-
-./make_graph.sh /tmp/${f_funct}-ATLAS.txt /tmp/${f_funct}-GOTO.txt \
- /tmp/${n_funct}-ATLAS.txt /tmp/${n_funct}-GOTO.txt \
- "trsm" "triangular system with matrix right hand side" "${ARCHI}"
-
-## LQUP
-f_funct=lqup
-n_funct=dgetrf
-perl make_graph_file.pl ${TEST_PATH}/ATLAS/timing-check-${f_base}-${PRIME}.txt \
- ${TEST_PATH}/ATLAS/timing-check-${f_funct}-${PRIME}.txt > /tmp/${f_funct}-ATLAS.txt
-perl make_graph_file.pl ${TEST_PATH}/GOTO/timing-check-${f_base}-${PRIME}.txt \
- ${TEST_PATH}/GOTO/timing-check-${f_funct}-${PRIME}.txt > /tmp/${f_funct}-GOTO.txt
-
-perl make_graph_file.pl ${TEST_PATH}/ATLAS/timing-check-${n_base}-${PRIME}.txt \
- ${TEST_PATH}/ATLAS/timing-check-${n_funct}-${PRIME}.txt > /tmp/${n_funct}-ATLAS.txt
-perl make_graph_file.pl ${TEST_PATH}/GOTO/timing-check-${n_base}-${PRIME}.txt \
- ${TEST_PATH}/GOTO/timing-check-${n_funct}-${PRIME}.txt > /tmp/${n_funct}-GOTO.txt
-
-./make_graph.sh /tmp/${f_funct}-ATLAS.txt /tmp/${f_funct}-GOTO.txt \
- /tmp/${n_funct}-ATLAS.txt /tmp/${n_funct}-GOTO.txt \
- "lqup" "matrix triangularization" "${ARCHI}"
-
-## INVERSION
-f_funct=inverse
-n_funct=dgetri
-perl make_graph_file.pl ${TEST_PATH}/ATLAS/timing-check-${f_base}-${PRIME}.txt \
- ${TEST_PATH}/ATLAS/timing-check-${f_funct}-${PRIME}.txt > /tmp/${f_funct}-ATLAS.txt
-perl make_graph_file.pl ${TEST_PATH}/GOTO/timing-check-${f_base}-${PRIME}.txt \
- ${TEST_PATH}/GOTO/timing-check-${f_funct}-${PRIME}.txt > /tmp/${f_funct}-GOTO.txt
-
-perl make_graph_file.pl ${TEST_PATH}/ATLAS/timing-check-${n_base}-${PRIME}.txt \
- ${TEST_PATH}/ATLAS/timing-check-${n_funct}-${PRIME}.txt > /tmp/${n_funct}-ATLAS.txt
-perl make_graph_file.pl ${TEST_PATH}/GOTO/timing-check-${n_base}-${PRIME}.txt \
- ${TEST_PATH}/GOTO/timing-check-${n_funct}-${PRIME}.txt > /tmp/${n_funct}-GOTO.txt
-
-./make_graph.sh /tmp/${f_funct}-ATLAS.txt /tmp/${f_funct}-GOTO.txt \
- /tmp/${n_funct}-ATLAS.txt /tmp/${n_funct}-GOTO.txt \
- "inversion" "matrix inversion" "${ARCHI}"
-
-## TRIANGULAR MATRIX INVERSION
-f_funct=ftrtri
-n_funct=dtrtri
-perl make_graph_file.pl ${TEST_PATH}/ATLAS/timing-check-${f_base}-${PRIME}.txt \
- ${TEST_PATH}/ATLAS/timing-check-${f_funct}-${PRIME}.txt > /tmp/${f_funct}-ATLAS.txt
-perl make_graph_file.pl ${TEST_PATH}/GOTO/timing-check-${f_base}-${PRIME}.txt \
- ${TEST_PATH}/GOTO/timing-check-${f_funct}-${PRIME}.txt > /tmp/${f_funct}-GOTO.txt
-
-perl make_graph_file.pl ${TEST_PATH}/ATLAS/timing-check-${n_base}-${PRIME}.txt \
- ${TEST_PATH}/ATLAS/timing-check-${n_funct}-${PRIME}.txt > /tmp/${n_funct}-ATLAS.txt
-perl make_graph_file.pl ${TEST_PATH}/GOTO/timing-check-${n_base}-${PRIME}.txt \
- ${TEST_PATH}/GOTO/timing-check-${n_funct}-${PRIME}.txt > /tmp/${n_funct}-GOTO.txt
-
-./make_graph.sh /tmp/${f_funct}-ATLAS.txt /tmp/${f_funct}-GOTO.txt \
- /tmp/${n_funct}-ATLAS.txt /tmp/${n_funct}-GOTO.txt \
- "trinversion" "triangular matrix inversion" "${ARCHI}"
diff --git a/benchmark/graph/make_graph.sh b/benchmark/graph/make_graph.sh
deleted file mode 100755
index c487618..0000000
--- a/benchmark/graph/make_graph.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash
-
-# Copyright (c) FFLAS-FFPACK
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-FF_ATLAS=$1
-FF_GOTO=$2
-NUM_ATLAS=$3
-NUM_GOTO=$4
-
-FUNCTION_NAME=$5
-FUNCTION_DESCR=$6
-ARCH=$7
-
-gnuplot <<EOF
-
-
-set encoding iso_8859_1
-set xlabel "Matrix dimension"
-set ylabel "ratio"
-set title "ratio of matrix multiplication / ${FUNCTION_DESCR}"
-
-set terminal postscript enhanced color 18
-set output "${ARCH}/graph-${ARCH}-${FUNCTION_NAME}.eps"
-
-plot [1000:10000][] "$FF_ATLAS" using 1:(\$3/\$2) title "FFLAS/FFPACK (ATLAS)" with lines 1 ,\
- "$FF_GOTO" using 1:(\$3/\$2) title "FFLAS/FFPACK (GOTO)" with lines 2 ,\
- "$NUM_ATLAS" using 1:(\$3/\$2) title "BLAS/LAPACK (ATLAS)" with lines 3 ,\
- "$NUM_GOTO" using 1:(\$3/\$2) title "BLAS/LAPACK (GOTO)" with lines 4
-
-
-
-
-EOF
diff --git a/benchmark/graph/make_graph_file.pl b/benchmark/graph/make_graph_file.pl
deleted file mode 100644
index 3ffc237..0000000
--- a/benchmark/graph/make_graph_file.pl
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/perl
-
-# Copyright (c) FFLAS-FFPACK
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-$fichier1 = $ARGV[0];
-$fichier2 = $ARGV[1];
-open(FIC1, $fichier1) or die "Impossible d'ouvrir $fichier...\n";
-open(FIC2, $fichier2) or die "Impossible d'ouvrir $fichier...\n";
-
-while(($l1 = <FIC1>) && ($l2 = <FIC2>)) {
- @t1 = split(/\s+/, $l1); $v1 = $t1[5]; $v0 = $t1[1];
- @t2 = split(/\s+/, $l2); $v2 = $t2[5];
- print "$v0 $v1 $v2\n";
-}
-
-close(FIC1);
-close(FIC2);
diff --git a/benchmark/html/Makefile.in b/benchmark/html/Makefile.in
deleted file mode 100644
index 4f78472..0000000
--- a/benchmark/html/Makefile.in
+++ /dev/null
@@ -1,445 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = benchmark/html
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-
-#
-# Nothing yet
-EXTRA_DIST = fflas.css \
- html_report.sh \
- html_report.xsl \
- process.sh
-
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps benchmark/html/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps benchmark/html/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-tags: TAGS
-TAGS:
-
-ctags: CTAGS
-CTAGS:
-
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: install-am install-strip
-
-.PHONY: all all-am check check-am clean clean-generic clean-libtool \
- distclean distclean-generic distclean-libtool distdir dvi \
- dvi-am html html-am info info-am install install-am \
- install-data install-data-am install-dvi install-dvi-am \
- install-exec install-exec-am install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/benchmark/html/fflas.css b/benchmark/html/fflas.css
deleted file mode 100644
index 316707e..0000000
--- a/benchmark/html/fflas.css
+++ /dev/null
@@ -1,89 +0,0 @@
-/* Written by Clément Pernet <clement.pernet at imag.fr>*/
-/* Copyright (c) FFLAS-FFPACK */
-/* ========LICENCE========*/
-/* This file is part of the library FFLAS-FFPACK.*/
-/**/
-/* FFLAS-FFPACK is free software: you can redistribute it and/or modify*/
-/* it under the terms of the GNU Lesser General Public*/
-/* License as published by the Free Software Foundation; either*/
-/* version 2.1 of the License, or (at your option) any later version.*/
-/**/
-/* This library is distributed in the hope that it will be useful,*/
-/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
-/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU*/
-/* Lesser General Public License for more details.*/
-/**/
-/* You should have received a copy of the GNU Lesser General Public*/
-/* License along with this library; if not, write to the Free Software*/
-/* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA*/
-/* ========LICENCE========*/
-/*/*/
-
-body {
- margin-left: 5%;
- margin-right: 5%;
- background : #eeeeee;
-}
-
-table {
- border : 1;
- margin-left: 10%;
- width: 80%;
-}
-
-td {
- color:#000000;
- padding : 4px 10px;
- text-align:center;
- font-family: georgia;
- font-size : 10 pt;
- font-weight : bold;
-}
-
-td.headdimension {
- background-color : #99CCFF
-}
-
-td.dimension {
- color: #000000;
- text-align:center;
- font-family:georgia;
- font-size : 10 pt;
- font-weight : bold;
- background-color : #dddddd
-}
-
-td.headtime {
- background-color : #CCCCFF
-}
-
-td.time {
- padding : 4px 10px;
- color:#000088;
- text-align:right;
- font-family:georgia;
- font-size : 10 pt;
- font-weight : normal;
- background-color : #ffffff
-}
-
-h2 {
- color: #880000
-}
-
-h3 {
- color: #000088;
- margin: 0.2 10
-}
-
-p
-{
-text-align: center;
-color: black;
-font-family: arial
-}
-
-p.report{
- align: center;
-
-}
diff --git a/benchmark/html/html_report.sh b/benchmark/html/html_report.sh
deleted file mode 100755
index 38109e4..0000000
--- a/benchmark/html/html_report.sh
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/bin/bash
-# Copyright (c) FFLAS-FFPACK
-# Written by Clément Pernet <clement.pernet at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-
-CURRENT_PATH=`pwd`
-HOME_PATH="${CURRENT_PATH}/.."
-
-
-echo "Choose the architecture to process (`ls $HOME_PATH/Target -I CVS`):"
-read ARCHI
-
-echo "Enter the description of the architecture :"
-read ARCHI_DESCR
-
-TEST_DIR=`ls $HOME_PATH/Target/$ARCHI -I GOTO -I ATLAS -I compilation.log -I report.xml| tail -n 1`
-echo "Processing testing directory [$TEST_DIR]"
-
-TEST_PATH="$HOME_PATH/Target/$ARCHI/$TEST_DIR"
-
-PRIME=65521
-
-
-XML_FILE="$TEST_PATH/report.xml"
-HTML_FILE="$CURRENT_PATH/report-${ARCHI}.html"
-cd $TEST_PATH
-
-echo "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>" > ${XML_FILE}
-echo "<benchmark>" >> ${XML_FILE}
-echo "<archi> $ARCHI_DESCR </archi>" >> ${XML_FILE}
-echo "<prime> $PRIME </prime>" >> ${XML_FILE}
-
-${CURRENT_PATH}/process.sh "Matrix Multiplication" timing-check-dgemm-${PRIME}.txt \
- timing-check-fgemm-${PRIME}.txt >> ${XML_FILE}
-
-${CURRENT_PATH}/process.sh "Matrix Triangularization" timing-check-dgetrf-${PRIME}.txt \
- timing-check-lqup-${PRIME}.txt >> ${XML_FILE}
-
-${CURRENT_PATH}/process.sh "Multiple Triangular System Solving" timing-check-dtrsm-${PRIME}.txt \
- timing-check-ftrsm-${PRIME}.txt >> ${XML_FILE}
-
-${CURRENT_PATH}/process.sh "Matrix Inversion" timing-check-dgetri-${PRIME}.txt \
- timing-check-inverse-${PRIME}.txt >> ${XML_FILE}
-
-${CURRENT_PATH}/process.sh "Triangular Matrix Inversion" timing-check-dtrtri-${PRIME}.txt \
- timing-check-ftrtri-${PRIME}.txt >> ${XML_FILE}
-echo "</benchmark>" >> ${XML_FILE}
-
-
-xsltproc -o ${HTML_FILE} $CURRENT_PATH/html_report.xsl ${XML_FILE}
diff --git a/benchmark/html/html_report.xsl b/benchmark/html/html_report.xsl
deleted file mode 100644
index 55a068d..0000000
--- a/benchmark/html/html_report.xsl
+++ /dev/null
@@ -1,72 +0,0 @@
-<!--
-# Copyright (c) FFLAS-FFPACK
-# Written by Clément Pernet <clement.pernet at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
--->
-
-
-<?xml version="1.0" encoding="utf-8"?>
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- version="1.0">
-
- <xsl:output method="html" encoding="ISO-8859-1"
- doctype-public="-//W3C//DTD HTML 4.01//EN"
- doctype-system="http://www.w3.org/TR/html4/strict.dtd"
- indent="yes"/>
-
- <xsl:template match="benchmark">
- <html>
- <head>
- <link href="fflas.css" rel="stylesheet" type="text/css" />
- <title>FFLAS-FFPACK benchmark suite</title>
- </head>
- <body>
- <h1 align="center">FFLAS-FFPACK Benchmark suite</h1>
- <h2 align="center">Testing <xsl:value-of select="archi"/></h2>
- <p align="center">FFLAS-FFPACK computation is done over GF( <xsl:value-of select="prime"/>)</p>
- <xsl:for-each select="test">
- <hr/>
- <p class="report">
- <h3><xsl:value-of select="./@name"/> report</h3>
- <table border="1">
- <tr>
- <td class="headdimension"> Dimension </td>
- <xsl:for-each select="function[1]/run">
- <td class="dimension"> <xsl:value-of select="./@size"/> </td>
- </xsl:for-each>
- </tr>
- <xsl:for-each select="function">
- <tr>
- <td class="headtime"> <xsl:value-of select="./@name"/> with <xsl:value-of select="./@blas"/></td>
- <xsl:for-each select="run">
- <td class="time"> <xsl:value-of select='format-number(., "#.00")'/><i>s</i></td>
- </xsl:for-each>
- </tr>
- </xsl:for-each>
- </table>
- </p>
- </xsl:for-each>
- <hr/>
- <p class="footnote"> page automatically generated - Pascal Giorgi @2007</p>
- </body>
- </html>
- </xsl:template>
-
-</xsl:stylesheet>
diff --git a/benchmark/html/process.sh b/benchmark/html/process.sh
deleted file mode 100755
index 3d89c08..0000000
--- a/benchmark/html/process.sh
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/bash
-# Written by Clément Pernet <clement.pernet at imag.fr>
-# Copyright (c) FFLAS-FFPACK
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-# usage : process.sh "test name" blas-result.txt fflas-result.txt
-
-prime=`basename $2 | cut -d "-" -f 4 | cut -d "." -f 1`
-blas_fct=`basename $2 | cut -d "-" -f 3`
-fflas_fct=`basename $3 | cut -d "-" -f 3`
-
-
-echo `pwd`
-echo "<test name=\"$1\">"
-echo -n "<prime> "
-echo -n "$prime "
-echo "</prime>"
-
-echo -n "<function name="
-echo -n "\"$blas_fct\""
-echo " blas=\"GOTO\">"
-awk -F " " '{print "<run size=\42"$2"\42> "$6"</run>" }' GOTO/$2
-echo "</function>"
-
-echo -n "<function name="
-echo -n "\"$fflas_fct\""
-echo " blas=\"GOTO\">"
-awk -F " " '{print "<run size=\42"$2"\42> "$6"</run>" }' GOTO/$3
-echo "</function>"
-
-
-echo -n "<function name="
-echo -n "\"$blas_fct\""
-echo " blas=\"ATLAS\">"
-awk -F " " '{print "<run size=\42"$2"\42> "$6"</run>" }' ATLAS/$2
-echo "</function>"
-
-echo -n "<function name="
-echo -n "\"$fflas_fct\""
-echo " blas=\"ATLAS\">"
-awk -F " " '{print "<run size=\42"$2"\42> "$6"</run>" }' ATLAS/$3
-echo "</function>"
-
-echo "</test>"
-
diff --git a/benchmark/run.sh b/benchmark/run.sh
deleted file mode 100755
index 8757688..0000000
--- a/benchmark/run.sh
+++ /dev/null
@@ -1,150 +0,0 @@
-#!/bin/bash
-# Copyright (c) FFLAS-FFPACK
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-
-TEST_NAME=`date '+%F-%Hh%M'`
-ARCH_NAME=`uname -p`
-CURRENT_PATH=`pwd`
-
-if test "$ARCH_NAME"="unknown"; then
- echo "Enter the architecture name:"
- read ARCH_NAME
-fi;
-
-TARGET_PATH="${CURRENT_PATH}/Target/${ARCH_NAME}"
-TEST_PATH="${TARGET_PATH}/${TEST_NAME}"
-TEST_SRC_PATH="${CURRENT_PATH}/test-src"
-
-DOING_COMPILATION="yes"
-NEW_ARCH="yes"
-
-
-
-if test -d ${TARGET_PATH}; then
- NEW_ARCH="no"
- echo "The $ARCH_NAME architecture has been already tested."
- while [ "$answer" != "yes" -a "$answer" != "no" ]; do
- echo -n "Do you want to perform another test? yes/no : "
- read answer
- done
- if test "$answer" = "no"; then
- echo "You cancelled this script ! Bye Bye ..."
- exit
- else
- answer=" "
- while [ "$answer" != "yes" -a "$answer" != "no" ]; do
- echo -n "Do you want to recompile the sources? yes/no : "
- read answer
- done
- if test "$answer" = "no"; then
- DOING_COMPILATION="no"
- else
- DOING_COMPILATION="yes"
- fi
- fi
- mkdir $-P ${TEST_PATH}
-else
- mkdir -p ${TARGET_PATH}
- mkdir -p ${TEST_PATH}
-fi
-
-echo
-
-if test "${DOING_COMPILATION}" = "yes"; then
-
- echo "Launching compilation..."
- echo " errors will be redirected to ${TARGET_PATH}/compilation.log"
- echo
-
- if test -f ${TARGET_PATH}/compilation.log; then
- rm ${TARGET_PATH}/compilation.log
- fi
-
- export TARGET_PATH
-
-## Launch GOTO test compilation
- BIN_PATH="${TARGET_PATH}/GOTO"
- if test -d ${BIN_PATH}; then
- rm -rf "${BIN_PATH}/*"
- else
- mkdir -p ${BIN_PATH}
- fi
- export BIN_PATH
- cd ${CURRENT_PATH}/src/FFLAS_FFPACK
- echo "Compiling FFLAS_FFPACK with GOTO..."
- echo "Compiling FFLAS_FFPACK with GOTO..." >> ${TARGET_PATH}/compilation.log
- make -k GOTO_LINK=true && echo "compilation done" && echo
- cd ${CURRENT_PATH}/src/BLAS_LAPACK
- echo "Compiling BLAS_LAPACK with GOTO..."
- echo "Compiling BLAS_LAPACK with GOTO..." >> ${TARGET_PATH}/compilation.log
- make -k GOTO_LINK=true && echo "compilation done" && echo
-
-
-## Launch ATLAS test compilation
- BIN_PATH="${TARGET_PATH}/ATLAS"
- if test -d ${BIN_PATH}; then
- rm -rf "${BIN_PATH}/*"
- else
- mkdir -p ${BIN_PATH}
- fi
- export BIN_PATH
- cd ${CURRENT_PATH}/src/FFLAS_FFPACK
- echo "Compiling FFLAS_FFPACK with ATLAS..."
- echo "Compiling FFLAS_FFPACK with ATLAS..." >> ${TARGET_PATH}/compilation.log
- make -k ATLAS_LINK=true && echo "compilation done" && echo
- cd ${CURRENT_PATH}/src/BLAS_LAPACK
- echo "Compiling BLAS_LAPACK with ATLAS..."
- echo "Compiling BLAS_LAPACK with ATLAS..." >> ${TARGET_PATH}/compilation.log
- make -k ATLAS_LINK=true && echo "compilation done" && echo
-else
- echo "Skipping compilation..."
- echo
-fi
-
-
-## launch testing phase
-echo "Launching test..."
-echo
-export TEST_SRC_PATH
-export TEST_PATH
-
-## Run GOTO test
-BIN_PATH="${TARGET_PATH}/GOTO"
-TEST_PATH="${TARGET_PATH}/${TEST_NAME}/GOTO"
-mkdir -p ${TEST_PATH}
-export TEST_PATH
-export BIN_PATH
-echo "running FFLAS_FFPACK tests with GOTO..."
-${TEST_SRC_PATH}/mesure-FFLAS_FFPACK.sh
-echo "running BLAS_LAPACK tests with GOTO..."
-${TEST_SRC_PATH}/mesure-BLAS_LAPACK.sh
-
-
-## Run ATLAS test
-BIN_PATH="${TARGET_PATH}/ATLAS"
-TEST_PATH="${TARGET_PATH}/${TEST_NAME}/ATLAS"
-mkdir -p ${TEST_PATH}
-export TEST_PATH
-export BIN_PATH
-echo "running FFLAS_FFPACK tests with ATLAS..."
-${TEST_SRC_PATH}/mesure-FFLAS_FFPACK.sh
-echo "running BLAS_LAPACK tests with ATLAS..."
-${TEST_SRC_PATH}/mesure-BLAS_LAPACK.sh
diff --git a/benchmark/src/BLAS_LAPACK/Makefile.am b/benchmark/src/BLAS_LAPACK/Makefile.am
deleted file mode 100644
index 892d603..0000000
--- a/benchmark/src/BLAS_LAPACK/Makefile.am
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-#
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-SUBDIRS =
-
-BENCHMARK=check-dgemm check-dtrsm check-dtrtri check-dgetri check-dgetrf
-
-bench: $(BENCHMARK)
-
-AM_CPPFLAGS=-I$(top_srcdir)
-AM_CXXFLAGS = @DEFAULT_CFLAGS@
-AM_CPPFLAGS += $(OPTFLAGS) -I$(top_srcdir)/fflas-ffpack/utils/ -I$(top_srcdir)/fflas-ffpack/fflas/ -I$(top_srcdir)/fflas-ffpack/ffpack -I$(top_srcdir)/fflas-ffpack/field $(CBLAS_FLAG) $(GMP_CFLAGS) $(GIVARO_CFLAGS)
-LDADD = $(BLAS_LIBS) $(GIVARO_LIBS) $(GMP_LIBS)
-AM_LDFLAGS=-static
-
-
-
-# EXTRA_DIST=
-
-CLEANFILES= \
- $(BENCHMARK)
-
-EXTRA_PROGRAMS = \
- $(BENCHMARK)
-
-check_dgemm_SOURCES = check-dgemm.C
-check_dtrsm_SOURCES = check-dtrsm.C
-check_dtrtri_SOURCES = check-dtrtri.C
-check_dgetri_SOURCES = check-dgetri.C
-check_dgetrf_SOURCES = check-dgetrf.C
-
-# for compilation of new benches
-FFLASFFPACK_BIN=@bindir@
-%:%.C
- $(CXX) $(CXXFLAGS) $(AM_CXXFLAGS) $(OPTFLAGS) ${INCLUDES} $(AM_CPPFLAGS) $*.C -o $@ $(LDFLAGS) $(LDADD) $(LOADLIBES)
diff --git a/benchmark/src/BLAS_LAPACK/Makefile.in b/benchmark/src/BLAS_LAPACK/Makefile.in
deleted file mode 100644
index 14ace5a..0000000
--- a/benchmark/src/BLAS_LAPACK/Makefile.in
+++ /dev/null
@@ -1,743 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-#
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-EXTRA_PROGRAMS = $(am__EXEEXT_1)
-subdir = benchmark/src/BLAS_LAPACK
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-am__EXEEXT_1 = check-dgemm$(EXEEXT) check-dtrsm$(EXEEXT) \
- check-dtrtri$(EXEEXT) check-dgetri$(EXEEXT) \
- check-dgetrf$(EXEEXT)
-am_check_dgemm_OBJECTS = check-dgemm.$(OBJEXT)
-check_dgemm_OBJECTS = $(am_check_dgemm_OBJECTS)
-check_dgemm_LDADD = $(LDADD)
-am__DEPENDENCIES_1 =
-check_dgemm_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1)
-am_check_dgetrf_OBJECTS = check-dgetrf.$(OBJEXT)
-check_dgetrf_OBJECTS = $(am_check_dgetrf_OBJECTS)
-check_dgetrf_LDADD = $(LDADD)
-check_dgetrf_DEPENDENCIES = $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
-am_check_dgetri_OBJECTS = check-dgetri.$(OBJEXT)
-check_dgetri_OBJECTS = $(am_check_dgetri_OBJECTS)
-check_dgetri_LDADD = $(LDADD)
-check_dgetri_DEPENDENCIES = $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
-am_check_dtrsm_OBJECTS = check-dtrsm.$(OBJEXT)
-check_dtrsm_OBJECTS = $(am_check_dtrsm_OBJECTS)
-check_dtrsm_LDADD = $(LDADD)
-check_dtrsm_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1)
-am_check_dtrtri_OBJECTS = check-dtrtri.$(OBJEXT)
-check_dtrtri_OBJECTS = $(am_check_dtrtri_OBJECTS)
-check_dtrtri_LDADD = $(LDADD)
-check_dtrtri_DEPENDENCIES = $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
-DEFAULT_INCLUDES = -I. at am__isrc@ -I$(top_builddir)
-depcomp =
-am__depfiles_maybe =
-CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
- $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
- $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-CXXLD = $(CXX)
-CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
- $(LDFLAGS) -o $@
-SOURCES = $(check_dgemm_SOURCES) $(check_dgetrf_SOURCES) \
- $(check_dgetri_SOURCES) $(check_dtrsm_SOURCES) \
- $(check_dtrtri_SOURCES)
-DIST_SOURCES = $(check_dgemm_SOURCES) $(check_dgetrf_SOURCES) \
- $(check_dgetri_SOURCES) $(check_dtrsm_SOURCES) \
- $(check_dtrtri_SOURCES)
-RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
- html-recursive info-recursive install-data-recursive \
- install-dvi-recursive install-exec-recursive \
- install-html-recursive install-info-recursive \
- install-pdf-recursive install-ps-recursive install-recursive \
- installcheck-recursive installdirs-recursive pdf-recursive \
- ps-recursive uninstall-recursive
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
- distclean-recursive maintainer-clean-recursive
-AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
- $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
- distdir
-ETAGS = etags
-CTAGS = ctags
-DIST_SUBDIRS = $(SUBDIRS)
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-am__relativize = \
- dir0=`pwd`; \
- sed_first='s,^\([^/]*\)/.*$$,\1,'; \
- sed_rest='s,^[^/]*/*,,'; \
- sed_last='s,^.*/\([^/]*\)$$,\1,'; \
- sed_butlast='s,/*[^/]*$$,,'; \
- while test -n "$$dir1"; do \
- first=`echo "$$dir1" | sed -e "$$sed_first"`; \
- if test "$$first" != "."; then \
- if test "$$first" = ".."; then \
- dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
- dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
- else \
- first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
- if test "$$first2" = "$$first"; then \
- dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
- else \
- dir2="../$$dir2"; \
- fi; \
- dir0="$$dir0"/"$$first"; \
- fi; \
- fi; \
- dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
- done; \
- reldir="$$dir2"
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-SUBDIRS =
-BENCHMARK = check-dgemm check-dtrsm check-dtrtri check-dgetri check-dgetrf
-AM_CPPFLAGS = -I$(top_srcdir) $(OPTFLAGS) \
- -I$(top_srcdir)/fflas-ffpack/utils/ \
- -I$(top_srcdir)/fflas-ffpack/fflas/ \
- -I$(top_srcdir)/fflas-ffpack/ffpack \
- -I$(top_srcdir)/fflas-ffpack/field $(CBLAS_FLAG) $(GMP_CFLAGS) \
- $(GIVARO_CFLAGS)
-AM_CXXFLAGS = @DEFAULT_CFLAGS@
-LDADD = $(BLAS_LIBS) $(GIVARO_LIBS) $(GMP_LIBS)
-AM_LDFLAGS = -static
-
-# EXTRA_DIST=
-CLEANFILES = \
- $(BENCHMARK)
-
-check_dgemm_SOURCES = check-dgemm.C
-check_dtrsm_SOURCES = check-dtrsm.C
-check_dtrtri_SOURCES = check-dtrtri.C
-check_dgetri_SOURCES = check-dgetri.C
-check_dgetrf_SOURCES = check-dgetrf.C
-
-# for compilation of new benches
-FFLASFFPACK_BIN = @bindir@
-all: all-recursive
-
-.SUFFIXES:
-.SUFFIXES: .C .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps benchmark/src/BLAS_LAPACK/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps benchmark/src/BLAS_LAPACK/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-check-dgemm$(EXEEXT): $(check_dgemm_OBJECTS) $(check_dgemm_DEPENDENCIES) $(EXTRA_check_dgemm_DEPENDENCIES)
- @rm -f check-dgemm$(EXEEXT)
- $(CXXLINK) $(check_dgemm_OBJECTS) $(check_dgemm_LDADD) $(LIBS)
-check-dgetrf$(EXEEXT): $(check_dgetrf_OBJECTS) $(check_dgetrf_DEPENDENCIES) $(EXTRA_check_dgetrf_DEPENDENCIES)
- @rm -f check-dgetrf$(EXEEXT)
- $(CXXLINK) $(check_dgetrf_OBJECTS) $(check_dgetrf_LDADD) $(LIBS)
-check-dgetri$(EXEEXT): $(check_dgetri_OBJECTS) $(check_dgetri_DEPENDENCIES) $(EXTRA_check_dgetri_DEPENDENCIES)
- @rm -f check-dgetri$(EXEEXT)
- $(CXXLINK) $(check_dgetri_OBJECTS) $(check_dgetri_LDADD) $(LIBS)
-check-dtrsm$(EXEEXT): $(check_dtrsm_OBJECTS) $(check_dtrsm_DEPENDENCIES) $(EXTRA_check_dtrsm_DEPENDENCIES)
- @rm -f check-dtrsm$(EXEEXT)
- $(CXXLINK) $(check_dtrsm_OBJECTS) $(check_dtrsm_LDADD) $(LIBS)
-check-dtrtri$(EXEEXT): $(check_dtrtri_OBJECTS) $(check_dtrtri_DEPENDENCIES) $(EXTRA_check_dtrtri_DEPENDENCIES)
- @rm -f check-dtrtri$(EXEEXT)
- $(CXXLINK) $(check_dtrtri_OBJECTS) $(check_dtrtri_LDADD) $(LIBS)
-
-mostlyclean-compile:
- -rm -f *.$(OBJEXT)
-
-distclean-compile:
- -rm -f *.tab.c
-
-.C.o:
- $(CXXCOMPILE) -c -o $@ $<
-
-.C.obj:
- $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
-
-.C.lo:
- $(LTCXXCOMPILE) -c -o $@ $<
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-
-# This directory's subdirectories are mostly independent; you can cd
-# into them and run `make' without going through this Makefile.
-# To change the values of `make' variables: instead of editing Makefiles,
-# (1) if the variable is set in `config.status', edit `config.status'
-# (which will cause the Makefiles to be regenerated when you run `make');
-# (2) otherwise, pass the desired values on the `make' command line.
-$(RECURSIVE_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- target=`echo $@ | sed s/-recursive//`; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- dot_seen=yes; \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done; \
- if test "$$dot_seen" = "no"; then \
- $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
- fi; test -z "$$fail"
-
-$(RECURSIVE_CLEAN_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- case "$@" in \
- distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
- *) list='$(SUBDIRS)' ;; \
- esac; \
- rev=''; for subdir in $$list; do \
- if test "$$subdir" = "."; then :; else \
- rev="$$subdir $$rev"; \
- fi; \
- done; \
- rev="$$rev ."; \
- target=`echo $@ | sed s/-recursive//`; \
- for subdir in $$rev; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done && test -z "$$fail"
-tags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
- done
-ctags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
- done
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
- include_option=--etags-include; \
- empty_fix=.; \
- else \
- include_option=--include; \
- empty_fix=; \
- fi; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- test ! -f $$subdir/TAGS || \
- set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
- fi; \
- done; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
- @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- $(am__make_dryrun) \
- || test -d "$(distdir)/$$subdir" \
- || $(MKDIR_P) "$(distdir)/$$subdir" \
- || exit 1; \
- dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
- $(am__relativize); \
- new_distdir=$$reldir; \
- dir1=$$subdir; dir2="$(top_distdir)"; \
- $(am__relativize); \
- new_top_distdir=$$reldir; \
- echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
- echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
- ($(am__cd) $$subdir && \
- $(MAKE) $(AM_MAKEFLAGS) \
- top_distdir="$$new_top_distdir" \
- distdir="$$new_distdir" \
- am__remove_distdir=: \
- am__skip_length_check=: \
- am__skip_mode_fix=: \
- distdir) \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-recursive
-all-am: Makefile
-installdirs: installdirs-recursive
-installdirs-am:
-install: install-recursive
-install-exec: install-exec-recursive
-install-data: install-data-recursive
-uninstall: uninstall-recursive
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-recursive
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
- -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-recursive
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-recursive
- -rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
- distclean-tags
-
-dvi: dvi-recursive
-
-dvi-am:
-
-html: html-recursive
-
-html-am:
-
-info: info-recursive
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-recursive
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-recursive
-
-install-html-am:
-
-install-info: install-info-recursive
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-recursive
-
-install-pdf-am:
-
-install-ps: install-ps-recursive
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-recursive
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-recursive
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic \
- mostlyclean-libtool
-
-pdf: pdf-recursive
-
-pdf-am:
-
-ps: ps-recursive
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \
- install-am install-strip tags-recursive
-
-.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
- all all-am check check-am clean clean-generic clean-libtool \
- ctags ctags-recursive distclean distclean-compile \
- distclean-generic distclean-libtool distclean-tags distdir dvi \
- dvi-am html html-am info info-am install install-am \
- install-data install-data-am install-dvi install-dvi-am \
- install-exec install-exec-am install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs installdirs-am \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
- pdf pdf-am ps ps-am tags tags-recursive uninstall uninstall-am
-
-
-bench: $(BENCHMARK)
-%:%.C
- $(CXX) $(CXXFLAGS) $(AM_CXXFLAGS) $(OPTFLAGS) ${INCLUDES} $(AM_CPPFLAGS) $*.C -o $@ $(LDFLAGS) $(LDADD) $(LOADLIBES)
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/benchmark/src/BLAS_LAPACK/check-dgemm.C b/benchmark/src/BLAS_LAPACK/check-dgemm.C
deleted file mode 100644
index e6c3e33..0000000
--- a/benchmark/src/BLAS_LAPACK/check-dgemm.C
+++ /dev/null
@@ -1,96 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-//#include "goto-def.h"
-
-/* Copyright (c) FFLAS-FFPACK
-* Written by Clément Pernet <clement.pernet at imag.fr>
-* ========LICENCE========
-* This file is part of the library FFLAS-FFPACK.
-*
-* FFLAS-FFPACK is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Lesser General Public
-* License as published by the Free Software Foundation; either
-* version 2.1 of the License, or (at your option) any later version.
-*
-* This library is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* Lesser General Public License for more details.
-*
-* You should have received a copy of the GNU Lesser General Public
-* License along with this library; if not, write to the Free Software
-* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-* ========LICENCE========
-*/
-
-#include <iostream>
-
-#include "fflas-ffpack/config-blas.h"
-#include "fflas-ffpack/fflas/fflas.h"
-#include "fflas-ffpack/field/modular-balanced.h"
-#include "fflas-ffpack/utils/timer.h"
-#include "fflas-ffpack/utils/Matio.h"
-
-using namespace std;
-
-int main(int argc, char** argv) {
-
- // parameter: p, n, iteration, file1, file2
-
- int p = atoi(argv[1]);
- int n = atoi(argv[2]);
- size_t iter = atoi(argv[3]);
-
-
- typedef FFPACK::Modular<double> Field;
- // typedef FFPACK::ModularBalanced<double> Field;
- typedef Field::Element Element;
-
- Field F(p);
-
- Timer chrono;
- double time=0.0;// time2=0.0;
-
- Element * A, * B, * C;
-
- for (size_t i=0;i<iter;++i){
-
- if (argc > 4){
- A = read_field (F, argv[4], &n, &n);
- }
- else{
- Field::RandIter G(F);
- A = new Element[n*n];
- for (size_t j=0; j<(size_t)n*n; ++j)
- G.random (*(A+j));
- }
-
- if (argc == 6){
- B = read_field (F, argv[5], &n, &n);
- }
- else{
- Field::RandIter G(F);
- B = new Element[n*n];
- for (size_t j=0; j<(size_t)n*n; ++j)
- G.random(*(B+j));
- }
-
- C = new Element[n*n];
-
- chrono.clear();
- chrono.start();
- cblas_dgemm (CblasRowMajor, CblasNoTrans, CblasNoTrans, n,n,n, F.one,
- A, n, B, n, F.zero, C,n);
- chrono.stop();
- time+=chrono.usertime();
-
- delete[] A;
- delete[] B;
- delete[] C;
- }
-
- std::cerr<<"n: "<<n<<" p: "<<p<<" time: "<<time/(double)iter<<std::endl;
-
- return 0;
-}
-
diff --git a/benchmark/src/BLOCKING/Makefile b/benchmark/src/BLOCKING/Makefile
deleted file mode 100644
index d21b54f..0000000
--- a/benchmark/src/BLOCKING/Makefile
+++ /dev/null
@@ -1,450 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# benchmark/src/BLOCKING/Makefile. Generated from Makefile.in by configure.
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-
-
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/fflas-ffpack
-pkgincludedir = $(includedir)/fflas-ffpack
-pkglibdir = $(libdir)/fflas-ffpack
-pkglibexecdir = $(libexecdir)/fflas-ffpack
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = x86_64-unknown-linux-gnu
-host_triplet = x86_64-unknown-linux-gnu
-subdir = benchmark/src/BLOCKING
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = ${SHELL} /home/bruce/Fflas/build-aux/missing --run aclocal-1.11 -I macros
-AMTAR = $${TAR-tar}
-AR = ar
-AUTOCONF = ${SHELL} /home/bruce/Fflas/build-aux/missing --run autoconf
-AUTOHEADER = ${SHELL} /home/bruce/Fflas/build-aux/missing --run autoheader
-AUTOMAKE = ${SHELL} /home/bruce/Fflas/build-aux/missing --run automake-1.11
-AWK = gawk
-BLAS_FOUND = true
-BLAS_LIBS = -lcblas -latlas -llapack
-BLAS_PATH = /usr/lib
-BLAS_VENDOR = ATLAS
-CBLAS_FLAG = -D__FFLASFFPACK_HAVE_CBLAS
-CC = gcc
-CCNAM = gcc
-CFLAGS = -g -O2
-CPPFLAGS =
-CXX = g++
-CXXCPP = g++ -E
-CXXFLAGS =
-CYGPATH_W = echo
-DBG = yes
-DEBUG_CFLAGS = -g -DDEBUG -DFFLASFFPACK_DEBUG
-DEFAULT_CFLAGS = -O0 -pipe -Wall -Wextra -Wno-unused-parameter -Wuninitialized -Wconversion -Wcast-qual -ansi -pedantic -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Wno-long-long -g -DDEBUG -DFFLASFFPACK_DEBUG
-DEFS = -DHAVE_CONFIG_H
-DLLTOOL = false
-DSYMUTIL =
-DUMPBIN =
-ECHO_C =
-ECHO_N = -n
-ECHO_T =
-EGREP = /bin/grep -E
-EXEEXT =
-FFLASFFPACK_DOC_PATH = /tmp/docs
-FGREP = /bin/grep -F
-GIVARO_CFLAGS =
-GIVARO_LIBS =
-GMP_CFLAGS =
-GMP_LIBS = -lgmpxx -lgmp
-GMP_VERSION =
-GREP = /bin/grep
-INSTALL = /usr/bin/install -c
-INSTALL_DATA = ${INSTALL} -m 644
-INSTALL_PROGRAM = ${INSTALL}
-INSTALL_SCRIPT = ${INSTALL}
-INSTALL_STRIP_PROGRAM = $(install_sh) -c -s
-LAPACK_LIBS = -llapack
-LD = /usr/x86_64-pc-linux-gnu/bin/ld -m elf_x86_64
-LDFLAGS =
-LIBOBJS =
-LIBS =
-LIBTOOL = $(SHELL) $(top_builddir)/libtool
-LIPO =
-LN_S = ln -s
-LTLIBOBJS =
-MAINT =
-MAKEINFO = ${SHELL} /home/bruce/Fflas/build-aux/missing --run makeinfo
-MANIFEST_TOOL = :
-MKDIR_P = /bin/mkdir -p
-NM = /usr/bin/nm -B
-NMEDIT =
-OBJDUMP = objdump
-OBJEXT = o
-OTOOL =
-OTOOL64 =
-PACKAGE = fflas-ffpack
-PACKAGE_BUGREPORT = ffpack-devel at googlegroups.com
-PACKAGE_NAME = FFLAS-FFPACK
-PACKAGE_STRING = FFLAS-FFPACK 1.6.0
-PACKAGE_TARNAME = fflas-ffpack
-PACKAGE_URL = http://www.linalg.org/projects/fflas-ffpack
-PACKAGE_VERSION = 1.6.0
-PATH_SEPARATOR = :
-PROF = no
-RANLIB = ranlib
-RM = /bin/rm -f
-SED = /bin/sed
-SET_MAKE =
-SHELL = /bin/sh
-STRIP = strip
-TESTS_CFLAGS = -O0 -Wall -Wextra -Wno-unused-parameter -Wuninitialized -Wconversion -Wcast-qual -ansi -pedantic -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Wno-long-long -g -DDEBUG -DFFLASFFPACK_DEBUG
-VERSION = 1.6.0
-WARN = full
-abs_builddir = /home/bruce/Fflas/benchmark/src/BLOCKING
-abs_srcdir = /home/bruce/Fflas/benchmark/src/BLOCKING
-abs_top_builddir = /home/bruce/Fflas
-abs_top_srcdir = /home/bruce/Fflas
-ac_ct_AR = ar
-ac_ct_CC = gcc
-ac_ct_CXX = g++
-ac_ct_DUMPBIN =
-am__leading_dot = .
-am__tar = $${TAR-tar} chof - "$$tardir"
-am__untar = $${TAR-tar} xf -
-bindir = ${exec_prefix}/bin
-build = x86_64-unknown-linux-gnu
-build_alias =
-build_cpu = x86_64
-build_os = linux-gnu
-build_vendor = unknown
-builddir = .
-datadir = ${datarootdir}
-datarootdir = ${prefix}/share
-docdir = ${datarootdir}/doc/${PACKAGE_TARNAME}
-dvidir = ${docdir}
-exec_prefix = ${prefix}
-host = x86_64-unknown-linux-gnu
-host_alias =
-host_cpu = x86_64
-host_os = linux-gnu
-host_vendor = unknown
-htmldir = ${docdir}
-includedir = ${prefix}/include
-infodir = ${datarootdir}/info
-install_sh = ${SHELL} /home/bruce/Fflas/build-aux/install-sh
-libdir = ${exec_prefix}/lib
-libexecdir = ${exec_prefix}/libexec
-localedir = ${datarootdir}/locale
-localstatedir = ${prefix}/var
-mandir = ${datarootdir}/man
-mkdir_p = /bin/mkdir -p
-oldincludedir = /usr/include
-pdfdir = ${docdir}
-prefix = /tmp
-program_transform_name = s,x,x,
-psdir = ${docdir}
-sbindir = ${exec_prefix}/sbin
-sharedstatedir = ${prefix}/com
-srcdir = .
-sysconfdir = ${prefix}/etc
-target_alias =
-top_build_prefix = ../../../
-top_builddir = ../../..
-top_srcdir = ../../..
-
-#
-# Nothing yet
-EXTRA_DIST = gnucommand Makefile Makefile.am mesure.sh mulMM.C plot1-mulMM tblockmat.C
-OPTFLAGS = -O7 -funroll-all-loops -frerun-loop-opt \
- -fexpensive-optimizations -felide-constructors -ffast-math \
- -fforce-addr -fforce-mem -fstrength-reduce -fstrict-aliasing
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps benchmark/src/BLOCKING/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps benchmark/src/BLOCKING/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-tags: TAGS
-TAGS:
-
-ctags: CTAGS
-CTAGS:
-
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: install-am install-strip
-
-.PHONY: all all-am check check-am clean clean-generic clean-libtool \
- distclean distclean-generic distclean-libtool distdir dvi \
- dvi-am html html-am info info-am install install-am \
- install-data install-data-am install-dvi install-dvi-am \
- install-exec install-exec-am install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am
-
-#OPTFLAGS+=-mtune=pentium -march=pentium
-
-tblockmat-%:tblockmat.C
- ${CXX} ${OPTFLAGS} ${OPT} -o $@ -DNDIM=$* $< ${CXXFLAGS} ${LDFLAGS} ${LOADLIBES}
-tblockmatdouble-%:tblockmatdouble.C
- ${CXX} ${OPTFLAGS} ${OPT} -o $@ -DNDIM=$* $< ${CXXFLAGS} ${LDFLAGS} ${LOADLIBES}
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/benchmark/src/BLOCKING/Makefile.am b/benchmark/src/BLOCKING/Makefile.am
deleted file mode 100644
index 6036dea..0000000
--- a/benchmark/src/BLOCKING/Makefile.am
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-#
-# Nothing yet
-EXTRA_DIST=gnucommand Makefile Makefile.am mesure.sh mulMM.C plot1-mulMM tblockmat.C
-
-OPTFLAGS= -O7
-OPTFLAGS+=-funroll-all-loops
-OPTFLAGS+=-frerun-loop-opt
-OPTFLAGS+=-fexpensive-optimizations -felide-constructors
-OPTFLAGS+=-ffast-math
-OPTFLAGS+=-fforce-addr -fforce-mem -fstrength-reduce
-OPTFLAGS+=-fstrict-aliasing
-#OPTFLAGS+=-mtune=pentium -march=pentium
-
-tblockmat-%:tblockmat.C
- ${CXX} ${OPTFLAGS} ${OPT} -o $@ -DNDIM=$* $< ${CXXFLAGS} ${LDFLAGS} ${LOADLIBES}
-tblockmatdouble-%:tblockmatdouble.C
- ${CXX} ${OPTFLAGS} ${OPT} -o $@ -DNDIM=$* $< ${CXXFLAGS} ${LDFLAGS} ${LOADLIBES}
diff --git a/benchmark/src/BLOCKING/Makefile.in b/benchmark/src/BLOCKING/Makefile.in
deleted file mode 100644
index c606c66..0000000
--- a/benchmark/src/BLOCKING/Makefile.in
+++ /dev/null
@@ -1,450 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = benchmark/src/BLOCKING
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-
-#
-# Nothing yet
-EXTRA_DIST = gnucommand Makefile Makefile.am mesure.sh mulMM.C plot1-mulMM tblockmat.C
-OPTFLAGS = -O7 -funroll-all-loops -frerun-loop-opt \
- -fexpensive-optimizations -felide-constructors -ffast-math \
- -fforce-addr -fforce-mem -fstrength-reduce -fstrict-aliasing
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps benchmark/src/BLOCKING/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps benchmark/src/BLOCKING/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-tags: TAGS
-TAGS:
-
-ctags: CTAGS
-CTAGS:
-
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: install-am install-strip
-
-.PHONY: all all-am check check-am clean clean-generic clean-libtool \
- distclean distclean-generic distclean-libtool distdir dvi \
- dvi-am html html-am info info-am install install-am \
- install-data install-data-am install-dvi install-dvi-am \
- install-exec install-exec-am install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am
-
-#OPTFLAGS+=-mtune=pentium -march=pentium
-
-tblockmat-%:tblockmat.C
- ${CXX} ${OPTFLAGS} ${OPT} -o $@ -DNDIM=$* $< ${CXXFLAGS} ${LDFLAGS} ${LOADLIBES}
-tblockmatdouble-%:tblockmatdouble.C
- ${CXX} ${OPTFLAGS} ${OPT} -o $@ -DNDIM=$* $< ${CXXFLAGS} ${LDFLAGS} ${LOADLIBES}
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/benchmark/src/BLOCKING/gnucommand b/benchmark/src/BLOCKING/gnucommand
deleted file mode 100644
index ed9342c..0000000
--- a/benchmark/src/BLOCKING/gnucommand
+++ /dev/null
@@ -1,49 +0,0 @@
-
-# Copyright (c) FFLAS-FFPACK
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-#set encoding iso_8859_1
-set xlabel "Matrix order"
-set ylabel "Mfops "
-set title "Classic matrix multiplication over Z/65521Z on a XEON, 3.6 GHz"
-#set title "Classic matrix multiplication over Z/65521Z on a PIV, 3.4 GHz" tc lt 2
-set key below
-#set logscale y
-#set logscale x
-#set xtics tc lt 2
-#set ytics tc lt 2
-#set grid noxtics ytics lt 2
-set grid noxtics ytics
-set border 4095 lt 7
-set style line 5 pt 2
-
-
-plot [] [0:8999] "dgemm-goto-xeon" using 2:(($2/$6*$2/1000)*(2*$2-1)/1000) title "GOTO::dgemm" with linespoint 2
-replot "fgemm-goto-xeon" using 2:(($2/$6*$2/1000)*(2*$2-1)/1000) title "FFLAS::fgemm" with lines 7
-replot "blockmod-40" using 1:3 title "long-block-40" with linespoint 1
-#replot "blockmod-40" using 1:5 title "mod-block-40" with linespoint 3
-replot "plot1-mulMM" using 1:3 title "long-noblock" with linespoint 5
-#replot "plot1-mulMM" using 1:3 title "StandardMod" with linespoint 4
-set terminal postscript eps enhanced color solid lw 4
-set output "gemm.eps"
-replot
-#set terminal x11
-#replot
-#pause(-1)
diff --git a/benchmark/src/BLOCKING/mesure.sh b/benchmark/src/BLOCKING/mesure.sh
deleted file mode 100755
index 2358e0a..0000000
--- a/benchmark/src/BLOCKING/mesure.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/csh -f
-# Copyright (c) FFLAS-FFPACK
-# Written by T. Gautier, jgdumas
-# adapted from Givaro.
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-
-set block = $1
-
-#foreach i (10 12 14 16 18 20 22 24 26 28 30 50 75 100 125 150)
-foreach i (175 200 225)
- @ N = $i * $block
- tblockmat-$block $N 1
-end
-
diff --git a/benchmark/src/BLOCKING/mulMM.C b/benchmark/src/BLOCKING/mulMM.C
deleted file mode 100644
index e60ca03..0000000
--- a/benchmark/src/BLOCKING/mulMM.C
+++ /dev/null
@@ -1,185 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/* Copyright (c) FFLAS-FFPACK
-* ========LICENCE========
-* This file is part of the library FFLAS-FFPACK.
-*
-* FFLAS-FFPACK is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Lesser General Public
-* License as published by the Free Software Foundation; either
-* version 2.1 of the License, or (at your option) any later version.
-*
-* This library is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* Lesser General Public License for more details.
-*
-* You should have received a copy of the GNU Lesser General Public
-* License along with this library; if not, write to the Free Software
-* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-* ========LICENCE========
-*/
-
-
-#include <iostream>
-#include <fstream>
-#include <time.h>
-#include <stdlib.h>
-clock_t totaltime;
-
-
-typedef long Base;
-long P;
-double dP;
-
-inline void reduce(long& r) { r %= P; }
-//inline void reduce(double& r) { r = fmod(r,dP); }
-
-
-template<class T>
-class Matrix {
- const int sq;
- const int size;
-public:
- Matrix(int s): size(s),sq(s*s), _data(new T[sq]) {};
- int rowdim() const { return size; }
- int coldim() const { return size; }
- inline T& operator[](int i) { return _data[i]; }
- inline T& operator()(int i, int j) { return _data[i*size+j]; }
- inline const T& operator()(int i, int j) const { return _data[i*size+j]; }
- inline Matrix<T>& operator= (const T& val )
- { for (int i=size*size; --i; ) _data[i] =val; return *this; }
- inline void mul( const Matrix<T>& A, const Matrix<T>& B)
- {
- register const T* Ai = A._data;
- T* Ci = _data;
- for (int i=size; --i; Ai += size, Ci += size)
- {
- for (int j=size; --j; )
- {
- register const T* Bj = &B._data[j];
- T sum =0;
- for (int k=size; --k; Bj+=size)
- sum += Ai[k] * *Bj;
- Ci[j] = sum;
- }
- }
- }
-
- inline void mulmod( const Matrix<T>& A, const Matrix<T>& B)
- {
- mul(A,B);
- for(int i=0; i<this->sq; ++i)
- reduce(this->_data[i]);
- }
-
-
- inline void addmul( const Matrix<T>& A, const Matrix<T>& B)
- {
- register const T* Ai = A._data;
- T* Ci = _data;
- for (int i=size; --i; Ai += size, Ci += size)
- {
- for (int j=size; --j; )
- {
- register const T* Bj = &B._data[j];
- T sum =0;
- for (int k=size; --k; Bj+=size)
- sum += Ai[k] * *Bj;
- Ci[j] += sum;
- }
- }
- }
- inline void addmulmod( const Matrix<T>& A, const Matrix<T>& B)
- {
- addmul(A,B);
- for(int i=0; i<this->sq; ++i)
- reduce(this->_data[i]);
- }
-
-private:
- T * _data;
-};
-
-
-template<class T>
-void print_mat( int dim, const T* A)
-{
- int i,j;
- for (i=0; i<dim; i++)
- {
- for (j=0; j<dim; j++)
- std::cout << A[i+j*dim] << " ";
- std::cout << std::endl;
- }
-}
-
-
-int main(int argc, char** argv)
-{
- int DIM = atoi(argv[1]) ;
- int NB = (argc>2?atoi(argv[2]):1) ;
- P = (argc>3?atoi(argv[3]):65521);
- dP = (double)P;
- char * nomfich = new char[6+strlen(argv[0])]; sprintf(nomfich, "plot1-%s", argv[0]);
- std::cerr << nomfich << std::endl;
- std::ofstream plot1(nomfich, std::ios::app);
- delete [] nomfich;
-
- Matrix<Base> AA(DIM);
- Matrix<Base> BB(DIM);
- Matrix<Base> CC(DIM);
- int i,j ;
- srand((int)clock());
- int MAXITER = 3;
- double MoyenneDesTemps = 0.0;
- double MoyenneDesTempsMod = 0.0;
- double coef = DIM;
- coef = coef*coef*(2.0*coef-1)*1e-6*NB;
- double seconds;
-
- for (int k=0; k<MAXITER; k++) {
- for (i=0; i<DIM; i++) {
- for (j=0 ; j<DIM ; j++) {
- AA(i,j) = rand();
- BB(i,j) = rand();
- CC(i,j) = rand();
- }
- }
-
- CC.mul(AA,BB);
- totaltime=clock();
- for (i=0; i<NB; i++) CC.mul(AA,BB);
- totaltime=clock()-totaltime;
- seconds = (double)totaltime/CLOCKS_PER_SEC;
-
- std::cerr << "\nStandard Dim:" << DIM << ", Nb: " << NB << std::endl;
- std::cerr << "Mops:" << coef/seconds << std::endl;
- std::cerr << "time:" << seconds/NB << std::endl;
- MoyenneDesTemps += seconds;
-
-
-
- CC.mulmod(AA,BB);
- totaltime=clock();
- for (i=0; i<NB; i++) CC.mulmod(AA,BB);
- totaltime=clock()-totaltime;
- seconds = (double)totaltime/CLOCKS_PER_SEC;
-
- std::cerr << "\nStandardMod Dim:" << DIM << ", Nb: " << NB << std::endl;
- std::cerr << "Mops:" << coef/seconds << std::endl;
- std::cerr << "time:" << seconds/NB << std::endl;
- MoyenneDesTempsMod += seconds;
- }
-
- MoyenneDesTemps /= (double)(MAXITER);
- MoyenneDesTempsMod /= (double)(MAXITER);
-
-
- plot1 << DIM << '\t' << MoyenneDesTemps/(double)NB << '\t' << coef/MoyenneDesTemps;
- plot1 << '\t' << MoyenneDesTempsMod/(double)NB << '\t' << coef/MoyenneDesTempsMod << std::endl;
- plot1.close();
-
- return 0 ;
-};
diff --git a/benchmark/src/BLOCKING/plot1-mulMM b/benchmark/src/BLOCKING/plot1-mulMM
deleted file mode 100644
index d695b42..0000000
--- a/benchmark/src/BLOCKING/plot1-mulMM
+++ /dev/null
@@ -1,21 +0,0 @@
-100 0.00183333 1085.45 0.00166667 1194
-160 0.01 816.64 0.0095 859.621
-200 0.0125 1276.8 0.0121667 1311.78
-240 0.0288333 956.892 0.025 1103.62
-280 0.0783333 559.476 0.0791667 553.587
-320 0.1195 547.562 0.120167 544.524
-360 0.185333 502.783 0.179 520.572
-400 0.2545 502.318 0.256167 499.05
-480 0.4925 448.637 0.494333 446.973
-500 0.604167 413.379 0.598 417.642
-520 0.7 401.351 0.703333 399.449
-560 0.91 385.625 0.913333 384.217
-640 1.54333 339.446 1.53 342.404
-720 2.72 274.256 2.75667 270.609
-800 3.70667 276.086 3.75667 272.412
-880 5.56667 244.701 5.57 244.555
-960 7.34333 240.838 7.35333 240.51
-1000 8.97667 222.688 8.98 222.606
-2000 79.6067 200.938 79.7733 200.518
-3000 287.157 188.019 288.177 187.354
-5000 1515.07 164.992
diff --git a/benchmark/src/BLOCKING/tblockmat.C b/benchmark/src/BLOCKING/tblockmat.C
deleted file mode 100644
index 975db18..0000000
--- a/benchmark/src/BLOCKING/tblockmat.C
+++ /dev/null
@@ -1,230 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-
-/* Copyright (c) FFLAS-FFPACK
-* Written by T. Gautier, jgdumas
-* adapted from Givaro.
-* ========LICENCE========
-* This file is part of the library FFLAS-FFPACK.
-*
-* FFLAS-FFPACK is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Lesser General Public
-* License as published by the Free Software Foundation; either
-* version 2.1 of the License, or (at your option) any later version.
-*
-* This library is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* Lesser General Public License for more details.
-*
-* You should have received a copy of the GNU Lesser General Public
-* License along with this library; if not, write to the Free Software
-* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-* ========LICENCE========
-*/
-
-
-#include <iostream>
-#include <fstream>
-#include <time.h>
-#include <stdlib.h>
-clock_t totaltime;
-
-
-// Taille des blocs
-#ifndef NDIM
-#define NDIM TOREPLACE
-#endif
-
-
-typedef long Base;
-long P;
-double dP;
-
-inline void reduce(long& r) { r %= P; }
-//inline void reduce(double& r) { r = fmod(r,dP); }
-
-
-template<class T, int size>
-class FixedMatrix {
- const int sq;
-public:
- FixedMatrix():sq(size*size) {};
- int rowdim() const { return size; }
- int coldim() const { return size; }
- inline T& operator[](int i) { return _data[i]; }
- inline T& operator()(int i, int j) { return _data[i*size+j]; }
- inline const T& operator()(int i, int j) const { return _data[i*size+j]; }
- inline FixedMatrix<T,size>& operator= (const T& val )
- { for (int i=size*size; --i; ) _data[i] =val; return *this; }
- inline void mul( const FixedMatrix<T,size>& A, const FixedMatrix<T,size>& B)
- {
- register const T* Ai = A._data;
- T* Ci = _data;
- for (int i=size; --i; Ai += size, Ci += size)
- {
- for (int j=size; --j; )
- {
- register const T* Bj = &B._data[j];
- T sum =0;
- for (int k=size; --k; Bj+=size)
- sum += Ai[k] * *Bj;
- Ci[j] = sum;
- }
- }
- }
- inline void addmul( const FixedMatrix<T,size>& A, const FixedMatrix<T,size>& B)
- {
- register const T* Ai = A._data;
- T* Ci = _data;
- for (int i=size; --i; Ai += size, Ci += size)
- {
- for (int j=size; --j; )
- {
- register const T* Bj = &B._data[j];
- T sum =0;
- for (int k=size; --k; Bj+=size)
- sum += Ai[k] * *Bj;
- Ci[j] += sum;
- }
- }
- }
- inline void addmulmod( const FixedMatrix<T,size>& A, const FixedMatrix<T,size>& B)
- {
- addmul(A,B);
- for(int i=0; i<this->sq; ++i)
- reduce(this->_data[i]);
- }
-
-private:
- T _data[size*size];
-};
-
-
-
-template<class T>
-class Matrix {
-public:
- Matrix(int dim) {
- _dim = dim;
- _nblock = dim / NDIM;
- _data = new FixedMatrix<T,NDIM>[_nblock*_nblock];
- }
- ~Matrix() { delete [] _data; }
- int rowdim() const { return _dim; }
- int coldim() const { return _dim; }
- inline FixedMatrix<T,NDIM>& operator[](int i)
- { return _data[i]; }
- inline FixedMatrix<T,NDIM>& operator()(int i, int j)
- { return _data[i*_nblock+j]; }
- inline const FixedMatrix<T,NDIM>& operator()(int i, int j) const
- { return _data[i*_nblock+j]; }
- inline void mul( const Matrix<T>& A, const Matrix<T>& B)
- {
- for (int i=0; i<_nblock; ++i )
- for (int j=0; j<_nblock; ++j )
- {
- (*this)(i,j)=0;
- for (int k=0; k<_nblock; ++k ) {
- (*this)(i,j).addmul( A(i,k), B(k,j));
- }
- }
- }
- inline void mulmod( const Matrix<T>& A, const Matrix<T>& B)
- {
- for (int i=0; i<_nblock; ++i )
- for (int j=0; j<_nblock; ++j )
- {
- (*this)(i,j)=0;
- for (int k=0; k<_nblock; ++k ) {
- (*this)(i,j).addmulmod( A(i,k), B(k,j));
- }
- }
- }
-private:
- int _dim;
- int _nblock;
- FixedMatrix<T,NDIM>* _data;
-};
-
-
-template<class T>
-void print_mat( int dim, const T* A)
-{
- int i,j;
- for (i=0; i<dim; i++)
- {
- for (j=0; j<dim; j++)
- std::cout << A[i+j*dim] << " ";
- std::cout << std::endl;
- }
-}
-
-
-int main(int argc, char** argv)
-{
- int DIM = atoi(argv[1]) ;
- int NB = (argc>2?atoi(argv[2]):1) ;
- P = (argc>3?atoi(argv[3]):65521);
- dP = (double)P;
- char * nomfich = new char[6+strlen(argv[0])]; sprintf(nomfich, "plot1-%s", argv[0]);
- std::cerr << nomfich << std::endl;
- std::ofstream plot1(nomfich, std::ios::app);
- delete [] nomfich;
-
- Matrix<Base> AA(DIM);
- Matrix<Base> BB(DIM);
- Matrix<Base> CC(DIM);
- int i,j ;
- srand((int)clock());
- int MAXITER = 3;
- double MoyenneDesTemps = 0.0;
- double MoyenneDesTempsMod = 0.0;
- double coef = DIM;
- coef = coef*coef*(2.0*coef-1)*1e-6*NB;
- double seconds;
-
- for (int k=0; k<MAXITER; k++) {
- for (i=0; i<(DIM*DIM)/NDIM/NDIM; i++) {
- for (j=0 ; j<NDIM*NDIM ; j++) {
- AA[i][j] = rand();
- BB[i][j] = rand();
- CC[i][j] = rand();
- }
- }
-
- CC.mul(AA,BB);
- totaltime=clock();
- for (i=0; i<NB; i++) CC.mul(AA,BB);
- totaltime=clock()-totaltime;
- seconds = (double)totaltime/CLOCKS_PER_SEC;
-
- std::cerr << "\nMUL3 Dim:" << DIM << ", Nb: " << NB << std::endl;
- std::cerr << "Mops:" << coef/seconds << std::endl;
- std::cerr << "time:" << seconds/NB << std::endl;
- MoyenneDesTemps += seconds;
-
-
-
- CC.mulmod(AA,BB);
- totaltime=clock();
- for (i=0; i<NB; i++) CC.mulmod(AA,BB);
- totaltime=clock()-totaltime;
- seconds = (double)totaltime/CLOCKS_PER_SEC;
-
- std::cerr << "\nMULMOD3 Dim:" << DIM << ", Nb: " << NB << std::endl;
- std::cerr << "Mops:" << coef/seconds << std::endl;
- std::cerr << "time:" << seconds/NB << std::endl;
- MoyenneDesTempsMod += seconds;
- }
-
- MoyenneDesTemps /= (double)(MAXITER);
- MoyenneDesTempsMod /= (double)(MAXITER);
-
-
- plot1 << DIM << '\t' << MoyenneDesTemps/(double)NB << '\t' << coef/MoyenneDesTemps;
- plot1 << '\t' << MoyenneDesTempsMod/(double)NB << '\t' << coef/MoyenneDesTempsMod << std::endl;
- plot1.close();
- return 0 ;
-};
diff --git a/benchmark/src/FFLAS_FFPACK/Makefile.am b/benchmark/src/FFLAS_FFPACK/Makefile.am
deleted file mode 100644
index 81d7180..0000000
--- a/benchmark/src/FFLAS_FFPACK/Makefile.am
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-#
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-SUBDIRS =
-
-BENCHMARK=check-fgemm check-ftrsm check-ftrtri check-inverse check-lqup \
- check-wino
-
-bench: $(BENCHMARK)
-
-AM_CPPFLAGS=-I$(top_srcdir)
-AM_CXXFLAGS = @DEFAULT_CFLAGS@
-AM_CPPFLAGS += $(OPTFLAGS) -I$(top_srcdir)/fflas-ffpack/utils/ -I$(top_srcdir)/fflas-ffpack/fflas/ -I$(top_srcdir)/fflas-ffpack/ffpack -I$(top_srcdir)/fflas-ffpack/field $(CBLAS_FLAG) $(GMP_CFLAGS) $(GIVARO_CFLAGS)
-LDADD = $(BLAS_LIBS) $(GIVARO_LIBS) $(GMP_LIBS)
-AM_LDFLAGS=-static
-
-
-
-# EXTRA_DIST=
-
-CLEANFILES= \
- $(BENCHMARK)
-
-EXTRA_PROGRAMS = \
- $(BENCHMARK)
-
-check_fgemm_SOURCES = check-fgemm.C
-check_wino_SOURCES = check-wino.C
-check_ftrsm_SOURCES = check-ftrsm.C
-check_ftrtri_SOURCES = check-ftrtri.C
-check_inverse_SOURCES = check-inverse.C
-check_lqup_SOURCES = check-lqup.C
-
-# for compilation of new benches
-FFLASFFPACK_BIN=@bindir@
-%:%.C
- $(CXX) $(CXXFLAGS) $(AM_CXXFLAGS) $(OPTFLAGS) ${INCLUDES} $(AM_CPPFLAGS) $*.C -o $@ $(LDFLAGS) $(LDADD) $(LOADLIBES)
diff --git a/benchmark/src/FFLAS_FFPACK/Makefile.in b/benchmark/src/FFLAS_FFPACK/Makefile.in
deleted file mode 100644
index 040ce7c..0000000
--- a/benchmark/src/FFLAS_FFPACK/Makefile.in
+++ /dev/null
@@ -1,754 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-#
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-EXTRA_PROGRAMS = $(am__EXEEXT_1)
-subdir = benchmark/src/FFLAS_FFPACK
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-am__EXEEXT_1 = check-fgemm$(EXEEXT) check-ftrsm$(EXEEXT) \
- check-ftrtri$(EXEEXT) check-inverse$(EXEEXT) \
- check-lqup$(EXEEXT) check-wino$(EXEEXT)
-am_check_fgemm_OBJECTS = check-fgemm.$(OBJEXT)
-check_fgemm_OBJECTS = $(am_check_fgemm_OBJECTS)
-check_fgemm_LDADD = $(LDADD)
-am__DEPENDENCIES_1 =
-check_fgemm_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1)
-am_check_ftrsm_OBJECTS = check-ftrsm.$(OBJEXT)
-check_ftrsm_OBJECTS = $(am_check_ftrsm_OBJECTS)
-check_ftrsm_LDADD = $(LDADD)
-check_ftrsm_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1)
-am_check_ftrtri_OBJECTS = check-ftrtri.$(OBJEXT)
-check_ftrtri_OBJECTS = $(am_check_ftrtri_OBJECTS)
-check_ftrtri_LDADD = $(LDADD)
-check_ftrtri_DEPENDENCIES = $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
-am_check_inverse_OBJECTS = check-inverse.$(OBJEXT)
-check_inverse_OBJECTS = $(am_check_inverse_OBJECTS)
-check_inverse_LDADD = $(LDADD)
-check_inverse_DEPENDENCIES = $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
-am_check_lqup_OBJECTS = check-lqup.$(OBJEXT)
-check_lqup_OBJECTS = $(am_check_lqup_OBJECTS)
-check_lqup_LDADD = $(LDADD)
-check_lqup_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1)
-am_check_wino_OBJECTS = check-wino.$(OBJEXT)
-check_wino_OBJECTS = $(am_check_wino_OBJECTS)
-check_wino_LDADD = $(LDADD)
-check_wino_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1)
-DEFAULT_INCLUDES = -I. at am__isrc@ -I$(top_builddir)
-depcomp =
-am__depfiles_maybe =
-CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
- $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
- $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-CXXLD = $(CXX)
-CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
- $(LDFLAGS) -o $@
-SOURCES = $(check_fgemm_SOURCES) $(check_ftrsm_SOURCES) \
- $(check_ftrtri_SOURCES) $(check_inverse_SOURCES) \
- $(check_lqup_SOURCES) $(check_wino_SOURCES)
-DIST_SOURCES = $(check_fgemm_SOURCES) $(check_ftrsm_SOURCES) \
- $(check_ftrtri_SOURCES) $(check_inverse_SOURCES) \
- $(check_lqup_SOURCES) $(check_wino_SOURCES)
-RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
- html-recursive info-recursive install-data-recursive \
- install-dvi-recursive install-exec-recursive \
- install-html-recursive install-info-recursive \
- install-pdf-recursive install-ps-recursive install-recursive \
- installcheck-recursive installdirs-recursive pdf-recursive \
- ps-recursive uninstall-recursive
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
- distclean-recursive maintainer-clean-recursive
-AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
- $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
- distdir
-ETAGS = etags
-CTAGS = ctags
-DIST_SUBDIRS = $(SUBDIRS)
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-am__relativize = \
- dir0=`pwd`; \
- sed_first='s,^\([^/]*\)/.*$$,\1,'; \
- sed_rest='s,^[^/]*/*,,'; \
- sed_last='s,^.*/\([^/]*\)$$,\1,'; \
- sed_butlast='s,/*[^/]*$$,,'; \
- while test -n "$$dir1"; do \
- first=`echo "$$dir1" | sed -e "$$sed_first"`; \
- if test "$$first" != "."; then \
- if test "$$first" = ".."; then \
- dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
- dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
- else \
- first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
- if test "$$first2" = "$$first"; then \
- dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
- else \
- dir2="../$$dir2"; \
- fi; \
- dir0="$$dir0"/"$$first"; \
- fi; \
- fi; \
- dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
- done; \
- reldir="$$dir2"
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-SUBDIRS =
-BENCHMARK = check-fgemm check-ftrsm check-ftrtri check-inverse check-lqup \
- check-wino
-
-AM_CPPFLAGS = -I$(top_srcdir) $(OPTFLAGS) \
- -I$(top_srcdir)/fflas-ffpack/utils/ \
- -I$(top_srcdir)/fflas-ffpack/fflas/ \
- -I$(top_srcdir)/fflas-ffpack/ffpack \
- -I$(top_srcdir)/fflas-ffpack/field $(CBLAS_FLAG) $(GMP_CFLAGS) \
- $(GIVARO_CFLAGS)
-AM_CXXFLAGS = @DEFAULT_CFLAGS@
-LDADD = $(BLAS_LIBS) $(GIVARO_LIBS) $(GMP_LIBS)
-AM_LDFLAGS = -static
-
-# EXTRA_DIST=
-CLEANFILES = \
- $(BENCHMARK)
-
-check_fgemm_SOURCES = check-fgemm.C
-check_wino_SOURCES = check-wino.C
-check_ftrsm_SOURCES = check-ftrsm.C
-check_ftrtri_SOURCES = check-ftrtri.C
-check_inverse_SOURCES = check-inverse.C
-check_lqup_SOURCES = check-lqup.C
-
-# for compilation of new benches
-FFLASFFPACK_BIN = @bindir@
-all: all-recursive
-
-.SUFFIXES:
-.SUFFIXES: .C .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps benchmark/src/FFLAS_FFPACK/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps benchmark/src/FFLAS_FFPACK/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-check-fgemm$(EXEEXT): $(check_fgemm_OBJECTS) $(check_fgemm_DEPENDENCIES) $(EXTRA_check_fgemm_DEPENDENCIES)
- @rm -f check-fgemm$(EXEEXT)
- $(CXXLINK) $(check_fgemm_OBJECTS) $(check_fgemm_LDADD) $(LIBS)
-check-ftrsm$(EXEEXT): $(check_ftrsm_OBJECTS) $(check_ftrsm_DEPENDENCIES) $(EXTRA_check_ftrsm_DEPENDENCIES)
- @rm -f check-ftrsm$(EXEEXT)
- $(CXXLINK) $(check_ftrsm_OBJECTS) $(check_ftrsm_LDADD) $(LIBS)
-check-ftrtri$(EXEEXT): $(check_ftrtri_OBJECTS) $(check_ftrtri_DEPENDENCIES) $(EXTRA_check_ftrtri_DEPENDENCIES)
- @rm -f check-ftrtri$(EXEEXT)
- $(CXXLINK) $(check_ftrtri_OBJECTS) $(check_ftrtri_LDADD) $(LIBS)
-check-inverse$(EXEEXT): $(check_inverse_OBJECTS) $(check_inverse_DEPENDENCIES) $(EXTRA_check_inverse_DEPENDENCIES)
- @rm -f check-inverse$(EXEEXT)
- $(CXXLINK) $(check_inverse_OBJECTS) $(check_inverse_LDADD) $(LIBS)
-check-lqup$(EXEEXT): $(check_lqup_OBJECTS) $(check_lqup_DEPENDENCIES) $(EXTRA_check_lqup_DEPENDENCIES)
- @rm -f check-lqup$(EXEEXT)
- $(CXXLINK) $(check_lqup_OBJECTS) $(check_lqup_LDADD) $(LIBS)
-check-wino$(EXEEXT): $(check_wino_OBJECTS) $(check_wino_DEPENDENCIES) $(EXTRA_check_wino_DEPENDENCIES)
- @rm -f check-wino$(EXEEXT)
- $(CXXLINK) $(check_wino_OBJECTS) $(check_wino_LDADD) $(LIBS)
-
-mostlyclean-compile:
- -rm -f *.$(OBJEXT)
-
-distclean-compile:
- -rm -f *.tab.c
-
-.C.o:
- $(CXXCOMPILE) -c -o $@ $<
-
-.C.obj:
- $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
-
-.C.lo:
- $(LTCXXCOMPILE) -c -o $@ $<
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-
-# This directory's subdirectories are mostly independent; you can cd
-# into them and run `make' without going through this Makefile.
-# To change the values of `make' variables: instead of editing Makefiles,
-# (1) if the variable is set in `config.status', edit `config.status'
-# (which will cause the Makefiles to be regenerated when you run `make');
-# (2) otherwise, pass the desired values on the `make' command line.
-$(RECURSIVE_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- target=`echo $@ | sed s/-recursive//`; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- dot_seen=yes; \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done; \
- if test "$$dot_seen" = "no"; then \
- $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
- fi; test -z "$$fail"
-
-$(RECURSIVE_CLEAN_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- case "$@" in \
- distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
- *) list='$(SUBDIRS)' ;; \
- esac; \
- rev=''; for subdir in $$list; do \
- if test "$$subdir" = "."; then :; else \
- rev="$$subdir $$rev"; \
- fi; \
- done; \
- rev="$$rev ."; \
- target=`echo $@ | sed s/-recursive//`; \
- for subdir in $$rev; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done && test -z "$$fail"
-tags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
- done
-ctags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
- done
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
- include_option=--etags-include; \
- empty_fix=.; \
- else \
- include_option=--include; \
- empty_fix=; \
- fi; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- test ! -f $$subdir/TAGS || \
- set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
- fi; \
- done; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
- @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- $(am__make_dryrun) \
- || test -d "$(distdir)/$$subdir" \
- || $(MKDIR_P) "$(distdir)/$$subdir" \
- || exit 1; \
- dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
- $(am__relativize); \
- new_distdir=$$reldir; \
- dir1=$$subdir; dir2="$(top_distdir)"; \
- $(am__relativize); \
- new_top_distdir=$$reldir; \
- echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
- echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
- ($(am__cd) $$subdir && \
- $(MAKE) $(AM_MAKEFLAGS) \
- top_distdir="$$new_top_distdir" \
- distdir="$$new_distdir" \
- am__remove_distdir=: \
- am__skip_length_check=: \
- am__skip_mode_fix=: \
- distdir) \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-recursive
-all-am: Makefile
-installdirs: installdirs-recursive
-installdirs-am:
-install: install-recursive
-install-exec: install-exec-recursive
-install-data: install-data-recursive
-uninstall: uninstall-recursive
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-recursive
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
- -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-recursive
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-recursive
- -rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
- distclean-tags
-
-dvi: dvi-recursive
-
-dvi-am:
-
-html: html-recursive
-
-html-am:
-
-info: info-recursive
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-recursive
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-recursive
-
-install-html-am:
-
-install-info: install-info-recursive
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-recursive
-
-install-pdf-am:
-
-install-ps: install-ps-recursive
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-recursive
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-recursive
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic \
- mostlyclean-libtool
-
-pdf: pdf-recursive
-
-pdf-am:
-
-ps: ps-recursive
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \
- install-am install-strip tags-recursive
-
-.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
- all all-am check check-am clean clean-generic clean-libtool \
- ctags ctags-recursive distclean distclean-compile \
- distclean-generic distclean-libtool distclean-tags distdir dvi \
- dvi-am html html-am info info-am install install-am \
- install-data install-data-am install-dvi install-dvi-am \
- install-exec install-exec-am install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs installdirs-am \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
- pdf pdf-am ps ps-am tags tags-recursive uninstall uninstall-am
-
-
-bench: $(BENCHMARK)
-%:%.C
- $(CXX) $(CXXFLAGS) $(AM_CXXFLAGS) $(OPTFLAGS) ${INCLUDES} $(AM_CPPFLAGS) $*.C -o $@ $(LDFLAGS) $(LDADD) $(LOADLIBES)
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/benchmark/src/FFLAS_FFPACK/check-fgemm.C b/benchmark/src/FFLAS_FFPACK/check-fgemm.C
deleted file mode 100644
index 5e7b5f5..0000000
--- a/benchmark/src/FFLAS_FFPACK/check-fgemm.C
+++ /dev/null
@@ -1,98 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-//#include "goto-def.h"
-
-/* Copyright (c) FFLAS-FFPACK
-* Written by Clément Pernet <clement.pernet at imag.fr>
-* ========LICENCE========
-* This file is part of the library FFLAS-FFPACK.
-*
-* FFLAS-FFPACK is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Lesser General Public
-* License as published by the Free Software Foundation; either
-* version 2.1 of the License, or (at your option) any later version.
-*
-* This library is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* Lesser General Public License for more details.
-*
-* You should have received a copy of the GNU Lesser General Public
-* License along with this library; if not, write to the Free Software
-* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-* ========LICENCE========
-*/
-
-#include <iostream>
-
-#include "fflas-ffpack/config-blas.h"
-#include "fflas-ffpack/fflas/fflas.h"
-#include "fflas-ffpack/field/modular-balanced.h"
-#include "fflas-ffpack/utils/timer.h"
-#include "fflas-ffpack/utils/Matio.h"
-
-using namespace std;
-
-int main(int argc, char** argv) {
-
- // parameter: p, n, iteration, file1, file2
-
- int p = atoi(argv[1]);
- int n = atoi(argv[2]);
- size_t iter = atoi(argv[3]);
-
-
- // typedef FFPACK::Modular<double> Field;
- // typedef FFPACK::Modular<float> Field;
- // typedef FFPACK::ModularBalanced<double> Field;
- typedef FFPACK::ModularBalanced<float> Field;
- typedef Field::Element Element;
-
- Field F(p);
-
- Timer chrono;
- double time=0.0;// time2=0.0;
-
- Element * A, * B, * C;
-
- for (size_t i=0;i<iter;++i){
-
- if (argc > 4){
- A = read_field (F, argv[4], &n, &n);
- }
- else{
- Field::RandIter G(F);
- A = new Element[n*n];
- for (size_t j=0; j<(size_t)n*n; ++j)
- G.random (*(A+j));
- }
-
- if (argc == 6){
- B = read_field (F, argv[5], &n, &n);
- }
- else{
- Field::RandIter G(F);
- B = new Element[n*n];
- for (size_t j=0; j<(size_t)n*n; ++j)
- G.random(*(B+j));
- }
-
- C = new Element[n*n];
-
- chrono.clear();
- chrono.start();
- FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, n,n,n, F.one,
- A, n, B, n, F.zero, C,n);
- chrono.stop();
- time+=chrono.usertime();
-
- delete[] A;
- delete[] B;
- delete[] C;
- }
-
- std::cerr<<"n: "<<n<<" p: "<<p<<" time: "<<time/(double)iter<<std::endl;
-
- return 0;
-}
-
diff --git a/benchmark/src/FFLAS_FFPACK/check-ftrsm.C b/benchmark/src/FFLAS_FFPACK/check-ftrsm.C
deleted file mode 100644
index 1ebdb90..0000000
--- a/benchmark/src/FFLAS_FFPACK/check-ftrsm.C
+++ /dev/null
@@ -1,93 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/* Copyright (c) FFLAS-FFPACK
-* Written by Clément Pernet <clement.pernet at imag.fr>
-* ========LICENCE========
-* This file is part of the library FFLAS-FFPACK.
-*
-* FFLAS-FFPACK is free software: you can redistribute it and/or modify
-* it under the terms of the GNU Lesser General Public
-* License as published by the Free Software Foundation; either
-* version 2.1 of the License, or (at your option) any later version.
-*
-* This library is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* Lesser General Public License for more details.
-*
-* You should have received a copy of the GNU Lesser General Public
-* License along with this library; if not, write to the Free Software
-* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-* ========LICENCE========
-*/
-
-#include <iostream>
-
-#include "fflas-ffpack/fflas-ffpack.h"
-#include "fflas-ffpack/field/modular-balanced.h"
-#include "fflas-ffpack/utils/timer.h"
-#include "fflas-ffpack/utils/Matio.h"
-
-
-using namespace std;
-
-int main(int argc, char** argv) {
-
- // parameter: p, n, iteration, file1, file2
-
- int p = atoi(argv[1]);
- int n = atoi(argv[2]);
- size_t iter = atoi(argv[3]);
-
-
- typedef FFPACK::Modular<double> Field;
- typedef Field::Element Element;
-
- Field F(p);
- Element * A;
- Element * B;
-
- Timer chrono;
- double time=0.0;
-
- for (size_t i=0;i<iter;++i){
- Field::RandIter G(F);
- if (argc > 4){
- A = read_field (F, argv[4], &n, &n);
- }
- else{
- A = new Element[n*n];
- for (size_t j = 0; j< (size_t)n*n; ++j)
- G.random(*(A+j));
- }
-
- if (argc == 6){
- B = read_field (F, argv[5], &n, &n);
- }
- else{
- B = new Element[n*n];
- for (size_t j=0 ; j< (size_t)n*n; ++j)
- G.random(*(A+j));
- }
-
- for (size_t k=0;k<(size_t)n;++k)
- while (F.isZero( G.random(*(A+k*(n+1)))));
-
- chrono.clear();
- chrono.start();
- FFLAS::ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasLower, FFLAS::FflasNoTrans,
- FFLAS::FflasNonUnit, n,n, F.one, A, n, B, n);
-
- chrono.stop();
- time+=chrono.usertime();
- delete[] A;
- delete[] B;
-
- }
-
- cerr<<"n: "<<n<<" p: "<<p<<" time: "<<time/(double)iter<<endl;
-
-
- return 0;
-}
diff --git a/benchmark/src/FFLAS_FFPACK/check-wino.C b/benchmark/src/FFLAS_FFPACK/check-wino.C
deleted file mode 100644
index b30a388..0000000
--- a/benchmark/src/FFLAS_FFPACK/check-wino.C
+++ /dev/null
@@ -1,132 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-//#include "goto-def.h"
-
-/* Copyright (c) 2012 FFLAS-FFPACK
- * Written by J.G. Dumas <jgdumas at imag.fr>
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- */
-
-//#define LinBoxSrcOnly
-#include <iostream>
-#include <fstream>
- //#define _LINBOX_LINBOX_CONFIG_H
-// #define __FFLASFFPACK_CONFIGURATION
-// #include "fflas-ffpack/config-blas.h"
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
-#include "fflas-ffpack/field/modular-positive.h"
-#include "fflas-ffpack/fflas/fflas.h"
-#include "fflas-ffpack/utils/timer.h"
-
-#define CUBE(x) ((x)*(x)*(x))
-
-template<class Field>
-void launch_wino(const Field &F,
- const size_t &n,
- const size_t &NB,
- const size_t &winomax,
- const size_t &seed)
-{
-
- typedef typename Field::Element Element ;
- typename Field::RandIter G(F);
- F.write(std::cout<< "Field " ) << std::endl;
-
- double basetime(0.0), time(0.0);
-
- Element *A, *C;
- A = new Element[n*n];
- C = new Element[n*n];
- for (size_t i=0; i<n*n;++i)
- G.random(A[i]);
-
-
- Timer chrono;
- for(size_t i=0; i<NB; ++i) {
- chrono.start();
- FFLAS::fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
- n, n, n, F.one, A, n, A, n, F.zero, C, n);
- chrono.stop();
- basetime+= chrono.usertime();
- }
- std::cout << std::endl
- << "fgemm " << n << "x" << n << ": "
- << basetime/(double)NB << " s, "
- << (2.0/basetime*(double)NB*CUBE((double)n/100.0)) << " Mffops"
- << std::endl;
-
- for(size_t w=0; w<winomax; ++w) {
-
- time = 0. ;
- chrono.clear();
- for(size_t i=0; i<NB; ++i) {
- chrono.start();
- FFLAS::fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
- n, n, n, F.one, A, n, A, n, F.zero, C, n, w);
- chrono.stop();
- time+= chrono.usertime();
- }
- std::cout << w << "Wino " << n << "x" << n << ": "
- << time/(double)NB << " s, "
- << (2.0/time*(double)NB*CUBE((double)n/100.0)) << " Mffops"
- << std::endl;
- }
-
- std::cout << std::endl;
- std::cout << std::endl;
-
- delete[] A;
- delete[] C;
-}
-
- //using namespace LinBox;
-int main (int argc, char ** argv) {
- const size_t p = argc>1 ? atoi(argv[1]) : 65521;
- const size_t n = argc>2 ? atoi(argv[2]) : 1000;
- const size_t NB = argc>3 ? atoi(argv[3]) : 1;
- const size_t winomax = argc>4 ? atoi(argv[4]) : 8;
- const size_t seed = argc>5 ? atoi(argv[5]) : BaseTimer::seed() ;
- srand((unsigned int)seed);
-
- using namespace FFPACK;
- Modular<double> F1(p);
- Modular<float> F2(p);
- Modular<int> F3(p);
- ModularBalanced<double> F4(p);
- ModularBalanced<float> F5(p);
- ModularBalanced<int> F6((int)p);
- //! @bug no randiter in UnparametricField !!
- // UnparametricField<double> F7;
- // UnparametricField<float> F8;
- // UnparametricField<int> F9;
-
-
- launch_wino(F1,n,NB,winomax,seed);
- launch_wino(F2,n,NB,winomax,seed);
- launch_wino(F3,n,NB,winomax,seed);
- launch_wino(F4,n,NB,winomax,seed);
- launch_wino(F5,n,NB,winomax,seed);
- launch_wino(F6,n,NB,winomax,seed);
- // launch_wino(F7,n,NB,winomax,seed);
- // launch_wino(F8,n,NB,winomax,seed);
- // launch_wino(F9,n,NB,winomax,seed);
-
- return 0;
- }
-
diff --git a/benchmark/src/Makefile.in b/benchmark/src/Makefile.in
deleted file mode 100644
index 1e6eb89..0000000
--- a/benchmark/src/Makefile.in
+++ /dev/null
@@ -1,640 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-#
-# Nothing yet
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = benchmark/src
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
- html-recursive info-recursive install-data-recursive \
- install-dvi-recursive install-exec-recursive \
- install-html-recursive install-info-recursive \
- install-pdf-recursive install-ps-recursive install-recursive \
- installcheck-recursive installdirs-recursive pdf-recursive \
- ps-recursive uninstall-recursive
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
- distclean-recursive maintainer-clean-recursive
-AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
- $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
- distdir
-ETAGS = etags
-CTAGS = ctags
-DIST_SUBDIRS = $(SUBDIRS)
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-am__relativize = \
- dir0=`pwd`; \
- sed_first='s,^\([^/]*\)/.*$$,\1,'; \
- sed_rest='s,^[^/]*/*,,'; \
- sed_last='s,^.*/\([^/]*\)$$,\1,'; \
- sed_butlast='s,/*[^/]*$$,,'; \
- while test -n "$$dir1"; do \
- first=`echo "$$dir1" | sed -e "$$sed_first"`; \
- if test "$$first" != "."; then \
- if test "$$first" = ".."; then \
- dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
- dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
- else \
- first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
- if test "$$first2" = "$$first"; then \
- dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
- else \
- dir2="../$$dir2"; \
- fi; \
- dir0="$$dir0"/"$$first"; \
- fi; \
- fi; \
- dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
- done; \
- reldir="$$dir2"
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-SUBDIRS = BLAS_LAPACK BLOCKING FFLAS_FFPACK
-all: all-recursive
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps benchmark/src/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps benchmark/src/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-
-# This directory's subdirectories are mostly independent; you can cd
-# into them and run `make' without going through this Makefile.
-# To change the values of `make' variables: instead of editing Makefiles,
-# (1) if the variable is set in `config.status', edit `config.status'
-# (which will cause the Makefiles to be regenerated when you run `make');
-# (2) otherwise, pass the desired values on the `make' command line.
-$(RECURSIVE_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- target=`echo $@ | sed s/-recursive//`; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- dot_seen=yes; \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done; \
- if test "$$dot_seen" = "no"; then \
- $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
- fi; test -z "$$fail"
-
-$(RECURSIVE_CLEAN_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- case "$@" in \
- distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
- *) list='$(SUBDIRS)' ;; \
- esac; \
- rev=''; for subdir in $$list; do \
- if test "$$subdir" = "."; then :; else \
- rev="$$subdir $$rev"; \
- fi; \
- done; \
- rev="$$rev ."; \
- target=`echo $@ | sed s/-recursive//`; \
- for subdir in $$rev; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done && test -z "$$fail"
-tags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
- done
-ctags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
- done
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
- include_option=--etags-include; \
- empty_fix=.; \
- else \
- include_option=--include; \
- empty_fix=; \
- fi; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- test ! -f $$subdir/TAGS || \
- set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
- fi; \
- done; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
- @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- $(am__make_dryrun) \
- || test -d "$(distdir)/$$subdir" \
- || $(MKDIR_P) "$(distdir)/$$subdir" \
- || exit 1; \
- dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
- $(am__relativize); \
- new_distdir=$$reldir; \
- dir1=$$subdir; dir2="$(top_distdir)"; \
- $(am__relativize); \
- new_top_distdir=$$reldir; \
- echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
- echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
- ($(am__cd) $$subdir && \
- $(MAKE) $(AM_MAKEFLAGS) \
- top_distdir="$$new_top_distdir" \
- distdir="$$new_distdir" \
- am__remove_distdir=: \
- am__skip_length_check=: \
- am__skip_mode_fix=: \
- distdir) \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-recursive
-all-am: Makefile
-installdirs: installdirs-recursive
-installdirs-am:
-install: install-recursive
-install-exec: install-exec-recursive
-install-data: install-data-recursive
-uninstall: uninstall-recursive
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-recursive
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-recursive
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-recursive
- -rm -f Makefile
-distclean-am: clean-am distclean-generic distclean-tags
-
-dvi: dvi-recursive
-
-dvi-am:
-
-html: html-recursive
-
-html-am:
-
-info: info-recursive
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-recursive
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-recursive
-
-install-html-am:
-
-install-info: install-info-recursive
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-recursive
-
-install-pdf-am:
-
-install-ps: install-ps-recursive
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-recursive
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-recursive
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-recursive
-
-pdf-am:
-
-ps: ps-recursive
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \
- install-am install-strip tags-recursive
-
-.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
- all all-am check check-am clean clean-generic clean-libtool \
- ctags ctags-recursive distclean distclean-generic \
- distclean-libtool distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-pdf install-pdf-am \
- install-ps install-ps-am install-strip installcheck \
- installcheck-am installdirs installdirs-am maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
- uninstall uninstall-am
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/benchmark/test-src/Makefile.in b/benchmark/test-src/Makefile.in
deleted file mode 100644
index b0ac461..0000000
--- a/benchmark/test-src/Makefile.in
+++ /dev/null
@@ -1,446 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = benchmark/test-src
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-
-#
-# Nothing yet
-#
-EXTRA_DIST = mesure-BLAS_LAPACK.sh \
- mesure-FFLAS_FFPACK.sh \
- mesure.sh \
- parameter.in
-
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps benchmark/test-src/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps benchmark/test-src/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-tags: TAGS
-TAGS:
-
-ctags: CTAGS
-CTAGS:
-
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: install-am install-strip
-
-.PHONY: all all-am check check-am clean clean-generic clean-libtool \
- distclean distclean-generic distclean-libtool distdir dvi \
- dvi-am html html-am info info-am install install-am \
- install-data install-data-am install-dvi install-dvi-am \
- install-exec install-exec-am install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/benchmark/test-src/mesure-BLAS_LAPACK.sh b/benchmark/test-src/mesure-BLAS_LAPACK.sh
deleted file mode 100755
index 89f6b51..0000000
--- a/benchmark/test-src/mesure-BLAS_LAPACK.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-# Written by Clément Pernet <clement.pernet at imag.fr>
-# Copyright (c) FFLAS-FFPACK
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-primes="65521"
-
-for p in $primes; do
- ${TEST_SRC_PATH}/mesure.sh check-dgemm $p
- ${TEST_SRC_PATH}/mesure.sh check-dgetrf $p
- ${TEST_SRC_PATH}/mesure.sh check-dgetri $p
- ${TEST_SRC_PATH}/mesure.sh check-dtrsm $p
- ${TEST_SRC_PATH}/mesure.sh check-dtrtri $p
-done
diff --git a/benchmark/test-src/mesure-FFLAS_FFPACK.sh b/benchmark/test-src/mesure-FFLAS_FFPACK.sh
deleted file mode 100755
index d85372e..0000000
--- a/benchmark/test-src/mesure-FFLAS_FFPACK.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-# Written by Clément Pernet <clement.pernet at imag.fr>
-# Copyright (c) FFLAS-FFPACK
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-primes="65521"
-
-for p in $primes; do
- ${TEST_SRC_PATH}/mesure.sh check-fgemm $p
- ${TEST_SRC_PATH}/mesure.sh check-lqup $p
- ${TEST_SRC_PATH}/mesure.sh check-inverse $p
- ${TEST_SRC_PATH}/mesure.sh check-ftrsm $p
- ${TEST_SRC_PATH}/mesure.sh check-ftrtri $p
-done
diff --git a/benchmark/test-src/mesure.sh b/benchmark/test-src/mesure.sh
deleted file mode 100755
index 2f7e4e1..0000000
--- a/benchmark/test-src/mesure.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-# Written by Clément Pernet <clement.pernet at imag.fr>
-# Copyright (c) FFLAS-FFPACK
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-#echo -n -e " running $1 test \t \t..."
-printf " running %-15s ........... " $1
-while read parameter
-do
- ${BIN_PATH}/$1 $2 $parameter 2>> ${TEST_PATH}/timing-$1-$2.txt
-done < "${TEST_SRC_PATH}/parameter.in"
-echo "[done]"
diff --git a/benchmark/test-src/parameter.in b/benchmark/test-src/parameter.in
deleted file mode 100644
index 8f466a9..0000000
--- a/benchmark/test-src/parameter.in
+++ /dev/null
@@ -1,7 +0,0 @@
-300 5
-500 2
-1000 1
-2000 1
-5000 1
-10000 1
-15000 1
\ No newline at end of file
diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
new file mode 100644
index 0000000..e9bb878
--- /dev/null
+++ b/benchmarks/Makefile.am
@@ -0,0 +1,90 @@
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by JGD <Jean-Guillaume.Dumas at imag.fr>
+#
+# ========LICENCE========
+# This file is part of the library FFLAS-FFPACK.
+#
+# FFLAS-FFPACK is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+# ========LICENCE========
+#/
+
+SUBDIRS =
+benchmarks: $(BENCHMARKS)
+
+AM_CPPFLAGS=-I$(top_srcdir) -g
+AM_CXXFLAGS = @DEFAULT_CFLAGS@
+AM_CPPFLAGS += $(CBLAS_FLAG) $(GIVARO_CFLAGS) $(OPTFLAGS) -I$(top_srcdir)/fflas-ffpack/utils/ -I$(top_srcdir)/fflas-ffpack/fflas/ -I$(top_srcdir)/fflas-ffpack/ffpack -I$(top_srcdir)/fflas-ffpack/field $(CUDA_CFLAGS) $(PARFLAGS)
+LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS)
+AM_LDFLAGS=-static $(PARLIBS)
+
+PERFPUBLISHERFILE=benchmarks-report.xml
+
+FFLA_BENCH = benchmark-fgemm benchmark-wino benchmark-ftrsm benchmark-ftrtri benchmark-inverse benchmark-lqup benchmark-pluq benchmark-charpoly benchmark-fgemm-mp benchmark-fgemv-mp benchmark-ftrsm-mp benchmark-lqup-mp
+BLAS_BENCH = benchmark-sgemm$(EXEEXT) benchmark-dgemm benchmark-dtrsm
+LAPA_BENCH = benchmark-dtrtri benchmark-dgetri benchmark-dgetrf
+
+
+if FFLASFFPACK_HAVE_LAPACK
+USE_LAPACK_BENCH = $(LAPA_BENCH)
+benchmark_dtrtri_SOURCES = benchmark-dtrtri.C
+benchmark_dgetri_SOURCES = benchmark-dgetri.C
+benchmark_dgetrf_SOURCES = benchmark-dgetrf.C
+endif
+
+BENCHMARKS = \
+ $(FFLA_BENCH) \
+ $(BLAS_BENCH) \
+ $(USE_LAPACK_BENCH) \
+ $(USE_OMP_BENCH)
+
+CLEANFILES = $(BENCHMARKS) $(PERFPUBLISHERFILE)
+
+EXTRA_PROGRAMS = $(BENCHMARKS)
+
+benchmark_sgemm_SOURCES = benchmark-dgemm.C
+benchmark_dgemm_SOURCES = benchmark-dgemm.C
+benchmark_dtrsm_SOURCES = benchmark-dtrsm.C
+
+benchmark_fgemm_SOURCES = benchmark-fgemm.C
+benchmark_fgemm_mp_SOURCES = benchmark-fgemm-mp.C
+benchmark_fgemv_mp_SOURCES = benchmark-fgemv-mp.C
+benchmark_wino_SOURCES = benchmark-wino.C
+benchmark_ftrsm_SOURCES = benchmark-ftrsm.C
+benchmark_ftrsm_mp_SOURCES = benchmark-ftrsm-mp.C
+benchmark_ftrtri_SOURCES = benchmark-ftrtri.C
+benchmark_inverse_SOURCES = benchmark-inverse.C
+benchmark_charpoly_SOURCES = benchmark-charpoly.C
+benchmark_lqup_SOURCES = benchmark-lqup.C
+benchmark_lqup_mp_SOURCES = benchmark-lqup-mp.C
+benchmark_pluq_SOURCES = benchmark-pluq.C
+
+benchmark_sgemm_CXXFLAGS = $(AM_CXXFLAGS) -D__SGEMM__
+
+# Perfpublisher script interaction - AB 2014/11/17
+perfpublisher:
+ +./perfpublisher.sh "$(PERFPUBLISHERFILE)" "$(BENCHMARKS)" "$(CXX)"
+
+# for compilation of new benchmarks
+FFLASFFPACK_BIN=@bindir@
+
+define other_compilation
+ $(CXX) $(CXXFLAGS) $(AM_CXXFLAGS) $(OPTFLAGS) $(PARFLAGS) ${INCLUDES} $(AM_CPPFLAGS) $*.C -o $@ $(LDFLAGS) $(LDADD) $(LOADLIBES)
+endef
+
+%:%.C
+ $(other_compilation)
+
+%:%.cpp
+ $(other_compilation)
diff --git a/benchmarks/Makefile.tests b/benchmarks/Makefile.tests
new file mode 100644
index 0000000..4c2f4b3
--- /dev/null
+++ b/benchmarks/Makefile.tests
@@ -0,0 +1,28 @@
+SIZES=6 6 7 8 9 10 11 12 13
+BITS=30 60 120 240 480 960 1920 3840 7680
+MATR=1000
+
+SHELL := /bin/bash
+index = $(words $(shell a="$(2)";echo $${a/$(1)*/$(1)} ))
+swap = $(word $(call index,$(1),${SIZES}),${BITS})
+
+OUTP=output.fgemv
+MODEL=$(shell cat /proc/cpuinfo | grep "model name" | head -1|cut -d':' -f2| tr -s ' '|sed 's/^ //')
+
+EXEC=benchmark-fgemv-mp
+WSRC=${EXEC:%=-W %.C}
+
+mkruns = make "OPTFLAGS=-Ofast -DSTD_RECINT_SIZE=$(1) -DBENCH_RECINT" ${EXEC} ${WSRC}; ${EXEC} -b $(call swap,$(1)) -m ${MATR} -k ${MATR} -i 2 |awk '{print "SIZE:",$(1),$$0}' >> ${OUTP}; make "OPTFLAGS=-Ofast -DINTEGER_NO_RNS" ${EXEC} ${WSRC}; echo "NORNS"`${EXEC} -b $(call swap,$(1)) -m ${MATR} -k ${MATR} -i 2`|awk '{print "SIZE:",$(1),$$0}' >> ${OUTP};
+
+
+all: run split
+
+run:
+ - rm ${OUTP}
+ $(foreach siz, ${SIZES}, $(call mkruns,${siz}))
+
+split:
+ fgrep RecInt ${OUTP} | sed 's/4rintILm/ /;s/EEE/ /'> ${OUTP}.rint
+ fgrep Givaro ${OUTP} | fgrep NORNS > ${OUTP}.gmp
+ fgrep Givaro ${OUTP} | fgrep -v NORNS > ${OUTP}.rns
+
\ No newline at end of file
diff --git a/benchmarks/benchmark-charpoly.C b/benchmarks/benchmark-charpoly.C
new file mode 100644
index 0000000..cf445d3
--- /dev/null
+++ b/benchmarks/benchmark-charpoly.C
@@ -0,0 +1,108 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+
+/* Copyright (c) FFLAS-FFPACK
+* Written by Clement Pernet <clement.pernet at imag.fr>
+* ========LICENCE========
+* This file is part of the library FFLAS-FFPACK.
+*
+* FFLAS-FFPACK is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+* ========LICENCE========
+*/
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iostream>
+#include <givaro/modular.h>
+
+#include "fflas-ffpack/fflas-ffpack.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+
+using namespace std;
+
+int main(int argc, char** argv) {
+
+ size_t iter = 1;
+ int q = 131071;
+ int n = 2000;
+ std::string file = "";
+ static int variant =0;
+
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT , &q },
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'f', "-f FILE", "Set the input file (empty for random).", TYPE_STR , &file },
+ { 'a', "-a algorithm", "Set the algorithmic variant", TYPE_INT, &variant },
+
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+ FFPACK::FFPACK_CHARPOLY_TAG CT;
+ switch (variant){
+ case 0: CT = FFPACK::FfpackLUK; break;
+ case 1: CT = FFPACK::FfpackKG; break;
+ case 2: CT = FFPACK::FfpackDanilevski; break;
+ case 3: CT = FFPACK::FfpackKGFast; break;
+ case 4: CT = FFPACK::FfpackKGFastG; break;
+ case 5: CT = FFPACK::FfpackHybrid; break;
+ case 6: CT = FFPACK::FfpackArithProg; break;
+ default: CT = FFPACK::FfpackLUK; break;
+ }
+ typedef Givaro::ModularBalanced<double> Field;
+ typedef Field::Element Element;
+
+ Field F(q);
+ FFLAS::Timer chrono;
+ double time=0.0;
+
+ Element *A;
+
+ for (size_t i=0;i<iter;++i){
+
+ if (!file.empty()){
+ A = read_field (F, file.c_str(), &n, &n);
+ }
+ else{
+ A = FFLAS::fflas_new<Element>(n*n);
+ Field::RandIter G(F);
+ for (size_t j=0; j< (size_t)n*n; ++j)
+ G.random(*(A+j));
+ }
+
+ std::vector<Field::Element> cpol(n);
+ chrono.clear();
+ chrono.start();
+ FFPACK::CharPoly (F, cpol, n, A, n, CT);
+ chrono.stop();
+
+ time+=chrono.usertime();
+ FFLAS::fflas_delete( A);
+
+ }
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+ std::cerr << "Time: " << time / double(iter)
+ << " Gflops: " << "irrelevant";
+ FFLAS::writeCommandString(std::cerr, as) << std::endl;
+
+ return 0;
+}
+
diff --git a/benchmarks/benchmark-dgemm.C b/benchmarks/benchmark-dgemm.C
new file mode 100644
index 0000000..be1c356
--- /dev/null
+++ b/benchmarks/benchmark-dgemm.C
@@ -0,0 +1,166 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+//#include "goto-def.h"
+
+/* Copyright (c) FFLAS-FFPACK
+* Written by Clément Pernet <clement.pernet at imag.fr>
+* ========LICENCE========
+* This file is part of the library FFLAS-FFPACK.
+*
+* FFLAS-FFPACK is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+* ========LICENCE========
+*/
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iostream>
+#include <givaro/modular.h>
+
+#include "fflas-ffpack/config-blas.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#ifdef __FFLASFFPACK_USE_OPENMP
+typedef FFLAS::OMPTimer TTimer;
+#else
+typedef FFLAS::Timer TTimer;
+#endif
+
+#ifndef __SGEMM__
+typedef double Floats;
+#define CBLAS_GEMM cblas_dgemm
+#else
+typedef float Floats;
+#define CBLAS_GEMM cblas_sgemm
+#endif
+
+
+using namespace std;
+
+int main(int argc, char** argv) {
+
+ size_t iter = 1;
+ int q = 1009;
+ int n = 2000;
+ std::string file1 = "";
+ std::string file2 = "";
+
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT , &q },
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'f', "-f FILE", "Set the first input file (empty for random).", TYPE_STR , &file1 },
+ { 'g', "-g FILE", "Set the second input file (empty for random).", TYPE_STR , &file2 },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+
+ typedef Givaro::ModularBalanced<Floats> Field;
+ typedef Field::Element Element;
+
+ Field F(q);
+
+ TTimer chrono;
+ double time=0.0;// time2=0.0;
+
+ Element * A, * B, * C;
+
+ if (iter>1) {
+ if (!file1.empty()){
+ A = read_field (F, file1.c_str(), &n, &n);
+ }
+ else{
+ Field::RandIter G(F);
+ A = FFLAS::fflas_new<Element>(n*n);
+#pragma omp parallel for
+ for (int i=0; i<n; ++i)
+ for (int j=0; j<n; ++j)
+ G.random(*(A+i*n+j));
+ }
+
+ if (!file2.empty()){
+ B = read_field (F, file2.c_str(), &n, &n);
+ }
+ else{
+ Field::RandIter G(F);
+ B = FFLAS::fflas_new<Element>(n*n);
+#pragma omp parallel for
+ for (int i=0; i<n; ++i)
+ for (int j=0; j<n; ++j)
+ G.random(*(B+i*n+j));
+ }
+
+ C = FFLAS::fflas_new<Element>(n*n);
+
+ CBLAS_GEMM (CblasRowMajor, CblasNoTrans, CblasNoTrans, n,n,n, F.one,
+ A, n, B, n, F.zero, C,n);
+
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( C);
+ }
+
+ for (size_t it=0;it<iter;++it){
+
+ if (!file1.empty()){
+ A = read_field (F, file1.c_str(), &n, &n);
+ }
+ else{
+ Field::RandIter G(F);
+ A = FFLAS::fflas_new<Element>(n*n);
+#pragma omp parallel for
+ for (int i=0; i<n; ++i)
+ for (int j=0; j<n; ++j)
+ G.random(*(A+i*n+j));
+ }
+
+ if (!file2.empty()){
+ B = read_field (F, file2.c_str(), &n, &n);
+ }
+ else{
+ Field::RandIter G(F);
+ B = FFLAS::fflas_new<Element>(n*n);
+#pragma omp parallel for
+ for (int i=0; i<n; ++i)
+ for (int j=0; j<n; ++j)
+ G.random(*(B+i*n+j));
+ }
+
+ C = FFLAS::fflas_new<Element>(n*n);
+
+ chrono.clear();
+ chrono.start();
+ CBLAS_GEMM (CblasRowMajor, CblasNoTrans, CblasNoTrans, n,n,n, F.one,
+ A, n, B, n, F.zero, C,n);
+ chrono.stop();
+ time+=chrono.usertime();
+
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( C);
+ }
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+ std::cout << "Time: " << time / double(iter)
+ << " Gflops: " << (2.*double(n)/1000.*double(n)/1000.*double(n)/1000.0) / time * double(iter);
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
+
+ return 0;
+}
+
diff --git a/benchmark/src/BLAS_LAPACK/check-dgetrf.C b/benchmarks/benchmark-dgetrf.C
similarity index 50%
rename from benchmark/src/BLAS_LAPACK/check-dgetrf.C
rename to benchmarks/benchmark-dgetrf.C
index 92d70d1..30884e6 100644
--- a/benchmark/src/BLAS_LAPACK/check-dgetrf.C
+++ b/benchmarks/benchmark-dgetrf.C
@@ -23,50 +23,82 @@
* ========LICENCE========
*/
+#ifndef __FFLASFFPACK_HAVE_DGETRF
+#define __FFLASFFPACK_HAVE_DGETRF 1
+#endif
+#include "fflas-ffpack/fflas-ffpack-config.h"
+
#include <iostream>
#include <vector>
+#include <givaro/modular.h>
-#ifndef __FFLASFFPACK_HAVE_DGETRF
-#define __FFLASFFPACK_HAVE_DGETRF 1
-#endif
#include "fflas-ffpack/fflas-ffpack.h"
-#include "fflas-ffpack/field/modular-balanced.h"
#include "fflas-ffpack/utils/timer.h"
#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
using namespace std;
-int main(int argc, char** argv) {
-
- // parameter: p, n, iteration, file
+#ifdef __FFLASFFPACK_USE_OPENMP
+typedef FFLAS::OMPTimer TTimer;
+#else
+typedef FFLAS::Timer TTimer;
+#endif
- int p = atoi(argv[1]);
- int n = atoi(argv[2]);
- size_t iter = atoi(argv[3]);
- typedef FFPACK::Modular<double> Field;
+int main(int argc, char** argv) {
+
+ size_t iter = 1;
+ int q = 1009;
+ int n = 2000;
+ std::string file = "";
+
+ size_t NBK = MAX_THREADS;
+
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT , &q },
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'f', "-f FILE", "Set the input file (empty for random).", TYPE_STR , &file },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+ typedef Givaro::Modular<double> Field;
typedef Field::Element Element;
- Field F(p);
+ Field F(q);
Field::Element * A;
- Timer chrono;
+ TTimer chrono;
double time=0.0;
std::vector<int> Piv(n,0);
- for (size_t i=0;i<iter;++i){
- if (argc > 4){
- A = read_field(F, argv[4], &n, &n);
+ if (iter>1) {
+ if (!file.empty()){
+ A = read_field(F, file.c_str(), &n, &n);
+ }
+ else {
+ A = FFLAS::fflas_new<Element>(n*n);
+ Field::RandIter G(F);
+ PAR_BLOCK{ FFLAS::pfrand(F,G,n,n,A,n/NBK); }
+ clapack_dgetrf(CblasRowMajor,n,n,A,n,&Piv[0]);
+ FFLAS::fflas_delete( A);
+ }
+ }
+ for (size_t it=0;it<iter;++it){
+ if (!file.empty()){
+ A = read_field(F, file.c_str(), &n, &n);
}
else {
- A = new Element[n*n];
+ A = FFLAS::fflas_new<Element>(n*n);
Field::RandIter G(F);
- for (size_t j=0; j<(size_t)n*n; ++j)
- G.random(*(A+j));
+ PAR_BLOCK{ FFLAS::pfrand(F,G,n,n,A,n/NBK); }
}
chrono.clear();
@@ -75,11 +107,14 @@ int main(int argc, char** argv) {
chrono.stop();
time+=chrono.usertime();
- delete[] A;
+ FFLAS::fflas_delete( A);
}
-
- cerr<<"n: "<<n<<" p: "<<p<<" time: "<<time/(double)iter<<endl;
-
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+ std::cout << "Time: " << time / double(iter)
+ << " Gflops: " << (2.*double(n)/1000.*double(n)/1000.*double(n)/1000.0) / time * double(iter) / 3.;
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
return 0;
}
diff --git a/benchmark/src/BLAS_LAPACK/check-dgetri.C b/benchmarks/benchmark-dgetri.C
similarity index 52%
rename from benchmark/src/BLAS_LAPACK/check-dgetri.C
rename to benchmarks/benchmark-dgetri.C
index 3f6f798..0eebd50 100644
--- a/benchmark/src/BLAS_LAPACK/check-dgetri.C
+++ b/benchmarks/benchmark-dgetri.C
@@ -23,56 +23,73 @@
* ========LICENCE========
*/
+#include "fflas-ffpack/fflas-ffpack-config.h"
#include <iostream>
#include <vector>
-
-#ifndef __FFLASFFPACK_HAVE_DGETRF
-#define __FFLASFFPACK_HAVE_DGETRF 1
-#endif
-
-#ifndef __FFLASFFPACK_HAVE_DGETRI
-#define __FFLASFFPACK_HAVE_DGETRI 1
-#endif
-#ifndef __FFLASFFPACK_HAVE_DTRTRI
-#define __FFLASFFPACK_HAVE_DTRTRI 1
-#endif
-#ifndef __FFLASFFPACK_AUTOIMPLEMENT_DGETRI
-#define __FFLASFFPACK_AUTOIMPLEMENT_DGETRI 1
-#endif
+#include <givaro/modular.h>
+
+// #ifndef __FFLASFFPACK_HAVE_DGETRF
+// #define __FFLASFFPACK_HAVE_DGETRF 1
+// #endif
+
+// #ifndef __FFLASFFPACK_HAVE_DGETRI
+// #define __FFLASFFPACK_HAVE_DGETRI 1
+// #endif
+// #ifndef __FFLASFFPACK_HAVE_DTRTRI
+// #define __FFLASFFPACK_HAVE_DTRTRI 1
+// #endif
+// #ifndef __FFLASFFPACK_AUTOIMPLEMENT_DGETRI
+// #define __FFLASFFPACK_AUTOIMPLEMENT_DGETRI 1
+// #endif
#include "fflas-ffpack/fflas-ffpack.h"
-#include "fflas-ffpack/field/modular-balanced.h"
#include "fflas-ffpack/utils/timer.h"
#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#ifdef __FFLASFFPACK_USE_OPENMP
+typedef FFLAS::OMPTimer TTimer;
+#else
+typedef FFLAS::Timer TTimer;
+#endif
using namespace std;
int main(int argc, char** argv) {
-
- // parameter: p, n, iteration, file
-
- int p = atoi(argv[1]);
- int n = atoi(argv[2]);
- size_t iter = atoi(argv[3]);
-
-
- typedef FFPACK::Modular<double> Field;
+
+ size_t iter = 1;
+ int q = 1009;
+ int n = 2000;
+ std::string file = "";
+
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT , &q },
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'f', "-f FILE", "Set the input file (empty for random).", TYPE_STR , &file },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+
+ typedef Givaro::Modular<double> Field;
typedef Field::Element Element;
vector<int> Piv(n,0);
- Field F(p);
+ Field F(q);
Field::Element * A;
- Timer chrono;
+ TTimer chrono;
double time=0.0;
for (size_t i=0;i<iter;++i){
- if (argc > 4){
- A = read_field(F, argv[4], &n, &n);
+ if (!file.empty()){
+ A = read_field(F, file.c_str(), &n, &n);
}
else {
- A = new Element[n*n];
+ A = FFLAS::fflas_new<Element>(n*n);
Field::RandIter G(F);
for (size_t j=0; j<(size_t)n*n; ++j)
G.random(*(A+j));
@@ -86,10 +103,14 @@ int main(int argc, char** argv) {
chrono.stop();
time+=chrono.usertime();
- delete[] A;
+ FFLAS::fflas_delete( A);
}
-
- cerr<<"n: "<<n<<" p: "<<p<<" time: "<<time/(double)iter<<endl;
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+ std::cout << "Time: " << time / double(iter)
+ << " Gflops: " << (2.*double(n)/1000.*double(n)/1000.*double(n)/1000.0) / time * double(iter);
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
return 0;
diff --git a/benchmark/src/BLAS_LAPACK/check-dtrsm.C b/benchmarks/benchmark-dtrsm.C
similarity index 55%
rename from benchmark/src/BLAS_LAPACK/check-dtrsm.C
rename to benchmarks/benchmark-dtrsm.C
index eb79c56..a8303d5 100644
--- a/benchmark/src/BLAS_LAPACK/check-dtrsm.C
+++ b/benchmarks/benchmark-dtrsm.C
@@ -22,51 +22,69 @@
* ========LICENCE========
*/
+#include "fflas-ffpack/fflas-ffpack-config.h"
#include <iostream>
+#include <givaro/modular.h>
#include "fflas-ffpack/fflas-ffpack.h"
-#include "fflas-ffpack/field/modular-balanced.h"
#include "fflas-ffpack/utils/timer.h"
#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#ifdef __FFLASFFPACK_USE_OPENMP
+typedef FFLAS::OMPTimer TTimer;
+#else
+typedef FFLAS::Timer TTimer;
+#endif
using namespace std;
int main(int argc, char** argv) {
-
- // parameter: p, n, iteration, file1, file2
-
- int p = atoi(argv[1]);
- int n = atoi(argv[2]);
- size_t iter = atoi(argv[3]);
-
-
- typedef FFPACK::Modular<double> Field;
+
+ size_t iter = 1;
+ int q = 1009;
+ int n = 2000;
+ std::string file1 = "";
+ std::string file2 = "";
+
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT , &q },
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'f', "-f FILE", "Set the first input file (empty for random).", TYPE_STR , &file1 },
+ { 'g', "-g FILE", "Set the second input file (empty for random).", TYPE_STR , &file2 },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+ typedef Givaro::Modular<double> Field;
typedef Field::Element Element;
- Field F(p);
+ Field F(q);
Element * A;
Element * B;
- Timer chrono;
+ TTimer chrono;
double time=0.0;
for (size_t i=0;i<iter;++i){
Field::RandIter G(F);
- if (argc > 4){
- A = read_field (F, argv[4], &n, &n);
+ if (!file1.empty()){
+ A = read_field (F, file1.c_str(), &n, &n);
}
else{
- A = new Element[n*n];
+ A = FFLAS::fflas_new<Element>(n*n);
for (size_t j = 0; j< (size_t)n*n; ++j)
G.random(*(A+j));
}
- if (argc == 6){
- B = read_field (F, argv[5], &n, &n);
+ if (!file2.empty()){
+ B = read_field (F, file2.c_str(), &n, &n);
}
else{
- B = new Element[n*n];
+ B = FFLAS::fflas_new<Element>(n*n);
for (size_t j=0 ; j< (size_t)n*n; ++j)
G.random(*(A+j));
}
@@ -81,13 +99,16 @@ int main(int argc, char** argv) {
chrono.stop();
time+=chrono.usertime();
- delete[] A;
- delete[] B;
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( B);
}
-
- cerr<<"n: "<<n<<" p: "<<p<<" time: "<<time/(double)iter<<endl;
-
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+ std::cout << "Time: " << time / double(iter)
+ << " Gflops: " << (2.*double(n)/1000.*double(n)/1000.*double(n)/1000.0) / time * double(iter) / 3.;
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
return 0;
}
diff --git a/benchmark/src/BLAS_LAPACK/check-dtrtri.C b/benchmarks/benchmark-dtrtri.C
similarity index 58%
rename from benchmark/src/BLAS_LAPACK/check-dtrtri.C
rename to benchmarks/benchmark-dtrtri.C
index e8cfce3..0c3b5c5 100644
--- a/benchmark/src/BLAS_LAPACK/check-dtrtri.C
+++ b/benchmarks/benchmark-dtrtri.C
@@ -23,42 +23,58 @@
* ========LICENCE========
*/
+#include "fflas-ffpack/fflas-ffpack-config.h"
#include <iostream>
+#include <givaro/modular.h>
#define __FFLASFFPACK_HAVE_DTRTRI 1
#include "fflas-ffpack/fflas-ffpack.h"
-#include "fflas-ffpack/field/modular-balanced.h"
#include "fflas-ffpack/utils/timer.h"
#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#ifdef __FFLASFFPACK_USE_OPENMP
+typedef FFLAS::OMPTimer TTimer;
+#else
+typedef FFLAS::Timer TTimer;
+#endif
using namespace std;
int main(int argc, char** argv) {
-
- // parameter: p, n, iteration, file
-
- int p = atoi(argv[1]);
- int n = atoi(argv[2]);
- size_t iter = atoi(argv[3]);
-
-
- typedef FFPACK::Modular<double> Field;
+
+ size_t iter = 1;
+ int q = 1009;
+ int n = 2000;
+ std::string file = "";
+
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT , &q },
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'f', "-f FILE", "Set the input file (empty for random).", TYPE_STR , &file },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+ typedef Givaro::Modular<double> Field;
typedef Field::Element Element;
- Field F(p);
+ Field F(q);
Element * A;
- Timer chrono;
+ TTimer chrono;
double time=0.0;
Field::RandIter G(F);
for (size_t i=0;i<iter;++i){
- if (argc > 4){
- A = read_field (F, argv[4], &n, &n);
+ if (!file.empty()){
+ A = read_field (F, file.c_str(), &n, &n);
} else {
- A = new Element[n*n];
+ A = FFLAS::fflas_new<Element>(n*n);
for (size_t j=0; j<(size_t) n*n; ++j)
G.random(*(A+j));
}
@@ -71,12 +87,15 @@ int main(int argc, char** argv) {
chrono.stop();
time+=chrono.usertime();
- delete[] A;
+ FFLAS::fflas_delete( A);
}
-
- cerr<<"n: "<<n<<" p: "<<p<<" time: "<<time/(double)iter<<endl;
-
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+ std::cout << "Time: " << time / double(iter)
+ << " Gflops: " << (2.*double(n)/1000.*double(n)/1000.*double(n)/1000.0) / time * double(iter) / 3.;
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
return 0;
}
diff --git a/benchmarks/benchmark-echelon.C b/benchmarks/benchmark-echelon.C
new file mode 100644
index 0000000..c8a2e1a
--- /dev/null
+++ b/benchmarks/benchmark-echelon.C
@@ -0,0 +1,326 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* Copyright (c) FFLAS-FFPACK
+* Written by Clement Pernet <clement.pernet at imag.fr>, from benchmark-pluq by Ziad Sultan
+* ========LICENCE========
+* This file is part of the library FFLAS-FFPACK.
+*
+* FFLAS-FFPACK is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+* ========LICENCE========
+*/
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iostream>
+#include "fflas-ffpack/config-blas.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include <givaro/modular-balanced.h>
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+using namespace std;
+
+typedef Givaro::ModularBalanced<double> Field;
+
+// random generator function:
+ptrdiff_t myrandom (ptrdiff_t i) { return rand()%i;}
+
+// pointer object to it:
+ptrdiff_t (*p_myrandom)(ptrdiff_t) = myrandom;
+
+typename Field::Element* construct_U(const Field& F, Field::RandIter& G, size_t n, size_t r, std::vector<size_t>& P, size_t commonseed, size_t seed)
+{
+ size_t lda = n;
+ Field::Element *U=new Field::Element[n*n];
+
+ FFLAS::ParSeqHelper::Parallel H;
+
+ std::vector<size_t> E(r);
+ PARFOR1D(i,r,H, E[i]=i; );
+
+
+ srand48(commonseed);
+ std::vector<size_t> Z(n);
+ PARFOR1D(i,n,H, Z[i]=i;);
+
+ P.resize(r);
+ for(size_t i=0; i<r; ++i) {
+ size_t index=lrand48() % Z.size();
+ P[i] = Z[ index ];
+ Z.erase(Z.begin()+index);
+ }
+
+ PARFOR1D(i,r,H,
+ while( F.isZero( G.random(U[ P[i]*lda+P[i] ]) ) ) {}
+ for(size_t j=P[i]+1;j<n;++j)
+ G.random(U[ P[i]*lda+j]);
+ );
+
+ return U;
+}
+
+typename Field::Element* construct_L(const Field& F, Field::RandIter& G, size_t m, size_t r, const std::vector<size_t>& P, size_t seed)
+{
+ FFLAS::ParSeqHelper::Parallel H;
+
+ size_t lda = m;
+ size_t taille=m*m;
+ Field::Element * L= new Field::Element[taille];
+ PARFOR1D(i,taille,H, F.init(L[i],F.zero); );
+
+ std::vector<size_t> E(r);
+ PARFOR1D(i,r,H, E[i]=i;);
+
+ srand48(seed);
+ std::vector<size_t> Z(m);
+ PARFOR1D(i,m,H, Z[i]=i; );
+
+ std::vector<size_t> Q(r);
+ for(size_t i=0; i<r; ++i) {
+ size_t index=lrand48() % Z.size();
+ Q[i] = Z[ index ];
+ Z.erase(Z.begin()+index);
+ }
+
+ for(size_t i=0; i<r; ++i) {
+ size_t index=lrand48() % E.size();
+ size_t perm = E[ index ];
+
+ E.erase(E.begin()+index);
+ F.init(L[Q[perm]*lda+P[perm]],F.one);
+ for(size_t j=Q[perm]+1;j<m;++j)
+ G.random(L[j*lda+P[perm]]);
+ }
+ return L;
+}
+
+
+typename Field::Element* M_randgen(const Field& F, typename Field::Element* L,typename Field::Element* U, size_t r, size_t m, size_t n)
+{
+ Field::Element alpha, beta;
+ F.init(alpha,1.0);
+ F.init(beta,0.0);
+ size_t lda = n;
+ Field::Element * A = new Field::Element[m*n];
+
+ // Computing produit L * U (ideally should use parallel ftrmm
+
+ /*
+ FFLAS::ftrmm(F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, m,n,1.0, U, lda, L, lda);
+ */
+
+ const FFLAS::CuttingStrategy meth = FFLAS::RECURSIVE;
+ const FFLAS::StrategyParameter strat = FFLAS::THREE_D;
+ typename FFLAS::ParSeqHelper::Parallel pWH (MAX_THREADS, meth, strat);
+ PAR_BLOCK{
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
+ m,n,r, alpha, L,r, U,
+ lda,beta,A,lda,pWH);
+ }
+ return L;
+}
+
+void verification_PLUQ(const Field & F, typename Field::Element * B, typename Field::Element * A,
+ size_t * P, size_t * Q, size_t m, size_t n, size_t R)
+{
+
+ FFLAS::ParSeqHelper::Parallel H;
+
+ Field::Element * X = FFLAS::fflas_new<Field::Element>(m*n);
+ Field::Element * L, *U;
+ L = FFLAS::fflas_new<Field::Element>(m*R);
+ U = FFLAS::fflas_new<Field::Element>(R*n);
+
+ PARFOR1D (i,m*R,H, F.init(L[i], 0.0); );
+
+ PARFOR1D (i,n*R,H, F.init(U[i], 0.0); );
+
+ PARFOR1D (i,m*n,H, F.init(X[i], 0.0); );
+
+
+ Field::Element zero,one;
+ F.init(zero,0.0);
+ F.init(one,1.0);
+ PARFOR1D (i,R,H,
+ for (size_t j=0; j<i; ++j)
+ F.assign ( *(U + i*n + j), zero);
+ for (size_t j=i; j<n; ++j)
+ F.assign (*(U + i*n + j), *(A+ i*n+j));
+ );
+ PARFOR1D (j,R,H,
+ for (size_t i=0; i<=j; ++i )
+ F.assign( *(L+i*R+j), zero);
+ F.assign(*(L+j*R+j), one);
+ for (size_t i=j+1; i<m; i++)
+ F.assign( *(L + i*R+j), *(A+i*n+j));
+ );
+
+ PAR_BLOCK{
+#pragma omp task shared(F, P, L)
+ FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, R,0,m, L, R, P);
+#pragma omp task shared(F, Q, U)
+ FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, R,0,n, U, n, Q);
+#pragma omp taskwait
+ const FFLAS::CuttingStrategy method = FFLAS::THREE_D;
+ typename FFLAS::ParSeqHelper::Parallel pWH (MAX_THREADS, method);
+#pragma omp task shared(F, L, U, X)
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,R,
+ F.one, L,R, U,n, F.zero, X,n, pWH);
+
+ }
+ bool fail = false;
+ // PARFOR1D (size_t i=0; i<m; ++i)
+ for(size_t i=0; i<m; ++i)
+ for (size_t j=0; j<n; ++j)
+ if (!F.areEqual (*(B+i*n+j), *(X+i*n+j))){
+ std::cout << " Initial["<<i<<","<<j<<"] = " << (*(B+i*n+j))
+ << " Result"<<i<<","<<j<<"] = " << (*(X+i*n+j))
+ << std::endl;
+
+ std::stringstream errs;
+ errs << " B["<<i<<","<<j<<"] = " << (*(B+i*n+j))
+ << " X["<<i<<","<<j<<"] = " << (*(X+i*n+j))
+ << std::endl;
+ std::cout << errs;
+ fail=true;
+ std::cout<<" end verification"<<std::endl;
+ exit(1);
+ }
+
+ if (fail)
+ std::cout<<"FAIL"<<std::endl;
+ else
+ std::cout<<"PASS"<<std::endl;
+
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+}
+
+
+
+int main(int argc, char** argv) {
+
+ size_t iter = 3 ;
+ int q = 131071 ;
+ Field F(q);
+ int m = 2000 ;
+ int n = 2000 ;
+ size_t r = 2000 ;
+ int v = 0;
+ // int p=0;
+ int t=MAX_THREADS;
+ int NBK = -1;
+ int a=1;
+ bool transform = false;
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT , &q },
+ { 'm', "-m M", "Set the row dimension of A.", TYPE_INT , &m },
+ { 'n', "-n N", "Set the col dimension of A.", TYPE_INT , &n },
+ { 'r', "-r R", "Set the rank of matrix A.", TYPE_INT , &r },
+ { 'i', "-i I", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'v', "-v V", "Set 1 if need verification of result else 0.", TYPE_INT , &v },
+ { 't', "-t T", "whether to compute the transformation matrix.", TYPE_BOOL , &transform },
+ { 'a', "-a A", "Algorithm for PLUQ decomposition: 0=LUdivine 1=PLUQ.", TYPE_INT , &a },
+ { 'b', "-b B", "number of numa blocks per dimension for the numa placement", TYPE_INT , &NBK },
+ END_OF_ARGUMENTS
+ };
+ // { 'p', "-p P", "0 for sequential, 1 for 2D iterative,
+//2 for 2D rec, 3 for 2D rec adaptive, 4 for 3D rc in-place, 5 for 3D rec, 6 for 3D rec adaptive.", TYPE_INT , &p },
+ FFLAS::parseArguments(argc,argv,as);
+ FFPACK::FFPACK_LU_TAG LuTag = a?FFPACK::FfpackTileRecursive:FFPACK::FfpackSlabRecursive;
+ if (NBK==-1) NBK = t;
+ typedef Field::Element Element;
+ Element * A, * Acop;
+ A = FFLAS::fflas_new(F,m,n,Alignment::CACHE_PAGESIZE);
+ Acop = FFLAS::fflas_new(F,m,n,Alignment::CACHE_PAGESIZE);
+
+ Field::Element * U = new Field::Element[n*n];
+ // random seed
+ ifstream f("/dev/urandom");
+ size_t seed1, seed2, seed3,seed4;
+ f.read(reinterpret_cast<char*>(&seed1), sizeof(seed1));
+ f.read(reinterpret_cast<char*>(&seed2), sizeof(seed2));
+ f.read(reinterpret_cast<char*>(&seed3), sizeof(seed3));
+ f.read(reinterpret_cast<char*>(&seed4), sizeof(seed4));
+ std::vector<size_t> Index_P(r);
+ Field::RandIter GG(F, seed1);
+
+ PAR_BLOCK{ pfrand(F,GG,m,n,A,m/NBK); }
+
+
+ // std::cout<<"Construct U"<<endl;
+ U = construct_U(F,GG, n, r, Index_P, seed4, seed3);
+ // std::cout<<"Construct L"<<endl;
+ A = construct_L(F,GG, m, r, Index_P, seed2);
+ // std::cout<<"randgen"<<endl;
+ A = M_randgen(F, A, U, r, m, n);
+ size_t R=0;
+ FFLAS::Timer chrono;
+ double time=0.0;
+
+// enum FFLAS::FFLAS_DIAG diag = FFLAS::FflasNonUnit;
+ size_t maxP, maxQ;
+ maxP = m;
+ maxQ = n;
+
+ size_t *P = FFLAS::fflas_new<size_t>(maxP);
+ size_t *Q = FFLAS::fflas_new<size_t>(maxQ);
+
+ FFLAS::ParSeqHelper::Parallel H;
+
+ PARFOR1D(i,(size_t)m,H,
+ for (size_t j=0; j<(size_t)n; ++j)
+ Acop[i*n+j]= (*(A+i*n+j));
+ );
+
+ for (size_t i=0;i<=iter;++i){
+
+ PARFOR1D(j,maxP,H, P[j]=0; );
+
+ PARFOR1D(j,maxQ,H, Q[j]=0; );
+
+ PARFOR1D(k,(size_t)m,H,
+ for (size_t j=0; j<(size_t)n; ++j)
+ *(A+k*n+j) = *(Acop+k*n+j) ;
+ );
+
+ chrono.clear();
+
+ if (i) chrono.start();
+// Added by AB 2014-12-15
+//#ifdef __FFLASFFPACK_USE_OPENMP
+ r = RowEchelonForm(F,m,n,A,n,P,Q,transform,LuTag);
+ if (i) {chrono.stop(); time+=chrono.realtime();}
+ }
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+ #define CUBE(x) ((x)*(x)*(x))
+ double gflop = 2.0/3.0*CUBE(double(r)/1000.0) +2*m/1000.0*n/1000.0*double(r)/1000.0 - double(r)/1000.0*double(r)/1000.0*(m+n)/1000;
+ if (transform)
+ gflop += CUBE(double(r)/1000.0)/3.0 + double(r)/1000.0*double(r)/1000.0*double(n-r)/1000.0;
+ std::cout << "Time: " << time / double(iter) << " Gflops: " << gflop / time * double(iter-1);
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
+
+ //verification
+ if(v)
+ verification_PLUQ(F,Acop,A,P,Q,m,n,R);
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( Acop);
+
+ return 0;
+}
+
diff --git a/benchmarks/benchmark-fgemm-mp.C b/benchmarks/benchmark-fgemm-mp.C
new file mode 100755
index 0000000..f7d49b3
--- /dev/null
+++ b/benchmarks/benchmark-fgemm-mp.C
@@ -0,0 +1,265 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+#if not defined(MG_DEFAULT)
+#define MG_DEFAULT MG_ACTIVE
+#endif
+#if not defined(STD_RECINT_SIZE)
+#define STD_RECINT_SIZE 8
+#endif
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iostream>
+#include <typeinfo>
+#include <vector>
+#include <string>
+using namespace std;
+
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/utils/args-parser.h"
+#include "givaro/modular-integer.h"
+#include "givaro/givcaster.h"
+#include "fflas-ffpack/paladin/parallel.h"
+#ifdef BENCH_RECINT
+#include "recint/recint.h"
+#endif
+
+#ifdef BENCH_FLINT
+#define __GMP_BITS_PER_MP_LIMB 64
+extern "C" {
+#include "flint/longlong.h"
+#include "flint/long_extras.h"
+#include "flint/fmpz_mat.h"
+#include "flint/fmpz.h"
+#include "flint/flint.h"
+}
+#endif
+
+
+template<typename T>
+std::ostream& write_matrix(std::ostream& out, Givaro::Integer p, size_t m, size_t n, T* C, size_t ldc){
+
+ size_t www(size_t((double(p.bitsize())*log(2.))/log(10.)));
+ out<<"Matrix("<<m<<','<<n<<",[[";
+ out.width(www+1);
+ out<<std::right<<C[0];
+ for (size_t j=1;j<n;++j){
+ out<<',';
+ out.width(www);
+ out<<std::right<<C[j];
+ }
+ out<<']';
+ for (size_t i=1;i<m;++i){
+ out<<endl<<",[";
+ out.width(www+1);
+ out<<std::right<<C[i*ldc];
+ for (size_t j=1;j<n;++j){
+ out<<',';
+ out.width(www);
+ out<<std::right<<C[i*ldc+j];
+ }
+ out<<']';
+ }
+ return out<<"])";
+}
+
+ static size_t iters = 3 ;
+ static Givaro::Integer q = -1 ;
+ static unsigned long b = 512 ;
+ static size_t m = 512 ;
+ static size_t k = 512 ;
+ static size_t n = 512 ;
+ static int nbw = -1 ;
+ static size_t seed= time(NULL);
+ static Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INTEGER , &q },
+ { 'b', "-b B", "Set the bitsize of the random characteristic.", TYPE_INT , &b },
+ { 'm', "-m M", "Set the dimension m of the matrix.", TYPE_INT , &m },
+ { 'k', "-k K", "Set the dimension k of the matrix.", TYPE_INT , &k },
+ { 'n', "-n N", "Set the dimension n of the matrix.", TYPE_INT , &n },
+ { 'w', "-w N", "Set the number of winograd levels (-1 for random).", TYPE_INT , &nbw },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iters },
+ { 's', "-s S", "Sets seed.", TYPE_INT , &seed },
+ END_OF_ARGUMENTS
+ };
+
+template<typename Ints>
+int tmain(){
+ srand( (int)seed);
+ srand48(seed);
+ Givaro::Integer::seeding(seed);
+
+ typedef Givaro::Modular<Ints> Field;
+ Givaro::Integer p;
+ FFLAS::Timer chrono, TimFreivalds;
+ double time=0.,timev=0.;
+#ifdef BENCH_FLINT
+ double timeFlint=0.;
+#endif
+ for (size_t loop=0;loop<iters;loop++){
+ Givaro::Integer::random_exact_2exp(p, b);
+ Givaro::IntPrimeDom IPD;
+ IPD.nextprimein(p);
+ Ints ip; Givaro::Caster<Ints,Givaro::Integer>(ip,p);
+ Givaro::Caster<Givaro::Integer,Ints>(p,ip); // to check consistency
+
+ Field F(ip);
+ size_t lda,ldb,ldc;
+ lda=k;
+ ldb=n;
+ ldc=n;
+
+ typename Field::RandIter Rand(F,seed);
+ typename Field::Element_ptr A,B,C;
+ A= FFLAS::fflas_new(F,m,lda);
+ B= FFLAS::fflas_new(F,k,ldb);
+ C= FFLAS::fflas_new(F,m,ldc);
+
+// for (size_t i=0;i<m;++i)
+// for (size_t j=0;j<k;++j)
+// Rand.random(A[i*lda+j]);
+// for (size_t i=0;i<k;++i)
+// for (size_t j=0;j<n;++j)
+// Rand.random(B[i*ldb+j]);
+// for (size_t i=0;i<m;++i)
+// for (size_t j=0;j<n;++j)
+// Rand.random(C[i*ldc+j]);
+
+ PAR_BLOCK { FFLAS::pfrand(F,Rand, m,k,A,m/size_t(MAX_THREADS)); }
+ PAR_BLOCK { FFLAS::pfrand(F,Rand, k,n,B,k/MAX_THREADS); }
+ PAR_BLOCK { FFLAS::pfzero(F, m,n,C,m/MAX_THREADS); }
+
+
+ Ints alpha,beta;
+ alpha=F.one;
+ beta=F.zero;
+
+
+#ifdef BENCH_FLINT
+ // FLINT MUL //
+ fmpz_t modp,tmp;
+ fmpz_init(modp);
+ fmpz_init(tmp);
+ fmpz_set_mpz(modp, *(reinterpret_cast<const mpz_t*>(&p)));
+ fmpz_mat_t AA,BB,CC,DD;
+ fmpz_mat_init (AA, m, k);
+ fmpz_mat_init (BB, k, n);
+ fmpz_mat_init (CC, m, n);
+ fmpz_mat_init (DD, m, n);
+ fmpz_t aalpha, bbeta;
+ fmpz_set_mpz(aalpha,*(reinterpret_cast<const mpz_t*>(&alpha)));
+ fmpz_set_mpz(bbeta,*(reinterpret_cast<const mpz_t*>(&beta)));
+
+ for (size_t i=0;i<m;++i)
+ for (size_t j=0;j<k;++j)
+ fmpz_set_mpz(fmpz_mat_entry(AA,i,j),*(reinterpret_cast<const mpz_t*>(A+i*lda+j)));
+ for (size_t i=0;i<k;++i)
+ for (size_t j=0;j<n;++j)
+ fmpz_set_mpz(fmpz_mat_entry(BB,i,j),*(reinterpret_cast<const mpz_t*>(B+i*ldb+j)));
+ for (size_t i=0;i<m;++i)
+ for (size_t j=0;j<n;++j)
+ fmpz_set_mpz(fmpz_mat_entry(CC,i,j),*(reinterpret_cast<const mpz_t*>(C+i*ldc+j)));
+ chrono.clear();chrono.start();
+ // DD= A.B
+ fmpz_mat_mul(DD,AA,BB);
+ // CC = beta.C
+ fmpz_mat_scalar_mul_fmpz(CC,CC,bbeta);
+ // CC = CC + DD.alpha
+ fmpz_mat_scalar_addmul_fmpz(CC,DD,aalpha);
+ // CC = CC mod p
+ for (size_t i=0;i<m;++i)
+ for (size_t j=0;j<n;++j)
+ fmpz_mod(fmpz_mat_entry(CC,i,j),fmpz_mat_entry(CC,i,j),modp);
+
+ chrono.stop();
+ timeFlint+=chrono.usertime();
+ fmpz_mat_clear(AA);
+ fmpz_mat_clear(BB);
+#endif
+ //END FLINT CODE //
+ using FFLAS::CuttingStrategy::Recursive;
+ using FFLAS::StrategyParameter::TwoDAdaptive;
+ // RNS MUL_LA
+ chrono.clear();chrono.start();
+// PAR_BLOCK{
+// FFLAS::fgemm(F,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc, SPLITTER(NUM_THREADS,Recursive,TwoDAdaptive) );
+// }
+ {
+ FFLAS::fgemm(F,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,FFLAS::ParSeqHelper::Sequential());
+ }
+
+ chrono.stop();
+ time+=chrono.realtime();
+
+ TimFreivalds.start();
+ bool pass = FFLAS::freivalds(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,k, alpha, A, k, B, n, C,n);
+ TimFreivalds.stop();
+ timev+=TimFreivalds.usertime();
+ if (!pass) {
+ std::cout<<"FAILED"<<std::endl;
+ std::cout << "p:=" << p << ';'<<std::endl;
+ write_matrix(std::cout<<"A:=",p,m,k,A,lda)<<';'<<std::endl;
+ write_matrix(std::cout<<"B:=",p,k,n,B,ldb)<<';'<<std::endl;
+ write_matrix(std::cout<<"C:=",p,m,n,C,ldc)<<';'<<std::endl;
+ }
+
+ FFLAS::fflas_delete(A);
+ FFLAS::fflas_delete(B);
+ FFLAS::fflas_delete(C);
+
+ }
+
+ double Gflops=(2.*double(m)/1000.*double(n)/1000.*double(k)/1000.0) / time * double(iters);
+// Gflops*=p.bitsize()/16.;
+ cout<<typeid(Ints).name()
+ << " | Time: "<< (time/double(iters)) << " (total:" << time <<") Gflops: "<<Gflops<<" | perword: "<< (Gflops*double(p.bitsize()))/64. ;
+ FFLAS::writeCommandString(std::cout << '|' << p << " (" << p.bitsize()<<")|", as) << " | Freivalds: "<< timev/double(iters) << std::endl;
+
+#ifdef BENCH_FLINT
+ cout<<"Time FLINT: "<<timeFlint<<endl;
+#endif
+ return 0;
+ }
+
+
+
+int main(int argc, char** argv){
+ FFLAS::parseArguments(argc,argv,as);
+
+ int r1 = tmain<Givaro::Integer>();
+
+#ifdef BENCH_RECINT
+ r1 += tmain<RecInt::rint<STD_RECINT_SIZE>>();
+#endif
+ return r1;
+}
+
diff --git a/benchmarks/benchmark-fgemm.C b/benchmarks/benchmark-fgemm.C
new file mode 100644
index 0000000..11e23e6
--- /dev/null
+++ b/benchmarks/benchmark-fgemm.C
@@ -0,0 +1,248 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+//#include "goto-def.h"
+
+/* Copyright (c) FFLAS-FFPACK
+* Written by Clément Pernet <clement.pernet at imag.fr>
+* ========LICENCE========
+* This file is part of the library FFLAS-FFPACK.
+*
+* FFLAS-FFPACK is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+* ========LICENCE========
+*/
+
+// Please do not commit with any of these defines on - AB 2015-01-12
+//#define __FFLASFFPACK_USE_TBB
+//#define __FFLASFFPACK_USE_OPENMP
+//#define __FFLASFFPACK_USE_DATAFLOW
+//#define WINO_PARALLEL_TMPS
+//#define __FFLASFFPACK_FORCE_SEQ
+//#define PFGEMM_WINO_SEQ 32
+//#define CLASSIC_SEQ
+#define CLASSIC_HYBRID
+//#define WINO_SEQ
+//#define DEBUG 1
+//#undef NDEBUG
+//#define FFT_PROFILER
+//#define PROFILE_FGEMM_MP
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iostream>
+#include <givaro/modular-balanced.h>
+
+#include "fflas-ffpack/config-blas.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#ifdef __FFLASFFPACK_USE_KAAPI
+#include "libkomp.h"
+#endif
+
+
+using namespace std;
+using namespace FFLAS;
+
+
+int main(int argc, char** argv) {
+
+ size_t iter = 3 ;
+ Givaro::Integer q = 131071 ;
+ size_t m = 2000 ;
+ size_t k = 2000 ;
+ size_t n = 2000 ;
+ int nbw = -1 ;
+ int p=3;
+ int t=MAX_THREADS;
+ int NBK = -1;
+
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INTEGER , &q },
+ { 'm', "-m M", "Set the row dimension of A.", TYPE_INT , &m },
+ { 'k', "-k K", "Set the col dimension of A.", TYPE_INT , &k },
+ { 'n', "-n N", "Set the col dimension of B.", TYPE_INT , &n },
+ { 'w', "-w N", "Set the number of winograd levels (-1 for random).", TYPE_INT , &nbw },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'p', "-p P", "0 for sequential, 1 for 2D iterative, 2 for 2D rec, 3 for 2D rec adaptive, 4 for 3D rc in-place, 5 for 3D rec, 6 for 3D rec adaptive.", TYPE_INT , &p },
+ { 't', "-t T", "number of virtual threads to drive the partition.", TYPE_INT , &t },
+ { 'b', "-b B", "number of numa blocks per dimension for the numa placement", TYPE_INT , &NBK },
+ END_OF_ARGUMENTS
+ };
+
+ parseArguments(argc,argv,as);
+
+ if (NBK==-1) NBK = t;
+// typedef Givaro::Modular<Givaro::Integer> Field;
+// typedef Givaro::ModularBalanced<int32_t> Field;
+// typedef Givaro::ModularBalanced<float> Field;
+ typedef Givaro::ModularBalanced<double> Field;
+// typedef Givaro::Modular<Givaro::Integer> Field;
+ typedef Field::Element Element;
+
+ Field F(q);
+
+ Timer chrono, TimFreivalds;
+ double time=0.0, timev=0.0;
+
+ Element * A, * B, * C;
+
+ Field::RandIter G(F);
+ A = fflas_new(F,m,k,Alignment::CACHE_PAGESIZE);
+//#pragma omp parallel for collapse(2) schedule(runtime)
+ PAR_BLOCK { pfrand(F,G, m,k,A,m/size_t(NBK)); }
+
+ B = fflas_new(F,k,n,Alignment::CACHE_PAGESIZE);
+//#pragma omp parallel for collapse(2) schedule(runtime)
+ PAR_BLOCK { pfrand(F,G, k,n,B,k/NBK); }
+
+ C = fflas_new(F,m,n,Alignment::CACHE_PAGESIZE);
+
+//#pragma omp parallel for collapse(2) schedule(runtime)
+ PAR_BLOCK { pfzero(F, m,n,C,m/NBK); }
+
+
+ for (size_t i=0;i<=iter;++i){
+
+ // if (argc > 4){
+ // A = read_field (F, argv[4], &n, &n);
+ // }
+ // else{
+
+ chrono.clear();
+ if (p && p!=7){
+ // CuttingStrategy meth = RECURSIVE;
+ // StrategyParameter strat = THREADS;
+
+ typedef CuttingStrategy::Block block;
+ typedef CuttingStrategy::Recursive rec;
+ typedef StrategyParameter::Threads threads;
+ typedef StrategyParameter::TwoD twod;
+ typedef StrategyParameter::TwoDAdaptive twoda;
+ typedef StrategyParameter::ThreeD threed;
+ typedef StrategyParameter::ThreeDAdaptive threeda;
+ typedef StrategyParameter::ThreeDInPlace threedip;
+ PAR_BLOCK{
+ if (i) chrono.start();
+ switch (p){
+ case 1:{
+ MMHelper<Field, MMHelperAlgo::Winograd, typename ModeTraits<Field>::value, ParSeqHelper::Parallel<block,threads> > WH(F,nbw, SPLITTER(t,block,threads));
+ fgemm (F, FflasNoTrans, FflasNoTrans, m,n,k, F.one, A, k, B, n, F.zero, C,n, WH);
+ break;}
+ case 2:{
+ MMHelper<Field, MMHelperAlgo::Winograd, typename ModeTraits<Field>::value, ParSeqHelper::Parallel<rec,twod> > WH(F,nbw, SPLITTER(t,rec,twod));
+ fgemm (F, FflasNoTrans, FflasNoTrans, m,n,k, F.one, A, k, B, n, F.zero, C,n, WH);
+ break;
+ }
+ case 3:{
+ MMHelper<Field, MMHelperAlgo::Winograd, typename ModeTraits<Field>::value, ParSeqHelper::Parallel<rec,twoda> > WH(F,nbw, SPLITTER(t,rec,twoda));
+ fgemm (F, FflasNoTrans, FflasNoTrans, m,n,k, F.one, A, k, B, n, F.zero, C,n, WH);
+ break;
+ }
+ case 4:{
+ MMHelper<Field, MMHelperAlgo::Winograd, typename ModeTraits<Field>::value, ParSeqHelper::Parallel<rec,threedip> > WH(F,nbw, SPLITTER(t,rec,threedip));
+ fgemm (F, FflasNoTrans, FflasNoTrans, m,n,k, F.one, A, k, B, n, F.zero, C,n, WH);
+ break;
+ }
+ case 5:{
+ MMHelper<Field, MMHelperAlgo::Winograd, typename ModeTraits<Field>::value, ParSeqHelper::Parallel<rec,threed> > WH(F,nbw, SPLITTER(t,rec,threed));
+ fgemm (F, FflasNoTrans, FflasNoTrans, m,n,k, F.one, A, k, B, n, F.zero, C,n, WH);
+ break;
+ }
+ case 6:{
+ MMHelper<Field, MMHelperAlgo::Winograd, typename ModeTraits<Field>::value, ParSeqHelper::Parallel<rec,threeda> > WH(F,nbw, SPLITTER(t,rec,threeda));
+ fgemm (F, FflasNoTrans, FflasNoTrans, m,n,k, F.one, A, k, B, n, F.zero, C,n, WH);
+ break;
+ }
+ default:{
+ MMHelper<Field, MMHelperAlgo::Winograd, typename ModeTraits<Field>::value, ParSeqHelper::Parallel<block,threads> > WH(F,nbw, SPLITTER(t,block,threads));
+ fgemm (F, FflasNoTrans, FflasNoTrans, m,n,k, F.one, A, k, B, n, F.zero, C,n, WH);
+ break;
+ }
+ }
+ }
+ if (i) {chrono.stop(); time+=chrono.realtime();}
+ }else{
+ if(p==7){
+
+ int nrec = 0;
+ int dim = m;
+ // if(dim < 19000)
+ nrec--;
+ while(dim >= __FFLASFFPACK_WINOTHRESHOLD*2){
+ dim=dim/2;
+ nrec++;
+ }
+ nrec=std::max(1,nrec);
+ // std::cout<<" WINO_THREShold"<<__FFLASFFPACK_WINOTHRESHOLD<<" nrec = "<<nrec<<" dim = "<<dim<<std::endl;
+ if(nbw != -1)
+ nrec=nbw;
+ nbw=nrec;
+ if (i) chrono.start();
+ PAR_BLOCK
+ {
+ MMHelper<Field, MMHelperAlgo::WinogradPar,ModeTraits<Field>::value,ParSeqHelper::Parallel<> > WH (F, nrec, ParSeqHelper::Parallel<>(t));
+ fgemm (F, FflasNoTrans, FflasNoTrans, m,n,k, F.one, A, k, B, n, F.zero, C,n,WH);
+ }
+ if (i) {chrono.stop(); time+=chrono.realtime();}
+
+
+ // MMHelper<Field, MMHelperAlgo::WinogradPar>
+ // WH (F, nbw, ParSeqHelper::Sequential());
+ // // cout<<"wino parallel"<<endl;
+ // if (i) chrono.start();
+ // PAR_BLOCK
+ // {
+ // fgemm (F, FflasNoTrans, FflasNoTrans, m,n,k, F.one, A, k, B, n, F.zero, C,n,WH);
+ // }
+ // if (i) {chrono.stop(); time+=chrono.realtime();}
+ }
+ else{
+
+ MMHelper<Field,MMHelperAlgo::Winograd>//,
+ //typename FieldTraits<Field>::value,
+ //ParSeqHelper::Sequential>
+ WH (F, nbw, ParSeqHelper::Sequential());
+ if (i) chrono.start();
+ fgemm (F, FflasNoTrans, FflasNoTrans, m,n,k, F.one, A, k, B, n, F.zero, C,n,WH);
+ if (i) {chrono.stop(); time+=chrono.realtime();}
+ }
+ }
+
+ TimFreivalds.clear();
+ TimFreivalds.start();
+
+ bool pass = freivalds(F, FflasNoTrans, FflasNoTrans, m,n,k, F.one, A, k, B, n, C,n);
+ TimFreivalds.stop();
+ timev+=TimFreivalds.usertime();
+ if (!pass)
+ std::cout<<"FAILED"<<std::endl;
+}
+ fflas_delete( A);
+ fflas_delete( B);
+ fflas_delete( C);
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+ std::cout << "Time: " << time / double(iter)
+ << " Gflops: " << (2.*double(m)/1000.*double(n)/1000.*double(k)/1000.0) / time * double(iter);
+ writeCommandString(std::cout, as) << std::endl;
+
+#if DEBUG
+ std::cout<<"Freivalds vtime: "<<timev/(double)iter<<std::endl;
+#endif
+
+ return 0;
+}
+
diff --git a/benchmarks/benchmark-fgemv-mp.C b/benchmarks/benchmark-fgemv-mp.C
new file mode 100644
index 0000000..2a3cef6
--- /dev/null
+++ b/benchmarks/benchmark-fgemv-mp.C
@@ -0,0 +1,189 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#if not defined(MG_DEFAULT)
+#define MG_DEFAULT MG_ACTIVE
+#endif
+#if not defined(STD_RECINT_SIZE)
+#define STD_RECINT_SIZE 8
+#endif
+
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iostream>
+#include <typeinfo>
+#include <vector>
+#include <string>
+using namespace std;
+
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/utils/args-parser.h"
+#include "givaro/modular-integer.h"
+#include "givaro/givcaster.h"
+#include "fflas-ffpack/paladin/parallel.h"
+#ifdef BENCH_RECINT
+#include "recint/recint.h"
+#endif
+
+
+template<typename T>
+std::ostream& write_matrix(std::ostream& out, Givaro::Integer p, size_t m, size_t n, T* C, size_t ldc){
+
+ size_t www(size_t((double(p.bitsize())*log(2.))/log(10.)));
+ out<<"Matrix("<<m<<','<<n<<",[[";
+ out.width(www+1);
+ out<<std::right<<C[0];
+ for (size_t j=1;j<n;++j){
+ out<<',';
+ out.width(www);
+ out<<std::right<<C[j];
+ }
+ out<<']';
+ for (size_t i=1;i<m;++i){
+ out<<endl<<",[";
+ out.width(www+1);
+ out<<std::right<<C[i*ldc];
+ for (size_t j=1;j<n;++j){
+ out<<',';
+ out.width(www);
+ out<<std::right<<C[i*ldc+j];
+ }
+ out<<']';
+ }
+ return out<<"])";
+}
+
+
+static size_t iters = 3 ;
+static Givaro::Integer q = -1 ;
+static unsigned long b = 512 ;
+static size_t m = 512 ;
+static size_t k = 512 ;
+static int nbw = -1 ;
+static size_t seed= time(NULL);
+static Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INTEGER , &q },
+ { 'b', "-b B", "Set the bitsize of the random characteristic.", TYPE_INT , &b },
+ { 'm', "-m M", "Set the dimension m of the matrix.", TYPE_INT , &m },
+ { 'k', "-k K", "Set the dimension k of the matrix.", TYPE_INT , &k },
+ { 'w', "-w N", "Set the number of winograd levels (-1 for random).", TYPE_INT , &nbw },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iters },
+ { 's', "-s S", "Sets seed.", TYPE_INT , &seed },
+ END_OF_ARGUMENTS
+};
+
+template<typename Ints>
+int tmain(){
+ srand( (int)seed);
+ srand48(seed);
+ Givaro::Integer::seeding(seed);
+
+ typedef Givaro::Modular<Ints> Field;
+ Givaro::Integer p;
+ FFLAS::Timer chrono, TimFreivalds;
+ double time=0.;
+ for (size_t loop=0;loop<iters;loop++){
+ Givaro::Integer::random_exact_2exp(p, b);
+ Givaro::IntPrimeDom IPD;
+ IPD.nextprimein(p);
+ Ints ip; Givaro::Caster<Ints,Givaro::Integer>(ip,p);
+ Givaro::Caster<Givaro::Integer,Ints>(p,ip); // to check consistency
+
+ Field F(ip);
+ size_t lda,ldb,ldc;
+ lda=k;
+ ldb=1;
+ ldc=1;
+
+ typename Field::RandIter Rand(F,seed);
+ typename Field::Element_ptr A,B,C;
+ A= FFLAS::fflas_new(F,m,lda);
+ B= FFLAS::fflas_new(F,k,ldb);
+ C= FFLAS::fflas_new(F,m,ldc);
+
+// for (size_t i=0;i<m;++i)
+// for (size_t j=0;j<k;++j)
+// Rand.random(A[i*lda+j]);
+// for (size_t i=0;i<k;++i)
+// for (size_t j=0;j<n;++j)
+// Rand.random(B[i*ldb+j]);
+// for (size_t i=0;i<m;++i)
+// for (size_t j=0;j<n;++j)
+// Rand.random(C[i*ldc+j]);
+
+ PAR_BLOCK { FFLAS::pfrand(F,Rand, m,k,A,m/size_t(MAX_THREADS)); }
+ PAR_BLOCK { FFLAS::pfrand(F,Rand, k,1,B,k/MAX_THREADS); }
+ PAR_BLOCK { FFLAS::pfzero(F, m,1,C,m/MAX_THREADS); }
+
+
+ Ints alpha,beta;
+ alpha=F.one;
+ beta=F.zero;
+
+
+ using FFLAS::CuttingStrategy::Recursive;
+ using FFLAS::StrategyParameter::TwoDAdaptive;
+ // RNS MUL_LA
+ chrono.clear();chrono.start();
+ {
+ FFLAS::fgemv(F,FFLAS::FflasNoTrans,m,k,alpha,A,lda,B,ldb,beta,C,ldc);
+ }
+ chrono.stop();
+ time+=chrono.realtime();
+
+ FFLAS::fflas_delete(A);
+ FFLAS::fflas_delete(B);
+ FFLAS::fflas_delete(C);
+
+ }
+
+ double Mflops=((2.*double(m)-1)/1000.*double(k)/1000.0) /time * double(iters);
+// Mflops*=p.bitsize()/16.;
+ cout<<typeid(Ints).name()
+ << " | Time: "<< (time/double(iters)) << " (total:" << time <<") | Mflops: "<<Mflops<<" | perword: "<< (Mflops*double(p.bitsize()))/64. ;
+ FFLAS::writeCommandString(std::cout << " | " << p << " (" << p.bitsize()<<")|", as) << std::endl;
+
+ return 0;
+}
+
+
+
+int main(int argc, char** argv){
+ FFLAS::parseArguments(argc,argv,as);
+
+ int r1 = tmain<Givaro::Integer>();
+
+#ifdef BENCH_RECINT
+ r1 += tmain<RecInt::rint<STD_RECINT_SIZE>>();
+#endif
+ return r1;
+}
+
diff --git a/benchmarks/benchmark-fspmm.C b/benchmarks/benchmark-fspmm.C
new file mode 100644
index 0000000..817a6d0
--- /dev/null
+++ b/benchmarks/benchmark-fspmm.C
@@ -0,0 +1,199 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* Copyright (c) FFLAS-FFPACK
+* Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+* ========LICENCE========
+* This file is part of the library FFLAS-FFPACK.
+*
+* FFLAS-FFPACK is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+* ========LICENCE========
+*/
+
+#include <iostream>
+#include <vector>
+#include <sstream>
+#include <cstdio>
+#include <cstdlib>
+
+#include "fflas-ffpack/config-blas.h"
+// #include "fflas-ffpac/field/modular-double.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/fflas/fflas_sparse.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#ifdef __FFLASFFPACK_USE_OPENMP
+typedef FFLAS::OMPTimer TTimer;
+#else
+typedef FFLAS::Timer TTimer;
+#endif
+
+using namespace std;
+using namespace FFLAS;
+
+template <typename T> T from_string(std::string const &s) {
+ std::stringstream ss(s);
+ T result;
+ ss >> result; // TODO handle errors
+ return result;
+}
+
+
+template <class MatT, class Field, class IndexT>
+std::pair<double, uint64_t> test_fspmm(size_t iter, const Field &F, IndexT *row, IndexT *col,
+ typename Field::Element_ptr dat, index_t rowdim, index_t coldim, uint64_t nnz,
+ int blocksize, typename Field::Element_ptr x, int ldx,
+ typename Field::Element beta, typename Field::Element_ptr y, int ldy) {
+ MatT matrix;
+ sparse_init(F, matrix, row, col, dat, rowdim, coldim, nnz);
+ TTimer time;
+ time.clear();
+ time.start();
+ for (size_t i = 0; i < iter; ++i)
+ fspmm(F, matrix, blocksize, x, ldx, 1, y, ldy);
+ time.stop();
+ sparse_delete(matrix);
+ return make_pair(time.usertime(), matrix.nElements);
+}
+
+template <class T1, class T2, class T> void print_res(pair<T1, T2> &p, size_t iter, T as, int blocksize) {
+// cout << 2*p.second*blocksize*iter << endl;
+ std::cout << "Time: " << p.first / double(iter)
+ << " Gflops: " << ((2*blocksize*p.second)/(1000000.*p.first))*(double(iter)/1000) ;
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
+}
+
+int main(int argc, char **argv) {
+
+ using Field = Givaro::Modular<double>;
+ using Element = typename Field::Element;
+
+ size_t iter = 10;
+ int q = 1009;
+ int blocksize = 4;
+ int s = 0;
+ std::string matrixFile = "";
+
+ Argument as[] = { { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT, &q },
+ { 'b', "-b Q", "Set the block size.", TYPE_INT, &blocksize },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT, &iter },
+ { 's', "-s S", "Compute and print matrix statistics.", TYPE_INT, &s },
+ { 'f', "-f FILE", "Set matrix file.", TYPE_STR, &matrixFile },
+ END_OF_ARGUMENTS };
+
+ // matrixFile = "matrix/cis.mk8-8.sms";
+ // matrixFile = "matrix/M06-D9.sms";
+ // matrixFile = "matrix/GL7d17.sms";
+ // matrixFile = "data/mat11.sms";
+
+ FFLAS::parseArguments(argc, argv, as);
+
+ // cout << matrixFile << endl;
+
+ Field F(q);
+
+ index_t *row = nullptr, *col = nullptr;
+ typename Field::Element_ptr dat;
+ index_t rowdim, coldim;
+ uint64_t nnz;
+
+ index_t * st = nullptr ;
+ readSmsFormat(matrixFile, F, st, col, dat, rowdim, coldim, nnz);
+ row = fflas_new<index_t>(nnz);
+ for (index_t j = 0 ; j < rowdim ; ++j) {
+ for (index_t k = st[j] ; k < st[j+1] ; ++k)
+ row[k] = j ;
+ }
+
+ if (s) {
+ // auto stats = sparse_details::getStat(F, row, col, dat, rowdim, coldim, nnz);
+ // std::cout << "Sparse Matrix statistics : " << std::endl;
+ // stats.print();
+ std::cout << std::endl;
+ }
+
+ auto x = FFLAS::fflas_new(F, coldim, blocksize, Alignment::CACHE_LINE);
+ auto y = FFLAS::fflas_new(F, rowdim, blocksize, Alignment::CACHE_LINE);
+
+ for (size_t i = 0; i < coldim * blocksize; ++i) {
+ x[i] = 1;
+ }
+
+ for (size_t i = 0; i < rowdim * blocksize; ++i) {
+ y[i] = 0;
+ }
+
+ // auto coo = test_fspmm<Sparse<Field, FFLAS::SparseMatrix_t::COO>>(iter, F, row, col, dat, rowdim, coldim, nnz,
+ // blocksize, x, blocksize, 1, y, blocksize);
+ // cout << "COO : ";
+ // print_res(coo, iter, as);
+
+ // auto coozo = test_fspmm<Sparse<Field, FFLAS::SparseMatrix_t::COO_ZO>>(iter, F, row, col, dat, rowdim, coldim,
+ // nnz, blocksize, x, blocksize, 1, y, blocksize);
+ // cout << "COO_ZO : ";
+ // print_res(coozo, iter, as);
+ auto csr = test_fspmm<Sparse<Field, FFLAS::SparseMatrix_t::CSR>>(iter, F, row, col, dat, rowdim, coldim, nnz,
+ blocksize, x, blocksize, 1, y, blocksize);
+ cout << "CSR : ";
+ print_res(csr, iter, as, blocksize);
+ auto ell = test_fspmm<Sparse<Field, FFLAS::SparseMatrix_t::ELL>>(iter, F, row, col, dat, rowdim, coldim, nnz,
+ blocksize, x, blocksize, 1, y, blocksize);
+ cout << "ELL : ";
+ print_res(ell, iter, as, blocksize);
+ auto ellzo = test_fspmm<Sparse<Field, FFLAS::SparseMatrix_t::ELL_ZO>>(iter, F, row, col, dat, rowdim, coldim, nnz,
+ blocksize, x, blocksize, 1, y, blocksize);
+ cout << "ELL_ZO : ";
+ print_res(ellzo, iter, as, blocksize);
+ // auto csrzo = test_fspmm<Sparse<Field, FFLAS::SparseMatrix_t::CSR_ZO>>(iter, F, row, col, dat, rowdim, coldim,
+ // nnz, blocksize, x, blocksize, 1, y, blocksize);
+ // cout << "CSR_ZO : ";
+ // print_res(csrzo, iter, as);
+ // auto ell = test_fspmm<Sparse<Field, FFLAS::SparseMatrix_t::ELL>>(iter, F, row, col, dat, rowdim, coldim, nnz,
+ // blocksize, x, blocksize, 1, y, blocksize);
+ // cout << "ELL : ";
+ // print_res(ell, iter, as);
+ // auto ellzo = test_fspmm<Sparse<Field, FFLAS::SparseMatrix_t::ELL_ZO>>(iter, F, row, col, dat, rowdim, coldim,
+ // nnz, blocksize, x, blocksize, 1, y, blocksize);
+ // cout << "ELL_ZO : ";
+ // print_res(ellzo, iter, as);
+ auto hybzo = test_fspmm<Sparse<Field, FFLAS::SparseMatrix_t::HYB_ZO>>(iter, F, row, col, dat, rowdim, coldim, nnz,
+ blocksize, x, blocksize, 1, y, blocksize);
+ cout << "HYB_ZO : ";
+ print_res(hybzo, iter, as, blocksize);
+ auto csrhyb = test_fspmm<Sparse<Field, FFLAS::SparseMatrix_t::CSR_HYB>>(iter, F, row, col, dat, rowdim, coldim, nnz,
+ blocksize, x, blocksize, 1, y, blocksize);
+ cout << "CSR_HYB : ";
+ print_res(csrhyb, iter, as, blocksize);
+ // for (size_t i = 0; i < 10*blocksize; ++i) {
+ // std::cout << y[i] << " ";
+ // }
+ // std::cout << std::endl;
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+
+ // std::cout << "Time: " << coo.first / double(iter)
+ // << " Gflops: " << (2*coo.second)/1000000000. / coo.first * double(iter);
+ // FFLAS::writeCommandString(std::cout, as) << std::endl;
+
+ // std::cout << "Time: " << csr.first / double(iter)
+ // << " Gflops: " << (2*csr.second)/1000000000. / csr.first * double(iter);
+ // FFLAS::writeCommandString(std::cout, as) << std::endl;
+ fflas_delete(x);
+ fflas_delete(y);
+ return 0;
+}
diff --git a/benchmarks/benchmark-fspmv.C b/benchmarks/benchmark-fspmv.C
new file mode 100644
index 0000000..ed62295
--- /dev/null
+++ b/benchmarks/benchmark-fspmv.C
@@ -0,0 +1,194 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* Copyright (c) FFLAS-FFPACK
+* Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+* ========LICENCE========
+* This file is part of the library FFLAS-FFPACK.
+*
+* FFLAS-FFPACK is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+* ========LICENCE========
+*/
+
+#include <iostream>
+#include <vector>
+#include <sstream>
+#include <cstdio>
+#include <cstdlib>
+
+#include "givaro/modular.h"
+#include "givaro/modular-balanced.h"
+
+#include "fflas-ffpack/config-blas.h"
+// #include "fflas-ffpac/field/modular-double.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/fflas/fflas_sparse.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#ifdef __FFLASFFPACK_USE_OPENMP
+typedef FFLAS::OMPTimer TTimer;
+#else
+typedef FFLAS::Timer TTimer;
+#endif
+
+using namespace std;
+using namespace FFLAS;
+
+template <typename T> T from_string(std::string const &s) {
+ std::stringstream ss(s);
+ T result;
+ ss >> result; // TODO handle errors
+ return result;
+}
+
+
+template <class MatT, class Field, class IndexT>
+std::pair<double, uint64_t> test_fspmv(size_t iter, const Field &F, IndexT *row, IndexT *col,
+ typename Field::Element_ptr dat, index_t rowdim, index_t coldim, uint64_t nnz,
+ typename Field::Element_ptr x, typename Field::Element_ptr y,
+ typename Field::Element beta) {
+ MatT matrix;
+ sparse_init(F, matrix, row, col, dat, rowdim, coldim, nnz);
+ TTimer time;
+ time.clear();
+ time.start();
+ for (size_t i = 0; i < iter; ++i)
+ fspmv(F, matrix, x, 1, y);
+ time.stop();
+ sparse_delete(matrix);
+ return make_pair(time.usertime(), matrix.nElements);
+}
+
+template <class T1, class T2, class T> void print_res(pair<T1, T2> &p, size_t iter, T as, int blocksize = 1) {
+ std::cout << "Time: " << p.first / double(iter)
+ << " Gflops: " << (2 * blocksize * p.second) / 1000000000. / p.first * double(iter);
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
+}
+
+int main(int argc, char **argv) {
+
+ using Field = Givaro::Modular<int64_t,int64_t>;
+ using Element = typename Field::Element;
+
+ size_t iter = 10;
+ Givaro::Integer q = 1009;
+ int s = 0;
+ std::string matrixFile = "";
+
+ Argument as[] = { { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INTEGER, &q },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT, &iter },
+ { 's', "-s S", "Compute and print matrix statistics.", TYPE_INT, &s },
+ { 'f', "-f FILE", "Set matrix file.", TYPE_STR, &matrixFile },
+ END_OF_ARGUMENTS };
+
+ // matrixFile = "matrix/cis.mk8-8.sms";
+ // matrixFile = "matrix/GL7d17.sms";
+ // matrixFile = "data/mat11.sms";
+
+ FFLAS::parseArguments(argc, argv, as);
+
+ // cout << matrixFile << endl;
+
+ Field F(q);
+
+ index_t *row = nullptr, *col = nullptr;
+ typename Field::Element_ptr dat;
+ index_t rowdim = 0, coldim = 0;
+ uint64_t nnz;
+
+ if ( (matrixFile.find(".sms") != std::string::npos) || (matrixFile.find(".smf") != std::string::npos)) {
+ index_t * st = nullptr ;
+ readSmsFormat(matrixFile, F, st, col, dat, rowdim, coldim, nnz);
+ row = fflas_new<index_t>(nnz);
+ for (index_t j = 0 ; j < rowdim ; ++j) {
+ for (index_t k = st[j] ; k < st[j+1] ; ++k)
+ row[k] = j ;
+ }
+ } else if (matrixFile.find(".spr") != std::string::npos) {
+ readSprFormat(matrixFile, F, row, col, dat, rowdim, coldim, nnz);
+ }
+
+ if (s) {
+ //auto stats = sparse_details::getStat(F, row, col, dat, rowdim, coldim, nnz);
+ //std::cout << "Sparse Matrix statistics : " << std::endl;
+ //stats.print();
+ //std::cout << std::endl;
+ }
+
+ auto x = FFLAS::fflas_new(F, coldim, 1, Alignment::CACHE_LINE);
+ auto y = FFLAS::fflas_new(F, rowdim, 1, Alignment::CACHE_LINE);
+
+ for (size_t i = 0; i < coldim; ++i) {
+ x[i] = 1;
+ }
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y[i] = 0;
+ }
+
+ auto coo =
+ test_fspmv<Sparse<Field, FFLAS::SparseMatrix_t::COO>>(iter, F, row, col, dat, rowdim, coldim, nnz, x, y, 1);
+ cout << "COO : ";
+ print_res(coo, iter, as);
+ auto coozo =
+ test_fspmv<Sparse<Field, FFLAS::SparseMatrix_t::COO_ZO>>(iter, F, row, col, dat, rowdim, coldim, nnz, x, y, 1);
+ cout << "COO_ZO : ";
+ print_res(coozo, iter, as);
+ auto csr =
+ test_fspmv<Sparse<Field, FFLAS::SparseMatrix_t::CSR>>(iter, F, row, col, dat, rowdim, coldim, nnz, x, y, 1);
+ cout << "CSR : ";
+ print_res(csr, iter, as);
+ auto csrzo =
+ test_fspmv<Sparse<Field, FFLAS::SparseMatrix_t::CSR_ZO>>(iter, F, row, col, dat, rowdim, coldim, nnz, x, y, 1);
+ cout << "CSR_ZO : ";
+ print_res(csrzo, iter, as);
+ auto ell =
+ test_fspmv<Sparse<Field, FFLAS::SparseMatrix_t::ELL>>(iter, F, row, col, dat, rowdim, coldim, nnz, x, y, 1);
+ cout << "ELL : ";
+ print_res(ell, iter, as);
+ auto ellzo =
+ test_fspmv<Sparse<Field, FFLAS::SparseMatrix_t::ELL_ZO>>(iter, F, row, col, dat, rowdim, coldim, nnz, x, y, 1);
+ cout << "ELL_ZO : ";
+ print_res(ellzo, iter, as);
+ // auto ellsimd = test_fspmv<Sparse<Field, FFLAS::SparseMatrix_t::ELL_simd>>(iter, F, row, col, dat, rowdim, coldim,
+ // nnz, x, y, 1);
+ // cout << "ELL_simd : ";
+ // print_res(ellsimd, iter, as);
+ // auto ellsimdzo = test_fspmv<Sparse<Field, FFLAS::SparseMatrix_t::ELL_simd_ZO>>(iter, F, row, col, dat, rowdim,
+ // coldim, nnz, x, y, 1);
+ // cout << "ELL_simd_ZO : ";
+ // print_res(ellsimdzo, iter, as);
+ auto csrhyb =
+ test_fspmv<Sparse<Field, FFLAS::SparseMatrix_t::CSR_HYB>>(iter, F, row, col, dat, rowdim, coldim, nnz, x, y, 1);
+ cout << "CSR_HYB : ";
+ print_res(csrhyb, iter, as);
+ auto hybzo =
+ test_fspmv<Sparse<Field, FFLAS::SparseMatrix_t::HYB_ZO>>(iter, F, row, col, dat, rowdim, coldim, nnz, x, y, 1);
+ cout << "HYB_ZO : ";
+ print_res(hybzo, iter, as);
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+
+ // std::cout << "Time: " << coo.first / double(iter)
+ // << " Gflops: " << (2*coo.second)/1000000000. / coo.first * double(iter);
+ // FFLAS::writeCommandString(std::cout, as) << std::endl;
+
+ // std::cout << "Time: " << csr.first / double(iter)
+ // << " Gflops: " << (2*csr.second)/1000000000. / csr.first * double(iter);
+ // FFLAS::writeCommandString(std::cout, as) << std::endl;
+ return 0;
+}
diff --git a/benchmarks/benchmark-ftrsm-mp.C b/benchmarks/benchmark-ftrsm-mp.C
new file mode 100644
index 0000000..864d82a
--- /dev/null
+++ b/benchmarks/benchmark-ftrsm-mp.C
@@ -0,0 +1,105 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iostream>
+#include <vector>
+#include <string>
+using namespace std;
+
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/utils/args-parser.h"
+#include "givaro/modular-integer.h"
+
+int main(int argc, char** argv){
+ srand((int)time(NULL));
+ srand48(time(NULL));
+
+ static size_t iters = 3 ;
+ static Givaro::Integer q = -1 ;
+ static unsigned long b = 512 ;
+ static size_t m = 512 ;
+ static size_t n = 512 ;
+ static Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INTEGER , &q },
+ { 'b', "-b B", "Set the bitsize of the random characteristic.", TYPE_INT , &b },
+ { 'm', "-m M", "Set the dimension m of the matrix.", TYPE_INT , &m },
+ { 'n', "-n N", "Set the dimension n of the matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iters },
+ END_OF_ARGUMENTS
+ };
+ FFLAS::parseArguments(argc,argv,as);
+
+ size_t seed= time(NULL);
+ typedef Givaro::Modular<Givaro::Integer> Field;
+ FFLAS::Timer chrono;
+ double time=0.;
+ Givaro::Integer p;
+ Givaro::IntPrimeDom IPD;
+
+ for (size_t i=0;i<iters;i++) {
+
+ Givaro::Integer::random_exact_2exp(p, b);
+ IPD.prevprimein(p);
+ Field F(p);
+ size_t lda,ldb;
+ lda=m;
+ ldb=n;
+
+ typename Field::RandIter Rand(F,seed);
+ Field::Element_ptr A,B;
+ A= FFLAS::fflas_new(F,m,lda);
+ B= FFLAS::fflas_new(F,m,ldb);
+
+ for (size_t ii=0;ii<m*m;++ii)
+ Rand.random(A[ii]);
+ for (size_t ii=0;ii<m*n;++ii)
+ Rand.random(B[ii]);
+
+ Givaro::Integer alpha;
+ alpha=1;
+
+ chrono.clear();chrono.start();
+ FFLAS::ftrsm(F,FFLAS::FflasRight, FFLAS::FflasUpper,FFLAS::FflasNoTrans,FFLAS::FflasUnit, m,n,alpha,A,lda,B,ldb);
+ chrono.stop();
+ time+=chrono.usertime();
+
+ FFLAS::fflas_delete(A);
+ FFLAS::fflas_delete(B);
+ }
+ double Gflops=(1.*double(m)/1000.*double(m)/1000.*double(n)/1000.0) / chrono.usertime() * double(iters);
+ Gflops*=p.bitsize()/16.;
+ cout<<"Time: "<<time/iters<<" Gflops: "<<Gflops<<endl;
+
+
+
+ return 0;
+}
+
diff --git a/benchmarks/benchmark-ftrsm.C b/benchmarks/benchmark-ftrsm.C
new file mode 100644
index 0000000..21009c8
--- /dev/null
+++ b/benchmarks/benchmark-ftrsm.C
@@ -0,0 +1,151 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* Copyright (c) FFLAS-FFPACK
+ * Written by Clément Pernet <clement.pernet at imag.fr>
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ */
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iostream>
+#include <givaro/modular.h>
+
+#include "fflas-ffpack/fflas-ffpack.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+using namespace std;
+
+int main(int argc, char** argv) {
+
+ size_t iter = 3;
+ int q = 1009;
+ int m = 2000 ;
+ int n = 2000;
+ std::string file1 = "";
+ std::string file2 = "";
+ int t=MAX_THREADS;
+ int NBK = -1;
+ int p = 3; // 0 for sequential 1 for pIter-sRec ; 2 for pRec; 3 for hybrid
+
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT , &q },
+ { 'm', "-m M", "Set the row dimension of the RHS matrix.", TYPE_INT , &m },
+ { 'n', "-n N", "Set the col dimension of the RHS matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'f', "-f FILE", "Set the first input file (empty for random).", TYPE_STR, &file1 },
+ { 'g', "-g FILE", "Set the second input file (empty for random).", TYPE_STR, &file2 },
+ { 't', "-t T", "number of virtual threads to drive the partition.", TYPE_INT, &t },
+ { 'b', "-b B", "number of numa blocks per dimension for the numa placement", TYPE_INT, &NBK },
+ { 'p', "-p P", "0 for sequential, 1 for Iterative, 2 for Recursive, 3 for Hybrid.", TYPE_INT , &p },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+ if (NBK==-1) NBK = t;
+
+ typedef Givaro::ModularBalanced<double> Field;
+ typedef Field::Element Element;
+
+ Field F(q);
+ Element * A;
+ Element * B;
+
+ FFLAS::Timer chrono;
+ double time=0.0;
+ Field::RandIter G(F);
+ // if (argc > 5){
+ // A = read_field (F, argv[5], &n, &n);
+ // }
+ // else{
+ if (!file1.empty()){
+ A = read_field (F, file1.c_str(), &n, &n);
+ }
+ else{
+ A = FFLAS::fflas_new (F,m,m,Alignment::CACHE_PAGESIZE);
+ PAR_BLOCK{ FFLAS::pfrand(F,G,m,m,A,m/NBK); }
+
+ for (size_t k=0;k<(size_t)m;++k)
+ while (F.isZero( G.random(*(A+k*(m+1)))));
+ }
+
+ if (!file2.empty()){
+ B = read_field (F, file2.c_str(), &m, &n);
+ }
+ else{
+ B = FFLAS::fflas_new(F,m,n,Alignment::CACHE_PAGESIZE);
+ PAR_BLOCK{ FFLAS::pfrand(F,G,m,n,B,m/NBK); }
+ }
+ //}
+
+ for (size_t i=0;i<=iter;++i){
+ chrono.clear();
+ if (i) chrono.start();
+
+ if (!p){
+ FFLAS::ParSeqHelper::Sequential H;
+ FFLAS::ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasLower,
+ FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
+ m,n, F.one, A, m, B, n, H);
+ }
+ else{
+ FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads> PSH(t);
+ PAR_BLOCK{
+ switch (p) {
+ case 1: {
+ FFLAS::TRSMHelper<FFLAS::StructureHelper::Iterative,
+ FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads> >
+ PH (PSH);
+ FFLAS::ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasLower,
+ FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
+ m,n, F.one, A, m, B, n, PH);
+ break;}
+ case 2: {FFLAS::TRSMHelper<FFLAS::StructureHelper::Recursive,
+ FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads> >
+ PH (PSH);
+ FFLAS::ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasLower,
+ FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
+ m,n, F.one, A, m, B, n, PH);
+ break;}
+ case 3:
+ FFLAS::TRSMHelper<FFLAS::StructureHelper::Hybrid, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads> >
+ PH (PSH);
+ FFLAS::ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasLower,
+ FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
+ m,n, F.one, A, m, B, n, PH);
+ break;
+ }
+
+ }
+ }
+ if (i) {chrono.stop(); time+=chrono.realtime();}
+ }
+
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( B);
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+ std::cout << "Time: " << time / double(iter)
+ << " Gflops: " << (double(m)/1000.*double(m)/1000.*double(n)/1000.0) / time * double(iter);
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
+
+ return 0;
+}
diff --git a/benchmark/src/FFLAS_FFPACK/check-ftrtri.C b/benchmarks/benchmark-ftrtri.C
similarity index 62%
rename from benchmark/src/FFLAS_FFPACK/check-ftrtri.C
rename to benchmarks/benchmark-ftrtri.C
index 9b5a60d..7575cc1 100644
--- a/benchmark/src/FFLAS_FFPACK/check-ftrtri.C
+++ b/benchmarks/benchmark-ftrtri.C
@@ -23,33 +23,42 @@
* ========LICENCE========
*/
+#include "fflas-ffpack/fflas-ffpack-config.h"
#include <iostream>
-
+#include <givaro/modular.h>
#include "fflas-ffpack/fflas-ffpack.h"
-#include "fflas-ffpack/field/modular-balanced.h"
#include "fflas-ffpack/utils/timer.h"
#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
using namespace std;
int main(int argc, char** argv) {
-
- // parameter: p, n, iteration, file
-
- int p = atoi(argv[1]);
- int n = atoi(argv[2]);
- size_t iter = atoi(argv[3]);
-
-
- typedef FFPACK::Modular<double> Field;
+
+ size_t iter = 1;
+ int q = 1009;
+ int n = 2000;
+ std::string file = "";
+
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT , &q },
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'f', "-f FILE", "Set the input file (empty for random).", TYPE_STR , &file },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+ typedef Givaro::Modular<double> Field;
typedef Field::Element Element;
- Field F(p);
+ Field F(q);
Element * A;
- Timer chrono;
+ FFLAS::Timer chrono;
double time=0.0;
Field::RandIter G(F);
@@ -57,7 +66,7 @@ int main(int argc, char** argv) {
if (argc > 4){
A = read_field (F, argv[4], &n, &n);
} else {
- A = new Element[n*n];
+ A = FFLAS::fflas_new<Element>(n*n);
for (size_t j=0; j<(size_t) n*n; ++j)
G.random(*(A+j));
}
@@ -70,11 +79,16 @@ int main(int argc, char** argv) {
chrono.stop();
time+=chrono.usertime();
- delete[] A;
+ FFLAS::fflas_delete( A);
}
-
- cerr<<"n: "<<n<<" p: "<<p<<" time: "<<time/(double)iter<<endl;
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+ #define CUBE(x) ((x)*(x)*(x))
+ std::cout << "Time: " << time / double(iter)
+ << " Gflops: " << CUBE(double(n)/1000.) / time * double(iter) / 3.;
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
return 0;
diff --git a/benchmark/src/FFLAS_FFPACK/check-inverse.C b/benchmarks/benchmark-inverse.C
similarity index 59%
rename from benchmark/src/FFLAS_FFPACK/check-inverse.C
rename to benchmarks/benchmark-inverse.C
index 6e33141..2593144 100644
--- a/benchmark/src/FFLAS_FFPACK/check-inverse.C
+++ b/benchmarks/benchmark-inverse.C
@@ -23,40 +23,50 @@
* ========LICENCE========
*/
+#include "fflas-ffpack/fflas-ffpack-config.h"
#include <iostream>
+#include <givaro/modular.h>
#include "fflas-ffpack/fflas-ffpack.h"
-#include "fflas-ffpack/field/modular-balanced.h"
#include "fflas-ffpack/utils/timer.h"
#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
using namespace std;
int main(int argc, char** argv) {
-
- // parameter: p, n, iteration, file
-
- int p = atoi(argv[1]);
- int n = atoi(argv[2]);
- size_t iter = atoi(argv[3]);
-
-
- typedef FFPACK::Modular<double> Field;
+
+ size_t iter = 1;
+ int q = 1009;
+ int n = 2000;
+ std::string file = "";
+
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT , &q },
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'f', "-f FILE", "Set the input file (empty for random).", TYPE_STR , &file },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+ typedef Givaro::Modular<double> Field;
typedef Field::Element Element;
- Field F(p);
+ Field F(q);
Field::Element * A;
- Timer chrono;
+ FFLAS::Timer chrono;
double time=0.0;
for (size_t i=0;i<iter;++i){
- if (argc > 4){
- A = read_field(F, argv[4], &n, &n);
+ if (!file.empty()){
+ A = read_field(F, file.c_str(), &n, &n);
}
else {
- A = new Element[n*n];
+ A = FFLAS::fflas_new<Element>(n*n);
Field::RandIter G(F);
for (size_t j=0; j<(size_t)n*n; ++j)
G.random(*(A+j));
@@ -69,10 +79,15 @@ int main(int argc, char** argv) {
chrono.stop();
time+=chrono.usertime();
- delete[] A;
+ FFLAS::fflas_delete( A);
}
-
- cerr<<"n: "<<n<<" p: "<<p<<" time: "<<time/(double)iter<<endl;
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+ #define CUBE(x) ((x)*(x)*(x))
+ std::cout << "Time: " << time / double(iter)
+ << " Gflops: " << 2. * CUBE(double(n)/1000.) / time * double(iter);
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
return 0;
diff --git a/benchmarks/benchmark-lqup-mp.C b/benchmarks/benchmark-lqup-mp.C
new file mode 100644
index 0000000..44b354d
--- /dev/null
+++ b/benchmarks/benchmark-lqup-mp.C
@@ -0,0 +1,102 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iostream>
+#include <vector>
+#include <string>
+using namespace std;
+
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+#include "fflas-ffpack/utils/args-parser.h"
+#include "givaro/modular-integer.h"
+
+int main(int argc, char** argv){
+ srand((int)time(NULL));
+ srand48(time(NULL));
+
+ static size_t iters = 3 ;
+ static Givaro::Integer q = -1 ;
+ static unsigned long b = 512 ;
+ static size_t m = 512 ;
+ static size_t n = 512 ;
+ static Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INTEGER , &q },
+ { 'b', "-b B", "Set the bitsize of the random characteristic.", TYPE_INT , &b },
+ { 'm', "-m M", "Set the dimension m of the matrix.", TYPE_INT , &m },
+ { 'n', "-n N", "Set the dimension n of the matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iters },
+ END_OF_ARGUMENTS
+ };
+ FFLAS::parseArguments(argc,argv,as);
+
+ size_t seed= time(NULL);
+ typedef Givaro::Modular<Givaro::Integer> Field;
+ FFLAS::Timer chrono;
+ double time=0.;
+ Givaro::Integer p;
+ Givaro::IntPrimeDom IPD;
+
+ for (size_t i=0;i<iters;i++) {
+
+ Givaro::Integer::random_exact_2exp(p, b);
+ IPD.prevprimein(p);
+ Field F(p);
+ size_t lda;
+ lda=n;
+
+ typename Field::RandIter Rand(F,seed);
+ Field::Element_ptr A;
+ A= FFLAS::fflas_new(F,m,lda);
+ size_t * P = FFLAS::fflas_new<size_t>(n) ;
+ size_t * Q = FFLAS::fflas_new<size_t>(m) ;
+
+ for (size_t ii=0;ii<m*lda;++ii)
+ Rand.random(A[ii]);
+
+ Givaro::Integer alpha;
+ alpha=1;
+ chrono.clear();chrono.start();
+ FFPACK::LUdivine (F, FFLAS::FflasUnit, FFLAS::FflasNoTrans, m, n, A, lda, P, Q);
+ chrono.stop();
+ time+=chrono.usertime();
+
+ FFLAS::fflas_delete(A);
+ FFLAS::fflas_delete(P);
+ FFLAS::fflas_delete(Q);
+ }
+ double Gflops=(2./3.*double(m)/1000.*double(m)/1000.*double(n)/1000.0) / chrono.usertime() * double(iters);
+ Gflops*=p.bitsize()/16.;
+ cout<<"Time: "<<time/iters<<" Gflops: "<<Gflops<<endl;
+
+
+ return 0;
+}
+
diff --git a/benchmark/src/FFLAS_FFPACK/check-lqup.C b/benchmarks/benchmark-lqup.C
similarity index 55%
rename from benchmark/src/FFLAS_FFPACK/check-lqup.C
rename to benchmarks/benchmark-lqup.C
index 52c84bb..a4793fe 100644
--- a/benchmark/src/FFLAS_FFPACK/check-lqup.C
+++ b/benchmarks/benchmark-lqup.C
@@ -22,49 +22,60 @@
* ========LICENCE========
*/
+#include "fflas-ffpack/fflas-ffpack-config.h"
#include <iostream>
+#include <givaro/modular.h>
#include "fflas-ffpack/fflas-ffpack.h"
-#include "fflas-ffpack/field/modular-balanced.h"
#include "fflas-ffpack/utils/timer.h"
#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
using namespace std;
int main(int argc, char** argv) {
-
- // parameter: p, n, iteration, file
-
- int p = atoi(argv[1]);
- int n = atoi(argv[2]);
- size_t iter = atoi(argv[3]);
-
-
- typedef FFPACK::Modular<double> Field;
+
+ size_t iter = 1;
+ int q = 1009;
+ int n = 2000;
+ std::string file = "";
+
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT , &q },
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'f', "-f FILE", "Set the input file (empty for random).", TYPE_STR , &file },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+ FFLAS::parseArguments(argc,argv,as);
+ typedef Givaro::Modular<double> Field;
typedef Field::Element Element;
- Field F(p);
+ Field F(q);
- Timer chrono;
+ FFLAS::Timer chrono;
double time=0.0;
Element *A;
for (size_t i=0;i<iter;++i){
- if (argc > 4){
- A = read_field (F, argv[4], &n, &n);
+ if (!file.empty()){
+ A = read_field (F, file.c_str(), &n, &n);
}
else{
- A = new Element[n*n];
+ A = FFLAS::fflas_new<Element>(n*n);
Field::RandIter G(F);
for (size_t j=0; j< (size_t)n*n; ++j)
G.random(*(A+j));
}
- size_t * P = new size_t[n];
- size_t * Q = new size_t[n];
+ size_t * P = FFLAS::fflas_new<size_t>(n);
+ size_t * Q = FFLAS::fflas_new<size_t>(n);
chrono.clear();
chrono.start();
@@ -73,14 +84,18 @@ int main(int argc, char** argv) {
chrono.stop();
time+=chrono.usertime();
- delete[] P;
- delete[] Q;
- delete[] A;
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ FFLAS::fflas_delete( A);
}
-
- cerr<<"n: "<<n<<" p: "<<p<<" time: "<<time/(double)iter<<endl;
-
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+ #define CUBE(x) ((x)*(x)*(x))
+ std::cout << "Time: " << time / double(iter)
+ << " Gflops: " << 2. * CUBE(double(n)/1000.) / 3. / time * double(iter);
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
return 0;
}
diff --git a/benchmarks/benchmark-pfspmv.C b/benchmarks/benchmark-pfspmv.C
new file mode 100644
index 0000000..a179ef5
--- /dev/null
+++ b/benchmarks/benchmark-pfspmv.C
@@ -0,0 +1,169 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* Copyright (c) FFLAS-FFPACK
+* Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+* ========LICENCE========
+* This file is part of the library FFLAS-FFPACK.
+*
+* FFLAS-FFPACK is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+* ========LICENCE========
+*/
+
+#include <iostream>
+#include <vector>
+#include <sstream>
+#include <cstdio>
+#include <cstdlib>
+
+#include "fflas-ffpack/config-blas.h"
+// #include "fflas-ffpac/field/modular-double.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#ifdef __FFLASFFPACK_USE_OPENMP
+typedef FFLAS::OMPTimer TTimer;
+#else
+typedef FFLAS::Timer TTimer;
+#endif
+
+using namespace std;
+using namespace FFLAS;
+
+template <typename T> T from_string(std::string const &s) {
+ std::stringstream ss(s);
+ T result;
+ ss >> result; // TODO handle errors
+ return result;
+}
+
+
+template <class MatT, class Field, class IndexT>
+std::pair<double, uint64_t> test_pfspmv(size_t iter, const Field &F, IndexT *row, IndexT *col,
+ typename Field::Element_ptr dat, index_t rowdim, index_t coldim, uint64_t nnz,
+ typename Field::Element_ptr x, typename Field::Element_ptr y,
+ typename Field::Element beta) {
+ MatT matrix;
+ sparse_init(F, matrix, row, col, dat, rowdim, coldim, nnz);
+ TTimer time;
+ time.clear();
+ time.start();
+ for (size_t i = 0; i < iter; ++i)
+ pfspmv(F, matrix, x, 1, y);
+ time.stop();
+ sparse_delete(matrix);
+ return make_pair(time.usertime(), matrix.nElements);
+}
+
+template <class T1, class T2, class T> void print_res(pair<T1, T2> &p, size_t iter, T as, int blocksize = 1) {
+ std::cout << "Time: " << p.first / double(iter)
+ << " Gflops: " << (2 * blocksize * p.second) / 1000000000. / p.first * double(iter);
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
+}
+
+int main(int argc, char **argv) {
+
+ using Field = FFPACK::Modular<double>;
+ using Element = typename Field::Element;
+
+ size_t iter = 10;
+ int q = 1009;
+ std::string matrixFile = "";
+
+ Argument as[] = { { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT, &q },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT, &iter },
+ { 'f', "-f FILE", "Set matrix file.", TYPE_STR, &matrixFile },
+ END_OF_ARGUMENTS };
+
+ matrixFile = "matrix/cis.mk8-8.sms";
+ // matrixFile = "matrix/GL7d17.sms";
+ // matrixFile = "data/mat11.sms";
+ // cout << matrixFile << endl;
+
+ FFLAS::parseArguments(argc, argv, as);
+
+ Field F(q);
+
+ index_t *row = nullptr, *col = nullptr;
+ typename Field::Element_ptr dat;
+ index_t rowdim, coldim;
+ uint64_t nnz;
+
+ if (matrixFile.find(".sms") != std::string::npos) {
+ readSmsFormat(matrixFile, F, row, col, dat, rowdim, coldim, nnz);
+ } else if (matrixFile.find(".spr") != std::string::npos) {
+ readSprFormat(matrixFile, F, row, col, dat, rowdim, coldim, nnz);
+ }
+
+ if (s) {
+ auto stats = sparse_details::getStat(F, row, col, dat, rowdim, coldim, nnz);
+ std::cout << "Sparse Matrix statistics : " << std::endl;
+ stats.print();
+ std::cout << std::endl;
+ }
+
+ auto x = FFLAS::fflas_new(F, coldim, 1, Alignment::CACHE_LINE);
+ auto y = FFLAS::fflas_new(F, rowdim, 1, Alignment::CACHE_LINE);
+
+ for (size_t i = 0; i < coldim; ++i) {
+ x[i] = 1;
+ }
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y[i] = 0;
+ }
+
+ auto csr =
+ test_pfspmv<Sparse<Field, FFLAS::SparseMatrix_t::CSR>>(iter, F, row, col, dat, rowdim, coldim, nnz, x, y, 1);
+ cout << "CSR : ";
+ print_res(csr, iter, as);
+ auto csrzo =
+ test_pfspmv<Sparse<Field, FFLAS::SparseMatrix_t::CSR_ZO>>(iter, F, row, col, dat, rowdim, coldim, nnz, x, y, 1);
+ cout << "CSR_ZO : ";
+ print_res(csrzo, iter, as);
+ auto ell =
+ test_pfspmv<Sparse<Field, FFLAS::SparseMatrix_t::ELL>>(iter, F, row, col, dat, rowdim, coldim, nnz, x, y, 1);
+ cout << "ELL : ";
+ print_res(ell, iter, as);
+ auto ellzo =
+ test_pfspmv<Sparse<Field, FFLAS::SparseMatrix_t::ELL_ZO>>(iter, F, row, col, dat, rowdim, coldim, nnz, x, y, 1);
+ cout << "ELL_ZO : ";
+ print_res(ellzo, iter, as);
+ auto ellsimd = test_pfspmv<Sparse<Field, FFLAS::SparseMatrix_t::ELL_simd>>(iter, F, row, col, dat, rowdim, coldim,
+ nnz, x, y, 1);
+ cout << "ELL_simd : ";
+ print_res(ellsimd, iter, as);
+ auto ellsimdzo = test_pfspmv<Sparse<Field, FFLAS::SparseMatrix_t::ELL_simd_ZO>>(iter, F, row, col, dat, rowdim,
+ coldim, nnz, x, y, 1);
+ cout << "ELL_simd_ZO : ";
+ print_res(ellsimdzo, iter, as);
+ // auto csrhyb = test_fspmv<Sparse<Field, FFLAS::SparseMatrix_t::CSR_HYB>>(iter, F, row, col, dat, rowdim, coldim,
+ // nnz, x, y, 1);
+ // cout << "CSR_HYB : ";
+ // print_res(csrhyb, iter, as);
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+
+ // std::cout << "Time: " << coo.first / double(iter)
+ // << " Gflops: " << (2*coo.second)/1000000000. / coo.first * double(iter);
+ // FFLAS::writeCommandString(std::cout, as) << std::endl;
+
+ // std::cout << "Time: " << csr.first / double(iter)
+ // << " Gflops: " << (2*csr.second)/1000000000. / csr.first * double(iter);
+ // FFLAS::writeCommandString(std::cout, as) << std::endl;
+ return 0;
+}
diff --git a/benchmarks/benchmark-pluq.C b/benchmarks/benchmark-pluq.C
new file mode 100644
index 0000000..1f7e786
--- /dev/null
+++ b/benchmarks/benchmark-pluq.C
@@ -0,0 +1,259 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+//#include "goto-def.h"
+
+/* Copyright (c) FFLAS-FFPACK
+* Written by Ziad Sultan <ziad.sultan at imag.fr>
+* ========LICENCE========
+* This file is part of the library FFLAS-FFPACK.
+*
+* FFLAS-FFPACK is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+* ========LICENCE========
+*/
+
+//#define __FFLASFFPACK_USE_OPENMP
+//#define __FFLASFFPACK_USE_TBB
+
+//#define __FFLASFFPACK_USE_DATAFLOW
+//#define __FFLASFFPACK_FORCE_SEQ
+//#define WINOPAR_KERNEL
+//#define CLASSIC_SEQ
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <givaro/modular.h>
+#include <givaro/givranditer.h>
+#include <iostream>
+
+#include "fflas-ffpack/config-blas.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/fflas_randommatrix.h"
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+#ifdef __FFLASFFPACK_USE_KAAPI
+#include "libkomp.h"
+#endif
+
+using namespace std;
+
+//typedef Givaro::ModularBalanced<double> Field;
+//typedef Givaro::ModularBalanced<float> Field;
+typedef Givaro::ZRing<double> Field;
+//typedef Givaro::UnparametricZRing<double> Field;
+
+void verification_PLUQ(const Field & F, typename Field::Element * B, typename Field::Element * A,
+ size_t * P, size_t * Q, size_t m, size_t n, size_t R)
+{
+
+ FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads> H;
+
+ Field::Element_ptr X = FFLAS::fflas_new (F, m,n);
+ Field::Element_ptr L, U;
+ L = FFLAS::fflas_new(F, m,R);
+ U = FFLAS::fflas_new(F, R,n);
+
+ PARFOR1D (i, m*R,H, F.init(L[i], 0.0); );
+
+ PARFOR1D (i,n*R,H, F.init(U[i], 0.0); );
+
+ PARFOR1D (i,m*n,H, F.init(X[i], 0.0); );
+
+ Field::Element zero,one;
+ F.init(zero,0.0);
+ F.init(one,1.0);
+ PARFOR1D (i,R,H,
+ for (size_t j=0; j<i; ++j)
+ F.assign ( *(U + i*n + j), zero);
+ for (size_t j=i; j<n; ++j)
+ F.assign (*(U + i*n + j), *(A+ i*n+j));
+ );
+
+ PARFOR1D (j,R,H,
+ for (size_t i=0; i<=j; ++i )
+ F.assign( *(L+i*R+j), zero);
+ F.assign(*(L+j*R+j), one);
+ for (size_t i=j+1; i<m; i++)
+ F.assign( *(L + i*R+j), *(A+i*n+j));
+ );
+
+ PAR_BLOCK{
+ SYNCH_GROUP(
+
+ TASK(MODE(CONSTREFERENCE(F,P,L)),
+ FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, R,0,m, L, R, P););
+ TASK(MODE(CONSTREFERENCE(F,Q,U)),
+ FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, R,0,n, U, n, Q););
+ WAIT;
+ typename FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads> pWH (MAX_THREADS);
+
+ TASK(MODE(CONSTREFERENCE(F,U,L,X)),
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,R,
+ F.one, L,R, U,n, F.zero, X,n, pWH););
+ );
+ }
+ bool fail = false;
+ for(size_t i=0; i<m; ++i)
+ for (size_t j=0; j<n; ++j)
+ if (!F.areEqual (*(B+i*n+j), *(X+i*n+j))){
+ std::cout << " Initial["<<i<<","<<j<<"] = " << (*(B+i*n+j))
+ << " Result["<<i<<","<<j<<"] = " << (*(X+i*n+j))
+ << std::endl;
+ fail=true;
+ }
+
+ if (fail)
+ std::cout<<"FAIL"<<std::endl;
+ else
+ std::cout<<"PASS"<<std::endl;
+
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+}
+
+void Rec_Initialize(Field &F, Field::Element * C, size_t m, size_t n, size_t ldc)
+{
+ if(std::min(m,n) <= ldc/NUM_THREADS){
+ for(size_t i=0; i<m; i++)
+ FFLAS::fzero(F, 1, n, C+i*n, n);
+ }
+ else{
+ size_t M2 = m >> 1;
+ size_t N2 = n >> 1;
+ typename Field::Element * C2 = C + N2;
+ typename Field::Element * C3 = C + M2*ldc;
+ typename Field::Element * C4 = C3 + N2;
+
+ SYNCH_GROUP(
+ TASK(MODE(CONSTREFERENCE(F)), Rec_Initialize(F,C,M2,N2, ldc););
+ TASK(MODE(CONSTREFERENCE(F)), Rec_Initialize(F,C2,M2,n-N2, ldc););
+ TASK(MODE(CONSTREFERENCE(F)), Rec_Initialize(F,C3,m-M2,N2, ldc););
+ TASK(MODE(CONSTREFERENCE(F)), Rec_Initialize(F,C4,m-M2,n-N2, ldc););
+ );
+ }
+}
+
+int main(int argc, char** argv) {
+
+ size_t iter = 3 ;
+ int q = 131071 ;
+ Field F(q);
+ int m = 2000 ;
+ int n = 2000 ;
+ int r = 2000 ;
+ int v = 0;
+ int t=MAX_THREADS;
+ int NBK = -1;
+ bool par=true;
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT , &q },
+ { 'm', "-m M", "Set the row dimension of A.", TYPE_INT , &m },
+ { 'n', "-n N", "Set the col dimension of A.", TYPE_INT , &n },
+ { 'r', "-r R", "Set the rank of matrix A.", TYPE_INT , &r },
+ { 'i', "-i I", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'v', "-v V", "Set 1 if need verification of result else 0.", TYPE_INT , &v },
+ { 't', "-t T", "number of virtual threads to drive the partition.", TYPE_INT , &t },
+ { 'b', "-b B", "number of numa blocks per dimension for the numa placement", TYPE_INT , &NBK },
+ { 'p', "-p P", "whether to run or not the parallel PLUQ", TYPE_BOOL , &par },
+ END_OF_ARGUMENTS
+ };
+ FFLAS::parseArguments(argc,argv,as);
+
+ if (r > std::min(m,n)){
+ std::cerr<<"Warning: rank can not be greater than min (m,n). It has been forced to min (m,n)"<<std::endl;
+ r=std::min(m,n);
+ }
+ if (!par) t=1;NBK=1;
+ if (NBK==-1) NBK = t;
+
+ Field::Element_ptr A, Acop;
+ A = FFLAS::fflas_new(F,m,n);
+
+ PAR_BLOCK{
+ Rec_Initialize(F, A, m, n, n);
+ // FFLAS::pfzero(F,m,n,A,m/NBK);
+ FFPACK::RandomMatrixWithRankandRandomRPM (F, A, n, r, m,n);
+ }
+
+ size_t R;
+ FFLAS::Timer chrono;
+ double *time=new double[iter];
+
+ enum FFLAS::FFLAS_DIAG diag = FFLAS::FflasNonUnit;
+ size_t maxP, maxQ;
+ maxP = m;
+ maxQ = n;
+
+ size_t *P = FFLAS::fflas_new<size_t>(maxP);
+ size_t *Q = FFLAS::fflas_new<size_t>(maxQ);
+
+ FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive,FFLAS::StrategyParameter::TwoDAdaptive> H;
+
+ Acop = FFLAS::fflas_new(F,m,n);
+ PARFOR1D(i,(size_t)m,H,
+ FFLAS::fassign(F, n, A + i*n, 1, Acop + i*n, 1);
+ // for (size_t j=0; j<(size_t)n; ++j)
+ // Acop[i*n+j]= A[i*n+j];
+ );
+ size_t BC;
+ for (size_t i=0;i<=iter;++i){
+
+ PARFOR1D(j,maxP,H, P[j]=0; );
+ PARFOR1D(j,maxQ,H, Q[j]=0; );
+ PARFOR1D(k,(size_t)m,H,
+ FFLAS::fassign(F, n, Acop + k*n, 1, A + k*n, 1);
+ // for (size_t j=0; j<(size_t)n; ++j)
+ // F.assign( A[k*n+j] , Acop[k*n+j]) ;
+ );
+ chrono.clear();
+
+ if (i) chrono.start();
+ if (par){
+
+
+ PAR_BLOCK{
+ R = FFPACK::pPLUQ(F, diag, m, n, A, n, P, Q, t);
+ BC = n/NUM_THREADS;
+ }
+ }
+ else
+ R = FFPACK::PLUQ(F, diag, m, n, A, n, P, Q);
+ if (i) {chrono.stop(); time[i-1]=chrono.realtime();}
+
+ }
+ std::sort(time, time+iter);
+ double meantime = time[iter/2];
+ delete[] time;
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+#define CUBE(x) ((x)*(x)*(x))
+ double gflop = 2.0/3.0*CUBE(double(r)/1000.0) +2*m/1000.0*n/1000.0*double(r)/1000.0 - double(r)/1000.0*double(r)/1000.0*(m+n)/1000;
+ std::cout << "Time: " << meantime
+ << " Gflops: " << gflop / meantime << " BC: "<<BC;
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
+
+ //verification
+ if(v)
+ verification_PLUQ(F,Acop,A,P,Q,m,n,R);
+
+ FFLAS::fflas_delete (P);
+ FFLAS::fflas_delete (Q);
+ FFLAS::fflas_delete (A);
+ FFLAS::fflas_delete (Acop);
+
+ return 0;
+}
+
diff --git a/benchmarks/benchmark-wino.C b/benchmarks/benchmark-wino.C
new file mode 100644
index 0000000..6d041e4
--- /dev/null
+++ b/benchmarks/benchmark-wino.C
@@ -0,0 +1,178 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+//#include "goto-def.h"
+
+/* Copyright (c) 2012 FFLAS-FFPACK
+ * Written by J.G. Dumas <jgdumas at imag.fr>
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ */
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iostream>
+#include <fstream>
+#include <givaro/modular.h>
+
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#define CUBE(x) ((x)*(x)*(x))
+
+template<class Field>
+void launch_wino(const Field &F,
+ const size_t &n,
+ const size_t &NB,
+ const size_t &wino,
+ const bool &asmax,
+ const size_t &seed,
+ const bool compare)
+{
+
+ typedef typename Field::Element Element ;
+ typename Field::RandIter G(F);
+
+ if (compare)
+ F.write(std::cout << "Field ") << std::endl;
+
+ double basetime(0.0), time(0.0);
+
+ Element *A, *C;
+ A = FFLAS::fflas_new<Element>(n*n);
+ C = FFLAS::fflas_new<Element>(n*n);
+ for (size_t i=0; i<n*n;++i)
+ G.random(A[i]);
+
+ // ----- Compare with fgemm
+ FFLAS::Timer chrono;
+ if (compare) {
+ for(size_t i=0; i<NB; ++i) {
+ chrono.start();
+ FFLAS::fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
+ n,n,n, F.one,
+ A, n, A, n, F.zero, C,n);
+ chrono.stop();
+ basetime+= chrono.usertime();
+ }
+
+ std::cout << "Time: " << basetime / double(NB)
+ << " Gflops: " << 2. * CUBE(double(n)/1000.0) / basetime * double(NB)
+ << " [fgemm result]" << std::endl;
+ }
+
+ // ----- Winograd
+ for(size_t w = (asmax)? 0 : wino; w <= wino; ++w) {
+ FFLAS::MMHelper<Field, FFLAS::MMHelperAlgo::Winograd> WH (F,(int)w);
+ time = 0. ;
+ chrono.clear();
+ for(size_t i=0; i<NB; ++i) {
+ chrono.start();
+ FFLAS::fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
+ n, n, n, F.one, A, n, A, n, F.zero, C, n, WH);
+ chrono.stop();
+ time+= chrono.usertime();
+ }
+
+ // -----------
+ // Standard output for benchmark - Alexis Breust 2014/11/14
+ std::cout << "Time: " << time / double(NB)
+ << " Gflops: " << 2. * CUBE(double(n)/1000.0) / time * double(NB);
+
+ if (compare || asmax)
+ std::cout << " [wino" << w << " result]" << std::endl;
+ }
+
+ if (compare)
+ std::cout << std::endl;
+
+ FFLAS::fflas_delete(A);
+ FFLAS::fflas_delete(C);
+}
+
+int main (int argc, char ** argv) {
+
+ size_t iter = 1;
+ int q = 1009;
+ int n = 1000;
+ int w = 7;
+ size_t seed = 0;
+ bool compare = false;
+ bool balanced = false;
+ std::string type = "double";
+ bool levelasmax = false;
+
+ Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INT , &q },
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'w', "-w N", "Set the winograd level.", TYPE_INT , &w },
+ { 'l', "-l {YN}", "Use -w info a max (Yes or No).", TYPE_BOOL , &levelasmax },
+ { 's', "-s S", "Set the seed for randomness (0 for random).", TYPE_INT , &seed },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iter },
+ { 'c', "-c {YN}", "Compare mode, overrides -b and -t options (Yes or No).", TYPE_BOOL , &compare },
+ { 'b', "-b {YN}", "Use balanced modular (Yes or No).", TYPE_BOOL , &balanced },
+ { 't', "-t TYPE", "Set the field type (double/float/int).", TYPE_STR , &type },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+ if (!seed)
+ seed = FFLAS::BaseTimer::seed();
+ srand((uint32_t)seed);
+
+ if (compare) {
+ Givaro::Modular<double> F1(q);
+ Givaro::Modular<float> F2(q);
+ Givaro::Modular<int> F3(q);
+ Givaro::ModularBalanced<double> F4(q);
+ Givaro::ModularBalanced<float> F5(q);
+ Givaro::ModularBalanced<int> F6(q);
+ // ZZ<double> F7;
+ // ZZ<float> F8;
+ // ZZ<int> F9;
+
+ launch_wino(F1,n,iter,w,levelasmax,seed,true);
+ launch_wino(F2,n,iter,w,levelasmax,seed,true);
+ launch_wino(F3,n,iter,w,levelasmax,seed,true);
+ launch_wino(F4,n,iter,w,levelasmax,seed,true);
+ launch_wino(F5,n,iter,w,levelasmax,seed,true);
+ launch_wino(F6,n,iter,w,levelasmax,seed,true);
+ // launch_wino(F7,n,iter,winomax,seed);
+ // launch_wino(F8,n,iter,winomax,seed);
+ // launch_wino(F9,n,iter,winomax,seed);
+ }
+ else {
+ if (balanced) {
+ if (type == "double") launch_wino(Givaro::ModularBalanced<double>(q),n,iter,w,levelasmax,seed,false);
+ else if (type == "float") launch_wino(Givaro::ModularBalanced<float>(q),n,iter,w,levelasmax,seed,false);
+ else if (type == "int") launch_wino(Givaro::ModularBalanced<int>(q),n,iter,w,levelasmax,seed,false);
+ }
+ else {
+ if (type == "double") launch_wino(Givaro::Modular<double>(q),n,iter,w,levelasmax,seed,false);
+ else if (type == "float") launch_wino(Givaro::Modular<float>(q),n,iter,w,levelasmax,seed,false);
+ else if (type == "int") launch_wino(Givaro::Modular<int>(q),n,iter,w,levelasmax,seed,false);
+ }
+ }
+
+ if (compare || levelasmax)
+ std::cout << "Lauch with:";
+ FFLAS::writeCommandString(std::cout, as) << std::endl;
+
+ return 0;
+}
+
diff --git a/benchmarks/files/mat1916-1916x1916-195985.smf.gz b/benchmarks/files/mat1916-1916x1916-195985.smf.gz
new file mode 100644
index 0000000..82a13e9
Binary files /dev/null and b/benchmarks/files/mat1916-1916x1916-195985.smf.gz differ
diff --git a/benchmarks/files/mat1916-1916x1916-195985.sms.gz b/benchmarks/files/mat1916-1916x1916-195985.sms.gz
new file mode 100644
index 0000000..2fad5ab
Binary files /dev/null and b/benchmarks/files/mat1916-1916x1916-195985.sms.gz differ
diff --git a/benchmarks/perfpublisher.sh b/benchmarks/perfpublisher.sh
new file mode 100755
index 0000000..8be3168
--- /dev/null
+++ b/benchmarks/perfpublisher.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+# Script to format benchmarks results into a single xml file.
+# See https://wiki.jenkins-ci.org/display/JENKINS/PerfPublisher+Plugin
+# -----
+# 2014/11/17 - Written by AB <Alexis.Breust at imag.fr>
+
+XMLFILE=$1
+benchmarks=$2
+COMPILER=$3
+
+#=================#
+# Plateform infos #
+#=================#
+
+COMPILERVERSION=$($COMPILER --version 2>&1 | head -1)
+CPUFREQ=$(lscpu | grep "MHz" | rev | cut -f1 -d' ' | rev)
+ARCH=$(uname -m)
+OSNAME=$(uname -s)
+OSVERSION=$(uname -r)
+
+if hash lsb_release 2>/dev/null
+ then DISTRIB=$(lsb_release -ds)
+ else DISTRIB='Unknown distribution'
+fi
+
+#==========#
+# Prologue #
+#==========#
+
+if [[ -f $XMLFILE ]]
+then
+ echo '----> WARNING: File '$XMLFILE' is not empty.'
+ echo '----> Results will be added to its end.'
+fi
+
+#========#
+# Header #
+#========#
+
+echo '<?xml version="1.0" encoding="UTF-8"?>' >> $XMLFILE
+echo '<report name="benchmarks-report" categ="benchmarks">' >> $XMLFILE
+
+#=======#
+# Start #
+#=======#
+
+echo '<start>' >> $XMLFILE
+echo '<date format="YYYYMMDD" val="'$(date +%Y%m%d)'" />' >> $XMLFILE
+echo '<time format="HHMMSS" val="'$(date +%H%M%S)'" />' >> $XMLFILE
+echo '</start>' >> $XMLFILE
+
+#============#
+# Benchmarks #
+#============#
+
+for benchmark in $benchmarks
+do
+ if [[ ! -f $benchmark ]]
+ then
+ #File does not exist: compile it
+ echo '[Compiling]' $benchmark
+ COMPILESTART=$(date +%s%3N)
+ COMPILELOG=$(make $benchmark 2>&1; echo 'Returned state: '$?)
+ COMPILEEND=$(date +%s%3N)
+ COMPILETIME=$(($COMPILEEND - $COMPILESTART))
+ COMPILECHECK=$(echo $COMPILELOG | grep -o '[^ ]*$')
+ COMPILETIMERELEVANT='true'
+ else
+ #File does exist
+ echo '[Already compiled]' $benchmark
+ COMPILELOG='(Previously compiled)'
+ COMPILETIME='0.0'
+ COMPILECHECK='0'
+ COMPILETIMERELEVANT='false'
+ fi
+
+ if [[ $COMPILECHECK -ne 0 ]]
+ then
+ #Compilation failure
+ # EXECUTED='no' - keep it to yes so that Jenkins
+ # uses it within its results
+ EXECUTED='yes'
+ PASSED='no'
+ STATE='0'
+ EXECUTIONLOG='(Not executed)'
+ EXECUTIONTIME='0.0'
+ PERFORMANCEFLOPS='0.0'
+ COMPILETIMERELEVANT='false'
+ EXECUTIONTIMERELEVANT='false'
+ PERFORMANCEFLOPSRELEVANT='false'
+ ERRORLOG='Does not compile.'
+ echo '-> Does not compile.'
+ else
+ #Compilation success
+ echo '[Executing]' $benchmark
+ EXECUTED='yes'
+ EXECUTIONLOG=$(./$benchmark 2>&1)
+
+ if [[ ${EXECUTIONLOG,,} != "time:"* ]]
+ then
+ #Execution failure
+ PASSED='no'
+ STATE='0'
+ EXECUTIONTIME='0.0'
+ PERFORMANCEFLOPS='0.0'
+ EXECUTIONTIMERELEVANT='false'
+ PERFORMANCEFLOPSRELEVANT='false'
+ ERRORLOG='Unexpected output.'
+ echo '-> Unexpected output.'
+ else
+ #Execution success
+ PASSED='yes'
+ STATE='100'
+ EXECUTIONTIME=$(echo $EXECUTIONLOG | cut -d' ' -f2)
+ PERFORMANCEFLOPS=$(echo $EXECUTIONLOG | cut -d' ' -f4)
+ EXECUTIONTIMERELEVANT='true'
+ if [[ ${PERFORMANCEFLOPS,,} != "irrelevant" ]]
+ then
+ PERFORMANCEFLOPSRELEVANT='true'
+ else
+ PERFORMANCEFLOPSRELEVANT='false'
+ PERFORMANCEFLOPS='0.0'
+ fi
+ ERRORLOG=''
+ fi
+ fi
+
+ echo '<test name="'$benchmark'" executed="'$EXECUTED'">' >> $XMLFILE
+ echo '<targets><target>BENCHMARK</target></targets>' >> $XMLFILE
+ echo '<platform>' >> $XMLFILE
+ echo '<os>' >> $XMLFILE
+ echo '<name><![CDATA['$OSNAME']]></name>' >> $XMLFILE
+ echo '<version><![CDATA['$OSVERSION']]></version>' >> $XMLFILE
+ echo '<distribution><![CDATA['$DISTRIB']]></distribution>' >> $XMLFILE
+ echo '</os>' >> $XMLFILE
+ echo '<processor arch="'$ARCH'">' >> $XMLFILE
+ echo '<frequency unit="MHz" cpufreq="'$CPUFREQ'" />' >> $XMLFILE
+ echo '</processor>' >> $XMLFILE
+ echo '<compiler name="'$COMPILER'" version="'$COMPILERVERSION'" />' >> $XMLFILE
+ echo '</platform>' >> $XMLFILE
+ echo '<result>' >> $XMLFILE
+
+ # Logs
+ echo '<success passed="'$PASSED'" state="'$STATE'" />' >> $XMLFILE
+ echo '<errorlog><![CDATA['$ERRORLOG']]></errorlog>' >> $XMLFILE
+ echo '<log name="Compile output"><![CDATA['"$COMPILELOG"']]></log>' >> $XMLFILE
+ echo '<log name="Execution output"><![CDATA['"$benchmark $EXECUTIONLOG"']]></log>' >> $XMLFILE
+
+ # Times
+ echo '<compiletime unit="ms" mesure="'$COMPILETIME'" isRelevant="'$COMPILETIMERELEVANT'" />' >> $XMLFILE
+ echo '<executiontime unit="s" mesure="'$EXECUTIONTIME'" isRelevant="'$EXECUTIONTIMERELEVANT'" />' >> $XMLFILE
+ echo '<performance unit="GFLOPS" mesure="'$PERFORMANCEFLOPS'" isRelevant="'$PERFORMANCEFLOPSRELEVANT'" />' >> $XMLFILE
+
+ echo '</result>' >> $XMLFILE
+ echo '</test>' >> $XMLFILE
+done
+
+#========#
+# Footer #
+#========#
+
+echo '</report>' >> $XMLFILE
+
+#==========#
+# Epilogue #
+#==========#
+
+echo 'Results correctly exported to' $XMLFILE
+
diff --git a/build-aux/config.guess b/build-aux/config.guess
deleted file mode 100755
index 8ca32db..0000000
--- a/build-aux/config.guess
+++ /dev/null
@@ -1,1526 +0,0 @@
-#! /bin/sh
-# Attempt to guess a canonical system name.
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
-# 2011 Free Software Foundation, Inc.
-
-timestamp='2011-02-02'
-
-# This file is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-
-# Originally written by Per Bothner. Please send patches (context
-# diff format) to <config-patches at gnu.org> and include a ChangeLog
-# entry.
-#
-# This script attempts to guess a canonical system name similar to
-# config.sub. If it succeeds, it prints the system name on stdout, and
-# exits with 0. Otherwise, it exits with 1.
-#
-# You can get the latest version of this script from:
-# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
-
-me=`echo "$0" | sed -e 's,.*/,,'`
-
-usage="\
-Usage: $0 [OPTION]
-
-Output the configuration name of the system \`$me' is run on.
-
-Operation modes:
- -h, --help print this help, then exit
- -t, --time-stamp print date of last modification, then exit
- -v, --version print version number, then exit
-
-Report bugs and patches to <config-patches at gnu.org>."
-
-version="\
-GNU config.guess ($timestamp)
-
-Originally written by Per Bothner.
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free
-Software Foundation, Inc.
-
-This is free software; see the source for copying conditions. There is NO
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
-
-help="
-Try \`$me --help' for more information."
-
-# Parse command line
-while test $# -gt 0 ; do
- case $1 in
- --time-stamp | --time* | -t )
- echo "$timestamp" ; exit ;;
- --version | -v )
- echo "$version" ; exit ;;
- --help | --h* | -h )
- echo "$usage"; exit ;;
- -- ) # Stop option processing
- shift; break ;;
- - ) # Use stdin as input.
- break ;;
- -* )
- echo "$me: invalid option $1$help" >&2
- exit 1 ;;
- * )
- break ;;
- esac
-done
-
-if test $# != 0; then
- echo "$me: too many arguments$help" >&2
- exit 1
-fi
-
-trap 'exit 1' 1 2 15
-
-# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
-# compiler to aid in system detection is discouraged as it requires
-# temporary files to be created and, as you can see below, it is a
-# headache to deal with in a portable fashion.
-
-# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
-# use `HOST_CC' if defined, but it is deprecated.
-
-# Portable tmp directory creation inspired by the Autoconf team.
-
-set_cc_for_build='
-trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
-trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
-: ${TMPDIR=/tmp} ;
- { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
- { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
- { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
- { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
-dummy=$tmp/dummy ;
-tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
-case $CC_FOR_BUILD,$HOST_CC,$CC in
- ,,) echo "int x;" > $dummy.c ;
- for c in cc gcc c89 c99 ; do
- if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then
- CC_FOR_BUILD="$c"; break ;
- fi ;
- done ;
- if test x"$CC_FOR_BUILD" = x ; then
- CC_FOR_BUILD=no_compiler_found ;
- fi
- ;;
- ,,*) CC_FOR_BUILD=$CC ;;
- ,*,*) CC_FOR_BUILD=$HOST_CC ;;
-esac ; set_cc_for_build= ;'
-
-# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
-# (ghazi at noc.rutgers.edu 1994-08-24)
-if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
- PATH=$PATH:/.attbin ; export PATH
-fi
-
-UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
-UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
-UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
-UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
-
-case "${UNAME_SYSTEM}" in
-Linux|GNU/*)
- eval $set_cc_for_build
- cat <<-EOF > $dummy.c
- #include <features.h>
- #ifdef __UCLIBC__
- # ifdef __UCLIBC_CONFIG_VERSION__
- LIBC=uclibc __UCLIBC_CONFIG_VERSION__
- # else
- LIBC=uclibc
- # endif
- #else
- # ifdef __dietlibc__
- LIBC=dietlibc
- # else
- LIBC=gnu
- # endif
- #endif
- EOF
- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
- ;;
-esac
-
-# Note: order is significant - the case branches are not exclusive.
-
-case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
- *:NetBSD:*:*)
- # NetBSD (nbsd) targets should (where applicable) match one or
- # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
- # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently
- # switched to ELF, *-*-netbsd* would select the old
- # object file format. This provides both forward
- # compatibility and a consistent mechanism for selecting the
- # object file format.
- #
- # Note: NetBSD doesn't particularly care about the vendor
- # portion of the name. We always set it to "unknown".
- sysctl="sysctl -n hw.machine_arch"
- UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \
- /usr/sbin/$sysctl 2>/dev/null || echo unknown)`
- case "${UNAME_MACHINE_ARCH}" in
- armeb) machine=armeb-unknown ;;
- arm*) machine=arm-unknown ;;
- sh3el) machine=shl-unknown ;;
- sh3eb) machine=sh-unknown ;;
- sh5el) machine=sh5le-unknown ;;
- *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
- esac
- # The Operating System including object format, if it has switched
- # to ELF recently, or will in the future.
- case "${UNAME_MACHINE_ARCH}" in
- arm*|i386|m68k|ns32k|sh3*|sparc|vax)
- eval $set_cc_for_build
- if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
- | grep -q __ELF__
- then
- # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
- # Return netbsd for either. FIX?
- os=netbsd
- else
- os=netbsdelf
- fi
- ;;
- *)
- os=netbsd
- ;;
- esac
- # The OS release
- # Debian GNU/NetBSD machines have a different userland, and
- # thus, need a distinct triplet. However, they do not need
- # kernel version information, so it can be replaced with a
- # suitable tag, in the style of linux-gnu.
- case "${UNAME_VERSION}" in
- Debian*)
- release='-gnu'
- ;;
- *)
- release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
- ;;
- esac
- # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
- # contains redundant information, the shorter form:
- # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
- echo "${machine}-${os}${release}"
- exit ;;
- *:OpenBSD:*:*)
- UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
- echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
- exit ;;
- *:ekkoBSD:*:*)
- echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
- exit ;;
- *:SolidBSD:*:*)
- echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE}
- exit ;;
- macppc:MirBSD:*:*)
- echo powerpc-unknown-mirbsd${UNAME_RELEASE}
- exit ;;
- *:MirBSD:*:*)
- echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
- exit ;;
- alpha:OSF1:*:*)
- case $UNAME_RELEASE in
- *4.0)
- UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
- ;;
- *5.*)
- UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
- ;;
- esac
- # According to Compaq, /usr/sbin/psrinfo has been available on
- # OSF/1 and Tru64 systems produced since 1995. I hope that
- # covers most systems running today. This code pipes the CPU
- # types through head -n 1, so we only detect the type of CPU 0.
- ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1`
- case "$ALPHA_CPU_TYPE" in
- "EV4 (21064)")
- UNAME_MACHINE="alpha" ;;
- "EV4.5 (21064)")
- UNAME_MACHINE="alpha" ;;
- "LCA4 (21066/21068)")
- UNAME_MACHINE="alpha" ;;
- "EV5 (21164)")
- UNAME_MACHINE="alphaev5" ;;
- "EV5.6 (21164A)")
- UNAME_MACHINE="alphaev56" ;;
- "EV5.6 (21164PC)")
- UNAME_MACHINE="alphapca56" ;;
- "EV5.7 (21164PC)")
- UNAME_MACHINE="alphapca57" ;;
- "EV6 (21264)")
- UNAME_MACHINE="alphaev6" ;;
- "EV6.7 (21264A)")
- UNAME_MACHINE="alphaev67" ;;
- "EV6.8CB (21264C)")
- UNAME_MACHINE="alphaev68" ;;
- "EV6.8AL (21264B)")
- UNAME_MACHINE="alphaev68" ;;
- "EV6.8CX (21264D)")
- UNAME_MACHINE="alphaev68" ;;
- "EV6.9A (21264/EV69A)")
- UNAME_MACHINE="alphaev69" ;;
- "EV7 (21364)")
- UNAME_MACHINE="alphaev7" ;;
- "EV7.9 (21364A)")
- UNAME_MACHINE="alphaev79" ;;
- esac
- # A Pn.n version is a patched version.
- # A Vn.n version is a released version.
- # A Tn.n version is a released field test version.
- # A Xn.n version is an unreleased experimental baselevel.
- # 1.2 uses "1.2" for uname -r.
- echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
- # Reset EXIT trap before exiting to avoid spurious non-zero exit code.
- exitcode=$?
- trap '' 0
- exit $exitcode ;;
- Alpha\ *:Windows_NT*:*)
- # How do we know it's Interix rather than the generic POSIX subsystem?
- # Should we change UNAME_MACHINE based on the output of uname instead
- # of the specific Alpha model?
- echo alpha-pc-interix
- exit ;;
- 21064:Windows_NT:50:3)
- echo alpha-dec-winnt3.5
- exit ;;
- Amiga*:UNIX_System_V:4.0:*)
- echo m68k-unknown-sysv4
- exit ;;
- *:[Aa]miga[Oo][Ss]:*:*)
- echo ${UNAME_MACHINE}-unknown-amigaos
- exit ;;
- *:[Mm]orph[Oo][Ss]:*:*)
- echo ${UNAME_MACHINE}-unknown-morphos
- exit ;;
- *:OS/390:*:*)
- echo i370-ibm-openedition
- exit ;;
- *:z/VM:*:*)
- echo s390-ibm-zvmoe
- exit ;;
- *:OS400:*:*)
- echo powerpc-ibm-os400
- exit ;;
- arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
- echo arm-acorn-riscix${UNAME_RELEASE}
- exit ;;
- arm:riscos:*:*|arm:RISCOS:*:*)
- echo arm-unknown-riscos
- exit ;;
- SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
- echo hppa1.1-hitachi-hiuxmpp
- exit ;;
- Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
- # akee at wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
- if test "`(/bin/universe) 2>/dev/null`" = att ; then
- echo pyramid-pyramid-sysv3
- else
- echo pyramid-pyramid-bsd
- fi
- exit ;;
- NILE*:*:*:dcosx)
- echo pyramid-pyramid-svr4
- exit ;;
- DRS?6000:unix:4.0:6*)
- echo sparc-icl-nx6
- exit ;;
- DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
- case `/usr/bin/uname -p` in
- sparc) echo sparc-icl-nx7; exit ;;
- esac ;;
- s390x:SunOS:*:*)
- echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit ;;
- sun4H:SunOS:5.*:*)
- echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit ;;
- sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
- echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit ;;
- i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
- echo i386-pc-auroraux${UNAME_RELEASE}
- exit ;;
- i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
- eval $set_cc_for_build
- SUN_ARCH="i386"
- # If there is a compiler, see if it is configured for 64-bit objects.
- # Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
- # This test works for both compilers.
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
- if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
- (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
- grep IS_64BIT_ARCH >/dev/null
- then
- SUN_ARCH="x86_64"
- fi
- fi
- echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit ;;
- sun4*:SunOS:6*:*)
- # According to config.sub, this is the proper way to canonicalize
- # SunOS6. Hard to guess exactly what SunOS6 will be like, but
- # it's likely to be more like Solaris than SunOS4.
- echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit ;;
- sun4*:SunOS:*:*)
- case "`/usr/bin/arch -k`" in
- Series*|S4*)
- UNAME_RELEASE=`uname -v`
- ;;
- esac
- # Japanese Language versions have a version number like `4.1.3-JL'.
- echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
- exit ;;
- sun3*:SunOS:*:*)
- echo m68k-sun-sunos${UNAME_RELEASE}
- exit ;;
- sun*:*:4.2BSD:*)
- UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
- test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
- case "`/bin/arch`" in
- sun3)
- echo m68k-sun-sunos${UNAME_RELEASE}
- ;;
- sun4)
- echo sparc-sun-sunos${UNAME_RELEASE}
- ;;
- esac
- exit ;;
- aushp:SunOS:*:*)
- echo sparc-auspex-sunos${UNAME_RELEASE}
- exit ;;
- # The situation for MiNT is a little confusing. The machine name
- # can be virtually everything (everything which is not
- # "atarist" or "atariste" at least should have a processor
- # > m68000). The system name ranges from "MiNT" over "FreeMiNT"
- # to the lowercase version "mint" (or "freemint"). Finally
- # the system name "TOS" denotes a system which is actually not
- # MiNT. But MiNT is downward compatible to TOS, so this should
- # be no problem.
- atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
- echo m68k-atari-mint${UNAME_RELEASE}
- exit ;;
- atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
- echo m68k-atari-mint${UNAME_RELEASE}
- exit ;;
- *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
- echo m68k-atari-mint${UNAME_RELEASE}
- exit ;;
- milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
- echo m68k-milan-mint${UNAME_RELEASE}
- exit ;;
- hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
- echo m68k-hades-mint${UNAME_RELEASE}
- exit ;;
- *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
- echo m68k-unknown-mint${UNAME_RELEASE}
- exit ;;
- m68k:machten:*:*)
- echo m68k-apple-machten${UNAME_RELEASE}
- exit ;;
- powerpc:machten:*:*)
- echo powerpc-apple-machten${UNAME_RELEASE}
- exit ;;
- RISC*:Mach:*:*)
- echo mips-dec-mach_bsd4.3
- exit ;;
- RISC*:ULTRIX:*:*)
- echo mips-dec-ultrix${UNAME_RELEASE}
- exit ;;
- VAX*:ULTRIX*:*:*)
- echo vax-dec-ultrix${UNAME_RELEASE}
- exit ;;
- 2020:CLIX:*:* | 2430:CLIX:*:*)
- echo clipper-intergraph-clix${UNAME_RELEASE}
- exit ;;
- mips:*:*:UMIPS | mips:*:*:RISCos)
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
-#ifdef __cplusplus
-#include <stdio.h> /* for printf() prototype */
- int main (int argc, char *argv[]) {
-#else
- int main (argc, argv) int argc; char *argv[]; {
-#endif
- #if defined (host_mips) && defined (MIPSEB)
- #if defined (SYSTYPE_SYSV)
- printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
- #endif
- #if defined (SYSTYPE_SVR4)
- printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
- #endif
- #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
- printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
- #endif
- #endif
- exit (-1);
- }
-EOF
- $CC_FOR_BUILD -o $dummy $dummy.c &&
- dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
- SYSTEM_NAME=`$dummy $dummyarg` &&
- { echo "$SYSTEM_NAME"; exit; }
- echo mips-mips-riscos${UNAME_RELEASE}
- exit ;;
- Motorola:PowerMAX_OS:*:*)
- echo powerpc-motorola-powermax
- exit ;;
- Motorola:*:4.3:PL8-*)
- echo powerpc-harris-powermax
- exit ;;
- Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
- echo powerpc-harris-powermax
- exit ;;
- Night_Hawk:Power_UNIX:*:*)
- echo powerpc-harris-powerunix
- exit ;;
- m88k:CX/UX:7*:*)
- echo m88k-harris-cxux7
- exit ;;
- m88k:*:4*:R4*)
- echo m88k-motorola-sysv4
- exit ;;
- m88k:*:3*:R3*)
- echo m88k-motorola-sysv3
- exit ;;
- AViiON:dgux:*:*)
- # DG/UX returns AViiON for all architectures
- UNAME_PROCESSOR=`/usr/bin/uname -p`
- if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
- then
- if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
- [ ${TARGET_BINARY_INTERFACE}x = x ]
- then
- echo m88k-dg-dgux${UNAME_RELEASE}
- else
- echo m88k-dg-dguxbcs${UNAME_RELEASE}
- fi
- else
- echo i586-dg-dgux${UNAME_RELEASE}
- fi
- exit ;;
- M88*:DolphinOS:*:*) # DolphinOS (SVR3)
- echo m88k-dolphin-sysv3
- exit ;;
- M88*:*:R3*:*)
- # Delta 88k system running SVR3
- echo m88k-motorola-sysv3
- exit ;;
- XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
- echo m88k-tektronix-sysv3
- exit ;;
- Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
- echo m68k-tektronix-bsd
- exit ;;
- *:IRIX*:*:*)
- echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
- exit ;;
- ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
- echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id
- exit ;; # Note that: echo "'`uname -s`'" gives 'AIX '
- i*86:AIX:*:*)
- echo i386-ibm-aix
- exit ;;
- ia64:AIX:*:*)
- if [ -x /usr/bin/oslevel ] ; then
- IBM_REV=`/usr/bin/oslevel`
- else
- IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
- fi
- echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
- exit ;;
- *:AIX:2:3)
- if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #include <sys/systemcfg.h>
-
- main()
- {
- if (!__power_pc())
- exit(1);
- puts("powerpc-ibm-aix3.2.5");
- exit(0);
- }
-EOF
- if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
- then
- echo "$SYSTEM_NAME"
- else
- echo rs6000-ibm-aix3.2.5
- fi
- elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
- echo rs6000-ibm-aix3.2.4
- else
- echo rs6000-ibm-aix3.2
- fi
- exit ;;
- *:AIX:*:[4567])
- IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
- if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
- IBM_ARCH=rs6000
- else
- IBM_ARCH=powerpc
- fi
- if [ -x /usr/bin/oslevel ] ; then
- IBM_REV=`/usr/bin/oslevel`
- else
- IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
- fi
- echo ${IBM_ARCH}-ibm-aix${IBM_REV}
- exit ;;
- *:AIX:*:*)
- echo rs6000-ibm-aix
- exit ;;
- ibmrt:4.4BSD:*|romp-ibm:BSD:*)
- echo romp-ibm-bsd4.4
- exit ;;
- ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and
- echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to
- exit ;; # report: romp-ibm BSD 4.3
- *:BOSX:*:*)
- echo rs6000-bull-bosx
- exit ;;
- DPX/2?00:B.O.S.:*:*)
- echo m68k-bull-sysv3
- exit ;;
- 9000/[34]??:4.3bsd:1.*:*)
- echo m68k-hp-bsd
- exit ;;
- hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
- echo m68k-hp-bsd4.4
- exit ;;
- 9000/[34678]??:HP-UX:*:*)
- HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
- case "${UNAME_MACHINE}" in
- 9000/31? ) HP_ARCH=m68000 ;;
- 9000/[34]?? ) HP_ARCH=m68k ;;
- 9000/[678][0-9][0-9])
- if [ -x /usr/bin/getconf ]; then
- sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
- sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
- case "${sc_cpu_version}" in
- 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
- 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
- 532) # CPU_PA_RISC2_0
- case "${sc_kernel_bits}" in
- 32) HP_ARCH="hppa2.0n" ;;
- 64) HP_ARCH="hppa2.0w" ;;
- '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20
- esac ;;
- esac
- fi
- if [ "${HP_ARCH}" = "" ]; then
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
-
- #define _HPUX_SOURCE
- #include <stdlib.h>
- #include <unistd.h>
-
- int main ()
- {
- #if defined(_SC_KERNEL_BITS)
- long bits = sysconf(_SC_KERNEL_BITS);
- #endif
- long cpu = sysconf (_SC_CPU_VERSION);
-
- switch (cpu)
- {
- case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
- case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
- case CPU_PA_RISC2_0:
- #if defined(_SC_KERNEL_BITS)
- switch (bits)
- {
- case 64: puts ("hppa2.0w"); break;
- case 32: puts ("hppa2.0n"); break;
- default: puts ("hppa2.0"); break;
- } break;
- #else /* !defined(_SC_KERNEL_BITS) */
- puts ("hppa2.0"); break;
- #endif
- default: puts ("hppa1.0"); break;
- }
- exit (0);
- }
-EOF
- (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
- test -z "$HP_ARCH" && HP_ARCH=hppa
- fi ;;
- esac
- if [ ${HP_ARCH} = "hppa2.0w" ]
- then
- eval $set_cc_for_build
-
- # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
- # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler
- # generating 64-bit code. GNU and HP use different nomenclature:
- #
- # $ CC_FOR_BUILD=cc ./config.guess
- # => hppa2.0w-hp-hpux11.23
- # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
- # => hppa64-hp-hpux11.23
-
- if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
- grep -q __LP64__
- then
- HP_ARCH="hppa2.0w"
- else
- HP_ARCH="hppa64"
- fi
- fi
- echo ${HP_ARCH}-hp-hpux${HPUX_REV}
- exit ;;
- ia64:HP-UX:*:*)
- HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
- echo ia64-hp-hpux${HPUX_REV}
- exit ;;
- 3050*:HI-UX:*:*)
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #include <unistd.h>
- int
- main ()
- {
- long cpu = sysconf (_SC_CPU_VERSION);
- /* The order matters, because CPU_IS_HP_MC68K erroneously returns
- true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct
- results, however. */
- if (CPU_IS_PA_RISC (cpu))
- {
- switch (cpu)
- {
- case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
- case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
- case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
- default: puts ("hppa-hitachi-hiuxwe2"); break;
- }
- }
- else if (CPU_IS_HP_MC68K (cpu))
- puts ("m68k-hitachi-hiuxwe2");
- else puts ("unknown-hitachi-hiuxwe2");
- exit (0);
- }
-EOF
- $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&
- { echo "$SYSTEM_NAME"; exit; }
- echo unknown-hitachi-hiuxwe2
- exit ;;
- 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
- echo hppa1.1-hp-bsd
- exit ;;
- 9000/8??:4.3bsd:*:*)
- echo hppa1.0-hp-bsd
- exit ;;
- *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
- echo hppa1.0-hp-mpeix
- exit ;;
- hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
- echo hppa1.1-hp-osf
- exit ;;
- hp8??:OSF1:*:*)
- echo hppa1.0-hp-osf
- exit ;;
- i*86:OSF1:*:*)
- if [ -x /usr/sbin/sysversion ] ; then
- echo ${UNAME_MACHINE}-unknown-osf1mk
- else
- echo ${UNAME_MACHINE}-unknown-osf1
- fi
- exit ;;
- parisc*:Lites*:*:*)
- echo hppa1.1-hp-lites
- exit ;;
- C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
- echo c1-convex-bsd
- exit ;;
- C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
- if getsysinfo -f scalar_acc
- then echo c32-convex-bsd
- else echo c2-convex-bsd
- fi
- exit ;;
- C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
- echo c34-convex-bsd
- exit ;;
- C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
- echo c38-convex-bsd
- exit ;;
- C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
- echo c4-convex-bsd
- exit ;;
- CRAY*Y-MP:*:*:*)
- echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit ;;
- CRAY*[A-Z]90:*:*:*)
- echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
- | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
- -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
- -e 's/\.[^.]*$/.X/'
- exit ;;
- CRAY*TS:*:*:*)
- echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit ;;
- CRAY*T3E:*:*:*)
- echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit ;;
- CRAY*SV1:*:*:*)
- echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit ;;
- *:UNICOS/mp:*:*)
- echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
- exit ;;
- F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
- FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
- FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
- FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
- echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
- exit ;;
- 5000:UNIX_System_V:4.*:*)
- FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
- FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
- echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
- exit ;;
- i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
- echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
- exit ;;
- sparc*:BSD/OS:*:*)
- echo sparc-unknown-bsdi${UNAME_RELEASE}
- exit ;;
- *:BSD/OS:*:*)
- echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
- exit ;;
- *:FreeBSD:*:*)
- case ${UNAME_MACHINE} in
- pc98)
- echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
- amd64)
- echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
- *)
- echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
- esac
- exit ;;
- i*:CYGWIN*:*)
- echo ${UNAME_MACHINE}-pc-cygwin
- exit ;;
- *:MINGW*:*)
- echo ${UNAME_MACHINE}-pc-mingw32
- exit ;;
- i*:windows32*:*)
- # uname -m includes "-pc" on this system.
- echo ${UNAME_MACHINE}-mingw32
- exit ;;
- i*:PW*:*)
- echo ${UNAME_MACHINE}-pc-pw32
- exit ;;
- *:Interix*:*)
- case ${UNAME_MACHINE} in
- x86)
- echo i586-pc-interix${UNAME_RELEASE}
- exit ;;
- authenticamd | genuineintel | EM64T)
- echo x86_64-unknown-interix${UNAME_RELEASE}
- exit ;;
- IA64)
- echo ia64-unknown-interix${UNAME_RELEASE}
- exit ;;
- esac ;;
- [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
- echo i${UNAME_MACHINE}-pc-mks
- exit ;;
- 8664:Windows_NT:*)
- echo x86_64-pc-mks
- exit ;;
- i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
- # How do we know it's Interix rather than the generic POSIX subsystem?
- # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
- # UNAME_MACHINE based on the output of uname instead of i386?
- echo i586-pc-interix
- exit ;;
- i*:UWIN*:*)
- echo ${UNAME_MACHINE}-pc-uwin
- exit ;;
- amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
- echo x86_64-unknown-cygwin
- exit ;;
- p*:CYGWIN*:*)
- echo powerpcle-unknown-cygwin
- exit ;;
- prep*:SunOS:5.*:*)
- echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
- exit ;;
- *:GNU:*:*)
- # the GNU system
- echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
- exit ;;
- *:GNU/*:*:*)
- # other systems with GNU libc and userland
- echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
- exit ;;
- i*86:Minix:*:*)
- echo ${UNAME_MACHINE}-pc-minix
- exit ;;
- alpha:Linux:*:*)
- case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
- EV5) UNAME_MACHINE=alphaev5 ;;
- EV56) UNAME_MACHINE=alphaev56 ;;
- PCA56) UNAME_MACHINE=alphapca56 ;;
- PCA57) UNAME_MACHINE=alphapca56 ;;
- EV6) UNAME_MACHINE=alphaev6 ;;
- EV67) UNAME_MACHINE=alphaev67 ;;
- EV68*) UNAME_MACHINE=alphaev68 ;;
- esac
- objdump --private-headers /bin/sh | grep -q ld.so.1
- if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
- exit ;;
- arm*:Linux:*:*)
- eval $set_cc_for_build
- if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
- | grep -q __ARM_EABI__
- then
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
- else
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi
- fi
- exit ;;
- avr32*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
- exit ;;
- cris:Linux:*:*)
- echo cris-axis-linux-${LIBC}
- exit ;;
- crisv32:Linux:*:*)
- echo crisv32-axis-linux-${LIBC}
- exit ;;
- frv:Linux:*:*)
- echo frv-unknown-linux-${LIBC}
- exit ;;
- i*86:Linux:*:*)
- echo ${UNAME_MACHINE}-pc-linux-${LIBC}
- exit ;;
- ia64:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
- exit ;;
- m32r*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
- exit ;;
- m68*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
- exit ;;
- mips:Linux:*:* | mips64:Linux:*:*)
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
- #undef CPU
- #undef ${UNAME_MACHINE}
- #undef ${UNAME_MACHINE}el
- #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
- CPU=${UNAME_MACHINE}el
- #else
- #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
- CPU=${UNAME_MACHINE}
- #else
- CPU=
- #endif
- #endif
-EOF
- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
- test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
- ;;
- or32:Linux:*:*)
- echo or32-unknown-linux-${LIBC}
- exit ;;
- padre:Linux:*:*)
- echo sparc-unknown-linux-${LIBC}
- exit ;;
- parisc64:Linux:*:* | hppa64:Linux:*:*)
- echo hppa64-unknown-linux-${LIBC}
- exit ;;
- parisc:Linux:*:* | hppa:Linux:*:*)
- # Look for CPU level
- case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
- PA7*) echo hppa1.1-unknown-linux-${LIBC} ;;
- PA8*) echo hppa2.0-unknown-linux-${LIBC} ;;
- *) echo hppa-unknown-linux-${LIBC} ;;
- esac
- exit ;;
- ppc64:Linux:*:*)
- echo powerpc64-unknown-linux-${LIBC}
- exit ;;
- ppc:Linux:*:*)
- echo powerpc-unknown-linux-${LIBC}
- exit ;;
- s390:Linux:*:* | s390x:Linux:*:*)
- echo ${UNAME_MACHINE}-ibm-linux
- exit ;;
- sh64*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
- exit ;;
- sh*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
- exit ;;
- sparc:Linux:*:* | sparc64:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
- exit ;;
- tile*:Linux:*:*)
- echo ${UNAME_MACHINE}-tilera-linux-gnu
- exit ;;
- vax:Linux:*:*)
- echo ${UNAME_MACHINE}-dec-linux-${LIBC}
- exit ;;
- x86_64:Linux:*:*)
- echo x86_64-unknown-linux-${LIBC}
- exit ;;
- xtensa*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
- exit ;;
- i*86:DYNIX/ptx:4*:*)
- # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
- # earlier versions are messed up and put the nodename in both
- # sysname and nodename.
- echo i386-sequent-sysv4
- exit ;;
- i*86:UNIX_SV:4.2MP:2.*)
- # Unixware is an offshoot of SVR4, but it has its own version
- # number series starting with 2...
- # I am not positive that other SVR4 systems won't match this,
- # I just have to hope. -- rms.
- # Use sysv4.2uw... so that sysv4* matches it.
- echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
- exit ;;
- i*86:OS/2:*:*)
- # If we were able to find `uname', then EMX Unix compatibility
- # is probably installed.
- echo ${UNAME_MACHINE}-pc-os2-emx
- exit ;;
- i*86:XTS-300:*:STOP)
- echo ${UNAME_MACHINE}-unknown-stop
- exit ;;
- i*86:atheos:*:*)
- echo ${UNAME_MACHINE}-unknown-atheos
- exit ;;
- i*86:syllable:*:*)
- echo ${UNAME_MACHINE}-pc-syllable
- exit ;;
- i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
- echo i386-unknown-lynxos${UNAME_RELEASE}
- exit ;;
- i*86:*DOS:*:*)
- echo ${UNAME_MACHINE}-pc-msdosdjgpp
- exit ;;
- i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
- UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
- if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
- echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
- else
- echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
- fi
- exit ;;
- i*86:*:5:[678]*)
- # UnixWare 7.x, OpenUNIX and OpenServer 6.
- case `/bin/uname -X | grep "^Machine"` in
- *486*) UNAME_MACHINE=i486 ;;
- *Pentium) UNAME_MACHINE=i586 ;;
- *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
- esac
- echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
- exit ;;
- i*86:*:3.2:*)
- if test -f /usr/options/cb.name; then
- UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
- echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
- elif /bin/uname -X 2>/dev/null >/dev/null ; then
- UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
- (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
- (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
- && UNAME_MACHINE=i586
- (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
- && UNAME_MACHINE=i686
- (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
- && UNAME_MACHINE=i686
- echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
- else
- echo ${UNAME_MACHINE}-pc-sysv32
- fi
- exit ;;
- pc:*:*:*)
- # Left here for compatibility:
- # uname -m prints for DJGPP always 'pc', but it prints nothing about
- # the processor, so we play safe by assuming i586.
- # Note: whatever this is, it MUST be the same as what config.sub
- # prints for the "djgpp" host, or else GDB configury will decide that
- # this is a cross-build.
- echo i586-pc-msdosdjgpp
- exit ;;
- Intel:Mach:3*:*)
- echo i386-pc-mach3
- exit ;;
- paragon:*:*:*)
- echo i860-intel-osf1
- exit ;;
- i860:*:4.*:*) # i860-SVR4
- if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
- echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
- else # Add other i860-SVR4 vendors below as they are discovered.
- echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4
- fi
- exit ;;
- mini*:CTIX:SYS*5:*)
- # "miniframe"
- echo m68010-convergent-sysv
- exit ;;
- mc68k:UNIX:SYSTEM5:3.51m)
- echo m68k-convergent-sysv
- exit ;;
- M680?0:D-NIX:5.3:*)
- echo m68k-diab-dnix
- exit ;;
- M68*:*:R3V[5678]*:*)
- test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
- 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
- OS_REL=''
- test -r /etc/.relid \
- && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
- /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
- && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
- /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
- && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
- 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
- /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
- && { echo i486-ncr-sysv4; exit; } ;;
- NCR*:*:4.2:* | MPRAS*:*:4.2:*)
- OS_REL='.3'
- test -r /etc/.relid \
- && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
- /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
- && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
- /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
- && { echo i586-ncr-sysv4.3${OS_REL}; exit; }
- /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
- && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
- m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
- echo m68k-unknown-lynxos${UNAME_RELEASE}
- exit ;;
- mc68030:UNIX_System_V:4.*:*)
- echo m68k-atari-sysv4
- exit ;;
- TSUNAMI:LynxOS:2.*:*)
- echo sparc-unknown-lynxos${UNAME_RELEASE}
- exit ;;
- rs6000:LynxOS:2.*:*)
- echo rs6000-unknown-lynxos${UNAME_RELEASE}
- exit ;;
- PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
- echo powerpc-unknown-lynxos${UNAME_RELEASE}
- exit ;;
- SM[BE]S:UNIX_SV:*:*)
- echo mips-dde-sysv${UNAME_RELEASE}
- exit ;;
- RM*:ReliantUNIX-*:*:*)
- echo mips-sni-sysv4
- exit ;;
- RM*:SINIX-*:*:*)
- echo mips-sni-sysv4
- exit ;;
- *:SINIX-*:*:*)
- if uname -p 2>/dev/null >/dev/null ; then
- UNAME_MACHINE=`(uname -p) 2>/dev/null`
- echo ${UNAME_MACHINE}-sni-sysv4
- else
- echo ns32k-sni-sysv
- fi
- exit ;;
- PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
- # says <Richard.M.Bartel at ccMail.Census.GOV>
- echo i586-unisys-sysv4
- exit ;;
- *:UNIX_System_V:4*:FTX*)
- # From Gerald Hewes <hewes at openmarket.com>.
- # How about differentiating between stratus architectures? -djm
- echo hppa1.1-stratus-sysv4
- exit ;;
- *:*:*:FTX*)
- # From seanf at swdc.stratus.com.
- echo i860-stratus-sysv4
- exit ;;
- i*86:VOS:*:*)
- # From Paul.Green at stratus.com.
- echo ${UNAME_MACHINE}-stratus-vos
- exit ;;
- *:VOS:*:*)
- # From Paul.Green at stratus.com.
- echo hppa1.1-stratus-vos
- exit ;;
- mc68*:A/UX:*:*)
- echo m68k-apple-aux${UNAME_RELEASE}
- exit ;;
- news*:NEWS-OS:6*:*)
- echo mips-sony-newsos6
- exit ;;
- R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
- if [ -d /usr/nec ]; then
- echo mips-nec-sysv${UNAME_RELEASE}
- else
- echo mips-unknown-sysv${UNAME_RELEASE}
- fi
- exit ;;
- BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
- echo powerpc-be-beos
- exit ;;
- BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only.
- echo powerpc-apple-beos
- exit ;;
- BePC:BeOS:*:*) # BeOS running on Intel PC compatible.
- echo i586-pc-beos
- exit ;;
- BePC:Haiku:*:*) # Haiku running on Intel PC compatible.
- echo i586-pc-haiku
- exit ;;
- SX-4:SUPER-UX:*:*)
- echo sx4-nec-superux${UNAME_RELEASE}
- exit ;;
- SX-5:SUPER-UX:*:*)
- echo sx5-nec-superux${UNAME_RELEASE}
- exit ;;
- SX-6:SUPER-UX:*:*)
- echo sx6-nec-superux${UNAME_RELEASE}
- exit ;;
- SX-7:SUPER-UX:*:*)
- echo sx7-nec-superux${UNAME_RELEASE}
- exit ;;
- SX-8:SUPER-UX:*:*)
- echo sx8-nec-superux${UNAME_RELEASE}
- exit ;;
- SX-8R:SUPER-UX:*:*)
- echo sx8r-nec-superux${UNAME_RELEASE}
- exit ;;
- Power*:Rhapsody:*:*)
- echo powerpc-apple-rhapsody${UNAME_RELEASE}
- exit ;;
- *:Rhapsody:*:*)
- echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
- exit ;;
- *:Darwin:*:*)
- UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
- case $UNAME_PROCESSOR in
- i386)
- eval $set_cc_for_build
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
- if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
- (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
- grep IS_64BIT_ARCH >/dev/null
- then
- UNAME_PROCESSOR="x86_64"
- fi
- fi ;;
- unknown) UNAME_PROCESSOR=powerpc ;;
- esac
- echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
- exit ;;
- *:procnto*:*:* | *:QNX:[0123456789]*:*)
- UNAME_PROCESSOR=`uname -p`
- if test "$UNAME_PROCESSOR" = "x86"; then
- UNAME_PROCESSOR=i386
- UNAME_MACHINE=pc
- fi
- echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
- exit ;;
- *:QNX:*:4*)
- echo i386-pc-qnx
- exit ;;
- NEO-?:NONSTOP_KERNEL:*:*)
- echo neo-tandem-nsk${UNAME_RELEASE}
- exit ;;
- NSE-?:NONSTOP_KERNEL:*:*)
- echo nse-tandem-nsk${UNAME_RELEASE}
- exit ;;
- NSR-?:NONSTOP_KERNEL:*:*)
- echo nsr-tandem-nsk${UNAME_RELEASE}
- exit ;;
- *:NonStop-UX:*:*)
- echo mips-compaq-nonstopux
- exit ;;
- BS2000:POSIX*:*:*)
- echo bs2000-siemens-sysv
- exit ;;
- DS/*:UNIX_System_V:*:*)
- echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
- exit ;;
- *:Plan9:*:*)
- # "uname -m" is not consistent, so use $cputype instead. 386
- # is converted to i386 for consistency with other x86
- # operating systems.
- if test "$cputype" = "386"; then
- UNAME_MACHINE=i386
- else
- UNAME_MACHINE="$cputype"
- fi
- echo ${UNAME_MACHINE}-unknown-plan9
- exit ;;
- *:TOPS-10:*:*)
- echo pdp10-unknown-tops10
- exit ;;
- *:TENEX:*:*)
- echo pdp10-unknown-tenex
- exit ;;
- KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
- echo pdp10-dec-tops20
- exit ;;
- XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
- echo pdp10-xkl-tops20
- exit ;;
- *:TOPS-20:*:*)
- echo pdp10-unknown-tops20
- exit ;;
- *:ITS:*:*)
- echo pdp10-unknown-its
- exit ;;
- SEI:*:*:SEIUX)
- echo mips-sei-seiux${UNAME_RELEASE}
- exit ;;
- *:DragonFly:*:*)
- echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
- exit ;;
- *:*VMS:*:*)
- UNAME_MACHINE=`(uname -p) 2>/dev/null`
- case "${UNAME_MACHINE}" in
- A*) echo alpha-dec-vms ; exit ;;
- I*) echo ia64-dec-vms ; exit ;;
- V*) echo vax-dec-vms ; exit ;;
- esac ;;
- *:XENIX:*:SysV)
- echo i386-pc-xenix
- exit ;;
- i*86:skyos:*:*)
- echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
- exit ;;
- i*86:rdos:*:*)
- echo ${UNAME_MACHINE}-pc-rdos
- exit ;;
- i*86:AROS:*:*)
- echo ${UNAME_MACHINE}-pc-aros
- exit ;;
-esac
-
-#echo '(No uname command or uname output not recognized.)' 1>&2
-#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
-
-eval $set_cc_for_build
-cat >$dummy.c <<EOF
-#ifdef _SEQUENT_
-# include <sys/types.h>
-# include <sys/utsname.h>
-#endif
-main ()
-{
-#if defined (sony)
-#if defined (MIPSEB)
- /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed,
- I don't know.... */
- printf ("mips-sony-bsd\n"); exit (0);
-#else
-#include <sys/param.h>
- printf ("m68k-sony-newsos%s\n",
-#ifdef NEWSOS4
- "4"
-#else
- ""
-#endif
- ); exit (0);
-#endif
-#endif
-
-#if defined (__arm) && defined (__acorn) && defined (__unix)
- printf ("arm-acorn-riscix\n"); exit (0);
-#endif
-
-#if defined (hp300) && !defined (hpux)
- printf ("m68k-hp-bsd\n"); exit (0);
-#endif
-
-#if defined (NeXT)
-#if !defined (__ARCHITECTURE__)
-#define __ARCHITECTURE__ "m68k"
-#endif
- int version;
- version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
- if (version < 4)
- printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
- else
- printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
- exit (0);
-#endif
-
-#if defined (MULTIMAX) || defined (n16)
-#if defined (UMAXV)
- printf ("ns32k-encore-sysv\n"); exit (0);
-#else
-#if defined (CMU)
- printf ("ns32k-encore-mach\n"); exit (0);
-#else
- printf ("ns32k-encore-bsd\n"); exit (0);
-#endif
-#endif
-#endif
-
-#if defined (__386BSD__)
- printf ("i386-pc-bsd\n"); exit (0);
-#endif
-
-#if defined (sequent)
-#if defined (i386)
- printf ("i386-sequent-dynix\n"); exit (0);
-#endif
-#if defined (ns32000)
- printf ("ns32k-sequent-dynix\n"); exit (0);
-#endif
-#endif
-
-#if defined (_SEQUENT_)
- struct utsname un;
-
- uname(&un);
-
- if (strncmp(un.version, "V2", 2) == 0) {
- printf ("i386-sequent-ptx2\n"); exit (0);
- }
- if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
- printf ("i386-sequent-ptx1\n"); exit (0);
- }
- printf ("i386-sequent-ptx\n"); exit (0);
-
-#endif
-
-#if defined (vax)
-# if !defined (ultrix)
-# include <sys/param.h>
-# if defined (BSD)
-# if BSD == 43
- printf ("vax-dec-bsd4.3\n"); exit (0);
-# else
-# if BSD == 199006
- printf ("vax-dec-bsd4.3reno\n"); exit (0);
-# else
- printf ("vax-dec-bsd\n"); exit (0);
-# endif
-# endif
-# else
- printf ("vax-dec-bsd\n"); exit (0);
-# endif
-# else
- printf ("vax-dec-ultrix\n"); exit (0);
-# endif
-#endif
-
-#if defined (alliant) && defined (i860)
- printf ("i860-alliant-bsd\n"); exit (0);
-#endif
-
- exit (1);
-}
-EOF
-
-$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
- { echo "$SYSTEM_NAME"; exit; }
-
-# Apollos put the system type in the environment.
-
-test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
-
-# Convex versions that predate uname can use getsysinfo(1)
-
-if [ -x /usr/convex/getsysinfo ]
-then
- case `getsysinfo -f cpu_type` in
- c1*)
- echo c1-convex-bsd
- exit ;;
- c2*)
- if getsysinfo -f scalar_acc
- then echo c32-convex-bsd
- else echo c2-convex-bsd
- fi
- exit ;;
- c34*)
- echo c34-convex-bsd
- exit ;;
- c38*)
- echo c38-convex-bsd
- exit ;;
- c4*)
- echo c4-convex-bsd
- exit ;;
- esac
-fi
-
-cat >&2 <<EOF
-$0: unable to guess system type
-
-This script, last modified $timestamp, has failed to recognize
-the operating system you are using. It is advised that you
-download the most up to date version of the config scripts from
-
- http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
-and
- http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
-
-If the version you run ($0) is already up to date, please
-send the following data and any information you think might be
-pertinent to <config-patches at gnu.org> in order to provide the needed
-information to handle your system.
-
-config.guess timestamp = $timestamp
-
-uname -m = `(uname -m) 2>/dev/null || echo unknown`
-uname -r = `(uname -r) 2>/dev/null || echo unknown`
-uname -s = `(uname -s) 2>/dev/null || echo unknown`
-uname -v = `(uname -v) 2>/dev/null || echo unknown`
-
-/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
-/bin/uname -X = `(/bin/uname -X) 2>/dev/null`
-
-hostinfo = `(hostinfo) 2>/dev/null`
-/bin/universe = `(/bin/universe) 2>/dev/null`
-/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null`
-/bin/arch = `(/bin/arch) 2>/dev/null`
-/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null`
-/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
-
-UNAME_MACHINE = ${UNAME_MACHINE}
-UNAME_RELEASE = ${UNAME_RELEASE}
-UNAME_SYSTEM = ${UNAME_SYSTEM}
-UNAME_VERSION = ${UNAME_VERSION}
-EOF
-
-exit 1
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
-# time-stamp-end: "'"
-# End:
diff --git a/build-aux/config.sub b/build-aux/config.sub
deleted file mode 100755
index eda465b..0000000
--- a/build-aux/config.sub
+++ /dev/null
@@ -1,1757 +0,0 @@
-#! /bin/sh
-# Configuration validation subroutine script.
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
-# 2011 Free Software Foundation, Inc.
-
-timestamp='2011-02-02'
-
-# This file is (in principle) common to ALL GNU software.
-# The presence of a machine in this file suggests that SOME GNU software
-# can handle that machine. It does not imply ALL GNU software can.
-#
-# This file is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-
-# Please send patches to <config-patches at gnu.org>. Submit a context
-# diff and a properly formatted GNU ChangeLog entry.
-#
-# Configuration subroutine to validate and canonicalize a configuration type.
-# Supply the specified configuration type as an argument.
-# If it is invalid, we print an error message on stderr and exit with code 1.
-# Otherwise, we print the canonical config type on stdout and succeed.
-
-# You can get the latest version of this script from:
-# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
-
-# This file is supposed to be the same for all GNU packages
-# and recognize all the CPU types, system types and aliases
-# that are meaningful with *any* GNU software.
-# Each package is responsible for reporting which valid configurations
-# it does not support. The user should be able to distinguish
-# a failure to support a valid configuration from a meaningless
-# configuration.
-
-# The goal of this file is to map all the various variations of a given
-# machine specification into a single specification in the form:
-# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
-# or in some cases, the newer four-part form:
-# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
-# It is wrong to echo any other type of specification.
-
-me=`echo "$0" | sed -e 's,.*/,,'`
-
-usage="\
-Usage: $0 [OPTION] CPU-MFR-OPSYS
- $0 [OPTION] ALIAS
-
-Canonicalize a configuration name.
-
-Operation modes:
- -h, --help print this help, then exit
- -t, --time-stamp print date of last modification, then exit
- -v, --version print version number, then exit
-
-Report bugs and patches to <config-patches at gnu.org>."
-
-version="\
-GNU config.sub ($timestamp)
-
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
-Software Foundation, Inc.
-
-This is free software; see the source for copying conditions. There is NO
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
-
-help="
-Try \`$me --help' for more information."
-
-# Parse command line
-while test $# -gt 0 ; do
- case $1 in
- --time-stamp | --time* | -t )
- echo "$timestamp" ; exit ;;
- --version | -v )
- echo "$version" ; exit ;;
- --help | --h* | -h )
- echo "$usage"; exit ;;
- -- ) # Stop option processing
- shift; break ;;
- - ) # Use stdin as input.
- break ;;
- -* )
- echo "$me: invalid option $1$help"
- exit 1 ;;
-
- *local*)
- # First pass through any local machine types.
- echo $1
- exit ;;
-
- * )
- break ;;
- esac
-done
-
-case $# in
- 0) echo "$me: missing argument$help" >&2
- exit 1;;
- 1) ;;
- *) echo "$me: too many arguments$help" >&2
- exit 1;;
-esac
-
-# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
-# Here we must recognize all the valid KERNEL-OS combinations.
-maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
-case $maybe_os in
- nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
- linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
- knetbsd*-gnu* | netbsd*-gnu* | \
- kopensolaris*-gnu* | \
- storm-chaos* | os2-emx* | rtmk-nova*)
- os=-$maybe_os
- basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
- ;;
- *)
- basic_machine=`echo $1 | sed 's/-[^-]*$//'`
- if [ $basic_machine != $1 ]
- then os=`echo $1 | sed 's/.*-/-/'`
- else os=; fi
- ;;
-esac
-
-### Let's recognize common machines as not being operating systems so
-### that things like config.sub decstation-3100 work. We also
-### recognize some manufacturers as not being operating systems, so we
-### can provide default operating systems below.
-case $os in
- -sun*os*)
- # Prevent following clause from handling this invalid input.
- ;;
- -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
- -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
- -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
- -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
- -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
- -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
- -apple | -axis | -knuth | -cray | -microblaze)
- os=
- basic_machine=$1
- ;;
- -bluegene*)
- os=-cnk
- ;;
- -sim | -cisco | -oki | -wec | -winbond)
- os=
- basic_machine=$1
- ;;
- -scout)
- ;;
- -wrs)
- os=-vxworks
- basic_machine=$1
- ;;
- -chorusos*)
- os=-chorusos
- basic_machine=$1
- ;;
- -chorusrdb)
- os=-chorusrdb
- basic_machine=$1
- ;;
- -hiux*)
- os=-hiuxwe2
- ;;
- -sco6)
- os=-sco5v6
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco5)
- os=-sco3.2v5
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco4)
- os=-sco3.2v4
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco3.2.[4-9]*)
- os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco3.2v[4-9]*)
- # Don't forget version if it is 3.2v4 or newer.
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco5v6*)
- # Don't forget version if it is 3.2v4 or newer.
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco*)
- os=-sco3.2v2
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -udk*)
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -isc)
- os=-isc2.2
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -clix*)
- basic_machine=clipper-intergraph
- ;;
- -isc*)
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -lynx*)
- os=-lynxos
- ;;
- -ptx*)
- basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
- ;;
- -windowsnt*)
- os=`echo $os | sed -e 's/windowsnt/winnt/'`
- ;;
- -psos*)
- os=-psos
- ;;
- -mint | -mint[0-9]*)
- basic_machine=m68k-atari
- os=-mint
- ;;
-esac
-
-# Decode aliases for certain CPU-COMPANY combinations.
-case $basic_machine in
- # Recognize the basic CPU types without company name.
- # Some are omitted here because they have special meanings below.
- 1750a | 580 \
- | a29k \
- | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
- | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
- | am33_2.0 \
- | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
- | bfin \
- | c4x | clipper \
- | d10v | d30v | dlx | dsp16xx | dvp \
- | fido | fr30 | frv \
- | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
- | i370 | i860 | i960 | ia64 \
- | ip2k | iq2000 \
- | lm32 \
- | m32c | m32r | m32rle | m68000 | m68k | m88k \
- | maxq | mb | microblaze | mcore | mep | metag \
- | mips | mipsbe | mipseb | mipsel | mipsle \
- | mips16 \
- | mips64 | mips64el \
- | mips64octeon | mips64octeonel \
- | mips64orion | mips64orionel \
- | mips64r5900 | mips64r5900el \
- | mips64vr | mips64vrel \
- | mips64vr4100 | mips64vr4100el \
- | mips64vr4300 | mips64vr4300el \
- | mips64vr5000 | mips64vr5000el \
- | mips64vr5900 | mips64vr5900el \
- | mipsisa32 | mipsisa32el \
- | mipsisa32r2 | mipsisa32r2el \
- | mipsisa64 | mipsisa64el \
- | mipsisa64r2 | mipsisa64r2el \
- | mipsisa64sb1 | mipsisa64sb1el \
- | mipsisa64sr71k | mipsisa64sr71kel \
- | mipstx39 | mipstx39el \
- | mn10200 | mn10300 \
- | moxie \
- | mt \
- | msp430 \
- | nds32 | nds32le | nds32be \
- | nios | nios2 \
- | ns16k | ns32k \
- | or32 \
- | pdp10 | pdp11 | pj | pjl \
- | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
- | pyramid \
- | rx \
- | score \
- | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
- | sh64 | sh64le \
- | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
- | sparcv8 | sparcv9 | sparcv9b | sparcv9v \
- | spu | strongarm \
- | tahoe | thumb | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
- | ubicom32 \
- | v850 | v850e \
- | we32k \
- | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \
- | z8k | z80)
- basic_machine=$basic_machine-unknown
- ;;
- c54x)
- basic_machine=tic54x-unknown
- ;;
- c55x)
- basic_machine=tic55x-unknown
- ;;
- c6x)
- basic_machine=tic6x-unknown
- ;;
- m6811 | m68hc11 | m6812 | m68hc12 | picochip)
- # Motorola 68HC11/12.
- basic_machine=$basic_machine-unknown
- os=-none
- ;;
- m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
- ;;
- ms1)
- basic_machine=mt-unknown
- ;;
-
- # We use `pc' rather than `unknown'
- # because (1) that's what they normally are, and
- # (2) the word "unknown" tends to confuse beginning users.
- i*86 | x86_64)
- basic_machine=$basic_machine-pc
- ;;
- # Object if more than one company name word.
- *-*-*)
- echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
- exit 1
- ;;
- # Recognize the basic CPU types with company name.
- 580-* \
- | a29k-* \
- | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
- | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
- | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
- | arm-* | armbe-* | armle-* | armeb-* | armv*-* \
- | avr-* | avr32-* \
- | bfin-* | bs2000-* \
- | c[123]* | c30-* | [cjt]90-* | c4x-* \
- | clipper-* | craynv-* | cydra-* \
- | d10v-* | d30v-* | dlx-* \
- | elxsi-* \
- | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
- | h8300-* | h8500-* \
- | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
- | i*86-* | i860-* | i960-* | ia64-* \
- | ip2k-* | iq2000-* \
- | lm32-* \
- | m32c-* | m32r-* | m32rle-* \
- | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
- | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
- | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
- | mips16-* \
- | mips64-* | mips64el-* \
- | mips64octeon-* | mips64octeonel-* \
- | mips64orion-* | mips64orionel-* \
- | mips64r5900-* | mips64r5900el-* \
- | mips64vr-* | mips64vrel-* \
- | mips64vr4100-* | mips64vr4100el-* \
- | mips64vr4300-* | mips64vr4300el-* \
- | mips64vr5000-* | mips64vr5000el-* \
- | mips64vr5900-* | mips64vr5900el-* \
- | mipsisa32-* | mipsisa32el-* \
- | mipsisa32r2-* | mipsisa32r2el-* \
- | mipsisa64-* | mipsisa64el-* \
- | mipsisa64r2-* | mipsisa64r2el-* \
- | mipsisa64sb1-* | mipsisa64sb1el-* \
- | mipsisa64sr71k-* | mipsisa64sr71kel-* \
- | mipstx39-* | mipstx39el-* \
- | mmix-* \
- | mt-* \
- | msp430-* \
- | nds32-* | nds32le-* | nds32be-* \
- | nios-* | nios2-* \
- | none-* | np1-* | ns16k-* | ns32k-* \
- | orion-* \
- | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
- | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
- | pyramid-* \
- | romp-* | rs6000-* | rx-* \
- | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
- | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
- | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
- | sparclite-* \
- | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \
- | tahoe-* | thumb-* \
- | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
- | tile-* | tilegx-* \
- | tron-* \
- | ubicom32-* \
- | v850-* | v850e-* | vax-* \
- | we32k-* \
- | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \
- | xstormy16-* | xtensa*-* \
- | ymp-* \
- | z8k-* | z80-*)
- ;;
- # Recognize the basic CPU types without company name, with glob match.
- xtensa*)
- basic_machine=$basic_machine-unknown
- ;;
- # Recognize the various machine names and aliases which stand
- # for a CPU type and a company and sometimes even an OS.
- 386bsd)
- basic_machine=i386-unknown
- os=-bsd
- ;;
- 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
- basic_machine=m68000-att
- ;;
- 3b*)
- basic_machine=we32k-att
- ;;
- a29khif)
- basic_machine=a29k-amd
- os=-udi
- ;;
- abacus)
- basic_machine=abacus-unknown
- ;;
- adobe68k)
- basic_machine=m68010-adobe
- os=-scout
- ;;
- alliant | fx80)
- basic_machine=fx80-alliant
- ;;
- altos | altos3068)
- basic_machine=m68k-altos
- ;;
- am29k)
- basic_machine=a29k-none
- os=-bsd
- ;;
- amd64)
- basic_machine=x86_64-pc
- ;;
- amd64-*)
- basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- amdahl)
- basic_machine=580-amdahl
- os=-sysv
- ;;
- amiga | amiga-*)
- basic_machine=m68k-unknown
- ;;
- amigaos | amigados)
- basic_machine=m68k-unknown
- os=-amigaos
- ;;
- amigaunix | amix)
- basic_machine=m68k-unknown
- os=-sysv4
- ;;
- apollo68)
- basic_machine=m68k-apollo
- os=-sysv
- ;;
- apollo68bsd)
- basic_machine=m68k-apollo
- os=-bsd
- ;;
- aros)
- basic_machine=i386-pc
- os=-aros
- ;;
- aux)
- basic_machine=m68k-apple
- os=-aux
- ;;
- balance)
- basic_machine=ns32k-sequent
- os=-dynix
- ;;
- blackfin)
- basic_machine=bfin-unknown
- os=-linux
- ;;
- blackfin-*)
- basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
- os=-linux
- ;;
- bluegene*)
- basic_machine=powerpc-ibm
- os=-cnk
- ;;
- c54x-*)
- basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- c55x-*)
- basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- c6x-*)
- basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- c90)
- basic_machine=c90-cray
- os=-unicos
- ;;
- cegcc)
- basic_machine=arm-unknown
- os=-cegcc
- ;;
- convex-c1)
- basic_machine=c1-convex
- os=-bsd
- ;;
- convex-c2)
- basic_machine=c2-convex
- os=-bsd
- ;;
- convex-c32)
- basic_machine=c32-convex
- os=-bsd
- ;;
- convex-c34)
- basic_machine=c34-convex
- os=-bsd
- ;;
- convex-c38)
- basic_machine=c38-convex
- os=-bsd
- ;;
- cray | j90)
- basic_machine=j90-cray
- os=-unicos
- ;;
- craynv)
- basic_machine=craynv-cray
- os=-unicosmp
- ;;
- cr16 | cr16-*)
- basic_machine=cr16-unknown
- os=-elf
- ;;
- crds | unos)
- basic_machine=m68k-crds
- ;;
- crisv32 | crisv32-* | etraxfs*)
- basic_machine=crisv32-axis
- ;;
- cris | cris-* | etrax*)
- basic_machine=cris-axis
- ;;
- crx)
- basic_machine=crx-unknown
- os=-elf
- ;;
- da30 | da30-*)
- basic_machine=m68k-da30
- ;;
- decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
- basic_machine=mips-dec
- ;;
- decsystem10* | dec10*)
- basic_machine=pdp10-dec
- os=-tops10
- ;;
- decsystem20* | dec20*)
- basic_machine=pdp10-dec
- os=-tops20
- ;;
- delta | 3300 | motorola-3300 | motorola-delta \
- | 3300-motorola | delta-motorola)
- basic_machine=m68k-motorola
- ;;
- delta88)
- basic_machine=m88k-motorola
- os=-sysv3
- ;;
- dicos)
- basic_machine=i686-pc
- os=-dicos
- ;;
- djgpp)
- basic_machine=i586-pc
- os=-msdosdjgpp
- ;;
- dpx20 | dpx20-*)
- basic_machine=rs6000-bull
- os=-bosx
- ;;
- dpx2* | dpx2*-bull)
- basic_machine=m68k-bull
- os=-sysv3
- ;;
- ebmon29k)
- basic_machine=a29k-amd
- os=-ebmon
- ;;
- elxsi)
- basic_machine=elxsi-elxsi
- os=-bsd
- ;;
- encore | umax | mmax)
- basic_machine=ns32k-encore
- ;;
- es1800 | OSE68k | ose68k | ose | OSE)
- basic_machine=m68k-ericsson
- os=-ose
- ;;
- fx2800)
- basic_machine=i860-alliant
- ;;
- genix)
- basic_machine=ns32k-ns
- ;;
- gmicro)
- basic_machine=tron-gmicro
- os=-sysv
- ;;
- go32)
- basic_machine=i386-pc
- os=-go32
- ;;
- h3050r* | hiux*)
- basic_machine=hppa1.1-hitachi
- os=-hiuxwe2
- ;;
- h8300hms)
- basic_machine=h8300-hitachi
- os=-hms
- ;;
- h8300xray)
- basic_machine=h8300-hitachi
- os=-xray
- ;;
- h8500hms)
- basic_machine=h8500-hitachi
- os=-hms
- ;;
- harris)
- basic_machine=m88k-harris
- os=-sysv3
- ;;
- hp300-*)
- basic_machine=m68k-hp
- ;;
- hp300bsd)
- basic_machine=m68k-hp
- os=-bsd
- ;;
- hp300hpux)
- basic_machine=m68k-hp
- os=-hpux
- ;;
- hp3k9[0-9][0-9] | hp9[0-9][0-9])
- basic_machine=hppa1.0-hp
- ;;
- hp9k2[0-9][0-9] | hp9k31[0-9])
- basic_machine=m68000-hp
- ;;
- hp9k3[2-9][0-9])
- basic_machine=m68k-hp
- ;;
- hp9k6[0-9][0-9] | hp6[0-9][0-9])
- basic_machine=hppa1.0-hp
- ;;
- hp9k7[0-79][0-9] | hp7[0-79][0-9])
- basic_machine=hppa1.1-hp
- ;;
- hp9k78[0-9] | hp78[0-9])
- # FIXME: really hppa2.0-hp
- basic_machine=hppa1.1-hp
- ;;
- hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
- # FIXME: really hppa2.0-hp
- basic_machine=hppa1.1-hp
- ;;
- hp9k8[0-9][13679] | hp8[0-9][13679])
- basic_machine=hppa1.1-hp
- ;;
- hp9k8[0-9][0-9] | hp8[0-9][0-9])
- basic_machine=hppa1.0-hp
- ;;
- hppa-next)
- os=-nextstep3
- ;;
- hppaosf)
- basic_machine=hppa1.1-hp
- os=-osf
- ;;
- hppro)
- basic_machine=hppa1.1-hp
- os=-proelf
- ;;
- i370-ibm* | ibm*)
- basic_machine=i370-ibm
- ;;
-# I'm not sure what "Sysv32" means. Should this be sysv3.2?
- i*86v32)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-sysv32
- ;;
- i*86v4*)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-sysv4
- ;;
- i*86v)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-sysv
- ;;
- i*86sol2)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-solaris2
- ;;
- i386mach)
- basic_machine=i386-mach
- os=-mach
- ;;
- i386-vsta | vsta)
- basic_machine=i386-unknown
- os=-vsta
- ;;
- iris | iris4d)
- basic_machine=mips-sgi
- case $os in
- -irix*)
- ;;
- *)
- os=-irix4
- ;;
- esac
- ;;
- isi68 | isi)
- basic_machine=m68k-isi
- os=-sysv
- ;;
- m68knommu)
- basic_machine=m68k-unknown
- os=-linux
- ;;
- m68knommu-*)
- basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
- os=-linux
- ;;
- m88k-omron*)
- basic_machine=m88k-omron
- ;;
- magnum | m3230)
- basic_machine=mips-mips
- os=-sysv
- ;;
- merlin)
- basic_machine=ns32k-utek
- os=-sysv
- ;;
- microblaze)
- basic_machine=microblaze-xilinx
- ;;
- mingw32)
- basic_machine=i386-pc
- os=-mingw32
- ;;
- mingw32ce)
- basic_machine=arm-unknown
- os=-mingw32ce
- ;;
- miniframe)
- basic_machine=m68000-convergent
- ;;
- *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
- basic_machine=m68k-atari
- os=-mint
- ;;
- mipsEE* | ee | ps2)
- basic_machine=mips64r5900el-scei
- case $os in
- -linux*)
- ;;
- *)
- os=-elf
- ;;
- esac
- ;;
- iop)
- basic_machine=mipsel-scei
- os=-irx
- ;;
- dvp)
- basic_machine=dvp-scei
- os=-elf
- ;;
- mips3*-*)
- basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
- ;;
- mips3*)
- basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
- ;;
- monitor)
- basic_machine=m68k-rom68k
- os=-coff
- ;;
- morphos)
- basic_machine=powerpc-unknown
- os=-morphos
- ;;
- msdos)
- basic_machine=i386-pc
- os=-msdos
- ;;
- ms1-*)
- basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
- ;;
- mvs)
- basic_machine=i370-ibm
- os=-mvs
- ;;
- ncr3000)
- basic_machine=i486-ncr
- os=-sysv4
- ;;
- netbsd386)
- basic_machine=i386-unknown
- os=-netbsd
- ;;
- netwinder)
- basic_machine=armv4l-rebel
- os=-linux
- ;;
- news | news700 | news800 | news900)
- basic_machine=m68k-sony
- os=-newsos
- ;;
- news1000)
- basic_machine=m68030-sony
- os=-newsos
- ;;
- news-3600 | risc-news)
- basic_machine=mips-sony
- os=-newsos
- ;;
- necv70)
- basic_machine=v70-nec
- os=-sysv
- ;;
- next | m*-next )
- basic_machine=m68k-next
- case $os in
- -nextstep* )
- ;;
- -ns2*)
- os=-nextstep2
- ;;
- *)
- os=-nextstep3
- ;;
- esac
- ;;
- nh3000)
- basic_machine=m68k-harris
- os=-cxux
- ;;
- nh[45]000)
- basic_machine=m88k-harris
- os=-cxux
- ;;
- nindy960)
- basic_machine=i960-intel
- os=-nindy
- ;;
- mon960)
- basic_machine=i960-intel
- os=-mon960
- ;;
- nonstopux)
- basic_machine=mips-compaq
- os=-nonstopux
- ;;
- np1)
- basic_machine=np1-gould
- ;;
- neo-tandem)
- basic_machine=neo-tandem
- ;;
- nse-tandem)
- basic_machine=nse-tandem
- ;;
- nsr-tandem)
- basic_machine=nsr-tandem
- ;;
- op50n-* | op60c-*)
- basic_machine=hppa1.1-oki
- os=-proelf
- ;;
- openrisc | openrisc-*)
- basic_machine=or32-unknown
- ;;
- os400)
- basic_machine=powerpc-ibm
- os=-os400
- ;;
- OSE68000 | ose68000)
- basic_machine=m68000-ericsson
- os=-ose
- ;;
- os68k)
- basic_machine=m68k-none
- os=-os68k
- ;;
- pa-hitachi)
- basic_machine=hppa1.1-hitachi
- os=-hiuxwe2
- ;;
- paragon)
- basic_machine=i860-intel
- os=-osf
- ;;
- parisc)
- basic_machine=hppa-unknown
- os=-linux
- ;;
- parisc-*)
- basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
- os=-linux
- ;;
- pbd)
- basic_machine=sparc-tti
- ;;
- pbb)
- basic_machine=m68k-tti
- ;;
- pc532 | pc532-*)
- basic_machine=ns32k-pc532
- ;;
- pc98)
- basic_machine=i386-pc
- ;;
- pc98-*)
- basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pentium | p5 | k5 | k6 | nexgen | viac3)
- basic_machine=i586-pc
- ;;
- pentiumpro | p6 | 6x86 | athlon | athlon_*)
- basic_machine=i686-pc
- ;;
- pentiumii | pentium2 | pentiumiii | pentium3)
- basic_machine=i686-pc
- ;;
- pentium4)
- basic_machine=i786-pc
- ;;
- pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
- basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pentiumpro-* | p6-* | 6x86-* | athlon-*)
- basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
- basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pentium4-*)
- basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pn)
- basic_machine=pn-gould
- ;;
- power) basic_machine=power-ibm
- ;;
- ppc) basic_machine=powerpc-unknown
- ;;
- ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- ppcle | powerpclittle | ppc-le | powerpc-little)
- basic_machine=powerpcle-unknown
- ;;
- ppcle-* | powerpclittle-*)
- basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- ppc64) basic_machine=powerpc64-unknown
- ;;
- ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- ppc64le | powerpc64little | ppc64-le | powerpc64-little)
- basic_machine=powerpc64le-unknown
- ;;
- ppc64le-* | powerpc64little-*)
- basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- ps2)
- basic_machine=i386-ibm
- ;;
- pw32)
- basic_machine=i586-unknown
- os=-pw32
- ;;
- rdos)
- basic_machine=i386-pc
- os=-rdos
- ;;
- rom68k)
- basic_machine=m68k-rom68k
- os=-coff
- ;;
- rm[46]00)
- basic_machine=mips-siemens
- ;;
- rtpc | rtpc-*)
- basic_machine=romp-ibm
- ;;
- s390 | s390-*)
- basic_machine=s390-ibm
- ;;
- s390x | s390x-*)
- basic_machine=s390x-ibm
- ;;
- sa29200)
- basic_machine=a29k-amd
- os=-udi
- ;;
- sb1)
- basic_machine=mipsisa64sb1-unknown
- ;;
- sb1el)
- basic_machine=mipsisa64sb1el-unknown
- ;;
- sde)
- basic_machine=mipsisa32-sde
- os=-elf
- ;;
- sei)
- basic_machine=mips-sei
- os=-seiux
- ;;
- sequent)
- basic_machine=i386-sequent
- ;;
- sh)
- basic_machine=sh-hitachi
- os=-hms
- ;;
- sh5el)
- basic_machine=sh5le-unknown
- ;;
- sh64)
- basic_machine=sh64-unknown
- ;;
- sparclite-wrs | simso-wrs)
- basic_machine=sparclite-wrs
- os=-vxworks
- ;;
- sps7)
- basic_machine=m68k-bull
- os=-sysv2
- ;;
- spur)
- basic_machine=spur-unknown
- ;;
- st2000)
- basic_machine=m68k-tandem
- ;;
- stratus)
- basic_machine=i860-stratus
- os=-sysv4
- ;;
- sun2)
- basic_machine=m68000-sun
- ;;
- sun2os3)
- basic_machine=m68000-sun
- os=-sunos3
- ;;
- sun2os4)
- basic_machine=m68000-sun
- os=-sunos4
- ;;
- sun3os3)
- basic_machine=m68k-sun
- os=-sunos3
- ;;
- sun3os4)
- basic_machine=m68k-sun
- os=-sunos4
- ;;
- sun4os3)
- basic_machine=sparc-sun
- os=-sunos3
- ;;
- sun4os4)
- basic_machine=sparc-sun
- os=-sunos4
- ;;
- sun4sol2)
- basic_machine=sparc-sun
- os=-solaris2
- ;;
- sun3 | sun3-*)
- basic_machine=m68k-sun
- ;;
- sun4)
- basic_machine=sparc-sun
- ;;
- sun386 | sun386i | roadrunner)
- basic_machine=i386-sun
- ;;
- sv1)
- basic_machine=sv1-cray
- os=-unicos
- ;;
- symmetry)
- basic_machine=i386-sequent
- os=-dynix
- ;;
- t3e)
- basic_machine=alphaev5-cray
- os=-unicos
- ;;
- t90)
- basic_machine=t90-cray
- os=-unicos
- ;;
- # This must be matched before tile*.
- tilegx*)
- basic_machine=tilegx-unknown
- os=-linux-gnu
- ;;
- tile*)
- basic_machine=tile-unknown
- os=-linux-gnu
- ;;
- tx39)
- basic_machine=mipstx39-unknown
- ;;
- tx39el)
- basic_machine=mipstx39el-unknown
- ;;
- toad1)
- basic_machine=pdp10-xkl
- os=-tops20
- ;;
- tower | tower-32)
- basic_machine=m68k-ncr
- ;;
- tpf)
- basic_machine=s390x-ibm
- os=-tpf
- ;;
- udi29k)
- basic_machine=a29k-amd
- os=-udi
- ;;
- ultra3)
- basic_machine=a29k-nyu
- os=-sym1
- ;;
- v810 | necv810)
- basic_machine=v810-nec
- os=-none
- ;;
- vaxv)
- basic_machine=vax-dec
- os=-sysv
- ;;
- vms)
- basic_machine=vax-dec
- os=-vms
- ;;
- vpp*|vx|vx-*)
- basic_machine=f301-fujitsu
- ;;
- vxworks960)
- basic_machine=i960-wrs
- os=-vxworks
- ;;
- vxworks68)
- basic_machine=m68k-wrs
- os=-vxworks
- ;;
- vxworks29k)
- basic_machine=a29k-wrs
- os=-vxworks
- ;;
- w65*)
- basic_machine=w65-wdc
- os=-none
- ;;
- w89k-*)
- basic_machine=hppa1.1-winbond
- os=-proelf
- ;;
- xbox)
- basic_machine=i686-pc
- os=-mingw32
- ;;
- xps | xps100)
- basic_machine=xps100-honeywell
- ;;
- ymp)
- basic_machine=ymp-cray
- os=-unicos
- ;;
- z8k-*-coff)
- basic_machine=z8k-unknown
- os=-sim
- ;;
- z80-*-coff)
- basic_machine=z80-unknown
- os=-sim
- ;;
- none)
- basic_machine=none-none
- os=-none
- ;;
-
-# Here we handle the default manufacturer of certain CPU types. It is in
-# some cases the only manufacturer, in others, it is the most popular.
- w89k)
- basic_machine=hppa1.1-winbond
- ;;
- op50n)
- basic_machine=hppa1.1-oki
- ;;
- op60c)
- basic_machine=hppa1.1-oki
- ;;
- romp)
- basic_machine=romp-ibm
- ;;
- mmix)
- basic_machine=mmix-knuth
- ;;
- rs6000)
- basic_machine=rs6000-ibm
- ;;
- vax)
- basic_machine=vax-dec
- ;;
- pdp10)
- # there are many clones, so DEC is not a safe bet
- basic_machine=pdp10-unknown
- ;;
- pdp11)
- basic_machine=pdp11-dec
- ;;
- we32k)
- basic_machine=we32k-att
- ;;
- sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
- basic_machine=sh-unknown
- ;;
- sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
- basic_machine=sparc-sun
- ;;
- cydra)
- basic_machine=cydra-cydrome
- ;;
- orion)
- basic_machine=orion-highlevel
- ;;
- orion105)
- basic_machine=clipper-highlevel
- ;;
- mac | mpw | mac-mpw)
- basic_machine=m68k-apple
- ;;
- pmac | pmac-mpw)
- basic_machine=powerpc-apple
- ;;
- *-unknown)
- # Make sure to match an already-canonicalized machine name.
- ;;
- *)
- echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
- exit 1
- ;;
-esac
-
-# Here we canonicalize certain aliases for manufacturers.
-case $basic_machine in
- *-digital*)
- basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
- ;;
- *-commodore*)
- basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
- ;;
- *)
- ;;
-esac
-
-# Decode manufacturer-specific aliases for certain operating systems.
-
-if [ x"$os" != x"" ]
-then
-case $os in
- # First match some system type aliases
- # that might get confused with valid system types.
- # -solaris* is a basic system type, with this one exception.
- -auroraux)
- os=-auroraux
- ;;
- -solaris1 | -solaris1.*)
- os=`echo $os | sed -e 's|solaris1|sunos4|'`
- ;;
- -solaris)
- os=-solaris2
- ;;
- -svr4*)
- os=-sysv4
- ;;
- -unixware*)
- os=-sysv4.2uw
- ;;
- -gnu/linux*)
- os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
- ;;
- # First accept the basic system types.
- # The portable systems comes first.
- # Each alternative MUST END IN A *, to match a version number.
- # -sysv* is not here because it comes later, after sysvr4.
- -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
- | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
- | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
- | -sym* | -kopensolaris* \
- | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
- | -aos* | -aros* \
- | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
- | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
- | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
- | -openbsd* | -solidbsd* \
- | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
- | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
- | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
- | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
- | -chorusos* | -chorusrdb* | -cegcc* \
- | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
- | -mingw32* | -linux-gnu* | -linux-android* \
- | -linux-newlib* | -linux-uclibc* \
- | -uxpv* | -beos* | -mpeix* | -udk* \
- | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
- | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
- | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* | -irx* \
- | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
- | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
- | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
- | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*)
- # Remember, each alternative MUST END IN *, to match a version number.
- ;;
- -qnx*)
- case $basic_machine in
- x86-* | i*86-*)
- ;;
- *)
- os=-nto$os
- ;;
- esac
- ;;
- -nto-qnx*)
- ;;
- -nto*)
- os=`echo $os | sed -e 's|nto|nto-qnx|'`
- ;;
- -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
- | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
- | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
- ;;
- -mac*)
- os=`echo $os | sed -e 's|mac|macos|'`
- ;;
- -linux-dietlibc)
- os=-linux-dietlibc
- ;;
- -linux*)
- os=`echo $os | sed -e 's|linux|linux-gnu|'`
- ;;
- -sunos5*)
- os=`echo $os | sed -e 's|sunos5|solaris2|'`
- ;;
- -sunos6*)
- os=`echo $os | sed -e 's|sunos6|solaris3|'`
- ;;
- -opened*)
- os=-openedition
- ;;
- -os400*)
- os=-os400
- ;;
- -wince*)
- os=-wince
- ;;
- -osfrose*)
- os=-osfrose
- ;;
- -osf*)
- os=-osf
- ;;
- -utek*)
- os=-bsd
- ;;
- -dynix*)
- os=-bsd
- ;;
- -acis*)
- os=-aos
- ;;
- -atheos*)
- os=-atheos
- ;;
- -syllable*)
- os=-syllable
- ;;
- -386bsd)
- os=-bsd
- ;;
- -ctix* | -uts*)
- os=-sysv
- ;;
- -nova*)
- os=-rtmk-nova
- ;;
- -ns2 )
- os=-nextstep2
- ;;
- -nsk*)
- os=-nsk
- ;;
- # Preserve the version number of sinix5.
- -sinix5.*)
- os=`echo $os | sed -e 's|sinix|sysv|'`
- ;;
- -sinix*)
- os=-sysv4
- ;;
- -tpf*)
- os=-tpf
- ;;
- -triton*)
- os=-sysv3
- ;;
- -oss*)
- os=-sysv3
- ;;
- -svr4)
- os=-sysv4
- ;;
- -svr3)
- os=-sysv3
- ;;
- -sysvr4)
- os=-sysv4
- ;;
- # This must come after -sysvr4.
- -sysv*)
- ;;
- -ose*)
- os=-ose
- ;;
- -es1800*)
- os=-ose
- ;;
- -xenix)
- os=-xenix
- ;;
- -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
- os=-mint
- ;;
- -aros*)
- os=-aros
- ;;
- -kaos*)
- os=-kaos
- ;;
- -zvmoe)
- os=-zvmoe
- ;;
- -dicos*)
- os=-dicos
- ;;
- -nacl*)
- ;;
- -none)
- ;;
- *)
- # Get rid of the `-' at the beginning of $os.
- os=`echo $os | sed 's/[^-]*-//'`
- echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
- exit 1
- ;;
-esac
-else
-
-# Here we handle the default operating systems that come with various machines.
-# The value should be what the vendor currently ships out the door with their
-# machine or put another way, the most popular os provided with the machine.
-
-# Note that if you're going to try to match "-MANUFACTURER" here (say,
-# "-sun"), then you have to tell the case statement up towards the top
-# that MANUFACTURER isn't an operating system. Otherwise, code above
-# will signal an error saying that MANUFACTURER isn't an operating
-# system, and we'll never get to this point.
-
-case $basic_machine in
- score-*)
- os=-elf
- ;;
- spu-*)
- os=-elf
- ;;
- *-acorn)
- os=-riscix1.2
- ;;
- arm*-rebel)
- os=-linux
- ;;
- arm*-semi)
- os=-aout
- ;;
- c4x-* | tic4x-*)
- os=-coff
- ;;
- tic54x-*)
- os=-coff
- ;;
- tic55x-*)
- os=-coff
- ;;
- tic6x-*)
- os=-coff
- ;;
- # This must come before the *-dec entry.
- pdp10-*)
- os=-tops20
- ;;
- pdp11-*)
- os=-none
- ;;
- *-dec | vax-*)
- os=-ultrix4.2
- ;;
- m68*-apollo)
- os=-domain
- ;;
- i386-sun)
- os=-sunos4.0.2
- ;;
- m68000-sun)
- os=-sunos3
- # This also exists in the configure program, but was not the
- # default.
- # os=-sunos4
- ;;
- m68*-cisco)
- os=-aout
- ;;
- mep-*)
- os=-elf
- ;;
- mips*-cisco)
- os=-elf
- ;;
- mips*-*)
- os=-elf
- ;;
- or32-*)
- os=-coff
- ;;
- *-tti) # must be before sparc entry or we get the wrong os.
- os=-sysv3
- ;;
- sparc-* | *-sun)
- os=-sunos4.1.1
- ;;
- *-be)
- os=-beos
- ;;
- *-haiku)
- os=-haiku
- ;;
- *-ibm)
- os=-aix
- ;;
- *-knuth)
- os=-mmixware
- ;;
- *-wec)
- os=-proelf
- ;;
- *-winbond)
- os=-proelf
- ;;
- *-oki)
- os=-proelf
- ;;
- *-hp)
- os=-hpux
- ;;
- *-hitachi)
- os=-hiux
- ;;
- i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
- os=-sysv
- ;;
- *-cbm)
- os=-amigaos
- ;;
- *-dg)
- os=-dgux
- ;;
- *-dolphin)
- os=-sysv3
- ;;
- m68k-ccur)
- os=-rtu
- ;;
- m88k-omron*)
- os=-luna
- ;;
- *-next )
- os=-nextstep
- ;;
- *-sequent)
- os=-ptx
- ;;
- *-crds)
- os=-unos
- ;;
- *-ns)
- os=-genix
- ;;
- i370-*)
- os=-mvs
- ;;
- *-next)
- os=-nextstep3
- ;;
- *-gould)
- os=-sysv
- ;;
- *-highlevel)
- os=-bsd
- ;;
- *-encore)
- os=-bsd
- ;;
- *-sgi)
- os=-irix
- ;;
- *-siemens)
- os=-sysv4
- ;;
- *-masscomp)
- os=-rtu
- ;;
- f30[01]-fujitsu | f700-fujitsu)
- os=-uxpv
- ;;
- *-rom68k)
- os=-coff
- ;;
- *-*bug)
- os=-coff
- ;;
- *-apple)
- os=-macos
- ;;
- *-atari*)
- os=-mint
- ;;
- *)
- os=-none
- ;;
-esac
-fi
-
-# Here we handle the case where we know the os, and the CPU type, but not the
-# manufacturer. We pick the logical manufacturer.
-vendor=unknown
-case $basic_machine in
- *-unknown)
- case $os in
- -riscix*)
- vendor=acorn
- ;;
- -sunos*)
- vendor=sun
- ;;
- -cnk*|-aix*)
- vendor=ibm
- ;;
- -beos*)
- vendor=be
- ;;
- -hpux*)
- vendor=hp
- ;;
- -mpeix*)
- vendor=hp
- ;;
- -hiux*)
- vendor=hitachi
- ;;
- -unos*)
- vendor=crds
- ;;
- -dgux*)
- vendor=dg
- ;;
- -luna*)
- vendor=omron
- ;;
- -genix*)
- vendor=ns
- ;;
- -mvs* | -opened*)
- vendor=ibm
- ;;
- -os400*)
- vendor=ibm
- ;;
- -ptx*)
- vendor=sequent
- ;;
- -tpf*)
- vendor=ibm
- ;;
- -vxsim* | -vxworks* | -windiss*)
- vendor=wrs
- ;;
- -aux*)
- vendor=apple
- ;;
- -hms*)
- vendor=hitachi
- ;;
- -mpw* | -macos*)
- vendor=apple
- ;;
- -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
- vendor=atari
- ;;
- -vos*)
- vendor=stratus
- ;;
- esac
- basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
- ;;
-esac
-
-echo $basic_machine$os
-exit
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
-# time-stamp-end: "'"
-# End:
diff --git a/build-aux/install-sh b/build-aux/install-sh
deleted file mode 100755
index 6781b98..0000000
--- a/build-aux/install-sh
+++ /dev/null
@@ -1,520 +0,0 @@
-#!/bin/sh
-# install - install a program, script, or datafile
-
-scriptversion=2009-04-28.21; # UTC
-
-# This originates from X11R5 (mit/util/scripts/install.sh), which was
-# later released in X11R6 (xc/config/util/install.sh) with the
-# following copyright and license.
-#
-# Copyright (C) 1994 X Consortium
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
-# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
-# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-# Except as contained in this notice, the name of the X Consortium shall not
-# be used in advertising or otherwise to promote the sale, use or other deal-
-# ings in this Software without prior written authorization from the X Consor-
-# tium.
-#
-#
-# FSF changes to this file are in the public domain.
-#
-# Calling this script install-sh is preferred over install.sh, to prevent
-# `make' implicit rules from creating a file called install from it
-# when there is no Makefile.
-#
-# This script is compatible with the BSD install script, but was written
-# from scratch.
-
-nl='
-'
-IFS=" "" $nl"
-
-# set DOITPROG to echo to test this script
-
-# Don't use :- since 4.3BSD and earlier shells don't like it.
-doit=${DOITPROG-}
-if test -z "$doit"; then
- doit_exec=exec
-else
- doit_exec=$doit
-fi
-
-# Put in absolute file names if you don't have them in your path;
-# or use environment vars.
-
-chgrpprog=${CHGRPPROG-chgrp}
-chmodprog=${CHMODPROG-chmod}
-chownprog=${CHOWNPROG-chown}
-cmpprog=${CMPPROG-cmp}
-cpprog=${CPPROG-cp}
-mkdirprog=${MKDIRPROG-mkdir}
-mvprog=${MVPROG-mv}
-rmprog=${RMPROG-rm}
-stripprog=${STRIPPROG-strip}
-
-posix_glob='?'
-initialize_posix_glob='
- test "$posix_glob" != "?" || {
- if (set -f) 2>/dev/null; then
- posix_glob=
- else
- posix_glob=:
- fi
- }
-'
-
-posix_mkdir=
-
-# Desired mode of installed file.
-mode=0755
-
-chgrpcmd=
-chmodcmd=$chmodprog
-chowncmd=
-mvcmd=$mvprog
-rmcmd="$rmprog -f"
-stripcmd=
-
-src=
-dst=
-dir_arg=
-dst_arg=
-
-copy_on_change=false
-no_target_directory=
-
-usage="\
-Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
- or: $0 [OPTION]... SRCFILES... DIRECTORY
- or: $0 [OPTION]... -t DIRECTORY SRCFILES...
- or: $0 [OPTION]... -d DIRECTORIES...
-
-In the 1st form, copy SRCFILE to DSTFILE.
-In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
-In the 4th, create DIRECTORIES.
-
-Options:
- --help display this help and exit.
- --version display version info and exit.
-
- -c (ignored)
- -C install only if different (preserve the last data modification time)
- -d create directories instead of installing files.
- -g GROUP $chgrpprog installed files to GROUP.
- -m MODE $chmodprog installed files to MODE.
- -o USER $chownprog installed files to USER.
- -s $stripprog installed files.
- -t DIRECTORY install into DIRECTORY.
- -T report an error if DSTFILE is a directory.
-
-Environment variables override the default commands:
- CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
- RMPROG STRIPPROG
-"
-
-while test $# -ne 0; do
- case $1 in
- -c) ;;
-
- -C) copy_on_change=true;;
-
- -d) dir_arg=true;;
-
- -g) chgrpcmd="$chgrpprog $2"
- shift;;
-
- --help) echo "$usage"; exit $?;;
-
- -m) mode=$2
- case $mode in
- *' '* | *' '* | *'
-'* | *'*'* | *'?'* | *'['*)
- echo "$0: invalid mode: $mode" >&2
- exit 1;;
- esac
- shift;;
-
- -o) chowncmd="$chownprog $2"
- shift;;
-
- -s) stripcmd=$stripprog;;
-
- -t) dst_arg=$2
- shift;;
-
- -T) no_target_directory=true;;
-
- --version) echo "$0 $scriptversion"; exit $?;;
-
- --) shift
- break;;
-
- -*) echo "$0: invalid option: $1" >&2
- exit 1;;
-
- *) break;;
- esac
- shift
-done
-
-if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
- # When -d is used, all remaining arguments are directories to create.
- # When -t is used, the destination is already specified.
- # Otherwise, the last argument is the destination. Remove it from $@.
- for arg
- do
- if test -n "$dst_arg"; then
- # $@ is not empty: it contains at least $arg.
- set fnord "$@" "$dst_arg"
- shift # fnord
- fi
- shift # arg
- dst_arg=$arg
- done
-fi
-
-if test $# -eq 0; then
- if test -z "$dir_arg"; then
- echo "$0: no input file specified." >&2
- exit 1
- fi
- # It's OK to call `install-sh -d' without argument.
- # This can happen when creating conditional directories.
- exit 0
-fi
-
-if test -z "$dir_arg"; then
- trap '(exit $?); exit' 1 2 13 15
-
- # Set umask so as not to create temps with too-generous modes.
- # However, 'strip' requires both read and write access to temps.
- case $mode in
- # Optimize common cases.
- *644) cp_umask=133;;
- *755) cp_umask=22;;
-
- *[0-7])
- if test -z "$stripcmd"; then
- u_plus_rw=
- else
- u_plus_rw='% 200'
- fi
- cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
- *)
- if test -z "$stripcmd"; then
- u_plus_rw=
- else
- u_plus_rw=,u+rw
- fi
- cp_umask=$mode$u_plus_rw;;
- esac
-fi
-
-for src
-do
- # Protect names starting with `-'.
- case $src in
- -*) src=./$src;;
- esac
-
- if test -n "$dir_arg"; then
- dst=$src
- dstdir=$dst
- test -d "$dstdir"
- dstdir_status=$?
- else
-
- # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
- # might cause directories to be created, which would be especially bad
- # if $src (and thus $dsttmp) contains '*'.
- if test ! -f "$src" && test ! -d "$src"; then
- echo "$0: $src does not exist." >&2
- exit 1
- fi
-
- if test -z "$dst_arg"; then
- echo "$0: no destination specified." >&2
- exit 1
- fi
-
- dst=$dst_arg
- # Protect names starting with `-'.
- case $dst in
- -*) dst=./$dst;;
- esac
-
- # If destination is a directory, append the input filename; won't work
- # if double slashes aren't ignored.
- if test -d "$dst"; then
- if test -n "$no_target_directory"; then
- echo "$0: $dst_arg: Is a directory" >&2
- exit 1
- fi
- dstdir=$dst
- dst=$dstdir/`basename "$src"`
- dstdir_status=0
- else
- # Prefer dirname, but fall back on a substitute if dirname fails.
- dstdir=`
- (dirname "$dst") 2>/dev/null ||
- expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$dst" : 'X\(//\)[^/]' \| \
- X"$dst" : 'X\(//\)$' \| \
- X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
- echo X"$dst" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'
- `
-
- test -d "$dstdir"
- dstdir_status=$?
- fi
- fi
-
- obsolete_mkdir_used=false
-
- if test $dstdir_status != 0; then
- case $posix_mkdir in
- '')
- # Create intermediate dirs using mode 755 as modified by the umask.
- # This is like FreeBSD 'install' as of 1997-10-28.
- umask=`umask`
- case $stripcmd.$umask in
- # Optimize common cases.
- *[2367][2367]) mkdir_umask=$umask;;
- .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
-
- *[0-7])
- mkdir_umask=`expr $umask + 22 \
- - $umask % 100 % 40 + $umask % 20 \
- - $umask % 10 % 4 + $umask % 2
- `;;
- *) mkdir_umask=$umask,go-w;;
- esac
-
- # With -d, create the new directory with the user-specified mode.
- # Otherwise, rely on $mkdir_umask.
- if test -n "$dir_arg"; then
- mkdir_mode=-m$mode
- else
- mkdir_mode=
- fi
-
- posix_mkdir=false
- case $umask in
- *[123567][0-7][0-7])
- # POSIX mkdir -p sets u+wx bits regardless of umask, which
- # is incompatible with FreeBSD 'install' when (umask & 300) != 0.
- ;;
- *)
- tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
- trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
-
- if (umask $mkdir_umask &&
- exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
- then
- if test -z "$dir_arg" || {
- # Check for POSIX incompatibilities with -m.
- # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
- # other-writeable bit of parent directory when it shouldn't.
- # FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
- ls_ld_tmpdir=`ls -ld "$tmpdir"`
- case $ls_ld_tmpdir in
- d????-?r-*) different_mode=700;;
- d????-?--*) different_mode=755;;
- *) false;;
- esac &&
- $mkdirprog -m$different_mode -p -- "$tmpdir" && {
- ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
- test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
- }
- }
- then posix_mkdir=:
- fi
- rmdir "$tmpdir/d" "$tmpdir"
- else
- # Remove any dirs left behind by ancient mkdir implementations.
- rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
- fi
- trap '' 0;;
- esac;;
- esac
-
- if
- $posix_mkdir && (
- umask $mkdir_umask &&
- $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
- )
- then :
- else
-
- # The umask is ridiculous, or mkdir does not conform to POSIX,
- # or it failed possibly due to a race condition. Create the
- # directory the slow way, step by step, checking for races as we go.
-
- case $dstdir in
- /*) prefix='/';;
- -*) prefix='./';;
- *) prefix='';;
- esac
-
- eval "$initialize_posix_glob"
-
- oIFS=$IFS
- IFS=/
- $posix_glob set -f
- set fnord $dstdir
- shift
- $posix_glob set +f
- IFS=$oIFS
-
- prefixes=
-
- for d
- do
- test -z "$d" && continue
-
- prefix=$prefix$d
- if test -d "$prefix"; then
- prefixes=
- else
- if $posix_mkdir; then
- (umask=$mkdir_umask &&
- $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
- # Don't fail if two instances are running concurrently.
- test -d "$prefix" || exit 1
- else
- case $prefix in
- *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
- *) qprefix=$prefix;;
- esac
- prefixes="$prefixes '$qprefix'"
- fi
- fi
- prefix=$prefix/
- done
-
- if test -n "$prefixes"; then
- # Don't fail if two instances are running concurrently.
- (umask $mkdir_umask &&
- eval "\$doit_exec \$mkdirprog $prefixes") ||
- test -d "$dstdir" || exit 1
- obsolete_mkdir_used=true
- fi
- fi
- fi
-
- if test -n "$dir_arg"; then
- { test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
- { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
- { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
- test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
- else
-
- # Make a couple of temp file names in the proper directory.
- dsttmp=$dstdir/_inst.$$_
- rmtmp=$dstdir/_rm.$$_
-
- # Trap to clean up those temp files at exit.
- trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
-
- # Copy the file name to the temp name.
- (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
-
- # and set any options; do chmod last to preserve setuid bits.
- #
- # If any of these fail, we abort the whole thing. If we want to
- # ignore errors from any of these, just make sure not to ignore
- # errors from the above "$doit $cpprog $src $dsttmp" command.
- #
- { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
- { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
- { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
- { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
-
- # If -C, don't bother to copy if it wouldn't change the file.
- if $copy_on_change &&
- old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
- new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
-
- eval "$initialize_posix_glob" &&
- $posix_glob set -f &&
- set X $old && old=:$2:$4:$5:$6 &&
- set X $new && new=:$2:$4:$5:$6 &&
- $posix_glob set +f &&
-
- test "$old" = "$new" &&
- $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
- then
- rm -f "$dsttmp"
- else
- # Rename the file to the real destination.
- $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
-
- # The rename failed, perhaps because mv can't rename something else
- # to itself, or perhaps because mv is so ancient that it does not
- # support -f.
- {
- # Now remove or move aside any old file at destination location.
- # We try this two ways since rm can't unlink itself on some
- # systems and the destination file might be busy for other
- # reasons. In this case, the final cleanup might fail but the new
- # file should still install successfully.
- {
- test ! -f "$dst" ||
- $doit $rmcmd -f "$dst" 2>/dev/null ||
- { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
- { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
- } ||
- { echo "$0: cannot unlink or rename $dst" >&2
- (exit 1); exit 1
- }
- } &&
-
- # Now rename the file to the real destination.
- $doit $mvcmd "$dsttmp" "$dst"
- }
- fi || exit 1
-
- trap '' 0
- fi
-done
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "scriptversion="
-# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-time-zone: "UTC"
-# time-stamp-end: "; # UTC"
-# End:
diff --git a/build-aux/ltmain.sh b/build-aux/ltmain.sh
deleted file mode 100644
index 63ae69d..0000000
--- a/build-aux/ltmain.sh
+++ /dev/null
@@ -1,9655 +0,0 @@
-
-# libtool (GNU libtool) 2.4.2
-# Written by Gordon Matzigkeit <gord at gnu.ai.mit.edu>, 1996
-
-# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006,
-# 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
-# This is free software; see the source for copying conditions. There is NO
-# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-
-# GNU Libtool is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# As a special exception to the GNU General Public License,
-# if you distribute this file as part of a program or library that
-# is built using GNU Libtool, you may include this file under the
-# same distribution terms that you use for the rest of that program.
-#
-# GNU Libtool is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Libtool; see the file COPYING. If not, a copy
-# can be downloaded from http://www.gnu.org/licenses/gpl.html,
-# or obtained by writing to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-# Usage: $progname [OPTION]... [MODE-ARG]...
-#
-# Provide generalized library-building support services.
-#
-# --config show all configuration variables
-# --debug enable verbose shell tracing
-# -n, --dry-run display commands without modifying any files
-# --features display basic configuration information and exit
-# --mode=MODE use operation mode MODE
-# --preserve-dup-deps don't remove duplicate dependency libraries
-# --quiet, --silent don't print informational messages
-# --no-quiet, --no-silent
-# print informational messages (default)
-# --no-warn don't display warning messages
-# --tag=TAG use configuration variables from tag TAG
-# -v, --verbose print more informational messages than default
-# --no-verbose don't print the extra informational messages
-# --version print version information
-# -h, --help, --help-all print short, long, or detailed help message
-#
-# MODE must be one of the following:
-#
-# clean remove files from the build directory
-# compile compile a source file into a libtool object
-# execute automatically set library path, then run a program
-# finish complete the installation of libtool libraries
-# install install libraries or executables
-# link create a library or an executable
-# uninstall remove libraries from an installed directory
-#
-# MODE-ARGS vary depending on the MODE. When passed as first option,
-# `--mode=MODE' may be abbreviated as `MODE' or a unique abbreviation of that.
-# Try `$progname --help --mode=MODE' for a more detailed description of MODE.
-#
-# When reporting a bug, please describe a test case to reproduce it and
-# include the following information:
-#
-# host-triplet: $host
-# shell: $SHELL
-# compiler: $LTCC
-# compiler flags: $LTCFLAGS
-# linker: $LD (gnu? $with_gnu_ld)
-# $progname: (GNU libtool) 2.4.2
-# automake: $automake_version
-# autoconf: $autoconf_version
-#
-# Report bugs to <bug-libtool at gnu.org>.
-# GNU libtool home page: <http://www.gnu.org/software/libtool/>.
-# General help using GNU software: <http://www.gnu.org/gethelp/>.
-
-PROGRAM=libtool
-PACKAGE=libtool
-VERSION=2.4.2
-TIMESTAMP=""
-package_revision=1.3337
-
-# Be Bourne compatible
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
- emulate sh
- NULLCMD=:
- # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
- # is contrary to our usage. Disable this feature.
- alias -g '${1+"$@"}'='"$@"'
- setopt NO_GLOB_SUBST
-else
- case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
-fi
-BIN_SH=xpg4; export BIN_SH # for Tru64
-DUALCASE=1; export DUALCASE # for MKS sh
-
-# A function that is used when there is no print builtin or printf.
-func_fallback_echo ()
-{
- eval 'cat <<_LTECHO_EOF
-$1
-_LTECHO_EOF'
-}
-
-# NLS nuisances: We save the old values to restore during execute mode.
-lt_user_locale=
-lt_safe_locale=
-for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES
-do
- eval "if test \"\${$lt_var+set}\" = set; then
- save_$lt_var=\$$lt_var
- $lt_var=C
- export $lt_var
- lt_user_locale=\"$lt_var=\\\$save_\$lt_var; \$lt_user_locale\"
- lt_safe_locale=\"$lt_var=C; \$lt_safe_locale\"
- fi"
-done
-LC_ALL=C
-LANGUAGE=C
-export LANGUAGE LC_ALL
-
-$lt_unset CDPATH
-
-
-# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh
-# is ksh but when the shell is invoked as "sh" and the current value of
-# the _XPG environment variable is not equal to 1 (one), the special
-# positional parameter $0, within a function call, is the name of the
-# function.
-progpath="$0"
-
-
-
-: ${CP="cp -f"}
-test "${ECHO+set}" = set || ECHO=${as_echo-'printf %s\n'}
-: ${MAKE="make"}
-: ${MKDIR="mkdir"}
-: ${MV="mv -f"}
-: ${RM="rm -f"}
-: ${SHELL="${CONFIG_SHELL-/bin/sh}"}
-: ${Xsed="$SED -e 1s/^X//"}
-
-# Global variables:
-EXIT_SUCCESS=0
-EXIT_FAILURE=1
-EXIT_MISMATCH=63 # $? = 63 is used to indicate version mismatch to missing.
-EXIT_SKIP=77 # $? = 77 is used to indicate a skipped test to automake.
-
-exit_status=$EXIT_SUCCESS
-
-# Make sure IFS has a sensible default
-lt_nl='
-'
-IFS=" $lt_nl"
-
-dirname="s,/[^/]*$,,"
-basename="s,^.*/,,"
-
-# func_dirname file append nondir_replacement
-# Compute the dirname of FILE. If nonempty, add APPEND to the result,
-# otherwise set result to NONDIR_REPLACEMENT.
-func_dirname ()
-{
- func_dirname_result=`$ECHO "${1}" | $SED "$dirname"`
- if test "X$func_dirname_result" = "X${1}"; then
- func_dirname_result="${3}"
- else
- func_dirname_result="$func_dirname_result${2}"
- fi
-} # func_dirname may be replaced by extended shell implementation
-
-
-# func_basename file
-func_basename ()
-{
- func_basename_result=`$ECHO "${1}" | $SED "$basename"`
-} # func_basename may be replaced by extended shell implementation
-
-
-# func_dirname_and_basename file append nondir_replacement
-# perform func_basename and func_dirname in a single function
-# call:
-# dirname: Compute the dirname of FILE. If nonempty,
-# add APPEND to the result, otherwise set result
-# to NONDIR_REPLACEMENT.
-# value returned in "$func_dirname_result"
-# basename: Compute filename of FILE.
-# value retuned in "$func_basename_result"
-# Implementation must be kept synchronized with func_dirname
-# and func_basename. For efficiency, we do not delegate to
-# those functions but instead duplicate the functionality here.
-func_dirname_and_basename ()
-{
- # Extract subdirectory from the argument.
- func_dirname_result=`$ECHO "${1}" | $SED -e "$dirname"`
- if test "X$func_dirname_result" = "X${1}"; then
- func_dirname_result="${3}"
- else
- func_dirname_result="$func_dirname_result${2}"
- fi
- func_basename_result=`$ECHO "${1}" | $SED -e "$basename"`
-} # func_dirname_and_basename may be replaced by extended shell implementation
-
-
-# func_stripname prefix suffix name
-# strip PREFIX and SUFFIX off of NAME.
-# PREFIX and SUFFIX must not contain globbing or regex special
-# characters, hashes, percent signs, but SUFFIX may contain a leading
-# dot (in which case that matches only a dot).
-# func_strip_suffix prefix name
-func_stripname ()
-{
- case ${2} in
- .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;;
- *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;;
- esac
-} # func_stripname may be replaced by extended shell implementation
-
-
-# These SED scripts presuppose an absolute path with a trailing slash.
-pathcar='s,^/\([^/]*\).*$,\1,'
-pathcdr='s,^/[^/]*,,'
-removedotparts=':dotsl
- s@/\./@/@g
- t dotsl
- s,/\.$,/,'
-collapseslashes='s@/\{1,\}@/@g'
-finalslash='s,/*$,/,'
-
-# func_normal_abspath PATH
-# Remove doubled-up and trailing slashes, "." path components,
-# and cancel out any ".." path components in PATH after making
-# it an absolute path.
-# value returned in "$func_normal_abspath_result"
-func_normal_abspath ()
-{
- # Start from root dir and reassemble the path.
- func_normal_abspath_result=
- func_normal_abspath_tpath=$1
- func_normal_abspath_altnamespace=
- case $func_normal_abspath_tpath in
- "")
- # Empty path, that just means $cwd.
- func_stripname '' '/' "`pwd`"
- func_normal_abspath_result=$func_stripname_result
- return
- ;;
- # The next three entries are used to spot a run of precisely
- # two leading slashes without using negated character classes;
- # we take advantage of case's first-match behaviour.
- ///*)
- # Unusual form of absolute path, do nothing.
- ;;
- //*)
- # Not necessarily an ordinary path; POSIX reserves leading '//'
- # and for example Cygwin uses it to access remote file shares
- # over CIFS/SMB, so we conserve a leading double slash if found.
- func_normal_abspath_altnamespace=/
- ;;
- /*)
- # Absolute path, do nothing.
- ;;
- *)
- # Relative path, prepend $cwd.
- func_normal_abspath_tpath=`pwd`/$func_normal_abspath_tpath
- ;;
- esac
- # Cancel out all the simple stuff to save iterations. We also want
- # the path to end with a slash for ease of parsing, so make sure
- # there is one (and only one) here.
- func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \
- -e "$removedotparts" -e "$collapseslashes" -e "$finalslash"`
- while :; do
- # Processed it all yet?
- if test "$func_normal_abspath_tpath" = / ; then
- # If we ascended to the root using ".." the result may be empty now.
- if test -z "$func_normal_abspath_result" ; then
- func_normal_abspath_result=/
- fi
- break
- fi
- func_normal_abspath_tcomponent=`$ECHO "$func_normal_abspath_tpath" | $SED \
- -e "$pathcar"`
- func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \
- -e "$pathcdr"`
- # Figure out what to do with it
- case $func_normal_abspath_tcomponent in
- "")
- # Trailing empty path component, ignore it.
- ;;
- ..)
- # Parent dir; strip last assembled component from result.
- func_dirname "$func_normal_abspath_result"
- func_normal_abspath_result=$func_dirname_result
- ;;
- *)
- # Actual path component, append it.
- func_normal_abspath_result=$func_normal_abspath_result/$func_normal_abspath_tcomponent
- ;;
- esac
- done
- # Restore leading double-slash if one was found on entry.
- func_normal_abspath_result=$func_normal_abspath_altnamespace$func_normal_abspath_result
-}
-
-# func_relative_path SRCDIR DSTDIR
-# generates a relative path from SRCDIR to DSTDIR, with a trailing
-# slash if non-empty, suitable for immediately appending a filename
-# without needing to append a separator.
-# value returned in "$func_relative_path_result"
-func_relative_path ()
-{
- func_relative_path_result=
- func_normal_abspath "$1"
- func_relative_path_tlibdir=$func_normal_abspath_result
- func_normal_abspath "$2"
- func_relative_path_tbindir=$func_normal_abspath_result
-
- # Ascend the tree starting from libdir
- while :; do
- # check if we have found a prefix of bindir
- case $func_relative_path_tbindir in
- $func_relative_path_tlibdir)
- # found an exact match
- func_relative_path_tcancelled=
- break
- ;;
- $func_relative_path_tlibdir*)
- # found a matching prefix
- func_stripname "$func_relative_path_tlibdir" '' "$func_relative_path_tbindir"
- func_relative_path_tcancelled=$func_stripname_result
- if test -z "$func_relative_path_result"; then
- func_relative_path_result=.
- fi
- break
- ;;
- *)
- func_dirname $func_relative_path_tlibdir
- func_relative_path_tlibdir=${func_dirname_result}
- if test "x$func_relative_path_tlibdir" = x ; then
- # Have to descend all the way to the root!
- func_relative_path_result=../$func_relative_path_result
- func_relative_path_tcancelled=$func_relative_path_tbindir
- break
- fi
- func_relative_path_result=../$func_relative_path_result
- ;;
- esac
- done
-
- # Now calculate path; take care to avoid doubling-up slashes.
- func_stripname '' '/' "$func_relative_path_result"
- func_relative_path_result=$func_stripname_result
- func_stripname '/' '/' "$func_relative_path_tcancelled"
- if test "x$func_stripname_result" != x ; then
- func_relative_path_result=${func_relative_path_result}/${func_stripname_result}
- fi
-
- # Normalisation. If bindir is libdir, return empty string,
- # else relative path ending with a slash; either way, target
- # file name can be directly appended.
- if test ! -z "$func_relative_path_result"; then
- func_stripname './' '' "$func_relative_path_result/"
- func_relative_path_result=$func_stripname_result
- fi
-}
-
-# The name of this program:
-func_dirname_and_basename "$progpath"
-progname=$func_basename_result
-
-# Make sure we have an absolute path for reexecution:
-case $progpath in
- [\\/]*|[A-Za-z]:\\*) ;;
- *[\\/]*)
- progdir=$func_dirname_result
- progdir=`cd "$progdir" && pwd`
- progpath="$progdir/$progname"
- ;;
- *)
- save_IFS="$IFS"
- IFS=${PATH_SEPARATOR-:}
- for progdir in $PATH; do
- IFS="$save_IFS"
- test -x "$progdir/$progname" && break
- done
- IFS="$save_IFS"
- test -n "$progdir" || progdir=`pwd`
- progpath="$progdir/$progname"
- ;;
-esac
-
-# Sed substitution that helps us do robust quoting. It backslashifies
-# metacharacters that are still active within double-quoted strings.
-Xsed="${SED}"' -e 1s/^X//'
-sed_quote_subst='s/\([`"$\\]\)/\\\1/g'
-
-# Same as above, but do not quote variable references.
-double_quote_subst='s/\(["`\\]\)/\\\1/g'
-
-# Sed substitution that turns a string into a regex matching for the
-# string literally.
-sed_make_literal_regex='s,[].[^$\\*\/],\\&,g'
-
-# Sed substitution that converts a w32 file name or path
-# which contains forward slashes, into one that contains
-# (escaped) backslashes. A very naive implementation.
-lt_sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g'
-
-# Re-`\' parameter expansions in output of double_quote_subst that were
-# `\'-ed in input to the same. If an odd number of `\' preceded a '$'
-# in input to double_quote_subst, that '$' was protected from expansion.
-# Since each input `\' is now two `\'s, look for any number of runs of
-# four `\'s followed by two `\'s and then a '$'. `\' that '$'.
-bs='\\'
-bs2='\\\\'
-bs4='\\\\\\\\'
-dollar='\$'
-sed_double_backslash="\
- s/$bs4/&\\
-/g
- s/^$bs2$dollar/$bs&/
- s/\\([^$bs]\\)$bs2$dollar/\\1$bs2$bs$dollar/g
- s/\n//g"
-
-# Standard options:
-opt_dry_run=false
-opt_help=false
-opt_quiet=false
-opt_verbose=false
-opt_warning=:
-
-# func_echo arg...
-# Echo program name prefixed message, along with the current mode
-# name if it has been set yet.
-func_echo ()
-{
- $ECHO "$progname: ${opt_mode+$opt_mode: }$*"
-}
-
-# func_verbose arg...
-# Echo program name prefixed message in verbose mode only.
-func_verbose ()
-{
- $opt_verbose && func_echo ${1+"$@"}
-
- # A bug in bash halts the script if the last line of a function
- # fails when set -e is in force, so we need another command to
- # work around that:
- :
-}
-
-# func_echo_all arg...
-# Invoke $ECHO with all args, space-separated.
-func_echo_all ()
-{
- $ECHO "$*"
-}
-
-# func_error arg...
-# Echo program name prefixed message to standard error.
-func_error ()
-{
- $ECHO "$progname: ${opt_mode+$opt_mode: }"${1+"$@"} 1>&2
-}
-
-# func_warning arg...
-# Echo program name prefixed warning message to standard error.
-func_warning ()
-{
- $opt_warning && $ECHO "$progname: ${opt_mode+$opt_mode: }warning: "${1+"$@"} 1>&2
-
- # bash bug again:
- :
-}
-
-# func_fatal_error arg...
-# Echo program name prefixed message to standard error, and exit.
-func_fatal_error ()
-{
- func_error ${1+"$@"}
- exit $EXIT_FAILURE
-}
-
-# func_fatal_help arg...
-# Echo program name prefixed message to standard error, followed by
-# a help hint, and exit.
-func_fatal_help ()
-{
- func_error ${1+"$@"}
- func_fatal_error "$help"
-}
-help="Try \`$progname --help' for more information." ## default
-
-
-# func_grep expression filename
-# Check whether EXPRESSION matches any line of FILENAME, without output.
-func_grep ()
-{
- $GREP "$1" "$2" >/dev/null 2>&1
-}
-
-
-# func_mkdir_p directory-path
-# Make sure the entire path to DIRECTORY-PATH is available.
-func_mkdir_p ()
-{
- my_directory_path="$1"
- my_dir_list=
-
- if test -n "$my_directory_path" && test "$opt_dry_run" != ":"; then
-
- # Protect directory names starting with `-'
- case $my_directory_path in
- -*) my_directory_path="./$my_directory_path" ;;
- esac
-
- # While some portion of DIR does not yet exist...
- while test ! -d "$my_directory_path"; do
- # ...make a list in topmost first order. Use a colon delimited
- # list incase some portion of path contains whitespace.
- my_dir_list="$my_directory_path:$my_dir_list"
-
- # If the last portion added has no slash in it, the list is done
- case $my_directory_path in */*) ;; *) break ;; esac
-
- # ...otherwise throw away the child directory and loop
- my_directory_path=`$ECHO "$my_directory_path" | $SED -e "$dirname"`
- done
- my_dir_list=`$ECHO "$my_dir_list" | $SED 's,:*$,,'`
-
- save_mkdir_p_IFS="$IFS"; IFS=':'
- for my_dir in $my_dir_list; do
- IFS="$save_mkdir_p_IFS"
- # mkdir can fail with a `File exist' error if two processes
- # try to create one of the directories concurrently. Don't
- # stop in that case!
- $MKDIR "$my_dir" 2>/dev/null || :
- done
- IFS="$save_mkdir_p_IFS"
-
- # Bail out if we (or some other process) failed to create a directory.
- test -d "$my_directory_path" || \
- func_fatal_error "Failed to create \`$1'"
- fi
-}
-
-
-# func_mktempdir [string]
-# Make a temporary directory that won't clash with other running
-# libtool processes, and avoids race conditions if possible. If
-# given, STRING is the basename for that directory.
-func_mktempdir ()
-{
- my_template="${TMPDIR-/tmp}/${1-$progname}"
-
- if test "$opt_dry_run" = ":"; then
- # Return a directory name, but don't create it in dry-run mode
- my_tmpdir="${my_template}-$$"
- else
-
- # If mktemp works, use that first and foremost
- my_tmpdir=`mktemp -d "${my_template}-XXXXXXXX" 2>/dev/null`
-
- if test ! -d "$my_tmpdir"; then
- # Failing that, at least try and use $RANDOM to avoid a race
- my_tmpdir="${my_template}-${RANDOM-0}$$"
-
- save_mktempdir_umask=`umask`
- umask 0077
- $MKDIR "$my_tmpdir"
- umask $save_mktempdir_umask
- fi
-
- # If we're not in dry-run mode, bomb out on failure
- test -d "$my_tmpdir" || \
- func_fatal_error "cannot create temporary directory \`$my_tmpdir'"
- fi
-
- $ECHO "$my_tmpdir"
-}
-
-
-# func_quote_for_eval arg
-# Aesthetically quote ARG to be evaled later.
-# This function returns two values: FUNC_QUOTE_FOR_EVAL_RESULT
-# is double-quoted, suitable for a subsequent eval, whereas
-# FUNC_QUOTE_FOR_EVAL_UNQUOTED_RESULT has merely all characters
-# which are still active within double quotes backslashified.
-func_quote_for_eval ()
-{
- case $1 in
- *[\\\`\"\$]*)
- func_quote_for_eval_unquoted_result=`$ECHO "$1" | $SED "$sed_quote_subst"` ;;
- *)
- func_quote_for_eval_unquoted_result="$1" ;;
- esac
-
- case $func_quote_for_eval_unquoted_result in
- # Double-quote args containing shell metacharacters to delay
- # word splitting, command substitution and and variable
- # expansion for a subsequent eval.
- # Many Bourne shells cannot handle close brackets correctly
- # in scan sets, so we specify it separately.
- *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
- func_quote_for_eval_result="\"$func_quote_for_eval_unquoted_result\""
- ;;
- *)
- func_quote_for_eval_result="$func_quote_for_eval_unquoted_result"
- esac
-}
-
-
-# func_quote_for_expand arg
-# Aesthetically quote ARG to be evaled later; same as above,
-# but do not quote variable references.
-func_quote_for_expand ()
-{
- case $1 in
- *[\\\`\"]*)
- my_arg=`$ECHO "$1" | $SED \
- -e "$double_quote_subst" -e "$sed_double_backslash"` ;;
- *)
- my_arg="$1" ;;
- esac
-
- case $my_arg in
- # Double-quote args containing shell metacharacters to delay
- # word splitting and command substitution for a subsequent eval.
- # Many Bourne shells cannot handle close brackets correctly
- # in scan sets, so we specify it separately.
- *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
- my_arg="\"$my_arg\""
- ;;
- esac
-
- func_quote_for_expand_result="$my_arg"
-}
-
-
-# func_show_eval cmd [fail_exp]
-# Unless opt_silent is true, then output CMD. Then, if opt_dryrun is
-# not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP
-# is given, then evaluate it.
-func_show_eval ()
-{
- my_cmd="$1"
- my_fail_exp="${2-:}"
-
- ${opt_silent-false} || {
- func_quote_for_expand "$my_cmd"
- eval "func_echo $func_quote_for_expand_result"
- }
-
- if ${opt_dry_run-false}; then :; else
- eval "$my_cmd"
- my_status=$?
- if test "$my_status" -eq 0; then :; else
- eval "(exit $my_status); $my_fail_exp"
- fi
- fi
-}
-
-
-# func_show_eval_locale cmd [fail_exp]
-# Unless opt_silent is true, then output CMD. Then, if opt_dryrun is
-# not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP
-# is given, then evaluate it. Use the saved locale for evaluation.
-func_show_eval_locale ()
-{
- my_cmd="$1"
- my_fail_exp="${2-:}"
-
- ${opt_silent-false} || {
- func_quote_for_expand "$my_cmd"
- eval "func_echo $func_quote_for_expand_result"
- }
-
- if ${opt_dry_run-false}; then :; else
- eval "$lt_user_locale
- $my_cmd"
- my_status=$?
- eval "$lt_safe_locale"
- if test "$my_status" -eq 0; then :; else
- eval "(exit $my_status); $my_fail_exp"
- fi
- fi
-}
-
-# func_tr_sh
-# Turn $1 into a string suitable for a shell variable name.
-# Result is stored in $func_tr_sh_result. All characters
-# not in the set a-zA-Z0-9_ are replaced with '_'. Further,
-# if $1 begins with a digit, a '_' is prepended as well.
-func_tr_sh ()
-{
- case $1 in
- [0-9]* | *[!a-zA-Z0-9_]*)
- func_tr_sh_result=`$ECHO "$1" | $SED 's/^\([0-9]\)/_\1/; s/[^a-zA-Z0-9_]/_/g'`
- ;;
- * )
- func_tr_sh_result=$1
- ;;
- esac
-}
-
-
-# func_version
-# Echo version message to standard output and exit.
-func_version ()
-{
- $opt_debug
-
- $SED -n '/(C)/!b go
- :more
- /\./!{
- N
- s/\n# / /
- b more
- }
- :go
- /^# '$PROGRAM' (GNU /,/# warranty; / {
- s/^# //
- s/^# *$//
- s/\((C)\)[ 0-9,-]*\( [1-9][0-9]*\)/\1\2/
- p
- }' < "$progpath"
- exit $?
-}
-
-# func_usage
-# Echo short help message to standard output and exit.
-func_usage ()
-{
- $opt_debug
-
- $SED -n '/^# Usage:/,/^# *.*--help/ {
- s/^# //
- s/^# *$//
- s/\$progname/'$progname'/
- p
- }' < "$progpath"
- echo
- $ECHO "run \`$progname --help | more' for full usage"
- exit $?
-}
-
-# func_help [NOEXIT]
-# Echo long help message to standard output and exit,
-# unless 'noexit' is passed as argument.
-func_help ()
-{
- $opt_debug
-
- $SED -n '/^# Usage:/,/# Report bugs to/ {
- :print
- s/^# //
- s/^# *$//
- s*\$progname*'$progname'*
- s*\$host*'"$host"'*
- s*\$SHELL*'"$SHELL"'*
- s*\$LTCC*'"$LTCC"'*
- s*\$LTCFLAGS*'"$LTCFLAGS"'*
- s*\$LD*'"$LD"'*
- s/\$with_gnu_ld/'"$with_gnu_ld"'/
- s/\$automake_version/'"`(${AUTOMAKE-automake} --version) 2>/dev/null |$SED 1q`"'/
- s/\$autoconf_version/'"`(${AUTOCONF-autoconf} --version) 2>/dev/null |$SED 1q`"'/
- p
- d
- }
- /^# .* home page:/b print
- /^# General help using/b print
- ' < "$progpath"
- ret=$?
- if test -z "$1"; then
- exit $ret
- fi
-}
-
-# func_missing_arg argname
-# Echo program name prefixed message to standard error and set global
-# exit_cmd.
-func_missing_arg ()
-{
- $opt_debug
-
- func_error "missing argument for $1."
- exit_cmd=exit
-}
-
-
-# func_split_short_opt shortopt
-# Set func_split_short_opt_name and func_split_short_opt_arg shell
-# variables after splitting SHORTOPT after the 2nd character.
-func_split_short_opt ()
-{
- my_sed_short_opt='1s/^\(..\).*$/\1/;q'
- my_sed_short_rest='1s/^..\(.*\)$/\1/;q'
-
- func_split_short_opt_name=`$ECHO "$1" | $SED "$my_sed_short_opt"`
- func_split_short_opt_arg=`$ECHO "$1" | $SED "$my_sed_short_rest"`
-} # func_split_short_opt may be replaced by extended shell implementation
-
-
-# func_split_long_opt longopt
-# Set func_split_long_opt_name and func_split_long_opt_arg shell
-# variables after splitting LONGOPT at the `=' sign.
-func_split_long_opt ()
-{
- my_sed_long_opt='1s/^\(--[^=]*\)=.*/\1/;q'
- my_sed_long_arg='1s/^--[^=]*=//'
-
- func_split_long_opt_name=`$ECHO "$1" | $SED "$my_sed_long_opt"`
- func_split_long_opt_arg=`$ECHO "$1" | $SED "$my_sed_long_arg"`
-} # func_split_long_opt may be replaced by extended shell implementation
-
-exit_cmd=:
-
-
-
-
-
-magic="%%%MAGIC variable%%%"
-magic_exe="%%%MAGIC EXE variable%%%"
-
-# Global variables.
-nonopt=
-preserve_args=
-lo2o="s/\\.lo\$/.${objext}/"
-o2lo="s/\\.${objext}\$/.lo/"
-extracted_archives=
-extracted_serial=0
-
-# If this variable is set in any of the actions, the command in it
-# will be execed at the end. This prevents here-documents from being
-# left over by shells.
-exec_cmd=
-
-# func_append var value
-# Append VALUE to the end of shell variable VAR.
-func_append ()
-{
- eval "${1}=\$${1}\${2}"
-} # func_append may be replaced by extended shell implementation
-
-# func_append_quoted var value
-# Quote VALUE and append to the end of shell variable VAR, separated
-# by a space.
-func_append_quoted ()
-{
- func_quote_for_eval "${2}"
- eval "${1}=\$${1}\\ \$func_quote_for_eval_result"
-} # func_append_quoted may be replaced by extended shell implementation
-
-
-# func_arith arithmetic-term...
-func_arith ()
-{
- func_arith_result=`expr "${@}"`
-} # func_arith may be replaced by extended shell implementation
-
-
-# func_len string
-# STRING may not start with a hyphen.
-func_len ()
-{
- func_len_result=`expr "${1}" : ".*" 2>/dev/null || echo $max_cmd_len`
-} # func_len may be replaced by extended shell implementation
-
-
-# func_lo2o object
-func_lo2o ()
-{
- func_lo2o_result=`$ECHO "${1}" | $SED "$lo2o"`
-} # func_lo2o may be replaced by extended shell implementation
-
-
-# func_xform libobj-or-source
-func_xform ()
-{
- func_xform_result=`$ECHO "${1}" | $SED 's/\.[^.]*$/.lo/'`
-} # func_xform may be replaced by extended shell implementation
-
-
-# func_fatal_configuration arg...
-# Echo program name prefixed message to standard error, followed by
-# a configuration failure hint, and exit.
-func_fatal_configuration ()
-{
- func_error ${1+"$@"}
- func_error "See the $PACKAGE documentation for more information."
- func_fatal_error "Fatal configuration error."
-}
-
-
-# func_config
-# Display the configuration for all the tags in this script.
-func_config ()
-{
- re_begincf='^# ### BEGIN LIBTOOL'
- re_endcf='^# ### END LIBTOOL'
-
- # Default configuration.
- $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath"
-
- # Now print the configurations for the tags.
- for tagname in $taglist; do
- $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath"
- done
-
- exit $?
-}
-
-# func_features
-# Display the features supported by this script.
-func_features ()
-{
- echo "host: $host"
- if test "$build_libtool_libs" = yes; then
- echo "enable shared libraries"
- else
- echo "disable shared libraries"
- fi
- if test "$build_old_libs" = yes; then
- echo "enable static libraries"
- else
- echo "disable static libraries"
- fi
-
- exit $?
-}
-
-# func_enable_tag tagname
-# Verify that TAGNAME is valid, and either flag an error and exit, or
-# enable the TAGNAME tag. We also add TAGNAME to the global $taglist
-# variable here.
-func_enable_tag ()
-{
- # Global variable:
- tagname="$1"
-
- re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$"
- re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$"
- sed_extractcf="/$re_begincf/,/$re_endcf/p"
-
- # Validate tagname.
- case $tagname in
- *[!-_A-Za-z0-9,/]*)
- func_fatal_error "invalid tag name: $tagname"
- ;;
- esac
-
- # Don't test for the "default" C tag, as we know it's
- # there but not specially marked.
- case $tagname in
- CC) ;;
- *)
- if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then
- taglist="$taglist $tagname"
-
- # Evaluate the configuration. Be careful to quote the path
- # and the sed script, to avoid splitting on whitespace, but
- # also don't use non-portable quotes within backquotes within
- # quotes we have to do it in 2 steps:
- extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"`
- eval "$extractedcf"
- else
- func_error "ignoring unknown tag $tagname"
- fi
- ;;
- esac
-}
-
-# func_check_version_match
-# Ensure that we are using m4 macros, and libtool script from the same
-# release of libtool.
-func_check_version_match ()
-{
- if test "$package_revision" != "$macro_revision"; then
- if test "$VERSION" != "$macro_version"; then
- if test -z "$macro_version"; then
- cat >&2 <<_LT_EOF
-$progname: Version mismatch error. This is $PACKAGE $VERSION, but the
-$progname: definition of this LT_INIT comes from an older release.
-$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
-$progname: and run autoconf again.
-_LT_EOF
- else
- cat >&2 <<_LT_EOF
-$progname: Version mismatch error. This is $PACKAGE $VERSION, but the
-$progname: definition of this LT_INIT comes from $PACKAGE $macro_version.
-$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
-$progname: and run autoconf again.
-_LT_EOF
- fi
- else
- cat >&2 <<_LT_EOF
-$progname: Version mismatch error. This is $PACKAGE $VERSION, revision $package_revision,
-$progname: but the definition of this LT_INIT comes from revision $macro_revision.
-$progname: You should recreate aclocal.m4 with macros from revision $package_revision
-$progname: of $PACKAGE $VERSION and run autoconf again.
-_LT_EOF
- fi
-
- exit $EXIT_MISMATCH
- fi
-}
-
-
-# Shorthand for --mode=foo, only valid as the first argument
-case $1 in
-clean|clea|cle|cl)
- shift; set dummy --mode clean ${1+"$@"}; shift
- ;;
-compile|compil|compi|comp|com|co|c)
- shift; set dummy --mode compile ${1+"$@"}; shift
- ;;
-execute|execut|execu|exec|exe|ex|e)
- shift; set dummy --mode execute ${1+"$@"}; shift
- ;;
-finish|finis|fini|fin|fi|f)
- shift; set dummy --mode finish ${1+"$@"}; shift
- ;;
-install|instal|insta|inst|ins|in|i)
- shift; set dummy --mode install ${1+"$@"}; shift
- ;;
-link|lin|li|l)
- shift; set dummy --mode link ${1+"$@"}; shift
- ;;
-uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u)
- shift; set dummy --mode uninstall ${1+"$@"}; shift
- ;;
-esac
-
-
-
-# Option defaults:
-opt_debug=:
-opt_dry_run=false
-opt_config=false
-opt_preserve_dup_deps=false
-opt_features=false
-opt_finish=false
-opt_help=false
-opt_help_all=false
-opt_silent=:
-opt_warning=:
-opt_verbose=:
-opt_silent=false
-opt_verbose=false
-
-
-# Parse options once, thoroughly. This comes as soon as possible in the
-# script to make things like `--version' happen as quickly as we can.
-{
- # this just eases exit handling
- while test $# -gt 0; do
- opt="$1"
- shift
- case $opt in
- --debug|-x) opt_debug='set -x'
- func_echo "enabling shell trace mode"
- $opt_debug
- ;;
- --dry-run|--dryrun|-n)
- opt_dry_run=:
- ;;
- --config)
- opt_config=:
-func_config
- ;;
- --dlopen|-dlopen)
- optarg="$1"
- opt_dlopen="${opt_dlopen+$opt_dlopen
-}$optarg"
- shift
- ;;
- --preserve-dup-deps)
- opt_preserve_dup_deps=:
- ;;
- --features)
- opt_features=:
-func_features
- ;;
- --finish)
- opt_finish=:
-set dummy --mode finish ${1+"$@"}; shift
- ;;
- --help)
- opt_help=:
- ;;
- --help-all)
- opt_help_all=:
-opt_help=': help-all'
- ;;
- --mode)
- test $# = 0 && func_missing_arg $opt && break
- optarg="$1"
- opt_mode="$optarg"
-case $optarg in
- # Valid mode arguments:
- clean|compile|execute|finish|install|link|relink|uninstall) ;;
-
- # Catch anything else as an error
- *) func_error "invalid argument for $opt"
- exit_cmd=exit
- break
- ;;
-esac
- shift
- ;;
- --no-silent|--no-quiet)
- opt_silent=false
-func_append preserve_args " $opt"
- ;;
- --no-warning|--no-warn)
- opt_warning=false
-func_append preserve_args " $opt"
- ;;
- --no-verbose)
- opt_verbose=false
-func_append preserve_args " $opt"
- ;;
- --silent|--quiet)
- opt_silent=:
-func_append preserve_args " $opt"
- opt_verbose=false
- ;;
- --verbose|-v)
- opt_verbose=:
-func_append preserve_args " $opt"
-opt_silent=false
- ;;
- --tag)
- test $# = 0 && func_missing_arg $opt && break
- optarg="$1"
- opt_tag="$optarg"
-func_append preserve_args " $opt $optarg"
-func_enable_tag "$optarg"
- shift
- ;;
-
- -\?|-h) func_usage ;;
- --help) func_help ;;
- --version) func_version ;;
-
- # Separate optargs to long options:
- --*=*)
- func_split_long_opt "$opt"
- set dummy "$func_split_long_opt_name" "$func_split_long_opt_arg" ${1+"$@"}
- shift
- ;;
-
- # Separate non-argument short options:
- -\?*|-h*|-n*|-v*)
- func_split_short_opt "$opt"
- set dummy "$func_split_short_opt_name" "-$func_split_short_opt_arg" ${1+"$@"}
- shift
- ;;
-
- --) break ;;
- -*) func_fatal_help "unrecognized option \`$opt'" ;;
- *) set dummy "$opt" ${1+"$@"}; shift; break ;;
- esac
- done
-
- # Validate options:
-
- # save first non-option argument
- if test "$#" -gt 0; then
- nonopt="$opt"
- shift
- fi
-
- # preserve --debug
- test "$opt_debug" = : || func_append preserve_args " --debug"
-
- case $host in
- *cygwin* | *mingw* | *pw32* | *cegcc*)
- # don't eliminate duplications in $postdeps and $predeps
- opt_duplicate_compiler_generated_deps=:
- ;;
- *)
- opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps
- ;;
- esac
-
- $opt_help || {
- # Sanity checks first:
- func_check_version_match
-
- if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then
- func_fatal_configuration "not configured to build any kind of library"
- fi
-
- # Darwin sucks
- eval std_shrext=\"$shrext_cmds\"
-
- # Only execute mode is allowed to have -dlopen flags.
- if test -n "$opt_dlopen" && test "$opt_mode" != execute; then
- func_error "unrecognized option \`-dlopen'"
- $ECHO "$help" 1>&2
- exit $EXIT_FAILURE
- fi
-
- # Change the help message to a mode-specific one.
- generic_help="$help"
- help="Try \`$progname --help --mode=$opt_mode' for more information."
- }
-
-
- # Bail if the options were screwed
- $exit_cmd $EXIT_FAILURE
-}
-
-
-
-
-## ----------- ##
-## Main. ##
-## ----------- ##
-
-# func_lalib_p file
-# True iff FILE is a libtool `.la' library or `.lo' object file.
-# This function is only a basic sanity check; it will hardly flush out
-# determined imposters.
-func_lalib_p ()
-{
- test -f "$1" &&
- $SED -e 4q "$1" 2>/dev/null \
- | $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1
-}
-
-# func_lalib_unsafe_p file
-# True iff FILE is a libtool `.la' library or `.lo' object file.
-# This function implements the same check as func_lalib_p without
-# resorting to external programs. To this end, it redirects stdin and
-# closes it afterwards, without saving the original file descriptor.
-# As a safety measure, use it only where a negative result would be
-# fatal anyway. Works if `file' does not exist.
-func_lalib_unsafe_p ()
-{
- lalib_p=no
- if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then
- for lalib_p_l in 1 2 3 4
- do
- read lalib_p_line
- case "$lalib_p_line" in
- \#\ Generated\ by\ *$PACKAGE* ) lalib_p=yes; break;;
- esac
- done
- exec 0<&5 5<&-
- fi
- test "$lalib_p" = yes
-}
-
-# func_ltwrapper_script_p file
-# True iff FILE is a libtool wrapper script
-# This function is only a basic sanity check; it will hardly flush out
-# determined imposters.
-func_ltwrapper_script_p ()
-{
- func_lalib_p "$1"
-}
-
-# func_ltwrapper_executable_p file
-# True iff FILE is a libtool wrapper executable
-# This function is only a basic sanity check; it will hardly flush out
-# determined imposters.
-func_ltwrapper_executable_p ()
-{
- func_ltwrapper_exec_suffix=
- case $1 in
- *.exe) ;;
- *) func_ltwrapper_exec_suffix=.exe ;;
- esac
- $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1
-}
-
-# func_ltwrapper_scriptname file
-# Assumes file is an ltwrapper_executable
-# uses $file to determine the appropriate filename for a
-# temporary ltwrapper_script.
-func_ltwrapper_scriptname ()
-{
- func_dirname_and_basename "$1" "" "."
- func_stripname '' '.exe' "$func_basename_result"
- func_ltwrapper_scriptname_result="$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper"
-}
-
-# func_ltwrapper_p file
-# True iff FILE is a libtool wrapper script or wrapper executable
-# This function is only a basic sanity check; it will hardly flush out
-# determined imposters.
-func_ltwrapper_p ()
-{
- func_ltwrapper_script_p "$1" || func_ltwrapper_executable_p "$1"
-}
-
-
-# func_execute_cmds commands fail_cmd
-# Execute tilde-delimited COMMANDS.
-# If FAIL_CMD is given, eval that upon failure.
-# FAIL_CMD may read-access the current command in variable CMD!
-func_execute_cmds ()
-{
- $opt_debug
- save_ifs=$IFS; IFS='~'
- for cmd in $1; do
- IFS=$save_ifs
- eval cmd=\"$cmd\"
- func_show_eval "$cmd" "${2-:}"
- done
- IFS=$save_ifs
-}
-
-
-# func_source file
-# Source FILE, adding directory component if necessary.
-# Note that it is not necessary on cygwin/mingw to append a dot to
-# FILE even if both FILE and FILE.exe exist: automatic-append-.exe
-# behavior happens only for exec(3), not for open(2)! Also, sourcing
-# `FILE.' does not work on cygwin managed mounts.
-func_source ()
-{
- $opt_debug
- case $1 in
- */* | *\\*) . "$1" ;;
- *) . "./$1" ;;
- esac
-}
-
-
-# func_resolve_sysroot PATH
-# Replace a leading = in PATH with a sysroot. Store the result into
-# func_resolve_sysroot_result
-func_resolve_sysroot ()
-{
- func_resolve_sysroot_result=$1
- case $func_resolve_sysroot_result in
- =*)
- func_stripname '=' '' "$func_resolve_sysroot_result"
- func_resolve_sysroot_result=$lt_sysroot$func_stripname_result
- ;;
- esac
-}
-
-# func_replace_sysroot PATH
-# If PATH begins with the sysroot, replace it with = and
-# store the result into func_replace_sysroot_result.
-func_replace_sysroot ()
-{
- case "$lt_sysroot:$1" in
- ?*:"$lt_sysroot"*)
- func_stripname "$lt_sysroot" '' "$1"
- func_replace_sysroot_result="=$func_stripname_result"
- ;;
- *)
- # Including no sysroot.
- func_replace_sysroot_result=$1
- ;;
- esac
-}
-
-# func_infer_tag arg
-# Infer tagged configuration to use if any are available and
-# if one wasn't chosen via the "--tag" command line option.
-# Only attempt this if the compiler in the base compile
-# command doesn't match the default compiler.
-# arg is usually of the form 'gcc ...'
-func_infer_tag ()
-{
- $opt_debug
- if test -n "$available_tags" && test -z "$tagname"; then
- CC_quoted=
- for arg in $CC; do
- func_append_quoted CC_quoted "$arg"
- done
- CC_expanded=`func_echo_all $CC`
- CC_quoted_expanded=`func_echo_all $CC_quoted`
- case $@ in
- # Blanks in the command may have been stripped by the calling shell,
- # but not from the CC environment variable when configure was run.
- " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \
- " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) ;;
- # Blanks at the start of $base_compile will cause this to fail
- # if we don't check for them as well.
- *)
- for z in $available_tags; do
- if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then
- # Evaluate the configuration.
- eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`"
- CC_quoted=
- for arg in $CC; do
- # Double-quote args containing other shell metacharacters.
- func_append_quoted CC_quoted "$arg"
- done
- CC_expanded=`func_echo_all $CC`
- CC_quoted_expanded=`func_echo_all $CC_quoted`
- case "$@ " in
- " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \
- " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*)
- # The compiler in the base compile command matches
- # the one in the tagged configuration.
- # Assume this is the tagged configuration we want.
- tagname=$z
- break
- ;;
- esac
- fi
- done
- # If $tagname still isn't set, then no tagged configuration
- # was found and let the user know that the "--tag" command
- # line option must be used.
- if test -z "$tagname"; then
- func_echo "unable to infer tagged configuration"
- func_fatal_error "specify a tag with \`--tag'"
-# else
-# func_verbose "using $tagname tagged configuration"
- fi
- ;;
- esac
- fi
-}
-
-
-
-# func_write_libtool_object output_name pic_name nonpic_name
-# Create a libtool object file (analogous to a ".la" file),
-# but don't create it if we're doing a dry run.
-func_write_libtool_object ()
-{
- write_libobj=${1}
- if test "$build_libtool_libs" = yes; then
- write_lobj=\'${2}\'
- else
- write_lobj=none
- fi
-
- if test "$build_old_libs" = yes; then
- write_oldobj=\'${3}\'
- else
- write_oldobj=none
- fi
-
- $opt_dry_run || {
- cat >${write_libobj}T <<EOF
-# $write_libobj - a libtool object file
-# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
-#
-# Please DO NOT delete this file!
-# It is necessary for linking the library.
-
-# Name of the PIC object.
-pic_object=$write_lobj
-
-# Name of the non-PIC object
-non_pic_object=$write_oldobj
-
-EOF
- $MV "${write_libobj}T" "${write_libobj}"
- }
-}
-
-
-##################################################
-# FILE NAME AND PATH CONVERSION HELPER FUNCTIONS #
-##################################################
-
-# func_convert_core_file_wine_to_w32 ARG
-# Helper function used by file name conversion functions when $build is *nix,
-# and $host is mingw, cygwin, or some other w32 environment. Relies on a
-# correctly configured wine environment available, with the winepath program
-# in $build's $PATH.
-#
-# ARG is the $build file name to be converted to w32 format.
-# Result is available in $func_convert_core_file_wine_to_w32_result, and will
-# be empty on error (or when ARG is empty)
-func_convert_core_file_wine_to_w32 ()
-{
- $opt_debug
- func_convert_core_file_wine_to_w32_result="$1"
- if test -n "$1"; then
- # Unfortunately, winepath does not exit with a non-zero error code, so we
- # are forced to check the contents of stdout. On the other hand, if the
- # command is not found, the shell will set an exit code of 127 and print
- # *an error message* to stdout. So we must check for both error code of
- # zero AND non-empty stdout, which explains the odd construction:
- func_convert_core_file_wine_to_w32_tmp=`winepath -w "$1" 2>/dev/null`
- if test "$?" -eq 0 && test -n "${func_convert_core_file_wine_to_w32_tmp}"; then
- func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" |
- $SED -e "$lt_sed_naive_backslashify"`
- else
- func_convert_core_file_wine_to_w32_result=
- fi
- fi
-}
-# end: func_convert_core_file_wine_to_w32
-
-
-# func_convert_core_path_wine_to_w32 ARG
-# Helper function used by path conversion functions when $build is *nix, and
-# $host is mingw, cygwin, or some other w32 environment. Relies on a correctly
-# configured wine environment available, with the winepath program in $build's
-# $PATH. Assumes ARG has no leading or trailing path separator characters.
-#
-# ARG is path to be converted from $build format to win32.
-# Result is available in $func_convert_core_path_wine_to_w32_result.
-# Unconvertible file (directory) names in ARG are skipped; if no directory names
-# are convertible, then the result may be empty.
-func_convert_core_path_wine_to_w32 ()
-{
- $opt_debug
- # unfortunately, winepath doesn't convert paths, only file names
- func_convert_core_path_wine_to_w32_result=""
- if test -n "$1"; then
- oldIFS=$IFS
- IFS=:
- for func_convert_core_path_wine_to_w32_f in $1; do
- IFS=$oldIFS
- func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f"
- if test -n "$func_convert_core_file_wine_to_w32_result" ; then
- if test -z "$func_convert_core_path_wine_to_w32_result"; then
- func_convert_core_path_wine_to_w32_result="$func_convert_core_file_wine_to_w32_result"
- else
- func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result"
- fi
- fi
- done
- IFS=$oldIFS
- fi
-}
-# end: func_convert_core_path_wine_to_w32
-
-
-# func_cygpath ARGS...
-# Wrapper around calling the cygpath program via LT_CYGPATH. This is used when
-# when (1) $build is *nix and Cygwin is hosted via a wine environment; or (2)
-# $build is MSYS and $host is Cygwin, or (3) $build is Cygwin. In case (1) or
-# (2), returns the Cygwin file name or path in func_cygpath_result (input
-# file name or path is assumed to be in w32 format, as previously converted
-# from $build's *nix or MSYS format). In case (3), returns the w32 file name
-# or path in func_cygpath_result (input file name or path is assumed to be in
-# Cygwin format). Returns an empty string on error.
-#
-# ARGS are passed to cygpath, with the last one being the file name or path to
-# be converted.
-#
-# Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH
-# environment variable; do not put it in $PATH.
-func_cygpath ()
-{
- $opt_debug
- if test -n "$LT_CYGPATH" && test -f "$LT_CYGPATH"; then
- func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null`
- if test "$?" -ne 0; then
- # on failure, ensure result is empty
- func_cygpath_result=
- fi
- else
- func_cygpath_result=
- func_error "LT_CYGPATH is empty or specifies non-existent file: \`$LT_CYGPATH'"
- fi
-}
-#end: func_cygpath
-
-
-# func_convert_core_msys_to_w32 ARG
-# Convert file name or path ARG from MSYS format to w32 format. Return
-# result in func_convert_core_msys_to_w32_result.
-func_convert_core_msys_to_w32 ()
-{
- $opt_debug
- # awkward: cmd appends spaces to result
- func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null |
- $SED -e 's/[ ]*$//' -e "$lt_sed_naive_backslashify"`
-}
-#end: func_convert_core_msys_to_w32
-
-
-# func_convert_file_check ARG1 ARG2
-# Verify that ARG1 (a file name in $build format) was converted to $host
-# format in ARG2. Otherwise, emit an error message, but continue (resetting
-# func_to_host_file_result to ARG1).
-func_convert_file_check ()
-{
- $opt_debug
- if test -z "$2" && test -n "$1" ; then
- func_error "Could not determine host file name corresponding to"
- func_error " \`$1'"
- func_error "Continuing, but uninstalled executables may not work."
- # Fallback:
- func_to_host_file_result="$1"
- fi
-}
-# end func_convert_file_check
-
-
-# func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH
-# Verify that FROM_PATH (a path in $build format) was converted to $host
-# format in TO_PATH. Otherwise, emit an error message, but continue, resetting
-# func_to_host_file_result to a simplistic fallback value (see below).
-func_convert_path_check ()
-{
- $opt_debug
- if test -z "$4" && test -n "$3"; then
- func_error "Could not determine the host path corresponding to"
- func_error " \`$3'"
- func_error "Continuing, but uninstalled executables may not work."
- # Fallback. This is a deliberately simplistic "conversion" and
- # should not be "improved". See libtool.info.
- if test "x$1" != "x$2"; then
- lt_replace_pathsep_chars="s|$1|$2|g"
- func_to_host_path_result=`echo "$3" |
- $SED -e "$lt_replace_pathsep_chars"`
- else
- func_to_host_path_result="$3"
- fi
- fi
-}
-# end func_convert_path_check
-
-
-# func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG
-# Modifies func_to_host_path_result by prepending REPL if ORIG matches FRONTPAT
-# and appending REPL if ORIG matches BACKPAT.
-func_convert_path_front_back_pathsep ()
-{
- $opt_debug
- case $4 in
- $1 ) func_to_host_path_result="$3$func_to_host_path_result"
- ;;
- esac
- case $4 in
- $2 ) func_append func_to_host_path_result "$3"
- ;;
- esac
-}
-# end func_convert_path_front_back_pathsep
-
-
-##################################################
-# $build to $host FILE NAME CONVERSION FUNCTIONS #
-##################################################
-# invoked via `$to_host_file_cmd ARG'
-#
-# In each case, ARG is the path to be converted from $build to $host format.
-# Result will be available in $func_to_host_file_result.
-
-
-# func_to_host_file ARG
-# Converts the file name ARG from $build format to $host format. Return result
-# in func_to_host_file_result.
-func_to_host_file ()
-{
- $opt_debug
- $to_host_file_cmd "$1"
-}
-# end func_to_host_file
-
-
-# func_to_tool_file ARG LAZY
-# converts the file name ARG from $build format to toolchain format. Return
-# result in func_to_tool_file_result. If the conversion in use is listed
-# in (the comma separated) LAZY, no conversion takes place.
-func_to_tool_file ()
-{
- $opt_debug
- case ,$2, in
- *,"$to_tool_file_cmd",*)
- func_to_tool_file_result=$1
- ;;
- *)
- $to_tool_file_cmd "$1"
- func_to_tool_file_result=$func_to_host_file_result
- ;;
- esac
-}
-# end func_to_tool_file
-
-
-# func_convert_file_noop ARG
-# Copy ARG to func_to_host_file_result.
-func_convert_file_noop ()
-{
- func_to_host_file_result="$1"
-}
-# end func_convert_file_noop
-
-
-# func_convert_file_msys_to_w32 ARG
-# Convert file name ARG from (mingw) MSYS to (mingw) w32 format; automatic
-# conversion to w32 is not available inside the cwrapper. Returns result in
-# func_to_host_file_result.
-func_convert_file_msys_to_w32 ()
-{
- $opt_debug
- func_to_host_file_result="$1"
- if test -n "$1"; then
- func_convert_core_msys_to_w32 "$1"
- func_to_host_file_result="$func_convert_core_msys_to_w32_result"
- fi
- func_convert_file_check "$1" "$func_to_host_file_result"
-}
-# end func_convert_file_msys_to_w32
-
-
-# func_convert_file_cygwin_to_w32 ARG
-# Convert file name ARG from Cygwin to w32 format. Returns result in
-# func_to_host_file_result.
-func_convert_file_cygwin_to_w32 ()
-{
- $opt_debug
- func_to_host_file_result="$1"
- if test -n "$1"; then
- # because $build is cygwin, we call "the" cygpath in $PATH; no need to use
- # LT_CYGPATH in this case.
- func_to_host_file_result=`cygpath -m "$1"`
- fi
- func_convert_file_check "$1" "$func_to_host_file_result"
-}
-# end func_convert_file_cygwin_to_w32
-
-
-# func_convert_file_nix_to_w32 ARG
-# Convert file name ARG from *nix to w32 format. Requires a wine environment
-# and a working winepath. Returns result in func_to_host_file_result.
-func_convert_file_nix_to_w32 ()
-{
- $opt_debug
- func_to_host_file_result="$1"
- if test -n "$1"; then
- func_convert_core_file_wine_to_w32 "$1"
- func_to_host_file_result="$func_convert_core_file_wine_to_w32_result"
- fi
- func_convert_file_check "$1" "$func_to_host_file_result"
-}
-# end func_convert_file_nix_to_w32
-
-
-# func_convert_file_msys_to_cygwin ARG
-# Convert file name ARG from MSYS to Cygwin format. Requires LT_CYGPATH set.
-# Returns result in func_to_host_file_result.
-func_convert_file_msys_to_cygwin ()
-{
- $opt_debug
- func_to_host_file_result="$1"
- if test -n "$1"; then
- func_convert_core_msys_to_w32 "$1"
- func_cygpath -u "$func_convert_core_msys_to_w32_result"
- func_to_host_file_result="$func_cygpath_result"
- fi
- func_convert_file_check "$1" "$func_to_host_file_result"
-}
-# end func_convert_file_msys_to_cygwin
-
-
-# func_convert_file_nix_to_cygwin ARG
-# Convert file name ARG from *nix to Cygwin format. Requires Cygwin installed
-# in a wine environment, working winepath, and LT_CYGPATH set. Returns result
-# in func_to_host_file_result.
-func_convert_file_nix_to_cygwin ()
-{
- $opt_debug
- func_to_host_file_result="$1"
- if test -n "$1"; then
- # convert from *nix to w32, then use cygpath to convert from w32 to cygwin.
- func_convert_core_file_wine_to_w32 "$1"
- func_cygpath -u "$func_convert_core_file_wine_to_w32_result"
- func_to_host_file_result="$func_cygpath_result"
- fi
- func_convert_file_check "$1" "$func_to_host_file_result"
-}
-# end func_convert_file_nix_to_cygwin
-
-
-#############################################
-# $build to $host PATH CONVERSION FUNCTIONS #
-#############################################
-# invoked via `$to_host_path_cmd ARG'
-#
-# In each case, ARG is the path to be converted from $build to $host format.
-# The result will be available in $func_to_host_path_result.
-#
-# Path separators are also converted from $build format to $host format. If
-# ARG begins or ends with a path separator character, it is preserved (but
-# converted to $host format) on output.
-#
-# All path conversion functions are named using the following convention:
-# file name conversion function : func_convert_file_X_to_Y ()
-# path conversion function : func_convert_path_X_to_Y ()
-# where, for any given $build/$host combination the 'X_to_Y' value is the
-# same. If conversion functions are added for new $build/$host combinations,
-# the two new functions must follow this pattern, or func_init_to_host_path_cmd
-# will break.
-
-
-# func_init_to_host_path_cmd
-# Ensures that function "pointer" variable $to_host_path_cmd is set to the
-# appropriate value, based on the value of $to_host_file_cmd.
-to_host_path_cmd=
-func_init_to_host_path_cmd ()
-{
- $opt_debug
- if test -z "$to_host_path_cmd"; then
- func_stripname 'func_convert_file_' '' "$to_host_file_cmd"
- to_host_path_cmd="func_convert_path_${func_stripname_result}"
- fi
-}
-
-
-# func_to_host_path ARG
-# Converts the path ARG from $build format to $host format. Return result
-# in func_to_host_path_result.
-func_to_host_path ()
-{
- $opt_debug
- func_init_to_host_path_cmd
- $to_host_path_cmd "$1"
-}
-# end func_to_host_path
-
-
-# func_convert_path_noop ARG
-# Copy ARG to func_to_host_path_result.
-func_convert_path_noop ()
-{
- func_to_host_path_result="$1"
-}
-# end func_convert_path_noop
-
-
-# func_convert_path_msys_to_w32 ARG
-# Convert path ARG from (mingw) MSYS to (mingw) w32 format; automatic
-# conversion to w32 is not available inside the cwrapper. Returns result in
-# func_to_host_path_result.
-func_convert_path_msys_to_w32 ()
-{
- $opt_debug
- func_to_host_path_result="$1"
- if test -n "$1"; then
- # Remove leading and trailing path separator characters from ARG. MSYS
- # behavior is inconsistent here; cygpath turns them into '.;' and ';.';
- # and winepath ignores them completely.
- func_stripname : : "$1"
- func_to_host_path_tmp1=$func_stripname_result
- func_convert_core_msys_to_w32 "$func_to_host_path_tmp1"
- func_to_host_path_result="$func_convert_core_msys_to_w32_result"
- func_convert_path_check : ";" \
- "$func_to_host_path_tmp1" "$func_to_host_path_result"
- func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
- fi
-}
-# end func_convert_path_msys_to_w32
-
-
-# func_convert_path_cygwin_to_w32 ARG
-# Convert path ARG from Cygwin to w32 format. Returns result in
-# func_to_host_file_result.
-func_convert_path_cygwin_to_w32 ()
-{
- $opt_debug
- func_to_host_path_result="$1"
- if test -n "$1"; then
- # See func_convert_path_msys_to_w32:
- func_stripname : : "$1"
- func_to_host_path_tmp1=$func_stripname_result
- func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"`
- func_convert_path_check : ";" \
- "$func_to_host_path_tmp1" "$func_to_host_path_result"
- func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
- fi
-}
-# end func_convert_path_cygwin_to_w32
-
-
-# func_convert_path_nix_to_w32 ARG
-# Convert path ARG from *nix to w32 format. Requires a wine environment and
-# a working winepath. Returns result in func_to_host_file_result.
-func_convert_path_nix_to_w32 ()
-{
- $opt_debug
- func_to_host_path_result="$1"
- if test -n "$1"; then
- # See func_convert_path_msys_to_w32:
- func_stripname : : "$1"
- func_to_host_path_tmp1=$func_stripname_result
- func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1"
- func_to_host_path_result="$func_convert_core_path_wine_to_w32_result"
- func_convert_path_check : ";" \
- "$func_to_host_path_tmp1" "$func_to_host_path_result"
- func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
- fi
-}
-# end func_convert_path_nix_to_w32
-
-
-# func_convert_path_msys_to_cygwin ARG
-# Convert path ARG from MSYS to Cygwin format. Requires LT_CYGPATH set.
-# Returns result in func_to_host_file_result.
-func_convert_path_msys_to_cygwin ()
-{
- $opt_debug
- func_to_host_path_result="$1"
- if test -n "$1"; then
- # See func_convert_path_msys_to_w32:
- func_stripname : : "$1"
- func_to_host_path_tmp1=$func_stripname_result
- func_convert_core_msys_to_w32 "$func_to_host_path_tmp1"
- func_cygpath -u -p "$func_convert_core_msys_to_w32_result"
- func_to_host_path_result="$func_cygpath_result"
- func_convert_path_check : : \
- "$func_to_host_path_tmp1" "$func_to_host_path_result"
- func_convert_path_front_back_pathsep ":*" "*:" : "$1"
- fi
-}
-# end func_convert_path_msys_to_cygwin
-
-
-# func_convert_path_nix_to_cygwin ARG
-# Convert path ARG from *nix to Cygwin format. Requires Cygwin installed in a
-# a wine environment, working winepath, and LT_CYGPATH set. Returns result in
-# func_to_host_file_result.
-func_convert_path_nix_to_cygwin ()
-{
- $opt_debug
- func_to_host_path_result="$1"
- if test -n "$1"; then
- # Remove leading and trailing path separator characters from
- # ARG. msys behavior is inconsistent here, cygpath turns them
- # into '.;' and ';.', and winepath ignores them completely.
- func_stripname : : "$1"
- func_to_host_path_tmp1=$func_stripname_result
- func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1"
- func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result"
- func_to_host_path_result="$func_cygpath_result"
- func_convert_path_check : : \
- "$func_to_host_path_tmp1" "$func_to_host_path_result"
- func_convert_path_front_back_pathsep ":*" "*:" : "$1"
- fi
-}
-# end func_convert_path_nix_to_cygwin
-
-
-# func_mode_compile arg...
-func_mode_compile ()
-{
- $opt_debug
- # Get the compilation command and the source file.
- base_compile=
- srcfile="$nonopt" # always keep a non-empty value in "srcfile"
- suppress_opt=yes
- suppress_output=
- arg_mode=normal
- libobj=
- later=
- pie_flag=
-
- for arg
- do
- case $arg_mode in
- arg )
- # do not "continue". Instead, add this to base_compile
- lastarg="$arg"
- arg_mode=normal
- ;;
-
- target )
- libobj="$arg"
- arg_mode=normal
- continue
- ;;
-
- normal )
- # Accept any command-line options.
- case $arg in
- -o)
- test -n "$libobj" && \
- func_fatal_error "you cannot specify \`-o' more than once"
- arg_mode=target
- continue
- ;;
-
- -pie | -fpie | -fPIE)
- func_append pie_flag " $arg"
- continue
- ;;
-
- -shared | -static | -prefer-pic | -prefer-non-pic)
- func_append later " $arg"
- continue
- ;;
-
- -no-suppress)
- suppress_opt=no
- continue
- ;;
-
- -Xcompiler)
- arg_mode=arg # the next one goes into the "base_compile" arg list
- continue # The current "srcfile" will either be retained or
- ;; # replaced later. I would guess that would be a bug.
-
- -Wc,*)
- func_stripname '-Wc,' '' "$arg"
- args=$func_stripname_result
- lastarg=
- save_ifs="$IFS"; IFS=','
- for arg in $args; do
- IFS="$save_ifs"
- func_append_quoted lastarg "$arg"
- done
- IFS="$save_ifs"
- func_stripname ' ' '' "$lastarg"
- lastarg=$func_stripname_result
-
- # Add the arguments to base_compile.
- func_append base_compile " $lastarg"
- continue
- ;;
-
- *)
- # Accept the current argument as the source file.
- # The previous "srcfile" becomes the current argument.
- #
- lastarg="$srcfile"
- srcfile="$arg"
- ;;
- esac # case $arg
- ;;
- esac # case $arg_mode
-
- # Aesthetically quote the previous argument.
- func_append_quoted base_compile "$lastarg"
- done # for arg
-
- case $arg_mode in
- arg)
- func_fatal_error "you must specify an argument for -Xcompile"
- ;;
- target)
- func_fatal_error "you must specify a target with \`-o'"
- ;;
- *)
- # Get the name of the library object.
- test -z "$libobj" && {
- func_basename "$srcfile"
- libobj="$func_basename_result"
- }
- ;;
- esac
-
- # Recognize several different file suffixes.
- # If the user specifies -o file.o, it is replaced with file.lo
- case $libobj in
- *.[cCFSifmso] | \
- *.ada | *.adb | *.ads | *.asm | \
- *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \
- *.[fF][09]? | *.for | *.java | *.go | *.obj | *.sx | *.cu | *.cup)
- func_xform "$libobj"
- libobj=$func_xform_result
- ;;
- esac
-
- case $libobj in
- *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;;
- *)
- func_fatal_error "cannot determine name of library object from \`$libobj'"
- ;;
- esac
-
- func_infer_tag $base_compile
-
- for arg in $later; do
- case $arg in
- -shared)
- test "$build_libtool_libs" != yes && \
- func_fatal_configuration "can not build a shared library"
- build_old_libs=no
- continue
- ;;
-
- -static)
- build_libtool_libs=no
- build_old_libs=yes
- continue
- ;;
-
- -prefer-pic)
- pic_mode=yes
- continue
- ;;
-
- -prefer-non-pic)
- pic_mode=no
- continue
- ;;
- esac
- done
-
- func_quote_for_eval "$libobj"
- test "X$libobj" != "X$func_quote_for_eval_result" \
- && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"' &()|`$[]' \
- && func_warning "libobj name \`$libobj' may not contain shell special characters."
- func_dirname_and_basename "$obj" "/" ""
- objname="$func_basename_result"
- xdir="$func_dirname_result"
- lobj=${xdir}$objdir/$objname
-
- test -z "$base_compile" && \
- func_fatal_help "you must specify a compilation command"
-
- # Delete any leftover library objects.
- if test "$build_old_libs" = yes; then
- removelist="$obj $lobj $libobj ${libobj}T"
- else
- removelist="$lobj $libobj ${libobj}T"
- fi
-
- # On Cygwin there's no "real" PIC flag so we must build both object types
- case $host_os in
- cygwin* | mingw* | pw32* | os2* | cegcc*)
- pic_mode=default
- ;;
- esac
- if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then
- # non-PIC code in shared libraries is not supported
- pic_mode=default
- fi
-
- # Calculate the filename of the output object if compiler does
- # not support -o with -c
- if test "$compiler_c_o" = no; then
- output_obj=`$ECHO "$srcfile" | $SED 's%^.*/%%; s%\.[^.]*$%%'`.${objext}
- lockfile="$output_obj.lock"
- else
- output_obj=
- need_locks=no
- lockfile=
- fi
-
- # Lock this critical section if it is needed
- # We use this script file to make the link, it avoids creating a new file
- if test "$need_locks" = yes; then
- until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do
- func_echo "Waiting for $lockfile to be removed"
- sleep 2
- done
- elif test "$need_locks" = warn; then
- if test -f "$lockfile"; then
- $ECHO "\
-*** ERROR, $lockfile exists and contains:
-`cat $lockfile 2>/dev/null`
-
-This indicates that another process is trying to use the same
-temporary object file, and libtool could not work around it because
-your compiler does not support \`-c' and \`-o' together. If you
-repeat this compilation, it may succeed, by chance, but you had better
-avoid parallel builds (make -j) in this platform, or get a better
-compiler."
-
- $opt_dry_run || $RM $removelist
- exit $EXIT_FAILURE
- fi
- func_append removelist " $output_obj"
- $ECHO "$srcfile" > "$lockfile"
- fi
-
- $opt_dry_run || $RM $removelist
- func_append removelist " $lockfile"
- trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15
-
- func_to_tool_file "$srcfile" func_convert_file_msys_to_w32
- srcfile=$func_to_tool_file_result
- func_quote_for_eval "$srcfile"
- qsrcfile=$func_quote_for_eval_result
-
- # Only build a PIC object if we are building libtool libraries.
- if test "$build_libtool_libs" = yes; then
- # Without this assignment, base_compile gets emptied.
- fbsd_hideous_sh_bug=$base_compile
-
- if test "$pic_mode" != no; then
- command="$base_compile $qsrcfile $pic_flag"
- else
- # Don't build PIC code
- command="$base_compile $qsrcfile"
- fi
-
- func_mkdir_p "$xdir$objdir"
-
- if test -z "$output_obj"; then
- # Place PIC objects in $objdir
- func_append command " -o $lobj"
- fi
-
- func_show_eval_locale "$command" \
- 'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE'
-
- if test "$need_locks" = warn &&
- test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then
- $ECHO "\
-*** ERROR, $lockfile contains:
-`cat $lockfile 2>/dev/null`
-
-but it should contain:
-$srcfile
-
-This indicates that another process is trying to use the same
-temporary object file, and libtool could not work around it because
-your compiler does not support \`-c' and \`-o' together. If you
-repeat this compilation, it may succeed, by chance, but you had better
-avoid parallel builds (make -j) in this platform, or get a better
-compiler."
-
- $opt_dry_run || $RM $removelist
- exit $EXIT_FAILURE
- fi
-
- # Just move the object if needed, then go on to compile the next one
- if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then
- func_show_eval '$MV "$output_obj" "$lobj"' \
- 'error=$?; $opt_dry_run || $RM $removelist; exit $error'
- fi
-
- # Allow error messages only from the first compilation.
- if test "$suppress_opt" = yes; then
- suppress_output=' >/dev/null 2>&1'
- fi
- fi
-
- # Only build a position-dependent object if we build old libraries.
- if test "$build_old_libs" = yes; then
- if test "$pic_mode" != yes; then
- # Don't build PIC code
- command="$base_compile $qsrcfile$pie_flag"
- else
- command="$base_compile $qsrcfile $pic_flag"
- fi
- if test "$compiler_c_o" = yes; then
- func_append command " -o $obj"
- fi
-
- # Suppress compiler output if we already did a PIC compilation.
- func_append command "$suppress_output"
- func_show_eval_locale "$command" \
- '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE'
-
- if test "$need_locks" = warn &&
- test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then
- $ECHO "\
-*** ERROR, $lockfile contains:
-`cat $lockfile 2>/dev/null`
-
-but it should contain:
-$srcfile
-
-This indicates that another process is trying to use the same
-temporary object file, and libtool could not work around it because
-your compiler does not support \`-c' and \`-o' together. If you
-repeat this compilation, it may succeed, by chance, but you had better
-avoid parallel builds (make -j) in this platform, or get a better
-compiler."
-
- $opt_dry_run || $RM $removelist
- exit $EXIT_FAILURE
- fi
-
- # Just move the object if needed
- if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then
- func_show_eval '$MV "$output_obj" "$obj"' \
- 'error=$?; $opt_dry_run || $RM $removelist; exit $error'
- fi
- fi
-
- $opt_dry_run || {
- func_write_libtool_object "$libobj" "$objdir/$objname" "$objname"
-
- # Unlock the critical section if it was locked
- if test "$need_locks" != no; then
- removelist=$lockfile
- $RM "$lockfile"
- fi
- }
-
- exit $EXIT_SUCCESS
-}
-
-$opt_help || {
- test "$opt_mode" = compile && func_mode_compile ${1+"$@"}
-}
-
-func_mode_help ()
-{
- # We need to display help for each of the modes.
- case $opt_mode in
- "")
- # Generic help is extracted from the usage comments
- # at the start of this file.
- func_help
- ;;
-
- clean)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE...
-
-Remove files from the build directory.
-
-RM is the name of the program to use to delete files associated with each FILE
-(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed
-to RM.
-
-If FILE is a libtool library, object or program, all the files associated
-with it are deleted. Otherwise, only FILE itself is deleted using RM."
- ;;
-
- compile)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE
-
-Compile a source file into a libtool library object.
-
-This mode accepts the following additional options:
-
- -o OUTPUT-FILE set the output file name to OUTPUT-FILE
- -no-suppress do not suppress compiler output for multiple passes
- -prefer-pic try to build PIC objects only
- -prefer-non-pic try to build non-PIC objects only
- -shared do not build a \`.o' file suitable for static linking
- -static only build a \`.o' file suitable for static linking
- -Wc,FLAG pass FLAG directly to the compiler
-
-COMPILE-COMMAND is a command to be used in creating a \`standard' object file
-from the given SOURCEFILE.
-
-The output file name is determined by removing the directory component from
-SOURCEFILE, then substituting the C source code suffix \`.c' with the
-library object suffix, \`.lo'."
- ;;
-
- execute)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]...
-
-Automatically set library path, then run a program.
-
-This mode accepts the following additional options:
-
- -dlopen FILE add the directory containing FILE to the library path
-
-This mode sets the library path environment variable according to \`-dlopen'
-flags.
-
-If any of the ARGS are libtool executable wrappers, then they are translated
-into their corresponding uninstalled binary, and any of their required library
-directories are added to the library path.
-
-Then, COMMAND is executed, with ARGS as arguments."
- ;;
-
- finish)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=finish [LIBDIR]...
-
-Complete the installation of libtool libraries.
-
-Each LIBDIR is a directory that contains libtool libraries.
-
-The commands that this mode executes may require superuser privileges. Use
-the \`--dry-run' option if you just want to see what would be executed."
- ;;
-
- install)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND...
-
-Install executables or libraries.
-
-INSTALL-COMMAND is the installation command. The first component should be
-either the \`install' or \`cp' program.
-
-The following components of INSTALL-COMMAND are treated specially:
-
- -inst-prefix-dir PREFIX-DIR Use PREFIX-DIR as a staging area for installation
-
-The rest of the components are interpreted as arguments to that command (only
-BSD-compatible install options are recognized)."
- ;;
-
- link)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=link LINK-COMMAND...
-
-Link object files or libraries together to form another library, or to
-create an executable program.
-
-LINK-COMMAND is a command using the C compiler that you would use to create
-a program from several object files.
-
-The following components of LINK-COMMAND are treated specially:
-
- -all-static do not do any dynamic linking at all
- -avoid-version do not add a version suffix if possible
- -bindir BINDIR specify path to binaries directory (for systems where
- libraries must be found in the PATH setting at runtime)
- -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime
- -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols
- -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3)
- -export-symbols SYMFILE
- try to export only the symbols listed in SYMFILE
- -export-symbols-regex REGEX
- try to export only the symbols matching REGEX
- -LLIBDIR search LIBDIR for required installed libraries
- -lNAME OUTPUT-FILE requires the installed library libNAME
- -module build a library that can dlopened
- -no-fast-install disable the fast-install mode
- -no-install link a not-installable executable
- -no-undefined declare that a library does not refer to external symbols
- -o OUTPUT-FILE create OUTPUT-FILE from the specified objects
- -objectlist FILE Use a list of object files found in FILE to specify objects
- -precious-files-regex REGEX
- don't remove output files matching REGEX
- -release RELEASE specify package release information
- -rpath LIBDIR the created library will eventually be installed in LIBDIR
- -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries
- -shared only do dynamic linking of libtool libraries
- -shrext SUFFIX override the standard shared library file extension
- -static do not do any dynamic linking of uninstalled libtool libraries
- -static-libtool-libs
- do not do any dynamic linking of libtool libraries
- -version-info CURRENT[:REVISION[:AGE]]
- specify library version info [each variable defaults to 0]
- -weak LIBNAME declare that the target provides the LIBNAME interface
- -Wc,FLAG
- -Xcompiler FLAG pass linker-specific FLAG directly to the compiler
- -Wl,FLAG
- -Xlinker FLAG pass linker-specific FLAG directly to the linker
- -XCClinker FLAG pass link-specific FLAG to the compiler driver (CC)
-
-All other options (arguments beginning with \`-') are ignored.
-
-Every other argument is treated as a filename. Files ending in \`.la' are
-treated as uninstalled libtool libraries, other files are standard or library
-object files.
-
-If the OUTPUT-FILE ends in \`.la', then a libtool library is created,
-only library objects (\`.lo' files) may be specified, and \`-rpath' is
-required, except when creating a convenience library.
-
-If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created
-using \`ar' and \`ranlib', or on Windows using \`lib'.
-
-If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file
-is created, otherwise an executable program is created."
- ;;
-
- uninstall)
- $ECHO \
-"Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE...
-
-Remove libraries from an installation directory.
-
-RM is the name of the program to use to delete files associated with each FILE
-(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed
-to RM.
-
-If FILE is a libtool library, all the files associated with it are deleted.
-Otherwise, only FILE itself is deleted using RM."
- ;;
-
- *)
- func_fatal_help "invalid operation mode \`$opt_mode'"
- ;;
- esac
-
- echo
- $ECHO "Try \`$progname --help' for more information about other modes."
-}
-
-# Now that we've collected a possible --mode arg, show help if necessary
-if $opt_help; then
- if test "$opt_help" = :; then
- func_mode_help
- else
- {
- func_help noexit
- for opt_mode in compile link execute install finish uninstall clean; do
- func_mode_help
- done
- } | sed -n '1p; 2,$s/^Usage:/ or: /p'
- {
- func_help noexit
- for opt_mode in compile link execute install finish uninstall clean; do
- echo
- func_mode_help
- done
- } |
- sed '1d
- /^When reporting/,/^Report/{
- H
- d
- }
- $x
- /information about other modes/d
- /more detailed .*MODE/d
- s/^Usage:.*--mode=\([^ ]*\) .*/Description of \1 mode:/'
- fi
- exit $?
-fi
-
-
-# func_mode_execute arg...
-func_mode_execute ()
-{
- $opt_debug
- # The first argument is the command name.
- cmd="$nonopt"
- test -z "$cmd" && \
- func_fatal_help "you must specify a COMMAND"
-
- # Handle -dlopen flags immediately.
- for file in $opt_dlopen; do
- test -f "$file" \
- || func_fatal_help "\`$file' is not a file"
-
- dir=
- case $file in
- *.la)
- func_resolve_sysroot "$file"
- file=$func_resolve_sysroot_result
-
- # Check to see that this really is a libtool archive.
- func_lalib_unsafe_p "$file" \
- || func_fatal_help "\`$lib' is not a valid libtool archive"
-
- # Read the libtool library.
- dlname=
- library_names=
- func_source "$file"
-
- # Skip this library if it cannot be dlopened.
- if test -z "$dlname"; then
- # Warn if it was a shared library.
- test -n "$library_names" && \
- func_warning "\`$file' was not linked with \`-export-dynamic'"
- continue
- fi
-
- func_dirname "$file" "" "."
- dir="$func_dirname_result"
-
- if test -f "$dir/$objdir/$dlname"; then
- func_append dir "/$objdir"
- else
- if test ! -f "$dir/$dlname"; then
- func_fatal_error "cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'"
- fi
- fi
- ;;
-
- *.lo)
- # Just add the directory containing the .lo file.
- func_dirname "$file" "" "."
- dir="$func_dirname_result"
- ;;
-
- *)
- func_warning "\`-dlopen' is ignored for non-libtool libraries and objects"
- continue
- ;;
- esac
-
- # Get the absolute pathname.
- absdir=`cd "$dir" && pwd`
- test -n "$absdir" && dir="$absdir"
-
- # Now add the directory to shlibpath_var.
- if eval "test -z \"\$$shlibpath_var\""; then
- eval "$shlibpath_var=\"\$dir\""
- else
- eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\""
- fi
- done
-
- # This variable tells wrapper scripts just to set shlibpath_var
- # rather than running their programs.
- libtool_execute_magic="$magic"
-
- # Check if any of the arguments is a wrapper script.
- args=
- for file
- do
- case $file in
- -* | *.la | *.lo ) ;;
- *)
- # Do a test to see if this is really a libtool program.
- if func_ltwrapper_script_p "$file"; then
- func_source "$file"
- # Transform arg to wrapped name.
- file="$progdir/$program"
- elif func_ltwrapper_executable_p "$file"; then
- func_ltwrapper_scriptname "$file"
- func_source "$func_ltwrapper_scriptname_result"
- # Transform arg to wrapped name.
- file="$progdir/$program"
- fi
- ;;
- esac
- # Quote arguments (to preserve shell metacharacters).
- func_append_quoted args "$file"
- done
-
- if test "X$opt_dry_run" = Xfalse; then
- if test -n "$shlibpath_var"; then
- # Export the shlibpath_var.
- eval "export $shlibpath_var"
- fi
-
- # Restore saved environment variables
- for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES
- do
- eval "if test \"\${save_$lt_var+set}\" = set; then
- $lt_var=\$save_$lt_var; export $lt_var
- else
- $lt_unset $lt_var
- fi"
- done
-
- # Now prepare to actually exec the command.
- exec_cmd="\$cmd$args"
- else
- # Display what would be done.
- if test -n "$shlibpath_var"; then
- eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\""
- echo "export $shlibpath_var"
- fi
- $ECHO "$cmd$args"
- exit $EXIT_SUCCESS
- fi
-}
-
-test "$opt_mode" = execute && func_mode_execute ${1+"$@"}
-
-
-# func_mode_finish arg...
-func_mode_finish ()
-{
- $opt_debug
- libs=
- libdirs=
- admincmds=
-
- for opt in "$nonopt" ${1+"$@"}
- do
- if test -d "$opt"; then
- func_append libdirs " $opt"
-
- elif test -f "$opt"; then
- if func_lalib_unsafe_p "$opt"; then
- func_append libs " $opt"
- else
- func_warning "\`$opt' is not a valid libtool archive"
- fi
-
- else
- func_fatal_error "invalid argument \`$opt'"
- fi
- done
-
- if test -n "$libs"; then
- if test -n "$lt_sysroot"; then
- sysroot_regex=`$ECHO "$lt_sysroot" | $SED "$sed_make_literal_regex"`
- sysroot_cmd="s/\([ ']\)$sysroot_regex/\1/g;"
- else
- sysroot_cmd=
- fi
-
- # Remove sysroot references
- if $opt_dry_run; then
- for lib in $libs; do
- echo "removing references to $lt_sysroot and \`=' prefixes from $lib"
- done
- else
- tmpdir=`func_mktempdir`
- for lib in $libs; do
- sed -e "${sysroot_cmd} s/\([ ']-[LR]\)=/\1/g; s/\([ ']\)=/\1/g" $lib \
- > $tmpdir/tmp-la
- mv -f $tmpdir/tmp-la $lib
- done
- ${RM}r "$tmpdir"
- fi
- fi
-
- if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
- for libdir in $libdirs; do
- if test -n "$finish_cmds"; then
- # Do each command in the finish commands.
- func_execute_cmds "$finish_cmds" 'admincmds="$admincmds
-'"$cmd"'"'
- fi
- if test -n "$finish_eval"; then
- # Do the single finish_eval.
- eval cmds=\"$finish_eval\"
- $opt_dry_run || eval "$cmds" || func_append admincmds "
- $cmds"
- fi
- done
- fi
-
- # Exit here if they wanted silent mode.
- $opt_silent && exit $EXIT_SUCCESS
-
- if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
- echo "----------------------------------------------------------------------"
- echo "Libraries have been installed in:"
- for libdir in $libdirs; do
- $ECHO " $libdir"
- done
- echo
- echo "If you ever happen to want to link against installed libraries"
- echo "in a given directory, LIBDIR, you must either use libtool, and"
- echo "specify the full pathname of the library, or use the \`-LLIBDIR'"
- echo "flag during linking and do at least one of the following:"
- if test -n "$shlibpath_var"; then
- echo " - add LIBDIR to the \`$shlibpath_var' environment variable"
- echo " during execution"
- fi
- if test -n "$runpath_var"; then
- echo " - add LIBDIR to the \`$runpath_var' environment variable"
- echo " during linking"
- fi
- if test -n "$hardcode_libdir_flag_spec"; then
- libdir=LIBDIR
- eval flag=\"$hardcode_libdir_flag_spec\"
-
- $ECHO " - use the \`$flag' linker flag"
- fi
- if test -n "$admincmds"; then
- $ECHO " - have your system administrator run these commands:$admincmds"
- fi
- if test -f /etc/ld.so.conf; then
- echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'"
- fi
- echo
-
- echo "See any operating system documentation about shared libraries for"
- case $host in
- solaris2.[6789]|solaris2.1[0-9])
- echo "more information, such as the ld(1), crle(1) and ld.so(8) manual"
- echo "pages."
- ;;
- *)
- echo "more information, such as the ld(1) and ld.so(8) manual pages."
- ;;
- esac
- echo "----------------------------------------------------------------------"
- fi
- exit $EXIT_SUCCESS
-}
-
-test "$opt_mode" = finish && func_mode_finish ${1+"$@"}
-
-
-# func_mode_install arg...
-func_mode_install ()
-{
- $opt_debug
- # There may be an optional sh(1) argument at the beginning of
- # install_prog (especially on Windows NT).
- if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh ||
- # Allow the use of GNU shtool's install command.
- case $nonopt in *shtool*) :;; *) false;; esac; then
- # Aesthetically quote it.
- func_quote_for_eval "$nonopt"
- install_prog="$func_quote_for_eval_result "
- arg=$1
- shift
- else
- install_prog=
- arg=$nonopt
- fi
-
- # The real first argument should be the name of the installation program.
- # Aesthetically quote it.
- func_quote_for_eval "$arg"
- func_append install_prog "$func_quote_for_eval_result"
- install_shared_prog=$install_prog
- case " $install_prog " in
- *[\\\ /]cp\ *) install_cp=: ;;
- *) install_cp=false ;;
- esac
-
- # We need to accept at least all the BSD install flags.
- dest=
- files=
- opts=
- prev=
- install_type=
- isdir=no
- stripme=
- no_mode=:
- for arg
- do
- arg2=
- if test -n "$dest"; then
- func_append files " $dest"
- dest=$arg
- continue
- fi
-
- case $arg in
- -d) isdir=yes ;;
- -f)
- if $install_cp; then :; else
- prev=$arg
- fi
- ;;
- -g | -m | -o)
- prev=$arg
- ;;
- -s)
- stripme=" -s"
- continue
- ;;
- -*)
- ;;
- *)
- # If the previous option needed an argument, then skip it.
- if test -n "$prev"; then
- if test "x$prev" = x-m && test -n "$install_override_mode"; then
- arg2=$install_override_mode
- no_mode=false
- fi
- prev=
- else
- dest=$arg
- continue
- fi
- ;;
- esac
-
- # Aesthetically quote the argument.
- func_quote_for_eval "$arg"
- func_append install_prog " $func_quote_for_eval_result"
- if test -n "$arg2"; then
- func_quote_for_eval "$arg2"
- fi
- func_append install_shared_prog " $func_quote_for_eval_result"
- done
-
- test -z "$install_prog" && \
- func_fatal_help "you must specify an install program"
-
- test -n "$prev" && \
- func_fatal_help "the \`$prev' option requires an argument"
-
- if test -n "$install_override_mode" && $no_mode; then
- if $install_cp; then :; else
- func_quote_for_eval "$install_override_mode"
- func_append install_shared_prog " -m $func_quote_for_eval_result"
- fi
- fi
-
- if test -z "$files"; then
- if test -z "$dest"; then
- func_fatal_help "no file or destination specified"
- else
- func_fatal_help "you must specify a destination"
- fi
- fi
-
- # Strip any trailing slash from the destination.
- func_stripname '' '/' "$dest"
- dest=$func_stripname_result
-
- # Check to see that the destination is a directory.
- test -d "$dest" && isdir=yes
- if test "$isdir" = yes; then
- destdir="$dest"
- destname=
- else
- func_dirname_and_basename "$dest" "" "."
- destdir="$func_dirname_result"
- destname="$func_basename_result"
-
- # Not a directory, so check to see that there is only one file specified.
- set dummy $files; shift
- test "$#" -gt 1 && \
- func_fatal_help "\`$dest' is not a directory"
- fi
- case $destdir in
- [\\/]* | [A-Za-z]:[\\/]*) ;;
- *)
- for file in $files; do
- case $file in
- *.lo) ;;
- *)
- func_fatal_help "\`$destdir' must be an absolute directory name"
- ;;
- esac
- done
- ;;
- esac
-
- # This variable tells wrapper scripts just to set variables rather
- # than running their programs.
- libtool_install_magic="$magic"
-
- staticlibs=
- future_libdirs=
- current_libdirs=
- for file in $files; do
-
- # Do each installation.
- case $file in
- *.$libext)
- # Do the static libraries later.
- func_append staticlibs " $file"
- ;;
-
- *.la)
- func_resolve_sysroot "$file"
- file=$func_resolve_sysroot_result
-
- # Check to see that this really is a libtool archive.
- func_lalib_unsafe_p "$file" \
- || func_fatal_help "\`$file' is not a valid libtool archive"
-
- library_names=
- old_library=
- relink_command=
- func_source "$file"
-
- # Add the libdir to current_libdirs if it is the destination.
- if test "X$destdir" = "X$libdir"; then
- case "$current_libdirs " in
- *" $libdir "*) ;;
- *) func_append current_libdirs " $libdir" ;;
- esac
- else
- # Note the libdir as a future libdir.
- case "$future_libdirs " in
- *" $libdir "*) ;;
- *) func_append future_libdirs " $libdir" ;;
- esac
- fi
-
- func_dirname "$file" "/" ""
- dir="$func_dirname_result"
- func_append dir "$objdir"
-
- if test -n "$relink_command"; then
- # Determine the prefix the user has applied to our future dir.
- inst_prefix_dir=`$ECHO "$destdir" | $SED -e "s%$libdir\$%%"`
-
- # Don't allow the user to place us outside of our expected
- # location b/c this prevents finding dependent libraries that
- # are installed to the same prefix.
- # At present, this check doesn't affect windows .dll's that
- # are installed into $libdir/../bin (currently, that works fine)
- # but it's something to keep an eye on.
- test "$inst_prefix_dir" = "$destdir" && \
- func_fatal_error "error: cannot install \`$file' to a directory not ending in $libdir"
-
- if test -n "$inst_prefix_dir"; then
- # Stick the inst_prefix_dir data into the link command.
- relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"`
- else
- relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%%"`
- fi
-
- func_warning "relinking \`$file'"
- func_show_eval "$relink_command" \
- 'func_fatal_error "error: relink \`$file'\'' with the above command before installing it"'
- fi
-
- # See the names of the shared library.
- set dummy $library_names; shift
- if test -n "$1"; then
- realname="$1"
- shift
-
- srcname="$realname"
- test -n "$relink_command" && srcname="$realname"T
-
- # Install the shared library and build the symlinks.
- func_show_eval "$install_shared_prog $dir/$srcname $destdir/$realname" \
- 'exit $?'
- tstripme="$stripme"
- case $host_os in
- cygwin* | mingw* | pw32* | cegcc*)
- case $realname in
- *.dll.a)
- tstripme=""
- ;;
- esac
- ;;
- esac
- if test -n "$tstripme" && test -n "$striplib"; then
- func_show_eval "$striplib $destdir/$realname" 'exit $?'
- fi
-
- if test "$#" -gt 0; then
- # Delete the old symlinks, and create new ones.
- # Try `ln -sf' first, because the `ln' binary might depend on
- # the symlink we replace! Solaris /bin/ln does not understand -f,
- # so we also need to try rm && ln -s.
- for linkname
- do
- test "$linkname" != "$realname" \
- && func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })"
- done
- fi
-
- # Do each command in the postinstall commands.
- lib="$destdir/$realname"
- func_execute_cmds "$postinstall_cmds" 'exit $?'
- fi
-
- # Install the pseudo-library for information purposes.
- func_basename "$file"
- name="$func_basename_result"
- instname="$dir/$name"i
- func_show_eval "$install_prog $instname $destdir/$name" 'exit $?'
-
- # Maybe install the static library, too.
- test -n "$old_library" && func_append staticlibs " $dir/$old_library"
- ;;
-
- *.lo)
- # Install (i.e. copy) a libtool object.
-
- # Figure out destination file name, if it wasn't already specified.
- if test -n "$destname"; then
- destfile="$destdir/$destname"
- else
- func_basename "$file"
- destfile="$func_basename_result"
- destfile="$destdir/$destfile"
- fi
-
- # Deduce the name of the destination old-style object file.
- case $destfile in
- *.lo)
- func_lo2o "$destfile"
- staticdest=$func_lo2o_result
- ;;
- *.$objext)
- staticdest="$destfile"
- destfile=
- ;;
- *)
- func_fatal_help "cannot copy a libtool object to \`$destfile'"
- ;;
- esac
-
- # Install the libtool object if requested.
- test -n "$destfile" && \
- func_show_eval "$install_prog $file $destfile" 'exit $?'
-
- # Install the old object if enabled.
- if test "$build_old_libs" = yes; then
- # Deduce the name of the old-style object file.
- func_lo2o "$file"
- staticobj=$func_lo2o_result
- func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?'
- fi
- exit $EXIT_SUCCESS
- ;;
-
- *)
- # Figure out destination file name, if it wasn't already specified.
- if test -n "$destname"; then
- destfile="$destdir/$destname"
- else
- func_basename "$file"
- destfile="$func_basename_result"
- destfile="$destdir/$destfile"
- fi
-
- # If the file is missing, and there is a .exe on the end, strip it
- # because it is most likely a libtool script we actually want to
- # install
- stripped_ext=""
- case $file in
- *.exe)
- if test ! -f "$file"; then
- func_stripname '' '.exe' "$file"
- file=$func_stripname_result
- stripped_ext=".exe"
- fi
- ;;
- esac
-
- # Do a test to see if this is really a libtool program.
- case $host in
- *cygwin* | *mingw*)
- if func_ltwrapper_executable_p "$file"; then
- func_ltwrapper_scriptname "$file"
- wrapper=$func_ltwrapper_scriptname_result
- else
- func_stripname '' '.exe' "$file"
- wrapper=$func_stripname_result
- fi
- ;;
- *)
- wrapper=$file
- ;;
- esac
- if func_ltwrapper_script_p "$wrapper"; then
- notinst_deplibs=
- relink_command=
-
- func_source "$wrapper"
-
- # Check the variables that should have been set.
- test -z "$generated_by_libtool_version" && \
- func_fatal_error "invalid libtool wrapper script \`$wrapper'"
-
- finalize=yes
- for lib in $notinst_deplibs; do
- # Check to see that each library is installed.
- libdir=
- if test -f "$lib"; then
- func_source "$lib"
- fi
- libfile="$libdir/"`$ECHO "$lib" | $SED 's%^.*/%%g'` ### testsuite: skip nested quoting test
- if test -n "$libdir" && test ! -f "$libfile"; then
- func_warning "\`$lib' has not been installed in \`$libdir'"
- finalize=no
- fi
- done
-
- relink_command=
- func_source "$wrapper"
-
- outputname=
- if test "$fast_install" = no && test -n "$relink_command"; then
- $opt_dry_run || {
- if test "$finalize" = yes; then
- tmpdir=`func_mktempdir`
- func_basename "$file$stripped_ext"
- file="$func_basename_result"
- outputname="$tmpdir/$file"
- # Replace the output file specification.
- relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'`
-
- $opt_silent || {
- func_quote_for_expand "$relink_command"
- eval "func_echo $func_quote_for_expand_result"
- }
- if eval "$relink_command"; then :
- else
- func_error "error: relink \`$file' with the above command before installing it"
- $opt_dry_run || ${RM}r "$tmpdir"
- continue
- fi
- file="$outputname"
- else
- func_warning "cannot relink \`$file'"
- fi
- }
- else
- # Install the binary that we compiled earlier.
- file=`$ECHO "$file$stripped_ext" | $SED "s%\([^/]*\)$%$objdir/\1%"`
- fi
- fi
-
- # remove .exe since cygwin /usr/bin/install will append another
- # one anyway
- case $install_prog,$host in
- */usr/bin/install*,*cygwin*)
- case $file:$destfile in
- *.exe:*.exe)
- # this is ok
- ;;
- *.exe:*)
- destfile=$destfile.exe
- ;;
- *:*.exe)
- func_stripname '' '.exe' "$destfile"
- destfile=$func_stripname_result
- ;;
- esac
- ;;
- esac
- func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?'
- $opt_dry_run || if test -n "$outputname"; then
- ${RM}r "$tmpdir"
- fi
- ;;
- esac
- done
-
- for file in $staticlibs; do
- func_basename "$file"
- name="$func_basename_result"
-
- # Set up the ranlib parameters.
- oldlib="$destdir/$name"
- func_to_tool_file "$oldlib" func_convert_file_msys_to_w32
- tool_oldlib=$func_to_tool_file_result
-
- func_show_eval "$install_prog \$file \$oldlib" 'exit $?'
-
- if test -n "$stripme" && test -n "$old_striplib"; then
- func_show_eval "$old_striplib $tool_oldlib" 'exit $?'
- fi
-
- # Do each command in the postinstall commands.
- func_execute_cmds "$old_postinstall_cmds" 'exit $?'
- done
-
- test -n "$future_libdirs" && \
- func_warning "remember to run \`$progname --finish$future_libdirs'"
-
- if test -n "$current_libdirs"; then
- # Maybe just do a dry run.
- $opt_dry_run && current_libdirs=" -n$current_libdirs"
- exec_cmd='$SHELL $progpath $preserve_args --finish$current_libdirs'
- else
- exit $EXIT_SUCCESS
- fi
-}
-
-test "$opt_mode" = install && func_mode_install ${1+"$@"}
-
-
-# func_generate_dlsyms outputname originator pic_p
-# Extract symbols from dlprefiles and create ${outputname}S.o with
-# a dlpreopen symbol table.
-func_generate_dlsyms ()
-{
- $opt_debug
- my_outputname="$1"
- my_originator="$2"
- my_pic_p="${3-no}"
- my_prefix=`$ECHO "$my_originator" | sed 's%[^a-zA-Z0-9]%_%g'`
- my_dlsyms=
-
- if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
- if test -n "$NM" && test -n "$global_symbol_pipe"; then
- my_dlsyms="${my_outputname}S.c"
- else
- func_error "not configured to extract global symbols from dlpreopened files"
- fi
- fi
-
- if test -n "$my_dlsyms"; then
- case $my_dlsyms in
- "") ;;
- *.c)
- # Discover the nlist of each of the dlfiles.
- nlist="$output_objdir/${my_outputname}.nm"
-
- func_show_eval "$RM $nlist ${nlist}S ${nlist}T"
-
- # Parse the name list into a source file.
- func_verbose "creating $output_objdir/$my_dlsyms"
-
- $opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\
-/* $my_dlsyms - symbol resolution table for \`$my_outputname' dlsym emulation. */
-/* Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION */
-
-#ifdef __cplusplus
-extern \"C\" {
-#endif
-
-#if defined(__GNUC__) && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ > 4))
-#pragma GCC diagnostic ignored \"-Wstrict-prototypes\"
-#endif
-
-/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */
-#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE)
-/* DATA imports from DLLs on WIN32 con't be const, because runtime
- relocations are performed -- see ld's documentation on pseudo-relocs. */
-# define LT_DLSYM_CONST
-#elif defined(__osf__)
-/* This system does not cope well with relocations in const data. */
-# define LT_DLSYM_CONST
-#else
-# define LT_DLSYM_CONST const
-#endif
-
-/* External symbol declarations for the compiler. */\
-"
-
- if test "$dlself" = yes; then
- func_verbose "generating symbol list for \`$output'"
-
- $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist"
-
- # Add our own program objects to the symbol list.
- progfiles=`$ECHO "$objs$old_deplibs" | $SP2NL | $SED "$lo2o" | $NL2SP`
- for progfile in $progfiles; do
- func_to_tool_file "$progfile" func_convert_file_msys_to_w32
- func_verbose "extracting global C symbols from \`$func_to_tool_file_result'"
- $opt_dry_run || eval "$NM $func_to_tool_file_result | $global_symbol_pipe >> '$nlist'"
- done
-
- if test -n "$exclude_expsyms"; then
- $opt_dry_run || {
- eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T'
- eval '$MV "$nlist"T "$nlist"'
- }
- fi
-
- if test -n "$export_symbols_regex"; then
- $opt_dry_run || {
- eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T'
- eval '$MV "$nlist"T "$nlist"'
- }
- fi
-
- # Prepare the list of exported symbols
- if test -z "$export_symbols"; then
- export_symbols="$output_objdir/$outputname.exp"
- $opt_dry_run || {
- $RM $export_symbols
- eval "${SED} -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"'
- case $host in
- *cygwin* | *mingw* | *cegcc* )
- eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
- eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"'
- ;;
- esac
- }
- else
- $opt_dry_run || {
- eval "${SED} -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"'
- eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T'
- eval '$MV "$nlist"T "$nlist"'
- case $host in
- *cygwin* | *mingw* | *cegcc* )
- eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
- eval 'cat "$nlist" >> "$output_objdir/$outputname.def"'
- ;;
- esac
- }
- fi
- fi
-
- for dlprefile in $dlprefiles; do
- func_verbose "extracting global C symbols from \`$dlprefile'"
- func_basename "$dlprefile"
- name="$func_basename_result"
- case $host in
- *cygwin* | *mingw* | *cegcc* )
- # if an import library, we need to obtain dlname
- if func_win32_import_lib_p "$dlprefile"; then
- func_tr_sh "$dlprefile"
- eval "curr_lafile=\$libfile_$func_tr_sh_result"
- dlprefile_dlbasename=""
- if test -n "$curr_lafile" && func_lalib_p "$curr_lafile"; then
- # Use subshell, to avoid clobbering current variable values
- dlprefile_dlname=`source "$curr_lafile" && echo "$dlname"`
- if test -n "$dlprefile_dlname" ; then
- func_basename "$dlprefile_dlname"
- dlprefile_dlbasename="$func_basename_result"
- else
- # no lafile. user explicitly requested -dlpreopen <import library>.
- $sharedlib_from_linklib_cmd "$dlprefile"
- dlprefile_dlbasename=$sharedlib_from_linklib_result
- fi
- fi
- $opt_dry_run || {
- if test -n "$dlprefile_dlbasename" ; then
- eval '$ECHO ": $dlprefile_dlbasename" >> "$nlist"'
- else
- func_warning "Could not compute DLL name from $name"
- eval '$ECHO ": $name " >> "$nlist"'
- fi
- func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32
- eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe |
- $SED -e '/I __imp/d' -e 's/I __nm_/D /;s/_nm__//' >> '$nlist'"
- }
- else # not an import lib
- $opt_dry_run || {
- eval '$ECHO ": $name " >> "$nlist"'
- func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32
- eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'"
- }
- fi
- ;;
- *)
- $opt_dry_run || {
- eval '$ECHO ": $name " >> "$nlist"'
- func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32
- eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'"
- }
- ;;
- esac
- done
-
- $opt_dry_run || {
- # Make sure we have at least an empty file.
- test -f "$nlist" || : > "$nlist"
-
- if test -n "$exclude_expsyms"; then
- $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T
- $MV "$nlist"T "$nlist"
- fi
-
- # Try sorting and uniquifying the output.
- if $GREP -v "^: " < "$nlist" |
- if sort -k 3 </dev/null >/dev/null 2>&1; then
- sort -k 3
- else
- sort +2
- fi |
- uniq > "$nlist"S; then
- :
- else
- $GREP -v "^: " < "$nlist" > "$nlist"S
- fi
-
- if test -f "$nlist"S; then
- eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"'
- else
- echo '/* NONE */' >> "$output_objdir/$my_dlsyms"
- fi
-
- echo >> "$output_objdir/$my_dlsyms" "\
-
-/* The mapping between symbol names and symbols. */
-typedef struct {
- const char *name;
- void *address;
-} lt_dlsymlist;
-extern LT_DLSYM_CONST lt_dlsymlist
-lt_${my_prefix}_LTX_preloaded_symbols[];
-LT_DLSYM_CONST lt_dlsymlist
-lt_${my_prefix}_LTX_preloaded_symbols[] =
-{\
- { \"$my_originator\", (void *) 0 },"
-
- case $need_lib_prefix in
- no)
- eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms"
- ;;
- *)
- eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms"
- ;;
- esac
- echo >> "$output_objdir/$my_dlsyms" "\
- {0, (void *) 0}
-};
-
-/* This works around a problem in FreeBSD linker */
-#ifdef FREEBSD_WORKAROUND
-static const void *lt_preloaded_setup() {
- return lt_${my_prefix}_LTX_preloaded_symbols;
-}
-#endif
-
-#ifdef __cplusplus
-}
-#endif\
-"
- } # !$opt_dry_run
-
- pic_flag_for_symtable=
- case "$compile_command " in
- *" -static "*) ;;
- *)
- case $host in
- # compiling the symbol table file with pic_flag works around
- # a FreeBSD bug that causes programs to crash when -lm is
- # linked before any other PIC object. But we must not use
- # pic_flag when linking with -static. The problem exists in
- # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1.
- *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
- pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;;
- *-*-hpux*)
- pic_flag_for_symtable=" $pic_flag" ;;
- *)
- if test "X$my_pic_p" != Xno; then
- pic_flag_for_symtable=" $pic_flag"
- fi
- ;;
- esac
- ;;
- esac
- symtab_cflags=
- for arg in $LTCFLAGS; do
- case $arg in
- -pie | -fpie | -fPIE) ;;
- *) func_append symtab_cflags " $arg" ;;
- esac
- done
-
- # Now compile the dynamic symbol file.
- func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?'
-
- # Clean up the generated files.
- func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T"'
-
- # Transform the symbol file into the correct name.
- symfileobj="$output_objdir/${my_outputname}S.$objext"
- case $host in
- *cygwin* | *mingw* | *cegcc* )
- if test -f "$output_objdir/$my_outputname.def"; then
- compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"`
- finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"`
- else
- compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"`
- finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"`
- fi
- ;;
- *)
- compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"`
- finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"`
- ;;
- esac
- ;;
- *)
- func_fatal_error "unknown suffix for \`$my_dlsyms'"
- ;;
- esac
- else
- # We keep going just in case the user didn't refer to
- # lt_preloaded_symbols. The linker will fail if global_symbol_pipe
- # really was required.
-
- # Nullify the symbol file.
- compile_command=`$ECHO "$compile_command" | $SED "s% @SYMFILE@%%"`
- finalize_command=`$ECHO "$finalize_command" | $SED "s% @SYMFILE@%%"`
- fi
-}
-
-# func_win32_libid arg
-# return the library type of file 'arg'
-#
-# Need a lot of goo to handle *both* DLLs and import libs
-# Has to be a shell function in order to 'eat' the argument
-# that is supplied when $file_magic_command is called.
-# Despite the name, also deal with 64 bit binaries.
-func_win32_libid ()
-{
- $opt_debug
- win32_libid_type="unknown"
- win32_fileres=`file -L $1 2>/dev/null`
- case $win32_fileres in
- *ar\ archive\ import\ library*) # definitely import
- win32_libid_type="x86 archive import"
- ;;
- *ar\ archive*) # could be an import, or static
- # Keep the egrep pattern in sync with the one in _LT_CHECK_MAGIC_METHOD.
- if eval $OBJDUMP -f $1 | $SED -e '10q' 2>/dev/null |
- $EGREP 'file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' >/dev/null; then
- func_to_tool_file "$1" func_convert_file_msys_to_w32
- win32_nmres=`eval $NM -f posix -A \"$func_to_tool_file_result\" |
- $SED -n -e '
- 1,100{
- / I /{
- s,.*,import,
- p
- q
- }
- }'`
- case $win32_nmres in
- import*) win32_libid_type="x86 archive import";;
- *) win32_libid_type="x86 archive static";;
- esac
- fi
- ;;
- *DLL*)
- win32_libid_type="x86 DLL"
- ;;
- *executable*) # but shell scripts are "executable" too...
- case $win32_fileres in
- *MS\ Windows\ PE\ Intel*)
- win32_libid_type="x86 DLL"
- ;;
- esac
- ;;
- esac
- $ECHO "$win32_libid_type"
-}
-
-# func_cygming_dll_for_implib ARG
-#
-# Platform-specific function to extract the
-# name of the DLL associated with the specified
-# import library ARG.
-# Invoked by eval'ing the libtool variable
-# $sharedlib_from_linklib_cmd
-# Result is available in the variable
-# $sharedlib_from_linklib_result
-func_cygming_dll_for_implib ()
-{
- $opt_debug
- sharedlib_from_linklib_result=`$DLLTOOL --identify-strict --identify "$1"`
-}
-
-# func_cygming_dll_for_implib_fallback_core SECTION_NAME LIBNAMEs
-#
-# The is the core of a fallback implementation of a
-# platform-specific function to extract the name of the
-# DLL associated with the specified import library LIBNAME.
-#
-# SECTION_NAME is either .idata$6 or .idata$7, depending
-# on the platform and compiler that created the implib.
-#
-# Echos the name of the DLL associated with the
-# specified import library.
-func_cygming_dll_for_implib_fallback_core ()
-{
- $opt_debug
- match_literal=`$ECHO "$1" | $SED "$sed_make_literal_regex"`
- $OBJDUMP -s --section "$1" "$2" 2>/dev/null |
- $SED '/^Contents of section '"$match_literal"':/{
- # Place marker at beginning of archive member dllname section
- s/.*/====MARK====/
- p
- d
- }
- # These lines can sometimes be longer than 43 characters, but
- # are always uninteresting
- /:[ ]*file format pe[i]\{,1\}-/d
- /^In archive [^:]*:/d
- # Ensure marker is printed
- /^====MARK====/p
- # Remove all lines with less than 43 characters
- /^.\{43\}/!d
- # From remaining lines, remove first 43 characters
- s/^.\{43\}//' |
- $SED -n '
- # Join marker and all lines until next marker into a single line
- /^====MARK====/ b para
- H
- $ b para
- b
- :para
- x
- s/\n//g
- # Remove the marker
- s/^====MARK====//
- # Remove trailing dots and whitespace
- s/[\. \t]*$//
- # Print
- /./p' |
- # we now have a list, one entry per line, of the stringified
- # contents of the appropriate section of all members of the
- # archive which possess that section. Heuristic: eliminate
- # all those which have a first or second character that is
- # a '.' (that is, objdump's representation of an unprintable
- # character.) This should work for all archives with less than
- # 0x302f exports -- but will fail for DLLs whose name actually
- # begins with a literal '.' or a single character followed by
- # a '.'.
- #
- # Of those that remain, print the first one.
- $SED -e '/^\./d;/^.\./d;q'
-}
-
-# func_cygming_gnu_implib_p ARG
-# This predicate returns with zero status (TRUE) if
-# ARG is a GNU/binutils-style import library. Returns
-# with nonzero status (FALSE) otherwise.
-func_cygming_gnu_implib_p ()
-{
- $opt_debug
- func_to_tool_file "$1" func_convert_file_msys_to_w32
- func_cygming_gnu_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $EGREP ' (_head_[A-Za-z0-9_]+_[ad]l*|[A-Za-z0-9_]+_[ad]l*_iname)$'`
- test -n "$func_cygming_gnu_implib_tmp"
-}
-
-# func_cygming_ms_implib_p ARG
-# This predicate returns with zero status (TRUE) if
-# ARG is an MS-style import library. Returns
-# with nonzero status (FALSE) otherwise.
-func_cygming_ms_implib_p ()
-{
- $opt_debug
- func_to_tool_file "$1" func_convert_file_msys_to_w32
- func_cygming_ms_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $GREP '_NULL_IMPORT_DESCRIPTOR'`
- test -n "$func_cygming_ms_implib_tmp"
-}
-
-# func_cygming_dll_for_implib_fallback ARG
-# Platform-specific function to extract the
-# name of the DLL associated with the specified
-# import library ARG.
-#
-# This fallback implementation is for use when $DLLTOOL
-# does not support the --identify-strict option.
-# Invoked by eval'ing the libtool variable
-# $sharedlib_from_linklib_cmd
-# Result is available in the variable
-# $sharedlib_from_linklib_result
-func_cygming_dll_for_implib_fallback ()
-{
- $opt_debug
- if func_cygming_gnu_implib_p "$1" ; then
- # binutils import library
- sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$7' "$1"`
- elif func_cygming_ms_implib_p "$1" ; then
- # ms-generated import library
- sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$6' "$1"`
- else
- # unknown
- sharedlib_from_linklib_result=""
- fi
-}
-
-
-# func_extract_an_archive dir oldlib
-func_extract_an_archive ()
-{
- $opt_debug
- f_ex_an_ar_dir="$1"; shift
- f_ex_an_ar_oldlib="$1"
- if test "$lock_old_archive_extraction" = yes; then
- lockfile=$f_ex_an_ar_oldlib.lock
- until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do
- func_echo "Waiting for $lockfile to be removed"
- sleep 2
- done
- fi
- func_show_eval "(cd \$f_ex_an_ar_dir && $AR x \"\$f_ex_an_ar_oldlib\")" \
- 'stat=$?; rm -f "$lockfile"; exit $stat'
- if test "$lock_old_archive_extraction" = yes; then
- $opt_dry_run || rm -f "$lockfile"
- fi
- if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then
- :
- else
- func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib"
- fi
-}
-
-
-# func_extract_archives gentop oldlib ...
-func_extract_archives ()
-{
- $opt_debug
- my_gentop="$1"; shift
- my_oldlibs=${1+"$@"}
- my_oldobjs=""
- my_xlib=""
- my_xabs=""
- my_xdir=""
-
- for my_xlib in $my_oldlibs; do
- # Extract the objects.
- case $my_xlib in
- [\\/]* | [A-Za-z]:[\\/]*) my_xabs="$my_xlib" ;;
- *) my_xabs=`pwd`"/$my_xlib" ;;
- esac
- func_basename "$my_xlib"
- my_xlib="$func_basename_result"
- my_xlib_u=$my_xlib
- while :; do
- case " $extracted_archives " in
- *" $my_xlib_u "*)
- func_arith $extracted_serial + 1
- extracted_serial=$func_arith_result
- my_xlib_u=lt$extracted_serial-$my_xlib ;;
- *) break ;;
- esac
- done
- extracted_archives="$extracted_archives $my_xlib_u"
- my_xdir="$my_gentop/$my_xlib_u"
-
- func_mkdir_p "$my_xdir"
-
- case $host in
- *-darwin*)
- func_verbose "Extracting $my_xabs"
- # Do not bother doing anything if just a dry run
- $opt_dry_run || {
- darwin_orig_dir=`pwd`
- cd $my_xdir || exit $?
- darwin_archive=$my_xabs
- darwin_curdir=`pwd`
- darwin_base_archive=`basename "$darwin_archive"`
- darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true`
- if test -n "$darwin_arches"; then
- darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'`
- darwin_arch=
- func_verbose "$darwin_base_archive has multiple architectures $darwin_arches"
- for darwin_arch in $darwin_arches ; do
- func_mkdir_p "unfat-$$/${darwin_base_archive}-${darwin_arch}"
- $LIPO -thin $darwin_arch -output "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" "${darwin_archive}"
- cd "unfat-$$/${darwin_base_archive}-${darwin_arch}"
- func_extract_an_archive "`pwd`" "${darwin_base_archive}"
- cd "$darwin_curdir"
- $RM "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}"
- done # $darwin_arches
- ## Okay now we've a bunch of thin objects, gotta fatten them up :)
- darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$basename" | sort -u`
- darwin_file=
- darwin_files=
- for darwin_file in $darwin_filelist; do
- darwin_files=`find unfat-$$ -name $darwin_file -print | sort | $NL2SP`
- $LIPO -create -output "$darwin_file" $darwin_files
- done # $darwin_filelist
- $RM -rf unfat-$$
- cd "$darwin_orig_dir"
- else
- cd $darwin_orig_dir
- func_extract_an_archive "$my_xdir" "$my_xabs"
- fi # $darwin_arches
- } # !$opt_dry_run
- ;;
- *)
- func_extract_an_archive "$my_xdir" "$my_xabs"
- ;;
- esac
- my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | sort | $NL2SP`
- done
-
- func_extract_archives_result="$my_oldobjs"
-}
-
-
-# func_emit_wrapper [arg=no]
-#
-# Emit a libtool wrapper script on stdout.
-# Don't directly open a file because we may want to
-# incorporate the script contents within a cygwin/mingw
-# wrapper executable. Must ONLY be called from within
-# func_mode_link because it depends on a number of variables
-# set therein.
-#
-# ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR
-# variable will take. If 'yes', then the emitted script
-# will assume that the directory in which it is stored is
-# the $objdir directory. This is a cygwin/mingw-specific
-# behavior.
-func_emit_wrapper ()
-{
- func_emit_wrapper_arg1=${1-no}
-
- $ECHO "\
-#! $SHELL
-
-# $output - temporary wrapper script for $objdir/$outputname
-# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
-#
-# The $output program cannot be directly executed until all the libtool
-# libraries that it depends on are installed.
-#
-# This wrapper script should never be moved out of the build directory.
-# If it is, it will not operate correctly.
-
-# Sed substitution that helps us do robust quoting. It backslashifies
-# metacharacters that are still active within double-quoted strings.
-sed_quote_subst='$sed_quote_subst'
-
-# Be Bourne compatible
-if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then
- emulate sh
- NULLCMD=:
- # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which
- # is contrary to our usage. Disable this feature.
- alias -g '\${1+\"\$@\"}'='\"\$@\"'
- setopt NO_GLOB_SUBST
-else
- case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac
-fi
-BIN_SH=xpg4; export BIN_SH # for Tru64
-DUALCASE=1; export DUALCASE # for MKS sh
-
-# The HP-UX ksh and POSIX shell print the target directory to stdout
-# if CDPATH is set.
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
-
-relink_command=\"$relink_command\"
-
-# This environment variable determines our operation mode.
-if test \"\$libtool_install_magic\" = \"$magic\"; then
- # install mode needs the following variables:
- generated_by_libtool_version='$macro_version'
- notinst_deplibs='$notinst_deplibs'
-else
- # When we are sourced in execute mode, \$file and \$ECHO are already set.
- if test \"\$libtool_execute_magic\" != \"$magic\"; then
- file=\"\$0\""
-
- qECHO=`$ECHO "$ECHO" | $SED "$sed_quote_subst"`
- $ECHO "\
-
-# A function that is used when there is no print builtin or printf.
-func_fallback_echo ()
-{
- eval 'cat <<_LTECHO_EOF
-\$1
-_LTECHO_EOF'
-}
- ECHO=\"$qECHO\"
- fi
-
-# Very basic option parsing. These options are (a) specific to
-# the libtool wrapper, (b) are identical between the wrapper
-# /script/ and the wrapper /executable/ which is used only on
-# windows platforms, and (c) all begin with the string "--lt-"
-# (application programs are unlikely to have options which match
-# this pattern).
-#
-# There are only two supported options: --lt-debug and
-# --lt-dump-script. There is, deliberately, no --lt-help.
-#
-# The first argument to this parsing function should be the
-# script's $0 value, followed by "$@".
-lt_option_debug=
-func_parse_lt_options ()
-{
- lt_script_arg0=\$0
- shift
- for lt_opt
- do
- case \"\$lt_opt\" in
- --lt-debug) lt_option_debug=1 ;;
- --lt-dump-script)
- lt_dump_D=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%/[^/]*$%%'\`
- test \"X\$lt_dump_D\" = \"X\$lt_script_arg0\" && lt_dump_D=.
- lt_dump_F=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%^.*/%%'\`
- cat \"\$lt_dump_D/\$lt_dump_F\"
- exit 0
- ;;
- --lt-*)
- \$ECHO \"Unrecognized --lt- option: '\$lt_opt'\" 1>&2
- exit 1
- ;;
- esac
- done
-
- # Print the debug banner immediately:
- if test -n \"\$lt_option_debug\"; then
- echo \"${outputname}:${output}:\${LINENO}: libtool wrapper (GNU $PACKAGE$TIMESTAMP) $VERSION\" 1>&2
- fi
-}
-
-# Used when --lt-debug. Prints its arguments to stdout
-# (redirection is the responsibility of the caller)
-func_lt_dump_args ()
-{
- lt_dump_args_N=1;
- for lt_arg
- do
- \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[\$lt_dump_args_N]: \$lt_arg\"
- lt_dump_args_N=\`expr \$lt_dump_args_N + 1\`
- done
-}
-
-# Core function for launching the target application
-func_exec_program_core ()
-{
-"
- case $host in
- # Backslashes separate directories on plain windows
- *-*-mingw | *-*-os2* | *-cegcc*)
- $ECHO "\
- if test -n \"\$lt_option_debug\"; then
- \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir\\\\\$program\" 1>&2
- func_lt_dump_args \${1+\"\$@\"} 1>&2
- fi
- exec \"\$progdir\\\\\$program\" \${1+\"\$@\"}
-"
- ;;
-
- *)
- $ECHO "\
- if test -n \"\$lt_option_debug\"; then
- \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir/\$program\" 1>&2
- func_lt_dump_args \${1+\"\$@\"} 1>&2
- fi
- exec \"\$progdir/\$program\" \${1+\"\$@\"}
-"
- ;;
- esac
- $ECHO "\
- \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2
- exit 1
-}
-
-# A function to encapsulate launching the target application
-# Strips options in the --lt-* namespace from \$@ and
-# launches target application with the remaining arguments.
-func_exec_program ()
-{
- case \" \$* \" in
- *\\ --lt-*)
- for lt_wr_arg
- do
- case \$lt_wr_arg in
- --lt-*) ;;
- *) set x \"\$@\" \"\$lt_wr_arg\"; shift;;
- esac
- shift
- done ;;
- esac
- func_exec_program_core \${1+\"\$@\"}
-}
-
- # Parse options
- func_parse_lt_options \"\$0\" \${1+\"\$@\"}
-
- # Find the directory that this script lives in.
- thisdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*$%%'\`
- test \"x\$thisdir\" = \"x\$file\" && thisdir=.
-
- # Follow symbolic links until we get to the real thisdir.
- file=\`ls -ld \"\$file\" | $SED -n 's/.*-> //p'\`
- while test -n \"\$file\"; do
- destdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*\$%%'\`
-
- # If there was a directory component, then change thisdir.
- if test \"x\$destdir\" != \"x\$file\"; then
- case \"\$destdir\" in
- [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;;
- *) thisdir=\"\$thisdir/\$destdir\" ;;
- esac
- fi
-
- file=\`\$ECHO \"\$file\" | $SED 's%^.*/%%'\`
- file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\`
- done
-
- # Usually 'no', except on cygwin/mingw when embedded into
- # the cwrapper.
- WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1
- if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then
- # special case for '.'
- if test \"\$thisdir\" = \".\"; then
- thisdir=\`pwd\`
- fi
- # remove .libs from thisdir
- case \"\$thisdir\" in
- *[\\\\/]$objdir ) thisdir=\`\$ECHO \"\$thisdir\" | $SED 's%[\\\\/][^\\\\/]*$%%'\` ;;
- $objdir ) thisdir=. ;;
- esac
- fi
-
- # Try to get the absolute directory name.
- absdir=\`cd \"\$thisdir\" && pwd\`
- test -n \"\$absdir\" && thisdir=\"\$absdir\"
-"
-
- if test "$fast_install" = yes; then
- $ECHO "\
- program=lt-'$outputname'$exeext
- progdir=\"\$thisdir/$objdir\"
-
- if test ! -f \"\$progdir/\$program\" ||
- { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\
- test \"X\$file\" != \"X\$progdir/\$program\"; }; then
-
- file=\"\$\$-\$program\"
-
- if test ! -d \"\$progdir\"; then
- $MKDIR \"\$progdir\"
- else
- $RM \"\$progdir/\$file\"
- fi"
-
- $ECHO "\
-
- # relink executable if necessary
- if test -n \"\$relink_command\"; then
- if relink_command_output=\`eval \$relink_command 2>&1\`; then :
- else
- $ECHO \"\$relink_command_output\" >&2
- $RM \"\$progdir/\$file\"
- exit 1
- fi
- fi
-
- $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null ||
- { $RM \"\$progdir/\$program\";
- $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; }
- $RM \"\$progdir/\$file\"
- fi"
- else
- $ECHO "\
- program='$outputname'
- progdir=\"\$thisdir/$objdir\"
-"
- fi
-
- $ECHO "\
-
- if test -f \"\$progdir/\$program\"; then"
-
- # fixup the dll searchpath if we need to.
- #
- # Fix the DLL searchpath if we need to. Do this before prepending
- # to shlibpath, because on Windows, both are PATH and uninstalled
- # libraries must come first.
- if test -n "$dllsearchpath"; then
- $ECHO "\
- # Add the dll search path components to the executable PATH
- PATH=$dllsearchpath:\$PATH
-"
- fi
-
- # Export our shlibpath_var if we have one.
- if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then
- $ECHO "\
- # Add our own library path to $shlibpath_var
- $shlibpath_var=\"$temp_rpath\$$shlibpath_var\"
-
- # Some systems cannot cope with colon-terminated $shlibpath_var
- # The second colon is a workaround for a bug in BeOS R4 sed
- $shlibpath_var=\`\$ECHO \"\$$shlibpath_var\" | $SED 's/::*\$//'\`
-
- export $shlibpath_var
-"
- fi
-
- $ECHO "\
- if test \"\$libtool_execute_magic\" != \"$magic\"; then
- # Run the actual program with our arguments.
- func_exec_program \${1+\"\$@\"}
- fi
- else
- # The program doesn't exist.
- \$ECHO \"\$0: error: \\\`\$progdir/\$program' does not exist\" 1>&2
- \$ECHO \"This script is just a wrapper for \$program.\" 1>&2
- \$ECHO \"See the $PACKAGE documentation for more information.\" 1>&2
- exit 1
- fi
-fi\
-"
-}
-
-
-# func_emit_cwrapperexe_src
-# emit the source code for a wrapper executable on stdout
-# Must ONLY be called from within func_mode_link because
-# it depends on a number of variable set therein.
-func_emit_cwrapperexe_src ()
-{
- cat <<EOF
-
-/* $cwrappersource - temporary wrapper executable for $objdir/$outputname
- Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
-
- The $output program cannot be directly executed until all the libtool
- libraries that it depends on are installed.
-
- This wrapper executable should never be moved out of the build directory.
- If it is, it will not operate correctly.
-*/
-EOF
- cat <<"EOF"
-#ifdef _MSC_VER
-# define _CRT_SECURE_NO_DEPRECATE 1
-#endif
-#include <stdio.h>
-#include <stdlib.h>
-#ifdef _MSC_VER
-# include <direct.h>
-# include <process.h>
-# include <io.h>
-#else
-# include <unistd.h>
-# include <stdint.h>
-# ifdef __CYGWIN__
-# include <io.h>
-# endif
-#endif
-#include <malloc.h>
-#include <stdarg.h>
-#include <assert.h>
-#include <string.h>
-#include <ctype.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-
-/* declarations of non-ANSI functions */
-#if defined(__MINGW32__)
-# ifdef __STRICT_ANSI__
-int _putenv (const char *);
-# endif
-#elif defined(__CYGWIN__)
-# ifdef __STRICT_ANSI__
-char *realpath (const char *, char *);
-int putenv (char *);
-int setenv (const char *, const char *, int);
-# endif
-/* #elif defined (other platforms) ... */
-#endif
-
-/* portability defines, excluding path handling macros */
-#if defined(_MSC_VER)
-# define setmode _setmode
-# define stat _stat
-# define chmod _chmod
-# define getcwd _getcwd
-# define putenv _putenv
-# define S_IXUSR _S_IEXEC
-# ifndef _INTPTR_T_DEFINED
-# define _INTPTR_T_DEFINED
-# define intptr_t int
-# endif
-#elif defined(__MINGW32__)
-# define setmode _setmode
-# define stat _stat
-# define chmod _chmod
-# define getcwd _getcwd
-# define putenv _putenv
-#elif defined(__CYGWIN__)
-# define HAVE_SETENV
-# define FOPEN_WB "wb"
-/* #elif defined (other platforms) ... */
-#endif
-
-#if defined(PATH_MAX)
-# define LT_PATHMAX PATH_MAX
-#elif defined(MAXPATHLEN)
-# define LT_PATHMAX MAXPATHLEN
-#else
-# define LT_PATHMAX 1024
-#endif
-
-#ifndef S_IXOTH
-# define S_IXOTH 0
-#endif
-#ifndef S_IXGRP
-# define S_IXGRP 0
-#endif
-
-/* path handling portability macros */
-#ifndef DIR_SEPARATOR
-# define DIR_SEPARATOR '/'
-# define PATH_SEPARATOR ':'
-#endif
-
-#if defined (_WIN32) || defined (__MSDOS__) || defined (__DJGPP__) || \
- defined (__OS2__)
-# define HAVE_DOS_BASED_FILE_SYSTEM
-# define FOPEN_WB "wb"
-# ifndef DIR_SEPARATOR_2
-# define DIR_SEPARATOR_2 '\\'
-# endif
-# ifndef PATH_SEPARATOR_2
-# define PATH_SEPARATOR_2 ';'
-# endif
-#endif
-
-#ifndef DIR_SEPARATOR_2
-# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR)
-#else /* DIR_SEPARATOR_2 */
-# define IS_DIR_SEPARATOR(ch) \
- (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2))
-#endif /* DIR_SEPARATOR_2 */
-
-#ifndef PATH_SEPARATOR_2
-# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR)
-#else /* PATH_SEPARATOR_2 */
-# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2)
-#endif /* PATH_SEPARATOR_2 */
-
-#ifndef FOPEN_WB
-# define FOPEN_WB "w"
-#endif
-#ifndef _O_BINARY
-# define _O_BINARY 0
-#endif
-
-#define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type)))
-#define XFREE(stale) do { \
- if (stale) { free ((void *) stale); stale = 0; } \
-} while (0)
-
-#if defined(LT_DEBUGWRAPPER)
-static int lt_debug = 1;
-#else
-static int lt_debug = 0;
-#endif
-
-const char *program_name = "libtool-wrapper"; /* in case xstrdup fails */
-
-void *xmalloc (size_t num);
-char *xstrdup (const char *string);
-const char *base_name (const char *name);
-char *find_executable (const char *wrapper);
-char *chase_symlinks (const char *pathspec);
-int make_executable (const char *path);
-int check_executable (const char *path);
-char *strendzap (char *str, const char *pat);
-void lt_debugprintf (const char *file, int line, const char *fmt, ...);
-void lt_fatal (const char *file, int line, const char *message, ...);
-static const char *nonnull (const char *s);
-static const char *nonempty (const char *s);
-void lt_setenv (const char *name, const char *value);
-char *lt_extend_str (const char *orig_value, const char *add, int to_end);
-void lt_update_exe_path (const char *name, const char *value);
-void lt_update_lib_path (const char *name, const char *value);
-char **prepare_spawn (char **argv);
-void lt_dump_script (FILE *f);
-EOF
-
- cat <<EOF
-volatile const char * MAGIC_EXE = "$magic_exe";
-const char * LIB_PATH_VARNAME = "$shlibpath_var";
-EOF
-
- if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then
- func_to_host_path "$temp_rpath"
- cat <<EOF
-const char * LIB_PATH_VALUE = "$func_to_host_path_result";
-EOF
- else
- cat <<"EOF"
-const char * LIB_PATH_VALUE = "";
-EOF
- fi
-
- if test -n "$dllsearchpath"; then
- func_to_host_path "$dllsearchpath:"
- cat <<EOF
-const char * EXE_PATH_VARNAME = "PATH";
-const char * EXE_PATH_VALUE = "$func_to_host_path_result";
-EOF
- else
- cat <<"EOF"
-const char * EXE_PATH_VARNAME = "";
-const char * EXE_PATH_VALUE = "";
-EOF
- fi
-
- if test "$fast_install" = yes; then
- cat <<EOF
-const char * TARGET_PROGRAM_NAME = "lt-$outputname"; /* hopefully, no .exe */
-EOF
- else
- cat <<EOF
-const char * TARGET_PROGRAM_NAME = "$outputname"; /* hopefully, no .exe */
-EOF
- fi
-
-
- cat <<"EOF"
-
-#define LTWRAPPER_OPTION_PREFIX "--lt-"
-
-static const char *ltwrapper_option_prefix = LTWRAPPER_OPTION_PREFIX;
-static const char *dumpscript_opt = LTWRAPPER_OPTION_PREFIX "dump-script";
-static const char *debug_opt = LTWRAPPER_OPTION_PREFIX "debug";
-
-int
-main (int argc, char *argv[])
-{
- char **newargz;
- int newargc;
- char *tmp_pathspec;
- char *actual_cwrapper_path;
- char *actual_cwrapper_name;
- char *target_name;
- char *lt_argv_zero;
- intptr_t rval = 127;
-
- int i;
-
- program_name = (char *) xstrdup (base_name (argv[0]));
- newargz = XMALLOC (char *, argc + 1);
-
- /* very simple arg parsing; don't want to rely on getopt
- * also, copy all non cwrapper options to newargz, except
- * argz[0], which is handled differently
- */
- newargc=0;
- for (i = 1; i < argc; i++)
- {
- if (strcmp (argv[i], dumpscript_opt) == 0)
- {
-EOF
- case "$host" in
- *mingw* | *cygwin* )
- # make stdout use "unix" line endings
- echo " setmode(1,_O_BINARY);"
- ;;
- esac
-
- cat <<"EOF"
- lt_dump_script (stdout);
- return 0;
- }
- if (strcmp (argv[i], debug_opt) == 0)
- {
- lt_debug = 1;
- continue;
- }
- if (strcmp (argv[i], ltwrapper_option_prefix) == 0)
- {
- /* however, if there is an option in the LTWRAPPER_OPTION_PREFIX
- namespace, but it is not one of the ones we know about and
- have already dealt with, above (inluding dump-script), then
- report an error. Otherwise, targets might begin to believe
- they are allowed to use options in the LTWRAPPER_OPTION_PREFIX
- namespace. The first time any user complains about this, we'll
- need to make LTWRAPPER_OPTION_PREFIX a configure-time option
- or a configure.ac-settable value.
- */
- lt_fatal (__FILE__, __LINE__,
- "unrecognized %s option: '%s'",
- ltwrapper_option_prefix, argv[i]);
- }
- /* otherwise ... */
- newargz[++newargc] = xstrdup (argv[i]);
- }
- newargz[++newargc] = NULL;
-
-EOF
- cat <<EOF
- /* The GNU banner must be the first non-error debug message */
- lt_debugprintf (__FILE__, __LINE__, "libtool wrapper (GNU $PACKAGE$TIMESTAMP) $VERSION\n");
-EOF
- cat <<"EOF"
- lt_debugprintf (__FILE__, __LINE__, "(main) argv[0]: %s\n", argv[0]);
- lt_debugprintf (__FILE__, __LINE__, "(main) program_name: %s\n", program_name);
-
- tmp_pathspec = find_executable (argv[0]);
- if (tmp_pathspec == NULL)
- lt_fatal (__FILE__, __LINE__, "couldn't find %s", argv[0]);
- lt_debugprintf (__FILE__, __LINE__,
- "(main) found exe (before symlink chase) at: %s\n",
- tmp_pathspec);
-
- actual_cwrapper_path = chase_symlinks (tmp_pathspec);
- lt_debugprintf (__FILE__, __LINE__,
- "(main) found exe (after symlink chase) at: %s\n",
- actual_cwrapper_path);
- XFREE (tmp_pathspec);
-
- actual_cwrapper_name = xstrdup (base_name (actual_cwrapper_path));
- strendzap (actual_cwrapper_path, actual_cwrapper_name);
-
- /* wrapper name transforms */
- strendzap (actual_cwrapper_name, ".exe");
- tmp_pathspec = lt_extend_str (actual_cwrapper_name, ".exe", 1);
- XFREE (actual_cwrapper_name);
- actual_cwrapper_name = tmp_pathspec;
- tmp_pathspec = 0;
-
- /* target_name transforms -- use actual target program name; might have lt- prefix */
- target_name = xstrdup (base_name (TARGET_PROGRAM_NAME));
- strendzap (target_name, ".exe");
- tmp_pathspec = lt_extend_str (target_name, ".exe", 1);
- XFREE (target_name);
- target_name = tmp_pathspec;
- tmp_pathspec = 0;
-
- lt_debugprintf (__FILE__, __LINE__,
- "(main) libtool target name: %s\n",
- target_name);
-EOF
-
- cat <<EOF
- newargz[0] =
- XMALLOC (char, (strlen (actual_cwrapper_path) +
- strlen ("$objdir") + 1 + strlen (actual_cwrapper_name) + 1));
- strcpy (newargz[0], actual_cwrapper_path);
- strcat (newargz[0], "$objdir");
- strcat (newargz[0], "/");
-EOF
-
- cat <<"EOF"
- /* stop here, and copy so we don't have to do this twice */
- tmp_pathspec = xstrdup (newargz[0]);
-
- /* do NOT want the lt- prefix here, so use actual_cwrapper_name */
- strcat (newargz[0], actual_cwrapper_name);
-
- /* DO want the lt- prefix here if it exists, so use target_name */
- lt_argv_zero = lt_extend_str (tmp_pathspec, target_name, 1);
- XFREE (tmp_pathspec);
- tmp_pathspec = NULL;
-EOF
-
- case $host_os in
- mingw*)
- cat <<"EOF"
- {
- char* p;
- while ((p = strchr (newargz[0], '\\')) != NULL)
- {
- *p = '/';
- }
- while ((p = strchr (lt_argv_zero, '\\')) != NULL)
- {
- *p = '/';
- }
- }
-EOF
- ;;
- esac
-
- cat <<"EOF"
- XFREE (target_name);
- XFREE (actual_cwrapper_path);
- XFREE (actual_cwrapper_name);
-
- lt_setenv ("BIN_SH", "xpg4"); /* for Tru64 */
- lt_setenv ("DUALCASE", "1"); /* for MSK sh */
- /* Update the DLL searchpath. EXE_PATH_VALUE ($dllsearchpath) must
- be prepended before (that is, appear after) LIB_PATH_VALUE ($temp_rpath)
- because on Windows, both *_VARNAMEs are PATH but uninstalled
- libraries must come first. */
- lt_update_exe_path (EXE_PATH_VARNAME, EXE_PATH_VALUE);
- lt_update_lib_path (LIB_PATH_VARNAME, LIB_PATH_VALUE);
-
- lt_debugprintf (__FILE__, __LINE__, "(main) lt_argv_zero: %s\n",
- nonnull (lt_argv_zero));
- for (i = 0; i < newargc; i++)
- {
- lt_debugprintf (__FILE__, __LINE__, "(main) newargz[%d]: %s\n",
- i, nonnull (newargz[i]));
- }
-
-EOF
-
- case $host_os in
- mingw*)
- cat <<"EOF"
- /* execv doesn't actually work on mingw as expected on unix */
- newargz = prepare_spawn (newargz);
- rval = _spawnv (_P_WAIT, lt_argv_zero, (const char * const *) newargz);
- if (rval == -1)
- {
- /* failed to start process */
- lt_debugprintf (__FILE__, __LINE__,
- "(main) failed to launch target \"%s\": %s\n",
- lt_argv_zero, nonnull (strerror (errno)));
- return 127;
- }
- return rval;
-EOF
- ;;
- *)
- cat <<"EOF"
- execv (lt_argv_zero, newargz);
- return rval; /* =127, but avoids unused variable warning */
-EOF
- ;;
- esac
-
- cat <<"EOF"
-}
-
-void *
-xmalloc (size_t num)
-{
- void *p = (void *) malloc (num);
- if (!p)
- lt_fatal (__FILE__, __LINE__, "memory exhausted");
-
- return p;
-}
-
-char *
-xstrdup (const char *string)
-{
- return string ? strcpy ((char *) xmalloc (strlen (string) + 1),
- string) : NULL;
-}
-
-const char *
-base_name (const char *name)
-{
- const char *base;
-
-#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
- /* Skip over the disk name in MSDOS pathnames. */
- if (isalpha ((unsigned char) name[0]) && name[1] == ':')
- name += 2;
-#endif
-
- for (base = name; *name; name++)
- if (IS_DIR_SEPARATOR (*name))
- base = name + 1;
- return base;
-}
-
-int
-check_executable (const char *path)
-{
- struct stat st;
-
- lt_debugprintf (__FILE__, __LINE__, "(check_executable): %s\n",
- nonempty (path));
- if ((!path) || (!*path))
- return 0;
-
- if ((stat (path, &st) >= 0)
- && (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)))
- return 1;
- else
- return 0;
-}
-
-int
-make_executable (const char *path)
-{
- int rval = 0;
- struct stat st;
-
- lt_debugprintf (__FILE__, __LINE__, "(make_executable): %s\n",
- nonempty (path));
- if ((!path) || (!*path))
- return 0;
-
- if (stat (path, &st) >= 0)
- {
- rval = chmod (path, st.st_mode | S_IXOTH | S_IXGRP | S_IXUSR);
- }
- return rval;
-}
-
-/* Searches for the full path of the wrapper. Returns
- newly allocated full path name if found, NULL otherwise
- Does not chase symlinks, even on platforms that support them.
-*/
-char *
-find_executable (const char *wrapper)
-{
- int has_slash = 0;
- const char *p;
- const char *p_next;
- /* static buffer for getcwd */
- char tmp[LT_PATHMAX + 1];
- int tmp_len;
- char *concat_name;
-
- lt_debugprintf (__FILE__, __LINE__, "(find_executable): %s\n",
- nonempty (wrapper));
-
- if ((wrapper == NULL) || (*wrapper == '\0'))
- return NULL;
-
- /* Absolute path? */
-#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
- if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':')
- {
- concat_name = xstrdup (wrapper);
- if (check_executable (concat_name))
- return concat_name;
- XFREE (concat_name);
- }
- else
- {
-#endif
- if (IS_DIR_SEPARATOR (wrapper[0]))
- {
- concat_name = xstrdup (wrapper);
- if (check_executable (concat_name))
- return concat_name;
- XFREE (concat_name);
- }
-#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
- }
-#endif
-
- for (p = wrapper; *p; p++)
- if (*p == '/')
- {
- has_slash = 1;
- break;
- }
- if (!has_slash)
- {
- /* no slashes; search PATH */
- const char *path = getenv ("PATH");
- if (path != NULL)
- {
- for (p = path; *p; p = p_next)
- {
- const char *q;
- size_t p_len;
- for (q = p; *q; q++)
- if (IS_PATH_SEPARATOR (*q))
- break;
- p_len = q - p;
- p_next = (*q == '\0' ? q : q + 1);
- if (p_len == 0)
- {
- /* empty path: current directory */
- if (getcwd (tmp, LT_PATHMAX) == NULL)
- lt_fatal (__FILE__, __LINE__, "getcwd failed: %s",
- nonnull (strerror (errno)));
- tmp_len = strlen (tmp);
- concat_name =
- XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1);
- memcpy (concat_name, tmp, tmp_len);
- concat_name[tmp_len] = '/';
- strcpy (concat_name + tmp_len + 1, wrapper);
- }
- else
- {
- concat_name =
- XMALLOC (char, p_len + 1 + strlen (wrapper) + 1);
- memcpy (concat_name, p, p_len);
- concat_name[p_len] = '/';
- strcpy (concat_name + p_len + 1, wrapper);
- }
- if (check_executable (concat_name))
- return concat_name;
- XFREE (concat_name);
- }
- }
- /* not found in PATH; assume curdir */
- }
- /* Relative path | not found in path: prepend cwd */
- if (getcwd (tmp, LT_PATHMAX) == NULL)
- lt_fatal (__FILE__, __LINE__, "getcwd failed: %s",
- nonnull (strerror (errno)));
- tmp_len = strlen (tmp);
- concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1);
- memcpy (concat_name, tmp, tmp_len);
- concat_name[tmp_len] = '/';
- strcpy (concat_name + tmp_len + 1, wrapper);
-
- if (check_executable (concat_name))
- return concat_name;
- XFREE (concat_name);
- return NULL;
-}
-
-char *
-chase_symlinks (const char *pathspec)
-{
-#ifndef S_ISLNK
- return xstrdup (pathspec);
-#else
- char buf[LT_PATHMAX];
- struct stat s;
- char *tmp_pathspec = xstrdup (pathspec);
- char *p;
- int has_symlinks = 0;
- while (strlen (tmp_pathspec) && !has_symlinks)
- {
- lt_debugprintf (__FILE__, __LINE__,
- "checking path component for symlinks: %s\n",
- tmp_pathspec);
- if (lstat (tmp_pathspec, &s) == 0)
- {
- if (S_ISLNK (s.st_mode) != 0)
- {
- has_symlinks = 1;
- break;
- }
-
- /* search backwards for last DIR_SEPARATOR */
- p = tmp_pathspec + strlen (tmp_pathspec) - 1;
- while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p)))
- p--;
- if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p)))
- {
- /* no more DIR_SEPARATORS left */
- break;
- }
- *p = '\0';
- }
- else
- {
- lt_fatal (__FILE__, __LINE__,
- "error accessing file \"%s\": %s",
- tmp_pathspec, nonnull (strerror (errno)));
- }
- }
- XFREE (tmp_pathspec);
-
- if (!has_symlinks)
- {
- return xstrdup (pathspec);
- }
-
- tmp_pathspec = realpath (pathspec, buf);
- if (tmp_pathspec == 0)
- {
- lt_fatal (__FILE__, __LINE__,
- "could not follow symlinks for %s", pathspec);
- }
- return xstrdup (tmp_pathspec);
-#endif
-}
-
-char *
-strendzap (char *str, const char *pat)
-{
- size_t len, patlen;
-
- assert (str != NULL);
- assert (pat != NULL);
-
- len = strlen (str);
- patlen = strlen (pat);
-
- if (patlen <= len)
- {
- str += len - patlen;
- if (strcmp (str, pat) == 0)
- *str = '\0';
- }
- return str;
-}
-
-void
-lt_debugprintf (const char *file, int line, const char *fmt, ...)
-{
- va_list args;
- if (lt_debug)
- {
- (void) fprintf (stderr, "%s:%s:%d: ", program_name, file, line);
- va_start (args, fmt);
- (void) vfprintf (stderr, fmt, args);
- va_end (args);
- }
-}
-
-static void
-lt_error_core (int exit_status, const char *file,
- int line, const char *mode,
- const char *message, va_list ap)
-{
- fprintf (stderr, "%s:%s:%d: %s: ", program_name, file, line, mode);
- vfprintf (stderr, message, ap);
- fprintf (stderr, ".\n");
-
- if (exit_status >= 0)
- exit (exit_status);
-}
-
-void
-lt_fatal (const char *file, int line, const char *message, ...)
-{
- va_list ap;
- va_start (ap, message);
- lt_error_core (EXIT_FAILURE, file, line, "FATAL", message, ap);
- va_end (ap);
-}
-
-static const char *
-nonnull (const char *s)
-{
- return s ? s : "(null)";
-}
-
-static const char *
-nonempty (const char *s)
-{
- return (s && !*s) ? "(empty)" : nonnull (s);
-}
-
-void
-lt_setenv (const char *name, const char *value)
-{
- lt_debugprintf (__FILE__, __LINE__,
- "(lt_setenv) setting '%s' to '%s'\n",
- nonnull (name), nonnull (value));
- {
-#ifdef HAVE_SETENV
- /* always make a copy, for consistency with !HAVE_SETENV */
- char *str = xstrdup (value);
- setenv (name, str, 1);
-#else
- int len = strlen (name) + 1 + strlen (value) + 1;
- char *str = XMALLOC (char, len);
- sprintf (str, "%s=%s", name, value);
- if (putenv (str) != EXIT_SUCCESS)
- {
- XFREE (str);
- }
-#endif
- }
-}
-
-char *
-lt_extend_str (const char *orig_value, const char *add, int to_end)
-{
- char *new_value;
- if (orig_value && *orig_value)
- {
- int orig_value_len = strlen (orig_value);
- int add_len = strlen (add);
- new_value = XMALLOC (char, add_len + orig_value_len + 1);
- if (to_end)
- {
- strcpy (new_value, orig_value);
- strcpy (new_value + orig_value_len, add);
- }
- else
- {
- strcpy (new_value, add);
- strcpy (new_value + add_len, orig_value);
- }
- }
- else
- {
- new_value = xstrdup (add);
- }
- return new_value;
-}
-
-void
-lt_update_exe_path (const char *name, const char *value)
-{
- lt_debugprintf (__FILE__, __LINE__,
- "(lt_update_exe_path) modifying '%s' by prepending '%s'\n",
- nonnull (name), nonnull (value));
-
- if (name && *name && value && *value)
- {
- char *new_value = lt_extend_str (getenv (name), value, 0);
- /* some systems can't cope with a ':'-terminated path #' */
- int len = strlen (new_value);
- while (((len = strlen (new_value)) > 0) && IS_PATH_SEPARATOR (new_value[len-1]))
- {
- new_value[len-1] = '\0';
- }
- lt_setenv (name, new_value);
- XFREE (new_value);
- }
-}
-
-void
-lt_update_lib_path (const char *name, const char *value)
-{
- lt_debugprintf (__FILE__, __LINE__,
- "(lt_update_lib_path) modifying '%s' by prepending '%s'\n",
- nonnull (name), nonnull (value));
-
- if (name && *name && value && *value)
- {
- char *new_value = lt_extend_str (getenv (name), value, 0);
- lt_setenv (name, new_value);
- XFREE (new_value);
- }
-}
-
-EOF
- case $host_os in
- mingw*)
- cat <<"EOF"
-
-/* Prepares an argument vector before calling spawn().
- Note that spawn() does not by itself call the command interpreter
- (getenv ("COMSPEC") != NULL ? getenv ("COMSPEC") :
- ({ OSVERSIONINFO v; v.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
- GetVersionEx(&v);
- v.dwPlatformId == VER_PLATFORM_WIN32_NT;
- }) ? "cmd.exe" : "command.com").
- Instead it simply concatenates the arguments, separated by ' ', and calls
- CreateProcess(). We must quote the arguments since Win32 CreateProcess()
- interprets characters like ' ', '\t', '\\', '"' (but not '<' and '>') in a
- special way:
- - Space and tab are interpreted as delimiters. They are not treated as
- delimiters if they are surrounded by double quotes: "...".
- - Unescaped double quotes are removed from the input. Their only effect is
- that within double quotes, space and tab are treated like normal
- characters.
- - Backslashes not followed by double quotes are not special.
- - But 2*n+1 backslashes followed by a double quote become
- n backslashes followed by a double quote (n >= 0):
- \" -> "
- \\\" -> \"
- \\\\\" -> \\"
- */
-#define SHELL_SPECIAL_CHARS "\"\\ \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
-#define SHELL_SPACE_CHARS " \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
-char **
-prepare_spawn (char **argv)
-{
- size_t argc;
- char **new_argv;
- size_t i;
-
- /* Count number of arguments. */
- for (argc = 0; argv[argc] != NULL; argc++)
- ;
-
- /* Allocate new argument vector. */
- new_argv = XMALLOC (char *, argc + 1);
-
- /* Put quoted arguments into the new argument vector. */
- for (i = 0; i < argc; i++)
- {
- const char *string = argv[i];
-
- if (string[0] == '\0')
- new_argv[i] = xstrdup ("\"\"");
- else if (strpbrk (string, SHELL_SPECIAL_CHARS) != NULL)
- {
- int quote_around = (strpbrk (string, SHELL_SPACE_CHARS) != NULL);
- size_t length;
- unsigned int backslashes;
- const char *s;
- char *quoted_string;
- char *p;
-
- length = 0;
- backslashes = 0;
- if (quote_around)
- length++;
- for (s = string; *s != '\0'; s++)
- {
- char c = *s;
- if (c == '"')
- length += backslashes + 1;
- length++;
- if (c == '\\')
- backslashes++;
- else
- backslashes = 0;
- }
- if (quote_around)
- length += backslashes + 1;
-
- quoted_string = XMALLOC (char, length + 1);
-
- p = quoted_string;
- backslashes = 0;
- if (quote_around)
- *p++ = '"';
- for (s = string; *s != '\0'; s++)
- {
- char c = *s;
- if (c == '"')
- {
- unsigned int j;
- for (j = backslashes + 1; j > 0; j--)
- *p++ = '\\';
- }
- *p++ = c;
- if (c == '\\')
- backslashes++;
- else
- backslashes = 0;
- }
- if (quote_around)
- {
- unsigned int j;
- for (j = backslashes; j > 0; j--)
- *p++ = '\\';
- *p++ = '"';
- }
- *p = '\0';
-
- new_argv[i] = quoted_string;
- }
- else
- new_argv[i] = (char *) string;
- }
- new_argv[argc] = NULL;
-
- return new_argv;
-}
-EOF
- ;;
- esac
-
- cat <<"EOF"
-void lt_dump_script (FILE* f)
-{
-EOF
- func_emit_wrapper yes |
- $SED -n -e '
-s/^\(.\{79\}\)\(..*\)/\1\
-\2/
-h
-s/\([\\"]\)/\\\1/g
-s/$/\\n/
-s/\([^\n]*\).*/ fputs ("\1", f);/p
-g
-D'
- cat <<"EOF"
-}
-EOF
-}
-# end: func_emit_cwrapperexe_src
-
-# func_win32_import_lib_p ARG
-# True if ARG is an import lib, as indicated by $file_magic_cmd
-func_win32_import_lib_p ()
-{
- $opt_debug
- case `eval $file_magic_cmd \"\$1\" 2>/dev/null | $SED -e 10q` in
- *import*) : ;;
- *) false ;;
- esac
-}
-
-# func_mode_link arg...
-func_mode_link ()
-{
- $opt_debug
- case $host in
- *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*)
- # It is impossible to link a dll without this setting, and
- # we shouldn't force the makefile maintainer to figure out
- # which system we are compiling for in order to pass an extra
- # flag for every libtool invocation.
- # allow_undefined=no
-
- # FIXME: Unfortunately, there are problems with the above when trying
- # to make a dll which has undefined symbols, in which case not
- # even a static library is built. For now, we need to specify
- # -no-undefined on the libtool link line when we can be certain
- # that all symbols are satisfied, otherwise we get a static library.
- allow_undefined=yes
- ;;
- *)
- allow_undefined=yes
- ;;
- esac
- libtool_args=$nonopt
- base_compile="$nonopt $@"
- compile_command=$nonopt
- finalize_command=$nonopt
-
- compile_rpath=
- finalize_rpath=
- compile_shlibpath=
- finalize_shlibpath=
- convenience=
- old_convenience=
- deplibs=
- old_deplibs=
- compiler_flags=
- linker_flags=
- dllsearchpath=
- lib_search_path=`pwd`
- inst_prefix_dir=
- new_inherited_linker_flags=
-
- avoid_version=no
- bindir=
- dlfiles=
- dlprefiles=
- dlself=no
- export_dynamic=no
- export_symbols=
- export_symbols_regex=
- generated=
- libobjs=
- ltlibs=
- module=no
- no_install=no
- objs=
- non_pic_objects=
- precious_files_regex=
- prefer_static_libs=no
- preload=no
- prev=
- prevarg=
- release=
- rpath=
- xrpath=
- perm_rpath=
- temp_rpath=
- thread_safe=no
- vinfo=
- vinfo_number=no
- weak_libs=
- single_module="${wl}-single_module"
- func_infer_tag $base_compile
-
- # We need to know -static, to get the right output filenames.
- for arg
- do
- case $arg in
- -shared)
- test "$build_libtool_libs" != yes && \
- func_fatal_configuration "can not build a shared library"
- build_old_libs=no
- break
- ;;
- -all-static | -static | -static-libtool-libs)
- case $arg in
- -all-static)
- if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then
- func_warning "complete static linking is impossible in this configuration"
- fi
- if test -n "$link_static_flag"; then
- dlopen_self=$dlopen_self_static
- fi
- prefer_static_libs=yes
- ;;
- -static)
- if test -z "$pic_flag" && test -n "$link_static_flag"; then
- dlopen_self=$dlopen_self_static
- fi
- prefer_static_libs=built
- ;;
- -static-libtool-libs)
- if test -z "$pic_flag" && test -n "$link_static_flag"; then
- dlopen_self=$dlopen_self_static
- fi
- prefer_static_libs=yes
- ;;
- esac
- build_libtool_libs=no
- build_old_libs=yes
- break
- ;;
- esac
- done
-
- # See if our shared archives depend on static archives.
- test -n "$old_archive_from_new_cmds" && build_old_libs=yes
-
- # Go through the arguments, transforming them on the way.
- while test "$#" -gt 0; do
- arg="$1"
- shift
- func_quote_for_eval "$arg"
- qarg=$func_quote_for_eval_unquoted_result
- func_append libtool_args " $func_quote_for_eval_result"
-
- # If the previous option needs an argument, assign it.
- if test -n "$prev"; then
- case $prev in
- output)
- func_append compile_command " @OUTPUT@"
- func_append finalize_command " @OUTPUT@"
- ;;
- esac
-
- case $prev in
- bindir)
- bindir="$arg"
- prev=
- continue
- ;;
- dlfiles|dlprefiles)
- if test "$preload" = no; then
- # Add the symbol object into the linking commands.
- func_append compile_command " @SYMFILE@"
- func_append finalize_command " @SYMFILE@"
- preload=yes
- fi
- case $arg in
- *.la | *.lo) ;; # We handle these cases below.
- force)
- if test "$dlself" = no; then
- dlself=needless
- export_dynamic=yes
- fi
- prev=
- continue
- ;;
- self)
- if test "$prev" = dlprefiles; then
- dlself=yes
- elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then
- dlself=yes
- else
- dlself=needless
- export_dynamic=yes
- fi
- prev=
- continue
- ;;
- *)
- if test "$prev" = dlfiles; then
- func_append dlfiles " $arg"
- else
- func_append dlprefiles " $arg"
- fi
- prev=
- continue
- ;;
- esac
- ;;
- expsyms)
- export_symbols="$arg"
- test -f "$arg" \
- || func_fatal_error "symbol file \`$arg' does not exist"
- prev=
- continue
- ;;
- expsyms_regex)
- export_symbols_regex="$arg"
- prev=
- continue
- ;;
- framework)
- case $host in
- *-*-darwin*)
- case "$deplibs " in
- *" $qarg.ltframework "*) ;;
- *) func_append deplibs " $qarg.ltframework" # this is fixed later
- ;;
- esac
- ;;
- esac
- prev=
- continue
- ;;
- inst_prefix)
- inst_prefix_dir="$arg"
- prev=
- continue
- ;;
- objectlist)
- if test -f "$arg"; then
- save_arg=$arg
- moreargs=
- for fil in `cat "$save_arg"`
- do
-# func_append moreargs " $fil"
- arg=$fil
- # A libtool-controlled object.
-
- # Check to see that this really is a libtool object.
- if func_lalib_unsafe_p "$arg"; then
- pic_object=
- non_pic_object=
-
- # Read the .lo file
- func_source "$arg"
-
- if test -z "$pic_object" ||
- test -z "$non_pic_object" ||
- test "$pic_object" = none &&
- test "$non_pic_object" = none; then
- func_fatal_error "cannot find name of object for \`$arg'"
- fi
-
- # Extract subdirectory from the argument.
- func_dirname "$arg" "/" ""
- xdir="$func_dirname_result"
-
- if test "$pic_object" != none; then
- # Prepend the subdirectory the object is found in.
- pic_object="$xdir$pic_object"
-
- if test "$prev" = dlfiles; then
- if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then
- func_append dlfiles " $pic_object"
- prev=
- continue
- else
- # If libtool objects are unsupported, then we need to preload.
- prev=dlprefiles
- fi
- fi
-
- # CHECK ME: I think I busted this. -Ossama
- if test "$prev" = dlprefiles; then
- # Preload the old-style object.
- func_append dlprefiles " $pic_object"
- prev=
- fi
-
- # A PIC object.
- func_append libobjs " $pic_object"
- arg="$pic_object"
- fi
-
- # Non-PIC object.
- if test "$non_pic_object" != none; then
- # Prepend the subdirectory the object is found in.
- non_pic_object="$xdir$non_pic_object"
-
- # A standard non-PIC object
- func_append non_pic_objects " $non_pic_object"
- if test -z "$pic_object" || test "$pic_object" = none ; then
- arg="$non_pic_object"
- fi
- else
- # If the PIC object exists, use it instead.
- # $xdir was prepended to $pic_object above.
- non_pic_object="$pic_object"
- func_append non_pic_objects " $non_pic_object"
- fi
- else
- # Only an error if not doing a dry-run.
- if $opt_dry_run; then
- # Extract subdirectory from the argument.
- func_dirname "$arg" "/" ""
- xdir="$func_dirname_result"
-
- func_lo2o "$arg"
- pic_object=$xdir$objdir/$func_lo2o_result
- non_pic_object=$xdir$func_lo2o_result
- func_append libobjs " $pic_object"
- func_append non_pic_objects " $non_pic_object"
- else
- func_fatal_error "\`$arg' is not a valid libtool object"
- fi
- fi
- done
- else
- func_fatal_error "link input file \`$arg' does not exist"
- fi
- arg=$save_arg
- prev=
- continue
- ;;
- precious_regex)
- precious_files_regex="$arg"
- prev=
- continue
- ;;
- release)
- release="-$arg"
- prev=
- continue
- ;;
- rpath | xrpath)
- # We need an absolute path.
- case $arg in
- [\\/]* | [A-Za-z]:[\\/]*) ;;
- *)
- func_fatal_error "only absolute run-paths are allowed"
- ;;
- esac
- if test "$prev" = rpath; then
- case "$rpath " in
- *" $arg "*) ;;
- *) func_append rpath " $arg" ;;
- esac
- else
- case "$xrpath " in
- *" $arg "*) ;;
- *) func_append xrpath " $arg" ;;
- esac
- fi
- prev=
- continue
- ;;
- shrext)
- shrext_cmds="$arg"
- prev=
- continue
- ;;
- weak)
- func_append weak_libs " $arg"
- prev=
- continue
- ;;
- xcclinker)
- func_append linker_flags " $qarg"
- func_append compiler_flags " $qarg"
- prev=
- func_append compile_command " $qarg"
- func_append finalize_command " $qarg"
- continue
- ;;
- xcompiler)
- func_append compiler_flags " $qarg"
- prev=
- func_append compile_command " $qarg"
- func_append finalize_command " $qarg"
- continue
- ;;
- xlinker)
- func_append linker_flags " $qarg"
- func_append compiler_flags " $wl$qarg"
- prev=
- func_append compile_command " $wl$qarg"
- func_append finalize_command " $wl$qarg"
- continue
- ;;
- *)
- eval "$prev=\"\$arg\""
- prev=
- continue
- ;;
- esac
- fi # test -n "$prev"
-
- prevarg="$arg"
-
- case $arg in
- -all-static)
- if test -n "$link_static_flag"; then
- # See comment for -static flag below, for more details.
- func_append compile_command " $link_static_flag"
- func_append finalize_command " $link_static_flag"
- fi
- continue
- ;;
-
- -allow-undefined)
- # FIXME: remove this flag sometime in the future.
- func_fatal_error "\`-allow-undefined' must not be used because it is the default"
- ;;
-
- -avoid-version)
- avoid_version=yes
- continue
- ;;
-
- -bindir)
- prev=bindir
- continue
- ;;
-
- -dlopen)
- prev=dlfiles
- continue
- ;;
-
- -dlpreopen)
- prev=dlprefiles
- continue
- ;;
-
- -export-dynamic)
- export_dynamic=yes
- continue
- ;;
-
- -export-symbols | -export-symbols-regex)
- if test -n "$export_symbols" || test -n "$export_symbols_regex"; then
- func_fatal_error "more than one -exported-symbols argument is not allowed"
- fi
- if test "X$arg" = "X-export-symbols"; then
- prev=expsyms
- else
- prev=expsyms_regex
- fi
- continue
- ;;
-
- -framework)
- prev=framework
- continue
- ;;
-
- -inst-prefix-dir)
- prev=inst_prefix
- continue
- ;;
-
- # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:*
- # so, if we see these flags be careful not to treat them like -L
- -L[A-Z][A-Z]*:*)
- case $with_gcc/$host in
- no/*-*-irix* | /*-*-irix*)
- func_append compile_command " $arg"
- func_append finalize_command " $arg"
- ;;
- esac
- continue
- ;;
-
- -L*)
- func_stripname "-L" '' "$arg"
- if test -z "$func_stripname_result"; then
- if test "$#" -gt 0; then
- func_fatal_error "require no space between \`-L' and \`$1'"
- else
- func_fatal_error "need path for \`-L' option"
- fi
- fi
- func_resolve_sysroot "$func_stripname_result"
- dir=$func_resolve_sysroot_result
- # We need an absolute path.
- case $dir in
- [\\/]* | [A-Za-z]:[\\/]*) ;;
- *)
- absdir=`cd "$dir" && pwd`
- test -z "$absdir" && \
- func_fatal_error "cannot determine absolute directory name of \`$dir'"
- dir="$absdir"
- ;;
- esac
- case "$deplibs " in
- *" -L$dir "* | *" $arg "*)
- # Will only happen for absolute or sysroot arguments
- ;;
- *)
- # Preserve sysroot, but never include relative directories
- case $dir in
- [\\/]* | [A-Za-z]:[\\/]* | =*) func_append deplibs " $arg" ;;
- *) func_append deplibs " -L$dir" ;;
- esac
- func_append lib_search_path " $dir"
- ;;
- esac
- case $host in
- *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*)
- testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'`
- case :$dllsearchpath: in
- *":$dir:"*) ;;
- ::) dllsearchpath=$dir;;
- *) func_append dllsearchpath ":$dir";;
- esac
- case :$dllsearchpath: in
- *":$testbindir:"*) ;;
- ::) dllsearchpath=$testbindir;;
- *) func_append dllsearchpath ":$testbindir";;
- esac
- ;;
- esac
- continue
- ;;
-
- -l*)
- if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then
- case $host in
- *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*)
- # These systems don't actually have a C or math library (as such)
- continue
- ;;
- *-*-os2*)
- # These systems don't actually have a C library (as such)
- test "X$arg" = "X-lc" && continue
- ;;
- *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
- # Do not include libc due to us having libc/libc_r.
- test "X$arg" = "X-lc" && continue
- ;;
- *-*-rhapsody* | *-*-darwin1.[012])
- # Rhapsody C and math libraries are in the System framework
- func_append deplibs " System.ltframework"
- continue
- ;;
- *-*-sco3.2v5* | *-*-sco5v6*)
- # Causes problems with __ctype
- test "X$arg" = "X-lc" && continue
- ;;
- *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*)
- # Compiler inserts libc in the correct place for threads to work
- test "X$arg" = "X-lc" && continue
- ;;
- esac
- elif test "X$arg" = "X-lc_r"; then
- case $host in
- *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
- # Do not include libc_r directly, use -pthread flag.
- continue
- ;;
- esac
- fi
- func_append deplibs " $arg"
- continue
- ;;
-
- -module)
- module=yes
- continue
- ;;
-
- # Tru64 UNIX uses -model [arg] to determine the layout of C++
- # classes, name mangling, and exception handling.
- # Darwin uses the -arch flag to determine output architecture.
- -model|-arch|-isysroot|--sysroot)
- func_append compiler_flags " $arg"
- func_append compile_command " $arg"
- func_append finalize_command " $arg"
- prev=xcompiler
- continue
- ;;
-
- -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \
- |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*)
- func_append compiler_flags " $arg"
- func_append compile_command " $arg"
- func_append finalize_command " $arg"
- case "$new_inherited_linker_flags " in
- *" $arg "*) ;;
- * ) func_append new_inherited_linker_flags " $arg" ;;
- esac
- continue
- ;;
-
- -multi_module)
- single_module="${wl}-multi_module"
- continue
- ;;
-
- -no-fast-install)
- fast_install=no
- continue
- ;;
-
- -no-install)
- case $host in
- *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*)
- # The PATH hackery in wrapper scripts is required on Windows
- # and Darwin in order for the loader to find any dlls it needs.
- func_warning "\`-no-install' is ignored for $host"
- func_warning "assuming \`-no-fast-install' instead"
- fast_install=no
- ;;
- *) no_install=yes ;;
- esac
- continue
- ;;
-
- -no-undefined)
- allow_undefined=no
- continue
- ;;
-
- -objectlist)
- prev=objectlist
- continue
- ;;
-
- -o) prev=output ;;
-
- -precious-files-regex)
- prev=precious_regex
- continue
- ;;
-
- -release)
- prev=release
- continue
- ;;
-
- -rpath)
- prev=rpath
- continue
- ;;
-
- -R)
- prev=xrpath
- continue
- ;;
-
- -R*)
- func_stripname '-R' '' "$arg"
- dir=$func_stripname_result
- # We need an absolute path.
- case $dir in
- [\\/]* | [A-Za-z]:[\\/]*) ;;
- =*)
- func_stripname '=' '' "$dir"
- dir=$lt_sysroot$func_stripname_result
- ;;
- *)
- func_fatal_error "only absolute run-paths are allowed"
- ;;
- esac
- case "$xrpath " in
- *" $dir "*) ;;
- *) func_append xrpath " $dir" ;;
- esac
- continue
- ;;
-
- -shared)
- # The effects of -shared are defined in a previous loop.
- continue
- ;;
-
- -shrext)
- prev=shrext
- continue
- ;;
-
- -static | -static-libtool-libs)
- # The effects of -static are defined in a previous loop.
- # We used to do the same as -all-static on platforms that
- # didn't have a PIC flag, but the assumption that the effects
- # would be equivalent was wrong. It would break on at least
- # Digital Unix and AIX.
- continue
- ;;
-
- -thread-safe)
- thread_safe=yes
- continue
- ;;
-
- -version-info)
- prev=vinfo
- continue
- ;;
-
- -version-number)
- prev=vinfo
- vinfo_number=yes
- continue
- ;;
-
- -weak)
- prev=weak
- continue
- ;;
-
- -Wc,*)
- func_stripname '-Wc,' '' "$arg"
- args=$func_stripname_result
- arg=
- save_ifs="$IFS"; IFS=','
- for flag in $args; do
- IFS="$save_ifs"
- func_quote_for_eval "$flag"
- func_append arg " $func_quote_for_eval_result"
- func_append compiler_flags " $func_quote_for_eval_result"
- done
- IFS="$save_ifs"
- func_stripname ' ' '' "$arg"
- arg=$func_stripname_result
- ;;
-
- -Wl,*)
- func_stripname '-Wl,' '' "$arg"
- args=$func_stripname_result
- arg=
- save_ifs="$IFS"; IFS=','
- for flag in $args; do
- IFS="$save_ifs"
- func_quote_for_eval "$flag"
- func_append arg " $wl$func_quote_for_eval_result"
- func_append compiler_flags " $wl$func_quote_for_eval_result"
- func_append linker_flags " $func_quote_for_eval_result"
- done
- IFS="$save_ifs"
- func_stripname ' ' '' "$arg"
- arg=$func_stripname_result
- ;;
-
- -Xcompiler)
- prev=xcompiler
- continue
- ;;
-
- -Xlinker)
- prev=xlinker
- continue
- ;;
-
- -XCClinker)
- prev=xcclinker
- continue
- ;;
-
- # -msg_* for osf cc
- -msg_*)
- func_quote_for_eval "$arg"
- arg="$func_quote_for_eval_result"
- ;;
-
- # Flags to be passed through unchanged, with rationale:
- # -64, -mips[0-9] enable 64-bit mode for the SGI compiler
- # -r[0-9][0-9]* specify processor for the SGI compiler
- # -xarch=*, -xtarget=* enable 64-bit mode for the Sun compiler
- # +DA*, +DD* enable 64-bit mode for the HP compiler
- # -q* compiler args for the IBM compiler
- # -m*, -t[45]*, -txscale* architecture-specific flags for GCC
- # -F/path path to uninstalled frameworks, gcc on darwin
- # -p, -pg, --coverage, -fprofile-* profiling flags for GCC
- # @file GCC response files
- # -tp=* Portland pgcc target processor selection
- # --sysroot=* for sysroot support
- # -O*, -flto*, -fwhopr*, -fuse-linker-plugin GCC link-time optimization
- -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \
- -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \
- -O*|-flto*|-fwhopr*|-fuse-linker-plugin)
- func_quote_for_eval "$arg"
- arg="$func_quote_for_eval_result"
- func_append compile_command " $arg"
- func_append finalize_command " $arg"
- func_append compiler_flags " $arg"
- continue
- ;;
-
- # Some other compiler flag.
- -* | +*)
- func_quote_for_eval "$arg"
- arg="$func_quote_for_eval_result"
- ;;
-
- *.$objext)
- # A standard object.
- func_append objs " $arg"
- ;;
-
- *.lo)
- # A libtool-controlled object.
-
- # Check to see that this really is a libtool object.
- if func_lalib_unsafe_p "$arg"; then
- pic_object=
- non_pic_object=
-
- # Read the .lo file
- func_source "$arg"
-
- if test -z "$pic_object" ||
- test -z "$non_pic_object" ||
- test "$pic_object" = none &&
- test "$non_pic_object" = none; then
- func_fatal_error "cannot find name of object for \`$arg'"
- fi
-
- # Extract subdirectory from the argument.
- func_dirname "$arg" "/" ""
- xdir="$func_dirname_result"
-
- if test "$pic_object" != none; then
- # Prepend the subdirectory the object is found in.
- pic_object="$xdir$pic_object"
-
- if test "$prev" = dlfiles; then
- if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then
- func_append dlfiles " $pic_object"
- prev=
- continue
- else
- # If libtool objects are unsupported, then we need to preload.
- prev=dlprefiles
- fi
- fi
-
- # CHECK ME: I think I busted this. -Ossama
- if test "$prev" = dlprefiles; then
- # Preload the old-style object.
- func_append dlprefiles " $pic_object"
- prev=
- fi
-
- # A PIC object.
- func_append libobjs " $pic_object"
- arg="$pic_object"
- fi
-
- # Non-PIC object.
- if test "$non_pic_object" != none; then
- # Prepend the subdirectory the object is found in.
- non_pic_object="$xdir$non_pic_object"
-
- # A standard non-PIC object
- func_append non_pic_objects " $non_pic_object"
- if test -z "$pic_object" || test "$pic_object" = none ; then
- arg="$non_pic_object"
- fi
- else
- # If the PIC object exists, use it instead.
- # $xdir was prepended to $pic_object above.
- non_pic_object="$pic_object"
- func_append non_pic_objects " $non_pic_object"
- fi
- else
- # Only an error if not doing a dry-run.
- if $opt_dry_run; then
- # Extract subdirectory from the argument.
- func_dirname "$arg" "/" ""
- xdir="$func_dirname_result"
-
- func_lo2o "$arg"
- pic_object=$xdir$objdir/$func_lo2o_result
- non_pic_object=$xdir$func_lo2o_result
- func_append libobjs " $pic_object"
- func_append non_pic_objects " $non_pic_object"
- else
- func_fatal_error "\`$arg' is not a valid libtool object"
- fi
- fi
- ;;
-
- *.$libext)
- # An archive.
- func_append deplibs " $arg"
- func_append old_deplibs " $arg"
- continue
- ;;
-
- *.la)
- # A libtool-controlled library.
-
- func_resolve_sysroot "$arg"
- if test "$prev" = dlfiles; then
- # This library was specified with -dlopen.
- func_append dlfiles " $func_resolve_sysroot_result"
- prev=
- elif test "$prev" = dlprefiles; then
- # The library was specified with -dlpreopen.
- func_append dlprefiles " $func_resolve_sysroot_result"
- prev=
- else
- func_append deplibs " $func_resolve_sysroot_result"
- fi
- continue
- ;;
-
- # Some other compiler argument.
- *)
- # Unknown arguments in both finalize_command and compile_command need
- # to be aesthetically quoted because they are evaled later.
- func_quote_for_eval "$arg"
- arg="$func_quote_for_eval_result"
- ;;
- esac # arg
-
- # Now actually substitute the argument into the commands.
- if test -n "$arg"; then
- func_append compile_command " $arg"
- func_append finalize_command " $arg"
- fi
- done # argument parsing loop
-
- test -n "$prev" && \
- func_fatal_help "the \`$prevarg' option requires an argument"
-
- if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then
- eval arg=\"$export_dynamic_flag_spec\"
- func_append compile_command " $arg"
- func_append finalize_command " $arg"
- fi
-
- oldlibs=
- # calculate the name of the file, without its directory
- func_basename "$output"
- outputname="$func_basename_result"
- libobjs_save="$libobjs"
-
- if test -n "$shlibpath_var"; then
- # get the directories listed in $shlibpath_var
- eval shlib_search_path=\`\$ECHO \"\${$shlibpath_var}\" \| \$SED \'s/:/ /g\'\`
- else
- shlib_search_path=
- fi
- eval sys_lib_search_path=\"$sys_lib_search_path_spec\"
- eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\"
-
- func_dirname "$output" "/" ""
- output_objdir="$func_dirname_result$objdir"
- func_to_tool_file "$output_objdir/"
- tool_output_objdir=$func_to_tool_file_result
- # Create the object directory.
- func_mkdir_p "$output_objdir"
-
- # Determine the type of output
- case $output in
- "")
- func_fatal_help "you must specify an output file"
- ;;
- *.$libext) linkmode=oldlib ;;
- *.lo | *.$objext) linkmode=obj ;;
- *.la) linkmode=lib ;;
- *) linkmode=prog ;; # Anything else should be a program.
- esac
-
- specialdeplibs=
-
- libs=
- # Find all interdependent deplibs by searching for libraries
- # that are linked more than once (e.g. -la -lb -la)
- for deplib in $deplibs; do
- if $opt_preserve_dup_deps ; then
- case "$libs " in
- *" $deplib "*) func_append specialdeplibs " $deplib" ;;
- esac
- fi
- func_append libs " $deplib"
- done
-
- if test "$linkmode" = lib; then
- libs="$predeps $libs $compiler_lib_search_path $postdeps"
-
- # Compute libraries that are listed more than once in $predeps
- # $postdeps and mark them as special (i.e., whose duplicates are
- # not to be eliminated).
- pre_post_deps=
- if $opt_duplicate_compiler_generated_deps; then
- for pre_post_dep in $predeps $postdeps; do
- case "$pre_post_deps " in
- *" $pre_post_dep "*) func_append specialdeplibs " $pre_post_deps" ;;
- esac
- func_append pre_post_deps " $pre_post_dep"
- done
- fi
- pre_post_deps=
- fi
-
- deplibs=
- newdependency_libs=
- newlib_search_path=
- need_relink=no # whether we're linking any uninstalled libtool libraries
- notinst_deplibs= # not-installed libtool libraries
- notinst_path= # paths that contain not-installed libtool libraries
-
- case $linkmode in
- lib)
- passes="conv dlpreopen link"
- for file in $dlfiles $dlprefiles; do
- case $file in
- *.la) ;;
- *)
- func_fatal_help "libraries can \`-dlopen' only libtool libraries: $file"
- ;;
- esac
- done
- ;;
- prog)
- compile_deplibs=
- finalize_deplibs=
- alldeplibs=no
- newdlfiles=
- newdlprefiles=
- passes="conv scan dlopen dlpreopen link"
- ;;
- *) passes="conv"
- ;;
- esac
-
- for pass in $passes; do
- # The preopen pass in lib mode reverses $deplibs; put it back here
- # so that -L comes before libs that need it for instance...
- if test "$linkmode,$pass" = "lib,link"; then
- ## FIXME: Find the place where the list is rebuilt in the wrong
- ## order, and fix it there properly
- tmp_deplibs=
- for deplib in $deplibs; do
- tmp_deplibs="$deplib $tmp_deplibs"
- done
- deplibs="$tmp_deplibs"
- fi
-
- if test "$linkmode,$pass" = "lib,link" ||
- test "$linkmode,$pass" = "prog,scan"; then
- libs="$deplibs"
- deplibs=
- fi
- if test "$linkmode" = prog; then
- case $pass in
- dlopen) libs="$dlfiles" ;;
- dlpreopen) libs="$dlprefiles" ;;
- link) libs="$deplibs %DEPLIBS% $dependency_libs" ;;
- esac
- fi
- if test "$linkmode,$pass" = "lib,dlpreopen"; then
- # Collect and forward deplibs of preopened libtool libs
- for lib in $dlprefiles; do
- # Ignore non-libtool-libs
- dependency_libs=
- func_resolve_sysroot "$lib"
- case $lib in
- *.la) func_source "$func_resolve_sysroot_result" ;;
- esac
-
- # Collect preopened libtool deplibs, except any this library
- # has declared as weak libs
- for deplib in $dependency_libs; do
- func_basename "$deplib"
- deplib_base=$func_basename_result
- case " $weak_libs " in
- *" $deplib_base "*) ;;
- *) func_append deplibs " $deplib" ;;
- esac
- done
- done
- libs="$dlprefiles"
- fi
- if test "$pass" = dlopen; then
- # Collect dlpreopened libraries
- save_deplibs="$deplibs"
- deplibs=
- fi
-
- for deplib in $libs; do
- lib=
- found=no
- case $deplib in
- -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \
- |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*)
- if test "$linkmode,$pass" = "prog,link"; then
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- else
- func_append compiler_flags " $deplib"
- if test "$linkmode" = lib ; then
- case "$new_inherited_linker_flags " in
- *" $deplib "*) ;;
- * ) func_append new_inherited_linker_flags " $deplib" ;;
- esac
- fi
- fi
- continue
- ;;
- -l*)
- if test "$linkmode" != lib && test "$linkmode" != prog; then
- func_warning "\`-l' is ignored for archives/objects"
- continue
- fi
- func_stripname '-l' '' "$deplib"
- name=$func_stripname_result
- if test "$linkmode" = lib; then
- searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path"
- else
- searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path"
- fi
- for searchdir in $searchdirs; do
- for search_ext in .la $std_shrext .so .a; do
- # Search the libtool library
- lib="$searchdir/lib${name}${search_ext}"
- if test -f "$lib"; then
- if test "$search_ext" = ".la"; then
- found=yes
- else
- found=no
- fi
- break 2
- fi
- done
- done
- if test "$found" != yes; then
- # deplib doesn't seem to be a libtool library
- if test "$linkmode,$pass" = "prog,link"; then
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- else
- deplibs="$deplib $deplibs"
- test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs"
- fi
- continue
- else # deplib is a libtool library
- # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib,
- # We need to do some special things here, and not later.
- if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
- case " $predeps $postdeps " in
- *" $deplib "*)
- if func_lalib_p "$lib"; then
- library_names=
- old_library=
- func_source "$lib"
- for l in $old_library $library_names; do
- ll="$l"
- done
- if test "X$ll" = "X$old_library" ; then # only static version available
- found=no
- func_dirname "$lib" "" "."
- ladir="$func_dirname_result"
- lib=$ladir/$old_library
- if test "$linkmode,$pass" = "prog,link"; then
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- else
- deplibs="$deplib $deplibs"
- test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs"
- fi
- continue
- fi
- fi
- ;;
- *) ;;
- esac
- fi
- fi
- ;; # -l
- *.ltframework)
- if test "$linkmode,$pass" = "prog,link"; then
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- else
- deplibs="$deplib $deplibs"
- if test "$linkmode" = lib ; then
- case "$new_inherited_linker_flags " in
- *" $deplib "*) ;;
- * ) func_append new_inherited_linker_flags " $deplib" ;;
- esac
- fi
- fi
- continue
- ;;
- -L*)
- case $linkmode in
- lib)
- deplibs="$deplib $deplibs"
- test "$pass" = conv && continue
- newdependency_libs="$deplib $newdependency_libs"
- func_stripname '-L' '' "$deplib"
- func_resolve_sysroot "$func_stripname_result"
- func_append newlib_search_path " $func_resolve_sysroot_result"
- ;;
- prog)
- if test "$pass" = conv; then
- deplibs="$deplib $deplibs"
- continue
- fi
- if test "$pass" = scan; then
- deplibs="$deplib $deplibs"
- else
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- fi
- func_stripname '-L' '' "$deplib"
- func_resolve_sysroot "$func_stripname_result"
- func_append newlib_search_path " $func_resolve_sysroot_result"
- ;;
- *)
- func_warning "\`-L' is ignored for archives/objects"
- ;;
- esac # linkmode
- continue
- ;; # -L
- -R*)
- if test "$pass" = link; then
- func_stripname '-R' '' "$deplib"
- func_resolve_sysroot "$func_stripname_result"
- dir=$func_resolve_sysroot_result
- # Make sure the xrpath contains only unique directories.
- case "$xrpath " in
- *" $dir "*) ;;
- *) func_append xrpath " $dir" ;;
- esac
- fi
- deplibs="$deplib $deplibs"
- continue
- ;;
- *.la)
- func_resolve_sysroot "$deplib"
- lib=$func_resolve_sysroot_result
- ;;
- *.$libext)
- if test "$pass" = conv; then
- deplibs="$deplib $deplibs"
- continue
- fi
- case $linkmode in
- lib)
- # Linking convenience modules into shared libraries is allowed,
- # but linking other static libraries is non-portable.
- case " $dlpreconveniencelibs " in
- *" $deplib "*) ;;
- *)
- valid_a_lib=no
- case $deplibs_check_method in
- match_pattern*)
- set dummy $deplibs_check_method; shift
- match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"`
- if eval "\$ECHO \"$deplib\"" 2>/dev/null | $SED 10q \
- | $EGREP "$match_pattern_regex" > /dev/null; then
- valid_a_lib=yes
- fi
- ;;
- pass_all)
- valid_a_lib=yes
- ;;
- esac
- if test "$valid_a_lib" != yes; then
- echo
- $ECHO "*** Warning: Trying to link with static lib archive $deplib."
- echo "*** I have the capability to make that library automatically link in when"
- echo "*** you link to this library. But I can only do this if you have a"
- echo "*** shared version of the library, which you do not appear to have"
- echo "*** because the file extensions .$libext of this argument makes me believe"
- echo "*** that it is just a static archive that I should not use here."
- else
- echo
- $ECHO "*** Warning: Linking the shared library $output against the"
- $ECHO "*** static library $deplib is not portable!"
- deplibs="$deplib $deplibs"
- fi
- ;;
- esac
- continue
- ;;
- prog)
- if test "$pass" != link; then
- deplibs="$deplib $deplibs"
- else
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- fi
- continue
- ;;
- esac # linkmode
- ;; # *.$libext
- *.lo | *.$objext)
- if test "$pass" = conv; then
- deplibs="$deplib $deplibs"
- elif test "$linkmode" = prog; then
- if test "$pass" = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then
- # If there is no dlopen support or we're linking statically,
- # we need to preload.
- func_append newdlprefiles " $deplib"
- compile_deplibs="$deplib $compile_deplibs"
- finalize_deplibs="$deplib $finalize_deplibs"
- else
- func_append newdlfiles " $deplib"
- fi
- fi
- continue
- ;;
- %DEPLIBS%)
- alldeplibs=yes
- continue
- ;;
- esac # case $deplib
-
- if test "$found" = yes || test -f "$lib"; then :
- else
- func_fatal_error "cannot find the library \`$lib' or unhandled argument \`$deplib'"
- fi
-
- # Check to see that this really is a libtool archive.
- func_lalib_unsafe_p "$lib" \
- || func_fatal_error "\`$lib' is not a valid libtool archive"
-
- func_dirname "$lib" "" "."
- ladir="$func_dirname_result"
-
- dlname=
- dlopen=
- dlpreopen=
- libdir=
- library_names=
- old_library=
- inherited_linker_flags=
- # If the library was installed with an old release of libtool,
- # it will not redefine variables installed, or shouldnotlink
- installed=yes
- shouldnotlink=no
- avoidtemprpath=
-
-
- # Read the .la file
- func_source "$lib"
-
- # Convert "-framework foo" to "foo.ltframework"
- if test -n "$inherited_linker_flags"; then
- tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'`
- for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do
- case " $new_inherited_linker_flags " in
- *" $tmp_inherited_linker_flag "*) ;;
- *) func_append new_inherited_linker_flags " $tmp_inherited_linker_flag";;
- esac
- done
- fi
- dependency_libs=`$ECHO " $dependency_libs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
- if test "$linkmode,$pass" = "lib,link" ||
- test "$linkmode,$pass" = "prog,scan" ||
- { test "$linkmode" != prog && test "$linkmode" != lib; }; then
- test -n "$dlopen" && func_append dlfiles " $dlopen"
- test -n "$dlpreopen" && func_append dlprefiles " $dlpreopen"
- fi
-
- if test "$pass" = conv; then
- # Only check for convenience libraries
- deplibs="$lib $deplibs"
- if test -z "$libdir"; then
- if test -z "$old_library"; then
- func_fatal_error "cannot find name of link library for \`$lib'"
- fi
- # It is a libtool convenience library, so add in its objects.
- func_append convenience " $ladir/$objdir/$old_library"
- func_append old_convenience " $ladir/$objdir/$old_library"
- elif test "$linkmode" != prog && test "$linkmode" != lib; then
- func_fatal_error "\`$lib' is not a convenience library"
- fi
- tmp_libs=
- for deplib in $dependency_libs; do
- deplibs="$deplib $deplibs"
- if $opt_preserve_dup_deps ; then
- case "$tmp_libs " in
- *" $deplib "*) func_append specialdeplibs " $deplib" ;;
- esac
- fi
- func_append tmp_libs " $deplib"
- done
- continue
- fi # $pass = conv
-
-
- # Get the name of the library we link against.
- linklib=
- if test -n "$old_library" &&
- { test "$prefer_static_libs" = yes ||
- test "$prefer_static_libs,$installed" = "built,no"; }; then
- linklib=$old_library
- else
- for l in $old_library $library_names; do
- linklib="$l"
- done
- fi
- if test -z "$linklib"; then
- func_fatal_error "cannot find name of link library for \`$lib'"
- fi
-
- # This library was specified with -dlopen.
- if test "$pass" = dlopen; then
- if test -z "$libdir"; then
- func_fatal_error "cannot -dlopen a convenience library: \`$lib'"
- fi
- if test -z "$dlname" ||
- test "$dlopen_support" != yes ||
- test "$build_libtool_libs" = no; then
- # If there is no dlname, no dlopen support or we're linking
- # statically, we need to preload. We also need to preload any
- # dependent libraries so libltdl's deplib preloader doesn't
- # bomb out in the load deplibs phase.
- func_append dlprefiles " $lib $dependency_libs"
- else
- func_append newdlfiles " $lib"
- fi
- continue
- fi # $pass = dlopen
-
- # We need an absolute path.
- case $ladir in
- [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;;
- *)
- abs_ladir=`cd "$ladir" && pwd`
- if test -z "$abs_ladir"; then
- func_warning "cannot determine absolute directory name of \`$ladir'"
- func_warning "passing it literally to the linker, although it might fail"
- abs_ladir="$ladir"
- fi
- ;;
- esac
- func_basename "$lib"
- laname="$func_basename_result"
-
- # Find the relevant object directory and library name.
- if test "X$installed" = Xyes; then
- if test ! -f "$lt_sysroot$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then
- func_warning "library \`$lib' was moved."
- dir="$ladir"
- absdir="$abs_ladir"
- libdir="$abs_ladir"
- else
- dir="$lt_sysroot$libdir"
- absdir="$lt_sysroot$libdir"
- fi
- test "X$hardcode_automatic" = Xyes && avoidtemprpath=yes
- else
- if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then
- dir="$ladir"
- absdir="$abs_ladir"
- # Remove this search path later
- func_append notinst_path " $abs_ladir"
- else
- dir="$ladir/$objdir"
- absdir="$abs_ladir/$objdir"
- # Remove this search path later
- func_append notinst_path " $abs_ladir"
- fi
- fi # $installed = yes
- func_stripname 'lib' '.la' "$laname"
- name=$func_stripname_result
-
- # This library was specified with -dlpreopen.
- if test "$pass" = dlpreopen; then
- if test -z "$libdir" && test "$linkmode" = prog; then
- func_fatal_error "only libraries may -dlpreopen a convenience library: \`$lib'"
- fi
- case "$host" in
- # special handling for platforms with PE-DLLs.
- *cygwin* | *mingw* | *cegcc* )
- # Linker will automatically link against shared library if both
- # static and shared are present. Therefore, ensure we extract
- # symbols from the import library if a shared library is present
- # (otherwise, the dlopen module name will be incorrect). We do
- # this by putting the import library name into $newdlprefiles.
- # We recover the dlopen module name by 'saving' the la file
- # name in a special purpose variable, and (later) extracting the
- # dlname from the la file.
- if test -n "$dlname"; then
- func_tr_sh "$dir/$linklib"
- eval "libfile_$func_tr_sh_result=\$abs_ladir/\$laname"
- func_append newdlprefiles " $dir/$linklib"
- else
- func_append newdlprefiles " $dir/$old_library"
- # Keep a list of preopened convenience libraries to check
- # that they are being used correctly in the link pass.
- test -z "$libdir" && \
- func_append dlpreconveniencelibs " $dir/$old_library"
- fi
- ;;
- * )
- # Prefer using a static library (so that no silly _DYNAMIC symbols
- # are required to link).
- if test -n "$old_library"; then
- func_append newdlprefiles " $dir/$old_library"
- # Keep a list of preopened convenience libraries to check
- # that they are being used correctly in the link pass.
- test -z "$libdir" && \
- func_append dlpreconveniencelibs " $dir/$old_library"
- # Otherwise, use the dlname, so that lt_dlopen finds it.
- elif test -n "$dlname"; then
- func_append newdlprefiles " $dir/$dlname"
- else
- func_append newdlprefiles " $dir/$linklib"
- fi
- ;;
- esac
- fi # $pass = dlpreopen
-
- if test -z "$libdir"; then
- # Link the convenience library
- if test "$linkmode" = lib; then
- deplibs="$dir/$old_library $deplibs"
- elif test "$linkmode,$pass" = "prog,link"; then
- compile_deplibs="$dir/$old_library $compile_deplibs"
- finalize_deplibs="$dir/$old_library $finalize_deplibs"
- else
- deplibs="$lib $deplibs" # used for prog,scan pass
- fi
- continue
- fi
-
-
- if test "$linkmode" = prog && test "$pass" != link; then
- func_append newlib_search_path " $ladir"
- deplibs="$lib $deplibs"
-
- linkalldeplibs=no
- if test "$link_all_deplibs" != no || test -z "$library_names" ||
- test "$build_libtool_libs" = no; then
- linkalldeplibs=yes
- fi
-
- tmp_libs=
- for deplib in $dependency_libs; do
- case $deplib in
- -L*) func_stripname '-L' '' "$deplib"
- func_resolve_sysroot "$func_stripname_result"
- func_append newlib_search_path " $func_resolve_sysroot_result"
- ;;
- esac
- # Need to link against all dependency_libs?
- if test "$linkalldeplibs" = yes; then
- deplibs="$deplib $deplibs"
- else
- # Need to hardcode shared library paths
- # or/and link against static libraries
- newdependency_libs="$deplib $newdependency_libs"
- fi
- if $opt_preserve_dup_deps ; then
- case "$tmp_libs " in
- *" $deplib "*) func_append specialdeplibs " $deplib" ;;
- esac
- fi
- func_append tmp_libs " $deplib"
- done # for deplib
- continue
- fi # $linkmode = prog...
-
- if test "$linkmode,$pass" = "prog,link"; then
- if test -n "$library_names" &&
- { { test "$prefer_static_libs" = no ||
- test "$prefer_static_libs,$installed" = "built,yes"; } ||
- test -z "$old_library"; }; then
- # We need to hardcode the library path
- if test -n "$shlibpath_var" && test -z "$avoidtemprpath" ; then
- # Make sure the rpath contains only unique directories.
- case "$temp_rpath:" in
- *"$absdir:"*) ;;
- *) func_append temp_rpath "$absdir:" ;;
- esac
- fi
-
- # Hardcode the library path.
- # Skip directories that are in the system default run-time
- # search path.
- case " $sys_lib_dlsearch_path " in
- *" $absdir "*) ;;
- *)
- case "$compile_rpath " in
- *" $absdir "*) ;;
- *) func_append compile_rpath " $absdir" ;;
- esac
- ;;
- esac
- case " $sys_lib_dlsearch_path " in
- *" $libdir "*) ;;
- *)
- case "$finalize_rpath " in
- *" $libdir "*) ;;
- *) func_append finalize_rpath " $libdir" ;;
- esac
- ;;
- esac
- fi # $linkmode,$pass = prog,link...
-
- if test "$alldeplibs" = yes &&
- { test "$deplibs_check_method" = pass_all ||
- { test "$build_libtool_libs" = yes &&
- test -n "$library_names"; }; }; then
- # We only need to search for static libraries
- continue
- fi
- fi
-
- link_static=no # Whether the deplib will be linked statically
- use_static_libs=$prefer_static_libs
- if test "$use_static_libs" = built && test "$installed" = yes; then
- use_static_libs=no
- fi
- if test -n "$library_names" &&
- { test "$use_static_libs" = no || test -z "$old_library"; }; then
- case $host in
- *cygwin* | *mingw* | *cegcc*)
- # No point in relinking DLLs because paths are not encoded
- func_append notinst_deplibs " $lib"
- need_relink=no
- ;;
- *)
- if test "$installed" = no; then
- func_append notinst_deplibs " $lib"
- need_relink=yes
- fi
- ;;
- esac
- # This is a shared library
-
- # Warn about portability, can't link against -module's on some
- # systems (darwin). Don't bleat about dlopened modules though!
- dlopenmodule=""
- for dlpremoduletest in $dlprefiles; do
- if test "X$dlpremoduletest" = "X$lib"; then
- dlopenmodule="$dlpremoduletest"
- break
- fi
- done
- if test -z "$dlopenmodule" && test "$shouldnotlink" = yes && test "$pass" = link; then
- echo
- if test "$linkmode" = prog; then
- $ECHO "*** Warning: Linking the executable $output against the loadable module"
- else
- $ECHO "*** Warning: Linking the shared library $output against the loadable module"
- fi
- $ECHO "*** $linklib is not portable!"
- fi
- if test "$linkmode" = lib &&
- test "$hardcode_into_libs" = yes; then
- # Hardcode the library path.
- # Skip directories that are in the system default run-time
- # search path.
- case " $sys_lib_dlsearch_path " in
- *" $absdir "*) ;;
- *)
- case "$compile_rpath " in
- *" $absdir "*) ;;
- *) func_append compile_rpath " $absdir" ;;
- esac
- ;;
- esac
- case " $sys_lib_dlsearch_path " in
- *" $libdir "*) ;;
- *)
- case "$finalize_rpath " in
- *" $libdir "*) ;;
- *) func_append finalize_rpath " $libdir" ;;
- esac
- ;;
- esac
- fi
-
- if test -n "$old_archive_from_expsyms_cmds"; then
- # figure out the soname
- set dummy $library_names
- shift
- realname="$1"
- shift
- libname=`eval "\\$ECHO \"$libname_spec\""`
- # use dlname if we got it. it's perfectly good, no?
- if test -n "$dlname"; then
- soname="$dlname"
- elif test -n "$soname_spec"; then
- # bleh windows
- case $host in
- *cygwin* | mingw* | *cegcc*)
- func_arith $current - $age
- major=$func_arith_result
- versuffix="-$major"
- ;;
- esac
- eval soname=\"$soname_spec\"
- else
- soname="$realname"
- fi
-
- # Make a new name for the extract_expsyms_cmds to use
- soroot="$soname"
- func_basename "$soroot"
- soname="$func_basename_result"
- func_stripname 'lib' '.dll' "$soname"
- newlib=libimp-$func_stripname_result.a
-
- # If the library has no export list, then create one now
- if test -f "$output_objdir/$soname-def"; then :
- else
- func_verbose "extracting exported symbol list from \`$soname'"
- func_execute_cmds "$extract_expsyms_cmds" 'exit $?'
- fi
-
- # Create $newlib
- if test -f "$output_objdir/$newlib"; then :; else
- func_verbose "generating import library for \`$soname'"
- func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?'
- fi
- # make sure the library variables are pointing to the new library
- dir=$output_objdir
- linklib=$newlib
- fi # test -n "$old_archive_from_expsyms_cmds"
-
- if test "$linkmode" = prog || test "$opt_mode" != relink; then
- add_shlibpath=
- add_dir=
- add=
- lib_linked=yes
- case $hardcode_action in
- immediate | unsupported)
- if test "$hardcode_direct" = no; then
- add="$dir/$linklib"
- case $host in
- *-*-sco3.2v5.0.[024]*) add_dir="-L$dir" ;;
- *-*-sysv4*uw2*) add_dir="-L$dir" ;;
- *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \
- *-*-unixware7*) add_dir="-L$dir" ;;
- *-*-darwin* )
- # if the lib is a (non-dlopened) module then we can not
- # link against it, someone is ignoring the earlier warnings
- if /usr/bin/file -L $add 2> /dev/null |
- $GREP ": [^:]* bundle" >/dev/null ; then
- if test "X$dlopenmodule" != "X$lib"; then
- $ECHO "*** Warning: lib $linklib is a module, not a shared library"
- if test -z "$old_library" ; then
- echo
- echo "*** And there doesn't seem to be a static archive available"
- echo "*** The link will probably fail, sorry"
- else
- add="$dir/$old_library"
- fi
- elif test -n "$old_library"; then
- add="$dir/$old_library"
- fi
- fi
- esac
- elif test "$hardcode_minus_L" = no; then
- case $host in
- *-*-sunos*) add_shlibpath="$dir" ;;
- esac
- add_dir="-L$dir"
- add="-l$name"
- elif test "$hardcode_shlibpath_var" = no; then
- add_shlibpath="$dir"
- add="-l$name"
- else
- lib_linked=no
- fi
- ;;
- relink)
- if test "$hardcode_direct" = yes &&
- test "$hardcode_direct_absolute" = no; then
- add="$dir/$linklib"
- elif test "$hardcode_minus_L" = yes; then
- add_dir="-L$absdir"
- # Try looking first in the location we're being installed to.
- if test -n "$inst_prefix_dir"; then
- case $libdir in
- [\\/]*)
- func_append add_dir " -L$inst_prefix_dir$libdir"
- ;;
- esac
- fi
- add="-l$name"
- elif test "$hardcode_shlibpath_var" = yes; then
- add_shlibpath="$dir"
- add="-l$name"
- else
- lib_linked=no
- fi
- ;;
- *) lib_linked=no ;;
- esac
-
- if test "$lib_linked" != yes; then
- func_fatal_configuration "unsupported hardcode properties"
- fi
-
- if test -n "$add_shlibpath"; then
- case :$compile_shlibpath: in
- *":$add_shlibpath:"*) ;;
- *) func_append compile_shlibpath "$add_shlibpath:" ;;
- esac
- fi
- if test "$linkmode" = prog; then
- test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs"
- test -n "$add" && compile_deplibs="$add $compile_deplibs"
- else
- test -n "$add_dir" && deplibs="$add_dir $deplibs"
- test -n "$add" && deplibs="$add $deplibs"
- if test "$hardcode_direct" != yes &&
- test "$hardcode_minus_L" != yes &&
- test "$hardcode_shlibpath_var" = yes; then
- case :$finalize_shlibpath: in
- *":$libdir:"*) ;;
- *) func_append finalize_shlibpath "$libdir:" ;;
- esac
- fi
- fi
- fi
-
- if test "$linkmode" = prog || test "$opt_mode" = relink; then
- add_shlibpath=
- add_dir=
- add=
- # Finalize command for both is simple: just hardcode it.
- if test "$hardcode_direct" = yes &&
- test "$hardcode_direct_absolute" = no; then
- add="$libdir/$linklib"
- elif test "$hardcode_minus_L" = yes; then
- add_dir="-L$libdir"
- add="-l$name"
- elif test "$hardcode_shlibpath_var" = yes; then
- case :$finalize_shlibpath: in
- *":$libdir:"*) ;;
- *) func_append finalize_shlibpath "$libdir:" ;;
- esac
- add="-l$name"
- elif test "$hardcode_automatic" = yes; then
- if test -n "$inst_prefix_dir" &&
- test -f "$inst_prefix_dir$libdir/$linklib" ; then
- add="$inst_prefix_dir$libdir/$linklib"
- else
- add="$libdir/$linklib"
- fi
- else
- # We cannot seem to hardcode it, guess we'll fake it.
- add_dir="-L$libdir"
- # Try looking first in the location we're being installed to.
- if test -n "$inst_prefix_dir"; then
- case $libdir in
- [\\/]*)
- func_append add_dir " -L$inst_prefix_dir$libdir"
- ;;
- esac
- fi
- add="-l$name"
- fi
-
- if test "$linkmode" = prog; then
- test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs"
- test -n "$add" && finalize_deplibs="$add $finalize_deplibs"
- else
- test -n "$add_dir" && deplibs="$add_dir $deplibs"
- test -n "$add" && deplibs="$add $deplibs"
- fi
- fi
- elif test "$linkmode" = prog; then
- # Here we assume that one of hardcode_direct or hardcode_minus_L
- # is not unsupported. This is valid on all known static and
- # shared platforms.
- if test "$hardcode_direct" != unsupported; then
- test -n "$old_library" && linklib="$old_library"
- compile_deplibs="$dir/$linklib $compile_deplibs"
- finalize_deplibs="$dir/$linklib $finalize_deplibs"
- else
- compile_deplibs="-l$name -L$dir $compile_deplibs"
- finalize_deplibs="-l$name -L$dir $finalize_deplibs"
- fi
- elif test "$build_libtool_libs" = yes; then
- # Not a shared library
- if test "$deplibs_check_method" != pass_all; then
- # We're trying link a shared library against a static one
- # but the system doesn't support it.
-
- # Just print a warning and add the library to dependency_libs so
- # that the program can be linked against the static library.
- echo
- $ECHO "*** Warning: This system can not link to static lib archive $lib."
- echo "*** I have the capability to make that library automatically link in when"
- echo "*** you link to this library. But I can only do this if you have a"
- echo "*** shared version of the library, which you do not appear to have."
- if test "$module" = yes; then
- echo "*** But as you try to build a module library, libtool will still create "
- echo "*** a static module, that should work as long as the dlopening application"
- echo "*** is linked with the -dlopen flag to resolve symbols at runtime."
- if test -z "$global_symbol_pipe"; then
- echo
- echo "*** However, this would only work if libtool was able to extract symbol"
- echo "*** lists from a program, using \`nm' or equivalent, but libtool could"
- echo "*** not find such a program. So, this module is probably useless."
- echo "*** \`nm' from GNU binutils and a full rebuild may help."
- fi
- if test "$build_old_libs" = no; then
- build_libtool_libs=module
- build_old_libs=yes
- else
- build_libtool_libs=no
- fi
- fi
- else
- deplibs="$dir/$old_library $deplibs"
- link_static=yes
- fi
- fi # link shared/static library?
-
- if test "$linkmode" = lib; then
- if test -n "$dependency_libs" &&
- { test "$hardcode_into_libs" != yes ||
- test "$build_old_libs" = yes ||
- test "$link_static" = yes; }; then
- # Extract -R from dependency_libs
- temp_deplibs=
- for libdir in $dependency_libs; do
- case $libdir in
- -R*) func_stripname '-R' '' "$libdir"
- temp_xrpath=$func_stripname_result
- case " $xrpath " in
- *" $temp_xrpath "*) ;;
- *) func_append xrpath " $temp_xrpath";;
- esac;;
- *) func_append temp_deplibs " $libdir";;
- esac
- done
- dependency_libs="$temp_deplibs"
- fi
-
- func_append newlib_search_path " $absdir"
- # Link against this library
- test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs"
- # ... and its dependency_libs
- tmp_libs=
- for deplib in $dependency_libs; do
- newdependency_libs="$deplib $newdependency_libs"
- case $deplib in
- -L*) func_stripname '-L' '' "$deplib"
- func_resolve_sysroot "$func_stripname_result";;
- *) func_resolve_sysroot "$deplib" ;;
- esac
- if $opt_preserve_dup_deps ; then
- case "$tmp_libs " in
- *" $func_resolve_sysroot_result "*)
- func_append specialdeplibs " $func_resolve_sysroot_result" ;;
- esac
- fi
- func_append tmp_libs " $func_resolve_sysroot_result"
- done
-
- if test "$link_all_deplibs" != no; then
- # Add the search paths of all dependency libraries
- for deplib in $dependency_libs; do
- path=
- case $deplib in
- -L*) path="$deplib" ;;
- *.la)
- func_resolve_sysroot "$deplib"
- deplib=$func_resolve_sysroot_result
- func_dirname "$deplib" "" "."
- dir=$func_dirname_result
- # We need an absolute path.
- case $dir in
- [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;;
- *)
- absdir=`cd "$dir" && pwd`
- if test -z "$absdir"; then
- func_warning "cannot determine absolute directory name of \`$dir'"
- absdir="$dir"
- fi
- ;;
- esac
- if $GREP "^installed=no" $deplib > /dev/null; then
- case $host in
- *-*-darwin*)
- depdepl=
- eval deplibrary_names=`${SED} -n -e 's/^library_names=\(.*\)$/\1/p' $deplib`
- if test -n "$deplibrary_names" ; then
- for tmp in $deplibrary_names ; do
- depdepl=$tmp
- done
- if test -f "$absdir/$objdir/$depdepl" ; then
- depdepl="$absdir/$objdir/$depdepl"
- darwin_install_name=`${OTOOL} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'`
- if test -z "$darwin_install_name"; then
- darwin_install_name=`${OTOOL64} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'`
- fi
- func_append compiler_flags " ${wl}-dylib_file ${wl}${darwin_install_name}:${depdepl}"
- func_append linker_flags " -dylib_file ${darwin_install_name}:${depdepl}"
- path=
- fi
- fi
- ;;
- *)
- path="-L$absdir/$objdir"
- ;;
- esac
- else
- eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
- test -z "$libdir" && \
- func_fatal_error "\`$deplib' is not a valid libtool archive"
- test "$absdir" != "$libdir" && \
- func_warning "\`$deplib' seems to be moved"
-
- path="-L$absdir"
- fi
- ;;
- esac
- case " $deplibs " in
- *" $path "*) ;;
- *) deplibs="$path $deplibs" ;;
- esac
- done
- fi # link_all_deplibs != no
- fi # linkmode = lib
- done # for deplib in $libs
- if test "$pass" = link; then
- if test "$linkmode" = "prog"; then
- compile_deplibs="$new_inherited_linker_flags $compile_deplibs"
- finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs"
- else
- compiler_flags="$compiler_flags "`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
- fi
- fi
- dependency_libs="$newdependency_libs"
- if test "$pass" = dlpreopen; then
- # Link the dlpreopened libraries before other libraries
- for deplib in $save_deplibs; do
- deplibs="$deplib $deplibs"
- done
- fi
- if test "$pass" != dlopen; then
- if test "$pass" != conv; then
- # Make sure lib_search_path contains only unique directories.
- lib_search_path=
- for dir in $newlib_search_path; do
- case "$lib_search_path " in
- *" $dir "*) ;;
- *) func_append lib_search_path " $dir" ;;
- esac
- done
- newlib_search_path=
- fi
-
- if test "$linkmode,$pass" != "prog,link"; then
- vars="deplibs"
- else
- vars="compile_deplibs finalize_deplibs"
- fi
- for var in $vars dependency_libs; do
- # Add libraries to $var in reverse order
- eval tmp_libs=\"\$$var\"
- new_libs=
- for deplib in $tmp_libs; do
- # FIXME: Pedantically, this is the right thing to do, so
- # that some nasty dependency loop isn't accidentally
- # broken:
- #new_libs="$deplib $new_libs"
- # Pragmatically, this seems to cause very few problems in
- # practice:
- case $deplib in
- -L*) new_libs="$deplib $new_libs" ;;
- -R*) ;;
- *)
- # And here is the reason: when a library appears more
- # than once as an explicit dependence of a library, or
- # is implicitly linked in more than once by the
- # compiler, it is considered special, and multiple
- # occurrences thereof are not removed. Compare this
- # with having the same library being listed as a
- # dependency of multiple other libraries: in this case,
- # we know (pedantically, we assume) the library does not
- # need to be listed more than once, so we keep only the
- # last copy. This is not always right, but it is rare
- # enough that we require users that really mean to play
- # such unportable linking tricks to link the library
- # using -Wl,-lname, so that libtool does not consider it
- # for duplicate removal.
- case " $specialdeplibs " in
- *" $deplib "*) new_libs="$deplib $new_libs" ;;
- *)
- case " $new_libs " in
- *" $deplib "*) ;;
- *) new_libs="$deplib $new_libs" ;;
- esac
- ;;
- esac
- ;;
- esac
- done
- tmp_libs=
- for deplib in $new_libs; do
- case $deplib in
- -L*)
- case " $tmp_libs " in
- *" $deplib "*) ;;
- *) func_append tmp_libs " $deplib" ;;
- esac
- ;;
- *) func_append tmp_libs " $deplib" ;;
- esac
- done
- eval $var=\"$tmp_libs\"
- done # for var
- fi
- # Last step: remove runtime libs from dependency_libs
- # (they stay in deplibs)
- tmp_libs=
- for i in $dependency_libs ; do
- case " $predeps $postdeps $compiler_lib_search_path " in
- *" $i "*)
- i=""
- ;;
- esac
- if test -n "$i" ; then
- func_append tmp_libs " $i"
- fi
- done
- dependency_libs=$tmp_libs
- done # for pass
- if test "$linkmode" = prog; then
- dlfiles="$newdlfiles"
- fi
- if test "$linkmode" = prog || test "$linkmode" = lib; then
- dlprefiles="$newdlprefiles"
- fi
-
- case $linkmode in
- oldlib)
- if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
- func_warning "\`-dlopen' is ignored for archives"
- fi
-
- case " $deplibs" in
- *\ -l* | *\ -L*)
- func_warning "\`-l' and \`-L' are ignored for archives" ;;
- esac
-
- test -n "$rpath" && \
- func_warning "\`-rpath' is ignored for archives"
-
- test -n "$xrpath" && \
- func_warning "\`-R' is ignored for archives"
-
- test -n "$vinfo" && \
- func_warning "\`-version-info/-version-number' is ignored for archives"
-
- test -n "$release" && \
- func_warning "\`-release' is ignored for archives"
-
- test -n "$export_symbols$export_symbols_regex" && \
- func_warning "\`-export-symbols' is ignored for archives"
-
- # Now set the variables for building old libraries.
- build_libtool_libs=no
- oldlibs="$output"
- func_append objs "$old_deplibs"
- ;;
-
- lib)
- # Make sure we only generate libraries of the form `libNAME.la'.
- case $outputname in
- lib*)
- func_stripname 'lib' '.la' "$outputname"
- name=$func_stripname_result
- eval shared_ext=\"$shrext_cmds\"
- eval libname=\"$libname_spec\"
- ;;
- *)
- test "$module" = no && \
- func_fatal_help "libtool library \`$output' must begin with \`lib'"
-
- if test "$need_lib_prefix" != no; then
- # Add the "lib" prefix for modules if required
- func_stripname '' '.la' "$outputname"
- name=$func_stripname_result
- eval shared_ext=\"$shrext_cmds\"
- eval libname=\"$libname_spec\"
- else
- func_stripname '' '.la' "$outputname"
- libname=$func_stripname_result
- fi
- ;;
- esac
-
- if test -n "$objs"; then
- if test "$deplibs_check_method" != pass_all; then
- func_fatal_error "cannot build libtool library \`$output' from non-libtool objects on this host:$objs"
- else
- echo
- $ECHO "*** Warning: Linking the shared library $output against the non-libtool"
- $ECHO "*** objects $objs is not portable!"
- func_append libobjs " $objs"
- fi
- fi
-
- test "$dlself" != no && \
- func_warning "\`-dlopen self' is ignored for libtool libraries"
-
- set dummy $rpath
- shift
- test "$#" -gt 1 && \
- func_warning "ignoring multiple \`-rpath's for a libtool library"
-
- install_libdir="$1"
-
- oldlibs=
- if test -z "$rpath"; then
- if test "$build_libtool_libs" = yes; then
- # Building a libtool convenience library.
- # Some compilers have problems with a `.al' extension so
- # convenience libraries should have the same extension an
- # archive normally would.
- oldlibs="$output_objdir/$libname.$libext $oldlibs"
- build_libtool_libs=convenience
- build_old_libs=yes
- fi
-
- test -n "$vinfo" && \
- func_warning "\`-version-info/-version-number' is ignored for convenience libraries"
-
- test -n "$release" && \
- func_warning "\`-release' is ignored for convenience libraries"
- else
-
- # Parse the version information argument.
- save_ifs="$IFS"; IFS=':'
- set dummy $vinfo 0 0 0
- shift
- IFS="$save_ifs"
-
- test -n "$7" && \
- func_fatal_help "too many parameters to \`-version-info'"
-
- # convert absolute version numbers to libtool ages
- # this retains compatibility with .la files and attempts
- # to make the code below a bit more comprehensible
-
- case $vinfo_number in
- yes)
- number_major="$1"
- number_minor="$2"
- number_revision="$3"
- #
- # There are really only two kinds -- those that
- # use the current revision as the major version
- # and those that subtract age and use age as
- # a minor version. But, then there is irix
- # which has an extra 1 added just for fun
- #
- case $version_type in
- # correct linux to gnu/linux during the next big refactor
- darwin|linux|osf|windows|none)
- func_arith $number_major + $number_minor
- current=$func_arith_result
- age="$number_minor"
- revision="$number_revision"
- ;;
- freebsd-aout|freebsd-elf|qnx|sunos)
- current="$number_major"
- revision="$number_minor"
- age="0"
- ;;
- irix|nonstopux)
- func_arith $number_major + $number_minor
- current=$func_arith_result
- age="$number_minor"
- revision="$number_minor"
- lt_irix_increment=no
- ;;
- esac
- ;;
- no)
- current="$1"
- revision="$2"
- age="$3"
- ;;
- esac
-
- # Check that each of the things are valid numbers.
- case $current in
- 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
- *)
- func_error "CURRENT \`$current' must be a nonnegative integer"
- func_fatal_error "\`$vinfo' is not valid version information"
- ;;
- esac
-
- case $revision in
- 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
- *)
- func_error "REVISION \`$revision' must be a nonnegative integer"
- func_fatal_error "\`$vinfo' is not valid version information"
- ;;
- esac
-
- case $age in
- 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
- *)
- func_error "AGE \`$age' must be a nonnegative integer"
- func_fatal_error "\`$vinfo' is not valid version information"
- ;;
- esac
-
- if test "$age" -gt "$current"; then
- func_error "AGE \`$age' is greater than the current interface number \`$current'"
- func_fatal_error "\`$vinfo' is not valid version information"
- fi
-
- # Calculate the version variables.
- major=
- versuffix=
- verstring=
- case $version_type in
- none) ;;
-
- darwin)
- # Like Linux, but with the current version available in
- # verstring for coding it into the library header
- func_arith $current - $age
- major=.$func_arith_result
- versuffix="$major.$age.$revision"
- # Darwin ld doesn't like 0 for these options...
- func_arith $current + 1
- minor_current=$func_arith_result
- xlcverstring="${wl}-compatibility_version ${wl}$minor_current ${wl}-current_version ${wl}$minor_current.$revision"
- verstring="-compatibility_version $minor_current -current_version $minor_current.$revision"
- ;;
-
- freebsd-aout)
- major=".$current"
- versuffix=".$current.$revision";
- ;;
-
- freebsd-elf)
- major=".$current"
- versuffix=".$current"
- ;;
-
- irix | nonstopux)
- if test "X$lt_irix_increment" = "Xno"; then
- func_arith $current - $age
- else
- func_arith $current - $age + 1
- fi
- major=$func_arith_result
-
- case $version_type in
- nonstopux) verstring_prefix=nonstopux ;;
- *) verstring_prefix=sgi ;;
- esac
- verstring="$verstring_prefix$major.$revision"
-
- # Add in all the interfaces that we are compatible with.
- loop=$revision
- while test "$loop" -ne 0; do
- func_arith $revision - $loop
- iface=$func_arith_result
- func_arith $loop - 1
- loop=$func_arith_result
- verstring="$verstring_prefix$major.$iface:$verstring"
- done
-
- # Before this point, $major must not contain `.'.
- major=.$major
- versuffix="$major.$revision"
- ;;
-
- linux) # correct to gnu/linux during the next big refactor
- func_arith $current - $age
- major=.$func_arith_result
- versuffix="$major.$age.$revision"
- ;;
-
- osf)
- func_arith $current - $age
- major=.$func_arith_result
- versuffix=".$current.$age.$revision"
- verstring="$current.$age.$revision"
-
- # Add in all the interfaces that we are compatible with.
- loop=$age
- while test "$loop" -ne 0; do
- func_arith $current - $loop
- iface=$func_arith_result
- func_arith $loop - 1
- loop=$func_arith_result
- verstring="$verstring:${iface}.0"
- done
-
- # Make executables depend on our current version.
- func_append verstring ":${current}.0"
- ;;
-
- qnx)
- major=".$current"
- versuffix=".$current"
- ;;
-
- sunos)
- major=".$current"
- versuffix=".$current.$revision"
- ;;
-
- windows)
- # Use '-' rather than '.', since we only want one
- # extension on DOS 8.3 filesystems.
- func_arith $current - $age
- major=$func_arith_result
- versuffix="-$major"
- ;;
-
- *)
- func_fatal_configuration "unknown library version type \`$version_type'"
- ;;
- esac
-
- # Clear the version info if we defaulted, and they specified a release.
- if test -z "$vinfo" && test -n "$release"; then
- major=
- case $version_type in
- darwin)
- # we can't check for "0.0" in archive_cmds due to quoting
- # problems, so we reset it completely
- verstring=
- ;;
- *)
- verstring="0.0"
- ;;
- esac
- if test "$need_version" = no; then
- versuffix=
- else
- versuffix=".0.0"
- fi
- fi
-
- # Remove version info from name if versioning should be avoided
- if test "$avoid_version" = yes && test "$need_version" = no; then
- major=
- versuffix=
- verstring=""
- fi
-
- # Check to see if the archive will have undefined symbols.
- if test "$allow_undefined" = yes; then
- if test "$allow_undefined_flag" = unsupported; then
- func_warning "undefined symbols not allowed in $host shared libraries"
- build_libtool_libs=no
- build_old_libs=yes
- fi
- else
- # Don't allow undefined symbols.
- allow_undefined_flag="$no_undefined_flag"
- fi
-
- fi
-
- func_generate_dlsyms "$libname" "$libname" "yes"
- func_append libobjs " $symfileobj"
- test "X$libobjs" = "X " && libobjs=
-
- if test "$opt_mode" != relink; then
- # Remove our outputs, but don't remove object files since they
- # may have been created when compiling PIC objects.
- removelist=
- tempremovelist=`$ECHO "$output_objdir/*"`
- for p in $tempremovelist; do
- case $p in
- *.$objext | *.gcno)
- ;;
- $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/${libname}${release}.*)
- if test "X$precious_files_regex" != "X"; then
- if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1
- then
- continue
- fi
- fi
- func_append removelist " $p"
- ;;
- *) ;;
- esac
- done
- test -n "$removelist" && \
- func_show_eval "${RM}r \$removelist"
- fi
-
- # Now set the variables for building old libraries.
- if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then
- func_append oldlibs " $output_objdir/$libname.$libext"
-
- # Transform .lo files to .o files.
- oldobjs="$objs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; $lo2o" | $NL2SP`
- fi
-
- # Eliminate all temporary directories.
- #for path in $notinst_path; do
- # lib_search_path=`$ECHO "$lib_search_path " | $SED "s% $path % %g"`
- # deplibs=`$ECHO "$deplibs " | $SED "s% -L$path % %g"`
- # dependency_libs=`$ECHO "$dependency_libs " | $SED "s% -L$path % %g"`
- #done
-
- if test -n "$xrpath"; then
- # If the user specified any rpath flags, then add them.
- temp_xrpath=
- for libdir in $xrpath; do
- func_replace_sysroot "$libdir"
- func_append temp_xrpath " -R$func_replace_sysroot_result"
- case "$finalize_rpath " in
- *" $libdir "*) ;;
- *) func_append finalize_rpath " $libdir" ;;
- esac
- done
- if test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes; then
- dependency_libs="$temp_xrpath $dependency_libs"
- fi
- fi
-
- # Make sure dlfiles contains only unique files that won't be dlpreopened
- old_dlfiles="$dlfiles"
- dlfiles=
- for lib in $old_dlfiles; do
- case " $dlprefiles $dlfiles " in
- *" $lib "*) ;;
- *) func_append dlfiles " $lib" ;;
- esac
- done
-
- # Make sure dlprefiles contains only unique files
- old_dlprefiles="$dlprefiles"
- dlprefiles=
- for lib in $old_dlprefiles; do
- case "$dlprefiles " in
- *" $lib "*) ;;
- *) func_append dlprefiles " $lib" ;;
- esac
- done
-
- if test "$build_libtool_libs" = yes; then
- if test -n "$rpath"; then
- case $host in
- *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*)
- # these systems don't actually have a c library (as such)!
- ;;
- *-*-rhapsody* | *-*-darwin1.[012])
- # Rhapsody C library is in the System framework
- func_append deplibs " System.ltframework"
- ;;
- *-*-netbsd*)
- # Don't link with libc until the a.out ld.so is fixed.
- ;;
- *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
- # Do not include libc due to us having libc/libc_r.
- ;;
- *-*-sco3.2v5* | *-*-sco5v6*)
- # Causes problems with __ctype
- ;;
- *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*)
- # Compiler inserts libc in the correct place for threads to work
- ;;
- *)
- # Add libc to deplibs on all other systems if necessary.
- if test "$build_libtool_need_lc" = "yes"; then
- func_append deplibs " -lc"
- fi
- ;;
- esac
- fi
-
- # Transform deplibs into only deplibs that can be linked in shared.
- name_save=$name
- libname_save=$libname
- release_save=$release
- versuffix_save=$versuffix
- major_save=$major
- # I'm not sure if I'm treating the release correctly. I think
- # release should show up in the -l (ie -lgmp5) so we don't want to
- # add it in twice. Is that correct?
- release=""
- versuffix=""
- major=""
- newdeplibs=
- droppeddeps=no
- case $deplibs_check_method in
- pass_all)
- # Don't check for shared/static. Everything works.
- # This might be a little naive. We might want to check
- # whether the library exists or not. But this is on
- # osf3 & osf4 and I'm not really sure... Just
- # implementing what was already the behavior.
- newdeplibs=$deplibs
- ;;
- test_compile)
- # This code stresses the "libraries are programs" paradigm to its
- # limits. Maybe even breaks it. We compile a program, linking it
- # against the deplibs as a proxy for the library. Then we can check
- # whether they linked in statically or dynamically with ldd.
- $opt_dry_run || $RM conftest.c
- cat > conftest.c <<EOF
- int main() { return 0; }
-EOF
- $opt_dry_run || $RM conftest
- if $LTCC $LTCFLAGS -o conftest conftest.c $deplibs; then
- ldd_output=`ldd conftest`
- for i in $deplibs; do
- case $i in
- -l*)
- func_stripname -l '' "$i"
- name=$func_stripname_result
- if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
- case " $predeps $postdeps " in
- *" $i "*)
- func_append newdeplibs " $i"
- i=""
- ;;
- esac
- fi
- if test -n "$i" ; then
- libname=`eval "\\$ECHO \"$libname_spec\""`
- deplib_matches=`eval "\\$ECHO \"$library_names_spec\""`
- set dummy $deplib_matches; shift
- deplib_match=$1
- if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
- func_append newdeplibs " $i"
- else
- droppeddeps=yes
- echo
- $ECHO "*** Warning: dynamic linker does not accept needed library $i."
- echo "*** I have the capability to make that library automatically link in when"
- echo "*** you link to this library. But I can only do this if you have a"
- echo "*** shared version of the library, which I believe you do not have"
- echo "*** because a test_compile did reveal that the linker did not use it for"
- echo "*** its dynamic dependency list that programs get resolved with at runtime."
- fi
- fi
- ;;
- *)
- func_append newdeplibs " $i"
- ;;
- esac
- done
- else
- # Error occurred in the first compile. Let's try to salvage
- # the situation: Compile a separate program for each library.
- for i in $deplibs; do
- case $i in
- -l*)
- func_stripname -l '' "$i"
- name=$func_stripname_result
- $opt_dry_run || $RM conftest
- if $LTCC $LTCFLAGS -o conftest conftest.c $i; then
- ldd_output=`ldd conftest`
- if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
- case " $predeps $postdeps " in
- *" $i "*)
- func_append newdeplibs " $i"
- i=""
- ;;
- esac
- fi
- if test -n "$i" ; then
- libname=`eval "\\$ECHO \"$libname_spec\""`
- deplib_matches=`eval "\\$ECHO \"$library_names_spec\""`
- set dummy $deplib_matches; shift
- deplib_match=$1
- if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
- func_append newdeplibs " $i"
- else
- droppeddeps=yes
- echo
- $ECHO "*** Warning: dynamic linker does not accept needed library $i."
- echo "*** I have the capability to make that library automatically link in when"
- echo "*** you link to this library. But I can only do this if you have a"
- echo "*** shared version of the library, which you do not appear to have"
- echo "*** because a test_compile did reveal that the linker did not use this one"
- echo "*** as a dynamic dependency that programs can get resolved with at runtime."
- fi
- fi
- else
- droppeddeps=yes
- echo
- $ECHO "*** Warning! Library $i is needed by this library but I was not able to"
- echo "*** make it link in! You will probably need to install it or some"
- echo "*** library that it depends on before this library will be fully"
- echo "*** functional. Installing it before continuing would be even better."
- fi
- ;;
- *)
- func_append newdeplibs " $i"
- ;;
- esac
- done
- fi
- ;;
- file_magic*)
- set dummy $deplibs_check_method; shift
- file_magic_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"`
- for a_deplib in $deplibs; do
- case $a_deplib in
- -l*)
- func_stripname -l '' "$a_deplib"
- name=$func_stripname_result
- if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
- case " $predeps $postdeps " in
- *" $a_deplib "*)
- func_append newdeplibs " $a_deplib"
- a_deplib=""
- ;;
- esac
- fi
- if test -n "$a_deplib" ; then
- libname=`eval "\\$ECHO \"$libname_spec\""`
- if test -n "$file_magic_glob"; then
- libnameglob=`func_echo_all "$libname" | $SED -e $file_magic_glob`
- else
- libnameglob=$libname
- fi
- test "$want_nocaseglob" = yes && nocaseglob=`shopt -p nocaseglob`
- for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do
- if test "$want_nocaseglob" = yes; then
- shopt -s nocaseglob
- potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null`
- $nocaseglob
- else
- potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null`
- fi
- for potent_lib in $potential_libs; do
- # Follow soft links.
- if ls -lLd "$potent_lib" 2>/dev/null |
- $GREP " -> " >/dev/null; then
- continue
- fi
- # The statement above tries to avoid entering an
- # endless loop below, in case of cyclic links.
- # We might still enter an endless loop, since a link
- # loop can be closed while we follow links,
- # but so what?
- potlib="$potent_lib"
- while test -h "$potlib" 2>/dev/null; do
- potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'`
- case $potliblink in
- [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";;
- *) potlib=`$ECHO "$potlib" | $SED 's,[^/]*$,,'`"$potliblink";;
- esac
- done
- if eval $file_magic_cmd \"\$potlib\" 2>/dev/null |
- $SED -e 10q |
- $EGREP "$file_magic_regex" > /dev/null; then
- func_append newdeplibs " $a_deplib"
- a_deplib=""
- break 2
- fi
- done
- done
- fi
- if test -n "$a_deplib" ; then
- droppeddeps=yes
- echo
- $ECHO "*** Warning: linker path does not have real file for library $a_deplib."
- echo "*** I have the capability to make that library automatically link in when"
- echo "*** you link to this library. But I can only do this if you have a"
- echo "*** shared version of the library, which you do not appear to have"
- echo "*** because I did check the linker path looking for a file starting"
- if test -z "$potlib" ; then
- $ECHO "*** with $libname but no candidates were found. (...for file magic test)"
- else
- $ECHO "*** with $libname and none of the candidates passed a file format test"
- $ECHO "*** using a file magic. Last file checked: $potlib"
- fi
- fi
- ;;
- *)
- # Add a -L argument.
- func_append newdeplibs " $a_deplib"
- ;;
- esac
- done # Gone through all deplibs.
- ;;
- match_pattern*)
- set dummy $deplibs_check_method; shift
- match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"`
- for a_deplib in $deplibs; do
- case $a_deplib in
- -l*)
- func_stripname -l '' "$a_deplib"
- name=$func_stripname_result
- if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
- case " $predeps $postdeps " in
- *" $a_deplib "*)
- func_append newdeplibs " $a_deplib"
- a_deplib=""
- ;;
- esac
- fi
- if test -n "$a_deplib" ; then
- libname=`eval "\\$ECHO \"$libname_spec\""`
- for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do
- potential_libs=`ls $i/$libname[.-]* 2>/dev/null`
- for potent_lib in $potential_libs; do
- potlib="$potent_lib" # see symlink-check above in file_magic test
- if eval "\$ECHO \"$potent_lib\"" 2>/dev/null | $SED 10q | \
- $EGREP "$match_pattern_regex" > /dev/null; then
- func_append newdeplibs " $a_deplib"
- a_deplib=""
- break 2
- fi
- done
- done
- fi
- if test -n "$a_deplib" ; then
- droppeddeps=yes
- echo
- $ECHO "*** Warning: linker path does not have real file for library $a_deplib."
- echo "*** I have the capability to make that library automatically link in when"
- echo "*** you link to this library. But I can only do this if you have a"
- echo "*** shared version of the library, which you do not appear to have"
- echo "*** because I did check the linker path looking for a file starting"
- if test -z "$potlib" ; then
- $ECHO "*** with $libname but no candidates were found. (...for regex pattern test)"
- else
- $ECHO "*** with $libname and none of the candidates passed a file format test"
- $ECHO "*** using a regex pattern. Last file checked: $potlib"
- fi
- fi
- ;;
- *)
- # Add a -L argument.
- func_append newdeplibs " $a_deplib"
- ;;
- esac
- done # Gone through all deplibs.
- ;;
- none | unknown | *)
- newdeplibs=""
- tmp_deplibs=`$ECHO " $deplibs" | $SED 's/ -lc$//; s/ -[LR][^ ]*//g'`
- if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
- for i in $predeps $postdeps ; do
- # can't use Xsed below, because $i might contain '/'
- tmp_deplibs=`$ECHO " $tmp_deplibs" | $SED "s,$i,,"`
- done
- fi
- case $tmp_deplibs in
- *[!\ \ ]*)
- echo
- if test "X$deplibs_check_method" = "Xnone"; then
- echo "*** Warning: inter-library dependencies are not supported in this platform."
- else
- echo "*** Warning: inter-library dependencies are not known to be supported."
- fi
- echo "*** All declared inter-library dependencies are being dropped."
- droppeddeps=yes
- ;;
- esac
- ;;
- esac
- versuffix=$versuffix_save
- major=$major_save
- release=$release_save
- libname=$libname_save
- name=$name_save
-
- case $host in
- *-*-rhapsody* | *-*-darwin1.[012])
- # On Rhapsody replace the C library with the System framework
- newdeplibs=`$ECHO " $newdeplibs" | $SED 's/ -lc / System.ltframework /'`
- ;;
- esac
-
- if test "$droppeddeps" = yes; then
- if test "$module" = yes; then
- echo
- echo "*** Warning: libtool could not satisfy all declared inter-library"
- $ECHO "*** dependencies of module $libname. Therefore, libtool will create"
- echo "*** a static module, that should work as long as the dlopening"
- echo "*** application is linked with the -dlopen flag."
- if test -z "$global_symbol_pipe"; then
- echo
- echo "*** However, this would only work if libtool was able to extract symbol"
- echo "*** lists from a program, using \`nm' or equivalent, but libtool could"
- echo "*** not find such a program. So, this module is probably useless."
- echo "*** \`nm' from GNU binutils and a full rebuild may help."
- fi
- if test "$build_old_libs" = no; then
- oldlibs="$output_objdir/$libname.$libext"
- build_libtool_libs=module
- build_old_libs=yes
- else
- build_libtool_libs=no
- fi
- else
- echo "*** The inter-library dependencies that have been dropped here will be"
- echo "*** automatically added whenever a program is linked with this library"
- echo "*** or is declared to -dlopen it."
-
- if test "$allow_undefined" = no; then
- echo
- echo "*** Since this library must not contain undefined symbols,"
- echo "*** because either the platform does not support them or"
- echo "*** it was explicitly requested with -no-undefined,"
- echo "*** libtool will only create a static version of it."
- if test "$build_old_libs" = no; then
- oldlibs="$output_objdir/$libname.$libext"
- build_libtool_libs=module
- build_old_libs=yes
- else
- build_libtool_libs=no
- fi
- fi
- fi
- fi
- # Done checking deplibs!
- deplibs=$newdeplibs
- fi
- # Time to change all our "foo.ltframework" stuff back to "-framework foo"
- case $host in
- *-*-darwin*)
- newdeplibs=`$ECHO " $newdeplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
- new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
- deplibs=`$ECHO " $deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
- ;;
- esac
-
- # move library search paths that coincide with paths to not yet
- # installed libraries to the beginning of the library search list
- new_libs=
- for path in $notinst_path; do
- case " $new_libs " in
- *" -L$path/$objdir "*) ;;
- *)
- case " $deplibs " in
- *" -L$path/$objdir "*)
- func_append new_libs " -L$path/$objdir" ;;
- esac
- ;;
- esac
- done
- for deplib in $deplibs; do
- case $deplib in
- -L*)
- case " $new_libs " in
- *" $deplib "*) ;;
- *) func_append new_libs " $deplib" ;;
- esac
- ;;
- *) func_append new_libs " $deplib" ;;
- esac
- done
- deplibs="$new_libs"
-
- # All the library-specific variables (install_libdir is set above).
- library_names=
- old_library=
- dlname=
-
- # Test again, we may have decided not to build it any more
- if test "$build_libtool_libs" = yes; then
- # Remove ${wl} instances when linking with ld.
- # FIXME: should test the right _cmds variable.
- case $archive_cmds in
- *\$LD\ *) wl= ;;
- esac
- if test "$hardcode_into_libs" = yes; then
- # Hardcode the library paths
- hardcode_libdirs=
- dep_rpath=
- rpath="$finalize_rpath"
- test "$opt_mode" != relink && rpath="$compile_rpath$rpath"
- for libdir in $rpath; do
- if test -n "$hardcode_libdir_flag_spec"; then
- if test -n "$hardcode_libdir_separator"; then
- func_replace_sysroot "$libdir"
- libdir=$func_replace_sysroot_result
- if test -z "$hardcode_libdirs"; then
- hardcode_libdirs="$libdir"
- else
- # Just accumulate the unique libdirs.
- case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
- *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
- ;;
- *)
- func_append hardcode_libdirs "$hardcode_libdir_separator$libdir"
- ;;
- esac
- fi
- else
- eval flag=\"$hardcode_libdir_flag_spec\"
- func_append dep_rpath " $flag"
- fi
- elif test -n "$runpath_var"; then
- case "$perm_rpath " in
- *" $libdir "*) ;;
- *) func_append perm_rpath " $libdir" ;;
- esac
- fi
- done
- # Substitute the hardcoded libdirs into the rpath.
- if test -n "$hardcode_libdir_separator" &&
- test -n "$hardcode_libdirs"; then
- libdir="$hardcode_libdirs"
- eval "dep_rpath=\"$hardcode_libdir_flag_spec\""
- fi
- if test -n "$runpath_var" && test -n "$perm_rpath"; then
- # We should set the runpath_var.
- rpath=
- for dir in $perm_rpath; do
- func_append rpath "$dir:"
- done
- eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var"
- fi
- test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs"
- fi
-
- shlibpath="$finalize_shlibpath"
- test "$opt_mode" != relink && shlibpath="$compile_shlibpath$shlibpath"
- if test -n "$shlibpath"; then
- eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var"
- fi
-
- # Get the real and link names of the library.
- eval shared_ext=\"$shrext_cmds\"
- eval library_names=\"$library_names_spec\"
- set dummy $library_names
- shift
- realname="$1"
- shift
-
- if test -n "$soname_spec"; then
- eval soname=\"$soname_spec\"
- else
- soname="$realname"
- fi
- if test -z "$dlname"; then
- dlname=$soname
- fi
-
- lib="$output_objdir/$realname"
- linknames=
- for link
- do
- func_append linknames " $link"
- done
-
- # Use standard objects if they are pic
- test -z "$pic_flag" && libobjs=`$ECHO "$libobjs" | $SP2NL | $SED "$lo2o" | $NL2SP`
- test "X$libobjs" = "X " && libobjs=
-
- delfiles=
- if test -n "$export_symbols" && test -n "$include_expsyms"; then
- $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp"
- export_symbols="$output_objdir/$libname.uexp"
- func_append delfiles " $export_symbols"
- fi
-
- orig_export_symbols=
- case $host_os in
- cygwin* | mingw* | cegcc*)
- if test -n "$export_symbols" && test -z "$export_symbols_regex"; then
- # exporting using user supplied symfile
- if test "x`$SED 1q $export_symbols`" != xEXPORTS; then
- # and it's NOT already a .def file. Must figure out
- # which of the given symbols are data symbols and tag
- # them as such. So, trigger use of export_symbols_cmds.
- # export_symbols gets reassigned inside the "prepare
- # the list of exported symbols" if statement, so the
- # include_expsyms logic still works.
- orig_export_symbols="$export_symbols"
- export_symbols=
- always_export_symbols=yes
- fi
- fi
- ;;
- esac
-
- # Prepare the list of exported symbols
- if test -z "$export_symbols"; then
- if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then
- func_verbose "generating symbol list for \`$libname.la'"
- export_symbols="$output_objdir/$libname.exp"
- $opt_dry_run || $RM $export_symbols
- cmds=$export_symbols_cmds
- save_ifs="$IFS"; IFS='~'
- for cmd1 in $cmds; do
- IFS="$save_ifs"
- # Take the normal branch if the nm_file_list_spec branch
- # doesn't work or if tool conversion is not needed.
- case $nm_file_list_spec~$to_tool_file_cmd in
- *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*)
- try_normal_branch=yes
- eval cmd=\"$cmd1\"
- func_len " $cmd"
- len=$func_len_result
- ;;
- *)
- try_normal_branch=no
- ;;
- esac
- if test "$try_normal_branch" = yes \
- && { test "$len" -lt "$max_cmd_len" \
- || test "$max_cmd_len" -le -1; }
- then
- func_show_eval "$cmd" 'exit $?'
- skipped_export=false
- elif test -n "$nm_file_list_spec"; then
- func_basename "$output"
- output_la=$func_basename_result
- save_libobjs=$libobjs
- save_output=$output
- output=${output_objdir}/${output_la}.nm
- func_to_tool_file "$output"
- libobjs=$nm_file_list_spec$func_to_tool_file_result
- func_append delfiles " $output"
- func_verbose "creating $NM input file list: $output"
- for obj in $save_libobjs; do
- func_to_tool_file "$obj"
- $ECHO "$func_to_tool_file_result"
- done > "$output"
- eval cmd=\"$cmd1\"
- func_show_eval "$cmd" 'exit $?'
- output=$save_output
- libobjs=$save_libobjs
- skipped_export=false
- else
- # The command line is too long to execute in one step.
- func_verbose "using reloadable object file for export list..."
- skipped_export=:
- # Break out early, otherwise skipped_export may be
- # set to false by a later but shorter cmd.
- break
- fi
- done
- IFS="$save_ifs"
- if test -n "$export_symbols_regex" && test "X$skipped_export" != "X:"; then
- func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"'
- func_show_eval '$MV "${export_symbols}T" "$export_symbols"'
- fi
- fi
- fi
-
- if test -n "$export_symbols" && test -n "$include_expsyms"; then
- tmp_export_symbols="$export_symbols"
- test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols"
- $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"'
- fi
-
- if test "X$skipped_export" != "X:" && test -n "$orig_export_symbols"; then
- # The given exports_symbols file has to be filtered, so filter it.
- func_verbose "filter symbol list for \`$libname.la' to tag DATA exports"
- # FIXME: $output_objdir/$libname.filter potentially contains lots of
- # 's' commands which not all seds can handle. GNU sed should be fine
- # though. Also, the filter scales superlinearly with the number of
- # global variables. join(1) would be nice here, but unfortunately
- # isn't a blessed tool.
- $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter
- func_append delfiles " $export_symbols $output_objdir/$libname.filter"
- export_symbols=$output_objdir/$libname.def
- $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols
- fi
-
- tmp_deplibs=
- for test_deplib in $deplibs; do
- case " $convenience " in
- *" $test_deplib "*) ;;
- *)
- func_append tmp_deplibs " $test_deplib"
- ;;
- esac
- done
- deplibs="$tmp_deplibs"
-
- if test -n "$convenience"; then
- if test -n "$whole_archive_flag_spec" &&
- test "$compiler_needs_object" = yes &&
- test -z "$libobjs"; then
- # extract the archives, so we have objects to list.
- # TODO: could optimize this to just extract one archive.
- whole_archive_flag_spec=
- fi
- if test -n "$whole_archive_flag_spec"; then
- save_libobjs=$libobjs
- eval libobjs=\"\$libobjs $whole_archive_flag_spec\"
- test "X$libobjs" = "X " && libobjs=
- else
- gentop="$output_objdir/${outputname}x"
- func_append generated " $gentop"
-
- func_extract_archives $gentop $convenience
- func_append libobjs " $func_extract_archives_result"
- test "X$libobjs" = "X " && libobjs=
- fi
- fi
-
- if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then
- eval flag=\"$thread_safe_flag_spec\"
- func_append linker_flags " $flag"
- fi
-
- # Make a backup of the uninstalled library when relinking
- if test "$opt_mode" = relink; then
- $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $?
- fi
-
- # Do each of the archive commands.
- if test "$module" = yes && test -n "$module_cmds" ; then
- if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then
- eval test_cmds=\"$module_expsym_cmds\"
- cmds=$module_expsym_cmds
- else
- eval test_cmds=\"$module_cmds\"
- cmds=$module_cmds
- fi
- else
- if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then
- eval test_cmds=\"$archive_expsym_cmds\"
- cmds=$archive_expsym_cmds
- else
- eval test_cmds=\"$archive_cmds\"
- cmds=$archive_cmds
- fi
- fi
-
- if test "X$skipped_export" != "X:" &&
- func_len " $test_cmds" &&
- len=$func_len_result &&
- test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then
- :
- else
- # The command line is too long to link in one step, link piecewise
- # or, if using GNU ld and skipped_export is not :, use a linker
- # script.
-
- # Save the value of $output and $libobjs because we want to
- # use them later. If we have whole_archive_flag_spec, we
- # want to use save_libobjs as it was before
- # whole_archive_flag_spec was expanded, because we can't
- # assume the linker understands whole_archive_flag_spec.
- # This may have to be revisited, in case too many
- # convenience libraries get linked in and end up exceeding
- # the spec.
- if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then
- save_libobjs=$libobjs
- fi
- save_output=$output
- func_basename "$output"
- output_la=$func_basename_result
-
- # Clear the reloadable object creation command queue and
- # initialize k to one.
- test_cmds=
- concat_cmds=
- objlist=
- last_robj=
- k=1
-
- if test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "$with_gnu_ld" = yes; then
- output=${output_objdir}/${output_la}.lnkscript
- func_verbose "creating GNU ld script: $output"
- echo 'INPUT (' > $output
- for obj in $save_libobjs
- do
- func_to_tool_file "$obj"
- $ECHO "$func_to_tool_file_result" >> $output
- done
- echo ')' >> $output
- func_append delfiles " $output"
- func_to_tool_file "$output"
- output=$func_to_tool_file_result
- elif test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "X$file_list_spec" != X; then
- output=${output_objdir}/${output_la}.lnk
- func_verbose "creating linker input file list: $output"
- : > $output
- set x $save_libobjs
- shift
- firstobj=
- if test "$compiler_needs_object" = yes; then
- firstobj="$1 "
- shift
- fi
- for obj
- do
- func_to_tool_file "$obj"
- $ECHO "$func_to_tool_file_result" >> $output
- done
- func_append delfiles " $output"
- func_to_tool_file "$output"
- output=$firstobj\"$file_list_spec$func_to_tool_file_result\"
- else
- if test -n "$save_libobjs"; then
- func_verbose "creating reloadable object files..."
- output=$output_objdir/$output_la-${k}.$objext
- eval test_cmds=\"$reload_cmds\"
- func_len " $test_cmds"
- len0=$func_len_result
- len=$len0
-
- # Loop over the list of objects to be linked.
- for obj in $save_libobjs
- do
- func_len " $obj"
- func_arith $len + $func_len_result
- len=$func_arith_result
- if test "X$objlist" = X ||
- test "$len" -lt "$max_cmd_len"; then
- func_append objlist " $obj"
- else
- # The command $test_cmds is almost too long, add a
- # command to the queue.
- if test "$k" -eq 1 ; then
- # The first file doesn't have a previous command to add.
- reload_objs=$objlist
- eval concat_cmds=\"$reload_cmds\"
- else
- # All subsequent reloadable object files will link in
- # the last one created.
- reload_objs="$objlist $last_robj"
- eval concat_cmds=\"\$concat_cmds~$reload_cmds~\$RM $last_robj\"
- fi
- last_robj=$output_objdir/$output_la-${k}.$objext
- func_arith $k + 1
- k=$func_arith_result
- output=$output_objdir/$output_la-${k}.$objext
- objlist=" $obj"
- func_len " $last_robj"
- func_arith $len0 + $func_len_result
- len=$func_arith_result
- fi
- done
- # Handle the remaining objects by creating one last
- # reloadable object file. All subsequent reloadable object
- # files will link in the last one created.
- test -z "$concat_cmds" || concat_cmds=$concat_cmds~
- reload_objs="$objlist $last_robj"
- eval concat_cmds=\"\${concat_cmds}$reload_cmds\"
- if test -n "$last_robj"; then
- eval concat_cmds=\"\${concat_cmds}~\$RM $last_robj\"
- fi
- func_append delfiles " $output"
-
- else
- output=
- fi
-
- if ${skipped_export-false}; then
- func_verbose "generating symbol list for \`$libname.la'"
- export_symbols="$output_objdir/$libname.exp"
- $opt_dry_run || $RM $export_symbols
- libobjs=$output
- # Append the command to create the export file.
- test -z "$concat_cmds" || concat_cmds=$concat_cmds~
- eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\"
- if test -n "$last_robj"; then
- eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\"
- fi
- fi
-
- test -n "$save_libobjs" &&
- func_verbose "creating a temporary reloadable object file: $output"
-
- # Loop through the commands generated above and execute them.
- save_ifs="$IFS"; IFS='~'
- for cmd in $concat_cmds; do
- IFS="$save_ifs"
- $opt_silent || {
- func_quote_for_expand "$cmd"
- eval "func_echo $func_quote_for_expand_result"
- }
- $opt_dry_run || eval "$cmd" || {
- lt_exit=$?
-
- # Restore the uninstalled library and exit
- if test "$opt_mode" = relink; then
- ( cd "$output_objdir" && \
- $RM "${realname}T" && \
- $MV "${realname}U" "$realname" )
- fi
-
- exit $lt_exit
- }
- done
- IFS="$save_ifs"
-
- if test -n "$export_symbols_regex" && ${skipped_export-false}; then
- func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"'
- func_show_eval '$MV "${export_symbols}T" "$export_symbols"'
- fi
- fi
-
- if ${skipped_export-false}; then
- if test -n "$export_symbols" && test -n "$include_expsyms"; then
- tmp_export_symbols="$export_symbols"
- test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols"
- $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"'
- fi
-
- if test -n "$orig_export_symbols"; then
- # The given exports_symbols file has to be filtered, so filter it.
- func_verbose "filter symbol list for \`$libname.la' to tag DATA exports"
- # FIXME: $output_objdir/$libname.filter potentially contains lots of
- # 's' commands which not all seds can handle. GNU sed should be fine
- # though. Also, the filter scales superlinearly with the number of
- # global variables. join(1) would be nice here, but unfortunately
- # isn't a blessed tool.
- $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter
- func_append delfiles " $export_symbols $output_objdir/$libname.filter"
- export_symbols=$output_objdir/$libname.def
- $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols
- fi
- fi
-
- libobjs=$output
- # Restore the value of output.
- output=$save_output
-
- if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then
- eval libobjs=\"\$libobjs $whole_archive_flag_spec\"
- test "X$libobjs" = "X " && libobjs=
- fi
- # Expand the library linking commands again to reset the
- # value of $libobjs for piecewise linking.
-
- # Do each of the archive commands.
- if test "$module" = yes && test -n "$module_cmds" ; then
- if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then
- cmds=$module_expsym_cmds
- else
- cmds=$module_cmds
- fi
- else
- if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then
- cmds=$archive_expsym_cmds
- else
- cmds=$archive_cmds
- fi
- fi
- fi
-
- if test -n "$delfiles"; then
- # Append the command to remove temporary files to $cmds.
- eval cmds=\"\$cmds~\$RM $delfiles\"
- fi
-
- # Add any objects from preloaded convenience libraries
- if test -n "$dlprefiles"; then
- gentop="$output_objdir/${outputname}x"
- func_append generated " $gentop"
-
- func_extract_archives $gentop $dlprefiles
- func_append libobjs " $func_extract_archives_result"
- test "X$libobjs" = "X " && libobjs=
- fi
-
- save_ifs="$IFS"; IFS='~'
- for cmd in $cmds; do
- IFS="$save_ifs"
- eval cmd=\"$cmd\"
- $opt_silent || {
- func_quote_for_expand "$cmd"
- eval "func_echo $func_quote_for_expand_result"
- }
- $opt_dry_run || eval "$cmd" || {
- lt_exit=$?
-
- # Restore the uninstalled library and exit
- if test "$opt_mode" = relink; then
- ( cd "$output_objdir" && \
- $RM "${realname}T" && \
- $MV "${realname}U" "$realname" )
- fi
-
- exit $lt_exit
- }
- done
- IFS="$save_ifs"
-
- # Restore the uninstalled library and exit
- if test "$opt_mode" = relink; then
- $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $?
-
- if test -n "$convenience"; then
- if test -z "$whole_archive_flag_spec"; then
- func_show_eval '${RM}r "$gentop"'
- fi
- fi
-
- exit $EXIT_SUCCESS
- fi
-
- # Create links to the real library.
- for linkname in $linknames; do
- if test "$realname" != "$linkname"; then
- func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?'
- fi
- done
-
- # If -module or -export-dynamic was specified, set the dlname.
- if test "$module" = yes || test "$export_dynamic" = yes; then
- # On all known operating systems, these are identical.
- dlname="$soname"
- fi
- fi
- ;;
-
- obj)
- if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
- func_warning "\`-dlopen' is ignored for objects"
- fi
-
- case " $deplibs" in
- *\ -l* | *\ -L*)
- func_warning "\`-l' and \`-L' are ignored for objects" ;;
- esac
-
- test -n "$rpath" && \
- func_warning "\`-rpath' is ignored for objects"
-
- test -n "$xrpath" && \
- func_warning "\`-R' is ignored for objects"
-
- test -n "$vinfo" && \
- func_warning "\`-version-info' is ignored for objects"
-
- test -n "$release" && \
- func_warning "\`-release' is ignored for objects"
-
- case $output in
- *.lo)
- test -n "$objs$old_deplibs" && \
- func_fatal_error "cannot build library object \`$output' from non-libtool objects"
-
- libobj=$output
- func_lo2o "$libobj"
- obj=$func_lo2o_result
- ;;
- *)
- libobj=
- obj="$output"
- ;;
- esac
-
- # Delete the old objects.
- $opt_dry_run || $RM $obj $libobj
-
- # Objects from convenience libraries. This assumes
- # single-version convenience libraries. Whenever we create
- # different ones for PIC/non-PIC, this we'll have to duplicate
- # the extraction.
- reload_conv_objs=
- gentop=
- # reload_cmds runs $LD directly, so let us get rid of
- # -Wl from whole_archive_flag_spec and hope we can get by with
- # turning comma into space..
- wl=
-
- if test -n "$convenience"; then
- if test -n "$whole_archive_flag_spec"; then
- eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\"
- reload_conv_objs=$reload_objs\ `$ECHO "$tmp_whole_archive_flags" | $SED 's|,| |g'`
- else
- gentop="$output_objdir/${obj}x"
- func_append generated " $gentop"
-
- func_extract_archives $gentop $convenience
- reload_conv_objs="$reload_objs $func_extract_archives_result"
- fi
- fi
-
- # If we're not building shared, we need to use non_pic_objs
- test "$build_libtool_libs" != yes && libobjs="$non_pic_objects"
-
- # Create the old-style object.
- reload_objs="$objs$old_deplibs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; /\.lib$/d; $lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test
-
- output="$obj"
- func_execute_cmds "$reload_cmds" 'exit $?'
-
- # Exit if we aren't doing a library object file.
- if test -z "$libobj"; then
- if test -n "$gentop"; then
- func_show_eval '${RM}r "$gentop"'
- fi
-
- exit $EXIT_SUCCESS
- fi
-
- if test "$build_libtool_libs" != yes; then
- if test -n "$gentop"; then
- func_show_eval '${RM}r "$gentop"'
- fi
-
- # Create an invalid libtool object if no PIC, so that we don't
- # accidentally link it into a program.
- # $show "echo timestamp > $libobj"
- # $opt_dry_run || eval "echo timestamp > $libobj" || exit $?
- exit $EXIT_SUCCESS
- fi
-
- if test -n "$pic_flag" || test "$pic_mode" != default; then
- # Only do commands if we really have different PIC objects.
- reload_objs="$libobjs $reload_conv_objs"
- output="$libobj"
- func_execute_cmds "$reload_cmds" 'exit $?'
- fi
-
- if test -n "$gentop"; then
- func_show_eval '${RM}r "$gentop"'
- fi
-
- exit $EXIT_SUCCESS
- ;;
-
- prog)
- case $host in
- *cygwin*) func_stripname '' '.exe' "$output"
- output=$func_stripname_result.exe;;
- esac
- test -n "$vinfo" && \
- func_warning "\`-version-info' is ignored for programs"
-
- test -n "$release" && \
- func_warning "\`-release' is ignored for programs"
-
- test "$preload" = yes \
- && test "$dlopen_support" = unknown \
- && test "$dlopen_self" = unknown \
- && test "$dlopen_self_static" = unknown && \
- func_warning "\`LT_INIT([dlopen])' not used. Assuming no dlopen support."
-
- case $host in
- *-*-rhapsody* | *-*-darwin1.[012])
- # On Rhapsody replace the C library is the System framework
- compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's/ -lc / System.ltframework /'`
- finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's/ -lc / System.ltframework /'`
- ;;
- esac
-
- case $host in
- *-*-darwin*)
- # Don't allow lazy linking, it breaks C++ global constructors
- # But is supposedly fixed on 10.4 or later (yay!).
- if test "$tagname" = CXX ; then
- case ${MACOSX_DEPLOYMENT_TARGET-10.0} in
- 10.[0123])
- func_append compile_command " ${wl}-bind_at_load"
- func_append finalize_command " ${wl}-bind_at_load"
- ;;
- esac
- fi
- # Time to change all our "foo.ltframework" stuff back to "-framework foo"
- compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
- finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'`
- ;;
- esac
-
-
- # move library search paths that coincide with paths to not yet
- # installed libraries to the beginning of the library search list
- new_libs=
- for path in $notinst_path; do
- case " $new_libs " in
- *" -L$path/$objdir "*) ;;
- *)
- case " $compile_deplibs " in
- *" -L$path/$objdir "*)
- func_append new_libs " -L$path/$objdir" ;;
- esac
- ;;
- esac
- done
- for deplib in $compile_deplibs; do
- case $deplib in
- -L*)
- case " $new_libs " in
- *" $deplib "*) ;;
- *) func_append new_libs " $deplib" ;;
- esac
- ;;
- *) func_append new_libs " $deplib" ;;
- esac
- done
- compile_deplibs="$new_libs"
-
-
- func_append compile_command " $compile_deplibs"
- func_append finalize_command " $finalize_deplibs"
-
- if test -n "$rpath$xrpath"; then
- # If the user specified any rpath flags, then add them.
- for libdir in $rpath $xrpath; do
- # This is the magic to use -rpath.
- case "$finalize_rpath " in
- *" $libdir "*) ;;
- *) func_append finalize_rpath " $libdir" ;;
- esac
- done
- fi
-
- # Now hardcode the library paths
- rpath=
- hardcode_libdirs=
- for libdir in $compile_rpath $finalize_rpath; do
- if test -n "$hardcode_libdir_flag_spec"; then
- if test -n "$hardcode_libdir_separator"; then
- if test -z "$hardcode_libdirs"; then
- hardcode_libdirs="$libdir"
- else
- # Just accumulate the unique libdirs.
- case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
- *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
- ;;
- *)
- func_append hardcode_libdirs "$hardcode_libdir_separator$libdir"
- ;;
- esac
- fi
- else
- eval flag=\"$hardcode_libdir_flag_spec\"
- func_append rpath " $flag"
- fi
- elif test -n "$runpath_var"; then
- case "$perm_rpath " in
- *" $libdir "*) ;;
- *) func_append perm_rpath " $libdir" ;;
- esac
- fi
- case $host in
- *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*)
- testbindir=`${ECHO} "$libdir" | ${SED} -e 's*/lib$*/bin*'`
- case :$dllsearchpath: in
- *":$libdir:"*) ;;
- ::) dllsearchpath=$libdir;;
- *) func_append dllsearchpath ":$libdir";;
- esac
- case :$dllsearchpath: in
- *":$testbindir:"*) ;;
- ::) dllsearchpath=$testbindir;;
- *) func_append dllsearchpath ":$testbindir";;
- esac
- ;;
- esac
- done
- # Substitute the hardcoded libdirs into the rpath.
- if test -n "$hardcode_libdir_separator" &&
- test -n "$hardcode_libdirs"; then
- libdir="$hardcode_libdirs"
- eval rpath=\" $hardcode_libdir_flag_spec\"
- fi
- compile_rpath="$rpath"
-
- rpath=
- hardcode_libdirs=
- for libdir in $finalize_rpath; do
- if test -n "$hardcode_libdir_flag_spec"; then
- if test -n "$hardcode_libdir_separator"; then
- if test -z "$hardcode_libdirs"; then
- hardcode_libdirs="$libdir"
- else
- # Just accumulate the unique libdirs.
- case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
- *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
- ;;
- *)
- func_append hardcode_libdirs "$hardcode_libdir_separator$libdir"
- ;;
- esac
- fi
- else
- eval flag=\"$hardcode_libdir_flag_spec\"
- func_append rpath " $flag"
- fi
- elif test -n "$runpath_var"; then
- case "$finalize_perm_rpath " in
- *" $libdir "*) ;;
- *) func_append finalize_perm_rpath " $libdir" ;;
- esac
- fi
- done
- # Substitute the hardcoded libdirs into the rpath.
- if test -n "$hardcode_libdir_separator" &&
- test -n "$hardcode_libdirs"; then
- libdir="$hardcode_libdirs"
- eval rpath=\" $hardcode_libdir_flag_spec\"
- fi
- finalize_rpath="$rpath"
-
- if test -n "$libobjs" && test "$build_old_libs" = yes; then
- # Transform all the library objects into standard objects.
- compile_command=`$ECHO "$compile_command" | $SP2NL | $SED "$lo2o" | $NL2SP`
- finalize_command=`$ECHO "$finalize_command" | $SP2NL | $SED "$lo2o" | $NL2SP`
- fi
-
- func_generate_dlsyms "$outputname" "@PROGRAM@" "no"
-
- # template prelinking step
- if test -n "$prelink_cmds"; then
- func_execute_cmds "$prelink_cmds" 'exit $?'
- fi
-
- wrappers_required=yes
- case $host in
- *cegcc* | *mingw32ce*)
- # Disable wrappers for cegcc and mingw32ce hosts, we are cross compiling anyway.
- wrappers_required=no
- ;;
- *cygwin* | *mingw* )
- if test "$build_libtool_libs" != yes; then
- wrappers_required=no
- fi
- ;;
- *)
- if test "$need_relink" = no || test "$build_libtool_libs" != yes; then
- wrappers_required=no
- fi
- ;;
- esac
- if test "$wrappers_required" = no; then
- # Replace the output file specification.
- compile_command=`$ECHO "$compile_command" | $SED 's%@OUTPUT@%'"$output"'%g'`
- link_command="$compile_command$compile_rpath"
-
- # We have no uninstalled library dependencies, so finalize right now.
- exit_status=0
- func_show_eval "$link_command" 'exit_status=$?'
-
- if test -n "$postlink_cmds"; then
- func_to_tool_file "$output"
- postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'`
- func_execute_cmds "$postlink_cmds" 'exit $?'
- fi
-
- # Delete the generated files.
- if test -f "$output_objdir/${outputname}S.${objext}"; then
- func_show_eval '$RM "$output_objdir/${outputname}S.${objext}"'
- fi
-
- exit $exit_status
- fi
-
- if test -n "$compile_shlibpath$finalize_shlibpath"; then
- compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command"
- fi
- if test -n "$finalize_shlibpath"; then
- finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command"
- fi
-
- compile_var=
- finalize_var=
- if test -n "$runpath_var"; then
- if test -n "$perm_rpath"; then
- # We should set the runpath_var.
- rpath=
- for dir in $perm_rpath; do
- func_append rpath "$dir:"
- done
- compile_var="$runpath_var=\"$rpath\$$runpath_var\" "
- fi
- if test -n "$finalize_perm_rpath"; then
- # We should set the runpath_var.
- rpath=
- for dir in $finalize_perm_rpath; do
- func_append rpath "$dir:"
- done
- finalize_var="$runpath_var=\"$rpath\$$runpath_var\" "
- fi
- fi
-
- if test "$no_install" = yes; then
- # We don't need to create a wrapper script.
- link_command="$compile_var$compile_command$compile_rpath"
- # Replace the output file specification.
- link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output"'%g'`
- # Delete the old output file.
- $opt_dry_run || $RM $output
- # Link the executable and exit
- func_show_eval "$link_command" 'exit $?'
-
- if test -n "$postlink_cmds"; then
- func_to_tool_file "$output"
- postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'`
- func_execute_cmds "$postlink_cmds" 'exit $?'
- fi
-
- exit $EXIT_SUCCESS
- fi
-
- if test "$hardcode_action" = relink; then
- # Fast installation is not supported
- link_command="$compile_var$compile_command$compile_rpath"
- relink_command="$finalize_var$finalize_command$finalize_rpath"
-
- func_warning "this platform does not like uninstalled shared libraries"
- func_warning "\`$output' will be relinked during installation"
- else
- if test "$fast_install" != no; then
- link_command="$finalize_var$compile_command$finalize_rpath"
- if test "$fast_install" = yes; then
- relink_command=`$ECHO "$compile_var$compile_command$compile_rpath" | $SED 's%@OUTPUT@%\$progdir/\$file%g'`
- else
- # fast_install is set to needless
- relink_command=
- fi
- else
- link_command="$compile_var$compile_command$compile_rpath"
- relink_command="$finalize_var$finalize_command$finalize_rpath"
- fi
- fi
-
- # Replace the output file specification.
- link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'`
-
- # Delete the old output files.
- $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname
-
- func_show_eval "$link_command" 'exit $?'
-
- if test -n "$postlink_cmds"; then
- func_to_tool_file "$output_objdir/$outputname"
- postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'`
- func_execute_cmds "$postlink_cmds" 'exit $?'
- fi
-
- # Now create the wrapper script.
- func_verbose "creating $output"
-
- # Quote the relink command for shipping.
- if test -n "$relink_command"; then
- # Preserve any variables that may affect compiler behavior
- for var in $variables_saved_for_relink; do
- if eval test -z \"\${$var+set}\"; then
- relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command"
- elif eval var_value=\$$var; test -z "$var_value"; then
- relink_command="$var=; export $var; $relink_command"
- else
- func_quote_for_eval "$var_value"
- relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command"
- fi
- done
- relink_command="(cd `pwd`; $relink_command)"
- relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"`
- fi
-
- # Only actually do things if not in dry run mode.
- $opt_dry_run || {
- # win32 will think the script is a binary if it has
- # a .exe suffix, so we strip it off here.
- case $output in
- *.exe) func_stripname '' '.exe' "$output"
- output=$func_stripname_result ;;
- esac
- # test for cygwin because mv fails w/o .exe extensions
- case $host in
- *cygwin*)
- exeext=.exe
- func_stripname '' '.exe' "$outputname"
- outputname=$func_stripname_result ;;
- *) exeext= ;;
- esac
- case $host in
- *cygwin* | *mingw* )
- func_dirname_and_basename "$output" "" "."
- output_name=$func_basename_result
- output_path=$func_dirname_result
- cwrappersource="$output_path/$objdir/lt-$output_name.c"
- cwrapper="$output_path/$output_name.exe"
- $RM $cwrappersource $cwrapper
- trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15
-
- func_emit_cwrapperexe_src > $cwrappersource
-
- # The wrapper executable is built using the $host compiler,
- # because it contains $host paths and files. If cross-
- # compiling, it, like the target executable, must be
- # executed on the $host or under an emulation environment.
- $opt_dry_run || {
- $LTCC $LTCFLAGS -o $cwrapper $cwrappersource
- $STRIP $cwrapper
- }
-
- # Now, create the wrapper script for func_source use:
- func_ltwrapper_scriptname $cwrapper
- $RM $func_ltwrapper_scriptname_result
- trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15
- $opt_dry_run || {
- # note: this script will not be executed, so do not chmod.
- if test "x$build" = "x$host" ; then
- $cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result
- else
- func_emit_wrapper no > $func_ltwrapper_scriptname_result
- fi
- }
- ;;
- * )
- $RM $output
- trap "$RM $output; exit $EXIT_FAILURE" 1 2 15
-
- func_emit_wrapper no > $output
- chmod +x $output
- ;;
- esac
- }
- exit $EXIT_SUCCESS
- ;;
- esac
-
- # See if we need to build an old-fashioned archive.
- for oldlib in $oldlibs; do
-
- if test "$build_libtool_libs" = convenience; then
- oldobjs="$libobjs_save $symfileobj"
- addlibs="$convenience"
- build_libtool_libs=no
- else
- if test "$build_libtool_libs" = module; then
- oldobjs="$libobjs_save"
- build_libtool_libs=no
- else
- oldobjs="$old_deplibs $non_pic_objects"
- if test "$preload" = yes && test -f "$symfileobj"; then
- func_append oldobjs " $symfileobj"
- fi
- fi
- addlibs="$old_convenience"
- fi
-
- if test -n "$addlibs"; then
- gentop="$output_objdir/${outputname}x"
- func_append generated " $gentop"
-
- func_extract_archives $gentop $addlibs
- func_append oldobjs " $func_extract_archives_result"
- fi
-
- # Do each command in the archive commands.
- if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then
- cmds=$old_archive_from_new_cmds
- else
-
- # Add any objects from preloaded convenience libraries
- if test -n "$dlprefiles"; then
- gentop="$output_objdir/${outputname}x"
- func_append generated " $gentop"
-
- func_extract_archives $gentop $dlprefiles
- func_append oldobjs " $func_extract_archives_result"
- fi
-
- # POSIX demands no paths to be encoded in archives. We have
- # to avoid creating archives with duplicate basenames if we
- # might have to extract them afterwards, e.g., when creating a
- # static archive out of a convenience library, or when linking
- # the entirety of a libtool archive into another (currently
- # not supported by libtool).
- if (for obj in $oldobjs
- do
- func_basename "$obj"
- $ECHO "$func_basename_result"
- done | sort | sort -uc >/dev/null 2>&1); then
- :
- else
- echo "copying selected object files to avoid basename conflicts..."
- gentop="$output_objdir/${outputname}x"
- func_append generated " $gentop"
- func_mkdir_p "$gentop"
- save_oldobjs=$oldobjs
- oldobjs=
- counter=1
- for obj in $save_oldobjs
- do
- func_basename "$obj"
- objbase="$func_basename_result"
- case " $oldobjs " in
- " ") oldobjs=$obj ;;
- *[\ /]"$objbase "*)
- while :; do
- # Make sure we don't pick an alternate name that also
- # overlaps.
- newobj=lt$counter-$objbase
- func_arith $counter + 1
- counter=$func_arith_result
- case " $oldobjs " in
- *[\ /]"$newobj "*) ;;
- *) if test ! -f "$gentop/$newobj"; then break; fi ;;
- esac
- done
- func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj"
- func_append oldobjs " $gentop/$newobj"
- ;;
- *) func_append oldobjs " $obj" ;;
- esac
- done
- fi
- func_to_tool_file "$oldlib" func_convert_file_msys_to_w32
- tool_oldlib=$func_to_tool_file_result
- eval cmds=\"$old_archive_cmds\"
-
- func_len " $cmds"
- len=$func_len_result
- if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then
- cmds=$old_archive_cmds
- elif test -n "$archiver_list_spec"; then
- func_verbose "using command file archive linking..."
- for obj in $oldobjs
- do
- func_to_tool_file "$obj"
- $ECHO "$func_to_tool_file_result"
- done > $output_objdir/$libname.libcmd
- func_to_tool_file "$output_objdir/$libname.libcmd"
- oldobjs=" $archiver_list_spec$func_to_tool_file_result"
- cmds=$old_archive_cmds
- else
- # the command line is too long to link in one step, link in parts
- func_verbose "using piecewise archive linking..."
- save_RANLIB=$RANLIB
- RANLIB=:
- objlist=
- concat_cmds=
- save_oldobjs=$oldobjs
- oldobjs=
- # Is there a better way of finding the last object in the list?
- for obj in $save_oldobjs
- do
- last_oldobj=$obj
- done
- eval test_cmds=\"$old_archive_cmds\"
- func_len " $test_cmds"
- len0=$func_len_result
- len=$len0
- for obj in $save_oldobjs
- do
- func_len " $obj"
- func_arith $len + $func_len_result
- len=$func_arith_result
- func_append objlist " $obj"
- if test "$len" -lt "$max_cmd_len"; then
- :
- else
- # the above command should be used before it gets too long
- oldobjs=$objlist
- if test "$obj" = "$last_oldobj" ; then
- RANLIB=$save_RANLIB
- fi
- test -z "$concat_cmds" || concat_cmds=$concat_cmds~
- eval concat_cmds=\"\${concat_cmds}$old_archive_cmds\"
- objlist=
- len=$len0
- fi
- done
- RANLIB=$save_RANLIB
- oldobjs=$objlist
- if test "X$oldobjs" = "X" ; then
- eval cmds=\"\$concat_cmds\"
- else
- eval cmds=\"\$concat_cmds~\$old_archive_cmds\"
- fi
- fi
- fi
- func_execute_cmds "$cmds" 'exit $?'
- done
-
- test -n "$generated" && \
- func_show_eval "${RM}r$generated"
-
- # Now create the libtool archive.
- case $output in
- *.la)
- old_library=
- test "$build_old_libs" = yes && old_library="$libname.$libext"
- func_verbose "creating $output"
-
- # Preserve any variables that may affect compiler behavior
- for var in $variables_saved_for_relink; do
- if eval test -z \"\${$var+set}\"; then
- relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command"
- elif eval var_value=\$$var; test -z "$var_value"; then
- relink_command="$var=; export $var; $relink_command"
- else
- func_quote_for_eval "$var_value"
- relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command"
- fi
- done
- # Quote the link command for shipping.
- relink_command="(cd `pwd`; $SHELL $progpath $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)"
- relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"`
- if test "$hardcode_automatic" = yes ; then
- relink_command=
- fi
-
- # Only create the output if not a dry run.
- $opt_dry_run || {
- for installed in no yes; do
- if test "$installed" = yes; then
- if test -z "$install_libdir"; then
- break
- fi
- output="$output_objdir/$outputname"i
- # Replace all uninstalled libtool libraries with the installed ones
- newdependency_libs=
- for deplib in $dependency_libs; do
- case $deplib in
- *.la)
- func_basename "$deplib"
- name="$func_basename_result"
- func_resolve_sysroot "$deplib"
- eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $func_resolve_sysroot_result`
- test -z "$libdir" && \
- func_fatal_error "\`$deplib' is not a valid libtool archive"
- func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name"
- ;;
- -L*)
- func_stripname -L '' "$deplib"
- func_replace_sysroot "$func_stripname_result"
- func_append newdependency_libs " -L$func_replace_sysroot_result"
- ;;
- -R*)
- func_stripname -R '' "$deplib"
- func_replace_sysroot "$func_stripname_result"
- func_append newdependency_libs " -R$func_replace_sysroot_result"
- ;;
- *) func_append newdependency_libs " $deplib" ;;
- esac
- done
- dependency_libs="$newdependency_libs"
- newdlfiles=
-
- for lib in $dlfiles; do
- case $lib in
- *.la)
- func_basename "$lib"
- name="$func_basename_result"
- eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
- test -z "$libdir" && \
- func_fatal_error "\`$lib' is not a valid libtool archive"
- func_append newdlfiles " ${lt_sysroot:+=}$libdir/$name"
- ;;
- *) func_append newdlfiles " $lib" ;;
- esac
- done
- dlfiles="$newdlfiles"
- newdlprefiles=
- for lib in $dlprefiles; do
- case $lib in
- *.la)
- # Only pass preopened files to the pseudo-archive (for
- # eventual linking with the app. that links it) if we
- # didn't already link the preopened objects directly into
- # the library:
- func_basename "$lib"
- name="$func_basename_result"
- eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
- test -z "$libdir" && \
- func_fatal_error "\`$lib' is not a valid libtool archive"
- func_append newdlprefiles " ${lt_sysroot:+=}$libdir/$name"
- ;;
- esac
- done
- dlprefiles="$newdlprefiles"
- else
- newdlfiles=
- for lib in $dlfiles; do
- case $lib in
- [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;;
- *) abs=`pwd`"/$lib" ;;
- esac
- func_append newdlfiles " $abs"
- done
- dlfiles="$newdlfiles"
- newdlprefiles=
- for lib in $dlprefiles; do
- case $lib in
- [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;;
- *) abs=`pwd`"/$lib" ;;
- esac
- func_append newdlprefiles " $abs"
- done
- dlprefiles="$newdlprefiles"
- fi
- $RM $output
- # place dlname in correct position for cygwin
- # In fact, it would be nice if we could use this code for all target
- # systems that can't hard-code library paths into their executables
- # and that have no shared library path variable independent of PATH,
- # but it turns out we can't easily determine that from inspecting
- # libtool variables, so we have to hard-code the OSs to which it
- # applies here; at the moment, that means platforms that use the PE
- # object format with DLL files. See the long comment at the top of
- # tests/bindir.at for full details.
- tdlname=$dlname
- case $host,$output,$installed,$module,$dlname in
- *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll)
- # If a -bindir argument was supplied, place the dll there.
- if test "x$bindir" != x ;
- then
- func_relative_path "$install_libdir" "$bindir"
- tdlname=$func_relative_path_result$dlname
- else
- # Otherwise fall back on heuristic.
- tdlname=../bin/$dlname
- fi
- ;;
- esac
- $ECHO > $output "\
-# $outputname - a libtool library file
-# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
-#
-# Please DO NOT delete this file!
-# It is necessary for linking the library.
-
-# The name that we can dlopen(3).
-dlname='$tdlname'
-
-# Names of this library.
-library_names='$library_names'
-
-# The name of the static archive.
-old_library='$old_library'
-
-# Linker flags that can not go in dependency_libs.
-inherited_linker_flags='$new_inherited_linker_flags'
-
-# Libraries that this one depends upon.
-dependency_libs='$dependency_libs'
-
-# Names of additional weak libraries provided by this library
-weak_library_names='$weak_libs'
-
-# Version information for $libname.
-current=$current
-age=$age
-revision=$revision
-
-# Is this an already installed library?
-installed=$installed
-
-# Should we warn about portability when linking against -modules?
-shouldnotlink=$module
-
-# Files to dlopen/dlpreopen
-dlopen='$dlfiles'
-dlpreopen='$dlprefiles'
-
-# Directory that this library needs to be installed in:
-libdir='$install_libdir'"
- if test "$installed" = no && test "$need_relink" = yes; then
- $ECHO >> $output "\
-relink_command=\"$relink_command\""
- fi
- done
- }
-
- # Do a symbolic link so that the libtool archive can be found in
- # LD_LIBRARY_PATH before the program is installed.
- func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?'
- ;;
- esac
- exit $EXIT_SUCCESS
-}
-
-{ test "$opt_mode" = link || test "$opt_mode" = relink; } &&
- func_mode_link ${1+"$@"}
-
-
-# func_mode_uninstall arg...
-func_mode_uninstall ()
-{
- $opt_debug
- RM="$nonopt"
- files=
- rmforce=
- exit_status=0
-
- # This variable tells wrapper scripts just to set variables rather
- # than running their programs.
- libtool_install_magic="$magic"
-
- for arg
- do
- case $arg in
- -f) func_append RM " $arg"; rmforce=yes ;;
- -*) func_append RM " $arg" ;;
- *) func_append files " $arg" ;;
- esac
- done
-
- test -z "$RM" && \
- func_fatal_help "you must specify an RM program"
-
- rmdirs=
-
- for file in $files; do
- func_dirname "$file" "" "."
- dir="$func_dirname_result"
- if test "X$dir" = X.; then
- odir="$objdir"
- else
- odir="$dir/$objdir"
- fi
- func_basename "$file"
- name="$func_basename_result"
- test "$opt_mode" = uninstall && odir="$dir"
-
- # Remember odir for removal later, being careful to avoid duplicates
- if test "$opt_mode" = clean; then
- case " $rmdirs " in
- *" $odir "*) ;;
- *) func_append rmdirs " $odir" ;;
- esac
- fi
-
- # Don't error if the file doesn't exist and rm -f was used.
- if { test -L "$file"; } >/dev/null 2>&1 ||
- { test -h "$file"; } >/dev/null 2>&1 ||
- test -f "$file"; then
- :
- elif test -d "$file"; then
- exit_status=1
- continue
- elif test "$rmforce" = yes; then
- continue
- fi
-
- rmfiles="$file"
-
- case $name in
- *.la)
- # Possibly a libtool archive, so verify it.
- if func_lalib_p "$file"; then
- func_source $dir/$name
-
- # Delete the libtool libraries and symlinks.
- for n in $library_names; do
- func_append rmfiles " $odir/$n"
- done
- test -n "$old_library" && func_append rmfiles " $odir/$old_library"
-
- case "$opt_mode" in
- clean)
- case " $library_names " in
- *" $dlname "*) ;;
- *) test -n "$dlname" && func_append rmfiles " $odir/$dlname" ;;
- esac
- test -n "$libdir" && func_append rmfiles " $odir/$name $odir/${name}i"
- ;;
- uninstall)
- if test -n "$library_names"; then
- # Do each command in the postuninstall commands.
- func_execute_cmds "$postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1'
- fi
-
- if test -n "$old_library"; then
- # Do each command in the old_postuninstall commands.
- func_execute_cmds "$old_postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1'
- fi
- # FIXME: should reinstall the best remaining shared library.
- ;;
- esac
- fi
- ;;
-
- *.lo)
- # Possibly a libtool object, so verify it.
- if func_lalib_p "$file"; then
-
- # Read the .lo file
- func_source $dir/$name
-
- # Add PIC object to the list of files to remove.
- if test -n "$pic_object" &&
- test "$pic_object" != none; then
- func_append rmfiles " $dir/$pic_object"
- fi
-
- # Add non-PIC object to the list of files to remove.
- if test -n "$non_pic_object" &&
- test "$non_pic_object" != none; then
- func_append rmfiles " $dir/$non_pic_object"
- fi
- fi
- ;;
-
- *)
- if test "$opt_mode" = clean ; then
- noexename=$name
- case $file in
- *.exe)
- func_stripname '' '.exe' "$file"
- file=$func_stripname_result
- func_stripname '' '.exe' "$name"
- noexename=$func_stripname_result
- # $file with .exe has already been added to rmfiles,
- # add $file without .exe
- func_append rmfiles " $file"
- ;;
- esac
- # Do a test to see if this is a libtool program.
- if func_ltwrapper_p "$file"; then
- if func_ltwrapper_executable_p "$file"; then
- func_ltwrapper_scriptname "$file"
- relink_command=
- func_source $func_ltwrapper_scriptname_result
- func_append rmfiles " $func_ltwrapper_scriptname_result"
- else
- relink_command=
- func_source $dir/$noexename
- fi
-
- # note $name still contains .exe if it was in $file originally
- # as does the version of $file that was added into $rmfiles
- func_append rmfiles " $odir/$name $odir/${name}S.${objext}"
- if test "$fast_install" = yes && test -n "$relink_command"; then
- func_append rmfiles " $odir/lt-$name"
- fi
- if test "X$noexename" != "X$name" ; then
- func_append rmfiles " $odir/lt-${noexename}.c"
- fi
- fi
- fi
- ;;
- esac
- func_show_eval "$RM $rmfiles" 'exit_status=1'
- done
-
- # Try to remove the ${objdir}s in the directories where we deleted files
- for dir in $rmdirs; do
- if test -d "$dir"; then
- func_show_eval "rmdir $dir >/dev/null 2>&1"
- fi
- done
-
- exit $exit_status
-}
-
-{ test "$opt_mode" = uninstall || test "$opt_mode" = clean; } &&
- func_mode_uninstall ${1+"$@"}
-
-test -z "$opt_mode" && {
- help="$generic_help"
- func_fatal_help "you must specify a MODE"
-}
-
-test -z "$exec_cmd" && \
- func_fatal_help "invalid operation mode \`$opt_mode'"
-
-if test -n "$exec_cmd"; then
- eval exec "$exec_cmd"
- exit $EXIT_FAILURE
-fi
-
-exit $exit_status
-
-
-# The TAGs below are defined such that we never get into a situation
-# in which we disable both kinds of libraries. Given conflicting
-# choices, we go for a static library, that is the most portable,
-# since we can't tell whether shared libraries were disabled because
-# the user asked for that or because the platform doesn't support
-# them. This is particularly important on AIX, because we don't
-# support having both static and shared libraries enabled at the same
-# time on that platform, so we default to a shared-only configuration.
-# If a disable-shared tag is given, we'll fallback to a static-only
-# configuration. But we'll never go from static-only to shared-only.
-
-# ### BEGIN LIBTOOL TAG CONFIG: disable-shared
-build_libtool_libs=no
-build_old_libs=yes
-# ### END LIBTOOL TAG CONFIG: disable-shared
-
-# ### BEGIN LIBTOOL TAG CONFIG: disable-static
-build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac`
-# ### END LIBTOOL TAG CONFIG: disable-static
-
-# Local Variables:
-# mode:shell-script
-# sh-indentation:2
-# End:
-# vi:sw=2
-
diff --git a/build-aux/missing b/build-aux/missing
deleted file mode 100755
index 28055d2..0000000
--- a/build-aux/missing
+++ /dev/null
@@ -1,376 +0,0 @@
-#! /bin/sh
-# Common stub for a few missing GNU programs while installing.
-
-scriptversion=2009-04-28.21; # UTC
-
-# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006,
-# 2008, 2009 Free Software Foundation, Inc.
-# Originally by Fran,cois Pinard <pinard at iro.umontreal.ca>, 1996.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-if test $# -eq 0; then
- echo 1>&2 "Try \`$0 --help' for more information"
- exit 1
-fi
-
-run=:
-sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p'
-sed_minuso='s/.* -o \([^ ]*\).*/\1/p'
-
-# In the cases where this matters, `missing' is being run in the
-# srcdir already.
-if test -f configure.ac; then
- configure_ac=configure.ac
-else
- configure_ac=configure.in
-fi
-
-msg="missing on your system"
-
-case $1 in
---run)
- # Try to run requested program, and just exit if it succeeds.
- run=
- shift
- "$@" && exit 0
- # Exit code 63 means version mismatch. This often happens
- # when the user try to use an ancient version of a tool on
- # a file that requires a minimum version. In this case we
- # we should proceed has if the program had been absent, or
- # if --run hadn't been passed.
- if test $? = 63; then
- run=:
- msg="probably too old"
- fi
- ;;
-
- -h|--h|--he|--hel|--help)
- echo "\
-$0 [OPTION]... PROGRAM [ARGUMENT]...
-
-Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
-error status if there is no known handling for PROGRAM.
-
-Options:
- -h, --help display this help and exit
- -v, --version output version information and exit
- --run try to run the given command, and emulate it if it fails
-
-Supported PROGRAM values:
- aclocal touch file \`aclocal.m4'
- autoconf touch file \`configure'
- autoheader touch file \`config.h.in'
- autom4te touch the output file, or create a stub one
- automake touch all \`Makefile.in' files
- bison create \`y.tab.[ch]', if possible, from existing .[ch]
- flex create \`lex.yy.c', if possible, from existing .c
- help2man touch the output file
- lex create \`lex.yy.c', if possible, from existing .c
- makeinfo touch the output file
- tar try tar, gnutar, gtar, then tar without non-portable flags
- yacc create \`y.tab.[ch]', if possible, from existing .[ch]
-
-Version suffixes to PROGRAM as well as the prefixes \`gnu-', \`gnu', and
-\`g' are ignored when checking the name.
-
-Send bug reports to <bug-automake at gnu.org>."
- exit $?
- ;;
-
- -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
- echo "missing $scriptversion (GNU Automake)"
- exit $?
- ;;
-
- -*)
- echo 1>&2 "$0: Unknown \`$1' option"
- echo 1>&2 "Try \`$0 --help' for more information"
- exit 1
- ;;
-
-esac
-
-# normalize program name to check for.
-program=`echo "$1" | sed '
- s/^gnu-//; t
- s/^gnu//; t
- s/^g//; t'`
-
-# Now exit if we have it, but it failed. Also exit now if we
-# don't have it and --version was passed (most likely to detect
-# the program). This is about non-GNU programs, so use $1 not
-# $program.
-case $1 in
- lex*|yacc*)
- # Not GNU programs, they don't have --version.
- ;;
-
- tar*)
- if test -n "$run"; then
- echo 1>&2 "ERROR: \`tar' requires --run"
- exit 1
- elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
- exit 1
- fi
- ;;
-
- *)
- if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
- # We have it, but it failed.
- exit 1
- elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
- # Could not run --version or --help. This is probably someone
- # running `$TOOL --version' or `$TOOL --help' to check whether
- # $TOOL exists and not knowing $TOOL uses missing.
- exit 1
- fi
- ;;
-esac
-
-# If it does not exist, or fails to run (possibly an outdated version),
-# try to emulate it.
-case $program in
- aclocal*)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`acinclude.m4' or \`${configure_ac}'. You might want
- to install the \`Automake' and \`Perl' packages. Grab them from
- any GNU archive site."
- touch aclocal.m4
- ;;
-
- autoconf*)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`${configure_ac}'. You might want to install the
- \`Autoconf' and \`GNU m4' packages. Grab them from any GNU
- archive site."
- touch configure
- ;;
-
- autoheader*)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`acconfig.h' or \`${configure_ac}'. You might want
- to install the \`Autoconf' and \`GNU m4' packages. Grab them
- from any GNU archive site."
- files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
- test -z "$files" && files="config.h"
- touch_files=
- for f in $files; do
- case $f in
- *:*) touch_files="$touch_files "`echo "$f" |
- sed -e 's/^[^:]*://' -e 's/:.*//'`;;
- *) touch_files="$touch_files $f.in";;
- esac
- done
- touch $touch_files
- ;;
-
- automake*)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
- You might want to install the \`Automake' and \`Perl' packages.
- Grab them from any GNU archive site."
- find . -type f -name Makefile.am -print |
- sed 's/\.am$/.in/' |
- while read f; do touch "$f"; done
- ;;
-
- autom4te*)
- echo 1>&2 "\
-WARNING: \`$1' is needed, but is $msg.
- You might have modified some files without having the
- proper tools for further handling them.
- You can get \`$1' as part of \`Autoconf' from any GNU
- archive site."
-
- file=`echo "$*" | sed -n "$sed_output"`
- test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
- if test -f "$file"; then
- touch $file
- else
- test -z "$file" || exec >$file
- echo "#! /bin/sh"
- echo "# Created by GNU Automake missing as a replacement of"
- echo "# $ $@"
- echo "exit 0"
- chmod +x $file
- exit 1
- fi
- ;;
-
- bison*|yacc*)
- echo 1>&2 "\
-WARNING: \`$1' $msg. You should only need it if
- you modified a \`.y' file. You may need the \`Bison' package
- in order for those modifications to take effect. You can get
- \`Bison' from any GNU archive site."
- rm -f y.tab.c y.tab.h
- if test $# -ne 1; then
- eval LASTARG="\${$#}"
- case $LASTARG in
- *.y)
- SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
- if test -f "$SRCFILE"; then
- cp "$SRCFILE" y.tab.c
- fi
- SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
- if test -f "$SRCFILE"; then
- cp "$SRCFILE" y.tab.h
- fi
- ;;
- esac
- fi
- if test ! -f y.tab.h; then
- echo >y.tab.h
- fi
- if test ! -f y.tab.c; then
- echo 'main() { return 0; }' >y.tab.c
- fi
- ;;
-
- lex*|flex*)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified a \`.l' file. You may need the \`Flex' package
- in order for those modifications to take effect. You can get
- \`Flex' from any GNU archive site."
- rm -f lex.yy.c
- if test $# -ne 1; then
- eval LASTARG="\${$#}"
- case $LASTARG in
- *.l)
- SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
- if test -f "$SRCFILE"; then
- cp "$SRCFILE" lex.yy.c
- fi
- ;;
- esac
- fi
- if test ! -f lex.yy.c; then
- echo 'main() { return 0; }' >lex.yy.c
- fi
- ;;
-
- help2man*)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified a dependency of a manual page. You may need the
- \`Help2man' package in order for those modifications to take
- effect. You can get \`Help2man' from any GNU archive site."
-
- file=`echo "$*" | sed -n "$sed_output"`
- test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
- if test -f "$file"; then
- touch $file
- else
- test -z "$file" || exec >$file
- echo ".ab help2man is required to generate this page"
- exit $?
- fi
- ;;
-
- makeinfo*)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified a \`.texi' or \`.texinfo' file, or any other file
- indirectly affecting the aspect of the manual. The spurious
- call might also be the consequence of using a buggy \`make' (AIX,
- DU, IRIX). You might want to install the \`Texinfo' package or
- the \`GNU make' package. Grab either from any GNU archive site."
- # The file to touch is that specified with -o ...
- file=`echo "$*" | sed -n "$sed_output"`
- test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
- if test -z "$file"; then
- # ... or it is the one specified with @setfilename ...
- infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
- file=`sed -n '
- /^@setfilename/{
- s/.* \([^ ]*\) *$/\1/
- p
- q
- }' $infile`
- # ... or it is derived from the source name (dir/f.texi becomes f.info)
- test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info
- fi
- # If the file does not exist, the user really needs makeinfo;
- # let's fail without touching anything.
- test -f $file || exit 1
- touch $file
- ;;
-
- tar*)
- shift
-
- # We have already tried tar in the generic part.
- # Look for gnutar/gtar before invocation to avoid ugly error
- # messages.
- if (gnutar --version > /dev/null 2>&1); then
- gnutar "$@" && exit 0
- fi
- if (gtar --version > /dev/null 2>&1); then
- gtar "$@" && exit 0
- fi
- firstarg="$1"
- if shift; then
- case $firstarg in
- *o*)
- firstarg=`echo "$firstarg" | sed s/o//`
- tar "$firstarg" "$@" && exit 0
- ;;
- esac
- case $firstarg in
- *h*)
- firstarg=`echo "$firstarg" | sed s/h//`
- tar "$firstarg" "$@" && exit 0
- ;;
- esac
- fi
-
- echo 1>&2 "\
-WARNING: I can't seem to be able to run \`tar' with the given arguments.
- You may want to install GNU tar or Free paxutils, or check the
- command line arguments."
- exit 1
- ;;
-
- *)
- echo 1>&2 "\
-WARNING: \`$1' is needed, and is $msg.
- You might have modified some files without having the
- proper tools for further handling them. Check the \`README' file,
- it often tells you about the needed prerequisites for installing
- this package. You may also peek at any GNU archive site, in case
- some other package would contain this missing \`$1' program."
- exit 1
- ;;
-esac
-
-exit 0
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "scriptversion="
-# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-time-zone: "UTC"
-# time-stamp-end: "; # UTC"
-# End:
diff --git a/config.h.in b/config.h.in
deleted file mode 100644
index 0656ff3..0000000
--- a/config.h.in
+++ /dev/null
@@ -1,134 +0,0 @@
-/* config.h.in. Generated from configure.ac by autoheader. */
-
-/* Define if building universal (internal helper macro) */
-#undef AC_APPLE_UNIVERSAL_BUILD
-
-/* Define if GMP is version 3.xxx */
-#undef GMP_VERSION_3
-
-/* Define that architecture uses big endian storage */
-#undef HAVE_BIG_ENDIAN
-
-/* Define if BLAS is installed */
-#undef HAVE_BLAS
-
-/* Define if C interface to BLAS is available */
-#undef HAVE_CBLAS
-
-/* Define if C interface to LAPACK is available */
-#undef HAVE_CLAPACK
-
-/* Define to 1 if you have the <dlfcn.h> header file. */
-#undef HAVE_DLFCN_H
-
-/* Define to 1 if you have the <float.h> header file. */
-#undef HAVE_FLOAT_H
-
-/* Define if GIVARO is installed */
-#undef HAVE_GIVARO
-
-/* Define if GMP is installed */
-#undef HAVE_GMP
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#undef HAVE_INTTYPES_H
-
-/* Define if LAPACK is installed */
-#undef HAVE_LAPACK
-
-/* Define to 1 if you have the <limits.h> header file. */
-#undef HAVE_LIMITS_H
-
-/* Define that architecture uses little endian storage */
-#undef HAVE_LITTLE_ENDIAN
-
-/* Define to 1 if you have the <memory.h> header file. */
-#undef HAVE_MEMORY_H
-
-/* Define to 1 if you have the <stddef.h> header file. */
-#undef HAVE_STDDEF_H
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#undef HAVE_STDINT_H
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#undef HAVE_STDLIB_H
-
-/* Define to 1 if you have the <strings.h> header file. */
-#undef HAVE_STRINGS_H
-
-/* Define to 1 if you have the <string.h> header file. */
-#undef HAVE_STRING_H
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#undef HAVE_SYS_STAT_H
-
-/* Define to 1 if you have the <sys/time.h> header file. */
-#undef HAVE_SYS_TIME_H
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#undef HAVE_SYS_TYPES_H
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#undef HAVE_UNISTD_H
-
-/* Define to the sub-directory in which libtool stores uninstalled libraries.
- */
-#undef LT_OBJDIR
-
-/* Name of package */
-#undef PACKAGE
-
-/* Define to the address where bug reports for this package should be sent. */
-#undef PACKAGE_BUGREPORT
-
-/* Define to the full name of this package. */
-#undef PACKAGE_NAME
-
-/* Define to the full name and version of this package. */
-#undef PACKAGE_STRING
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the home page for this package. */
-#undef PACKAGE_URL
-
-/* Define to the version of this package. */
-#undef PACKAGE_VERSION
-
-/* The size of `char', as computed by sizeof. */
-#undef SIZEOF_CHAR
-
-/* The size of `int', as computed by sizeof. */
-#undef SIZEOF_INT
-
-/* The size of `long', as computed by sizeof. */
-#undef SIZEOF_LONG
-
-/* The size of `long long', as computed by sizeof. */
-#undef SIZEOF_LONG_LONG
-
-/* The size of `short', as computed by sizeof. */
-#undef SIZEOF_SHORT
-
-/* The size of `__int64', as computed by sizeof. */
-#undef SIZEOF___INT64
-
-/* Define to 1 if you have the ANSI C header files. */
-#undef STDC_HEADERS
-
-/* Version number of package */
-#undef VERSION
-
-/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
- significant byte first (like Motorola and SPARC, unlike Intel). */
-#if defined AC_APPLE_UNIVERSAL_BUILD
-# if defined __BIG_ENDIAN__
-# define WORDS_BIGENDIAN 1
-# endif
-#else
-# ifndef WORDS_BIGENDIAN
-# undef WORDS_BIGENDIAN
-# endif
-#endif
diff --git a/configure b/configure
deleted file mode 100755
index 55cf466..0000000
--- a/configure
+++ /dev/null
@@ -1,20727 +0,0 @@
-#! /bin/sh
-# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for FFLAS-FFPACK 1.6.0.
-#
-# Report bugs to <ffpack-devel at googlegroups.com>.
-#
-#
-# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
-#
-#
-# This configure script is free software; the Free Software Foundation
-# gives unlimited permission to copy, distribute and modify it.
-## -------------------- ##
-## M4sh Initialization. ##
-## -------------------- ##
-
-# Be more Bourne compatible
-DUALCASE=1; export DUALCASE # for MKS sh
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
- emulate sh
- NULLCMD=:
- # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
- # is contrary to our usage. Disable this feature.
- alias -g '${1+"$@"}'='"$@"'
- setopt NO_GLOB_SUBST
-else
- case `(set -o) 2>/dev/null` in #(
- *posix*) :
- set -o posix ;; #(
- *) :
- ;;
-esac
-fi
-
-
-as_nl='
-'
-export as_nl
-# Printing a long string crashes Solaris 7 /usr/bin/printf.
-as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
-as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
-as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
-# Prefer a ksh shell builtin over an external printf program on Solaris,
-# but without wasting forks for bash or zsh.
-if test -z "$BASH_VERSION$ZSH_VERSION" \
- && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
- as_echo='print -r --'
- as_echo_n='print -rn --'
-elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
- as_echo='printf %s\n'
- as_echo_n='printf %s'
-else
- if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
- as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
- as_echo_n='/usr/ucb/echo -n'
- else
- as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
- as_echo_n_body='eval
- arg=$1;
- case $arg in #(
- *"$as_nl"*)
- expr "X$arg" : "X\\(.*\\)$as_nl";
- arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
- esac;
- expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
- '
- export as_echo_n_body
- as_echo_n='sh -c $as_echo_n_body as_echo'
- fi
- export as_echo_body
- as_echo='sh -c $as_echo_body as_echo'
-fi
-
-# The user is always right.
-if test "${PATH_SEPARATOR+set}" != set; then
- PATH_SEPARATOR=:
- (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
- (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
- PATH_SEPARATOR=';'
- }
-fi
-
-
-# IFS
-# We need space, tab and new line, in precisely that order. Quoting is
-# there to prevent editors from complaining about space-tab.
-# (If _AS_PATH_WALK were called with IFS unset, it would disable word
-# splitting by setting IFS to empty value.)
-IFS=" "" $as_nl"
-
-# Find who we are. Look in the path if we contain no directory separator.
-as_myself=
-case $0 in #((
- *[\\/]* ) as_myself=$0 ;;
- *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
- done
-IFS=$as_save_IFS
-
- ;;
-esac
-# We did not find ourselves, most probably we were run as `sh COMMAND'
-# in which case we are not to be found in the path.
-if test "x$as_myself" = x; then
- as_myself=$0
-fi
-if test ! -f "$as_myself"; then
- $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
- exit 1
-fi
-
-# Unset variables that we do not need and which cause bugs (e.g. in
-# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1"
-# suppresses any "Segmentation fault" message there. '((' could
-# trigger a bug in pdksh 5.2.14.
-for as_var in BASH_ENV ENV MAIL MAILPATH
-do eval test x\${$as_var+set} = xset \
- && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
-done
-PS1='$ '
-PS2='> '
-PS4='+ '
-
-# NLS nuisances.
-LC_ALL=C
-export LC_ALL
-LANGUAGE=C
-export LANGUAGE
-
-# CDPATH.
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
-
-# Use a proper internal environment variable to ensure we don't fall
- # into an infinite loop, continuously re-executing ourselves.
- if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then
- _as_can_reexec=no; export _as_can_reexec;
- # We cannot yet assume a decent shell, so we have to provide a
-# neutralization value for shells without unset; and this also
-# works around shells that cannot unset nonexistent variables.
-# Preserve -v and -x to the replacement shell.
-BASH_ENV=/dev/null
-ENV=/dev/null
-(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
-case $- in # ((((
- *v*x* | *x*v* ) as_opts=-vx ;;
- *v* ) as_opts=-v ;;
- *x* ) as_opts=-x ;;
- * ) as_opts= ;;
-esac
-exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
-# Admittedly, this is quite paranoid, since all the known shells bail
-# out after a failed `exec'.
-$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
-as_fn_exit 255
- fi
- # We don't want this to propagate to other subprocesses.
- { _as_can_reexec=; unset _as_can_reexec;}
-if test "x$CONFIG_SHELL" = x; then
- as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
- emulate sh
- NULLCMD=:
- # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which
- # is contrary to our usage. Disable this feature.
- alias -g '\${1+\"\$@\"}'='\"\$@\"'
- setopt NO_GLOB_SUBST
-else
- case \`(set -o) 2>/dev/null\` in #(
- *posix*) :
- set -o posix ;; #(
- *) :
- ;;
-esac
-fi
-"
- as_required="as_fn_return () { (exit \$1); }
-as_fn_success () { as_fn_return 0; }
-as_fn_failure () { as_fn_return 1; }
-as_fn_ret_success () { return 0; }
-as_fn_ret_failure () { return 1; }
-
-exitcode=0
-as_fn_success || { exitcode=1; echo as_fn_success failed.; }
-as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; }
-as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; }
-as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; }
-if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
-
-else
- exitcode=1; echo positional parameters were not saved.
-fi
-test x\$exitcode = x0 || exit 1
-test -x / || exit 1"
- as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
- as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
- eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
- test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1
-test \$(( 1 + 1 )) = 2 || exit 1
-
- test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || (
- ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
- ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO
- ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO
- PATH=/empty FPATH=/empty; export PATH FPATH
- test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\
- || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1"
- if (eval "$as_required") 2>/dev/null; then :
- as_have_required=yes
-else
- as_have_required=no
-fi
- if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then :
-
-else
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-as_found=false
-for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- as_found=:
- case $as_dir in #(
- /*)
- for as_base in sh bash ksh sh5; do
- # Try only shells that exist, to save several forks.
- as_shell=$as_dir/$as_base
- if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
- { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then :
- CONFIG_SHELL=$as_shell as_have_required=yes
- if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then :
- break 2
-fi
-fi
- done;;
- esac
- as_found=false
-done
-$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } &&
- { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then :
- CONFIG_SHELL=$SHELL as_have_required=yes
-fi; }
-IFS=$as_save_IFS
-
-
- if test "x$CONFIG_SHELL" != x; then :
- export CONFIG_SHELL
- # We cannot yet assume a decent shell, so we have to provide a
-# neutralization value for shells without unset; and this also
-# works around shells that cannot unset nonexistent variables.
-# Preserve -v and -x to the replacement shell.
-BASH_ENV=/dev/null
-ENV=/dev/null
-(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
-case $- in # ((((
- *v*x* | *x*v* ) as_opts=-vx ;;
- *v* ) as_opts=-v ;;
- *x* ) as_opts=-x ;;
- * ) as_opts= ;;
-esac
-exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
-# Admittedly, this is quite paranoid, since all the known shells bail
-# out after a failed `exec'.
-$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
-exit 255
-fi
-
- if test x$as_have_required = xno; then :
- $as_echo "$0: This script requires a shell more modern than all"
- $as_echo "$0: the shells that I found on your system."
- if test x${ZSH_VERSION+set} = xset ; then
- $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should"
- $as_echo "$0: be upgraded to zsh 4.3.4 or later."
- else
- $as_echo "$0: Please tell bug-autoconf at gnu.org and
-$0: ffpack-devel at googlegroups.com about your system,
-$0: including any error possibly output before this
-$0: message. Then install a modern shell, or manually run
-$0: the script under such a shell if you do have one."
- fi
- exit 1
-fi
-fi
-fi
-SHELL=${CONFIG_SHELL-/bin/sh}
-export SHELL
-# Unset more variables known to interfere with behavior of common tools.
-CLICOLOR_FORCE= GREP_OPTIONS=
-unset CLICOLOR_FORCE GREP_OPTIONS
-
-## --------------------- ##
-## M4sh Shell Functions. ##
-## --------------------- ##
-# as_fn_unset VAR
-# ---------------
-# Portably unset VAR.
-as_fn_unset ()
-{
- { eval $1=; unset $1;}
-}
-as_unset=as_fn_unset
-
-# as_fn_set_status STATUS
-# -----------------------
-# Set $? to STATUS, without forking.
-as_fn_set_status ()
-{
- return $1
-} # as_fn_set_status
-
-# as_fn_exit STATUS
-# -----------------
-# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
-as_fn_exit ()
-{
- set +e
- as_fn_set_status $1
- exit $1
-} # as_fn_exit
-
-# as_fn_mkdir_p
-# -------------
-# Create "$as_dir" as a directory, including parents if necessary.
-as_fn_mkdir_p ()
-{
-
- case $as_dir in #(
- -*) as_dir=./$as_dir;;
- esac
- test -d "$as_dir" || eval $as_mkdir_p || {
- as_dirs=
- while :; do
- case $as_dir in #(
- *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
- *) as_qdir=$as_dir;;
- esac
- as_dirs="'$as_qdir' $as_dirs"
- as_dir=`$as_dirname -- "$as_dir" ||
-$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$as_dir" : 'X\(//\)[^/]' \| \
- X"$as_dir" : 'X\(//\)$' \| \
- X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$as_dir" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
- test -d "$as_dir" && break
- done
- test -z "$as_dirs" || eval "mkdir $as_dirs"
- } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
-
-
-} # as_fn_mkdir_p
-
-# as_fn_executable_p FILE
-# -----------------------
-# Test if FILE is an executable regular file.
-as_fn_executable_p ()
-{
- test -f "$1" && test -x "$1"
-} # as_fn_executable_p
-# as_fn_append VAR VALUE
-# ----------------------
-# Append the text in VALUE to the end of the definition contained in VAR. Take
-# advantage of any shell optimizations that allow amortized linear growth over
-# repeated appends, instead of the typical quadratic growth present in naive
-# implementations.
-if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
- eval 'as_fn_append ()
- {
- eval $1+=\$2
- }'
-else
- as_fn_append ()
- {
- eval $1=\$$1\$2
- }
-fi # as_fn_append
-
-# as_fn_arith ARG...
-# ------------------
-# Perform arithmetic evaluation on the ARGs, and store the result in the
-# global $as_val. Take advantage of shells that can avoid forks. The arguments
-# must be portable across $(()) and expr.
-if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
- eval 'as_fn_arith ()
- {
- as_val=$(( $* ))
- }'
-else
- as_fn_arith ()
- {
- as_val=`expr "$@" || test $? -eq 1`
- }
-fi # as_fn_arith
-
-
-# as_fn_error STATUS ERROR [LINENO LOG_FD]
-# ----------------------------------------
-# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
-# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
-# script with STATUS, using 1 if that was 0.
-as_fn_error ()
-{
- as_status=$1; test $as_status -eq 0 && as_status=1
- if test "$4"; then
- as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
- fi
- $as_echo "$as_me: error: $2" >&2
- as_fn_exit $as_status
-} # as_fn_error
-
-if expr a : '\(a\)' >/dev/null 2>&1 &&
- test "X`expr 00001 : '.*\(...\)'`" = X001; then
- as_expr=expr
-else
- as_expr=false
-fi
-
-if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
- as_basename=basename
-else
- as_basename=false
-fi
-
-if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
- as_dirname=dirname
-else
- as_dirname=false
-fi
-
-as_me=`$as_basename -- "$0" ||
-$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
- X"$0" : 'X\(//\)$' \| \
- X"$0" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X/"$0" |
- sed '/^.*\/\([^/][^/]*\)\/*$/{
- s//\1/
- q
- }
- /^X\/\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\/\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
-
-# Avoid depending upon Character Ranges.
-as_cr_letters='abcdefghijklmnopqrstuvwxyz'
-as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-as_cr_Letters=$as_cr_letters$as_cr_LETTERS
-as_cr_digits='0123456789'
-as_cr_alnum=$as_cr_Letters$as_cr_digits
-
-
- as_lineno_1=$LINENO as_lineno_1a=$LINENO
- as_lineno_2=$LINENO as_lineno_2a=$LINENO
- eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
- test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {
- # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-)
- sed -n '
- p
- /[$]LINENO/=
- ' <$as_myself |
- sed '
- s/[$]LINENO.*/&-/
- t lineno
- b
- :lineno
- N
- :loop
- s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
- t loop
- s/-\n.*//
- ' >$as_me.lineno &&
- chmod +x "$as_me.lineno" ||
- { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
-
- # If we had to re-execute with $CONFIG_SHELL, we're ensured to have
- # already done that, so ensure we don't try to do so again and fall
- # in an infinite loop. This has already happened in practice.
- _as_can_reexec=no; export _as_can_reexec
- # Don't try to exec as it changes $[0], causing all sort of problems
- # (the dirname of $[0] is not the place where we might find the
- # original and so on. Autoconf is especially sensitive to this).
- . "./$as_me.lineno"
- # Exit status is that of the last command.
- exit
-}
-
-ECHO_C= ECHO_N= ECHO_T=
-case `echo -n x` in #(((((
--n*)
- case `echo 'xy\c'` in
- *c*) ECHO_T=' ';; # ECHO_T is single tab character.
- xy) ECHO_C='\c';;
- *) echo `echo ksh88 bug on AIX 6.1` > /dev/null
- ECHO_T=' ';;
- esac;;
-*)
- ECHO_N='-n';;
-esac
-
-rm -f conf$$ conf$$.exe conf$$.file
-if test -d conf$$.dir; then
- rm -f conf$$.dir/conf$$.file
-else
- rm -f conf$$.dir
- mkdir conf$$.dir 2>/dev/null
-fi
-if (echo >conf$$.file) 2>/dev/null; then
- if ln -s conf$$.file conf$$ 2>/dev/null; then
- as_ln_s='ln -s'
- # ... but there are two gotchas:
- # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
- # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
- # In both cases, we have to default to `cp -pR'.
- ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
- as_ln_s='cp -pR'
- elif ln conf$$.file conf$$ 2>/dev/null; then
- as_ln_s=ln
- else
- as_ln_s='cp -pR'
- fi
-else
- as_ln_s='cp -pR'
-fi
-rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
-rmdir conf$$.dir 2>/dev/null
-
-if mkdir -p . 2>/dev/null; then
- as_mkdir_p='mkdir -p "$as_dir"'
-else
- test -d ./-p && rmdir ./-p
- as_mkdir_p=false
-fi
-
-as_test_x='test -x'
-as_executable_p=as_fn_executable_p
-
-# Sed expression to map a string onto a valid CPP name.
-as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
-
-# Sed expression to map a string onto a valid variable name.
-as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
-
-SHELL=${CONFIG_SHELL-/bin/sh}
-
-
-test -n "$DJDIR" || exec 7<&0 </dev/null
-exec 6>&1
-
-# Name of the host.
-# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
-# so uname gets run too.
-ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
-
-#
-# Initializations.
-#
-ac_default_prefix=/usr/local
-ac_clean_files=
-ac_config_libobj_dir=.
-LIBOBJS=
-cross_compiling=no
-subdirs=
-MFLAGS=
-MAKEFLAGS=
-
-# Identity of this package.
-PACKAGE_NAME='FFLAS-FFPACK'
-PACKAGE_TARNAME='fflas-ffpack'
-PACKAGE_VERSION='1.6.0'
-PACKAGE_STRING='FFLAS-FFPACK 1.6.0'
-PACKAGE_BUGREPORT='ffpack-devel at googlegroups.com'
-PACKAGE_URL='http://www.linalg.org/projects/fflas-ffpack'
-
-# Factoring default headers for most tests.
-ac_includes_default="\
-#include <stdio.h>
-#ifdef HAVE_SYS_TYPES_H
-# include <sys/types.h>
-#endif
-#ifdef HAVE_SYS_STAT_H
-# include <sys/stat.h>
-#endif
-#ifdef STDC_HEADERS
-# include <stdlib.h>
-# include <stddef.h>
-#else
-# ifdef HAVE_STDLIB_H
-# include <stdlib.h>
-# endif
-#endif
-#ifdef HAVE_STRING_H
-# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
-# include <memory.h>
-# endif
-# include <string.h>
-#endif
-#ifdef HAVE_STRINGS_H
-# include <strings.h>
-#endif
-#ifdef HAVE_INTTYPES_H
-# include <inttypes.h>
-#endif
-#ifdef HAVE_STDINT_H
-# include <stdint.h>
-#endif
-#ifdef HAVE_UNISTD_H
-# include <unistd.h>
-#endif"
-
-ac_subst_vars='am__EXEEXT_FALSE
-am__EXEEXT_TRUE
-LTLIBOBJS
-LIBOBJS
-FFLASFFPACK_BUILD_DOC_FALSE
-FFLASFFPACK_BUILD_DOC_TRUE
-FFLASFFPACK_DOC_PATH
-LAPACK_LIBS
-FFLASFFPACK_HAVE_BLAS_FALSE
-FFLASFFPACK_HAVE_BLAS_TRUE
-BLAS_FOUND
-BLAS_PATH
-CBLAS_FLAG
-BLAS_LIBS
-BLAS_VENDOR
-LINBOX_HAVE_GIVARO_FALSE
-LINBOX_HAVE_GIVARO_TRUE
-GIVARO_LIBS
-GIVARO_CFLAGS
-GMP_VERSION
-GMP_LIBS
-GMP_CFLAGS
-OTOOL64
-OTOOL
-LIPO
-NMEDIT
-DSYMUTIL
-MANIFEST_TOOL
-RANLIB
-ac_ct_AR
-AR
-DLLTOOL
-OBJDUMP
-LN_S
-NM
-ac_ct_DUMPBIN
-DUMPBIN
-LD
-FGREP
-SED
-ac_ct_CC
-CFLAGS
-CC
-host_os
-host_vendor
-host_cpu
-host
-build_os
-build_vendor
-build_cpu
-build
-LIBTOOL
-EGREP
-GREP
-CXXCPP
-TESTS_CFLAGS
-DEBUG_CFLAGS
-DEFAULT_CFLAGS
-CCNAM
-OBJEXT
-EXEEXT
-ac_ct_CXX
-CPPFLAGS
-LDFLAGS
-CXXFLAGS
-CXX
-WARN
-PROF
-PROFILE_FALSE
-PROFILE_TRUE
-DBG
-DEBUG_FALSE
-DEBUG_TRUE
-INSIDE_GNOME_COMMON_FALSE
-INSIDE_GNOME_COMMON_TRUE
-am__nodep
-AMDEPBACKSLASH
-AMDEP_FALSE
-AMDEP_TRUE
-MAINT
-MAINTAINER_MODE_FALSE
-MAINTAINER_MODE_TRUE
-RM
-am__untar
-am__tar
-AMTAR
-am__leading_dot
-SET_MAKE
-AWK
-mkdir_p
-MKDIR_P
-INSTALL_STRIP_PROGRAM
-STRIP
-install_sh
-MAKEINFO
-AUTOHEADER
-AUTOMAKE
-AUTOCONF
-ACLOCAL
-VERSION
-PACKAGE
-CYGPATH_W
-am__isrc
-INSTALL_DATA
-INSTALL_SCRIPT
-INSTALL_PROGRAM
-target_alias
-host_alias
-build_alias
-LIBS
-ECHO_T
-ECHO_N
-ECHO_C
-DEFS
-mandir
-localedir
-libdir
-psdir
-pdfdir
-dvidir
-htmldir
-infodir
-docdir
-oldincludedir
-includedir
-localstatedir
-sharedstatedir
-sysconfdir
-datadir
-datarootdir
-libexecdir
-sbindir
-bindir
-program_transform_name
-prefix
-exec_prefix
-PACKAGE_URL
-PACKAGE_BUGREPORT
-PACKAGE_STRING
-PACKAGE_VERSION
-PACKAGE_TARNAME
-PACKAGE_NAME
-PATH_SEPARATOR
-SHELL'
-ac_subst_files=''
-ac_user_opts='
-enable_option_checking
-enable_maintainer_mode
-enable_dependency_tracking
-enable_debug
-enable_profile
-enable_warnings
-enable_shared
-enable_static
-with_pic
-enable_fast_install
-with_gnu_ld
-with_sysroot
-enable_libtool_lock
-with_default
-with_all
-with_gmp
-with_givaro
-with_blas
-with_gotoblas2
-with_gsl
-with_cblas
-with_otherblas
-with_lapack
-with_docdir
-with_doxygen
-enable_doc
-enable_optimization
-'
- ac_precious_vars='build_alias
-host_alias
-target_alias
-CXX
-CXXFLAGS
-LDFLAGS
-LIBS
-CPPFLAGS
-CCC
-CXXCPP
-CC
-CFLAGS'
-
-
-# Initialize some variables set by options.
-ac_init_help=
-ac_init_version=false
-ac_unrecognized_opts=
-ac_unrecognized_sep=
-# The variables have the same names as the options, with
-# dashes changed to underlines.
-cache_file=/dev/null
-exec_prefix=NONE
-no_create=
-no_recursion=
-prefix=NONE
-program_prefix=NONE
-program_suffix=NONE
-program_transform_name=s,x,x,
-silent=
-site=
-srcdir=
-verbose=
-x_includes=NONE
-x_libraries=NONE
-
-# Installation directory options.
-# These are left unexpanded so users can "make install exec_prefix=/foo"
-# and all the variables that are supposed to be based on exec_prefix
-# by default will actually change.
-# Use braces instead of parens because sh, perl, etc. also accept them.
-# (The list follows the same order as the GNU Coding Standards.)
-bindir='${exec_prefix}/bin'
-sbindir='${exec_prefix}/sbin'
-libexecdir='${exec_prefix}/libexec'
-datarootdir='${prefix}/share'
-datadir='${datarootdir}'
-sysconfdir='${prefix}/etc'
-sharedstatedir='${prefix}/com'
-localstatedir='${prefix}/var'
-includedir='${prefix}/include'
-oldincludedir='/usr/include'
-docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
-infodir='${datarootdir}/info'
-htmldir='${docdir}'
-dvidir='${docdir}'
-pdfdir='${docdir}'
-psdir='${docdir}'
-libdir='${exec_prefix}/lib'
-localedir='${datarootdir}/locale'
-mandir='${datarootdir}/man'
-
-ac_prev=
-ac_dashdash=
-for ac_option
-do
- # If the previous option needs an argument, assign it.
- if test -n "$ac_prev"; then
- eval $ac_prev=\$ac_option
- ac_prev=
- continue
- fi
-
- case $ac_option in
- *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
- *=) ac_optarg= ;;
- *) ac_optarg=yes ;;
- esac
-
- # Accept the important Cygnus configure options, so we can diagnose typos.
-
- case $ac_dashdash$ac_option in
- --)
- ac_dashdash=yes ;;
-
- -bindir | --bindir | --bindi | --bind | --bin | --bi)
- ac_prev=bindir ;;
- -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
- bindir=$ac_optarg ;;
-
- -build | --build | --buil | --bui | --bu)
- ac_prev=build_alias ;;
- -build=* | --build=* | --buil=* | --bui=* | --bu=*)
- build_alias=$ac_optarg ;;
-
- -cache-file | --cache-file | --cache-fil | --cache-fi \
- | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
- ac_prev=cache_file ;;
- -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
- | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
- cache_file=$ac_optarg ;;
-
- --config-cache | -C)
- cache_file=config.cache ;;
-
- -datadir | --datadir | --datadi | --datad)
- ac_prev=datadir ;;
- -datadir=* | --datadir=* | --datadi=* | --datad=*)
- datadir=$ac_optarg ;;
-
- -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
- | --dataroo | --dataro | --datar)
- ac_prev=datarootdir ;;
- -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
- | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
- datarootdir=$ac_optarg ;;
-
- -disable-* | --disable-*)
- ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
- # Reject names that are not valid shell variable names.
- expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error $? "invalid feature name: $ac_useropt"
- ac_useropt_orig=$ac_useropt
- ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
- case $ac_user_opts in
- *"
-"enable_$ac_useropt"
-"*) ;;
- *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig"
- ac_unrecognized_sep=', ';;
- esac
- eval enable_$ac_useropt=no ;;
-
- -docdir | --docdir | --docdi | --doc | --do)
- ac_prev=docdir ;;
- -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
- docdir=$ac_optarg ;;
-
- -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
- ac_prev=dvidir ;;
- -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
- dvidir=$ac_optarg ;;
-
- -enable-* | --enable-*)
- ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
- # Reject names that are not valid shell variable names.
- expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error $? "invalid feature name: $ac_useropt"
- ac_useropt_orig=$ac_useropt
- ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
- case $ac_user_opts in
- *"
-"enable_$ac_useropt"
-"*) ;;
- *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig"
- ac_unrecognized_sep=', ';;
- esac
- eval enable_$ac_useropt=\$ac_optarg ;;
-
- -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
- | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
- | --exec | --exe | --ex)
- ac_prev=exec_prefix ;;
- -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
- | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
- | --exec=* | --exe=* | --ex=*)
- exec_prefix=$ac_optarg ;;
-
- -gas | --gas | --ga | --g)
- # Obsolete; use --with-gas.
- with_gas=yes ;;
-
- -help | --help | --hel | --he | -h)
- ac_init_help=long ;;
- -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
- ac_init_help=recursive ;;
- -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
- ac_init_help=short ;;
-
- -host | --host | --hos | --ho)
- ac_prev=host_alias ;;
- -host=* | --host=* | --hos=* | --ho=*)
- host_alias=$ac_optarg ;;
-
- -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
- ac_prev=htmldir ;;
- -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
- | --ht=*)
- htmldir=$ac_optarg ;;
-
- -includedir | --includedir | --includedi | --included | --include \
- | --includ | --inclu | --incl | --inc)
- ac_prev=includedir ;;
- -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
- | --includ=* | --inclu=* | --incl=* | --inc=*)
- includedir=$ac_optarg ;;
-
- -infodir | --infodir | --infodi | --infod | --info | --inf)
- ac_prev=infodir ;;
- -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
- infodir=$ac_optarg ;;
-
- -libdir | --libdir | --libdi | --libd)
- ac_prev=libdir ;;
- -libdir=* | --libdir=* | --libdi=* | --libd=*)
- libdir=$ac_optarg ;;
-
- -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
- | --libexe | --libex | --libe)
- ac_prev=libexecdir ;;
- -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
- | --libexe=* | --libex=* | --libe=*)
- libexecdir=$ac_optarg ;;
-
- -localedir | --localedir | --localedi | --localed | --locale)
- ac_prev=localedir ;;
- -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
- localedir=$ac_optarg ;;
-
- -localstatedir | --localstatedir | --localstatedi | --localstated \
- | --localstate | --localstat | --localsta | --localst | --locals)
- ac_prev=localstatedir ;;
- -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
- | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
- localstatedir=$ac_optarg ;;
-
- -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
- ac_prev=mandir ;;
- -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
- mandir=$ac_optarg ;;
-
- -nfp | --nfp | --nf)
- # Obsolete; use --without-fp.
- with_fp=no ;;
-
- -no-create | --no-create | --no-creat | --no-crea | --no-cre \
- | --no-cr | --no-c | -n)
- no_create=yes ;;
-
- -no-recursion | --no-recursion | --no-recursio | --no-recursi \
- | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
- no_recursion=yes ;;
-
- -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
- | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
- | --oldin | --oldi | --old | --ol | --o)
- ac_prev=oldincludedir ;;
- -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
- | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
- | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
- oldincludedir=$ac_optarg ;;
-
- -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
- ac_prev=prefix ;;
- -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
- prefix=$ac_optarg ;;
-
- -program-prefix | --program-prefix | --program-prefi | --program-pref \
- | --program-pre | --program-pr | --program-p)
- ac_prev=program_prefix ;;
- -program-prefix=* | --program-prefix=* | --program-prefi=* \
- | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
- program_prefix=$ac_optarg ;;
-
- -program-suffix | --program-suffix | --program-suffi | --program-suff \
- | --program-suf | --program-su | --program-s)
- ac_prev=program_suffix ;;
- -program-suffix=* | --program-suffix=* | --program-suffi=* \
- | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
- program_suffix=$ac_optarg ;;
-
- -program-transform-name | --program-transform-name \
- | --program-transform-nam | --program-transform-na \
- | --program-transform-n | --program-transform- \
- | --program-transform | --program-transfor \
- | --program-transfo | --program-transf \
- | --program-trans | --program-tran \
- | --progr-tra | --program-tr | --program-t)
- ac_prev=program_transform_name ;;
- -program-transform-name=* | --program-transform-name=* \
- | --program-transform-nam=* | --program-transform-na=* \
- | --program-transform-n=* | --program-transform-=* \
- | --program-transform=* | --program-transfor=* \
- | --program-transfo=* | --program-transf=* \
- | --program-trans=* | --program-tran=* \
- | --progr-tra=* | --program-tr=* | --program-t=*)
- program_transform_name=$ac_optarg ;;
-
- -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
- ac_prev=pdfdir ;;
- -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
- pdfdir=$ac_optarg ;;
-
- -psdir | --psdir | --psdi | --psd | --ps)
- ac_prev=psdir ;;
- -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
- psdir=$ac_optarg ;;
-
- -q | -quiet | --quiet | --quie | --qui | --qu | --q \
- | -silent | --silent | --silen | --sile | --sil)
- silent=yes ;;
-
- -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
- ac_prev=sbindir ;;
- -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
- | --sbi=* | --sb=*)
- sbindir=$ac_optarg ;;
-
- -sharedstatedir | --sharedstatedir | --sharedstatedi \
- | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
- | --sharedst | --shareds | --shared | --share | --shar \
- | --sha | --sh)
- ac_prev=sharedstatedir ;;
- -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
- | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
- | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
- | --sha=* | --sh=*)
- sharedstatedir=$ac_optarg ;;
-
- -site | --site | --sit)
- ac_prev=site ;;
- -site=* | --site=* | --sit=*)
- site=$ac_optarg ;;
-
- -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
- ac_prev=srcdir ;;
- -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
- srcdir=$ac_optarg ;;
-
- -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
- | --syscon | --sysco | --sysc | --sys | --sy)
- ac_prev=sysconfdir ;;
- -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
- | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
- sysconfdir=$ac_optarg ;;
-
- -target | --target | --targe | --targ | --tar | --ta | --t)
- ac_prev=target_alias ;;
- -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
- target_alias=$ac_optarg ;;
-
- -v | -verbose | --verbose | --verbos | --verbo | --verb)
- verbose=yes ;;
-
- -version | --version | --versio | --versi | --vers | -V)
- ac_init_version=: ;;
-
- -with-* | --with-*)
- ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
- # Reject names that are not valid shell variable names.
- expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error $? "invalid package name: $ac_useropt"
- ac_useropt_orig=$ac_useropt
- ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
- case $ac_user_opts in
- *"
-"with_$ac_useropt"
-"*) ;;
- *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig"
- ac_unrecognized_sep=', ';;
- esac
- eval with_$ac_useropt=\$ac_optarg ;;
-
- -without-* | --without-*)
- ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
- # Reject names that are not valid shell variable names.
- expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error $? "invalid package name: $ac_useropt"
- ac_useropt_orig=$ac_useropt
- ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
- case $ac_user_opts in
- *"
-"with_$ac_useropt"
-"*) ;;
- *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig"
- ac_unrecognized_sep=', ';;
- esac
- eval with_$ac_useropt=no ;;
-
- --x)
- # Obsolete; use --with-x.
- with_x=yes ;;
-
- -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
- | --x-incl | --x-inc | --x-in | --x-i)
- ac_prev=x_includes ;;
- -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
- | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
- x_includes=$ac_optarg ;;
-
- -x-libraries | --x-libraries | --x-librarie | --x-librari \
- | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
- ac_prev=x_libraries ;;
- -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
- | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
- x_libraries=$ac_optarg ;;
-
- -*) as_fn_error $? "unrecognized option: \`$ac_option'
-Try \`$0 --help' for more information"
- ;;
-
- *=*)
- ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
- # Reject names that are not valid shell variable names.
- case $ac_envvar in #(
- '' | [0-9]* | *[!_$as_cr_alnum]* )
- as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
- esac
- eval $ac_envvar=\$ac_optarg
- export $ac_envvar ;;
-
- *)
- # FIXME: should be removed in autoconf 3.0.
- $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
- expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
- $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
- : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}"
- ;;
-
- esac
-done
-
-if test -n "$ac_prev"; then
- ac_option=--`echo $ac_prev | sed 's/_/-/g'`
- as_fn_error $? "missing argument to $ac_option"
-fi
-
-if test -n "$ac_unrecognized_opts"; then
- case $enable_option_checking in
- no) ;;
- fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
- *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
- esac
-fi
-
-# Check all directory arguments for consistency.
-for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
- datadir sysconfdir sharedstatedir localstatedir includedir \
- oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
- libdir localedir mandir
-do
- eval ac_val=\$$ac_var
- # Remove trailing slashes.
- case $ac_val in
- */ )
- ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`
- eval $ac_var=\$ac_val;;
- esac
- # Be sure to have absolute directory names.
- case $ac_val in
- [\\/$]* | ?:[\\/]* ) continue;;
- NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
- esac
- as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
-done
-
-# There might be people who depend on the old broken behavior: `$host'
-# used to hold the argument of --host etc.
-# FIXME: To remove some day.
-build=$build_alias
-host=$host_alias
-target=$target_alias
-
-# FIXME: To remove some day.
-if test "x$host_alias" != x; then
- if test "x$build_alias" = x; then
- cross_compiling=maybe
- elif test "x$build_alias" != "x$host_alias"; then
- cross_compiling=yes
- fi
-fi
-
-ac_tool_prefix=
-test -n "$host_alias" && ac_tool_prefix=$host_alias-
-
-test "$silent" = yes && exec 6>/dev/null
-
-
-ac_pwd=`pwd` && test -n "$ac_pwd" &&
-ac_ls_di=`ls -di .` &&
-ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
- as_fn_error $? "working directory cannot be determined"
-test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
- as_fn_error $? "pwd does not report name of working directory"
-
-
-# Find the source files, if location was not specified.
-if test -z "$srcdir"; then
- ac_srcdir_defaulted=yes
- # Try the directory containing this script, then the parent directory.
- ac_confdir=`$as_dirname -- "$as_myself" ||
-$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$as_myself" : 'X\(//\)[^/]' \| \
- X"$as_myself" : 'X\(//\)$' \| \
- X"$as_myself" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$as_myself" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
- srcdir=$ac_confdir
- if test ! -r "$srcdir/$ac_unique_file"; then
- srcdir=..
- fi
-else
- ac_srcdir_defaulted=no
-fi
-if test ! -r "$srcdir/$ac_unique_file"; then
- test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
- as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
-fi
-ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
-ac_abs_confdir=`(
- cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
- pwd)`
-# When building in place, set srcdir=.
-if test "$ac_abs_confdir" = "$ac_pwd"; then
- srcdir=.
-fi
-# Remove unnecessary trailing slashes from srcdir.
-# Double slashes in file names in object file debugging info
-# mess up M-x gdb in Emacs.
-case $srcdir in
-*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
-esac
-for ac_var in $ac_precious_vars; do
- eval ac_env_${ac_var}_set=\${${ac_var}+set}
- eval ac_env_${ac_var}_value=\$${ac_var}
- eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}
- eval ac_cv_env_${ac_var}_value=\$${ac_var}
-done
-
-#
-# Report the --help message.
-#
-if test "$ac_init_help" = "long"; then
- # Omit some internal or obsolete options to make the list less imposing.
- # This message is too long to be a string in the A/UX 3.1 sh.
- cat <<_ACEOF
-\`configure' configures FFLAS-FFPACK 1.6.0 to adapt to many kinds of systems.
-
-Usage: $0 [OPTION]... [VAR=VALUE]...
-
-To assign environment variables (e.g., CC, CFLAGS...), specify them as
-VAR=VALUE. See below for descriptions of some of the useful variables.
-
-Defaults for the options are specified in brackets.
-
-Configuration:
- -h, --help display this help and exit
- --help=short display options specific to this package
- --help=recursive display the short help of all the included packages
- -V, --version display version information and exit
- -q, --quiet, --silent do not print \`checking ...' messages
- --cache-file=FILE cache test results in FILE [disabled]
- -C, --config-cache alias for \`--cache-file=config.cache'
- -n, --no-create do not create output files
- --srcdir=DIR find the sources in DIR [configure dir or \`..']
-
-Installation directories:
- --prefix=PREFIX install architecture-independent files in PREFIX
- [$ac_default_prefix]
- --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
- [PREFIX]
-
-By default, \`make install' will install all the files in
-\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify
-an installation prefix other than \`$ac_default_prefix' using \`--prefix',
-for instance \`--prefix=\$HOME'.
-
-For better control, use the options below.
-
-Fine tuning of the installation directories:
- --bindir=DIR user executables [EPREFIX/bin]
- --sbindir=DIR system admin executables [EPREFIX/sbin]
- --libexecdir=DIR program executables [EPREFIX/libexec]
- --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
- --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
- --localstatedir=DIR modifiable single-machine data [PREFIX/var]
- --libdir=DIR object code libraries [EPREFIX/lib]
- --includedir=DIR C header files [PREFIX/include]
- --oldincludedir=DIR C header files for non-gcc [/usr/include]
- --datarootdir=DIR read-only arch.-independent data root [PREFIX/share]
- --datadir=DIR read-only architecture-independent data [DATAROOTDIR]
- --infodir=DIR info documentation [DATAROOTDIR/info]
- --localedir=DIR locale-dependent data [DATAROOTDIR/locale]
- --mandir=DIR man documentation [DATAROOTDIR/man]
- --docdir=DIR documentation root [DATAROOTDIR/doc/fflas-ffpack]
- --htmldir=DIR html documentation [DOCDIR]
- --dvidir=DIR dvi documentation [DOCDIR]
- --pdfdir=DIR pdf documentation [DOCDIR]
- --psdir=DIR ps documentation [DOCDIR]
-_ACEOF
-
- cat <<\_ACEOF
-
-Program names:
- --program-prefix=PREFIX prepend PREFIX to installed program names
- --program-suffix=SUFFIX append SUFFIX to installed program names
- --program-transform-name=PROGRAM run sed PROGRAM on installed program names
-
-System types:
- --build=BUILD configure for building on BUILD [guessed]
- --host=HOST cross-compile to build programs to run on HOST [BUILD]
-_ACEOF
-fi
-
-if test -n "$ac_init_help"; then
- case $ac_init_help in
- short | recursive ) echo "Configuration of FFLAS-FFPACK 1.6.0:";;
- esac
- cat <<\_ACEOF
-
-Optional Features:
- --disable-option-checking ignore unrecognized --enable/--with options
- --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
- --enable-FEATURE[=ARG] include FEATURE [ARG=yes]
- --enable-maintainer-mode enable make rules and dependencies not useful
- (and sometimes confusing) to the casual installer
- --disable-dependency-tracking speeds up one-time build
- --enable-dependency-tracking do not reject slow dependency extractors
- --enable-debug enable debugging options in library
- --enable-profile enable profiling options in library
- --enable-warnings=yes|full|no
- enable warnings when compiling the library. If
- nothing or yes is given, more aggressive compiler
- warnings are passed to the compiler. If full is
- given, we become paranoïd about warnings and treat
- them as errors.
- --enable-shared[=PKGS] build shared libraries [default=yes]
- --enable-static[=PKGS] build static libraries [default=yes]
- --enable-fast-install[=PKGS]
- optimize for fast installation [default=yes]
- --disable-libtool-lock avoid locking (might break parallel builds)
- --enable-doc Enable building documentation
- --disable-optimization Disable run time optimization in FflasFpack code
-
-Optional Packages:
- --with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
- --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
- --with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use
- both]
- --with-gnu-ld assume the C compiler uses GNU ld [default=no]
- --with-sysroot=DIR Search for dependent libraries within DIR
- (or the compiler's sysroot if not specified).
- --with-default=<path> Add <path> to the default path for external package
- checking. Set as default with /usr and /usr/local.
- --with-all= <path>|yes|no
- Use all external packages. If the argument is no,
- you not sure that all libraries are reachable with
- the default path. If the argument is yes or <empty>,
- that means that all libraries are reachable with the
- default path. Otherwise add <path> to default path
- and enable all external packages.
- --with-gmp= <path>|yes Use GMP library. This library is mandatory for
- LinBox compilation. If argument is yes or <empty>
- that means the library is reachable with the
- standard search path "/usr" or "/usr/local" (set as
- default). Otherwise you give the <path> to the
- directory which contain the library.
- --with-givaro=<path>|yes
- Use Givaro library. This library is mandatory for
- LinBox compilation. If argument is yes or <empty>
- that means the library is reachable with the
- standard search path (/usr or /usr/local). Otherwise
- you give the <path> to the directory which contains
- the library.
- --with-blas=<lflags> Use BLAS library. This library is mandatory for
- FFLAS-FFPACK compilation. The user has the
- responsability to provide library flags such that
- the compiler will find and use BLAS (and LAPACK). An
- example could be --with-blas=/path/to/blas or
- --with-blas="-L/path/to/blas/lib -lsomeblas".\n *
- Warning : we don't really handle .a archives
- alone...
-
- --with-gotoblas2=<path|yes>
- Use GOTO2 blas library. BLAS are mandatory for
- FFLAS-FFPACK compilation. If argument is <yes> that
- means the library is reachable with the standard
- search path (/usr or /usr/local). Otherwise you give
- the <path> to the directory which contains the
- library. If empty, GOTO2 are not searched for.
-
- --with-gsl=<path|yes> Use GSL blas library. BLAS are mandatory for
- FFLAS-FFPACK compilation. If argument is <yes> that
- means the library is reachable with the standard
- search path (/usr or /usr/local). Otherwise you give
- the <path> to the directory which contains the
- library. If empty, GSL is not searched for.
-
- --with-cblas=<lib> Use BLAS library. This library is mandatory for
- FFLAS-FFPACK compilation. If argument is <empty>
- that means the library is reachable with the
- standard search path (/usr or /usr/local). Otherwise
- you give the <path> to the directory which contains
- the library.
-
- --with-otherblas=<lib> Use BLAS library. This library is mandatory for
- FFLAS-FFPACK compilation. If argument is <empty>
- that means the library is reachable with the
- standard search path (/usr or /usr/local). Otherwise
- you give the <path> to the directory which contains
- the library.
-
- --with-lapack=<blas|path>
- Use LAPACK functions. This library is mandatory for
- LinBox compilation. If argument is <empty> that
- means the library is reachable with the standard
- search path (/usr or /usr/local). Or, you can give
- the <path> to the directory which contains the
- library. If the argument is 'blas', then we look in
- the BLAS vendor library. We look for a C interface
- (clapack_), and if not present, look for standard
- functions (as dgetrf_). First one available in order
- in '$path /usr /usr/local', first chosen, even if it
- is not clapack_ (example: clapack_ in /usr but
- dgetrf_ in $path : dgetrf_ chosen, $path not even
- looked into).
-
- --with-docdir=<path> Where the FFLAS-FFPACK documentation should be
- installed
- --with-doxygen=<path> Give the path to Doxygen. Note: --enable-doc needed
-
-Some influential environment variables:
- CXX C++ compiler command
- CXXFLAGS C++ compiler flags
- LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
- nonstandard directory <lib dir>
- LIBS libraries to pass to the linker, e.g. -l<library>
- CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
- you have headers in a nonstandard directory <include dir>
- CXXCPP C++ preprocessor
- CC C compiler command
- CFLAGS C compiler flags
-
-Use these variables to override the choices made by `configure' or to help
-it to find libraries and programs with nonstandard names/locations.
-
-Report bugs to <ffpack-devel at googlegroups.com>.
-FFLAS-FFPACK home page: <http://www.linalg.org/projects/fflas-ffpack>.
-_ACEOF
-ac_status=$?
-fi
-
-if test "$ac_init_help" = "recursive"; then
- # If there are subdirs, report their specific --help.
- for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
- test -d "$ac_dir" ||
- { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } ||
- continue
- ac_builddir=.
-
-case "$ac_dir" in
-.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
-*)
- ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
- # A ".." for each directory in $ac_dir_suffix.
- ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
- case $ac_top_builddir_sub in
- "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
- *) ac_top_build_prefix=$ac_top_builddir_sub/ ;;
- esac ;;
-esac
-ac_abs_top_builddir=$ac_pwd
-ac_abs_builddir=$ac_pwd$ac_dir_suffix
-# for backward compatibility:
-ac_top_builddir=$ac_top_build_prefix
-
-case $srcdir in
- .) # We are building in place.
- ac_srcdir=.
- ac_top_srcdir=$ac_top_builddir_sub
- ac_abs_top_srcdir=$ac_pwd ;;
- [\\/]* | ?:[\\/]* ) # Absolute name.
- ac_srcdir=$srcdir$ac_dir_suffix;
- ac_top_srcdir=$srcdir
- ac_abs_top_srcdir=$srcdir ;;
- *) # Relative name.
- ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
- ac_top_srcdir=$ac_top_build_prefix$srcdir
- ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
-esac
-ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
-
- cd "$ac_dir" || { ac_status=$?; continue; }
- # Check for guested configure.
- if test -f "$ac_srcdir/configure.gnu"; then
- echo &&
- $SHELL "$ac_srcdir/configure.gnu" --help=recursive
- elif test -f "$ac_srcdir/configure"; then
- echo &&
- $SHELL "$ac_srcdir/configure" --help=recursive
- else
- $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
- fi || ac_status=$?
- cd "$ac_pwd" || { ac_status=$?; break; }
- done
-fi
-
-test -n "$ac_init_help" && exit $ac_status
-if $ac_init_version; then
- cat <<\_ACEOF
-FFLAS-FFPACK configure 1.6.0
-generated by GNU Autoconf 2.69
-
-Copyright (C) 2012 Free Software Foundation, Inc.
-This configure script is free software; the Free Software Foundation
-gives unlimited permission to copy, distribute and modify it.
-_ACEOF
- exit
-fi
-
-## ------------------------ ##
-## Autoconf initialization. ##
-## ------------------------ ##
-
-# ac_fn_cxx_try_compile LINENO
-# ----------------------------
-# Try to compile conftest.$ac_ext, and return whether this succeeded.
-ac_fn_cxx_try_compile ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- rm -f conftest.$ac_objext
- if { { ac_try="$ac_compile"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compile") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
- as_fn_set_status $ac_retval
-
-} # ac_fn_cxx_try_compile
-
-# ac_fn_cxx_try_run LINENO
-# ------------------------
-# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
-# that executables *can* be run.
-ac_fn_cxx_try_run ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- if { { ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
- { { case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
- ac_retval=0
-else
- $as_echo "$as_me: program exited with status $ac_status" >&5
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=$ac_status
-fi
- rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
- as_fn_set_status $ac_retval
-
-} # ac_fn_cxx_try_run
-
-# ac_fn_cxx_try_cpp LINENO
-# ------------------------
-# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
-ac_fn_cxx_try_cpp ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- if { { ac_try="$ac_cpp conftest.$ac_ext"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } > conftest.i && {
- test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- }; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
- as_fn_set_status $ac_retval
-
-} # ac_fn_cxx_try_cpp
-
-# ac_fn_c_try_compile LINENO
-# --------------------------
-# Try to compile conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_compile ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- rm -f conftest.$ac_objext
- if { { ac_try="$ac_compile"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compile") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
- as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_compile
-
-# ac_fn_c_try_link LINENO
-# -----------------------
-# Try to link conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_link ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- rm -f conftest.$ac_objext conftest$ac_exeext
- if { { ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext && {
- test "$cross_compiling" = yes ||
- test -x conftest$ac_exeext
- }; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
- # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
- # interfere with the next link command; also delete a directory that is
- # left behind by Apple's compiler. We do this before executing the actions.
- rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
- as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_link
-
-# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
-# -------------------------------------------------------
-# Tests whether HEADER exists and can be compiled using the include files in
-# INCLUDES, setting the cache variable VAR accordingly.
-ac_fn_c_check_header_compile ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval \${$3+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-#include <$2>
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- eval "$3=yes"
-else
- eval "$3=no"
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
-
-} # ac_fn_c_check_header_compile
-
-# ac_fn_c_check_func LINENO FUNC VAR
-# ----------------------------------
-# Tests whether FUNC exists, setting the cache variable VAR accordingly
-ac_fn_c_check_func ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval \${$3+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
- For example, HP-UX 11i <limits.h> declares gettimeofday. */
-#define $2 innocuous_$2
-
-/* System header to define __stub macros and hopefully few prototypes,
- which can conflict with char $2 (); below.
- Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- <limits.h> exists even on freestanding compilers. */
-
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-
-#undef $2
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char $2 ();
-/* The GNU C library defines this for functions which it implements
- to always fail with ENOSYS. Some functions are actually named
- something starting with __ and the normal name is an alias. */
-#if defined __stub_$2 || defined __stub___$2
-choke me
-#endif
-
-int
-main ()
-{
-return $2 ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- eval "$3=yes"
-else
- eval "$3=no"
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
-
-} # ac_fn_c_check_func
-
-# ac_fn_cxx_try_link LINENO
-# -------------------------
-# Try to link conftest.$ac_ext, and return whether this succeeded.
-ac_fn_cxx_try_link ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- rm -f conftest.$ac_objext conftest$ac_exeext
- if { { ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext && {
- test "$cross_compiling" = yes ||
- test -x conftest$ac_exeext
- }; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
- # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
- # interfere with the next link command; also delete a directory that is
- # left behind by Apple's compiler. We do this before executing the actions.
- rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
- as_fn_set_status $ac_retval
-
-} # ac_fn_cxx_try_link
-
-# ac_fn_cxx_compute_int LINENO EXPR VAR INCLUDES
-# ----------------------------------------------
-# Tries to find the compile-time value of EXPR in a program that includes
-# INCLUDES, setting VAR accordingly. Returns whether the value could be
-# computed
-ac_fn_cxx_compute_int ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- if test "$cross_compiling" = yes; then
- # Depending upon the size, compute the lo and hi bounds.
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-int
-main ()
-{
-static int test_array [1 - 2 * !(($2) >= 0)];
-test_array [0] = 0;
-return test_array [0];
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- ac_lo=0 ac_mid=0
- while :; do
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-int
-main ()
-{
-static int test_array [1 - 2 * !(($2) <= $ac_mid)];
-test_array [0] = 0;
-return test_array [0];
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- ac_hi=$ac_mid; break
-else
- as_fn_arith $ac_mid + 1 && ac_lo=$as_val
- if test $ac_lo -le $ac_mid; then
- ac_lo= ac_hi=
- break
- fi
- as_fn_arith 2 '*' $ac_mid + 1 && ac_mid=$as_val
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- done
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-int
-main ()
-{
-static int test_array [1 - 2 * !(($2) < 0)];
-test_array [0] = 0;
-return test_array [0];
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- ac_hi=-1 ac_mid=-1
- while :; do
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-int
-main ()
-{
-static int test_array [1 - 2 * !(($2) >= $ac_mid)];
-test_array [0] = 0;
-return test_array [0];
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- ac_lo=$ac_mid; break
-else
- as_fn_arith '(' $ac_mid ')' - 1 && ac_hi=$as_val
- if test $ac_mid -le $ac_hi; then
- ac_lo= ac_hi=
- break
- fi
- as_fn_arith 2 '*' $ac_mid && ac_mid=$as_val
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- done
-else
- ac_lo= ac_hi=
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-# Binary search between lo and hi bounds.
-while test "x$ac_lo" != "x$ac_hi"; do
- as_fn_arith '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo && ac_mid=$as_val
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-int
-main ()
-{
-static int test_array [1 - 2 * !(($2) <= $ac_mid)];
-test_array [0] = 0;
-return test_array [0];
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- ac_hi=$ac_mid
-else
- as_fn_arith '(' $ac_mid ')' + 1 && ac_lo=$as_val
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-done
-case $ac_lo in #((
-?*) eval "$3=\$ac_lo"; ac_retval=0 ;;
-'') ac_retval=1 ;;
-esac
- else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-static long int longval () { return $2; }
-static unsigned long int ulongval () { return $2; }
-#include <stdio.h>
-#include <stdlib.h>
-int
-main ()
-{
-
- FILE *f = fopen ("conftest.val", "w");
- if (! f)
- return 1;
- if (($2) < 0)
- {
- long int i = longval ();
- if (i != ($2))
- return 1;
- fprintf (f, "%ld", i);
- }
- else
- {
- unsigned long int i = ulongval ();
- if (i != ($2))
- return 1;
- fprintf (f, "%lu", i);
- }
- /* Do not output a trailing newline, as this causes \r\n confusion
- on some platforms. */
- return ferror (f) || fclose (f) != 0;
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
- echo >>conftest.val; read $3 <conftest.val; ac_retval=0
-else
- ac_retval=1
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-rm -f conftest.val
-
- fi
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
- as_fn_set_status $ac_retval
-
-} # ac_fn_cxx_compute_int
-
-# ac_fn_cxx_check_header_mongrel LINENO HEADER VAR INCLUDES
-# ---------------------------------------------------------
-# Tests whether HEADER exists, giving a warning if it cannot be compiled using
-# the include files in INCLUDES and setting the cache variable VAR
-# accordingly.
-ac_fn_cxx_check_header_mongrel ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- if eval \${$3+:} false; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval \${$3+:} false; then :
- $as_echo_n "(cached) " >&6
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
-else
- # Is the header compilable?
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
-$as_echo_n "checking $2 usability... " >&6; }
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-#include <$2>
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- ac_header_compiler=yes
-else
- ac_header_compiler=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
-$as_echo "$ac_header_compiler" >&6; }
-
-# Is the header present?
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
-$as_echo_n "checking $2 presence... " >&6; }
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <$2>
-_ACEOF
-if ac_fn_cxx_try_cpp "$LINENO"; then :
- ac_header_preproc=yes
-else
- ac_header_preproc=no
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
-$as_echo "$ac_header_preproc" >&6; }
-
-# So? What about this header?
-case $ac_header_compiler:$ac_header_preproc:$ac_cxx_preproc_warn_flag in #((
- yes:no: )
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
-$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
-$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
- ;;
- no:yes:* )
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
-$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5
-$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
-$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5
-$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
-$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
-( $as_echo "## -------------------------------------------- ##
-## Report this to ffpack-devel at googlegroups.com ##
-## -------------------------------------------- ##"
- ) | sed "s/^/$as_me: WARNING: /" >&2
- ;;
-esac
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval \${$3+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- eval "$3=\$ac_header_compiler"
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
-fi
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
-
-} # ac_fn_cxx_check_header_mongrel
-cat >config.log <<_ACEOF
-This file contains any messages produced by compilers while
-running configure, to aid debugging if configure makes a mistake.
-
-It was created by FFLAS-FFPACK $as_me 1.6.0, which was
-generated by GNU Autoconf 2.69. Invocation command line was
-
- $ $0 $@
-
-_ACEOF
-exec 5>>config.log
-{
-cat <<_ASUNAME
-## --------- ##
-## Platform. ##
-## --------- ##
-
-hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
-uname -m = `(uname -m) 2>/dev/null || echo unknown`
-uname -r = `(uname -r) 2>/dev/null || echo unknown`
-uname -s = `(uname -s) 2>/dev/null || echo unknown`
-uname -v = `(uname -v) 2>/dev/null || echo unknown`
-
-/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
-/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`
-
-/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
-/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
-/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
-/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown`
-/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
-/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
-/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`
-
-_ASUNAME
-
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- $as_echo "PATH: $as_dir"
- done
-IFS=$as_save_IFS
-
-} >&5
-
-cat >&5 <<_ACEOF
-
-
-## ----------- ##
-## Core tests. ##
-## ----------- ##
-
-_ACEOF
-
-
-# Keep a trace of the command line.
-# Strip out --no-create and --no-recursion so they do not pile up.
-# Strip out --silent because we don't want to record it for future runs.
-# Also quote any args containing shell meta-characters.
-# Make two passes to allow for proper duplicate-argument suppression.
-ac_configure_args=
-ac_configure_args0=
-ac_configure_args1=
-ac_must_keep_next=false
-for ac_pass in 1 2
-do
- for ac_arg
- do
- case $ac_arg in
- -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
- -q | -quiet | --quiet | --quie | --qui | --qu | --q \
- | -silent | --silent | --silen | --sile | --sil)
- continue ;;
- *\'*)
- ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
- esac
- case $ac_pass in
- 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;;
- 2)
- as_fn_append ac_configure_args1 " '$ac_arg'"
- if test $ac_must_keep_next = true; then
- ac_must_keep_next=false # Got value, back to normal.
- else
- case $ac_arg in
- *=* | --config-cache | -C | -disable-* | --disable-* \
- | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
- | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
- | -with-* | --with-* | -without-* | --without-* | --x)
- case "$ac_configure_args0 " in
- "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
- esac
- ;;
- -* ) ac_must_keep_next=true ;;
- esac
- fi
- as_fn_append ac_configure_args " '$ac_arg'"
- ;;
- esac
- done
-done
-{ ac_configure_args0=; unset ac_configure_args0;}
-{ ac_configure_args1=; unset ac_configure_args1;}
-
-# When interrupted or exit'd, cleanup temporary files, and complete
-# config.log. We remove comments because anyway the quotes in there
-# would cause problems or look ugly.
-# WARNING: Use '\'' to represent an apostrophe within the trap.
-# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.
-trap 'exit_status=$?
- # Save into config.log some information that might help in debugging.
- {
- echo
-
- $as_echo "## ---------------- ##
-## Cache variables. ##
-## ---------------- ##"
- echo
- # The following way of writing the cache mishandles newlines in values,
-(
- for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do
- eval ac_val=\$$ac_var
- case $ac_val in #(
- *${as_nl}*)
- case $ac_var in #(
- *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
-$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
- esac
- case $ac_var in #(
- _ | IFS | as_nl) ;; #(
- BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
- *) { eval $ac_var=; unset $ac_var;} ;;
- esac ;;
- esac
- done
- (set) 2>&1 |
- case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #(
- *${as_nl}ac_space=\ *)
- sed -n \
- "s/'\''/'\''\\\\'\'''\''/g;
- s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p"
- ;; #(
- *)
- sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
- ;;
- esac |
- sort
-)
- echo
-
- $as_echo "## ----------------- ##
-## Output variables. ##
-## ----------------- ##"
- echo
- for ac_var in $ac_subst_vars
- do
- eval ac_val=\$$ac_var
- case $ac_val in
- *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
- esac
- $as_echo "$ac_var='\''$ac_val'\''"
- done | sort
- echo
-
- if test -n "$ac_subst_files"; then
- $as_echo "## ------------------- ##
-## File substitutions. ##
-## ------------------- ##"
- echo
- for ac_var in $ac_subst_files
- do
- eval ac_val=\$$ac_var
- case $ac_val in
- *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
- esac
- $as_echo "$ac_var='\''$ac_val'\''"
- done | sort
- echo
- fi
-
- if test -s confdefs.h; then
- $as_echo "## ----------- ##
-## confdefs.h. ##
-## ----------- ##"
- echo
- cat confdefs.h
- echo
- fi
- test "$ac_signal" != 0 &&
- $as_echo "$as_me: caught signal $ac_signal"
- $as_echo "$as_me: exit $exit_status"
- } >&5
- rm -f core *.core core.conftest.* &&
- rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&
- exit $exit_status
-' 0
-for ac_signal in 1 2 13 15; do
- trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal
-done
-ac_signal=0
-
-# confdefs.h avoids OS command line length limits that DEFS can exceed.
-rm -f -r conftest* confdefs.h
-
-$as_echo "/* confdefs.h */" > confdefs.h
-
-# Predefined preprocessor variables.
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_NAME "$PACKAGE_NAME"
-_ACEOF
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
-_ACEOF
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_VERSION "$PACKAGE_VERSION"
-_ACEOF
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_STRING "$PACKAGE_STRING"
-_ACEOF
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
-_ACEOF
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_URL "$PACKAGE_URL"
-_ACEOF
-
-
-# Let the site file select an alternate cache file if it wants to.
-# Prefer an explicitly selected file to automatically selected ones.
-ac_site_file1=NONE
-ac_site_file2=NONE
-if test -n "$CONFIG_SITE"; then
- # We do not want a PATH search for config.site.
- case $CONFIG_SITE in #((
- -*) ac_site_file1=./$CONFIG_SITE;;
- */*) ac_site_file1=$CONFIG_SITE;;
- *) ac_site_file1=./$CONFIG_SITE;;
- esac
-elif test "x$prefix" != xNONE; then
- ac_site_file1=$prefix/share/config.site
- ac_site_file2=$prefix/etc/config.site
-else
- ac_site_file1=$ac_default_prefix/share/config.site
- ac_site_file2=$ac_default_prefix/etc/config.site
-fi
-for ac_site_file in "$ac_site_file1" "$ac_site_file2"
-do
- test "x$ac_site_file" = xNONE && continue
- if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
-$as_echo "$as_me: loading site script $ac_site_file" >&6;}
- sed 's/^/| /' "$ac_site_file" >&5
- . "$ac_site_file" \
- || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "failed to load site script $ac_site_file
-See \`config.log' for more details" "$LINENO" 5; }
- fi
-done
-
-if test -r "$cache_file"; then
- # Some versions of bash will fail to source /dev/null (special files
- # actually), so we avoid doing that. DJGPP emulates it as a regular file.
- if test /dev/null != "$cache_file" && test -f "$cache_file"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5
-$as_echo "$as_me: loading cache $cache_file" >&6;}
- case $cache_file in
- [\\/]* | ?:[\\/]* ) . "$cache_file";;
- *) . "./$cache_file";;
- esac
- fi
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5
-$as_echo "$as_me: creating cache $cache_file" >&6;}
- >$cache_file
-fi
-
-# Check that the precious variables saved in the cache have kept the same
-# value.
-ac_cache_corrupted=false
-for ac_var in $ac_precious_vars; do
- eval ac_old_set=\$ac_cv_env_${ac_var}_set
- eval ac_new_set=\$ac_env_${ac_var}_set
- eval ac_old_val=\$ac_cv_env_${ac_var}_value
- eval ac_new_val=\$ac_env_${ac_var}_value
- case $ac_old_set,$ac_new_set in
- set,)
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
-$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
- ac_cache_corrupted=: ;;
- ,set)
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5
-$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
- ac_cache_corrupted=: ;;
- ,);;
- *)
- if test "x$ac_old_val" != "x$ac_new_val"; then
- # differences in whitespace do not lead to failure.
- ac_old_val_w=`echo x $ac_old_val`
- ac_new_val_w=`echo x $ac_new_val`
- if test "$ac_old_val_w" != "$ac_new_val_w"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5
-$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
- ac_cache_corrupted=:
- else
- { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
-$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;}
- eval $ac_var=\$ac_old_val
- fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5
-$as_echo "$as_me: former value: \`$ac_old_val'" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5
-$as_echo "$as_me: current value: \`$ac_new_val'" >&2;}
- fi;;
- esac
- # Pass precious variables to config.status.
- if test "$ac_new_set" = set; then
- case $ac_new_val in
- *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
- *) ac_arg=$ac_var=$ac_new_val ;;
- esac
- case " $ac_configure_args " in
- *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
- *) as_fn_append ac_configure_args " '$ac_arg'" ;;
- esac
- fi
-done
-if $ac_cache_corrupted; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
-$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
- as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
-fi
-## -------------------- ##
-## Main body of script. ##
-## -------------------- ##
-
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
-
-
-ac_aux_dir=
-for ac_dir in build-aux "$srcdir"/build-aux; do
- if test -f "$ac_dir/install-sh"; then
- ac_aux_dir=$ac_dir
- ac_install_sh="$ac_aux_dir/install-sh -c"
- break
- elif test -f "$ac_dir/install.sh"; then
- ac_aux_dir=$ac_dir
- ac_install_sh="$ac_aux_dir/install.sh -c"
- break
- elif test -f "$ac_dir/shtool"; then
- ac_aux_dir=$ac_dir
- ac_install_sh="$ac_aux_dir/shtool install -c"
- break
- fi
-done
-if test -z "$ac_aux_dir"; then
- as_fn_error $? "cannot find install-sh, install.sh, or shtool in build-aux \"$srcdir\"/build-aux" "$LINENO" 5
-fi
-
-# These three variables are undocumented and unsupported,
-# and are intended to be withdrawn in a future Autoconf release.
-# They can cause serious problems if a builder's source tree is in a directory
-# whose full name contains unusual characters.
-ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var.
-ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var.
-ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var.
-
-
-am__api_version='1.11'
-
-# Find a good install program. We prefer a C program (faster),
-# so one script is as good as another. But avoid the broken or
-# incompatible versions:
-# SysV /etc/install, /usr/sbin/install
-# SunOS /usr/etc/install
-# IRIX /sbin/install
-# AIX /bin/install
-# AmigaOS /C/install, which installs bootblocks on floppy discs
-# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
-# AFS /usr/afsws/bin/install, which mishandles nonexistent args
-# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
-# OS/2's system install, which has a completely different semantic
-# ./install, which can be erroneously created by make from ./install.sh.
-# Reject install programs that cannot install multiple files.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5
-$as_echo_n "checking for a BSD-compatible install... " >&6; }
-if test -z "$INSTALL"; then
-if ${ac_cv_path_install+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- # Account for people who put trailing slashes in PATH elements.
-case $as_dir/ in #((
- ./ | .// | /[cC]/* | \
- /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
- ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \
- /usr/ucb/* ) ;;
- *)
- # OSF1 and SCO ODT 3.0 have their own names for install.
- # Don't use installbsd from OSF since it installs stuff as root
- # by default.
- for ac_prog in ginstall scoinst install; do
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
- if test $ac_prog = install &&
- grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
- # AIX install. It has an incompatible calling convention.
- :
- elif test $ac_prog = install &&
- grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
- # program-specific install script used by HP pwplus--don't use.
- :
- else
- rm -rf conftest.one conftest.two conftest.dir
- echo one > conftest.one
- echo two > conftest.two
- mkdir conftest.dir
- if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" &&
- test -s conftest.one && test -s conftest.two &&
- test -s conftest.dir/conftest.one &&
- test -s conftest.dir/conftest.two
- then
- ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c"
- break 3
- fi
- fi
- fi
- done
- done
- ;;
-esac
-
- done
-IFS=$as_save_IFS
-
-rm -rf conftest.one conftest.two conftest.dir
-
-fi
- if test "${ac_cv_path_install+set}" = set; then
- INSTALL=$ac_cv_path_install
- else
- # As a last resort, use the slow shell script. Don't cache a
- # value for INSTALL within a source directory, because that will
- # break other packages using the cache if that directory is
- # removed, or if the value is a relative name.
- INSTALL=$ac_install_sh
- fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5
-$as_echo "$INSTALL" >&6; }
-
-# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
-# It thinks the first close brace ends the variable substitution.
-test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
-
-test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
-
-test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5
-$as_echo_n "checking whether build environment is sane... " >&6; }
-# Just in case
-sleep 1
-echo timestamp > conftest.file
-# Reject unsafe characters in $srcdir or the absolute working directory
-# name. Accept space and tab only in the latter.
-am_lf='
-'
-case `pwd` in
- *[\\\"\#\$\&\'\`$am_lf]*)
- as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;;
-esac
-case $srcdir in
- *[\\\"\#\$\&\'\`$am_lf\ \ ]*)
- as_fn_error $? "unsafe srcdir value: \`$srcdir'" "$LINENO" 5;;
-esac
-
-# Do `set' in a subshell so we don't clobber the current shell's
-# arguments. Must try -L first in case configure is actually a
-# symlink; some systems play weird games with the mod time of symlinks
-# (eg FreeBSD returns the mod time of the symlink's containing
-# directory).
-if (
- set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
- if test "$*" = "X"; then
- # -L didn't work.
- set X `ls -t "$srcdir/configure" conftest.file`
- fi
- rm -f conftest.file
- if test "$*" != "X $srcdir/configure conftest.file" \
- && test "$*" != "X conftest.file $srcdir/configure"; then
-
- # If neither matched, then we have a broken ls. This can happen
- # if, for instance, CONFIG_SHELL is bash and it inherits a
- # broken ls alias from the environment. This has actually
- # happened. Such a system could not be considered "sane".
- as_fn_error $? "ls -t appears to fail. Make sure there is not a broken
-alias in your environment" "$LINENO" 5
- fi
-
- test "$2" = conftest.file
- )
-then
- # Ok.
- :
-else
- as_fn_error $? "newly created file is older than distributed files!
-Check your system clock" "$LINENO" 5
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-test "$program_prefix" != NONE &&
- program_transform_name="s&^&$program_prefix&;$program_transform_name"
-# Use a double $ so make ignores it.
-test "$program_suffix" != NONE &&
- program_transform_name="s&\$&$program_suffix&;$program_transform_name"
-# Double any \ or $.
-# By default was `s,x,x', remove it if useless.
-ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
-program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
-
-# expand $ac_aux_dir to an absolute path
-am_aux_dir=`cd $ac_aux_dir && pwd`
-
-if test x"${MISSING+set}" != xset; then
- case $am_aux_dir in
- *\ * | *\ *)
- MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
- *)
- MISSING="\${SHELL} $am_aux_dir/missing" ;;
- esac
-fi
-# Use eval to expand $SHELL
-if eval "$MISSING --run true"; then
- am_missing_run="$MISSING --run "
-else
- am_missing_run=
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`missing' script is too old or missing" >&5
-$as_echo "$as_me: WARNING: \`missing' script is too old or missing" >&2;}
-fi
-
-if test x"${install_sh}" != xset; then
- case $am_aux_dir in
- *\ * | *\ *)
- install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
- *)
- install_sh="\${SHELL} $am_aux_dir/install-sh"
- esac
-fi
-
-# Installed binaries are usually stripped using `strip' when the user
-# run `make install-strip'. However `strip' might not be the right
-# tool to use in cross-compilation environments, therefore Automake
-# will honor the `STRIP' environment variable to overrule this program.
-if test "$cross_compiling" != no; then
- if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
-set dummy ${ac_tool_prefix}strip; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_STRIP+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$STRIP"; then
- ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_STRIP="${ac_tool_prefix}strip"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-STRIP=$ac_cv_prog_STRIP
-if test -n "$STRIP"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5
-$as_echo "$STRIP" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_prog_STRIP"; then
- ac_ct_STRIP=$STRIP
- # Extract the first word of "strip", so it can be a program name with args.
-set dummy strip; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_STRIP+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_STRIP"; then
- ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_STRIP="strip"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
-if test -n "$ac_ct_STRIP"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5
-$as_echo "$ac_ct_STRIP" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
- if test "x$ac_ct_STRIP" = x; then
- STRIP=":"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- STRIP=$ac_ct_STRIP
- fi
-else
- STRIP="$ac_cv_prog_STRIP"
-fi
-
-fi
-INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5
-$as_echo_n "checking for a thread-safe mkdir -p... " >&6; }
-if test -z "$MKDIR_P"; then
- if ${ac_cv_path_mkdir+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_prog in mkdir gmkdir; do
- for ac_exec_ext in '' $ac_executable_extensions; do
- as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue
- case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
- 'mkdir (GNU coreutils) '* | \
- 'mkdir (coreutils) '* | \
- 'mkdir (fileutils) '4.1*)
- ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext
- break 3;;
- esac
- done
- done
- done
-IFS=$as_save_IFS
-
-fi
-
- test -d ./--version && rmdir ./--version
- if test "${ac_cv_path_mkdir+set}" = set; then
- MKDIR_P="$ac_cv_path_mkdir -p"
- else
- # As a last resort, use the slow shell script. Don't cache a
- # value for MKDIR_P within a source directory, because that will
- # break other packages using the cache if that directory is
- # removed, or if the value is a relative name.
- MKDIR_P="$ac_install_sh -d"
- fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5
-$as_echo "$MKDIR_P" >&6; }
-
-mkdir_p="$MKDIR_P"
-case $mkdir_p in
- [\\/$]* | ?:[\\/]*) ;;
- */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
-esac
-
-for ac_prog in gawk mawk nawk awk
-do
- # Extract the first word of "$ac_prog", so it can be a program name with args.
-set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_AWK+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$AWK"; then
- ac_cv_prog_AWK="$AWK" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_AWK="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-AWK=$ac_cv_prog_AWK
-if test -n "$AWK"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5
-$as_echo "$AWK" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
- test -n "$AWK" && break
-done
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5
-$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; }
-set x ${MAKE-make}
-ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
-if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat >conftest.make <<\_ACEOF
-SHELL = /bin/sh
-all:
- @echo '@@@%%%=$(MAKE)=@@@%%%'
-_ACEOF
-# GNU make sometimes prints "make[1]: Entering ...", which would confuse us.
-case `${MAKE-make} -f conftest.make 2>/dev/null` in
- *@@@%%%=?*=@@@%%%*)
- eval ac_cv_prog_make_${ac_make}_set=yes;;
- *)
- eval ac_cv_prog_make_${ac_make}_set=no;;
-esac
-rm -f conftest.make
-fi
-if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
- SET_MAKE=
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
- SET_MAKE="MAKE=${MAKE-make}"
-fi
-
-rm -rf .tst 2>/dev/null
-mkdir .tst 2>/dev/null
-if test -d .tst; then
- am__leading_dot=.
-else
- am__leading_dot=_
-fi
-rmdir .tst 2>/dev/null
-
-if test "`cd $srcdir && pwd`" != "`pwd`"; then
- # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
- # is not polluted with repeated "-I."
- am__isrc=' -I$(srcdir)'
- # test to see if srcdir already configured
- if test -f $srcdir/config.status; then
- as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5
- fi
-fi
-
-# test whether we have cygpath
-if test -z "$CYGPATH_W"; then
- if (cygpath --version) >/dev/null 2>/dev/null; then
- CYGPATH_W='cygpath -w'
- else
- CYGPATH_W=echo
- fi
-fi
-
-
-# Define the identity of the package.
- PACKAGE='fflas-ffpack'
- VERSION='1.6.0'
-
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE "$PACKAGE"
-_ACEOF
-
-
-cat >>confdefs.h <<_ACEOF
-#define VERSION "$VERSION"
-_ACEOF
-
-# Some tools Automake needs.
-
-ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"}
-
-
-AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}
-
-
-AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"}
-
-
-AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}
-
-
-MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
-
-# We need awk for the "check" target. The system "awk" is bad on
-# some platforms.
-# Always define AMTAR for backward compatibility. Yes, it's still used
-# in the wild :-( We should find a proper way to deprecate it ...
-AMTAR='$${TAR-tar}'
-
-am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
-
-
-
-
-
-ac_config_headers="$ac_config_headers config.h"
-
-
-ac_config_commands="$ac_config_commands fflas-ffpack/fflas-ffpack-config.h"
-
-# Extract the first word of "rm", so it can be a program name with args.
-set dummy rm; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_path_RM+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- case $RM in
- [\\/]* | ?:[\\/]*)
- ac_cv_path_RM="$RM" # Let the user override the test with a path.
- ;;
- *)
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_path_RM="$as_dir/$ac_word$ac_exec_ext"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
- test -z "$ac_cv_path_RM" && ac_cv_path_RM="$FALSE"
- ;;
-esac
-fi
-RM=$ac_cv_path_RM
-if test -n "$RM"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RM" >&5
-$as_echo "$RM" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-RM="$RM -f"
-
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5
-$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; }
- # Check whether --enable-maintainer-mode was given.
-if test "${enable_maintainer_mode+set}" = set; then :
- enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval
-else
- USE_MAINTAINER_MODE=no
-fi
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5
-$as_echo "$USE_MAINTAINER_MODE" >&6; }
- if test $USE_MAINTAINER_MODE = yes; then
- MAINTAINER_MODE_TRUE=
- MAINTAINER_MODE_FALSE='#'
-else
- MAINTAINER_MODE_TRUE='#'
- MAINTAINER_MODE_FALSE=
-fi
-
- MAINT=$MAINTAINER_MODE_TRUE
-
-
-# Check whether --enable-dependency-tracking was given.
-if test "${enable_dependency_tracking+set}" = set; then :
- enableval=$enable_dependency_tracking;
-fi
-
-if test "x$enable_dependency_tracking" != xno; then
- am_depcomp="$ac_aux_dir/depcomp"
- AMDEPBACKSLASH='\'
- am__nodep='_no'
-fi
- if test "x$enable_dependency_tracking" != xno; then
- AMDEP_TRUE=
- AMDEP_FALSE='#'
-else
- AMDEP_TRUE='#'
- AMDEP_FALSE=
-fi
-
-
-ac_config_commands="$ac_config_commands depfiles"
-
-
-
-
- if test x = y; then
- INSIDE_GNOME_COMMON_TRUE=
- INSIDE_GNOME_COMMON_FALSE='#'
-else
- INSIDE_GNOME_COMMON_TRUE='#'
- INSIDE_GNOME_COMMON_FALSE=
-fi
-
-
- test -n "$ACLOCAL_FLAGS" && ACLOCAL="$ACLOCAL $ACLOCAL_FLAGS"
-
- for k in macros ; do ACLOCAL="$ACLOCAL -I $k" ; done
-
-
-# work around to fix the backward compatibility issue of automake 1.10 with 1.9 (pb with MKDIR_P)
-
-
-ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-
-echo "-----------------------------------------------"
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable debugging options in the library" >&5
-$as_echo_n "checking whether to enable debugging options in the library... " >&6; }
- # Check whether --enable-debug was given.
-if test "${enable_debug+set}" = set; then :
- enableval=$enable_debug; USE_DEBUG=$enableval
-else
- USE_DEBUG=no
-fi
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_DEBUG" >&5
-$as_echo "$USE_DEBUG" >&6; }
- if test $USE_DEBUG = yes; then
- DEBUG_TRUE=
- DEBUG_FALSE='#'
-else
- DEBUG_TRUE='#'
- DEBUG_FALSE=
-fi
-
- DBG=$USE_DEBUG
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable profiling everything in the library" >&5
-$as_echo_n "checking whether to enable profiling everything in the library... " >&6; }
- # Check whether --enable-profile was given.
-if test "${enable_profile+set}" = set; then :
- enableval=$enable_profile; USE_PROFILE=$enableval
-else
- USE_PROFILE=no
-fi
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_PROFILE" >&5
-$as_echo "$USE_PROFILE" >&6; }
- if test $USE_PROFILE = yes; then
- PROFILE_TRUE=
- PROFILE_FALSE='#'
-else
- PROFILE_TRUE='#'
- PROFILE_FALSE=
-fi
-
- PROF=$USE_PROFILE
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable warnings when compiling the library" >&5
-$as_echo_n "checking whether to enable warnings when compiling the library... " >&6; }
- # Check whether --enable-warnings was given.
-if test "${enable_warnings+set}" = set; then :
- enableval=$enable_warnings; USE_WARNINGS=$enableval
-else
- USE_WARNINGS=no
-fi
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_WARNINGS" >&5
-$as_echo "$USE_WARNINGS" >&6; }
- WARN=$USE_WARNINGS
-
-
-echo "-----------------------------------------------"
-# CFLAGS=${CFLAGS:-$DEFAULT_CFLAGS}
-# CXXFLAGS=${CXXFLAGS:-$DEFAULT_CXXFLAGS}
-
-######################################################
-# Try and pass different flags according to compiler #
-######################################################
-
-# disable default -g -O2 CXXFLAGS
-: ${CXXFLAGS=""}
-
-#set CXX
-ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-if test -z "$CXX"; then
- if test -n "$CCC"; then
- CXX=$CCC
- else
- if test -n "$ac_tool_prefix"; then
- for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC
- do
- # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
-set dummy $ac_tool_prefix$ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_CXX+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$CXX"; then
- ac_cv_prog_CXX="$CXX" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-CXX=$ac_cv_prog_CXX
-if test -n "$CXX"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5
-$as_echo "$CXX" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
- test -n "$CXX" && break
- done
-fi
-if test -z "$CXX"; then
- ac_ct_CXX=$CXX
- for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC
-do
- # Extract the first word of "$ac_prog", so it can be a program name with args.
-set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_CXX+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_CXX"; then
- ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_CXX="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_CXX=$ac_cv_prog_ac_ct_CXX
-if test -n "$ac_ct_CXX"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5
-$as_echo "$ac_ct_CXX" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
- test -n "$ac_ct_CXX" && break
-done
-
- if test "x$ac_ct_CXX" = x; then
- CXX="g++"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- CXX=$ac_ct_CXX
- fi
-fi
-
- fi
-fi
-# Provide some information about the compiler.
-$as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5
-set X $ac_compile
-ac_compiler=$2
-for ac_option in --version -v -V -qversion; do
- { { ac_try="$ac_compiler $ac_option >&5"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compiler $ac_option >&5") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- sed '10a\
-... rest of stderr output deleted ...
- 10q' conftest.err >conftest.er1
- cat conftest.er1 >&5
- fi
- rm -f conftest.er1 conftest.err
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
-done
-
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-ac_clean_files_save=$ac_clean_files
-ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out"
-# Try to create an executable without -o first, disregard a.out.
-# It will help us diagnose broken compilers, and finding out an intuition
-# of exeext.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C++ compiler works" >&5
-$as_echo_n "checking whether the C++ compiler works... " >&6; }
-ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
-
-# The possible output files:
-ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*"
-
-ac_rmfiles=
-for ac_file in $ac_files
-do
- case $ac_file in
- *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
- * ) ac_rmfiles="$ac_rmfiles $ac_file";;
- esac
-done
-rm -f $ac_rmfiles
-
-if { { ac_try="$ac_link_default"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link_default") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then :
- # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
-# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
-# in a Makefile. We should not override ac_cv_exeext if it was cached,
-# so that the user can short-circuit this test for compilers unknown to
-# Autoconf.
-for ac_file in $ac_files ''
-do
- test -f "$ac_file" || continue
- case $ac_file in
- *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj )
- ;;
- [ab].out )
- # We found the default executable, but exeext='' is most
- # certainly right.
- break;;
- *.* )
- if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
- then :; else
- ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
- fi
- # We set ac_cv_exeext here because the later test for it is not
- # safe: cross compilers may not add the suffix if given an `-o'
- # argument, so we may need to know it at that point already.
- # Even if this section looks crufty: it has the advantage of
- # actually working.
- break;;
- * )
- break;;
- esac
-done
-test "$ac_cv_exeext" = no && ac_cv_exeext=
-
-else
- ac_file=''
-fi
-if test -z "$ac_file"; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-$as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error 77 "C++ compiler cannot create executables
-See \`config.log' for more details" "$LINENO" 5; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler default output file name" >&5
-$as_echo_n "checking for C++ compiler default output file name... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5
-$as_echo "$ac_file" >&6; }
-ac_exeext=$ac_cv_exeext
-
-rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out
-ac_clean_files=$ac_clean_files_save
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5
-$as_echo_n "checking for suffix of executables... " >&6; }
-if { { ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then :
- # If both `conftest.exe' and `conftest' are `present' (well, observable)
-# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
-# work properly (i.e., refer to `conftest.exe'), while it won't with
-# `rm'.
-for ac_file in conftest.exe conftest conftest.*; do
- test -f "$ac_file" || continue
- case $ac_file in
- *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
- *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
- break;;
- * ) break;;
- esac
-done
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot compute suffix of executables: cannot compile and link
-See \`config.log' for more details" "$LINENO" 5; }
-fi
-rm -f conftest conftest$ac_cv_exeext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
-$as_echo "$ac_cv_exeext" >&6; }
-
-rm -f conftest.$ac_ext
-EXEEXT=$ac_cv_exeext
-ac_exeext=$EXEEXT
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <stdio.h>
-int
-main ()
-{
-FILE *f = fopen ("conftest.out", "w");
- return ferror (f) || fclose (f) != 0;
-
- ;
- return 0;
-}
-_ACEOF
-ac_clean_files="$ac_clean_files conftest.out"
-# Check that the compiler produces executables we can run. If not, either
-# the compiler is broken, or we cross compile.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5
-$as_echo_n "checking whether we are cross compiling... " >&6; }
-if test "$cross_compiling" != yes; then
- { { ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
- if { ac_try='./conftest$ac_cv_exeext'
- { { case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_try") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then
- cross_compiling=no
- else
- if test "$cross_compiling" = maybe; then
- cross_compiling=yes
- else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot run C++ compiled programs.
-If you meant to cross compile, use \`--host'.
-See \`config.log' for more details" "$LINENO" 5; }
- fi
- fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5
-$as_echo "$cross_compiling" >&6; }
-
-rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
-ac_clean_files=$ac_clean_files_save
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
-$as_echo_n "checking for suffix of object files... " >&6; }
-if ${ac_cv_objext+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.o conftest.obj
-if { { ac_try="$ac_compile"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compile") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then :
- for ac_file in conftest.o conftest.obj conftest.*; do
- test -f "$ac_file" || continue;
- case $ac_file in
- *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;;
- *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
- break;;
- esac
-done
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot compute suffix of object files: cannot compile
-See \`config.log' for more details" "$LINENO" 5; }
-fi
-rm -f conftest.$ac_cv_objext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5
-$as_echo "$ac_cv_objext" >&6; }
-OBJEXT=$ac_cv_objext
-ac_objext=$OBJEXT
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5
-$as_echo_n "checking whether we are using the GNU C++ compiler... " >&6; }
-if ${ac_cv_cxx_compiler_gnu+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-#ifndef __GNUC__
- choke me
-#endif
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- ac_compiler_gnu=yes
-else
- ac_compiler_gnu=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-ac_cv_cxx_compiler_gnu=$ac_compiler_gnu
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5
-$as_echo "$ac_cv_cxx_compiler_gnu" >&6; }
-if test $ac_compiler_gnu = yes; then
- GXX=yes
-else
- GXX=
-fi
-ac_test_CXXFLAGS=${CXXFLAGS+set}
-ac_save_CXXFLAGS=$CXXFLAGS
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5
-$as_echo_n "checking whether $CXX accepts -g... " >&6; }
-if ${ac_cv_prog_cxx_g+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_save_cxx_werror_flag=$ac_cxx_werror_flag
- ac_cxx_werror_flag=yes
- ac_cv_prog_cxx_g=no
- CXXFLAGS="-g"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- ac_cv_prog_cxx_g=yes
-else
- CXXFLAGS=""
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
-
-else
- ac_cxx_werror_flag=$ac_save_cxx_werror_flag
- CXXFLAGS="-g"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- ac_cv_prog_cxx_g=yes
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- ac_cxx_werror_flag=$ac_save_cxx_werror_flag
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5
-$as_echo "$ac_cv_prog_cxx_g" >&6; }
-if test "$ac_test_CXXFLAGS" = set; then
- CXXFLAGS=$ac_save_CXXFLAGS
-elif test $ac_cv_prog_cxx_g = yes; then
- if test "$GXX" = yes; then
- CXXFLAGS="-g -O2"
- else
- CXXFLAGS="-g"
- fi
-else
- if test "$GXX" = yes; then
- CXXFLAGS="-O2"
- else
- CXXFLAGS=
- fi
-fi
-ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for family name of compiler" >&5
-$as_echo_n "checking for family name of compiler... " >&6; }
-
- if test "$cross_compiling" = yes; then :
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot run test program while cross compiling
-See \`config.log' for more details" "$LINENO" 5; }
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
- #ifdef __INTEL_COMPILER
- int main() { return 0 ; }
- #else
- pas intel
- #endif
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: icc" >&5
-$as_echo "icc" >&6; }
- CCNAM=icc
-
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
- if test -z "${CCNAM}"; then :
-
- if test "$cross_compiling" = yes; then :
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot run test program while cross compiling
-See \`config.log' for more details" "$LINENO" 5; }
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
- #ifdef __PATHSCALE__
- int main() { return !(__PATHCC__ >= 4) ; }
- #else
- pas ekopath non plus.
- #endif
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: eko" >&5
-$as_echo "eko" >&6; }
- CCNAM=eko
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-fi
-
- if test -z "${CCNAM}"; then :
-
- if test "$cross_compiling" = yes; then :
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot run test program while cross compiling
-See \`config.log' for more details" "$LINENO" 5; }
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
- #ifdef __GNUC__
- int main() { return !(__GNUC__ >= 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2)) ; }
- #else
- pas gcc non plus ???
- #endif
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: gcc" >&5
-$as_echo "gcc" >&6; }
- CCNAM=gcc
-
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-fi
-
-
- if test -z "${CCNAM}"; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unknown" >&5
-$as_echo "unknown" >&6; }
- CCNAM=unknown
-
- echo
- echo " *** unknow compiler. please file a bug "
- echo
-
-fi
-
-
-
-
-
-
-TESTS_CFLAGS="-O0"
-DEBUG_CFLAGS="-g"
-DEFAULT_CFLAGS="-pipe"
-WARN_CFLAGS="-Wall"
-
-if test "x$DBG" = "xyes" ; then
- DEFAULT_CFLAGS="-O0 ${DEFAULT_CFLAGS} " #those are CXXFLAGS
- DEBUG_CFLAGS="${DEBUG_CFLAGS} -DDEBUG -DFFLASFFPACK_DEBUG"
-else
- DEFAULT_CFLAGS="-O2 ${DEFAULT_CFLAGS} "
- DEBUG_CFLAGS="${DEBUG_CFLAGS} -DNDEBUG -UFFLASFFPACK_DEBUG -UFFLASFFPACK_DEBUG"
-fi
-
-if test "x$PROF" = "xyes" ; then
- DEFAULT_CFLAGS="${DEFAULT_CFLAGS} -pg"
-fi
-
-if test "x$WARN" = "xyes" -o "x$WARN" = "xfull" ; then
- if test "x${CCNAM}" = "xicc" ; then
- WARN_CFLAGS="${WARN_CFLAGS} -Wcheck"
- # DEBUG_CFLAGS="-fast"
- else
- if test "x${CCNAM}" = "xgcc" -o "x${CCNAM}" = "xeko" ; then
- WARN_CFLAGS="${WARN_CFLAGS} -Wextra -Wno-unused-parameter"
- if test "x${WARN}" = "xfull" ; then
- WARN_CFLAGS="${WARN_CFLAGS} -Wuninitialized -Wconversion -Wcast-qual -ansi -pedantic -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Wno-long-long"
- fi
- else
- echo
- echo "*******************************************************"
- echo "unsupported compiler ($CCNAM). Please file a bug."
- echo "*******************************************************"
- echo
- WARN_CFLAGS="${WARN_CFLAGS}"
- fi
- fi
-fi
-
-
-DEFAULT_CFLAGS="${DEFAULT_CFLAGS} ${WARN_CFLAGS} ${DEBUG_CFLAGS}"
-TESTS_CFLAGS="${TESTS_CFLAGS} ${WARN_CFLAGS} ${DEBUG_CFLAGS}"
-
-
-ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5
-$as_echo_n "checking how to run the C++ preprocessor... " >&6; }
-if test -z "$CXXCPP"; then
- if ${ac_cv_prog_CXXCPP+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- # Double quotes because CXXCPP needs to be expanded
- for CXXCPP in "$CXX -E" "/lib/cpp"
- do
- ac_preproc_ok=false
-for ac_cxx_preproc_warn_flag in '' yes
-do
- # Use a header file that comes with gcc, so configuring glibc
- # with a fresh cross-compiler works.
- # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- # <limits.h> exists even on freestanding compilers.
- # On the NeXT, cc -E runs the code through the compiler's parser,
- # not just through cpp. "Syntax error" is here to catch this case.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
- Syntax error
-_ACEOF
-if ac_fn_cxx_try_cpp "$LINENO"; then :
-
-else
- # Broken: fails on valid input.
-continue
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-
- # OK, works on sane cases. Now check whether nonexistent headers
- # can be detected and how.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <ac_nonexistent.h>
-_ACEOF
-if ac_fn_cxx_try_cpp "$LINENO"; then :
- # Broken: success on invalid input.
-continue
-else
- # Passes both tests.
-ac_preproc_ok=:
-break
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-
-done
-# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.i conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then :
- break
-fi
-
- done
- ac_cv_prog_CXXCPP=$CXXCPP
-
-fi
- CXXCPP=$ac_cv_prog_CXXCPP
-else
- ac_cv_prog_CXXCPP=$CXXCPP
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXXCPP" >&5
-$as_echo "$CXXCPP" >&6; }
-ac_preproc_ok=false
-for ac_cxx_preproc_warn_flag in '' yes
-do
- # Use a header file that comes with gcc, so configuring glibc
- # with a fresh cross-compiler works.
- # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- # <limits.h> exists even on freestanding compilers.
- # On the NeXT, cc -E runs the code through the compiler's parser,
- # not just through cpp. "Syntax error" is here to catch this case.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
- Syntax error
-_ACEOF
-if ac_fn_cxx_try_cpp "$LINENO"; then :
-
-else
- # Broken: fails on valid input.
-continue
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-
- # OK, works on sane cases. Now check whether nonexistent headers
- # can be detected and how.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <ac_nonexistent.h>
-_ACEOF
-if ac_fn_cxx_try_cpp "$LINENO"; then :
- # Broken: success on invalid input.
-continue
-else
- # Passes both tests.
-ac_preproc_ok=:
-break
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-
-done
-# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.i conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then :
-
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "C++ preprocessor \"$CXXCPP\" fails sanity check
-See \`config.log' for more details" "$LINENO" 5; }
-fi
-
-ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
-$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
-if ${ac_cv_path_GREP+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -z "$GREP"; then
- ac_path_GREP_found=false
- # Loop through the user's path and test for each of PROGNAME-LIST
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_prog in grep ggrep; do
- for ac_exec_ext in '' $ac_executable_extensions; do
- ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
- as_fn_executable_p "$ac_path_GREP" || continue
-# Check for GNU ac_path_GREP and select it if it is found.
- # Check for GNU $ac_path_GREP
-case `"$ac_path_GREP" --version 2>&1` in
-*GNU*)
- ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
-*)
- ac_count=0
- $as_echo_n 0123456789 >"conftest.in"
- while :
- do
- cat "conftest.in" "conftest.in" >"conftest.tmp"
- mv "conftest.tmp" "conftest.in"
- cp "conftest.in" "conftest.nl"
- $as_echo 'GREP' >> "conftest.nl"
- "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
- diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
- as_fn_arith $ac_count + 1 && ac_count=$as_val
- if test $ac_count -gt ${ac_path_GREP_max-0}; then
- # Best one so far, save it but keep looking for a better one
- ac_cv_path_GREP="$ac_path_GREP"
- ac_path_GREP_max=$ac_count
- fi
- # 10*(2^10) chars as input seems more than enough
- test $ac_count -gt 10 && break
- done
- rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
-esac
-
- $ac_path_GREP_found && break 3
- done
- done
- done
-IFS=$as_save_IFS
- if test -z "$ac_cv_path_GREP"; then
- as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
- fi
-else
- ac_cv_path_GREP=$GREP
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
-$as_echo "$ac_cv_path_GREP" >&6; }
- GREP="$ac_cv_path_GREP"
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
-$as_echo_n "checking for egrep... " >&6; }
-if ${ac_cv_path_EGREP+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
- then ac_cv_path_EGREP="$GREP -E"
- else
- if test -z "$EGREP"; then
- ac_path_EGREP_found=false
- # Loop through the user's path and test for each of PROGNAME-LIST
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_prog in egrep; do
- for ac_exec_ext in '' $ac_executable_extensions; do
- ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
- as_fn_executable_p "$ac_path_EGREP" || continue
-# Check for GNU ac_path_EGREP and select it if it is found.
- # Check for GNU $ac_path_EGREP
-case `"$ac_path_EGREP" --version 2>&1` in
-*GNU*)
- ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
-*)
- ac_count=0
- $as_echo_n 0123456789 >"conftest.in"
- while :
- do
- cat "conftest.in" "conftest.in" >"conftest.tmp"
- mv "conftest.tmp" "conftest.in"
- cp "conftest.in" "conftest.nl"
- $as_echo 'EGREP' >> "conftest.nl"
- "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
- diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
- as_fn_arith $ac_count + 1 && ac_count=$as_val
- if test $ac_count -gt ${ac_path_EGREP_max-0}; then
- # Best one so far, save it but keep looking for a better one
- ac_cv_path_EGREP="$ac_path_EGREP"
- ac_path_EGREP_max=$ac_count
- fi
- # 10*(2^10) chars as input seems more than enough
- test $ac_count -gt 10 && break
- done
- rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
-esac
-
- $ac_path_EGREP_found && break 3
- done
- done
- done
-IFS=$as_save_IFS
- if test -z "$ac_cv_path_EGREP"; then
- as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
- fi
-else
- ac_cv_path_EGREP=$EGREP
-fi
-
- fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
-$as_echo "$ac_cv_path_EGREP" >&6; }
- EGREP="$ac_cv_path_EGREP"
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
-$as_echo_n "checking for ANSI C header files... " >&6; }
-if ${ac_cv_header_stdc+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <float.h>
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- ac_cv_header_stdc=yes
-else
- ac_cv_header_stdc=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-
-if test $ac_cv_header_stdc = yes; then
- # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <string.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "memchr" >/dev/null 2>&1; then :
-
-else
- ac_cv_header_stdc=no
-fi
-rm -f conftest*
-
-fi
-
-if test $ac_cv_header_stdc = yes; then
- # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <stdlib.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "free" >/dev/null 2>&1; then :
-
-else
- ac_cv_header_stdc=no
-fi
-rm -f conftest*
-
-fi
-
-if test $ac_cv_header_stdc = yes; then
- # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
- if test "$cross_compiling" = yes; then :
- :
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <ctype.h>
-#include <stdlib.h>
-#if ((' ' & 0x0FF) == 0x020)
-# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
-# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
-#else
-# define ISLOWER(c) \
- (('a' <= (c) && (c) <= 'i') \
- || ('j' <= (c) && (c) <= 'r') \
- || ('s' <= (c) && (c) <= 'z'))
-# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
-#endif
-
-#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
-int
-main ()
-{
- int i;
- for (i = 0; i < 256; i++)
- if (XOR (islower (i), ISLOWER (i))
- || toupper (i) != TOUPPER (i))
- return 2;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
-else
- ac_cv_header_stdc=no
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
-$as_echo "$ac_cv_header_stdc" >&6; }
-if test $ac_cv_header_stdc = yes; then
-
-$as_echo "#define STDC_HEADERS 1" >>confdefs.h
-
-fi
-
-case `pwd` in
- *\ * | *\ *)
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5
-$as_echo "$as_me: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&2;} ;;
-esac
-
-
-
-macro_version='2.4.2'
-macro_revision='1.3337'
-
-
-
-
-
-
-
-
-
-
-
-
-
-ltmain="$ac_aux_dir/ltmain.sh"
-
-# Make sure we can run config.sub.
-$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
- as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5
-$as_echo_n "checking build system type... " >&6; }
-if ${ac_cv_build+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_build_alias=$build_alias
-test "x$ac_build_alias" = x &&
- ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"`
-test "x$ac_build_alias" = x &&
- as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5
-ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` ||
- as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5
-$as_echo "$ac_cv_build" >&6; }
-case $ac_cv_build in
-*-*-*) ;;
-*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;;
-esac
-build=$ac_cv_build
-ac_save_IFS=$IFS; IFS='-'
-set x $ac_cv_build
-shift
-build_cpu=$1
-build_vendor=$2
-shift; shift
-# Remember, the first character of IFS is used to create $*,
-# except with old shells:
-build_os=$*
-IFS=$ac_save_IFS
-case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5
-$as_echo_n "checking host system type... " >&6; }
-if ${ac_cv_host+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test "x$host_alias" = x; then
- ac_cv_host=$ac_cv_build
-else
- ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` ||
- as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5
-$as_echo "$ac_cv_host" >&6; }
-case $ac_cv_host in
-*-*-*) ;;
-*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;;
-esac
-host=$ac_cv_host
-ac_save_IFS=$IFS; IFS='-'
-set x $ac_cv_host
-shift
-host_cpu=$1
-host_vendor=$2
-shift; shift
-# Remember, the first character of IFS is used to create $*,
-# except with old shells:
-host_os=$*
-IFS=$ac_save_IFS
-case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac
-
-
-# Backslashify metacharacters that are still active within
-# double-quoted strings.
-sed_quote_subst='s/\(["`$\\]\)/\\\1/g'
-
-# Same as above, but do not quote variable references.
-double_quote_subst='s/\(["`\\]\)/\\\1/g'
-
-# Sed substitution to delay expansion of an escaped shell variable in a
-# double_quote_subst'ed string.
-delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'
-
-# Sed substitution to delay expansion of an escaped single quote.
-delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g'
-
-# Sed substitution to avoid accidental globbing in evaled expressions
-no_glob_subst='s/\*/\\\*/g'
-
-ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
-ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO
-ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to print strings" >&5
-$as_echo_n "checking how to print strings... " >&6; }
-# Test print first, because it will be a builtin if present.
-if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \
- test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then
- ECHO='print -r --'
-elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then
- ECHO='printf %s\n'
-else
- # Use this function as a fallback that always works.
- func_fallback_echo ()
- {
- eval 'cat <<_LTECHO_EOF
-$1
-_LTECHO_EOF'
- }
- ECHO='func_fallback_echo'
-fi
-
-# func_echo_all arg...
-# Invoke $ECHO with all args, space-separated.
-func_echo_all ()
-{
- $ECHO ""
-}
-
-case "$ECHO" in
- printf*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: printf" >&5
-$as_echo "printf" >&6; } ;;
- print*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: print -r" >&5
-$as_echo "print -r" >&6; } ;;
- *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: cat" >&5
-$as_echo "cat" >&6; } ;;
-esac
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
-set dummy ${ac_tool_prefix}gcc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_CC+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$CC"; then
- ac_cv_prog_CC="$CC" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_CC="${ac_tool_prefix}gcc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-CC=$ac_cv_prog_CC
-if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
-$as_echo "$CC" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_prog_CC"; then
- ac_ct_CC=$CC
- # Extract the first word of "gcc", so it can be a program name with args.
-set dummy gcc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_CC+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_CC"; then
- ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_CC="gcc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_CC=$ac_cv_prog_ac_ct_CC
-if test -n "$ac_ct_CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
-$as_echo "$ac_ct_CC" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
- if test "x$ac_ct_CC" = x; then
- CC=""
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- CC=$ac_ct_CC
- fi
-else
- CC="$ac_cv_prog_CC"
-fi
-
-if test -z "$CC"; then
- if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
-set dummy ${ac_tool_prefix}cc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_CC+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$CC"; then
- ac_cv_prog_CC="$CC" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_CC="${ac_tool_prefix}cc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-CC=$ac_cv_prog_CC
-if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
-$as_echo "$CC" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
- fi
-fi
-if test -z "$CC"; then
- # Extract the first word of "cc", so it can be a program name with args.
-set dummy cc; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_CC+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$CC"; then
- ac_cv_prog_CC="$CC" # Let the user override the test.
-else
- ac_prog_rejected=no
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
- ac_prog_rejected=yes
- continue
- fi
- ac_cv_prog_CC="cc"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-if test $ac_prog_rejected = yes; then
- # We found a bogon in the path, so make sure we never use it.
- set dummy $ac_cv_prog_CC
- shift
- if test $# != 0; then
- # We chose a different compiler from the bogus one.
- # However, it has the same basename, so the bogon will be chosen
- # first if we set CC to just the basename; use the full file name.
- shift
- ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
- fi
-fi
-fi
-fi
-CC=$ac_cv_prog_CC
-if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
-$as_echo "$CC" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$CC"; then
- if test -n "$ac_tool_prefix"; then
- for ac_prog in cl.exe
- do
- # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
-set dummy $ac_tool_prefix$ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_CC+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$CC"; then
- ac_cv_prog_CC="$CC" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-CC=$ac_cv_prog_CC
-if test -n "$CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
-$as_echo "$CC" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
- test -n "$CC" && break
- done
-fi
-if test -z "$CC"; then
- ac_ct_CC=$CC
- for ac_prog in cl.exe
-do
- # Extract the first word of "$ac_prog", so it can be a program name with args.
-set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_CC+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_CC"; then
- ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_CC="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_CC=$ac_cv_prog_ac_ct_CC
-if test -n "$ac_ct_CC"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
-$as_echo "$ac_ct_CC" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
- test -n "$ac_ct_CC" && break
-done
-
- if test "x$ac_ct_CC" = x; then
- CC=""
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- CC=$ac_ct_CC
- fi
-fi
-
-fi
-
-
-test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "no acceptable C compiler found in \$PATH
-See \`config.log' for more details" "$LINENO" 5; }
-
-# Provide some information about the compiler.
-$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
-set X $ac_compile
-ac_compiler=$2
-for ac_option in --version -v -V -qversion; do
- { { ac_try="$ac_compiler $ac_option >&5"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_compiler $ac_option >&5") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- sed '10a\
-... rest of stderr output deleted ...
- 10q' conftest.err >conftest.er1
- cat conftest.er1 >&5
- fi
- rm -f conftest.er1 conftest.err
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
-done
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
-$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
-if ${ac_cv_c_compiler_gnu+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-#ifndef __GNUC__
- choke me
-#endif
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- ac_compiler_gnu=yes
-else
- ac_compiler_gnu=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-ac_cv_c_compiler_gnu=$ac_compiler_gnu
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5
-$as_echo "$ac_cv_c_compiler_gnu" >&6; }
-if test $ac_compiler_gnu = yes; then
- GCC=yes
-else
- GCC=
-fi
-ac_test_CFLAGS=${CFLAGS+set}
-ac_save_CFLAGS=$CFLAGS
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
-$as_echo_n "checking whether $CC accepts -g... " >&6; }
-if ${ac_cv_prog_cc_g+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_save_c_werror_flag=$ac_c_werror_flag
- ac_c_werror_flag=yes
- ac_cv_prog_cc_g=no
- CFLAGS="-g"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- ac_cv_prog_cc_g=yes
-else
- CFLAGS=""
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-
-else
- ac_c_werror_flag=$ac_save_c_werror_flag
- CFLAGS="-g"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- ac_cv_prog_cc_g=yes
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- ac_c_werror_flag=$ac_save_c_werror_flag
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
-$as_echo "$ac_cv_prog_cc_g" >&6; }
-if test "$ac_test_CFLAGS" = set; then
- CFLAGS=$ac_save_CFLAGS
-elif test $ac_cv_prog_cc_g = yes; then
- if test "$GCC" = yes; then
- CFLAGS="-g -O2"
- else
- CFLAGS="-g"
- fi
-else
- if test "$GCC" = yes; then
- CFLAGS="-O2"
- else
- CFLAGS=
- fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
-$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
-if ${ac_cv_prog_cc_c89+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_cv_prog_cc_c89=no
-ac_save_CC=$CC
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <stdarg.h>
-#include <stdio.h>
-struct stat;
-/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
-struct buf { int x; };
-FILE * (*rcsopen) (struct buf *, struct stat *, int);
-static char *e (p, i)
- char **p;
- int i;
-{
- return p[i];
-}
-static char *f (char * (*g) (char **, int), char **p, ...)
-{
- char *s;
- va_list v;
- va_start (v,p);
- s = g (p, va_arg (v,int));
- va_end (v);
- return s;
-}
-
-/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has
- function prototypes and stuff, but not '\xHH' hex character constants.
- These don't provoke an error unfortunately, instead are silently treated
- as 'x'. The following induces an error, until -std is added to get
- proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an
- array size at least. It's necessary to write '\x00'==0 to get something
- that's true only with -std. */
-int osf4_cc_array ['\x00' == 0 ? 1 : -1];
-
-/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
- inside strings and character constants. */
-#define FOO(x) 'x'
-int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
-
-int test (int i, double x);
-struct s1 {int (*f) (int a);};
-struct s2 {int (*f) (double a);};
-int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
-int argc;
-char **argv;
-int
-main ()
-{
-return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
- ;
- return 0;
-}
-_ACEOF
-for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
- -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
-do
- CC="$ac_save_CC $ac_arg"
- if ac_fn_c_try_compile "$LINENO"; then :
- ac_cv_prog_cc_c89=$ac_arg
-fi
-rm -f core conftest.err conftest.$ac_objext
- test "x$ac_cv_prog_cc_c89" != "xno" && break
-done
-rm -f conftest.$ac_ext
-CC=$ac_save_CC
-
-fi
-# AC_CACHE_VAL
-case "x$ac_cv_prog_cc_c89" in
- x)
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
-$as_echo "none needed" >&6; } ;;
- xno)
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
-$as_echo "unsupported" >&6; } ;;
- *)
- CC="$CC $ac_cv_prog_cc_c89"
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
-$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
-esac
-if test "x$ac_cv_prog_cc_c89" != xno; then :
-
-fi
-
-ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5
-$as_echo_n "checking for a sed that does not truncate output... " >&6; }
-if ${ac_cv_path_SED+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/
- for ac_i in 1 2 3 4 5 6 7; do
- ac_script="$ac_script$as_nl$ac_script"
- done
- echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed
- { ac_script=; unset ac_script;}
- if test -z "$SED"; then
- ac_path_SED_found=false
- # Loop through the user's path and test for each of PROGNAME-LIST
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_prog in sed gsed; do
- for ac_exec_ext in '' $ac_executable_extensions; do
- ac_path_SED="$as_dir/$ac_prog$ac_exec_ext"
- as_fn_executable_p "$ac_path_SED" || continue
-# Check for GNU ac_path_SED and select it if it is found.
- # Check for GNU $ac_path_SED
-case `"$ac_path_SED" --version 2>&1` in
-*GNU*)
- ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;;
-*)
- ac_count=0
- $as_echo_n 0123456789 >"conftest.in"
- while :
- do
- cat "conftest.in" "conftest.in" >"conftest.tmp"
- mv "conftest.tmp" "conftest.in"
- cp "conftest.in" "conftest.nl"
- $as_echo '' >> "conftest.nl"
- "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break
- diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
- as_fn_arith $ac_count + 1 && ac_count=$as_val
- if test $ac_count -gt ${ac_path_SED_max-0}; then
- # Best one so far, save it but keep looking for a better one
- ac_cv_path_SED="$ac_path_SED"
- ac_path_SED_max=$ac_count
- fi
- # 10*(2^10) chars as input seems more than enough
- test $ac_count -gt 10 && break
- done
- rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
-esac
-
- $ac_path_SED_found && break 3
- done
- done
- done
-IFS=$as_save_IFS
- if test -z "$ac_cv_path_SED"; then
- as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5
- fi
-else
- ac_cv_path_SED=$SED
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5
-$as_echo "$ac_cv_path_SED" >&6; }
- SED="$ac_cv_path_SED"
- rm -f conftest.sed
-
-test -z "$SED" && SED=sed
-Xsed="$SED -e 1s/^X//"
-
-
-
-
-
-
-
-
-
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5
-$as_echo_n "checking for fgrep... " >&6; }
-if ${ac_cv_path_FGREP+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1
- then ac_cv_path_FGREP="$GREP -F"
- else
- if test -z "$FGREP"; then
- ac_path_FGREP_found=false
- # Loop through the user's path and test for each of PROGNAME-LIST
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_prog in fgrep; do
- for ac_exec_ext in '' $ac_executable_extensions; do
- ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext"
- as_fn_executable_p "$ac_path_FGREP" || continue
-# Check for GNU ac_path_FGREP and select it if it is found.
- # Check for GNU $ac_path_FGREP
-case `"$ac_path_FGREP" --version 2>&1` in
-*GNU*)
- ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;;
-*)
- ac_count=0
- $as_echo_n 0123456789 >"conftest.in"
- while :
- do
- cat "conftest.in" "conftest.in" >"conftest.tmp"
- mv "conftest.tmp" "conftest.in"
- cp "conftest.in" "conftest.nl"
- $as_echo 'FGREP' >> "conftest.nl"
- "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break
- diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
- as_fn_arith $ac_count + 1 && ac_count=$as_val
- if test $ac_count -gt ${ac_path_FGREP_max-0}; then
- # Best one so far, save it but keep looking for a better one
- ac_cv_path_FGREP="$ac_path_FGREP"
- ac_path_FGREP_max=$ac_count
- fi
- # 10*(2^10) chars as input seems more than enough
- test $ac_count -gt 10 && break
- done
- rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
-esac
-
- $ac_path_FGREP_found && break 3
- done
- done
- done
-IFS=$as_save_IFS
- if test -z "$ac_cv_path_FGREP"; then
- as_fn_error $? "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
- fi
-else
- ac_cv_path_FGREP=$FGREP
-fi
-
- fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5
-$as_echo "$ac_cv_path_FGREP" >&6; }
- FGREP="$ac_cv_path_FGREP"
-
-
-test -z "$GREP" && GREP=grep
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# Check whether --with-gnu-ld was given.
-if test "${with_gnu_ld+set}" = set; then :
- withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes
-else
- with_gnu_ld=no
-fi
-
-ac_prog=ld
-if test "$GCC" = yes; then
- # Check if gcc -print-prog-name=ld gives a path.
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5
-$as_echo_n "checking for ld used by $CC... " >&6; }
- case $host in
- *-*-mingw*)
- # gcc leaves a trailing carriage return which upsets mingw
- ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
- *)
- ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
- esac
- case $ac_prog in
- # Accept absolute paths.
- [\\/]* | ?:[\\/]*)
- re_direlt='/[^/][^/]*/\.\./'
- # Canonicalize the pathname of ld
- ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'`
- while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do
- ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"`
- done
- test -z "$LD" && LD="$ac_prog"
- ;;
- "")
- # If it fails, then pretend we aren't using GCC.
- ac_prog=ld
- ;;
- *)
- # If it is relative, then search for the first ld in PATH.
- with_gnu_ld=unknown
- ;;
- esac
-elif test "$with_gnu_ld" = yes; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5
-$as_echo_n "checking for GNU ld... " >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5
-$as_echo_n "checking for non-GNU ld... " >&6; }
-fi
-if ${lt_cv_path_LD+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -z "$LD"; then
- lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
- for ac_dir in $PATH; do
- IFS="$lt_save_ifs"
- test -z "$ac_dir" && ac_dir=.
- if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
- lt_cv_path_LD="$ac_dir/$ac_prog"
- # Check to see if the program is GNU ld. I'd rather use --version,
- # but apparently some variants of GNU ld only accept -v.
- # Break only if it was the GNU/non-GNU ld that we prefer.
- case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
- *GNU* | *'with BFD'*)
- test "$with_gnu_ld" != no && break
- ;;
- *)
- test "$with_gnu_ld" != yes && break
- ;;
- esac
- fi
- done
- IFS="$lt_save_ifs"
-else
- lt_cv_path_LD="$LD" # Let the user override the test with a path.
-fi
-fi
-
-LD="$lt_cv_path_LD"
-if test -n "$LD"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LD" >&5
-$as_echo "$LD" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5
-$as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; }
-if ${lt_cv_prog_gnu_ld+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- # I'd rather use --version here, but apparently some GNU lds only accept -v.
-case `$LD -v 2>&1 </dev/null` in
-*GNU* | *'with BFD'*)
- lt_cv_prog_gnu_ld=yes
- ;;
-*)
- lt_cv_prog_gnu_ld=no
- ;;
-esac
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_gnu_ld" >&5
-$as_echo "$lt_cv_prog_gnu_ld" >&6; }
-with_gnu_ld=$lt_cv_prog_gnu_ld
-
-
-
-
-
-
-
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5
-$as_echo_n "checking for BSD- or MS-compatible name lister (nm)... " >&6; }
-if ${lt_cv_path_NM+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$NM"; then
- # Let the user override the test.
- lt_cv_path_NM="$NM"
-else
- lt_nm_to_check="${ac_tool_prefix}nm"
- if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
- lt_nm_to_check="$lt_nm_to_check nm"
- fi
- for lt_tmp_nm in $lt_nm_to_check; do
- lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
- for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
- IFS="$lt_save_ifs"
- test -z "$ac_dir" && ac_dir=.
- tmp_nm="$ac_dir/$lt_tmp_nm"
- if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
- # Check to see if the nm accepts a BSD-compat flag.
- # Adding the `sed 1q' prevents false positives on HP-UX, which says:
- # nm: unknown option "B" ignored
- # Tru64's nm complains that /dev/null is an invalid object file
- case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
- */dev/null* | *'Invalid file or object type'*)
- lt_cv_path_NM="$tmp_nm -B"
- break
- ;;
- *)
- case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
- */dev/null*)
- lt_cv_path_NM="$tmp_nm -p"
- break
- ;;
- *)
- lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
- continue # so that we can try to find one that supports BSD flags
- ;;
- esac
- ;;
- esac
- fi
- done
- IFS="$lt_save_ifs"
- done
- : ${lt_cv_path_NM=no}
-fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5
-$as_echo "$lt_cv_path_NM" >&6; }
-if test "$lt_cv_path_NM" != "no"; then
- NM="$lt_cv_path_NM"
-else
- # Didn't find any BSD compatible name lister, look for dumpbin.
- if test -n "$DUMPBIN"; then :
- # Let the user override the test.
- else
- if test -n "$ac_tool_prefix"; then
- for ac_prog in dumpbin "link -dump"
- do
- # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
-set dummy $ac_tool_prefix$ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_DUMPBIN+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$DUMPBIN"; then
- ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-DUMPBIN=$ac_cv_prog_DUMPBIN
-if test -n "$DUMPBIN"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5
-$as_echo "$DUMPBIN" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
- test -n "$DUMPBIN" && break
- done
-fi
-if test -z "$DUMPBIN"; then
- ac_ct_DUMPBIN=$DUMPBIN
- for ac_prog in dumpbin "link -dump"
-do
- # Extract the first word of "$ac_prog", so it can be a program name with args.
-set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_DUMPBIN+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_DUMPBIN"; then
- ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_DUMPBIN="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN
-if test -n "$ac_ct_DUMPBIN"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5
-$as_echo "$ac_ct_DUMPBIN" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
- test -n "$ac_ct_DUMPBIN" && break
-done
-
- if test "x$ac_ct_DUMPBIN" = x; then
- DUMPBIN=":"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- DUMPBIN=$ac_ct_DUMPBIN
- fi
-fi
-
- case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in
- *COFF*)
- DUMPBIN="$DUMPBIN -symbols"
- ;;
- *)
- DUMPBIN=:
- ;;
- esac
- fi
-
- if test "$DUMPBIN" != ":"; then
- NM="$DUMPBIN"
- fi
-fi
-test -z "$NM" && NM=nm
-
-
-
-
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5
-$as_echo_n "checking the name lister ($NM) interface... " >&6; }
-if ${lt_cv_nm_interface+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_nm_interface="BSD nm"
- echo "int some_variable = 0;" > conftest.$ac_ext
- (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5)
- (eval "$ac_compile" 2>conftest.err)
- cat conftest.err >&5
- (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
- (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
- cat conftest.err >&5
- (eval echo "\"\$as_me:$LINENO: output\"" >&5)
- cat conftest.out >&5
- if $GREP 'External.*some_variable' conftest.out > /dev/null; then
- lt_cv_nm_interface="MS dumpbin"
- fi
- rm -f conftest*
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5
-$as_echo "$lt_cv_nm_interface" >&6; }
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5
-$as_echo_n "checking whether ln -s works... " >&6; }
-LN_S=$as_ln_s
-if test "$LN_S" = "ln -s"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5
-$as_echo "no, using $LN_S" >&6; }
-fi
-
-# find the maximum length of command line arguments
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5
-$as_echo_n "checking the maximum length of command line arguments... " >&6; }
-if ${lt_cv_sys_max_cmd_len+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- i=0
- teststring="ABCD"
-
- case $build_os in
- msdosdjgpp*)
- # On DJGPP, this test can blow up pretty badly due to problems in libc
- # (any single argument exceeding 2000 bytes causes a buffer overrun
- # during glob expansion). Even if it were fixed, the result of this
- # check would be larger than it should be.
- lt_cv_sys_max_cmd_len=12288; # 12K is about right
- ;;
-
- gnu*)
- # Under GNU Hurd, this test is not required because there is
- # no limit to the length of command line arguments.
- # Libtool will interpret -1 as no limit whatsoever
- lt_cv_sys_max_cmd_len=-1;
- ;;
-
- cygwin* | mingw* | cegcc*)
- # On Win9x/ME, this test blows up -- it succeeds, but takes
- # about 5 minutes as the teststring grows exponentially.
- # Worse, since 9x/ME are not pre-emptively multitasking,
- # you end up with a "frozen" computer, even though with patience
- # the test eventually succeeds (with a max line length of 256k).
- # Instead, let's just punt: use the minimum linelength reported by
- # all of the supported platforms: 8192 (on NT/2K/XP).
- lt_cv_sys_max_cmd_len=8192;
- ;;
-
- mint*)
- # On MiNT this can take a long time and run out of memory.
- lt_cv_sys_max_cmd_len=8192;
- ;;
-
- amigaos*)
- # On AmigaOS with pdksh, this test takes hours, literally.
- # So we just punt and use a minimum line length of 8192.
- lt_cv_sys_max_cmd_len=8192;
- ;;
-
- netbsd* | freebsd* | openbsd* | darwin* | dragonfly*)
- # This has been around since 386BSD, at least. Likely further.
- if test -x /sbin/sysctl; then
- lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax`
- elif test -x /usr/sbin/sysctl; then
- lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax`
- else
- lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs
- fi
- # And add a safety zone
- lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
- lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
- ;;
-
- interix*)
- # We know the value 262144 and hardcode it with a safety zone (like BSD)
- lt_cv_sys_max_cmd_len=196608
- ;;
-
- os2*)
- # The test takes a long time on OS/2.
- lt_cv_sys_max_cmd_len=8192
- ;;
-
- osf*)
- # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
- # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
- # nice to cause kernel panics so lets avoid the loop below.
- # First set a reasonable default.
- lt_cv_sys_max_cmd_len=16384
- #
- if test -x /sbin/sysconfig; then
- case `/sbin/sysconfig -q proc exec_disable_arg_limit` in
- *1*) lt_cv_sys_max_cmd_len=-1 ;;
- esac
- fi
- ;;
- sco3.2v5*)
- lt_cv_sys_max_cmd_len=102400
- ;;
- sysv5* | sco5v6* | sysv4.2uw2*)
- kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null`
- if test -n "$kargmax"; then
- lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[ ]//'`
- else
- lt_cv_sys_max_cmd_len=32768
- fi
- ;;
- *)
- lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
- if test -n "$lt_cv_sys_max_cmd_len"; then
- lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
- lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
- else
- # Make teststring a little bigger before we do anything with it.
- # a 1K string should be a reasonable start.
- for i in 1 2 3 4 5 6 7 8 ; do
- teststring=$teststring$teststring
- done
- SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}}
- # If test is not a shell built-in, we'll probably end up computing a
- # maximum length that is only half of the actual maximum length, but
- # we can't tell.
- while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
- = "X$teststring$teststring"; } >/dev/null 2>&1 &&
- test $i != 17 # 1/2 MB should be enough
- do
- i=`expr $i + 1`
- teststring=$teststring$teststring
- done
- # Only check the string length outside the loop.
- lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1`
- teststring=
- # Add a significant safety factor because C++ compilers can tack on
- # massive amounts of additional arguments before passing them to the
- # linker. It appears as though 1/2 is a usable value.
- lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2`
- fi
- ;;
- esac
-
-fi
-
-if test -n $lt_cv_sys_max_cmd_len ; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sys_max_cmd_len" >&5
-$as_echo "$lt_cv_sys_max_cmd_len" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: none" >&5
-$as_echo "none" >&6; }
-fi
-max_cmd_len=$lt_cv_sys_max_cmd_len
-
-
-
-
-
-
-: ${CP="cp -f"}
-: ${MV="mv -f"}
-: ${RM="rm -f"}
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands some XSI constructs" >&5
-$as_echo_n "checking whether the shell understands some XSI constructs... " >&6; }
-# Try some XSI features
-xsi_shell=no
-( _lt_dummy="a/b/c"
- test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \
- = c,a/b,b/c, \
- && eval 'test $(( 1 + 1 )) -eq 2 \
- && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \
- && xsi_shell=yes
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $xsi_shell" >&5
-$as_echo "$xsi_shell" >&6; }
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands \"+=\"" >&5
-$as_echo_n "checking whether the shell understands \"+=\"... " >&6; }
-lt_shell_append=no
-( foo=bar; set foo baz; eval "$1+=\$2" && test "$foo" = barbaz ) \
- >/dev/null 2>&1 \
- && lt_shell_append=yes
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_shell_append" >&5
-$as_echo "$lt_shell_append" >&6; }
-
-
-if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
- lt_unset=unset
-else
- lt_unset=false
-fi
-
-
-
-
-
-# test EBCDIC or ASCII
-case `echo X|tr X '\101'` in
- A) # ASCII based system
- # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr
- lt_SP2NL='tr \040 \012'
- lt_NL2SP='tr \015\012 \040\040'
- ;;
- *) # EBCDIC based system
- lt_SP2NL='tr \100 \n'
- lt_NL2SP='tr \r\n \100\100'
- ;;
-esac
-
-
-
-
-
-
-
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5
-$as_echo_n "checking how to convert $build file names to $host format... " >&6; }
-if ${lt_cv_to_host_file_cmd+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- case $host in
- *-*-mingw* )
- case $build in
- *-*-mingw* ) # actually msys
- lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32
- ;;
- *-*-cygwin* )
- lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32
- ;;
- * ) # otherwise, assume *nix
- lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32
- ;;
- esac
- ;;
- *-*-cygwin* )
- case $build in
- *-*-mingw* ) # actually msys
- lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin
- ;;
- *-*-cygwin* )
- lt_cv_to_host_file_cmd=func_convert_file_noop
- ;;
- * ) # otherwise, assume *nix
- lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin
- ;;
- esac
- ;;
- * ) # unhandled hosts (and "normal" native builds)
- lt_cv_to_host_file_cmd=func_convert_file_noop
- ;;
-esac
-
-fi
-
-to_host_file_cmd=$lt_cv_to_host_file_cmd
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" >&5
-$as_echo "$lt_cv_to_host_file_cmd" >&6; }
-
-
-
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5
-$as_echo_n "checking how to convert $build file names to toolchain format... " >&6; }
-if ${lt_cv_to_tool_file_cmd+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- #assume ordinary cross tools, or native build.
-lt_cv_to_tool_file_cmd=func_convert_file_noop
-case $host in
- *-*-mingw* )
- case $build in
- *-*-mingw* ) # actually msys
- lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32
- ;;
- esac
- ;;
-esac
-
-fi
-
-to_tool_file_cmd=$lt_cv_to_tool_file_cmd
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" >&5
-$as_echo "$lt_cv_to_tool_file_cmd" >&6; }
-
-
-
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5
-$as_echo_n "checking for $LD option to reload object files... " >&6; }
-if ${lt_cv_ld_reload_flag+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_ld_reload_flag='-r'
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5
-$as_echo "$lt_cv_ld_reload_flag" >&6; }
-reload_flag=$lt_cv_ld_reload_flag
-case $reload_flag in
-"" | " "*) ;;
-*) reload_flag=" $reload_flag" ;;
-esac
-reload_cmds='$LD$reload_flag -o $output$reload_objs'
-case $host_os in
- cygwin* | mingw* | pw32* | cegcc*)
- if test "$GCC" != yes; then
- reload_cmds=false
- fi
- ;;
- darwin*)
- if test "$GCC" = yes; then
- reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs'
- else
- reload_cmds='$LD$reload_flag -o $output$reload_objs'
- fi
- ;;
-esac
-
-
-
-
-
-
-
-
-
-if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args.
-set dummy ${ac_tool_prefix}objdump; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_OBJDUMP+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$OBJDUMP"; then
- ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-OBJDUMP=$ac_cv_prog_OBJDUMP
-if test -n "$OBJDUMP"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5
-$as_echo "$OBJDUMP" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_prog_OBJDUMP"; then
- ac_ct_OBJDUMP=$OBJDUMP
- # Extract the first word of "objdump", so it can be a program name with args.
-set dummy objdump; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_OBJDUMP"; then
- ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_OBJDUMP="objdump"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP
-if test -n "$ac_ct_OBJDUMP"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5
-$as_echo "$ac_ct_OBJDUMP" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
- if test "x$ac_ct_OBJDUMP" = x; then
- OBJDUMP="false"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- OBJDUMP=$ac_ct_OBJDUMP
- fi
-else
- OBJDUMP="$ac_cv_prog_OBJDUMP"
-fi
-
-test -z "$OBJDUMP" && OBJDUMP=objdump
-
-
-
-
-
-
-
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5
-$as_echo_n "checking how to recognize dependent libraries... " >&6; }
-if ${lt_cv_deplibs_check_method+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_file_magic_cmd='$MAGIC_CMD'
-lt_cv_file_magic_test_file=
-lt_cv_deplibs_check_method='unknown'
-# Need to set the preceding variable on all platforms that support
-# interlibrary dependencies.
-# 'none' -- dependencies not supported.
-# `unknown' -- same as none, but documents that we really don't know.
-# 'pass_all' -- all dependencies passed with no checks.
-# 'test_compile' -- check by making test program.
-# 'file_magic [[regex]]' -- check by looking for files in library path
-# which responds to the $file_magic_cmd with a given extended regex.
-# If you have `file' or equivalent on your system and you're not sure
-# whether `pass_all' will *always* work, you probably want this one.
-
-case $host_os in
-aix[4-9]*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-beos*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-bsdi[45]*)
- lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)'
- lt_cv_file_magic_cmd='/usr/bin/file -L'
- lt_cv_file_magic_test_file=/shlib/libc.so
- ;;
-
-cygwin*)
- # func_win32_libid is a shell function defined in ltmain.sh
- lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
- lt_cv_file_magic_cmd='func_win32_libid'
- ;;
-
-mingw* | pw32*)
- # Base MSYS/MinGW do not provide the 'file' command needed by
- # func_win32_libid shell function, so use a weaker test based on 'objdump',
- # unless we find 'file', for example because we are cross-compiling.
- # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin.
- if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then
- lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
- lt_cv_file_magic_cmd='func_win32_libid'
- else
- # Keep this pattern in sync with the one in func_win32_libid.
- lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)'
- lt_cv_file_magic_cmd='$OBJDUMP -f'
- fi
- ;;
-
-cegcc*)
- # use the weaker test based on 'objdump'. See mingw*.
- lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?'
- lt_cv_file_magic_cmd='$OBJDUMP -f'
- ;;
-
-darwin* | rhapsody*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-freebsd* | dragonfly*)
- if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
- case $host_cpu in
- i*86 )
- # Not sure whether the presence of OpenBSD here was a mistake.
- # Let's accept both of them until this is cleared up.
- lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library'
- lt_cv_file_magic_cmd=/usr/bin/file
- lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
- ;;
- esac
- else
- lt_cv_deplibs_check_method=pass_all
- fi
- ;;
-
-gnu*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-haiku*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-hpux10.20* | hpux11*)
- lt_cv_file_magic_cmd=/usr/bin/file
- case $host_cpu in
- ia64*)
- lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64'
- lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so
- ;;
- hppa*64*)
- lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'
- lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl
- ;;
- *)
- lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library'
- lt_cv_file_magic_test_file=/usr/lib/libc.sl
- ;;
- esac
- ;;
-
-interix[3-9]*)
- # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here
- lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$'
- ;;
-
-irix5* | irix6* | nonstopux*)
- case $LD in
- *-32|*"-32 ") libmagic=32-bit;;
- *-n32|*"-n32 ") libmagic=N32;;
- *-64|*"-64 ") libmagic=64-bit;;
- *) libmagic=never-match;;
- esac
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-# This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-netbsd*)
- if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
- lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$'
- else
- lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$'
- fi
- ;;
-
-newos6*)
- lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)'
- lt_cv_file_magic_cmd=/usr/bin/file
- lt_cv_file_magic_test_file=/usr/lib/libnls.so
- ;;
-
-*nto* | *qnx*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-openbsd*)
- if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
- lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$'
- else
- lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$'
- fi
- ;;
-
-osf3* | osf4* | osf5*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-rdos*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-solaris*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-sysv4 | sysv4.3*)
- case $host_vendor in
- motorola)
- lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]'
- lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
- ;;
- ncr)
- lt_cv_deplibs_check_method=pass_all
- ;;
- sequent)
- lt_cv_file_magic_cmd='/bin/file'
- lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )'
- ;;
- sni)
- lt_cv_file_magic_cmd='/bin/file'
- lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib"
- lt_cv_file_magic_test_file=/lib/libc.so
- ;;
- siemens)
- lt_cv_deplibs_check_method=pass_all
- ;;
- pc)
- lt_cv_deplibs_check_method=pass_all
- ;;
- esac
- ;;
-
-tpf*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-esac
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5
-$as_echo "$lt_cv_deplibs_check_method" >&6; }
-
-file_magic_glob=
-want_nocaseglob=no
-if test "$build" = "$host"; then
- case $host_os in
- mingw* | pw32*)
- if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then
- want_nocaseglob=yes
- else
- file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[\1]\/[\1]\/g;/g"`
- fi
- ;;
- esac
-fi
-
-file_magic_cmd=$lt_cv_file_magic_cmd
-deplibs_check_method=$lt_cv_deplibs_check_method
-test -z "$deplibs_check_method" && deplibs_check_method=unknown
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args.
-set dummy ${ac_tool_prefix}dlltool; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_DLLTOOL+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$DLLTOOL"; then
- ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-DLLTOOL=$ac_cv_prog_DLLTOOL
-if test -n "$DLLTOOL"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5
-$as_echo "$DLLTOOL" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_prog_DLLTOOL"; then
- ac_ct_DLLTOOL=$DLLTOOL
- # Extract the first word of "dlltool", so it can be a program name with args.
-set dummy dlltool; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_DLLTOOL"; then
- ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_DLLTOOL="dlltool"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL
-if test -n "$ac_ct_DLLTOOL"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5
-$as_echo "$ac_ct_DLLTOOL" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
- if test "x$ac_ct_DLLTOOL" = x; then
- DLLTOOL="false"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- DLLTOOL=$ac_ct_DLLTOOL
- fi
-else
- DLLTOOL="$ac_cv_prog_DLLTOOL"
-fi
-
-test -z "$DLLTOOL" && DLLTOOL=dlltool
-
-
-
-
-
-
-
-
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5
-$as_echo_n "checking how to associate runtime and link libraries... " >&6; }
-if ${lt_cv_sharedlib_from_linklib_cmd+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_sharedlib_from_linklib_cmd='unknown'
-
-case $host_os in
-cygwin* | mingw* | pw32* | cegcc*)
- # two different shell functions defined in ltmain.sh
- # decide which to use based on capabilities of $DLLTOOL
- case `$DLLTOOL --help 2>&1` in
- *--identify-strict*)
- lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib
- ;;
- *)
- lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback
- ;;
- esac
- ;;
-*)
- # fallback: assume linklib IS sharedlib
- lt_cv_sharedlib_from_linklib_cmd="$ECHO"
- ;;
-esac
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5
-$as_echo "$lt_cv_sharedlib_from_linklib_cmd" >&6; }
-sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd
-test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO
-
-
-
-
-
-
-
-if test -n "$ac_tool_prefix"; then
- for ac_prog in ar
- do
- # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
-set dummy $ac_tool_prefix$ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_AR+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$AR"; then
- ac_cv_prog_AR="$AR" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_AR="$ac_tool_prefix$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-AR=$ac_cv_prog_AR
-if test -n "$AR"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5
-$as_echo "$AR" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
- test -n "$AR" && break
- done
-fi
-if test -z "$AR"; then
- ac_ct_AR=$AR
- for ac_prog in ar
-do
- # Extract the first word of "$ac_prog", so it can be a program name with args.
-set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_AR+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_AR"; then
- ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_AR="$ac_prog"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_AR=$ac_cv_prog_ac_ct_AR
-if test -n "$ac_ct_AR"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5
-$as_echo "$ac_ct_AR" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
- test -n "$ac_ct_AR" && break
-done
-
- if test "x$ac_ct_AR" = x; then
- AR="false"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- AR=$ac_ct_AR
- fi
-fi
-
-: ${AR=ar}
-: ${AR_FLAGS=cru}
-
-
-
-
-
-
-
-
-
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5
-$as_echo_n "checking for archiver @FILE support... " >&6; }
-if ${lt_cv_ar_at_file+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_ar_at_file=no
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- echo conftest.$ac_objext > conftest.lst
- lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5
- (eval $lt_ar_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
- if test "$ac_status" -eq 0; then
- # Ensure the archiver fails upon bogus file names.
- rm -f conftest.$ac_objext libconftest.a
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5
- (eval $lt_ar_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
- if test "$ac_status" -ne 0; then
- lt_cv_ar_at_file=@
- fi
- fi
- rm -f conftest.* libconftest.a
-
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5
-$as_echo "$lt_cv_ar_at_file" >&6; }
-
-if test "x$lt_cv_ar_at_file" = xno; then
- archiver_list_spec=
-else
- archiver_list_spec=$lt_cv_ar_at_file
-fi
-
-
-
-
-
-
-
-if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
-set dummy ${ac_tool_prefix}strip; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_STRIP+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$STRIP"; then
- ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_STRIP="${ac_tool_prefix}strip"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-STRIP=$ac_cv_prog_STRIP
-if test -n "$STRIP"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5
-$as_echo "$STRIP" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_prog_STRIP"; then
- ac_ct_STRIP=$STRIP
- # Extract the first word of "strip", so it can be a program name with args.
-set dummy strip; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_STRIP+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_STRIP"; then
- ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_STRIP="strip"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
-if test -n "$ac_ct_STRIP"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5
-$as_echo "$ac_ct_STRIP" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
- if test "x$ac_ct_STRIP" = x; then
- STRIP=":"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- STRIP=$ac_ct_STRIP
- fi
-else
- STRIP="$ac_cv_prog_STRIP"
-fi
-
-test -z "$STRIP" && STRIP=:
-
-
-
-
-
-
-if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
-set dummy ${ac_tool_prefix}ranlib; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_RANLIB+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$RANLIB"; then
- ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-RANLIB=$ac_cv_prog_RANLIB
-if test -n "$RANLIB"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5
-$as_echo "$RANLIB" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_prog_RANLIB"; then
- ac_ct_RANLIB=$RANLIB
- # Extract the first word of "ranlib", so it can be a program name with args.
-set dummy ranlib; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_RANLIB+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_RANLIB"; then
- ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_RANLIB="ranlib"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB
-if test -n "$ac_ct_RANLIB"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5
-$as_echo "$ac_ct_RANLIB" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
- if test "x$ac_ct_RANLIB" = x; then
- RANLIB=":"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- RANLIB=$ac_ct_RANLIB
- fi
-else
- RANLIB="$ac_cv_prog_RANLIB"
-fi
-
-test -z "$RANLIB" && RANLIB=:
-
-
-
-
-
-
-# Determine commands to create old-style static archives.
-old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs'
-old_postinstall_cmds='chmod 644 $oldlib'
-old_postuninstall_cmds=
-
-if test -n "$RANLIB"; then
- case $host_os in
- openbsd*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
- ;;
- *)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
- ;;
- esac
- old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
-fi
-
-case $host_os in
- darwin*)
- lock_old_archive_extraction=yes ;;
- *)
- lock_old_archive_extraction=no ;;
-esac
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# If no C compiler was specified, use CC.
-LTCC=${LTCC-"$CC"}
-
-# If no C compiler flags were specified, use CFLAGS.
-LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
-
-# Allow CC to be a program name with arguments.
-compiler=$CC
-
-
-# Check for command to grab the raw symbol name followed by C symbol from nm.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5
-$as_echo_n "checking command to parse $NM output from $compiler object... " >&6; }
-if ${lt_cv_sys_global_symbol_pipe+:} false; then :
- $as_echo_n "(cached) " >&6
-else
-
-# These are sane defaults that work on at least a few old systems.
-# [They come from Ultrix. What could be older than Ultrix?!! ;)]
-
-# Character class describing NM global symbol codes.
-symcode='[BCDEGRST]'
-
-# Regexp to match symbols that can be accessed directly from C.
-sympat='\([_A-Za-z][_A-Za-z0-9]*\)'
-
-# Define system-specific variables.
-case $host_os in
-aix*)
- symcode='[BCDT]'
- ;;
-cygwin* | mingw* | pw32* | cegcc*)
- symcode='[ABCDGISTW]'
- ;;
-hpux*)
- if test "$host_cpu" = ia64; then
- symcode='[ABCDEGRST]'
- fi
- ;;
-irix* | nonstopux*)
- symcode='[BCDEGRST]'
- ;;
-osf*)
- symcode='[BCDEGQRST]'
- ;;
-solaris*)
- symcode='[BDRT]'
- ;;
-sco3.2v5*)
- symcode='[DT]'
- ;;
-sysv4.2uw2*)
- symcode='[DT]'
- ;;
-sysv5* | sco5v6* | unixware* | OpenUNIX*)
- symcode='[ABDT]'
- ;;
-sysv4)
- symcode='[DFNSTU]'
- ;;
-esac
-
-# If we're using GNU nm, then use its standard symbol codes.
-case `$NM -V 2>&1` in
-*GNU* | *'with BFD'*)
- symcode='[ABCDGIRSTW]' ;;
-esac
-
-# Transform an extracted symbol line into a proper C declaration.
-# Some systems (esp. on ia64) link data and code symbols differently,
-# so use this general approach.
-lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
-
-# Transform an extracted symbol line into symbol name and symbol address
-lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\)[ ]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"\2\", (void *) \&\2},/p'"
-lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([^ ]*\)[ ]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \(lib[^ ]*\)$/ {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"lib\2\", (void *) \&\2},/p'"
-
-# Handle CRLF in mingw tool chain
-opt_cr=
-case $build_os in
-mingw*)
- opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp
- ;;
-esac
-
-# Try without a prefix underscore, then with it.
-for ac_symprfx in "" "_"; do
-
- # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol.
- symxfrm="\\1 $ac_symprfx\\2 \\2"
-
- # Write the raw and C identifiers.
- if test "$lt_cv_nm_interface" = "MS dumpbin"; then
- # Fake it for dumpbin and say T for any non-static function
- # and D for any global variable.
- # Also find C++ and __fastcall symbols from MSVC++,
- # which start with @ or ?.
- lt_cv_sys_global_symbol_pipe="$AWK '"\
-" {last_section=section; section=\$ 3};"\
-" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
-" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
-" \$ 0!~/External *\|/{next};"\
-" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
-" {if(hide[section]) next};"\
-" {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? \"T \" : \"D \"};"\
-" {split(\$ 0, a, /\||\r/); split(a[2], s)};"\
-" s[1]~/^[@?]/{print s[1], s[1]; next};"\
-" s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\
-" ' prfx=^$ac_symprfx"
- else
- lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"
- fi
- lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'"
-
- # Check to see that the pipe works correctly.
- pipe_works=no
-
- rm -f conftest*
- cat > conftest.$ac_ext <<_LT_EOF
-#ifdef __cplusplus
-extern "C" {
-#endif
-char nm_test_var;
-void nm_test_func(void);
-void nm_test_func(void){}
-#ifdef __cplusplus
-}
-#endif
-int main(){nm_test_var='a';nm_test_func();return(0);}
-_LT_EOF
-
- if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then
- # Now try to grab the symbols.
- nlist=conftest.nm
- if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist\""; } >&5
- (eval $NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && test -s "$nlist"; then
- # Try sorting and uniquifying the output.
- if sort "$nlist" | uniq > "$nlist"T; then
- mv -f "$nlist"T "$nlist"
- else
- rm -f "$nlist"T
- fi
-
- # Make sure that we snagged all the symbols we need.
- if $GREP ' nm_test_var$' "$nlist" >/dev/null; then
- if $GREP ' nm_test_func$' "$nlist" >/dev/null; then
- cat <<_LT_EOF > conftest.$ac_ext
-/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */
-#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE)
-/* DATA imports from DLLs on WIN32 con't be const, because runtime
- relocations are performed -- see ld's documentation on pseudo-relocs. */
-# define LT_DLSYM_CONST
-#elif defined(__osf__)
-/* This system does not cope well with relocations in const data. */
-# define LT_DLSYM_CONST
-#else
-# define LT_DLSYM_CONST const
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-_LT_EOF
- # Now generate the symbol file.
- eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext'
-
- cat <<_LT_EOF >> conftest.$ac_ext
-
-/* The mapping between symbol names and symbols. */
-LT_DLSYM_CONST struct {
- const char *name;
- void *address;
-}
-lt__PROGRAM__LTX_preloaded_symbols[] =
-{
- { "@PROGRAM@", (void *) 0 },
-_LT_EOF
- $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext
- cat <<\_LT_EOF >> conftest.$ac_ext
- {0, (void *) 0}
-};
-
-/* This works around a problem in FreeBSD linker */
-#ifdef FREEBSD_WORKAROUND
-static const void *lt_preloaded_setup() {
- return lt__PROGRAM__LTX_preloaded_symbols;
-}
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-_LT_EOF
- # Now try linking the two files.
- mv conftest.$ac_objext conftstm.$ac_objext
- lt_globsym_save_LIBS=$LIBS
- lt_globsym_save_CFLAGS=$CFLAGS
- LIBS="conftstm.$ac_objext"
- CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag"
- if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && test -s conftest${ac_exeext}; then
- pipe_works=yes
- fi
- LIBS=$lt_globsym_save_LIBS
- CFLAGS=$lt_globsym_save_CFLAGS
- else
- echo "cannot find nm_test_func in $nlist" >&5
- fi
- else
- echo "cannot find nm_test_var in $nlist" >&5
- fi
- else
- echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5
- fi
- else
- echo "$progname: failed program was:" >&5
- cat conftest.$ac_ext >&5
- fi
- rm -rf conftest* conftst*
-
- # Do not use the global_symbol_pipe unless it works.
- if test "$pipe_works" = yes; then
- break
- else
- lt_cv_sys_global_symbol_pipe=
- fi
-done
-
-fi
-
-if test -z "$lt_cv_sys_global_symbol_pipe"; then
- lt_cv_sys_global_symbol_to_cdecl=
-fi
-if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: failed" >&5
-$as_echo "failed" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5
-$as_echo "ok" >&6; }
-fi
-
-# Response file support.
-if test "$lt_cv_nm_interface" = "MS dumpbin"; then
- nm_file_list_spec='@'
-elif $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then
- nm_file_list_spec='@'
-fi
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5
-$as_echo_n "checking for sysroot... " >&6; }
-
-# Check whether --with-sysroot was given.
-if test "${with_sysroot+set}" = set; then :
- withval=$with_sysroot;
-else
- with_sysroot=no
-fi
-
-
-lt_sysroot=
-case ${with_sysroot} in #(
- yes)
- if test "$GCC" = yes; then
- lt_sysroot=`$CC --print-sysroot 2>/dev/null`
- fi
- ;; #(
- /*)
- lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"`
- ;; #(
- no|'')
- ;; #(
- *)
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${with_sysroot}" >&5
-$as_echo "${with_sysroot}" >&6; }
- as_fn_error $? "The sysroot must be an absolute path." "$LINENO" 5
- ;;
-esac
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5
-$as_echo "${lt_sysroot:-no}" >&6; }
-
-
-
-
-
-
-# Check whether --enable-libtool-lock was given.
-if test "${enable_libtool_lock+set}" = set; then :
- enableval=$enable_libtool_lock;
-fi
-
-test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes
-
-# Some flags need to be propagated to the compiler or linker for good
-# libtool support.
-case $host in
-ia64-*-hpux*)
- # Find out which ABI we are using.
- echo 'int i;' > conftest.$ac_ext
- if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then
- case `/usr/bin/file conftest.$ac_objext` in
- *ELF-32*)
- HPUX_IA64_MODE="32"
- ;;
- *ELF-64*)
- HPUX_IA64_MODE="64"
- ;;
- esac
- fi
- rm -rf conftest*
- ;;
-*-*-irix6*)
- # Find out which ABI we are using.
- echo '#line '$LINENO' "configure"' > conftest.$ac_ext
- if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then
- if test "$lt_cv_prog_gnu_ld" = yes; then
- case `/usr/bin/file conftest.$ac_objext` in
- *32-bit*)
- LD="${LD-ld} -melf32bsmip"
- ;;
- *N32*)
- LD="${LD-ld} -melf32bmipn32"
- ;;
- *64-bit*)
- LD="${LD-ld} -melf64bmip"
- ;;
- esac
- else
- case `/usr/bin/file conftest.$ac_objext` in
- *32-bit*)
- LD="${LD-ld} -32"
- ;;
- *N32*)
- LD="${LD-ld} -n32"
- ;;
- *64-bit*)
- LD="${LD-ld} -64"
- ;;
- esac
- fi
- fi
- rm -rf conftest*
- ;;
-
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
-s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
- # Find out which ABI we are using.
- echo 'int i;' > conftest.$ac_ext
- if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then
- case `/usr/bin/file conftest.o` in
- *32-bit*)
- case $host in
- x86_64-*kfreebsd*-gnu)
- LD="${LD-ld} -m elf_i386_fbsd"
- ;;
- x86_64-*linux*)
- LD="${LD-ld} -m elf_i386"
- ;;
- ppc64-*linux*|powerpc64-*linux*)
- LD="${LD-ld} -m elf32ppclinux"
- ;;
- s390x-*linux*)
- LD="${LD-ld} -m elf_s390"
- ;;
- sparc64-*linux*)
- LD="${LD-ld} -m elf32_sparc"
- ;;
- esac
- ;;
- *64-bit*)
- case $host in
- x86_64-*kfreebsd*-gnu)
- LD="${LD-ld} -m elf_x86_64_fbsd"
- ;;
- x86_64-*linux*)
- LD="${LD-ld} -m elf_x86_64"
- ;;
- ppc*-*linux*|powerpc*-*linux*)
- LD="${LD-ld} -m elf64ppc"
- ;;
- s390*-*linux*|s390*-*tpf*)
- LD="${LD-ld} -m elf64_s390"
- ;;
- sparc*-*linux*)
- LD="${LD-ld} -m elf64_sparc"
- ;;
- esac
- ;;
- esac
- fi
- rm -rf conftest*
- ;;
-
-*-*-sco3.2v5*)
- # On SCO OpenServer 5, we need -belf to get full-featured binaries.
- SAVE_CFLAGS="$CFLAGS"
- CFLAGS="$CFLAGS -belf"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5
-$as_echo_n "checking whether the C compiler needs -belf... " >&6; }
-if ${lt_cv_cc_needs_belf+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- lt_cv_cc_needs_belf=yes
-else
- lt_cv_cc_needs_belf=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5
-$as_echo "$lt_cv_cc_needs_belf" >&6; }
- if test x"$lt_cv_cc_needs_belf" != x"yes"; then
- # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
- CFLAGS="$SAVE_CFLAGS"
- fi
- ;;
-*-*solaris*)
- # Find out which ABI we are using.
- echo 'int i;' > conftest.$ac_ext
- if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then
- case `/usr/bin/file conftest.o` in
- *64-bit*)
- case $lt_cv_prog_gnu_ld in
- yes*)
- case $host in
- i?86-*-solaris*)
- LD="${LD-ld} -m elf_x86_64"
- ;;
- sparc*-*-solaris*)
- LD="${LD-ld} -m elf64_sparc"
- ;;
- esac
- # GNU ld 2.21 introduced _sol2 emulations. Use them if available.
- if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
- LD="${LD-ld}_sol2"
- fi
- ;;
- *)
- if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
- LD="${LD-ld} -64"
- fi
- ;;
- esac
- ;;
- esac
- fi
- rm -rf conftest*
- ;;
-esac
-
-need_locks="$enable_libtool_lock"
-
-if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}mt", so it can be a program name with args.
-set dummy ${ac_tool_prefix}mt; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_MANIFEST_TOOL+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$MANIFEST_TOOL"; then
- ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL
-if test -n "$MANIFEST_TOOL"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5
-$as_echo "$MANIFEST_TOOL" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_prog_MANIFEST_TOOL"; then
- ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL
- # Extract the first word of "mt", so it can be a program name with args.
-set dummy mt; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_MANIFEST_TOOL+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_MANIFEST_TOOL"; then
- ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_MANIFEST_TOOL="mt"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL
-if test -n "$ac_ct_MANIFEST_TOOL"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" >&5
-$as_echo "$ac_ct_MANIFEST_TOOL" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
- if test "x$ac_ct_MANIFEST_TOOL" = x; then
- MANIFEST_TOOL=":"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL
- fi
-else
- MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL"
-fi
-
-test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5
-$as_echo_n "checking if $MANIFEST_TOOL is a manifest tool... " >&6; }
-if ${lt_cv_path_mainfest_tool+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_path_mainfest_tool=no
- echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5
- $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out
- cat conftest.err >&5
- if $GREP 'Manifest Tool' conftest.out > /dev/null; then
- lt_cv_path_mainfest_tool=yes
- fi
- rm -f conftest*
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_mainfest_tool" >&5
-$as_echo "$lt_cv_path_mainfest_tool" >&6; }
-if test "x$lt_cv_path_mainfest_tool" != xyes; then
- MANIFEST_TOOL=:
-fi
-
-
-
-
-
-
- case $host_os in
- rhapsody* | darwin*)
- if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a program name with args.
-set dummy ${ac_tool_prefix}dsymutil; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_DSYMUTIL+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$DSYMUTIL"; then
- ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-DSYMUTIL=$ac_cv_prog_DSYMUTIL
-if test -n "$DSYMUTIL"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5
-$as_echo "$DSYMUTIL" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_prog_DSYMUTIL"; then
- ac_ct_DSYMUTIL=$DSYMUTIL
- # Extract the first word of "dsymutil", so it can be a program name with args.
-set dummy dsymutil; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_DSYMUTIL+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_DSYMUTIL"; then
- ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_DSYMUTIL="dsymutil"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL
-if test -n "$ac_ct_DSYMUTIL"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5
-$as_echo "$ac_ct_DSYMUTIL" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
- if test "x$ac_ct_DSYMUTIL" = x; then
- DSYMUTIL=":"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- DSYMUTIL=$ac_ct_DSYMUTIL
- fi
-else
- DSYMUTIL="$ac_cv_prog_DSYMUTIL"
-fi
-
- if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a program name with args.
-set dummy ${ac_tool_prefix}nmedit; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_NMEDIT+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$NMEDIT"; then
- ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-NMEDIT=$ac_cv_prog_NMEDIT
-if test -n "$NMEDIT"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5
-$as_echo "$NMEDIT" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_prog_NMEDIT"; then
- ac_ct_NMEDIT=$NMEDIT
- # Extract the first word of "nmedit", so it can be a program name with args.
-set dummy nmedit; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_NMEDIT+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_NMEDIT"; then
- ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_NMEDIT="nmedit"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT
-if test -n "$ac_ct_NMEDIT"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5
-$as_echo "$ac_ct_NMEDIT" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
- if test "x$ac_ct_NMEDIT" = x; then
- NMEDIT=":"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- NMEDIT=$ac_ct_NMEDIT
- fi
-else
- NMEDIT="$ac_cv_prog_NMEDIT"
-fi
-
- if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program name with args.
-set dummy ${ac_tool_prefix}lipo; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_LIPO+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$LIPO"; then
- ac_cv_prog_LIPO="$LIPO" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_LIPO="${ac_tool_prefix}lipo"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-LIPO=$ac_cv_prog_LIPO
-if test -n "$LIPO"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5
-$as_echo "$LIPO" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_prog_LIPO"; then
- ac_ct_LIPO=$LIPO
- # Extract the first word of "lipo", so it can be a program name with args.
-set dummy lipo; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_LIPO+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_LIPO"; then
- ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_LIPO="lipo"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO
-if test -n "$ac_ct_LIPO"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5
-$as_echo "$ac_ct_LIPO" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
- if test "x$ac_ct_LIPO" = x; then
- LIPO=":"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- LIPO=$ac_ct_LIPO
- fi
-else
- LIPO="$ac_cv_prog_LIPO"
-fi
-
- if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}otool", so it can be a program name with args.
-set dummy ${ac_tool_prefix}otool; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_OTOOL+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$OTOOL"; then
- ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_OTOOL="${ac_tool_prefix}otool"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-OTOOL=$ac_cv_prog_OTOOL
-if test -n "$OTOOL"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5
-$as_echo "$OTOOL" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_prog_OTOOL"; then
- ac_ct_OTOOL=$OTOOL
- # Extract the first word of "otool", so it can be a program name with args.
-set dummy otool; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_OTOOL+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_OTOOL"; then
- ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_OTOOL="otool"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL
-if test -n "$ac_ct_OTOOL"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5
-$as_echo "$ac_ct_OTOOL" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
- if test "x$ac_ct_OTOOL" = x; then
- OTOOL=":"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- OTOOL=$ac_ct_OTOOL
- fi
-else
- OTOOL="$ac_cv_prog_OTOOL"
-fi
-
- if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}otool64", so it can be a program name with args.
-set dummy ${ac_tool_prefix}otool64; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_OTOOL64+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$OTOOL64"; then
- ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-OTOOL64=$ac_cv_prog_OTOOL64
-if test -n "$OTOOL64"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5
-$as_echo "$OTOOL64" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_prog_OTOOL64"; then
- ac_ct_OTOOL64=$OTOOL64
- # Extract the first word of "otool64", so it can be a program name with args.
-set dummy otool64; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_prog_ac_ct_OTOOL64+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -n "$ac_ct_OTOOL64"; then
- ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_OTOOL64="otool64"
- $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
- done
-IFS=$as_save_IFS
-
-fi
-fi
-ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64
-if test -n "$ac_ct_OTOOL64"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5
-$as_echo "$ac_ct_OTOOL64" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
- if test "x$ac_ct_OTOOL64" = x; then
- OTOOL64=":"
- else
- case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
- OTOOL64=$ac_ct_OTOOL64
- fi
-else
- OTOOL64="$ac_cv_prog_OTOOL64"
-fi
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5
-$as_echo_n "checking for -single_module linker flag... " >&6; }
-if ${lt_cv_apple_cc_single_mod+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_apple_cc_single_mod=no
- if test -z "${LT_MULTI_MODULE}"; then
- # By default we will add the -single_module flag. You can override
- # by either setting the environment variable LT_MULTI_MODULE
- # non-empty at configure time, or by adding -multi_module to the
- # link flags.
- rm -rf libconftest.dylib*
- echo "int foo(void){return 1;}" > conftest.c
- echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
--dynamiclib -Wl,-single_module conftest.c" >&5
- $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
- -dynamiclib -Wl,-single_module conftest.c 2>conftest.err
- _lt_result=$?
- # If there is a non-empty error log, and "single_module"
- # appears in it, assume the flag caused a linker warning
- if test -s conftest.err && $GREP single_module conftest.err; then
- cat conftest.err >&5
- # Otherwise, if the output was created with a 0 exit code from
- # the compiler, it worked.
- elif test -f libconftest.dylib && test $_lt_result -eq 0; then
- lt_cv_apple_cc_single_mod=yes
- else
- cat conftest.err >&5
- fi
- rm -rf libconftest.dylib*
- rm -f conftest.*
- fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5
-$as_echo "$lt_cv_apple_cc_single_mod" >&6; }
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5
-$as_echo_n "checking for -exported_symbols_list linker flag... " >&6; }
-if ${lt_cv_ld_exported_symbols_list+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_ld_exported_symbols_list=no
- save_LDFLAGS=$LDFLAGS
- echo "_main" > conftest.sym
- LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- lt_cv_ld_exported_symbols_list=yes
-else
- lt_cv_ld_exported_symbols_list=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- LDFLAGS="$save_LDFLAGS"
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5
-$as_echo "$lt_cv_ld_exported_symbols_list" >&6; }
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5
-$as_echo_n "checking for -force_load linker flag... " >&6; }
-if ${lt_cv_ld_force_load+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_ld_force_load=no
- cat > conftest.c << _LT_EOF
-int forced_loaded() { return 2;}
-_LT_EOF
- echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5
- $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5
- echo "$AR cru libconftest.a conftest.o" >&5
- $AR cru libconftest.a conftest.o 2>&5
- echo "$RANLIB libconftest.a" >&5
- $RANLIB libconftest.a 2>&5
- cat > conftest.c << _LT_EOF
-int main() { return 0;}
-_LT_EOF
- echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5
- $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
- _lt_result=$?
- if test -s conftest.err && $GREP force_load conftest.err; then
- cat conftest.err >&5
- elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
- lt_cv_ld_force_load=yes
- else
- cat conftest.err >&5
- fi
- rm -f conftest.err libconftest.a conftest conftest.c
- rm -rf conftest.dSYM
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5
-$as_echo "$lt_cv_ld_force_load" >&6; }
- case $host_os in
- rhapsody* | darwin1.[012])
- _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;;
- darwin1.*)
- _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
- darwin*) # darwin 5.x on
- # if running on 10.5 or later, the deployment target defaults
- # to the OS version, if on x86, and 10.4, the deployment
- # target defaults to 10.4. Don't you love it?
- case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in
- 10.0,*86*-darwin8*|10.0,*-darwin[91]*)
- _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
- 10.[012]*)
- _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
- 10.*)
- _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
- esac
- ;;
- esac
- if test "$lt_cv_apple_cc_single_mod" = "yes"; then
- _lt_dar_single_mod='$single_module'
- fi
- if test "$lt_cv_ld_exported_symbols_list" = "yes"; then
- _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym'
- else
- _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}'
- fi
- if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then
- _lt_dsymutil='~$DSYMUTIL $lib || :'
- else
- _lt_dsymutil=
- fi
- ;;
- esac
-
-# On IRIX 5.3, sys/types and inttypes.h are conflicting.
-for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
- inttypes.h stdint.h unistd.h
-do :
- as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
-ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
-"
-if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
- cat >>confdefs.h <<_ACEOF
-#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-
-done
-
-
-for ac_header in dlfcn.h
-do :
- ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default
-"
-if test "x$ac_cv_header_dlfcn_h" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_DLFCN_H 1
-_ACEOF
-
-fi
-
-done
-
-
-
-func_stripname_cnf ()
-{
- case ${2} in
- .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;;
- *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;;
- esac
-} # func_stripname_cnf
-
-
-
-
-
-# Set options
-
-
-
- enable_dlopen=no
-
-
- enable_win32_dll=no
-
-
- # Check whether --enable-shared was given.
-if test "${enable_shared+set}" = set; then :
- enableval=$enable_shared; p=${PACKAGE-default}
- case $enableval in
- yes) enable_shared=yes ;;
- no) enable_shared=no ;;
- *)
- enable_shared=no
- # Look at the argument we got. We use all the common list separators.
- lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
- for pkg in $enableval; do
- IFS="$lt_save_ifs"
- if test "X$pkg" = "X$p"; then
- enable_shared=yes
- fi
- done
- IFS="$lt_save_ifs"
- ;;
- esac
-else
- enable_shared=yes
-fi
-
-
-
-
-
-
-
-
-
- # Check whether --enable-static was given.
-if test "${enable_static+set}" = set; then :
- enableval=$enable_static; p=${PACKAGE-default}
- case $enableval in
- yes) enable_static=yes ;;
- no) enable_static=no ;;
- *)
- enable_static=no
- # Look at the argument we got. We use all the common list separators.
- lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
- for pkg in $enableval; do
- IFS="$lt_save_ifs"
- if test "X$pkg" = "X$p"; then
- enable_static=yes
- fi
- done
- IFS="$lt_save_ifs"
- ;;
- esac
-else
- enable_static=yes
-fi
-
-
-
-
-
-
-
-
-
-
-# Check whether --with-pic was given.
-if test "${with_pic+set}" = set; then :
- withval=$with_pic; lt_p=${PACKAGE-default}
- case $withval in
- yes|no) pic_mode=$withval ;;
- *)
- pic_mode=default
- # Look at the argument we got. We use all the common list separators.
- lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
- for lt_pkg in $withval; do
- IFS="$lt_save_ifs"
- if test "X$lt_pkg" = "X$lt_p"; then
- pic_mode=yes
- fi
- done
- IFS="$lt_save_ifs"
- ;;
- esac
-else
- pic_mode=default
-fi
-
-
-test -z "$pic_mode" && pic_mode=default
-
-
-
-
-
-
-
- # Check whether --enable-fast-install was given.
-if test "${enable_fast_install+set}" = set; then :
- enableval=$enable_fast_install; p=${PACKAGE-default}
- case $enableval in
- yes) enable_fast_install=yes ;;
- no) enable_fast_install=no ;;
- *)
- enable_fast_install=no
- # Look at the argument we got. We use all the common list separators.
- lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
- for pkg in $enableval; do
- IFS="$lt_save_ifs"
- if test "X$pkg" = "X$p"; then
- enable_fast_install=yes
- fi
- done
- IFS="$lt_save_ifs"
- ;;
- esac
-else
- enable_fast_install=yes
-fi
-
-
-
-
-
-
-
-
-
-
-
-# This can be used to rebuild libtool when needed
-LIBTOOL_DEPS="$ltmain"
-
-# Always use our own libtool.
-LIBTOOL='$(SHELL) $(top_builddir)/libtool'
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-test -z "$LN_S" && LN_S="ln -s"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-if test -n "${ZSH_VERSION+set}" ; then
- setopt NO_GLOB_SUBST
-fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5
-$as_echo_n "checking for objdir... " >&6; }
-if ${lt_cv_objdir+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- rm -f .libs 2>/dev/null
-mkdir .libs 2>/dev/null
-if test -d .libs; then
- lt_cv_objdir=.libs
-else
- # MS-DOS does not allow filenames that begin with a dot.
- lt_cv_objdir=_libs
-fi
-rmdir .libs 2>/dev/null
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5
-$as_echo "$lt_cv_objdir" >&6; }
-objdir=$lt_cv_objdir
-
-
-
-
-
-cat >>confdefs.h <<_ACEOF
-#define LT_OBJDIR "$lt_cv_objdir/"
-_ACEOF
-
-
-
-
-case $host_os in
-aix3*)
- # AIX sometimes has problems with the GCC collect2 program. For some
- # reason, if we set the COLLECT_NAMES environment variable, the problems
- # vanish in a puff of smoke.
- if test "X${COLLECT_NAMES+set}" != Xset; then
- COLLECT_NAMES=
- export COLLECT_NAMES
- fi
- ;;
-esac
-
-# Global variables:
-ofile=libtool
-can_build_shared=yes
-
-# All known linkers require a `.a' archive for static linking (except MSVC,
-# which needs '.lib').
-libext=a
-
-with_gnu_ld="$lt_cv_prog_gnu_ld"
-
-old_CC="$CC"
-old_CFLAGS="$CFLAGS"
-
-# Set sane defaults for various variables
-test -z "$CC" && CC=cc
-test -z "$LTCC" && LTCC=$CC
-test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS
-test -z "$LD" && LD=ld
-test -z "$ac_objext" && ac_objext=o
-
-for cc_temp in $compiler""; do
- case $cc_temp in
- compile | *[\\/]compile | ccache | *[\\/]ccache ) ;;
- distcc | *[\\/]distcc | purify | *[\\/]purify ) ;;
- \-*) ;;
- *) break;;
- esac
-done
-cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"`
-
-
-# Only perform the check for file, if the check method requires it
-test -z "$MAGIC_CMD" && MAGIC_CMD=file
-case $deplibs_check_method in
-file_magic*)
- if test "$file_magic_cmd" = '$MAGIC_CMD'; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5
-$as_echo_n "checking for ${ac_tool_prefix}file... " >&6; }
-if ${lt_cv_path_MAGIC_CMD+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- case $MAGIC_CMD in
-[\\/*] | ?:[\\/]*)
- lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
- ;;
-*)
- lt_save_MAGIC_CMD="$MAGIC_CMD"
- lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
- ac_dummy="/usr/bin$PATH_SEPARATOR$PATH"
- for ac_dir in $ac_dummy; do
- IFS="$lt_save_ifs"
- test -z "$ac_dir" && ac_dir=.
- if test -f $ac_dir/${ac_tool_prefix}file; then
- lt_cv_path_MAGIC_CMD="$ac_dir/${ac_tool_prefix}file"
- if test -n "$file_magic_test_file"; then
- case $deplibs_check_method in
- "file_magic "*)
- file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
- MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
- if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
- $EGREP "$file_magic_regex" > /dev/null; then
- :
- else
- cat <<_LT_EOF 1>&2
-
-*** Warning: the command libtool uses to detect shared libraries,
-*** $file_magic_cmd, produces output that libtool cannot recognize.
-*** The result is that libtool may fail to recognize shared libraries
-*** as such. This will affect the creation of libtool libraries that
-*** depend on shared libraries, but programs linked with such libtool
-*** libraries will work regardless of this problem. Nevertheless, you
-*** may want to report the problem to your system manager and/or to
-*** bug-libtool at gnu.org
-
-_LT_EOF
- fi ;;
- esac
- fi
- break
- fi
- done
- IFS="$lt_save_ifs"
- MAGIC_CMD="$lt_save_MAGIC_CMD"
- ;;
-esac
-fi
-
-MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
-if test -n "$MAGIC_CMD"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5
-$as_echo "$MAGIC_CMD" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-
-
-
-if test -z "$lt_cv_path_MAGIC_CMD"; then
- if test -n "$ac_tool_prefix"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for file" >&5
-$as_echo_n "checking for file... " >&6; }
-if ${lt_cv_path_MAGIC_CMD+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- case $MAGIC_CMD in
-[\\/*] | ?:[\\/]*)
- lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
- ;;
-*)
- lt_save_MAGIC_CMD="$MAGIC_CMD"
- lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
- ac_dummy="/usr/bin$PATH_SEPARATOR$PATH"
- for ac_dir in $ac_dummy; do
- IFS="$lt_save_ifs"
- test -z "$ac_dir" && ac_dir=.
- if test -f $ac_dir/file; then
- lt_cv_path_MAGIC_CMD="$ac_dir/file"
- if test -n "$file_magic_test_file"; then
- case $deplibs_check_method in
- "file_magic "*)
- file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
- MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
- if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
- $EGREP "$file_magic_regex" > /dev/null; then
- :
- else
- cat <<_LT_EOF 1>&2
-
-*** Warning: the command libtool uses to detect shared libraries,
-*** $file_magic_cmd, produces output that libtool cannot recognize.
-*** The result is that libtool may fail to recognize shared libraries
-*** as such. This will affect the creation of libtool libraries that
-*** depend on shared libraries, but programs linked with such libtool
-*** libraries will work regardless of this problem. Nevertheless, you
-*** may want to report the problem to your system manager and/or to
-*** bug-libtool at gnu.org
-
-_LT_EOF
- fi ;;
- esac
- fi
- break
- fi
- done
- IFS="$lt_save_ifs"
- MAGIC_CMD="$lt_save_MAGIC_CMD"
- ;;
-esac
-fi
-
-MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
-if test -n "$MAGIC_CMD"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5
-$as_echo "$MAGIC_CMD" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
- else
- MAGIC_CMD=:
- fi
-fi
-
- fi
- ;;
-esac
-
-# Use C for the default configuration in the libtool script
-
-lt_save_CC="$CC"
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
-
-# Source file extension for C test sources.
-ac_ext=c
-
-# Object file extension for compiled C test sources.
-objext=o
-objext=$objext
-
-# Code to be used in simple compile tests
-lt_simple_compile_test_code="int some_variable = 0;"
-
-# Code to be used in simple link tests
-lt_simple_link_test_code='int main(){return(0);}'
-
-
-
-
-
-
-
-# If no C compiler was specified, use CC.
-LTCC=${LTCC-"$CC"}
-
-# If no C compiler flags were specified, use CFLAGS.
-LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
-
-# Allow CC to be a program name with arguments.
-compiler=$CC
-
-# Save the default compiler, since it gets overwritten when the other
-# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP.
-compiler_DEFAULT=$CC
-
-# save warnings/boilerplate of simple test code
-ac_outfile=conftest.$ac_objext
-echo "$lt_simple_compile_test_code" >conftest.$ac_ext
-eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
-_lt_compiler_boilerplate=`cat conftest.err`
-$RM conftest*
-
-ac_outfile=conftest.$ac_objext
-echo "$lt_simple_link_test_code" >conftest.$ac_ext
-eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
-_lt_linker_boilerplate=`cat conftest.err`
-$RM -r conftest*
-
-
-## CAVEAT EMPTOR:
-## There is no encapsulation within the following macros, do not change
-## the running order or otherwise move them around unless you know exactly
-## what you are doing...
-if test -n "$compiler"; then
-
-lt_prog_compiler_no_builtin_flag=
-
-if test "$GCC" = yes; then
- case $cc_basename in
- nvcc*)
- lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;;
- *)
- lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;;
- esac
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5
-$as_echo_n "checking if $compiler supports -fno-rtti -fno-exceptions... " >&6; }
-if ${lt_cv_prog_compiler_rtti_exceptions+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_prog_compiler_rtti_exceptions=no
- ac_outfile=conftest.$ac_objext
- echo "$lt_simple_compile_test_code" > conftest.$ac_ext
- lt_compiler_flag="-fno-rtti -fno-exceptions"
- # Insert the option either (1) after the last *FLAGS variable, or
- # (2) before a word containing "conftest.", or (3) at the end.
- # Note that $ac_compile itself does not contain backslashes and begins
- # with a dollar sign (not a hyphen), so the echo should work correctly.
- # The option is referenced via a variable to avoid confusing sed.
- lt_compile=`echo "$ac_compile" | $SED \
- -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
- -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
- -e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
- (eval "$lt_compile" 2>conftest.err)
- ac_status=$?
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- if (exit $ac_status) && test -s "$ac_outfile"; then
- # The compiler can only warn and ignore the option if not recognized
- # So say no if there are warnings other than the usual output.
- $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
- $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
- if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
- lt_cv_prog_compiler_rtti_exceptions=yes
- fi
- fi
- $RM conftest*
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5
-$as_echo "$lt_cv_prog_compiler_rtti_exceptions" >&6; }
-
-if test x"$lt_cv_prog_compiler_rtti_exceptions" = xyes; then
- lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions"
-else
- :
-fi
-
-fi
-
-
-
-
-
-
- lt_prog_compiler_wl=
-lt_prog_compiler_pic=
-lt_prog_compiler_static=
-
-
- if test "$GCC" = yes; then
- lt_prog_compiler_wl='-Wl,'
- lt_prog_compiler_static='-static'
-
- case $host_os in
- aix*)
- # All AIX code is PIC.
- if test "$host_cpu" = ia64; then
- # AIX 5 now supports IA64 processor
- lt_prog_compiler_static='-Bstatic'
- fi
- ;;
-
- amigaos*)
- case $host_cpu in
- powerpc)
- # see comment about AmigaOS4 .so support
- lt_prog_compiler_pic='-fPIC'
- ;;
- m68k)
- # FIXME: we need at least 68020 code to build shared libraries, but
- # adding the `-m68020' flag to GCC prevents building anything better,
- # like `-m68040'.
- lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4'
- ;;
- esac
- ;;
-
- beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
- # PIC is the default for these OSes.
- ;;
-
- mingw* | cygwin* | pw32* | os2* | cegcc*)
- # This hack is so that the source file can tell whether it is being
- # built for inclusion in a dll (and should export symbols for example).
- # Although the cygwin gcc ignores -fPIC, still need this for old-style
- # (--disable-auto-import) libraries
- lt_prog_compiler_pic='-DDLL_EXPORT'
- ;;
-
- darwin* | rhapsody*)
- # PIC is the default on this platform
- # Common symbols not allowed in MH_DYLIB files
- lt_prog_compiler_pic='-fno-common'
- ;;
-
- haiku*)
- # PIC is the default for Haiku.
- # The "-static" flag exists, but is broken.
- lt_prog_compiler_static=
- ;;
-
- hpux*)
- # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
- # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag
- # sets the default TLS model and affects inlining.
- case $host_cpu in
- hppa*64*)
- # +Z the default
- ;;
- *)
- lt_prog_compiler_pic='-fPIC'
- ;;
- esac
- ;;
-
- interix[3-9]*)
- # Interix 3.x gcc -fpic/-fPIC options generate broken code.
- # Instead, we relocate shared libraries at runtime.
- ;;
-
- msdosdjgpp*)
- # Just because we use GCC doesn't mean we suddenly get shared libraries
- # on systems that don't support them.
- lt_prog_compiler_can_build_shared=no
- enable_shared=no
- ;;
-
- *nto* | *qnx*)
- # QNX uses GNU C++, but need to define -shared option too, otherwise
- # it will coredump.
- lt_prog_compiler_pic='-fPIC -shared'
- ;;
-
- sysv4*MP*)
- if test -d /usr/nec; then
- lt_prog_compiler_pic=-Kconform_pic
- fi
- ;;
-
- *)
- lt_prog_compiler_pic='-fPIC'
- ;;
- esac
-
- case $cc_basename in
- nvcc*) # Cuda Compiler Driver 2.2
- lt_prog_compiler_wl='-Xlinker '
- if test -n "$lt_prog_compiler_pic"; then
- lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic"
- fi
- ;;
- esac
- else
- # PORTME Check for flag to pass linker flags through the system compiler.
- case $host_os in
- aix*)
- lt_prog_compiler_wl='-Wl,'
- if test "$host_cpu" = ia64; then
- # AIX 5 now supports IA64 processor
- lt_prog_compiler_static='-Bstatic'
- else
- lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp'
- fi
- ;;
-
- mingw* | cygwin* | pw32* | os2* | cegcc*)
- # This hack is so that the source file can tell whether it is being
- # built for inclusion in a dll (and should export symbols for example).
- lt_prog_compiler_pic='-DDLL_EXPORT'
- ;;
-
- hpux9* | hpux10* | hpux11*)
- lt_prog_compiler_wl='-Wl,'
- # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
- # not for PA HP-UX.
- case $host_cpu in
- hppa*64*|ia64*)
- # +Z the default
- ;;
- *)
- lt_prog_compiler_pic='+Z'
- ;;
- esac
- # Is there a better lt_prog_compiler_static that works with the bundled CC?
- lt_prog_compiler_static='${wl}-a ${wl}archive'
- ;;
-
- irix5* | irix6* | nonstopux*)
- lt_prog_compiler_wl='-Wl,'
- # PIC (with -KPIC) is the default.
- lt_prog_compiler_static='-non_shared'
- ;;
-
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
- case $cc_basename in
- # old Intel for x86_64 which still supported -KPIC.
- ecc*)
- lt_prog_compiler_wl='-Wl,'
- lt_prog_compiler_pic='-KPIC'
- lt_prog_compiler_static='-static'
- ;;
- # icc used to be incompatible with GCC.
- # ICC 10 doesn't accept -KPIC any more.
- icc* | ifort*)
- lt_prog_compiler_wl='-Wl,'
- lt_prog_compiler_pic='-fPIC'
- lt_prog_compiler_static='-static'
- ;;
- # Lahey Fortran 8.1.
- lf95*)
- lt_prog_compiler_wl='-Wl,'
- lt_prog_compiler_pic='--shared'
- lt_prog_compiler_static='--static'
- ;;
- nagfor*)
- # NAG Fortran compiler
- lt_prog_compiler_wl='-Wl,-Wl,,'
- lt_prog_compiler_pic='-PIC'
- lt_prog_compiler_static='-Bstatic'
- ;;
- pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
- # Portland Group compilers (*not* the Pentium gcc compiler,
- # which looks to be a dead project)
- lt_prog_compiler_wl='-Wl,'
- lt_prog_compiler_pic='-fpic'
- lt_prog_compiler_static='-Bstatic'
- ;;
- ccc*)
- lt_prog_compiler_wl='-Wl,'
- # All Alpha code is PIC.
- lt_prog_compiler_static='-non_shared'
- ;;
- xl* | bgxl* | bgf* | mpixl*)
- # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene
- lt_prog_compiler_wl='-Wl,'
- lt_prog_compiler_pic='-qpic'
- lt_prog_compiler_static='-qstaticlink'
- ;;
- *)
- case `$CC -V 2>&1 | sed 5q` in
- *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*)
- # Sun Fortran 8.3 passes all unrecognized flags to the linker
- lt_prog_compiler_pic='-KPIC'
- lt_prog_compiler_static='-Bstatic'
- lt_prog_compiler_wl=''
- ;;
- *Sun\ F* | *Sun*Fortran*)
- lt_prog_compiler_pic='-KPIC'
- lt_prog_compiler_static='-Bstatic'
- lt_prog_compiler_wl='-Qoption ld '
- ;;
- *Sun\ C*)
- # Sun C 5.9
- lt_prog_compiler_pic='-KPIC'
- lt_prog_compiler_static='-Bstatic'
- lt_prog_compiler_wl='-Wl,'
- ;;
- *Intel*\ [CF]*Compiler*)
- lt_prog_compiler_wl='-Wl,'
- lt_prog_compiler_pic='-fPIC'
- lt_prog_compiler_static='-static'
- ;;
- *Portland\ Group*)
- lt_prog_compiler_wl='-Wl,'
- lt_prog_compiler_pic='-fpic'
- lt_prog_compiler_static='-Bstatic'
- ;;
- esac
- ;;
- esac
- ;;
-
- newsos6)
- lt_prog_compiler_pic='-KPIC'
- lt_prog_compiler_static='-Bstatic'
- ;;
-
- *nto* | *qnx*)
- # QNX uses GNU C++, but need to define -shared option too, otherwise
- # it will coredump.
- lt_prog_compiler_pic='-fPIC -shared'
- ;;
-
- osf3* | osf4* | osf5*)
- lt_prog_compiler_wl='-Wl,'
- # All OSF/1 code is PIC.
- lt_prog_compiler_static='-non_shared'
- ;;
-
- rdos*)
- lt_prog_compiler_static='-non_shared'
- ;;
-
- solaris*)
- lt_prog_compiler_pic='-KPIC'
- lt_prog_compiler_static='-Bstatic'
- case $cc_basename in
- f77* | f90* | f95* | sunf77* | sunf90* | sunf95*)
- lt_prog_compiler_wl='-Qoption ld ';;
- *)
- lt_prog_compiler_wl='-Wl,';;
- esac
- ;;
-
- sunos4*)
- lt_prog_compiler_wl='-Qoption ld '
- lt_prog_compiler_pic='-PIC'
- lt_prog_compiler_static='-Bstatic'
- ;;
-
- sysv4 | sysv4.2uw2* | sysv4.3*)
- lt_prog_compiler_wl='-Wl,'
- lt_prog_compiler_pic='-KPIC'
- lt_prog_compiler_static='-Bstatic'
- ;;
-
- sysv4*MP*)
- if test -d /usr/nec ;then
- lt_prog_compiler_pic='-Kconform_pic'
- lt_prog_compiler_static='-Bstatic'
- fi
- ;;
-
- sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
- lt_prog_compiler_wl='-Wl,'
- lt_prog_compiler_pic='-KPIC'
- lt_prog_compiler_static='-Bstatic'
- ;;
-
- unicos*)
- lt_prog_compiler_wl='-Wl,'
- lt_prog_compiler_can_build_shared=no
- ;;
-
- uts4*)
- lt_prog_compiler_pic='-pic'
- lt_prog_compiler_static='-Bstatic'
- ;;
-
- *)
- lt_prog_compiler_can_build_shared=no
- ;;
- esac
- fi
-
-case $host_os in
- # For platforms which do not support PIC, -DPIC is meaningless:
- *djgpp*)
- lt_prog_compiler_pic=
- ;;
- *)
- lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC"
- ;;
-esac
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5
-$as_echo_n "checking for $compiler option to produce PIC... " >&6; }
-if ${lt_cv_prog_compiler_pic+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_prog_compiler_pic=$lt_prog_compiler_pic
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5
-$as_echo "$lt_cv_prog_compiler_pic" >&6; }
-lt_prog_compiler_pic=$lt_cv_prog_compiler_pic
-
-#
-# Check to make sure the PIC flag actually works.
-#
-if test -n "$lt_prog_compiler_pic"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5
-$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic works... " >&6; }
-if ${lt_cv_prog_compiler_pic_works+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_prog_compiler_pic_works=no
- ac_outfile=conftest.$ac_objext
- echo "$lt_simple_compile_test_code" > conftest.$ac_ext
- lt_compiler_flag="$lt_prog_compiler_pic -DPIC"
- # Insert the option either (1) after the last *FLAGS variable, or
- # (2) before a word containing "conftest.", or (3) at the end.
- # Note that $ac_compile itself does not contain backslashes and begins
- # with a dollar sign (not a hyphen), so the echo should work correctly.
- # The option is referenced via a variable to avoid confusing sed.
- lt_compile=`echo "$ac_compile" | $SED \
- -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
- -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
- -e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
- (eval "$lt_compile" 2>conftest.err)
- ac_status=$?
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- if (exit $ac_status) && test -s "$ac_outfile"; then
- # The compiler can only warn and ignore the option if not recognized
- # So say no if there are warnings other than the usual output.
- $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
- $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
- if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
- lt_cv_prog_compiler_pic_works=yes
- fi
- fi
- $RM conftest*
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5
-$as_echo "$lt_cv_prog_compiler_pic_works" >&6; }
-
-if test x"$lt_cv_prog_compiler_pic_works" = xyes; then
- case $lt_prog_compiler_pic in
- "" | " "*) ;;
- *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;;
- esac
-else
- lt_prog_compiler_pic=
- lt_prog_compiler_can_build_shared=no
-fi
-
-fi
-
-
-
-
-
-
-
-
-
-
-
-#
-# Check to make sure the static flag actually works.
-#
-wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5
-$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; }
-if ${lt_cv_prog_compiler_static_works+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_prog_compiler_static_works=no
- save_LDFLAGS="$LDFLAGS"
- LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
- echo "$lt_simple_link_test_code" > conftest.$ac_ext
- if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
- # The linker can only warn and ignore the option if not recognized
- # So say no if there are warnings
- if test -s conftest.err; then
- # Append any errors to the config.log.
- cat conftest.err 1>&5
- $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
- $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
- if diff conftest.exp conftest.er2 >/dev/null; then
- lt_cv_prog_compiler_static_works=yes
- fi
- else
- lt_cv_prog_compiler_static_works=yes
- fi
- fi
- $RM -r conftest*
- LDFLAGS="$save_LDFLAGS"
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5
-$as_echo "$lt_cv_prog_compiler_static_works" >&6; }
-
-if test x"$lt_cv_prog_compiler_static_works" = xyes; then
- :
-else
- lt_prog_compiler_static=
-fi
-
-
-
-
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
-$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if ${lt_cv_prog_compiler_c_o+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_prog_compiler_c_o=no
- $RM -r conftest 2>/dev/null
- mkdir conftest
- cd conftest
- mkdir out
- echo "$lt_simple_compile_test_code" > conftest.$ac_ext
-
- lt_compiler_flag="-o out/conftest2.$ac_objext"
- # Insert the option either (1) after the last *FLAGS variable, or
- # (2) before a word containing "conftest.", or (3) at the end.
- # Note that $ac_compile itself does not contain backslashes and begins
- # with a dollar sign (not a hyphen), so the echo should work correctly.
- lt_compile=`echo "$ac_compile" | $SED \
- -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
- -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
- -e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
- (eval "$lt_compile" 2>out/conftest.err)
- ac_status=$?
- cat out/conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- if (exit $ac_status) && test -s out/conftest2.$ac_objext
- then
- # The compiler can only warn and ignore the option if not recognized
- # So say no if there are warnings
- $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
- $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
- if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
- lt_cv_prog_compiler_c_o=yes
- fi
- fi
- chmod u+w . 2>&5
- $RM conftest*
- # SGI C++ compiler will create directory out/ii_files/ for
- # template instantiation
- test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
- $RM out/* && rmdir out
- cd ..
- $RM -r conftest
- $RM conftest*
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5
-$as_echo "$lt_cv_prog_compiler_c_o" >&6; }
-
-
-
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
-$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if ${lt_cv_prog_compiler_c_o+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_prog_compiler_c_o=no
- $RM -r conftest 2>/dev/null
- mkdir conftest
- cd conftest
- mkdir out
- echo "$lt_simple_compile_test_code" > conftest.$ac_ext
-
- lt_compiler_flag="-o out/conftest2.$ac_objext"
- # Insert the option either (1) after the last *FLAGS variable, or
- # (2) before a word containing "conftest.", or (3) at the end.
- # Note that $ac_compile itself does not contain backslashes and begins
- # with a dollar sign (not a hyphen), so the echo should work correctly.
- lt_compile=`echo "$ac_compile" | $SED \
- -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
- -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
- -e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
- (eval "$lt_compile" 2>out/conftest.err)
- ac_status=$?
- cat out/conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- if (exit $ac_status) && test -s out/conftest2.$ac_objext
- then
- # The compiler can only warn and ignore the option if not recognized
- # So say no if there are warnings
- $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
- $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
- if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
- lt_cv_prog_compiler_c_o=yes
- fi
- fi
- chmod u+w . 2>&5
- $RM conftest*
- # SGI C++ compiler will create directory out/ii_files/ for
- # template instantiation
- test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
- $RM out/* && rmdir out
- cd ..
- $RM -r conftest
- $RM conftest*
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5
-$as_echo "$lt_cv_prog_compiler_c_o" >&6; }
-
-
-
-
-hard_links="nottested"
-if test "$lt_cv_prog_compiler_c_o" = no && test "$need_locks" != no; then
- # do not overwrite the value of need_locks provided by the user
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5
-$as_echo_n "checking if we can lock with hard links... " >&6; }
- hard_links=yes
- $RM conftest*
- ln conftest.a conftest.b 2>/dev/null && hard_links=no
- touch conftest.a
- ln conftest.a conftest.b 2>&5 || hard_links=no
- ln conftest.a conftest.b 2>/dev/null && hard_links=no
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5
-$as_echo "$hard_links" >&6; }
- if test "$hard_links" = no; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5
-$as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;}
- need_locks=warn
- fi
-else
- need_locks=no
-fi
-
-
-
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5
-$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; }
-
- runpath_var=
- allow_undefined_flag=
- always_export_symbols=no
- archive_cmds=
- archive_expsym_cmds=
- compiler_needs_object=no
- enable_shared_with_static_runtimes=no
- export_dynamic_flag_spec=
- export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
- hardcode_automatic=no
- hardcode_direct=no
- hardcode_direct_absolute=no
- hardcode_libdir_flag_spec=
- hardcode_libdir_separator=
- hardcode_minus_L=no
- hardcode_shlibpath_var=unsupported
- inherit_rpath=no
- link_all_deplibs=unknown
- module_cmds=
- module_expsym_cmds=
- old_archive_from_new_cmds=
- old_archive_from_expsyms_cmds=
- thread_safe_flag_spec=
- whole_archive_flag_spec=
- # include_expsyms should be a list of space-separated symbols to be *always*
- # included in the symbol list
- include_expsyms=
- # exclude_expsyms can be an extended regexp of symbols to exclude
- # it will be wrapped by ` (' and `)$', so one must not match beginning or
- # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
- # as well as any symbol that contains `d'.
- exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'
- # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out
- # platforms (ab)use it in PIC code, but their linkers get confused if
- # the symbol is explicitly referenced. Since portable code cannot
- # rely on this symbol name, it's probably fine to never include it in
- # preloaded symbol tables.
- # Exclude shared library initialization/finalization symbols.
- extract_expsyms_cmds=
-
- case $host_os in
- cygwin* | mingw* | pw32* | cegcc*)
- # FIXME: the MSVC++ port hasn't been tested in a loooong time
- # When not using gcc, we currently assume that we are using
- # Microsoft Visual C++.
- if test "$GCC" != yes; then
- with_gnu_ld=no
- fi
- ;;
- interix*)
- # we just hope/assume this is gcc and not c89 (= MSVC++)
- with_gnu_ld=yes
- ;;
- openbsd*)
- with_gnu_ld=no
- ;;
- esac
-
- ld_shlibs=yes
-
- # On some targets, GNU ld is compatible enough with the native linker
- # that we're better off using the native interface for both.
- lt_use_gnu_ld_interface=no
- if test "$with_gnu_ld" = yes; then
- case $host_os in
- aix*)
- # The AIX port of GNU ld has always aspired to compatibility
- # with the native linker. However, as the warning in the GNU ld
- # block says, versions before 2.19.5* couldn't really create working
- # shared libraries, regardless of the interface used.
- case `$LD -v 2>&1` in
- *\ \(GNU\ Binutils\)\ 2.19.5*) ;;
- *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;;
- *\ \(GNU\ Binutils\)\ [3-9]*) ;;
- *)
- lt_use_gnu_ld_interface=yes
- ;;
- esac
- ;;
- *)
- lt_use_gnu_ld_interface=yes
- ;;
- esac
- fi
-
- if test "$lt_use_gnu_ld_interface" = yes; then
- # If archive_cmds runs LD, not CC, wlarc should be empty
- wlarc='${wl}'
-
- # Set some defaults for GNU ld with shared library support. These
- # are reset later if shared libraries are not supported. Putting them
- # here allows them to be overridden if necessary.
- runpath_var=LD_RUN_PATH
- hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
- export_dynamic_flag_spec='${wl}--export-dynamic'
- # ancient GNU ld didn't support --whole-archive et. al.
- if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then
- whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
- else
- whole_archive_flag_spec=
- fi
- supports_anon_versioning=no
- case `$LD -v 2>&1` in
- *GNU\ gold*) supports_anon_versioning=yes ;;
- *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11
- *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ...
- *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ...
- *\ 2.11.*) ;; # other 2.11 versions
- *) supports_anon_versioning=yes ;;
- esac
-
- # See if GNU ld supports shared libraries.
- case $host_os in
- aix[3-9]*)
- # On AIX/PPC, the GNU linker is very broken
- if test "$host_cpu" != ia64; then
- ld_shlibs=no
- cat <<_LT_EOF 1>&2
-
-*** Warning: the GNU linker, at least up to release 2.19, is reported
-*** to be unable to reliably create shared libraries on AIX.
-*** Therefore, libtool is disabling shared libraries support. If you
-*** really care for shared libraries, you may want to install binutils
-*** 2.20 or above, or modify your PATH so that a non-GNU linker is found.
-*** You will then need to restart the configuration process.
-
-_LT_EOF
- fi
- ;;
-
- amigaos*)
- case $host_cpu in
- powerpc)
- # see comment about AmigaOS4 .so support
- archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- archive_expsym_cmds=''
- ;;
- m68k)
- archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
- hardcode_libdir_flag_spec='-L$libdir'
- hardcode_minus_L=yes
- ;;
- esac
- ;;
-
- beos*)
- if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
- allow_undefined_flag=unsupported
- # Joseph Beckenbach <jrb3 at best.com> says some releases of gcc
- # support --undefined. This deserves some investigation. FIXME
- archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- else
- ld_shlibs=no
- fi
- ;;
-
- cygwin* | mingw* | pw32* | cegcc*)
- # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless,
- # as there is no search path for DLLs.
- hardcode_libdir_flag_spec='-L$libdir'
- export_dynamic_flag_spec='${wl}--export-all-symbols'
- allow_undefined_flag=unsupported
- always_export_symbols=no
- enable_shared_with_static_runtimes=yes
- export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols'
- exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'
-
- if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
- archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
- # If the export-symbols file already is a .def file (1st line
- # is EXPORTS), use it as is; otherwise, prepend...
- archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
- cp $export_symbols $output_objdir/$soname.def;
- else
- echo EXPORTS > $output_objdir/$soname.def;
- cat $export_symbols >> $output_objdir/$soname.def;
- fi~
- $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
- else
- ld_shlibs=no
- fi
- ;;
-
- haiku*)
- archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- link_all_deplibs=yes
- ;;
-
- interix[3-9]*)
- hardcode_direct=no
- hardcode_shlibpath_var=no
- hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
- export_dynamic_flag_spec='${wl}-E'
- # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
- # Instead, shared libraries are loaded at an image base (0x10000000 by
- # default) and relocated if they conflict, which is a slow very memory
- # consuming and fragmenting process. To avoid this, we pick a random,
- # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
- # time. Moving up from 0x10000000 also allows more sbrk(2) space.
- archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
- archive_expsym_cmds='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
- ;;
-
- gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu)
- tmp_diet=no
- if test "$host_os" = linux-dietlibc; then
- case $cc_basename in
- diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn)
- esac
- fi
- if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \
- && test "$tmp_diet" = no
- then
- tmp_addflag=' $pic_flag'
- tmp_sharedflag='-shared'
- case $cc_basename,$host_cpu in
- pgcc*) # Portland Group C compiler
- whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
- tmp_addflag=' $pic_flag'
- ;;
- pgf77* | pgf90* | pgf95* | pgfortran*)
- # Portland Group f77 and f90 compilers
- whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
- tmp_addflag=' $pic_flag -Mnomain' ;;
- ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64
- tmp_addflag=' -i_dynamic' ;;
- efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64
- tmp_addflag=' -i_dynamic -nofor_main' ;;
- ifc* | ifort*) # Intel Fortran compiler
- tmp_addflag=' -nofor_main' ;;
- lf95*) # Lahey Fortran 8.1
- whole_archive_flag_spec=
- tmp_sharedflag='--shared' ;;
- xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below)
- tmp_sharedflag='-qmkshrobj'
- tmp_addflag= ;;
- nvcc*) # Cuda Compiler Driver 2.2
- whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
- compiler_needs_object=yes
- ;;
- esac
- case `$CC -V 2>&1 | sed 5q` in
- *Sun\ C*) # Sun C 5.9
- whole_archive_flag_spec='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
- compiler_needs_object=yes
- tmp_sharedflag='-G' ;;
- *Sun\ F*) # Sun Fortran 8.3
- tmp_sharedflag='-G' ;;
- esac
- archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-
- if test "x$supports_anon_versioning" = xyes; then
- archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~
- cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
- echo "local: *; };" >> $output_objdir/$libname.ver~
- $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
- fi
-
- case $cc_basename in
- xlf* | bgf* | bgxlf* | mpixlf*)
- # IBM XL Fortran 10.1 on PPC cannot create shared libs itself
- whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive'
- hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
- archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
- if test "x$supports_anon_versioning" = xyes; then
- archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~
- cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
- echo "local: *; };" >> $output_objdir/$libname.ver~
- $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib'
- fi
- ;;
- esac
- else
- ld_shlibs=no
- fi
- ;;
-
- netbsd*)
- if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
- archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
- wlarc=
- else
- archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
- fi
- ;;
-
- solaris*)
- if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then
- ld_shlibs=no
- cat <<_LT_EOF 1>&2
-
-*** Warning: The releases 2.8.* of the GNU linker cannot reliably
-*** create shared libraries on Solaris systems. Therefore, libtool
-*** is disabling shared libraries support. We urge you to upgrade GNU
-*** binutils to release 2.9.1 or newer. Another option is to modify
-*** your PATH or compiler configuration so that the native linker is
-*** used, and then restart.
-
-_LT_EOF
- elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
- archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
- else
- ld_shlibs=no
- fi
- ;;
-
- sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*)
- case `$LD -v 2>&1` in
- *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*)
- ld_shlibs=no
- cat <<_LT_EOF 1>&2
-
-*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not
-*** reliably create shared libraries on SCO systems. Therefore, libtool
-*** is disabling shared libraries support. We urge you to upgrade GNU
-*** binutils to release 2.16.91.0.3 or newer. Another option is to modify
-*** your PATH or compiler configuration so that the native linker is
-*** used, and then restart.
-
-_LT_EOF
- ;;
- *)
- # For security reasons, it is highly recommended that you always
- # use absolute paths for naming shared libraries, and exclude the
- # DT_RUNPATH tag from executables and libraries. But doing so
- # requires that you compile everything twice, which is a pain.
- if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
- hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
- archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
- else
- ld_shlibs=no
- fi
- ;;
- esac
- ;;
-
- sunos4*)
- archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
- wlarc=
- hardcode_direct=yes
- hardcode_shlibpath_var=no
- ;;
-
- *)
- if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
- archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
- else
- ld_shlibs=no
- fi
- ;;
- esac
-
- if test "$ld_shlibs" = no; then
- runpath_var=
- hardcode_libdir_flag_spec=
- export_dynamic_flag_spec=
- whole_archive_flag_spec=
- fi
- else
- # PORTME fill in a description of your system's linker (not GNU ld)
- case $host_os in
- aix3*)
- allow_undefined_flag=unsupported
- always_export_symbols=yes
- archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname'
- # Note: this linker hardcodes the directories in LIBPATH if there
- # are no directories specified by -L.
- hardcode_minus_L=yes
- if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then
- # Neither direct hardcoding nor static linking is supported with a
- # broken collect2.
- hardcode_direct=unsupported
- fi
- ;;
-
- aix[4-9]*)
- if test "$host_cpu" = ia64; then
- # On IA64, the linker does run time linking by default, so we don't
- # have to do anything special.
- aix_use_runtimelinking=no
- exp_sym_flag='-Bexport'
- no_entry_flag=""
- else
- # If we're using GNU nm, then we don't want the "-C" option.
- # -C means demangle to AIX nm, but means don't demangle with GNU nm
- # Also, AIX nm treats weak defined symbols like other global
- # defined symbols, whereas GNU nm marks them as "W".
- if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
- export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
- else
- export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
- fi
- aix_use_runtimelinking=no
-
- # Test if we are trying to use run time linking or normal
- # AIX style linking. If -brtl is somewhere in LDFLAGS, we
- # need to do runtime linking.
- case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*)
- for ld_flag in $LDFLAGS; do
- if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then
- aix_use_runtimelinking=yes
- break
- fi
- done
- ;;
- esac
-
- exp_sym_flag='-bexport'
- no_entry_flag='-bnoentry'
- fi
-
- # When large executables or shared objects are built, AIX ld can
- # have problems creating the table of contents. If linking a library
- # or program results in "error TOC overflow" add -mminimal-toc to
- # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not
- # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
-
- archive_cmds=''
- hardcode_direct=yes
- hardcode_direct_absolute=yes
- hardcode_libdir_separator=':'
- link_all_deplibs=yes
- file_list_spec='${wl}-f,'
-
- if test "$GCC" = yes; then
- case $host_os in aix4.[012]|aix4.[012].*)
- # We only want to do this on AIX 4.2 and lower, the check
- # below for broken collect2 doesn't work under 4.3+
- collect2name=`${CC} -print-prog-name=collect2`
- if test -f "$collect2name" &&
- strings "$collect2name" | $GREP resolve_lib_name >/dev/null
- then
- # We have reworked collect2
- :
- else
- # We have old collect2
- hardcode_direct=unsupported
- # It fails to find uninstalled libraries when the uninstalled
- # path is not listed in the libpath. Setting hardcode_minus_L
- # to unsupported forces relinking
- hardcode_minus_L=yes
- hardcode_libdir_flag_spec='-L$libdir'
- hardcode_libdir_separator=
- fi
- ;;
- esac
- shared_flag='-shared'
- if test "$aix_use_runtimelinking" = yes; then
- shared_flag="$shared_flag "'${wl}-G'
- fi
- else
- # not using gcc
- if test "$host_cpu" = ia64; then
- # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
- # chokes on -Wl,-G. The following line is correct:
- shared_flag='-G'
- else
- if test "$aix_use_runtimelinking" = yes; then
- shared_flag='${wl}-G'
- else
- shared_flag='${wl}-bM:SRE'
- fi
- fi
- fi
-
- export_dynamic_flag_spec='${wl}-bexpall'
- # It seems that -bexpall does not export symbols beginning with
- # underscore (_), so it is better to generate a list of symbols to export.
- always_export_symbols=yes
- if test "$aix_use_runtimelinking" = yes; then
- # Warning - without using the other runtime loading flags (-brtl),
- # -berok will link without error, but may produce a broken library.
- allow_undefined_flag='-berok'
- # Determine the default libpath from the value encoded in an
- # empty executable.
- if test "${lt_cv_aix_libpath+set}" = set; then
- aix_libpath=$lt_cv_aix_libpath
-else
- if ${lt_cv_aix_libpath_+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
-
- lt_aix_libpath_sed='
- /Import File Strings/,/^$/ {
- /^0/ {
- s/^0 *\([^ ]*\) *$/\1/
- p
- }
- }'
- lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
- # Check for a 64-bit object if we didn't find anything.
- if test -z "$lt_cv_aix_libpath_"; then
- lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
- fi
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- if test -z "$lt_cv_aix_libpath_"; then
- lt_cv_aix_libpath_="/usr/lib:/lib"
- fi
-
-fi
-
- aix_libpath=$lt_cv_aix_libpath_
-fi
-
- hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath"
- archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
- else
- if test "$host_cpu" = ia64; then
- hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib'
- allow_undefined_flag="-z nodefs"
- archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
- else
- # Determine the default libpath from the value encoded in an
- # empty executable.
- if test "${lt_cv_aix_libpath+set}" = set; then
- aix_libpath=$lt_cv_aix_libpath
-else
- if ${lt_cv_aix_libpath_+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
-
- lt_aix_libpath_sed='
- /Import File Strings/,/^$/ {
- /^0/ {
- s/^0 *\([^ ]*\) *$/\1/
- p
- }
- }'
- lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
- # Check for a 64-bit object if we didn't find anything.
- if test -z "$lt_cv_aix_libpath_"; then
- lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
- fi
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- if test -z "$lt_cv_aix_libpath_"; then
- lt_cv_aix_libpath_="/usr/lib:/lib"
- fi
-
-fi
-
- aix_libpath=$lt_cv_aix_libpath_
-fi
-
- hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath"
- # Warning - without using the other run time loading flags,
- # -berok will link without error, but may produce a broken library.
- no_undefined_flag=' ${wl}-bernotok'
- allow_undefined_flag=' ${wl}-berok'
- if test "$with_gnu_ld" = yes; then
- # We only use this code for GNU lds that support --whole-archive.
- whole_archive_flag_spec='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
- else
- # Exported symbols can be pulled into shared objects from archives
- whole_archive_flag_spec='$convenience'
- fi
- archive_cmds_need_lc=yes
- # This is similar to how AIX traditionally builds its shared libraries.
- archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
- fi
- fi
- ;;
-
- amigaos*)
- case $host_cpu in
- powerpc)
- # see comment about AmigaOS4 .so support
- archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- archive_expsym_cmds=''
- ;;
- m68k)
- archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
- hardcode_libdir_flag_spec='-L$libdir'
- hardcode_minus_L=yes
- ;;
- esac
- ;;
-
- bsdi[45]*)
- export_dynamic_flag_spec=-rdynamic
- ;;
-
- cygwin* | mingw* | pw32* | cegcc*)
- # When not using gcc, we currently assume that we are using
- # Microsoft Visual C++.
- # hardcode_libdir_flag_spec is actually meaningless, as there is
- # no search path for DLLs.
- case $cc_basename in
- cl*)
- # Native MSVC
- hardcode_libdir_flag_spec=' '
- allow_undefined_flag=unsupported
- always_export_symbols=yes
- file_list_spec='@'
- # Tell ltmain to make .lib files, not .a files.
- libext=lib
- # Tell ltmain to make .dll files, not .so files.
- shrext_cmds=".dll"
- # FIXME: Setting linknames here is a bad hack.
- archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
- archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
- sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
- else
- sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
- fi~
- $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
- linknames='
- # The linker will not automatically build a static lib if we build a DLL.
- # _LT_TAGVAR(old_archive_from_new_cmds, )='true'
- enable_shared_with_static_runtimes=yes
- exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
- export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols'
- # Don't use ranlib
- old_postinstall_cmds='chmod 644 $oldlib'
- postlink_cmds='lt_outputfile="@OUTPUT@"~
- lt_tool_outputfile="@TOOL_OUTPUT@"~
- case $lt_outputfile in
- *.exe|*.EXE) ;;
- *)
- lt_outputfile="$lt_outputfile.exe"
- lt_tool_outputfile="$lt_tool_outputfile.exe"
- ;;
- esac~
- if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
- $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
- $RM "$lt_outputfile.manifest";
- fi'
- ;;
- *)
- # Assume MSVC wrapper
- hardcode_libdir_flag_spec=' '
- allow_undefined_flag=unsupported
- # Tell ltmain to make .lib files, not .a files.
- libext=lib
- # Tell ltmain to make .dll files, not .so files.
- shrext_cmds=".dll"
- # FIXME: Setting linknames here is a bad hack.
- archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames='
- # The linker will automatically build a .lib file if we build a DLL.
- old_archive_from_new_cmds='true'
- # FIXME: Should let the user specify the lib program.
- old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs'
- enable_shared_with_static_runtimes=yes
- ;;
- esac
- ;;
-
- darwin* | rhapsody*)
-
-
- archive_cmds_need_lc=no
- hardcode_direct=no
- hardcode_automatic=yes
- hardcode_shlibpath_var=unsupported
- if test "$lt_cv_ld_force_load" = "yes"; then
- whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
-
- else
- whole_archive_flag_spec=''
- fi
- link_all_deplibs=yes
- allow_undefined_flag="$_lt_dar_allow_undefined"
- case $cc_basename in
- ifort*) _lt_dar_can_shared=yes ;;
- *) _lt_dar_can_shared=$GCC ;;
- esac
- if test "$_lt_dar_can_shared" = "yes"; then
- output_verbose_link_cmd=func_echo_all
- archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}"
- module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}"
- archive_expsym_cmds="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}"
- module_expsym_cmds="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}"
-
- else
- ld_shlibs=no
- fi
-
- ;;
-
- dgux*)
- archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- hardcode_libdir_flag_spec='-L$libdir'
- hardcode_shlibpath_var=no
- ;;
-
- # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
- # support. Future versions do this automatically, but an explicit c++rt0.o
- # does not break anything, and helps significantly (at the cost of a little
- # extra space).
- freebsd2.2*)
- archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o'
- hardcode_libdir_flag_spec='-R$libdir'
- hardcode_direct=yes
- hardcode_shlibpath_var=no
- ;;
-
- # Unfortunately, older versions of FreeBSD 2 do not have this feature.
- freebsd2.*)
- archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
- hardcode_direct=yes
- hardcode_minus_L=yes
- hardcode_shlibpath_var=no
- ;;
-
- # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
- freebsd* | dragonfly*)
- archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
- hardcode_libdir_flag_spec='-R$libdir'
- hardcode_direct=yes
- hardcode_shlibpath_var=no
- ;;
-
- hpux9*)
- if test "$GCC" = yes; then
- archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
- else
- archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
- fi
- hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
- hardcode_libdir_separator=:
- hardcode_direct=yes
-
- # hardcode_minus_L: Not really in the search PATH,
- # but as the default location of the library.
- hardcode_minus_L=yes
- export_dynamic_flag_spec='${wl}-E'
- ;;
-
- hpux10*)
- if test "$GCC" = yes && test "$with_gnu_ld" = no; then
- archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
- else
- archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'
- fi
- if test "$with_gnu_ld" = no; then
- hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
- hardcode_libdir_separator=:
- hardcode_direct=yes
- hardcode_direct_absolute=yes
- export_dynamic_flag_spec='${wl}-E'
- # hardcode_minus_L: Not really in the search PATH,
- # but as the default location of the library.
- hardcode_minus_L=yes
- fi
- ;;
-
- hpux11*)
- if test "$GCC" = yes && test "$with_gnu_ld" = no; then
- case $host_cpu in
- hppa*64*)
- archive_cmds='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- ia64*)
- archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- *)
- archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- esac
- else
- case $host_cpu in
- hppa*64*)
- archive_cmds='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- ia64*)
- archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- *)
-
- # Older versions of the 11.00 compiler do not understand -b yet
- # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does)
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5
-$as_echo_n "checking if $CC understands -b... " >&6; }
-if ${lt_cv_prog_compiler__b+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_prog_compiler__b=no
- save_LDFLAGS="$LDFLAGS"
- LDFLAGS="$LDFLAGS -b"
- echo "$lt_simple_link_test_code" > conftest.$ac_ext
- if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
- # The linker can only warn and ignore the option if not recognized
- # So say no if there are warnings
- if test -s conftest.err; then
- # Append any errors to the config.log.
- cat conftest.err 1>&5
- $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
- $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
- if diff conftest.exp conftest.er2 >/dev/null; then
- lt_cv_prog_compiler__b=yes
- fi
- else
- lt_cv_prog_compiler__b=yes
- fi
- fi
- $RM -r conftest*
- LDFLAGS="$save_LDFLAGS"
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5
-$as_echo "$lt_cv_prog_compiler__b" >&6; }
-
-if test x"$lt_cv_prog_compiler__b" = xyes; then
- archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
-else
- archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'
-fi
-
- ;;
- esac
- fi
- if test "$with_gnu_ld" = no; then
- hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
- hardcode_libdir_separator=:
-
- case $host_cpu in
- hppa*64*|ia64*)
- hardcode_direct=no
- hardcode_shlibpath_var=no
- ;;
- *)
- hardcode_direct=yes
- hardcode_direct_absolute=yes
- export_dynamic_flag_spec='${wl}-E'
-
- # hardcode_minus_L: Not really in the search PATH,
- # but as the default location of the library.
- hardcode_minus_L=yes
- ;;
- esac
- fi
- ;;
-
- irix5* | irix6* | nonstopux*)
- if test "$GCC" = yes; then
- archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
- # Try to use the -exported_symbol ld option, if it does not
- # work, assume that -exports_file does not work either and
- # implicitly export all symbols.
- # This should be the same for all languages, so no per-tag cache variable.
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5
-$as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; }
-if ${lt_cv_irix_exported_symbol+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- save_LDFLAGS="$LDFLAGS"
- LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-int foo (void) { return 0; }
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- lt_cv_irix_exported_symbol=yes
-else
- lt_cv_irix_exported_symbol=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- LDFLAGS="$save_LDFLAGS"
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5
-$as_echo "$lt_cv_irix_exported_symbol" >&6; }
- if test "$lt_cv_irix_exported_symbol" = yes; then
- archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib'
- fi
- else
- archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
- archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib'
- fi
- archive_cmds_need_lc='no'
- hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
- hardcode_libdir_separator=:
- inherit_rpath=yes
- link_all_deplibs=yes
- ;;
-
- netbsd*)
- if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
- archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out
- else
- archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF
- fi
- hardcode_libdir_flag_spec='-R$libdir'
- hardcode_direct=yes
- hardcode_shlibpath_var=no
- ;;
-
- newsos6)
- archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- hardcode_direct=yes
- hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
- hardcode_libdir_separator=:
- hardcode_shlibpath_var=no
- ;;
-
- *nto* | *qnx*)
- ;;
-
- openbsd*)
- if test -f /usr/libexec/ld.so; then
- hardcode_direct=yes
- hardcode_shlibpath_var=no
- hardcode_direct_absolute=yes
- if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
- archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
- archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
- hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
- export_dynamic_flag_spec='${wl}-E'
- else
- case $host_os in
- openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
- archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
- hardcode_libdir_flag_spec='-R$libdir'
- ;;
- *)
- archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
- hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
- ;;
- esac
- fi
- else
- ld_shlibs=no
- fi
- ;;
-
- os2*)
- hardcode_libdir_flag_spec='-L$libdir'
- hardcode_minus_L=yes
- allow_undefined_flag=unsupported
- archive_cmds='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def'
- old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def'
- ;;
-
- osf3*)
- if test "$GCC" = yes; then
- allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
- archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
- else
- allow_undefined_flag=' -expect_unresolved \*'
- archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
- fi
- archive_cmds_need_lc='no'
- hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
- hardcode_libdir_separator=:
- ;;
-
- osf4* | osf5*) # as osf3* with the addition of -msym flag
- if test "$GCC" = yes; then
- allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
- archive_cmds='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
- hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
- else
- allow_undefined_flag=' -expect_unresolved \*'
- archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
- archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~
- $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp'
-
- # Both c and cxx compiler support -rpath directly
- hardcode_libdir_flag_spec='-rpath $libdir'
- fi
- archive_cmds_need_lc='no'
- hardcode_libdir_separator=:
- ;;
-
- solaris*)
- no_undefined_flag=' -z defs'
- if test "$GCC" = yes; then
- wlarc='${wl}'
- archive_cmds='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
- archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
- $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
- else
- case `$CC -V 2>&1` in
- *"Compilers 5.0"*)
- wlarc=''
- archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
- archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
- $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp'
- ;;
- *)
- wlarc='${wl}'
- archive_cmds='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags'
- archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
- $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
- ;;
- esac
- fi
- hardcode_libdir_flag_spec='-R$libdir'
- hardcode_shlibpath_var=no
- case $host_os in
- solaris2.[0-5] | solaris2.[0-5].*) ;;
- *)
- # The compiler driver will combine and reorder linker options,
- # but understands `-z linker_flag'. GCC discards it without `$wl',
- # but is careful enough not to reorder.
- # Supported since Solaris 2.6 (maybe 2.5.1?)
- if test "$GCC" = yes; then
- whole_archive_flag_spec='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
- else
- whole_archive_flag_spec='-z allextract$convenience -z defaultextract'
- fi
- ;;
- esac
- link_all_deplibs=yes
- ;;
-
- sunos4*)
- if test "x$host_vendor" = xsequent; then
- # Use $CC to link under sequent, because it throws in some extra .o
- # files that make .init and .fini sections work.
- archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags'
- else
- archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
- fi
- hardcode_libdir_flag_spec='-L$libdir'
- hardcode_direct=yes
- hardcode_minus_L=yes
- hardcode_shlibpath_var=no
- ;;
-
- sysv4)
- case $host_vendor in
- sni)
- archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- hardcode_direct=yes # is this really true???
- ;;
- siemens)
- ## LD is ld it makes a PLAMLIB
- ## CC just makes a GrossModule.
- archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags'
- reload_cmds='$CC -r -o $output$reload_objs'
- hardcode_direct=no
- ;;
- motorola)
- archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- hardcode_direct=no #Motorola manual says yes, but my tests say they lie
- ;;
- esac
- runpath_var='LD_RUN_PATH'
- hardcode_shlibpath_var=no
- ;;
-
- sysv4.3*)
- archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- hardcode_shlibpath_var=no
- export_dynamic_flag_spec='-Bexport'
- ;;
-
- sysv4*MP*)
- if test -d /usr/nec; then
- archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- hardcode_shlibpath_var=no
- runpath_var=LD_RUN_PATH
- hardcode_runpath_var=yes
- ld_shlibs=yes
- fi
- ;;
-
- sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*)
- no_undefined_flag='${wl}-z,text'
- archive_cmds_need_lc=no
- hardcode_shlibpath_var=no
- runpath_var='LD_RUN_PATH'
-
- if test "$GCC" = yes; then
- archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- else
- archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- fi
- ;;
-
- sysv5* | sco3.2v5* | sco5v6*)
- # Note: We can NOT use -z defs as we might desire, because we do not
- # link with -lc, and that would cause any symbols used from libc to
- # always be unresolved, which means just about no library would
- # ever link correctly. If we're not using GNU ld we use -z text
- # though, which does catch some bad symbols but isn't as heavy-handed
- # as -z defs.
- no_undefined_flag='${wl}-z,text'
- allow_undefined_flag='${wl}-z,nodefs'
- archive_cmds_need_lc=no
- hardcode_shlibpath_var=no
- hardcode_libdir_flag_spec='${wl}-R,$libdir'
- hardcode_libdir_separator=':'
- link_all_deplibs=yes
- export_dynamic_flag_spec='${wl}-Bexport'
- runpath_var='LD_RUN_PATH'
-
- if test "$GCC" = yes; then
- archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- else
- archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- fi
- ;;
-
- uts4*)
- archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- hardcode_libdir_flag_spec='-L$libdir'
- hardcode_shlibpath_var=no
- ;;
-
- *)
- ld_shlibs=no
- ;;
- esac
-
- if test x$host_vendor = xsni; then
- case $host in
- sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
- export_dynamic_flag_spec='${wl}-Blargedynsym'
- ;;
- esac
- fi
- fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5
-$as_echo "$ld_shlibs" >&6; }
-test "$ld_shlibs" = no && can_build_shared=no
-
-with_gnu_ld=$with_gnu_ld
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-#
-# Do we need to explicitly link libc?
-#
-case "x$archive_cmds_need_lc" in
-x|xyes)
- # Assume -lc should be added
- archive_cmds_need_lc=yes
-
- if test "$enable_shared" = yes && test "$GCC" = yes; then
- case $archive_cmds in
- *'~'*)
- # FIXME: we may have to deal with multi-command sequences.
- ;;
- '$CC '*)
- # Test whether the compiler implicitly links with -lc since on some
- # systems, -lgcc has to come before -lc. If gcc already passes -lc
- # to ld, don't add -lc before -lgcc.
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5
-$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; }
-if ${lt_cv_archive_cmds_need_lc+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- $RM conftest*
- echo "$lt_simple_compile_test_code" > conftest.$ac_ext
-
- if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } 2>conftest.err; then
- soname=conftest
- lib=conftest
- libobjs=conftest.$ac_objext
- deplibs=
- wl=$lt_prog_compiler_wl
- pic_flag=$lt_prog_compiler_pic
- compiler_flags=-v
- linker_flags=-v
- verstring=
- output_objdir=.
- libname=conftest
- lt_save_allow_undefined_flag=$allow_undefined_flag
- allow_undefined_flag=
- if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5
- (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
- then
- lt_cv_archive_cmds_need_lc=no
- else
- lt_cv_archive_cmds_need_lc=yes
- fi
- allow_undefined_flag=$lt_save_allow_undefined_flag
- else
- cat conftest.err 1>&5
- fi
- $RM conftest*
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5
-$as_echo "$lt_cv_archive_cmds_need_lc" >&6; }
- archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc
- ;;
- esac
- fi
- ;;
-esac
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
-$as_echo_n "checking dynamic linker characteristics... " >&6; }
-
-if test "$GCC" = yes; then
- case $host_os in
- darwin*) lt_awk_arg="/^libraries:/,/LR/" ;;
- *) lt_awk_arg="/^libraries:/" ;;
- esac
- case $host_os in
- mingw* | cegcc*) lt_sed_strip_eq="s,=\([A-Za-z]:\),\1,g" ;;
- *) lt_sed_strip_eq="s,=/,/,g" ;;
- esac
- lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq`
- case $lt_search_path_spec in
- *\;*)
- # if the path contains ";" then we assume it to be the separator
- # otherwise default to the standard path separator (i.e. ":") - it is
- # assumed that no part of a normal pathname contains ";" but that should
- # okay in the real world where ";" in dirpaths is itself problematic.
- lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'`
- ;;
- *)
- lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"`
- ;;
- esac
- # Ok, now we have the path, separated by spaces, we can step through it
- # and add multilib dir if necessary.
- lt_tmp_lt_search_path_spec=
- lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
- for lt_sys_path in $lt_search_path_spec; do
- if test -d "$lt_sys_path/$lt_multi_os_dir"; then
- lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
- else
- test -d "$lt_sys_path" && \
- lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path"
- fi
- done
- lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk '
-BEGIN {RS=" "; FS="/|\n";} {
- lt_foo="";
- lt_count=0;
- for (lt_i = NF; lt_i > 0; lt_i--) {
- if ($lt_i != "" && $lt_i != ".") {
- if ($lt_i == "..") {
- lt_count++;
- } else {
- if (lt_count == 0) {
- lt_foo="/" $lt_i lt_foo;
- } else {
- lt_count--;
- }
- }
- }
- }
- if (lt_foo != "") { lt_freq[lt_foo]++; }
- if (lt_freq[lt_foo] == 1) { print lt_foo; }
-}'`
- # AWK program above erroneously prepends '/' to C:/dos/paths
- # for these hosts.
- case $host_os in
- mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\
- $SED 's,/\([A-Za-z]:\),\1,g'` ;;
- esac
- sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP`
-else
- sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
-fi
-library_names_spec=
-libname_spec='lib$name'
-soname_spec=
-shrext_cmds=".so"
-postinstall_cmds=
-postuninstall_cmds=
-finish_cmds=
-finish_eval=
-shlibpath_var=
-shlibpath_overrides_runpath=unknown
-version_type=none
-dynamic_linker="$host_os ld.so"
-sys_lib_dlsearch_path_spec="/lib /usr/lib"
-need_lib_prefix=unknown
-hardcode_into_libs=no
-
-# when you set need_version to no, make sure it does not cause -set_version
-# flags to be left without arguments
-need_version=unknown
-
-case $host_os in
-aix3*)
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
- shlibpath_var=LIBPATH
-
- # AIX 3 has no versioning support, so we append a major version to the name.
- soname_spec='${libname}${release}${shared_ext}$major'
- ;;
-
-aix[4-9]*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- hardcode_into_libs=yes
- if test "$host_cpu" = ia64; then
- # AIX 5 supports IA64
- library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
- shlibpath_var=LD_LIBRARY_PATH
- else
- # With GCC up to 2.95.x, collect2 would create an import file
- # for dependence libraries. The import file would start with
- # the line `#! .'. This would cause the generated library to
- # depend on `.', always an invalid library. This was fixed in
- # development snapshots of GCC prior to 3.0.
- case $host_os in
- aix4 | aix4.[01] | aix4.[01].*)
- if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
- echo ' yes '
- echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then
- :
- else
- can_build_shared=no
- fi
- ;;
- esac
- # AIX (on Power*) has no versioning support, so currently we can not hardcode correct
- # soname into executable. Probably we can add versioning support to
- # collect2, so additional links can be useful in future.
- if test "$aix_use_runtimelinking" = yes; then
- # If using run time linking (on AIX 4.2 or later) use lib<name>.so
- # instead of lib<name>.a to let people know that these are not
- # typical AIX shared libraries.
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- else
- # We preserve .a as extension for shared libraries through AIX4.2
- # and later when we are not doing run time linking.
- library_names_spec='${libname}${release}.a $libname.a'
- soname_spec='${libname}${release}${shared_ext}$major'
- fi
- shlibpath_var=LIBPATH
- fi
- ;;
-
-amigaos*)
- case $host_cpu in
- powerpc)
- # Since July 2007 AmigaOS4 officially supports .so libraries.
- # When compiling the executable, add -use-dynld -Lsobjs: to the compileline.
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- ;;
- m68k)
- library_names_spec='$libname.ixlibrary $libname.a'
- # Create ${libname}_ixlibrary.a entries in /sys/libs.
- finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
- ;;
- esac
- ;;
-
-beos*)
- library_names_spec='${libname}${shared_ext}'
- dynamic_linker="$host_os ld.so"
- shlibpath_var=LIBRARY_PATH
- ;;
-
-bsdi[45]*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
- shlibpath_var=LD_LIBRARY_PATH
- sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
- sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
- # the default ld.so.conf also contains /usr/contrib/lib and
- # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
- # libtool to hard-code these into programs
- ;;
-
-cygwin* | mingw* | pw32* | cegcc*)
- version_type=windows
- shrext_cmds=".dll"
- need_version=no
- need_lib_prefix=no
-
- case $GCC,$cc_basename in
- yes,*)
- # gcc
- library_names_spec='$libname.dll.a'
- # DLL is installed to $(libdir)/../bin by postinstall_cmds
- postinstall_cmds='base_file=`basename \${file}`~
- dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
- dldir=$destdir/`dirname \$dlpath`~
- test -d \$dldir || mkdir -p \$dldir~
- $install_prog $dir/$dlname \$dldir/$dlname~
- chmod a+x \$dldir/$dlname~
- if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then
- eval '\''$striplib \$dldir/$dlname'\'' || exit \$?;
- fi'
- postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
- dlpath=$dir/\$dldll~
- $RM \$dlpath'
- shlibpath_overrides_runpath=yes
-
- case $host_os in
- cygwin*)
- # Cygwin DLLs use 'cyg' prefix rather than 'lib'
- soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
-
- sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"
- ;;
- mingw* | cegcc*)
- # MinGW DLLs use traditional 'lib' prefix
- soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
- ;;
- pw32*)
- # pw32 DLLs use 'pw' prefix rather than 'lib'
- library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
- ;;
- esac
- dynamic_linker='Win32 ld.exe'
- ;;
-
- *,cl*)
- # Native MSVC
- libname_spec='$name'
- soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
- library_names_spec='${libname}.dll.lib'
-
- case $build_os in
- mingw*)
- sys_lib_search_path_spec=
- lt_save_ifs=$IFS
- IFS=';'
- for lt_path in $LIB
- do
- IFS=$lt_save_ifs
- # Let DOS variable expansion print the short 8.3 style file name.
- lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"`
- sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path"
- done
- IFS=$lt_save_ifs
- # Convert to MSYS style.
- sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'`
- ;;
- cygwin*)
- # Convert to unix form, then to dos form, then back to unix form
- # but this time dos style (no spaces!) so that the unix form looks
- # like /cygdrive/c/PROGRA~1:/cygdr...
- sys_lib_search_path_spec=`cygpath --path --unix "$LIB"`
- sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null`
- sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
- ;;
- *)
- sys_lib_search_path_spec="$LIB"
- if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then
- # It is most probably a Windows format PATH.
- sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
- else
- sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
- fi
- # FIXME: find the short name or the path components, as spaces are
- # common. (e.g. "Program Files" -> "PROGRA~1")
- ;;
- esac
-
- # DLL is installed to $(libdir)/../bin by postinstall_cmds
- postinstall_cmds='base_file=`basename \${file}`~
- dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
- dldir=$destdir/`dirname \$dlpath`~
- test -d \$dldir || mkdir -p \$dldir~
- $install_prog $dir/$dlname \$dldir/$dlname'
- postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
- dlpath=$dir/\$dldll~
- $RM \$dlpath'
- shlibpath_overrides_runpath=yes
- dynamic_linker='Win32 link.exe'
- ;;
-
- *)
- # Assume MSVC wrapper
- library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib'
- dynamic_linker='Win32 ld.exe'
- ;;
- esac
- # FIXME: first we should search . and the directory the executable is in
- shlibpath_var=PATH
- ;;
-
-darwin* | rhapsody*)
- dynamic_linker="$host_os dyld"
- version_type=darwin
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext'
- soname_spec='${libname}${release}${major}$shared_ext'
- shlibpath_overrides_runpath=yes
- shlibpath_var=DYLD_LIBRARY_PATH
- shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
-
- sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"
- sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
- ;;
-
-dgux*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- ;;
-
-freebsd* | dragonfly*)
- # DragonFly does not have aout. When/if they implement a new
- # versioning mechanism, adjust this.
- if test -x /usr/bin/objformat; then
- objformat=`/usr/bin/objformat`
- else
- case $host_os in
- freebsd[23].*) objformat=aout ;;
- *) objformat=elf ;;
- esac
- fi
- # Handle Gentoo/FreeBSD as it was Linux
- case $host_vendor in
- gentoo)
- version_type=linux ;;
- *)
- version_type=freebsd-$objformat ;;
- esac
-
- case $version_type in
- freebsd-elf*)
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
- need_version=no
- need_lib_prefix=no
- ;;
- freebsd-*)
- library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
- need_version=yes
- ;;
- linux)
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- need_lib_prefix=no
- need_version=no
- ;;
- esac
- shlibpath_var=LD_LIBRARY_PATH
- case $host_os in
- freebsd2.*)
- shlibpath_overrides_runpath=yes
- ;;
- freebsd3.[01]* | freebsdelf3.[01]*)
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- ;;
- freebsd3.[2-9]* | freebsdelf3.[2-9]* | \
- freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1)
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
- *) # from 4.6 on, and DragonFly
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- ;;
- esac
- ;;
-
-gnu*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
-haiku*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- dynamic_linker="$host_os runtime_loader"
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib'
- hardcode_into_libs=yes
- ;;
-
-hpux9* | hpux10* | hpux11*)
- # Give a soname corresponding to the major version so that dld.sl refuses to
- # link against other versions.
- version_type=sunos
- need_lib_prefix=no
- need_version=no
- case $host_cpu in
- ia64*)
- shrext_cmds='.so'
- hardcode_into_libs=yes
- dynamic_linker="$host_os dld.so"
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- if test "X$HPUX_IA64_MODE" = X32; then
- sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
- else
- sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
- fi
- sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
- ;;
- hppa*64*)
- shrext_cmds='.sl'
- hardcode_into_libs=yes
- dynamic_linker="$host_os dld.sl"
- shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
- shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
- sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
- ;;
- *)
- shrext_cmds='.sl'
- dynamic_linker="$host_os dld.sl"
- shlibpath_var=SHLIB_PATH
- shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- ;;
- esac
- # HP-UX runs *really* slowly unless shared libraries are mode 555, ...
- postinstall_cmds='chmod 555 $lib'
- # or fails outright, so override atomically:
- install_override_mode=555
- ;;
-
-interix[3-9]*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
-irix5* | irix6* | nonstopux*)
- case $host_os in
- nonstopux*) version_type=nonstopux ;;
- *)
- if test "$lt_cv_prog_gnu_ld" = yes; then
- version_type=linux # correct to gnu/linux during the next big refactor
- else
- version_type=irix
- fi ;;
- esac
- need_lib_prefix=no
- need_version=no
- soname_spec='${libname}${release}${shared_ext}$major'
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
- case $host_os in
- irix5* | nonstopux*)
- libsuff= shlibsuff=
- ;;
- *)
- case $LD in # libtool.m4 will add one of these switches to LD
- *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
- libsuff= shlibsuff= libmagic=32-bit;;
- *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
- libsuff=32 shlibsuff=N32 libmagic=N32;;
- *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
- libsuff=64 shlibsuff=64 libmagic=64-bit;;
- *) libsuff= shlibsuff= libmagic=never-match;;
- esac
- ;;
- esac
- shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
- shlibpath_overrides_runpath=no
- sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
- sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
- hardcode_into_libs=yes
- ;;
-
-# No shared lib support for Linux oldld, aout, or coff.
-linux*oldld* | linux*aout* | linux*coff*)
- dynamic_linker=no
- ;;
-
-# This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
-
- # Some binutils ld are patched to set DT_RUNPATH
- if ${lt_cv_shlibpath_overrides_runpath+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_shlibpath_overrides_runpath=no
- save_LDFLAGS=$LDFLAGS
- save_libdir=$libdir
- eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \
- LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\""
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then :
- lt_cv_shlibpath_overrides_runpath=yes
-fi
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- LDFLAGS=$save_LDFLAGS
- libdir=$save_libdir
-
-fi
-
- shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath
-
- # This implies no fast_install, which is unacceptable.
- # Some rework will be needed to allow for fast_install
- # before this can be enabled.
- hardcode_into_libs=yes
-
- # Append ld.so.conf contents to the search path
- if test -f /etc/ld.so.conf; then
- lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '`
- sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
- fi
-
- # We used to test for /lib/ld.so.1 and disable shared libraries on
- # powerpc, because MkLinux only supported shared libraries with the
- # GNU dynamic linker. Since this was broken with cross compilers,
- # most powerpc-linux boxes support dynamic linking these days and
- # people can always --disable-shared, the test was removed, and we
- # assume the GNU/Linux dynamic linker is in use.
- dynamic_linker='GNU/Linux ld.so'
- ;;
-
-netbsd*)
- version_type=sunos
- need_lib_prefix=no
- need_version=no
- if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
- finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
- dynamic_linker='NetBSD (a.out) ld.so'
- else
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- dynamic_linker='NetBSD ld.elf_so'
- fi
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- ;;
-
-newsos6)
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- ;;
-
-*nto* | *qnx*)
- version_type=qnx
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- dynamic_linker='ldqnx.so'
- ;;
-
-openbsd*)
- version_type=sunos
- sys_lib_dlsearch_path_spec="/usr/lib"
- need_lib_prefix=no
- # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
- case $host_os in
- openbsd3.3 | openbsd3.3.*) need_version=yes ;;
- *) need_version=no ;;
- esac
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
- finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
- shlibpath_var=LD_LIBRARY_PATH
- if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
- case $host_os in
- openbsd2.[89] | openbsd2.[89].*)
- shlibpath_overrides_runpath=no
- ;;
- *)
- shlibpath_overrides_runpath=yes
- ;;
- esac
- else
- shlibpath_overrides_runpath=yes
- fi
- ;;
-
-os2*)
- libname_spec='$name'
- shrext_cmds=".dll"
- need_lib_prefix=no
- library_names_spec='$libname${shared_ext} $libname.a'
- dynamic_linker='OS/2 ld.exe'
- shlibpath_var=LIBPATH
- ;;
-
-osf3* | osf4* | osf5*)
- version_type=osf
- need_lib_prefix=no
- need_version=no
- soname_spec='${libname}${release}${shared_ext}$major'
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- shlibpath_var=LD_LIBRARY_PATH
- sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
- sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
- ;;
-
-rdos*)
- dynamic_linker=no
- ;;
-
-solaris*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- # ldd complains unless libraries are executable
- postinstall_cmds='chmod +x $lib'
- ;;
-
-sunos4*)
- version_type=sunos
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
- finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- if test "$with_gnu_ld" = yes; then
- need_lib_prefix=no
- fi
- need_version=yes
- ;;
-
-sysv4 | sysv4.3*)
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- case $host_vendor in
- sni)
- shlibpath_overrides_runpath=no
- need_lib_prefix=no
- runpath_var=LD_RUN_PATH
- ;;
- siemens)
- need_lib_prefix=no
- ;;
- motorola)
- need_lib_prefix=no
- need_version=no
- shlibpath_overrides_runpath=no
- sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
- ;;
- esac
- ;;
-
-sysv4*MP*)
- if test -d /usr/nec ;then
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
- soname_spec='$libname${shared_ext}.$major'
- shlibpath_var=LD_LIBRARY_PATH
- fi
- ;;
-
-sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
- version_type=freebsd-elf
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- if test "$with_gnu_ld" = yes; then
- sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
- else
- sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
- case $host_os in
- sco3.2v5*)
- sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
- ;;
- esac
- fi
- sys_lib_dlsearch_path_spec='/usr/lib'
- ;;
-
-tpf*)
- # TPF is a cross-target only. Preferred cross-host = GNU/Linux.
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
-uts4*)
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- ;;
-
-*)
- dynamic_linker=no
- ;;
-esac
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5
-$as_echo "$dynamic_linker" >&6; }
-test "$dynamic_linker" = no && can_build_shared=no
-
-variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
-if test "$GCC" = yes; then
- variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
-fi
-
-if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then
- sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec"
-fi
-if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then
- sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec"
-fi
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5
-$as_echo_n "checking how to hardcode library paths into programs... " >&6; }
-hardcode_action=
-if test -n "$hardcode_libdir_flag_spec" ||
- test -n "$runpath_var" ||
- test "X$hardcode_automatic" = "Xyes" ; then
-
- # We can hardcode non-existent directories.
- if test "$hardcode_direct" != no &&
- # If the only mechanism to avoid hardcoding is shlibpath_var, we
- # have to relink, otherwise we might link with an installed library
- # when we should be linking with a yet-to-be-installed one
- ## test "$_LT_TAGVAR(hardcode_shlibpath_var, )" != no &&
- test "$hardcode_minus_L" != no; then
- # Linking always hardcodes the temporary library directory.
- hardcode_action=relink
- else
- # We can link without hardcoding, and we can hardcode nonexisting dirs.
- hardcode_action=immediate
- fi
-else
- # We cannot hardcode anything, or else we can only hardcode existing
- # directories.
- hardcode_action=unsupported
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5
-$as_echo "$hardcode_action" >&6; }
-
-if test "$hardcode_action" = relink ||
- test "$inherit_rpath" = yes; then
- # Fast installation is not supported
- enable_fast_install=no
-elif test "$shlibpath_overrides_runpath" = yes ||
- test "$enable_shared" = no; then
- # Fast installation is not necessary
- enable_fast_install=needless
-fi
-
-
-
-
-
-
- if test "x$enable_dlopen" != xyes; then
- enable_dlopen=unknown
- enable_dlopen_self=unknown
- enable_dlopen_self_static=unknown
-else
- lt_cv_dlopen=no
- lt_cv_dlopen_libs=
-
- case $host_os in
- beos*)
- lt_cv_dlopen="load_add_on"
- lt_cv_dlopen_libs=
- lt_cv_dlopen_self=yes
- ;;
-
- mingw* | pw32* | cegcc*)
- lt_cv_dlopen="LoadLibrary"
- lt_cv_dlopen_libs=
- ;;
-
- cygwin*)
- lt_cv_dlopen="dlopen"
- lt_cv_dlopen_libs=
- ;;
-
- darwin*)
- # if libdl is installed we need to link against it
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
-$as_echo_n "checking for dlopen in -ldl... " >&6; }
-if ${ac_cv_lib_dl_dlopen+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-ldl $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char dlopen ();
-int
-main ()
-{
-return dlopen ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_dl_dlopen=yes
-else
- ac_cv_lib_dl_dlopen=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
-$as_echo "$ac_cv_lib_dl_dlopen" >&6; }
-if test "x$ac_cv_lib_dl_dlopen" = xyes; then :
- lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
-else
-
- lt_cv_dlopen="dyld"
- lt_cv_dlopen_libs=
- lt_cv_dlopen_self=yes
-
-fi
-
- ;;
-
- *)
- ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load"
-if test "x$ac_cv_func_shl_load" = xyes; then :
- lt_cv_dlopen="shl_load"
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5
-$as_echo_n "checking for shl_load in -ldld... " >&6; }
-if ${ac_cv_lib_dld_shl_load+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-ldld $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char shl_load ();
-int
-main ()
-{
-return shl_load ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_dld_shl_load=yes
-else
- ac_cv_lib_dld_shl_load=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5
-$as_echo "$ac_cv_lib_dld_shl_load" >&6; }
-if test "x$ac_cv_lib_dld_shl_load" = xyes; then :
- lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"
-else
- ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen"
-if test "x$ac_cv_func_dlopen" = xyes; then :
- lt_cv_dlopen="dlopen"
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
-$as_echo_n "checking for dlopen in -ldl... " >&6; }
-if ${ac_cv_lib_dl_dlopen+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-ldl $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char dlopen ();
-int
-main ()
-{
-return dlopen ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_dl_dlopen=yes
-else
- ac_cv_lib_dl_dlopen=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
-$as_echo "$ac_cv_lib_dl_dlopen" >&6; }
-if test "x$ac_cv_lib_dl_dlopen" = xyes; then :
- lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5
-$as_echo_n "checking for dlopen in -lsvld... " >&6; }
-if ${ac_cv_lib_svld_dlopen+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lsvld $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char dlopen ();
-int
-main ()
-{
-return dlopen ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_svld_dlopen=yes
-else
- ac_cv_lib_svld_dlopen=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5
-$as_echo "$ac_cv_lib_svld_dlopen" >&6; }
-if test "x$ac_cv_lib_svld_dlopen" = xyes; then :
- lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5
-$as_echo_n "checking for dld_link in -ldld... " >&6; }
-if ${ac_cv_lib_dld_dld_link+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-ldld $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char dld_link ();
-int
-main ()
-{
-return dld_link ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_dld_dld_link=yes
-else
- ac_cv_lib_dld_dld_link=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5
-$as_echo "$ac_cv_lib_dld_dld_link" >&6; }
-if test "x$ac_cv_lib_dld_dld_link" = xyes; then :
- lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"
-fi
-
-
-fi
-
-
-fi
-
-
-fi
-
-
-fi
-
-
-fi
-
- ;;
- esac
-
- if test "x$lt_cv_dlopen" != xno; then
- enable_dlopen=yes
- else
- enable_dlopen=no
- fi
-
- case $lt_cv_dlopen in
- dlopen)
- save_CPPFLAGS="$CPPFLAGS"
- test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
-
- save_LDFLAGS="$LDFLAGS"
- wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"
-
- save_LIBS="$LIBS"
- LIBS="$lt_cv_dlopen_libs $LIBS"
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5
-$as_echo_n "checking whether a program can dlopen itself... " >&6; }
-if ${lt_cv_dlopen_self+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test "$cross_compiling" = yes; then :
- lt_cv_dlopen_self=cross
-else
- lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
- lt_status=$lt_dlunknown
- cat > conftest.$ac_ext <<_LT_EOF
-#line $LINENO "configure"
-#include "confdefs.h"
-
-#if HAVE_DLFCN_H
-#include <dlfcn.h>
-#endif
-
-#include <stdio.h>
-
-#ifdef RTLD_GLOBAL
-# define LT_DLGLOBAL RTLD_GLOBAL
-#else
-# ifdef DL_GLOBAL
-# define LT_DLGLOBAL DL_GLOBAL
-# else
-# define LT_DLGLOBAL 0
-# endif
-#endif
-
-/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
- find out it does not work in some platform. */
-#ifndef LT_DLLAZY_OR_NOW
-# ifdef RTLD_LAZY
-# define LT_DLLAZY_OR_NOW RTLD_LAZY
-# else
-# ifdef DL_LAZY
-# define LT_DLLAZY_OR_NOW DL_LAZY
-# else
-# ifdef RTLD_NOW
-# define LT_DLLAZY_OR_NOW RTLD_NOW
-# else
-# ifdef DL_NOW
-# define LT_DLLAZY_OR_NOW DL_NOW
-# else
-# define LT_DLLAZY_OR_NOW 0
-# endif
-# endif
-# endif
-# endif
-#endif
-
-/* When -fvisbility=hidden is used, assume the code has been annotated
- correspondingly for the symbols needed. */
-#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3))
-int fnord () __attribute__((visibility("default")));
-#endif
-
-int fnord () { return 42; }
-int main ()
-{
- void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
- int status = $lt_dlunknown;
-
- if (self)
- {
- if (dlsym (self,"fnord")) status = $lt_dlno_uscore;
- else
- {
- if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore;
- else puts (dlerror ());
- }
- /* dlclose (self); */
- }
- else
- puts (dlerror ());
-
- return status;
-}
-_LT_EOF
- if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then
- (./conftest; exit; ) >&5 2>/dev/null
- lt_status=$?
- case x$lt_status in
- x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;;
- x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;;
- x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;;
- esac
- else :
- # compilation failed
- lt_cv_dlopen_self=no
- fi
-fi
-rm -fr conftest*
-
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5
-$as_echo "$lt_cv_dlopen_self" >&6; }
-
- if test "x$lt_cv_dlopen_self" = xyes; then
- wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5
-$as_echo_n "checking whether a statically linked program can dlopen itself... " >&6; }
-if ${lt_cv_dlopen_self_static+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test "$cross_compiling" = yes; then :
- lt_cv_dlopen_self_static=cross
-else
- lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
- lt_status=$lt_dlunknown
- cat > conftest.$ac_ext <<_LT_EOF
-#line $LINENO "configure"
-#include "confdefs.h"
-
-#if HAVE_DLFCN_H
-#include <dlfcn.h>
-#endif
-
-#include <stdio.h>
-
-#ifdef RTLD_GLOBAL
-# define LT_DLGLOBAL RTLD_GLOBAL
-#else
-# ifdef DL_GLOBAL
-# define LT_DLGLOBAL DL_GLOBAL
-# else
-# define LT_DLGLOBAL 0
-# endif
-#endif
-
-/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
- find out it does not work in some platform. */
-#ifndef LT_DLLAZY_OR_NOW
-# ifdef RTLD_LAZY
-# define LT_DLLAZY_OR_NOW RTLD_LAZY
-# else
-# ifdef DL_LAZY
-# define LT_DLLAZY_OR_NOW DL_LAZY
-# else
-# ifdef RTLD_NOW
-# define LT_DLLAZY_OR_NOW RTLD_NOW
-# else
-# ifdef DL_NOW
-# define LT_DLLAZY_OR_NOW DL_NOW
-# else
-# define LT_DLLAZY_OR_NOW 0
-# endif
-# endif
-# endif
-# endif
-#endif
-
-/* When -fvisbility=hidden is used, assume the code has been annotated
- correspondingly for the symbols needed. */
-#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3))
-int fnord () __attribute__((visibility("default")));
-#endif
-
-int fnord () { return 42; }
-int main ()
-{
- void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
- int status = $lt_dlunknown;
-
- if (self)
- {
- if (dlsym (self,"fnord")) status = $lt_dlno_uscore;
- else
- {
- if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore;
- else puts (dlerror ());
- }
- /* dlclose (self); */
- }
- else
- puts (dlerror ());
-
- return status;
-}
-_LT_EOF
- if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then
- (./conftest; exit; ) >&5 2>/dev/null
- lt_status=$?
- case x$lt_status in
- x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;;
- x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;;
- x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;;
- esac
- else :
- # compilation failed
- lt_cv_dlopen_self_static=no
- fi
-fi
-rm -fr conftest*
-
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5
-$as_echo "$lt_cv_dlopen_self_static" >&6; }
- fi
-
- CPPFLAGS="$save_CPPFLAGS"
- LDFLAGS="$save_LDFLAGS"
- LIBS="$save_LIBS"
- ;;
- esac
-
- case $lt_cv_dlopen_self in
- yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
- *) enable_dlopen_self=unknown ;;
- esac
-
- case $lt_cv_dlopen_self_static in
- yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
- *) enable_dlopen_self_static=unknown ;;
- esac
-fi
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-striplib=
-old_striplib=
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5
-$as_echo_n "checking whether stripping libraries is possible... " >&6; }
-if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then
- test -z "$old_striplib" && old_striplib="$STRIP --strip-debug"
- test -z "$striplib" && striplib="$STRIP --strip-unneeded"
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-else
-# FIXME - insert some real tests, host_os isn't really good enough
- case $host_os in
- darwin*)
- if test -n "$STRIP" ; then
- striplib="$STRIP -x"
- old_striplib="$STRIP -S"
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
- else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
- fi
- ;;
- *)
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
- ;;
- esac
-fi
-
-
-
-
-
-
-
-
-
-
-
-
- # Report which library types will actually be built
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5
-$as_echo_n "checking if libtool supports shared libraries... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5
-$as_echo "$can_build_shared" >&6; }
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5
-$as_echo_n "checking whether to build shared libraries... " >&6; }
- test "$can_build_shared" = "no" && enable_shared=no
-
- # On AIX, shared libraries and static libraries use the same namespace, and
- # are all built from PIC.
- case $host_os in
- aix3*)
- test "$enable_shared" = yes && enable_static=no
- if test -n "$RANLIB"; then
- archive_cmds="$archive_cmds~\$RANLIB \$lib"
- postinstall_cmds='$RANLIB $lib'
- fi
- ;;
-
- aix[4-9]*)
- if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
- test "$enable_shared" = yes && enable_static=no
- fi
- ;;
- esac
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5
-$as_echo "$enable_shared" >&6; }
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5
-$as_echo_n "checking whether to build static libraries... " >&6; }
- # Make sure either enable_shared or enable_static is yes.
- test "$enable_shared" = yes || enable_static=yes
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5
-$as_echo "$enable_static" >&6; }
-
-
-
-
-fi
-ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-CC="$lt_save_CC"
-
- if test -n "$CXX" && ( test "X$CXX" != "Xno" &&
- ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) ||
- (test "X$CXX" != "Xg++"))) ; then
- ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5
-$as_echo_n "checking how to run the C++ preprocessor... " >&6; }
-if test -z "$CXXCPP"; then
- if ${ac_cv_prog_CXXCPP+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- # Double quotes because CXXCPP needs to be expanded
- for CXXCPP in "$CXX -E" "/lib/cpp"
- do
- ac_preproc_ok=false
-for ac_cxx_preproc_warn_flag in '' yes
-do
- # Use a header file that comes with gcc, so configuring glibc
- # with a fresh cross-compiler works.
- # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- # <limits.h> exists even on freestanding compilers.
- # On the NeXT, cc -E runs the code through the compiler's parser,
- # not just through cpp. "Syntax error" is here to catch this case.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
- Syntax error
-_ACEOF
-if ac_fn_cxx_try_cpp "$LINENO"; then :
-
-else
- # Broken: fails on valid input.
-continue
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-
- # OK, works on sane cases. Now check whether nonexistent headers
- # can be detected and how.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <ac_nonexistent.h>
-_ACEOF
-if ac_fn_cxx_try_cpp "$LINENO"; then :
- # Broken: success on invalid input.
-continue
-else
- # Passes both tests.
-ac_preproc_ok=:
-break
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-
-done
-# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.i conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then :
- break
-fi
-
- done
- ac_cv_prog_CXXCPP=$CXXCPP
-
-fi
- CXXCPP=$ac_cv_prog_CXXCPP
-else
- ac_cv_prog_CXXCPP=$CXXCPP
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXXCPP" >&5
-$as_echo "$CXXCPP" >&6; }
-ac_preproc_ok=false
-for ac_cxx_preproc_warn_flag in '' yes
-do
- # Use a header file that comes with gcc, so configuring glibc
- # with a fresh cross-compiler works.
- # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- # <limits.h> exists even on freestanding compilers.
- # On the NeXT, cc -E runs the code through the compiler's parser,
- # not just through cpp. "Syntax error" is here to catch this case.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
- Syntax error
-_ACEOF
-if ac_fn_cxx_try_cpp "$LINENO"; then :
-
-else
- # Broken: fails on valid input.
-continue
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-
- # OK, works on sane cases. Now check whether nonexistent headers
- # can be detected and how.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <ac_nonexistent.h>
-_ACEOF
-if ac_fn_cxx_try_cpp "$LINENO"; then :
- # Broken: success on invalid input.
-continue
-else
- # Passes both tests.
-ac_preproc_ok=:
-break
-fi
-rm -f conftest.err conftest.i conftest.$ac_ext
-
-done
-# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.i conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then :
-
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "C++ preprocessor \"$CXXCPP\" fails sanity check
-See \`config.log' for more details" "$LINENO" 5; }
-fi
-
-ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-else
- _lt_caught_CXX_error=yes
-fi
-
-ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-archive_cmds_need_lc_CXX=no
-allow_undefined_flag_CXX=
-always_export_symbols_CXX=no
-archive_expsym_cmds_CXX=
-compiler_needs_object_CXX=no
-export_dynamic_flag_spec_CXX=
-hardcode_direct_CXX=no
-hardcode_direct_absolute_CXX=no
-hardcode_libdir_flag_spec_CXX=
-hardcode_libdir_separator_CXX=
-hardcode_minus_L_CXX=no
-hardcode_shlibpath_var_CXX=unsupported
-hardcode_automatic_CXX=no
-inherit_rpath_CXX=no
-module_cmds_CXX=
-module_expsym_cmds_CXX=
-link_all_deplibs_CXX=unknown
-old_archive_cmds_CXX=$old_archive_cmds
-reload_flag_CXX=$reload_flag
-reload_cmds_CXX=$reload_cmds
-no_undefined_flag_CXX=
-whole_archive_flag_spec_CXX=
-enable_shared_with_static_runtimes_CXX=no
-
-# Source file extension for C++ test sources.
-ac_ext=cpp
-
-# Object file extension for compiled C++ test sources.
-objext=o
-objext_CXX=$objext
-
-# No sense in running all these tests if we already determined that
-# the CXX compiler isn't working. Some variables (like enable_shared)
-# are currently assumed to apply to all compilers on this platform,
-# and will be corrupted by setting them based on a non-working compiler.
-if test "$_lt_caught_CXX_error" != yes; then
- # Code to be used in simple compile tests
- lt_simple_compile_test_code="int some_variable = 0;"
-
- # Code to be used in simple link tests
- lt_simple_link_test_code='int main(int, char *[]) { return(0); }'
-
- # ltmain only uses $CC for tagged configurations so make sure $CC is set.
-
-
-
-
-
-
-# If no C compiler was specified, use CC.
-LTCC=${LTCC-"$CC"}
-
-# If no C compiler flags were specified, use CFLAGS.
-LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
-
-# Allow CC to be a program name with arguments.
-compiler=$CC
-
-
- # save warnings/boilerplate of simple test code
- ac_outfile=conftest.$ac_objext
-echo "$lt_simple_compile_test_code" >conftest.$ac_ext
-eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
-_lt_compiler_boilerplate=`cat conftest.err`
-$RM conftest*
-
- ac_outfile=conftest.$ac_objext
-echo "$lt_simple_link_test_code" >conftest.$ac_ext
-eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
-_lt_linker_boilerplate=`cat conftest.err`
-$RM -r conftest*
-
-
- # Allow CC to be a program name with arguments.
- lt_save_CC=$CC
- lt_save_CFLAGS=$CFLAGS
- lt_save_LD=$LD
- lt_save_GCC=$GCC
- GCC=$GXX
- lt_save_with_gnu_ld=$with_gnu_ld
- lt_save_path_LD=$lt_cv_path_LD
- if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then
- lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx
- else
- $as_unset lt_cv_prog_gnu_ld
- fi
- if test -n "${lt_cv_path_LDCXX+set}"; then
- lt_cv_path_LD=$lt_cv_path_LDCXX
- else
- $as_unset lt_cv_path_LD
- fi
- test -z "${LDCXX+set}" || LD=$LDCXX
- CC=${CXX-"c++"}
- CFLAGS=$CXXFLAGS
- compiler=$CC
- compiler_CXX=$CC
- for cc_temp in $compiler""; do
- case $cc_temp in
- compile | *[\\/]compile | ccache | *[\\/]ccache ) ;;
- distcc | *[\\/]distcc | purify | *[\\/]purify ) ;;
- \-*) ;;
- *) break;;
- esac
-done
-cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"`
-
-
- if test -n "$compiler"; then
- # We don't want -fno-exception when compiling C++ code, so set the
- # no_builtin_flag separately
- if test "$GXX" = yes; then
- lt_prog_compiler_no_builtin_flag_CXX=' -fno-builtin'
- else
- lt_prog_compiler_no_builtin_flag_CXX=
- fi
-
- if test "$GXX" = yes; then
- # Set up default GNU C++ configuration
-
-
-
-# Check whether --with-gnu-ld was given.
-if test "${with_gnu_ld+set}" = set; then :
- withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes
-else
- with_gnu_ld=no
-fi
-
-ac_prog=ld
-if test "$GCC" = yes; then
- # Check if gcc -print-prog-name=ld gives a path.
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5
-$as_echo_n "checking for ld used by $CC... " >&6; }
- case $host in
- *-*-mingw*)
- # gcc leaves a trailing carriage return which upsets mingw
- ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
- *)
- ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
- esac
- case $ac_prog in
- # Accept absolute paths.
- [\\/]* | ?:[\\/]*)
- re_direlt='/[^/][^/]*/\.\./'
- # Canonicalize the pathname of ld
- ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'`
- while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do
- ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"`
- done
- test -z "$LD" && LD="$ac_prog"
- ;;
- "")
- # If it fails, then pretend we aren't using GCC.
- ac_prog=ld
- ;;
- *)
- # If it is relative, then search for the first ld in PATH.
- with_gnu_ld=unknown
- ;;
- esac
-elif test "$with_gnu_ld" = yes; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5
-$as_echo_n "checking for GNU ld... " >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5
-$as_echo_n "checking for non-GNU ld... " >&6; }
-fi
-if ${lt_cv_path_LD+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if test -z "$LD"; then
- lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
- for ac_dir in $PATH; do
- IFS="$lt_save_ifs"
- test -z "$ac_dir" && ac_dir=.
- if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
- lt_cv_path_LD="$ac_dir/$ac_prog"
- # Check to see if the program is GNU ld. I'd rather use --version,
- # but apparently some variants of GNU ld only accept -v.
- # Break only if it was the GNU/non-GNU ld that we prefer.
- case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
- *GNU* | *'with BFD'*)
- test "$with_gnu_ld" != no && break
- ;;
- *)
- test "$with_gnu_ld" != yes && break
- ;;
- esac
- fi
- done
- IFS="$lt_save_ifs"
-else
- lt_cv_path_LD="$LD" # Let the user override the test with a path.
-fi
-fi
-
-LD="$lt_cv_path_LD"
-if test -n "$LD"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LD" >&5
-$as_echo "$LD" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5
-$as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; }
-if ${lt_cv_prog_gnu_ld+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- # I'd rather use --version here, but apparently some GNU lds only accept -v.
-case `$LD -v 2>&1 </dev/null` in
-*GNU* | *'with BFD'*)
- lt_cv_prog_gnu_ld=yes
- ;;
-*)
- lt_cv_prog_gnu_ld=no
- ;;
-esac
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_gnu_ld" >&5
-$as_echo "$lt_cv_prog_gnu_ld" >&6; }
-with_gnu_ld=$lt_cv_prog_gnu_ld
-
-
-
-
-
-
-
- # Check if GNU C++ uses GNU ld as the underlying linker, since the
- # archiving commands below assume that GNU ld is being used.
- if test "$with_gnu_ld" = yes; then
- archive_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
- archive_expsym_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
-
- hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
- export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
-
- # If archive_cmds runs LD, not CC, wlarc should be empty
- # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to
- # investigate it a little bit more. (MM)
- wlarc='${wl}'
-
- # ancient GNU ld didn't support --whole-archive et. al.
- if eval "`$CC -print-prog-name=ld` --help 2>&1" |
- $GREP 'no-whole-archive' > /dev/null; then
- whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
- else
- whole_archive_flag_spec_CXX=
- fi
- else
- with_gnu_ld=no
- wlarc=
-
- # A generic and very simple default shared library creation
- # command for GNU C++ for the case where it uses the native
- # linker, instead of GNU ld. If possible, this setting should
- # overridden to take advantage of the native linker features on
- # the platform it is being used on.
- archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
- fi
-
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
-
- else
- GXX=no
- with_gnu_ld=no
- wlarc=
- fi
-
- # PORTME: fill in a description of your system's C++ link characteristics
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5
-$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; }
- ld_shlibs_CXX=yes
- case $host_os in
- aix3*)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- aix[4-9]*)
- if test "$host_cpu" = ia64; then
- # On IA64, the linker does run time linking by default, so we don't
- # have to do anything special.
- aix_use_runtimelinking=no
- exp_sym_flag='-Bexport'
- no_entry_flag=""
- else
- aix_use_runtimelinking=no
-
- # Test if we are trying to use run time linking or normal
- # AIX style linking. If -brtl is somewhere in LDFLAGS, we
- # need to do runtime linking.
- case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*)
- for ld_flag in $LDFLAGS; do
- case $ld_flag in
- *-brtl*)
- aix_use_runtimelinking=yes
- break
- ;;
- esac
- done
- ;;
- esac
-
- exp_sym_flag='-bexport'
- no_entry_flag='-bnoentry'
- fi
-
- # When large executables or shared objects are built, AIX ld can
- # have problems creating the table of contents. If linking a library
- # or program results in "error TOC overflow" add -mminimal-toc to
- # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not
- # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
-
- archive_cmds_CXX=''
- hardcode_direct_CXX=yes
- hardcode_direct_absolute_CXX=yes
- hardcode_libdir_separator_CXX=':'
- link_all_deplibs_CXX=yes
- file_list_spec_CXX='${wl}-f,'
-
- if test "$GXX" = yes; then
- case $host_os in aix4.[012]|aix4.[012].*)
- # We only want to do this on AIX 4.2 and lower, the check
- # below for broken collect2 doesn't work under 4.3+
- collect2name=`${CC} -print-prog-name=collect2`
- if test -f "$collect2name" &&
- strings "$collect2name" | $GREP resolve_lib_name >/dev/null
- then
- # We have reworked collect2
- :
- else
- # We have old collect2
- hardcode_direct_CXX=unsupported
- # It fails to find uninstalled libraries when the uninstalled
- # path is not listed in the libpath. Setting hardcode_minus_L
- # to unsupported forces relinking
- hardcode_minus_L_CXX=yes
- hardcode_libdir_flag_spec_CXX='-L$libdir'
- hardcode_libdir_separator_CXX=
- fi
- esac
- shared_flag='-shared'
- if test "$aix_use_runtimelinking" = yes; then
- shared_flag="$shared_flag "'${wl}-G'
- fi
- else
- # not using gcc
- if test "$host_cpu" = ia64; then
- # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
- # chokes on -Wl,-G. The following line is correct:
- shared_flag='-G'
- else
- if test "$aix_use_runtimelinking" = yes; then
- shared_flag='${wl}-G'
- else
- shared_flag='${wl}-bM:SRE'
- fi
- fi
- fi
-
- export_dynamic_flag_spec_CXX='${wl}-bexpall'
- # It seems that -bexpall does not export symbols beginning with
- # underscore (_), so it is better to generate a list of symbols to
- # export.
- always_export_symbols_CXX=yes
- if test "$aix_use_runtimelinking" = yes; then
- # Warning - without using the other runtime loading flags (-brtl),
- # -berok will link without error, but may produce a broken library.
- allow_undefined_flag_CXX='-berok'
- # Determine the default libpath from the value encoded in an empty
- # executable.
- if test "${lt_cv_aix_libpath+set}" = set; then
- aix_libpath=$lt_cv_aix_libpath
-else
- if ${lt_cv_aix_libpath__CXX+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-
- lt_aix_libpath_sed='
- /Import File Strings/,/^$/ {
- /^0/ {
- s/^0 *\([^ ]*\) *$/\1/
- p
- }
- }'
- lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
- # Check for a 64-bit object if we didn't find anything.
- if test -z "$lt_cv_aix_libpath__CXX"; then
- lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
- fi
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- if test -z "$lt_cv_aix_libpath__CXX"; then
- lt_cv_aix_libpath__CXX="/usr/lib:/lib"
- fi
-
-fi
-
- aix_libpath=$lt_cv_aix_libpath__CXX
-fi
-
- hardcode_libdir_flag_spec_CXX='${wl}-blibpath:$libdir:'"$aix_libpath"
-
- archive_expsym_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
- else
- if test "$host_cpu" = ia64; then
- hardcode_libdir_flag_spec_CXX='${wl}-R $libdir:/usr/lib:/lib'
- allow_undefined_flag_CXX="-z nodefs"
- archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
- else
- # Determine the default libpath from the value encoded in an
- # empty executable.
- if test "${lt_cv_aix_libpath+set}" = set; then
- aix_libpath=$lt_cv_aix_libpath
-else
- if ${lt_cv_aix_libpath__CXX+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-
- lt_aix_libpath_sed='
- /Import File Strings/,/^$/ {
- /^0/ {
- s/^0 *\([^ ]*\) *$/\1/
- p
- }
- }'
- lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
- # Check for a 64-bit object if we didn't find anything.
- if test -z "$lt_cv_aix_libpath__CXX"; then
- lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
- fi
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- if test -z "$lt_cv_aix_libpath__CXX"; then
- lt_cv_aix_libpath__CXX="/usr/lib:/lib"
- fi
-
-fi
-
- aix_libpath=$lt_cv_aix_libpath__CXX
-fi
-
- hardcode_libdir_flag_spec_CXX='${wl}-blibpath:$libdir:'"$aix_libpath"
- # Warning - without using the other run time loading flags,
- # -berok will link without error, but may produce a broken library.
- no_undefined_flag_CXX=' ${wl}-bernotok'
- allow_undefined_flag_CXX=' ${wl}-berok'
- if test "$with_gnu_ld" = yes; then
- # We only use this code for GNU lds that support --whole-archive.
- whole_archive_flag_spec_CXX='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
- else
- # Exported symbols can be pulled into shared objects from archives
- whole_archive_flag_spec_CXX='$convenience'
- fi
- archive_cmds_need_lc_CXX=yes
- # This is similar to how AIX traditionally builds its shared
- # libraries.
- archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
- fi
- fi
- ;;
-
- beos*)
- if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
- allow_undefined_flag_CXX=unsupported
- # Joseph Beckenbach <jrb3 at best.com> says some releases of gcc
- # support --undefined. This deserves some investigation. FIXME
- archive_cmds_CXX='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- else
- ld_shlibs_CXX=no
- fi
- ;;
-
- chorus*)
- case $cc_basename in
- *)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- esac
- ;;
-
- cygwin* | mingw* | pw32* | cegcc*)
- case $GXX,$cc_basename in
- ,cl* | no,cl*)
- # Native MSVC
- # hardcode_libdir_flag_spec is actually meaningless, as there is
- # no search path for DLLs.
- hardcode_libdir_flag_spec_CXX=' '
- allow_undefined_flag_CXX=unsupported
- always_export_symbols_CXX=yes
- file_list_spec_CXX='@'
- # Tell ltmain to make .lib files, not .a files.
- libext=lib
- # Tell ltmain to make .dll files, not .so files.
- shrext_cmds=".dll"
- # FIXME: Setting linknames here is a bad hack.
- archive_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
- archive_expsym_cmds_CXX='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
- $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
- else
- $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
- fi~
- $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
- linknames='
- # The linker will not automatically build a static lib if we build a DLL.
- # _LT_TAGVAR(old_archive_from_new_cmds, CXX)='true'
- enable_shared_with_static_runtimes_CXX=yes
- # Don't use ranlib
- old_postinstall_cmds_CXX='chmod 644 $oldlib'
- postlink_cmds_CXX='lt_outputfile="@OUTPUT@"~
- lt_tool_outputfile="@TOOL_OUTPUT@"~
- case $lt_outputfile in
- *.exe|*.EXE) ;;
- *)
- lt_outputfile="$lt_outputfile.exe"
- lt_tool_outputfile="$lt_tool_outputfile.exe"
- ;;
- esac~
- func_to_tool_file "$lt_outputfile"~
- if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
- $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
- $RM "$lt_outputfile.manifest";
- fi'
- ;;
- *)
- # g++
- # _LT_TAGVAR(hardcode_libdir_flag_spec, CXX) is actually meaningless,
- # as there is no search path for DLLs.
- hardcode_libdir_flag_spec_CXX='-L$libdir'
- export_dynamic_flag_spec_CXX='${wl}--export-all-symbols'
- allow_undefined_flag_CXX=unsupported
- always_export_symbols_CXX=no
- enable_shared_with_static_runtimes_CXX=yes
-
- if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
- archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
- # If the export-symbols file already is a .def file (1st line
- # is EXPORTS), use it as is; otherwise, prepend...
- archive_expsym_cmds_CXX='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
- cp $export_symbols $output_objdir/$soname.def;
- else
- echo EXPORTS > $output_objdir/$soname.def;
- cat $export_symbols >> $output_objdir/$soname.def;
- fi~
- $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
- else
- ld_shlibs_CXX=no
- fi
- ;;
- esac
- ;;
- darwin* | rhapsody*)
-
-
- archive_cmds_need_lc_CXX=no
- hardcode_direct_CXX=no
- hardcode_automatic_CXX=yes
- hardcode_shlibpath_var_CXX=unsupported
- if test "$lt_cv_ld_force_load" = "yes"; then
- whole_archive_flag_spec_CXX='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
-
- else
- whole_archive_flag_spec_CXX=''
- fi
- link_all_deplibs_CXX=yes
- allow_undefined_flag_CXX="$_lt_dar_allow_undefined"
- case $cc_basename in
- ifort*) _lt_dar_can_shared=yes ;;
- *) _lt_dar_can_shared=$GCC ;;
- esac
- if test "$_lt_dar_can_shared" = "yes"; then
- output_verbose_link_cmd=func_echo_all
- archive_cmds_CXX="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}"
- module_cmds_CXX="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}"
- archive_expsym_cmds_CXX="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}"
- module_expsym_cmds_CXX="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}"
- if test "$lt_cv_apple_cc_single_mod" != "yes"; then
- archive_cmds_CXX="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}"
- archive_expsym_cmds_CXX="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}"
- fi
-
- else
- ld_shlibs_CXX=no
- fi
-
- ;;
-
- dgux*)
- case $cc_basename in
- ec++*)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- ghcx*)
- # Green Hills C++ Compiler
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- *)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- esac
- ;;
-
- freebsd2.*)
- # C++ shared libraries reported to be fairly broken before
- # switch to ELF
- ld_shlibs_CXX=no
- ;;
-
- freebsd-elf*)
- archive_cmds_need_lc_CXX=no
- ;;
-
- freebsd* | dragonfly*)
- # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF
- # conventions
- ld_shlibs_CXX=yes
- ;;
-
- gnu*)
- ;;
-
- haiku*)
- archive_cmds_CXX='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- link_all_deplibs_CXX=yes
- ;;
-
- hpux9*)
- hardcode_libdir_flag_spec_CXX='${wl}+b ${wl}$libdir'
- hardcode_libdir_separator_CXX=:
- export_dynamic_flag_spec_CXX='${wl}-E'
- hardcode_direct_CXX=yes
- hardcode_minus_L_CXX=yes # Not in the search PATH,
- # but as the default
- # location of the library.
-
- case $cc_basename in
- CC*)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- aCC*)
- archive_cmds_CXX='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- #
- # There doesn't appear to be a way to prevent this compiler from
- # explicitly linking system object files so we need to strip them
- # from the output so that they don't get included in the library
- # dependencies.
- output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
- ;;
- *)
- if test "$GXX" = yes; then
- archive_cmds_CXX='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
- else
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- fi
- ;;
- esac
- ;;
-
- hpux10*|hpux11*)
- if test $with_gnu_ld = no; then
- hardcode_libdir_flag_spec_CXX='${wl}+b ${wl}$libdir'
- hardcode_libdir_separator_CXX=:
-
- case $host_cpu in
- hppa*64*|ia64*)
- ;;
- *)
- export_dynamic_flag_spec_CXX='${wl}-E'
- ;;
- esac
- fi
- case $host_cpu in
- hppa*64*|ia64*)
- hardcode_direct_CXX=no
- hardcode_shlibpath_var_CXX=no
- ;;
- *)
- hardcode_direct_CXX=yes
- hardcode_direct_absolute_CXX=yes
- hardcode_minus_L_CXX=yes # Not in the search PATH,
- # but as the default
- # location of the library.
- ;;
- esac
-
- case $cc_basename in
- CC*)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- aCC*)
- case $host_cpu in
- hppa*64*)
- archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- ;;
- ia64*)
- archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- ;;
- *)
- archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- ;;
- esac
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- #
- # There doesn't appear to be a way to prevent this compiler from
- # explicitly linking system object files so we need to strip them
- # from the output so that they don't get included in the library
- # dependencies.
- output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
- ;;
- *)
- if test "$GXX" = yes; then
- if test $with_gnu_ld = no; then
- case $host_cpu in
- hppa*64*)
- archive_cmds_CXX='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- ;;
- ia64*)
- archive_cmds_CXX='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- ;;
- *)
- archive_cmds_CXX='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- ;;
- esac
- fi
- else
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- fi
- ;;
- esac
- ;;
-
- interix[3-9]*)
- hardcode_direct_CXX=no
- hardcode_shlibpath_var_CXX=no
- hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
- export_dynamic_flag_spec_CXX='${wl}-E'
- # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
- # Instead, shared libraries are loaded at an image base (0x10000000 by
- # default) and relocated if they conflict, which is a slow very memory
- # consuming and fragmenting process. To avoid this, we pick a random,
- # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
- # time. Moving up from 0x10000000 also allows more sbrk(2) space.
- archive_cmds_CXX='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
- archive_expsym_cmds_CXX='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
- ;;
- irix5* | irix6*)
- case $cc_basename in
- CC*)
- # SGI C++
- archive_cmds_CXX='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
-
- # Archives containing C++ object files must be created using
- # "CC -ar", where "CC" is the IRIX C++ compiler. This is
- # necessary to make sure instantiated templates are included
- # in the archive.
- old_archive_cmds_CXX='$CC -ar -WR,-u -o $oldlib $oldobjs'
- ;;
- *)
- if test "$GXX" = yes; then
- if test "$with_gnu_ld" = no; then
- archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
- else
- archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib'
- fi
- fi
- link_all_deplibs_CXX=yes
- ;;
- esac
- hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
- hardcode_libdir_separator_CXX=:
- inherit_rpath_CXX=yes
- ;;
-
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
- case $cc_basename in
- KCC*)
- # Kuck and Associates, Inc. (KAI) C++ Compiler
-
- # KCC will only create a shared library if the output file
- # ends with ".so" (or ".sl" for HP-UX), so rename the library
- # to its proper name (with version) after linking.
- archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
- archive_expsym_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib'
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- #
- # There doesn't appear to be a way to prevent this compiler from
- # explicitly linking system object files so we need to strip them
- # from the output so that they don't get included in the library
- # dependencies.
- output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
-
- hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
- export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
-
- # Archives containing C++ object files must be created using
- # "CC -Bstatic", where "CC" is the KAI C++ compiler.
- old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs'
- ;;
- icpc* | ecpc* )
- # Intel C++
- with_gnu_ld=yes
- # version 8.0 and above of icpc choke on multiply defined symbols
- # if we add $predep_objects and $postdep_objects, however 7.1 and
- # earlier do not add the objects themselves.
- case `$CC -V 2>&1` in
- *"Version 7."*)
- archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
- archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
- ;;
- *) # Version 8.0 or newer
- tmp_idyn=
- case $host_cpu in
- ia64*) tmp_idyn=' -i_dynamic';;
- esac
- archive_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- archive_expsym_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
- ;;
- esac
- archive_cmds_need_lc_CXX=no
- hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
- export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
- whole_archive_flag_spec_CXX='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
- ;;
- pgCC* | pgcpp*)
- # Portland Group C++ compiler
- case `$CC -V` in
- *pgCC\ [1-5].* | *pgcpp\ [1-5].*)
- prelink_cmds_CXX='tpldir=Template.dir~
- rm -rf $tpldir~
- $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~
- compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"'
- old_archive_cmds_CXX='tpldir=Template.dir~
- rm -rf $tpldir~
- $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~
- $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~
- $RANLIB $oldlib'
- archive_cmds_CXX='tpldir=Template.dir~
- rm -rf $tpldir~
- $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
- $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
- archive_expsym_cmds_CXX='tpldir=Template.dir~
- rm -rf $tpldir~
- $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
- $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
- ;;
- *) # Version 6 and above use weak symbols
- archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
- archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
- ;;
- esac
-
- hardcode_libdir_flag_spec_CXX='${wl}--rpath ${wl}$libdir'
- export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
- whole_archive_flag_spec_CXX='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
- ;;
- cxx*)
- # Compaq C++
- archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
- archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols'
-
- runpath_var=LD_RUN_PATH
- hardcode_libdir_flag_spec_CXX='-rpath $libdir'
- hardcode_libdir_separator_CXX=:
-
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- #
- # There doesn't appear to be a way to prevent this compiler from
- # explicitly linking system object files so we need to strip them
- # from the output so that they don't get included in the library
- # dependencies.
- output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed'
- ;;
- xl* | mpixl* | bgxl*)
- # IBM XL 8.0 on PPC, with GNU ld
- hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
- export_dynamic_flag_spec_CXX='${wl}--export-dynamic'
- archive_cmds_CXX='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- if test "x$supports_anon_versioning" = xyes; then
- archive_expsym_cmds_CXX='echo "{ global:" > $output_objdir/$libname.ver~
- cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
- echo "local: *; };" >> $output_objdir/$libname.ver~
- $CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
- fi
- ;;
- *)
- case `$CC -V 2>&1 | sed 5q` in
- *Sun\ C*)
- # Sun C++ 5.9
- no_undefined_flag_CXX=' -zdefs'
- archive_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- archive_expsym_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols'
- hardcode_libdir_flag_spec_CXX='-R$libdir'
- whole_archive_flag_spec_CXX='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
- compiler_needs_object_CXX=yes
-
- # Not sure whether something based on
- # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1
- # would be better.
- output_verbose_link_cmd='func_echo_all'
-
- # Archives containing C++ object files must be created using
- # "CC -xar", where "CC" is the Sun C++ compiler. This is
- # necessary to make sure instantiated templates are included
- # in the archive.
- old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs'
- ;;
- esac
- ;;
- esac
- ;;
-
- lynxos*)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
-
- m88k*)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
-
- mvs*)
- case $cc_basename in
- cxx*)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- *)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- esac
- ;;
-
- netbsd*)
- if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
- archive_cmds_CXX='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags'
- wlarc=
- hardcode_libdir_flag_spec_CXX='-R$libdir'
- hardcode_direct_CXX=yes
- hardcode_shlibpath_var_CXX=no
- fi
- # Workaround some broken pre-1.5 toolchains
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"'
- ;;
-
- *nto* | *qnx*)
- ld_shlibs_CXX=yes
- ;;
-
- openbsd2*)
- # C++ shared libraries are fairly broken
- ld_shlibs_CXX=no
- ;;
-
- openbsd*)
- if test -f /usr/libexec/ld.so; then
- hardcode_direct_CXX=yes
- hardcode_shlibpath_var_CXX=no
- hardcode_direct_absolute_CXX=yes
- archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
- hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
- if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
- archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib'
- export_dynamic_flag_spec_CXX='${wl}-E'
- whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
- fi
- output_verbose_link_cmd=func_echo_all
- else
- ld_shlibs_CXX=no
- fi
- ;;
-
- osf3* | osf4* | osf5*)
- case $cc_basename in
- KCC*)
- # Kuck and Associates, Inc. (KAI) C++ Compiler
-
- # KCC will only create a shared library if the output file
- # ends with ".so" (or ".sl" for HP-UX), so rename the library
- # to its proper name (with version) after linking.
- archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
-
- hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir'
- hardcode_libdir_separator_CXX=:
-
- # Archives containing C++ object files must be created using
- # the KAI C++ compiler.
- case $host in
- osf3*) old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' ;;
- *) old_archive_cmds_CXX='$CC -o $oldlib $oldobjs' ;;
- esac
- ;;
- RCC*)
- # Rational C++ 2.4.1
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- cxx*)
- case $host in
- osf3*)
- allow_undefined_flag_CXX=' ${wl}-expect_unresolved ${wl}\*'
- archive_cmds_CXX='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
- hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
- ;;
- *)
- allow_undefined_flag_CXX=' -expect_unresolved \*'
- archive_cmds_CXX='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
- archive_expsym_cmds_CXX='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~
- echo "-hidden">> $lib.exp~
- $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~
- $RM $lib.exp'
- hardcode_libdir_flag_spec_CXX='-rpath $libdir'
- ;;
- esac
-
- hardcode_libdir_separator_CXX=:
-
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- #
- # There doesn't appear to be a way to prevent this compiler from
- # explicitly linking system object files so we need to strip them
- # from the output so that they don't get included in the library
- # dependencies.
- output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
- ;;
- *)
- if test "$GXX" = yes && test "$with_gnu_ld" = no; then
- allow_undefined_flag_CXX=' ${wl}-expect_unresolved ${wl}\*'
- case $host in
- osf3*)
- archive_cmds_CXX='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
- ;;
- *)
- archive_cmds_CXX='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
- ;;
- esac
-
- hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir'
- hardcode_libdir_separator_CXX=:
-
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
-
- else
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- fi
- ;;
- esac
- ;;
-
- psos*)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
-
- sunos4*)
- case $cc_basename in
- CC*)
- # Sun C++ 4.x
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- lcc*)
- # Lucid
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- *)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- esac
- ;;
-
- solaris*)
- case $cc_basename in
- CC* | sunCC*)
- # Sun C++ 4.2, 5.x and Centerline C++
- archive_cmds_need_lc_CXX=yes
- no_undefined_flag_CXX=' -zdefs'
- archive_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
- $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
-
- hardcode_libdir_flag_spec_CXX='-R$libdir'
- hardcode_shlibpath_var_CXX=no
- case $host_os in
- solaris2.[0-5] | solaris2.[0-5].*) ;;
- *)
- # The compiler driver will combine and reorder linker options,
- # but understands `-z linker_flag'.
- # Supported since Solaris 2.6 (maybe 2.5.1?)
- whole_archive_flag_spec_CXX='-z allextract$convenience -z defaultextract'
- ;;
- esac
- link_all_deplibs_CXX=yes
-
- output_verbose_link_cmd='func_echo_all'
-
- # Archives containing C++ object files must be created using
- # "CC -xar", where "CC" is the Sun C++ compiler. This is
- # necessary to make sure instantiated templates are included
- # in the archive.
- old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs'
- ;;
- gcx*)
- # Green Hills C++ Compiler
- archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
-
- # The C++ compiler must be used to create the archive.
- old_archive_cmds_CXX='$CC $LDFLAGS -archive -o $oldlib $oldobjs'
- ;;
- *)
- # GNU C++ compiler with Solaris linker
- if test "$GXX" = yes && test "$with_gnu_ld" = no; then
- no_undefined_flag_CXX=' ${wl}-z ${wl}defs'
- if $CC --version | $GREP -v '^2\.7' > /dev/null; then
- archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
- archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
- $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
-
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
- else
- # g++ 2.7 appears to require `-G' NOT `-shared' on this
- # platform.
- archive_cmds_CXX='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
- archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
- $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
-
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
- fi
-
- hardcode_libdir_flag_spec_CXX='${wl}-R $wl$libdir'
- case $host_os in
- solaris2.[0-5] | solaris2.[0-5].*) ;;
- *)
- whole_archive_flag_spec_CXX='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
- ;;
- esac
- fi
- ;;
- esac
- ;;
-
- sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*)
- no_undefined_flag_CXX='${wl}-z,text'
- archive_cmds_need_lc_CXX=no
- hardcode_shlibpath_var_CXX=no
- runpath_var='LD_RUN_PATH'
-
- case $cc_basename in
- CC*)
- archive_cmds_CXX='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- archive_expsym_cmds_CXX='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- *)
- archive_cmds_CXX='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- archive_expsym_cmds_CXX='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- esac
- ;;
-
- sysv5* | sco3.2v5* | sco5v6*)
- # Note: We can NOT use -z defs as we might desire, because we do not
- # link with -lc, and that would cause any symbols used from libc to
- # always be unresolved, which means just about no library would
- # ever link correctly. If we're not using GNU ld we use -z text
- # though, which does catch some bad symbols but isn't as heavy-handed
- # as -z defs.
- no_undefined_flag_CXX='${wl}-z,text'
- allow_undefined_flag_CXX='${wl}-z,nodefs'
- archive_cmds_need_lc_CXX=no
- hardcode_shlibpath_var_CXX=no
- hardcode_libdir_flag_spec_CXX='${wl}-R,$libdir'
- hardcode_libdir_separator_CXX=':'
- link_all_deplibs_CXX=yes
- export_dynamic_flag_spec_CXX='${wl}-Bexport'
- runpath_var='LD_RUN_PATH'
-
- case $cc_basename in
- CC*)
- archive_cmds_CXX='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- archive_expsym_cmds_CXX='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- old_archive_cmds_CXX='$CC -Tprelink_objects $oldobjs~
- '"$old_archive_cmds_CXX"
- reload_cmds_CXX='$CC -Tprelink_objects $reload_objs~
- '"$reload_cmds_CXX"
- ;;
- *)
- archive_cmds_CXX='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- archive_expsym_cmds_CXX='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- esac
- ;;
-
- tandem*)
- case $cc_basename in
- NCC*)
- # NonStop-UX NCC 3.20
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- *)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- esac
- ;;
-
- vxworks*)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
-
- *)
- # FIXME: insert proper C++ library support
- ld_shlibs_CXX=no
- ;;
- esac
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5
-$as_echo "$ld_shlibs_CXX" >&6; }
- test "$ld_shlibs_CXX" = no && can_build_shared=no
-
- GCC_CXX="$GXX"
- LD_CXX="$LD"
-
- ## CAVEAT EMPTOR:
- ## There is no encapsulation within the following macros, do not change
- ## the running order or otherwise move them around unless you know exactly
- ## what you are doing...
- # Dependencies to place before and after the object being linked:
-predep_objects_CXX=
-postdep_objects_CXX=
-predeps_CXX=
-postdeps_CXX=
-compiler_lib_search_path_CXX=
-
-cat > conftest.$ac_ext <<_LT_EOF
-class Foo
-{
-public:
- Foo (void) { a = 0; }
-private:
- int a;
-};
-_LT_EOF
-
-
-_lt_libdeps_save_CFLAGS=$CFLAGS
-case "$CC $CFLAGS " in #(
-*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
-*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
-*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
-esac
-
-if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; then
- # Parse the compiler output and extract the necessary
- # objects, libraries and library flags.
-
- # Sentinel used to keep track of whether or not we are before
- # the conftest object file.
- pre_test_object_deps_done=no
-
- for p in `eval "$output_verbose_link_cmd"`; do
- case ${prev}${p} in
-
- -L* | -R* | -l*)
- # Some compilers place space between "-{L,R}" and the path.
- # Remove the space.
- if test $p = "-L" ||
- test $p = "-R"; then
- prev=$p
- continue
- fi
-
- # Expand the sysroot to ease extracting the directories later.
- if test -z "$prev"; then
- case $p in
- -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;;
- -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;;
- -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;;
- esac
- fi
- case $p in
- =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;;
- esac
- if test "$pre_test_object_deps_done" = no; then
- case ${prev} in
- -L | -R)
- # Internal compiler library paths should come after those
- # provided the user. The postdeps already come after the
- # user supplied libs so there is no need to process them.
- if test -z "$compiler_lib_search_path_CXX"; then
- compiler_lib_search_path_CXX="${prev}${p}"
- else
- compiler_lib_search_path_CXX="${compiler_lib_search_path_CXX} ${prev}${p}"
- fi
- ;;
- # The "-l" case would never come before the object being
- # linked, so don't bother handling this case.
- esac
- else
- if test -z "$postdeps_CXX"; then
- postdeps_CXX="${prev}${p}"
- else
- postdeps_CXX="${postdeps_CXX} ${prev}${p}"
- fi
- fi
- prev=
- ;;
-
- *.lto.$objext) ;; # Ignore GCC LTO objects
- *.$objext)
- # This assumes that the test object file only shows up
- # once in the compiler output.
- if test "$p" = "conftest.$objext"; then
- pre_test_object_deps_done=yes
- continue
- fi
-
- if test "$pre_test_object_deps_done" = no; then
- if test -z "$predep_objects_CXX"; then
- predep_objects_CXX="$p"
- else
- predep_objects_CXX="$predep_objects_CXX $p"
- fi
- else
- if test -z "$postdep_objects_CXX"; then
- postdep_objects_CXX="$p"
- else
- postdep_objects_CXX="$postdep_objects_CXX $p"
- fi
- fi
- ;;
-
- *) ;; # Ignore the rest.
-
- esac
- done
-
- # Clean up.
- rm -f a.out a.exe
-else
- echo "libtool.m4: error: problem compiling CXX test program"
-fi
-
-$RM -f confest.$objext
-CFLAGS=$_lt_libdeps_save_CFLAGS
-
-# PORTME: override above test on systems where it is broken
-case $host_os in
-interix[3-9]*)
- # Interix 3.5 installs completely hosed .la files for C++, so rather than
- # hack all around it, let's just trust "g++" to DTRT.
- predep_objects_CXX=
- postdep_objects_CXX=
- postdeps_CXX=
- ;;
-
-linux*)
- case `$CC -V 2>&1 | sed 5q` in
- *Sun\ C*)
- # Sun C++ 5.9
-
- # The more standards-conforming stlport4 library is
- # incompatible with the Cstd library. Avoid specifying
- # it if it's in CXXFLAGS. Ignore libCrun as
- # -library=stlport4 depends on it.
- case " $CXX $CXXFLAGS " in
- *" -library=stlport4 "*)
- solaris_use_stlport4=yes
- ;;
- esac
-
- if test "$solaris_use_stlport4" != yes; then
- postdeps_CXX='-library=Cstd -library=Crun'
- fi
- ;;
- esac
- ;;
-
-solaris*)
- case $cc_basename in
- CC* | sunCC*)
- # The more standards-conforming stlport4 library is
- # incompatible with the Cstd library. Avoid specifying
- # it if it's in CXXFLAGS. Ignore libCrun as
- # -library=stlport4 depends on it.
- case " $CXX $CXXFLAGS " in
- *" -library=stlport4 "*)
- solaris_use_stlport4=yes
- ;;
- esac
-
- # Adding this requires a known-good setup of shared libraries for
- # Sun compiler versions before 5.6, else PIC objects from an old
- # archive will be linked into the output, leading to subtle bugs.
- if test "$solaris_use_stlport4" != yes; then
- postdeps_CXX='-library=Cstd -library=Crun'
- fi
- ;;
- esac
- ;;
-esac
-
-
-case " $postdeps_CXX " in
-*" -lc "*) archive_cmds_need_lc_CXX=no ;;
-esac
- compiler_lib_search_dirs_CXX=
-if test -n "${compiler_lib_search_path_CXX}"; then
- compiler_lib_search_dirs_CXX=`echo " ${compiler_lib_search_path_CXX}" | ${SED} -e 's! -L! !g' -e 's!^ !!'`
-fi
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- lt_prog_compiler_wl_CXX=
-lt_prog_compiler_pic_CXX=
-lt_prog_compiler_static_CXX=
-
-
- # C++ specific cases for pic, static, wl, etc.
- if test "$GXX" = yes; then
- lt_prog_compiler_wl_CXX='-Wl,'
- lt_prog_compiler_static_CXX='-static'
-
- case $host_os in
- aix*)
- # All AIX code is PIC.
- if test "$host_cpu" = ia64; then
- # AIX 5 now supports IA64 processor
- lt_prog_compiler_static_CXX='-Bstatic'
- fi
- ;;
-
- amigaos*)
- case $host_cpu in
- powerpc)
- # see comment about AmigaOS4 .so support
- lt_prog_compiler_pic_CXX='-fPIC'
- ;;
- m68k)
- # FIXME: we need at least 68020 code to build shared libraries, but
- # adding the `-m68020' flag to GCC prevents building anything better,
- # like `-m68040'.
- lt_prog_compiler_pic_CXX='-m68020 -resident32 -malways-restore-a4'
- ;;
- esac
- ;;
-
- beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
- # PIC is the default for these OSes.
- ;;
- mingw* | cygwin* | os2* | pw32* | cegcc*)
- # This hack is so that the source file can tell whether it is being
- # built for inclusion in a dll (and should export symbols for example).
- # Although the cygwin gcc ignores -fPIC, still need this for old-style
- # (--disable-auto-import) libraries
- lt_prog_compiler_pic_CXX='-DDLL_EXPORT'
- ;;
- darwin* | rhapsody*)
- # PIC is the default on this platform
- # Common symbols not allowed in MH_DYLIB files
- lt_prog_compiler_pic_CXX='-fno-common'
- ;;
- *djgpp*)
- # DJGPP does not support shared libraries at all
- lt_prog_compiler_pic_CXX=
- ;;
- haiku*)
- # PIC is the default for Haiku.
- # The "-static" flag exists, but is broken.
- lt_prog_compiler_static_CXX=
- ;;
- interix[3-9]*)
- # Interix 3.x gcc -fpic/-fPIC options generate broken code.
- # Instead, we relocate shared libraries at runtime.
- ;;
- sysv4*MP*)
- if test -d /usr/nec; then
- lt_prog_compiler_pic_CXX=-Kconform_pic
- fi
- ;;
- hpux*)
- # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
- # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag
- # sets the default TLS model and affects inlining.
- case $host_cpu in
- hppa*64*)
- ;;
- *)
- lt_prog_compiler_pic_CXX='-fPIC'
- ;;
- esac
- ;;
- *qnx* | *nto*)
- # QNX uses GNU C++, but need to define -shared option too, otherwise
- # it will coredump.
- lt_prog_compiler_pic_CXX='-fPIC -shared'
- ;;
- *)
- lt_prog_compiler_pic_CXX='-fPIC'
- ;;
- esac
- else
- case $host_os in
- aix[4-9]*)
- # All AIX code is PIC.
- if test "$host_cpu" = ia64; then
- # AIX 5 now supports IA64 processor
- lt_prog_compiler_static_CXX='-Bstatic'
- else
- lt_prog_compiler_static_CXX='-bnso -bI:/lib/syscalls.exp'
- fi
- ;;
- chorus*)
- case $cc_basename in
- cxch68*)
- # Green Hills C++ Compiler
- # _LT_TAGVAR(lt_prog_compiler_static, CXX)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a"
- ;;
- esac
- ;;
- mingw* | cygwin* | os2* | pw32* | cegcc*)
- # This hack is so that the source file can tell whether it is being
- # built for inclusion in a dll (and should export symbols for example).
- lt_prog_compiler_pic_CXX='-DDLL_EXPORT'
- ;;
- dgux*)
- case $cc_basename in
- ec++*)
- lt_prog_compiler_pic_CXX='-KPIC'
- ;;
- ghcx*)
- # Green Hills C++ Compiler
- lt_prog_compiler_pic_CXX='-pic'
- ;;
- *)
- ;;
- esac
- ;;
- freebsd* | dragonfly*)
- # FreeBSD uses GNU C++
- ;;
- hpux9* | hpux10* | hpux11*)
- case $cc_basename in
- CC*)
- lt_prog_compiler_wl_CXX='-Wl,'
- lt_prog_compiler_static_CXX='${wl}-a ${wl}archive'
- if test "$host_cpu" != ia64; then
- lt_prog_compiler_pic_CXX='+Z'
- fi
- ;;
- aCC*)
- lt_prog_compiler_wl_CXX='-Wl,'
- lt_prog_compiler_static_CXX='${wl}-a ${wl}archive'
- case $host_cpu in
- hppa*64*|ia64*)
- # +Z the default
- ;;
- *)
- lt_prog_compiler_pic_CXX='+Z'
- ;;
- esac
- ;;
- *)
- ;;
- esac
- ;;
- interix*)
- # This is c89, which is MS Visual C++ (no shared libs)
- # Anyone wants to do a port?
- ;;
- irix5* | irix6* | nonstopux*)
- case $cc_basename in
- CC*)
- lt_prog_compiler_wl_CXX='-Wl,'
- lt_prog_compiler_static_CXX='-non_shared'
- # CC pic flag -KPIC is the default.
- ;;
- *)
- ;;
- esac
- ;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
- case $cc_basename in
- KCC*)
- # KAI C++ Compiler
- lt_prog_compiler_wl_CXX='--backend -Wl,'
- lt_prog_compiler_pic_CXX='-fPIC'
- ;;
- ecpc* )
- # old Intel C++ for x86_64 which still supported -KPIC.
- lt_prog_compiler_wl_CXX='-Wl,'
- lt_prog_compiler_pic_CXX='-KPIC'
- lt_prog_compiler_static_CXX='-static'
- ;;
- icpc* )
- # Intel C++, used to be incompatible with GCC.
- # ICC 10 doesn't accept -KPIC any more.
- lt_prog_compiler_wl_CXX='-Wl,'
- lt_prog_compiler_pic_CXX='-fPIC'
- lt_prog_compiler_static_CXX='-static'
- ;;
- pgCC* | pgcpp*)
- # Portland Group C++ compiler
- lt_prog_compiler_wl_CXX='-Wl,'
- lt_prog_compiler_pic_CXX='-fpic'
- lt_prog_compiler_static_CXX='-Bstatic'
- ;;
- cxx*)
- # Compaq C++
- # Make sure the PIC flag is empty. It appears that all Alpha
- # Linux and Compaq Tru64 Unix objects are PIC.
- lt_prog_compiler_pic_CXX=
- lt_prog_compiler_static_CXX='-non_shared'
- ;;
- xlc* | xlC* | bgxl[cC]* | mpixl[cC]*)
- # IBM XL 8.0, 9.0 on PPC and BlueGene
- lt_prog_compiler_wl_CXX='-Wl,'
- lt_prog_compiler_pic_CXX='-qpic'
- lt_prog_compiler_static_CXX='-qstaticlink'
- ;;
- *)
- case `$CC -V 2>&1 | sed 5q` in
- *Sun\ C*)
- # Sun C++ 5.9
- lt_prog_compiler_pic_CXX='-KPIC'
- lt_prog_compiler_static_CXX='-Bstatic'
- lt_prog_compiler_wl_CXX='-Qoption ld '
- ;;
- esac
- ;;
- esac
- ;;
- lynxos*)
- ;;
- m88k*)
- ;;
- mvs*)
- case $cc_basename in
- cxx*)
- lt_prog_compiler_pic_CXX='-W c,exportall'
- ;;
- *)
- ;;
- esac
- ;;
- netbsd*)
- ;;
- *qnx* | *nto*)
- # QNX uses GNU C++, but need to define -shared option too, otherwise
- # it will coredump.
- lt_prog_compiler_pic_CXX='-fPIC -shared'
- ;;
- osf3* | osf4* | osf5*)
- case $cc_basename in
- KCC*)
- lt_prog_compiler_wl_CXX='--backend -Wl,'
- ;;
- RCC*)
- # Rational C++ 2.4.1
- lt_prog_compiler_pic_CXX='-pic'
- ;;
- cxx*)
- # Digital/Compaq C++
- lt_prog_compiler_wl_CXX='-Wl,'
- # Make sure the PIC flag is empty. It appears that all Alpha
- # Linux and Compaq Tru64 Unix objects are PIC.
- lt_prog_compiler_pic_CXX=
- lt_prog_compiler_static_CXX='-non_shared'
- ;;
- *)
- ;;
- esac
- ;;
- psos*)
- ;;
- solaris*)
- case $cc_basename in
- CC* | sunCC*)
- # Sun C++ 4.2, 5.x and Centerline C++
- lt_prog_compiler_pic_CXX='-KPIC'
- lt_prog_compiler_static_CXX='-Bstatic'
- lt_prog_compiler_wl_CXX='-Qoption ld '
- ;;
- gcx*)
- # Green Hills C++ Compiler
- lt_prog_compiler_pic_CXX='-PIC'
- ;;
- *)
- ;;
- esac
- ;;
- sunos4*)
- case $cc_basename in
- CC*)
- # Sun C++ 4.x
- lt_prog_compiler_pic_CXX='-pic'
- lt_prog_compiler_static_CXX='-Bstatic'
- ;;
- lcc*)
- # Lucid
- lt_prog_compiler_pic_CXX='-pic'
- ;;
- *)
- ;;
- esac
- ;;
- sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
- case $cc_basename in
- CC*)
- lt_prog_compiler_wl_CXX='-Wl,'
- lt_prog_compiler_pic_CXX='-KPIC'
- lt_prog_compiler_static_CXX='-Bstatic'
- ;;
- esac
- ;;
- tandem*)
- case $cc_basename in
- NCC*)
- # NonStop-UX NCC 3.20
- lt_prog_compiler_pic_CXX='-KPIC'
- ;;
- *)
- ;;
- esac
- ;;
- vxworks*)
- ;;
- *)
- lt_prog_compiler_can_build_shared_CXX=no
- ;;
- esac
- fi
-
-case $host_os in
- # For platforms which do not support PIC, -DPIC is meaningless:
- *djgpp*)
- lt_prog_compiler_pic_CXX=
- ;;
- *)
- lt_prog_compiler_pic_CXX="$lt_prog_compiler_pic_CXX -DPIC"
- ;;
-esac
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5
-$as_echo_n "checking for $compiler option to produce PIC... " >&6; }
-if ${lt_cv_prog_compiler_pic_CXX+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_prog_compiler_pic_CXX=$lt_prog_compiler_pic_CXX
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_CXX" >&5
-$as_echo "$lt_cv_prog_compiler_pic_CXX" >&6; }
-lt_prog_compiler_pic_CXX=$lt_cv_prog_compiler_pic_CXX
-
-#
-# Check to make sure the PIC flag actually works.
-#
-if test -n "$lt_prog_compiler_pic_CXX"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works" >&5
-$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works... " >&6; }
-if ${lt_cv_prog_compiler_pic_works_CXX+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_prog_compiler_pic_works_CXX=no
- ac_outfile=conftest.$ac_objext
- echo "$lt_simple_compile_test_code" > conftest.$ac_ext
- lt_compiler_flag="$lt_prog_compiler_pic_CXX -DPIC"
- # Insert the option either (1) after the last *FLAGS variable, or
- # (2) before a word containing "conftest.", or (3) at the end.
- # Note that $ac_compile itself does not contain backslashes and begins
- # with a dollar sign (not a hyphen), so the echo should work correctly.
- # The option is referenced via a variable to avoid confusing sed.
- lt_compile=`echo "$ac_compile" | $SED \
- -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
- -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
- -e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
- (eval "$lt_compile" 2>conftest.err)
- ac_status=$?
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- if (exit $ac_status) && test -s "$ac_outfile"; then
- # The compiler can only warn and ignore the option if not recognized
- # So say no if there are warnings other than the usual output.
- $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
- $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
- if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
- lt_cv_prog_compiler_pic_works_CXX=yes
- fi
- fi
- $RM conftest*
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_CXX" >&5
-$as_echo "$lt_cv_prog_compiler_pic_works_CXX" >&6; }
-
-if test x"$lt_cv_prog_compiler_pic_works_CXX" = xyes; then
- case $lt_prog_compiler_pic_CXX in
- "" | " "*) ;;
- *) lt_prog_compiler_pic_CXX=" $lt_prog_compiler_pic_CXX" ;;
- esac
-else
- lt_prog_compiler_pic_CXX=
- lt_prog_compiler_can_build_shared_CXX=no
-fi
-
-fi
-
-
-
-
-
-#
-# Check to make sure the static flag actually works.
-#
-wl=$lt_prog_compiler_wl_CXX eval lt_tmp_static_flag=\"$lt_prog_compiler_static_CXX\"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5
-$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; }
-if ${lt_cv_prog_compiler_static_works_CXX+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_prog_compiler_static_works_CXX=no
- save_LDFLAGS="$LDFLAGS"
- LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
- echo "$lt_simple_link_test_code" > conftest.$ac_ext
- if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
- # The linker can only warn and ignore the option if not recognized
- # So say no if there are warnings
- if test -s conftest.err; then
- # Append any errors to the config.log.
- cat conftest.err 1>&5
- $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
- $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
- if diff conftest.exp conftest.er2 >/dev/null; then
- lt_cv_prog_compiler_static_works_CXX=yes
- fi
- else
- lt_cv_prog_compiler_static_works_CXX=yes
- fi
- fi
- $RM -r conftest*
- LDFLAGS="$save_LDFLAGS"
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_CXX" >&5
-$as_echo "$lt_cv_prog_compiler_static_works_CXX" >&6; }
-
-if test x"$lt_cv_prog_compiler_static_works_CXX" = xyes; then
- :
-else
- lt_prog_compiler_static_CXX=
-fi
-
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
-$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if ${lt_cv_prog_compiler_c_o_CXX+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_prog_compiler_c_o_CXX=no
- $RM -r conftest 2>/dev/null
- mkdir conftest
- cd conftest
- mkdir out
- echo "$lt_simple_compile_test_code" > conftest.$ac_ext
-
- lt_compiler_flag="-o out/conftest2.$ac_objext"
- # Insert the option either (1) after the last *FLAGS variable, or
- # (2) before a word containing "conftest.", or (3) at the end.
- # Note that $ac_compile itself does not contain backslashes and begins
- # with a dollar sign (not a hyphen), so the echo should work correctly.
- lt_compile=`echo "$ac_compile" | $SED \
- -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
- -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
- -e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
- (eval "$lt_compile" 2>out/conftest.err)
- ac_status=$?
- cat out/conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- if (exit $ac_status) && test -s out/conftest2.$ac_objext
- then
- # The compiler can only warn and ignore the option if not recognized
- # So say no if there are warnings
- $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
- $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
- if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
- lt_cv_prog_compiler_c_o_CXX=yes
- fi
- fi
- chmod u+w . 2>&5
- $RM conftest*
- # SGI C++ compiler will create directory out/ii_files/ for
- # template instantiation
- test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
- $RM out/* && rmdir out
- cd ..
- $RM -r conftest
- $RM conftest*
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5
-$as_echo "$lt_cv_prog_compiler_c_o_CXX" >&6; }
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
-$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if ${lt_cv_prog_compiler_c_o_CXX+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_prog_compiler_c_o_CXX=no
- $RM -r conftest 2>/dev/null
- mkdir conftest
- cd conftest
- mkdir out
- echo "$lt_simple_compile_test_code" > conftest.$ac_ext
-
- lt_compiler_flag="-o out/conftest2.$ac_objext"
- # Insert the option either (1) after the last *FLAGS variable, or
- # (2) before a word containing "conftest.", or (3) at the end.
- # Note that $ac_compile itself does not contain backslashes and begins
- # with a dollar sign (not a hyphen), so the echo should work correctly.
- lt_compile=`echo "$ac_compile" | $SED \
- -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
- -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
- -e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
- (eval "$lt_compile" 2>out/conftest.err)
- ac_status=$?
- cat out/conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- if (exit $ac_status) && test -s out/conftest2.$ac_objext
- then
- # The compiler can only warn and ignore the option if not recognized
- # So say no if there are warnings
- $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
- $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
- if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
- lt_cv_prog_compiler_c_o_CXX=yes
- fi
- fi
- chmod u+w . 2>&5
- $RM conftest*
- # SGI C++ compiler will create directory out/ii_files/ for
- # template instantiation
- test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
- $RM out/* && rmdir out
- cd ..
- $RM -r conftest
- $RM conftest*
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5
-$as_echo "$lt_cv_prog_compiler_c_o_CXX" >&6; }
-
-
-
-
-hard_links="nottested"
-if test "$lt_cv_prog_compiler_c_o_CXX" = no && test "$need_locks" != no; then
- # do not overwrite the value of need_locks provided by the user
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5
-$as_echo_n "checking if we can lock with hard links... " >&6; }
- hard_links=yes
- $RM conftest*
- ln conftest.a conftest.b 2>/dev/null && hard_links=no
- touch conftest.a
- ln conftest.a conftest.b 2>&5 || hard_links=no
- ln conftest.a conftest.b 2>/dev/null && hard_links=no
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5
-$as_echo "$hard_links" >&6; }
- if test "$hard_links" = no; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5
-$as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;}
- need_locks=warn
- fi
-else
- need_locks=no
-fi
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5
-$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; }
-
- export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
- exclude_expsyms_CXX='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'
- case $host_os in
- aix[4-9]*)
- # If we're using GNU nm, then we don't want the "-C" option.
- # -C means demangle to AIX nm, but means don't demangle with GNU nm
- # Also, AIX nm treats weak defined symbols like other global defined
- # symbols, whereas GNU nm marks them as "W".
- if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
- export_symbols_cmds_CXX='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
- else
- export_symbols_cmds_CXX='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
- fi
- ;;
- pw32*)
- export_symbols_cmds_CXX="$ltdll_cmds"
- ;;
- cygwin* | mingw* | cegcc*)
- case $cc_basename in
- cl*)
- exclude_expsyms_CXX='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
- ;;
- *)
- export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols'
- exclude_expsyms_CXX='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'
- ;;
- esac
- ;;
- *)
- export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
- ;;
- esac
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5
-$as_echo "$ld_shlibs_CXX" >&6; }
-test "$ld_shlibs_CXX" = no && can_build_shared=no
-
-with_gnu_ld_CXX=$with_gnu_ld
-
-
-
-
-
-
-#
-# Do we need to explicitly link libc?
-#
-case "x$archive_cmds_need_lc_CXX" in
-x|xyes)
- # Assume -lc should be added
- archive_cmds_need_lc_CXX=yes
-
- if test "$enable_shared" = yes && test "$GCC" = yes; then
- case $archive_cmds_CXX in
- *'~'*)
- # FIXME: we may have to deal with multi-command sequences.
- ;;
- '$CC '*)
- # Test whether the compiler implicitly links with -lc since on some
- # systems, -lgcc has to come before -lc. If gcc already passes -lc
- # to ld, don't add -lc before -lgcc.
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5
-$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; }
-if ${lt_cv_archive_cmds_need_lc_CXX+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- $RM conftest*
- echo "$lt_simple_compile_test_code" > conftest.$ac_ext
-
- if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } 2>conftest.err; then
- soname=conftest
- lib=conftest
- libobjs=conftest.$ac_objext
- deplibs=
- wl=$lt_prog_compiler_wl_CXX
- pic_flag=$lt_prog_compiler_pic_CXX
- compiler_flags=-v
- linker_flags=-v
- verstring=
- output_objdir=.
- libname=conftest
- lt_save_allow_undefined_flag=$allow_undefined_flag_CXX
- allow_undefined_flag_CXX=
- if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5
- (eval $archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }
- then
- lt_cv_archive_cmds_need_lc_CXX=no
- else
- lt_cv_archive_cmds_need_lc_CXX=yes
- fi
- allow_undefined_flag_CXX=$lt_save_allow_undefined_flag
- else
- cat conftest.err 1>&5
- fi
- $RM conftest*
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_CXX" >&5
-$as_echo "$lt_cv_archive_cmds_need_lc_CXX" >&6; }
- archive_cmds_need_lc_CXX=$lt_cv_archive_cmds_need_lc_CXX
- ;;
- esac
- fi
- ;;
-esac
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
-$as_echo_n "checking dynamic linker characteristics... " >&6; }
-
-library_names_spec=
-libname_spec='lib$name'
-soname_spec=
-shrext_cmds=".so"
-postinstall_cmds=
-postuninstall_cmds=
-finish_cmds=
-finish_eval=
-shlibpath_var=
-shlibpath_overrides_runpath=unknown
-version_type=none
-dynamic_linker="$host_os ld.so"
-sys_lib_dlsearch_path_spec="/lib /usr/lib"
-need_lib_prefix=unknown
-hardcode_into_libs=no
-
-# when you set need_version to no, make sure it does not cause -set_version
-# flags to be left without arguments
-need_version=unknown
-
-case $host_os in
-aix3*)
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
- shlibpath_var=LIBPATH
-
- # AIX 3 has no versioning support, so we append a major version to the name.
- soname_spec='${libname}${release}${shared_ext}$major'
- ;;
-
-aix[4-9]*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- hardcode_into_libs=yes
- if test "$host_cpu" = ia64; then
- # AIX 5 supports IA64
- library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
- shlibpath_var=LD_LIBRARY_PATH
- else
- # With GCC up to 2.95.x, collect2 would create an import file
- # for dependence libraries. The import file would start with
- # the line `#! .'. This would cause the generated library to
- # depend on `.', always an invalid library. This was fixed in
- # development snapshots of GCC prior to 3.0.
- case $host_os in
- aix4 | aix4.[01] | aix4.[01].*)
- if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
- echo ' yes '
- echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then
- :
- else
- can_build_shared=no
- fi
- ;;
- esac
- # AIX (on Power*) has no versioning support, so currently we can not hardcode correct
- # soname into executable. Probably we can add versioning support to
- # collect2, so additional links can be useful in future.
- if test "$aix_use_runtimelinking" = yes; then
- # If using run time linking (on AIX 4.2 or later) use lib<name>.so
- # instead of lib<name>.a to let people know that these are not
- # typical AIX shared libraries.
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- else
- # We preserve .a as extension for shared libraries through AIX4.2
- # and later when we are not doing run time linking.
- library_names_spec='${libname}${release}.a $libname.a'
- soname_spec='${libname}${release}${shared_ext}$major'
- fi
- shlibpath_var=LIBPATH
- fi
- ;;
-
-amigaos*)
- case $host_cpu in
- powerpc)
- # Since July 2007 AmigaOS4 officially supports .so libraries.
- # When compiling the executable, add -use-dynld -Lsobjs: to the compileline.
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- ;;
- m68k)
- library_names_spec='$libname.ixlibrary $libname.a'
- # Create ${libname}_ixlibrary.a entries in /sys/libs.
- finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
- ;;
- esac
- ;;
-
-beos*)
- library_names_spec='${libname}${shared_ext}'
- dynamic_linker="$host_os ld.so"
- shlibpath_var=LIBRARY_PATH
- ;;
-
-bsdi[45]*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
- shlibpath_var=LD_LIBRARY_PATH
- sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
- sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
- # the default ld.so.conf also contains /usr/contrib/lib and
- # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
- # libtool to hard-code these into programs
- ;;
-
-cygwin* | mingw* | pw32* | cegcc*)
- version_type=windows
- shrext_cmds=".dll"
- need_version=no
- need_lib_prefix=no
-
- case $GCC,$cc_basename in
- yes,*)
- # gcc
- library_names_spec='$libname.dll.a'
- # DLL is installed to $(libdir)/../bin by postinstall_cmds
- postinstall_cmds='base_file=`basename \${file}`~
- dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
- dldir=$destdir/`dirname \$dlpath`~
- test -d \$dldir || mkdir -p \$dldir~
- $install_prog $dir/$dlname \$dldir/$dlname~
- chmod a+x \$dldir/$dlname~
- if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then
- eval '\''$striplib \$dldir/$dlname'\'' || exit \$?;
- fi'
- postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
- dlpath=$dir/\$dldll~
- $RM \$dlpath'
- shlibpath_overrides_runpath=yes
-
- case $host_os in
- cygwin*)
- # Cygwin DLLs use 'cyg' prefix rather than 'lib'
- soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
-
- ;;
- mingw* | cegcc*)
- # MinGW DLLs use traditional 'lib' prefix
- soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
- ;;
- pw32*)
- # pw32 DLLs use 'pw' prefix rather than 'lib'
- library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
- ;;
- esac
- dynamic_linker='Win32 ld.exe'
- ;;
-
- *,cl*)
- # Native MSVC
- libname_spec='$name'
- soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
- library_names_spec='${libname}.dll.lib'
-
- case $build_os in
- mingw*)
- sys_lib_search_path_spec=
- lt_save_ifs=$IFS
- IFS=';'
- for lt_path in $LIB
- do
- IFS=$lt_save_ifs
- # Let DOS variable expansion print the short 8.3 style file name.
- lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"`
- sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path"
- done
- IFS=$lt_save_ifs
- # Convert to MSYS style.
- sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'`
- ;;
- cygwin*)
- # Convert to unix form, then to dos form, then back to unix form
- # but this time dos style (no spaces!) so that the unix form looks
- # like /cygdrive/c/PROGRA~1:/cygdr...
- sys_lib_search_path_spec=`cygpath --path --unix "$LIB"`
- sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null`
- sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
- ;;
- *)
- sys_lib_search_path_spec="$LIB"
- if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then
- # It is most probably a Windows format PATH.
- sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
- else
- sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
- fi
- # FIXME: find the short name or the path components, as spaces are
- # common. (e.g. "Program Files" -> "PROGRA~1")
- ;;
- esac
-
- # DLL is installed to $(libdir)/../bin by postinstall_cmds
- postinstall_cmds='base_file=`basename \${file}`~
- dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
- dldir=$destdir/`dirname \$dlpath`~
- test -d \$dldir || mkdir -p \$dldir~
- $install_prog $dir/$dlname \$dldir/$dlname'
- postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
- dlpath=$dir/\$dldll~
- $RM \$dlpath'
- shlibpath_overrides_runpath=yes
- dynamic_linker='Win32 link.exe'
- ;;
-
- *)
- # Assume MSVC wrapper
- library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib'
- dynamic_linker='Win32 ld.exe'
- ;;
- esac
- # FIXME: first we should search . and the directory the executable is in
- shlibpath_var=PATH
- ;;
-
-darwin* | rhapsody*)
- dynamic_linker="$host_os dyld"
- version_type=darwin
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext'
- soname_spec='${libname}${release}${major}$shared_ext'
- shlibpath_overrides_runpath=yes
- shlibpath_var=DYLD_LIBRARY_PATH
- shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
-
- sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
- ;;
-
-dgux*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- ;;
-
-freebsd* | dragonfly*)
- # DragonFly does not have aout. When/if they implement a new
- # versioning mechanism, adjust this.
- if test -x /usr/bin/objformat; then
- objformat=`/usr/bin/objformat`
- else
- case $host_os in
- freebsd[23].*) objformat=aout ;;
- *) objformat=elf ;;
- esac
- fi
- # Handle Gentoo/FreeBSD as it was Linux
- case $host_vendor in
- gentoo)
- version_type=linux ;;
- *)
- version_type=freebsd-$objformat ;;
- esac
-
- case $version_type in
- freebsd-elf*)
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
- need_version=no
- need_lib_prefix=no
- ;;
- freebsd-*)
- library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
- need_version=yes
- ;;
- linux)
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- need_lib_prefix=no
- need_version=no
- ;;
- esac
- shlibpath_var=LD_LIBRARY_PATH
- case $host_os in
- freebsd2.*)
- shlibpath_overrides_runpath=yes
- ;;
- freebsd3.[01]* | freebsdelf3.[01]*)
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- ;;
- freebsd3.[2-9]* | freebsdelf3.[2-9]* | \
- freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1)
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
- *) # from 4.6 on, and DragonFly
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- ;;
- esac
- ;;
-
-gnu*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
-haiku*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- dynamic_linker="$host_os runtime_loader"
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib'
- hardcode_into_libs=yes
- ;;
-
-hpux9* | hpux10* | hpux11*)
- # Give a soname corresponding to the major version so that dld.sl refuses to
- # link against other versions.
- version_type=sunos
- need_lib_prefix=no
- need_version=no
- case $host_cpu in
- ia64*)
- shrext_cmds='.so'
- hardcode_into_libs=yes
- dynamic_linker="$host_os dld.so"
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- if test "X$HPUX_IA64_MODE" = X32; then
- sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
- else
- sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
- fi
- sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
- ;;
- hppa*64*)
- shrext_cmds='.sl'
- hardcode_into_libs=yes
- dynamic_linker="$host_os dld.sl"
- shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
- shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
- sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
- ;;
- *)
- shrext_cmds='.sl'
- dynamic_linker="$host_os dld.sl"
- shlibpath_var=SHLIB_PATH
- shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- ;;
- esac
- # HP-UX runs *really* slowly unless shared libraries are mode 555, ...
- postinstall_cmds='chmod 555 $lib'
- # or fails outright, so override atomically:
- install_override_mode=555
- ;;
-
-interix[3-9]*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
-irix5* | irix6* | nonstopux*)
- case $host_os in
- nonstopux*) version_type=nonstopux ;;
- *)
- if test "$lt_cv_prog_gnu_ld" = yes; then
- version_type=linux # correct to gnu/linux during the next big refactor
- else
- version_type=irix
- fi ;;
- esac
- need_lib_prefix=no
- need_version=no
- soname_spec='${libname}${release}${shared_ext}$major'
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
- case $host_os in
- irix5* | nonstopux*)
- libsuff= shlibsuff=
- ;;
- *)
- case $LD in # libtool.m4 will add one of these switches to LD
- *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
- libsuff= shlibsuff= libmagic=32-bit;;
- *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
- libsuff=32 shlibsuff=N32 libmagic=N32;;
- *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
- libsuff=64 shlibsuff=64 libmagic=64-bit;;
- *) libsuff= shlibsuff= libmagic=never-match;;
- esac
- ;;
- esac
- shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
- shlibpath_overrides_runpath=no
- sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
- sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
- hardcode_into_libs=yes
- ;;
-
-# No shared lib support for Linux oldld, aout, or coff.
-linux*oldld* | linux*aout* | linux*coff*)
- dynamic_linker=no
- ;;
-
-# This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
-
- # Some binutils ld are patched to set DT_RUNPATH
- if ${lt_cv_shlibpath_overrides_runpath+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- lt_cv_shlibpath_overrides_runpath=no
- save_LDFLAGS=$LDFLAGS
- save_libdir=$libdir
- eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_CXX\"; \
- LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_CXX\""
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
- if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then :
- lt_cv_shlibpath_overrides_runpath=yes
-fi
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- LDFLAGS=$save_LDFLAGS
- libdir=$save_libdir
-
-fi
-
- shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath
-
- # This implies no fast_install, which is unacceptable.
- # Some rework will be needed to allow for fast_install
- # before this can be enabled.
- hardcode_into_libs=yes
-
- # Append ld.so.conf contents to the search path
- if test -f /etc/ld.so.conf; then
- lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '`
- sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
- fi
-
- # We used to test for /lib/ld.so.1 and disable shared libraries on
- # powerpc, because MkLinux only supported shared libraries with the
- # GNU dynamic linker. Since this was broken with cross compilers,
- # most powerpc-linux boxes support dynamic linking these days and
- # people can always --disable-shared, the test was removed, and we
- # assume the GNU/Linux dynamic linker is in use.
- dynamic_linker='GNU/Linux ld.so'
- ;;
-
-netbsd*)
- version_type=sunos
- need_lib_prefix=no
- need_version=no
- if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
- finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
- dynamic_linker='NetBSD (a.out) ld.so'
- else
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- dynamic_linker='NetBSD ld.elf_so'
- fi
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- ;;
-
-newsos6)
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- ;;
-
-*nto* | *qnx*)
- version_type=qnx
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- dynamic_linker='ldqnx.so'
- ;;
-
-openbsd*)
- version_type=sunos
- sys_lib_dlsearch_path_spec="/usr/lib"
- need_lib_prefix=no
- # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
- case $host_os in
- openbsd3.3 | openbsd3.3.*) need_version=yes ;;
- *) need_version=no ;;
- esac
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
- finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
- shlibpath_var=LD_LIBRARY_PATH
- if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
- case $host_os in
- openbsd2.[89] | openbsd2.[89].*)
- shlibpath_overrides_runpath=no
- ;;
- *)
- shlibpath_overrides_runpath=yes
- ;;
- esac
- else
- shlibpath_overrides_runpath=yes
- fi
- ;;
-
-os2*)
- libname_spec='$name'
- shrext_cmds=".dll"
- need_lib_prefix=no
- library_names_spec='$libname${shared_ext} $libname.a'
- dynamic_linker='OS/2 ld.exe'
- shlibpath_var=LIBPATH
- ;;
-
-osf3* | osf4* | osf5*)
- version_type=osf
- need_lib_prefix=no
- need_version=no
- soname_spec='${libname}${release}${shared_ext}$major'
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- shlibpath_var=LD_LIBRARY_PATH
- sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
- sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
- ;;
-
-rdos*)
- dynamic_linker=no
- ;;
-
-solaris*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- # ldd complains unless libraries are executable
- postinstall_cmds='chmod +x $lib'
- ;;
-
-sunos4*)
- version_type=sunos
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
- finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- if test "$with_gnu_ld" = yes; then
- need_lib_prefix=no
- fi
- need_version=yes
- ;;
-
-sysv4 | sysv4.3*)
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- case $host_vendor in
- sni)
- shlibpath_overrides_runpath=no
- need_lib_prefix=no
- runpath_var=LD_RUN_PATH
- ;;
- siemens)
- need_lib_prefix=no
- ;;
- motorola)
- need_lib_prefix=no
- need_version=no
- shlibpath_overrides_runpath=no
- sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
- ;;
- esac
- ;;
-
-sysv4*MP*)
- if test -d /usr/nec ;then
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
- soname_spec='$libname${shared_ext}.$major'
- shlibpath_var=LD_LIBRARY_PATH
- fi
- ;;
-
-sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
- version_type=freebsd-elf
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- if test "$with_gnu_ld" = yes; then
- sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
- else
- sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
- case $host_os in
- sco3.2v5*)
- sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
- ;;
- esac
- fi
- sys_lib_dlsearch_path_spec='/usr/lib'
- ;;
-
-tpf*)
- # TPF is a cross-target only. Preferred cross-host = GNU/Linux.
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
-uts4*)
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- ;;
-
-*)
- dynamic_linker=no
- ;;
-esac
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5
-$as_echo "$dynamic_linker" >&6; }
-test "$dynamic_linker" = no && can_build_shared=no
-
-variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
-if test "$GCC" = yes; then
- variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
-fi
-
-if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then
- sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec"
-fi
-if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then
- sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec"
-fi
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5
-$as_echo_n "checking how to hardcode library paths into programs... " >&6; }
-hardcode_action_CXX=
-if test -n "$hardcode_libdir_flag_spec_CXX" ||
- test -n "$runpath_var_CXX" ||
- test "X$hardcode_automatic_CXX" = "Xyes" ; then
-
- # We can hardcode non-existent directories.
- if test "$hardcode_direct_CXX" != no &&
- # If the only mechanism to avoid hardcoding is shlibpath_var, we
- # have to relink, otherwise we might link with an installed library
- # when we should be linking with a yet-to-be-installed one
- ## test "$_LT_TAGVAR(hardcode_shlibpath_var, CXX)" != no &&
- test "$hardcode_minus_L_CXX" != no; then
- # Linking always hardcodes the temporary library directory.
- hardcode_action_CXX=relink
- else
- # We can link without hardcoding, and we can hardcode nonexisting dirs.
- hardcode_action_CXX=immediate
- fi
-else
- # We cannot hardcode anything, or else we can only hardcode existing
- # directories.
- hardcode_action_CXX=unsupported
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_CXX" >&5
-$as_echo "$hardcode_action_CXX" >&6; }
-
-if test "$hardcode_action_CXX" = relink ||
- test "$inherit_rpath_CXX" = yes; then
- # Fast installation is not supported
- enable_fast_install=no
-elif test "$shlibpath_overrides_runpath" = yes ||
- test "$enable_shared" = no; then
- # Fast installation is not necessary
- enable_fast_install=needless
-fi
-
-
-
-
-
-
-
- fi # test -n "$compiler"
-
- CC=$lt_save_CC
- CFLAGS=$lt_save_CFLAGS
- LDCXX=$LD
- LD=$lt_save_LD
- GCC=$lt_save_GCC
- with_gnu_ld=$lt_save_with_gnu_ld
- lt_cv_path_LDCXX=$lt_cv_path_LD
- lt_cv_path_LD=$lt_save_path_LD
- lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld
- lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld
-fi # test "$_lt_caught_CXX_error" != yes
-
-ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ac_config_commands="$ac_config_commands libtool"
-
-
-
-
-# Only expand once:
-
-
-
-# newer libtool...
-
-
-
-
-
-echo "-----------------------------------------------"
-
-# Machine characteristics
-
-# The cast to long int works around a bug in the HP C Compiler
-# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
-# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
-# This bug is HP SR number 8606223364.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of char" >&5
-$as_echo_n "checking size of char... " >&6; }
-if ${ac_cv_sizeof_char+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if ac_fn_cxx_compute_int "$LINENO" "(long int) (sizeof (char))" "ac_cv_sizeof_char" "$ac_includes_default"; then :
-
-else
- if test "$ac_cv_type_char" = yes; then
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error 77 "cannot compute sizeof (char)
-See \`config.log' for more details" "$LINENO" 5; }
- else
- ac_cv_sizeof_char=0
- fi
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_char" >&5
-$as_echo "$ac_cv_sizeof_char" >&6; }
-
-
-
-cat >>confdefs.h <<_ACEOF
-#define SIZEOF_CHAR $ac_cv_sizeof_char
-_ACEOF
-
-
-# The cast to long int works around a bug in the HP C Compiler
-# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
-# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
-# This bug is HP SR number 8606223364.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of short" >&5
-$as_echo_n "checking size of short... " >&6; }
-if ${ac_cv_sizeof_short+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if ac_fn_cxx_compute_int "$LINENO" "(long int) (sizeof (short))" "ac_cv_sizeof_short" "$ac_includes_default"; then :
-
-else
- if test "$ac_cv_type_short" = yes; then
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error 77 "cannot compute sizeof (short)
-See \`config.log' for more details" "$LINENO" 5; }
- else
- ac_cv_sizeof_short=0
- fi
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_short" >&5
-$as_echo "$ac_cv_sizeof_short" >&6; }
-
-
-
-cat >>confdefs.h <<_ACEOF
-#define SIZEOF_SHORT $ac_cv_sizeof_short
-_ACEOF
-
-
-# The cast to long int works around a bug in the HP C Compiler
-# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
-# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
-# This bug is HP SR number 8606223364.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of int" >&5
-$as_echo_n "checking size of int... " >&6; }
-if ${ac_cv_sizeof_int+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if ac_fn_cxx_compute_int "$LINENO" "(long int) (sizeof (int))" "ac_cv_sizeof_int" "$ac_includes_default"; then :
-
-else
- if test "$ac_cv_type_int" = yes; then
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error 77 "cannot compute sizeof (int)
-See \`config.log' for more details" "$LINENO" 5; }
- else
- ac_cv_sizeof_int=0
- fi
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_int" >&5
-$as_echo "$ac_cv_sizeof_int" >&6; }
-
-
-
-cat >>confdefs.h <<_ACEOF
-#define SIZEOF_INT $ac_cv_sizeof_int
-_ACEOF
-
-
-# The cast to long int works around a bug in the HP C Compiler
-# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
-# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
-# This bug is HP SR number 8606223364.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of long" >&5
-$as_echo_n "checking size of long... " >&6; }
-if ${ac_cv_sizeof_long+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if ac_fn_cxx_compute_int "$LINENO" "(long int) (sizeof (long))" "ac_cv_sizeof_long" "$ac_includes_default"; then :
-
-else
- if test "$ac_cv_type_long" = yes; then
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error 77 "cannot compute sizeof (long)
-See \`config.log' for more details" "$LINENO" 5; }
- else
- ac_cv_sizeof_long=0
- fi
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_long" >&5
-$as_echo "$ac_cv_sizeof_long" >&6; }
-
-
-
-cat >>confdefs.h <<_ACEOF
-#define SIZEOF_LONG $ac_cv_sizeof_long
-_ACEOF
-
-
-# The cast to long int works around a bug in the HP C Compiler
-# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
-# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
-# This bug is HP SR number 8606223364.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of long long" >&5
-$as_echo_n "checking size of long long... " >&6; }
-if ${ac_cv_sizeof_long_long+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if ac_fn_cxx_compute_int "$LINENO" "(long int) (sizeof (long long))" "ac_cv_sizeof_long_long" "$ac_includes_default"; then :
-
-else
- if test "$ac_cv_type_long_long" = yes; then
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error 77 "cannot compute sizeof (long long)
-See \`config.log' for more details" "$LINENO" 5; }
- else
- ac_cv_sizeof_long_long=0
- fi
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_long_long" >&5
-$as_echo "$ac_cv_sizeof_long_long" >&6; }
-
-
-
-cat >>confdefs.h <<_ACEOF
-#define SIZEOF_LONG_LONG $ac_cv_sizeof_long_long
-_ACEOF
-
-
-# The cast to long int works around a bug in the HP C Compiler
-# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
-# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
-# This bug is HP SR number 8606223364.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of __int64" >&5
-$as_echo_n "checking size of __int64... " >&6; }
-if ${ac_cv_sizeof___int64+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- if ac_fn_cxx_compute_int "$LINENO" "(long int) (sizeof (__int64))" "ac_cv_sizeof___int64" "$ac_includes_default"; then :
-
-else
- if test "$ac_cv_type___int64" = yes; then
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error 77 "cannot compute sizeof (__int64)
-See \`config.log' for more details" "$LINENO" 5; }
- else
- ac_cv_sizeof___int64=0
- fi
-fi
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof___int64" >&5
-$as_echo "$ac_cv_sizeof___int64" >&6; }
-
-
-
-cat >>confdefs.h <<_ACEOF
-#define SIZEOF___INT64 $ac_cv_sizeof___int64
-_ACEOF
-
-
-
-# Checks for header files.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
-$as_echo_n "checking for ANSI C header files... " >&6; }
-if ${ac_cv_header_stdc+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <float.h>
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- ac_cv_header_stdc=yes
-else
- ac_cv_header_stdc=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-
-if test $ac_cv_header_stdc = yes; then
- # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <string.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "memchr" >/dev/null 2>&1; then :
-
-else
- ac_cv_header_stdc=no
-fi
-rm -f conftest*
-
-fi
-
-if test $ac_cv_header_stdc = yes; then
- # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <stdlib.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "free" >/dev/null 2>&1; then :
-
-else
- ac_cv_header_stdc=no
-fi
-rm -f conftest*
-
-fi
-
-if test $ac_cv_header_stdc = yes; then
- # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
- if test "$cross_compiling" = yes; then :
- :
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <ctype.h>
-#include <stdlib.h>
-#if ((' ' & 0x0FF) == 0x020)
-# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
-# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
-#else
-# define ISLOWER(c) \
- (('a' <= (c) && (c) <= 'i') \
- || ('j' <= (c) && (c) <= 'r') \
- || ('s' <= (c) && (c) <= 'z'))
-# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
-#endif
-
-#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
-int
-main ()
-{
- int i;
- for (i = 0; i < 256; i++)
- if (XOR (islower (i), ISLOWER (i))
- || toupper (i) != TOUPPER (i))
- return 2;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
-else
- ac_cv_header_stdc=no
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
-$as_echo "$ac_cv_header_stdc" >&6; }
-if test $ac_cv_header_stdc = yes; then
-
-$as_echo "#define STDC_HEADERS 1" >>confdefs.h
-
-fi
-
-for ac_header in float.h limits.h stddef.h stdlib.h string.h sys/time.h stdint.h
-do :
- as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
-ac_fn_cxx_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
-if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
- cat >>confdefs.h <<_ACEOF
-#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-
-done
-
-
-
-# check endianness of the architecture
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
-$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
-if ${ac_cv_c_bigendian+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_cv_c_bigendian=unknown
- # See if we're dealing with a universal compiler.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#ifndef __APPLE_CC__
- not a universal capable compiler
- #endif
- typedef int dummy;
-
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
-
- # Check for potential -arch flags. It is not universal unless
- # there are at least two -arch flags with different values.
- ac_arch=
- ac_prev=
- for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do
- if test -n "$ac_prev"; then
- case $ac_word in
- i?86 | x86_64 | ppc | ppc64)
- if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then
- ac_arch=$ac_word
- else
- ac_cv_c_bigendian=universal
- break
- fi
- ;;
- esac
- ac_prev=
- elif test "x$ac_word" = "x-arch"; then
- ac_prev=arch
- fi
- done
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- if test $ac_cv_c_bigendian = unknown; then
- # See if sys/param.h defines the BYTE_ORDER macro.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <sys/types.h>
- #include <sys/param.h>
-
-int
-main ()
-{
-#if ! (defined BYTE_ORDER && defined BIG_ENDIAN \
- && defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \
- && LITTLE_ENDIAN)
- bogus endian macros
- #endif
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- # It does; now see whether it defined to BIG_ENDIAN or not.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <sys/types.h>
- #include <sys/param.h>
-
-int
-main ()
-{
-#if BYTE_ORDER != BIG_ENDIAN
- not big endian
- #endif
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- ac_cv_c_bigendian=yes
-else
- ac_cv_c_bigendian=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- fi
- if test $ac_cv_c_bigendian = unknown; then
- # See if <limits.h> defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., Solaris).
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <limits.h>
-
-int
-main ()
-{
-#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN)
- bogus endian macros
- #endif
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- # It does; now see whether it defined to _BIG_ENDIAN or not.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <limits.h>
-
-int
-main ()
-{
-#ifndef _BIG_ENDIAN
- not big endian
- #endif
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- ac_cv_c_bigendian=yes
-else
- ac_cv_c_bigendian=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- fi
- if test $ac_cv_c_bigendian = unknown; then
- # Compile a test program.
- if test "$cross_compiling" = yes; then :
- # Try to guess by grepping values from an object file.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-short int ascii_mm[] =
- { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
- short int ascii_ii[] =
- { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
- int use_ascii (int i) {
- return ascii_mm[i] + ascii_ii[i];
- }
- short int ebcdic_ii[] =
- { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };
- short int ebcdic_mm[] =
- { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };
- int use_ebcdic (int i) {
- return ebcdic_mm[i] + ebcdic_ii[i];
- }
- extern int foo;
-
-int
-main ()
-{
-return use_ascii (foo) == use_ebcdic (foo);
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_compile "$LINENO"; then :
- if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then
- ac_cv_c_bigendian=yes
- fi
- if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then
- if test "$ac_cv_c_bigendian" = unknown; then
- ac_cv_c_bigendian=no
- else
- # finding both strings is unlikely to happen, but who knows?
- ac_cv_c_bigendian=unknown
- fi
- fi
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-
- /* Are we little or big endian? From Harbison&Steele. */
- union
- {
- long int l;
- char c[sizeof (long int)];
- } u;
- u.l = 1;
- return u.c[sizeof (long int) - 1] == 1;
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
- ac_cv_c_bigendian=no
-else
- ac_cv_c_bigendian=yes
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
- fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5
-$as_echo "$ac_cv_c_bigendian" >&6; }
- case $ac_cv_c_bigendian in #(
- yes)
-
-$as_echo "#define HAVE_BIG_ENDIAN 1" >>confdefs.h
-;; #(
- no)
-
-$as_echo "#define HAVE_LITTLE_ENDIAN 1" >>confdefs.h
- ;; #(
- universal)
-
-$as_echo "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h
-
- ;; #(
- *)
- as_fn_error $? "unknown endianness
- presetting ac_cv_c_bigendian=no (or yes) will help" "$LINENO" 5 ;;
- esac
-
-
-# Create some useful data types of fixed, known lengths
-
-# We hereby assume that a character is always one byte
-# LINBOX_INT8="char";
-
-# case $ac_cv_sizeof_char in
- # 1)
- # TWO_BYTES=2;
- # FOUR_BYTES=4;
- # EIGHT_BYTES=8;
- # ;;
- # 8)
- # TWO_BYTES=16;
- # FOUR_BYTES=32;
- # EIGHT_BYTES=64;
-# esac
-
-# case $TWO_BYTES in
- # $ac_cv_sizeof_short)
- # LINBOX_INT16="short";
- # ;;
- # $ac_cv_sizeof_int)
- # LINBOX_INT16="int";
- # ;;
-# esac
-
-# case $FOUR_BYTES in
- # $ac_cv_sizeof_short)
- # LINBOX_INT32="short";
- # ;;
- # $ac_cv_sizeof_int)
- # LINBOX_INT32="int";
- # ;;
- # $ac_cv_sizeof_long)
- # LINBOX_INT32="long";
- # ;;
-# esac
-
-# case $EIGHT_BYTES in
- # $ac_cv_sizeof_short)
- # LINBOX_INT64="short";
- # ;;
- # $ac_cv_sizeof_int)
- # LINBOX_INT64="int";
- # ;;
- # $ac_cv_sizeof_long)
- # LINBOX_INT64="long";
- # ;;
- # $ac_cv_sizeof_long_long)
- # LINBOX_INT64="long long";
- # ;;
- # $ac_cv_sizeof___int64)
- # LINBOX_INT64="__int64";
- # ;;
-# esac
-
-# AC_DEFINE_UNQUOTED(INT8, $LINBOX_INT8, Canonical 8-bit data type)
-# AC_DEFINE_UNQUOTED(INT16, $LINBOX_INT16, Canonical 16-bit data type)
-# AC_DEFINE_UNQUOTED(INT32, $LINBOX_INT32, Canonical 32-bit data type)
-# AC_DEFINE_UNQUOTED(INT64, $LINBOX_INT64, Canonical 64-bit data type)
-
-echo "-----------------------------------------------"
-# Feature checks
-
-
-
-# Check whether --with-default was given.
-if test "${with_default+set}" = set; then :
- withval=$with_default; if test "$withval" = yes ; then
- echo "Default path = /usr /usr/local"
- DEFAULT_CHECKING_PATH="/usr /usr/local"
- else
- echo "Default path = $withval /usr /usr/local"
- DEFAULT_CHECKING_PATH="$withval /usr /usr/local"
- fi
-
-else
-
- echo "Default path = /usr /usr/local"
- DEFAULT_CHECKING_PATH="/usr /usr/local"
-
-fi
-
-
-
-
-# Check whether --with-all was given.
-if test "${with_all+set}" = set; then :
- withval=$with_all; if test "$withval" = yes ; then
- check_all="yes"
- echo "Checking all external packages in ${DEFAULT_CHECKING_PATH}"
-
- elif test "$withval" != no ; then
- check_all="yes"
- DEFAULT_CHECKING_PATH="$withval ${DEFAULT_CHECKING_PATH}"
- echo "Checking all external packages in ${DEFAULT_CHECKING_PATH}"
- fi
-
-fi
-
-
-if test -n "$check_all"; then
-
- GMP_HOME_PATH="${DEFAULT_CHECKING_PATH}"
- GIVARO_HOME_PATH="${DEFAULT_CHECKING_PATH}"
-# NTL_HOME_PATH="${DEFAULT_CHECKING_PATH}"
-# LIDIA_HOME_PATH="${DEFAULT_CHECKING_PATH}"
-# SACLIB_HOME_PATH="${DEFAULT_CHECKING_PATH}"
-# MAPLE_HOME_PATH="${DEFAULT_CHECKING_PATH} unknown"
-# EXPAT_HOME_PATH="${DEFAULT_CHECKING_PATH}"
- BLAS_HOME_PATH="${DEFAULT_CHECKING_PATH}"
-fi
-
-
-
-# LB_DRIVER
-
-ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-
-echo "-----------------------------------------------"
-
-
-
-# Check whether --with-gmp was given.
-if test "${with_gmp+set}" = set; then :
- withval=$with_gmp; if test "$withval" = yes ; then
- GMP_HOME_PATH="${DEFAULT_CHECKING_PATH}"
- elif test "$withval" != no ; then
- GMP_HOME_PATH="$withval ${DEFAULT_CHECKING_PATH}"
- fi
-else
- GMP_HOME_PATH="${DEFAULT_CHECKING_PATH}"
-fi
-
-
-min_gmp_version=3.1.1
-
-BACKUP_CXXFLAGS=${CXXFLAGS}
-BACKUP_LIBS=${LIBS}
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for GMP >= $min_gmp_version" >&5
-$as_echo_n "checking for GMP >= $min_gmp_version... " >&6; }
-
-for GMP_HOME in ${GMP_HOME_PATH}
- do
- if test -r "$GMP_HOME/include/gmp.h"; then
-
- if test "x$GMP_HOME" != "x/usr" -a "x$GMP_HOME" != "x/usr/local"; then
- GMP_CFLAGS="-I${GMP_HOME}/include"
- GMP_LIBS="-L${GMP_HOME}/lib -lgmpxx -lgmp"
- else
- GMP_CFLAGS=
- GMP_LIBS="-lgmpxx -lgmp"
- fi
-
- CXXFLAGS="${CXXFLAGS} ${GMP_CFLAGS}"
- LIBS="${LIBS} ${GMP_LIBS}"
-
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <gmp.h>
-int
-main ()
-{
-mpz_t a; mpz_init (a);
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-
- if test "$cross_compiling" = yes; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unknown" >&5
-$as_echo "unknown" >&6; }
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your GMP version is new enough. I am assuming it is."
-
-
-
-$as_echo "#define HAVE_GMP 1" >>confdefs.h
-
- :
- break
-
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <gmp.h>
- int main () { if (__GNU_MP_VERSION < 3) return -1; else return 0; }
-
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: found" >&5
-$as_echo "found" >&6; }
-
-
-
-$as_echo "#define HAVE_GMP 1" >>confdefs.h
-
- # See if we are running GMP 4.0
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether GMP is 4.0 or greater" >&5
-$as_echo_n "checking whether GMP is 4.0 or greater... " >&6; }
- if test "$cross_compiling" = yes; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <gmp.h>
- int main () { if (__GNU_MP_VERSION < 4) return -1; else return 0; }
-
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
- gmp_found="yes"
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
- # See if GMP was compiled with --enable-cxx
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether GMP was compiled with --enable-cxx" >&5
-$as_echo_n "checking whether GMP was compiled with --enable-cxx... " >&6; }
- if test "$cross_compiling" = yes; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <gmpxx.h>
- int main () { mpz_class a(2),b(3),c(5); if ( a+b == c ) return 0; else return -1; }
-
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
- GMP_VERSION=""
-
-
-else
-
- gmp_found="no"
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-else
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
-$as_echo "#define GMP_VERSION_3 1" >>confdefs.h
-
- GMP_VERSION="-DGMP_VERSION_3"
-
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
- :
- break
-
-else
-
- gmp_problem="$gmp_problem $GMP_HOME"
- unset GMP_CFLAGS
- unset GMP_LIBS
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-else
-
- gmp_found="no"
- unset GMP_CFLAGS
- unset GMP_LIBS
-
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-
- else
- gmp_found="no"
- fi
-done
-
-if test "x$gmp_found" != "xyes"; then
- if test -n "$gmp_problem"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: problem" >&5
-$as_echo "problem" >&6; }
- echo "Sorry, your GMP version is too old. Disabling."
- elif test "x$gmp_found" != "xno"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
-$as_echo "not found" >&6; }
- fi
- echo '*******************************************************************************'
-echo ' WARNING: GMP not found! (this is not a problem for the moment)'
-echo
-echo ' GMP library compiled with --enable-cxx is required for this library to compile.'
-echo ' Please make sure GMP is installed and specify its location with the option'
-echo ' --with-gmp=<prefix> when running configure.'
-echo ' Do not forget to set/export LD_LIBRARY_PATH if necessary.'
-echo '*******************************************************************************'
-
-fi
-
-
-CXXFLAGS=${BACKUP_CXXFLAGS}
-LIBS=${BACKUP_LIBS}
-#unset LD_LIBRARY_PATH
-
-
-
-
-
-
-
-# Check whether --with-givaro was given.
-if test "${with_givaro+set}" = set; then :
- withval=$with_givaro; if test "$withval" = yes ; then
- GIVARO_HOME_PATH="${DEFAULT_CHECKING_PATH}"
- elif test "$withval" != no ; then
- GIVARO_HOME_PATH="$withval ${DEFAULT_CHECKING_PATH}"
- fi
-else
- GIVARO_HOME_PATH="${DEFAULT_CHECKING_PATH}"
-fi
-
-
-
-version_min=30700
-version_max=30800
-
-
-BACKUP_CXXFLAGS=${CXXFLAGS}
-BACKUP_LIBS=${LIBS}
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for GIVARO >= $version_min and < $version_max" >&5
-$as_echo_n "checking for GIVARO >= $version_min and < $version_max... " >&6; }
-
-for GIVARO_HOME in ${GIVARO_HOME_PATH}
- do
-if test -r "$GIVARO_HOME/include/givaro/givconfig.h"; then
-
- if test "x$GIVARO_HOME" != "x/usr" -a "x$GIVARO_HOME" != "x/usr/local"; then
- GIVARO_CFLAGS="-I${GIVARO_HOME}/include"
- GIVARO_LIBS="-L${GIVARO_HOME}/lib -lgivaro"
- else
- GIVARO_CFLAGS=
- GIVARO_LIBS="-lgivaro"
- fi
- CXXFLAGS="${BACKUP_CXXFLAGS} ${GIVARO_CFLAGS} ${GMP_CFLAGS}"
- LIBS="${BACKUP_LIBS} ${GIVARO_LIBS} ${GMP_LIBS}"
-
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <givaro/givinteger.h>
-int
-main ()
-{
-Givaro::Integer a;
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-
- if test "$cross_compiling" = yes; then :
-
- givaro_found="yes"
- givaro_cross="yes"
-
- break
-
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <givaro/givconfig.h>
- int main () { if (GIVARO_VERSION < $version_min || GIVARO_VERSION >= $version_max || GIVARO_VERSION>0x030000) return -1; else return 0; /* old version of Givaro are defined as hexa 0x03yyzz*/ }
-
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
- givaro_found="yes"
- break
-
-else
-
- givaro_problem="$problem $GIVARO_HOME"
- unset GIVARO_CFLAGS
- unset GIVARO_LIBS
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-else
-
- givaro_found="no"
- givaro_checked="$checked $GIVARO_HOME"
- unset GIVARO_CFLAGS
- unset GIVARO_LIBS
-
-
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-else
- givaro_found="no"
-fi
-done
-
-if test "x$givaro_found" = "xyes" ; then
-
-
-
-$as_echo "#define HAVE_GIVARO 1" >>confdefs.h
-
- HAVE_GIVARO=yes
- if test "x$givaro_cross" != "xyes"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: found" >&5
-$as_echo "found" >&6; }
- else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unknown" >&5
-$as_echo "unknown" >&6; }
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your GIVARO version is new enough. I am assuming it is."
- fi
- :
-elif test -n "$givaro_problem"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: problem" >&5
-$as_echo "problem" >&6; }
- echo "Sorry, your GIVARO version is too old. Disabling."
-
-echo '*******************************************************************************'
-echo ' WARNING: GIVARO not found! (this is not a problem for the moment)'
-echo
-echo ' GIVARO library is required for some tests in this library.'
-echo ' Please make sure GIVARO is installed and specify its location with the'
-echo ' option --with-givaro=<prefix> when running configure.'
-echo ' Do not forget to set/export LD_LIBRARY_PATH if necessary.'
-echo '*******************************************************************************'
-
-elif test "x$givaro_found" = "xno" ; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
-$as_echo "not found" >&6; }
-
-echo '*******************************************************************************'
-echo ' WARNING: GIVARO not found! (this is not a problem for the moment)'
-echo
-echo ' GIVARO library is required for some tests in this library.'
-echo ' Please make sure GIVARO is installed and specify its location with the'
-echo ' option --with-givaro=<prefix> when running configure.'
-echo ' Do not forget to set/export LD_LIBRARY_PATH if necessary.'
-echo '*******************************************************************************'
-
-fi
-
- if test "x$HAVE_GIVARO" = "xyes"; then
- LINBOX_HAVE_GIVARO_TRUE=
- LINBOX_HAVE_GIVARO_FALSE='#'
-else
- LINBOX_HAVE_GIVARO_TRUE='#'
- LINBOX_HAVE_GIVARO_FALSE=
-fi
-
-
-CXXFLAGS=${BACKUP_CXXFLAGS}
-LIBS=${BACKUP_LIBS}
-#unset LD_LIBRARY_PATH
-
-
-
-BLAS_FOUND=false
-
-
-# Check whether --with-blas was given.
-if test "${with_blas+set}" = set; then :
- withval=$with_blas;
-fi
-
-
- BACKUP_CXXFLAGS=${CXXFLAGS}
- BACKUP_LIBS=${LIBS}
-
- if test -n "$with_blas"; then :
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking \"for BLAS ($with_blas)\"" >&5
-$as_echo_n "checking \"for BLAS ($with_blas)\"... " >&6; }
-
- BLAS_LIBS="$with_blas"
- CBLAS_FLAG="-D__FFLASFFPACK_HAVE_CBLAS"
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
-int
-main ()
-{
-double a;
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-
- if test "$cross_compiling" = yes; then :
-
- blas_found="yes"
- blas_cross="yes"
-
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- }
-
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
- blas_found="yes"
-
-else
-
- blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-else
-
- blas_found="no"
-
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-
-
- if test "x$blas_found" = "xyes" ; then :
- BLAS_VENDOR="USER"
-
-
-
-
-
-$as_echo "#define HAVE_BLAS 1" >>confdefs.h
-
-
-$as_echo "#define HAVE_CBLAS 1" >>confdefs.h
-
- BLAS_FOUND=true
-
- HAVE_BLAS=yes
- if test "x$blas_cross" != "xyes"; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: found (cblas)" >&5
-$as_echo "found (cblas)" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unknown" >&5
-$as_echo "unknown" >&6; }
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your BLAS are good. I am assuming it is."
-fi
-
-else
-
- CBLAS_FLAG=""
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
-
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
-int
-main ()
-{
-double a;
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-
- if test "$cross_compiling" = yes; then :
-
- blas_found="yes"
- blas_cross="yes"
-
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- }
-
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
- blas_found="yes"
-
-else
-
- blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-else
-
- blas_found="no"
-
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- if test "x$blas_found" = "xyes"; then :
- BLAS_VENDOR="USER"
-
-
-
-
-
-$as_echo "#define HAVE_BLAS 1" >>confdefs.h
-
- BLAS_FOUND=true
-
- HAVE_BLAS=yes
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: found (cblas)" >&5
-$as_echo "found (cblas)" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-
-
-
-fi
-
-
- if test "x$HAVE_BLAS" = "xyes"; then
- FFLASFFPACK_HAVE_BLAS_TRUE=
- FFLASFFPACK_HAVE_BLAS_FALSE='#'
-else
- FFLASFFPACK_HAVE_BLAS_TRUE='#'
- FFLASFFPACK_HAVE_BLAS_FALSE=
-fi
-
-
- CXXFLAGS=${BACKUP_CXXFLAGS}
- LIBS=${BACKUP_LIBS}
-
-
-
-
-
-
-# Check whether --with-gotoblas2 was given.
-if test "${with_gotoblas2+set}" = set; then :
- withval=$with_gotoblas2;
-fi
-
-
- if test -n "$with_gotoblas2" ; then :
- BLAS_HOME_PATH="${DEFAULT_CHECKING_PATH}"
- if test "$with_gotoblas2" != "yes" ; then :
- BLAS_HOME_PATH="$with_gotoblas2 ${DEFAULT_CHECKING_PATH}"
-fi
-
- BACKUP_CXXFLAGS=${CXXFLAGS}
- BACKUP_LIBS=${LIBS}
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for C interface to BLAS with -lgoto2" >&5
-$as_echo_n "checking for C interface to BLAS with -lgoto2... " >&6; }
-
-
- for BLAS_HOME in ${BLAS_HOME_PATH} ; do
- CBLAS="yes"
- CBLAS_FLAG="-D__FFLASFFPACK_HAVE_CBLAS"
-
- if test -r "$BLAS_HOME/lib/libgoto2.a" -o -r "$BLAS_HOME/lib/libgoto2.so" ; then :
- BLAS_LIBS="-lgoto2 -pthread"
- if test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"; then :
- BLAS_LIBS="-L${BLAS_HOME}/lib -Wl,-R,${BLAS_HOME}/lib ${BLAS_LIBS}"
-fi
-
-elif test -r "$BLAS_HOME/libgoto2.a" -o -r "$BLAS_HOME/libgoto2.so" ; then :
- BLAS_LIBS="-lgoto2 -pthread"
- if test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"; then :
- BLAS_LIBS="-L${BLAS_HOME} -Wl,-R,${BLAS_HOME}/lib ${BLAS_LIBS}"
-fi
-
-fi
-
- case "x$CCNAM" in #(
- "xgcc") :
- BLAS_LIBS="${BLAS_LIBS} -lgfortran" ;; #(
- "xicc") :
- BLAS_LIBS="${BLAS_LIBS} -lifcore" ;; #(
- *) :
- ;;
-esac
-
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
-int
-main ()
-{
-double a;
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-
- if test "$cross_compiling" = yes; then :
- blas_found="yes"
- blas_cross="yes"
- BLAS_PATH=${BLAS_HOME}
- break
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
- int main ()
- { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- }
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
- blas_found="yes"
- BLAS_PATH=${BLAS_HOME}
- break
-else
- blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-else
-
- blas_found="no"
- blas_checked="$checked $BLAS_HOME"
- unset BLAS_LIBS
-
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- done
-
- if test "x$blas_found" = "xyes" ; then :
-
- BLAS_VENDOR="GOTO2"
-
-
-
-
-
-$as_echo "#define HAVE_BLAS 1" >>confdefs.h
-
-
-$as_echo "#define HAVE_CBLAS 1" >>confdefs.h
-
- BLAS_FOUND=true
-
- HAVE_BLAS=yes
- if test "x$blas_cross" != "xyes" ; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: found" >&5
-$as_echo "found" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unknown" >&5
-$as_echo "unknown" >&6; }
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your BLAS are good. I am assuming it is."
-
-fi
- :
-
-elif test -n "$blas_problem" ; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not working" >&5
-$as_echo "not working" >&6; }
-elif test "x$blas_found" = "xno" ; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
-$as_echo "not found" >&6; }
-
-fi
-
-
- CXXFLAGS=${BACKUP_CXXFLAGS}
- LIBS=${BACKUP_LIBS}
-
-fi
-
-
-
-
-
-# Check whether --with-gsl was given.
-if test "${with_gsl+set}" = set; then :
- withval=$with_gsl;
-fi
-
-
- if test -n "$with_gsl" ; then :
- BLAS_HOME_PATH="${DEFAULT_CHECKING_PATH}"
- if test "$with_gsl" != "yes" ; then :
- BLAS_HOME_PATH="$with_gsl ${DEFAULT_CHECKING_PATH}"
-fi
-
- BACKUP_CXXFLAGS=${CXXFLAGS}
- BACKUP_LIBS=${LIBS}
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for C interface to BLAS with -lgsl -lgslcblas" >&5
-$as_echo_n "checking for C interface to BLAS with -lgsl -lgslcblas... " >&6; }
-
-
- for BLAS_HOME in ${BLAS_HOME_PATH} ; do
- CBLAS="yes"
- CBLAS_FLAG="-D__FFLASFFPACK_HAVE_CBLAS"
-
- if test -r "$BLAS_HOME/lib/libgsl.a" -o -r "$BLAS_HOME/lib/libgsl.so" ; then :
- BLAS_LIBS="-lgsl -lgslcblas -lm"
- BLAS_PATH="${BLAS_HOME}/lib"
- if test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"; then :
- BLAS_LIBS="-L${BLAS_HOME}/lib ${BLAS_LIBS}"
-fi
-
-elif test -r "$BLAS_HOME/libgsl.a" -o -r "$BLAS_HOME/libgsl.so" ; then :
- BLAS_LIBS="-lgsl -lgslcblas -lm"
- BLAS_PATH="${BLAS_HOME}"
- if test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"; then :
- BLAS_LIBS="-L${BLAS_HOME} ${BLAS_LIBS}"
-fi
-
-fi
-
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
-int
-main ()
-{
-double a;
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-
- if test "$cross_compiling" = yes; then :
- blas_found="yes"
- blas_cross="yes"
- break
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
- int main ()
- { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- }
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
- blas_found="yes"
- break
-else
- blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-else
-
- blas_found="no"
- blas_checked="$checked $BLAS_HOME"
- unset BLAS_LIBS
-
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- done
-
- if test "x$blas_found" = "xyes" ; then :
-
- BLAS_VENDOR="GSL"
-
-
-
-
-
-$as_echo "#define HAVE_BLAS 1" >>confdefs.h
-
-
-$as_echo "#define HAVE_CBLAS 1" >>confdefs.h
-
- BLAS_FOUND=true
-
- HAVE_BLAS=yes
- if test "x$blas_cross" != "xyes" ; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: found" >&5
-$as_echo "found" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unknown" >&5
-$as_echo "unknown" >&6; }
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your BLAS are good. I am assuming it is."
-
-fi
- :
-
-elif test -n "$blas_problem" ; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not working" >&5
-$as_echo "not working" >&6; }
-elif test "x$blas_found" = "xno" ; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
-$as_echo "not found" >&6; }
-
-fi
-
-
- CXXFLAGS=${BACKUP_CXXFLAGS}
- LIBS=${BACKUP_LIBS}
-
-fi
-
-
-
-if test "$BLAS_FOUND" = "false" ; then
-
-# Check whether --with-cblas was given.
-if test "${with_cblas+set}" = set; then :
- withval=$with_cblas;
-fi
-
-
- BLAS_HOME_PATH="$with_cblas ${DEFAULT_CHECKING_PATH}"
-
-
- BACKUP_CXXFLAGS=${CXXFLAGS}
- BACKUP_LIBS=${LIBS}
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for C interface to BLAS with -lcblas" >&5
-$as_echo_n "checking for C interface to BLAS with -lcblas... " >&6; }
-
-
-
- for BLAS_HOME in ${BLAS_HOME_PATH} ; do
- CBLAS="yes"
- CBLAS_FLAG="-D__FFLASFFPACK_HAVE_CBLAS"
- ATLAS_LIBS="-lcblas"
- if test -r "/System/Library/Frameworks/Accelerate.framework" ; then :
- BLAS_LIBS="-Wl,-framework -Wl,Accelerate"
-elif test -r "$BLAS_HOME/lib/libcblas.a" -o -r "$BLAS_HOME/lib/libcblas.so" ; then :
- ATLAS_NEEDED=`nm -u $BLAS_HOME/lib/libcblas.a | grep ATL`
- ATLAS_NEEDED2=`nm -Du $BLAS_HOME/lib/libcblas.so | grep ATL`
- if test -n "$ATLAS_NEEDED" -o -n "$ATLAS_NEEDED2"; then :
- ATLAS_LIBS=" ${ATLAS_LIBS} -latlas"
-fi
-
- BLAS_LIBS=" ${ATLAS_LIBS}"
- BLAS_PATH="${BLAS_HOME}/lib"
-
- if test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"; then :
- BLAS_LIBS="-L${BLAS_HOME}/lib ${ATLAS_LIBS}"
-fi
-
-elif test -r "$BLAS_HOME/libcblas.a" -o -r "$BLAS_HOME/libcblas.so" ; then :
- ATLAS_NEEDED=`nm -u $BLAS_HOME/libcblas.a | grep ATL`
- ATLAS_NEEDED2=`nm -Du $BLAS_HOME/libcblas.so | grep ATL`
- if test -n "$ATLAS_NEEDED" -o -n "$ATLAS_NEEDED2"; then :
- ATLAS_LIBS=" ${ATLAS_LIBS} -latlas"
-fi
-
- BLAS_LIBS=" ${ATLAS_LIBS}"
- BLAS_PATH="${BLAS_HOME}"
-
- if test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"; then :
- BLAS_LIBS="-L${BLAS_HOME} ${ATLAS_LIBS}"
-fi
-
-
-fi
-
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
-
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
-int
-main ()
-{
-double a;
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-
- if test "$cross_compiling" = yes; then :
-
- blas_found="yes"
- blas_cross="yes"
- break
-
-
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- }
-
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
- blas_found="yes"
- break
-
-else
-
- blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-else
-
- blas_found="no"
- blas_checked="$checked $BLAS_HOME"
- unset BLAS_LIBS
-
-
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- done
-
-
-
- if test "x$blas_found" = "xyes" ; then :
- BLAS_VENDOR="ATLAS"
-
-
-
-
-
-
-$as_echo "#define HAVE_BLAS 1" >>confdefs.h
-
-
-$as_echo "#define HAVE_CBLAS 1" >>confdefs.h
-
- HAVE_BLAS=yes
- BLAS_FOUND=true
-
-
- if test "x$blas_cross" != "xyes" ; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: found" >&5
-$as_echo "found" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unknown" >&5
-$as_echo "unknown" >&6; }
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your BLAS are good. I am assuming it is."
-
-fi
-
- :
-
-elif test -n "$blas_problem" ; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not working" >&5
-$as_echo "not working" >&6; }
-elif test "x$blas_found" = "xno" ; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
-$as_echo "not found" >&6; }
-
-fi
- if test "x$blas_found" != "xyes" ; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for C interface to BLAS with -lblas" >&5
-$as_echo_n "checking for C interface to BLAS with -lblas... " >&6; }
-
-
-
- for BLAS_HOME in ${BLAS_HOME_PATH} ; do
- CBLAS="yes"
- CBLAS_FLAG="-D__FFLASFFPACK_HAVE_CBLAS"
- ATLAS_LIBS="-lblas"
- if test -r "$BLAS_HOME/lib/libblas.a" -o -r "$BLAS_HOME/lib/libblas.so" ; then :
-
- BLAS_LIBS=" ${ATLAS_LIBS}"
- BLAS_PATH="${BLAS_HOME}/lib"
- if test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"; then :
- BLAS_LIBS="-L${BLAS_HOME}/lib ${ATLAS_LIBS}"
-fi
-
-elif test -r "$BLAS_HOME/libblas.a" -o -r "$BLAS_HOME/libblas.so" ; then :
- BLAS_LIBS=" ${ATLAS_LIBS}"
- BLAS_PATH="${BLAS_HOME}"
- if test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"; then :
- BLAS_LIBS="-L${BLAS_HOME} ${ATLAS_LIBS}"
-fi
-
-fi
-
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
-int
-main ()
-{
-double a;
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-
- if test "$cross_compiling" = yes; then :
-
- blas_found="yes"
- blas_cross="yes"
- break
-
-
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
-#include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- }
-
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
- blas_found="yes"
- break
-
-else
-
- blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-else
-
- blas_found="no"
- blas_checked="$checked $BLAS_HOME"
- unset BLAS_LIBS
-
-
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- done ;
-
- if test "x$blas_found" = "xyes" ; then :
- BLAS_VENDOR="OTHER"
-
-
-
-
-
-$as_echo "#define HAVE_BLAS 1" >>confdefs.h
-
-
-$as_echo "#define HAVE_CBLAS 1" >>confdefs.h
-
- HAVE_BLAS=yes
- BLAS_FOUND=true
-
- if test "x$blas_cross" != "xyes" ; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: found" >&5
-$as_echo "found" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unknown" >&5
-$as_echo "unknown" >&6; }
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your BLAS are good. I am assuming it is."
-
-fi
- :
-
-elif test -n "$blas_problem" ; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not working" >&5
-$as_echo "not working" >&6; }
-elif test "x$blas_found" = "xno" ; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
-$as_echo "not found" >&6; }
-
-fi
-
-fi
-
-
- if test "x$HAVE_BLAS" = "xyes"; then
- FFLASFFPACK_HAVE_BLAS_TRUE=
- FFLASFFPACK_HAVE_BLAS_FALSE='#'
-else
- FFLASFFPACK_HAVE_BLAS_TRUE='#'
- FFLASFFPACK_HAVE_BLAS_FALSE=
-fi
-
-
- CXXFLAGS=${BACKUP_CXXFLAGS}
- LIBS=${BACKUP_LIBS}
-
-
-
-fi
-
-if test "$BLAS_FOUND" = "false" ; then
-
-# Check whether --with-otherblas was given.
-if test "${with_otherblas+set}" = set; then :
- withval=$with_otherblas;
-fi
-
-
- BLAS_HOME_PATH="$with_otherblas ${DEFAULT_CHECKING_PATH}"
-
-
- BACKUP_CXXFLAGS=${CXXFLAGS}
- BACKUP_LIBS=${LIBS}
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for other BLAS" >&5
-$as_echo_n "checking for other BLAS... " >&6; }
-
- for BLAS_HOME in ${BLAS_HOME_PATH}; do
- CBLAS="no"
- CBLAS_FLAG=""
- BLAS_LIBS=""
-
-
- if test -r "$BLAS_HOME/lib/libblas.a" -o -r "$BLAS_HOME/lib/libblas.so" ; then :
- BLAS_LIBS="-lblas"
- BLAS_PATH="${BLAS_HOME}/lib"
- if test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"; then :
- BLAS_LIBS="-L${BLAS_HOME}/lib -lblas"
-fi
-
-elif test -r "$BLAS_HOME/libblas.a" -o -r "$BLAS_HOME/libblas.so" ; then :
- BLAS_LIBS="-lblas"
- BLAS_PATH="${BLAS_HOME}"
- if test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"; then :
- BLAS_LIBS="-L${BLAS_HOME} -lblas"
-fi
-
-
-fi
-
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
-int
-main ()
-{
-double a;
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_cxx_try_link "$LINENO"; then :
-
- if test "$cross_compiling" = yes; then :
-
- blas_found="yes"
- blas_cross="yes"
- break
-
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- }
-
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
- blas_found="yes"
- break
-
-else
-
- blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-else
-
- blas_found="no"
- blas_checked="$checked $BLAS_HOME"
- unset BLAS_LIBS
-
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- done
-
-
- if test "x$blas_found" = "xyes" ; then :
- BLAS_VENDOR="OTHER"
-
-
-
-
-
-$as_echo "#define HAVE_BLAS 1" >>confdefs.h
-
- BLAS_FOUND=true
-
- HAVE_BLAS=yes
- if test "x$blas_cross" != "xyes"; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: found" >&5
-$as_echo "found" >&6; }
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unknown" >&5
-$as_echo "unknown" >&6; }
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your BLAS are good. I am assuming it is."
-fi
- :
-
-elif test -n "$blas_problem" ; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: problem" >&5
-$as_echo "problem" >&6; }
- echo "Sorry, your BLAS are not working. Disabling."
- :
-
-elif test "x$blas_found" = "xno" ; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
-$as_echo "not found" >&6; }
- :
-
-fi
-
-
-
-
- if test "x$HAVE_BLAS" = "xyes"; then
- FFLASFFPACK_HAVE_BLAS_TRUE=
- FFLASFFPACK_HAVE_BLAS_FALSE='#'
-else
- FFLASFFPACK_HAVE_BLAS_TRUE='#'
- FFLASFFPACK_HAVE_BLAS_FALSE=
-fi
-
-
- CXXFLAGS=${BACKUP_CXXFLAGS}
- LIBS=${BACKUP_LIBS}
-
-
-
-fi
-
-if test "$BLAS_FOUND" = "false" ; then
- echo ''
- echo '*******************************************************************************'
- echo ' ERROR: BLAS not found!'
- echo
- echo ' BLAS routines are required for this library to compile. Please'
- echo ' make sure BLAS are installed and specify its location with the option'
- echo ' --with-blas=<lib> when running configure (or --with-cblas... see configure --help).'
- echo '*******************************************************************************'
- exit 1
-fi
-
-
-
- BACKUP_CXXFLAGS=${CXXFLAGS}
- BACKUP_LIBS=${LIBS}
-
-
-
-# Check whether --with-lapack was given.
-if test "${with_lapack+set}" = set; then :
- withval=$with_lapack;
-fi
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for LAPACK" >&5
-$as_echo_n "checking for LAPACK... " >&6; }
-
-
- if test "$with_lapack" = "blas"; then :
-
- case ${BLAS_VENDOR} in #(
- "ATLAS") :
-
- LAPACK_LIBS="-llapack"
- if test -r "${BLAS_PATH}/liblapack_atlas.a" -o -r "${BLAS_PATH}/liblapack_atlas.so"; then :
- LAPACK_LIBS="${LAPACK_LIBS} -llapack_atlas"
-fi
-
- ;; #(
- "GSL") :
- LAPACK_LIBS="" ;; #(
- "GOTO2") :
- LAPACK_LIBS="" ;; #(
- "OTHER") :
- LAPACK_LIBS="" ;; #(
- *) :
- LAPACK_LIBS="" ;;
-esac
-
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG} "
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS} ${LAPACK_LIBS}"
-
-
- if test "$cross_compiling" = yes; then :
- dgetrf_found=""
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #define __FFLASFFPACK_HAVE_LAPACK 1
- #define __FFLASFFPACK_HAVE_CLAPACK 1
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.};
- int ipiv[2];
- clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
- if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
- return -1;
- else
- return 0;
- }
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
- dgetrf_found="yes"
-else
- dgetrf_problem="problem"
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
- if test "${dgetrf_found}" = "yes"; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes (clapack)" >&5
-$as_echo "yes (clapack)" >&6; }
-
-$as_echo "#define HAVE_LAPACK 1" >>confdefs.h
-
-
-$as_echo "#define HAVE_CLAPACK 1" >>confdefs.h
-
-
-else
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG} "
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS} ${LAPACK_LIBS}"
-
-
- if test "$cross_compiling" = yes; then :
- dgetrf_found=""
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #define __FFLASFFPACK_HAVE_LAPACK 1
- //#define __FFLASFFPACK_HAVE_CLAPACK 1
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.};
- int ipiv[2];
- clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
- if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
- return -1;
- else
- return 0;
- }
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
- dgetrf_found="yes"
-else
- dgetrf_problem="problem"
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
- if test "${dgetrf_found}" = "yes"; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes (lapack)" >&5
-$as_echo "yes (lapack)" >&6; }
-
-$as_echo "#define HAVE_LAPACK 1" >>confdefs.h
-
-
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-fi
-
-else
-
- if test "x$BLAS_VENDOR" = "xUSER"; then :
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG} "
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
-
- if test "$cross_compiling" = yes; then :
-
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #define __FFLASFFPACK_HAVE_LAPACK 1
- #define __FFLASFFPACK_HAVE_CLAPACK 1
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.};
- int ipiv[2];
- clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
- if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
- return -1;
- else
- return 0;
- }
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
- dgetrf_found="yes"
-
-else
- dgetrf_problem="problem"
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
- if test "${dgetrf_found}" = "yes"; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes (clapack)" >&5
-$as_echo "yes (clapack)" >&6; }
-
-$as_echo "#define HAVE_LAPACK 1" >>confdefs.h
-
-
-$as_echo "#define HAVE_CLAPACK 1" >>confdefs.h
-
-
-else
-
-
- if test "$cross_compiling" = yes; then :
-
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #define __FFLASFFPACK_HAVE_LAPACK 1
- //#define __FFLASFFPACK_HAVE_CLAPACK 1
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.};
- int ipiv[2];
- clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
- if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
- return -1;
- else
- return 0;
- }
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
- dgetrf_found="yes"
-
-else
- dgetrf_problem="$problem"
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
- if test "x${dgetrf_found}" = "xyes"; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes (lapack)" >&5
-$as_echo "yes (lapack)" >&6; }
-
-$as_echo "#define HAVE_LAPACK 1" >>confdefs.h
-
-
-else
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no " >&5
-$as_echo "no " >&6; }
-
-fi
-
-fi
-
-
-else
-
-
- LAPACK_HOME_PATH="$with_lapack ${DEFAULT_CHECKING_PATH}"
- for LAPACK_HOME in ${LAPACK_HOME_PATH} ; do
- if test -r "$LAPACK_HOME/lib/liblapack.a" -o -r "$LAPACK_HOME/lib/liblapack.so" ; then :
- LAPACK_LIBS="-llapack"
- LAPACK_PATH="${LAPACK_HOME}/lib"
- if test "x$LAPACK_HOME" != "x/usr" -a "x$LAPACK_HOME" != "x/usr/local"; then :
- LAPACK_LIBS="-L${LAPACK_HOME}/lib -llapack"
-fi
-
-elif test -r "$LAPACK_HOME/liblapack.a" -o -r "$LAPACK_HOME/liblapack.so" ; then :
- LAPACK_LIBS="-llapack"
- LAPACK_PATH="${LAPACK_HOME}"
- if test "x$LAPACK_HOME" != "x/usr" -a "x$LAPACK_HOME" != "x/usr/local"; then :
- LAPACK_LIBS="-L${LAPACK_HOME} -llapack"
-fi
-
-
-fi
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG} "
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS} ${LAPACK_LIBS}"
-
-
- if test "$cross_compiling" = yes; then :
- break
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #define __FFLASFFPACK_HAVE_LAPACK 1
- #define __FFLASFFPACK_HAVE_CLAPACK 1
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.};
- int ipiv[2];
- clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
- if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
- return -1;
- else
- return 0;
- }
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
- dgetrf_found="yes"
- break
-else
- dgetrf_problem="problem"
- unset LAPACK_LIBS
- fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
- done ;
- if test "${dgetrf_found}" = "yes"; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes (clapack)" >&5
-$as_echo "yes (clapack)" >&6; }
-
-$as_echo "#define HAVE_LAPACK 1" >>confdefs.h
-
-
-$as_echo "#define HAVE_CLAPACK 1" >>confdefs.h
-
-
-else
-
- for LAPACK_HOME in ${LAPACK_HOME_PATH} ; do
- if test -r "$LAPACK_HOME/lib/liblapack.a" -o -r "$LAPACK_HOME/lib/liblapack.so" ; then :
- LAPACK_LIBS="-llapack"
- LAPACK_PATH="${LAPACK_HOME}/lib"
- if test "x$LAPACK_HOME" != "x/usr" -a "x$LAPACK_HOME" != "x/usr/local"; then :
- LAPACK_LIBS="-L${LAPACK_HOME}/lib -llapack"
-fi
-
-elif test -r "$LAPACK_HOME/liblapack.a" -o -r "$LAPACK_HOME/liblapack.so" ; then :
- LAPACK_LIBS="-llapack"
- LAPACK_PATH="${LAPACK_HOME}"
- if test "x$LAPACK_HOME" != "x/usr" -a "x$LAPACK_HOME" != "x/usr/local"; then :
- LAPACK_LIBS="-L${LAPACK_HOME} -llapack"
-fi
-
-
-fi
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG} "
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS} ${LAPACK_LIBS}"
-
-
- if test "$cross_compiling" = yes; then :
- break
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __FFLASFFPACK_CONFIGURATION
- #define __FFLASFFPACK_HAVE_LAPACK 1
- //#define __FFLASFFPACK_HAVE_CLAPACK 1
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.};
- int ipiv[2];
- clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
- if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
- return -1;
- else
- return 0;
- }
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
- dgetrf_found="yes"
- break
-else
- dgetrf_problem="$problem"
- unset LAPACK_LIBS
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
- done ;
- if test "${dgetrf_found}" = "yes"; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes (lapack)" >&5
-$as_echo "yes (lapack)" >&6; }
-
-$as_echo "#define HAVE_LAPACK 1" >>confdefs.h
-
-
-else
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no " >&5
-$as_echo "no " >&6; }
-
-fi
-
-fi
-
-fi
-
-fi
-
-
- CXXFLAGS=${BACKUP_CXXFLAGS}
- LIBS=${BACKUP_LIBS}
-
-
-
-
-BLAS_LIBS="${BLAS_LIBS} ${LAPACK_LIBS}"
-
-
-# AM_CONDITIONAL(FFLASFFPACK_HAVE_BLAS, test "x$BLAS_FOUND" != "xfalse")
-
-
-# FF_BENCH
-
-
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build documentation" >&5
-$as_echo_n "checking whether to build documentation... " >&6; }
-
-
-
-# Check whether --with-docdir was given.
-if test "${with_docdir+set}" = set; then :
- withval=$with_docdir;
- FFLASFFPACK_DOC_PATH="$withval"
-
-else
-
- eval FFLASFFPACK_DOC_PATH="${prefix}/docs"
-
-fi
-
-
-
-
-
-# Check whether --with-doxygen was given.
-if test "${with_doxygen+set}" = set; then :
- withval=$with_doxygen;
- DOXYGEN_PATH="$PATH $withval"
-
-else
-
- DOXYGEN_PATH="$PATH"
-
-fi
-
-
-# Check whether --enable-doc was given.
-if test "${enable_doc+set}" = set; then :
- enableval=$enable_doc;
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether doxygen works" >&5
-$as_echo_n "checking whether doxygen works... " >&6; }
-export PATH=$DOXYGEN_PATH
-(doxygen --version) < /dev/null > /dev/null 2>&1 || {
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
- echo
- echo "You must have doxygen installed to create documentation for"
- echo "FFLAS-FFPACK. This error only happens if you use --enable-doc."
- echo "Download the appropriate package for your distribution, or get"
- echo "the source tarball from http://www.stack.nl/~dimitri/doxygen/"
- exit -1
-}
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
- if true; then
- FFLASFFPACK_BUILD_DOC_TRUE=
- FFLASFFPACK_BUILD_DOC_FALSE='#'
-else
- FFLASFFPACK_BUILD_DOC_TRUE='#'
- FFLASFFPACK_BUILD_DOC_FALSE=
-fi
-
-
-else
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
- if false; then
- FFLASFFPACK_BUILD_DOC_TRUE=
- FFLASFFPACK_BUILD_DOC_FALSE='#'
-else
- FFLASFFPACK_BUILD_DOC_TRUE='#'
- FFLASFFPACK_BUILD_DOC_FALSE=
-fi
-
-
-fi
-
-
-
-
-# if test ! -d ./benchmarks/data ; then
- # echo "Creating data dir in benchmark" ;
- # mkdir ./benchmarks/data ;
-# fi
-
-CXXFLAGS="${GMP_CFLAGS} ${CXXFLAGS}"
-
-
-echo "-----------------------------------------------"
-
-ac_config_files="$ac_config_files Makefile macros/Makefile fflas-ffpack-config fflas-ffpack/Makefile fflas-ffpack/fflas/Makefile fflas-ffpack/ffpack/Makefile fflas-ffpack/field/Makefile utils/Makefile doc/Makefile tests/Makefile benchmark/Makefile optimiser/Makefile benchmark/src/Makefile benchmark/src/BLOCKING/Makefile benchmark/src/FFLAS_FFPACK/Makefile benchmark/src/BLAS_LAPACK/Makefile benchmark/html/Makefile benchmark/graph/Makefile benchmark/test-src/Makefile"
-
-cat >confcache <<\_ACEOF
-# This file is a shell script that caches the results of configure
-# tests run on this system so they can be shared between configure
-# scripts and configure runs, see configure's option --config-cache.
-# It is not useful on other systems. If it contains results you don't
-# want to keep, you may remove or edit it.
-#
-# config.status only pays attention to the cache file if you give it
-# the --recheck option to rerun configure.
-#
-# `ac_cv_env_foo' variables (set or unset) will be overridden when
-# loading this file, other *unset* `ac_cv_foo' will be assigned the
-# following values.
-
-_ACEOF
-
-# The following way of writing the cache mishandles newlines in values,
-# but we know of no workaround that is simple, portable, and efficient.
-# So, we kill variables containing newlines.
-# Ultrix sh set writes to stderr and can't be redirected directly,
-# and sets the high bit in the cache file unless we assign to the vars.
-(
- for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do
- eval ac_val=\$$ac_var
- case $ac_val in #(
- *${as_nl}*)
- case $ac_var in #(
- *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5
-$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
- esac
- case $ac_var in #(
- _ | IFS | as_nl) ;; #(
- BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
- *) { eval $ac_var=; unset $ac_var;} ;;
- esac ;;
- esac
- done
-
- (set) 2>&1 |
- case $as_nl`(ac_space=' '; set) 2>&1` in #(
- *${as_nl}ac_space=\ *)
- # `set' does not quote correctly, so add quotes: double-quote
- # substitution turns \\\\ into \\, and sed turns \\ into \.
- sed -n \
- "s/'/'\\\\''/g;
- s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
- ;; #(
- *)
- # `set' quotes correctly as required by POSIX, so do not add quotes.
- sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
- ;;
- esac |
- sort
-) |
- sed '
- /^ac_cv_env_/b end
- t clear
- :clear
- s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
- t end
- s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
- :end' >>confcache
-if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
- if test -w "$cache_file"; then
- if test "x$cache_file" != "x/dev/null"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
-$as_echo "$as_me: updating cache $cache_file" >&6;}
- if test ! -f "$cache_file" || test -h "$cache_file"; then
- cat confcache >"$cache_file"
- else
- case $cache_file in #(
- */* | ?:*)
- mv -f confcache "$cache_file"$$ &&
- mv -f "$cache_file"$$ "$cache_file" ;; #(
- *)
- mv -f confcache "$cache_file" ;;
- esac
- fi
- fi
- else
- { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
-$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
- fi
-fi
-rm -f confcache
-
-test "x$prefix" = xNONE && prefix=$ac_default_prefix
-# Let make expand exec_prefix.
-test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
-
-DEFS=-DHAVE_CONFIG_H
-
-ac_libobjs=
-ac_ltlibobjs=
-U=
-for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
- # 1. Remove the extension, and $U if already installed.
- ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
- ac_i=`$as_echo "$ac_i" | sed "$ac_script"`
- # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR
- # will be set to the directory where LIBOBJS objects are built.
- as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext"
- as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo'
-done
-LIBOBJS=$ac_libobjs
-
-LTLIBOBJS=$ac_ltlibobjs
-
-
- if test -n "$EXEEXT"; then
- am__EXEEXT_TRUE=
- am__EXEEXT_FALSE='#'
-else
- am__EXEEXT_TRUE='#'
- am__EXEEXT_FALSE=
-fi
-
-if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then
- as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
-if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then
- as_fn_error $? "conditional \"AMDEP\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
-if test -z "${INSIDE_GNOME_COMMON_TRUE}" && test -z "${INSIDE_GNOME_COMMON_FALSE}"; then
- as_fn_error $? "conditional \"INSIDE_GNOME_COMMON\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
-if test -z "${DEBUG_TRUE}" && test -z "${DEBUG_FALSE}"; then
- as_fn_error $? "conditional \"DEBUG\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
-if test -z "${PROFILE_TRUE}" && test -z "${PROFILE_FALSE}"; then
- as_fn_error $? "conditional \"PROFILE\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
-
-if test -z "${LINBOX_HAVE_GIVARO_TRUE}" && test -z "${LINBOX_HAVE_GIVARO_FALSE}"; then
- as_fn_error $? "conditional \"LINBOX_HAVE_GIVARO\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
-if test -z "${FFLASFFPACK_HAVE_BLAS_TRUE}" && test -z "${FFLASFFPACK_HAVE_BLAS_FALSE}"; then
- as_fn_error $? "conditional \"FFLASFFPACK_HAVE_BLAS\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
-if test -z "${FFLASFFPACK_HAVE_BLAS_TRUE}" && test -z "${FFLASFFPACK_HAVE_BLAS_FALSE}"; then
- as_fn_error $? "conditional \"FFLASFFPACK_HAVE_BLAS\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
-if test -z "${FFLASFFPACK_HAVE_BLAS_TRUE}" && test -z "${FFLASFFPACK_HAVE_BLAS_FALSE}"; then
- as_fn_error $? "conditional \"FFLASFFPACK_HAVE_BLAS\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
-if test -z "${FFLASFFPACK_BUILD_DOC_TRUE}" && test -z "${FFLASFFPACK_BUILD_DOC_FALSE}"; then
- as_fn_error $? "conditional \"FFLASFFPACK_BUILD_DOC\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
-if test -z "${FFLASFFPACK_BUILD_DOC_TRUE}" && test -z "${FFLASFFPACK_BUILD_DOC_FALSE}"; then
- as_fn_error $? "conditional \"FFLASFFPACK_BUILD_DOC\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
-
-: "${CONFIG_STATUS=./config.status}"
-ac_write_fail=0
-ac_clean_files_save=$ac_clean_files
-ac_clean_files="$ac_clean_files $CONFIG_STATUS"
-{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5
-$as_echo "$as_me: creating $CONFIG_STATUS" >&6;}
-as_write_fail=0
-cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1
-#! $SHELL
-# Generated by $as_me.
-# Run this file to recreate the current configuration.
-# Compiler output produced by configure, useful for debugging
-# configure, is in config.log if it exists.
-
-debug=false
-ac_cs_recheck=false
-ac_cs_silent=false
-
-SHELL=\${CONFIG_SHELL-$SHELL}
-export SHELL
-_ASEOF
-cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1
-## -------------------- ##
-## M4sh Initialization. ##
-## -------------------- ##
-
-# Be more Bourne compatible
-DUALCASE=1; export DUALCASE # for MKS sh
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :
- emulate sh
- NULLCMD=:
- # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
- # is contrary to our usage. Disable this feature.
- alias -g '${1+"$@"}'='"$@"'
- setopt NO_GLOB_SUBST
-else
- case `(set -o) 2>/dev/null` in #(
- *posix*) :
- set -o posix ;; #(
- *) :
- ;;
-esac
-fi
-
-
-as_nl='
-'
-export as_nl
-# Printing a long string crashes Solaris 7 /usr/bin/printf.
-as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
-as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
-as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
-# Prefer a ksh shell builtin over an external printf program on Solaris,
-# but without wasting forks for bash or zsh.
-if test -z "$BASH_VERSION$ZSH_VERSION" \
- && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then
- as_echo='print -r --'
- as_echo_n='print -rn --'
-elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
- as_echo='printf %s\n'
- as_echo_n='printf %s'
-else
- if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
- as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
- as_echo_n='/usr/ucb/echo -n'
- else
- as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
- as_echo_n_body='eval
- arg=$1;
- case $arg in #(
- *"$as_nl"*)
- expr "X$arg" : "X\\(.*\\)$as_nl";
- arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
- esac;
- expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
- '
- export as_echo_n_body
- as_echo_n='sh -c $as_echo_n_body as_echo'
- fi
- export as_echo_body
- as_echo='sh -c $as_echo_body as_echo'
-fi
-
-# The user is always right.
-if test "${PATH_SEPARATOR+set}" != set; then
- PATH_SEPARATOR=:
- (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
- (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
- PATH_SEPARATOR=';'
- }
-fi
-
-
-# IFS
-# We need space, tab and new line, in precisely that order. Quoting is
-# there to prevent editors from complaining about space-tab.
-# (If _AS_PATH_WALK were called with IFS unset, it would disable word
-# splitting by setting IFS to empty value.)
-IFS=" "" $as_nl"
-
-# Find who we are. Look in the path if we contain no directory separator.
-as_myself=
-case $0 in #((
- *[\\/]* ) as_myself=$0 ;;
- *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
- done
-IFS=$as_save_IFS
-
- ;;
-esac
-# We did not find ourselves, most probably we were run as `sh COMMAND'
-# in which case we are not to be found in the path.
-if test "x$as_myself" = x; then
- as_myself=$0
-fi
-if test ! -f "$as_myself"; then
- $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
- exit 1
-fi
-
-# Unset variables that we do not need and which cause bugs (e.g. in
-# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1"
-# suppresses any "Segmentation fault" message there. '((' could
-# trigger a bug in pdksh 5.2.14.
-for as_var in BASH_ENV ENV MAIL MAILPATH
-do eval test x\${$as_var+set} = xset \
- && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
-done
-PS1='$ '
-PS2='> '
-PS4='+ '
-
-# NLS nuisances.
-LC_ALL=C
-export LC_ALL
-LANGUAGE=C
-export LANGUAGE
-
-# CDPATH.
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
-
-
-# as_fn_error STATUS ERROR [LINENO LOG_FD]
-# ----------------------------------------
-# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
-# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
-# script with STATUS, using 1 if that was 0.
-as_fn_error ()
-{
- as_status=$1; test $as_status -eq 0 && as_status=1
- if test "$4"; then
- as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
- fi
- $as_echo "$as_me: error: $2" >&2
- as_fn_exit $as_status
-} # as_fn_error
-
-
-# as_fn_set_status STATUS
-# -----------------------
-# Set $? to STATUS, without forking.
-as_fn_set_status ()
-{
- return $1
-} # as_fn_set_status
-
-# as_fn_exit STATUS
-# -----------------
-# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
-as_fn_exit ()
-{
- set +e
- as_fn_set_status $1
- exit $1
-} # as_fn_exit
-
-# as_fn_unset VAR
-# ---------------
-# Portably unset VAR.
-as_fn_unset ()
-{
- { eval $1=; unset $1;}
-}
-as_unset=as_fn_unset
-# as_fn_append VAR VALUE
-# ----------------------
-# Append the text in VALUE to the end of the definition contained in VAR. Take
-# advantage of any shell optimizations that allow amortized linear growth over
-# repeated appends, instead of the typical quadratic growth present in naive
-# implementations.
-if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
- eval 'as_fn_append ()
- {
- eval $1+=\$2
- }'
-else
- as_fn_append ()
- {
- eval $1=\$$1\$2
- }
-fi # as_fn_append
-
-# as_fn_arith ARG...
-# ------------------
-# Perform arithmetic evaluation on the ARGs, and store the result in the
-# global $as_val. Take advantage of shells that can avoid forks. The arguments
-# must be portable across $(()) and expr.
-if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
- eval 'as_fn_arith ()
- {
- as_val=$(( $* ))
- }'
-else
- as_fn_arith ()
- {
- as_val=`expr "$@" || test $? -eq 1`
- }
-fi # as_fn_arith
-
-
-if expr a : '\(a\)' >/dev/null 2>&1 &&
- test "X`expr 00001 : '.*\(...\)'`" = X001; then
- as_expr=expr
-else
- as_expr=false
-fi
-
-if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
- as_basename=basename
-else
- as_basename=false
-fi
-
-if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
- as_dirname=dirname
-else
- as_dirname=false
-fi
-
-as_me=`$as_basename -- "$0" ||
-$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
- X"$0" : 'X\(//\)$' \| \
- X"$0" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X/"$0" |
- sed '/^.*\/\([^/][^/]*\)\/*$/{
- s//\1/
- q
- }
- /^X\/\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\/\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
-
-# Avoid depending upon Character Ranges.
-as_cr_letters='abcdefghijklmnopqrstuvwxyz'
-as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-as_cr_Letters=$as_cr_letters$as_cr_LETTERS
-as_cr_digits='0123456789'
-as_cr_alnum=$as_cr_Letters$as_cr_digits
-
-ECHO_C= ECHO_N= ECHO_T=
-case `echo -n x` in #(((((
--n*)
- case `echo 'xy\c'` in
- *c*) ECHO_T=' ';; # ECHO_T is single tab character.
- xy) ECHO_C='\c';;
- *) echo `echo ksh88 bug on AIX 6.1` > /dev/null
- ECHO_T=' ';;
- esac;;
-*)
- ECHO_N='-n';;
-esac
-
-rm -f conf$$ conf$$.exe conf$$.file
-if test -d conf$$.dir; then
- rm -f conf$$.dir/conf$$.file
-else
- rm -f conf$$.dir
- mkdir conf$$.dir 2>/dev/null
-fi
-if (echo >conf$$.file) 2>/dev/null; then
- if ln -s conf$$.file conf$$ 2>/dev/null; then
- as_ln_s='ln -s'
- # ... but there are two gotchas:
- # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
- # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
- # In both cases, we have to default to `cp -pR'.
- ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
- as_ln_s='cp -pR'
- elif ln conf$$.file conf$$ 2>/dev/null; then
- as_ln_s=ln
- else
- as_ln_s='cp -pR'
- fi
-else
- as_ln_s='cp -pR'
-fi
-rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
-rmdir conf$$.dir 2>/dev/null
-
-
-# as_fn_mkdir_p
-# -------------
-# Create "$as_dir" as a directory, including parents if necessary.
-as_fn_mkdir_p ()
-{
-
- case $as_dir in #(
- -*) as_dir=./$as_dir;;
- esac
- test -d "$as_dir" || eval $as_mkdir_p || {
- as_dirs=
- while :; do
- case $as_dir in #(
- *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
- *) as_qdir=$as_dir;;
- esac
- as_dirs="'$as_qdir' $as_dirs"
- as_dir=`$as_dirname -- "$as_dir" ||
-$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$as_dir" : 'X\(//\)[^/]' \| \
- X"$as_dir" : 'X\(//\)$' \| \
- X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$as_dir" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
- test -d "$as_dir" && break
- done
- test -z "$as_dirs" || eval "mkdir $as_dirs"
- } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
-
-
-} # as_fn_mkdir_p
-if mkdir -p . 2>/dev/null; then
- as_mkdir_p='mkdir -p "$as_dir"'
-else
- test -d ./-p && rmdir ./-p
- as_mkdir_p=false
-fi
-
-
-# as_fn_executable_p FILE
-# -----------------------
-# Test if FILE is an executable regular file.
-as_fn_executable_p ()
-{
- test -f "$1" && test -x "$1"
-} # as_fn_executable_p
-as_test_x='test -x'
-as_executable_p=as_fn_executable_p
-
-# Sed expression to map a string onto a valid CPP name.
-as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
-
-# Sed expression to map a string onto a valid variable name.
-as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
-
-
-exec 6>&1
-## ----------------------------------- ##
-## Main body of $CONFIG_STATUS script. ##
-## ----------------------------------- ##
-_ASEOF
-test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1
-
-cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
-# Save the log message, to keep $0 and so on meaningful, and to
-# report actual input values of CONFIG_FILES etc. instead of their
-# values after options handling.
-ac_log="
-This file was extended by FFLAS-FFPACK $as_me 1.6.0, which was
-generated by GNU Autoconf 2.69. Invocation command line was
-
- CONFIG_FILES = $CONFIG_FILES
- CONFIG_HEADERS = $CONFIG_HEADERS
- CONFIG_LINKS = $CONFIG_LINKS
- CONFIG_COMMANDS = $CONFIG_COMMANDS
- $ $0 $@
-
-on `(hostname || uname -n) 2>/dev/null | sed 1q`
-"
-
-_ACEOF
-
-case $ac_config_files in *"
-"*) set x $ac_config_files; shift; ac_config_files=$*;;
-esac
-
-case $ac_config_headers in *"
-"*) set x $ac_config_headers; shift; ac_config_headers=$*;;
-esac
-
-
-cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
-# Files that config.status was made for.
-config_files="$ac_config_files"
-config_headers="$ac_config_headers"
-config_commands="$ac_config_commands"
-
-_ACEOF
-
-cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
-ac_cs_usage="\
-\`$as_me' instantiates files and other configuration actions
-from templates according to the current configuration. Unless the files
-and actions are specified as TAGs, all are instantiated by default.
-
-Usage: $0 [OPTION]... [TAG]...
-
- -h, --help print this help, then exit
- -V, --version print version number and configuration settings, then exit
- --config print configuration, then exit
- -q, --quiet, --silent
- do not print progress messages
- -d, --debug don't remove temporary files
- --recheck update $as_me by reconfiguring in the same conditions
- --file=FILE[:TEMPLATE]
- instantiate the configuration file FILE
- --header=FILE[:TEMPLATE]
- instantiate the configuration header FILE
-
-Configuration files:
-$config_files
-
-Configuration headers:
-$config_headers
-
-Configuration commands:
-$config_commands
-
-Report bugs to <ffpack-devel at googlegroups.com>.
-FFLAS-FFPACK home page: <http://www.linalg.org/projects/fflas-ffpack>."
-
-_ACEOF
-cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
-ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
-ac_cs_version="\\
-FFLAS-FFPACK config.status 1.6.0
-configured by $0, generated by GNU Autoconf 2.69,
- with options \\"\$ac_cs_config\\"
-
-Copyright (C) 2012 Free Software Foundation, Inc.
-This config.status script is free software; the Free Software Foundation
-gives unlimited permission to copy, distribute and modify it."
-
-ac_pwd='$ac_pwd'
-srcdir='$srcdir'
-INSTALL='$INSTALL'
-MKDIR_P='$MKDIR_P'
-AWK='$AWK'
-test -n "\$AWK" || AWK=awk
-_ACEOF
-
-cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
-# The default lists apply if the user does not specify any file.
-ac_need_defaults=:
-while test $# != 0
-do
- case $1 in
- --*=?*)
- ac_option=`expr "X$1" : 'X\([^=]*\)='`
- ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
- ac_shift=:
- ;;
- --*=)
- ac_option=`expr "X$1" : 'X\([^=]*\)='`
- ac_optarg=
- ac_shift=:
- ;;
- *)
- ac_option=$1
- ac_optarg=$2
- ac_shift=shift
- ;;
- esac
-
- case $ac_option in
- # Handling of the options.
- -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
- ac_cs_recheck=: ;;
- --version | --versio | --versi | --vers | --ver | --ve | --v | -V )
- $as_echo "$ac_cs_version"; exit ;;
- --config | --confi | --conf | --con | --co | --c )
- $as_echo "$ac_cs_config"; exit ;;
- --debug | --debu | --deb | --de | --d | -d )
- debug=: ;;
- --file | --fil | --fi | --f )
- $ac_shift
- case $ac_optarg in
- *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
- '') as_fn_error $? "missing file argument" ;;
- esac
- as_fn_append CONFIG_FILES " '$ac_optarg'"
- ac_need_defaults=false;;
- --header | --heade | --head | --hea )
- $ac_shift
- case $ac_optarg in
- *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
- esac
- as_fn_append CONFIG_HEADERS " '$ac_optarg'"
- ac_need_defaults=false;;
- --he | --h)
- # Conflict between --help and --header
- as_fn_error $? "ambiguous option: \`$1'
-Try \`$0 --help' for more information.";;
- --help | --hel | -h )
- $as_echo "$ac_cs_usage"; exit ;;
- -q | -quiet | --quiet | --quie | --qui | --qu | --q \
- | -silent | --silent | --silen | --sile | --sil | --si | --s)
- ac_cs_silent=: ;;
-
- # This is an error.
- -*) as_fn_error $? "unrecognized option: \`$1'
-Try \`$0 --help' for more information." ;;
-
- *) as_fn_append ac_config_targets " $1"
- ac_need_defaults=false ;;
-
- esac
- shift
-done
-
-ac_configure_extra_args=
-
-if $ac_cs_silent; then
- exec 6>/dev/null
- ac_configure_extra_args="$ac_configure_extra_args --silent"
-fi
-
-_ACEOF
-cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
-if \$ac_cs_recheck; then
- set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
- shift
- \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
- CONFIG_SHELL='$SHELL'
- export CONFIG_SHELL
- exec "\$@"
-fi
-
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
-exec 5>>config.log
-{
- echo
- sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
-## Running $as_me. ##
-_ASBOX
- $as_echo "$ac_log"
-} >&5
-
-_ACEOF
-cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
-#
-# INIT-COMMANDS
-#
-PACKAGE="$PACKAGE"
-AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"
-
-
-# The HP-UX ksh and POSIX shell print the target directory to stdout
-# if CDPATH is set.
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
-
-sed_quote_subst='$sed_quote_subst'
-double_quote_subst='$double_quote_subst'
-delay_variable_subst='$delay_variable_subst'
-macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`'
-macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`'
-enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`'
-enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`'
-pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`'
-enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`'
-SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`'
-ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`'
-PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`'
-host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`'
-host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`'
-host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`'
-build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`'
-build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`'
-build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`'
-SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`'
-Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`'
-GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`'
-EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`'
-FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`'
-LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`'
-NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`'
-LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`'
-max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`'
-ac_objext='`$ECHO "$ac_objext" | $SED "$delay_single_quote_subst"`'
-exeext='`$ECHO "$exeext" | $SED "$delay_single_quote_subst"`'
-lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`'
-lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`'
-lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`'
-lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_quote_subst"`'
-lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`'
-reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`'
-reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`'
-OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`'
-deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`'
-file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`'
-file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`'
-want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`'
-DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`'
-sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`'
-AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`'
-AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`'
-archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`'
-STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`'
-RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`'
-old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED "$delay_single_quote_subst"`'
-old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED "$delay_single_quote_subst"`'
-old_archive_cmds='`$ECHO "$old_archive_cmds" | $SED "$delay_single_quote_subst"`'
-lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED "$delay_single_quote_subst"`'
-CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`'
-CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`'
-compiler='`$ECHO "$compiler" | $SED "$delay_single_quote_subst"`'
-GCC='`$ECHO "$GCC" | $SED "$delay_single_quote_subst"`'
-lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED "$delay_single_quote_subst"`'
-lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | $SED "$delay_single_quote_subst"`'
-lt_cv_sys_global_symbol_to_c_name_address='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address" | $SED "$delay_single_quote_subst"`'
-lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED "$delay_single_quote_subst"`'
-nm_file_list_spec='`$ECHO "$nm_file_list_spec" | $SED "$delay_single_quote_subst"`'
-lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`'
-objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`'
-MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`'
-lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | $SED "$delay_single_quote_subst"`'
-lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED "$delay_single_quote_subst"`'
-lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED "$delay_single_quote_subst"`'
-lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED "$delay_single_quote_subst"`'
-lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED "$delay_single_quote_subst"`'
-need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`'
-MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`'
-DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`'
-NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`'
-LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`'
-OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`'
-OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`'
-libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`'
-shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`'
-extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED "$delay_single_quote_subst"`'
-archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED "$delay_single_quote_subst"`'
-enable_shared_with_static_runtimes='`$ECHO "$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`'
-export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED "$delay_single_quote_subst"`'
-whole_archive_flag_spec='`$ECHO "$whole_archive_flag_spec" | $SED "$delay_single_quote_subst"`'
-compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED "$delay_single_quote_subst"`'
-old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED "$delay_single_quote_subst"`'
-old_archive_from_expsyms_cmds='`$ECHO "$old_archive_from_expsyms_cmds" | $SED "$delay_single_quote_subst"`'
-archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`'
-archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED "$delay_single_quote_subst"`'
-module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`'
-module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED "$delay_single_quote_subst"`'
-with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`'
-allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`'
-no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`'
-hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`'
-hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`'
-hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`'
-hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`'
-hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED "$delay_single_quote_subst"`'
-hardcode_shlibpath_var='`$ECHO "$hardcode_shlibpath_var" | $SED "$delay_single_quote_subst"`'
-hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED "$delay_single_quote_subst"`'
-inherit_rpath='`$ECHO "$inherit_rpath" | $SED "$delay_single_quote_subst"`'
-link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED "$delay_single_quote_subst"`'
-always_export_symbols='`$ECHO "$always_export_symbols" | $SED "$delay_single_quote_subst"`'
-export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED "$delay_single_quote_subst"`'
-exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`'
-include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`'
-prelink_cmds='`$ECHO "$prelink_cmds" | $SED "$delay_single_quote_subst"`'
-postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`'
-file_list_spec='`$ECHO "$file_list_spec" | $SED "$delay_single_quote_subst"`'
-variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED "$delay_single_quote_subst"`'
-need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`'
-need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`'
-version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`'
-runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`'
-shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`'
-shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED "$delay_single_quote_subst"`'
-libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`'
-library_names_spec='`$ECHO "$library_names_spec" | $SED "$delay_single_quote_subst"`'
-soname_spec='`$ECHO "$soname_spec" | $SED "$delay_single_quote_subst"`'
-install_override_mode='`$ECHO "$install_override_mode" | $SED "$delay_single_quote_subst"`'
-postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED "$delay_single_quote_subst"`'
-postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED "$delay_single_quote_subst"`'
-finish_cmds='`$ECHO "$finish_cmds" | $SED "$delay_single_quote_subst"`'
-finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`'
-hardcode_into_libs='`$ECHO "$hardcode_into_libs" | $SED "$delay_single_quote_subst"`'
-sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED "$delay_single_quote_subst"`'
-sys_lib_dlsearch_path_spec='`$ECHO "$sys_lib_dlsearch_path_spec" | $SED "$delay_single_quote_subst"`'
-hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`'
-enable_dlopen='`$ECHO "$enable_dlopen" | $SED "$delay_single_quote_subst"`'
-enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED "$delay_single_quote_subst"`'
-enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED "$delay_single_quote_subst"`'
-old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`'
-striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`'
-compiler_lib_search_dirs='`$ECHO "$compiler_lib_search_dirs" | $SED "$delay_single_quote_subst"`'
-predep_objects='`$ECHO "$predep_objects" | $SED "$delay_single_quote_subst"`'
-postdep_objects='`$ECHO "$postdep_objects" | $SED "$delay_single_quote_subst"`'
-predeps='`$ECHO "$predeps" | $SED "$delay_single_quote_subst"`'
-postdeps='`$ECHO "$postdeps" | $SED "$delay_single_quote_subst"`'
-compiler_lib_search_path='`$ECHO "$compiler_lib_search_path" | $SED "$delay_single_quote_subst"`'
-LD_CXX='`$ECHO "$LD_CXX" | $SED "$delay_single_quote_subst"`'
-reload_flag_CXX='`$ECHO "$reload_flag_CXX" | $SED "$delay_single_quote_subst"`'
-reload_cmds_CXX='`$ECHO "$reload_cmds_CXX" | $SED "$delay_single_quote_subst"`'
-old_archive_cmds_CXX='`$ECHO "$old_archive_cmds_CXX" | $SED "$delay_single_quote_subst"`'
-compiler_CXX='`$ECHO "$compiler_CXX" | $SED "$delay_single_quote_subst"`'
-GCC_CXX='`$ECHO "$GCC_CXX" | $SED "$delay_single_quote_subst"`'
-lt_prog_compiler_no_builtin_flag_CXX='`$ECHO "$lt_prog_compiler_no_builtin_flag_CXX" | $SED "$delay_single_quote_subst"`'
-lt_prog_compiler_pic_CXX='`$ECHO "$lt_prog_compiler_pic_CXX" | $SED "$delay_single_quote_subst"`'
-lt_prog_compiler_wl_CXX='`$ECHO "$lt_prog_compiler_wl_CXX" | $SED "$delay_single_quote_subst"`'
-lt_prog_compiler_static_CXX='`$ECHO "$lt_prog_compiler_static_CXX" | $SED "$delay_single_quote_subst"`'
-lt_cv_prog_compiler_c_o_CXX='`$ECHO "$lt_cv_prog_compiler_c_o_CXX" | $SED "$delay_single_quote_subst"`'
-archive_cmds_need_lc_CXX='`$ECHO "$archive_cmds_need_lc_CXX" | $SED "$delay_single_quote_subst"`'
-enable_shared_with_static_runtimes_CXX='`$ECHO "$enable_shared_with_static_runtimes_CXX" | $SED "$delay_single_quote_subst"`'
-export_dynamic_flag_spec_CXX='`$ECHO "$export_dynamic_flag_spec_CXX" | $SED "$delay_single_quote_subst"`'
-whole_archive_flag_spec_CXX='`$ECHO "$whole_archive_flag_spec_CXX" | $SED "$delay_single_quote_subst"`'
-compiler_needs_object_CXX='`$ECHO "$compiler_needs_object_CXX" | $SED "$delay_single_quote_subst"`'
-old_archive_from_new_cmds_CXX='`$ECHO "$old_archive_from_new_cmds_CXX" | $SED "$delay_single_quote_subst"`'
-old_archive_from_expsyms_cmds_CXX='`$ECHO "$old_archive_from_expsyms_cmds_CXX" | $SED "$delay_single_quote_subst"`'
-archive_cmds_CXX='`$ECHO "$archive_cmds_CXX" | $SED "$delay_single_quote_subst"`'
-archive_expsym_cmds_CXX='`$ECHO "$archive_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`'
-module_cmds_CXX='`$ECHO "$module_cmds_CXX" | $SED "$delay_single_quote_subst"`'
-module_expsym_cmds_CXX='`$ECHO "$module_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`'
-with_gnu_ld_CXX='`$ECHO "$with_gnu_ld_CXX" | $SED "$delay_single_quote_subst"`'
-allow_undefined_flag_CXX='`$ECHO "$allow_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`'
-no_undefined_flag_CXX='`$ECHO "$no_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`'
-hardcode_libdir_flag_spec_CXX='`$ECHO "$hardcode_libdir_flag_spec_CXX" | $SED "$delay_single_quote_subst"`'
-hardcode_libdir_separator_CXX='`$ECHO "$hardcode_libdir_separator_CXX" | $SED "$delay_single_quote_subst"`'
-hardcode_direct_CXX='`$ECHO "$hardcode_direct_CXX" | $SED "$delay_single_quote_subst"`'
-hardcode_direct_absolute_CXX='`$ECHO "$hardcode_direct_absolute_CXX" | $SED "$delay_single_quote_subst"`'
-hardcode_minus_L_CXX='`$ECHO "$hardcode_minus_L_CXX" | $SED "$delay_single_quote_subst"`'
-hardcode_shlibpath_var_CXX='`$ECHO "$hardcode_shlibpath_var_CXX" | $SED "$delay_single_quote_subst"`'
-hardcode_automatic_CXX='`$ECHO "$hardcode_automatic_CXX" | $SED "$delay_single_quote_subst"`'
-inherit_rpath_CXX='`$ECHO "$inherit_rpath_CXX" | $SED "$delay_single_quote_subst"`'
-link_all_deplibs_CXX='`$ECHO "$link_all_deplibs_CXX" | $SED "$delay_single_quote_subst"`'
-always_export_symbols_CXX='`$ECHO "$always_export_symbols_CXX" | $SED "$delay_single_quote_subst"`'
-export_symbols_cmds_CXX='`$ECHO "$export_symbols_cmds_CXX" | $SED "$delay_single_quote_subst"`'
-exclude_expsyms_CXX='`$ECHO "$exclude_expsyms_CXX" | $SED "$delay_single_quote_subst"`'
-include_expsyms_CXX='`$ECHO "$include_expsyms_CXX" | $SED "$delay_single_quote_subst"`'
-prelink_cmds_CXX='`$ECHO "$prelink_cmds_CXX" | $SED "$delay_single_quote_subst"`'
-postlink_cmds_CXX='`$ECHO "$postlink_cmds_CXX" | $SED "$delay_single_quote_subst"`'
-file_list_spec_CXX='`$ECHO "$file_list_spec_CXX" | $SED "$delay_single_quote_subst"`'
-hardcode_action_CXX='`$ECHO "$hardcode_action_CXX" | $SED "$delay_single_quote_subst"`'
-compiler_lib_search_dirs_CXX='`$ECHO "$compiler_lib_search_dirs_CXX" | $SED "$delay_single_quote_subst"`'
-predep_objects_CXX='`$ECHO "$predep_objects_CXX" | $SED "$delay_single_quote_subst"`'
-postdep_objects_CXX='`$ECHO "$postdep_objects_CXX" | $SED "$delay_single_quote_subst"`'
-predeps_CXX='`$ECHO "$predeps_CXX" | $SED "$delay_single_quote_subst"`'
-postdeps_CXX='`$ECHO "$postdeps_CXX" | $SED "$delay_single_quote_subst"`'
-compiler_lib_search_path_CXX='`$ECHO "$compiler_lib_search_path_CXX" | $SED "$delay_single_quote_subst"`'
-
-LTCC='$LTCC'
-LTCFLAGS='$LTCFLAGS'
-compiler='$compiler_DEFAULT'
-
-# A function that is used when there is no print builtin or printf.
-func_fallback_echo ()
-{
- eval 'cat <<_LTECHO_EOF
-\$1
-_LTECHO_EOF'
-}
-
-# Quote evaled strings.
-for var in SHELL \
-ECHO \
-PATH_SEPARATOR \
-SED \
-GREP \
-EGREP \
-FGREP \
-LD \
-NM \
-LN_S \
-lt_SP2NL \
-lt_NL2SP \
-reload_flag \
-OBJDUMP \
-deplibs_check_method \
-file_magic_cmd \
-file_magic_glob \
-want_nocaseglob \
-DLLTOOL \
-sharedlib_from_linklib_cmd \
-AR \
-AR_FLAGS \
-archiver_list_spec \
-STRIP \
-RANLIB \
-CC \
-CFLAGS \
-compiler \
-lt_cv_sys_global_symbol_pipe \
-lt_cv_sys_global_symbol_to_cdecl \
-lt_cv_sys_global_symbol_to_c_name_address \
-lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \
-nm_file_list_spec \
-lt_prog_compiler_no_builtin_flag \
-lt_prog_compiler_pic \
-lt_prog_compiler_wl \
-lt_prog_compiler_static \
-lt_cv_prog_compiler_c_o \
-need_locks \
-MANIFEST_TOOL \
-DSYMUTIL \
-NMEDIT \
-LIPO \
-OTOOL \
-OTOOL64 \
-shrext_cmds \
-export_dynamic_flag_spec \
-whole_archive_flag_spec \
-compiler_needs_object \
-with_gnu_ld \
-allow_undefined_flag \
-no_undefined_flag \
-hardcode_libdir_flag_spec \
-hardcode_libdir_separator \
-exclude_expsyms \
-include_expsyms \
-file_list_spec \
-variables_saved_for_relink \
-libname_spec \
-library_names_spec \
-soname_spec \
-install_override_mode \
-finish_eval \
-old_striplib \
-striplib \
-compiler_lib_search_dirs \
-predep_objects \
-postdep_objects \
-predeps \
-postdeps \
-compiler_lib_search_path \
-LD_CXX \
-reload_flag_CXX \
-compiler_CXX \
-lt_prog_compiler_no_builtin_flag_CXX \
-lt_prog_compiler_pic_CXX \
-lt_prog_compiler_wl_CXX \
-lt_prog_compiler_static_CXX \
-lt_cv_prog_compiler_c_o_CXX \
-export_dynamic_flag_spec_CXX \
-whole_archive_flag_spec_CXX \
-compiler_needs_object_CXX \
-with_gnu_ld_CXX \
-allow_undefined_flag_CXX \
-no_undefined_flag_CXX \
-hardcode_libdir_flag_spec_CXX \
-hardcode_libdir_separator_CXX \
-exclude_expsyms_CXX \
-include_expsyms_CXX \
-file_list_spec_CXX \
-compiler_lib_search_dirs_CXX \
-predep_objects_CXX \
-postdep_objects_CXX \
-predeps_CXX \
-postdeps_CXX \
-compiler_lib_search_path_CXX; do
- case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
- *[\\\\\\\`\\"\\\$]*)
- eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\""
- ;;
- *)
- eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
- ;;
- esac
-done
-
-# Double-quote double-evaled strings.
-for var in reload_cmds \
-old_postinstall_cmds \
-old_postuninstall_cmds \
-old_archive_cmds \
-extract_expsyms_cmds \
-old_archive_from_new_cmds \
-old_archive_from_expsyms_cmds \
-archive_cmds \
-archive_expsym_cmds \
-module_cmds \
-module_expsym_cmds \
-export_symbols_cmds \
-prelink_cmds \
-postlink_cmds \
-postinstall_cmds \
-postuninstall_cmds \
-finish_cmds \
-sys_lib_search_path_spec \
-sys_lib_dlsearch_path_spec \
-reload_cmds_CXX \
-old_archive_cmds_CXX \
-old_archive_from_new_cmds_CXX \
-old_archive_from_expsyms_cmds_CXX \
-archive_cmds_CXX \
-archive_expsym_cmds_CXX \
-module_cmds_CXX \
-module_expsym_cmds_CXX \
-export_symbols_cmds_CXX \
-prelink_cmds_CXX \
-postlink_cmds_CXX; do
- case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
- *[\\\\\\\`\\"\\\$]*)
- eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\""
- ;;
- *)
- eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
- ;;
- esac
-done
-
-ac_aux_dir='$ac_aux_dir'
-xsi_shell='$xsi_shell'
-lt_shell_append='$lt_shell_append'
-
-# See if we are running on zsh, and set the options which allow our
-# commands through without removal of \ escapes INIT.
-if test -n "\${ZSH_VERSION+set}" ; then
- setopt NO_GLOB_SUBST
-fi
-
-
- PACKAGE='$PACKAGE'
- VERSION='$VERSION'
- TIMESTAMP='$TIMESTAMP'
- RM='$RM'
- ofile='$ofile'
-
-
-
-
-
-
-_ACEOF
-
-cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
-
-# Handling of arguments.
-for ac_config_target in $ac_config_targets
-do
- case $ac_config_target in
- "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
- "fflas-ffpack/fflas-ffpack-config.h") CONFIG_COMMANDS="$CONFIG_COMMANDS fflas-ffpack/fflas-ffpack-config.h" ;;
- "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
- "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;;
- "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
- "macros/Makefile") CONFIG_FILES="$CONFIG_FILES macros/Makefile" ;;
- "fflas-ffpack-config") CONFIG_FILES="$CONFIG_FILES fflas-ffpack-config" ;;
- "fflas-ffpack/Makefile") CONFIG_FILES="$CONFIG_FILES fflas-ffpack/Makefile" ;;
- "fflas-ffpack/fflas/Makefile") CONFIG_FILES="$CONFIG_FILES fflas-ffpack/fflas/Makefile" ;;
- "fflas-ffpack/ffpack/Makefile") CONFIG_FILES="$CONFIG_FILES fflas-ffpack/ffpack/Makefile" ;;
- "fflas-ffpack/field/Makefile") CONFIG_FILES="$CONFIG_FILES fflas-ffpack/field/Makefile" ;;
- "utils/Makefile") CONFIG_FILES="$CONFIG_FILES utils/Makefile" ;;
- "doc/Makefile") CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;;
- "tests/Makefile") CONFIG_FILES="$CONFIG_FILES tests/Makefile" ;;
- "benchmark/Makefile") CONFIG_FILES="$CONFIG_FILES benchmark/Makefile" ;;
- "optimiser/Makefile") CONFIG_FILES="$CONFIG_FILES optimiser/Makefile" ;;
- "benchmark/src/Makefile") CONFIG_FILES="$CONFIG_FILES benchmark/src/Makefile" ;;
- "benchmark/src/BLOCKING/Makefile") CONFIG_FILES="$CONFIG_FILES benchmark/src/BLOCKING/Makefile" ;;
- "benchmark/src/FFLAS_FFPACK/Makefile") CONFIG_FILES="$CONFIG_FILES benchmark/src/FFLAS_FFPACK/Makefile" ;;
- "benchmark/src/BLAS_LAPACK/Makefile") CONFIG_FILES="$CONFIG_FILES benchmark/src/BLAS_LAPACK/Makefile" ;;
- "benchmark/html/Makefile") CONFIG_FILES="$CONFIG_FILES benchmark/html/Makefile" ;;
- "benchmark/graph/Makefile") CONFIG_FILES="$CONFIG_FILES benchmark/graph/Makefile" ;;
- "benchmark/test-src/Makefile") CONFIG_FILES="$CONFIG_FILES benchmark/test-src/Makefile" ;;
-
- *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
- esac
-done
-
-
-# If the user did not use the arguments to specify the items to instantiate,
-# then the envvar interface is used. Set only those that are not.
-# We use the long form for the default assignment because of an extremely
-# bizarre bug on SunOS 4.1.3.
-if $ac_need_defaults; then
- test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
- test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
- test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
-fi
-
-# Have a temporary directory for convenience. Make it in the build tree
-# simply because there is no reason against having it here, and in addition,
-# creating and moving files from /tmp can sometimes cause problems.
-# Hook for its removal unless debugging.
-# Note that there is a small window in which the directory will not be cleaned:
-# after its creation but before its name has been assigned to `$tmp'.
-$debug ||
-{
- tmp= ac_tmp=
- trap 'exit_status=$?
- : "${ac_tmp:=$tmp}"
- { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status
-' 0
- trap 'as_fn_exit 1' 1 2 13 15
-}
-# Create a (secure) tmp directory for tmp files.
-
-{
- tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
- test -d "$tmp"
-} ||
-{
- tmp=./conf$$-$RANDOM
- (umask 077 && mkdir "$tmp")
-} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
-ac_tmp=$tmp
-
-# Set up the scripts for CONFIG_FILES section.
-# No need to generate them if there are no CONFIG_FILES.
-# This happens for instance with `./config.status config.h'.
-if test -n "$CONFIG_FILES"; then
-
-
-ac_cr=`echo X | tr X '\015'`
-# On cygwin, bash can eat \r inside `` if the user requested igncr.
-# But we know of no other shell where ac_cr would be empty at this
-# point, so we can use a bashism as a fallback.
-if test "x$ac_cr" = x; then
- eval ac_cr=\$\'\\r\'
-fi
-ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
-if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
- ac_cs_awk_cr='\\r'
-else
- ac_cs_awk_cr=$ac_cr
-fi
-
-echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
-_ACEOF
-
-
-{
- echo "cat >conf$$subs.awk <<_ACEOF" &&
- echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
- echo "_ACEOF"
-} >conf$$subs.sh ||
- as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
-ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
-ac_delim='%!_!# '
-for ac_last_try in false false false false false :; do
- . ./conf$$subs.sh ||
- as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
-
- ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
- if test $ac_delim_n = $ac_delim_num; then
- break
- elif $ac_last_try; then
- as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
- else
- ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
- fi
-done
-rm -f conf$$subs.sh
-
-cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
-cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK &&
-_ACEOF
-sed -n '
-h
-s/^/S["/; s/!.*/"]=/
-p
-g
-s/^[^!]*!//
-:repl
-t repl
-s/'"$ac_delim"'$//
-t delim
-:nl
-h
-s/\(.\{148\}\)..*/\1/
-t more1
-s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/
-p
-n
-b repl
-:more1
-s/["\\]/\\&/g; s/^/"/; s/$/"\\/
-p
-g
-s/.\{148\}//
-t nl
-:delim
-h
-s/\(.\{148\}\)..*/\1/
-t more2
-s/["\\]/\\&/g; s/^/"/; s/$/"/
-p
-b
-:more2
-s/["\\]/\\&/g; s/^/"/; s/$/"\\/
-p
-g
-s/.\{148\}//
-t delim
-' <conf$$subs.awk | sed '
-/^[^""]/{
- N
- s/\n//
-}
-' >>$CONFIG_STATUS || ac_write_fail=1
-rm -f conf$$subs.awk
-cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
-_ACAWK
-cat >>"\$ac_tmp/subs1.awk" <<_ACAWK &&
- for (key in S) S_is_set[key] = 1
- FS = ""
-
-}
-{
- line = $ 0
- nfields = split(line, field, "@")
- substed = 0
- len = length(field[1])
- for (i = 2; i < nfields; i++) {
- key = field[i]
- keylen = length(key)
- if (S_is_set[key]) {
- value = S[key]
- line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3)
- len += length(value) + length(field[++i])
- substed = 1
- } else
- len += 1 + keylen
- }
-
- print line
-}
-
-_ACAWK
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
-if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then
- sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
-else
- cat
-fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \
- || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
-_ACEOF
-
-# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
-# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
-# trailing colons and then remove the whole line if VPATH becomes empty
-# (actually we leave an empty line to preserve line numbers).
-if test "x$srcdir" = x.; then
- ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{
-h
-s///
-s/^/:/
-s/[ ]*$/:/
-s/:\$(srcdir):/:/g
-s/:\${srcdir}:/:/g
-s/:@srcdir@:/:/g
-s/^:*//
-s/:*$//
-x
-s/\(=[ ]*\).*/\1/
-G
-s/\n//
-s/^[^=]*=[ ]*$//
-}'
-fi
-
-cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
-fi # test -n "$CONFIG_FILES"
-
-# Set up the scripts for CONFIG_HEADERS section.
-# No need to generate them if there are no CONFIG_HEADERS.
-# This happens for instance with `./config.status Makefile'.
-if test -n "$CONFIG_HEADERS"; then
-cat >"$ac_tmp/defines.awk" <<\_ACAWK ||
-BEGIN {
-_ACEOF
-
-# Transform confdefs.h into an awk script `defines.awk', embedded as
-# here-document in config.status, that substitutes the proper values into
-# config.h.in to produce config.h.
-
-# Create a delimiter string that does not exist in confdefs.h, to ease
-# handling of long lines.
-ac_delim='%!_!# '
-for ac_last_try in false false :; do
- ac_tt=`sed -n "/$ac_delim/p" confdefs.h`
- if test -z "$ac_tt"; then
- break
- elif $ac_last_try; then
- as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
- else
- ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
- fi
-done
-
-# For the awk script, D is an array of macro values keyed by name,
-# likewise P contains macro parameters if any. Preserve backslash
-# newline sequences.
-
-ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
-sed -n '
-s/.\{148\}/&'"$ac_delim"'/g
-t rset
-:rset
-s/^[ ]*#[ ]*define[ ][ ]*/ /
-t def
-d
-:def
-s/\\$//
-t bsnl
-s/["\\]/\\&/g
-s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\
-D["\1"]=" \3"/p
-s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p
-d
-:bsnl
-s/["\\]/\\&/g
-s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\
-D["\1"]=" \3\\\\\\n"\\/p
-t cont
-s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p
-t cont
-d
-:cont
-n
-s/.\{148\}/&'"$ac_delim"'/g
-t clear
-:clear
-s/\\$//
-t bsnlc
-s/["\\]/\\&/g; s/^/"/; s/$/"/p
-d
-:bsnlc
-s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p
-b cont
-' <confdefs.h | sed '
-s/'"$ac_delim"'/"\\\
-"/g' >>$CONFIG_STATUS || ac_write_fail=1
-
-cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
- for (key in D) D_is_set[key] = 1
- FS = ""
-}
-/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ {
- line = \$ 0
- split(line, arg, " ")
- if (arg[1] == "#") {
- defundef = arg[2]
- mac1 = arg[3]
- } else {
- defundef = substr(arg[1], 2)
- mac1 = arg[2]
- }
- split(mac1, mac2, "(") #)
- macro = mac2[1]
- prefix = substr(line, 1, index(line, defundef) - 1)
- if (D_is_set[macro]) {
- # Preserve the white space surrounding the "#".
- print prefix "define", macro P[macro] D[macro]
- next
- } else {
- # Replace #undef with comments. This is necessary, for example,
- # in the case of _POSIX_SOURCE, which is predefined and required
- # on some systems where configure will not decide to define it.
- if (defundef == "undef") {
- print "/*", prefix defundef, macro, "*/"
- next
- }
- }
-}
-{ print }
-_ACAWK
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
- as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
-fi # test -n "$CONFIG_HEADERS"
-
-
-eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS"
-shift
-for ac_tag
-do
- case $ac_tag in
- :[FHLC]) ac_mode=$ac_tag; continue;;
- esac
- case $ac_mode$ac_tag in
- :[FHL]*:*);;
- :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;;
- :[FH]-) ac_tag=-:-;;
- :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
- esac
- ac_save_IFS=$IFS
- IFS=:
- set x $ac_tag
- IFS=$ac_save_IFS
- shift
- ac_file=$1
- shift
-
- case $ac_mode in
- :L) ac_source=$1;;
- :[FH])
- ac_file_inputs=
- for ac_f
- do
- case $ac_f in
- -) ac_f="$ac_tmp/stdin";;
- *) # Look for the file first in the build tree, then in the source tree
- # (if the path is not absolute). The absolute path cannot be DOS-style,
- # because $ac_f cannot contain `:'.
- test -f "$ac_f" ||
- case $ac_f in
- [\\/$]*) false;;
- *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
- esac ||
- as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;;
- esac
- case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
- as_fn_append ac_file_inputs " '$ac_f'"
- done
-
- # Let's still pretend it is `configure' which instantiates (i.e., don't
- # use $as_me), people would be surprised to read:
- # /* config.h. Generated by config.status. */
- configure_input='Generated from '`
- $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g'
- `' by configure.'
- if test x"$ac_file" != x-; then
- configure_input="$ac_file. $configure_input"
- { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5
-$as_echo "$as_me: creating $ac_file" >&6;}
- fi
- # Neutralize special characters interpreted by sed in replacement strings.
- case $configure_input in #(
- *\&* | *\|* | *\\* )
- ac_sed_conf_input=`$as_echo "$configure_input" |
- sed 's/[\\\\&|]/\\\\&/g'`;; #(
- *) ac_sed_conf_input=$configure_input;;
- esac
-
- case $ac_tag in
- *:-:* | *:-) cat >"$ac_tmp/stdin" \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
- esac
- ;;
- esac
-
- ac_dir=`$as_dirname -- "$ac_file" ||
-$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$ac_file" : 'X\(//\)[^/]' \| \
- X"$ac_file" : 'X\(//\)$' \| \
- X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$ac_file" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
- as_dir="$ac_dir"; as_fn_mkdir_p
- ac_builddir=.
-
-case "$ac_dir" in
-.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
-*)
- ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
- # A ".." for each directory in $ac_dir_suffix.
- ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
- case $ac_top_builddir_sub in
- "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
- *) ac_top_build_prefix=$ac_top_builddir_sub/ ;;
- esac ;;
-esac
-ac_abs_top_builddir=$ac_pwd
-ac_abs_builddir=$ac_pwd$ac_dir_suffix
-# for backward compatibility:
-ac_top_builddir=$ac_top_build_prefix
-
-case $srcdir in
- .) # We are building in place.
- ac_srcdir=.
- ac_top_srcdir=$ac_top_builddir_sub
- ac_abs_top_srcdir=$ac_pwd ;;
- [\\/]* | ?:[\\/]* ) # Absolute name.
- ac_srcdir=$srcdir$ac_dir_suffix;
- ac_top_srcdir=$srcdir
- ac_abs_top_srcdir=$srcdir ;;
- *) # Relative name.
- ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
- ac_top_srcdir=$ac_top_build_prefix$srcdir
- ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
-esac
-ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
-
-
- case $ac_mode in
- :F)
- #
- # CONFIG_FILE
- #
-
- case $INSTALL in
- [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
- *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;;
- esac
- ac_MKDIR_P=$MKDIR_P
- case $MKDIR_P in
- [\\/$]* | ?:[\\/]* ) ;;
- */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;;
- esac
-_ACEOF
-
-cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
-# If the template does not know about datarootdir, expand it.
-# FIXME: This hack should be removed a few years after 2.60.
-ac_datarootdir_hack=; ac_datarootdir_seen=
-ac_sed_dataroot='
-/datarootdir/ {
- p
- q
-}
-/@datadir@/p
-/@docdir@/p
-/@infodir@/p
-/@localedir@/p
-/@mandir@/p'
-case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in
-*datarootdir*) ac_datarootdir_seen=yes;;
-*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
-$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
-_ACEOF
-cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
- ac_datarootdir_hack='
- s&@datadir@&$datadir&g
- s&@docdir@&$docdir&g
- s&@infodir@&$infodir&g
- s&@localedir@&$localedir&g
- s&@mandir@&$mandir&g
- s&\\\${datarootdir}&$datarootdir&g' ;;
-esac
-_ACEOF
-
-# Neutralize VPATH when `$srcdir' = `.'.
-# Shell code in configure.ac might set extrasub.
-# FIXME: do we really want to maintain this feature?
-cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
-ac_sed_extra="$ac_vpsub
-$extrasub
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
-:t
-/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
-s|@configure_input@|$ac_sed_conf_input|;t t
-s&@top_builddir@&$ac_top_builddir_sub&;t t
-s&@top_build_prefix@&$ac_top_build_prefix&;t t
-s&@srcdir@&$ac_srcdir&;t t
-s&@abs_srcdir@&$ac_abs_srcdir&;t t
-s&@top_srcdir@&$ac_top_srcdir&;t t
-s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
-s&@builddir@&$ac_builddir&;t t
-s&@abs_builddir@&$ac_abs_builddir&;t t
-s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
-s&@INSTALL@&$ac_INSTALL&;t t
-s&@MKDIR_P@&$ac_MKDIR_P&;t t
-$ac_datarootdir_hack
-"
-eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \
- >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5
-
-test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
- { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } &&
- { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \
- "$ac_tmp/out"`; test -z "$ac_out"; } &&
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
-which seems to be undefined. Please make sure it is defined" >&5
-$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
-which seems to be undefined. Please make sure it is defined" >&2;}
-
- rm -f "$ac_tmp/stdin"
- case $ac_file in
- -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";;
- *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";;
- esac \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5
- ;;
- :H)
- #
- # CONFIG_HEADER
- #
- if test x"$ac_file" != x-; then
- {
- $as_echo "/* $configure_input */" \
- && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs"
- } >"$ac_tmp/config.h" \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5
- if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
-$as_echo "$as_me: $ac_file is unchanged" >&6;}
- else
- rm -f "$ac_file"
- mv "$ac_tmp/config.h" "$ac_file" \
- || as_fn_error $? "could not create $ac_file" "$LINENO" 5
- fi
- else
- $as_echo "/* $configure_input */" \
- && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \
- || as_fn_error $? "could not create -" "$LINENO" 5
- fi
-# Compute "$ac_file"'s index in $config_headers.
-_am_arg="$ac_file"
-_am_stamp_count=1
-for _am_header in $config_headers :; do
- case $_am_header in
- $_am_arg | $_am_arg:* )
- break ;;
- * )
- _am_stamp_count=`expr $_am_stamp_count + 1` ;;
- esac
-done
-echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" ||
-$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$_am_arg" : 'X\(//\)[^/]' \| \
- X"$_am_arg" : 'X\(//\)$' \| \
- X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$_am_arg" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`/stamp-h$_am_stamp_count
- ;;
-
- :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5
-$as_echo "$as_me: executing $ac_file commands" >&6;}
- ;;
- esac
-
-
- case $ac_file$ac_mode in
- "fflas-ffpack/fflas-ffpack-config.h":C) ac_prefix_conf_OUT=`echo fflas-ffpack/fflas-ffpack-config.h`
-ac_prefix_conf_DEF=`echo _$ac_prefix_conf_OUT | sed -e "y:abcdefghijklmnopqrstuvwxyz:ABCDEFGHIJKLMNOPQRSTUVWXYZ:" -e "s/[^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]/_/g"`
-ac_prefix_conf_PKG=`echo __FFLASFFPACK`
-ac_prefix_conf_LOW=`echo _$ac_prefix_conf_PKG | sed -e "y:ABCDEFGHIJKLMNOPQRSTUVWXYZ-:abcdefghijklmnopqrstuvwxyz_:"`
-ac_prefix_conf_UPP=`echo $ac_prefix_conf_PKG | sed -e "y:abcdefghijklmnopqrstuvwxyz-:ABCDEFGHIJKLMNOPQRSTUVWXYZ_:" -e "/^[0123456789]/s/^/_/"`
-ac_prefix_conf_INP=`echo "" | sed -e 's/ *//'`
-if test ".$ac_prefix_conf_INP" = "."; then
- for ac_file in : $CONFIG_HEADERS; do test "_$ac_file" = _: && continue
- case "$ac_file" in
- *.h) ac_prefix_conf_INP=$ac_file ;;
- *)
- esac
- test ".$ac_prefix_conf_INP" != "." && break
- done
-fi
-if test ".$ac_prefix_conf_INP" = "."; then
- case "$ac_prefix_conf_OUT" in
- */*) ac_prefix_conf_INP=`basename "$ac_prefix_conf_OUT"`
- ;;
- *-*) ac_prefix_conf_INP=`echo "$ac_prefix_conf_OUT" | sed -e "s/[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_]*-//"`
- ;;
- *) ac_prefix_conf_INP=config.h
- ;;
- esac
-fi
-if test -z "$ac_prefix_conf_PKG" ; then
- as_fn_error $? "no prefix for _PREFIX_PKG_CONFIG_H" "$LINENO" 5
-else
- if test ! -f "$ac_prefix_conf_INP" ; then if test -f "$srcdir/$ac_prefix_conf_INP" ; then
- ac_prefix_conf_INP="$srcdir/$ac_prefix_conf_INP"
- fi fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_prefix_conf_OUT - prefix $ac_prefix_conf_UPP for $ac_prefix_conf_INP defines" >&5
-$as_echo "$as_me: creating $ac_prefix_conf_OUT - prefix $ac_prefix_conf_UPP for $ac_prefix_conf_INP defines" >&6;}
- if test -f $ac_prefix_conf_INP ; then
- echo "s/#undef *\\([ABCDEFGHIJKLMNOPQRSTUVWXYZ_]\\)/#undef $ac_prefix_conf_UPP""_\\1/" > conftest.prefix
- echo "s/#undef *\\([abcdefghijklmnopqrstuvwxyz]\\)/#undef $ac_prefix_conf_LOW""_\\1/" >> conftest.prefix
- echo "s/#define *\\([ABCDEFGHIJKLMNOPQRSTUVWXYZ_][abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_]*\\)\\(.*\\)/#ifndef $ac_prefix_conf_UPP""_\\1 \\" >> conftest.prefix
- echo "#define $ac_prefix_conf_UPP""_\\1 \\2 \\" >> conftest.prefix
- echo "#endif/" >>conftest.prefix
- echo "s/#define *\\([abcdefghijklmnopqrstuvwxyz][abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_]*\\)\\(.*\\)/#ifndef $ac_prefix_conf_LOW""_\\1 \\" >> conftest.prefix
- echo "#define $ac_prefix_conf_LOW""_\\1 \\2 \\" >> conftest.prefix
- echo "#endif/" >> conftest.prefix
- # now executing _script on _DEF input to create _OUT output file
- echo "#ifndef $ac_prefix_conf_DEF" >$tmp/pconfig.h
- echo "#define $ac_prefix_conf_DEF 1" >>$tmp/pconfig.h
- echo ' ' >>$tmp/pconfig.h
- echo /'*' $ac_prefix_conf_OUT. Generated automatically at end of configure. '*'/ >>$tmp/pconfig.h
-
- sed -f conftest.prefix $ac_prefix_conf_INP >>$tmp/pconfig.h
- echo ' ' >>$tmp/pconfig.h
- echo '/* once:' $ac_prefix_conf_DEF '*/' >>$tmp/pconfig.h
- echo "#endif" >>$tmp/pconfig.h
- if cmp -s $ac_prefix_conf_OUT $tmp/pconfig.h 2>/dev/null; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_prefix_conf_OUT is unchanged" >&5
-$as_echo "$as_me: $ac_prefix_conf_OUT is unchanged" >&6;}
- else
- ac_dir=`$as_dirname -- "$ac_prefix_conf_OUT" ||
-$as_expr X"$ac_prefix_conf_OUT" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$ac_prefix_conf_OUT" : 'X\(//\)[^/]' \| \
- X"$ac_prefix_conf_OUT" : 'X\(//\)$' \| \
- X"$ac_prefix_conf_OUT" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$ac_prefix_conf_OUT" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
- as_dir="$ac_dir"; as_fn_mkdir_p
- rm -f "$ac_prefix_conf_OUT"
- mv $tmp/pconfig.h "$ac_prefix_conf_OUT"
- fi
- else
- as_fn_error $? "input file $ac_prefix_conf_INP does not exist - skip generating $ac_prefix_conf_OUT" "$LINENO" 5
- fi
- rm -f conftest.*
-fi
- ;;
- "depfiles":C) test x"$AMDEP_TRUE" != x"" || {
- # Autoconf 2.62 quotes --file arguments for eval, but not when files
- # are listed without --file. Let's play safe and only enable the eval
- # if we detect the quoting.
- case $CONFIG_FILES in
- *\'*) eval set x "$CONFIG_FILES" ;;
- *) set x $CONFIG_FILES ;;
- esac
- shift
- for mf
- do
- # Strip MF so we end up with the name of the file.
- mf=`echo "$mf" | sed -e 's/:.*$//'`
- # Check whether this is an Automake generated Makefile or not.
- # We used to match only the files named `Makefile.in', but
- # some people rename them; so instead we look at the file content.
- # Grep'ing the first line is not enough: some people post-process
- # each Makefile.in and add a new line on top of each file to say so.
- # Grep'ing the whole file is not good either: AIX grep has a line
- # limit of 2048, but all sed's we know have understand at least 4000.
- if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
- dirpart=`$as_dirname -- "$mf" ||
-$as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$mf" : 'X\(//\)[^/]' \| \
- X"$mf" : 'X\(//\)$' \| \
- X"$mf" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$mf" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
- else
- continue
- fi
- # Extract the definition of DEPDIR, am__include, and am__quote
- # from the Makefile without running `make'.
- DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
- test -z "$DEPDIR" && continue
- am__include=`sed -n 's/^am__include = //p' < "$mf"`
- test -z "am__include" && continue
- am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
- # When using ansi2knr, U may be empty or an underscore; expand it
- U=`sed -n 's/^U = //p' < "$mf"`
- # Find all dependency output files, they are included files with
- # $(DEPDIR) in their names. We invoke sed twice because it is the
- # simplest approach to changing $(DEPDIR) to its actual value in the
- # expansion.
- for file in `sed -n "
- s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
- sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
- # Make sure the directory exists.
- test -f "$dirpart/$file" && continue
- fdir=`$as_dirname -- "$file" ||
-$as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$file" : 'X\(//\)[^/]' \| \
- X"$file" : 'X\(//\)$' \| \
- X"$file" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$file" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
- s//\1/
- q
- }
- /^X\(\/\/\)$/{
- s//\1/
- q
- }
- /^X\(\/\).*/{
- s//\1/
- q
- }
- s/.*/./; q'`
- as_dir=$dirpart/$fdir; as_fn_mkdir_p
- # echo "creating $dirpart/$file"
- echo '# dummy' > "$dirpart/$file"
- done
- done
-}
- ;;
- "libtool":C)
-
- # See if we are running on zsh, and set the options which allow our
- # commands through without removal of \ escapes.
- if test -n "${ZSH_VERSION+set}" ; then
- setopt NO_GLOB_SUBST
- fi
-
- cfgfile="${ofile}T"
- trap "$RM \"$cfgfile\"; exit 1" 1 2 15
- $RM "$cfgfile"
-
- cat <<_LT_EOF >> "$cfgfile"
-#! $SHELL
-
-# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services.
-# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION
-# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
-# NOTE: Changes made to this file will be lost: look at ltmain.sh.
-#
-# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-# 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# Written by Gordon Matzigkeit, 1996
-#
-# This file is part of GNU Libtool.
-#
-# GNU Libtool is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation; either version 2 of
-# the License, or (at your option) any later version.
-#
-# As a special exception to the GNU General Public License,
-# if you distribute this file as part of a program or library that
-# is built using GNU Libtool, you may include this file under the
-# same distribution terms that you use for the rest of that program.
-#
-# GNU Libtool is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Libtool; see the file COPYING. If not, a copy
-# can be downloaded from http://www.gnu.org/licenses/gpl.html, or
-# obtained by writing to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-
-# The names of the tagged configurations supported by this script.
-available_tags="CXX "
-
-# ### BEGIN LIBTOOL CONFIG
-
-# Which release of libtool.m4 was used?
-macro_version=$macro_version
-macro_revision=$macro_revision
-
-# Whether or not to build shared libraries.
-build_libtool_libs=$enable_shared
-
-# Whether or not to build static libraries.
-build_old_libs=$enable_static
-
-# What type of objects to build.
-pic_mode=$pic_mode
-
-# Whether or not to optimize for fast installation.
-fast_install=$enable_fast_install
-
-# Shell to use when invoking shell scripts.
-SHELL=$lt_SHELL
-
-# An echo program that protects backslashes.
-ECHO=$lt_ECHO
-
-# The PATH separator for the build system.
-PATH_SEPARATOR=$lt_PATH_SEPARATOR
-
-# The host system.
-host_alias=$host_alias
-host=$host
-host_os=$host_os
-
-# The build system.
-build_alias=$build_alias
-build=$build
-build_os=$build_os
-
-# A sed program that does not truncate output.
-SED=$lt_SED
-
-# Sed that helps us avoid accidentally triggering echo(1) options like -n.
-Xsed="\$SED -e 1s/^X//"
-
-# A grep program that handles long lines.
-GREP=$lt_GREP
-
-# An ERE matcher.
-EGREP=$lt_EGREP
-
-# A literal string matcher.
-FGREP=$lt_FGREP
-
-# A BSD- or MS-compatible name lister.
-NM=$lt_NM
-
-# Whether we need soft or hard links.
-LN_S=$lt_LN_S
-
-# What is the maximum length of a command?
-max_cmd_len=$max_cmd_len
-
-# Object file suffix (normally "o").
-objext=$ac_objext
-
-# Executable file suffix (normally "").
-exeext=$exeext
-
-# whether the shell understands "unset".
-lt_unset=$lt_unset
-
-# turn spaces into newlines.
-SP2NL=$lt_lt_SP2NL
-
-# turn newlines into spaces.
-NL2SP=$lt_lt_NL2SP
-
-# convert \$build file names to \$host format.
-to_host_file_cmd=$lt_cv_to_host_file_cmd
-
-# convert \$build files to toolchain format.
-to_tool_file_cmd=$lt_cv_to_tool_file_cmd
-
-# An object symbol dumper.
-OBJDUMP=$lt_OBJDUMP
-
-# Method to check whether dependent libraries are shared objects.
-deplibs_check_method=$lt_deplibs_check_method
-
-# Command to use when deplibs_check_method = "file_magic".
-file_magic_cmd=$lt_file_magic_cmd
-
-# How to find potential files when deplibs_check_method = "file_magic".
-file_magic_glob=$lt_file_magic_glob
-
-# Find potential files using nocaseglob when deplibs_check_method = "file_magic".
-want_nocaseglob=$lt_want_nocaseglob
-
-# DLL creation program.
-DLLTOOL=$lt_DLLTOOL
-
-# Command to associate shared and link libraries.
-sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd
-
-# The archiver.
-AR=$lt_AR
-
-# Flags to create an archive.
-AR_FLAGS=$lt_AR_FLAGS
-
-# How to feed a file listing to the archiver.
-archiver_list_spec=$lt_archiver_list_spec
-
-# A symbol stripping program.
-STRIP=$lt_STRIP
-
-# Commands used to install an old-style archive.
-RANLIB=$lt_RANLIB
-old_postinstall_cmds=$lt_old_postinstall_cmds
-old_postuninstall_cmds=$lt_old_postuninstall_cmds
-
-# Whether to use a lock for old archive extraction.
-lock_old_archive_extraction=$lock_old_archive_extraction
-
-# A C compiler.
-LTCC=$lt_CC
-
-# LTCC compiler flags.
-LTCFLAGS=$lt_CFLAGS
-
-# Take the output of nm and produce a listing of raw symbols and C names.
-global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe
-
-# Transform the output of nm in a proper C declaration.
-global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl
-
-# Transform the output of nm in a C name address pair.
-global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address
-
-# Transform the output of nm in a C name address pair when lib prefix is needed.
-global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix
-
-# Specify filename containing input files for \$NM.
-nm_file_list_spec=$lt_nm_file_list_spec
-
-# The root where to search for dependent libraries,and in which our libraries should be installed.
-lt_sysroot=$lt_sysroot
-
-# The name of the directory that contains temporary libtool files.
-objdir=$objdir
-
-# Used to examine libraries when file_magic_cmd begins with "file".
-MAGIC_CMD=$MAGIC_CMD
-
-# Must we lock files when doing compilation?
-need_locks=$lt_need_locks
-
-# Manifest tool.
-MANIFEST_TOOL=$lt_MANIFEST_TOOL
-
-# Tool to manipulate archived DWARF debug symbol files on Mac OS X.
-DSYMUTIL=$lt_DSYMUTIL
-
-# Tool to change global to local symbols on Mac OS X.
-NMEDIT=$lt_NMEDIT
-
-# Tool to manipulate fat objects and archives on Mac OS X.
-LIPO=$lt_LIPO
-
-# ldd/readelf like tool for Mach-O binaries on Mac OS X.
-OTOOL=$lt_OTOOL
-
-# ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4.
-OTOOL64=$lt_OTOOL64
-
-# Old archive suffix (normally "a").
-libext=$libext
-
-# Shared library suffix (normally ".so").
-shrext_cmds=$lt_shrext_cmds
-
-# The commands to extract the exported symbol list from a shared archive.
-extract_expsyms_cmds=$lt_extract_expsyms_cmds
-
-# Variables whose values should be saved in libtool wrapper scripts and
-# restored at link time.
-variables_saved_for_relink=$lt_variables_saved_for_relink
-
-# Do we need the "lib" prefix for modules?
-need_lib_prefix=$need_lib_prefix
-
-# Do we need a version for libraries?
-need_version=$need_version
-
-# Library versioning type.
-version_type=$version_type
-
-# Shared library runtime path variable.
-runpath_var=$runpath_var
-
-# Shared library path variable.
-shlibpath_var=$shlibpath_var
-
-# Is shlibpath searched before the hard-coded library search path?
-shlibpath_overrides_runpath=$shlibpath_overrides_runpath
-
-# Format of library name prefix.
-libname_spec=$lt_libname_spec
-
-# List of archive names. First name is the real one, the rest are links.
-# The last name is the one that the linker finds with -lNAME
-library_names_spec=$lt_library_names_spec
-
-# The coded name of the library, if different from the real name.
-soname_spec=$lt_soname_spec
-
-# Permission mode override for installation of shared libraries.
-install_override_mode=$lt_install_override_mode
-
-# Command to use after installation of a shared archive.
-postinstall_cmds=$lt_postinstall_cmds
-
-# Command to use after uninstallation of a shared archive.
-postuninstall_cmds=$lt_postuninstall_cmds
-
-# Commands used to finish a libtool library installation in a directory.
-finish_cmds=$lt_finish_cmds
-
-# As "finish_cmds", except a single script fragment to be evaled but
-# not shown.
-finish_eval=$lt_finish_eval
-
-# Whether we should hardcode library paths into libraries.
-hardcode_into_libs=$hardcode_into_libs
-
-# Compile-time system search path for libraries.
-sys_lib_search_path_spec=$lt_sys_lib_search_path_spec
-
-# Run-time system search path for libraries.
-sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec
-
-# Whether dlopen is supported.
-dlopen_support=$enable_dlopen
-
-# Whether dlopen of programs is supported.
-dlopen_self=$enable_dlopen_self
-
-# Whether dlopen of statically linked programs is supported.
-dlopen_self_static=$enable_dlopen_self_static
-
-# Commands to strip libraries.
-old_striplib=$lt_old_striplib
-striplib=$lt_striplib
-
-
-# The linker used to build libraries.
-LD=$lt_LD
-
-# How to create reloadable object files.
-reload_flag=$lt_reload_flag
-reload_cmds=$lt_reload_cmds
-
-# Commands used to build an old-style archive.
-old_archive_cmds=$lt_old_archive_cmds
-
-# A language specific compiler.
-CC=$lt_compiler
-
-# Is the compiler the GNU compiler?
-with_gcc=$GCC
-
-# Compiler flag to turn off builtin functions.
-no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag
-
-# Additional compiler flags for building library objects.
-pic_flag=$lt_lt_prog_compiler_pic
-
-# How to pass a linker flag through the compiler.
-wl=$lt_lt_prog_compiler_wl
-
-# Compiler flag to prevent dynamic linking.
-link_static_flag=$lt_lt_prog_compiler_static
-
-# Does compiler simultaneously support -c and -o options?
-compiler_c_o=$lt_lt_cv_prog_compiler_c_o
-
-# Whether or not to add -lc for building shared libraries.
-build_libtool_need_lc=$archive_cmds_need_lc
-
-# Whether or not to disallow shared libs when runtime libs are static.
-allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes
-
-# Compiler flag to allow reflexive dlopens.
-export_dynamic_flag_spec=$lt_export_dynamic_flag_spec
-
-# Compiler flag to generate shared objects directly from archives.
-whole_archive_flag_spec=$lt_whole_archive_flag_spec
-
-# Whether the compiler copes with passing no objects directly.
-compiler_needs_object=$lt_compiler_needs_object
-
-# Create an old-style archive from a shared archive.
-old_archive_from_new_cmds=$lt_old_archive_from_new_cmds
-
-# Create a temporary old-style archive to link instead of a shared archive.
-old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds
-
-# Commands used to build a shared archive.
-archive_cmds=$lt_archive_cmds
-archive_expsym_cmds=$lt_archive_expsym_cmds
-
-# Commands used to build a loadable module if different from building
-# a shared archive.
-module_cmds=$lt_module_cmds
-module_expsym_cmds=$lt_module_expsym_cmds
-
-# Whether we are building with GNU ld or not.
-with_gnu_ld=$lt_with_gnu_ld
-
-# Flag that allows shared libraries with undefined symbols to be built.
-allow_undefined_flag=$lt_allow_undefined_flag
-
-# Flag that enforces no undefined symbols.
-no_undefined_flag=$lt_no_undefined_flag
-
-# Flag to hardcode \$libdir into a binary during linking.
-# This must work even if \$libdir does not exist
-hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec
-
-# Whether we need a single "-rpath" flag with a separated argument.
-hardcode_libdir_separator=$lt_hardcode_libdir_separator
-
-# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
-# DIR into the resulting binary.
-hardcode_direct=$hardcode_direct
-
-# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
-# DIR into the resulting binary and the resulting library dependency is
-# "absolute",i.e impossible to change by setting \${shlibpath_var} if the
-# library is relocated.
-hardcode_direct_absolute=$hardcode_direct_absolute
-
-# Set to "yes" if using the -LDIR flag during linking hardcodes DIR
-# into the resulting binary.
-hardcode_minus_L=$hardcode_minus_L
-
-# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR
-# into the resulting binary.
-hardcode_shlibpath_var=$hardcode_shlibpath_var
-
-# Set to "yes" if building a shared library automatically hardcodes DIR
-# into the library and all subsequent libraries and executables linked
-# against it.
-hardcode_automatic=$hardcode_automatic
-
-# Set to yes if linker adds runtime paths of dependent libraries
-# to runtime path list.
-inherit_rpath=$inherit_rpath
-
-# Whether libtool must link a program against all its dependency libraries.
-link_all_deplibs=$link_all_deplibs
-
-# Set to "yes" if exported symbols are required.
-always_export_symbols=$always_export_symbols
-
-# The commands to list exported symbols.
-export_symbols_cmds=$lt_export_symbols_cmds
-
-# Symbols that should not be listed in the preloaded symbols.
-exclude_expsyms=$lt_exclude_expsyms
-
-# Symbols that must always be exported.
-include_expsyms=$lt_include_expsyms
-
-# Commands necessary for linking programs (against libraries) with templates.
-prelink_cmds=$lt_prelink_cmds
-
-# Commands necessary for finishing linking programs.
-postlink_cmds=$lt_postlink_cmds
-
-# Specify filename containing input files.
-file_list_spec=$lt_file_list_spec
-
-# How to hardcode a shared library path into an executable.
-hardcode_action=$hardcode_action
-
-# The directories searched by this compiler when creating a shared library.
-compiler_lib_search_dirs=$lt_compiler_lib_search_dirs
-
-# Dependencies to place before and after the objects being linked to
-# create a shared library.
-predep_objects=$lt_predep_objects
-postdep_objects=$lt_postdep_objects
-predeps=$lt_predeps
-postdeps=$lt_postdeps
-
-# The library search path used internally by the compiler when linking
-# a shared library.
-compiler_lib_search_path=$lt_compiler_lib_search_path
-
-# ### END LIBTOOL CONFIG
-
-_LT_EOF
-
- case $host_os in
- aix3*)
- cat <<\_LT_EOF >> "$cfgfile"
-# AIX sometimes has problems with the GCC collect2 program. For some
-# reason, if we set the COLLECT_NAMES environment variable, the problems
-# vanish in a puff of smoke.
-if test "X${COLLECT_NAMES+set}" != Xset; then
- COLLECT_NAMES=
- export COLLECT_NAMES
-fi
-_LT_EOF
- ;;
- esac
-
-
-ltmain="$ac_aux_dir/ltmain.sh"
-
-
- # We use sed instead of cat because bash on DJGPP gets confused if
- # if finds mixed CR/LF and LF-only lines. Since sed operates in
- # text mode, it properly converts lines to CR/LF. This bash problem
- # is reportedly fixed, but why not run on old versions too?
- sed '$q' "$ltmain" >> "$cfgfile" \
- || (rm -f "$cfgfile"; exit 1)
-
- if test x"$xsi_shell" = xyes; then
- sed -e '/^func_dirname ()$/,/^} # func_dirname /c\
-func_dirname ()\
-{\
-\ case ${1} in\
-\ */*) func_dirname_result="${1%/*}${2}" ;;\
-\ * ) func_dirname_result="${3}" ;;\
-\ esac\
-} # Extended-shell func_dirname implementation' "$cfgfile" > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-
-
- sed -e '/^func_basename ()$/,/^} # func_basename /c\
-func_basename ()\
-{\
-\ func_basename_result="${1##*/}"\
-} # Extended-shell func_basename implementation' "$cfgfile" > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-
-
- sed -e '/^func_dirname_and_basename ()$/,/^} # func_dirname_and_basename /c\
-func_dirname_and_basename ()\
-{\
-\ case ${1} in\
-\ */*) func_dirname_result="${1%/*}${2}" ;;\
-\ * ) func_dirname_result="${3}" ;;\
-\ esac\
-\ func_basename_result="${1##*/}"\
-} # Extended-shell func_dirname_and_basename implementation' "$cfgfile" > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-
-
- sed -e '/^func_stripname ()$/,/^} # func_stripname /c\
-func_stripname ()\
-{\
-\ # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are\
-\ # positional parameters, so assign one to ordinary parameter first.\
-\ func_stripname_result=${3}\
-\ func_stripname_result=${func_stripname_result#"${1}"}\
-\ func_stripname_result=${func_stripname_result%"${2}"}\
-} # Extended-shell func_stripname implementation' "$cfgfile" > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-
-
- sed -e '/^func_split_long_opt ()$/,/^} # func_split_long_opt /c\
-func_split_long_opt ()\
-{\
-\ func_split_long_opt_name=${1%%=*}\
-\ func_split_long_opt_arg=${1#*=}\
-} # Extended-shell func_split_long_opt implementation' "$cfgfile" > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-
-
- sed -e '/^func_split_short_opt ()$/,/^} # func_split_short_opt /c\
-func_split_short_opt ()\
-{\
-\ func_split_short_opt_arg=${1#??}\
-\ func_split_short_opt_name=${1%"$func_split_short_opt_arg"}\
-} # Extended-shell func_split_short_opt implementation' "$cfgfile" > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-
-
- sed -e '/^func_lo2o ()$/,/^} # func_lo2o /c\
-func_lo2o ()\
-{\
-\ case ${1} in\
-\ *.lo) func_lo2o_result=${1%.lo}.${objext} ;;\
-\ *) func_lo2o_result=${1} ;;\
-\ esac\
-} # Extended-shell func_lo2o implementation' "$cfgfile" > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-
-
- sed -e '/^func_xform ()$/,/^} # func_xform /c\
-func_xform ()\
-{\
- func_xform_result=${1%.*}.lo\
-} # Extended-shell func_xform implementation' "$cfgfile" > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-
-
- sed -e '/^func_arith ()$/,/^} # func_arith /c\
-func_arith ()\
-{\
- func_arith_result=$(( $* ))\
-} # Extended-shell func_arith implementation' "$cfgfile" > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-
-
- sed -e '/^func_len ()$/,/^} # func_len /c\
-func_len ()\
-{\
- func_len_result=${#1}\
-} # Extended-shell func_len implementation' "$cfgfile" > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-
-fi
-
-if test x"$lt_shell_append" = xyes; then
- sed -e '/^func_append ()$/,/^} # func_append /c\
-func_append ()\
-{\
- eval "${1}+=\\${2}"\
-} # Extended-shell func_append implementation' "$cfgfile" > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-
-
- sed -e '/^func_append_quoted ()$/,/^} # func_append_quoted /c\
-func_append_quoted ()\
-{\
-\ func_quote_for_eval "${2}"\
-\ eval "${1}+=\\\\ \\$func_quote_for_eval_result"\
-} # Extended-shell func_append_quoted implementation' "$cfgfile" > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-
-
- # Save a `func_append' function call where possible by direct use of '+='
- sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
- test 0 -eq $? || _lt_function_replace_fail=:
-else
- # Save a `func_append' function call even when '+=' is not available
- sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
- test 0 -eq $? || _lt_function_replace_fail=:
-fi
-
-if test x"$_lt_function_replace_fail" = x":"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unable to substitute extended shell functions in $ofile" >&5
-$as_echo "$as_me: WARNING: Unable to substitute extended shell functions in $ofile" >&2;}
-fi
-
-
- mv -f "$cfgfile" "$ofile" ||
- (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile")
- chmod +x "$ofile"
-
-
- cat <<_LT_EOF >> "$ofile"
-
-# ### BEGIN LIBTOOL TAG CONFIG: CXX
-
-# The linker used to build libraries.
-LD=$lt_LD_CXX
-
-# How to create reloadable object files.
-reload_flag=$lt_reload_flag_CXX
-reload_cmds=$lt_reload_cmds_CXX
-
-# Commands used to build an old-style archive.
-old_archive_cmds=$lt_old_archive_cmds_CXX
-
-# A language specific compiler.
-CC=$lt_compiler_CXX
-
-# Is the compiler the GNU compiler?
-with_gcc=$GCC_CXX
-
-# Compiler flag to turn off builtin functions.
-no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_CXX
-
-# Additional compiler flags for building library objects.
-pic_flag=$lt_lt_prog_compiler_pic_CXX
-
-# How to pass a linker flag through the compiler.
-wl=$lt_lt_prog_compiler_wl_CXX
-
-# Compiler flag to prevent dynamic linking.
-link_static_flag=$lt_lt_prog_compiler_static_CXX
-
-# Does compiler simultaneously support -c and -o options?
-compiler_c_o=$lt_lt_cv_prog_compiler_c_o_CXX
-
-# Whether or not to add -lc for building shared libraries.
-build_libtool_need_lc=$archive_cmds_need_lc_CXX
-
-# Whether or not to disallow shared libs when runtime libs are static.
-allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_CXX
-
-# Compiler flag to allow reflexive dlopens.
-export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_CXX
-
-# Compiler flag to generate shared objects directly from archives.
-whole_archive_flag_spec=$lt_whole_archive_flag_spec_CXX
-
-# Whether the compiler copes with passing no objects directly.
-compiler_needs_object=$lt_compiler_needs_object_CXX
-
-# Create an old-style archive from a shared archive.
-old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_CXX
-
-# Create a temporary old-style archive to link instead of a shared archive.
-old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_CXX
-
-# Commands used to build a shared archive.
-archive_cmds=$lt_archive_cmds_CXX
-archive_expsym_cmds=$lt_archive_expsym_cmds_CXX
-
-# Commands used to build a loadable module if different from building
-# a shared archive.
-module_cmds=$lt_module_cmds_CXX
-module_expsym_cmds=$lt_module_expsym_cmds_CXX
-
-# Whether we are building with GNU ld or not.
-with_gnu_ld=$lt_with_gnu_ld_CXX
-
-# Flag that allows shared libraries with undefined symbols to be built.
-allow_undefined_flag=$lt_allow_undefined_flag_CXX
-
-# Flag that enforces no undefined symbols.
-no_undefined_flag=$lt_no_undefined_flag_CXX
-
-# Flag to hardcode \$libdir into a binary during linking.
-# This must work even if \$libdir does not exist
-hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_CXX
-
-# Whether we need a single "-rpath" flag with a separated argument.
-hardcode_libdir_separator=$lt_hardcode_libdir_separator_CXX
-
-# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
-# DIR into the resulting binary.
-hardcode_direct=$hardcode_direct_CXX
-
-# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
-# DIR into the resulting binary and the resulting library dependency is
-# "absolute",i.e impossible to change by setting \${shlibpath_var} if the
-# library is relocated.
-hardcode_direct_absolute=$hardcode_direct_absolute_CXX
-
-# Set to "yes" if using the -LDIR flag during linking hardcodes DIR
-# into the resulting binary.
-hardcode_minus_L=$hardcode_minus_L_CXX
-
-# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR
-# into the resulting binary.
-hardcode_shlibpath_var=$hardcode_shlibpath_var_CXX
-
-# Set to "yes" if building a shared library automatically hardcodes DIR
-# into the library and all subsequent libraries and executables linked
-# against it.
-hardcode_automatic=$hardcode_automatic_CXX
-
-# Set to yes if linker adds runtime paths of dependent libraries
-# to runtime path list.
-inherit_rpath=$inherit_rpath_CXX
-
-# Whether libtool must link a program against all its dependency libraries.
-link_all_deplibs=$link_all_deplibs_CXX
-
-# Set to "yes" if exported symbols are required.
-always_export_symbols=$always_export_symbols_CXX
-
-# The commands to list exported symbols.
-export_symbols_cmds=$lt_export_symbols_cmds_CXX
-
-# Symbols that should not be listed in the preloaded symbols.
-exclude_expsyms=$lt_exclude_expsyms_CXX
-
-# Symbols that must always be exported.
-include_expsyms=$lt_include_expsyms_CXX
-
-# Commands necessary for linking programs (against libraries) with templates.
-prelink_cmds=$lt_prelink_cmds_CXX
-
-# Commands necessary for finishing linking programs.
-postlink_cmds=$lt_postlink_cmds_CXX
-
-# Specify filename containing input files.
-file_list_spec=$lt_file_list_spec_CXX
-
-# How to hardcode a shared library path into an executable.
-hardcode_action=$hardcode_action_CXX
-
-# The directories searched by this compiler when creating a shared library.
-compiler_lib_search_dirs=$lt_compiler_lib_search_dirs_CXX
-
-# Dependencies to place before and after the objects being linked to
-# create a shared library.
-predep_objects=$lt_predep_objects_CXX
-postdep_objects=$lt_postdep_objects_CXX
-predeps=$lt_predeps_CXX
-postdeps=$lt_postdeps_CXX
-
-# The library search path used internally by the compiler when linking
-# a shared library.
-compiler_lib_search_path=$lt_compiler_lib_search_path_CXX
-
-# ### END LIBTOOL TAG CONFIG: CXX
-_LT_EOF
-
- ;;
-
- esac
-done # for ac_tag
-
-
-as_fn_exit 0
-_ACEOF
-ac_clean_files=$ac_clean_files_save
-
-test $ac_write_fail = 0 ||
- as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
-
-
-# configure is writing to config.log, and then calls config.status.
-# config.status does its own redirection, appending to config.log.
-# Unfortunately, on DOS this fails, as config.log is still kept open
-# by configure, so config.status won't be able to write to it; its
-# output is simply discarded. So we exec the FD to /dev/null,
-# effectively closing config.log, so it can be properly (re)opened and
-# appended to by config.status. When coming back to configure, we
-# need to make the FD available again.
-if test "$no_create" != yes; then
- ac_cs_success=:
- ac_config_status_args=
- test "$silent" = yes &&
- ac_config_status_args="$ac_config_status_args --quiet"
- exec 5>/dev/null
- $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
- exec 5>>config.log
- # Use ||, not &&, to avoid exiting from the if with $? = 1, which
- # would make configure fail if this is the last instruction.
- $ac_cs_success || as_fn_exit 1
-fi
-if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
-$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
-fi
-
-
-echo "-----------------------------------------------"
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use run time optimization" >&5
-$as_echo_n "checking whether to use run time optimization... " >&6; }
-
-# Check whether --enable-optimization was given.
-if test "${enable_optimization+set}" = set; then :
- enableval=$enable_optimization;
-fi
-
-
-
-echo "#ifndef __FFLASFFPACK_optimise_H" > fflas-ffpack/fflas-ffpack-optimise.h
-echo "#define __FFLASFFPACK_optimise_H" >> fflas-ffpack/fflas-ffpack-optimise.h
-echo "" >> fflas-ffpack/fflas-ffpack-optimise.h
-echo "#endif // optimise.h" >> fflas-ffpack/fflas-ffpack-optimise.h
-
-if test "x$enable_optimization" == "xyes"; then :
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-
-
-BACKUP_CXXFLAGS=${CXXFLAGS}
-BACKUP_LIBS=${LIBS}
-
-echo " *** OPTIMISATIONS *** "
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking best threshold for Strassen-Winograd matrix multiplication" >&5
-$as_echo_n "checking best threshold for Strassen-Winograd matrix multiplication... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: see below" >&5
-$as_echo "see below" >&6; }
-
-CXXFLAGS_ALL="${BACKUP_CXXFLAGS} -I. -I.. -I`pwd` -I`pwd`/fflas-ffpack ${BLAS_CFLAGS} ${CBLAS_FLAG}"
-LIBS="${BACKUP_LIBS} ${BLAS_LIBS} "
-WINO=`cat optimiser/winograd.C`
-
-
-CXXFLAGS="${CXXFLAGS_ALL} -DFLTTYPE=double"
-echo " == Wino/BLAS threshold for double == "
-if test "$cross_compiling" = yes; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: cross compilation" >&5
-$as_echo "cross compilation" >&6; }
- break
-
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-${WINO}
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
- sed '$d' fflas-ffpack/fflas-ffpack-optimise.h > fflas-ffpack/fflas-ffpack-optimise.back.h ;
- mv fflas-ffpack/fflas-ffpack-optimise.back.h fflas-ffpack/fflas-ffpack-optimise.h ;
- cat WinoThreshold >> fflas-ffpack/fflas-ffpack-optimise.h ;
- echo "#endif // optimise.h" >> fflas-ffpack/fflas-ffpack-optimise.h
- rm WinoThreshold ;
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5
-$as_echo "done" >&6; }
-
-else
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: problem" >&5
-$as_echo "problem" >&6; }
- break
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-echo " == Wino/BLAS threshold for float == "
-CXXFLAGS="${CXXFLAGS_ALL} -DFLTTYPE=float"
-if test "$cross_compiling" = yes; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: cross compilation" >&5
-$as_echo "cross compilation" >&6; }
- break
-
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-${WINO}
-_ACEOF
-if ac_fn_cxx_try_run "$LINENO"; then :
-
- sed '$d' fflas-ffpack/fflas-ffpack-optimise.h > fflas-ffpack/fflas-ffpack-optimise.back.h ;
- mv fflas-ffpack/fflas-ffpack-optimise.back.h fflas-ffpack/fflas-ffpack-optimise.h ;
- cat WinoThreshold >> fflas-ffpack/fflas-ffpack-optimise.h ;
- echo "#endif // optimise.h" >> fflas-ffpack/fflas-ffpack-optimise.h
- rm WinoThreshold ;
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5
-$as_echo "done" >&6; }
-
-else
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: problem" >&5
-$as_echo "problem" >&6; }
- break
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
-
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no optimisation" >&5
-$as_echo "no optimisation" >&6; }
-
-fi
-
-
diff --git a/configure.ac b/configure.ac
index eebaced..85dfa71 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,5 +1,5 @@
# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# adapted from LinBox configuration
#
# ========LICENCE========
@@ -21,17 +21,16 @@
# ========LICENCE========
#/
-
-
AC_PREREQ([2.61])
-AC_INIT([FFLAS-FFPACK], [1.6.0],[ffpack-devel at googlegroups.com],[fflas-ffpack],
- [http://www.linalg.org/projects/fflas-ffpack])
+AC_INIT([FFLAS-FFPACK], [2.2.1],[ffpack-devel at googlegroups.com],[fflas-ffpack],
+ [https://github.com/linbox-team/fflas-ffpack])
+
AC_CONFIG_MACRO_DIR([macros])
AC_CONFIG_AUX_DIR([build-aux])
AM_INIT_AUTOMAKE([1.8 gnu no-dependencies -Wall -Wno-portability])
-AM_CONFIG_HEADER([config.h])
-AX_PREFIX_CONFIG_H(fflas-ffpack/fflas-ffpack-config.h, __FFLASFFPACK)
+AC_CONFIG_HEADERS([config.h])
+AX_PREFIX_CONFIG_H(fflas-ffpack/config.h, __FFLASFFPACK)
AC_PATH_PROG(RM, rm, $FALSE)
RM="$RM -f"
@@ -54,6 +53,7 @@ AC_PROFILE
AC_WARNINGS
echo "-----------------------------------------------"
+
# CFLAGS=${CFLAGS:-$DEFAULT_CFLAGS}
# CXXFLAGS=${CXXFLAGS:-$DEFAULT_CXXFLAGS}
@@ -61,6 +61,7 @@ echo "-----------------------------------------------"
# Try and pass different flags according to compiler #
######################################################
+
# disable default -g -O2 CXXFLAGS
: ${CXXFLAGS=""}
@@ -69,21 +70,25 @@ AC_PROG_CXX
AC_COMPILER_NAME
+# We need a C++11 compiler now - AB 2014-12-12
+AX_CXX_COMPILE_STDCXX_11([],[mandatory])
+
AC_SUBST([DEFAULT_CFLAGS])
AC_SUBST([DEBUG_CFLAGS])
AC_SUBST([TESTS_CFLAGS])
TESTS_CFLAGS="-O0"
DEBUG_CFLAGS="-g"
-DEFAULT_CFLAGS="-pipe"
+DEFAULT_CFLAGS=""
WARN_CFLAGS="-Wall"
+#TODO use -fast for icc, -ipa for eko...
if test "x$DBG" = "xyes" ; then
- DEFAULT_CFLAGS="-O0 ${DEFAULT_CFLAGS} " #those are CXXFLAGS
+ DEFAULT_CFLAGS="-O0 ${DEFAULT_CFLAGS}" #those are CXXFLAGS
DEBUG_CFLAGS="${DEBUG_CFLAGS} -DDEBUG -DFFLASFFPACK_DEBUG"
else
- DEFAULT_CFLAGS="-O2 ${DEFAULT_CFLAGS} "
- DEBUG_CFLAGS="${DEBUG_CFLAGS} -DNDEBUG -UFFLASFFPACK_DEBUG -UFFLASFFPACK_DEBUG"
+ DEFAULT_CFLAGS="-O2 ${DEFAULT_CFLAGS}"
+ DEBUG_CFLAGS="-DNDEBUG -UFFLASFFPACK_DEBUG"
fi
if test "x$PROF" = "xyes" ; then
@@ -91,41 +96,102 @@ if test "x$PROF" = "xyes" ; then
fi
if test "x$WARN" = "xyes" -o "x$WARN" = "xfull" ; then
- if test "x${CCNAM}" = "xicc" ; then
+ case x${CCNAM} in
+ xicc)
WARN_CFLAGS="${WARN_CFLAGS} -Wcheck"
- # DEBUG_CFLAGS="-fast"
- else
- if test "x${CCNAM}" = "xgcc" -o "x${CCNAM}" = "xeko" ; then
+ WARN_CFLAGS="${WARN_CFLAGS} -Wall -Wno-unused-parameter -Wuninitialized -Wconversion -Wcast-qual -pedantic -Wshadow -Wpointer-arith -Wwrite-strings -Wno-long-long"
+ WARN_CFLAGS="${WARN_CFLAGS} -Wextra -ansi"
+ ;;
+ xeko)
+ WARN_CFLAGS="${WARN_CFLAGS} -Wno-unused-parameter"
+ ;;
+ xgcc|xgcc44)
+ WARN_CFLAGS="${WARN_CFLAGS} -Wextra -Wno-unused-parameter"
+ if test "x${WARN}" = "xfull" ; then
+ WARN_CFLAGS="${WARN_CFLAGS} -Wuninitialized -Wconversion -Wcast-qual -pedantic -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Wno-long-long -Wno-variadic-macros -Wno-vla"
+ fi
+ if test "x${HAVE_CXX11}" = "x0" ; then
+ WARN_CFLAGS="${WARN_CFLAGS} -ansi"
+ fi
+ ;;
+ xgcc48)
+ WARN_CFLAGS="${WARN_CFLAGS} -Wextra -Wno-unused-parameter"
+ if test "x${WARN}" = "xfull" ; then
+ WARN_CFLAGS="${WARN_CFLAGS} -Wuninitialized -Wconversion -Wcast-qual -pedantic -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Wno-long-long -Wno-variadic-macros -Wno-vla"
+ # WARN_CFLAGS="${WARN_CFLAGS} -fsanitize=address"
+ fi
+ if test "x${HAVE_CXX11}" = "x0" ; then
+ WARN_CFLAGS="${WARN_CFLAGS} -ansi"
+ fi
+
+ ;;
+
+ xclang)
WARN_CFLAGS="${WARN_CFLAGS} -Wextra -Wno-unused-parameter"
if test "x${WARN}" = "xfull" ; then
- WARN_CFLAGS="${WARN_CFLAGS} -Wuninitialized -Wconversion -Wcast-qual -ansi -pedantic -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Wno-long-long"
+ WARN_CFLAGS="${WARN_CFLAGS} -Wuninitialized -Wconversion -Wcast-qual -pedantic -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Wno-long-long -Wno-vla-extension -Wno-variadic-macros"
+ WARN_CFLAGS="${WARN_CFLAGS} -D__STRICT_ANSI__"
fi
- else
+ ;;
+ xclang31)
+ WARN_CFLAGS="${WARN_CFLAGS} -Wextra -Wno-unused-parameter"
+ if test "x${WARN}" = "xfull" ; then
+ WARN_CFLAGS="${WARN_CFLAGS} -Wuninitialized -Wconversion -Wcast-qual -pedantic -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Wno-long-long -g -Wno-vla-extension -Wno-variadic-macros"
+ WARN_CFLAGS="${WARN_CFLAGS} -D__STRICT_ANSI__"
+ # WARN_CFLAGS="${WARN_CFLAGS} -fsanitize=address"
+ fi
+ ;;
+
+ *)
echo
echo "*******************************************************"
echo "unsupported compiler ($CCNAM). Please file a bug."
echo "*******************************************************"
echo
WARN_CFLAGS="${WARN_CFLAGS}"
- fi
- fi
+ esac
fi
DEFAULT_CFLAGS="${DEFAULT_CFLAGS} ${WARN_CFLAGS} ${DEBUG_CFLAGS}"
-TESTS_CFLAGS="${TESTS_CFLAGS} ${WARN_CFLAGS} ${DEBUG_CFLAGS}"
+TESTS_CFLAGS="${TESTS_CFLAGS} ${WARN_CFLAGS} #${DEBUG_CFLAGS}"
AC_HEADER_STDC
AC_PROG_LIBTOOL
-
+AC_PROG_EGREP
+AC_PROG_SED
# newer libtool...
LT_PREREQ([2.2])
LT_INIT
+echo "-----------------------------------------------"
+echo " START FFLAS-FFPACK CONFIG "
+echo "-----------------------------------------------"
+
+
+echo "-----------------------------------------------"
+FF_CHECK_OMP
+
+# TODO do FF_CHECK_SIMD and take best, define USE_SSE2/AVX/AVX2/... and have also __FFLASFFPACK_USE_SIMD
+FF_CHECK_SSE
+FF_CHECK_AVX
+
+AVXFLAGS="${SSEFLAGS} ${AVXFLAGS}"
echo "-----------------------------------------------"
+AC_SUBST([PARFLAGS],['${AVXFLAGS} ${OMPFLAGS}'])
+ case x${CCNAM} in
+ xgcc|xgcc44|xgcc48)
+ # With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
+ # have overloads for both types without linking error.
+ AVXFLAGS="${AVXFLAGS} -fabi-version=6"
+ ;;
+ *)
+ esac
+
+AC_SUBST([PARLIBS],['${OMPFLAGS}'])
# Machine characteristics
@@ -138,7 +204,7 @@ AC_CHECK_SIZEOF(__int64, 64)
# Checks for header files.
AC_HEADER_STDC
-AC_CHECK_HEADERS([float.h limits.h stddef.h stdlib.h string.h sys/time.h stdint.h])
+AC_CHECK_HEADERS([float.h limits.h stddef.h stdlib.h string.h sys/time.h stdint.h pthread.h])
# check endianness of the architecture
@@ -149,59 +215,6 @@ AC_C_BIGENDIAN(
# Create some useful data types of fixed, known lengths
-# We hereby assume that a character is always one byte
-# LINBOX_INT8="char";
-
-# case $ac_cv_sizeof_char in
- # 1)
- # TWO_BYTES=2;
- # FOUR_BYTES=4;
- # EIGHT_BYTES=8;
- # ;;
- # 8)
- # TWO_BYTES=16;
- # FOUR_BYTES=32;
- # EIGHT_BYTES=64;
-# esac
-
-# case $TWO_BYTES in
- # $ac_cv_sizeof_short)
- # LINBOX_INT16="short";
- # ;;
- # $ac_cv_sizeof_int)
- # LINBOX_INT16="int";
- # ;;
-# esac
-
-# case $FOUR_BYTES in
- # $ac_cv_sizeof_short)
- # LINBOX_INT32="short";
- # ;;
- # $ac_cv_sizeof_int)
- # LINBOX_INT32="int";
- # ;;
- # $ac_cv_sizeof_long)
- # LINBOX_INT32="long";
- # ;;
-# esac
-
-# case $EIGHT_BYTES in
- # $ac_cv_sizeof_short)
- # LINBOX_INT64="short";
- # ;;
- # $ac_cv_sizeof_int)
- # LINBOX_INT64="int";
- # ;;
- # $ac_cv_sizeof_long)
- # LINBOX_INT64="long";
- # ;;
- # $ac_cv_sizeof_long_long)
- # LINBOX_INT64="long long";
- # ;;
- # $ac_cv_sizeof___int64)
- # LINBOX_INT64="__int64";
- # ;;
-# esac
# AC_DEFINE_UNQUOTED(INT8, $LINBOX_INT8, Canonical 8-bit data type)
# AC_DEFINE_UNQUOTED(INT16, $LINBOX_INT16, Canonical 16-bit data type)
@@ -211,50 +224,56 @@ AC_C_BIGENDIAN(
echo "-----------------------------------------------"
# Feature checks
FF_MISC
-# LB_DRIVER
AC_LANG_CPLUSPLUS
-echo "-----------------------------------------------"
-LB_CHECK_GMP(,,[
-echo '*******************************************************************************'
-echo ' WARNING: GMP not found! (this is not a problem for the moment)'
-echo
-echo ' GMP library compiled with --enable-cxx is required for this library to compile.'
-echo ' Please make sure GMP is installed and specify its location with the option'
-echo ' --with-gmp=<prefix> when running configure.'
-echo ' Do not forget to set/export LD_LIBRARY_PATH if necessary.'
-echo '*******************************************************************************'
-])
+echo "-----------------------------------------------"
+# Getting GMP from Givaro - AB 2014-12-12
+#FF_CHECK_GMP
-LB_CHECK_GIVARO(,,[
+FF_CHECK_GIVARO(,,[
echo '*******************************************************************************'
-echo ' WARNING: GIVARO not found! (this is not a problem for the moment)'
+echo ' WARNING: GIVARO not found!'
echo
echo ' GIVARO library is required for some tests in this library.'
echo ' Please make sure GIVARO is installed and specify its location with the'
echo ' option --with-givaro=<prefix> when running configure.'
echo ' Do not forget to set/export LD_LIBRARY_PATH if necessary.'
echo '*******************************************************************************'
+exit 1
])
BLAS_FOUND=false
-FF_CHECK_BLAS
+FF_CHECK_BLAS_CFLAGS
+FF_CHECK_BLAS_LIBS
+FF_CHECK_MKL
+FF_CHECK_USER_BLAS
+FF_CHECK_USER_LAPACK
-FF_CHECK_GOTOBLAS
+# FF_CHECK_BLAS
-FF_CHECK_GSL
+# FF_CHECK_GOTOBLAS
+
+# FF_CHECK_GSL
+
+# if test "$BLAS_FOUND" = "false" ; then
+ # FF_CHECK_CBLAS
+# fi
+
+# if test "$BLAS_FOUND" = "false" ; then
+ # FF_CHECK_OTHERBLAS
+# fi
+
+# FF_CHECK_LAPACK
+
+# if test "$BLAS_FOUND" = "false" ; then
+ # FF_CHECK_BLAS2
+# fi
-if test "$BLAS_FOUND" = "false" ; then
- FF_CHECK_CBLAS
-fi
-if test "$BLAS_FOUND" = "false" ; then
- FF_CHECK_OTHERBLAS
-fi
if test "$BLAS_FOUND" = "false" ; then
echo ''
@@ -263,15 +282,19 @@ if test "$BLAS_FOUND" = "false" ; then
echo
echo ' BLAS routines are required for this library to compile. Please'
echo ' make sure BLAS are installed and specify its location with the option'
- echo ' --with-blas=<lib> when running configure (or --with-cblas... see configure --help).'
+ echo ' --with-blas-libs=<libs> and if necessary --with-blas-cflags=<cflags>'
+ echo ' when running configure.'
echo '*******************************************************************************'
exit 1
fi
-FF_CHECK_LAPACK
-BLAS_LIBS="${BLAS_LIBS} ${LAPACK_LIBS}"
-AC_SUBST(BLAS_LIBS)
+
+# BLAS_LIBS="${BLAS_LIBS}"
+# BLAS_LIBS="-L/${BLAS_PATH} ${LAPACK_LIBS} ${BLAS_LIBS}"
+# AC_SUBST(BLAS_LIBS)
+
+# FF_CHECK_CUDA
# AM_CONDITIONAL(FFLASFFPACK_HAVE_BLAS, test "x$BLAS_FOUND" != "xfalse")
@@ -287,31 +310,47 @@ FF_DOC
# mkdir ./benchmarks/data ;
# fi
-CXXFLAGS="${GMP_CFLAGS} ${CXXFLAGS}"
+CXXFLAGS="${CXXFLAGS}"
+
AC_SUBST(CXXFLAGS)
+FF_PRECOMPILE
+
+echo "-----------------------------------------------"
+echo " END FFLAS-FFPACK CONFIG "
echo "-----------------------------------------------"
AC_CONFIG_FILES([
Makefile
macros/Makefile
+macros/CodeChunk/Makefile
fflas-ffpack-config
fflas-ffpack/Makefile
fflas-ffpack/fflas/Makefile
+fflas-ffpack/fflas/fflas_fgemm/Makefile
+fflas-ffpack/fflas/fflas_sparse/Makefile
+fflas-ffpack/fflas/fflas_sparse/coo/Makefile
+fflas-ffpack/fflas/fflas_sparse/csr/Makefile
+fflas-ffpack/fflas/fflas_sparse/ell/Makefile
+fflas-ffpack/fflas/fflas_sparse/ell_simd/Makefile
+fflas-ffpack/fflas/fflas_sparse/csr_hyb/Makefile
+fflas-ffpack/fflas/fflas_sparse/sell/Makefile
+fflas-ffpack/fflas/fflas_sparse/hyb_zo/Makefile
+fflas-ffpack/fflas/fflas_igemm/Makefile
+fflas-ffpack/fflas/fflas_simd/Makefile
fflas-ffpack/ffpack/Makefile
fflas-ffpack/field/Makefile
-utils/Makefile
+fflas-ffpack/utils/Makefile
+fflas-ffpack/paladin/Makefile
+fflas-ffpack/interfaces/Makefile
+fflas-ffpack/interfaces/libs/Makefile
doc/Makefile
tests/Makefile
-benchmark/Makefile
+tests/data/Makefile
+benchmarks/Makefile
+examples/Makefile
optimiser/Makefile
-benchmark/src/Makefile
-benchmark/src/BLOCKING/Makefile
-benchmark/src/FFLAS_FFPACK/Makefile
-benchmark/src/BLAS_LAPACK/Makefile
-benchmark/html/Makefile
-benchmark/graph/Makefile
-benchmark/test-src/Makefile
+fflas-ffpack.pc
])
AC_OUTPUT
diff --git a/doc/Doxyfile b/doc/Doxyfile
old mode 100755
new mode 100644
index fa3d581..16fd032
--- a/doc/Doxyfile
+++ b/doc/Doxyfile
@@ -26,7 +26,7 @@ DOXYFILE_ENCODING = UTF-8
# identify the project. Note that if you do not use Doxywizard you need
# to put quotes around the project name if it contains spaces.
-PROJECT_NAME = FflasFfpack
+PROJECT_NAME = FFLAS-FFPACK
# The PROJECT_NUMBER tag can be used to enter a project or revision number.
# This could be handy for archiving the generated documentation or
diff --git a/doc/DoxyfileDev b/doc/DoxyfileDev
old mode 100755
new mode 100644
diff --git a/doc/Makefile.am b/doc/Makefile.am
old mode 100755
new mode 100644
index fd1d0a9..d622941
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -1,5 +1,5 @@
# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# adapted from LinBox documentation
#
# ========LICENCE========
diff --git a/doc/Makefile.in b/doc/Makefile.in
deleted file mode 100644
index 093787e..0000000
--- a/doc/Makefile.in
+++ /dev/null
@@ -1,493 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# adapted from LinBox documentation
-#
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = doc
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = $(FFLASFFPACK_DOC_PATH)
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
- at FFLASFFPACK_BUILD_DOC_TRUE@USE_TARGETS = docs
- at FFLASFFPACK_BUILD_DOC_TRUE@INSTALL_TARGETS = install-doc
-EXTRA_DIST = \
- Doxyfile \
- DoxyfileDev \
- mainpage.doxy \
- fflas-ffpack.html
-
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps doc/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps doc/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-tags: TAGS
-TAGS:
-
-ctags: CTAGS
-CTAGS:
-
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool clean-local mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: install-am install-strip
-
-.PHONY: all all-am check check-am clean clean-generic clean-libtool \
- clean-local distclean distclean-generic distclean-libtool \
- distdir dvi dvi-am html html-am info info-am install \
- install-am install-data install-data-am install-dvi \
- install-dvi-am install-exec install-exec-am install-html \
- install-html-am install-info install-info-am install-man \
- install-pdf install-pdf-am install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- uninstall uninstall-am
-
-
-#man1_MANS = fflas-ffpack-config.1
-
-all all-local: $(USE_TARGETS)
-
-install install-data-local: $(USE_TARGETS) $(INSTALL_TARGETS)
-
-docs:
- sed -i 's/^\\version.*/\\version\ $(VERSION)/' mainpage.doxy
- if test -d fflas-ffpack-html ; then echo exists; else mkdir fflas-ffpack-html ; fi
- if test -d fflas-ffpack-dev-html ; then echo exists; else mkdir fflas-ffpack-dev-html ; fi
- cp ../INSTALL fflas-ffpack-html/
- cp ../COPYING fflas-ffpack-html/
- cp ../AUTHORS fflas-ffpack-html/
- doxygen Doxyfile
-
-docs_dev:
- make docs
- cp ../INSTALL fflas-ffpack-dev-html/
- cp ../COPYING fflas-ffpack-dev-html/
- cp ../AUTHORS fflas-ffpack-dev-html/
- doxygen DoxyfileDev
-
-# cp tutorial.html fflas-ffpack-dev-html/
-# cp install-dist.html fflas-ffpack-dev-html/
-# cp install-dev.html fflas-ffpack-dev-html/
-
-install-doc:
- mkdir -p $(DESTDIR)/$(docdir)
- cp -rp fflas-ffpack-html $(DESTDIR)/$(docdir)/fflas-ffpack-html
- cp -rp fflas-ffpack-dev-html $(DESTDIR)/$(docdir)/fflas-ffpack-dev-html
- cp -p fflas-ffpack.html $(DESTDIR)/$(docdir)/fflas-ffpack.html
-# \
-# doc.doxy \
-# tutorial.doxy \
-# fflas-ffpack.html \
-# fflas-ffpack-config.1 \
-#tutorial.html \
-# install-dev.html \
-# index-dev.html \
-# install-dist.html
-
-clean-local :
- rm -rf fflas-ffpack-html
- rm -rf fflas-ffpack-dev-html
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/doc/fflas-ffpack.html b/doc/fflas-ffpack.html
index c8d1b3d..1fc631f 100644
--- a/doc/fflas-ffpack.html
+++ b/doc/fflas-ffpack.html
@@ -1,6 +1,6 @@
<!--
# Coypright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# adapted from LinBox documentation
#
# ========LICENCE========
@@ -33,7 +33,7 @@
<!-- the following causes my FFLAS-FFPACK to load after 5 seconds
-->
<meta http-equiv="Refresh"
- content="5; URL=givaro-html/index.html">
+ content="5; URL=fflas-ffpack-html/index.html">
</head><body>
<!-- the following shows during the initial 5 seconds-->
FFLAS-FFPACK documentation main page: <a href="fflas-ffpack-html/index.html"> fflas-ffpack-html/index.html</a>
diff --git a/doc/mainpage.doxy b/doc/mainpage.doxy
old mode 100755
new mode 100644
index 3780de4..34708eb
--- a/doc/mainpage.doxy
+++ b/doc/mainpage.doxy
@@ -1,5 +1,5 @@
# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# adapted from LinBox documentation
#
# ========LICENCE========
@@ -25,7 +25,10 @@
/** @mainpage FFLAS-FFPACK Documentation.
*
* \section intro Introduction
- *
+ * FFLAS-FFPACK is a LGPL-2.1+ source code library for basic linear algebra operations over a finite field. It is inspired by BLAS interface (Basic Linear Algebra Subprograms) and the LAPACK library for numerical linear algebra, and shares part of their design. Yet it differs in many aspects due to the specifities of computing over a finite field:
+ * - it is generic with respect to the finite field, so as to accomodate a large variety of field sizes and implementations;
+ * - it is a pure source code library, to be included and compiled in the user's software. Its build system is only used for tests and benchmarks.
+ *
* \section goals Goals
*
* \section desig Design
@@ -47,18 +50,18 @@
*
*\section contrib Contributing to fflas-ffpack, getting assistance.
*
- *\version 1.4.0
+ *\version 2.1.0
*/
/** @page inst Configuring and Installing FFLAS-FFPACK
* FFLAS-FFPACK is a header-only package. Hower configuration process can be tweaked a lot.
- * Configure looks for BLAS and LAPACK routine.
+ * Configure looks for BLAS routines and Givaro library which are both mandatory dependencies.
* See the output of <code>./configure --help</code> for information about the LAPACK/BLAS discovering strategies.
- * For now, Givaro is not compulsory
*/
/** @page copy Copying and Licence
- * @brief no doc.
+ * @brief The FFLAS-FFPACK library is licensed under the terms of the GNU LGPL v2.1 or later.
+* See https://www.gnu.org/licenses/lgpl-2.1.html
*/
/** @page tuto Tutorial
diff --git a/examples/2x2-fgemm.C b/examples/2x2-fgemm.C
new file mode 100644
index 0000000..0d76030
--- /dev/null
+++ b/examples/2x2-fgemm.C
@@ -0,0 +1,77 @@
+/* Copyright (c) FFLAS-FFPACK
+* Written by Jean-Guillaume Dumas <Jean-Guillaume.Dumas at imag.fr>
+* ========LICENCE========
+* This file is part of the library FFLAS-FFPACK.
+*
+* FFLAS-FFPACK is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+* ========LICENCE========
+*/
+
+
+#include <fflas-ffpack/fflas-ffpack-config.h>
+#include <givaro/modular-balanced.h>
+#include <fflas-ffpack/fflas/fflas.h>
+#include <fflas-ffpack/utils/timer.h>
+#include <fflas-ffpack/utils/Matio.h>
+#include <fflas-ffpack/utils/args-parser.h>
+
+#include <iostream>
+
+using namespace FFLAS;
+
+int main(int argc, char** argv) {
+
+ typedef Givaro::ModularBalanced<float> Ring;
+ Ring F(101);
+
+ Ring::Element * A, * B, * C;
+
+ A = fflas_new(F,2,3);
+ B = fflas_new(F,3,2);
+ C = fflas_new(F,2,2);
+
+ F.assign(*(A+0),F.one);
+ F.init(*(A+1),2);
+ F.init(*(A+2),3);
+ F.init(*(A+3),5);
+ F.init(*(A+4),7);
+ F.init(*(A+5),11);
+
+ Ring::Element t,u,v;
+ F.init(t, 2); F.init(u, 4); F.init(v);
+
+ F.assign(*(B+0),F.zero); // B[0] <- 0
+ F.assign(*(B+1),t); // B[1] <- 2
+ F.assign(*(B+2),u); // B[2] <- 4
+ F.add(v,t,u); F.assign(*(B+3),v); // B[3] <- 2+4
+ F.mul(*(B+4),t,u); // B[4] <- 2*4
+ F.add(*(B+5),u,v); // B[5] <- 4+6
+
+ write_field(F, std::cout << "A:=", A, 2, 3, 3,true) << std::endl;
+ write_field(F, std::cout << "B:=", B, 3, 2, 2,true) << std::endl;
+
+ fgemm (F, FflasNoTrans, FflasNoTrans, 2,2,3, F.one, A, 3, B, 2, F.zero, C, 2 );
+
+ write_field(F, std::cout << "C:=", C, 2, 2, 2,true) << std::endl;
+
+ fflas_delete( A);
+ fflas_delete( B);
+ fflas_delete( C);
+
+
+
+ return 0;
+}
+
diff --git a/examples/Makefile.am b/examples/Makefile.am
new file mode 100644
index 0000000..1d81f01
--- /dev/null
+++ b/examples/Makefile.am
@@ -0,0 +1,58 @@
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by JGD <Jean-Guillaume.Dumas at imag.fr>
+#
+# ========LICENCE========
+# This file is part of the library FFLAS-FFPACK.
+#
+# FFLAS-FFPACK is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+# ========LICENCE========
+#/
+
+SUBDIRS =
+examples: $(EXAMPLES)
+
+AM_CPPFLAGS=-I$(top_srcdir) -g
+AM_CXXFLAGS = @DEFAULT_CFLAGS@
+AM_CPPFLAGS += $(CBLAS_FLAG) $(GIVARO_CFLAGS) $(OPTFLAGS) -I$(top_srcdir)/fflas-ffpack/utils/ -I$(top_srcdir)/fflas-ffpack/fflas/ -I$(top_srcdir)/fflas-ffpack/ffpack -I$(top_srcdir)/fflas-ffpack/field $(CUDA_CFLAGS) $(PARFLAGS)
+LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS)
+AM_LDFLAGS=-static $(PARLIBS)
+
+FFLA_EXAMP = 2x2-fgemm
+2x2_fgemm_SOURCES = 2x2-fgemm.C
+
+if FFLASFFPACK_HAVE_LAPACK
+USE_LAPACK_EXAMP = $(LAPA_EXAMP)
+endif
+
+EXAMPLES = \
+ $(FFLA_EXAMP) \
+ $(BLAS_EXAMP) \
+ $(USE_LAPACK_EXAMP) \
+ $(USE_OMP_EXAMP)
+
+CLEANFILES = $(EXAMPLES)
+
+EXTRA_PROGRAMS = $(EXAMPLES)
+
+
+define other_compilation
+ $(CXX) $(CXXFLAGS) $(AM_CXXFLAGS) $(OPTFLAGS) $(PARFLAGS) ${INCLUDES} $(AM_CPPFLAGS) $*.C -o $@ $(LDFLAGS) $(LDADD) $(LOADLIBES)
+endef
+
+%:%.C
+ $(other_compilation)
+
+%:%.cpp
+ $(other_compilation)
diff --git a/fflas-ffpack-config.in b/fflas-ffpack-config.in
index 3bcc393..59e7cd8 100644
--- a/fflas-ffpack-config.in
+++ b/fflas-ffpack-config.in
@@ -1,6 +1,6 @@
-#! /bin/bash
+#!/usr/bin/env bash
# Coypright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# adapted from LinBox configuration
#
# ========LICENCE========
@@ -35,12 +35,10 @@ micro=`echo @VERSION@ | cut -d'.' -f3`
decvr=$((((($major*100)+$minor)*100)+$micro))
-
cflags=false
libs=false
-
usage()
{
cat <<EOF
@@ -48,10 +46,11 @@ Usage: fflas-ffpack-config [OPTION]
Known values for OPTION are:
- --prefix show Givaro installation prefix
+ --prefix show FFLAS-FFPACK installation prefix
--libs print library linking information
--blas-libs print BLAS library linking information
- --cflags print pre-processor and compiler flags
+ --cflags print pre-processor flags
+ --cflags-full print pre-processor and compiler flags
--blas-cflags print BLAS pre-processor and BLAS compiler flags
--blas-home print where BLAS were found
--help display this help and exit
@@ -69,11 +68,11 @@ fi
while test $# -gt 0; do
case "$1" in
- -*=*)
+ -*=*)
optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'`
;;
- *)
- optarg=
+ *)
+ optarg=
;;
esac
@@ -101,24 +100,27 @@ while test $# -gt 0; do
;;
--cflags)
- echo -I${includedir} @GMP_CFLAGS@ @GIVARO_CFLAGS@
-# @CBLAS_FLAG@
+ echo -I${includedir} @CBLAS_FLAG@ @AVXFLAGS@ @OMPFLAGS@ @GIVARO_CFLAGS@ @PRECOMPILE_FLAGS@ # @PARFLAGS@ # @CUDA_CFLAGS@
;;
+ --cflags-full)
+ echo -I${includedir} @CBLAS_FLAG@ @AVXFLAGS@ @CXXFLAGS@ @OMPFLAGS@ @GIVARO_CFLAGS@ @PRECOMPILE_FLAGS@ # @PARFLAGS@ # @CUDA_CFLAGS@
+ ;;
+
--blas-cflags)
- echo -I${includedir} @CBLAS_FLAG@
+ echo -I${includedir} @CBLAS_FLAG@ @AVXFLAGS@ # @PARFLAGS@ # @CUDA_CFLAGS@
;;
--libs)
- echo @GMP_LIBS@ @GIVARO_LIBS@ @BLAS_LIBS@
+ echo @PRECOMPILE_LIBS@ @CBLAS_LIBS@ @GIVARO_LIBS@ # @CUDA_LIBS@
;;
--blas-libs)
- echo @BLAS_LIBS@
+ echo @CBLAS_LIBS@
;;
-
+
--blas-home)
- echo @BLAS_PATH@
+ echo @BLAS_PATH@
;;
*)
@@ -128,5 +130,6 @@ while test $# -gt 0; do
esac
shift
done
+echo
exit 0
diff --git a/fflas-ffpack.pc.in b/fflas-ffpack.pc.in
new file mode 100644
index 0000000..1f38302
--- /dev/null
+++ b/fflas-ffpack.pc.in
@@ -0,0 +1,14 @@
+/------------------ fflas-ffpack.pc ------------------------
+prefix=@prefix@
+exec_prefix=@prefix@/bin
+libdir=@prefix@/lib
+includedir=@prefix@/include
+
+Name: fflas-ffpack
+Description: Finite Field Linear Algebra Suroutines/Package
+URL: http://linbox-team.github.io/fflas-ffpack/
+Version: @VERSION@
+Requires: givaro >= 4.0.1
+Libs: @PRECOMPILE_LIBS@ @CBLAS_LIBS@
+Cflags: @DEFAULT_CFLAGS@ @CBLAS_FLAG@ @CXXFLAGS@ @AVXFLAGS@ @OMPFLAGS@ @PRECOMPILE_FLAGS@
+\-------------------------------------------------------
\ No newline at end of file
diff --git a/fflas-ffpack/Makefile.am b/fflas-ffpack/Makefile.am
index b18ba82..4bccdb2 100644
--- a/fflas-ffpack/Makefile.am
+++ b/fflas-ffpack/Makefile.am
@@ -1,5 +1,5 @@
# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# adapted from LinBox configuration
#
# ========LICENCE========
@@ -22,12 +22,12 @@
#/
-SUBDIRS=fflas ffpack field
+SUBDIRS=fflas ffpack field utils paladin interfaces
-EXTRA_DIST=fflas-ffpack.doxy utils
+EXTRA_DIST=fflas-ffpack.doxy
pkginclude_HEADERS = config-blas.h \
fflas-ffpack.h \
+ config.h \
fflas-ffpack-config.h \
- fflas-ffpack-configuration.h \
fflas-ffpack-optimise.h
diff --git a/fflas-ffpack/Makefile.in b/fflas-ffpack/Makefile.in
deleted file mode 100644
index 2bfcb9e..0000000
--- a/fflas-ffpack/Makefile.in
+++ /dev/null
@@ -1,702 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# adapted from LinBox configuration
-#
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = fflas-ffpack
-DIST_COMMON = $(pkginclude_HEADERS) $(srcdir)/Makefile.am \
- $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
- html-recursive info-recursive install-data-recursive \
- install-dvi-recursive install-exec-recursive \
- install-html-recursive install-info-recursive \
- install-pdf-recursive install-ps-recursive install-recursive \
- installcheck-recursive installdirs-recursive pdf-recursive \
- ps-recursive uninstall-recursive
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-am__vpath_adj = case $$p in \
- $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
- *) f=$$p;; \
- esac;
-am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
-am__install_max = 40
-am__nobase_strip_setup = \
- srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
-am__nobase_strip = \
- for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
-am__nobase_list = $(am__nobase_strip_setup); \
- for p in $$list; do echo "$$p $$p"; done | \
- sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
- $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
- if (++n[$$2] == $(am__install_max)) \
- { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
- END { for (dir in files) print dir, files[dir] }'
-am__base_list = \
- sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
- sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-am__uninstall_files_from_dir = { \
- test -z "$$files" \
- || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
- || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
- $(am__cd) "$$dir" && rm -f $$files; }; \
- }
-am__installdirs = "$(DESTDIR)$(pkgincludedir)"
-HEADERS = $(pkginclude_HEADERS)
-RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
- distclean-recursive maintainer-clean-recursive
-AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
- $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
- distdir
-ETAGS = etags
-CTAGS = ctags
-DIST_SUBDIRS = $(SUBDIRS)
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-am__relativize = \
- dir0=`pwd`; \
- sed_first='s,^\([^/]*\)/.*$$,\1,'; \
- sed_rest='s,^[^/]*/*,,'; \
- sed_last='s,^.*/\([^/]*\)$$,\1,'; \
- sed_butlast='s,/*[^/]*$$,,'; \
- while test -n "$$dir1"; do \
- first=`echo "$$dir1" | sed -e "$$sed_first"`; \
- if test "$$first" != "."; then \
- if test "$$first" = ".."; then \
- dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
- dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
- else \
- first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
- if test "$$first2" = "$$first"; then \
- dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
- else \
- dir2="../$$dir2"; \
- fi; \
- dir0="$$dir0"/"$$first"; \
- fi; \
- fi; \
- dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
- done; \
- reldir="$$dir2"
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-SUBDIRS = fflas ffpack field
-EXTRA_DIST = fflas-ffpack.doxy utils
-pkginclude_HEADERS = config-blas.h \
- fflas-ffpack.h \
- fflas-ffpack-config.h \
- fflas-ffpack-configuration.h \
- fflas-ffpack-optimise.h
-
-all: all-recursive
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps fflas-ffpack/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps fflas-ffpack/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-install-pkgincludeHEADERS: $(pkginclude_HEADERS)
- @$(NORMAL_INSTALL)
- @list='$(pkginclude_HEADERS)'; test -n "$(pkgincludedir)" || list=; \
- if test -n "$$list"; then \
- echo " $(MKDIR_P) '$(DESTDIR)$(pkgincludedir)'"; \
- $(MKDIR_P) "$(DESTDIR)$(pkgincludedir)" || exit 1; \
- fi; \
- for p in $$list; do \
- if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
- echo "$$d$$p"; \
- done | $(am__base_list) | \
- while read files; do \
- echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(pkgincludedir)'"; \
- $(INSTALL_HEADER) $$files "$(DESTDIR)$(pkgincludedir)" || exit $$?; \
- done
-
-uninstall-pkgincludeHEADERS:
- @$(NORMAL_UNINSTALL)
- @list='$(pkginclude_HEADERS)'; test -n "$(pkgincludedir)" || list=; \
- files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
- dir='$(DESTDIR)$(pkgincludedir)'; $(am__uninstall_files_from_dir)
-
-# This directory's subdirectories are mostly independent; you can cd
-# into them and run `make' without going through this Makefile.
-# To change the values of `make' variables: instead of editing Makefiles,
-# (1) if the variable is set in `config.status', edit `config.status'
-# (which will cause the Makefiles to be regenerated when you run `make');
-# (2) otherwise, pass the desired values on the `make' command line.
-$(RECURSIVE_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- target=`echo $@ | sed s/-recursive//`; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- dot_seen=yes; \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done; \
- if test "$$dot_seen" = "no"; then \
- $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
- fi; test -z "$$fail"
-
-$(RECURSIVE_CLEAN_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- case "$@" in \
- distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
- *) list='$(SUBDIRS)' ;; \
- esac; \
- rev=''; for subdir in $$list; do \
- if test "$$subdir" = "."; then :; else \
- rev="$$subdir $$rev"; \
- fi; \
- done; \
- rev="$$rev ."; \
- target=`echo $@ | sed s/-recursive//`; \
- for subdir in $$rev; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done && test -z "$$fail"
-tags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
- done
-ctags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
- done
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
- include_option=--etags-include; \
- empty_fix=.; \
- else \
- include_option=--include; \
- empty_fix=; \
- fi; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- test ! -f $$subdir/TAGS || \
- set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
- fi; \
- done; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
- @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- $(am__make_dryrun) \
- || test -d "$(distdir)/$$subdir" \
- || $(MKDIR_P) "$(distdir)/$$subdir" \
- || exit 1; \
- dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
- $(am__relativize); \
- new_distdir=$$reldir; \
- dir1=$$subdir; dir2="$(top_distdir)"; \
- $(am__relativize); \
- new_top_distdir=$$reldir; \
- echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
- echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
- ($(am__cd) $$subdir && \
- $(MAKE) $(AM_MAKEFLAGS) \
- top_distdir="$$new_top_distdir" \
- distdir="$$new_distdir" \
- am__remove_distdir=: \
- am__skip_length_check=: \
- am__skip_mode_fix=: \
- distdir) \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-recursive
-all-am: Makefile $(HEADERS)
-installdirs: installdirs-recursive
-installdirs-am:
- for dir in "$(DESTDIR)$(pkgincludedir)"; do \
- test -z "$$dir" || $(MKDIR_P) "$$dir"; \
- done
-install: install-recursive
-install-exec: install-exec-recursive
-install-data: install-data-recursive
-uninstall: uninstall-recursive
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-recursive
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-recursive
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-recursive
- -rm -f Makefile
-distclean-am: clean-am distclean-generic distclean-tags
-
-dvi: dvi-recursive
-
-dvi-am:
-
-html: html-recursive
-
-html-am:
-
-info: info-recursive
-
-info-am:
-
-install-data-am: install-pkgincludeHEADERS
-
-install-dvi: install-dvi-recursive
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-recursive
-
-install-html-am:
-
-install-info: install-info-recursive
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-recursive
-
-install-pdf-am:
-
-install-ps: install-ps-recursive
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-recursive
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-recursive
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-recursive
-
-pdf-am:
-
-ps: ps-recursive
-
-ps-am:
-
-uninstall-am: uninstall-pkgincludeHEADERS
-
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \
- install-am install-strip tags-recursive
-
-.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
- all all-am check check-am clean clean-generic clean-libtool \
- ctags ctags-recursive distclean distclean-generic \
- distclean-libtool distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-pdf install-pdf-am \
- install-pkgincludeHEADERS install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- installdirs-am maintainer-clean maintainer-clean-generic \
- mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
- ps ps-am tags tags-recursive uninstall uninstall-am \
- uninstall-pkgincludeHEADERS
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/fflas-ffpack/config-blas.h b/fflas-ffpack/config-blas.h
index 9311658..6620028 100644
--- a/fflas-ffpack/config-blas.h
+++ b/fflas-ffpack/config-blas.h
@@ -29,10 +29,35 @@
#ifndef __FFLASFFPACK_config_blas_H
#define __FFLASFFPACK_config_blas_H
-#ifndef __FFLASFFPACK_CONFIGURATION
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
+// #include "fflas-ffpack/utils/fflas_memory.h"
+// #ifndef __FFLASFFPACK_CONFIGURATION
+// #include "fflas-ffpack/fflas-ffpack-config.h"
+// #endif
+
+// #ifdef OPTIMISATION_MODE
+// #include "fflas-ffpack/config.h"
+// #endif
+
+#ifdef HAVE_MKL
+#define __FFLASFFPACK_HAVE_MKL
+#endif
+
+#ifdef __FFLASFFPACK_HAVE_MKL
+#include <mkl.h>
+
#endif
+
+#ifndef CBLAS_INT
+#ifdef blasint /* openblas */
+#define CBLAS_INT blasint
+#elif defined( MKL_INT )
+#define CBLAS_INT MKL_INT
+#else
+#define CBLAS_INT int
+#endif /* blasint */
+#endif /* CBLAS_INT */
+
#ifdef CUDA_BLAS
#define sgemv_ cublas_sgemv
@@ -40,9 +65,11 @@
#define strsm_ cublas_strsm
#define strmm_ cublas_strmm
-#endif
+#endif // CUDA_BLAS
+#ifndef __FFLASFFPACK_HAVE_MKL
+
#define CBLAS_ENUM_DEFINED_H
enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102 };
enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, AtlasConj=114};
@@ -50,7 +77,7 @@
enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
- #define CBLAS_INDEX int
+// #define CBLAS_INDEX int
#ifndef __FFLASFFPACK_HAVE_CBLAS
@@ -77,6 +104,7 @@ extern "C" {
void daxpy_ (const int*, const double*, const double*, const int*, double*, const int*);
void saxpy_ (const int*, const float*, const float*, const int*, float*, const int*);
double ddot_ (const int*, const double*, const int*, const double*, const int*);
+ float sdot_ (const int*, const float*, const int*, const float*, const int*);
double dasum_ (const int*, const double*, const int*);
int idamax_ (const int*, const double*, const int*);
double dnrm2_ (const int*, const double*, const int*);
@@ -85,6 +113,15 @@ extern "C" {
void dgemv_ (const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*);
void sgemv_ (const char*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*);
void dger_ (const int*, const int*, const double*, const double*, const int*, const double*, const int*, double*, const int*);
+ void sger_ (const int*, const int*, const float*, const float*, const int*, const float*, const int*, float*, const int*);
+
+ void dcopy_ (const int *, const double *, const int *, double *, const int *);
+ void scopy_ (const int *, const float *, const int *, float *, const int *);
+
+ void dscal_ (const int *, const double *, double *, const int *);
+ void sscal_ (const int *, const float *, float *, const int *);
+
+
// level 3 routines
void dtrsm_ (const char*, const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, double*, const int*);
@@ -116,6 +153,12 @@ extern "C" {
return ddot_ (&N, X, &incX, Y, &incY);
}
+ inline float cblas_sdot(const int N, const float *X, const int incX, const float *Y, const int incY)
+ {
+ return sdot_ (&N, X, &incX, Y, &incY);
+ }
+
+
inline double cblas_dasum(const int N, const double *X, const int incX){
return dasum_ (&N, X, &incX);
}
@@ -157,6 +200,35 @@ extern "C" {
dger_ (&M, &N, &alpha, X, &incX, Y, &incY, A, &lda);
}
+ inline void cblas_sger(const enum CBLAS_ORDER Order, const int M, const int N, const float alpha, const float *X, const int incX,
+ const float *Y, const int incY, float *A, const int lda)
+ {
+ if (Order == CblasRowMajor)
+ sger_ (&N, &M, &alpha, Y, &incY, X, &incX, A, &lda);
+ else
+ sger_ (&M, &N, &alpha, X, &incX, Y, &incY, A, &lda);
+ }
+
+ void cblas_dcopy(const int N, const double *X, const int incX, double *Y, const int incY)
+ {
+ dcopy_(&N,X,&incX,Y,&incY);
+ }
+
+
+ void cblas_scopy(const int N, const float *X, const int incX, float *Y, const int incY)
+ {
+ scopy_(&N,X,&incX,Y,&incY);
+ }
+
+ void cblas_dscal(const int N, const double alpha, double *Y, const int incY)
+ {
+ dscal_(&N,&alpha,Y,&incY);
+ }
+
+ void cblas_sscal(const int N, const float alpha, float *Y, const int incY)
+ {
+ sscal_(&N,&alpha,Y,&incY);
+ }
// level 3 routines
@@ -235,6 +307,7 @@ extern "C" {
void cblas_saxpy(const int N, const float alpha, const float *X, const int incX, float *Y, const int incY);
double cblas_ddot(const int N, const double *X, const int incX, const double *Y, const int incY);
+ float cblas_sdot(const int N, const float *X, const int incX, const float *Y, const int incY);
double cblas_dasum(const int N, const double *X, const int incX);
@@ -254,6 +327,21 @@ extern "C" {
void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N, const double alpha, const double *X, const int incX,
const double *Y, const int incY, double *A, const int lda);
+ void cblas_sger(const enum CBLAS_ORDER Order, const int M, const int N, const float alpha, const float *X, const int incX,
+ const float *Y, const int incY, float *A, const int lda);
+
+ void cblas_dcopy(const int N, const double *X, const int incX,
+ double *Y, const int incY);
+
+ void cblas_scopy(const int N, const float *X, const int incX,
+ float *Y, const int incY);
+
+ void cblas_dscal(const int N, const double alpha,
+ double *Y, const int incY);
+
+ void cblas_sscal(const int N, const float alpha,
+ float *Y, const int incY);
+
// level 3 routines
@@ -283,6 +371,14 @@ extern "C" {
}
#endif // CBLAS ?
+#endif // __FFLASFFPACK_HAVE_MKL
+
+#ifdef __FFLASFFPACK_HAVE_MKL
+#define blas_enum
+#else
+#define blas_enum enum
+#endif
+
#ifdef __FFLASFFPACK_HAVE_LAPACK
#ifndef __FFLASFFPACK_HAVE_CLAPACK
@@ -299,15 +395,16 @@ extern "C" {
// static const char* EXT_BLAS_SIDE (CBLAS_SIDE t) { if (t == CblasLeft) return "L"; else return "R";}
// static const char* EXT_BLAS_SIDE_tr (CBLAS_SIDE t) { if (t == CblasLeft) return "R"; else return "L";}
-#endif
+#endif // CBLAS_EXTERNALS
// define external link to LAPACK routines
extern "C" {
- void dgetrf_ (const int *, const int *, double *, const int *, int *, int *);
- void dgetri_ (const int *, double *, const int *, const int *, double *, const int *, int *);
- void dtrtri_ (const char *, const char *, const int *, double *, const int *, int *);
- void dswap_ (const int *, double *, const int *, double *, const int *);
+ //!@bug we should also allow lapacke from MLK
+ void dgetrf_ (const CBLAS_INT *, const CBLAS_INT *, double *, const CBLAS_INT *, CBLAS_INT *, CBLAS_INT *);
+ void dgetri_ (const CBLAS_INT *, double *, const CBLAS_INT *, const CBLAS_INT *, double *, const CBLAS_INT *, CBLAS_INT *);
+ void dtrtri_ (const char *, const char *, const CBLAS_INT *, double *, const CBLAS_INT *, CBLAS_INT *);
+ void dswap_ (const CBLAS_INT *, double *, const CBLAS_INT *, double *, const CBLAS_INT *);
}
// define C wrappers
@@ -316,22 +413,23 @@ extern "C" {
// return A=P.L.U (L unitary) with ColMajor
// return A=L.U.P (U unitary) with RowMajor
- inline int clapack_dgetrf(const enum CBLAS_ORDER Order, const int M, const int N,
- double *A, const int lda, int *ipiv)
+ //! @bug Order is not used. we should use ATLAS/interfaces/lapack/C/src/clapack_dgetrf.c or similar
+ inline CBLAS_INT clapack_dgetrf(const blas_enum CBLAS_ORDER, const CBLAS_INT M, const CBLAS_INT N,
+ double *A, const CBLAS_INT lda, CBLAS_INT *ipiv)
{
- int info;
+ CBLAS_INT info;
dgetrf_ ( &M, &N, A, &lda, ipiv, &info);
return info;
}
- inline int clapack_dgetri(const enum CBLAS_ORDER Order, const int N, double *A,
- const int lda, const int *ipiv)
+ inline CBLAS_INT clapack_dgetri(const blas_enum CBLAS_ORDER, const CBLAS_INT N, double *A,
+ const CBLAS_INT lda, const CBLAS_INT *ipiv)
{
- int info;
+ CBLAS_INT info;
double *work;
#ifndef __FFLASFFPACK_AUTOIMPLEMENT_DGETRI
- // the optimum size of work can be determinted via the
+ // the optimum size of work can be determCBLAS_INTed via the
// Lapack function ilaenv.
work= new double[N];
dgetri_ (&N, A, &lda, ipiv, work, &N, &info);
@@ -342,8 +440,8 @@ extern "C" {
if (info > 0)
return 0;
- for (int i=0;i<N;++i){
- for(int j=i;j<N;++j){
+ for (CBLAS_INT i=0;i<N;++i){
+ for(CBLAS_INT j=i;j<N;++j){
work[i*N+j]=A[i*N+j];
if (j>i) A[i*N+j]=0.0;
}
@@ -353,9 +451,9 @@ extern "C" {
double cst=1.;
dtrsm_ ("R", "L", "N", "U", &N, &N, &cst, work, &N, A, &N);
- int ip;
- const int incr=1;
- for (int i=0; i<N; ++i){
+ CBLAS_INT ip;
+ const CBLAS_INT incr=1;
+ for (CBLAS_INT i=0; i<N; ++i){
ip = ipiv[i]-1;
if (ip != i)
dswap_ (&N, &A[i*lda],&incr , &A[ip*lda], &incr);
@@ -366,10 +464,10 @@ extern "C" {
return info;
}
- inline int clapack_dtrtri(const enum CBLAS_ORDER Order,const enum CBLAS_UPLO Uplo,
- const enum CBLAS_DIAG Diag,const int N, double *A, const int lda)
+ inline CBLAS_INT clapack_dtrtri(const blas_enum CBLAS_ORDER Order,const blas_enum CBLAS_UPLO Uplo,
+ const blas_enum CBLAS_DIAG Diag,const CBLAS_INT N, double *A, const CBLAS_INT lda)
{
- int info;
+ CBLAS_INT info;
if (Order == CblasRowMajor)
dtrtri_ (EXT_BLAS_UPLO_tr(Uplo), EXT_BLAS_DIAG(Diag), &N, A, &lda, &info);
else
@@ -386,16 +484,17 @@ extern "C" {
extern "C" {
// LAPACK routines
- int clapack_dgetrf(const enum CBLAS_ORDER Order, const int M, const int N,
- double *A, const int lda, int *ipiv);
- int clapack_dgetri(const enum CBLAS_ORDER Order, const int N, double *A,
- const int lda, const int *ipiv);
- int clapack_dtrtri(const enum CBLAS_ORDER Order,const enum CBLAS_UPLO Uplo,
- const enum CBLAS_DIAG Diag,const int N, double *A, const int lda);
+ CBLAS_INT clapack_dgetrf(const blas_enum CBLAS_ORDER Order, const CBLAS_INT M, const CBLAS_INT N,
+ double *A, const CBLAS_INT lda, CBLAS_INT *ipiv);
+ CBLAS_INT clapack_dgetri(const blas_enum CBLAS_ORDER Order, const CBLAS_INT N, double *A,
+ const CBLAS_INT lda, const CBLAS_INT *ipiv);
+ CBLAS_INT clapack_dtrtri(const blas_enum CBLAS_ORDER Order,const blas_enum CBLAS_UPLO Uplo,
+ const blas_enum CBLAS_DIAG Diag,const CBLAS_INT N, double *A, const CBLAS_INT lda);
}
+#endif // CLAPACK ?
+
#endif // LAPACK ?
-#endif
#endif //__FFLASFFPACK_config_blas_H
diff --git a/fflas-ffpack/fflas-ffpack-config.h b/fflas-ffpack/fflas-ffpack-config.h
old mode 100644
new mode 100755
index 735b9db..7819a2b
--- a/fflas-ffpack/fflas-ffpack-config.h
+++ b/fflas-ffpack/fflas-ffpack-config.h
@@ -1,214 +1,133 @@
-#ifndef _FFLAS_FFPACK_FFLAS_FFPACK_CONFIG_H
-#define _FFLAS_FFPACK_FFLAS_FFPACK_CONFIG_H 1
-
-/* fflas-ffpack/fflas-ffpack-config.h. Generated automatically at end of configure. */
-/* config.h. Generated from config.h.in by configure. */
-/* config.h.in. Generated from configure.ac by autoheader. */
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* Copyright (C) 2012 FFLAS-FFPACK
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
-/* Define if building universal (internal helper macro) */
-/* #undef __FFLASFFPACK_AC_APPLE_UNIVERSAL_BUILD */
-
-/* Define if GMP is version 3.xxx */
-/* #undef __FFLASFFPACK_GMP_VERSION_3 */
-
-/* Define that architecture uses big endian storage */
-/* #undef __FFLASFFPACK_HAVE_BIG_ENDIAN */
-
-/* Define if BLAS is installed */
-#ifndef __FFLASFFPACK_HAVE_BLAS
-#define __FFLASFFPACK_HAVE_BLAS 1
-#endif
-
-/* Define if C interface to BLAS is available */
-#ifndef __FFLASFFPACK_HAVE_CBLAS
-#define __FFLASFFPACK_HAVE_CBLAS 1
-#endif
-
-/* Define if C interface to LAPACK is available */
-#ifndef __FFLASFFPACK_HAVE_CLAPACK
-#define __FFLASFFPACK_HAVE_CLAPACK 1
-#endif
-
-/* Define to 1 if you have the <dlfcn.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_DLFCN_H
-#define __FFLASFFPACK_HAVE_DLFCN_H 1
+/*! @file fflas-ffpack/fflas-ffpack-config.h
+ * @ingroup optimise
+ * @brief Defaults for optimised values.
+ * While \c fflas-ffpack-optimise.h is created by \c configure script,
+ * (either left blank or filled by optimiser), this file produces the
+ * defaults for the optimised values. If \c fflas-ffpack-optimise.h is not
+ * empty, then its values preceeds the defaults here.
+ */
+
+
+#ifndef __FFLASFFPACK_fflas_ffpack_configuration_H
+#define __FFLASFFPACK_fflas_ffpack_configuration_H
+
+#ifndef GCC_VERSION
+#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#endif
+
+#ifdef __CYGWIN__
+# define _GLIBCXX_USE_C99 true
+# ifndef _GLIBCXX_USE_C99_MATH_TR1
+# include <cstdlib>
+# include <string>
+# include <cmath>
+# undef fma
+# include <stdlib.h>
+# undef strtoull
+# undef strtoll
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+ using ::fma;
+ using ::strtoll;
+ using ::strtoull;
+
+/*
+ unsigned long stoul( const std::string& str, std::size_t* pos = 0, int base = 10 ) {
+ return std::strtoul(str.c_str(), NULL, base);
+ }
+
+ unsigned long long stoull( const std::string& str, std::size_t* pos = 0, int base = 10 ) {
+ return std::strtoull(str.c_str(), NULL, base);
+ }
+
+ long stol( const std::string& str, std::size_t* pos = 0, int base = 10 ) {
+ return std::strtol(str.c_str(), NULL, base);
+ }
+
+ long long stoll( const std::string& str, std::size_t* pos = 0, int base = 10 ) {
+ return std::strtoll(str.c_str(), NULL, base);
+ }
+*/
+
+}
+# else
+# define _GLIBCXX_USE_C99 true
+# include <cstdlib>
+# endif
+#endif
+
+#include "fflas-ffpack/config.h"
+#ifdef __FFLASFFPACK_USE_OPENMP
+# ifndef __GIVARO_USE_OPENMP
+# define __GIVARO_USE_OPENMP 1
+# endif
+#endif
+
+#include "fflas-ffpack/fflas-ffpack-optimise.h"
+
+#if defined(__FFLASFFPACK_USE_SSE) or defined(__FFLASFFPACK_USE_AVX) or defined(__FFLASFFPACK_USE_AVX2)
+#define __FFLASFFPACK_USE_SIMD // see configure...
+#endif
+
+
+
+// winograd algorithm threshold (for double)
+#ifndef __FFLASFFPACK_WINOTHRESHOLD
+#define __FFLASFFPACK_WINOTHRESHOLD 1000
+#endif
+
+#ifndef __FFLASFFPACK_WINOTHRESHOLD_FLT
+#define __FFLASFFPACK_WINOTHRESHOLD_FLT 2000
+#endif
+
+#ifndef __FFLASFFPACK_WINOTHRESHOLD_BAL
+#define __FFLASFFPACK_WINOTHRESHOLD_BAL 1000
#endif
-/* Define to 1 if you have the <float.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_FLOAT_H
-#define __FFLASFFPACK_HAVE_FLOAT_H 1
+#ifndef __FFLASFFPACK_WINOTHRESHOLD_BAL_FLT
+#define __FFLASFFPACK_WINOTHRESHOLD_BAL_FLT 2000
#endif
-/* Define if GIVARO is installed */
-/* #undef __FFLASFFPACK_HAVE_GIVARO */
-/* Define if GMP is installed */
-#ifndef __FFLASFFPACK_HAVE_GMP
-#define __FFLASFFPACK_HAVE_GMP 1
+#if defined(_OPENMP) || defined(OMP_H) || defined(__OMP_H) || defined(__pmp_omp_h)
+#ifndef __FFLASFFPACK_USE_OPENMP
+#warning "openmp was not detected correctly at configure time, please report this bug"
+#define __FFLASFFPACK_USE_OPENMP
#endif
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_INTTYPES_H
-#define __FFLASFFPACK_HAVE_INTTYPES_H 1
-#endif
-
-/* Define if LAPACK is installed */
-#ifndef __FFLASFFPACK_HAVE_LAPACK
-#define __FFLASFFPACK_HAVE_LAPACK 1
-#endif
-
-/* Define to 1 if you have the <limits.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_LIMITS_H
-#define __FFLASFFPACK_HAVE_LIMITS_H 1
-#endif
-
-/* Define that architecture uses little endian storage */
-#ifndef __FFLASFFPACK_HAVE_LITTLE_ENDIAN
-#define __FFLASFFPACK_HAVE_LITTLE_ENDIAN 1
-#endif
-
-/* Define to 1 if you have the <memory.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_MEMORY_H
-#define __FFLASFFPACK_HAVE_MEMORY_H 1
#endif
-/* Define to 1 if you have the <stddef.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_STDDEF_H
-#define __FFLASFFPACK_HAVE_STDDEF_H 1
-#endif
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_STDINT_H
-#define __FFLASFFPACK_HAVE_STDINT_H 1
-#endif
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_STDLIB_H
-#define __FFLASFFPACK_HAVE_STDLIB_H 1
-#endif
-
-/* Define to 1 if you have the <strings.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_STRINGS_H
-#define __FFLASFFPACK_HAVE_STRINGS_H 1
-#endif
-
-/* Define to 1 if you have the <string.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_STRING_H
-#define __FFLASFFPACK_HAVE_STRING_H 1
-#endif
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_SYS_STAT_H
-#define __FFLASFFPACK_HAVE_SYS_STAT_H 1
-#endif
-
-/* Define to 1 if you have the <sys/time.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_SYS_TIME_H
-#define __FFLASFFPACK_HAVE_SYS_TIME_H 1
-#endif
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_SYS_TYPES_H
-#define __FFLASFFPACK_HAVE_SYS_TYPES_H 1
-#endif
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#ifndef __FFLASFFPACK_HAVE_UNISTD_H
-#define __FFLASFFPACK_HAVE_UNISTD_H 1
-#endif
+#ifdef __x86_64__
+#if defined(__GNUC__) || defined (__clang__) /* who supports __int128_t ? */
+#define int128_t __int128_t
+#define uint128_t unsigned __int128_t
+#else /* hopefully this exists */
+#define int128_t __int128
+#define uint128_t unsigned __int128
+#endif /* __int128_t */
+#endif /* __x86_64__ */
-/* Define to the sub-directory in which libtool stores uninstalled libraries.
- */
-#ifndef __FFLASFFPACK_LT_OBJDIR
-#define __FFLASFFPACK_LT_OBJDIR ".libs/"
-#endif
-
-/* Name of package */
-#ifndef __FFLASFFPACK_PACKAGE
-#define __FFLASFFPACK_PACKAGE "fflas-ffpack"
-#endif
-
-/* Define to the address where bug reports for this package should be sent. */
-#ifndef __FFLASFFPACK_PACKAGE_BUGREPORT
-#define __FFLASFFPACK_PACKAGE_BUGREPORT "ffpack-devel at googlegroups.com"
-#endif
-
-/* Define to the full name of this package. */
-#ifndef __FFLASFFPACK_PACKAGE_NAME
-#define __FFLASFFPACK_PACKAGE_NAME "FFLAS-FFPACK"
-#endif
-
-/* Define to the full name and version of this package. */
-#ifndef __FFLASFFPACK_PACKAGE_STRING
-#define __FFLASFFPACK_PACKAGE_STRING "FFLAS-FFPACK 1.6.0"
-#endif
-
-/* Define to the one symbol short name of this package. */
-#ifndef __FFLASFFPACK_PACKAGE_TARNAME
-#define __FFLASFFPACK_PACKAGE_TARNAME "fflas-ffpack"
-#endif
-
-/* Define to the home page for this package. */
-#ifndef __FFLASFFPACK_PACKAGE_URL
-#define __FFLASFFPACK_PACKAGE_URL "http://www.linalg.org/projects/fflas-ffpack"
-#endif
-
-/* Define to the version of this package. */
-#ifndef __FFLASFFPACK_PACKAGE_VERSION
-#define __FFLASFFPACK_PACKAGE_VERSION "1.6.0"
-#endif
-
-/* The size of `char', as computed by sizeof. */
-#ifndef __FFLASFFPACK_SIZEOF_CHAR
-#define __FFLASFFPACK_SIZEOF_CHAR 1
-#endif
-
-/* The size of `int', as computed by sizeof. */
-#ifndef __FFLASFFPACK_SIZEOF_INT
-#define __FFLASFFPACK_SIZEOF_INT 4
-#endif
-
-/* The size of `long', as computed by sizeof. */
-#ifndef __FFLASFFPACK_SIZEOF_LONG
-#define __FFLASFFPACK_SIZEOF_LONG 8
-#endif
-
-/* The size of `long long', as computed by sizeof. */
-#ifndef __FFLASFFPACK_SIZEOF_LONG_LONG
-#define __FFLASFFPACK_SIZEOF_LONG_LONG 8
-#endif
-
-/* The size of `short', as computed by sizeof. */
-#ifndef __FFLASFFPACK_SIZEOF_SHORT
-#define __FFLASFFPACK_SIZEOF_SHORT 2
-#endif
-
-/* The size of `__int64', as computed by sizeof. */
-#ifndef __FFLASFFPACK_SIZEOF___INT64
-#define __FFLASFFPACK_SIZEOF___INT64 0
-#endif
-
-/* Define to 1 if you have the ANSI C header files. */
-#ifndef __FFLASFFPACK_STDC_HEADERS
-#define __FFLASFFPACK_STDC_HEADERS 1
-#endif
-
-/* Version number of package */
-#ifndef __FFLASFFPACK_VERSION
-#define __FFLASFFPACK_VERSION "1.6.0"
-#endif
-
-/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
- significant byte first (like Motorola and SPARC, unlike Intel). */
-#if defined AC_APPLE_UNIVERSAL_BUILD
-# if defined __BIG_ENDIAN__
-# define WORDS_BIGENDIAN 1
-# endif
-#else
-# ifndef WORDS_BIGENDIAN
-/* # undef WORDS_BIGENDIAN */
-# endif
-#endif
-
-/* once: _FFLAS_FFPACK_FFLAS_FFPACK_CONFIG_H */
-#endif
+#endif // __FFLASFFPACK_fflas_ffpack_configuration_H
diff --git a/fflas-ffpack/fflas-ffpack-configuration.h b/fflas-ffpack/fflas-ffpack-configuration.h
deleted file mode 100644
index c98ad7f..0000000
--- a/fflas-ffpack/fflas-ffpack-configuration.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2012 FFLAS-FFPACK
- * Written by <brice.boyer at imag.fr>
- *
- *
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-/*! @file fflas-ffpack/fflas-ffpack-configuration.h
- * @ingroup optimise
- * @brief Defaults for optimised values.
- * While \c fflas-ffpack-optimise.h is created by \c configure script,
- * (either left blank or filled by optimiser), this file produces the
- * defaults for the optimised values. If \c fflas-ffpack-optimise.h is not
- * empty, then its values preceeds the defaults here.
- */
-
-
-#ifndef __FFLASFFPACK_fflas_ffpack_configuration_H
-#define __FFLASFFPACK_fflas_ffpack_configuration_H
-
-#include "fflas-ffpack/fflas-ffpack-config.h"
-#include "fflas-ffpack/fflas-ffpack-optimise.h"
-
-// winograd algorithm threshold (for double)
-#ifndef __FFLASFFPACK_WINOTHRESHOLD
-#define __FFLASFFPACK_WINOTHRESHOLD 1000
-#endif
-
-#endif // __FFLASFFPACK_fflas_ffpack_configuration_H
diff --git a/fflas-ffpack/fflas-ffpack-optimise.h b/fflas-ffpack/fflas-ffpack-optimise.h
deleted file mode 100644
index 5c82b2f..0000000
--- a/fflas-ffpack/fflas-ffpack-optimise.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef __FFLASFFPACK_optimise_H
-#define __FFLASFFPACK_optimise_H
-
-#ifndef __FFLASFFPACK_WINOTHRESHOLD
-#define __FFLASFFPACK_WINOTHRESHOLD 1896
-#endif
-
-#ifndef __FFLASFFPACK_WINOTHRESHOLD_FLT
-#define __FFLASFFPACK_WINOTHRESHOLD_FLT 2408
-#endif
-
-#endif // optimise.h
diff --git a/fflas-ffpack/fflas-ffpack.doxy b/fflas-ffpack/fflas-ffpack.doxy
index e69de29..225ffa5 100644
--- a/fflas-ffpack/fflas-ffpack.doxy
+++ b/fflas-ffpack/fflas-ffpack.doxy
@@ -0,0 +1,34 @@
+// Copyright (c) 2011 FFLAS-FFPACK
+// written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+//
+// ========LICENCE========
+// This file is part of the library FFLAS-FFPACK.
+//
+// FFLAS-FFPACK is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+// ========LICENCE========
+//
+
+/**
+ * \defgroup fflasffpack FFLAS-FFPACK
+ * \brief the FFLAS FFPACK library
+ *
+ * C++ header library for fast exact dense linear algebra
+ *
+ * @see fflas
+ * @see ffpack
+ */
+
+
+// vim:syn=doxygen
diff --git a/fflas-ffpack/fflas-ffpack.h b/fflas-ffpack/fflas-ffpack.h
index ceeb9c0..acd7ad1 100644
--- a/fflas-ffpack/fflas-ffpack.h
+++ b/fflas-ffpack/fflas-ffpack.h
@@ -1,7 +1,7 @@
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
/* Copyright (C) 2011 FFLAS-FFPACK
- * Written by <brice.boyer at imag.fr>
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
*
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
@@ -32,7 +32,7 @@
#ifndef __FFLASFFPACK_fflas_ffpack_H
#define __FFLASFFPACK_fflas_ffpack_H
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
+#include "fflas-ffpack/fflas-ffpack-config.h"
#include "fflas/fflas.h"
#include "ffpack/ffpack.h"
diff --git a/fflas-ffpack/fflas/Makefile.am b/fflas-ffpack/fflas/Makefile.am
index faa7dc8..56c7267 100644
--- a/fflas-ffpack/fflas/Makefile.am
+++ b/fflas-ffpack/fflas/Makefile.am
@@ -1,5 +1,5 @@
# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# adapted from LinBox configuration
#
# ========LICENCE========
@@ -24,21 +24,48 @@
pkgincludesubdir=$(pkgincludedir)/fflas
+SUBDIRS=fflas_fgemm fflas_igemm fflas_simd fflas_sparse
+
+sparse=fflas_sparse.h \
+ fflas_sparse.inl
+
+multiprecision= fflas_ftrsm_mp.inl \
+ fflas_fscal_mp.inl \
+ fflas_freduce_mp.inl \
+ fflas_fger_mp.inl \
+ fflas_fgemv_mp.inl
+
pkgincludesub_HEADERS= \
fflas_bounds.inl \
- fflas_fcopy.inl \
+ fflas_fassign.h \
+ fflas_fassign.inl \
fflas_ftrmm.inl \
fflas.h \
- fflas_faddm.inl \
+ fflas_level1.inl \
+ fflas_level2.inl \
+ fflas_level3.inl \
+ fflas_fadd.h \
+ fflas_fadd.inl \
fflas_fdot.inl \
fflas_ftrmm_src.inl \
- fflas_faddmin_src.inl \
fflas_fgemm.inl \
+ fflas_pfgemm.inl \
+ fflas_pftrsm.inl \
fflas_ftrsm.inl \
- fflas_faddm_src.inl \
fflas_fgemv.inl \
+ fflas_freivalds.inl \
+ fflas_fscal.h \
+ fflas_fscal.inl \
fflas_ftrsm_src.inl \
fflas_faxpy.inl \
fflas_fger.inl \
- fflas_ftrsv.inl
+ fflas_ftrsv.inl \
+ fflas_freduce.h \
+ fflas_freduce.inl \
+ fflas_helpers.inl \
+ fflas_simd.h \
+ fflas_enum.h \
+ ${sparse} \
+ ${multiprecision}
+EXTRA_DIST=fflas.doxy
diff --git a/fflas-ffpack/fflas/Makefile.in b/fflas-ffpack/fflas/Makefile.in
deleted file mode 100644
index 3bed548..0000000
--- a/fflas-ffpack/fflas/Makefile.in
+++ /dev/null
@@ -1,563 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# adapted from LinBox configuration
-#
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = fflas-ffpack/fflas
-DIST_COMMON = $(pkgincludesub_HEADERS) $(srcdir)/Makefile.am \
- $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-am__vpath_adj = case $$p in \
- $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
- *) f=$$p;; \
- esac;
-am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
-am__install_max = 40
-am__nobase_strip_setup = \
- srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
-am__nobase_strip = \
- for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
-am__nobase_list = $(am__nobase_strip_setup); \
- for p in $$list; do echo "$$p $$p"; done | \
- sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
- $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
- if (++n[$$2] == $(am__install_max)) \
- { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
- END { for (dir in files) print dir, files[dir] }'
-am__base_list = \
- sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
- sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-am__uninstall_files_from_dir = { \
- test -z "$$files" \
- || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
- || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
- $(am__cd) "$$dir" && rm -f $$files; }; \
- }
-am__installdirs = "$(DESTDIR)$(pkgincludesubdir)"
-HEADERS = $(pkgincludesub_HEADERS)
-ETAGS = etags
-CTAGS = ctags
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-pkgincludesubdir = $(pkgincludedir)/fflas
-pkgincludesub_HEADERS = \
- fflas_bounds.inl \
- fflas_fcopy.inl \
- fflas_ftrmm.inl \
- fflas.h \
- fflas_faddm.inl \
- fflas_fdot.inl \
- fflas_ftrmm_src.inl \
- fflas_faddmin_src.inl \
- fflas_fgemm.inl \
- fflas_ftrsm.inl \
- fflas_faddm_src.inl \
- fflas_fgemv.inl \
- fflas_ftrsm_src.inl \
- fflas_faxpy.inl \
- fflas_fger.inl \
- fflas_ftrsv.inl
-
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps fflas-ffpack/fflas/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps fflas-ffpack/fflas/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-install-pkgincludesubHEADERS: $(pkgincludesub_HEADERS)
- @$(NORMAL_INSTALL)
- @list='$(pkgincludesub_HEADERS)'; test -n "$(pkgincludesubdir)" || list=; \
- if test -n "$$list"; then \
- echo " $(MKDIR_P) '$(DESTDIR)$(pkgincludesubdir)'"; \
- $(MKDIR_P) "$(DESTDIR)$(pkgincludesubdir)" || exit 1; \
- fi; \
- for p in $$list; do \
- if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
- echo "$$d$$p"; \
- done | $(am__base_list) | \
- while read files; do \
- echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(pkgincludesubdir)'"; \
- $(INSTALL_HEADER) $$files "$(DESTDIR)$(pkgincludesubdir)" || exit $$?; \
- done
-
-uninstall-pkgincludesubHEADERS:
- @$(NORMAL_UNINSTALL)
- @list='$(pkgincludesub_HEADERS)'; test -n "$(pkgincludesubdir)" || list=; \
- files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
- dir='$(DESTDIR)$(pkgincludesubdir)'; $(am__uninstall_files_from_dir)
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile $(HEADERS)
-installdirs:
- for dir in "$(DESTDIR)$(pkgincludesubdir)"; do \
- test -z "$$dir" || $(MKDIR_P) "$$dir"; \
- done
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am: install-pkgincludesubHEADERS
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am: uninstall-pkgincludesubHEADERS
-
-.MAKE: install-am install-strip
-
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
- clean-libtool ctags distclean distclean-generic \
- distclean-libtool distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-pdf install-pdf-am \
- install-pkgincludesubHEADERS install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- tags uninstall uninstall-am uninstall-pkgincludesubHEADERS
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/fflas-ffpack/fflas/fflas.doxy b/fflas-ffpack/fflas/fflas.doxy
new file mode 100644
index 0000000..3f6cf3f
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas.doxy
@@ -0,0 +1,41 @@
+// Copyright (c) 2011 FFLAS-FFPACK
+// written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+//
+// ========LICENCE========
+// This file is part of the library FFLAS-FFPACK.
+//
+// FFLAS-FFPACK is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+// ========LICENCE========
+//
+
+
+
+/** \ingroup fflasffpack
+ * \defgroup fflas FFLAS
+ *
+ * \brief The C-style wrapper of BLAS for finite field linear algebra.
+ *
+ * FFLAS, Finite Field Linear Algebra Subroutines, provide basic linear
+ * algebra subroutines based on the BLAS interface. Therefore, the
+ * specifications are in C style; only the field given as a template parameter
+ * requires \p C++.
+ *
+ * As much as possible, these routines use \p ATLAS/BLAS computations and
+ * achieve therefore high efficiency.
+ *
+ *
+ */
+
+// vim:syn=doxygen
diff --git a/fflas-ffpack/fflas/fflas.h b/fflas-ffpack/fflas/fflas.h
index 42a8a4f..44aa372 100644
--- a/fflas-ffpack/fflas/fflas.h
+++ b/fflas-ffpack/fflas/fflas.h
@@ -1,9 +1,10 @@
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
/* fflas.h
- * Copyright (C) 2005 Clement Pernet
+ * Copyright (C) 2005,2013,2014 Clement Pernet
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
*
*
* ========LICENCE========
@@ -34,1214 +35,106 @@
#ifndef __FFLASFFPACK_fflas_H
#define __FFLASFFPACK_fflas_H
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include "fflas-ffpack/config.h"
+#include "fflas-ffpack/config-blas.h"
#include <cmath>
#include <cstring>
-#ifndef MAX
-#define MAX(a,b) ((a < b)?b:a)
-#endif
-#ifndef MIN
-#define MIN(a,b) ((a > b)?b:a)
+#ifdef __FFLASFFPACK_USE_OPENMP
+#include <omp.h>
#endif
-#include "fflas-ffpack/config-blas.h"
-#include "fflas-ffpack/field/unparametric.h"
-#include "fflas-ffpack/field/modular-balanced.h"
-#include "fflas-ffpack/field/modular-positive.h"
-
+// namespace FFLAS {
+#ifndef WINOTHRESHOLD
#define WINOTHRESHOLD __FFLASFFPACK_WINOTHRESHOLD
+#endif
+// }
-/* Thresholds determining which floating point representation to use, depending
+/** Thresholds determining which floating point representation to use, depending
* on the cardinality of the finite field. This is only used when the element
* representation is not a floating point type.
+ * @bug to be benchmarked.
*/
-#define FLOAT_DOUBLE_THRESHOLD_0 430
-#define FLOAT_DOUBLE_THRESHOLD_1 350
-#define FLOAT_DOUBLE_THRESHOLD_2 175
+#ifndef DOUBLE_TO_FLOAT_CROSSOVER
+#define DOUBLE_TO_FLOAT_CROSSOVER 800
+#endif
#include <float.h>
-//#define LB_TRTR
/// @brief FFLAS: <b>F</b>inite <b>F</b>ield <b>L</b>inear <b>A</b>lgebra <b>S</b>ubroutines.
-namespace FFLAS {
-
- // public:
- /// Is matrix transposed ?
- enum FFLAS_TRANSPOSE
- {
- FflasNoTrans=111, /**< Matrix is not transposed */
- FflasTrans =112 /**< Matrix is transposed */
- };
- /// Is triangular matrix's shape upper ?
- enum FFLAS_UPLO
- {
- FflasUpper=121, /**< Triangular matrix is Upper triangular (if \f$i>j\f$ then \f$T_{i,j} = 0\f$)*/
- FflasLower=122 /**< Triangular matrix is Lower triangular (if \f$i<j\f$ then \f$T_{i,j} = 0\f$)*/
- };
-
- /// Is Matrix diagonal implicit ?
- enum FFLAS_DIAG
- {
- FflasNonUnit=131 , /**< Triangular matrix has an explicit general diagonal */
- FflasUnit =132 /**< Triangular matrix has an implicit unit diagonal (\f$T_{i,i} = 1\f$)*//**< */
- };
-
- /// On what side ?
- enum FFLAS_SIDE
- {
- FflasLeft = 141, /**< Operator applied on the left */
- FflasRight = 142 /**< Operator applied on the rigth*/
- };
-
- /** \p FFLAS_BASE determines the type of the element representation for Matrix Mult kernel. */
- enum FFLAS_BASE
- {
- FflasDouble = 151, /**< to use the double precision BLAS */
- FflasFloat = 152, /**< to use the single precison BLAS */
- FflasGeneric = 153 /**< for any other domain, that can not be converted to floating point integers */
- };
-
- /* Representations of Z with floating point elements*/
-
- typedef FFPACK::UnparametricField<float> FloatDomain;
- typedef FFPACK::UnparametricField<double> DoubleDomain;
-
-
- namespace Protected {
-
- // Prevents the instantiation of the class
- // FFLAS(){}
- template <class X,class Y>
- class AreEqual {
- public:
- static const bool value = false;
- };
-
- template <class X>
- class AreEqual<X,X> {
- public:
- static const bool value = true;
- };
-
- //-----------------------------------------------------------------------------
- // Some conversion functions
- //-----------------------------------------------------------------------------
-
- //---------------------------------------------------------------------
- // Finite Field matrix => double matrix
- //---------------------------------------------------------------------
- template<class Field>
- void MatF2MatD (const Field& F,
- DoubleDomain::Element* S, const size_t lds,
- const typename Field::Element* E,
- const size_t lde,const size_t m, const size_t n)
- {
-
- const typename Field::Element* Ei = E;
- DoubleDomain::Element *Si=S;
- size_t j;
- for (; Ei < E+lde*m; Ei+=lde, Si += lds)
- for ( j=0; j<n; ++j){
- F.convert(*(Si+j),*(Ei+j));
- }
- }
- //---------------------------------------------------------------------
- // Finite Field matrix => float matrix
- //---------------------------------------------------------------------
- template<class Field>
- void MatF2MatFl (const Field& F,
- FloatDomain::Element* S, const size_t lds,
- const typename Field::Element* E,
- const size_t lde,const size_t m, const size_t n)
- {
-
- const typename Field::Element* Ei = E;
- FloatDomain::Element *Si=S;
- size_t j;
- for (; Ei < E+lde*m; Ei+=lde, Si += lds)
- for ( j=0; j<n; ++j){
- F.convert(*(Si+j),*(Ei+j));
- }
- }
-
- //---------------------------------------------------------------------
- // Finite Field matrix => double matrix
- // Special design for upper-triangular matrices
- //---------------------------------------------------------------------
- template<class Field>
- void MatF2MatD_Triangular (const Field& F,
- typename DoubleDomain::Element* S, const size_t lds,
- const typename Field::Element* const E,
- const size_t lde,
- const size_t m, const size_t n){
-
- const typename Field::Element* Ei = E;
- typename DoubleDomain::Element* Si = S;
- size_t i=0, j;
- for ( ; i<m;++i, Ei+=lde, Si+=lds)
- for ( j=i; j<n;++j)
- F.convert(*(Si+j),*(Ei+j));
- }
-
- //---------------------------------------------------------------------
- // Finite Field matrix => float matrix
- // Special design for upper-triangular matrices
- //---------------------------------------------------------------------
- template<class Field>
- void MatF2MatFl_Triangular (const Field& F,
- typename FloatDomain::Element* S, const size_t lds,
- const typename Field::Element* const E,
- const size_t lde,
- const size_t m, const size_t n){
-
- const typename Field::Element* Ei = E;
- typename FloatDomain::Element* Si = S;
- size_t i=0, j;
- for ( ; i<m;++i, Ei+=lde, Si+=lds)
- for ( j=i; j<n;++j)
- F.convert(*(Si+j),*(Ei+j));
- }
- //---------------------------------------------------------------------
- // double matrix => Finite Field matrix
- //---------------------------------------------------------------------
- template<class Field>
- void MatD2MatF (const Field& F,
- typename Field::Element* S, const size_t lds,
- const typename DoubleDomain::Element* E, const size_t lde,
- const size_t m, const size_t n)
- {
+#include <algorithm>
- typename Field::Element* Si = S;
- const DoubleDomain::Element* Ei =E;
- size_t j;
- for ( ; Si < S+m*lds; Si += lds, Ei+= lde){
- for ( j=0; j<n;++j)
- F.init( *(Si+j), *(Ei+j) );
- }
- }
+#include "fflas_enum.h"
- //---------------------------------------------------------------------
- // float matrix => Finite Field matrix
- //---------------------------------------------------------------------
- template<class Field>
- void MatFl2MatF (const Field& F,
- typename Field::Element* S, const size_t lds,
- const typename FloatDomain::Element* E, const size_t lde,
- const size_t m, const size_t n){
+#include "fflas-ffpack/utils/fflas_memory.h"
+#include "fflas-ffpack/paladin/parallel.h"
- typename Field::Element* Si = S;
- const FloatDomain::Element* Ei =E;
- size_t j;
- for ( ; Si < S+m*lds; Si += lds, Ei+= lde){
- for ( j=0; j<n;++j)
- F.init( *(Si+j), *(Ei+j) );
- }
- }
+//---------------------------------------------------------------------
+// Level 1 routines
+//---------------------------------------------------------------------
+#include "fflas_level1.inl"
- /**
- * Computes the threshold parameters for the cascade
- * Matmul algorithm.
- *
- *
- * \param F Finite Field/Ring of the computation.
- * \param k Common dimension of A and B, in the product A x B
- * \param beta Computing \f$AB + \beta C\f$
- * \param delayedDim Returns the size of blocks that can be multiplied
- * over Z with no overflow
- * \param base Returns the type of BLAS representation to use
- * \param winoRecLevel Returns the number of recursion levels of
- * Strassen-Winograd's algorithm to perform
- * \param winoLevelProvided tells whether the user forced the number of
- * recursive level of Winograd's algorithm
- *
- * @bib
- * - Dumas, Giorgi, Pernet, arXiv cs/0601133 <a href=http://arxiv.org/abs/cs.SC/0601133>here</a>
- */
- template <class Field>
- void MatMulParameters (const Field& F,
- const size_t k,
- const typename Field::Element& beta,
- size_t& delayedDim,
- FFLAS_BASE& base,
- size_t& winoRecLevel,
- bool winoLevelProvided=false);
+//---------------------------------------------------------------------
+// Level 2 routines
+//---------------------------------------------------------------------
+#include "fflas_level2.inl"
+//---------------------------------------------------------------------
+// Level 3 routines
+//---------------------------------------------------------------------
+#include "fflas_level3.inl"
- /**
- * Computes the maximal size for delaying the modular reduction
- * in a dotproduct.
- *
- * This is the default version assuming a conversion to a positive modular representation
- *
- * \param F Finite Field/Ring of the computation
- * \param winoRecLevel Number of recusrive Strassen-Winograd levels (if any, 0 otherwise)
- * \param beta Computing AB + beta C
- * \param base Type of floating point representation for delayed modular computations
- *
- */
- template <class Field>
- size_t DotProdBound (const Field& F,
- const size_t winoRecLevel,
- const typename Field::Element& beta,
- const FFLAS_BASE base);
-
-
- /**
- * Internal function for the bound computation.
- * Generic implementation for positive representations
- */
- template <class Field>
- double computeFactorWino (const Field& F, const size_t w);
-
- template <class Field>
- double computeFactorClassic (const Field& F);
-
-
- /**
- * Determines the type of floating point representation to convert to,
- * for BLAS computations.
- * \param F Finite Field/Ring of the computation
- * \param w Number of recursive levels in Winograd's algorithm
- */
- template <class Field>
- FFLAS_BASE BaseCompute (const Field& F, const size_t w);
-
- /**
- * Computes the maximal size for delaying the modular reduction
- * in a triangular system resolution.
- *
- * Compute the maximal dimension k, such that a unit diagonal triangular
- * system of dimension k can be solved over Z without overflow of the
- * underlying floating point representation.
- *
- * @bib
- * - Dumas, Giorgi, Pernet 06, arXiv:cs/0601133.
- *
- * \param F Finite Field/Ring of the computation
- *
- */
- template <class Field>
- size_t TRSMBound (const Field& F);
-
- template <class Field>
- void DynamicPealing( const Field& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n, const size_t k,
- const typename Field::Element alpha,
- const typename Field::Element* A, const size_t lda,
- const typename Field::Element* B, const size_t ldb,
- const typename Field::Element beta,
- typename Field::Element* C, const size_t ldc,
- const size_t ); //kmax
-
- template <class Field>
- void MatVectProd (const Field& F,
- const FFLAS_TRANSPOSE TransA,
- const size_t M, const size_t N,
- const typename Field::Element alpha,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * X, const size_t incX,
- const typename Field::Element beta,
- typename Field::Element * Y, const size_t incY);
-
- template <class Field>
- void ClassicMatmul(const Field& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n, const size_t k,
- const typename Field::Element alpha,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * B, const size_t ldb,
- const typename Field::Element beta,
- typename Field::Element * C, const size_t ldc,
- const size_t kmax, const FFLAS_BASE base );
-
- // Winograd Multiplication alpha.A(n*k) * B(k*m) + beta . C(n*m)
- // WinoCalc performs the 22 Winograd operations
- template <class Field>
- void WinoCalc (const Field& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t mr, const size_t nr,const size_t kr,
- const typename Field::Element alpha,
- const typename Field::Element* A,const size_t lda,
- const typename Field::Element* B,const size_t ldb,
- const typename Field::Element beta,
- typename Field::Element * C, const size_t ldc,
- const size_t kmax, const size_t w, const FFLAS_BASE base);
-
- template<class Field>
- void WinoMain (const Field& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n, const size_t k,
- const typename Field::Element alpha,
- const typename Field::Element* A,const size_t lda,
- const typename Field::Element* B,const size_t ldb,
- const typename Field::Element beta,
- typename Field::Element * C, const size_t ldc,
- const size_t kmax, const size_t w, const FFLAS_BASE base);
-
- // Specialized routines for ftrsm
- template <class Element>
- class ftrsmLeftUpperNoTransNonUnit;
- template <class Element>
- class ftrsmLeftUpperNoTransUnit;
- template <class Element>
- class ftrsmLeftUpperTransNonUnit;
- template <class Element>
- class ftrsmLeftUpperTransUnit;
- template <class Element>
- class ftrsmLeftLowerNoTransNonUnit;
- template <class Element>
- class ftrsmLeftLowerNoTransUnit;
- template <class Element>
- class ftrsmLeftLowerTransNonUnit;
- template <class Element>
- class ftrsmLeftLowerTransUnit;
- template <class Element>
- class ftrsmRightUpperNoTransNonUnit;
- template <class Element>
- class ftrsmRightUpperNoTransUnit;
- template <class Element>
- class ftrsmRightUpperTransNonUnit;
- template <class Element>
- class ftrsmRightUpperTransUnit;
- template <class Element>
- class ftrsmRightLowerNoTransNonUnit;
- template <class Element>
- class ftrsmRightLowerNoTransUnit;
- template <class Element>
- class ftrsmRightLowerTransNonUnit;
- template <class Element>
- class ftrsmRightLowerTransUnit;
-
- // Specialized routines for ftrmm
- template <class Element>
- class ftrmmLeftUpperNoTransNonUnit;
- template <class Element>
- class ftrmmLeftUpperNoTransUnit;
- template <class Element>
- class ftrmmLeftUpperTransNonUnit;
- template <class Element>
- class ftrmmLeftUpperTransUnit;
- template <class Element>
- class ftrmmLeftLowerNoTransNonUnit;
- template <class Element>
- class ftrmmLeftLowerNoTransUnit;
- template <class Element>
- class ftrmmLeftLowerTransNonUnit;
- template <class Element>
- class ftrmmLeftLowerTransUnit;
- template <class Element>
- class ftrmmRightUpperNoTransNonUnit;
- template <class Element>
- class ftrmmRightUpperNoTransUnit;
- template <class Element>
- class ftrmmRightUpperTransNonUnit;
- template <class Element>
- class ftrmmRightUpperTransUnit;
- template <class Element>
- class ftrmmRightLowerNoTransNonUnit;
- template <class Element>
- class ftrmmRightLowerNoTransUnit;
- template <class Element>
- class ftrmmRightLowerTransNonUnit;
- template <class Element>
- class ftrmmRightLowerTransUnit;
-
- // BB : ça peut servir...
-#ifdef LB_TRTR
- template <class Element>
- class ftrtrLeftUpperNoTransNonUnitNonUnit;
- template <class Element>
- class ftrtrLeftUpperNoTransUnitNonUnit;
- template <class Element>
- class ftrtrLeftUpperTransNonUnitNonUnit;
- template <class Element>
- class ftrtrLeftUpperTransUnitNonUnit;
- template <class Element>
- class ftrtrLeftLowerNoTransNonUnitNonUnit;
- template <class Element>
- class ftrtrLeftLowerNoTransUnitNonUnit;
- template <class Element>
- class ftrtrLeftLowerTransNonUnitNonUnit;
- template <class Element>
- class ftrtrLeftLowerTransUnitNonUnit;
- template <class Element>
- class ftrtrLeftUpperNoTransNonUnitUnit;
- template <class Element>
- class ftrtrLeftUpperNoTransUnitUnit;
- template <class Element>
- class ftrtrLeftUpperTransNonUnitUnit;
- template <class Element>
- class ftrtrLeftUpperTransUnitUnit;
- template <class Element>
- class ftrtrLeftLowerNoTransNonUnitUnit;
- template <class Element>
- class ftrtrLeftLowerNoTransUnitUnit;
- template <class Element>
- class ftrtrLeftLowerTransNonUnitUnit;
- template <class Element>
- class ftrtrLeftLowerTransUnitUnit;
- template <class Element>
- class ftrtrRightUpperNoTransNonUnitNonUnit;
- template <class Element>
- class ftrtrRightUpperNoTransUnitNonUnit;
- template <class Element>
- class ftrtrRightUpperTransNonUnitNonUnit;
- template <class Element>
- class ftrtrRightUpperTransUnitNonUnit;
- template <class Element>
- class ftrtrRightLowerNoTransNonUnitNonUnit;
- template <class Element>
- class ftrtrRightLowerNoTransUnitNonUnit;
- template <class Element>
- class ftrtrRightLowerTransNonUnitNonUnit;
- template <class Element>
- class ftrtrRightLowerTransUnitNonUnit;
- template <class Element>
- class ftrtrRightUpperNoTransNonUnitUnit;
- template <class Element>
- class ftrtrRightUpperNoTransUnitUnit;
- template <class Element>
- class ftrtrRightUpperTransNonUnitUnit;
- template <class Element>
- class ftrtrRightUpperTransUnitUnit;
- template <class Element>
- class ftrtrRightLowerNoTransNonUnitUnit;
- template <class Element>
- class ftrtrRightLowerNoTransUnitUnit;
- template <class Element>
- class ftrtrRightLowerTransNonUnitUnit;
- template <class Element>
- class ftrtrRightLowerTransUnitUnit;
+#ifdef FFLAS_COMPILED
+#include "fflas-ffpack/interfaces/libs/fflas_L1_inst.h"
+#include "fflas-ffpack/interfaces/libs/fflas_L2_inst.h"
+#include "fflas-ffpack/interfaces/libs/fflas_L3_inst.h"
#endif
- template<class Element>
- class faddmTrans;
- template<class Element>
- class faddmNoTrans;
- template<class Element>
- class fsubmTrans;
- template<class Element>
- class fsubmNoTrans;
- template<class Element>
- class faddmTransTrans;
- template<class Element>
- class faddmNoTransTrans;
- template<class Element>
- class faddmTransNoTrans;
- template<class Element>
- class faddmNoTransNoTrans;
- template<class Element>
- class fsubmTransTrans;
- template<class Element>
- class fsubmNoTransTrans;
- template<class Element>
- class fsubmTransNoTrans;
- template<class Element>
- class fsubmNoTransNoTrans;
- } // protected
-
- //---------------------------------------------------------------------
- // Level 1 routines
- //---------------------------------------------------------------------
-
- /** \brief fzero : \f$A \gets 0 \f$.
- * @param F field
- * @param n number of elements to zero
- * \param X vector in \p F
- * \param incX stride of \p X
- */
- template<class Field>
- void
- fzero (const Field& F, const size_t n,
- typename Field::Element *X, const size_t incX)
- {
- if (incX == 1) { // contigous data
- // memset(X,(int)F.zero,n); // might be bogus ?
- for (size_t i = 0 ; i < n ; ++i)
- F.assign(*(X+i), F.zero);
-
- }
- else { // not contiguous (strided)
- for (size_t i = 0 ; i < n ; ++i)
- F.assign(*(X+i*incX), F.zero);
- }
- }
-
- /** fscal
- * \f$x \gets a \cdot x\f$.
- * @param F field
- * @param n size of the vectors
- * @param alpha homotéti scalar
- * \param X vector in \p F
- * \param incX stride of \p X
- * @bug use cblas_(d)scal when possible
- * @internal
- * @todo check if comparison with +/-1,0 is necessary.
- */
- template<class Field>
- void
- fscal (const Field& F, const size_t n, const typename Field::Element alpha,
- typename Field::Element * X, const size_t incX)
- {
- typedef typename Field::Element Element ;
-
- if (F.isOne(alpha))
- return ;
-
- Element * Xi = X;
- if (F.areEqual(alpha,F.mOne)){
- for (; Xi < X+n*incX; Xi+=incX )
- F.negin( *Xi );
- return;
- }
- if (F.isZero(alpha)){
- fzero(F,n,X,incX);
- return;
- }
-
- for (; Xi < X+n*incX; Xi+=incX )
- F.mulin( *Xi, alpha );
- }
-
- /** \brief fcopy : \f$x \gets y \f$.
- * X is preallocated
- * @param F field
- * @param N size of the vectors
- * \param [out] X vector in \p F
- * \param incX stride of \p X
- * \param [in] Y vector in \p F
- * \param incY stride of \p Y
- */
- template<class Field>
- void
- fcopy (const Field& F, const size_t N,
- typename Field::Element * X, const size_t incX,
- const typename Field::Element * Y, const size_t incY );
-
-
- /** \brief faxpy : \f$y \gets \alpha \cdot x + y\f$.
- * @param F field
- * @param N size of the vectors
- * @param alpha scalar
- * \param X vector in \p F
- * \param incX stride of \p X
- * \param Y vector in \p F
- * \param incY stride of \p Y
- */
- template<class Field>
- void
- faxpy (const Field& F, const size_t N,
- const typename Field::Element alpha,
- const typename Field::Element * X, const size_t incX,
- typename Field::Element * Y, const size_t incY );
- /** \brief fdot: dot product \f$x^T y\f$.
- * @param F field
- * @param N size of the vectors
- * \param X vector in \p F
- * \param incX stride of \p X
- * \param Y vector in \p F
- * \param incY stride of \p Y
- */
- template<class Field>
- typename Field::Element
- fdot (const Field& F, const size_t N,
- const typename Field::Element * X, const size_t incX,
- const typename Field::Element * Y, const size_t incY );
+//---------------------------------------------------------------------
+// specialisations and implementation
+//---------------------------------------------------------------------
- /** \brief fswap: \f$ X \leftrightarrow Y\f$.
- * @param F field
- * @param N size of the vectors
- * \param X vector in \p F
- * \param incX stride of \p X
- * \param Y vector in \p F
- * \param incY stride of \p Y
- */
- template<class Field>
- void
- fswap (const Field& F, const size_t N, typename Field::Element * X, const size_t incX,
- typename Field::Element * Y, const size_t incY )
- {
+#include "fflas_freduce.h"
+#include "fflas_fadd.h"
+#include "fflas_fscal.h"
+#include "fflas_fassign.h"
- typename Field::Element tmp;
- typename Field::Element * Xi = X;
- typename Field::Element * Yi=Y;
- for (; Xi < X+N*incX; Xi+=incX, Yi+=incY ){
- F.assign( tmp, *Xi );
- F.assign( *Xi, *Yi );
- F.assign( *Yi, tmp );
- }
- }
-
- //---------------------------------------------------------------------
- // Level 2 routines
- //---------------------------------------------------------------------
-
- /** \brief fcopy : \f$A \gets B \f$.
- * @param F field
- * @param m number of rows to copy
- * @param n number of cols to copy
- * \param A matrix in \p F
- * \param lda stride of \p A
- * \param B vector in \p F
- * \param ldb stride of \p B
- */
- template<class Field>
- void
- fcopy (const Field& F, const size_t m, const size_t n,
- typename Field::Element * A, const size_t lda,
- const typename Field::Element * B, const size_t ldb ) ;
-
- /** \brief fzero : \f$A \gets 0 \f$.
- * @param F field
- * @param m number of rows to zero
- * @param n number of cols to zero
- * \param A matrix in \p F
- * \param lda stride of \p A
- * @warning may be buggy if Element is larger than int
- */
- template<class Field>
- void
- fzero (const Field& F, const size_t m, const size_t n,
- typename Field::Element * A, const size_t lda)
- {
- /* use memset only with Elements that are ok */
- if (n == lda) { // contigous data
- // memset(A,(int) F.zero,m*n); // might be bogus ?
- fzero(F,m*n,A,1);
- }
- else { // not contiguous (strided)
- for (size_t i = 0 ; i < m ; ++i)
- // memset(A+i*lda,(int) F.zero,n) ; // might be bogus ?
- fzero(F,n,A+i*lda,1);
- }
- }
-
- /** fscal
- * \f$A \gets a \cdot A\f$.
- * @param F field
- * @param m number of rows
- * @param n number of cols
- * @param alpha homotecie scalar
- * \param A matrix in \p F
- * \param lda stride of \p A
- * @internal
- */
- template<class Field>
- void
- fscal (const Field& F, const size_t m , const size_t n,
- const typename Field::Element alpha,
- typename Field::Element * A, const size_t lda)
- {
- typedef typename Field::Element Element ;
-
- if (F.isOne(alpha)) {
- return ;
- }
- else {
- if (lda == n) {
- fscal(F,n*m,alpha,A,1);
- }
- else {
- for (size_t i = 0 ; i < m ; ++i)
- fscal(F,n,alpha,A+i*lda,1);
- }
-
- return;
- }
- }
-
- /** \brief fmove : \f$A \gets B \f$ and \f$ B \gets 0\f$.
- * @param F field
- * @param m number of rows to copy
- * @param n number of cols to copy
- * \param A matrix in \p F
- * \param lda stride of \p A
- * \param B vector in \p F
- * \param ldb stride of \p B
- */
- template<class Field>
- void
- fmove (const Field& F, const size_t m, const size_t n,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb )
- {
- fcopy(F,m,n,A,lda,B,ldb);
- fzero(F,m,n,B,ldb);
- }
-
- /** fadd : matrix addition.
- * Computes \p C = \p A + \p B.
- * @param F field
- * @param M rows
- * @param N cols
- * @param A dense matrix of size \c MxN
- * @param lda leading dimension of \p A
- * @param B dense matrix of size \c MxN
- * @param ldb leading dimension of \p B
- * @param C dense matrix of size \c MxN
- * @param ldc leading dimension of \p C
- */
- template <class Field>
- void
- fadd (const Field& F, const size_t M, const size_t N,
- const typename Field::Element* A, const size_t lda,
- const typename Field::Element* B, const size_t ldb,
- typename Field::Element* C, const size_t ldc)
- {
- const typename Field::Element *Ai = A, *Bi = B;
- typename Field::Element *Ci = C;
- for (; Ai < A+M*lda; Ai+=lda, Bi+=ldb, Ci+=ldc)
- for (size_t i=0; i<N; i++)
- F.add (Ci[i], Ai[i], Bi[i]);
- }
-
- /** fsub : matrix subtraction.
- * Computes \p C = \p A - \p B.
- * @param F field
- * @param M rows
- * @param N cols
- * @param A dense matrix of size \c MxN
- * @param lda leading dimension of \p A
- * @param B dense matrix of size \c MxN
- * @param ldb leading dimension of \p B
- * @param C dense matrix of size \c MxN
- * @param ldc leading dimension of \p C
- */
- template <class Field>
- void
- fsub (const Field& F, const size_t M, const size_t N,
- const typename Field::Element* A, const size_t lda,
- const typename Field::Element* B, const size_t ldb,
- typename Field::Element* C, const size_t ldc)
- {
- const typename Field::Element * Ai = A, *Bi = B;
- typename Field::Element *Ci = C;
- for (; Ai < A+M*lda; Ai+=lda, Bi+=ldb, Ci+=ldc)
- for (size_t i=0; i<N; i++)
- F.sub (Ci[i], Ai[i], Bi[i]);
- }
-
- //! fsubin
- template <class Field>
- void
- fsubin (const Field& F, const size_t M, const size_t N,
- const typename Field::Element* B, const size_t ldb,
- typename Field::Element* C, const size_t ldc)
- {
- const typename Field::Element * Bi = B;
- typename Field::Element *Ci = C;
- for (; Ci < C+M*ldc; Bi+=ldb, Ci+=ldc)
- for (size_t i=0; i<N; i++)
- F.subin (Ci[i], Bi[i]);
- }
-
- //! faddin
- template <class Field>
- void
- faddin (const Field& F, const size_t M, const size_t N,
- const typename Field::Element* B, const size_t ldb,
- typename Field::Element* C, const size_t ldc)
- {
- const typename Field::Element * Bi = B;
- typename Field::Element *Ci = C;
- for (; Ci < C+M*ldc; Bi+=ldb, Ci+=ldc)
- for (size_t i=0; i<N; i++)
- F.addin (Ci[i], Bi[i]);
- }
-
-
- /** @brief finite prime Field GEneral Matrix Vector multiplication.
- *
- * Computes \f$Y \gets \alpha \mathrm{op}(A) X + \beta Y \f$.
- * @param F field
- * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
- * @param M rows
- * @param N cols
- * @param alpha scalar
- * @param A dense matrix of size \c MxN
- * @param lda leading dimension of \p A
- * @param X dense vector of size \c N
- * @param incX stride of \p X
- * @param beta scalar
- * @param[out] Y dense vector of size \c M
- * @param incY stride of \p Y
- */
- template<class Field>
- void
- fgemv (const Field& F, const FFLAS_TRANSPOSE TransA,
- const size_t M, const size_t N,
- const typename Field::Element alpha,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * X, const size_t incX,
- const typename Field::Element beta,
- typename Field::Element * Y, const size_t incY);
-
- /** @brief fger: GEneral ?
- *
- * Computes \f$A \gets \alpha x . y^T + A\f$
- * @param F field
- * @param M rows
- * @param N cols
- * @param alpha scalar
- * @param[in,out] A dense matrix of size \c MxN and leading dimension \p lda
- * @param lda leading dimension of \p A
- * @param x dense vector of size \c M
- * @param incx stride of \p X
- * @param y dense vector of size \c N
- * @param incy stride of \p Y
- */
- template<class Field>
- void
- fger (const Field& F, const size_t M, const size_t N,
- const typename Field::Element alpha,
- const typename Field::Element * x, const size_t incx,
- const typename Field::Element * y, const size_t incy,
- typename Field::Element * A, const size_t lda);
-
- /** @brief ftrsv: TRiangular System solve with Vector
- * Computes \f$ X \gets \mathrm{op}(A^{-1}) X\f$
- * @param F field
- * @param X vector of size \p N on a field \p F
- * @param incX stride of \p X
- * @param A a matrix of leading dimension \p lda and size \p N
- * @param lda leading dimension of \p A
- * @param N number of rows or columns of \p A according to \p TransA
- * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
- * \param Diag if \c Diag==FflasUnit then \p A is unit.
- * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
- */
- template<class Field>
- void
- ftrsv (const Field& F, const FFLAS_UPLO Uplo,
- const FFLAS_TRANSPOSE TransA, const FFLAS_DIAG Diag,
- const size_t N,const typename Field::Element * A, const size_t lda,
- typename Field::Element * X, int incX);
-
- //---------------------------------------------------------------------
- // Level 3 routines
- //---------------------------------------------------------------------
-
- /** @brief ftrsm: <b>TR</b>iangular <b>S</b>ystem solve with <b>M</b>atrix.
- * Computes \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ or \f$B \gets \alpha B \mathrm{op}(A^{-1})\f$.
- * \param F field
- * \param Side if \c Side==FflasLeft then \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ is computed.
- * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
- * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
- * \param Diag if \c Diag==FflasUnit then \p A is unit.
- * \param M rows of \p B
- * \param N cols of \p B
- * @param alpha scalar
- * \param A triangular invertible matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$
- * @param lda leading dim of \p A
- * @param B matrix of size \p MxN
- * @param ldb leading dim of \p B
- * @bug unsafe with \c Trans==FflasTrans (debugging in progress)
- * @bug \f$\alpha\f$ must be non zero.
- */
- template<class Field>
- void
- ftrsm (const Field& F, const FFLAS_SIDE Side,
- const FFLAS_UPLO Uplo,
- const FFLAS_TRANSPOSE TransA,
- const FFLAS_DIAG Diag,
- const size_t M, const size_t N,
- const typename Field::Element alpha,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb);
-
- /** @brief ftrmm: <b>TR</b>iangular <b>M</b>atrix <b>M</b>ultiply.
- * Computes \f$ B \gets \alpha \mathrm{op}(A) B\f$ or \f$B \gets \alpha B \mathrm{op}(A)\f$.
- * @param F field
- * \param Side if \c Side==FflasLeft then \f$ B \gets \alpha \mathrm{op}(A) B\f$ is computed.
- * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
- * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
- * \param Diag if \c Diag==FflasUnit then \p A is implicitly unit.
- * \param M rows of \p B
- * \param N cols of \p B
- * @param alpha scalar
- * \param A triangular matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$
- * @param lda leading dim of \p A
- * @param B matrix of size \p MxN
- * @param ldb leading dim of \p B
- * @bug unsafe with \c Trans==FflasTrans (debugging in progress)
- */
- template<class Field>
- void
- ftrmm (const Field& F, const FFLAS_SIDE Side,
- const FFLAS_UPLO Uplo,
- const FFLAS_TRANSPOSE TransA,
- const FFLAS_DIAG Diag,
- const size_t M, const size_t N,
- const typename Field::Element alpha,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb);
-
- /** @brief fgemm: <b>F</b>ield <b>GE</b>neral <b>M</b>atrix <b>M</b>ultiply.
- *
- * Computes \f$C = \alpha \mathrm{op}(A) \times \mathrm{op}(B) + \beta C\f$
- * \param F field.
- * \param ta if \c ta==FflasTrans then \f$\mathrm{op}(A)=A^t\f$, else \f$\mathrm{op}(A)=A\f$,
- * \param tb same for \p B
- * \param m see \p A
- * \param k see \p A
- * \param n see \p B
- * \param alpha scalar
- * \param beta scalar
- * \param A \f$\mathrm{op}(A)\f$ is \f$m \times k\f$
- * \param B \f$\mathrm{op}(B)\f$ is \f$k \times n\f$
- * \param C \f$C\f$ is \f$m \times n\f$
- * \param lda leading dimension of \p A
- * \param ldb leading dimension of \p B
- * \param ldc leading dimension of \p C
- * \param w recursive levels of Winograd's algorithm are used
- * @warning \f$\alpha\f$ \e must be invertible
- */
- template<class Field>
- typename Field::Element*
- fgemm( const Field& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m,
- const size_t n,
- const size_t k,
- const typename Field::Element alpha,
- const typename Field::Element* A, const size_t lda,
- const typename Field::Element* B, const size_t ldb,
- const typename Field::Element beta,
- typename Field::Element* C, const size_t ldc,
- const size_t w)
- {
-
- if (!(m && n && k)) return C;
-
- if (F.isZero (alpha)){
- fscal(F, m, n, beta, C, ldc);
- return C;
- }
-
-
-
- size_t kmax = 0;
- size_t winolevel = w;
- FFLAS_BASE base;
- Protected::MatMulParameters (F, MIN(MIN(m,n),k), beta, kmax, base,
- winolevel, true);
- Protected::WinoMain (F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta,
- C, ldc, kmax, winolevel, base);
- return C;
- }
-
- /** @brief fgemm: <b>F</b>ield <b>GE</b>neral <b>M</b>atrix <b>M</b>ultiply.
- *
- * Computes \f$C = \alpha \mathrm{op}(A) \mathrm{op}(B) + \beta C\f$.
- * Automatically set Winograd recursion level
- * \param F field.
- * \param ta if \c ta==FflasTrans then \f$\mathrm{op}(A)=A^t\f$, else \f$\mathrm{op}(A)=A\f$,
- * \param tb same for matrix \p B
- * \param m see \p A
- * \param k see \p A
- * \param n see \p B
- * \param alpha scalar
- * \param beta scalar
- * \param A \f$\mathrm{op}(A)\f$ is \f$m \times k\f$
- * \param B \f$\mathrm{op}(B)\f$ is \f$k \times n\f$
- * \param C \f$C\f$ is \f$m \times n\f$
- * \param lda leading dimension of \p A
- * \param ldb leading dimension of \p B
- * \param ldc leading dimension of \p C
- * @warning \f$\alpha\f$ \e must be invertible
- */
- template<class Field>
- typename Field::Element*
- fgemm (const Field& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m,
- const size_t n,
- const size_t k,
- const typename Field::Element alpha,
- const typename Field::Element* A, const size_t lda,
- const typename Field::Element* B, const size_t ldb,
- const typename Field::Element beta,
- typename Field::Element* C, const size_t ldc)
- {
-
- if (!(m && n && k)) return C;
- if (F.isZero (alpha)){
- for (size_t i = 0; i<m; ++i)
- fscal(F, n, beta, C + i*ldc, 1);
- return C;
- }
-
-#ifdef _LB_DEBUG
- /* check if alpha is invertible. XXX do it in F.isInvertible(Element&) ? */
- typename Field::Element e ;
- F.init(e,1UL);
- F.divin(e,alpha);
- F.mulin(e,alpha);
- FFLASFFPACK_check(F.isOne(e));
-#endif
- size_t w, kmax;
- FFLAS_BASE base;
-
- Protected::MatMulParameters (F, MIN(MIN(m,n),k), beta, kmax, base, w);
-
- Protected::WinoMain (F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta,
- C, ldc, kmax, w, base);
- return C;
- }
-
- /** @brief fsquare: Squares a matrix.
- * compute \f$ C \gets \alpha \mathrm{op}(A) \mathrm{op}(A) + \beta C\f$ over a Field \p F
- * Avoid the conversion of B
- * @param ta if \c ta==FflasTrans, \f$\mathrm{op}(A)=A^T\f$.
- * @param F field
- * @param n size of \p A
- * @param alpha scalar
- * @param beta scalar
- * @param A dense matrix of size \c nxn
- * @param lda leading dimension of \p A
- * @param C dense matrix of size \c nxn
- * @param ldc leading dimension of \p C
- */
- template<class Field>
- typename Field::Element* fsquare (const Field& F,
- const FFLAS_TRANSPOSE ta,
- const size_t n,
- const typename Field::Element alpha,
- const typename Field::Element* A,
- const size_t lda,
- const typename Field::Element beta,
- typename Field::Element* C,
- const size_t ldc);
-#ifdef LB_TRTR
- // BB
- /** @brief ftrtr: Triangular-Triangular matrix multiplication.
- * \f$B \gets \alpha \mathrm{op}(A) \times B\f$ (for FFLAS_SIDE::FflasLeft)
- * A and B are triangular, with B UpLo
- * and op(A) = A, A^T according to TransA
- * A and B can be (non)unit
- *
- */
- template<class Field>
- typename Field::Element* ftrtr (const Field& F, const FFLAS_SIDE Side,
- const FFLAS_UPLO Uplo,
- const FFLAS_TRANSPOSE TransA,
- const FFLAS_DIAG ADiag,
- const FFLAS_DIAG BDiag,
- const size_t M,
- const typename Field::Element alpha,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb);
-#endif
-
- /** faddm.
- * A <- A+op(B)
- * with op(B) = B or B^T
- */
- template<class Field>
- void faddm(const Field & F,
- const FFLAS_TRANSPOSE transA,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb);
-
- /** faddm.
- * C <- op(A)+op(B)
- * with op(B) = B or B^T
- */
- template<class Field>
- void faddm(const Field & F,
- const FFLAS_TRANSPOSE transA,
- const FFLAS_TRANSPOSE transB,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * B, const size_t ldb,
- typename Field::Element * C, const size_t ldc );
-
- /** fsubm.
- * A <- A-op(B)
- * with op(B) = B or B^T
- */
- template<class Field>
- void fsubm(const Field & F,
- const FFLAS_TRANSPOSE transA,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb) ;
-
- /** fsubm.
- * C <- op(A)-op(B)
- * with op(B) = B or B^T
- */
- template<class Field>
- void fsubm(const Field & F,
- const FFLAS_TRANSPOSE transA,
- const FFLAS_TRANSPOSE transB,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * B, const size_t ldb,
- typename Field::Element * C, const size_t ldc );
-
- /** MatCopy makes a copy of the matrix M into a new allocated space.
- * @param F field
- * @param M rows of \p A
- * @param N cols of \p A
- * @param A matrix to be copied
- * @param lda leading dimension of \p A
- * @return a copy \p C of \p A with stride \p N
- * @warning \p A and \p C belong to the same field.
- */
- template<class Field>
- typename Field::Element* MatCopy (const Field& F,
- const size_t M, const size_t N,
- const typename Field::Element * A,
- const size_t lda)
- {
-
- typename Field::Element * C = new typename Field::Element[M*N];
- for (size_t i = 0; i < N; ++i)
- for (size_t j = 0; j < N; ++j)
- F.assign(*(C + i*N + j),*(A + i*lda + j));
- return C;
- }
-
- /** \brief Computes the number of recursive levels to perform.
- *
- * \param m the common dimension in the product AxB
- */
- size_t WinoSteps (const size_t m);
-
-
-
-} // class FFLAS
-
-#include "fflas_bounds.inl"
#include "fflas_fgemm.inl"
+#include "fflas_pfgemm.inl"
+// fgemm must be before fgemv according to ScalAndReduce function declaration ?!? PG
#include "fflas_fgemv.inl"
+#include "fflas_freivalds.inl"
#include "fflas_fger.inl"
#include "fflas_ftrsm.inl"
+#include "fflas_pftrsm.inl"
#include "fflas_ftrmm.inl"
#include "fflas_ftrsv.inl"
#include "fflas_faxpy.inl"
#include "fflas_fdot.inl"
-#include "fflas_fcopy.inl"
-//BB
-#ifdef LB_TRTR
-#include "fflas_ftrtr.inl"
-#endif
+//---------------------------------------------------------------------
+// MultiPrecision routines
+//---------------------------------------------------------------------
-#include "fflas_faddm.inl"
+// include multiprecision fields for specialisation
-#undef LB_TRTR
+#include "fflas-ffpack/field/rns.h" //forward declaration of the multiprecision field
+#include "fflas_fscal_mp.inl"
+#include "fflas_freduce_mp.inl"
+#include "fflas-ffpack/fflas/fflas_fger_mp.inl"
+#include "fflas_fgemm/fgemm_classical_mp.inl"
+#include "fflas_ftrsm_mp.inl"
+#include "fflas_fgemv_mp.inl"
+#include "fflas-ffpack/field/rns.inl" // real implementation of the multiprecision field
-#endif // __FFLASFFPACK_fflas_H
+#include "fflas-ffpack/paladin/fflas_pfinit.h"
+//---------------------------------------------------------------------
+// Sparse routines
+//---------------------------------------------------------------------
+#include "fflas_sparse.h"
+
+#endif // __FFLASFFPACK_fflas_H
diff --git a/fflas-ffpack/fflas/fflas_bounds.inl b/fflas-ffpack/fflas/fflas_bounds.inl
index 20af400..4ed282d 100644
--- a/fflas-ffpack/fflas/fflas_bounds.inl
+++ b/fflas-ffpack/fflas/fflas_bounds.inl
@@ -1,9 +1,10 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
/* fflas/fflas_bounds.inl
* Copyright (C) 2008 Clement Pernet
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
*
*
* ========LICENCE========
@@ -29,363 +30,223 @@
#ifndef __FFLASFFPACK_fflas_bounds_INL
#define __FFLASFFPACK_fflas_bounds_INL
-#define FFLAS_INT_TYPE long unsigned int
+#define FFLAS_INT_TYPE uint64_t
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include "fflas-ffpack/utils/flimits.h"
-namespace FFLAS {
- namespace Protected {
- /** MatMulParameters.
- *
- * \brief Computes the threshold parameters for the cascade
- * Matmul algorithm
- *
- *
- * \param F Finite Field/Ring of the computation.
- * \param k Common dimension of A and B, in the product A x B
- * \param bet Computing AB + beta C
- * \param delayedDim Returns the size of blocks that can be multiplied
- * over Z with no overflow
- * \param base Returns the type of BLAS representation to use
- * \param winoRecLevel Returns the number of recursion levels of
- * Strassen-Winograd's algorithm to perform
- * \param winoLevelProvided tells whether the user forced the number of
- * recursive level of Winograd's algorithm
- */
- template <class Field>
- inline void MatMulParameters (const Field& F,
- const size_t k,
- const typename Field::Element& beta,
- size_t& delayedDim,
- FFLAS_BASE& base,
- size_t& winoRecLevel,
- bool winoLevelProvided) {
-
- // Strategy : determine Winograd's recursion first, then choose appropriate
- // floating point representation, and finally the blocking dimension.
- // Can be improved for some cases.
+#include <givaro/udl.h>
+#include <givaro/modular.h>
+#include <givaro/modular-balanced.h>
- if (!winoLevelProvided)
- winoRecLevel = WinoSteps (k);
- base = BaseCompute (F, winoRecLevel);
- delayedDim = DotProdBound (F, winoRecLevel, beta, base);
-
- size_t n = k;
- size_t winoDel = winoRecLevel;
-
- // Computes the delayedDim, only depending on the recursive levels
- // that must be performed over Z
- while (winoDel > 0 && delayedDim < n) {
- winoDel--;
- delayedDim = DotProdBound (F, winoDel, beta, base);
- n >>= 1;
- }
- delayedDim = MIN (k, delayedDim);
+namespace FFLAS { namespace Protected {
- }
+ template <class Field>
+ inline double computeFactorClassic (const Field& F)
+ {
+ //FFLAS_INT_TYPE p=0;
+ Givaro::Integer p=0;
+ F.characteristic(p);
+ return (double) (p-1);
+ }
- template <class Field>
- unsigned long Mantissa (const Field& F, const FFLAS_BASE base)
- {
- return (base == FflasDouble) ? DBL_MANT_DIG : FLT_MANT_DIG;
- }
+ /*************************************************************************************
+ * Specializations for ModularPositive and ModularBalanced over double and float
+ *************************************************************************************/
+ template <>
+ inline double computeFactorClassic (const Givaro::ModularBalanced<double>& F)
+ {
+ //FFLAS_INT_TYPE p;
+ Givaro::Integer p;
+ F.characteristic(p);
+ return double((p-1) >> 1);
+ }
- /** DotProdBound computes the maximal size for delaying the modular reduction
- * in a dotproduct.
- *
- * This is the default version assuming a conversion to a positive modular representation
- *
- * \param F Finite Field/Ring of the computation
- * \param w Number of recusrive Strassen-Winograd levels (if any, \p 0 otherwise)
- * \param beta Computing <code>AB + beta C</code>
- * \param base Type of floating point representation for delayed modular computations
- *
- */
- template <class Field>
- inline size_t DotProdBound (const Field& F,
- const size_t w,
- const typename Field::Element& beta,
- const FFLAS_BASE base)
- {
+ //BB: ajout, pourquoi pas ?
+ template <>
+ inline double computeFactorClassic (const Givaro::ModularBalanced<float>& F)
+ {
+ //FFLAS_INT_TYPE p;
+ Givaro::Integer p;
+ F.characteristic(p);
+ return double((p-1) >> 1);
+ }
- FFLAS_INT_TYPE p;
- F.characteristic(p);
+ template <class Field>
+ inline size_t DotProdBoundClassic (const Field& F,
+ const typename Field::Element& beta
+ )
+ {
- unsigned long mantissa = Mantissa (F, base);
+ //FFLAS_INT_TYPE p=0;
+ Givaro::Integer p=0;
+ F.characteristic(p);
- //(base == FflasDouble) ? DBL_MANT_DIG : FLT_MANT_DIG;
+ //unsigned long mantissa = Protected::Mantissa<typename Field::Element>();
- if (p == 0)
- return 1;
+ if (p == 0)
+ return std::numeric_limits<size_t>::max();
- double kmax;
- if (w > 0) {
- double c = computeFactorWino (F,w);
+ double kmax;
+ {
- double d = (double (1ULL << mantissa) /(c*c) + 1);
- if (d < 2)
- return 1;
- kmax = floor (d * double(1ULL << w));
- // if (kmax <= 1) return 1;
- } else {
+ double c = computeFactorClassic(F);
+
+ double cplt=0;
+ if (!F.isZero (beta)){
+ if (F.isOne (beta) || F.areEqual (beta, F.mOne)) cplt = c;
+ else{
+ double be;
+ F.convert(be, beta);
+ cplt = fabs(be)*c;
+ }
+ }
+ kmax = floor ( (double (double(limits<typename Field::Element>::max()) + 1 - cplt)) / (c*c));
+ if (kmax <= 1) return 1;
+ }
+
+ //kmax--; // we computed a strict upper bound
+ return (size_t) std::min ((uint64_t)kmax, 1_ui64 << 31);
+ }
+
+} // FFLAS
+} // Protected
- double c = computeFactorClassic(F);
+namespace FFLAS {
- double cplt=0;
- if (!F.isZero (beta)){
- if (F.isOne (beta) || F.areEqual (beta, F.mOne)) cplt = c;
- else{
- double be;
- F.convert(be, beta);
- cplt = fabs(be)*c;
- }
+ inline Givaro::Integer
+ InfNorm (const size_t M, const size_t N, const Givaro::Integer* A, const size_t lda){
+ Givaro::Integer max = 0;
+ size_t log=0;
+ for (size_t i=0; i<M; ++i)
+ for (size_t j=0; j<N; ++j){
+ Givaro::Integer x = A[i*lda+j];
+ if ((x.bitsize() >= log) && (abs(x) > max)){
+ max = abs(x);
+// max = x;
+ log = x.bitsize();
}
- kmax = floor ( (double (double(1ULL << mantissa) - cplt)) / (c*c));
- if (kmax <= 1) return 1;
}
+ return max;
+ }
- //kmax--; // we computed a strict upper bound
- return (size_t) MIN ((unsigned long long)kmax, 1ULL << 31);
- }
-
- /** @internal
- * @brief Internal function for the bound computation
- * Generic implementation for positive representations
- */
- template <class Field>
- inline double computeFactorWino (const Field& F, const size_t w)
- {
- FFLAS_INT_TYPE p;
- F.characteristic(p);
- size_t ex=1;
- for (size_t i=0; i < w; ++i) ex *= 3;
- return double(p - 1) * double(1 + ex) / double(2);
- }
+ namespace Protected {
- template <class Field>
- inline double computeFactorClassic (const Field& F)
- {
- FFLAS_INT_TYPE p;
- F.characteristic(p);
- return (double) (p-1);
- }
- } // Protected
- /** WinoSteps computes the number of recursive levels to perform.
+ /**
+ * TRSMBound
*
- * \param m the common dimension in the product AxB
+ * \brief computes the maximal size for delaying the modular reduction
+ * in a triangular system resolution
+ *
+ * This is the default version over an arbitrary field.
+ * It is currently never used (the recursive algorithm is run until n=1 in this case)
+ *
+ * \param F Finite Field/Ring of the computation
*
*/
- inline size_t WinoSteps (const size_t m)
+ template <class Field>
+ inline size_t TRSMBound (const Field&)
{
- size_t w = 0;
- size_t mt = m;
- while (mt >= WINOTHRESHOLD) {w++; mt >>= 1;}
- return w;
+ return 1;
}
- namespace Protected {
- /** BaseCompute determines the type of floating point representation to
- * convert to, for BLAS computations.
- * \param F Finite Field/Ring of the computation
- * \param w Number of recursive levels in Winograd's algorithm
- *
- */
- template <class Field>
- inline FFLAS_BASE BaseCompute (const Field& F, const size_t w)
- {
-
- FFLAS_INT_TYPE pi;
- F.characteristic(pi);
- FFLAS_BASE base;
- switch (w) {
- case 0: base = (pi < FLOAT_DOUBLE_THRESHOLD_0)? FflasFloat : FflasDouble;
- break;
- case 1: base = (pi < FLOAT_DOUBLE_THRESHOLD_1)? FflasFloat : FflasDouble;
- break;
- case 2: base = (pi < FLOAT_DOUBLE_THRESHOLD_2)? FflasFloat : FflasDouble;
- break;
- default: base = FflasDouble;
- break;
- }
- return base;
- }
-
-
- /*************************************************************************************
- * Specializations for ModularPositive and ModularBalanced over double and float
- *************************************************************************************/
-
- template <>
- inline double computeFactorWino (const FFPACK:: ModularBalanced<double>& F, const size_t w)
- {
- FFLAS_INT_TYPE p;
- F.characteristic(p);
- size_t ex=1;
- for (size_t i=0; i < w; ++i) ex *= 3;
- return double((p - 1) * ex / 2);
- }
-
- template <>
- inline double computeFactorWino (const FFPACK:: ModularBalanced<float>& F, const size_t w)
- {
- FFLAS_INT_TYPE p;
- F.characteristic(p);
- size_t ex=1;
- for (size_t i=0; i < w; ++i) ex *= 3;
- return double((p - 1) * ex / 2);
- }
-
- template <>
- inline double computeFactorClassic (const FFPACK:: ModularBalanced<double>& F)
- {
- FFLAS_INT_TYPE p;
- F.characteristic(p);
- return double((p-1) >> 1);
- }
-
-
- template <>
- inline FFLAS_BASE BaseCompute (const FFPACK:: Modular<double>& ,
- const size_t )
- {
- return FflasDouble;
- }
-
- template <>
- inline FFLAS_BASE BaseCompute (const FFPACK:: Modular<float>& ,
- const size_t )
- {
- return FflasFloat;
- }
-
- template <>
- inline FFLAS_BASE BaseCompute (const FFPACK:: ModularBalanced<double>& ,
- const size_t )
- {
- return FflasDouble;
- }
-
- template <>
- inline FFLAS_BASE BaseCompute (const FFPACK:: ModularBalanced<float>& ,
- const size_t )
- {
- return FflasFloat;
- }
-
-
-
- /**
- * TRSMBound
- *
- * \brief computes the maximal size for delaying the modular reduction
- * in a triangular system resolution
- *
- * This is the default version over an arbitrary field.
- * It is currently never used (the recursive algorithm is run until n=1 in this case)
- *
- * \param F Finite Field/Ring of the computation
- *
- */
- template <class Field>
- inline size_t TRSMBound (const Field& F)
- {
- return 1;
- }
-
- /**
- * Specialization for positive modular representation over double
- * Computes nmax s.t. (p-1)/2*(p^{nmax-1} + (p-2)^{nmax-1}) < 2^53
- * See [Dumas Giorgi Pernet 06, arXiv:cs/0601133]
- */
- template<>
- inline size_t TRSMBound (const FFPACK:: Modular<double>& F)
- {
-
- FFLAS_INT_TYPE pi;
- F.characteristic(pi);
- unsigned long p = pi;
- unsigned long long p1(1UL), p2(1UL);
- size_t nmax = 0;
- unsigned long long max = ( (1ULL << (DBL_MANT_DIG + 1) ) / ((unsigned long long)(p - 1)));
- while ( (p1 + p2) < max ){
- p1*=p;
- p2*=p-2;
- nmax++;
- }
- return nmax;
- }
-
-
- /**
- * Specialization for positive modular representation over float.
- * Computes nmax s.t. (p-1)/2*(p^{nmax-1} + (p-2)^{nmax-1}) < 2^24
- * @pbi
- * See [Dumas Giorgi Pernet 06, arXiv:cs/0601133]
- */
- template<>
- inline size_t TRSMBound (const FFPACK:: Modular<float>& F)
- {
-
- FFLAS_INT_TYPE pi;
- F.characteristic(pi);
- unsigned long p = pi;
- unsigned long long p1(1UL), p2(1UL);
- size_t nmax = 0;
- unsigned long long max = ( (1ULL << (FLT_MANT_DIG + 1) ) / ((unsigned long long)(p - 1)));
- while ( (p1 + p2) < max ){
- p1*=p;
- p2*=p-2;
- nmax++;
- }
- return nmax;
- }
-
- /**
- * Specialization for balanced modular representation over double.
- * Computes nmax s.t. (p-1)/2*(((p+1)/2)^{nmax-1}) < 2^53
- * @bib
- * - Dumas Giorgi Pernet 06, arXiv:cs/0601133
- */
- template<>
- inline size_t TRSMBound (const FFPACK:: ModularBalanced<double>& F)
- {
+ // /**
+ // * Specialization for positive modular representation over double
+ // * Computes nmax s.t. (p-1)/2*(p^{nmax-1} + (p-2)^{nmax-1}) < 2^53
+ // * See [Dumas Giorgi Pernet 06, arXiv:cs/0601133]
+ // */
+ // template<>
+ // inline size_t TRSMBound (const Givaro::Modular<double>& F)
+ // {
+
+ // FFLAS_INT_TYPE pi;
+ // F.characteristic(pi);
+ // unsigned long p = pi;
+ // unsigned long long p1(1), p2(1);
+ // size_t nmax = 0;
+ // unsigned long long max = ( (1 << (DBL_MANT_DIG + 1) ) / ((unsigned long long)(p - 1)));
+ // while ( (p1 + p2) < max ){
+ // p1*=p;
+ // p2*=p-2;
+ // nmax++;
+ // }
+ // return nmax;
+ // }
+
+
+ /**
+ * Specialization for positive modular representation over float.
+ * Computes nmax s.t. (p-1)/2*(p^{nmax-1} + (p-2)^{nmax-1}) < 2^24
+ * @pbi
+ * See [Dumas Giorgi Pernet 06, arXiv:cs/0601133]
+ */
+ template<class Element>
+ inline size_t TRSMBound (const Givaro::Modular<Element>& F)
+ {
- FFLAS_INT_TYPE pi;
- F.characteristic (pi);
- unsigned long p = (pi + 1) / 2;
- unsigned long long p1(1UL);
- size_t nmax = 0;
- unsigned long long max = ((1ULL << (DBL_MANT_DIG + 1)) / ((unsigned long long)(p - 1)));
- while (p1 < max){
- p1 *= p;
- nmax++;
- }
- return nmax;
- }
+ FFLAS_INT_TYPE pi;
+ F.characteristic(pi);
+ double p = pi;
+ double p1 = 1.0, p2 = 1.0;
+ double pm1 = (p - 1) / 2;
+ size_t nmax = 0;
+ unsigned long long max = limits<Element>::max();
+ while ( (p1 + p2)*pm1 <= max ){
+ p1*=p;
+ p2*=p-2;
+ nmax++;
+ }
+ return std::max((size_t)1,nmax);
+ }
- /**
- * Specialization for balanced modular representation over float
- * Computes nmax s.t. (p-1)/2*(((p+1)/2)^{nmax-1}) < 2^24
- * See [Dumas Giorgi Pernet 06, arXiv:cs/0601133]
- */
- template<>
- inline size_t TRSMBound (const FFPACK:: ModularBalanced<float>& F)
- {
+ /**
+ * Specialization for balanced modular representation over double.
+ * Computes nmax s.t. (p-1)/2*(((p+1)/2)^{nmax-1}) < 2^53
+ * @bib
+ * - Dumas Giorgi Pernet 06, arXiv:cs/0601133
+ */
+ template<class Element>
+ inline size_t TRSMBound (const Givaro::ModularBalanced<Element>& F)
+ {
- FFLAS_INT_TYPE pi;
- F.characteristic (pi);
- unsigned long p = (pi + 1) / 2;
- unsigned long long p1(1UL);
- size_t nmax = 0;
- unsigned long long max = ((1ULL << (FLT_MANT_DIG + 1)) / ((unsigned long long) (p - 1)));
- while (p1 < max){
- p1 *= p;
- nmax++;
- }
- return nmax;
+ FFLAS_INT_TYPE pi;
+ F.characteristic (pi);
+ double pp1 = (pi + 1) / 2;
+ double pm1 = (pi - 1) / 2;
+ double p1 = 1.0;
+ size_t nmax = 0;
+ double max = limits<Element>::max();
+ while (pm1*p1 <= max){
+ p1 *= pp1;
+ nmax++;
+ }
+ return std::max((size_t) 1,nmax);
+ }
- }
- } // Protected
+ // /**
+ // * Specialization for balanced modular representation over float
+ // * Computes nmax s.t. (p-1)/2*(((p+1)/2)^{nmax-1}) < 2^24
+ // * See [Dumas Giorgi Pernet 06, arXiv:cs/0601133]
+ // */
+ // template<>
+ // inline size_t TRSMBound (const Givaro::ModularBalanced<float>& F)
+ // {
+
+ // FFLAS_INT_TYPE pi;
+ // F.characteristic (pi);
+ // unsigned long p = (pi + 1) / 2;
+ // unsigned long long p1(1);
+ // size_t nmax = 0;
+ // unsigned long long max = (1 << (FLT_MANT_DIG + 1)) ;
+ // while ((pi-1)*p1 < max){
+ // p1 *= p;
+ // nmax++;
+ // }
+ // return nmax;
+
+ // }
+} // Protected
} // FFLAS
#endif // __FFLASFFPACK_fflas_bounds_INL
diff --git a/fflas-ffpack/fflas/fflas_enum.h b/fflas-ffpack/fflas/fflas_enum.h
new file mode 100644
index 0000000..ce09e08
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_enum.h
@@ -0,0 +1,103 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* fflas_enum.h
+ * Copyright (C) The FFLAS-FFPACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+#ifndef __FFLASFFPACK_enum_INL
+#define __FFLASFFPACK_enum_INL
+
+namespace FFLAS {
+
+ /// Storage by row or col ?
+ enum FFLAS_ORDER {
+ FflasRowMajor=101, /**< row major */
+ FflasColMajor=102 /**< col major */
+ };
+ // public:
+ /// Is matrix transposed ?
+ enum FFLAS_TRANSPOSE {
+ FflasNoTrans = 111, /**< Matrix is not transposed */
+ FflasTrans = 112 /**< Matrix is transposed */
+ };
+ /// Is triangular matrix's shape upper ?
+ enum FFLAS_UPLO {
+ FflasUpper = 121, /**< Triangular matrix is Upper triangular (if \f$i>j\f$ then \f$T_{i,j} = 0\f$)*/
+ FflasLower = 122 /**< Triangular matrix is Lower triangular (if \f$i<j\f$ then \f$T_{i,j} = 0\f$)*/
+ };
+
+ /// Is the triangular matrix implicitly unit diagonal ?
+ enum FFLAS_DIAG {
+ FflasNonUnit = 131, /**< Triangular matrix has an explicit arbitrary diagonal */
+ FflasUnit = 132 /**< Triangular matrix has an implicit unit diagonal (\f$T_{i,i} = 1\f$)*/ /**< */
+ };
+
+ /// On what side ?
+ enum FFLAS_SIDE {
+ FflasLeft = 141,/**< Operator applied on the left */
+ FflasRight = 142 /**< Operator applied on the rigth*/
+ };
+
+ /** \p FFLAS_BASE determines the type of the element representation for Matrix Mult kernel. (deprecated, should not be used) */
+ enum FFLAS_BASE {
+ FflasDouble = 151, /**< to use the double precision BLAS */
+ FflasFloat = 152, /**< to use the single precison BLAS */
+ FflasGeneric = 153 /**< for any other domain, that can not be converted to floating point integers */
+ };
+}
+
+#include <algorithm>
+
+namespace FFLAS{ namespace Protected {
+
+ template <class X, class Y> class AreEqual {
+ public:
+ static const bool value = false;
+ };
+
+ template <class X> class AreEqual<X, X> {
+ public:
+ static const bool value = true;
+ };
+ } // Protected
+} // class FFLAS
+
+namespace FFLAS {
+
+template <class T> const T &min3(const T &m, const T &n, const T &k) { return std::min(m, std::min(n, k)); }
+
+template <class T> const T &max3(const T &m, const T &n, const T &k) { return std::max(m, std::min(n, k)); }
+
+template <class T> const T &min4(const T &m, const T &n, const T &k, const T &l) {
+ return std::min(std::min(m, n), std::min(k, l));
+}
+
+template <class T> const T &max4(const T &m, const T &n, const T &k, const T &l) {
+ return std::max(std::max(m, n), std::max(k, l));
+}
+
+} // FFLAS
+
+
+
+#endif // __FFLASFFPACK_enum_INL
diff --git a/fflas-ffpack/fflas/fflas_fadd.h b/fflas-ffpack/fflas/fflas_fadd.h
new file mode 100644
index 0000000..f6f55c3
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fadd.h
@@ -0,0 +1,310 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2014 FFLAS-FFPACK group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fadd_H
+#define __FFLASFFPACK_fadd_H
+
+namespace FFLAS {
+
+ template<class T>
+ struct support_simd_add : public std::false_type {} ;
+
+// #ifdef __FFLASFFPACK_USE_SIMD
+ template<>
+ struct support_simd_add<float> : public std::true_type {} ;
+ template<>
+ struct support_simd_add<double> : public std::true_type {} ;
+ #ifdef SIMD_INT
+ template<>
+ struct support_simd_add<int64_t> : public std::true_type {} ;
+ template<>
+ struct support_simd_add<int32_t> : public std::true_type {} ;
+
+ #endif // SIMD_INT
+
+// #endif // __FFLASFFPACK_USE_SIMD
+
+} // FFLAS
+
+#include "fflas_fadd.inl"
+
+namespace FFLAS {
+
+ /***************************/
+ /* LEVEL 1 */
+ /***************************/
+
+ template <class Field>
+ void
+ fadd (const Field & F, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t inca,
+ typename Field::ConstElement_ptr B, const size_t incb,
+ typename Field::Element_ptr C, const size_t incc)
+ {
+ details::fadd<Field, true>(F,N,A,inca,B,incb,C,incc
+ , typename FieldTraits<Field>::category() );
+ }
+
+
+
+ template <class Field>
+ void
+ faddin (const Field& F, const size_t N,
+ typename Field::ConstElement_ptr B, const size_t incb,
+ typename Field::Element_ptr C, const size_t incc)
+ {
+ fadd(F,N,B,incb,C,incc,C,incc);
+ return;
+ }
+
+ template <class Field>
+ void
+ fsub(const Field & F, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t inca,
+ typename Field::ConstElement_ptr B, const size_t incb,
+ typename Field::Element_ptr C, const size_t incc)
+ {
+
+ details::fadd<Field, false>(F,N,A,inca,B,incb,C,incc
+ , typename FieldTraits<Field>::category() );
+ }
+
+
+
+ template <class Field>
+ void
+ fsubin (const Field& F, const size_t N,
+ typename Field::ConstElement_ptr B, const size_t incb,
+ typename Field::Element_ptr C, const size_t incc)
+ {
+ fsub(F,N,C,incc,B,incb,C,incc);
+ return;
+ }
+
+ // C = A + a B
+ template <class Field>
+ void
+ fadd (const Field& F, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t inca,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr B, const size_t incb,
+ typename Field::Element_ptr C, const size_t incc)
+ {
+ if (C == A && inca == incc)
+ return faxpy(F,N,alpha,B,incb,C,incc);
+ if (F.isOne(alpha))
+ return fadd(F,N,A,inca,B,incb,C,incc);
+ if (F.isMOne(alpha)){
+ return fsub(F,N,A,inca,B,incb,C,incc);
+ }
+ if (F.isZero(alpha))
+ return fassign(F,N,A,inca,C,incc);
+
+ if (inca == 1 && incb == 1 && incc == 1) {
+ for (size_t i = 0 ; i < N ; ++i) {
+ //!@todo optimise here
+ F.mul(C[i],alpha,B[i]);
+ F.addin(C[i],A[i]);
+ }
+ return;
+ }
+
+ typename Field::ConstElement_ptr Ai = A, Bi = B;
+ typename Field::Element_ptr Ci = C;
+ for (; Ai < A+N*inca; Ai+=inca, Bi+=incb, Ci+=incc) {
+ F.mul(*Ci,alpha,*Bi);
+ F.addin (*Ci, *Ai);
+ }
+ }
+
+
+ /***************************/
+ /* LEVEL 2 */
+ /***************************/
+
+
+ template <class Field>
+ void
+ pfadd (const Field & F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc, const size_t numths){
+ SYNCH_GROUP(
+ FORBLOCK1D(iter, M, SPLITTER(numths),
+ size_t rowsize= iter.end()-iter.begin();
+ TASK(MODE(CONSTREFERENCE(F) READWRITE(C[iter.begin()*ldc]) READ(A[iter.begin()*lda], B[iter.begin()*ldb])),
+ fadd(F, rowsize, N, A+iter.begin()*lda, lda, B+iter.begin()*ldb, ldb, C+iter.begin()*ldc, ldc);
+ );
+ );
+ );
+ }
+
+ template <class Field>
+ void
+ pfsub (const Field & F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc, const size_t numths){
+ SYNCH_GROUP(
+ FORBLOCK1D(iter, M, SPLITTER(numths),
+ size_t rowsize= iter.end()-iter.begin();
+ TASK(MODE(CONSTREFERENCE(F) READWRITE(C[iter.begin()*ldc]) READ(A[iter.begin()*lda], B[iter.begin()*ldb])),
+ fsub(F, rowsize, N, A+iter.begin()*lda, lda, B+iter.begin()*ldb, ldb, C+iter.begin()*ldc, ldc);
+ );
+ );
+ );
+ }
+
+
+ template <class Field>
+ void
+ pfaddin (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc, size_t numths){
+
+ SYNCH_GROUP(
+ FORBLOCK1D(iter, M, SPLITTER(numths),
+ size_t rowsize= iter.end()-iter.begin();
+ TASK(MODE(CONSTREFERENCE(F) READWRITE(C[iter.begin()*ldc]) READ(B[iter.begin()*ldb])),
+ faddin(F, rowsize, N, B+iter.begin()*ldb, ldb, C+iter.begin()*ldc, ldc);
+ );
+ );
+ );
+ }
+
+ template <class Field>
+ void
+ pfsubin (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc, size_t numths){
+ SYNCH_GROUP(
+ FORBLOCK1D(iter, M, SPLITTER(numths),
+ size_t rowsize= iter.end()-iter.begin();
+ TASK(MODE(CONSTREFERENCE(F) READWRITE(C[iter.begin()*ldc]) READ(B[iter.begin()*ldb])),
+ fsubin(F, rowsize, N, B+iter.begin()*ldb, ldb, C+iter.begin()*ldc, ldc);
+ );
+ );
+ );
+ }
+
+ template <class Field>
+ void
+ fadd (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc)
+ {
+ if (N == lda && N == ldb && N == ldc)
+ return fadd(F,M*N,A,1,B,1,C,1);
+ typename Field::ConstElement_ptr Ai = A, Bi = B;
+ typename Field::Element_ptr Ci = C;
+ for (; Ai < A+M*lda; Ai+=lda, Bi+=ldb, Ci+=ldc)
+ fadd(F,N,Ai,1,Bi,1,Ci,1);
+ }
+
+ template <class Field>
+ void
+ fsub (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc)
+ {
+ if (N == lda && N == ldb && N == ldc)
+ return fsub(F,M*N,A,1,B,1,C,1);
+ typename Field::ConstElement_ptr Ai = A, Bi = B;
+ typename Field::Element_ptr Ci = C;
+ for (; Ai < A+M*lda; Ai+=lda, Bi+=ldb, Ci+=ldc)
+ fsub(F,N,Ai,1,Bi,1,Ci,1);
+ }
+
+ template <class Field>
+ void
+ faddin (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc)
+ {
+ if (N == ldb && N == ldc)
+ return faddin(F,M*N,B,1,C,1);
+ const typename Field::Element *Bi = B;
+ typename Field::Element_ptr Ci = C;
+ for (; Bi < B+M*ldb; Bi+=ldb, Ci+=ldc)
+ faddin(F,N,Bi,1,Ci,1);
+ }
+
+ template <class Field>
+ void
+ fsubin (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc)
+ {
+ if (N == ldb && N == ldc)
+ return fsubin(F,M*N,B,1,C,1);
+ typename Field::ConstElement_ptr Bi = B;
+ typename Field::Element_ptr Ci = C;
+ for (; Bi < B+M*ldb; Bi+=ldb, Ci+=ldc)
+ fsubin(F,N,Bi,1,Ci,1);
+ }
+
+
+ // C = A + a B
+ template <class Field>
+ void
+ fadd (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc)
+ {
+ if (C == A && lda == ldc)
+ return faxpy(F,M,N,alpha,B,ldb,C,ldc);
+ if (F.isOne(alpha))
+ return fadd(F,M,N,A,lda,B,ldb,C,ldc);
+ if (F.isMOne(alpha))
+ return fsub(F,M,N,A,lda,B,ldb,C,ldc);
+ if (F.isZero(alpha))
+ return fassign(F,M,N,A,lda,C,ldc);
+
+ if (N == lda && N == ldb && N == ldc)
+ return fadd(F,M*N,A,1,alpha,B,1,C,1);
+
+ typename Field::ConstElement_ptr Ai = A, Bi = B;
+ typename Field::Element_ptr Ci = C;
+ for (; Ai < A+M*lda; Ai+=lda, Bi+=ldb, Ci+=ldc)
+ for (size_t i=0; i<N; i++) {
+ F.mul(Ci[i],alpha,Bi[i]);
+ F.addin (Ci[i], Ai[i]);
+ }
+ }
+
+
+} // FFLAS
+
+
+#endif // __FFLASFFPACK_fadd_H
+
diff --git a/fflas-ffpack/fflas/fflas_fadd.inl b/fflas-ffpack/fflas/fflas_fadd.inl
new file mode 100644
index 0000000..c3dad8d
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fadd.inl
@@ -0,0 +1,345 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2014 FFLAS-FFPACK group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fadd_INL
+#define __FFLASFFPACK_fadd_INL
+
+#include "fflas-ffpack/fflas/fflas_simd.h"
+
+namespace FFLAS { namespace vectorised {
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+ template<class SimdT, class Element, bool positive>
+ inline typename std::enable_if<is_simd<SimdT>::value, void>::type
+ VEC_ADD(SimdT & C, SimdT & A, SimdT & B, SimdT & Q, SimdT & T, SimdT & P, SimdT & NEGP, SimdT & MIN, SimdT & MAX)
+ {
+ using simd = Simd<Element>;
+ C = simd::add(A, B);
+ Q = simd::vand(simd::greater(C, MAX),NEGP);
+ if (!positive) {
+ T = simd::vand(simd::lesser(C, MIN),P);
+ Q = simd::vor(Q, T);
+ }
+ C = simd::add(C, Q);
+ }
+
+ template<bool positive, class Element, class T1, class T2>
+ inline typename std::enable_if<FFLAS::support_simd_add<Element>::value, void>::type
+ addp(Element * T, const Element * TA, const Element * TB, size_t n, Element p, T1 min_, T2 max_)
+ {
+ Element min= (Element)min_, max= (Element)max_;
+ using simd = Simd<Element>;
+ using vect_t = typename simd::vect_t;
+
+ size_t i = 0;
+
+ if (n < simd::vect_size)
+ {
+ for (; i < n ; i++)
+ {
+ T[i] = TA[i] + TB[i];
+ T[i] -= (T[i] > max) ? p : 0;
+ if (!positive)
+ {
+ T[i] += (T[i] < min) ? p : 0;
+ }
+ }
+ return;
+
+ }
+
+ vect_t A,B,C,Q,P,NEGP,TMP,MIN,MAX;
+ P = simd::set1(p);
+ NEGP= simd::set1(-p);
+ MIN = simd::set1(min);
+ MAX = simd::set1(max);
+ long st = long(T)%simd::alignment;
+ if (st)
+ { // the array T is not 32 byte aligned (process few elements s.t. (T+i) is 32 bytes aligned)
+ for (size_t j=static_cast<size_t>(st) ; j < simd::alignment ; j += sizeof(Element), i++)
+ {
+ T[i] = TA[i] + TB[i];
+ T[i] -= (T[i] > max) ? p : 0;
+ if (!positive)
+ T[i] += (T[i] < min) ? p : 0;
+ }
+ }
+ FFLASFFPACK_check((long(T+i) % simd::alignment == 0));
+ if ( (long(TA+i)%simd::alignment==0) && (long(TB+i)%simd::alignment==0))
+ {
+ // perform the loop using 256 bits SIMD
+ for (; i <= n - simd::vect_size ; i += simd::vect_size)
+ {
+ // C = simd::load(T+i);
+ A = simd::load(TA+i);
+ B = simd::load(TB+i);
+ VEC_ADD<vect_t,Element,positive>(C, A, B, Q, TMP, P, NEGP, MIN, MAX);
+ simd::store(T+i, C);
+ }
+ }
+ // perform the last elt from T without SIMD
+ for (; i < n ; i++)
+ {
+ T[i] = TA[i] + TB[i];
+ T[i] -= (T[i] > max) ? p : 0;
+ if (!positive)
+ T[i] += (T[i] < min) ? p : 0;
+ }
+ }
+
+ template<class SimdT, class Element,bool positive>
+ inline typename std::enable_if<is_simd<SimdT>::value, void>::type
+ VEC_SUB(SimdT & C, SimdT & A, SimdT & B, SimdT & Q, SimdT & T, SimdT & P, SimdT & NEGP, SimdT & MIN, SimdT & MAX)
+ {
+ using simd = Simd<Element>;
+ C = simd::sub(A, B);
+ T = simd::vand(simd::lesser(C, MIN),P);
+ if (!positive) {
+ Q = simd::vand(simd::greater(C, MAX),NEGP);
+ T = simd::vor(Q, T);
+ }
+ C = simd::add(C, T);
+ }
+
+ template<bool positive, class Element, class T1, class T2>
+ inline typename std::enable_if<FFLAS::support_simd_add<Element>::value, void>::type
+ subp(Element * T, const Element * TA, const Element * TB, const size_t n, const Element p, const T1 min_, const T2 max_)
+ {
+ Element min = (Element)min_, max = (Element)max_;
+ using simd = Simd<Element>;
+ using vect_t = typename simd::vect_t;
+
+ size_t i = 0;
+
+ if (n < simd::vect_size)
+ {
+ for (; i < n ; i++)
+ {
+ T[i] = TA[i] - TB[i];
+ if (!positive)
+ T[i] -= (T[i] > max) ? p : 0;
+ T[i] += (T[i] < min) ? p : 0;
+ }
+ return;
+
+ }
+ vect_t A,B,C,Q,P,NEGP,TMP,MIN,MAX;
+ P = simd::set1(p);
+ NEGP= simd::set1(-p);
+ MIN = simd::set1(min);
+ MAX = simd::set1(max);
+ long st = long(T) % simd::alignment;
+ if (st)
+ { // the array T is not 32 byte aligned (process few elements s.t. (T+i) is 32 bytes aligned)
+ for (size_t j = static_cast<size_t>(st) ; j < simd::alignment ; j += sizeof(Element), i++)
+ {
+ T[i] = TA[i] - TB[i];
+ if (!positive)
+ T[i] -= (T[i] > max) ? p : 0;
+ T[i] += (T[i] < min) ? p : 0;
+ }
+ }
+ FFLASFFPACK_check((long(T+i) % simd::alignment == 0));
+ if ( (long(TA+i) % simd::alignment == 0) && (long(TB+i) % simd::alignment == 0))
+ {
+ // perform the loop using 256 bits SIMD
+ for (; i <= n - simd::vect_size ; i += simd::vect_size)
+ {
+ // C = simd::load(T+i);
+ A = simd::load(TA+i);
+ B = simd::load(TB+i);
+ VEC_SUB<vect_t,Element,positive>(C, A, B, Q, TMP, P, NEGP, MIN, MAX);
+ simd::store(T+i, C);
+ }
+ }
+
+ // perform the last elt from T without SIMD
+ for (; i < n ; i++)
+ {
+ T[i] = TA[i] - TB[i];
+ if (!positive)
+ T[i] -= (T[i] > max) ? p : 0;
+ T[i] += (T[i] < min) ? p : 0;
+ }
+ }
+
+#else // no simd, but faster than F.init()
+ template<bool positive, class Element, class T1, class T2>
+ // inline typename std::enable_if<!FFLAS::support_simd_add<Element>::value, void>::type
+ void
+ subp(Element * T, const Element * TA, const Element * TB, const size_t n, const Element p, const T1 min_, const T2 max_)
+ {
+ Element min = (Element)min_, max = (Element)max_;
+
+ size_t i = 0;
+
+ for (; i < n ; i++)
+ {
+ T[i] = TA[i] - TB[i];
+ if (!positive)
+ T[i] -= (T[i] > max) ? p : 0;
+ T[i] += (T[i] < min) ? p : 0;
+ }
+ return;
+
+ }
+
+ template<bool positive, class Element, class T1, class T2>
+ // inline typename std::enable_if<!FFLAS::support_simd_add<Element>::value, void>::type
+ void
+ addp(Element * T, const Element * TA, const Element * TB, const size_t n, const Element p, const T1 min_, const T2 max_)
+ {
+ Element min= (Element)min_, max= (Element)max_;
+
+ size_t i = 0;
+
+ for (; i < n ; i++)
+ {
+ T[i] = TA[i] + TB[i];
+ T[i] -= (T[i] > max) ? p : 0;
+ if (!positive)
+ {
+ T[i] += (T[i] < min) ? p : 0;
+ }
+ }
+ return;
+ }
+
+
+#endif // __FFLASFFPACK_USE_SIMD
+
+} // vectorised
+} // FFLAS
+
+namespace FFLAS { namespace details {
+
+ /**** Specialised ****/
+
+ template <class Field, bool ADD>
+ typename std::enable_if<FFLAS::support_simd_add<typename Field::Element>::value, void>::type
+ fadd (const Field & F, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t inca,
+ typename Field::ConstElement_ptr B, const size_t incb,
+ typename Field::Element_ptr C, const size_t incc
+ , FieldCategories::ModularTag
+ )
+ {
+ if (inca == 1 && incb == 1 && incc == 1) {
+ typename Field::Element p = (typename Field::Element) F.characteristic();
+ if (ADD)
+ FFLAS::vectorised::addp<!FieldTraits<Field>::balanced>(C,A,B,N,p,F.minElement(),F.maxElement());
+ else
+ FFLAS::vectorised::subp<!FieldTraits<Field>::balanced>(C,A,B,N,p,F.minElement(),F.maxElement());
+ }
+ else {
+ for (size_t i=0; i<N; i++)
+ if (ADD)
+ F.add (C[i*incc], A[i*inca], B[i*incb]);
+ else
+ F.sub (C[i*incc], A[i*inca], B[i*incb]);
+ }
+ }
+
+ template <class Field, bool ADD>
+ typename std::enable_if<!FFLAS::support_simd_add<typename Field::Element>::value, void>::type
+ fadd (const Field & F, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t inca,
+ typename Field::ConstElement_ptr B, const size_t incb,
+ typename Field::Element_ptr C, const size_t incc
+ , FieldCategories::ModularTag
+ )
+ {
+ if (inca == 1 && incb == 1 && incc == 1) {
+ for (size_t i=0; i<N; i++)
+ if (ADD)
+ F.add (C[i], A[i], B[i]);
+ else
+ F.sub (C[i], A[i], B[i]);
+ }
+ else {
+ for (size_t i=0; i<N; i++)
+ if (ADD)
+ F.add (C[i*incc], A[i*inca], B[i*incb]);
+ else
+ F.sub (C[i*incc], A[i*inca], B[i*incb]);
+ }
+ }
+
+
+
+ template <class Field, bool ADD>
+ void
+ fadd (const Field & F, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t inca,
+ typename Field::ConstElement_ptr B, const size_t incb,
+ typename Field::Element_ptr C, const size_t incc
+ , FieldCategories::GenericTag
+ )
+ {
+ if (inca == 1 && incb == 1 && incc == 1) {
+ for (size_t i=0; i<N; i++) {
+ if (ADD)
+ F.add (C[i], A[i], B[i]);
+ else
+ F.sub (C[i], A[i], B[i]);
+ }
+ }
+ else {
+ for (size_t i=0; i<N; i++)
+ if (ADD)
+ F.add (C[i*incc], A[i*inca], B[i*incb]);
+ else
+ F.add (C[i*incc], A[i*inca], B[i*incb]);
+ }
+ }
+
+ template <class Field, bool ADD>
+ void
+ fadd (const Field & F, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t inca,
+ typename Field::ConstElement_ptr B, const size_t incb,
+ typename Field::Element_ptr C, const size_t incc
+ , FieldCategories::UnparametricTag
+ )
+ {
+ for (size_t i=0; i<N; i++)
+ if (ADD)
+ C[i] = A[i] + B[i];
+ else
+ C[i] = A[i] - B[i];
+ }
+
+
+
+} // details
+} // FFLAS
+
+
+#endif // __FFLASFFPACK_fscal_INL
diff --git a/fflas-ffpack/fflas/fflas_faddm.inl b/fflas-ffpack/fflas/fflas_faddm.inl
deleted file mode 100644
index 5146f76..0000000
--- a/fflas-ffpack/fflas/fflas_faddm.inl
+++ /dev/null
@@ -1,288 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* fflas/fflas_faddm.inl
- * Copyright (C) 2010 LinBox
- *
- * Written by Brice Boyer <Brice.Boyer at imag.fr>
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- */
-
-/*! @internal
- * @file fflas/fflas_faddm.inl
- * @ingroup fflas
- * @brief NO DOC
- */
-
-#ifndef __FFLASFFPACK_fflas_faddm_H
-#define __FFLASFFPACK_fflas_faddm_H
-
-#ifdef __FFLASFFPACK_HAVE_SSE2
-#include <emmintrin.h>
-#endif
-
-namespace FFLAS {
-
- template<class Field>
- inline void faddm(const Field & F,
- const FFLAS_TRANSPOSE transA,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb)
- {
- if (!M || !N) return ;
- if (transA == FflasNoTrans)
- Protected::faddmNoTrans<typename Field::Element>()(F,M,N,A,lda,B,ldb);
- else
- Protected::faddmTrans<typename Field::Element>()(F,M,N,A,lda,B,ldb);
- return ;
- }
-
-
- template<class Field>
- inline void faddm(const Field & F,
- const FFLAS_TRANSPOSE transA,
- const FFLAS_TRANSPOSE transB,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * B, const size_t ldb,
- typename Field::Element * C, const size_t ldc )
- {
- if (!M || !N) return ;
- if (transA == FflasNoTrans)
- if (transB == FflasNoTrans)
- Protected::faddmNoTransNoTrans<typename Field::Element>()(F,M,N,A,lda,B,ldb,C,ldc);
- else
- Protected::faddmNoTransTrans<typename Field::Element>()(F,M,N,A,lda,B,ldb,C,ldc);
- else
- if (transB == FflasNoTrans)
- Protected::faddmTransNoTrans<typename Field::Element>()(F,M,N,A,lda,B,ldb,C,ldc);
- else
- Protected::faddmTransTrans<typename Field::Element>()(F,M,N,A,lda,B,ldb,C,ldc);
-
- return ;
- }
-
-
- template<class Field>
- inline void fsubm(const Field & F,
- const FFLAS_TRANSPOSE transA,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb)
-
- {
- if (!M || !N) return ;
- if (transA == FflasNoTrans)
- Protected::fsubmNoTrans<typename Field::Element>()(F,M,N,A,lda,B,ldb);
- else
- Protected::fsubmTrans<typename Field::Element>()(F,M,N,A,lda,B,ldb);
- return ;
- }
-
-
- template<class Field>
- inline void fsubm(const Field & F,
- const FFLAS_TRANSPOSE transA,
- const FFLAS_TRANSPOSE transB,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * B, const size_t ldb,
- typename Field::Element * C, const size_t ldc )
- {
- if (!M || !N) return ;
- if (transA == FflasNoTrans)
- if (transB == FflasNoTrans)
- Protected::fsubmNoTransNoTrans<typename Field::Element>()(F,M,N,A,lda,B,ldb,C,ldc);
- else
- Protected::fsubmNoTransTrans<typename Field::Element>()(F,M,N,A,lda,B,ldb,C,ldc);
- else
- if (transB == FflasNoTrans)
- Protected::fsubmTransNoTrans<typename Field::Element>()(F,M,N,A,lda,B,ldb,C,ldc);
- else
- Protected::fsubmTransTrans<typename Field::Element>()(F,M,N,A,lda,B,ldb,C,ldc);
-
- return ;
- }
-
-#ifndef DOXYGEN_SHOULD_SKIP_THIS
-
- namespace Protected {
-
-#undef __FFLAS__FLOAT
-#undef __FFLAS__DOUBLE
-#undef __FFLAS__GENERIC
-#undef __FFLAS__NOTRANSPOSE
-#undef __FFLAS__ATRANSPOSE
-#undef __FFLAS__ANOTRANSPOSE
-#undef __FFLAS__BTRANSPOSE
-#undef __FFLAS__BNOTRANSPOSE
-#undef __FFLAS__TRANSPOSE
-#undef __FFLAS__NOTRANSPOSE
-
-#define __FFLAS__GENERIC
-#define __FFLAS__NOTRANSPOSE // no transpose
-#include "fflas_faddmin_src.inl"
-#undef __FFLAS__NOTRANSPOSE
-#undef __FFLAS__GENERIC
-
-
-#define __FFLAS__GENERIC
-#define __FFLAS__TRANSPOSE // no transpose
-#include "fflas_faddmin_src.inl"
-#undef __FFLAS__TRANSPOSE
-#undef __FFLAS__GENERIC
-
-#define __FFLAS__FLOAT
-#define __FFLAS__NOTRANSPOSE // no transpose
-#include "fflas_faddmin_src.inl"
-#undef __FFLAS__NOTRANSPOSE
-#undef __FFLAS__FLOAT
-
-#define __FFLAS__FLOAT
-#define __FFLAS__TRANSPOSE // no transpose
-#include "fflas_faddmin_src.inl"
-#undef __FFLAS__TRANSPOSE
-#undef __FFLAS__FLOAT
-
-#define __FFLAS__DOUBLE
-#define __FFLAS__NOTRANSPOSE // no transpose
-#include "fflas_faddmin_src.inl"
-#undef __FFLAS__NOTRANSPOSE
-#undef __FFLAS__DOUBLE
-
-#define __FFLAS__DOUBLE
-#define __FFLAS__TRANSPOSE // no transpose
-#include "fflas_faddmin_src.inl"
-#undef __FFLAS__TRANSPOSE
-#undef __FFLAS__DOUBLE
-
- //
-#define __FFLAS__GENERIC
-#define __FFLAS__ANOTRANSPOSE // no A transpose
-#define __FFLAS__BNOTRANSPOSE // no B transpose
-#include "fflas_faddm_src.inl"
-#undef __FFLAS__BNOTRANSPOSE // no B transpose
-#undef __FFLAS__ANOTRANSPOSE // no A transpose
-#undef __FFLAS__GENERIC
-
-#define __FFLAS__GENERIC
-#define __FFLAS__ANOTRANSPOSE // no A transpose
-#define __FFLAS__BTRANSPOSE // no B transpose
-#include "fflas_faddm_src.inl"
-#undef __FFLAS__BTRANSPOSE // no B transpose
-#undef __FFLAS__ANOTRANSPOSE // no A transpose
-#undef __FFLAS__GENERIC
-
-
-#define __FFLAS__GENERIC
-#define __FFLAS__ATRANSPOSE // no A transpose
-#define __FFLAS__BNOTRANSPOSE // no B transpose
-#include "fflas_faddm_src.inl"
-#undef __FFLAS__BNOTRANSPOSE // no B transpose
-#undef __FFLAS__ATRANSPOSE // no A transpose
-#undef __FFLAS__GENERIC
-
-#define __FFLAS__GENERIC
-#define __FFLAS__ATRANSPOSE // no A transpose
-#define __FFLAS__BTRANSPOSE // no B transpose
-#include "fflas_faddm_src.inl"
-#undef __FFLAS__BTRANSPOSE // no B transpose
-#undef __FFLAS__ATRANSPOSE // no A transpose
-#undef __FFLAS__GENERIC
-
- //
-#define __FFLAS__FLOAT
-#define __FFLAS__ANOTRANSPOSE // no A transpose
-#define __FFLAS__BNOTRANSPOSE // no B transpose
-#include "fflas_faddm_src.inl"
-#undef __FFLAS__BNOTRANSPOSE // no B transpose
-#undef __FFLAS__ANOTRANSPOSE // no A transpose
-#undef __FFLAS__FLOAT
-
-#define __FFLAS__FLOAT
-#define __FFLAS__ANOTRANSPOSE // no A transpose
-#define __FFLAS__BTRANSPOSE // no B transpose
-#include "fflas_faddm_src.inl"
-#undef __FFLAS__BTRANSPOSE // no B transpose
-#undef __FFLAS__ANOTRANSPOSE // no A transpose
-#undef __FFLAS__FLOAT
-
-
-#define __FFLAS__FLOAT
-#define __FFLAS__ATRANSPOSE // no A transpose
-#define __FFLAS__BNOTRANSPOSE // no B transpose
-#include "fflas_faddm_src.inl"
-#undef __FFLAS__BNOTRANSPOSE // no B transpose
-#undef __FFLAS__ATRANSPOSE // no A transpose
-#undef __FFLAS__FLOAT
-
-#define __FFLAS__FLOAT
-#define __FFLAS__ATRANSPOSE // no A transpose
-#define __FFLAS__BTRANSPOSE // no B transpose
-#include "fflas_faddm_src.inl"
-#undef __FFLAS__BTRANSPOSE // no B transpose
-#undef __FFLAS__ATRANSPOSE // no A transpose
-#undef __FFLAS__FLOAT
-
- //
-#define __FFLAS__DOUBLE
-#define __FFLAS__ANOTRANSPOSE // no A transpose
-#define __FFLAS__BNOTRANSPOSE // no B transpose
-#include "fflas_faddm_src.inl"
-#undef __FFLAS__BNOTRANSPOSE // no B transpose
-#undef __FFLAS__ANOTRANSPOSE // no A transpose
-#undef __FFLAS__DOUBLE
-
-#define __FFLAS__DOUBLE
-#define __FFLAS__ANOTRANSPOSE // no A transpose
-#define __FFLAS__BTRANSPOSE // no B transpose
-#include "fflas_faddm_src.inl"
-#undef __FFLAS__BTRANSPOSE // no B transpose
-#undef __FFLAS__ANOTRANSPOSE // no A transpose
-#undef __FFLAS__DOUBLE
-
-
-#define __FFLAS__DOUBLE
-#define __FFLAS__ATRANSPOSE // no A transpose
-#define __FFLAS__BNOTRANSPOSE // no B transpose
-#include "fflas_faddm_src.inl"
-#undef __FFLAS__BNOTRANSPOSE // no B transpose
-#undef __FFLAS__ATRANSPOSE // no A transpose
-#undef __FFLAS__DOUBLE
-
-#define __FFLAS__DOUBLE
-#define __FFLAS__ATRANSPOSE // no A transpose
-#define __FFLAS__BTRANSPOSE // no B transpose
-#include "fflas_faddm_src.inl"
-#undef __FFLAS__BTRANSPOSE // no B transpose
-#undef __FFLAS__ATRANSPOSE // no A transpose
-#undef __FFLAS__DOUBLE
-
-
-
- } // Protected
-
-#endif // SKIPPED BY DOXYGEN
-
-} // FFLAS
-
-#endif // __FFLASFFPACK_fflas_faddm_H
diff --git a/fflas-ffpack/fflas/fflas_faddm_src.inl b/fflas-ffpack/fflas/fflas_faddm_src.inl
deleted file mode 100644
index 732d6f2..0000000
--- a/fflas-ffpack/fflas/fflas_faddm_src.inl
+++ /dev/null
@@ -1,240 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* fflas/fflas_ftrmm_src.inl
- * Copyright (C) 2010 LinBox
- *
- * Written by Brice Boyer <Brice.Boyer at imag.fr>
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- */
-
-#define Mjoin(pre, nam) my_join(pre, nam)
-#define my_join(pre, nam) pre ## nam
-
-#ifdef __FFLAS__BNOTRANSPOSE
-#ifdef __FFLAS__ANOTRANSPOSE
-#define __FFLAS_incA 1
-#define __FFLAS_ldA lda
-#define __FFLAS_A_Trans NoTrans
-#define __FFLAS_incB 1
-#define __FFLAS_ldB ldb
-#define __FFLAS_B_Trans NoTrans
-#endif
-#ifdef __FFLAS__ATRANSPOSE
-#define __FFLAS_incA lda
-#define __FFLAS_ldA 1
-#define __FFLAS_A_Trans Trans
-#define __FFLAS_incB 1
-#define __FFLAS_ldB ldb
-#define __FFLAS_B_Trans NoTrans
-#endif
-#else
-#ifdef __FFLAS__ANOTRANSPOSE
-#define __FFLAS_incA 1
-#define __FFLAS_ldA lda
-#define __FFLAS_A_Trans NoTrans
-#define __FFLAS_incB ldb
-#define __FFLAS_ldB 1
-#define __FFLAS_B_Trans Trans
-#endif
-#ifdef __FFLAS__ATRANSPOSE
-#define __FFLAS_incA lda
-#define __FFLAS_ldA 1
-#define __FFLAS_A_Trans Trans
-#define __FFLAS_incB ldb
-#define __FFLAS_ldB 1
-#define __FFLAS_B_Trans Trans
-#endif
-#endif
-
-
-#ifdef __FFLAS__FLOAT
-#define __FFLAS_Element float
-#endif
-#ifdef __FFLAS__DOUBLE
-#define __FFLAS_Element double
-#endif
-#ifdef __FFLAS__GENERIC
-#define __FFLAS_Element Element
-#endif
-
-#ifndef __FFLAS__GENERIC
-template<>
-class Mjoin(faddm, Mjoin(__FFLAS_A_Trans, __FFLAS_B_Trans))<__FFLAS_Element> {
-public :
- template<class Field>
- void operator() (const Field & F,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * B, const size_t ldb,
- typename Field::Element * C, const size_t ldc)
- {
- if (!M || !N ) return; // ne doit jamais arriver, déjà testé !
-
- // adding (precomputing tB ?)
- for (size_t i = 0 ; i < M ; ++i)
- for (size_t j = 0 ; j < N ; ++j)
- F.add(*(C+i*ldc+j), *(A+i*__FFLAS_ldA+j*__FFLAS_incA), *(B+i*__FFLAS_ldB+j*__FFLAS_incB)) ;
-
-
- return ;
- }
-
-};
-#else
-template<class Element>
-class Mjoin(faddm, Mjoin(__FFLAS_A_Trans, __FFLAS_B_Trans)) {
-public :
- template<class Field>
- void operator() (const Field & F,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * B, const size_t ldb,
- typename Field::Element * C, const size_t ldc)
- {
- if (!M || !N ) return; // ne doit jamais arriver, déjà testé !
-
- // adding (precomputing tB ?)
- for (size_t i = 0 ; i < M ; ++i)
-#ifndef __FFLASFFPACK_HAVE_SSE2
- for (size_t j = 0 ; j < N ; ++j)
- *(C+i*ldc+j) = *(A+i*__FFLAS_ldA+j*__FFLAS_incA) + *(B+i*__FFLAS_ldB+j*__FFLAS_incB) ;
-#else
-#if defined(__FFLAS__ATRANSPOSE) || defined(__FFLAS__BTRANSPOSE)
- for (size_t j = 0 ; j < N ; ++j)
- *(C+i*ldc+j) = *(A+i*__FFLAS_ldA+j*__FFLAS_incA) + *(B+i*__FFLAS_ldB+j*__FFLAS_incB) ;
-#else
-#ifdef __FFLAS__DOUBLE
- {
- size_t j = 0 ;
- __m128d *av, *bv, *cv;
- av = (__m128d*)A+i*__FFLAS_ldA; // assume 16-byte aligned
- bv = (__m128d*)B+i*__FFLAS_ldB; // assume 16-byte aligned
- cv = (__m128d*)C+i*ldc; // assume 16-byte aligned
- for (j = 0; j < N/2; ++j)
- cv[j] = _mm_add_pd(av[j], bv[j]);
- j *= 2 ;
- for (; j < N ; j++)
- *(C+i*ldc+j) = *(A+i*lda+j) + *(B+i*ldb+j) ;
- }
-#else
- {
- size_t j = 0 ;
- __m128 *av, *bv, *cv;
- av = (__m128*)A+i*__FFLAS_ldA; // assume 16-byte aligned
- bv = (__m128*)B+i*__FFLAS_ldB; // assume 16-byte aligned
- cv = (__m128*)C+i*ldc; // assume 16-byte aligned
- for (j = 0; j < N/4; ++j)
- cv[j] = _mm_add_ps(av[j], bv[j]);
- j *= 4 ;
- for (; j < N ; j++)
- *(C+i*ldc+j) = *(A+i*lda+j) + *(B+i*ldb+j) ;
- }
-#endif
-#endif
-#endif
- // reducing :
- if (M == ldc )
- for (size_t i = 0 ; i < M*N ; ++i)
- F.init(*(C+i),*(C+i));
- else
- for (size_t i = 0 ; i < M ; ++i)
- for (size_t j = 0 ; j < N ; ++j)
- F.init(*(C+i*ldc+j), *(C+i*ldc+j));
-
-
- return ;
- }
-
-};
-#endif
-
-#ifndef __FFLAS__GENERIC
-template<>
-class Mjoin(fsubm, Mjoin(__FFLAS_A_Trans, __FFLAS_B_Trans))<__FFLAS_Element > {
-public :
- template<class Field>
- void operator() (const Field & F,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * B, const size_t ldb,
- typename Field::Element * C, const size_t ldc)
- {
- if (!M || !N ) return; // ne doit jamais arriver, déjà testé !
-
- // adding (precomputing tB ?)
- for (size_t i = 0 ; i < M ; ++i)
- for (size_t j = 0 ; j < N ; ++j)
- *(C+i*ldc+j) = *(A+i*__FFLAS_ldA+j*__FFLAS_incA) - *(B+i*__FFLAS_ldB+j*__FFLAS_incB) ;
-
- // reducing :
- if (M == ldc )
- for (size_t i = 0 ; i < M*N ; ++i)
- F.init(*(C+i),*(C+i));
- else
- for (size_t i = 0 ; i < M ; ++i)
- for (size_t j = 0 ; j < N ; ++j)
- F.init(*(C+i*ldc+j), *(C+i*ldc+j));
-
- return ;
-
- }
-
-};
-#else
-template<class Element>
-class Mjoin(fsubm,Mjoin(__FFLAS_A_Trans,__FFLAS_B_Trans)) {
-public :
- template<class Field>
- void operator() (const Field & F,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * B, const size_t ldb,
- typename Field::Element * C, const size_t ldc)
- {
- if (!M || !N ) return; // ne doit jamais arriver, déjà testé !
-
- // adding (precomputing tB ?)
- for (size_t i = 0 ; i < M ; ++i)
- for (size_t j = 0 ; j < N ; ++j)
- F.sub(*(C+i*ldc+j), *(A+i*__FFLAS_ldA+j*__FFLAS_incA), *(B+i*__FFLAS_ldB+j*__FFLAS_incB)) ;
- return ;
-
- }
-
-};
-#endif
-
-
-#undef Mjoin
-#undef my_join
-#undef __FFLAS_incB
-#undef __FFLAS_incA
-#undef __FFLAS_Element
-#undef __FFLAS_ldA
-#undef __FFLAS_ldB
-#undef __FFLAS_Trans
-#undef __FFLAS_A_Trans
-#undef __FFLAS_B_Trans
-#undef FFLAS_A_inc
-#undef FFLAS_A_ld
-
-
diff --git a/fflas-ffpack/fflas/fflas_faddmin_src.inl b/fflas-ffpack/fflas/fflas_faddmin_src.inl
deleted file mode 100644
index 6575c1c..0000000
--- a/fflas-ffpack/fflas/fflas_faddmin_src.inl
+++ /dev/null
@@ -1,211 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* fflas/fflas_ftrmm_src.inl
- * Copyright (C) 2010 LinBox
- *
- * Written by Brice Boyer <Brice.Boyer at imag.fr>
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- */
-
-/** @internal
- * @file fflas/fflas_faddmin_src.inl
- * @ingroup fflas
- * @brief NO DOC
- */
-
-#define Mjoin(pre, nam) my_join(pre, nam)
-#define my_join(pre, nam) pre ## nam
-
-#ifdef __FFLAS__NOTRANSPOSE
-#define __FFLAS_A_inc 1
-#define __FFLAS_A_ld lda
-#define __FFLAS_Trans NoTrans
-#else
-#define __FFLAS_A_inc M
-#define __FFLAS_A_ld 1
-#define __FFLAS_Trans Trans
-#endif
-
-
-#ifdef __FFLAS__FLOAT
-#define __FFLAS_Element float
-#endif
-#ifdef __FFLAS__DOUBLE
-#define __FFLAS_Element double
-#endif
-#ifdef __FFLAS__GENERIC
-#define __FFLAS_Element Element
-#endif
-
-#ifndef __FFLAS__GENERIC
-template<>
-class Mjoin(faddm, __FFLAS_Trans)<__FFLAS_Element> {
-public :
- template<class Field>
- void operator() (const Field & F,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb)
- {
- if (!M || !N ) return; // ne doit jamais arriver, déjà testé !
-
- // adding (precomputing tB ?)
- for (size_t i = 0 ; i < M ; ++i)
-#ifndef __FFLASFFPACK_HAVE_SSE2
- for (size_t j = 0 ; j < N ; ++j)
- *(B+i*ldb+j) += *(A+i*__FFLAS_A_ld+j*__FFLAS_A_inc) ;
-#else
-#if defined(__FFLAS__TRANSPOSE)
- for (size_t j = 0 ; j < N ; ++j)
- *(B+i*ldb+j) += *(A+i*__FFLAS_A_ld+j*__FFLAS_A_inc) ;
-#else
-#ifdef __FFLAS__DOUBLE
- {
- size_t j = 0 ;
- __m128d *av, *bv;
- av = (__m128d*)A+i*lda; // assume 16-byte aligned
- bv = (__m128d*)B+i*ldb; // assume 16-byte aligned
- for (j = 0; j < N/2; ++j)
- _mm_add_pd(bv[j], av[j]);
- j *= 2 ;
- for (; j<N ; ++j)
- *(B+i*ldb+j) += *(A+i*lda+j) ;
- }
-#else
- {
- size_t j = 0 ;
- __m128 *av, *bv;
- av = (__m128*)A+i*lda; // assume 16-byte aligned
- bv = (__m128*)B+i*ldb; // assume 16-byte aligned
- for (j = 0; j < N/4; ++j)
- _mm_add_ps(bv[j], av[j]);
- j *= 4 ;
- for (; j < N ; j++)
- *(B+i*ldb+j) += *(A+i*lda+j) ;
- }
-#endif
-#endif
-#endif
-
- // reducing :
- if (M == ldb )
- for (size_t i = 0 ; i < M*N ; ++i)
- F.init(*(B+i),*(B+i));
- else
- for (size_t i = 0 ; i < M ; ++i)
- for (size_t j = 0 ; j < N ; ++j)
- F.init(*(B+i*ldb+j), *(B+i*ldb+j));
-
- return ;
- }
-
-};
-#else
-template<class Element>
-class Mjoin(faddm, __FFLAS_Trans) {
-public :
- template<class Field>
- void operator() (const Field & F,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb)
- {
- if (!M || !N ) return; // ne doit jamais arriver, déjà testé !
-
- // adding (precomputing tB ?)
- for (size_t i = 0 ; i < M ; ++i)
- for (size_t j = 0 ; j < N ; ++j)
- F.addin(*(B+i*ldb+j), *(A+i*__FFLAS_A_ld+j*__FFLAS_A_inc)) ;
-
- return ;
- }
-
-};
-#endif
-
-#ifndef __FFLAS__GENERIC
-template<>
-class Mjoin(fsubm, __FFLAS_Trans)<__FFLAS_Element > {
-public :
- template<class Field>
- void operator() (const Field & F,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb)
- {
- if (!M || !N ) return; // ne doit jamais arriver, déjà testé !
-
- // adding (precomputing tB ?)
- for (size_t i = 0 ; i < M ; ++i)
- for (size_t j = 0 ; j < N ; ++j)
- *(B+i*ldb+j) -= *(A+i*__FFLAS_A_ld+j*__FFLAS_A_inc) ;
-
- // reducing :
- if (M == ldb )
- for (size_t i = 0 ; i < M*N ; ++i)
- F.init(*(B+i),*(B+i));
- else
- for (size_t i = 0 ; i < M ; ++i)
- for (size_t j = 0 ; j < N ; ++j)
- F.init(*(B+i*ldb+j), *(B+i*ldb+j));
- return ;
- }
-
-};
-#else
-template<class Element>
-class Mjoin(fsubm,__FFLAS_Trans) {
-public :
- template<class Field>
- void operator() (const Field & F,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb)
- {
- if (!M || !N ) return; // ne doit jamais arriver, déjà testé !
-
- // adding (precomputing tB ?)
- for (size_t i = 0 ; i < M ; ++i)
- for (size_t j = 0 ; j < N ; ++j)
- F.subin(*(B+i*ldb+j), *(A+i*__FFLAS_A_ld+j*__FFLAS_A_inc)) ;
-
- return ;
- }
-
-};
-#endif
-
-
-#undef Mjoin
-#undef my_join
-#undef __FFLAS_incB
-#undef __FFLAS_incA
-#undef __FFLAS_Element
-#undef __FFLAS_ldA
-#undef __FFLAS_ldB
-#undef __FFLAS_Trans
-#undef __FFLAS_A_Trans
-#undef __FFLAS_B_Trans
-#undef __FFLAS_A_inc
-#undef __FFLAS_A_ld
-
-
diff --git a/fflas-ffpack/fflas-ffpack.h b/fflas-ffpack/fflas/fflas_fassign.h
similarity index 71%
copy from fflas-ffpack/fflas-ffpack.h
copy to fflas-ffpack/fflas/fflas_fassign.h
index ceeb9c0..8bdefff 100644
--- a/fflas-ffpack/fflas-ffpack.h
+++ b/fflas-ffpack/fflas/fflas_fassign.h
@@ -1,7 +1,11 @@
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2011 FFLAS-FFPACK
- * Written by <brice.boyer at imag.fr>
+
+/* fflas/fflas_fassign.inl
+ * Copyright (C) 2014 FFLAS FFPACK group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
*
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
@@ -20,20 +24,14 @@
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* ========LICENCE========
- *
- */
-
-/*! @file fflas-ffpack/fflas-ffpack.h
- * @ingroup fflas-ffpack
- * @brief Includes FFLAS and FFPACK
+ *.
*/
+#ifndef __FFLASFFPACK_fassign_H
+#define __FFLASFFPACK_fassign_H
-#ifndef __FFLASFFPACK_fflas_ffpack_H
-#define __FFLASFFPACK_fflas_ffpack_H
+//! @todo field traits here too
+#include "fflas_fassign.inl"
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
-#include "fflas/fflas.h"
-#include "ffpack/ffpack.h"
+#endif // __FFLASFFPACK_fassign_H
-#endif // __FFLASFFPACK_fflas_ffpack_H
diff --git a/fflas-ffpack/fflas/fflas_fassign.inl b/fflas-ffpack/fflas/fflas_fassign.inl
new file mode 100644
index 0000000..d18665f
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fassign.inl
@@ -0,0 +1,171 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* fflas/fflas_fassign.inl
+ * Copyright (C) 2007 Clement Pernet
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fassign_INL
+#define __FFLASFFPACK_fassign_INL
+
+#include <string.h>
+#include <givaro/modular.h>
+#include <givaro/modular-balanced.h>
+#include <givaro/zring.h>
+
+#include "fflas-ffpack/utils/debug.h"
+
+namespace FFLAS {
+
+
+ /***************************/
+ /* LEVEL 1 */
+ /***************************/
+
+
+ template<class Field>
+ inline void
+ fassign (const Field& F, const size_t N,
+ typename Field::ConstElement_ptr Y, const size_t incY,
+ typename Field::Element_ptr X, const size_t incX)
+ {
+ typename Field::Element_ptr Xi = X;
+ typename Field::ConstElement_ptr Yi=Y;
+
+ if (incX == 1 && incY == 1) {
+ for (; Xi < X+N; ++Xi, ++Yi)
+ F.assign(*Xi,*Yi);
+
+ }
+ else {
+ for (; Xi < X+N*incX; Xi+=incX, Yi+=incY )
+ F.assign(*Xi,*Yi);
+ }
+ return;
+ }
+
+ template<>
+ inline void
+ fassign (const Givaro::Modular<float>& F, const size_t N,
+ const float * Y, const size_t incY,
+ float * X, const size_t incX)
+ {
+
+ cblas_scopy((int)N,Y,(int)incY,X,(int)incX);
+
+ return;
+ }
+
+ template<>
+ inline void
+ fassign (const Givaro::ModularBalanced<float>& F, const size_t N,
+ const float * Y, const size_t incY,
+ float * X, const size_t incX)
+ {
+
+ cblas_scopy((int)N,Y,(int)incY,X,(int)incX);
+
+ return;
+ }
+
+ template<>
+ inline void
+ fassign (const Givaro::ZRing<float>& F, const size_t N,
+ const float * Y, const size_t incY,
+ float * X, const size_t incX)
+ {
+
+ cblas_scopy((int)N,Y,(int)incY,X,(int)incX);
+
+ return;
+ }
+
+ template<>
+ inline void
+ fassign (const Givaro::Modular<double>& F, const size_t N,
+ const double * Y, const size_t incY,
+ double * X, const size_t incX)
+ {
+
+ cblas_dcopy((int)N,Y,(int)incY,X,(int)incX);
+
+ return;
+ }
+
+ template<>
+ inline void
+ fassign (const Givaro::ModularBalanced<double>& F, const size_t N,
+ const double * Y, const size_t incY,
+ double * X, const size_t incX)
+ {
+
+ cblas_dcopy((int)N,Y,(int)incY,X,(int)incX);
+
+ return;
+ }
+
+ template<>
+ inline void
+ fassign (const Givaro::ZRing<double>& F, const size_t N,
+ const double * Y, const size_t incY ,
+ double * X, const size_t incX)
+ {
+
+ cblas_dcopy((int)N,Y,(int)incY,X,(int)incX);
+
+ return;
+ }
+
+
+ /***************************/
+ /* LEVEL 2 */
+ /***************************/
+
+
+ template<class Field>
+ void fassign (const Field& F, const size_t m, const size_t n,
+ typename Field::ConstElement_ptr B, const size_t ldb ,
+ typename Field::Element_ptr A, const size_t lda)
+ {
+ FFLASFFPACK_check(n<=std::min(lda,ldb));
+ // if possible, copy one big block
+ if (lda == n && ldb == n) {
+ fassign(F,m*n,B,1,A,1);
+ return ;
+ }
+ // else, copy row after row
+ for (size_t i = 0 ; i < m ; ++i) {
+ fassign(F,n,B+i*ldb,1,A+i*lda,1);
+ }
+ return;
+
+ }
+
+
+} // FFLAS
+
+
+#endif // __FFLASFFPACK_fassign_INL
diff --git a/fflas-ffpack/fflas/fflas_faxpy.inl b/fflas-ffpack/fflas/fflas_faxpy.inl
index 2c171ae..c00c008 100644
--- a/fflas-ffpack/fflas/fflas_faxpy.inl
+++ b/fflas-ffpack/fflas/fflas_faxpy.inl
@@ -6,20 +6,20 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -38,27 +38,77 @@ template<class Field>
inline void
faxpy( const Field& F, const size_t N,
const typename Field::Element a,
- const typename Field::Element * X, const size_t incX,
- typename Field::Element * Y, const size_t incY )
+ typename Field::ConstElement_ptr X, const size_t incX,
+ typename Field::Element_ptr Y, const size_t incY )
{
- const typename Field::Element * Xi = X;
- typename Field::Element * Yi=Y;
+ if (F.isZero(a))
+ return ;
+
+ if (F.isOne(a))
+ return faddin(F,N,X,incX,Y,incY);
+ //return fassign(F,N,X,incX,Y,incY);
+
+ if (F.isMOne(a))
+ return fsubin(F,N,X,incX,Y,incY);
+ //return fneg(F,N,X,incX,Y,incY);
+
+ typename Field::ConstElement_ptr Xi = X;
+ typename Field::Element_ptr Yi=Y;
for (; Xi < X+N*incX; Xi+=incX, Yi+=incY )
F.axpyin( *Yi, a, *Xi );
}
template<>
inline void
-faxpy( const DoubleDomain& , const size_t N,
- const DoubleDomain::Element a,
- const DoubleDomain::Element * x, const size_t incx,
- DoubleDomain::Element * y, const size_t incy )
+faxpy( const Givaro::DoubleDomain& , const size_t N,
+ const Givaro::DoubleDomain::Element a,
+ Givaro::DoubleDomain::ConstElement_ptr x, const size_t incx,
+ Givaro::DoubleDomain::Element_ptr y, const size_t incy )
{
cblas_daxpy( (int)N, a, x, (int)incx, y, (int)incy);
}
+template<>
+inline void
+faxpy( const Givaro::FloatDomain& , const size_t N,
+ const Givaro::FloatDomain::Element a,
+ Givaro::FloatDomain::ConstElement_ptr x, const size_t incx,
+ Givaro::FloatDomain::Element_ptr y, const size_t incy )
+{
+
+ cblas_saxpy( (int)N, a, x, (int)incx, y, (int)incy);
+}
+
+template<class Field>
+inline void
+faxpy( const Field& F, const size_t m, const size_t n,
+ const typename Field::Element a,
+ typename Field::ConstElement_ptr X, const size_t ldX,
+ typename Field::Element_ptr Y, const size_t ldY )
+{
+
+ if (F.isZero(a))
+ return ;
+
+ if (F.isOne(a))
+ return faddin(F,m,n,X,ldX,Y,ldY);
+ //return fassign(F,m,n,X,ldX,Y,ldY);
+
+ if (F.isMOne(a))
+ return fsubin(F,m,n,X,ldX,Y,ldY);
+ //return fneg(F,m,n,X,ldX,Y,ldY);
+
+ if (n == ldX && n == ldY)
+ return faxpy(F,m*n,a,X,1,Y,1);
+
+ typename Field::ConstElement_ptr Xi = X;
+ typename Field::Element_ptr Yi=Y;
+ for (; Xi < X+m*ldX; Xi+=ldX, Yi+=ldY )
+ faxpy(F,n,a,Xi,1,Yi,1);
+}
+
} // FFLAS
#endif // __FFLASFFPACK_faxpy_INL
diff --git a/fflas-ffpack/fflas/fflas_fcopy.inl b/fflas-ffpack/fflas/fflas_fcopy.inl
deleted file mode 100644
index a37000b..0000000
--- a/fflas-ffpack/fflas/fflas_fcopy.inl
+++ /dev/null
@@ -1,82 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/* fflas/fflas_fcopy.inl
- * Copyright (C) 2007 Clement Pernet
- *
- * Written by Clement Pernet <Clement.Pernet at imag.fr>
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- */
-
-#ifndef __FFLASFFPACK_fcopy_INL
-#define __FFLASFFPACK_fcopy_INL
-
-#include <string.h>
-
-namespace FFLAS {
-
- template<class Field>
- inline void
- fcopy (const Field& F, const size_t N,
- typename Field::Element * X, const size_t incX,
- const typename Field::Element * Y, const size_t incY )
- {
- typename Field::Element * Xi = X;
- const typename Field::Element * Yi=Y;
-
- if (incY == 1 && incY == 1) {
- // memcpy(X,Y,N*sizeof(typename Field::Element)); // much faster (hopefully)
- for (; Xi < X+N; ++Xi, ++Yi)
- F.assign(*Xi,*Yi);
-
- return;
- }
- for (; Xi < X+N*incX; Xi+=incX, Yi+=incY )
- F.assign(*Xi,*Yi);
- return;
- }
-
- template<class Field>
- void fcopy (const Field& F, const size_t m, const size_t n,
- typename Field::Element * A, const size_t lda,
- const typename Field::Element * B, const size_t ldb )
- {
- FFLASFFPACK_check(n<=lda);
- FFLASFFPACK_check(n<=ldb);
- // if possible, copy one big block
- if (lda == n && ldb == n) {
- fcopy(F,m*n,A,1,B,1);
- return ;
- }
- // else, copy row after row
- for (size_t i = 0 ; i < m ; ++i) {
- fcopy(F,n,A+i*lda,1,B+i*ldb,1);
- }
- return;
-
- }
-
-
-}
-
-
-#endif // __FFLASFFPACK_fcopy_INL
diff --git a/fflas-ffpack/fflas/fflas_fdot.inl b/fflas-ffpack/fflas/fflas_fdot.inl
index d7faa5e..44b6205 100644
--- a/fflas-ffpack/fflas/fflas_fdot.inl
+++ b/fflas-ffpack/fflas/fflas_fdot.inl
@@ -6,20 +6,20 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -41,29 +41,39 @@ namespace FFLAS {
template<class Field>
inline typename Field::Element
fdot( const Field& F, const size_t N,
- const typename Field::Element * x, const size_t incx,
- const typename Field::Element * y, const size_t incy )
+ typename Field::ConstElement_ptr x, const size_t incx,
+ typename Field::ConstElement_ptr y, const size_t incy )
{
typename Field::Element d;
- const typename Field::Element* xi = x;
- const typename Field::Element* yi = y;
- F.init( d, 0 );
+ typename Field::ConstElement_ptr xi = x;
+ typename Field::ConstElement_ptr yi = y;
+ F.init( d );
for ( ; xi < x+N*incx; xi+=incx, yi+=incy )
F.axpyin( d, *xi, *yi );
return d;
}
template<>
- inline DoubleDomain::Element
- fdot( const DoubleDomain& , const size_t N,
- const DoubleDomain::Element * x, const size_t incx,
- const DoubleDomain::Element * y, const size_t incy )
+ inline Givaro::DoubleDomain::Element
+ fdot( const Givaro::DoubleDomain& , const size_t N,
+ Givaro::DoubleDomain::ConstElement_ptr x, const size_t incx,
+ Givaro::DoubleDomain::ConstElement_ptr y, const size_t incy )
{
return cblas_ddot( (int)N, x, (int)incx, y, (int)incy );
}
+ template<>
+ inline Givaro::FloatDomain::Element
+ fdot( const Givaro::FloatDomain& , const size_t N,
+ Givaro::FloatDomain::ConstElement_ptr x, const size_t incx,
+ Givaro::FloatDomain::ConstElement_ptr y, const size_t incy )
+ {
+
+ return cblas_sdot( (int)N, x, (int)incx, y, (int)incy );
+ }
+
} // FFLAS
#endif // __FFLASFFPACK_fdot_INL
diff --git a/fflas-ffpack/fflas/fflas_fgemm.inl b/fflas-ffpack/fflas/fflas_fgemm.inl
index 0d135e9..83e2d15 100644
--- a/fflas-ffpack/fflas/fflas_fgemm.inl
+++ b/fflas-ffpack/fflas/fflas_fgemm.inl
@@ -1,10 +1,12 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
/* fflas/fflas_fgemm.inl
* Copyright (C) 2005 Clement Pernet
+ * Copyright (C) 2014 the FFLAS-FFPACK group
*
* Written by Clement Pernet < Clement.Pernet at imag.fr >
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
*
*
* ========LICENCE========
@@ -30,1754 +32,520 @@
#ifndef __FFLASFFPACK_fgemm_INL
#define __FFLASFFPACK_fgemm_INL
-#ifndef MAX
-#define MAX(a,b) (a < b)?b:a
-#endif
-#ifndef MIN
-#define MIN(a,b) (a > b)?b:a
-#endif
-
-namespace FFLAS {
-
- namespace Protected {
+#include <givaro/modular.h>
+#include <givaro/modular-balanced.h>
+#include "fflas-ffpack/utils/debug.h"
- // Note:
- // The domain is supposed to be a field since some divisions are required for efficiency purposes
- // An alternative has to be written for finite rings if necessary
+namespace FFLAS { namespace Protected{
- // Classic Multiplication over double
- // Classic multiplication over a finite field
- template < class Field >
- inline void ClassicMatmul (const Field& F,
+ template <typename FloatElement, class Field, class FieldMode>
+ inline typename Field::Element_ptr
+ fgemm_convert (const Field& F,
const FFLAS_TRANSPOSE ta,
const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n,const size_t k,
+ const size_t m, const size_t n, const size_t k,
const typename Field::Element alpha,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * B, const size_t ldb,
+ typename Field::ConstElement_ptr A,const size_t lda,
+ typename Field::ConstElement_ptr B,const size_t ldb,
const typename Field::Element beta,
- typename Field::Element* C, const size_t ldc,
- const size_t kmax, const FFLAS_BASE base)
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldMode> & H)
{
- typename Field::Element tmp;
-
- size_t k2 = MIN(k,kmax); // Size of the blocks
-
- if (k2 > 1) {
- if (base == FflasDouble){
- DoubleDomain::Element alphad, betad;
- DoubleDomain::Element * Add = new DoubleDomain::Element[m*k2];
- DoubleDomain::Element * Bdd = new DoubleDomain::Element[k2*n];
- DoubleDomain::Element * Cd = new DoubleDomain::Element[m*n];
-
- size_t nblock = k / kmax;
- size_t remblock = k % kmax;
- if (!remblock) {
- remblock = kmax ;
- --nblock;
- }
- if (F.areEqual (F.mOne, beta)) betad = -1.0;
- else F.convert (betad, beta);
-
- if (F.areEqual (F.mOne, alpha)) alphad = -1.0;
- else {
- alphad = 1.0;
- if (! F.areEqual (F.one, alpha)) {
- // Compute y = A*x + beta/alpha.y
- // and after y *= alpha
- FFLASFFPACK_check(!F.isZero(alpha));
- F.div (tmp, beta, alpha);
- F.convert (betad, tmp);
- }
- }
-
- size_t dlda, dldb;
- if (!F.isZero(beta))
- MatF2MatD (F, Cd, n, C, ldc, m, n);
-
- if (ta == FflasTrans) {
- dlda = m;
- MatF2MatD (F, Add, dlda, A+k2*nblock*lda, lda, remblock, m);
- } else {
- dlda = k2;
- MatF2MatD (F, Add, dlda, A+k2*nblock, lda, m, remblock);
- }
- if (tb == FflasTrans) {
- dldb = k2;
- MatF2MatD (F, Bdd, k2, B+k2*nblock, ldb, n, remblock);
- } else {
- dldb = n;
- MatF2MatD (F, Bdd, dldb, B+k2*nblock*ldb, ldb, remblock, n);
- }
-
- ClassicMatmul (DoubleDomain(), ta, tb, m, n, remblock, alphad, Add, dlda,
- Bdd, dldb, betad, Cd, n, kmax,base );
-
- MatD2MatF (F, C, ldc, Cd, n, m, n);
- MatF2MatD (F, Cd, n, C, ldc, m, n);
-
- for (size_t i = 0; i < nblock; ++i) {
- if (ta == FflasTrans) MatF2MatD (F, Add, dlda, A+k2*i*lda, lda, k2, m);
- else MatF2MatD (F, Add, dlda, A+k2*i, lda, m, k2);
-
- if (tb == FflasTrans) MatF2MatD (F, Bdd, dldb, B+k2*i, ldb, n, k2);
- else MatF2MatD (F, Bdd, dldb, B+k2*i*ldb, ldb, k2, n);
-
- ClassicMatmul (DoubleDomain(), ta, tb, m, n, k2, alphad, Add, dlda,
- Bdd, dldb, 1.0, Cd, n, kmax,base);
- MatD2MatF (F, C, ldc, Cd, n, m, n);
- MatF2MatD (F, Cd, n, C, ldc, m, n);
- }
- if ((!F.areEqual (F.one, alpha)) && (!F.areEqual (F.mOne, alpha)))
- for (typename Field::Element * Ci = C; Ci < C+m*ldc; Ci += ldc)
- for (size_t j = 0; j < n; ++j)
- F.mulin (* (Ci + j), alpha);
- delete[] Add;
- delete[] Bdd;
- delete[] Cd;
- } else {
- FloatDomain::Element alphad, betad;
- FloatDomain::Element * Add = new FloatDomain::Element[m*k2];
- FloatDomain::Element * Bdd = new FloatDomain::Element[k2*n];
- FloatDomain::Element * Cd = new FloatDomain::Element[m*n];
-
- size_t nblock = k / kmax;
- size_t remblock = k % kmax;
- if (!remblock) {
- remblock = kmax;
- --nblock;
- }
- if (F.areEqual (F.mOne, beta)) betad = -1.0;
- else F.convert (betad, beta);
-
- if (F.areEqual (F.mOne, alpha)) alphad = -1.0;
- else {
- alphad = 1.0;
- if (! F.areEqual (F.one, alpha)) {
- // Compute y = A*x + beta/alpha.y
- // and after y *= alpha
- FFLASFFPACK_check(!F.isZero(alpha));
- F.div (tmp, beta, alpha);
- F.convert (betad, tmp);
- }
- }
-
- size_t dlda, dldb;
- if (!F.isZero(beta))
- MatF2MatFl (F, Cd, n, C, ldc, m, n);
-
- if (ta == FflasTrans) {
- dlda = m;
- MatF2MatFl (F, Add, dlda, A+k2*nblock*lda, lda, remblock, m);
- } else {
- dlda = k2;
- MatF2MatFl (F, Add, dlda, A+k2*nblock, lda, m, remblock);
- }
- if (tb == FflasTrans) {
- dldb = k2;
- MatF2MatFl (F, Bdd, k2, B+k2*nblock, ldb, n, remblock);
- } else {
- dldb = n;
- MatF2MatFl (F, Bdd, dldb, B+k2*nblock*ldb, ldb, remblock, n);
- }
-
- ClassicMatmul (FloatDomain(), ta, tb, m, n, remblock, alphad, Add, dlda,
- Bdd, dldb, betad, Cd, n, kmax,base );
- MatFl2MatF (F, C, ldc, Cd, n, m, n);
- MatF2MatFl (F, Cd, n, C, ldc, m, n);
- for (size_t i = 0; i < nblock; ++i) {
- if (ta == FflasTrans) MatF2MatFl (F, Add, dlda, A+k2*i*lda, lda, k2, m);
- else MatF2MatFl (F, Add, dlda, A+k2*i, lda, m, k2);
- if (tb == FflasTrans) MatF2MatFl (F, Bdd, dldb, B+k2*i, ldb, n, k2);
- else MatF2MatFl (F, Bdd, dldb, B+k2*i*ldb, ldb, k2, n);
-
- ClassicMatmul (FloatDomain(), ta, tb, m, n, k2, alphad, Add, dlda,
- Bdd, dldb, 1.0, Cd, n, kmax,base);
- MatFl2MatF (F, C, ldc, Cd, n, m, n);
- MatF2MatFl (F, Cd, n, C, ldc, m, n);
- }
- if ((!F.areEqual (F.one, alpha)) && (!F.areEqual (F.mOne, alpha))) {
- for (typename Field::Element * Ci = C; Ci < C+m*ldc; Ci += ldc)
- for (size_t j = 0; j < n; ++j)
- F.mulin (* (Ci + j), alpha);
- }
- delete[] Add;
- delete[] Bdd;
- delete[] Cd;
- }
- } else { // k2 == 1
- // Standard algorithm is performed over the Field, without conversion
- if (F.isZero (beta))
- for (size_t i = 0; i < m; ++i)
- for (size_t j = 0; j < n; ++j)
- F.assign (*(C+i*ldc+j), F.zero);
- else {
- typename Field::Element betadivalpha;
- FFLASFFPACK_check(!F.isZero(alpha));
- F.div (betadivalpha, beta, alpha);
- for (size_t i = 0; i < m; ++i)
- for (size_t j = 0; j < n; ++j)
- F.mulin (*(C+i*ldc+j), betadivalpha);
- }
- if (ta == FflasNoTrans)
- if (tb == FflasNoTrans)
- for (size_t i = 0; i < m; ++i)
- for (size_t l = 0; l < k; ++l)
- for (size_t j = 0; j < n; ++j)
- F.axpyin (*(C+i*ldc+j), *(A+i*lda+l), *(B+l*ldb+j));
- else
- for (size_t i = 0; i < m; ++i)
- for (size_t j = 0; j < n; ++j)
- for (size_t l = 0; l < k; ++l)
- F.axpyin (*(C+i*ldc+j), *(A+i*lda+l), *(B+j*ldb+l));
- else
- if (tb == FflasNoTrans)
- for (size_t i = 0; i < m; ++i)
- for (size_t l = 0; l < k; ++l)
- for (size_t j = 0; j < n; ++j)
- F.axpyin (*(C+i*ldc+j), *(A+l*lda+i), *(B+l*ldb+j));
- else
- for (size_t i = 0; i < m; ++i)
- for (size_t j = 0; j < n; ++j)
- for (size_t l = 0; l < k; ++l)
- F.axpyin (*(C+i*ldc+j), *(A+l*lda+i), *(B+j*ldb+l));
- if (! F.isOne(alpha))
- for (size_t i = 0; i < m; ++i)
- for (size_t j = 0; j < n; ++j)
- F.mulin (*(C+i*ldc+j), alpha);
- }
+ // CP: lda, ldb, ldc can be zero (if m,n or k is 0) and since this may have not
+ // been checked by the caller at this point.
+ // FFLASFFPACK_check(lda);
+ // FFLASFFPACK_check(ldb);
+ // FFLASFFPACK_check(ldc);
+
+ Givaro::ModularBalanced<FloatElement> G((FloatElement) F.characteristic());
+ FloatElement tmp,alphaf, betaf;
+ // This conversion is quite tricky, but convert and init are required
+ // in sequence e.g. for when F is a ModularBalanced field and alpha == -1
+ F.convert (tmp, beta);
+ G.init(betaf, tmp);
+ F.convert (tmp, alpha);
+ G.init(alphaf, tmp);
+
+ FloatElement* Af = FFLAS::fflas_new(G, m, k);
+ FloatElement* Bf = FFLAS::fflas_new(G, k, n);
+ FloatElement* Cf = FFLAS::fflas_new(G, m, n);
+
+ size_t ma, ka, kb, nb; //mb, na
+ if (ta == FflasTrans) { ma = k; ka = m; }
+ else { ma = m; ka = k; }
+ if (tb == FflasTrans) { kb = n; nb = k; }
+ else { kb = k; nb = n; }
+ size_t ldaf = ka, ldbf = nb, ldcf= n;
+
+ fconvert(F, ma, ka, Af, ka, A, lda);
+ freduce(G, ma, ka, Af, ka);
+ fconvert(F, kb, nb, Bf, nb, B, ldb);
+ freduce(G, kb, nb, Bf, nb);
+
+ if (!F.isZero(beta)){
+ fconvert(F, m, n, Cf, n, C, ldc);
+ freduce (G, m, n, Cf, n);
+ }
+ MMHelper<Givaro::ModularBalanced<FloatElement>,
+ MMHelperAlgo::Winograd>
+ HG(G,H.recLevel, ParSeqHelper::Sequential());
+ fgemm (G, ta, tb, m, n, k, alphaf, Af, ldaf, Bf, ldbf, betaf, Cf, ldcf, HG);
+
+ finit (F, m, n, Cf, n, C, ldc);
+
+ fflas_delete (Af);
+ fflas_delete (Bf);
+ fflas_delete (Cf);
+ return C;
}
-
- template<>
- inline void ClassicMatmul (const DoubleDomain& ,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n,const size_t k,
- const DoubleDomain::Element alpha,
- const DoubleDomain::Element * Ad, const size_t lda,
- const DoubleDomain::Element * Bd, const size_t ldb,
- const DoubleDomain::Element beta,
- DoubleDomain::Element * Cd, const size_t ldc,
- const size_t kmax, const FFLAS_BASE base)
+ }//Protected
+}//FFLAS
+
+namespace FFLAS{ namespace Protected{
+ template <class Field, class Element, class AlgoT, class ParSeqTrait>
+ inline bool NeedPreAddReduction (Element& Outmin, Element& Outmax,
+ Element& Op1min, Element& Op1max,
+ Element& Op2min, Element& Op2max,
+ MMHelper<Field, AlgoT, ModeCategories::LazyTag, ParSeqTrait >& WH)
{
-
- cblas_dgemm (CblasRowMajor, (CBLAS_TRANSPOSE) ta, (CBLAS_TRANSPOSE) tb,
- (int)m, (int)n, (int)k, (DoubleDomain::Element) alpha,
- Ad, (int)lda, Bd, (int)ldb, (DoubleDomain::Element) beta,Cd, (int)ldc);
+ Outmin = Op1min + Op2min;
+ Outmax = Op1max + Op2max;
+ if (WH.MaxStorableValue - Op1max < Op2max ||
+ WH.MaxStorableValue + Op1min < -Op2min){
+ // Reducing both Op1 and Op2
+ Op1min = Op2min = WH.FieldMin;
+ Op1max = Op2max = WH.FieldMax;
+ Outmin = 2*WH.FieldMin;
+ Outmax = 2*WH.FieldMax;
+ return true;
+ } else return false;
}
- template <>
- inline void ClassicMatmul (const FloatDomain& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n,const size_t k,
- const FloatDomain::Element alpha,
- const FloatDomain::Element * Ad, const size_t lda,
- const FloatDomain::Element * Bd, const size_t ldb,
- const FloatDomain::Element beta,
- FloatDomain::Element * Cd, const size_t ldc,
- const size_t kmax, const FFLAS_BASE base)
+ template <class Field, class Element, class AlgoT, class ModeT, class ParSeqTrait>
+ inline bool NeedPreAddReduction (Element& Outmin, Element& Outmax,
+ Element& Op1min, Element& Op1max,
+ Element& Op2min, Element& Op2max,
+ MMHelper<Field, AlgoT, ModeT, ParSeqTrait >& WH)
{
- cblas_sgemm (CblasRowMajor, (CBLAS_TRANSPOSE) ta, (CBLAS_TRANSPOSE) tb,
- (int)m, (int)n, (int)k, (FloatDomain::Element) alpha,
- Ad, (int)lda, Bd, (int)ldb, (FloatDomain::Element) beta,Cd, (int)ldc);
+ Outmin = WH.FieldMin;
+ Outmax = WH.FieldMax;
+ return false;
}
- template <>
- inline void ClassicMatmul (const FFPACK:: ModularBalanced<double> & F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n,const size_t k,
- const double alpha,
- const double * A, const size_t lda,
- const double * B, const size_t ldb,
- const double beta,
- double* C, const size_t ldc,
- const size_t kmax, const FFLAS_BASE base)
+ template <class Field, class Element, class AlgoT, class ParSeqTrait>
+ inline bool NeedPreSubReduction (Element& Outmin, Element& Outmax,
+ Element& Op1min, Element& Op1max,
+ Element& Op2min, Element& Op2max,
+ MMHelper<Field, AlgoT, ModeCategories::LazyTag, ParSeqTrait >& WH)
{
- double _alpha, _beta;
- // To ensure the initial computation with beta
- size_t k2 = MIN(k,kmax);
- size_t nblock = k / kmax;
- size_t remblock = k % kmax;
- if (!remblock) {
- remblock = kmax;
- --nblock;
- }
- if (F.areEqual (F.mOne, beta)) _beta = -1.0;
- else _beta = beta;
- if (F.areEqual (F.mOne, alpha)) _alpha = -1.0;
- else{
- _alpha = 1.0;
- if (! F.areEqual (F.one, alpha)) {
- // Compute y = A*x + beta/alpha.y
- // and after y *= alpha
- FFLASFFPACK_check(!F.isZero(alpha));
- F.divin (_beta, alpha);
- }
- }
- size_t shiftA, shiftB;
- if (ta == FflasTrans) shiftA = k2*lda;
- else shiftA = k2;
- if (tb == FflasTrans) shiftB = k2;
- else shiftB = k2*ldb;
-
- ClassicMatmul (DoubleDomain(), ta, tb, m, n, remblock, _alpha, A+nblock*shiftA, lda,
- B+nblock*shiftB, ldb, _beta, C, ldc, kmax,base );
- for (double * Ci = C; Ci != C+m*ldc; Ci += ldc)
- for (size_t j=0; j < n;++j)
- F.init(*(Ci+j),*(Ci+j));
- for (size_t i = 0; i < nblock; ++i) {
- ClassicMatmul (DoubleDomain(), ta, tb, m, n, k2, _alpha, A+i*shiftA, lda,
- B+i*shiftB, ldb, F.one, C, ldc, kmax,base);
- for (double * Ci = C; Ci != C+m*ldc; Ci += ldc)
- for (size_t j=0; j < n;++j)
- F.init(*(Ci+j),*(Ci+j));
- }
- if ((!F.areEqual (F.one, alpha)) && (!F.areEqual (F.mOne, alpha))) {
- for (double * Ci = C; Ci < C+m*ldc; Ci += ldc)
- for (size_t j = 0; j < n; ++j)
- F.mulin (* (Ci + j), alpha);
- }
+ Outmin = Op1min - Op2max;
+ Outmax = Op1max - Op2min;
+ if (WH.MaxStorableValue - Op1max < -Op2min ||
+ WH.MaxStorableValue - Op2max < -Op1min){
+ // Reducing both Op1 and Op2
+ Op1min = Op2min = WH.FieldMin;
+ Op1max = Op2max = WH.FieldMax;
+ Outmin = WH.FieldMin-WH.FieldMax;
+ Outmax = -Outmin;
+ return true;
+ } else return false;
}
-
- template <>
- inline void ClassicMatmul (const FFPACK:: ModularBalanced<float> & F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n,const size_t k,
- const float alpha,
- const float * A, const size_t lda,
- const float * B, const size_t ldb,
- const float beta,
- float* C, const size_t ldc,
- const size_t kmax, const FFLAS_BASE base)
+ template <class Field, class Element, class AlgoT, class ModeT, class ParSeqTrait>
+ inline bool NeedPreSubReduction (Element& Outmin, Element& Outmax,
+ Element& Op1min, Element& Op1max,
+ Element& Op2min, Element& Op2max,
+ MMHelper<Field, AlgoT, ModeT, ParSeqTrait >& WH)
{
- float _alpha, _beta;
- // To ensure the initial computation with beta
- size_t k2 = MIN(k,kmax);
- size_t nblock = k / kmax;
- size_t remblock = k % kmax;
- if (!remblock) {
- remblock = kmax;
- --nblock;
- }
- if (F.areEqual (F.mOne, beta)) _beta = -1.0;
- else _beta = beta;
- if (F.areEqual (F.mOne, alpha)) _alpha = -1.0;
- else{
- _alpha = 1.0;
- if (! F.areEqual (F.one, alpha)) {
- // Compute y = A*x + beta/alpha.y
- // and after y *= alpha
- FFLASFFPACK_check(!F.isZero(alpha));
- F.divin (_beta, alpha);
- }
- }
- size_t shiftA, shiftB;
- if (ta == FflasTrans) shiftA = k2*lda;
- else shiftA = k2;
- if (tb == FflasTrans) shiftB = k2;
- else shiftB = k2*ldb;
-
- ClassicMatmul (FloatDomain(), ta, tb, m, n, remblock, _alpha, A+nblock*shiftA, lda,
- B+nblock*shiftB, ldb, _beta, C, ldc, kmax,base);
- for (float * Ci = C; Ci != C+m*ldc; Ci += ldc)
- for (size_t j=0; j < n;++j)
- F.init(*(Ci+j),*(Ci+j));
- for (size_t i = 0; i < nblock; ++i) {
- ClassicMatmul (FloatDomain(), ta, tb, m, n, k2, _alpha, A+i*shiftA, lda,
- B+i*shiftB, ldb, F.one, C, ldc, kmax,base);
- for (float * Ci = C; Ci != C+m*ldc; Ci += ldc)
- for (size_t j=0; j < n;++j)
- F.init(*(Ci+j),*(Ci+j));
- }
- if ((!F.areEqual (F.one, alpha)) && (!F.areEqual (F.mOne, alpha))) {
- for (float * Ci = C; Ci < C+m*ldc; Ci += ldc)
- for (size_t j = 0; j < n; ++j)
- F.mulin (* (Ci + j), alpha);
- }
+ // Necessary? -> CP: Yes, for generic Mode of op
+ Outmin = WH.FieldMin;
+ Outmax = WH.FieldMax;
+ return false;
}
-
- template <>
- inline void ClassicMatmul (const FFPACK:: Modular<double> & F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n,const size_t k,
- const double alpha,
- const double * A, const size_t lda,
- const double * B, const size_t ldb,
- const double beta,
- double* C, const size_t ldc,
- const size_t kmax, const FFLAS_BASE base)
+//Probable bug here due to overflow of int64_t
+ template<class Field, class Element, class AlgoT, class ParSeqTrait>
+ inline bool NeedDoublePreAddReduction (Element& Outmin, Element& Outmax,
+ Element& Op1min, Element& Op1max,
+ Element& Op2min, Element& Op2max, Element beta,
+ MMHelper<Field, AlgoT, ModeCategories::LazyTag, ParSeqTrait >& WH)
{
- double _alpha, _beta;
- // To ensure the initial computation with beta
- size_t k2 = MIN(k,kmax);
- size_t nblock = k / kmax;
- size_t remblock = k % kmax;
- if (!remblock) {
- remblock = kmax;
- --nblock;
- }
- if (F.areEqual (F.mOne, beta))
- _beta = -1.0;
- else
- _beta = beta;
- if (F.areEqual (F.mOne, alpha))
- _alpha = -1.0;
- else {
- _alpha = 1.0;
- if (! F.areEqual (F.one, alpha)) {
- // Compute y = A*x + beta/alpha.y
- // and after y *= alpha
- FFLASFFPACK_check(!F.isZero(alpha));
- F.divin (_beta, alpha);
- }
- }
- size_t shiftA, shiftB;
- if (ta == FflasTrans)
- shiftA = k2*lda;
- else
- shiftA = k2;
- if (tb == FflasTrans)
- shiftB = k2;
- else
- shiftB = k2*ldb;
-
- ClassicMatmul (DoubleDomain(), ta, tb, m, n, remblock, _alpha, A+nblock*shiftA, lda,
- B+nblock*shiftB, ldb, _beta, C, ldc, kmax, base );
-
- double * Ci;
-#if 0 /* timing */
- Timer t;
- t.clear();
- t.start();
-#endif
- //#pragma omp parallel for schedule(static) private (Ci)
- //!@todo init only if remblock!=0 and _beta == 0
- for (Ci = C; Ci < C+m*ldc; Ci += ldc)
- for (size_t j=0; j < n;++j)
- F.init(*(Ci+j),*(Ci+j));
-#if 0
- t.stop();
- std::cerr<<"Reduction dans Classic -> "<<t.realtime()<<std::endl;
-#endif
-
-
- for (size_t i = 0; i < nblock; ++i) {
- ClassicMatmul (DoubleDomain(), ta, tb, m, n, k2, _alpha, A+i*shiftA, lda,
- B+i*shiftB, ldb, F.one, C, ldc, kmax, base);
- //#pragma omp parallel for schedule(static) private (Ci)
- for (Ci = C; Ci < C+m*ldc; Ci += ldc)
- for (size_t j=0; j < n;++j)
- F.init(*(Ci+j),*(Ci+j));
- }
- if ((!F.areEqual (F.one, alpha)) && (!F.areEqual (F.mOne, alpha))) {
- for (Ci = C; Ci < C+m*ldc; Ci += ldc)
- for (size_t j = 0; j < n; ++j)
- F.mulin (* (Ci + j), alpha);
+ // Testing if P5 need to be reduced
+ Outmin = std::min(beta*Op2min,beta*Op2max);
+ Outmax = std::max(beta*Op2min,beta*Op2max);
+ if (Op1max > WH.MaxStorableValue-Outmax ||
+ -Op1min > WH.MaxStorableValue+Outmin){
+ Outmin += WH.FieldMin;
+ Outmax += WH.FieldMax;
+ return true;
+ } else{
+ Outmin += Op1min;
+ Outmax += Op1max;
+ return false;
}
}
- template <>
- inline void ClassicMatmul (const FFPACK:: Modular<float> & F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n,const size_t k,
- const float alpha,
- const float * A, const size_t lda,
- const float * B, const size_t ldb,
- const float beta,
- float* C, const size_t ldc,
- const size_t kmax, const FFLAS_BASE base)
+ template<class Field, class Element, class AlgoT, class ModeT, class ParSeqTrait>
+ inline bool NeedDoublePreAddReduction (Element& Outmin, Element& Outmax,
+ Element& Op1min, Element& Op1max,
+ Element& Op2min, Element& Op2max, Element beta,
+ MMHelper<Field, AlgoT, ModeT, ParSeqTrait>& WH)
{
- float _alpha, _beta;
- // To ensure the initial computation with beta
- size_t k2 = MIN(k,kmax);
- size_t nblock = k / kmax;
- size_t remblock = k % kmax;
- if (!remblock) {
- remblock = kmax;
- --nblock;
- }
- if (F.areEqual (F.mOne, beta)) _beta = -1.0;
- else _beta = beta;
- if (F.areEqual (F.mOne, alpha)) _alpha = -1.0;
- else{
- _alpha = 1.0;
- if (! F.areEqual (F.one, alpha)) {
- // Compute y = A*x + beta/alpha.y
- // and after y *= alpha
- FFLASFFPACK_check(!F.isZero(alpha));
- F.divin (_beta, alpha);
- }
- }
- size_t shiftA, shiftB;
- if (ta == FflasTrans) shiftA = k2*lda;
- else shiftA = k2;
- if (tb == FflasTrans) shiftB = k2;
- else shiftB = k2*ldb;
-
- ClassicMatmul (FloatDomain(), ta, tb, m, n, remblock, _alpha, A+nblock*shiftA, lda,
- B+nblock*shiftB, ldb, _beta, C, ldc, kmax,base);
- for (float * Ci = C; Ci != C+m*ldc; Ci += ldc)
- for (size_t j=0; j < n;++j)
- F.init(*(Ci+j),*(Ci+j));
- for (size_t i = 0; i < nblock; ++i) {
- ClassicMatmul (FloatDomain(), ta, tb, m, n, k2, _alpha, A+i*shiftA, lda,
- B+i*shiftB, ldb, F.one, C, ldc, kmax,base);
- for (float * Ci = C; Ci != C+m*ldc; Ci += ldc)
- for (size_t j=0; j < n;++j)
- F.init(*(Ci+j),*(Ci+j));
- }
- if ((!F.areEqual (F.one, alpha)) && (!F.areEqual (F.mOne, alpha))) {
- for (float * Ci = C; Ci < C+m*ldc; Ci += ldc)
- for (size_t j = 0; j < n; ++j)
- F.mulin (* (Ci + j), alpha);
- }
+ Outmin = WH.FieldMin;
+ Outmax = WH.FieldMax;
+ return false;
}
-
- // Winograd Multiplication A(n*k) * B(k*m) in C(n*m)
- // Computation of the 22 Winograd's operations
- template < class Field >
- inline void WinoCalc (const Field& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t mr, const size_t nr, const size_t kr,
- const typename Field::Element alpha,
- const typename Field::Element* A,const size_t lda,
- const typename Field::Element* B,const size_t ldb,
- const typename Field::Element beta,
- typename Field::Element * C, const size_t ldc,
- const size_t kmax, const size_t w, const FFLAS_BASE base)
- {
-
- typename Field::Element mbeta;
- F.neg(mbeta,beta);
- size_t imaxb, jmaxb, imaxa, jmaxa, ldx2, ldx3;
- size_t x3rd = MAX(mr,kr);
- const typename Field::Element* d11,*d12,*d21,*d22;
- typename Field::Element* d11c,*d12c,*d21c,*d22c,*dx1,*dx2,*dx3;
- const typename Field::Element * A11=A, *A12, *A21, *A22;
- const typename Field::Element * B11=B, *B12, *B21, *B22;
- typename Field::Element * C11=C, *C12=C+nr, *C21=C+mr*ldc, *C22=C+nr+mr*ldc;
-
-
- if (F.isZero(beta)){
- size_t x1rd = MAX(nr,kr);
- size_t ldx1;
- if (ta == FflasTrans) {
- A21 = A + mr;
- A12 = A + kr*lda;
- A22 = A12 + mr;
- imaxa = kr;
- jmaxa = mr;
- ldx1 = mr;
- } else {
- A12 = A + kr;
- A21 = A + mr*lda;
- A22 = A21 + kr;
- imaxa = mr;
- jmaxa = kr;
- ldx1 = x1rd;
- }
- if (tb == FflasTrans) {
- B21 = B + kr;
- B12 = B + nr*ldb;
- B22 = B12 + kr;
- imaxb = nr;
- jmaxb = kr;
- ldx2 = kr;
- } else {
- B12 = B + nr;
- B21 = B + kr*ldb;
- B22 = B21 + nr;
- imaxb = kr;
- ldx2 = jmaxb = nr;
- }
-
-
- // Two temporary submatrices are required
-
- typename Field::Element* X2 = new typename Field::Element[kr*nr];
-
- // T3 = B22 - B12 in X2
- d12 = B12; d22 = B22; dx2 = X2;
- for (size_t i=0; i < imaxb; ++i, d12+=ldb, d22+=ldb, dx2+=ldx2) {
- for (size_t j=0;j < jmaxb;++j)
- F.sub (*(dx2+j), *(d22 + j), *(d12 + j));
- }
-
- // S3 = A11 - A21 in X1
- typename Field::Element* X1 = new typename Field::Element[mr*x1rd]; // S3 = A11 - A21 in X1
- d11 = A11; d21 = A21; dx1 = X1;
- for (size_t i = 0; i < imaxa; ++i, d11 += lda, d21 += lda, dx1 += ldx1)
- for (size_t j = 0; j < jmaxa; ++j)
- F.sub (*(dx1+j), *(d11 + j), *(d21 + j));
-
- // P7 = alpha . S3 * T3 in C21
- WinoMain (F, ta, tb, mr, nr, kr, alpha, X1, ldx1, X2, ldx2, F.zero, C21, ldc, kmax, w-1, base);
-
- // T1 = B12 - B11 in X2
- d11 = B11; d12 = B12; dx2 = X2;
- for (size_t i = 0; i < imaxb; ++i, d11 += ldb, d12 += ldb, dx2 += ldx2) {
- for (size_t j = 0; j < jmaxb; ++j)
- F.sub (*(dx2 + j), *(d12 + j), *(d11 + j));
- }
-
- // S1 = A21 + A22 in X1
-
- d21 = A21; d22 = A22; dx1 = X1;
- for (size_t i = 0; i < imaxa; ++i, d21+=lda, d22+=lda, dx1+=ldx1) {
- for (size_t j=0;j < jmaxa;++j)
- F.add(*(dx1+j),* (d21 + j),*(d22 + j));
- }
-
- // P5 = alpha . S1*T1 in C22
- WinoMain (F, ta, tb, mr, nr, kr, alpha, X1, ldx1, X2, ldx2, F.zero, C22, ldc, kmax, w-1, base);
-
- // T2 = B22 - T1 in X2
- d22 = B22; dx2 = X2;
- for (size_t i = 0; i < imaxb; ++i, d22+=ldb, dx2+=ldx2) {
- for (size_t j = 0; j < jmaxb; ++j)
- F.sub (*(dx2+j), *(d22 + j), *(dx2+j));
- }
-
- // S2 = S1 - A11 in X1
- d11 = A11; dx1 = X1;
- for (size_t i = 0; i < imaxa; ++i, d11+=lda, dx1+=ldx1) {
- for (size_t j = 0; j < jmaxa; ++j)
- F.subin (*(dx1+j), *(d11 + j));
- }
-
- // P6 = alpha . S2 * T2 in C12
- WinoMain (F, ta, tb, mr, nr, kr, alpha, X1, ldx1, X2, ldx2, F.zero, C12, ldc, kmax, w-1, base);
-
- // S4 = A12 -S2 in X1
- d12 = A12; dx1 = X1;
- for (size_t i = 0; i < imaxa; ++i, d12 += lda, dx1 += ldx1) {
- for (size_t j = 0; j < jmaxa; ++j)
- F.sub (*(dx1+j), *(d12 + j), *(dx1+j));
- }
-
- // P3 = alpha . S4*B22 in C11
- WinoMain (F, ta, tb, mr, nr, kr, alpha, X1, ldx1, B22, ldb, F.zero, C11, ldc, kmax, w-1, base);
-
- // P1 = alpha . A11 * B11 in X1
- WinoMain (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, X1, nr, kmax, w-1, base);
-
-
-
- // U2 = P1 + P6 in tmpU2 and
- // U3 = P7 + U2 in tmpU3 and
- // U7 = P5 + U3 in C22 and
- // U4 = P5 + U2 in C12 and
- d12c = C12; dx1=X1; d21c = C21; d22c = C22;
- for (size_t i = 0; i < mr;
- ++i, d12c += ldc, dx1 += nr, d22c+=ldc, d21c += ldc) {
- for (size_t j=0;j < nr;++j) {
- F.addin ( *(d12c + j), *(dx1 + j)); // U2 = P1 + P6
- F.addin ( *(d21c+j), *(d12c+j)); // U3 = U2 + P7
- F.addin (*(d12c + j), *(d22c+j)); // U4 = P5 + U2 in C12
- F.addin (*(d22c + j), *(d21c+j)); // U7 = P5 + U3 in C22
- }
- }
-
- // U5 = P3 + U4 in C12
- d12c = C12; d11 = C11;
- for (size_t i = 0; i < mr; ++i, d12c += ldc, d11 += ldc)
- for (size_t j = 0; j < nr; ++j)
- F.addin (*(d12c + j), *(d11 + j));
- // T4 = T2 - B21 in X2
- d21 = B21;dx2=X2;
- for (size_t i = 0; i < imaxb; ++i, d21+=ldb, dx2+=ldx2) {
- for (size_t j = 0; j < jmaxb; ++j)
- F.subin (*(dx2+j),* (d21 + j));
- }
-
- // P4 = alpha . A22 * T4 in C11
- WinoMain (F, ta, tb, mr, nr, kr, alpha, A22, lda, X2, ldx2, F.zero, C11, ldc, kmax, w-1, base);
-
- delete[] X2;
- // U6 = U3 - P4 in C21
- d21c = C21; d11c = C11;
- for (size_t i = 0; i < mr; ++i, d21c += ldc, d11c += ldc)
- for (size_t j = 0; j < nr; ++j)
- F.subin (*(d21c + j), *(d11c + j));
-
- // P2 = alpha . A12 * B21 in C11
- WinoMain (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, F.zero, C11, ldc, kmax,w-1, base);
-
- // U1 = P2 + P1 in C11
- d11c = C11; dx1 = X1;
- for (size_t i = 0; i < mr; ++i, d11c += ldc, dx1 += nr)
- for (size_t j = 0; j < nr; ++j)
- F.addin (*(d11c + j), *(dx1 + j));
-
- delete[] X1;
-
- } else {
- // Three temporary submatrices are required
- typename Field::Element* X1 = new typename Field::Element[mr*nr];
- typename Field::Element* X2 = new typename Field::Element[mr*kr];
- typename Field::Element* X3 = new typename Field::Element[x3rd*nr];
-
- if (ta == FflasTrans) {
- A21 = A + mr;
- A12 = A + kr*lda;
- A22 = A12 + mr;
- imaxa = kr;
- ldx2 = jmaxa = mr;
- } else {
- A12 = A + kr;
- A21 = A + mr*lda;
- A22 = A21 + kr;
- imaxa = mr;
- ldx2 = jmaxa = kr;
- }
- if (tb == FflasTrans) {
- B21 = B + kr;
- B12 = B + nr*ldb;
- B22 = B12 + kr;
- imaxb = nr;
- jmaxb = kr;
- ldx3 = x3rd;
- } else {
- B12 = B + nr;
- B21 = B + kr*ldb;
- B22 = B21 + nr;
- imaxb = kr;
- ldx3 = jmaxb = nr;
- }
-
-#ifdef NEWWINO
-#if 0
- std::cerr<<"New Wino"<<std::endl;
- // C22 = C22 - C12
- d12c = C12;
- d22c = C22;
- for (size_t i = 0; i < mr; ++i, d12c += ldc, d22c += ldc)
- for (size_t j = 0; j < nr; ++j)
- F.subin (*(d22c + j), *(d12c + j));
-#endif
-
-
- // T1 = B12 - B11 in X3
- d11 = B11; d12 = B12; dx3 = X3;
- for (size_t i = 0; i < imaxb; ++i, d11 += ldb, d12 += ldb, dx3 += ldx3) {
- for (size_t j = 0; j < jmaxb; ++j)
- F.sub (*(dx3 + j), *(d12 + j), *(d11 + j));
- }
-
- // S1 = A21 + A22 in X2
- d21 = A21; d22 = A22; dx2 = X2;
- for (size_t i = 0; i < imaxa; ++i, d21+=lda, d22+=lda, dx2+=ldx2) {
- for (size_t j=0;j < jmaxa;++j)
- F.add(*(dx2+j),* (d21 + j),*(d22 + j));
- }
-
- // P5 = alpha . S1*T1 + beta . C12 in C12
- //WinoMain (F, ta, tb, mr, nr, kr, alpha, X2, ldx2, X3, ldx3, beta, C12, ldc, kmax, w-1,base);
- WinoMain (F, ta, tb, mr, nr, kr, alpha, X2, ldx2, X3, ldx3, F.zero, X1, nr, kmax, w-1,base);
-
- // C22 = P5 + beta C22 in C22
- d22c = C22; dx1 = X1;
- for (size_t i = 0; i < mr; ++i, dx1 += nr, d22c += ldc)
- for (size_t j=0;j < nr;++j) {
- F.mulin (*(d22c + j), beta);
- F.addin (*(d22c + j), *(dx1 + j));
- }
-
- // C12 = P5 + beta C12 in C12
- dx1 = X1; d12c = C12;
- for (size_t i = 0; i < mr; ++i, d12c += ldc, dx1 += nr)
- for (size_t j=0;j < nr;++j) {
- F.mulin (*(d12c + j), beta);
- F.addin (*(d12c + j), *(dx1 + j));
- }
-
- // P1 = alpha . A11 * B11 in X1
- WinoMain (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, X1, nr, kmax, w-1,base);
-
-
- // P2 = alpha . A12 * B21 + beta . C11 in C11
- WinoMain (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, beta, C11, ldc, kmax,w-1,base);
-
- // U1 = P2 + P1 in C11
- d11c = C11; dx1 = X1;
- for (size_t i = 0; i < mr; ++i, d11c += ldc, dx1 += nr)
- for (size_t j = 0; j < nr; ++j)
- F.addin (*(d11c + j), *(dx1 + j));
-
- // T2 = B22 - T1 in X3
- d22 = B22; dx3 = X3;
- for (size_t i = 0; i < imaxb; ++i, d22+=ldb, dx3+=ldx3) {
- for (size_t j = 0; j < jmaxb; ++j)
- F.sub (*(dx3+j), *(d22 + j), *(dx3+j));
- }
-
- // S2 = S1 - A11 in X2
- d11 = A11; dx2 = X2;
- for (size_t i = 0; i < imaxa; ++i, d11+=lda, dx2+=ldx2) {
- for (size_t j = 0; j < jmaxa; ++j)
- F.subin (*(dx2+j), *(d11 + j));
- }
-
- // U2 = P6 + P1 = alpha . S2 * T2 + P1 in X1
- WinoMain (F, ta, tb, mr, nr, kr, alpha, X2, ldx2, X3, ldx3, F.one, X1, nr, kmax, w-1,base);
-
-
-
-
- // U4 = U2 + P5 in C12
- d12c = C12; dx1 = X1;
- for (size_t i = 0; i < mr; ++i, d12c += ldc, dx1 += nr)
- for (size_t j=0;j < nr;++j)
- F.addin (*(d12c + j), *(dx1 + j));
-
- // T4 = T2 - B21 in X3
- d21 = B21;dx3=X3;
- for (size_t i = 0; i < imaxb; ++i, d21+=ldb, dx3+=ldx3) {
- for (size_t j = 0; j < jmaxb; ++j)
- F.subin (*(dx3+j),* (d21 + j));
- }
-
- // S4 = A12 -S2 in X2
- d12 = A12; dx2 = X2;
- for (size_t i = 0; i < imaxa; ++i, d12 += lda, dx2 += ldx2) {
- for (size_t j = 0; j < jmaxa; ++j)
- F.sub (*(dx2+j), *(d12 + j), *(dx2+j));
- }
-
- // P4 = alpha . A22 * T4 - beta . C21 in C21
- WinoMain (F, ta, tb, mr, nr, kr, alpha, A22, lda, X3, ldx3, mbeta, C21, ldc, kmax, w-1,base);
-
- // U5 = P3 + U4 = alpha . S4*B22 + U4 in C12
- WinoMain (F, ta, tb, mr, nr, kr, alpha, X2, ldx2, B22, ldb, F.one, C12, ldc, kmax, w-1,base);
-
- // T3 = B22 - B12 in X3
- d12 = B12; d22 = B22; dx3 = X3;
- for (size_t i=0; i < imaxb; ++i, d12+=ldb, d22+=ldb, dx3+=ldx3)
- for (size_t j=0;j < jmaxb;++j)
- F.sub (*(dx3+j), *(d22 + j), *(d12 + j));
-
- // S3 = A11 - A21 in X2
- d11 = A11; d21 = A21; dx2 = X2;
- for (size_t i = 0; i < imaxa; ++i, d11 += lda, d21 += lda, dx2 += ldx2)
- for (size_t j = 0; j < jmaxa; ++j)
- F.sub (*(dx2+j), *(d11 + j), *(d21 + j));
-
- // U3 = P7 + U2 = alpha . S3 * T3 + U2 in X1
- WinoMain (F, ta, tb, mr, nr, kr, alpha, X2, ldx2, X3, ldx3, F.one, X1, nr, kmax, w-1,base);
-
- // U7 = U3 + C22 in C22
- d22c = C22; dx1 = X1; d12c = C12;
- for (size_t i = 0; i < mr; ++i, d22c += ldc, dx1 += nr)
- for (size_t j = 0; j < nr; ++j)
- F.addin (*(d22c + j), *(dx1 + j));
-
- // U6 = U3 - P4 in C21
- dx1 = X1; d21c = C21;
- for (size_t i = 0; i < mr; ++i, dx1 += nr, d21c += ldc)
- for (size_t j=0;j < nr;++j)
- F.sub (*(d21c + j), *(dx1 + j),* (d21c + j));
-#else
- // P2 = alpha . A12 * B21 + beta . C11 in C11
- WinoMain (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, beta, C11, ldc, kmax,w-1,base);
-
- // T3 = B22 - B12 in X3
- d12 = B12; d22 = B22; dx3 = X3;
- for (size_t i=0; i < imaxb; ++i, d12+=ldb, d22+=ldb, dx3+=ldx3) {
- for (size_t j=0;j < jmaxb;++j)
- F.sub (*(dx3+j), *(d22 + j), *(d12 + j));
-
- }
-
- // S3 = A11 - A21 in X2
- d11 = A11; d21 = A21; dx2 = X2;
- for (size_t i = 0; i < imaxa; ++i, d11 += lda, d21 += lda, dx2 += ldx2)
- for (size_t j = 0; j < jmaxa; ++j)
- F.sub (*(dx2+j), *(d11 + j), *(d21 + j));
-
- // C22 = C22 - C12 if beta != 0
- d12c = C12;
- d22c = C22;
- for (size_t i = 0; i < mr; ++i, d12c += ldc, d22c += ldc)
- for (size_t j = 0; j < nr; ++j)
- F.subin (*(d22c + j), *(d12c + j));
-
- // C21 = C21 - C22
- d21c = C21;
- d22c = C22;
- for (size_t i = 0; i < mr; ++i, d22c += ldc, d21c += ldc)
- for (size_t j = 0; j < nr; ++j)
- F.subin (*(d21c + j), *(d22c + j));
-
- // P7 = alpha . S3 * T3 + beta . C22 in C22
- WinoMain (F, ta, tb, mr, nr, kr, alpha, X2, ldx2, X3, ldx3, beta, C22, ldc, kmax, w-1,base);
-
- // T1 = B12 - B11 in X3
- d11 = B11; d12 = B12; dx3 = X3;
- for (size_t i = 0; i < imaxb; ++i, d11 += ldb, d12 += ldb, dx3 += ldx3) {
- for (size_t j = 0; j < jmaxb; ++j)
- F.sub (*(dx3 + j), *(d12 + j), *(d11 + j));
- }
-
- // S1 = A21 + A22 in X2
- d21 = A21; d22 = A22; dx2 = X2;
- for (size_t i = 0; i < imaxa; ++i, d21+=lda, d22+=lda, dx2+=ldx2) {
- for (size_t j=0;j < jmaxa;++j)
- F.add(*(dx2+j),* (d21 + j),*(d22 + j));
- }
-
- // P5 = alpha . S1*T1 + beta . C12 in C12
- WinoMain (F, ta, tb, mr, nr, kr, alpha, X2, ldx2, X3, ldx3, beta, C12, ldc, kmax, w-1,base);
-
- // T2 = B22 - T1 in X3
- d22 = B22; dx3 = X3;
- for (size_t i = 0; i < imaxb; ++i, d22+=ldb, dx3+=ldx3) {
- for (size_t j = 0; j < jmaxb; ++j)
- F.sub (*(dx3+j), *(d22 + j), *(dx3+j));
- }
-
- // S2 = S1 - A11 in X2
- d11 = A11; dx2 = X2;
- for (size_t i = 0; i < imaxa; ++i, d11+=lda, dx2+=ldx2) {
- for (size_t j = 0; j < jmaxa; ++j)
- F.subin (*(dx2+j), *(d11 + j));
- }
-
- // P6 = alpha . S2 * T2 in X1
- WinoMain (F, ta, tb, mr, nr, kr, alpha, X2, ldx2, X3, ldx3, F.zero, X1, nr, kmax, w-1,base);
-
- // T4 = T2 - B21 in X3
- d21 = B21;dx3=X3;
- for (size_t i = 0; i < imaxb; ++i, d21+=ldb, dx3+=ldx3) {
- for (size_t j = 0; j < jmaxb; ++j)
- F.subin (*(dx3+j),* (d21 + j));
- }
-
- // S4 = A12 -S2 in X2
- d12 = A12; dx2 = X2;
- for (size_t i = 0; i < imaxa; ++i, d12 += lda, dx2 += ldx2) {
- for (size_t j = 0; j < jmaxa; ++j)
- F.sub (*(dx2+j), *(d12 + j), *(dx2+j));
- }
-
- // P4 = alpha . A22 * T4 - beta . C21 in C21
- WinoMain (F, ta, tb, mr, nr, kr, alpha, A22, lda, X3, ldx3, mbeta, C21, ldc, kmax, w-1,base);
-
- // P1 = alpha . A11 * B11 in X3
- WinoMain (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, X3, nr, kmax, w-1,base);
-
- // U1 = P2 + P1 in C11
- d11c = C11; dx3 = X3;
- for (size_t i = 0; i < mr; ++i, d11c += ldc, dx3 += nr)
- for (size_t j = 0; j < nr; ++j)
- F.addin (*(d11c + j), *(dx3 + j));
-
- // U2 = P1 + P6 in tmpU2 and
- // U3 = P7 + U2 in tmpU3 and
- // U7 = P5 + U3 in C22 and
- // U4 = P5 + U2 in C12 and
- // U6 = U3 - P4 in C21 and
- typename Field::Element tmpU2, tmpU3;
- d12c = C12; dx1=X1; dx3=X3; d21c = C21; d22c = C22;
- for (size_t i = 0; i < mr;
- ++i, d12c += ldc, dx1 += nr, dx3 += nr, d22c+=ldc, d21c += ldc) {
- for (size_t j=0;j < nr;++j) {
- F.add (tmpU2, *(dx3 + j), *(dx1 + j)); // temporary U2 = P1 + P6
- F.add (tmpU3, tmpU2, *(d22c + j)); // temporary U3 = U2 + P7
- F.add (*(d22c + j), *(d12c + j), tmpU3); // U7 = P5 + U3 in C22
- F.addin (*(d12c + j), tmpU2); // U4 = P5 + U2 in C12
- F.sub (*(d21c + j), tmpU3, *(d21c + j)); // U6 = U3 - P4 in C21
- }
- }
- // P3 = alpha . S4*B22 in X1
- WinoMain (F, ta, tb, mr, nr, kr, alpha, X2, ldx2, B22, ldb, F.one, C12, ldc, kmax, w-1,base);
-
- // U5 = P3 + U4 in C12
-#if 0
- // d12c = C12; dx1 = X1;
- // for (size_t i = 0; i < mr; ++i, d12c += ldc, dx1 += nr)
- // for (size_t j = 0; j < nr; ++j)
- // F.addin (*(d12c + j), *(dx1 + j));
-#endif
-#endif
- delete[] X1;
- delete[] X2;
- delete[] X3;
- }
- }
-
-
- // Control the switch with classic multiplication
- // Fix-up for odd-sized matrices using dynamic pealing
- // for matrices over double
- template <>
- inline void WinoMain (const DoubleDomain& D,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n, const size_t k,
- const DoubleDomain::Element alpha,
- const DoubleDomain::Element * A, const size_t lda,
- const DoubleDomain::Element * B, const size_t ldb,
- const DoubleDomain::Element beta,
- DoubleDomain::Element * C, const size_t ldc,
- const size_t kmax, const size_t w, const FFLAS_BASE base)
- {
-
- if (w <= 0)
- ClassicMatmul (D, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, kmax,base);
- else{
- WinoCalc (D, ta, tb, m/2, n/2, k/2, alpha, A, lda, B, ldb, beta, C, ldc, kmax, w,base);
- DynamicPealing (D, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, kmax);
- }
- }
-
- template <>
- inline void WinoMain (const FloatDomain& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n, const size_t k,
- const FloatDomain::Element alpha,
- const FloatDomain::Element * A, const size_t lda,
- const FloatDomain::Element * B, const size_t ldb,
- const FloatDomain::Element beta,
- FloatDomain::Element * C, const size_t ldc,
- const size_t kmax, const size_t w, const FFLAS_BASE base)
- {
-
- if (w <= 0) {
- ClassicMatmul (F, ta, tb, m, n, k, alpha, A, lda, B, ldb,
- beta, C, ldc, kmax,base);
- }
- else{
- WinoCalc (F, ta, tb, m/2, n/2, k/2, alpha, A, lda, B, ldb,
- beta, C, ldc, kmax, w,base);
- DynamicPealing (F, ta, tb, m, n, k, alpha, A, lda, B, ldb,
- beta, C, ldc, kmax);
- }
- }
-
- template <class Field>
- inline void WinoMain (const Field& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n, const size_t k,
- const typename Field::Element alpha,
- const typename Field::Element* A,const size_t lda,
- const typename Field::Element* B,const size_t ldb,
- const typename Field::Element beta,
- typename Field::Element * C, const size_t ldc,
- const size_t kmax, const size_t w,
- const FFLAS_BASE base)
- {
-
-
- if (w <= 0) // Winograd - > Classic
- ClassicMatmul (F, ta, tb, m, n, k, alpha, A, lda, B, ldb,
- beta, C, ldc, kmax,base);
- else {
- if (k <= kmax) { // switch on floating point
- if (base == FflasDouble){
- DoubleDomain::Element alphad, betad;
- typename Field::Element _betabis;
-
- if (F.areEqual (F.mOne, alpha)) {
- alphad = -1.0;
- F.convert (betad, beta);
- } else {
- if (! F.areEqual (F.one, alpha)) {
- // Compute C = A*B + beta/alpha.C
- // and after C *= alpha
- FFLASFFPACK_check(!F.isZero(alpha));
- F.div (_betabis, beta, alpha);
- F.convert (betad, _betabis);
- }
- else
- F.convert (betad, beta);
- alphad = 1.0;
- }
- DoubleDomain::Element * Ad = new DoubleDomain::Element[m*k];
- DoubleDomain::Element * Bd = new DoubleDomain::Element[k*n];
- DoubleDomain::Element * Cd = new DoubleDomain::Element[m*n];
- // Conversion GFq = > double
- size_t ma, ka, kb, nb; //mb, na
- if (ta == FflasTrans) { ma = k; ka = m; }
- else { ma = m; ka = k; }
- if (tb == FflasTrans) { kb = n; nb = k; }
- else { kb = k; nb = n; }
-
- MatF2MatD (F, Ad, ka, A, lda, ma, ka);
- MatF2MatD (F, Bd, nb, B, ldb, kb, nb);
- if (!F.isZero(beta))
- MatF2MatD (F, Cd, n, C, ldc, m, n);
- // recursive call
- WinoMain (DoubleDomain(), ta, tb, m, n, k, alphad,
- Ad, ka, Bd, nb, betad, Cd, n, kmax, w,base);
- // Conversion double = > GFq
- MatD2MatF (F, C, ldc, Cd, n, m, n);
-
- if (!F.areEqual (F.one, alpha) &&
- !F.areEqual (F.mOne, alpha)) {
- // Fix-up: compute C *= alpha
- for (typename Field::Element* Ci = C;
- Ci < C + m*ldc; Ci+=ldc)
- for (size_t j=0; j < n; ++j)
- F.mulin (*(Ci + j), alpha);
- }
- // Temporary double matrices destruction
- delete[] Ad;
- delete[] Bd;
- delete[] Cd;
- } else {
- FloatDomain::Element alphad, betad;
- typename Field::Element _betabis;
-
- if (F.areEqual (F.mOne, alpha)) {
- alphad = -1.0;
- F.convert (betad, beta);
- } else {
- if (! F.areEqual (F.one, alpha)) {
- // Compute C = A*B + beta/alpha.C
- // and after C *= alpha
- FFLASFFPACK_check(!F.isZero(alpha));
- F.div (_betabis, beta, alpha);
- F.convert (betad, _betabis);
- }
- else
- F.convert (betad, beta);
- alphad = 1.0;
- }
- FloatDomain::Element * Ad = new FloatDomain::Element[m*k];
- FloatDomain::Element * Bd = new FloatDomain::Element[k*n];
- FloatDomain::Element * Cd = new FloatDomain::Element[m*n];
- // Conversion GFq = > double
- size_t ma, ka, kb, nb; //mb, na
- if (ta == FflasTrans) { ma = k; ka = m; }
- else { ma = m; ka = k; }
- if (tb == FflasTrans) { kb = n; nb = k; }
- else { kb = k; nb = n; }
-
- MatF2MatFl (F, Ad, ka, A, lda, ma, ka);
- MatF2MatFl (F, Bd, nb, B, ldb, kb, nb);
- if (!F.isZero(beta))
- MatF2MatFl (F, Cd, n, C, ldc, m, n);
- // recursive call
- WinoMain (FloatDomain(), ta, tb, m, n, k, alphad,
- Ad, ka, Bd, nb, betad, Cd, n, kmax, w,base);
- // Conversion double = > GFq
- MatFl2MatF (F, C, ldc, Cd, n, m, n);
-
- if (!F.areEqual (F.one, alpha) &&
- !F.areEqual (F.mOne, alpha)) {
- // Fix-up: compute C *= alpha
- for (typename Field::Element* Ci=C;
- Ci < C+m*ldc; Ci+=ldc)
- for (size_t j=0; j < n; ++j)
- F.mulin (* (Ci + j), alpha);
- }
- // Temporary double matrices destruction
- delete[] Ad;
- delete[] Bd;
- delete[] Cd;
- }
- } else{
- WinoCalc (F, ta, tb, m/2, n/2, k/2, alpha, A, lda, B, ldb,
- beta, C, ldc, kmax,w,base);
- DynamicPealing (F, ta, tb, m, n, k, alpha, A, lda, B, ldb,
- beta, C, ldc, kmax);
- }
- }
- }
-
- template <>
- inline void WinoMain (const FFPACK:: ModularBalanced<double>& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n, const size_t k,
- const double alpha,
- const double* A, const size_t lda,
- const double* B, const size_t ldb,
- const double beta,
- double * C, const size_t ldc,
- const size_t kmax, const size_t w, const FFLAS_BASE base)
+ template <class Field, class AlgoT, class ParSeqTrait>
+ inline void ScalAndReduce (const Field& F, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::Element_ptr X, const size_t incX,
+ const MMHelper<Field, AlgoT, ModeCategories::LazyTag, ParSeqTrait >& H)
{
- if (w <= 0)
- ClassicMatmul (F, ta, tb, m, n, k, alpha, A, lda, B, ldb,
- beta, C, ldc, kmax,base);
- else {
- if (k <= kmax) { // switch on delayed modulus
- DoubleDomain::Element _alpha, _beta;
- _beta = beta;
- if (F.areEqual (-1.0, alpha)) _alpha = -1.0;
- else{
- // Compute C = A*B + beta/alpha.C
- // and then C *= alpha
- if (! F.areEqual (1.0, alpha)) {
- FFLASFFPACK_check(!F.isZero(alpha));
- F.divin (_beta, alpha);
- }
- _alpha = 1.0;
- }
-
-#if 0 /* BB : useless */
- size_t ma, ka, kb, nb; //mb, na;
- if (ta == FflasTrans) { ma = k; ka = m; }
- else { ma = m; ka = k; }
- if (tb == FflasTrans) { kb = n; nb = k; }
- else { kb = k; nb = n; }
-#endif
- // recursive call
- WinoMain (DoubleDomain(), ta, tb, m, n, k, _alpha,
- A, lda, B, ldb, _beta, C, ldc, kmax, w,base);
- // Modular reduction
- for (double* Ci = C; Ci < C+m*ldc; Ci+=ldc)
- for (size_t j = 0; j < n; ++j)
- F.init (*(Ci + j), *(Ci + j));
-
- if (!F.areEqual (1.0, alpha) &&
- !F.areEqual (-1.0, alpha))
- // Fix-up: compute C *= alpha
- for (double* Ci=C; Ci < C+m*ldc; Ci+=ldc)
- for (size_t j=0; j < n; ++j)
- F.mulin (* (Ci + j), alpha);
+ if (!F.isOne(alpha) && !F.isMOne(alpha)){
+ typename MMHelper<Field, AlgoT, ModeCategories::LazyTag, ParSeqTrait >::DFElt al;
+ F.convert(al, alpha);
+ if (al < 0) al = -al;
+ if (std::max(-H.Outmin, H.Outmax) > H.MaxStorableValue/al){
+ freduce (F, N, X, incX);
+ fscalin (F, N, alpha, X, incX);
} else {
- WinoCalc (F, ta, tb, m/2, n/2, k/2, alpha,
- A, lda, B, ldb, beta, C, ldc, kmax,w,base);
- DynamicPealing (F, ta, tb, m, n, k, alpha,
- A, lda, B, ldb, beta, C, ldc, kmax);
- }
- }
- }
-
-
- template <>
- inline void WinoMain (const FFPACK:: ModularBalanced<float>& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n, const size_t k,
- const float alpha,
- const float* A, const size_t lda,
- const float* B, const size_t ldb,
- const float beta,
- float * C, const size_t ldc,
- const size_t kmax, const size_t w, const FFLAS_BASE base)
- {
- if (w <= 0)
- ClassicMatmul (F, ta, tb, m, n, k, alpha,
- A, lda, B, ldb, beta, C, ldc, kmax,base);
- else {
- if (k <= kmax) { // switch on float
- // Temporary float matrices
- FloatDomain::Element _alpha, _beta;
- _beta = beta;
- if (F.areEqual (-1.0, alpha)) _alpha = -1.0;
- else {
- // Compute C = A*B + beta/alpha.C
- // and then C *= alpha
- if (! F.areEqual (1.0, alpha)) {
- FFLASFFPACK_check(!F.isZero(alpha));
- F.divin (_beta, alpha);
- }
- _alpha = 1.0;
- }
-#if 0 /* BB : inutile */
- size_t ma, ka, kb, nb; //mb, na;
- if (ta == FflasTrans) { ma = k; ka = m; }
- else { ma = m; ka = k; }
- if (tb == FflasTrans) { kb = n; nb = k; }
- else { kb = k; nb = n; }
-#endif
- // recursive call
- WinoMain (FloatDomain(), ta, tb, m, n, k, _alpha,
- A, lda, B, ldb, _beta, C, ldc, kmax, w,base);
- // Conversion float = > GFq
- for (float * Ci = C; Ci != C+m*ldc; Ci+=ldc)
- for (size_t j = 0; j < n; ++j)
- F.init (*(Ci + j), *(Ci + j));
-
- if (!F.areEqual (1.0, alpha) &&
- !F.areEqual (-1.0, alpha))
- // Fix-up: compute C *= alpha
- for (float* Ci=C; Ci < C+m*ldc; Ci+=ldc)
- for (size_t j=0; j < n; ++j)
- F.mulin (* (Ci + j), alpha);
- } else{
- WinoCalc (F, ta, tb, m/2, n/2, k/2, alpha,
- A, lda, B, ldb, beta, C, ldc, kmax,w,base);
- DynamicPealing (F, ta, tb, m, n, k, alpha,
- A, lda, B, ldb, beta, C, ldc, kmax);
+ fscalin (H.delayedField, N, alpha, X, incX);
+ freduce (F, N, X, incX);
}
- }
+ } else
+ freduce (F, N, X, incX);
}
- template <>
- inline void WinoMain (const FFPACK:: Modular<double>& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n, const size_t k,
- const double alpha,
- const double* A, const size_t lda,
- const double* B, const size_t ldb,
- const double beta,
- double * C, const size_t ldc,
- const size_t kmax, const size_t w, const FFLAS_BASE base)
+ template <class Field, class AlgoT, class ParSeqTrait>
+ inline void ScalAndReduce (const Field& F, const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::Element_ptr A, const size_t lda,
+ const MMHelper<Field, AlgoT, ModeCategories::LazyTag, ParSeqTrait >& H)
{
- if (w <= 0)
- ClassicMatmul (F, ta, tb, m, n, k, alpha, A, lda, B, ldb,
- beta, C, ldc, kmax,base);
- else {
- if (k <= kmax) { // switch on delayed modulus
- DoubleDomain::Element _alpha, _beta;
- _beta = beta;
- if (F.areEqual (-1.0, alpha)) _alpha = -1.0;
- else{
- // Compute C = A*B + beta/alpha.C
- // and then C *= alpha
- if (! F.areEqual (1.0, alpha)) {
- FFLASFFPACK_check(!F.isZero(alpha));
- F.divin (_beta, alpha);
- }
- _alpha = 1.0;
- }
-
-#if 0 /* BB: inutile */
- size_t ma, ka, kb, nb; //mb, na;
- if (ta == FflasTrans) { ma = k; ka = m; }
- else { ma = m; ka = k; }
- if (tb == FflasTrans) { kb = n; nb = k; }
- else { kb = k; nb = n; }
-#endif
- // recursive call
- WinoMain (DoubleDomain(), ta, tb, m, n, k, _alpha,
- A, lda, B, ldb, _beta, C, ldc, kmax, w,base);
- // Modular reduction
-#if 0 /* timing */
- Timer t;
- t.clear();
- t.start();
-#endif
- double*Ci;
-
- // #pragma omp parallel for schedule(static) private (Ci)
- for (Ci = C; Ci < C+m*ldc; Ci+=ldc)
- for (size_t j = 0; j < n; ++j)
- F.init (*(Ci + j), *(Ci + j));
-#if 0
- t.stop();
- std::cerr<<"Reduction -> "<<t.realtime()<<std::endl;
-#endif
-
- if (!F.areEqual (1.0, alpha) &&
- !F.areEqual (-1.0, alpha))
- // Fix-up: compute C *= alpha
- for (Ci=C; Ci < C+m*ldc; Ci+=ldc)
- for (size_t j=0; j < n; ++j)
- F.mulin (* (Ci + j), alpha);
+ if (!F.isOne(alpha) && !F.isMOne(alpha)){
+ typename MMHelper<Field, AlgoT, ModeCategories::LazyTag, ParSeqTrait >::DFElt al;
+ F.convert(al, alpha);
+ if (al<0) al = -al;
+ if (std::max(-H.Outmin, H.Outmax) > H.MaxStorableValue/al){
+ freduce (F, M, N, A, lda);
+ fscalin (F, M, N, alpha, A, lda);
} else {
- WinoCalc (F, ta, tb, m/2, n/2, k/2, alpha,
- A, lda, B, ldb, beta, C, ldc, kmax,w,base);
- DynamicPealing (F, ta, tb, m, n, k, alpha,
- A, lda, B, ldb, beta, C, ldc, kmax);
+ fscalin (H.delayedField, M, N, alpha, (typename MMHelper<Field, AlgoT, ModeCategories::LazyTag, ParSeqTrait >::DFElt*)A, lda);
+ freduce (F, M, N, A, lda);
}
- }
+ } else
+ freduce (F, M, N, A, lda);
}
+ } // Protected
+} // FFLAS
- template <>
- inline void WinoMain (const FFPACK:: Modular<float>& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n, const size_t k,
- const float alpha,
- const float* A, const size_t lda,
- const float* B, const size_t ldb,
- const float beta,
- float * C, const size_t ldc,
- const size_t kmax, const size_t w, const FFLAS_BASE base)
- {
- if (w <= 0) {
- ClassicMatmul (F, ta, tb, m, n, k, alpha,
- A, lda, B, ldb, beta, C, ldc, kmax,base);
- }
- else {
- if (k <= kmax) { // switch on float
- FloatDomain::Element _alpha, _beta;
- _beta = beta;
- if (F.areEqual (-1.0, alpha)) _alpha = -1.0;
- else {
- // Compute C = A*B + beta/alpha.C
- // and then C *= alpha
- if (! F.areEqual (1.0, alpha)) {
- FFLASFFPACK_check(!F.isZero(alpha));
- F.divin (_beta, alpha);
- }
- _alpha = 1.0;
- }
-#if 0 /* BB: inutile */
- size_t ma, ka, kb, nb; //mb, na;
- if (ta == FflasTrans) { ma = k; ka = m; }
- else { ma = m; ka = k; }
- if (tb == FflasTrans) { kb = n; nb = k; }
- else { kb = k; nb = n; }
-#endif
- // recursive call
- WinoMain (FloatDomain(), ta, tb, m, n, k, _alpha,
- A, lda, B, ldb, _beta, C, ldc, kmax, w,base);
- // Conversion float = > GFq
- for (float * Ci = C; Ci != C+m*ldc; Ci+=ldc)
- for (size_t j = 0; j < n; ++j)
- F.init (*(Ci + j), *(Ci + j));
-
- if (!F.areEqual (1.0, alpha) &&
- !F.areEqual (-1.0, alpha))
- // Fix-up: compute C *= alpha
- for (float* Ci=C; Ci < C+m*ldc; Ci+=ldc)
- for (size_t j=0; j < n; ++j)
- F.mulin (* (Ci + j), alpha);
- } else{
- WinoCalc (F, ta, tb, m/2, n/2, k/2, alpha,
- A, lda, B, ldb, beta, C, ldc, kmax,w,base);
- DynamicPealing (F, ta, tb, m, n, k, alpha,
- A, lda, B, ldb, beta, C, ldc, kmax);
- }
- }
- }
+namespace FFLAS {
- template < class Field >
- inline void
- DynamicPealing (const Field& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m, const size_t n, const size_t k,
- const typename Field::Element alpha,
- const typename Field::Element* A, const size_t lda,
- const typename Field::Element* B, const size_t ldb,
- const typename Field::Element beta,
- typename Field::Element* C, const size_t ldc,
- const size_t kmax)
+ template<class Field>
+ inline typename Field::Element_ptr
+ fgemm (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n, const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Winograd, ModeCategories::ConvertTo<ElementCategories::MachineFloatTag>, ParSeqHelper::Sequential> & H)
{
- const typename Field::Element *a12, *a21, *b12, *b21;
- size_t inca12, inca21, incb12, incb21, ma, na, mb, nb;
- size_t mkn = (n & 0x1)+ ((k & 0x1) << 1)+ ((m & 0x1) << 2);
-
- if (ta == FflasTrans) {
- ma = k;
- na = m;
- a12 = A+(k-1)*lda;
- inca12 = 1;
- a21 = A+m-1;
- inca21 = lda;
- } else {
- ma = m;
- na = k;
- a12 = A+k-1;
- inca12 = lda;
- a21 = A+(m-1)*lda;
- inca21 = 1;
+ if (F.cardinality() < DOUBLE_TO_FLOAT_CROSSOVER)
+ return Protected::fgemm_convert<float,Field>(F,ta,tb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,H);
+ else if (16*F.cardinality() < Givaro::ModularBalanced<double>::maxCardinality())
+ return Protected::fgemm_convert<double,Field>(F,ta,tb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,H);
+ // else if (Protected::AreEqual<typename Field::Element,int64_t>::value) {
+ // // Stays over int64_t
+ // MMHelper<Field, MMHelperAlgo::Winograd, ModeCategories::DelayedTag, ParSeqHelper::Sequential> HG(H);
+ // H.Outmin=HG.Outmin;
+ // H.Outmax=HG.Outmax;
+ // return fgemm(F,ta,tb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,HG);
+
+ // }
+ else {
+ // Fall back case: used
+ FFPACK::failure()(__func__,__LINE__,"Invalid ConvertTo Mode for this field");
}
- if (tb == FflasTrans) {
- mb = n;
- nb = k;
- b12 = B+(n-1)*ldb;
- incb12 = 1;
- b21 = B+k-1;
- incb21 = ldb;
- } else {
- mb = k;
- nb = n;
- b12 = B+n-1;
- incb12 = ldb;
- b21 = B+(k-1)*ldb;
- incb21 = 1;
- }
- switch (mkn) {
- case 1: // n oddsized
- fgemv (F, ta, ma, na, alpha, A, lda, b12, incb12, beta, C+n-1,ldc);
- break;
-
- case 2: // k oddsized
- fger (F, m, n, alpha, a12, inca12, b21, incb21, C, ldc);
- break;
-
- case 3: // n, k oddsized
- fgemv (F, ta, ma, na, alpha, A, lda, b12, incb12, beta, C+n-1,ldc);
- fger (F, m, n-1, alpha, a12, inca12, b21, incb21, C, ldc);
- break;
-
- case 4: // m oddsized
- fgemv(F, (tb == FflasTrans)?FflasNoTrans:FflasTrans, mb, nb,
- alpha, B, ldb, a21, inca21, beta, C+(m-1)*ldc, 1);
- break;
+ return C;
+ }
+}// FFLAS
- case 5: // m, n oddsized
- if (tb == FflasTrans)
- mb--;
- else
- nb--;
- fgemv (F, ta, ma, na, alpha, A, lda, b12, incb12, beta, C+n-1, ldc);
- fgemv (F, (tb==FflasTrans)?FflasNoTrans:FflasTrans, mb, nb,
- alpha, B, ldb, a21, inca21, beta, C+(m-1)*ldc, 1);
- break;
- case 6: // m, k oddsized
- fger (F, m-1, n, alpha, a12, inca12, b21, incb21, C, ldc);
- fgemv(F, (tb==FflasTrans)?FflasNoTrans:FflasTrans, mb, nb,
- alpha, B, ldb, a21, inca21, beta, C+(m-1)*ldc, 1);
- break;
+// fgemm
+namespace FFLAS {
- case 7: // m, k, n oddsized
- if (tb == FflasTrans)
- mb--;
- else
- nb--;
- // Block NW
- fger (F, m-1, n-1, alpha, a12, inca12, b21, incb21, C, ldc);
- // Block SW
- fgemv (F, (tb==FflasTrans)?FflasNoTrans:FflasTrans, mb, nb,
- alpha, B, ldb, a21, inca21, beta, C+(m-1)*ldc, 1);
- // Block NE
- fgemv (F, ta, ma, na, alpha, A, lda, b12, incb12, beta, C+n-1, ldc);
- break;
- }
+ template<typename Field>
+ inline typename Field::Element_ptr
+ fgemm( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ const ParSeqHelper::Sequential seq)
+ {
+ MMHelper<Field, MMHelperAlgo::Auto, typename FFLAS::ModeTraits<Field>::value, ParSeqHelper::Sequential > HW (F, m, k, n, seq);
+ return fgemm (F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, HW);
}
- } // Winomain
-
- // Unsafe matmul over Z
- // For internal usage only (or use it with care)
- template<>
- inline double*
- fgemm< FFPACK:: UnparametricField<double> > ( const FFPACK:: UnparametricField<double>& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m,
- const size_t n,
- const size_t k,
- const double alpha,
- const double* A, const size_t lda,
- const double* B, const size_t ldb,
- const double beta,
- double* C, const size_t ldc,
- const size_t w)
+ template<typename Field,class Cut,class Param>
+ inline typename Field::Element_ptr
+ fgemm( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ const ParSeqHelper::Parallel<Cut,Param> par)
{
- if (!(m && n && k)) return C;
+ MMHelper<Field, MMHelperAlgo::Auto, typename FFLAS::ModeTraits<Field>::value, ParSeqHelper::Parallel<Cut,Param> > HW (F, m, k, n, par);
+ return fgemm (F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, HW);
+ }
- if (F.isZero (alpha)){
- for (size_t i = 0; i<m; ++i)
- fscal(F, n, beta, C + i*ldc, 1);
- return C;
+ template<typename Field>
+ inline typename Field::Element_ptr
+ fgemm( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc)
+ {
+ if (!m || !n) {return C;}
+
+ if (!k || F.isZero (alpha)){
+ fscalin(F, m, n, beta, C, ldc);
+ return C;
}
-
-
- Protected::WinoMain (F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta,
- C, ldc, k+1, w, FflasDouble);
- return C;
+ return fgemm(F,ta,tb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,FFLAS::ParSeqHelper::Sequential());
}
- template<>
- inline float*
- fgemm< FFPACK:: UnparametricField<float> > ( const FFPACK:: UnparametricField<float>& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m,
- const size_t n,
- const size_t k,
- const float alpha,
- const float* A, const size_t lda,
- const float* B, const size_t ldb,
- const float beta,
- float* C, const size_t ldc,
- const size_t w)
+ template<typename Field, class ModeT, class ParSeq>
+ inline typename Field::Element_ptr
+ fgemm( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Auto, ModeT, ParSeq> & H)
{
+ MMHelper<Field, typename AlgoChooser<ModeT, ParSeq>::value, ModeT, ParSeq> HW (H);
+ return fgemm(F,ta,tb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,HW);
+ }
- if (!(m && n && k)) return C;
-
- if (F.isZero (alpha)){
- for (size_t i = 0; i<m; ++i)
- fscal(F, n, beta, C + i*ldc, 1);
+ template<class Field>
+ inline typename Field::Element_ptr
+ fgemm( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Winograd, ModeCategories::DelayedTag, ParSeqHelper::Sequential> & H)
+ {
+ if (!m || !n) {return C;}
+
+ if (!k || F.isZero (alpha)){
+ fscalin(F, m, n, beta, C, ldc);
return C;
}
+#ifndef NDEBUG
+ /* check if alpha is invertible.
+ * XXX do it in F.isInvertible(Element&) ?
+ * XXX do it in return status of F.inv(Element&,Element&)
+ */
+ typename Field::Element e ;
+ F.assign(e,beta);
+ F.divin(e,alpha);
+ F.mulin(e,alpha);
+ FFLASFFPACK_check(F.areEqual(e,beta));
+#endif
- Protected::WinoMain (F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta,
- C, ldc, k+1, w, FflasFloat);
- return C;
- }
+#if 0
+ // detect fgemv
+ if (n == 1 and ...) {}
+ // detect fger
+ if (k==1 and ...) {}
+#endif
+ if (Protected::AreEqual<Field, Givaro::Modular<double> >::value ||
+ Protected::AreEqual<Field, Givaro::ModularBalanced<double> >::value){
+ //Givaro::Modular<double> need to switch to float if p too small
+ if (F.characteristic() < DOUBLE_TO_FLOAT_CROSSOVER)
+ return Protected::fgemm_convert<float,Field>(F,ta,tb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,H);
+ }
+ if (Protected::AreEqual<Field, Givaro::Modular<int64_t> >::value ||
+ Protected::AreEqual<Field, Givaro::ModularBalanced<int64_t> >::value)
+ if (16*F.cardinality() < Givaro::ModularBalanced<double>::maxCardinality())
+ return Protected::fgemm_convert<double,Field>(F,ta,tb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,H);
+
+ typename Field::Element alpha_,beta_;
+ if ( !F.isOne(alpha) && !F.isMOne(alpha)){
+ F.assign (alpha_, F.one);
+ F.div (beta_, beta, alpha);
+ } else {
+ F.assign (alpha_,alpha);
+ F.assign (beta_,beta);
+ }
+ MMHelper<Field, MMHelperAlgo::Winograd, ModeCategories::LazyTag> HD(H);
+ // std::cerr<<"\n Delayed -> Lazy alpha_ = "<<alpha_<<std::endl;
+ // std::cerr<<" A = "<<*A<<"\n B = "<<*B<<"\n C = "<<*C<<"\n alpha, beta ="<<alpha<<" "<<beta<<std::endl;
+ fgemm (F, ta, tb, m, n, k, alpha_, A, lda, B, ldb, beta_, C, ldc, HD);
+ // std::cerr<<"Sortie de fgemm Lazy C = "<<*C<<std::endl;
+ Protected::ScalAndReduce (F, m, n, alpha, C, ldc, HD);
+ // std::cerr<<"Sortie de ScalAndReduce C = "<<*C<<std::endl;
- template<>
- inline double*
- fgemm< FFPACK:: UnparametricField<double> > (const FFPACK:: UnparametricField<double>& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m,
- const size_t n,
- const size_t k,
- const double alpha,
- const double* A, const size_t lda,
- const double* B, const size_t ldb,
- const double beta,
- double* C, const size_t ldc)
- {
- return fgemm (F, ta, tb, m, n ,k, alpha, A, lda, B, ldb, beta, C, ldc, WinoSteps (MIN(m,MIN(k,n))));
- }
+ H.initOut();
- template<>
- inline float*
- fgemm< FFPACK:: UnparametricField<float> > (const FFPACK:: UnparametricField<float>& F,
- const FFLAS_TRANSPOSE ta,
- const FFLAS_TRANSPOSE tb,
- const size_t m,
- const size_t n,
- const size_t k,
- const float alpha,
- const float* A, const size_t lda,
- const float* B, const size_t ldb,
- const float beta,
- float* C, const size_t ldc)
- {
- return fgemm (F, ta, tb, m, n ,k, alpha, A, lda, B, ldb, beta, C, ldc, WinoSteps (MIN(m,MIN(k,n))));
+ return C;
}
+} // FFLAS
+// #include "fflas_fgemm/matmul_algos.inl"
+#include "fflas_fgemm/fgemm_classical.inl"
+#include "fflas_fgemm/fgemm_winograd.inl"
+// #include "fflas_fgemm/gemm_bini.inl"
+// fsquare
+namespace FFLAS {
template < class Field >
- inline typename Field::Element*
+ inline typename Field::Element_ptr
fsquare (const Field& F,
- const FFLAS_TRANSPOSE ta,
- const size_t n, const typename Field::Element alpha,
- const typename Field::Element* A, const size_t lda,
- const typename Field::Element beta,
- typename Field::Element* C, const size_t ldc)
+ const FFLAS_TRANSPOSE ta,
+ const size_t n, const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc)
{
double alphad, betad;
F.convert (alphad, alpha);
- if (F.areEqual (beta, F.mOne))
+ if (F.isMOne (beta))
betad = -1.0;
else
F.convert (betad, beta);
- // Double matrices initialisation
- DoubleDomain::Element * Ad = new DoubleDomain::Element[n*n];
- DoubleDomain::Element * Cd = new DoubleDomain::Element[n*n];
- // Conversion finite Field = > double
- Protected::MatF2MatD (F, Ad, n, A, lda, n, n);
- if (!F.isZero(beta)) Protected::MatF2MatD (F, Cd, n, C, ldc, n, n);
+ //! @bug why double ?
+ // Double matrices initialisation
+ Givaro::DoubleDomain::Element_ptr Ad = fflas_new (Givaro::DoubleDomain(),n,n);
+ Givaro::DoubleDomain::Element_ptr Cd = fflas_new (Givaro::DoubleDomain(),n,n);
+ // Conversion finite Field = > double
+ fconvert (F, n, n, Ad, n, A, lda);
+ if (!F.isZero(beta)) fconvert(F, n, n, Cd, n, C, ldc);
- // Call to the blas Multiplication
+ // Call to the blas Multiplication
+ FFLASFFPACK_check(n);
cblas_dgemm (CblasRowMajor, (CBLAS_TRANSPOSE)ta,
- (CBLAS_TRANSPOSE)ta, (int)n, (int)n, (int)n,
- (DoubleDomain::Element) alphad, Ad, (int)n, Ad, (int)n,
- (DoubleDomain::Element) betad, Cd, (int)n);
- // Conversion double = > Finite Field
- delete[] Ad;
- Protected::MatD2MatF (F, C, ldc, Cd, n, n, n);
- delete[] Cd;
+ (CBLAS_TRANSPOSE)ta, (int)n, (int)n, (int)n,
+ (Givaro::DoubleDomain::Element) alphad, Ad, (int)n, Ad, (int)n,
+ (Givaro::DoubleDomain::Element) betad, Cd, (int)n);
+ // Conversion double = > Finite Field
+ fflas_delete (Ad);
+ finit (F,n,n, Cd, n, C, ldc);
+ fflas_delete (Cd);
return C;
}
+ namespace Protected {
+
+ // F is Modular(Balanced)<float/double>
+ template < class Field >
+ inline typename Field::Element_ptr
+ fsquareCommon (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const size_t n, const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc)
+ {
+ if (C==A) {
+ typename Field::Element_ptr Ad = fflas_new (F, n, n);
+ fassign(F,n,n,A,lda,Ad,n);
+ fgemm (F, ta, ta, n, n, n, alpha, Ad, n, Ad, n, beta, C, ldc);
+ fflas_delete (Ad);
+ }
+ else
+ fgemm (F, ta, ta, n, n, n, alpha, A, lda, A, lda, beta, C, ldc);
+ return C;
+
+ }
+
+ } // Protected
+
template <>
- inline double* fsquare (const FFPACK:: ModularBalanced<double> & F,
- const FFLAS_TRANSPOSE ta,
- const size_t n, const double alpha,
- const double* A, const size_t lda,
- const double beta,
- double* C, const size_t ldc)
+ inline double* fsquare (const Givaro::ModularBalanced<double> & F,
+ const FFLAS_TRANSPOSE ta,
+ const size_t n, const double alpha,
+ const double* A, const size_t lda,
+ const double beta,
+ double* C, const size_t ldc)
{
- if (C==A) {
- double * Ad = new double[n*n];
- for (size_t i=0; i < n; ++i)
- fcopy (F, n,Ad+i*n, 1, A+i*lda, 1);
- fgemm (F, ta, ta, n, n, n, alpha, Ad, n, Ad, n, beta, C, ldc);
- delete[] Ad;
- } else
- fgemm (F, ta, ta, n, n, n, alpha, A, lda, A, lda, beta, C, ldc);
- // Conversion double = > Finite Field
- size_t i;
- double *Ci;
- for (i=0, Ci=C ; i < n;++i, Ci+=ldc)
- for (size_t j=0; j < n;++j)
- F.init(*(Ci+j),*(Ci+j));
- return C;
+ return Protected::fsquareCommon(F,ta,n,alpha,A,lda,beta,C,ldc);
}
template <>
- inline float * fsquare (const FFPACK:: ModularBalanced<float> & F,
- const FFLAS_TRANSPOSE ta,
- const size_t n, const float alpha,
- const float* A, const size_t lda,
- const float beta,
- float* C, const size_t ldc)
+ inline float * fsquare (const Givaro::ModularBalanced<float> & F,
+ const FFLAS_TRANSPOSE ta,
+ const size_t n, const float alpha,
+ const float* A, const size_t lda,
+ const float beta,
+ float* C, const size_t ldc)
{
- if (C==A) {
- float * Ad = new float[n*n];
- for (size_t i=0; i < n; ++i)
- fcopy (F, n,Ad+i*n, 1, A+i*lda, 1);
- fgemm (F, ta, ta, n, n, n, alpha, Ad, n, Ad, n, beta, C, ldc);
- delete[] Ad;
- } else
- fgemm (F, ta, ta, n, n, n, alpha, A, lda, A, lda, beta, C, ldc);
- // Conversion float = > Finite Field
- size_t i;
- float *Ci;
- for (i=0, Ci=C ; i < n;++i, Ci+=ldc)
- for (size_t j=0; j < n;++j)
- F.init(*(Ci+j),*(Ci+j));
- return C;
+ return Protected::fsquareCommon(F,ta,n,alpha,A,lda,beta,C,ldc);
}
template <>
- inline double* fsquare (const FFPACK:: Modular<double> & F,
- const FFLAS_TRANSPOSE ta,
- const size_t n, const double alpha,
- const double* A, const size_t lda,
- const double beta,
- double* C, const size_t ldc)
+ inline double* fsquare (const Givaro::Modular<double> & F,
+ const FFLAS_TRANSPOSE ta,
+ const size_t n, const double alpha,
+ const double* A, const size_t lda,
+ const double beta,
+ double* C, const size_t ldc)
{
- if (C==A) {
- double * Ad = new double[n*n];
- for (size_t i=0; i < n; ++i)
- fcopy (F, n,Ad+i*n, 1, A+i*lda, 1);
- fgemm (F, ta, ta, n, n, n, alpha, Ad, n, Ad, n, beta, C, ldc);
- delete[] Ad;
- } else
- fgemm (F, ta, ta, n, n, n, alpha, A, lda, A, lda, beta, C, ldc);
- // Conversion double = > Finite Field
- double *Ci = C;
- for (size_t i=0; i < n;++i, Ci+=ldc)
- for (size_t j=0; j < n;++j)
- F.init(*(Ci+j),*(Ci+j));
- return C;
+ return Protected::fsquareCommon(F,ta,n,alpha,A,lda,beta,C,ldc);
}
template <>
- inline float * fsquare (const FFPACK:: Modular<float> & F,
- const FFLAS_TRANSPOSE ta,
- const size_t n, const float alpha,
- const float* A, const size_t lda,
- const float beta,
- float* C, const size_t ldc)
+ inline float * fsquare (const Givaro::Modular<float> & F,
+ const FFLAS_TRANSPOSE ta,
+ const size_t n, const float alpha,
+ const float* A, const size_t lda,
+ const float beta,
+ float* C, const size_t ldc)
{
- if (C==A) {
- float * Ad = new float[n*n];
- for (size_t i=0; i < n; ++i)
- fcopy (F, n,Ad+i*n, 1, A+i*lda, 1);
- fgemm (F, ta, ta, n, n, n, alpha, Ad, n, Ad, n, beta, C, ldc);
- delete[] Ad;
- } else
- fgemm (F, ta, ta, n, n, n, alpha, A, lda, A, lda, beta, C, ldc);
- // Conversion float = > Finite Field
- float *Ci = C;
- for (size_t i=0; i < n;++i, Ci+=ldc)
- for (size_t j=0; j < n;++j)
- F.init(*(Ci+j),*(Ci+j));
- return C;
+ return Protected::fsquareCommon(F,ta,n,alpha,A,lda,beta,C,ldc);
}
} // FFLAS
diff --git a/benchmark/Makefile.am b/fflas-ffpack/fflas/fflas_fgemm/Makefile.am
similarity index 62%
copy from benchmark/Makefile.am
copy to fflas-ffpack/fflas/fflas_fgemm/Makefile.am
index 31793b2..b269290 100644
--- a/benchmark/Makefile.am
+++ b/fflas-ffpack/fflas/fflas_fgemm/Makefile.am
@@ -1,5 +1,7 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+#
+#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -19,10 +21,19 @@
# ========LICENCE========
#/
-#
-# Nothing yet
-SUBDIRS=graph src html test-src
-#
-EXTRA_DIST=run.sh
+pkgincludesubdir=$(pkgincludedir)/fflas/fflas_fgemm
+
+EXTRA_DIST=matmul.doxy
+
+multiprecision=fgemm_classical_mp.inl
+pkgincludesub_HEADERS= \
+ fgemm_classical.inl \
+ fgemm_winograd.inl \
+ schedule_winograd.inl \
+ schedule_winograd_acc.inl \
+ schedule_bini.inl \
+ schedule_winograd_acc_ip.inl \
+ schedule_winograd_ip.inl \
+ ${multiprecision}
diff --git a/fflas-ffpack/fflas/fflas_fgemm/fgemm_classical.inl b/fflas-ffpack/fflas/fflas_fgemm/fgemm_classical.inl
new file mode 100644
index 0000000..339861d
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fgemm/fgemm_classical.inl
@@ -0,0 +1,312 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2008, 2014 the FFLAS-FFPACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas_fgemm/fgemm_classical.inl
+ * @brief Classical \f$2n^3\$f matrix multiplication.
+ * @warning The domain is supposed to be a field since some divisions are required for efficiency purposes
+ * An alternative has to be written for finite rings if necessary
+ */
+
+#ifndef __FFLASFFPACK_fflas_fflas_fgemm_classical_INL
+#define __FFLASFFPACK_fflas_fflas_fgemm_classical_INL
+
+#include <cmath>
+
+#include "fflas-ffpack/field/field-traits.h"
+#if defined(__AVX2__) or defined(__AVX__) or defined(__SSE4_1__)
+#include "fflas-ffpack/fflas/fflas_igemm/igemm.h"
+#endif
+#include "fflas-ffpack/utils/Matio.h"
+namespace FFLAS {
+
+ // F is a field supporting delayed reductions
+ template<class Field>
+ inline void fgemm (const Field & F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n,const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::LazyTag> & H)
+ {
+ // Input matrices are unreduced: need to figure out the best option between:
+ // - reducing them
+ // - making possibly more blocks (smaller kmax)
+ typedef MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::LazyTag> HelperType;
+ typename HelperType::DelayedField::Element alphadf, betadf;
+ betadf = beta;
+
+ if (F.isMOne (alpha)) {
+ alphadf = -H.delayedField.one;
+ } else {
+ alphadf = F.one;
+ if (! F.isOne( alpha)) {
+ // Compute y = A*x + beta/alpha.y
+ // and after y *= alpha
+ FFLASFFPACK_check(!F.isZero(alpha));
+ typename Field::Element betadalpha;
+ F.init(betadalpha);
+ F.div (betadalpha, beta, alpha);
+ betadf = betadalpha;
+ }
+ }
+
+ if (F.isMOne(betadf)) betadf = -F.one;
+
+ size_t kmax = H.MaxDelayedDim (betadf);
+ H.checkA(F,ta, m,k,A,lda);
+ H.checkB(F,tb, k,n,B,ldb);
+ if (kmax <= k/2 || H.Aunfit() || H.Bunfit() ){
+ // Might as well reduce inputs
+ if (H.Amin < H.FieldMin || H.Amax>H.FieldMax){
+ H.initA();
+ freduce_constoverride (F, (ta==FflasNoTrans)?m:k, (ta==FflasNoTrans)?k:m, A, lda);
+ }
+ if (H.Bmin < H.FieldMin || H.Bmax>H.FieldMax){
+ H.initB();
+ freduce_constoverride (F, (tb==FflasNoTrans)?k:n, (tb==FflasNoTrans)?n:k, B, ldb);
+ }
+ if (H.Cmin < H.FieldMin || H.Cmax>H.FieldMax){
+ H.initC();
+ freduce (F, m, n, C, ldc);
+ }
+ kmax = H.MaxDelayedDim (betadf);
+ }
+
+ if (!kmax){
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DefaultTag> HG(H);
+ H.initOut();
+ return fgemm (F, ta, tb, m,n,k,alpha, A, lda, B, ldb, beta, C, ldc, HG);
+ }
+
+ size_t k2 = std::min(k,kmax);
+ size_t nblock = k / kmax;
+ size_t remblock = k % kmax;
+ if (!remblock) {
+ remblock = kmax;
+ --nblock;
+ }
+ size_t shiftA, shiftB;
+ if (ta == FflasTrans) shiftA = k2*lda;
+ else shiftA = k2;
+ if (tb == FflasTrans) shiftB = k2;
+ else shiftB = k2*ldb;
+
+ typedef MMHelper<typename HelperType::DelayedField, MMHelperAlgo::Classic, ModeCategories::DefaultBoundedTag> DelayedHelper_t;
+ DelayedHelper_t Hfp(H);
+ typedef typename HelperType::DelayedField::Element DFElt;
+ typedef typename HelperType::DelayedField::Element_ptr DFElt_ptr;
+ typedef typename HelperType::DelayedField::ConstElement_ptr DFCElt_ptr;
+
+ fgemm (H.delayedField, ta, tb, m, n, remblock, alphadf,
+ (DFCElt_ptr)A +nblock*shiftA, lda,
+ (DFCElt_ptr)B +nblock*shiftB, ldb, betadf,
+ (DFElt_ptr)C, ldc, Hfp);
+
+ for (size_t i = 0; i < nblock; ++i) {
+ freduce (F, m, n, C, ldc);
+ Hfp.initC();
+ fgemm (H.delayedField, ta, tb, m, n, k2, alphadf,
+ (DFCElt_ptr)A +i*shiftA, lda,
+ (DFCElt_ptr)B +i*shiftB, ldb, F.one,
+ (DFElt_ptr)C, ldc, Hfp);
+ }
+
+ if (!F.isOne(alpha) && !F.isMOne(alpha)){
+ DFElt al; F.convert(al, alpha);
+ if (al<0) al = -al;
+ // This cast is needed when Outmin base type is int8/16_t,
+ // getting -Outmin returns a int, not the same base type.
+ if (std::max(static_cast<const decltype(Hfp.Outmin)&>(-Hfp.Outmin), Hfp.Outmax)
+ >Hfp.MaxStorableValue/al){
+ freduce (F, m, n, C, ldc);
+ Hfp.initOut();
+ }
+
+ fscalin(H.delayedField, m,n,alpha,(typename DelayedHelper_t::DelayedField_t::Element_ptr)C,ldc);
+
+ if (alpha>0){
+ H.Outmin = (const DFElt)(alpha) * Hfp.Outmin;
+ H.Outmax = (const DFElt)alpha * Hfp.Outmax;
+ } else {
+ H.Outmin = (const DFElt)alpha * Hfp.Outmax;
+ H.Outmax = (const DFElt)alpha * Hfp.Outmin;
+ }
+ }else {
+ H.Outmin = Hfp.Outmin;
+ H.Outmax = Hfp.Outmax;
+ }
+ H.checkOut(F,m,n,C,ldc);
+ }
+} // FFLAS
+
+namespace FFLAS {
+
+ // Classic multiplication over a generic finite field
+ template < class Field>
+ inline void fgemm (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n,const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DefaultTag> & H)
+ {
+ if (F.isZero (alpha)) {
+ fscalin(F, m, n, beta, C, ldc);
+ return;
+ }
+ // Standard algorithm is performed over the Field, without conversion
+ if (F.isZero (beta))
+ fzero (F, m, n, C, ldc);
+ else {
+ typename Field::Element betadivalpha;
+ F.init(betadivalpha);
+ F.div (betadivalpha, beta, alpha);
+ fscalin(F,m,n,betadivalpha,C,ldc);
+ }
+ if (ta == FflasNoTrans)
+ if (tb == FflasNoTrans)
+ for (size_t i = 0; i < m; ++i)
+ for (size_t l = 0; l < k; ++l)
+ for (size_t j = 0; j < n; ++j)
+ F.axpyin (*(C+i*ldc+j), *(A+i*lda+l), *(B+l*ldb+j));
+ else
+ for (size_t i = 0; i < m; ++i)
+ for (size_t j = 0; j < n; ++j)
+ for (size_t l = 0; l < k; ++l)
+ F.axpyin (*(C+i*ldc+j), *(A+i*lda+l), *(B+j*ldb+l));
+ else
+ if (tb == FflasNoTrans)
+ for (size_t i = 0; i < m; ++i)
+ for (size_t l = 0; l < k; ++l)
+ for (size_t j = 0; j < n; ++j)
+ F.axpyin (*(C+i*ldc+j), *(A+l*lda+i), *(B+l*ldb+j));
+ else
+ for (size_t i = 0; i < m; ++i)
+ for (size_t j = 0; j < n; ++j)
+ for (size_t l = 0; l < k; ++l)
+ F.axpyin (*(C+i*ldc+j), *(A+l*lda+i), *(B+j*ldb+l));
+ fscalin(F,m,n,alpha,C,ldc);
+ }
+ template < class Field>
+ inline void fgemm (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n,const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DefaultBoundedTag> & H)
+ {
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DefaultTag> Hd(F,0);
+ fgemm (F,ta,tb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,Hd);
+ H.setOutBounds (k,alpha,beta);
+ }
+
+ inline void fgemm (const Givaro::DoubleDomain& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n,const size_t k,
+ const Givaro::DoubleDomain::Element alpha,
+ Givaro::DoubleDomain::ConstElement_ptr Ad, const size_t lda,
+ Givaro::DoubleDomain::ConstElement_ptr Bd, const size_t ldb,
+ const Givaro::DoubleDomain::Element beta,
+ Givaro::DoubleDomain::Element_ptr Cd, const size_t ldc,
+ MMHelper<Givaro::DoubleDomain, MMHelperAlgo::Classic, ModeCategories::DefaultTag> &H)
+ {
+ FFLASFFPACK_check(lda);
+ FFLASFFPACK_check(ldb);
+ FFLASFFPACK_check(ldc);
+
+ cblas_dgemm (CblasRowMajor, (CBLAS_TRANSPOSE) ta, (CBLAS_TRANSPOSE) tb,
+ (int)m, (int)n, (int)k, (Givaro::DoubleDomain::Element) alpha,
+ Ad, (int)lda, Bd, (int)ldb, (Givaro::DoubleDomain::Element) beta, Cd, (int)ldc);
+ }
+
+ inline void fgemm (const Givaro::FloatDomain& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n,const size_t k,
+ const Givaro::FloatDomain::Element alpha,
+ Givaro::FloatDomain::ConstElement_ptr Ad, const size_t lda,
+ Givaro::FloatDomain::ConstElement_ptr Bd, const size_t ldb,
+ const Givaro::FloatDomain::Element beta,
+ Givaro::FloatDomain::Element_ptr Cd, const size_t ldc,
+ MMHelper<Givaro::FloatDomain, MMHelperAlgo::Classic,ModeCategories::DefaultTag> & H)
+ {
+ FFLASFFPACK_check(lda);
+ FFLASFFPACK_check(ldb);
+ FFLASFFPACK_check(ldc);
+
+ cblas_sgemm (CblasRowMajor, (CBLAS_TRANSPOSE) ta, (CBLAS_TRANSPOSE) tb,
+ (int)m, (int)n, (int)k, (Givaro::FloatDomain::Element) alpha,
+ Ad, (int)lda, Bd, (int)ldb, (Givaro::FloatDomain::Element) beta,Cd, (int)ldc);
+ }
+
+ inline void fgemm (const Givaro::ZRing<int64_t>& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n,const size_t k,
+ const int64_t alpha,
+ const int64_t * Ad, const size_t lda,
+ const int64_t * Bd, const size_t ldb,
+ const int64_t beta,
+ int64_t * Cd, const size_t ldc,
+ MMHelper<Givaro::ZRing<int64_t>, MMHelperAlgo::Classic, ModeCategories::DefaultTag> & H)
+ {
+ FFLASFFPACK_check(lda);
+ FFLASFFPACK_check(ldb);
+ FFLASFFPACK_check(ldc);
+
+#if defined(__AVX2__) or defined(__AVX__) or defined(__SSE4_1__)
+ igemm_ (FflasRowMajor, ta, tb, (int)m, (int)n, (int)k, alpha, Ad, (int)lda, Bd, (int)ldb, beta, Cd, (int)ldc);
+#else
+ for (size_t i=0; i<m; i++){
+ for (size_t j=0; j<n; j++)
+ Cd[i*ldc+j] *= beta;
+ for (size_t l=0; l<k; l++){
+ int64_t a = alpha* ((ta==FflasNoTrans) ? Ad[i*lda+l] : Ad[i+l*lda]);
+ for (size_t j=0; j<n; j++)
+ Cd[i*ldc+j] += a*((tb==FflasNoTrans) ? Bd[l*ldb+j] : Bd[l+j*ldb]);
+ }
+ }
+#endif
+ }
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_fflas_fgemm_classical_INL
diff --git a/fflas-ffpack/fflas/fflas_fgemm/fgemm_classical_mp.inl b/fflas-ffpack/fflas/fflas_fgemm/fgemm_classical_mp.inl
new file mode 100644
index 0000000..26f9dbd
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fgemm/fgemm_classical_mp.inl
@@ -0,0 +1,480 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+/** @file fflas_fgemm/fgemm_classical_mp.inl
+ * @brief matrix multiplication with multiprecision input (either over Z or over Z/pZ)
+ */
+
+
+#ifndef __FFPACK_fgemm_classical_INL
+#define __FFPACK_fgemm_classical_INL
+
+#include <givaro/modular-integer.h>
+#include <givaro/zring.h>
+#ifdef PROFILE_FGEMM_MP
+#include "fflas-ffpack/utils/timer.h"
+#endif
+#include "fflas-ffpack/field/rns-double.h"
+#include "fflas-ffpack/field/rns-integer.h"
+#include "fflas-ffpack/field/rns-integer-mod.h"
+#include "fflas-ffpack/field/field-traits.h"
+#include "fflas-ffpack/fflas/fflas_helpers.inl"
+#include "fflas-ffpack/fflas/fflas_bounds.inl"
+namespace FFLAS {
+
+ template<typename Field,
+ typename AlgoTrait,
+ typename ParSeqTrait>
+ struct MMHelper<Field, AlgoTrait,ModeCategories::ConvertTo<ElementCategories::RNSElementTag>, ParSeqTrait> {
+ Givaro::Integer normA,normB;
+ int recLevel;
+ ParSeqTrait parseq;
+ MMHelper() : normA(0), normB(0), recLevel(-1) {}
+ template <class F2, class A2, class M2, class PS2>
+ MMHelper(MMHelper<F2, A2, M2, PS2> H2) :
+ normA(H2.normA), normB(H2.normB), recLevel(H2.recLevel), parseq(H2.parseq) {}
+ MMHelper(Givaro::Integer Amax, Givaro::Integer Bmax) : normA(Amax), normB(Bmax), recLevel(-1) {}
+ MMHelper(const Field& F, size_t m, size_t n, size_t k, ParSeqTrait PS=ParSeqTrait())
+ : recLevel(-1), parseq(PS)
+ {F.characteristic(normA);F.characteristic(normB);}
+ MMHelper(const Field& F, int wino, ParSeqTrait PS=ParSeqTrait()) : recLevel(wino), parseq(PS)
+ {F.characteristic(normA);F.characteristic(normB);}
+ void setNorm(Givaro::Integer p){normA=normB=p;}
+ };
+ template<typename E,
+ typename AlgoTrait,
+ typename ParSeqTrait>
+ struct MMHelper<FFPACK::RNSInteger<E>, AlgoTrait,ModeCategories::DefaultTag, ParSeqTrait> {
+ Givaro::Integer normA,normB;
+ int recLevel;
+ ParSeqTrait parseq;
+ MMHelper() : normA(0), normB(0), recLevel(-1) {}
+ MMHelper(Givaro::Integer Amax, Givaro::Integer Bmax) : normA(Amax), normB(Bmax), recLevel(-1) {}
+ MMHelper(const FFPACK::RNSInteger<E>& F, size_t m, size_t n, size_t k, ParSeqTrait PS=ParSeqTrait())
+ : recLevel(-1), parseq(PS)
+ {F.characteristic(normA);F.characteristic(normB);}
+ MMHelper(const FFPACK::RNSInteger<E>& F, int wino, ParSeqTrait PS=ParSeqTrait()) : recLevel(wino), parseq(PS)
+ {F.characteristic(normA);F.characteristic(normB);}
+ template <class F2, class A2, class M2, class PS2>
+ MMHelper(MMHelper<F2, A2, M2, PS2> H2) :
+ normA(H2.normA), normB(H2.normB), recLevel(H2.recLevel), parseq(H2.parseq) {}
+ void setNorm(Givaro::Integer p){normA=normB=p;}
+ };
+ template<typename E,
+ typename AlgoTrait,
+ typename ParSeqTrait>
+ struct MMHelper<FFPACK::RNSIntegerMod<E>, AlgoTrait,ModeCategories::DefaultTag, ParSeqTrait> {
+ Givaro::Integer normA,normB;
+ int recLevel;
+ ParSeqTrait parseq;
+ MMHelper() : normA(0), normB(0), recLevel(-1) {}
+ MMHelper(Givaro::Integer Amax, Givaro::Integer Bmax) : normA(Amax), normB(Bmax), recLevel(-1) {}
+ MMHelper(const FFPACK::RNSIntegerMod<E>& F, size_t m, size_t n, size_t k, ParSeqTrait PS=ParSeqTrait())
+ : recLevel(-1), parseq(PS)
+ {F.characteristic(normA);F.characteristic(normB);}
+ MMHelper(const FFPACK::RNSIntegerMod<E>& F, int wino, ParSeqTrait PS=ParSeqTrait()) : recLevel(wino), parseq(PS)
+ {F.characteristic(normA);F.characteristic(normB);}
+ // copy constructor from other Field and Algo Traits
+ template<class F2, typename AlgoT2, typename FT2, typename PS2>
+ MMHelper(MMHelper<F2, AlgoT2, FT2, PS2>& WH) : recLevel(WH.recLevel), parseq(WH.parseq) {}
+
+ void setNorm(Givaro::Integer p){normA=normB=p;}
+ };
+
+ /***********************************
+ *** MULTIPRECISION FGEMM OVER Z ***
+ ***********************************/
+
+ // fgemm for RnsInteger sequential version
+ template<typename RNS>
+ inline typename FFPACK::RNSInteger<RNS>::Element_ptr
+ fgemm (const FFPACK::RNSInteger<RNS> &F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n,const size_t k,
+ const typename FFPACK::RNSInteger<RNS>::Element alpha,
+ typename FFPACK::RNSInteger<RNS>::ConstElement_ptr Ad, const size_t lda,
+ typename FFPACK::RNSInteger<RNS>::ConstElement_ptr Bd, const size_t ldb,
+ const typename FFPACK::RNSInteger<RNS>::Element beta,
+ typename FFPACK::RNSInteger<RNS>::Element_ptr Cd, const size_t ldc,
+ MMHelper<FFPACK::RNSInteger<RNS>, MMHelperAlgo::Classic,ModeCategories::DefaultTag, ParSeqHelper::Sequential> & H)
+ {
+
+ // compute each fgemm componentwise
+#ifdef FFT_PROFILER
+ Givaro::Timer t;t.start();
+#endif
+ for(size_t i=0;i<F.size();i++){
+ MMHelper<typename RNS::ModField,MMHelperAlgo::Winograd> H2(F.rns()._field_rns[i], H.recLevel, H.parseq);
+ FFLAS::fgemm(F.rns()._field_rns[i],ta,tb,
+ m, n, k, alpha._ptr[i*alpha._stride],
+ Ad._ptr+i*Ad._stride, lda,
+ Bd._ptr+i*Bd._stride, ldb,
+ beta._ptr[i*beta._stride],
+ Cd._ptr+i*Cd._stride, ldc, H2);
+ }
+#ifdef FFT_PROFILER
+ t.stop();
+
+ std::cerr<<"=========================================="<<std::endl
+ <<"Pointwise fgemm : "<<t.realtime()<<" ("<<F.size()<<") moduli "<<std::endl
+ <<"=========================================="<<std::endl;
+#endif
+ return Cd;
+ }
+
+ // fgemm for RnsInteger parallel version
+ template<typename RNS, typename Cut, typename Param>
+ inline typename FFPACK::RNSInteger<RNS>::Element_ptr
+ fgemm (const FFPACK::RNSInteger<RNS> &F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n,const size_t k,
+ const typename FFPACK::RNSInteger<RNS>::Element alpha,
+ typename FFPACK::RNSInteger<RNS>::ConstElement_ptr Ad, const size_t lda,
+ typename FFPACK::RNSInteger<RNS>::ConstElement_ptr Bd, const size_t ldb,
+ const typename FFPACK::RNSInteger<RNS>::Element beta,
+ typename FFPACK::RNSInteger<RNS>::Element_ptr Cd, const size_t ldc,
+ MMHelper<FFPACK::RNSInteger<RNS>, MMHelperAlgo::Classic, ModeCategories::DefaultTag, ParSeqHelper::Parallel<Cut,Param> > & H)
+ {
+ // compute each fgemm componentwise
+ size_t s=F.size();
+ size_t nt=H.parseq.numthreads();
+ size_t loop_nt = std::min(s,nt);
+ size_t iter_nt = nt / loop_nt;
+ size_t leftover_nt = nt % loop_nt;
+ //std::cerr<<"iter_nt = "<<iter_nt<<" loop_nt = "<<loop_nt<<" leftover_nt = "<<leftover_nt<<std::endl;
+ ParSeqHelper::Parallel<Cut,Param> sp(loop_nt);
+ //#endif
+#ifdef FFT_PROFILER
+ Givaro::Timer t;t.start();
+#endif
+ typedef MMHelper<typename RNS::ModField,
+ MMHelperAlgo::Winograd,
+ typename ModeTraits<typename RNS::ModField>::value,
+ ParSeqHelper::Parallel<Cut,Param> > MMH_par_t;
+
+ typedef MMHelper<typename RNS::ModField,MMHelperAlgo::Winograd> MMH_seq_t;
+ FORBLOCK1D(iter,s,SPLITTER(H.parseq.numthreads()),
+ TASK(MODE(CONSTREFERENCE(F,H)),
+ {for(auto i=iter.begin(); i!=iter.end(); ++i)
+// for(int i=0; i<s;++i)
+ {
+ size_t gemm_nt = iter_nt;
+ if (i < leftover_nt)
+ gemm_nt++;
+ if (gemm_nt>1){ // Running a parallel fgemm
+ MMH_par_t H2(F.rns()._field_rns[i], H.recLevel,
+ ParSeqHelper::Parallel<Cut,Param>(gemm_nt));
+// SPLITTER(gemm_nt,Cut,Param));
+ //std::cerr<<"calling fgemm with "<<gemm_nt<<" threads"<<std::endl;
+ FFLAS::fgemm(F.rns()._field_rns[i],ta,tb, m, n, k, alpha._ptr[i*alpha._stride],
+ Ad._ptr+i*Ad._stride, lda, Bd._ptr+i*Bd._stride, ldb,
+ beta._ptr[i*beta._stride], Cd._ptr+i*Cd._stride, ldc, H2);
+ } else { // Running a sequential fgemm
+ MMH_seq_t WH(F.rns()._field_rns[i], H.recLevel, ParSeqHelper::Sequential());
+ FFLAS::fgemm(F.rns()._field_rns[i],ta,tb, m, n, k, alpha._ptr[i*alpha._stride],
+ Ad._ptr+i*Ad._stride, lda, Bd._ptr+i*Bd._stride, ldb,
+ beta._ptr[i*beta._stride], Cd._ptr+i*Cd._stride, ldc, WH);
+ }
+ }
+ }); // TASK
+ ); // FLORBLOCK1D
+
+#ifdef FFT_PROFILER
+ t.stop();
+
+ std::cerr<<"=========================================="<<std::endl
+ <<"Pointwise fgemm : "<<t.realtime()<<" ("<<s<<") moduli "<<std::endl
+ <<"=========================================="<<std::endl;
+#endif
+ return Cd;
+ }
+
+
+ template<class ParSeq>
+ inline Givaro::Integer*
+ fgemm (const Givaro::ZRing<Givaro::Integer>& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n,const size_t k,
+ const Givaro::Integer alpha,
+ const Givaro::Integer* A, const size_t lda,
+ const Givaro::Integer* B, const size_t ldb,
+ Givaro::Integer beta,
+ Givaro::Integer* C, const size_t ldc,
+ MMHelper<Givaro::ZRing<Givaro::Integer>, MMHelperAlgo::Classic, ModeCategories::ConvertTo<ElementCategories::RNSElementTag>, ParSeq > & H)
+ {
+#ifdef PROFILE_FGEMM_MP
+ Timer chrono;
+ chrono.start();
+#endif
+ if (alpha == 0){
+ fscalin(F,m,n,beta,C,ldc);
+ return C;
+ }
+
+ if (k==0) return C;
+ // compute bit size of feasible prime for FFLAS
+ size_t _k=k,lk=0;
+ while ( _k ) {_k>>=1; ++lk;}
+ size_t prime_bitsize= (53-lk)>>1;
+
+ // compute bound on the output
+ Givaro::Integer mA,mB,mC;
+ size_t logA,logB;
+ mA=H.normA;
+ mB=H.normB;
+ if (H.normA==0)
+ H.normA = InfNorm ((ta==FflasNoTrans)?m:k,(ta==FflasNoTrans)?k:m,A,lda);
+ logA = H.normA.bitsize();
+ if (H.normB==0)
+ H.normB = InfNorm ((tb==FflasNoTrans)?k:n,(tb==FflasNoTrans)?n:k,B,ldb);
+ logB = H.normA.bitsize();
+
+ mC = 2*uint64_t(k)*H.normA*H.normB*abs(alpha); // need to use 2x bound to reach both positive and negative
+
+ // construct an RNS structure and its associated Domain
+ FFPACK::rns_double RNS(mC, prime_bitsize);
+
+ typedef FFPACK::RNSInteger<FFPACK::rns_double> RnsDomain;
+ RnsDomain Zrns(RNS);
+
+ size_t Acold,Arowd,Bcold,Browd;
+ if (ta == FFLAS::FflasNoTrans){Arowd=m; Acold = k; }
+ else { Arowd=k; Acold = m;}
+ if (tb == FFLAS::FflasNoTrans){Browd=k; Bcold = n; }
+ else { Browd=n; Bcold = k;}
+
+ // allocate data for RNS representation
+ typename RnsDomain::Element_ptr Ap,Bp,Cp;
+ Ap = FFLAS::fflas_new(Zrns,Arowd,Acold);
+ Bp = FFLAS::fflas_new(Zrns,Browd,Bcold);
+ Cp = FFLAS::fflas_new(Zrns,m,n);
+
+#ifdef PROFILE_FGEMM_MP
+ chrono.stop();
+ std::cout<<"-------------------------------"<<std::endl;
+ std::cout<<"FGEMM_MP: nb prime: "<<RNS._size<<std::endl;
+ std::cout<<"FGEMM_MP: init: "<<uint64_t(chrono.realtime()*1000)<<"ms"<<std::endl;
+ chrono.start();
+#endif
+
+ // convert the input matrices to RNS representation
+ finit_rns(Zrns,Arowd,Acold,(logA/16)+((logA%16)?1:0),A,lda,Ap);
+ finit_rns(Zrns,Browd,Bcold,(logB/16)+((logB%16)?1:0),B,ldb,Bp);
+
+#ifdef PROFILE_FGEMM_MP
+ chrono.stop();
+ std::cout<<"FGEMM_MP: to RNS: "<<uint64_t(chrono.realtime()*1000)<<"ms"<<std::endl;
+ chrono.start();
+#endif
+
+ // perform the fgemm in RNS
+ // Classic as no Winograd over ZZ available for the moment
+ MMHelper<RnsDomain, MMHelperAlgo::Classic, ModeCategories::DefaultTag, ParSeq> H2(Zrns,H.recLevel,H.parseq);
+
+ // compute alpha and beta in RNS
+ typename RnsDomain::Element alphap, betap;
+ Zrns.init(alphap, alpha);
+ Zrns.init(betap, F.zero);
+
+ // call fgemm
+ fgemm(Zrns,ta,tb,m,n,k,alphap,Ap,Acold,Bp,Bcold,betap,Cp,n,H2);
+
+#ifdef PROFILE_FGEMM_MP
+ chrono.stop();
+ std::cout<<"FGEMM_MP: RNS Mul: "<<uint64_t(chrono.realtime()*1000)<<"ms"<<std::endl;
+ chrono.start();
+#endif
+
+
+ // convert the RNS output to integer representation (C=beta.C+ RNS^(-1)(Cp) )
+ fconvert_rns(Zrns,m,n,beta,C,ldc,Cp);
+
+ FFLAS::fflas_delete(Ap);
+ FFLAS::fflas_delete(Bp);
+ FFLAS::fflas_delete(Cp);
+#ifdef PROFILE_FGEMM_MP
+ chrono.stop();
+ std::cout<<"FGEMM_MP: from RNS: "<<uint64_t(chrono.realtime()*1000)<<"ms"<<std::endl;
+ std::cout<<"-------------------------------"<<std::endl;
+#endif
+
+ return C;
+ }
+
+
+
+// Simple switch Winograd -> Classic (waiting for Winograd's algorithm to be generic wrt ModeTrait)
+ template<typename RNS, class ModeT>
+ inline typename RNS::Element_ptr fgemm (const FFPACK::RNSInteger<RNS> &F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n,const size_t k,
+ const typename RNS::Element alpha,
+ typename RNS::ConstElement_ptr Ad, const size_t lda,
+ typename RNS::ConstElement_ptr Bd, const size_t ldb,
+ const typename RNS::Element beta,
+ typename RNS::Element_ptr Cd, const size_t ldc,
+ MMHelper<FFPACK::RNSInteger<RNS>, MMHelperAlgo::Winograd, ModeT, ParSeqHelper::Sequential> & H)
+ {
+ MMHelper<FFPACK::RNSInteger<RNS>, MMHelperAlgo::Classic, ModeT, ParSeqHelper::Sequential> H2(F, H.recLevel,H.parseq);
+ return fgemm(F,ta,tb,m,n,k,alpha,Ad,lda,Bd,ldb,beta,Cd,ldc,H2);
+ }
+
+ // template<class ParSeq>
+ // inline Givaro::Integer*
+ // fgemm (const Givaro::ZRing<Givaro::Integer>& F,
+ // const FFLAS_TRANSPOSE ta,
+ // const FFLAS_TRANSPOSE tb,
+ // const size_t m, const size_t n,const size_t k,
+ // const Givaro::Integer alpha,
+ // const Givaro::Integer* A, const size_t lda,
+ // const Givaro::Integer* B, const size_t ldb,
+ // Givaro::Integer beta,
+ // Givaro::Integer* C, const size_t ldc,
+ // MMHelper<Givaro::ZRing<Givaro::Integer>, MMHelperAlgo::Winograd, ModeCategories::ConvertTo<ElementCategories::RNSElementTag>, ParSeq > & H)
+ // {
+ // MMHelper<Givaro::ZRing<Givaro::Integer>, MMHelperAlgo::Classic, ModeCategories::ConvertTo<ElementCategories::RNSElementTag>, ParSeq> H2(F, H.recLevel,H.parseq);
+ // return fgemm(F,ta,tb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,H2);
+
+ // }
+ /************************************
+ *** MULTIPRECISION FGEMM OVER Fp ***
+ ************************************/
+
+ // fgemm for RNSIntegerMod with Winograd Helper
+ template<typename RNS>
+ inline typename RNS::Element_ptr fgemm (const FFPACK::RNSIntegerMod<RNS> &F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n,const size_t k,
+ const typename RNS::Element alpha,
+ typename RNS::ConstElement_ptr Ad, const size_t lda,
+ typename RNS::ConstElement_ptr Bd, const size_t ldb,
+ const typename RNS::Element beta,
+ typename RNS::Element_ptr Cd, const size_t ldc,
+ MMHelper<FFPACK::RNSIntegerMod<RNS>, MMHelperAlgo::Winograd> & H)
+ {
+ // compute the product over Z
+ typedef FFPACK::RNSInteger<RNS> RnsDomain;
+ RnsDomain Zrns(F.rns());
+ MMHelper<RnsDomain, MMHelperAlgo::Classic> H2(Zrns, H.recLevel,H.parseq);
+#ifdef BENCH_PERF_FGEMM_MP
+ FFLAS::Timer chrono;chrono.start();
+#endif
+ fgemm(Zrns,ta,tb,m,n,k,alpha,Ad,lda,Bd,ldb,beta,Cd,ldc,H2);
+ // reduce the product mod p (note that entries are larger than p, due to RNS modulo reduction)
+ freduce (F, m, n, Cd, ldc);
+#ifdef BENCH_PERF_FGEMM_MP
+ chrono.stop();
+ F.t_igemm+=chrono.realtime();
+#endif
+
+ return Cd;
+ }
+
+
+ // fgemm for IntegerDomain with Winograd Helper
+ inline Givaro::Integer* fgemm (const Givaro::Modular<Givaro::Integer>& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n,const size_t k,
+ const Givaro::Integer alpha,
+ const Givaro::Integer *A, const size_t lda,
+ const Givaro::Integer *B, const size_t ldb,
+ const Givaro::Integer beta,
+ Givaro::Integer* C, const size_t ldc,
+ MMHelper<Givaro::Modular<Givaro::Integer>, MMHelperAlgo::Classic, ModeCategories::ConvertTo<ElementCategories::RNSElementTag> > & H)
+ {
+ // compute the product over Z
+ // std::cerr<<"Entering fgemm<Modular<Integer>>"<<std::endl;
+ typedef Givaro::ZRing<Givaro::Integer> IntegerDomain;
+ Givaro::Integer p;
+ F.cardinality(p);
+ IntegerDomain Z;
+ MMHelper<IntegerDomain,MMHelperAlgo::Classic, ModeCategories::ConvertTo<ElementCategories::RNSElementTag> > H2(Z,H.recLevel,H.parseq);
+ H2.setNorm(p);
+
+ fgemm(Z,ta,tb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,H2);
+
+ // reduce the product mod p
+ freduce (F, m, n, C, ldc);
+
+ return C;
+ }
+ template<class ParSeq>
+ inline Givaro::Integer* fgemm (const Givaro::Modular<Givaro::Integer>& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n,const size_t k,
+ const Givaro::Integer alpha,
+ const Givaro::Integer *A, const size_t lda,
+ const Givaro::Integer *B, const size_t ldb,
+ const Givaro::Integer beta,
+ Givaro::Integer* C, const size_t ldc,
+ MMHelper<Givaro::Modular<Givaro::Integer>, MMHelperAlgo::Auto, ModeCategories::ConvertTo<ElementCategories::RNSElementTag>, ParSeq > & H)
+ {
+ // compute the product over Z
+ // std::cerr<<"Entering fgemm<Modular<Integer>>"<<std::endl;
+ typedef Givaro::ZRing<Givaro::Integer> IntegerDomain;
+ Givaro::Integer p;
+ F.cardinality(p);
+ IntegerDomain Z;
+ MMHelper<IntegerDomain,MMHelperAlgo::Classic, ModeCategories::ConvertTo<ElementCategories::RNSElementTag>, ParSeq > H2(Z,H.recLevel,H.parseq);
+ H2.setNorm(p);
+
+ fgemm(Z,ta,tb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,H2);
+
+ // reduce the product mod p
+ freduce (F, m, n, C, ldc);
+
+ return C;
+ }
+
+
+ // // PARALLEL VERSION (NOT PARALLEL YET)
+ // template<class Cut, class Param>
+ // inline Givaro::Integer* fgemm (const Givaro::ZRing<Givaro::Integer>& F,
+ // const FFLAS_TRANSPOSE ta,
+ // const FFLAS_TRANSPOSE tb,
+ // const size_t m, const size_t n,const size_t k,
+ // const Givaro::Integer alpha,
+ // const Givaro::Integer* A, const size_t lda,
+ // const Givaro::Integer* B, const size_t ldb,
+ // Givaro::Integer beta,
+ // Givaro::Integer* C, const size_t ldc,
+ // MMHelper<Givaro::ZRing<Givaro::Integer>,MMHelperAlgo::Winograd,FieldCategories::UnparametricTag,ParSeqHelper::Parallel<Cut,Param> > & H){
+ // MMHelper<Givaro::ZRing<Givaro::Integer>,MMHelperAlgo::Winograd> H2(F,H.recLevel);
+ // return fgemm(F,ta,tb,m,n,k,alpha,A,lda,B,lda,beta,C,ldc,H2);
+ // }
+
+}// END of namespace FFLAS
+
+#endif
+
diff --git a/fflas-ffpack/fflas/fflas_fgemm/fgemm_winograd.inl b/fflas-ffpack/fflas/fflas_fgemm/fgemm_winograd.inl
new file mode 100644
index 0000000..47bc54e
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fgemm/fgemm_winograd.inl
@@ -0,0 +1,546 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas_fgemm/fgemm_winograd.h
+ * @brief Strassen--Winograd matrix multiplication.
+ * @warning The domain is supposed to be a field since some divisions are required for efficiency purposes
+ * An alternative has to be written for finite rings if necessary
+ */
+
+#ifndef __FFLASFFPACK_fflas_fflas_fgemm_winograd_INL
+#define __FFLASFFPACK_fflas_fflas_fgemm_winograd_INL
+
+#include <stdint.h>
+#include <givaro/modular.h>
+#include <givaro/zring.h>
+
+#include "fgemm_classical.inl"
+#include "schedule_winograd.inl"
+#include "schedule_winograd_acc.inl"
+#include "schedule_winograd_acc_ip.inl"
+#include "schedule_winograd_ip.inl"
+// #include "fflas_fgemm/bini.inl"
+
+
+#ifndef NEWWINO
+#define NEWWINO
+#endif
+
+//#define OLDWINO
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+
+
+// DynamicPeeling, WinogradCalc
+namespace FFLAS { namespace Protected {
+
+ /** \brief Computes the number of recursive levels to perform.
+ *
+ * \param m the common dimension in the product AxB
+ */
+ template<class Field>
+ inline int WinogradThreshold(const Field& F) {return __FFLASFFPACK_WINOTHRESHOLD;}
+ template<>
+ inline int WinogradThreshold (const Givaro::Modular<float>& F) {return __FFLASFFPACK_WINOTHRESHOLD_FLT;}
+ template<>
+ inline int WinogradThreshold (const Givaro::ModularBalanced<double> & F) {return __FFLASFFPACK_WINOTHRESHOLD_BAL;}
+ template<>
+ inline int WinogradThreshold (const Givaro::ModularBalanced<float> & F) {return __FFLASFFPACK_WINOTHRESHOLD_BAL_FLT;}
+
+ template<class Field>
+ inline int WinogradSteps (const Field & F, const size_t & m)
+ {
+ int w = 0;
+ size_t th = WinogradThreshold<Field>(F);
+ size_t mt = m;
+ while ( mt >= th) {
+ ++w;
+ mt >>= 1;
+ }
+ return w;
+ }
+
+ template < class Field, class FieldMode >
+ inline void
+ DynamicPeeling (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n, const size_t k,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldMode> & H,
+ const typename MMHelper<Field, MMHelperAlgo::Winograd, FieldMode>::DelayedField::Element Cmin,
+ const typename MMHelper<Field, MMHelperAlgo::Winograd, FieldMode>::DelayedField::Element Cmax)
+ {
+ typename Field::ConstElement_ptr a12, a21, b12, b21;
+ size_t inca12, inca21, incb12, incb21, ma, na, mb, nb;
+ size_t mkn = nr + (kr << 1)+ (mr << 2);
+
+ if (ta == FflasTrans) {
+ ma = k; na = m;
+ a12 = A+(k-1)*lda; inca12 = 1;
+ a21 = A+m-1; inca21 = lda;
+ }
+ else {
+ ma = m; na = k;
+ a12 = A+k-1; inca12 = lda;
+ a21 = A+(m-1)*lda; inca21 = 1;
+ }
+ if (tb == FflasTrans) {
+ mb = n; nb = k;
+ b12 = B+(n-1)*ldb; incb12 = 1;
+ b21 = B+k-1; incb21 = ldb;
+ }
+ else {
+ mb = k; nb = n;
+ b12 = B+n-1; incb12 = ldb;
+ b21 = B+(k-1)*ldb; incb21 = 1;
+ }
+ MMHelper<Field, MMHelperAlgo::Classic, FieldMode> Hacc(H);
+ MMHelper<Field, MMHelperAlgo::Classic, FieldMode> HModd(H);
+ MMHelper<Field, MMHelperAlgo::Classic, FieldMode> HNodd(H);
+
+ Hacc.Cmin = H.Outmin; Hacc.Cmax = H.Outmax;
+ HModd.Cmin = Cmin; HModd.Cmax = Cmax;
+ HModd.Amax = H.Bmax; HModd.Amin = H.Bmin;
+ HModd.Bmax = H.Amax; HModd.Bmin = H.Amin;
+ HNodd.Cmin = Cmin; HNodd.Cmax = Cmax;
+ switch (mkn) {
+ case 1: // n oddsized
+ fgemv (F, ta, ma, na, alpha, A, lda, b12, incb12, beta, C+n-1,ldc, HNodd);
+ break;
+
+ case 2: // k oddsized
+ fger (F, m, n, alpha, a12, inca12, b21, incb21, C, ldc, Hacc);
+ break;
+
+ case 3: // n, k oddsized
+ fgemv (F, ta, ma, na, alpha, A, lda, b12, incb12, beta, C+n-1,ldc, HNodd);
+ fger (F, m, n-1, alpha, a12, inca12, b21, incb21, C, ldc, Hacc);
+ break;
+
+ case 4: // m oddsized
+ fgemv(F, (tb == FflasTrans)?FflasNoTrans:FflasTrans, mb, nb,
+ alpha, B, ldb, a21, inca21, beta, C+(m-1)*ldc, 1, HModd);
+ break;
+
+ case 5: // m, n oddsized
+ if (tb == FflasTrans) mb--;
+ else nb--;
+ fgemv (F, ta, ma, na, alpha, A, lda, b12, incb12, beta, C+n-1, ldc, HNodd);
+ fgemv (F, (tb==FflasTrans)?FflasNoTrans:FflasTrans, mb, nb,
+ alpha, B, ldb, a21, inca21, beta, C+(m-1)*ldc, 1, HModd);
+ break;
+
+ case 6: // m, k oddsized
+ fger (F, m-1, n, alpha, a12, inca12, b21, incb21, C, ldc, Hacc);
+ fgemv(F, (tb==FflasTrans)?FflasNoTrans:FflasTrans, mb, nb,
+ alpha, B, ldb, a21, inca21, beta, C+(m-1)*ldc, 1, HModd);
+ break;
+
+ case 7: // m, k, n oddsized
+ if (tb == FflasTrans) mb--;
+ else nb--;
+ H.checkA(F,ta, m,k,A,lda);
+ H.checkB(F,tb, k,n,B,ldb);
+ // Block NW
+ fger (F, m-1, n-1, alpha, a12, inca12, b21, incb21, C, ldc, Hacc);
+ // Block SW
+ fgemv (F, (tb==FflasTrans)?FflasNoTrans:FflasTrans, mb, nb,
+ alpha, B, ldb, a21, inca21, beta, C+(m-1)*ldc, 1, HModd);
+ HModd.checkOut(F, m-1,n-1, C, ldc);
+
+ // Block E
+ fgemv (F, ta, ma, na, alpha, A, lda, b12, incb12, beta, C+n-1, ldc, HNodd);
+ break;
+ }
+ H.Outmin = min4(HModd.Outmin,HNodd.Outmin, Hacc.Outmin, H.Outmin);
+ H.Outmax = max4(HModd.Outmax,HNodd.Outmax, Hacc.Outmax, H.Outmax);
+ H.checkOut(F, m,n, C, ldc);
+ }
+
+ template < class Field, class FieldMode >
+ inline void
+ DynamicPeeling2 (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n, const size_t k,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldMode> & H,
+ const typename MMHelper<Field, MMHelperAlgo::Winograd, FieldMode>::DelayedField::Element Cmin,
+ const typename MMHelper<Field, MMHelperAlgo::Winograd, FieldMode>::DelayedField::Element Cmax)
+ {
+ size_t mkn =(size_t)( (bool)(nr > 0)+ ((bool)(kr > 0) << 1)+ ((bool)(mr > 0) << 2));
+ if (mkn == 0) return;
+
+ typename Field::ConstElement_ptr a12, a21, b12, b21;
+ if (ta == FflasTrans) {
+ a12 = A+(k-kr)*lda;
+ a21 = A+(m-mr);
+ }
+ else {
+ a12 = A+(k-kr);
+ a21 = A+(m-mr)*lda;
+ }
+ if (tb == FflasTrans) {
+ b12 = B+(n-nr)*ldb;
+ b21 = B+(k-kr);
+ }
+ else {
+ b12 = B+(n-nr);
+ b21 = B+(k-kr)*ldb;
+ }
+
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldMode> Hacc(H);
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldMode> HModd(H);
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldMode> HNodd(H);
+
+ Hacc.Cmin = H.Outmin; Hacc.Cmax = H.Outmax;
+ Hacc.recLevel=-1;HModd.recLevel=-1;HNodd.recLevel=-1;
+ HModd.Cmin = Cmin; HModd.Cmax = Cmax;
+ HModd.Amax = H.Bmax; HModd.Amin = H.Bmin;
+ HModd.Bmax = H.Amax; HModd.Bmin = H.Amin;
+ HNodd.Cmin = Cmin; HNodd.Cmax = Cmax;
+
+ switch (mkn) {
+ case 1: // n oddsized
+ fgemm (F, ta, tb, m, nr, k, alpha, A, lda, b12, ldb, beta, C+(n-nr), ldc, HNodd);
+ break;
+
+ case 2: // k oddsized
+ fgemm (F, ta, tb, m, n, kr, alpha, a12, lda, b21, ldb, F.one, C, ldc, Hacc);
+ break;
+
+ case 3: // n, k oddsized
+ fgemm (F, ta, tb, m, nr, k, alpha, A, lda, b12, ldb, beta, C+(n-nr), ldc, HNodd);
+ fgemm (F, ta, tb, m, n-nr, kr, alpha, a12, lda, b21, ldb, F.one, C, ldc, Hacc);
+ break;
+
+ case 4: // m oddsized
+ fgemm (F, ta, tb, mr, n, k, alpha, a21, lda, B, ldb, beta, C+(m-mr)*ldc, ldc, HModd);
+ break;
+
+ case 5: // m, n oddsized
+ fgemm (F, ta, tb, m, nr, k, alpha, A, lda, b12, ldb, beta, C+(n-nr), ldc, HNodd);
+ fgemm (F, ta, tb, mr, n-nr, k, alpha, a21, lda, B, ldb, beta, C+(m-mr)*ldc, ldc, HModd);
+ break;
+
+ case 6: // m, k oddsized
+ fgemm (F, ta, tb, m-mr, n, kr, alpha, a12, lda, b21, ldb, F.one, C, ldc, Hacc);
+ fgemm (F, ta, tb, mr, n, k, alpha, a21, lda, B, ldb, beta, C+(m-mr)*ldc, ldc, HModd);
+ break;
+
+ case 7: // m, k, n oddsized
+ // Block NW
+ fgemm (F, ta, tb, m-mr, n-nr, kr, alpha, a12, lda, b21, ldb, F.one, C, ldc, Hacc);
+ // Block SW
+ fgemm (F, ta, tb, mr, n-nr, k, alpha, a21, lda, B, ldb, beta, C+(m-mr)*ldc, ldc, HModd);
+ // Block NE
+ fgemm (F, ta, tb, m, nr, k, alpha, A, lda, b12, ldb, beta, C+(n-nr), ldc, HNodd);
+ break;
+ }
+ H.Outmin = min4(HModd.Outmin,HNodd.Outmin, Hacc.Outmin, H.Outmin);
+ H.Outmax = max4(HModd.Outmax,HNodd.Outmax, Hacc.Outmax, H.Outmax);
+ H.checkOut(F, m,n, C, ldc);
+ }
+
+ // #define NEWIP
+ // #define NEWACCIP
+
+ // Switch between the scheduling for Strassen-Winograd Multiplication
+ template < class Field, class FieldMode >
+ inline void WinogradCalc (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A,const size_t lda,
+ typename Field::ConstElement_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldMode> & H)
+ {
+#if defined(NEWIP) or defined(NEWACCIP) /* XXX TESTS ONLY */
+ typedef typename Field::Element Element ;
+ Element_ptr Ac;
+ Element_ptr Bc;
+ if (ta == FflasNoTrans) {
+ Ac = fflas_new (F, mr*2, lda);
+ fassign(F,mr*2,kr*2,A,lda,Ac,lda);
+ }
+ else {
+ Ac = fflas_new (F, kr*2, lda);
+ fassign(F,kr*2,mr*2,A,lda,Ac,lda);
+ }
+ if (tb == FflasNoTrans) {
+ Bc = fflas_new (F, kr*2, ldb);
+ fassign(F,kr*2,nr*2,B,ldb,Bc,ldb);
+ }
+ else {
+ Bc = fflas_new (F, nr*2, ldb);
+ fassign(F,nr*2,kr*2,B,ldb,Bc,ldb);
+ }
+#endif
+
+ if (F.isZero(beta)) {
+#ifdef NEWIP /* NOT IP --- TESTS ONLY */
+ // (kr == nr && kr <= mr /* if not transposed */)
+ // we copy because they erase stuff
+ // bool normal = (ta == FflasNoTrans && tb == FflasNoTrans) ;
+ bool normal = true;
+
+ if (kr == nr && kr == mr && normal) {
+ // BLAS3::Winograd_L_S(F,ta,tb,mr,nr,kr,alpha,Ac,lda,Bc,ldb,beta,C,ldc,H);
+ // BLAS3::Winograd_R_S(F,ta,tb,mr,nr,kr,alpha,Ac,lda,Bc,ldb,beta,C,ldc,H);
+ BLAS3::Winograd_LR_S(F,ta,tb,mr,nr,kr,alpha,Ac,lda,Bc,ldb,beta,C,ldc,H);
+ }
+ else
+#endif
+ {
+ BLAS3::Winograd(F,ta,tb,mr,nr,kr,alpha,A,lda,B,ldb,beta,C,ldc,H);
+ }
+
+ }
+ else {
+#ifdef NEWACCIP /* test only */
+ if (kr == nr && kr == mr ) {
+ BLAS3::WinogradAcc_L_S(F,ta,tb,mr,nr,kr,alpha,Ac,lda,Bc,ldb,beta,C,ldc,H);
+ // BLAS3::WinogradAcc_R_S(F,ta,tb,mr,nr,kr,alpha,Ac,lda,Bc,ldb,beta,C,ldc,H);
+ }
+ else {
+ BLAS3::WinogradAcc_LR(F,ta,tb,mr,nr,kr,alpha,Ac,lda,Bc,ldb,beta,C,ldc,H);
+ }
+
+
+#elif defined(NEWWINO)
+ BLAS3::WinogradAcc_3_21(F,ta,tb,mr,nr,kr,alpha,A,lda,B,ldb,beta,C,ldc,H);
+#elif defined(OLDWINO)
+ BLAS3::WinogradAcc_3_23(F,ta,tb,mr,nr,kr,alpha,A,lda,B,ldb,beta,C,ldc,H);
+#elif defined(NEWACC)
+ // BLAS3::WinogradAcc_2_24(F,ta,tb,mr,nr,kr,alpha,A,lda,B,ldb,beta,C,ldc,H);
+ BLAS3::WinogradAcc_2_27(F,ta,tb,mr,nr,kr,alpha,A,lda,B,ldb,beta,C,ldc,H);
+#else
+#error "you need to make a choice for a BLAS3 mat mul schedule"
+#endif
+
+ }
+#if defined(NEWIP) or defined(NEWACCIP) /* NOT IP --- TESTS ONLY */
+ fflas_delete (Ac);
+ fflas_delete (Bc);
+#endif
+
+ } // WinogradCalc
+
+
+
+
+//#define OLD_DYNAMIC_PEELING
+
+}// namespace Protected
+} // FFLAS
+
+
+namespace FFLAS{
+ template<class Field, class ModeT>
+ inline typename Field::Element_ptr
+ fgemm (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n, const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Winograd, ModeT> & H)
+ {
+ if (!m || !n ) return C;
+ if (!k){
+ //TODO: update helper
+ fscalin(F,m,n,beta,C,ldc);
+ return C;
+ }
+ if (H.recLevel < 0) {
+ H.recLevel = Protected::WinogradSteps (F, min3(m,k,n));
+ }
+
+ if (H.recLevel == 0){
+ MMHelper<Field, MMHelperAlgo::Classic, ModeT> HC(H);
+ fgemm (F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, HC);
+
+ H.Outmax = HC.Outmax;
+ H.Outmin = HC.Outmin;
+ return C;
+ }
+
+ // Then w >0
+ typedef typename MMHelper<Field, MMHelperAlgo::Winograd, ModeT>::DelayedField::Element DFElt;
+ DFElt Cmin = H.Cmin;
+ DFElt Cmax = H.Cmax;
+
+#ifdef OLD_DYNAMIC_PEELING
+
+ Protected::WinogradCalc (F, ta, tb, m/2, n/2, k/2, alpha, A, lda, B, ldb, beta, C, ldc, H);
+
+ FFLASFFPACK_check(m-(m/2)*2 == (m&0x1));
+ FFLASFFPACK_check(n-(n/2)*2 == (n&0x1));
+ FFLASFFPACK_check(k-(k/2)*2 == (k&0x1));
+
+ Protected::DynamicPeeling (F, ta, tb, m, n, k, m&0x1, n&0x1, k&0x1, alpha, A, lda, B, ldb, beta, C, ldc, H, Cmin, Cmax);
+#else
+ size_t ww = (size_t)H.recLevel ;
+ size_t m2 = (m >> ww) << (ww-1) ;
+ size_t n2 = (n >> ww) << (ww-1) ;
+ size_t k2 = (k >> ww) << (ww-1) ;
+
+ Protected::WinogradCalc (F, ta, tb, m2, n2, k2, alpha, A, lda, B, ldb, beta, C, ldc, H);
+
+ size_t mr = m -2*m2;
+ size_t nr = n -2*n2;
+ size_t kr = k -2*k2;
+
+ FFLASFFPACK_check(m == m2*2+mr);
+ FFLASFFPACK_check(n == n2*2+nr);
+ FFLASFFPACK_check(k == k2*2+kr);
+
+ Protected::DynamicPeeling2 (F, ta, tb, m, n, k, mr, nr, kr, alpha, A, lda, B, ldb, beta, C, ldc, H, Cmin, Cmax);
+#endif
+ return C;
+ } // fgemm
+
+
+ template<class Field, class ModeT, class Cut, class Param>
+ inline typename Field::Element_ptr
+ fgemm (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n, const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::WinogradPar, ModeT, ParSeqHelper::Parallel<Cut,Param> > & H)
+ {
+ if (!m || !n ) return C;
+
+ if (!k){
+ //TODO: update helper
+ fscalin(F,m,n,beta,C,ldc);
+ return C;
+ }
+ if (H.recLevel < 0) {
+ H.recLevel = Protected::WinogradSteps (F, min3(m,k,n));
+ }
+
+ if (H.recLevel == 0){
+
+#ifdef WINO_SEQ
+ MMHelper<Field,MMHelperAlgo::Winograd>
+ HC (F, -1,ParSeqHelper::Sequential());
+#elif defined CLASSIC_SEQ
+ MMHelper<Field,MMHelperAlgo::Winograd>
+ HC (F, 0,ParSeqHelper::Sequential());
+#elif defined CLASSIC_Hybrid
+
+ typedef StrategyParameter::TwoDAdaptive twoda;
+ typedef CuttingStrategy::Recursive rec;
+
+ MMHelper<Field,MMHelperAlgo::Winograd,
+ typename FFLAS::ModeTraits<Field>::value,
+ FFLAS::ParSeqHelper::Parallel<rec, twoda> >
+ HC (F, -1, SPLITTER(H.parseq.numthreads(), rec, twoda));
+
+#elif defined PFGEMM_WINO_SEQ
+ MMHelper<Field,MMHelperAlgo::Winograd,
+ typename FFLAS::ModeTraits<Field>::value,
+ FFLAS::ParSeqHelper::Parallel>
+ HC (F, -1, ParSeqHelper::Parallel(PFGEMM_WINO_SEQ, RECURSIVE, TWO_D_ADAPT));
+#else
+ MMHelper<Field,MMHelperAlgo::Winograd,
+ typename FFLAS::ModeTraits<Field>::value,
+ FFLAS::ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::TwoDAdaptive> >
+ HC (F, 0, ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::TwoDAdaptive>(NUM_THREADS));
+#endif
+ // MMHelper<Field, MMHelperAlgo::Classic, ModeTraits> HC(H);
+
+ fgemm (F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, HC);
+ H.Outmax = HC.Outmax;
+ H.Outmin = HC.Outmin;
+ return C;
+ }
+
+///
+
+ // Then w >0
+ typedef typename MMHelper<Field, MMHelperAlgo::Winograd, ModeT>::DelayedField::Element DFElt;
+ DFElt Cmin = H.Cmin;
+ DFElt Cmax = H.Cmax;
+#ifdef OLD_DYNAMIC_PEELING
+
+ BLAS3::WinoPar (F, ta, tb, m/2, n/2, k/2, alpha, A, lda, B, ldb, beta, C, ldc, H);
+
+ FFLASFFPACK_check(m-(m/2)*2 == (m&0x1));
+ FFLASFFPACK_check(n-(n/2)*2 == (n&0x1));
+ FFLASFFPACK_check(k-(k/2)*2 == (k&0x1));
+ MMHelper<Field, MMHelperAlgo::Winograd, ModeT> HC(H);
+ Protected::DynamicPeeling (F, ta, tb, m, n, k, m&0x1, n&0x1, k&0x1, alpha, A, lda, B, ldb, beta, C, ldc, HC, Cmin, Cmax);
+#else
+ size_t ww = (size_t)H.recLevel ;
+ size_t m2 = (m >> ww) << (ww-1) ;
+ size_t n2 = (n >> ww) << (ww-1) ;
+ size_t k2 = (k >> ww) << (ww-1) ;
+
+ BLAS3::WinoPar (F, ta, tb, m2, n2, k2, alpha, A, lda, B, ldb, beta, C, ldc, H);
+
+ size_t mr = m -2*m2;
+ size_t nr = n -2*n2;
+ size_t kr = k -2*k2;
+
+ FFLASFFPACK_check(m == m2*2+mr);
+ FFLASFFPACK_check(n == n2*2+nr);
+ FFLASFFPACK_check(k == k2*2+kr);
+ MMHelper<Field, MMHelperAlgo::Winograd, ModeT> HC(H);
+ Protected::DynamicPeeling2 (F, ta, tb, m, n, k, mr, nr, kr, alpha, A, lda, B, ldb, beta, C, ldc, HC, Cmin, Cmax);
+#endif
+ return C;
+ } // fgemm
+
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_fflas_fgemm_winograd_INL
diff --git a/fflas-ffpack/fflas/fflas_fgemm/matmul.doxy b/fflas-ffpack/fflas/fflas_fgemm/matmul.doxy
new file mode 100644
index 0000000..15b2aee
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fgemm/matmul.doxy
@@ -0,0 +1,34 @@
+// Copyright (c) 2014 FFLAS-FFPACK
+// written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+//
+// ========LICENCE========
+// This file is part of the library FFLAS-FFPACK.
+//
+// FFLAS-FFPACK is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+// ========LICENCE========
+//
+
+
+
+/** \ingroup fflas-ffpack
+ * \defgroup MMalgos Matrix Multiplication Algorithms
+ *
+ * \brief Matrix Multiplication (level 3) algorithms
+ *
+ * @todo biblio
+ *
+ */
+
+// vim:syn=doxygen
diff --git a/fflas-ffpack/fflas/fflas_fgemm/schedule_bini.inl b/fflas-ffpack/fflas/fflas_fgemm/schedule_bini.inl
new file mode 100644
index 0000000..fd50ce2
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fgemm/schedule_bini.inl
@@ -0,0 +1,110 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2014 the LinBox group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_fgemm/schedule_bini.inl
+ * @ingroup MMalgos
+ * @brief Bini implementation
+ */
+
+#ifndef __FFLASFFPACK_fgemm_bini_INL
+#define __FFLASFFPACK_fgemm_bini_INL
+
+namespace FFLAS { namespace BLAS3 {
+
+ template < class Field >
+ inline void Bini (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ const typename Field::Element_ptr A,const size_t lda,
+ const typename Field::Element_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ const size_t kmax, const size_t w, const FFLAS_BASE base,
+ const size_t rec_level)
+ {
+
+ FFLASFFPACK_check(F.isZero(beta));
+ FFLASFFPACK_check(rec_level>0);
+
+ size_t imaxb, jmaxb, imaxa, jmaxa, ldx2;
+ // size_t x3rd = std::max(mr,kr);
+ const typename Field::Element_ptr d11,d12,d21,d22;
+ typename Field::Element_ptr d11c,d12c,d21c,d22c,dx1,dx2;
+ const typename Field::Element_ptr A11=A, A12, A21, A22;
+ const typename Field::Element_ptr B11=B, B12, B21, B22;
+ typename Field::Element_ptr C11=C, C12=C+nr, C21=C+mr*ldc, C22=C+nr+mr*ldc;
+
+
+ size_t x1rd = std::max(nr,kr);
+ size_t ldx1;
+ if (ta == FflasTrans) {
+ A21 = A + mr;
+ A12 = A + kr*lda;
+ A22 = A12 + mr;
+ imaxa = kr;
+ jmaxa = mr;
+ ldx1 = mr;
+ }
+ else {
+ A12 = A + kr;
+ A21 = A + mr*lda;
+ A22 = A21 + kr;
+ imaxa = mr;
+ jmaxa = kr;
+ ldx1 = x1rd;
+ }
+ if (tb == FflasTrans) {
+ B21 = B + kr;
+ B12 = B + nr*ldb;
+ B22 = B12 + kr;
+ imaxb = nr;
+ jmaxb = kr;
+ ldx2 = kr;
+ }
+ else {
+ B12 = B + nr;
+ B21 = B + kr*ldb;
+ B22 = B21 + nr;
+ imaxb = kr;
+ ldx2 = jmaxb = nr;
+ }
+
+
+
+ } // Bini
+
+} // BLAS3
+
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fgemm_bini_INL
+
diff --git a/fflas-ffpack/fflas/fflas_fgemm/schedule_winograd.inl b/fflas-ffpack/fflas/fflas_fgemm/schedule_winograd.inl
new file mode 100644
index 0000000..6781acc
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fgemm/schedule_winograd.inl
@@ -0,0 +1,549 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2014 the LinBox group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ * Ziad Sultan <ziad.sultan at imag.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_fgemm/winograd.inl
+ * @ingroup MMalgos
+ * @brief Winograd implementation
+ * @bib ISSAC09 Scheduling
+ */
+
+#ifndef __FFLASFFPACK_fgemm_winograd_INL
+#define __FFLASFFPACK_fgemm_winograd_INL
+
+namespace FFLAS { namespace BLAS3 {
+
+ template < class Field, class FieldTrait, class Strat, class Param >
+ inline typename Field::Element_ptr
+ WinoPar (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A,const size_t lda,
+ typename Field::ConstElement_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ // const size_t kmax, const size_t w, const FFLAS_BASE base
+ MMHelper<Field, MMHelperAlgo::WinogradPar, FieldTrait, ParSeqHelper::Parallel<Strat,Param> > & WH
+ )
+ {
+ FFLASFFPACK_check(F.isZero(beta));
+
+ // typedef MMHelper<Field, MMHelperAlgo::WinogradPar, FieldTrait > MMH_t;
+ typedef MMHelper<Field, MMHelperAlgo::WinogradPar, FieldTrait, ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::TwoDAdaptive> > MMH_t;
+ const typename MMH_t::DelayedField & DF = WH.delayedField;
+ typedef typename MMH_t::DelayedField::Element DFElt;
+
+ size_t lb, cb, la, ca, ldX2;
+ // size_t x3rd = std::max(mr,kr);
+ typename Field::ConstElement_ptr A11=A, A12, A21, A22;
+ typename Field::ConstElement_ptr B11=B, B12, B21, B22;
+ typename Field::Element_ptr C11=C, C12=C+nr, C21=C+mr*ldc, C22=C21+nr;
+
+ size_t x1rd = std::max(nr,kr);
+ size_t ldX1;
+ if (ta == FflasTrans) {
+ A21 = A + mr;
+ A12 = A + kr*lda;
+ A22 = A12 + mr;
+ la = kr;
+ ca = mr;
+ ldX1 = mr;
+ } else {
+ A12 = A + kr;
+ A21 = A + mr*lda;
+ A22 = A21 + kr;
+ la = mr;
+ ca = kr;
+ ldX1 = x1rd;
+ }
+ if (tb == FflasTrans) {
+ B21 = B + kr;
+ B12 = B + nr*ldb;
+ B22 = B12 + kr;
+ lb = nr;
+ cb = kr;
+ ldX2 = kr;
+ } else {
+ B12 = B + nr;
+ B21 = B + kr*ldb;
+ B22 = B21 + nr;
+ lb = kr;
+ ldX2 = cb = nr;
+ }
+
+ // 11 temporary submatrices are required
+ typename Field::Element_ptr X21 = fflas_new (F, kr, nr);
+ typename Field::Element_ptr X11 = fflas_new (F,mr,x1rd);
+
+ typename Field::Element_ptr X22 = fflas_new (F, kr, nr);
+ typename Field::Element_ptr X12 = fflas_new (F,mr,x1rd);
+
+ typename Field::Element_ptr X23 = fflas_new (F, kr, nr);
+ typename Field::Element_ptr X13 = fflas_new (F,mr,x1rd);
+
+ typename Field::Element_ptr X24 = fflas_new (F, kr, nr);
+ typename Field::Element_ptr X14 = fflas_new (F,mr,x1rd);
+ typename Field::Element_ptr X15 = fflas_new (F,mr,x1rd);
+
+ typename Field::Element_ptr C_11 = fflas_new (F,mr,nr);
+ typename Field::Element_ptr CC_11 = fflas_new (F,mr,nr);
+ SYNCH_GROUP(
+
+ // T3 = B22 - B12 in X21 and S3 = A11 - A21 in X11
+ TASK(MODE(READ(B22, B12) WRITE(X21) CONSTREFERENCE(DF)),
+ pfsub(DF,lb,cb,B22,ldb,B12,ldb,X21,ldX2, NUM_THREADS););
+ TASK(MODE(READ(A11, A21) WRITE(X11) CONSTREFERENCE(DF)),
+ pfsub(DF,la,ca,A11,lda,A21,lda,X11,ldX1, NUM_THREADS););
+
+ // T1 = B12 - B11 in X22 and S1 = A21 + A22 in X12
+ TASK(MODE(READ(B11, B12) WRITE(X22) CONSTREFERENCE(DF)),
+ pfsub(DF,lb,cb,B12,ldb,B11,ldb,X22,ldX2, NUM_THREADS););
+ TASK(MODE(READ(A12, A22) WRITE(X12) CONSTREFERENCE(DF)),
+ pfadd(DF,la,ca,A21,lda,A22,lda,X12,ldX1, NUM_THREADS););
+
+ CHECK_DEPENDENCIES;
+
+ // T2 = B22 - T1 in X23 and S2 = S1 - A11 in X13
+ TASK(MODE(READ(B22, X22) READWRITE(X23) CONSTREFERENCE(DF)),
+ pfsub(DF,lb,cb,B22,ldb,X22,ldX2,X23,ldX2, NUM_THREADS););
+ TASK(MODE(READ(A11, X12) READWRITE(X13) CONSTREFERENCE(DF)),
+ // fsub(DF,la,ca,A11,lda,X12,ldX1,X13,ldX1););
+ pfsub(DF,la,ca,X12,ldX1,A11,lda,X13,ldX1, NUM_THREADS););
+ /*
+ fsub(DF,lb,cb,B22,ldb,X2,ldX2,X2,ldX2);
+ fsubin(DF,la,ca,A11,lda,X1,ldX1););
+ */
+ CHECK_DEPENDENCIES;
+
+ // T4 = T2 - B21 in X2 and S4 = A12 -S2 in X1
+ TASK(MODE(READ(B21, X23) READWRITE(X24) CONSTREFERENCE(DF)),
+ // fsub(DF,lb,cb,B21,ldb,X23,ldX2,X24,ldX2);
+ pfsub(DF,lb,cb,X23,ldX2,B21,ldb,X24,ldX2, NUM_THREADS););
+ TASK(MODE(READ(A12, X13) READWRITE(X14) CONSTREFERENCE(DF)),
+ pfsub(DF,la,ca,A12,lda,X13,ldX1,X14,ldX1, NUM_THREADS););
+
+ /*
+ fsubin(DF,lb,cb,B21,ldb,X2,ldX2);
+ fsub(DF,la,ca,A12,lda,X1,ldX1,X1,ldX1););
+ */
+ CHECK_DEPENDENCIES;
+
+ // P1 = alpha . A11 * B11 in X1
+
+ MMH_t H1(F, WH.recLevel-1, WH.Amin, WH.Amax, WH.Bmin, WH.Bmax, 0, 0);
+ MMH_t H7(F, WH.recLevel-1, -(WH.Amax-WH.Amin), WH.Amax-WH.Amin, -(WH.Bmax-WH.Bmin), WH.Bmax-WH.Bmin, 0,0);
+ MMH_t H5(F, WH.recLevel-1, 2*WH.Amin, 2*WH.Amax, -(WH.Bmax-WH.Bmin), WH.Bmax-WH.Bmin, 0, 0);
+ MMH_t H6(F, WH.recLevel-1, 2*WH.Amin-WH.Amax, 2*WH.Amax-WH.Amin, 2*WH.Bmin-WH.Bmax, 2*WH.Bmax-WH.Bmin, 0, 0);
+ MMH_t H3(F, WH.recLevel-1, 2*WH.Amin-2*WH.Amax, 2*WH.Amax-2*WH.Amin, WH.Bmin, WH.Bmax, 0, 0);
+ MMH_t H4(F, WH.recLevel-1, WH.Amin, WH.Amax, 2*WH.Bmin-2*WH.Bmax, 2*WH.Bmax-2*WH.Bmin, 0, 0);
+ MMH_t H2(F, WH.recLevel-1, WH.Amin, WH.Amax, WH.Bmin, WH.Bmax, 0, 0);
+
+ size_t nt = WH.parseq.numthreads();
+ size_t nt_rec = nt/7;
+ size_t nt_mod = nt % 7 ;
+ H1.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H2.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H3.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H4.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H5.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H6.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H7.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+
+ TASK(MODE(READ(A11, B11) WRITE(X15) CONSTREFERENCE(F,H1)),
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, X15, x1rd, H1););
+ // P7 = alpha . S3 * T3 in C21
+ TASK(MODE(READ(X11, X21) WRITE(C21) CONSTREFERENCE(F,H7)),
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X11, ldX1, X21, ldX2, F.zero, C21, ldc, H7););
+
+ // P5 = alpha . S1*T1 in C22
+ TASK(MODE(READ(X12, X22) WRITE(C22) CONSTREFERENCE(F,H5)),
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X12, ldX1, X22, ldX2, F.zero, C22, ldc, H5););
+
+ // P6 = alpha . S2 * T2 in C12
+ TASK(MODE(READ(X13, X23) WRITE(C12) CONSTREFERENCE(F,H6)),
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X13, ldX1, X23, ldX2, F.zero, C12, ldc, H6););
+
+ // P3 = alpha . S4*B22 in CC_11
+ TASK(MODE(READ(X14, B22) WRITE(CC_11) CONSTREFERENCE(F,H3)),
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X14, ldX1, B22, ldb, F.zero, CC_11, nr, H3););
+
+ // P4 = alpha . A22 * T4 in C_11
+ TASK(MODE(READ(A22) WRITE(C_11) READWRITE(X24, X22, X23, X21) CONSTREFERENCE(F,H4)),
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A22, lda, X24, ldX2, F.zero, C_11, nr, H4);
+ );
+
+ // P2 = alpha . A12 * B21 in C11
+ TASK(MODE(READ(A12, B21) WRITE(C11) CONSTREFERENCE(F,H2)),
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, F.zero, C11, ldc, H2););
+ CHECK_DEPENDENCIES;
+
+ DFElt U2Min, U2Max;
+ DFElt U3Min, U3Max;
+ DFElt U4Min, U4Max;
+ DFElt U7Min, U7Max;
+ DFElt U5Min, U5Max;
+ // U2 = P1 + P6 in C12 and
+ // U3 = P7 + U2 in C21 and
+ // U4 = P5 + U2 in C12 and
+ // U7 = P5 + U3 in C22 and
+ // U5 = P3 + U4 in C12
+ // BIG TASK with 5 Addin function calls
+// TASK(MODE(READWRITE(X15, C12) CONSTREFERENCE(F, DF, WH, U2Min, U2Max, H1.Outmin, H1.Outmax, H6.Outmin, H6.Outmax)),
+ if (Protected::NeedPreAddReduction(U2Min, U2Max, H1.Outmin, H1.Outmax, H6.Outmin, H6.Outmax, WH)){
+ TASK(MODE(READWRITE(X15) CONSTREFERENCE(F)),
+ pfreduce (F, mr, x1rd, X15, x1rd, NUM_THREADS);
+ );
+ TASK(MODE(READWRITE(C12) CONSTREFERENCE(F)),
+ pfreduce (F, mr, nr, C12, ldc, NUM_THREADS);
+ );
+ CHECK_DEPENDENCIES;
+ }
+ TASK(MODE(READWRITE(X15, C12) CONSTREFERENCE(DF)),
+ pfaddin(DF,mr,nr,X15,x1rd,C12,ldc, NUM_THREADS);
+ );
+ CHECK_DEPENDENCIES;
+// TASK(MODE(READWRITE(C12, C21) CONSTREFERENCE(F, DF, WH, U3Min, U3Max, U2Min, U2Max)),
+ if (Protected::NeedPreAddReduction(U3Min, U3Max, U2Min, U2Max, H7.Outmin, H7.Outmax, WH)){
+ TASK(MODE(READWRITE(C12) CONSTREFERENCE(F)),
+ pfreduce (F, mr, nr, C12, ldc, NUM_THREADS);
+ );
+ TASK(MODE(READWRITE(C21) CONSTREFERENCE(F)),
+ pfreduce (F, mr, nr, C21, ldc, NUM_THREADS);
+ );
+ CHECK_DEPENDENCIES;
+ }
+ TASK(MODE(READWRITE(C12, C21) CONSTREFERENCE(DF)),
+ pfaddin(DF,mr,nr,C12,ldc,C21,ldc, NUM_THREADS);
+ );
+ CHECK_DEPENDENCIES;
+// TASK(MODE(READWRITE(C12, C22) CONSTREFERENCE(F, DF, WH) VALUE(U4Min, U4Max, U2Min, U2Max)),
+ if (Protected::NeedPreAddReduction(U4Min, U4Max, U2Min, U2Max, H5.Outmin, H5.Outmax, WH)){
+ TASK(MODE(READWRITE(C22) CONSTREFERENCE(F)),
+ pfreduce (F, mr, nr, C22, ldc, NUM_THREADS);
+ );
+ TASK(MODE(READWRITE(C12) CONSTREFERENCE(F)),
+ pfreduce (F, mr, nr, C12, ldc, NUM_THREADS);
+ );
+ CHECK_DEPENDENCIES;
+ }
+ TASK(MODE(READWRITE(C12, C22) CONSTREFERENCE(DF, WH)),
+ pfaddin(DF,mr,nr,C22,ldc,C12,ldc, NUM_THREADS);
+ );
+ CHECK_DEPENDENCIES;
+// TASK(MODE(READWRITE(C22, C21) CONSTREFERENCE(F, DF, WH) VALUE(U3Min, U3Max, U7Min, U7Max)),
+ if (Protected::NeedPreAddReduction (U7Min,U7Max, U3Min, U3Max, H5.Outmin,H5.Outmax, WH) ){
+ TASK(MODE(READWRITE(C21) CONSTREFERENCE(F)),
+ pfreduce (F, mr, nr, C21, ldc, NUM_THREADS);
+ );
+ TASK(MODE(READWRITE(C22) CONSTREFERENCE(F)),
+ pfreduce (F, mr, nr, C22, ldc, NUM_THREADS);
+ );
+ CHECK_DEPENDENCIES;
+ }
+ TASK(MODE(READWRITE(C22, C21) CONSTREFERENCE(DF, WH)),
+ pfaddin(DF,mr,nr,C21,ldc,C22,ldc, NUM_THREADS);
+ );
+// TASK(MODE(READWRITE(C12, CC_11) CONSTREFERENCE(F, DF, WH) VALUE(U5Min, U5Max, U4Min, U4Max)),
+ if (Protected::NeedPreAddReduction (U5Min,U5Max, U4Min, U4Max, H3.Outmin, H3.Outmax, WH) ){
+ TASK(MODE(READWRITE(C12) CONSTREFERENCE(F)),
+ pfreduce (F, mr, nr, C12, ldc, NUM_THREADS);
+ );
+ TASK(MODE(READWRITE(CC_11) CONSTREFERENCE(F)),
+ pfreduce (F, mr, nr, CC_11, nr, NUM_THREADS);
+ );
+ CHECK_DEPENDENCIES;
+ }
+ TASK(MODE(READWRITE(C12, CC_11) CONSTREFERENCE(DF, WH)),
+ pfaddin(DF,mr,nr,CC_11,nr,C12,ldc, NUM_THREADS);
+ );
+ CHECK_DEPENDENCIES;
+
+ // U6 = U3 - P4 in C21
+ DFElt U6Min, U6Max;
+// TASK(MODE(READWRITE(C_11, C21) CONSTREFERENCE(F, DF, WH) VALUE(U6Min, U6Max, U3Min, U3Max)),
+ if (Protected::NeedPreSubReduction (U6Min,U6Max, U3Min, U3Max, H4.Outmin,H4.Outmax, WH) ){
+ TASK(MODE(READWRITE(CC_11) CONSTREFERENCE(F)),
+ pfreduce (F, mr, nr, C_11, nr, NUM_THREADS);
+ );
+ TASK(MODE(READWRITE(C21) CONSTREFERENCE(F)),
+ pfreduce (F, mr, nr, C21, ldc, NUM_THREADS);
+ );
+ CHECK_DEPENDENCIES
+ }
+ TASK(MODE(READWRITE(C_11, C21) CONSTREFERENCE(DF, WH) ),
+ pfsubin(DF,mr,nr,C_11,nr,C21,ldc, NUM_THREADS);
+ );
+
+ //CHECK_DEPENDENCIES;
+
+ // U1 = P2 + P1 in C11
+ DFElt U1Min, U1Max;
+// TASK(MODE(READWRITE(C11, X15/*, X14, X13, X12, X11*/) CONSTREFERENCE(F, DF, WH) VALUE(U1Min, U1Max)),
+ if (Protected::NeedPreAddReduction (U1Min, U1Max, H1.Outmin, H1.Outmax, H2.Outmin,H2.Outmax, WH) ){
+ TASK(MODE(READWRITE(X15) CONSTREFERENCE(F)),
+ pfreduce (F, mr, nr, X15, x1rd, NUM_THREADS);
+ );
+ TASK(MODE(READWRITE(C11) CONSTREFERENCE(F)),
+ pfreduce (F, mr, nr, C11, ldc, NUM_THREADS);
+ );
+ CHECK_DEPENDENCIES
+ }
+ TASK(MODE(READWRITE(C11, X15) CONSTREFERENCE(DF, WH)),
+ pfaddin(DF,mr,nr,X15,x1rd,C11,ldc, NUM_THREADS);
+ );
+
+ WH.Outmin = std::min (U1Min, std::min (U5Min, std::min (U6Min, U7Min)));
+ WH.Outmax = std::max (U1Max, std::max (U5Max, std::max (U6Max, U7Max)));
+
+ );
+// WAIT;
+
+
+ fflas_delete (CC_11);
+ fflas_delete (C_11);
+ fflas_delete (X15);
+ fflas_delete (X14);
+ fflas_delete (X24);
+ fflas_delete (X13);
+ fflas_delete (X23);
+ fflas_delete (X12);
+ fflas_delete (X22);
+ fflas_delete (X11);
+ fflas_delete (X21);
+
+ return C;
+ } //wino parallel
+
+
+ template < class Field, class FieldTrait >
+ inline void Winograd (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A,const size_t lda,
+ typename Field::ConstElement_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ // const size_t kmax, const size_t w, const FFLAS_BASE base
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait> & WH
+ )
+ {
+
+ FFLASFFPACK_check(F.isZero(beta));
+
+ typedef MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > MMH_t;
+ typedef typename MMH_t::DelayedField::Element_ptr DFEptr;
+ typedef typename MMH_t::DelayedField::ConstElement_ptr DFCEptr;
+ typedef typename MMH_t::DelayedField::Element DFElt;
+
+ const typename MMH_t::DelayedField & DF = WH.delayedField;
+
+ size_t lb, cb, la, ca, ldX2;
+ // size_t x3rd = std::max(mr,kr);
+ typename Field::ConstElement_ptr A11=A, A12, A21, A22;
+ typename Field::ConstElement_ptr B11=B, B12, B21, B22;
+ typename Field::Element_ptr C11=C, C12=C+nr, C21=C+mr*ldc, C22=C21+nr;
+
+ size_t x1rd = std::max(nr,kr);
+ size_t ldX1;
+ if (ta == FflasTrans) {
+ A21 = A + mr;
+ A12 = A + kr*lda;
+ A22 = A12 + mr;
+ la = kr;
+ ca = mr;
+ ldX1 = mr;
+ } else {
+ A12 = A + kr;
+ A21 = A + mr*lda;
+ A22 = A21 + kr;
+ la = mr;
+ ca = kr;
+ ldX1 = x1rd;
+ }
+ if (tb == FflasTrans) {
+ B21 = B + kr;
+ B12 = B + nr*ldb;
+ B22 = B12 + kr;
+ lb = nr;
+ cb = kr;
+ ldX2 = kr;
+ } else {
+ B12 = B + nr;
+ B21 = B + kr*ldb;
+ B22 = B21 + nr;
+ lb = kr;
+ ldX2 = cb = nr;
+ }
+ // Two temporary submatrices are required
+ typename Field::Element_ptr X2 = fflas_new (F, kr, nr);
+
+ // T3 = B22 - B12 in X2
+ fsub(DF,lb,cb, (DFCEptr) B22,ldb, (DFCEptr) B12,ldb, (DFEptr)X2,ldX2);
+
+ // S3 = A11 - A21 in X1
+ typename Field::Element_ptr X1 = fflas_new (F,mr,x1rd);
+ fsub(DF,la,ca,(DFCEptr)A11,lda,(DFCEptr)A21,lda,(DFEptr)X1,ldX1);
+
+ // P7 = alpha . S3 * T3 in C21
+ MMH_t H7(F, WH.recLevel-1, -(WH.Amax-WH.Amin), WH.Amax-WH.Amin, -(WH.Bmax-WH.Bmin), WH.Bmax-WH.Bmin, 0,0);
+
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X1, ldX1, X2, ldX2, F.zero, C21, ldc, H7);
+
+ // T1 = B12 - B11 in X2
+ fsub(DF,lb,cb,(DFCEptr)B12,ldb,(DFCEptr)B11,ldb,(DFEptr)X2,ldX2);
+
+ // S1 = A21 + A22 in X1
+ fadd(DF,la,ca,(DFCEptr)A21,lda,(DFCEptr)A22,lda,(DFEptr)X1,ldX1);
+
+ // P5 = alpha . S1*T1 in C22
+ MMH_t H5(F, WH.recLevel-1, 2*WH.Amin, 2*WH.Amax, -(WH.Bmax-WH.Bmin), WH.Bmax-WH.Bmin, 0, 0);
+
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X1, ldX1, X2, ldX2, F.zero, C22, ldc, H5);
+
+ // T2 = B22 - T1 in X2
+ fsub(DF,lb,cb,(DFCEptr)B22,ldb,(DFCEptr)X2,ldX2,(DFEptr)X2,ldX2);
+
+ // S2 = S1 - A11 in X1
+ fsubin(DF,la,ca,(DFCEptr)A11,lda,(DFEptr)X1,ldX1);
+
+ // P6 = alpha . S2 * T2 in C12
+ MMH_t H6(F, WH.recLevel-1, 2*WH.Amin-WH.Amax, 2*WH.Amax-WH.Amin, 2*WH.Bmin-WH.Bmax, 2*WH.Bmax-WH.Bmin, 0, 0);
+
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X1, ldX1, X2, ldX2, F.zero, C12, ldc, H6);
+
+ // S4 = A12 -S2 in X1
+ fsub(DF,la,ca,(DFCEptr)A12,lda,(DFCEptr)X1,ldX1,(DFEptr)X1,ldX1);
+
+ // P3 = alpha . S4*B22 in C11
+ MMH_t H3(F, WH.recLevel-1, 2*WH.Amin-2*WH.Amax, 2*WH.Amax-2*WH.Amin, WH.Bmin, WH.Bmax, 0, 0);
+
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X1, ldX1, B22, ldb, F.zero, C11, ldc, H3);
+
+ // P1 = alpha . A11 * B11 in X1
+ MMH_t H1(F, WH.recLevel-1, WH.Amin, WH.Amax, WH.Bmin, WH.Bmax, 0, 0);
+
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, X1, nr, H1);
+
+ // U2 = P1 + P6 in C12 and
+ DFElt U2Min, U2Max;
+ // This test will be optimized out
+ if (Protected::NeedPreAddReduction(U2Min, U2Max, H1.Outmin, H1.Outmax, H6.Outmin, H6.Outmax, WH)){
+ freduce (F, mr, nr, X1, nr);
+ freduce (F, mr, nr, C12, ldc);
+ }
+ faddin(DF,mr,nr,(DFCEptr)X1,nr,(DFEptr)C12,ldc);
+
+ // U3 = P7 + U2 in C21 and
+ DFElt U3Min, U3Max;
+ // This test will be optimized out
+ if (Protected::NeedPreAddReduction(U3Min, U3Max, U2Min, U2Max, H7.Outmin, H7.Outmax, WH)){
+ freduce (F, mr, nr, C12, ldc);
+ freduce (F, mr, nr, C21, ldc);
+ }
+ faddin(DF,mr,nr,(DFCEptr)C12,ldc,(DFEptr)C21,ldc);
+
+
+ // U4 = P5 + U2 in C12 and
+ DFElt U4Min, U4Max;
+ // This test will be optimized out
+ if (Protected::NeedPreAddReduction(U4Min, U4Max, U2Min, U2Max, H5.Outmin, H5.Outmax, WH)){
+ freduce (F, mr, nr, C22, ldc);
+ freduce (F, mr, nr, C12, ldc);
+ }
+ faddin(DF,mr,nr,(DFCEptr)C22,ldc,(DFEptr)C12,ldc);
+
+ // U7 = P5 + U3 in C22 and
+ DFElt U7Min, U7Max;
+ // This test will be optimized out
+ if (Protected::NeedPreAddReduction (U7Min,U7Max, U3Min, U3Max, H5.Outmin,H5.Outmax, WH) ){
+ freduce (F, mr, nr, C21, ldc);
+ freduce (F, mr, nr, C22, ldc);
+ }
+ faddin(DF,mr,nr,(DFCEptr)C21,ldc,(DFEptr)C22,ldc);
+
+ // U5 = P3 + U4 in C12
+ DFElt U5Min, U5Max;
+ // This test will be optimized out
+ if (Protected::NeedPreAddReduction (U5Min,U5Max, U4Min, U4Max, H3.Outmin, H3.Outmax, WH) ){
+ freduce (F, mr, nr, C12, ldc);
+ freduce (F, mr, nr, C11, ldc);
+ }
+ faddin(DF,mr,nr,(DFCEptr)C11,ldc,(DFEptr)C12,ldc);
+
+ // T4 = T2 - B21 in X2
+ fsubin(DF,lb,cb,(DFCEptr)B21,ldb,(DFEptr)X2,ldX2);
+
+ // P4 = alpha . A22 * T4 in C11
+ MMH_t H4(F, WH.recLevel-1, WH.Amin, WH.Amax, 2*WH.Bmin-2*WH.Bmax, 2*WH.Bmax-2*WH.Bmin, 0, 0);
+
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A22, lda, X2, ldX2, F.zero, C11, ldc, H4);
+
+ fflas_delete (X2);
+
+ // U6 = U3 - P4 in C21
+ DFElt U6Min, U6Max;
+ // This test will be optimized out
+ if (Protected::NeedPreSubReduction (U6Min,U6Max, U3Min, U3Max, H4.Outmin,H4.Outmax, WH) ){
+ freduce (F, mr, nr, C11, ldc);
+ freduce (F, mr, nr, C21, ldc);
+ }
+ fsubin(DF,mr,nr,(DFCEptr)C11,ldc,(DFEptr)C21,ldc);
+
+ // P2 = alpha . A12 * B21 in C11
+ MMH_t H2(F, WH.recLevel-1, WH.Amin, WH.Amax, WH.Bmin, WH.Bmax, 0, 0);
+
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, F.zero, C11, ldc, H2);
+
+ // U1 = P2 + P1 in C11
+ DFElt U1Min, U1Max;
+ // This test will be optimized out
+ if (Protected::NeedPreAddReduction (U1Min, U1Max, H1.Outmin, H1.Outmax, H2.Outmin,H2.Outmax, WH) ){
+ freduce (F, mr, nr, X1, nr);
+ freduce (F, mr, nr, C11, ldc);
+ }
+ faddin(DF,mr,nr,(DFCEptr)X1,nr,(DFEptr)C11,ldc);
+
+ fflas_delete (X1);
+
+ WH.Outmin = std::min (U1Min, std::min (U5Min, std::min (U6Min, U7Min)));
+ WH.Outmax = std::max (U1Max, std::max (U5Max, std::max (U6Max, U7Max)));
+
+ } // Winograd
+
+ } // BLAS3
+
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fgemm_winograd_INL
+
diff --git a/fflas-ffpack/fflas/fflas_fgemm/schedule_winograd_acc.inl b/fflas-ffpack/fflas/fflas_fgemm/schedule_winograd_acc.inl
new file mode 100644
index 0000000..143cf58
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fgemm/schedule_winograd_acc.inl
@@ -0,0 +1,644 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* Copyright (C) 2014 the LinBox group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_fgemm/winograd_acc.inl
+ * @ingroup MMalgos
+ * @brief Winograd implementation
+ * @bib ISSAC09 Scheduling
+ */
+
+#ifndef __FFLASFFPACK_fgemm_winograd_acc_INL
+#define __FFLASFFPACK_fgemm_winograd_acc_INL
+
+namespace FFLAS { namespace BLAS3 {
+
+
+ // 3 temps and 23 ops
+ // TODO: Add check for modular reductions before final additions
+ template < class Field,class FieldTrait >
+ inline void WinogradAcc_3_23 (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A,const size_t lda,
+ typename Field::ConstElement_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > & WH
+ )
+ {
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > H = WH ;
+ H.recLevel = H.recLevel - 1 ;
+
+ FFLASFFPACK_check(!F.isZero(beta));
+
+ typename Field::Element mbeta ;
+ F.neg(mbeta,beta);
+
+ size_t lb, cb, la, ca;
+ size_t x3rd = std::max(mr,kr);
+ typename Field::ConstElement_ptr A11=A, A12, A21, A22;
+ typename Field::ConstElement_ptr B11=B, B12, B21, B22;
+ typename Field::Element_ptr C11=C, C12=C+nr, C21=C+mr*ldc, C22=C21+nr;
+
+
+
+ size_t ldX3;
+ // Three temporary submatrices are required
+
+ if (ta == FflasTrans) {
+ A21 = A + mr;
+ A12 = A + kr*lda;
+ A22 = A12 + mr;
+ la = kr;
+ ca = mr;
+ }
+ else { // ta == FflasNoTrans
+ A12 = A + kr;
+ A21 = A + mr*lda;
+ A22 = A21 + kr;
+ la = mr;
+ ca = kr;
+ }
+ if (tb == FflasTrans) {
+ B21 = B + kr;
+ B12 = B + nr*ldb;
+ B22 = B12 + kr;
+ lb = nr;
+ cb = kr;
+ ldX3 = x3rd;
+ }
+ else { // ta == FflasNoTrans
+ B12 = B + nr;
+ B21 = B + kr*ldb;
+ B22 = B21 + nr;
+ lb = kr;
+ ldX3 = cb = nr;
+ }
+
+ // P2 = alpha . A12 * B21 + beta . C11 in C11
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, beta, C11, ldc, H);
+
+ typename Field::Element_ptr X3 = fflas_new (F, x3rd, nr);
+
+ // T3 = B22 - B12 in X3
+ fsub(F,lb,cb,B22,ldb,B12,ldb,X3,ldX3);
+
+ typename Field::Element_ptr X2 = fflas_new (F, mr, kr);
+
+ // S3 = A11 - A21 in X2
+ fsub(F,la,ca,A11,lda,A21,lda,X2,ca);
+
+ // C22 = C22 - C12 if beta != 0
+ fsubin(F,mr,nr,C12,ldc,C22,ldc);
+
+ // C21 = C21 - C22
+ fsubin(F,mr,nr,C22,ldc,C21,ldc);
+
+ // P7 = alpha . S3 * T3 + beta . C22 in C22
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X2, ca, X3, ldX3, beta, C22, ldc, H);
+
+ // T1 = B12 - B11 in X3
+ fsub(F,lb,cb,B12,ldb,B11,ldb,X3,ldX3);
+
+ // S1 = A21 + A22 in X2
+ fadd(F,la,ca,A21,lda,A22,lda,X2,ca);
+
+ // P5 = alpha . S1*T1 + beta . C12 in C12
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X2, ca, X3, ldX3, beta, C12, ldc, H);
+
+ // T2 = B22 - T1 in X3
+ fsub(F,lb,cb,B22,ldb,X3,ldX3,X3,ldX3);
+
+ // S2 = S1 - A11 in X2
+ fsubin(F,la,ca,A11,lda,X2,ca);
+
+ typename Field::Element_ptr X1 = fflas_new (F, mr, nr);
+
+ // P6 = alpha . S2 * T2 in X1
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X2, ca, X3, ldX3, F.zero, X1, nr, H);
+
+ // T4 = T2 - B21 in X3
+ fsubin(F,lb,cb,B21,ldb,X3,ldX3);
+
+ // S4 = A12 -S2 in X2
+ fsub(F,la,ca,A12,lda,X2,ca,X2,ca);
+
+ // P4 = alpha . A22 * T4 - beta . C21 in C21
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A22, lda, X3, ldX3, mbeta, C21, ldc, H);
+
+ // P1 = alpha . A11 * B11 in X3
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, X3, nr, H);
+
+ // U1 = P2 + P1 in C11
+ faddin(F,mr,nr,X3,nr,C11,ldc);
+
+ // U2 = P1 + P6 in tmpU2/X1 and
+ faddin(F, mr, nr, X3, nr, X1, nr);
+
+ // U3 = P7 + U2 in tmpU3/X3 and
+ fadd(F, mr, nr, X1, nr, C22, ldc, X3, nr);
+
+ // U7 = P5 + U3 in C22 and
+ fadd(F, mr, nr, C12, ldc, X3, nr, C22, ldc);
+
+ // U4 = P5 + U2 in C12 and
+ faddin(F, mr, nr, X1, nr, C12, ldc);
+
+ fflas_delete (X1);
+
+ // U6 = U3 - P4 in C21 and
+ fsub(F, mr, nr, X3, nr, C21, ldc, C21, ldc);
+
+ fflas_delete (X3);
+
+ // P3 = alpha . S4*B22 in X1
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X2, ca, B22, ldb, F.one, C12, ldc, H);
+
+ fflas_delete (X2);
+
+ } // WinogradAccOld
+
+ // 3 temps and 21 ops
+ template < class Field, class FieldTrait>
+ inline void WinogradAcc_3_21 (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A,const size_t lda,
+ typename Field::ConstElement_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > & WH
+ )
+ {
+ typedef MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > MMH_t;
+ typedef typename MMH_t::DelayedField::Element_ptr DFEptr;
+ typedef typename MMH_t::DelayedField::ConstElement_ptr DFCEptr;
+ typedef typename MMH_t::DelayedField::Element DFElt;
+
+ const typename MMH_t::DelayedField & DF = WH.delayedField;
+
+ FFLASFFPACK_check(!DF.isZero(beta));
+
+ size_t lb, cb, la, ca;
+ size_t x3rd = std::max(mr,kr);
+ typename Field::ConstElement_ptr A11=A, A12, A21, A22;
+ typename Field::ConstElement_ptr B11=B, B12, B21, B22;
+ typename Field::Element_ptr C11=C, C12=C+nr, C21=C+mr*ldc, C22=C21+nr;
+
+ typename Field::Element mbeta;
+ F.neg(mbeta,beta);
+ DFElt betadf;
+ if (F.isMOne(beta))
+ DF.assign(betadf,DF.mOne);
+ else
+ DF.init(betadf, beta);
+
+ size_t ldX3;
+
+ if (ta == FflasTrans) {
+ A21 = A + mr;
+ A12 = A + kr*lda;
+ A22 = A12 + mr;
+ la = kr;
+ ca = mr;
+ } else { // ta == FflasNoTrans
+ A12 = A + kr;
+ A21 = A + mr*lda;
+ A22 = A21 + kr;
+ la = mr;
+ ca = kr;
+ }
+ if (tb == FflasTrans) {
+ B21 = B + kr;
+ B12 = B + nr*ldb;
+ B22 = B12 + kr;
+ lb = nr;
+ cb = kr;
+ ldX3 = x3rd;
+ } else { // ta == FflasNoTrans
+ B12 = B + nr;
+ B21 = B + kr*ldb;
+ B22 = B21 + nr;
+ lb = kr;
+ ldX3 = cb = nr;
+ }
+
+ // Three temporary submatrices are required
+ typename Field::Element_ptr X3 = fflas_new (F, x3rd, nr);
+
+ // T1 = B12 - B11 in X3
+ fsub(DF,lb,cb,(DFCEptr)B12,ldb,(DFCEptr)B11,ldb,(DFEptr)X3,ldX3);
+
+ typename Field::Element_ptr X2 = fflas_new(F,mr,kr);
+
+ // S1 = A21 + A22 in X2
+ fadd(DF,la,ca,(DFCEptr)A21,lda,(DFCEptr)A22,lda,(DFEptr)X2,ca);
+
+ typename Field::Element_ptr X1 = fflas_new(F,mr,nr);
+ // P5 = alpha . S1*T1 in X1
+ MMH_t H5(F, WH.recLevel-1,
+ 2*WH.Amin, 2*WH.Amax,
+ -(WH.Bmax-WH.Bmin),
+ WH.Bmax-WH.Bmin,
+ 0, 0);
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X2, ca, X3, ldX3, F.zero, X1, nr, H5);
+
+ DFElt C22Min, C22Max;
+ DFElt C12Min, C12Max;
+ // This test will be optimized out
+ if (Protected::NeedDoublePreAddReduction (C12Min, C12Max, H5.Outmin, H5.Outmax, WH.Cmin, WH.Cmax, betadf, WH)){
+ freduce(F,mr,nr,X1,nr);
+ H5.initOut();
+ }
+ C22Min = C12Min; C22Max = C12Max;
+
+ // C22 = P5 + beta C22 in C22
+ fadd(DF,mr,nr,(DFCEptr)X1,nr,betadf,(DFCEptr)C22,ldc,(DFEptr)C22,ldc);
+
+ // C12 = P5 + beta C12 in C12
+ fadd(DF,mr,nr,(DFCEptr)X1,nr,betadf,(DFCEptr)C12,ldc,(DFEptr)C12,ldc);
+
+ // P1 = alpha . A11 * B11 in X1
+ MMH_t H1(F, WH.recLevel-1,
+ WH.Amin, WH.Amax,
+ WH.Bmin, WH.Bmax,
+ 0, 0);
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, X1, nr, H1);
+
+ // P2 = alpha . A12 * B21 + beta . C11 in C11
+ MMH_t H2(F, WH.recLevel-1,
+ WH.Amin, WH.Amax,
+ WH.Bmin, WH.Bmax,
+ WH.Cmin, WH.Cmax);
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, beta, C11, ldc, H2);
+
+ // U1 = P2 + P1 in C11
+ DFElt U1Min, U1Max;
+ if (Protected::NeedPreAddReduction (U1Min,U1Max, H1.Outmin, H1.Outmax, H2.Outmin,H2.Outmax, WH) ){
+ freduce(F,mr,nr,X1,nr);
+ freduce(F,mr,nr,C11,ldc);
+ }
+ faddin(DF,mr,nr,(DFCEptr)X1,nr,(DFEptr)C11,ldc);
+
+ // T2 = B22 - T1 in X3
+ fsub(DF,lb,cb,(DFCEptr)B22,ldb,(DFCEptr)X3,ldX3,(DFEptr)X3,ldX3);
+
+ // S2 = S1 - A11 in X2
+ fsubin(DF,la,ca,(DFCEptr)A11,lda,(DFEptr)X2,ca);
+
+ // U2 = P6 + P1 = alpha . S2 * T2 + P1 in X1
+ MMH_t H6(F, WH.recLevel-1,
+ 2*WH.Amin-WH.Amax, 2*WH.Amax-WH.Amin,
+ 2*WH.Bmin-WH.Bmax, 2*WH.Bmax-WH.Bmin,
+ H1.Outmin, H1.Outmax);
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X2, ca, X3, ldX3, F.one, X1, nr, H6);
+
+ // U4 = U2 + C12 in C12
+ DFElt U4Min, U4Max;
+ if (Protected::NeedPreAddReduction (U4Min, U4Max, H6.Outmin, H6.Outmax, C12Min, C12Max, WH)){
+ freduce(F,mr,nr,C12,ldc);
+ freduce(F,mr,nr,X1,nr);
+ }
+ faddin(DF,mr,nr,(DFCEptr)X1,nr,(DFEptr)C12,ldc);
+
+ // T4 = T2 - B21 in X3
+ fsubin(DF,lb,cb,(DFCEptr)B21,ldb,(DFEptr)X3,ldX3);
+
+ // S4 = A12 -S2 in X2
+ fsub(DF,la,ca,(DFCEptr)A12,lda,(DFCEptr)X2,ca,(DFEptr)X2,ca);
+
+ // P4 = alpha . A22 * T4 - beta . C21 in C21
+ MMH_t H4(F, WH.recLevel-1,
+ WH.Amin, WH.Amax,
+ 2*WH.Bmin-2*WH.Bmax, 2*WH.Bmax-2*WH.Bmin,
+ WH.Cmin, WH.Cmax);
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A22, lda, X3, ldX3, mbeta, C21, ldc, H4);
+
+ // U5 = P3 + U4 = alpha . S4*B22 + U4 in C12
+ MMH_t H3(F, WH.recLevel-1,
+ 2*WH.Amin-2*WH.Amax, 2*WH.Amax-2*WH.Amin,
+ WH.Bmin, WH.Bmax,
+ U4Min, U4Max);
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X2, ca, B22, ldb, F.one, C12, ldc, H3);
+
+ // T3 = B22 - B12 in X3
+ fsub(DF,lb,cb,(DFCEptr)B22,ldb,(DFCEptr)B12,ldb,(DFEptr)X3,ldX3);
+
+ // S3 = A11 - A21 in X2
+ fsub(DF,la,ca,(DFCEptr)A11,lda,(DFCEptr)A21,lda,(DFEptr)X2,ca);
+
+ // U3 = P7 + U2 = alpha . S3 * T3 + U2 in X1
+ MMH_t H7(F, WH.recLevel-1,
+ WH.Amin-WH.Amax, WH.Amax-WH.Amin,
+ WH.Bmin-WH.Bmax, WH.Bmax-WH.Bmin,
+ H6.Outmin, H6.Outmax);
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X2, ca, X3, ldX3, F.one, X1, nr, H7);
+
+ fflas_delete (X2);
+ fflas_delete (X3);
+
+ // U7 = U3 + C22 in C22
+ DFElt U7Min, U7Max;
+ if (Protected::NeedPreAddReduction (U7Min, U7Max, H7.Outmin, H7.Outmax, C22Min, C22Max, WH)){
+ freduce(F,mr,nr,X1,nr);
+ freduce(F,mr,nr,C22,ldc);
+ }
+ faddin(DF,mr,nr,(DFCEptr)X1,nr,(DFEptr)C22,ldc);
+
+ // U6 = U3 - P4 in C21
+ DFElt U6Min, U6Max;
+ if (Protected::NeedPreSubReduction(U6Min, U6Max, H7.Outmin, H7.Outmax, H4.Outmin, H4.Outmax, WH)){
+ freduce(F,mr,nr,X1,nr);
+ freduce(F,mr,nr,C21,ldc);
+ }
+ fsub(DF,mr,nr,(DFCEptr)X1,nr,(DFCEptr)C21,ldc,(DFEptr)C21,ldc);
+
+ fflas_delete (X1);
+
+ // Updating WH with Outmin, Outmax of the result
+ WH.Outmin = min4 (U1Min, H3.Outmin, U6Min, U7Min);
+ WH.Outmax = max4 (U1Max, H3.Outmax, U6Max, U7Max);
+ } // WinogradAcc
+
+
+ // 2 temps and 24 ops
+ // TODO: Add check for modular reductions before final additions
+ template < class Field, class FieldTrait >
+ inline void WinogradAcc_2_24 (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ const typename Field::Element_ptr A,const size_t lda,
+ const typename Field::Element_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > & WH
+ )
+ {
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > H = WH ;
+ H.recLevel = H.recLevel - 1 ;
+
+ FFLASFFPACK_check(!F.isZero(beta));
+
+ typename Field::Element malpha ;
+ F.neg(malpha,alpha);
+
+ // A, B and c submatrices
+ const typename Field::Element_ptr A11=A, A12, A21, A22;
+ const typename Field::Element_ptr B11=B, B12, B21, B22;
+ typename Field::Element_ptr C11=C, C12=C+nr, C21=C+mr*ldc, C22=C21+nr;
+
+
+
+ size_t la, ca, lb, cb; // lines and columns in A,B sub matrices
+
+ // Three temporary submatrices are required
+
+ if (ta == FflasTrans) {
+ A21 = A + mr;
+ A12 = A + kr*lda;
+ A22 = A12 + mr;
+ la = kr ;
+ ca = mr ;
+ }
+ else { // ta == FflasNoTrans
+ A12 = A + kr;
+ A21 = A + mr*lda;
+ A22 = A21 + kr;
+ la = mr ;
+ ca = kr ;
+
+ }
+ if (tb == FflasTrans) {
+ B21 = B + kr;
+ B12 = B + nr*ldb;
+ B22 = B12 + kr;
+ lb = nr ;
+ cb = kr ;
+
+ }
+ else { // ta == FflasNoTrans
+ B12 = B + nr;
+ B21 = B + kr*ldb;
+ B22 = B21 + nr;
+ lb = kr ;
+ cb = nr ;
+ }
+
+ // Z1 = C22 - C12 in C22
+ fsubin(F,mr,nr,C12,ldc,C22,ldc);
+ // Z3 = C12-C21 in C12
+ fsubin(F,mr,nr,C21,ldc,C12,ldc);
+ // S1 = A21 + A22 in X
+ typename Field::Element_ptr X = fflas_new(F,mr,std::max(nr,kr));
+ fadd(F,la,ca,A21,lda,A22,lda,X,ca);
+ // T1 = B12 - B11 in Y
+ typename Field::Element_ptr Y = fflas_new(F,nr,kr);
+ fsub(F,lb,cb,B12,ldb,B11,ldb,Y,cb);
+ // P5 = a S1 T1 + b Z3 in C12
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X, ca, Y, cb, beta, C12, ldc, H);
+ // S2 = S1 - A11 in X
+ fsubin(F,la,ca,A11,lda,X,ca);
+ // T2 = B22 - T1 in Y
+ fsub(F,lb,cb,B22,ldb,Y,cb,Y,cb);
+ // P6 = a S2 T2 + b C21 in C21
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X, ca, Y, cb, beta, C21, ldc, H);
+ // S4 = A12 - S2 in X
+ fsub(F,la,ca,A12,lda,X,ca,X,ca);
+ // W1 = P5 + beta Z1 in C22
+ fadd(F,mr,nr,C12,ldc,beta,C22,ldc,C22,ldc);
+ // P3 = a S4 B22 + P5 in C12
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X, ca, B22, ldb, F.one, C12, ldc, H);
+ // P1 = a A11 B11 in X
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, X, nr, H);
+ // U2 = P6 + P1 in C21
+ faddin(F,mr,nr,X,nr,C21,ldc);
+ // P2 = a A12 B21 + b C11 in C11
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, beta, C11, ldc, H);
+ // U1 = P1 + P2 in C11
+ faddin(F,mr,nr,X,nr,C11,ldc);
+ // U5 = U2 + P3 in C12
+ faddin(F,mr,nr,C21,ldc,C12,ldc);
+ // S3 = A11 - A21 in X ;
+ fsub(F,la,ca,A11,lda,A21,lda,X,ca);
+ // T3 = B22 - B12 in Y
+ fsub(F,lb,cb,B22,ldb,B12,ldb,Y,cb);
+ // U3 = a S3 T3 + U2 in C21
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X, ca, Y, cb, F.one, C21, ldc, H);
+ fflas_delete (X);
+ // U7 = U3 + W1 in C22
+ faddin(F,mr,nr,C21,ldc,C22,ldc);
+ // T1_ = B12 - B11 in Y
+ fsub(F,lb,cb,B12,ldb,B11,ldb,Y,cb);
+ // T2_ = B22 - T1_ in Y
+ fsub(F,lb,cb,B22,ldb,Y,cb,Y,cb);
+ // T4 = T2_ - B21 in Y
+ fsub(F,lb,cb,Y,cb,B21,ldb,Y,cb);
+ // U6 = -a A22 T4 + U3 in C21;
+ fgemm (F, ta, tb, mr, nr, kr, malpha, A22, lda, Y, cb, F.one, C21, ldc, H);
+ fflas_delete (Y);
+
+
+ } // WinogradAccOld
+
+ // 2 temps and 27 ops
+ // TODO: Add check for modular reductions before final additions
+ template < class Field, class FieldTrait >
+ inline void WinogradAcc_2_27 (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ const typename Field::Element_ptr A,const size_t lda,
+ const typename Field::Element_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > & WH)
+ {
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > H = WH ;
+ H.recLevel = H.recLevel - 1 ;
+
+ FFLASFFPACK_check(!F.isZero(beta));
+
+ typename Field::Element malpha ;
+ F.neg(malpha,alpha);
+
+ // A, B and c submatrices
+ const typename Field::Element_ptr A11=A, A12, A21, A22;
+ const typename Field::Element_ptr B11=B, B12, B21, B22;
+ typename Field::Element_ptr C11=C, C12=C+nr, C21=C+mr*ldc, C22=C21+nr;
+
+
+
+ size_t la, ca, lb, cb; // lines and columns in A,B sub matrices
+
+ // Three temporary submatrices are required
+
+ if (ta == FflasTrans) {
+ A21 = A + mr;
+ A12 = A + kr*lda;
+ A22 = A12 + mr;
+ la = kr ;
+ ca = mr ;
+ }
+ else { // ta == FflasNoTrans
+ A12 = A + kr;
+ A21 = A + mr*lda;
+ A22 = A21 + kr;
+ la = mr ;
+ ca = kr ;
+
+ }
+ if (tb == FflasTrans) {
+ B21 = B + kr;
+ B12 = B + nr*ldb;
+ B22 = B12 + kr;
+ lb = nr ;
+ cb = kr ;
+
+ }
+ else { // ta == FflasNoTrans
+ B12 = B + nr;
+ B21 = B + kr*ldb;
+ B22 = B21 + nr;
+ lb = kr ;
+ cb = nr ;
+ }
+
+ // Z1 = C22 - C12 in C22
+ fsubin(F,mr,nr,C12,ldc,C22,ldc);
+ // Z3 = C12-C21 in C12
+ fsubin(F,mr,nr,C21,ldc,C12,ldc);
+ // S1 = A21 + A22 in X
+ typename Field::Element_ptr X = fflas_new(F,mr,std::max(nr,kr));
+ fadd(F,la,ca,A21,lda,A22,lda,X,ca);
+ // T1 = B12 - B11 in Y
+ typename Field::Element_ptr Y = fflas_new(F,nr,std::max(kr,mr));
+ fsub(F,lb,cb,B12,ldb,B11,ldb,Y,cb);
+ // P5 = a S1 T1 + b Z3 in C12
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X, ca, Y, cb, beta, C12, ldc, H);
+ // S2 = S1 - A11 in X
+ fsubin(F,la,ca,A11,lda,X,ca);
+ // T2 = B22 - T1 in Y
+ fsub(F,lb,cb,B22,ldb,Y,cb,Y,cb);
+ // P6 = a S2 T2 + b C21 in C21
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X, ca, Y, cb, beta, C21, ldc, H);
+ // S4 = A12 - S2 in X
+ fsub(F,la,ca,A12,lda,X,ca,X,ca);
+ // W1 = P5 + beta Z1 in C22
+ fadd(F,mr,nr,C12,ldc,beta,C22,ldc,C22,ldc);
+ // P3 = a S4 B22 + P5 in C12
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X, ca, B22, ldb, F.zero, Y, nr, H);
+ fadd(F,mr,nr,Y,nr,C12,ldc,C12,ldc);
+ // P1 = a A11 B11 in X
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, X, nr, H);
+ // U2 = P6 + P1 in C21
+ faddin(F,mr,nr,X,nr,C21,ldc);
+ // P2 = a A12 B21 + b C11 in C11
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, F.zero, Y, nr, H);
+ fadd(F,mr,nr,Y,nr,beta,C11,ldc,C11,ldc);
+ // U1 = P1 + P2 in C11
+ faddin(F,mr,nr,X,nr,C11,ldc);
+ // U5 = U2 + P3 in C12
+ faddin(F,mr,nr,C21,ldc,C12,ldc);
+ // S3 = A11 - A21 in X ;
+ fsub(F,la,ca,A11,lda,A21,lda,X,ca);
+ // T3 = B22 - B12 in Y
+ fsub(F,lb,cb,B22,ldb,B12,ldb,Y,cb);
+ // U3 = a S3 T3 + U2 in C21
+ fgemm (F, ta, tb, mr, nr, kr, alpha, X, ca, Y, cb, F.one, C21, ldc, H);
+ // U7 = U3 + W1 in C22
+ faddin(F,mr,nr,C21,ldc,C22,ldc);
+ // T1_ = B12 - B11 in Y
+ fsub(F,lb,cb,B12,ldb,B11,ldb,Y,cb);
+ // T2_ = B22 - T1_ in Y
+ fsub(F,lb,cb,B22,ldb,Y,cb,Y,cb);
+ // T4 = T2_ - B21 in Y
+ fsub(F,lb,cb,Y,cb,B21,ldb,Y,cb);
+ // U6 = -a A22 T4 + U3 in C21;
+ fgemm (F, ta, tb, mr, nr, kr, alpha, A22, lda, Y, cb, F.zero, X, nr, H);
+ fflas_delete (Y);
+ fsub(F,mr,nr,C21,ldc,X,nr,C21,ldc);
+ fflas_delete (X);
+
+
+ } // WinogradAcc3
+
+
+} // BLAS3
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fgemm_winograd_acc_INL
+
diff --git a/fflas-ffpack/fflas/fflas_fgemm/schedule_winograd_acc_ip.inl b/fflas-ffpack/fflas/fflas_fgemm/schedule_winograd_acc_ip.inl
new file mode 100644
index 0000000..b0927fd
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fgemm/schedule_winograd_acc_ip.inl
@@ -0,0 +1,425 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* Copyright (C) 2014 the LinBox group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_fgemm/winograd_acc2.inl
+ * @ingroup MMalgos
+ * @brief Winograd implementation
+ * @bib ISSAC09 Scheduling
+ */
+
+#ifndef __FFLASFFPACK_fgemm_winograd_acc_ip_INL
+#define __FFLASFFPACK_fgemm_winograd_acc_ip_INL
+
+namespace FFLAS { namespace BLAS3 {
+
+ template < class Field, class FieldTrait >
+ inline void WinogradAcc_LR (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ typename Field::Element_ptr A,const size_t lda,
+ typename Field::Element_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ const MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > & WH
+ )
+ {
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > H = WH ;
+ H.recLevel = H.recLevel - 1 ;
+
+ FFLASFFPACK_check(!F.isZero(beta));
+
+ typename Field::Element malpha ;
+ F.neg(malpha,alpha);
+
+ // A, B and c submatrices
+ typename Field::Element_ptr A11=A, A12, A21, A22;
+ typename Field::Element_ptr B11=B, B12, B21, B22;
+ typename Field::Element_ptr C11=C, C12=C+nr, C21=C+mr*ldc, C22=C21+nr;
+
+
+ typename Field::Element mbeta ;
+ F.neg(mbeta,beta);
+
+ size_t la, ca, lb, cb; // lines and columns in A,B sub matrices
+
+ // Three temporary submatrices are required
+
+ if (ta == FflasTrans) {
+ A21 = A + mr;
+ A12 = A + kr*lda;
+ A22 = A12 + mr;
+ la = kr ;
+ ca = mr ;
+ }
+ else { // ta == FflasNoTrans
+ A12 = A + kr;
+ A21 = A + mr*lda;
+ A22 = A21 + kr;
+ la = mr ;
+ ca = kr ;
+
+ }
+ if (tb == FflasTrans) {
+ B21 = B + kr;
+ B12 = B + nr*ldb;
+ B22 = B12 + kr;
+ lb = nr ;
+ cb = kr ;
+
+ }
+ else { // ta == FflasNoTrans
+ B12 = B + nr;
+ B21 = B + kr*ldb;
+ B22 = B21 + nr;
+ lb = kr ;
+ cb = nr ;
+ }
+
+
+ // Z1 = C22 - C12 in C22
+ fsubin(F,mr,nr,C12,ldc,C22,ldc);
+ // S1 = A21 + A22 in X
+ typename Field::Element_ptr X = fflas_new (F, std::max(std::max(mr*nr,kr*nr),mr*kr), 1);
+ fadd(F,la,ca,A21,lda,A22,lda,X,ca);
+ // T1 = B12 - B11 in Y
+ typename Field::Element_ptr Y = fflas_new (F, std::max(mr,kr), nr);
+ fsub(F,lb,cb,B12,ldb,B11,ldb,Y,cb);
+ // Z2 = C21 - Z1 in C21
+ fsubin(F,mr,nr,C22,ldc,C21,ldc);
+ // T3 = B22 - B12 in B12 ;
+ fsub(F,lb,cb,B22,ldb,B12,ldb,B12,ldb);
+ // S3 = A11 - A21 in A21
+ fsub(F,la,ca,A11,lda,A21,lda,A21,lda);
+ // P7 = a S3 T3 + b Z1 in C22
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A21, lda, B12, ldb, beta, C22, ldc, H);
+ // S2 = S1 - A11 in A21
+ fsub(F,la,ca,X,ca,A11,lda,A21,lda);
+ // T2 = B22 - T1 in B12
+ fsub(F,lb,cb,B22,ldb,Y,cb,B12,ldb);
+ // P5 = a S1 T1 + b C12 in C12
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, X, ca, Y, cb, beta, C12, ldc, H);
+ // T4 = T2 - B21 in X
+ fsub(F,lb,cb,B12,ldb,B21,ldb,X,cb);
+ // W1 = a A22 T4 in Y;
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A22, lda, X, cb, F.zero, Y, nr, H);
+ // P4 = W1 - b Z2 in C21
+ fadd(F,mr,nr,Y,nr,mbeta,C21,ldc,C21,ldc);
+ // S4 = A12 - S2 in A22
+ fsub(F,la,ca,A12,lda,A21,lda,A22,lda);
+ // P6 = a S2 T2 in X
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A21, lda, B12, ldb, F.zero, X, nr, H);
+ // W2 = a A12 B21 in Y
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, F.zero, Y, nr, H);
+ // P2 = W2 + beta C11 in C11
+ fadd(F,mr,nr,Y,nr,beta,C11,ldc,C11,ldc);
+ // P1 = a A11 B11 in Y
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, Y, nr, H);
+ // U1 = P1 + P2 in C11
+ faddin(F,mr,nr,Y,nr,C11,ldc);
+ // U2 = P6 + P1 in X
+ faddin(F,mr,nr,Y,nr,X,nr);
+ fflas_delete (Y);
+ // U3 = U2 + P7 in C22
+ faddin(F,mr,nr,X,nr,C22,ldc);
+ // U4 = U2 + P5 in X
+ faddin(F,mr,nr,C12,ldc,X,nr);
+ // U6 = U3 - P4 in C21
+ fsub(F,mr,nr,C22,ldc,C21,ldc,C21,ldc);
+ // U7 = U3 + P5 in C22
+ faddin(F,mr,nr,C12,ldc,C22,ldc);
+ // P3 = a S4 B22 in C12
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A22, lda, B22, ldb, F.zero, C12, ldc, H);
+ // U5 = U4 + P3 in C12
+ faddin(F,mr,nr,X,nr,C12,ldc);
+
+ fflas_delete (X);
+
+
+ } // WinogradAccOld
+
+ template < class Field, class FieldTrait >
+ inline void WinogradAcc_R_S (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ const typename Field::Element_ptr A,const size_t lda,
+ typename Field::Element_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ const MMHelper<Field, MMHelperAlgo::Winograd,FieldTrait > & WH
+ )
+ {
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > H = WH ;
+ H.recLevel = H.recLevel - 1 ;
+
+ FFLASFFPACK_check(!F.isZero(beta));
+
+ typename Field::Element malpha ;
+ F.neg(malpha,alpha);
+
+ // A, B and c submatrices
+ const typename Field::Element_ptr A11=A, A12, A21, A22;
+ typename Field::Element_ptr B11=B, B12, B21, B22;
+ typename Field::Element_ptr C11=C, C12=C+nr, C21=C+mr*ldc, C22=C21+nr;
+
+
+ typename Field::Element mbeta ;
+ F.neg(mbeta,beta);
+
+ size_t la, ca, lb, cb; // lines and columns in A,B sub matrices
+
+ // Three temporary submatrices are required
+
+ if (ta == FflasTrans) {
+ A21 = A + mr;
+ A12 = A + kr*lda;
+ A22 = A12 + mr;
+ la = kr ;
+ ca = mr ;
+ }
+ else { // ta == FflasNoTrans
+ A12 = A + kr;
+ A21 = A + mr*lda;
+ A22 = A21 + kr;
+ la = mr ;
+ ca = kr ;
+
+ }
+ if (tb == FflasTrans) {
+ B21 = B + kr;
+ B12 = B + nr*ldb;
+ B22 = B12 + kr;
+ lb = nr ;
+ cb = kr ;
+
+ }
+ else { // ta == FflasNoTrans
+ B12 = B + nr;
+ B21 = B + kr*ldb;
+ B22 = B21 + nr;
+ lb = kr ;
+ cb = nr ;
+ }
+
+ FFLASFFPACK_check(mr == nr && kr == nr);
+
+ // Z1 = C22 - C12 in C22
+ fsubin(F,mr,nr,C12,ldc,C22,ldc);
+ // T1 = B12 - B11 in X
+ // typename Field::Element_ptr X = fflas_new (F, std::max(mr,kr)*nr];
+ typename Field::Element_ptr X = fflas_new (F, mr, nr);
+ fsub(F,lb,cb,B12,ldb,B11,ldb,X,cb);
+ // Z2 = C21 - Z1 in C21
+ fsubin(F,mr,nr,C22,ldc,C21,ldc);
+ // T3 = B22 - B12 in B12 ;
+ fsub(F,lb,cb,B22,ldb,B12,ldb,B12,ldb);
+ // S3 = A11 - A21 in Y
+ typename Field::Element_ptr Y = fflas_new (F, mr, kr);
+ fsub(F,la,ca,A11,lda,A21,lda,Y,ca);
+ // P7 = a S3 T3 + b Z1 in C22
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, Y, ca, B12, ldb, beta, C22, ldc, H);
+ // S1 = A21 + A22 in Y
+ fadd(F,la,ca,A21,lda,A22,lda,Y,ca);
+ // T2 = B22 - T1 in B12
+ fsub(F,lb,cb,B22,ldb,X,cb,B12,ldb);
+ // P5 = a S1 T1 + b C12 in C12
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, Y, ca, X, cb, beta, C12, ldc, H);
+ // T4 = T2 - B21 in X
+ fsub(F,lb,cb,B12,ldb,B21,ldb,X,cb);
+ // P4 = a A22 T4 - b Z2 in C21
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A22, lda, X, cb, mbeta, C21, ldc, H);
+ // W1 = a A12 B21 in X;
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, F.zero, X, nr, H);
+ // P2 = W1 + beta C11 in C11
+ fadd(F,mr,nr,X,nr,beta,C11,ldc,C11,ldc);
+ // S2 = S1 - A11 in Y
+ fsubin(F,la,ca,A11,lda,Y,ca);
+ // P6 = a S2 T2 in B21
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, Y, ca, B12, ldb, F.zero, B21, ldb, H);
+ // S4 = A12 - S2 in Y
+ fsub(F,la,ca,A12,lda,Y,ca,Y,ca);
+ // P1 = a A11 B11 in X
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, X, nr, H);
+ // U2 = P6 + P1 in B21
+ faddin(F,mr,nr,X,nr,B21,ldb);
+ // U3 = U2 + P7 in C22
+ faddin(F,mr,nr,B21,ldb,C22,ldc);
+ // U4 = U2 + P5 in B21
+ faddin(F,mr,nr,C12,ldc,B21,ldb);
+ // U6 = U3 - P4 in C21
+ fsub(F,mr,nr,C22,ldc,C21,ldc,C21,ldc);
+ // U1 = P1 + P2 in C11
+ faddin(F,mr,nr,X,nr,C11,ldc);
+ fflas_delete (X);
+ // U7 = U3 + P5 in C22
+ faddin(F,mr,nr,C12,ldc,C22,ldc);
+ // P3 = a S4 B22 in C12
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, Y, ca, B22, ldb, F.zero, C12, ldc, H);
+ fflas_delete (Y);
+ // U5 = U4 + P3 in C12
+ faddin(F,mr,nr,B21,ldb,C12,ldc);
+
+
+
+
+ } // WinogradAccOld
+
+
+ template < class Field ,class FieldTrait>
+ inline void WinogradAcc_L_S (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ typename Field::Element_ptr A,const size_t lda,
+ const typename Field::Element_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ const MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > & WH
+ )
+ {
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > H = WH ;
+ H.recLevel = H.recLevel - 1 ;
+
+ FFLASFFPACK_check(!F.isZero(beta));
+
+ typename Field::Element malpha ;
+ F.neg(malpha,alpha);
+
+ // A, B and c submatrices
+ typename Field::Element_ptr A11=A, A12, A21, A22;
+ const typename Field::Element_ptr B11=B, B12, B21, B22;
+ typename Field::Element_ptr C11=C, C12=C+nr, C21=C+mr*ldc, C22=C21+nr;
+
+
+ typename Field::Element mbeta ;
+ F.neg(mbeta,beta);
+
+ size_t la, ca, lb, cb; // lines and columns in A,B sub matrices
+
+ // Three temporary submatrices are required
+
+ if (ta == FflasTrans) {
+ A21 = A + mr;
+ A12 = A + kr*lda;
+ A22 = A12 + mr;
+ la = kr ;
+ ca = mr ;
+ }
+ else { // ta == FflasNoTrans
+ A12 = A + kr;
+ A21 = A + mr*lda;
+ A22 = A21 + kr;
+ la = mr ;
+ ca = kr ;
+
+ }
+ if (tb == FflasTrans) {
+ B21 = B + kr;
+ B12 = B + nr*ldb;
+ B22 = B12 + kr;
+ lb = nr ;
+ cb = kr ;
+
+ }
+ else { // ta == FflasNoTrans
+ B12 = B + nr;
+ B21 = B + kr*ldb;
+ B22 = B21 + nr;
+ lb = kr ;
+ cb = nr ;
+ }
+
+ FFLASFFPACK_check(mr == nr && kr == nr);
+
+ // Z1 = C22 - C12 in C22
+ fsubin(F,mr,nr,C12,ldc,C22,ldc);
+ // Z2 = C21 - Z1 in C21
+ fsubin(F,mr,nr,C22,ldc,C21,ldc);
+ // S3 = A11 - A21 in X
+ typename Field::Element_ptr X = fflas_new (F, mr, nr);
+ fsub(F,la,ca,A11,lda,A21,lda,X,ca);
+ // S1 = A21 + A22 in A21
+ faddin(F,la,ca,A22,lda,A21,lda);
+ // T3 = B22 - B12 in Y ;
+ typename Field::Element_ptr Y = fflas_new (F, mr, kr);
+ fsub(F,lb,cb,B22,ldb,B12,ldb,Y,cb);
+ // P7 = a S3 T3 + b Z1 in C22
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, X, ca, Y, cb, beta, C22, ldc, H);
+ // T1 = B12 - B11 in X
+ fsub(F,lb,cb,B12,ldb,B11,ldb,X,cb);
+ // T2 = B22 - T1 in Y
+ fsub(F,lb,cb,B22,ldb,X,cb,Y,cb);
+ // P5 = a S1 T1 + b C12 in C12
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A21, lda, X, cb, beta, C12, ldc, H);
+ // S2 = S1 - A11 in A21
+ fsubin(F,la,ca,A11,lda,A21,lda);
+ // P1 = a A11 B11 in X
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, X, nr, H);
+ // S4 = A12 - S2 in A11
+ fsub(F,la,ca,A12,lda,A21,lda,A11,lda);
+ // P2 = a A12 B21 + b C11 in C11;
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, beta, C11, ldc, H);
+ // U1 = P1 + P2 in C11
+ faddin(F,mr,nr,X,nr,C11,ldc);
+ // P6 = a S2 T2 in A12
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A21, lda, Y, cb, F.zero, A12, lda, H);
+ // T4 = T2 - B21 in Y
+ fsubin(F,lb,cb,B21,ldb,Y,cb);
+ // W2 = a A22 T4 in A21
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A22, lda, Y, cb, F.zero, A21, lda, H);
+ // P4 = W2 - beta Z2 in C21
+ fadd(F,mr,nr,A21,lda,mbeta,C21,ldc,C21,ldc);
+ // U2 = P6 + P1 in X
+ faddin(F,mr,nr,A12,lda,X,nr);
+ // U3 = U2 + P7 in C22
+ faddin(F,mr,nr,X,nr,C22,ldc);
+ // U6 = U3 - P4 in C21
+ fsub(F,mr,nr,C22,ldc,C21,ldc,C21,ldc);
+ // U7 = U3 + P5 in C22
+ faddin(F,mr,nr,C12,ldc,C22,ldc);
+ // U4 = U2 + P5 in C12
+ faddin(F,mr,nr,X,nr,C12,ldc);
+ fflas_delete (X);
+ // W3 = a S4 B22 in Y
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A11, lda, B22, ldb, F.zero, Y, nr, H);
+ // U5 = U4 + W3 in C12
+ faddin(F,mr,nr,Y,nr,C12,ldc);
+ fflas_delete (Y);
+
+
+ } // WinogradAccOld
+
+} // BLAS3
+} // FFLAS
+
+#endif // __FFLASFFPACK_fgemm_winograd_acc_ip_INL
+
diff --git a/fflas-ffpack/fflas/fflas_fgemm/schedule_winograd_ip.inl b/fflas-ffpack/fflas/fflas_fgemm/schedule_winograd_ip.inl
new file mode 100644
index 0000000..607c849
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fgemm/schedule_winograd_ip.inl
@@ -0,0 +1,366 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2014 the LinBox group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_fgemm/winograd_ip.inl
+ * @ingroup MMalgos
+ * @brief Winograd implementation
+ * @bib ISSAC09 Scheduling
+ */
+
+#ifndef __FFLASFFPACK_fgemm_winograd_ip_INL
+#define __FFLASFFPACK_fgemm_winograd_ip_INL
+
+namespace FFLAS { namespace BLAS3 {
+
+ template < class Field, class FieldTrait >
+ inline void Winograd_LR_S (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ typename Field::Element_ptr A,const size_t lda,
+ typename Field::Element_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ const MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > & WH
+ )
+ {
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait> H = WH ;
+ H.recLevel = H.recLevel - 1 ;
+
+ FFLASFFPACK_check(F.isZero(beta));
+
+ // FFLASFFPACK_check(mr == nr && mr == kr);
+ FFLASFFPACK_check(kr == nr);
+
+ size_t lb, cb, la, ca;
+ typename Field::Element_ptr A11=A, A12, A21, A22;
+ typename Field::Element_ptr B11=B, B12, B21, B22;
+ typename Field::Element_ptr C11=C, C12=C+nr, C21=C+mr*ldc, C22=C21+nr;
+
+
+ if (ta == FflasTrans) {
+ A21 = A + mr;
+ A12 = A + kr*lda;
+ A22 = A12 + mr;
+ la = kr;
+ ca = mr;
+ }
+ else {
+ A12 = A + kr;
+ A21 = A + mr*lda;
+ A22 = A21 + kr;
+ la = mr;
+ ca = kr;
+ }
+ if (tb == FflasTrans) {
+ B21 = B + kr;
+ B12 = B + nr*ldb;
+ B22 = B12 + kr;
+ lb = nr;
+ cb = kr;
+ }
+ else {
+ B12 = B + nr;
+ B21 = B + kr*ldb;
+ B22 = B21 + nr;
+ lb = kr;
+ cb = nr;
+ }
+
+
+ // S3 = A11 - A21 in C11
+ fsub(F,la,ca,A11,lda,A21,lda,C11,ldc);
+ // S1 = A21 + A22 in A21
+ faddin(F,la,ca,A22,lda,A21,lda);
+ // T1 = B12 - B11 in C22
+ fsub(F,lb,cb,B12,ldb,B11,ldb,C22,ldc);
+ // T3 = B22 - B12 in B12
+ fsub(F,lb,cb,B22,ldb,B12,ldb,B12,ldb);
+ // P7 = S3 T3 in C21
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, C11, ldc, B12, ldb, F.zero, C21, ldc, H);
+ // S2 = S1 - A11 in C12
+ fsub(F,la,ca,A21,lda,A11,lda,C12,ldc);
+ // P1 = A11 B11 in C11
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, C11, ldc, H);
+ // T2 = B22 - T1 in B11
+ fsub(F,lb,cb,B22,ldb,C22,ldc,B11,ldb);
+ // P5 = S1 T1 in A11
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A21, lda, C22, ldc, F.zero, A11, lda, H);
+ // T4 = T2 - B21 in C22
+ fsub(F,lb,cb,B11,ldb,B21,ldb,C22,ldc);
+ // P4 = A22 T4 in A21
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A22, lda, C22, ldc, F.zero, A21, lda, H);
+ // S4 = A12 - S2 in A22
+ fsub(F,la,ca,A12,lda,C12,ldc,A22,lda);
+ // P6 = S2 T2 in C22
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, C12, ldc, B11, ldb, F.zero, C22, ldc, H);
+ // U2 = P1 + P6 in C22
+ faddin(F,mr,nr,C11,ldc,C22,ldc);
+ // P2 = A12 B21 in C12
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, F.zero, C12, ldc, H);
+ // U1 = P1 + P2 in C11
+ faddin(F,mr,nr,C12,ldc,C11,ldc);
+ // U4 = U2 + P5 in C12
+ fadd(F,mr,nr,C22,ldc,A11,lda,C12,ldc);
+ // U3 = U2 + P7 in C22
+ faddin(F,mr,nr,C21,ldc,C22,ldc);
+ // U6 = U3 - P4 in C21
+ fsub(F,mr,nr,C22,ldc,A21,lda,C21,ldc);
+ // U7 = U3 + P5 in C22
+ faddin(F,mr,nr,A11,lda,C22,ldc);
+ // P3 = S4 B22 in A12
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A22, lda, B22, ldb, F.zero, A12, lda, H);
+ // U5 = U4 + P3 in C12
+ faddin(F,mr,nr,A12,lda,C12,ldc);
+
+
+
+ } // WinogradIP
+
+
+ template < class Field, class FieldTrait >
+ inline void Winograd_L_S(const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ typename Field::Element_ptr A,const size_t lda,
+ const typename Field::Element_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ const MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > & WH
+ )
+ {
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > H = WH ;
+ H.recLevel = H.recLevel - 1 ;
+
+ FFLASFFPACK_check(F.isZero(beta));
+
+ FFLASFFPACK_check(kr == nr && kr <= mr);
+
+ size_t lb, cb, la, ca;
+ typename Field::Element_ptr A11=A, A12, A21, A22;
+ const typename Field::Element_ptr B11=B, B12, B21, B22;
+ typename Field::Element_ptr C11=C, C12=C+nr, C21=C+mr*ldc, C22=C21+nr;
+
+
+ if (ta == FflasTrans) {
+ A21 = A + mr;
+ A12 = A + kr*lda;
+ A22 = A12 + mr;
+ la = kr;
+ ca = mr;
+ }
+ else {
+ A12 = A + kr;
+ A21 = A + mr*lda;
+ A22 = A21 + kr;
+ la = mr;
+ ca = kr;
+ }
+ if (tb == FflasTrans) {
+ B21 = B + kr;
+ B12 = B + nr*ldb;
+ B22 = B12 + kr;
+ lb = nr;
+ cb = kr;
+ }
+ else {
+ B12 = B + nr;
+ B21 = B + kr*ldb;
+ B22 = B21 + nr;
+ lb = kr;
+ cb = nr;
+ }
+
+
+ // S3 = A11 - A21 in C22
+ fsub(F,la,ca,A11,lda,A21,lda,C22,ldc);
+ // S1 = A21 + A22 in A21
+ fadd(F,la,ca,A22,lda,A21,lda,A21,lda);
+ // S2 = S1 - A11 in C12
+ fsub(F,la,ca,A21,lda,A11,lda,C12,ldc);
+ // T1 = B12 - B11 in C21
+ fsub(F,lb,cb,B12,ldb,B11,ldb,C21,ldc);
+ // P1 = A11 B11 in C11
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, C11, ldc, H);
+ // T3 = B22 - B12 in A11
+ fsub(F,lb,cb,B22,ldb,B12,ldb,A11,lda);
+ // P7 = S3 T3 in X
+ typename Field::Element_ptr X = fflas_new (F, mr, nr);
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, C22, ldc, A11, lda, F.zero, X, nr, H);
+ // T2 = B22 - T1 in A11
+ fsub(F,lb,cb,B22,ldb,C21,ldc,A11,lda);
+ // P5 = S1 T1 in C22
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A21, lda, C21, ldc, F.zero, C22, ldc, H);
+ // S4 = A12 - S2 in C21
+ fsub(F,la,ca,A12,lda,C12,ldc,C21,ldc);
+ // P3 = S4 B22 in A21
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, C21, ldc, B22, ldb, F.zero, A21, lda, H);
+ // P6 = S2 T2 in C21
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, C12, ldc, A11, lda, F.zero, C21, ldc, H);
+ // T4 = T2 - B21 in A11
+ fsubin(F,lb,cb,B21,ldb,A11,lda);
+ // U2 = P1 + P6 in C21
+ faddin(F,mr,nr,C11,ldc,C21,ldc);
+ // U4 = U2 + P5 in C12
+ fadd(F,mr,nr,C22,ldc,C21,ldc,C12,ldc);
+ // U3 = U2 + P7 in C21
+ faddin(F,mr,nr,X,nr,C21,ldc);
+ // U7 = U3 + P5 in C22
+ faddin(F,mr,nr,C21,ldc,C22,ldc);
+ // U5 = U4 + P3 in C12
+ faddin(F,la,ca,A21,lda,C12,ldc);
+ // P2 = A12 B21 in X
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, F.zero, X, nr, H);
+ // U1 = P1 + P2 in C11
+ faddin(F,mr,nr,X,nr,C11,ldc);
+ fflas_delete (X);
+ // P4 = A22 T4 in A21
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A22, lda, A11, lda, F.zero, A21, lda, H);
+ // U6 = U3 - P4 in C21
+ fsubin(F,mr,nr,A21,lda,C21,ldc);
+
+
+ } // WinogradIP
+
+ template < class Field, class FieldTrait >
+ inline void Winograd_R_S(const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t mr, const size_t nr, const size_t kr,
+ const typename Field::Element alpha,
+ const typename Field::Element_ptr A,const size_t lda,
+ typename Field::Element_ptr B,const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ const MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > & WH
+ )
+ {
+ MMHelper<Field, MMHelperAlgo::Winograd, FieldTrait > H = WH ;
+ H.recLevel = H.recLevel - 1 ;
+
+ FFLASFFPACK_check(F.isZero(beta));
+
+ FFLASFFPACK_check(kr == nr && kr <= mr);
+
+ size_t lb, cb, la, ca;
+ const typename Field::Element_ptr A11=A, A12, A21, A22;
+ typename Field::Element_ptr B11=B, B12, B21, B22;
+ typename Field::Element_ptr C11=C, C12=C+nr, C21=C+mr*ldc, C22=C21+nr;
+
+
+ if (ta == FflasTrans) {
+ A21 = A + mr;
+ A12 = A + kr*lda;
+ A22 = A12 + mr;
+ la = kr;
+ ca = mr;
+ }
+ else {
+ A12 = A + kr;
+ A21 = A + mr*lda;
+ A22 = A21 + kr;
+ la = mr;
+ ca = kr;
+ }
+ if (tb == FflasTrans) {
+ B21 = B + kr;
+ B12 = B + nr*ldb;
+ B22 = B12 + kr;
+ lb = nr;
+ cb = kr;
+ }
+ else {
+ B12 = B + nr;
+ B21 = B + kr*ldb;
+ B22 = B21 + nr;
+ lb = kr;
+ cb = nr;
+ }
+
+
+ // S3 = A11 - A21 in C22
+ fsub(F,la,ca,A11,lda,A21,lda,C22,ldc);
+ // S1 = A21 + A22 in C21
+ fadd(F,la,ca,A22,lda,A21,lda,C21,ldc);
+ // T1 = B12 - B11 in C12
+ fsub(F,lb,cb,B12,ldb,B11,ldb,C12,ldc);
+ // P1 = A11 B11 in C11
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A11, lda, B11, ldb, F.zero, C11, ldc, H);
+ // S2 = S1 - A11 in B11
+ fsub(F,la,ca,C21,ldc,A11,lda,B11,ldb);
+ // T3 = B22 - B12 in B12
+ fsub(F,lb,cb,B22,ldb,B12,ldb,B12,ldb);
+ // P7 = S3 T3 in X
+ typename Field::Element_ptr X = fflas_new (F, mr, nr);
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, C22, ldc, B12, ldb, F.zero, X, nr, H);
+ // T2 = B22 - T1 in B12
+ fsub(F,lb,cb,B22,ldb,C12,ldc,B12,ldb);
+ // P5 = S1 T1 in C22
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, C21, ldc, C12, ldc, F.zero, C22, ldc, H);
+ // T4 = T2 - B21 in C12
+ fsub(F,lb,cb,B12,ldb,B21,ldb,C12,ldc);
+ // P6 = S2 T2 in C21
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, B11, ldb, B12, ldb, F.zero, C21, ldc, H);
+ // P4 = A22 T4 in B12
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A22, lda, C12, ldc, F.zero, B12, ldb, H);
+ // S4 = A12 - S2 in B11
+ fsub(F,la,ca,A12,lda,B11,ldb,B11,ldb);
+ // U2 = P1 + P6 in C21
+ faddin(F,mr,nr,C11,ldc,C21,ldc);
+ // U4 = U2 + P5 in C12
+ fadd(F,mr,nr,C22,ldc,C21,ldc,C12,ldc);
+ // U3 = U2 + P7 in C21
+ faddin(F,mr,nr,X,nr,C21,ldc);
+ fflas_delete (X);
+ // U7 = U3 + P5 in C22
+ faddin(F,mr,nr,C21,ldc,C22,ldc);
+ // U6 = U3 - P4 in C21
+ fsubin(F,mr,nr,B12,ldb,C21,ldc);
+ // P3 = S4 B22 in B12
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, B11, ldb, B22, ldb, F.zero, B12, ldb, H);
+ // U5 = U4 + P3 in C12
+ faddin(F,la,ca,B12,ldb,C12,ldc);
+ // P2 = A12 B21 in B12
+ fgemm2 (F, ta, tb, mr, nr, kr, alpha, A12, lda, B21, ldb, F.zero, B12, ldb, H);
+ // U1 = P1 + P2 in C11
+ faddin(F,mr,nr,B12,ldb,C11,ldc);
+
+
+ } // WinogradIP
+
+} // BLAS3
+
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fgemm_winograd_ip_INL
+
diff --git a/fflas-ffpack/fflas/fflas_fgemv.inl b/fflas-ffpack/fflas/fflas_fgemv.inl
index 3468844..fb8303c 100644
--- a/fflas-ffpack/fflas/fflas_fgemv.inl
+++ b/fflas-ffpack/fflas/fflas_fgemv.inl
@@ -5,21 +5,22 @@
* Copyright (C) 2005 Clement Pernet
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
*
- *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -29,6 +30,86 @@
#ifndef __FFLASFFPACK_fgemv_INL
#define __FFLASFFPACK_fgemv_INL
+
+#include <givaro/zring.h> // DoubleDomain
+
+#if defined(__AVX2__) or defined(__AVX__) or defined(__SSE4_1__)
+#include "fflas-ffpack/fflas/fflas_igemm/igemm.h"
+#endif
+
+namespace FFLAS{ namespace Protected {
+ template <typename FloatElement, class Field>
+ inline typename Field::Element_ptr
+ fgemv_convert (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A,const size_t lda,
+ typename Field::ConstElement_ptr X,const size_t incX,
+ const typename Field::Element beta,
+ typename Field::Element_ptr Y, const size_t incY)
+ {
+ FFLASFFPACK_check(lda);
+
+ Givaro::ModularBalanced<FloatElement> G((FloatElement) F.characteristic());
+ FloatElement tmp,alphaf, betaf;
+ F.convert (tmp, beta);
+ G.init(betaf,tmp);
+ F.convert (tmp, alpha);
+ G.init(alphaf,tmp);
+ size_t ma, na;
+ if (ta == FflasTrans) { ma = N; na = M; }
+ else { ma = M; na = N; }
+ // sizet ldaf = na;
+ FloatElement* Af = FFLAS::fflas_new<FloatElement>(M*N);
+ FloatElement* Xf = FFLAS::fflas_new<FloatElement>(na);
+ FloatElement* Yf = FFLAS::fflas_new<FloatElement>(ma);
+
+ fconvert(F, M, N, Af, N, A, lda);
+ freduce (G, M, N, Af, N);
+ fconvert(F, na, Xf, 1, X, incX);
+ freduce (G, na, Xf, 1);
+
+ if (!F.isZero(beta)){
+ fconvert (F, ma, Yf, 1, Y, incY);
+ freduce (G, ma, Yf, 1);
+ }
+
+ fgemv (G, ta, M, N, alphaf, Af, N, Xf, 1, betaf, Yf, 1);
+
+ finit(F, ma, Yf, 1, Y, incY);
+ fflas_delete (Af);
+ fflas_delete (Xf);
+ fflas_delete (Yf);
+ return Y;
+ }
+ }// Protected
+}// FFLAS
+
+namespace FFLAS {
+ template<class Field>
+ inline typename Field::Element_ptr
+ fgemv (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr X, const size_t incX,
+ const typename Field::Element beta,
+ typename Field::Element_ptr Y, const size_t incY,
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::ConvertTo<ElementCategories::MachineFloatTag> > & H)
+ {
+ if (F.cardinality() < DOUBLE_TO_FLOAT_CROSSOVER)
+ return Protected::fgemv_convert<float,Field>(F,ta,M,N,alpha,A,lda,X, incX, beta,Y,incY);
+ else if (16*F.cardinality() < Givaro::ModularBalanced<double>::maxCardinality())
+ return Protected::fgemv_convert<double,Field>(F,ta,M,N,alpha,A,lda,X, incX, beta,Y,incY);
+ else {
+ FFPACK::failure()(__func__,__LINE__,"Invalid ConvertTo Mode for this field");
+ }
+ return Y;
+ }
+}// FFLAS
+
namespace FFLAS {
//---------------------------------------------------------------------
@@ -36,341 +117,341 @@ namespace FFLAS {
// Computes Y <- alpha.op(A).X + beta.Y
// A is M*N,
//---------------------------------------------------------------------
+
template<class Field>
- inline void
- fgemv (const Field& F, const FFLAS_TRANSPOSE TransA,
+ inline typename Field::Element_ptr
+ fgemv (const Field& F, const FFLAS_TRANSPOSE ta,
const size_t M, const size_t N,
const typename Field::Element alpha,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * X, const size_t incX,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr X, const size_t incX,
const typename Field::Element beta,
- typename Field::Element * Y, const size_t incY)
+ typename Field::Element_ptr Y, const size_t incY,
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DelayedTag> & H)
{
- if (F.isZero (alpha)){
- for (typename Field::Element * Yi = Y; Yi != Y+((TransA == FflasNoTrans)?M:N)*incY; Yi+=incY)
- F.mulin(*Yi, beta);
- return;
+ if (!M) {return Y;}
+ size_t Ydim = (ta == FflasNoTrans)?M:N;
+ size_t Xdim = (ta == FflasNoTrans)?N:M;
+ if (!Xdim || F.isZero (alpha)){
+ fscalin(F, Ydim, beta, Y, incY);
+ return Y;
}
- FFLAS_BASE base = Protected::BaseCompute (F, 0);
- size_t kmax = Protected::DotProdBound (F, 0, beta, base);
-
- if (kmax > 1) {
- if (TransA == FflasNoTrans) {
- size_t nblock = N / kmax;
- size_t remblock = N % kmax;
- // To ensure the initial computation with beta
- if (!remblock){
- remblock = kmax;
- --nblock;
- }
-
- Protected::MatVectProd (F, FflasNoTrans, M, remblock, alpha,
- A+kmax*nblock, lda, X+kmax*nblock*incX, incX, beta,
- Y, incY);
- for (size_t i = 0; i < nblock; ++i){
- Protected::MatVectProd (F, FflasNoTrans, M, kmax, alpha,
- A+i*kmax, lda, X+i*kmax*incX, incX, F.one,
- Y, incY);
- }
- }
- else{ // FflasTrans
- size_t nblock = M / kmax;
- size_t remblock = M % kmax;
- // To ensure the initial computation with beta
- if (!remblock){
- remblock = kmax;
- --nblock;
- }
-
- Protected::MatVectProd (F, FflasTrans, remblock, N, alpha,
- A+kmax*nblock*lda, lda, X+kmax*nblock*incX, incX, beta,
- Y, incY);
- for (size_t i = 0; i < nblock; ++i){
- Protected::MatVectProd (F, FflasTrans, kmax, N, alpha,
- A+i*kmax*lda, lda, X+i*kmax*incX, incX, F.one,
- Y, incY);
- }
-
- }
- } else {
- if (TransA == FflasNoTrans) {
- if (F.isZero (beta))
- for (size_t i = 0; i < M; ++i)
- F.assign( *(Y+i*incY), F.zero);
- else {
- typename Field::Element betadivalpha;
- F.div (betadivalpha, beta, alpha);
- for (size_t i = 0; i < M; ++i)
- F.mulin( *(Y+i*incY), betadivalpha);
- }
- for (size_t i = 0; i < M; ++i)
- for (size_t j = 0; j < N; ++j)
- F.axpyin (*(Y+i*incY), *(A+i*lda+j), *(X+j*incX));
- if (! F.isOne(alpha))
- for (size_t i = 0; i < M; ++i)
- F.mulin (*(Y+i*incY), alpha);
- } else {
- if (F.isZero (beta))
- for (size_t i = 0; i < N; ++i)
- F.assign( *(Y+i*incY), F.zero);
- else {
- typename Field::Element betadivalpha;
- F.div (betadivalpha, beta, alpha);
- for (size_t i = 0; i < N; ++i)
- F.mulin( *(Y+i*incY), betadivalpha);
- }
-
-
- for (size_t i = 0; i < M; ++i)
- for (size_t j = 0; j < N; ++j){
- F.axpyin (*(Y+j*incY), *(A+i*lda+j), *(X+i*incX));
- }
- if (! F.isOne(alpha))
- for (size_t i = 0; i < N; ++i)
- F.mulin (*(Y+i*incY), alpha);
- }
+ typename Field::Element alpha_,beta_;
+ F.assign (alpha_,alpha);
+ F.assign (beta_,beta);
+ if (Protected::AreEqual<Field, Givaro::Modular<double> >::value ||
+ Protected::AreEqual<Field, Givaro::ModularBalanced<double> >::value){
+ //Givaro::Modular<double> need to switch to float if p too small
+ if (F.characteristic() < DOUBLE_TO_FLOAT_CROSSOVER)
+ return Protected::fgemv_convert<float,Field>(F,ta,M,N,alpha,A,lda,X,incX,beta,Y,incY);
}
- }
- namespace Protected {
-
- // MatVectProd: computes y <- alpha.op(A)*x + beta.y.
- // Assumes that the condition k(p-1)^2 <2^53 is satisfied
- template<class Field>
- inline void
- MatVectProd (const Field& F, const FFLAS_TRANSPOSE TransA,
- const size_t M, const size_t N,
- const typename Field::Element alpha,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * X, const size_t incX,
- const typename Field::Element beta,
- typename Field::Element * Y, const size_t incY)
- {
- typename Field::Element tmp;
-
- size_t Xl, Yl;
- if (TransA == FflasNoTrans){Xl = N;Yl = M;}
- else {Xl = M; Yl = N;}
- double* Ad = new double[M*N];
- double* Xd = new double[Xl];
- double* Yd = new double[Yl];
- double alphad, betad;
-
- if (F.areEqual (F.mOne, alpha)){
- alphad = -1.0;
- F.convert (betad, beta);
- } else {
- if (! F.areEqual (F.one, alpha)){
- // Compute C = A*B + beta/alpha.C
- // and after C *= alpha
- F.div (tmp, beta, alpha);
- F.convert (betad, tmp);
- } else
- F.convert (betad, beta);
- alphad = 1.0;
- }
- MatF2MatD (F, Ad, N, A, lda, M, N);
-
- double *Xdi=Xd;
- for (const typename Field::Element* Xi=X; Xi != X+Xl*incX; Xi+=incX, Xdi++)
- F.convert (*(Xdi), *Xi);
- double *Ydi=Yd;
- if (!F.isZero(beta))
- for (typename Field::Element* Yi = Y; Yi != Y+Yl*incY; Yi+=incY, Ydi++)
- F.convert (*(Ydi), *Yi);
-
- cblas_dgemv (CblasRowMajor, (CBLAS_TRANSPOSE) TransA, (int)M, (int)N, alphad,
- Ad, (int)N, Xd, 1, betad, Yd, 1);
-
- Ydi=Yd;
- for (typename Field::Element* Yi = Y; Yi != Y+Yl*incY; Yi+=incY, Ydi++)
- F.init (*Yi, *(Ydi));
-
- if (!F.areEqual (F.one, alpha) && !F.areEqual (F.mOne, alpha)){
- // Fix-up: compute Y *= alpha
- for (typename Field::Element* Yi = Y; Yi != Y+Yl*incY; Yi += incY)
- F.mulin (*Yi , alpha);
+ if (Protected::AreEqual<Field, Givaro::Modular<int64_t> >::value ||
+ Protected::AreEqual<Field, Givaro::ModularBalanced<int64_t> >::value){
+ if (16*F.cardinality() < Givaro::ModularBalanced<double>::maxCardinality())
+ return Protected::fgemv_convert<double,Field>(F,ta,M,N,alpha,A,lda,X, incX,beta,Y,incY);
+ else{
+ // Stay over int64_t
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::LazyTag, ParSeqHelper::Sequential> HG(H);
+ HG.recLevel = 0;
+ if (ta == FflasNoTrans)
+ fgemm(F,FflasNoTrans,FflasNoTrans,M,1,N,alpha,A,lda,X,incX,beta,Y,incY,HG);
+ else
+ fgemm(F,FflasTrans,FflasNoTrans,N,1,M,alpha,A,lda,X,incX,beta,Y,incY,HG);
+ freduce(F,(ta==FflasNoTrans)?M:N, Y,incY);
+ H.initOut();
+ return Y;
}
- delete[] Ad;
- delete[] Xd;
- delete[] Yd;
}
+ if ( !F.isOne(alpha) && !F.isMOne(alpha)){
+ F.assign (alpha_, F.one);
+ F.div (beta_, beta, alpha);
+ }
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::LazyTag> HD(F,0);
+ fgemv (F, ta, M, N, alpha_,
+ FFPACK::fflas_const_cast<typename Field::Element_ptr>(A), lda,
+ FFPACK::fflas_const_cast<typename Field::Element_ptr>(X), incX,
+ beta_, Y, incY, HD);
- template<>
- inline void MatVectProd (const FFPACK:: ModularBalanced<double>& F,
- const FFLAS_TRANSPOSE TransA,
- const size_t M, const size_t N,
- const double alpha,
- const double * A, const size_t lda,
- const double * X, const size_t incX,
- const double beta,
- double * Y, const size_t incY)
- {
+ Protected::ScalAndReduce (F, Ydim, alpha, Y, incY, HD);
+ H.initOut();
- double _alpha, _beta;
- if (F.areEqual (F.mOne, beta)) _beta = -1.0;
- else _beta = beta;
+ return Y;
+ }
- if (F.areEqual (F.mOne, alpha)) _alpha = -1.0;
- else{
- _alpha = 1.0;
- if (! F.areEqual (F.one, alpha))
- // Compute y = A*x + beta/alpha.y
- // and after y *= alpha
- F.divin (_beta, alpha);
- }
- cblas_dgemv (CblasRowMajor, (CBLAS_TRANSPOSE) TransA, (int)M, (int)N,
- _alpha, A, (int)lda, X, (int)incX, _beta, Y, (int)incY);
- for (double * Yi = Y; Yi != Y+((TransA == FflasNoTrans)?M:N)*incY; Yi+=incY)
- F.init (*Yi, *Yi);
+}
- if ( (!F.areEqual (F.one, alpha)) && (!F.areEqual (F.mOne, alpha))){
- // Fix-up: compute y *= alpha
- for (double* Yi = Y; Yi != Y+((TransA == FflasNoTrans)?M:N)*incY; Yi += incY)
- F.mulin (*Yi , alpha);
- }
+namespace FFLAS{
+ template<class Field>
+ inline typename Field::Element_ptr
+ fgemv (const Field& F, const FFLAS_TRANSPOSE ta,
+ const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr X, const size_t incX,
+ const typename Field::Element beta,
+ typename Field::Element_ptr Y, const size_t incY,
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DefaultTag> & H)
+
+ {
+ size_t Ydim = (ta==FflasNoTrans)?M:N;
+
+ if (F.isZero (beta))
+ fzero (F, Ydim, Y, incY);
+ else {
+ typename Field::Element betadivalpha;
+ FFLASFFPACK_check(!F.isZero(alpha));
+ F.div (betadivalpha, beta, alpha);
+ fscalin (F, Ydim, betadivalpha, Y, incY);
}
+ if (ta == FflasNoTrans)
+ for (size_t i = 0; i < Ydim; ++i)
+ F.addin (Y[i*incY], fdot(F, N, A+i*lda, 1, X, incX));
+ else
+ for (size_t i = 0; i < Ydim; ++i)
+ F.addin (Y[i*incY], fdot(F, M, A+i, lda, X, incX));
+ fscalin (F, Ydim, alpha, Y, incY);
+
+ return Y;
+ }
+}
- template<>
- inline void MatVectProd (const FFPACK:: ModularBalanced<float>& F,
- const FFLAS_TRANSPOSE TransA,
- const size_t M, const size_t N,
- const float alpha,
- const float * A, const size_t lda,
- const float * X, const size_t incX,
- const float beta,
- float * Y, const size_t incY)
- {
-
- float _alpha, _beta;
- if (F.areEqual (F.mOne, beta)) _beta = -1.0;
- else _beta = beta;
-
- if (F.areEqual (F.mOne, alpha)) _alpha = -1.0;
- else{
- _alpha = 1.0;
- if (! F.areEqual (F.one, alpha)){
- // Compute y = A*x + beta/alpha.y
- // and after y *= alpha
- F.divin (_beta, alpha);
- }
+namespace FFLAS{
+ template<class Field>
+ inline typename Field::Element_ptr
+ fgemv (const Field& F, const FFLAS_TRANSPOSE ta,
+ const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr X, const size_t incX,
+ const typename Field::Element beta,
+ typename Field::Element_ptr Y, const size_t incY,
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::LazyTag> & H)
+ {
+ typedef MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::LazyTag> HelperType;
+ typedef typename HelperType::DelayedField::Element DFElt;
+ typedef typename HelperType::DelayedField::Element_ptr DFElt_ptr;
+ typedef typename HelperType::DelayedField::ConstElement_ptr DFCElt_ptr;
+ DFElt alphadf=alpha, betadf=beta;
+ size_t Ydim = (ta==FflasNoTrans)?M:N;
+ size_t Xdim = (ta==FflasNoTrans)?N:M;
+ if (F.isMOne (alpha)) alphadf = -F.one;
+ else {
+ alphadf = F.one;
+ if (! F.isOne( alpha)) {
+ // Compute y = A*x + beta/alpha.y, then y *= alpha
+ FFLASFFPACK_check(!F.isZero(alpha));
+ typename Field::Element betadalpha;
+ F.init(betadalpha);
+ F.div (betadalpha, beta, alpha);
+ betadf=betadalpha;
}
- cblas_sgemv (CblasRowMajor, (CBLAS_TRANSPOSE) TransA, (int)M, (int)N,
- _alpha, A, (int)lda, X, (int)incX, _beta, Y, (int)incY);
- for (float * Yi = Y; Yi != Y+((TransA == FflasNoTrans)?M:N)*incY; Yi+=incY)
- F.init (*Yi, *Yi);
- if ( (!F.areEqual (F.one, alpha)) && (!F.areEqual (F.mOne, alpha))){
- // Fix-up: compute y *= alpha
- for (float* Yi = Y; Yi != Y+((TransA == FflasNoTrans)?M:N)*incY; Yi += incY)
- F.mulin (*Yi , alpha);
+ }
+ if (F.isMOne(betadf)) betadf = -F.one;
+
+ size_t kmax = H.MaxDelayedDim (betadf);
+
+ if (kmax <= Xdim/2 ){
+ // Might as well reduce inputs
+ if (H.Amin < H.FieldMin || H.Amax>H.FieldMax){
+ H.initA();
+ freduce_constoverride (F, M, N, A, lda);
+ }
+ if (H.Bmin < H.FieldMin || H.Bmax>H.FieldMax){
+ H.initB();
+ freduce_constoverride (F, Xdim, X, incX);
+ }
+ if (H.Cmin < H.FieldMin || H.Cmax>H.FieldMax){
+ H.initC();
+ freduce (F, Ydim, Y, incY);
}
+ kmax = H.MaxDelayedDim (betadf);
}
- template<>
- inline void MatVectProd (const FFPACK:: Modular<double>& F,
- const FFLAS_TRANSPOSE TransA,
- const size_t M, const size_t N,
- const double alpha,
- const double * A, const size_t lda,
- const double * X, const size_t incX,
- const double beta,
- double * Y, const size_t incY)
- {
-
- double _alpha, _beta;
- if (F.areEqual (F.mOne, beta)) _beta = -1.0;
- else _beta = beta;
-
- if (F.areEqual (F.mOne, alpha)) _alpha = -1.0;
- else{
- _alpha = 1.0;
- if (! F.areEqual (F.one, alpha))
- // Compute y = A*x + beta/alpha.y
- // and after y *= alpha
- F.divin (_beta, alpha);
- }
+ if (!kmax){
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DefaultTag> HG(H);
+ H.initOut();
+ return fgemv (F, ta, M, N, alpha, A, lda, X, incX, beta, Y, incY, HG);
+ }
+ size_t k2 = std::min (Xdim, kmax);
+ size_t nblock = Xdim / kmax;
+ size_t remblock = Xdim % kmax;
+ if (!remblock) {
+ remblock = kmax;
+ --nblock;
+ }
+ size_t shiftA, M1, N1, Mi, Ni;
+ if (ta == FflasTrans) {
+ shiftA = k2*lda;
+ M1 = remblock;
+ Mi = k2;
+ Ni = N1 = N;
+ }else {
+ shiftA = k2;
+ Mi = M1 = M;
+ N1 = remblock;
+ Ni = k2;
+ }
+ MMHelper<typename associatedDelayedField<const Field>::field, MMHelperAlgo::Classic, ModeCategories::DefaultBoundedTag> Hfp(H);
- cblas_dgemv (CblasRowMajor, (CBLAS_TRANSPOSE) TransA, (int)M, (int)N,
- _alpha, A, (int)lda, X, (int)incX, _beta, Y, (int)incY);
- for (double * Yi = Y; Yi != Y+((TransA == FflasNoTrans)?M:N)*incY; Yi+=incY)
- F.init (*Yi, *Yi);
+ fgemv (H.delayedField, ta, M1, N1, alphadf, (DFCElt_ptr)A+nblock*shiftA, lda,
+ (DFCElt_ptr)X+nblock*k2*incX, incX, betadf, (DFElt_ptr)Y, incY, Hfp);
+
+ for (size_t i = 0; i < nblock; ++i) {
+ freduce (F, Ydim ,Y, incY);
+ Hfp.initC();
+ fgemv (H.delayedField, ta, Mi, Ni, alphadf, (DFCElt_ptr)A+i*shiftA, lda,
+ (DFCElt_ptr)X+i*k2*incX, incX, F.one, (DFElt_ptr)Y, incY, Hfp);
+ }
- if ( (!F.areEqual (F.one, alpha)) && (!F.areEqual (F.mOne, alpha))){
- // Fix-up: compute y *= alpha
- for (double* Yi = Y; Yi != Y+((TransA == FflasNoTrans)?M:N)*incY; Yi += incY)
- F.mulin (*Yi , alpha);
+ if (!F.isOne(alpha) && !F.isMOne(alpha)){
+ DFElt al; F.convert(al, alpha);
+ if (al<0) al = -al;
+ if (std::max(-Hfp.Outmin, Hfp.Outmax) > Hfp.MaxStorableValue/al){
+ freduce (F, Ydim, Y, incY);
+ Hfp.initOut();
}
+ fscalin (H.delayedField, Ydim, alpha, (DFElt_ptr)Y, incY);
+ if (alpha>0){
+ H.Outmin = al*Hfp.Outmin;
+ H.Outmax = al*Hfp.Outmax;
+ } else {
+ H.Outmin = -al*Hfp.Outmax;
+ H.Outmax = -al*Hfp.Outmin;
+ }
+ }else {
+ H.Outmin = Hfp.Outmin;
+ H.Outmax = Hfp.Outmax;
}
+ return Y;
+ }
+}
- template<>
- inline void MatVectProd (const FFPACK:: Modular<float>& F,
- const FFLAS_TRANSPOSE TransA,
- const size_t M, const size_t N,
- const float alpha,
- const float * A, const size_t lda,
- const float * X, const size_t incX,
- const float beta,
- float * Y, const size_t incY)
- {
-
- float _alpha, _beta;
- if (F.areEqual (F.mOne, beta)) _beta = -1.0;
- else _beta = beta;
-
- if (F.areEqual (F.mOne, alpha)) _alpha = -1.0;
- else{
- _alpha = 1.0;
- if (! F.areEqual (F.one, alpha)){
- // Compute y = A*x + beta/alpha.y
- // and after y *= alpha
- F.divin (_beta, alpha);
- }
+namespace FFLAS{
+ template<class Field>
+ inline typename Field::Element_ptr
+ fgemv (const Field& F, const FFLAS_TRANSPOSE ta,
+ const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr X, const size_t incX,
+ const typename Field::Element beta,
+ typename Field::Element_ptr Y, const size_t incY)
+ {
+ if (!M) {return Y;}
+ size_t Ydim = (ta == FflasNoTrans)?M:N;
+ size_t Xdim = (ta == FflasNoTrans)?N:M;
+ if (!Xdim || F.isZero (alpha)){
+ fscalin(F, Ydim, beta, Y, incY);
+ return Y;
+ }
+ MMHelper<Field, MMHelperAlgo::Classic > HW (F, 0);
+ return fgemv (F, ta, M, N, alpha,
+ FFPACK::fflas_const_cast<typename Field::Element_ptr>(A), lda,
+ FFPACK::fflas_const_cast<typename Field::Element_ptr>(X), incX,
+ beta, Y, incY, HW);
+ }
+}
+
+
+namespace FFLAS{
+ inline Givaro::ZRing<int64_t>::Element_ptr
+ fgemv (const Givaro::ZRing<int64_t>& F, const FFLAS_TRANSPOSE ta,
+ const size_t M, const size_t N,
+ const int64_t alpha,
+ const int64_t* A, const size_t lda,
+ const int64_t* X, const size_t incX,
+ const int64_t beta,
+ int64_t* Y, const size_t incY,
+ MMHelper<Givaro::ZRing<int64_t>, MMHelperAlgo::Classic, ModeCategories::DefaultTag> & H)
+ {
+ FFLASFFPACK_check(lda);
+
+#if defined(__AVX2__) or defined(__AVX__) or defined(__SSE4_1__)
+ if (ta == FflasNoTrans)
+ igemm_ (FflasRowMajor, ta, FflasNoTrans,M,1,N,alpha,A,lda,X,incX,beta,Y,incY);
+ else
+ igemm_ (FflasRowMajor, ta, FflasNoTrans,N,1,M,alpha,A,lda,X,incX,beta,Y,incY);
+#else
+ if (ta == FflasNoTrans){
+ int64_t* Yi=Y;
+ for (size_t i=0;i<M;i++, Yi+=incY){
+ *Yi *= beta * *Yi;
+ const int64_t* Xj=X;
+ for (size_t j=0; j < N; j++, Xj += incX)
+ *Yi += alpha*A[i*lda+j] * *Xj;
}
- cblas_sgemv (CblasRowMajor, (CBLAS_TRANSPOSE) TransA, (int)M, (int)N,
- _alpha, A, (int)lda, X, (int)incX, _beta, Y, (int)incY);
- for (float * Yi = Y; Yi != Y+((TransA == FflasNoTrans)?M:N)*incY; Yi+=incY)
- F.init (*Yi, *Yi);
- if ( (!F.areEqual (F.one, alpha)) && (!F.areEqual (F.mOne, alpha))){
- // Fix-up: compute y *= alpha
- for (float* Yi = Y; Yi != Y+((TransA == FflasNoTrans)?M:N)*incY; Yi += incY)
- F.mulin (*Yi , alpha);
+ } else {
+ int64_t* Yi=Y;
+ for (size_t i=0;i<N;i++, Yi+=incY){
+ *Yi *= beta * *Yi;
+ const int64_t* Xj=X;
+ for (size_t j=0; j < M; j++, Xj += incX)
+ *Yi += alpha*A[i+j*lda] * *Xj;
}
}
+#endif
+ return Y;
+ }
+ inline Givaro::DoubleDomain::Element_ptr
+ fgemv (const Givaro::DoubleDomain& F, const FFLAS_TRANSPOSE ta,
+ const size_t M, const size_t N,
+ const Givaro::DoubleDomain::Element alpha,
+ const Givaro::DoubleDomain::ConstElement_ptr A, const size_t lda,
+ const Givaro::DoubleDomain::ConstElement_ptr X, const size_t incX,
+ const Givaro::DoubleDomain::Element beta,
+ Givaro::DoubleDomain::Element_ptr Y, const size_t incY,
+ MMHelper<Givaro::DoubleDomain, MMHelperAlgo::Classic, ModeCategories::DefaultTag> & H)
+ {
+ FFLASFFPACK_check(lda);
- } // Protected
+ cblas_dgemv (CblasRowMajor, (CBLAS_TRANSPOSE) ta,
+ (int)M, (int)N, (Givaro::DoubleDomain::Element) alpha,
+ A, (int)lda, X, (int)incX, (Givaro::DoubleDomain::Element) beta, Y, (int)incY);
+ return Y;
+ }
- template<>
- inline void
- fgemv (const DoubleDomain& , const FFLAS_TRANSPOSE TransA,
+ template <class Field>
+ inline typename Field::Element_ptr
+ fgemv (const Field& F, const FFLAS_TRANSPOSE ta,
const size_t M, const size_t N,
- const DoubleDomain::Element alpha,
- const DoubleDomain::Element * A, const size_t lda,
- const DoubleDomain::Element * X, const size_t incX,
- const DoubleDomain::Element beta,
- DoubleDomain::Element * Y, const size_t incY)
+ const typename Field::Element alpha,
+ const typename Field::ConstElement_ptr A, const size_t lda,
+ const typename Field::ConstElement_ptr X, const size_t incX,
+ const typename Field::Element beta,
+ typename Field::Element_ptr Y, const size_t incY,
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DefaultBoundedTag> & H)
{
- cblas_dgemv (CblasRowMajor, (CBLAS_TRANSPOSE) TransA, (int)M, (int)N,
- alpha, A, (int)lda, X, (int)incX, beta, Y, (int)incY);
+ H.setOutBounds((ta ==FflasNoTrans)?N:M, alpha, beta);
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DefaultTag> Hb(F,0);
+
+ return fgemv(F, ta, M, N, alpha, A, lda, X, incX, beta, Y, incY, Hb);
}
- template<>
- inline void
- fgemv (const FloatDomain& , const FFLAS_TRANSPOSE TransA,
+ inline Givaro::FloatDomain::Element_ptr
+ fgemv (const Givaro::FloatDomain& F, const FFLAS_TRANSPOSE ta,
const size_t M, const size_t N,
- const FloatDomain::Element alpha,
- const FloatDomain::Element * A, const size_t lda,
- const FloatDomain::Element * X, const size_t incX,
- const FloatDomain::Element beta,
- FloatDomain::Element * Y, const size_t incY)
+ const Givaro::FloatDomain::Element alpha,
+ const Givaro::FloatDomain::ConstElement_ptr A, const size_t lda,
+ const Givaro::FloatDomain::ConstElement_ptr X, const size_t incX,
+ const Givaro::FloatDomain::Element beta,
+ Givaro::FloatDomain::Element_ptr Y, const size_t incY,
+ MMHelper<Givaro::FloatDomain, MMHelperAlgo::Classic, ModeCategories::DefaultTag> & H)
{
- cblas_sgemv (CblasRowMajor, (CBLAS_TRANSPOSE) TransA, (int)M, (int)N,
- alpha, A, (int)lda, X, (int)incX, beta, Y, (int)incY);
+ FFLASFFPACK_check(lda);
+
+ cblas_sgemv (CblasRowMajor, (CBLAS_TRANSPOSE) ta,
+ (int)M, (int)N, (Givaro::FloatDomain::Element) alpha,
+ A, (int)lda, X, (int)incX, (Givaro::FloatDomain::Element) beta, Y, (int)incY);
+ return Y;
}
-} // FFLAS
+
+}
+
#endif // __FFLASFFPACK_fgemv_INL
diff --git a/fflas-ffpack/fflas/fflas_fgemv_mp.inl b/fflas-ffpack/fflas/fflas_fgemv_mp.inl
new file mode 100644
index 0000000..8981b6e
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fgemv_mp.inl
@@ -0,0 +1,125 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2014 FFLAS-FFPACK group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fgemv_mp_INL
+#define __FFLASFFPACK_fgemv_mp_INL
+
+#include "fflas-ffpack/field/rns-integer-mod.h"
+
+namespace FFLAS {
+
+
+ // specialization of the fgemv function for the field RNSInteger<rns_double>
+ inline FFPACK::rns_double::Element_ptr
+ fgemv (const FFPACK::RNSInteger<FFPACK::rns_double>& F, const FFLAS_TRANSPOSE ta,
+ const size_t M, const size_t N,
+ const FFPACK::rns_double::Element alpha,
+ FFPACK::rns_double::ConstElement_ptr A, const size_t lda,
+ FFPACK::rns_double::ConstElement_ptr X, const size_t incX,
+ const FFPACK::rns_double::Element beta,
+ FFPACK::rns_double::Element_ptr Y, const size_t incY,
+ MMHelper<FFPACK::RNSInteger<FFPACK::rns_double>, MMHelperAlgo::Classic, ModeCategories::DefaultTag> & H)
+ {
+ if (M!=0 && N !=0){
+ for (size_t i=0;i<F.size();i++)
+ fgemv(F.rns()._field_rns[i], ta,
+ M, N,
+ alpha._ptr[i*alpha._stride],
+ A._ptr+i*A._stride, lda,
+ X._ptr+i*X._stride, incX,
+ beta._ptr[i*beta._stride],
+ Y._ptr+i*Y._stride, incY
+ );
+ }
+ return Y;
+ }
+
+
+ // specialization of the fgemv function for the field RNSIntegerMod<rns_double>
+ inline FFPACK::rns_double::Element_ptr
+ fgemv (const FFPACK::RNSIntegerMod<FFPACK::rns_double>& F, const FFLAS_TRANSPOSE ta,
+ const size_t M, const size_t N,
+ const FFPACK::rns_double::Element alpha,
+ FFPACK::rns_double::ConstElement_ptr A, const size_t lda,
+ FFPACK::rns_double::ConstElement_ptr X, const size_t incX,
+ const FFPACK::rns_double::Element beta,
+ FFPACK::rns_double::Element_ptr Y, const size_t incY,
+ MMHelper<FFPACK::RNSIntegerMod<FFPACK::rns_double>, MMHelperAlgo::Classic, ModeCategories::DefaultTag> & H)
+ {
+ //std::cout<<"HERE 1"<<std::endl;
+ MMHelper<FFPACK::RNSInteger<FFPACK::rns_double>, MMHelperAlgo::Classic, ModeCategories::DefaultTag > H2;
+ //std::cout<<"HERE 2"<<std::endl;
+ fgemv(F.delayed(),ta,M,N,alpha,A,lda,X,incX, beta,Y,incY,H2);
+ //std::cout<<"HERE 3"<<std::endl;
+ size_t Ydim = (ta == FflasNoTrans)?M:N;
+ freduce (F, Ydim, Y, incY);
+ return Y;
+ }
+
+
+ // BB hack. might not work.
+ // Calling fgemm, TODO: really specialize fgemv
+ // specialization of the fgemv function for the field Givaro::ZRing<Givaro::Integer>
+ inline Givaro::Integer* fgemv (const Givaro::ZRing<Givaro::Integer>& F,
+ const FFLAS_TRANSPOSE ta,
+ const size_t m, const size_t n,
+ const Givaro::Integer alpha,
+ Givaro::Integer* A, const size_t lda,
+ Givaro::Integer* X, const size_t ldx,
+ Givaro::Integer beta,
+ Givaro::Integer* Y, const size_t ldy,
+ MMHelper<Givaro::ZRing<Givaro::Integer>, MMHelperAlgo::Classic, ModeCategories::ConvertTo<ElementCategories::RNSElementTag> > & H)
+ {
+ MMHelper<Givaro::ZRing<Givaro::Integer>, MMHelperAlgo::Classic, ModeCategories::ConvertTo<ElementCategories::RNSElementTag>, ParSeqHelper::Sequential> H2;
+ fgemm(F,ta,FFLAS::FflasNoTrans, (ta==FFLAS::FflasNoTrans)?m:n, 1,(ta==FFLAS::FflasNoTrans)?n:m, alpha,A,lda,X,ldx,beta,Y,ldy,H2);
+ return Y;
+ }
+
+ // specialization of the fgemv function for the field Givaro::Modular<Givaro::Integer>
+ // Calling fgemm, TODO: really specialize fgemv
+ inline Givaro::Integer* fgemv (const Givaro::Modular<Givaro::Integer>& F,
+ const FFLAS_TRANSPOSE ta,
+ const size_t m, const size_t n,
+ const Givaro::Integer alpha,
+ Givaro::Integer* A, const size_t lda,
+ Givaro::Integer* X, const size_t ldx,
+ Givaro::Integer beta,
+ Givaro::Integer* Y, const size_t ldy,
+ MMHelper<Givaro::Modular<Givaro::Integer>, MMHelperAlgo::Classic, ModeCategories::ConvertTo<ElementCategories::RNSElementTag> > & H)
+ {
+ MMHelper<Givaro::Modular<Givaro::Integer>, MMHelperAlgo::Classic, ModeCategories::ConvertTo<ElementCategories::RNSElementTag>, ParSeqHelper::Sequential> H2;
+ fgemm(F,ta,FFLAS::FflasNoTrans,(ta==FFLAS::FflasNoTrans)?m:n,1,(ta==FFLAS::FflasNoTrans)?n:m,alpha,A,lda,X,ldx,beta,Y,ldy,H2);
+ return Y;
+ }
+
+
+} // end namespace FFLAS
+
+#endif
+
diff --git a/fflas-ffpack/fflas/fflas_fger.inl b/fflas-ffpack/fflas/fflas_fger.inl
index d887a00..fe3db83 100644
--- a/fflas-ffpack/fflas/fflas_fger.inl
+++ b/fflas-ffpack/fflas/fflas_fger.inl
@@ -6,20 +6,20 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -29,29 +29,106 @@
#ifndef __FFLASFFPACK_fger_INL
#define __FFLASFFPACK_fger_INL
+
namespace FFLAS {
template<class Field>
inline void
fger (const Field& F, const size_t M, const size_t N,
const typename Field::Element alpha,
- const typename Field::Element * x, const size_t incx,
- const typename Field::Element * y, const size_t incy,
- typename Field::Element * A, const size_t lda)
+ typename Field::ConstElement_ptr x, const size_t incx,
+ typename Field::ConstElement_ptr y, const size_t incy,
+ typename Field::Element_ptr A, const size_t lda)
{
+ MMHelper<Field, MMHelperAlgo::Classic> H(F,0);
+ fger (F, M, N, alpha, x, incx, y, incy, A, lda, H);
+ freduce (F, M, N, A, lda);
+ }
+} //FFLAS
+
+namespace FFLAS { namespace Protected {
+
+ template<class FloatElement, class Field>
+ inline void
+ fger_convert (const Field& F, const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr x, const size_t incx,
+ typename Field::ConstElement_ptr y, const size_t incy,
+ typename Field::Element_ptr A, const size_t lda)
+ {
+ Givaro::ModularBalanced<FloatElement> G((FloatElement) F.characteristic());
+ FloatElement alphaf;
+ F.convert (alphaf, alpha);
+
+ FloatElement* Af = fflas_new (G,M,N);
+ FloatElement* Xf = fflas_new (G,M,1);
+ FloatElement* Yf = fflas_new (G,N,1);
+
+ fconvert(F, M, N, Af, N, A, lda);
+ freduce(G, M, N, Af, N);
+ fconvert(F, M, Xf, 1, x, incx);
+ freduce(G, M, Xf, 1);
+ fconvert(F, N, Yf, 1, y, incy);
+ freduce(G, N, Yf, 1);
+
+ fger (G, M, N, alphaf, Xf, 1, Yf, 1, Af, N);
+
+ finit (F, M, N, Af, N, A, lda);
+
+ fflas_delete (Af);
+ fflas_delete (Xf);
+ fflas_delete (Yf);
+ }
+}// Protected
+}// FFLAS
+
+namespace FFLAS{
+
+ template<class Field>
+ inline void
+ fger (const Field& F, const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr x, const size_t incx,
+ typename Field::ConstElement_ptr y, const size_t incy,
+ typename Field::Element_ptr A, const size_t lda,
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::ConvertTo<ElementCategories::MachineFloatTag> > & H)
+ {
+ if (F.isZero(alpha)) { return ; }
+
+ if (F.cardinality() < DOUBLE_TO_FLOAT_CROSSOVER){
+ return Protected::fger_convert<float,Field>(F,M,N,alpha,x, incx, y,incy, A, lda);
+ } else if (16*F.cardinality() < Givaro::ModularBalanced<double>::maxCardinality()){
+ return Protected::fger_convert<double,Field>(F,M,N,alpha,x, incx, y,incy, A, lda);
+ } else {
+ FFPACK::failure()(__func__,__LINE__,"Invalid ConvertTo Mode for this field");
+ }
+ }
+
+
+ template<class Field>
+ inline void
+ fger (const Field& F, const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr x, const size_t incx,
+ typename Field::ConstElement_ptr y, const size_t incy,
+ typename Field::Element_ptr A, const size_t lda,
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DefaultTag> & H)
+ {
+ if (F.isZero(alpha)) { return ; }
typename Field::Element tmp;
- const typename Field::Element* xi=x, *yj=y;
- typename Field::Element* Ai=A;
+ typename Field::ConstElement_ptr xi=x, yj=y;
+ typename Field::Element_ptr Ai=A;
+
if ( M < N ){
- if ( F.areEqual( alpha, F.one ) )
+ if ( F.isOne( alpha ) )
for ( ; Ai < A+M*lda; Ai+=lda, xi+=incx ){
yj = y;
for (size_t j = 0; j < N; ++j, yj+=incy )
F.axpyin( *(Ai+j), *xi, *yj );
}
- else if ( F.areEqual( alpha, F.mOne ) )
+ else if ( F.isMOne( alpha ) )
for ( ; Ai < A+M*lda; Ai+=lda, xi+=incx ){
F.neg( tmp, *xi );
yj = y;
@@ -66,14 +143,14 @@ namespace FFLAS {
F.axpyin( *(Ai+j), tmp, *yj );
}
} else {
- if ( F.areEqual( alpha, F.one ) ){
+ if ( F.isOne( alpha ) ){
for ( ; Ai < A+N; ++Ai, yj+=incy ){
xi = x;
for (size_t i = 0; i < M; ++i, xi+=incx )
F.axpyin( *(Ai+i*lda), *xi, *yj );
}
}
- else if ( F.areEqual( alpha, F.mOne ) )
+ else if ( F.isMOne( alpha ) )
for ( ; Ai < A+N; ++Ai, yj+=incy ){
F.neg( tmp, *yj );
xi = x;
@@ -91,16 +168,168 @@ namespace FFLAS {
}
- template<>
inline void
- fger( const DoubleDomain& , const size_t M, const size_t N,
- const DoubleDomain::Element alpha,
- const DoubleDomain::Element * x, const size_t incx,
- const DoubleDomain::Element * y, const size_t incy,
- DoubleDomain::Element * A, const size_t lda)
+ fger( const Givaro::DoubleDomain& F, const size_t M, const size_t N,
+ const Givaro::DoubleDomain::Element alpha,
+ const Givaro::DoubleDomain::ConstElement_ptr x, const size_t incx,
+ const Givaro::DoubleDomain::ConstElement_ptr y, const size_t incy,
+ Givaro::DoubleDomain::Element_ptr A, const size_t lda,
+ MMHelper<Givaro::DoubleDomain, MMHelperAlgo::Classic, ModeCategories::DefaultTag> & H)
{
-
+ if (F.isZero(alpha)) { return ; }
+ FFLASFFPACK_check(lda);
cblas_dger( CblasRowMajor, (int)M, (int)N, alpha, x, (int)incx, y, (int)incy, A, (int)lda );
}
+
+ template<class Field>
+ inline void
+ fger(const Field& F, const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ const typename Field::ConstElement_ptr x, const size_t incx,
+ const typename Field::ConstElement_ptr y, const size_t incy,
+ typename Field::Element_ptr A, const size_t lda,
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DefaultBoundedTag> & H)
+ {
+ H.setOutBounds (1, alpha, 1.0);
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DefaultTag> Hd(F,0);
+ fger (F, M, N, alpha, x, incx, y, incy, A, lda, Hd);
+ }
+
+ inline void
+ fger( const Givaro::FloatDomain& F, const size_t M, const size_t N,
+ const Givaro::FloatDomain::Element alpha,
+ const Givaro::FloatDomain::ConstElement_ptr x, const size_t incx,
+ const Givaro::FloatDomain::ConstElement_ptr y, const size_t incy,
+ Givaro::FloatDomain::Element_ptr A, const size_t lda,
+ MMHelper<Givaro::FloatDomain, MMHelperAlgo::Classic, ModeCategories::DefaultTag> & H)
+ {
+ if (F.isZero(alpha)) { return ; }
+
+ FFLASFFPACK_check(lda);
+ cblas_sger( CblasRowMajor, (int)M, (int)N, alpha, x, (int)incx, y, (int)incy, A, (int)lda );
+ }
+
+
+
+
+ template<class Field>
+ inline void
+ fger (const Field& F, const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr x, const size_t incx,
+ typename Field::ConstElement_ptr y, const size_t incy,
+ typename Field::Element_ptr A, const size_t lda,
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::LazyTag> & H)
+ {
+ if (F.isZero(alpha)) { return ; }
+
+ typedef MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::LazyTag> HelperType;
+ typedef typename HelperType::DelayedField delayedField;
+ typedef typename HelperType::DelayedField::Element DFElt;
+ typedef typename HelperType::DelayedField::ConstElement_ptr DFCElt_ptr;
+ typedef typename HelperType::DelayedField::Element_ptr DFElt_ptr;
+ typedef typename Field::Element Element;
+ typedef typename Field::Element_ptr Element_ptr;
+ typedef MMHelper<delayedField, MMHelperAlgo::Classic, ModeCategories::DefaultBoundedTag> DelayedHelperType;
+
+ DelayedHelperType Hfp(H);
+
+ if (Hfp.MaxDelayedDim(1.0) < 1){
+ if (Hfp.Amin < H.FieldMin || Hfp.Amax>H.FieldMax){
+ Hfp.initA();
+ freduce_constoverride (F, M, x, incx);
+ }
+ if (Hfp.Bmin < H.FieldMin || Hfp.Bmax>H.FieldMax){
+ Hfp.initB();
+ freduce_constoverride (F, N, y, incy);
+ }
+ if (Hfp.Cmin < H.FieldMin || Hfp.Cmax>H.FieldMax){
+ Hfp.initC();
+ freduce (F, M, N, A, lda);
+ }
+ }
+ Hfp.Outmin = Hfp.FieldMin;
+ Hfp.Outmax = Hfp.FieldMax;
+
+ if (F.isOne(alpha) || F.isMOne(alpha)){
+ DFElt alphadf;
+ if (F.isMOne( alpha)) alphadf = -F.one;
+ else alphadf = F.one;
+
+ fger (H.delayedField, M, N, alphadf, (DFCElt_ptr)x, incx, (DFCElt_ptr)y, incy, (DFElt_ptr)A, lda, Hfp);
+
+ H.Outmin = Hfp.Outmin;
+ H.Outmax = Hfp.Outmax;
+ } else {
+ Element_ptr sY = FFLAS::fflas_new<Element> (N);
+ fscal(F, N, alpha, y, incy, sY, 1);
+
+ fger (H.delayedField, M, N, 1.0, (DFCElt_ptr)x, incx, (DFCElt_ptr) sY, 1, (DFElt_ptr)A, lda, Hfp);
+
+ FFLAS::fflas_delete(sY);
+
+ H.setOutBounds (1, alpha, 1.0);
+
+ }
+
+ }
+
+ template<class Field>
+ inline void
+ fger (const Field& F, const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr x, const size_t incx,
+ typename Field::ConstElement_ptr y, const size_t incy,
+ typename Field::Element_ptr A, const size_t lda,
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DelayedTag> & H)
+ {
+ if (F.isZero(alpha)) { return ; }
+
+ if (Protected::AreEqual<Field, Givaro::Modular<int64_t> >::value ||
+ Protected::AreEqual<Field, Givaro::ModularBalanced<int64_t> >::value){
+ if (F.cardinality() < Givaro::ModularBalanced<double>::maxCardinality())
+ return Protected::fger_convert<double,Field>(F,M,N,alpha,x,incx,y,incy, A,lda);
+ else{
+ // Stay over int64_t
+ MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::LazyTag, ParSeqHelper::Sequential> HG(H);
+ HG.recLevel = 0;
+ fgemm(F,FflasNoTrans,FflasNoTrans,M,N,1,alpha,x,incx,y,incy,F.one,A,lda,HG);
+ freduce(F,M,N,A,lda);
+ H.initOut();
+ return;
+ }
+ }
+ typedef MMHelper<Field, MMHelperAlgo::Classic, ModeCategories::DelayedTag> ModularHelperType;
+ typedef typename ModularHelperType::DelayedField delayedField;
+ typedef typename delayedField::Element DFElt;
+ typedef typename delayedField::ConstElement_ptr DFCElt_ptr;
+ typedef typename delayedField::Element_ptr DFElt_ptr;
+ typedef typename Field::Element Element;
+ typedef typename Field::Element_ptr Element_ptr;
+ typedef MMHelper<delayedField, MMHelperAlgo::Classic, ModeCategories::DefaultBoundedTag> DelayedHelperType;
+
+ DelayedHelperType Hfp(H);
+
+ if (F.isOne(alpha) || F.isMOne(alpha)){
+ DFElt alphadf;
+ if (F.isMOne( alpha)) alphadf = -F.one;
+ else alphadf = F.one;
+
+ fger (H.delayedField, M, N, alphadf, (DFCElt_ptr)x, incx, (DFCElt_ptr)y, incy, (DFElt_ptr)A, lda, Hfp);
+
+ } else {
+ Element_ptr sY = FFLAS::fflas_new<Element> (N);
+ fscal(F, N, alpha, y, incy, sY, 1);
+
+ fger (H.delayedField, M, N, H.delayedField.one, (DFCElt_ptr)x, incx, (DFCElt_ptr)sY, (size_t)1, (DFElt_ptr)A, lda, Hfp);
+
+ FFLAS::fflas_delete(sY);
+
+ }
+
+ H.initOut();
+ }
+
} // FFLAS
+//#include "fflas-ffpack/fflas/fflas_fger_mp.inl" moved to fflas.h
#endif // __FFLASFFPACK_fger_INL
diff --git a/fflas-ffpack/fflas/fflas_fger_mp.inl b/fflas-ffpack/fflas/fflas_fger_mp.inl
new file mode 100644
index 0000000..9a0597a
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fger_mp.inl
@@ -0,0 +1,97 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+/** @file fflas_fgemm/fgemm_classical_mp.inl
+ * @brief matrix multiplication with multiprecision input (either over Z or over Z/pZ)
+ */
+
+
+#ifndef __FFPACK_fger_mp_INL
+#define __FFPACK_fger_mp_INL
+
+#include <givaro/modular-integer.h>
+#include <givaro/zring.h>
+
+#include "fflas-ffpack/fflas/fflas_helpers.inl"
+#include "fflas-ffpack/fflas/fflas_fgemm/fgemm_classical_mp.inl"
+#include "fflas-ffpack/field/rns-integer.h"
+#include "fflas-ffpack/field/rns-integer-mod.h"
+
+namespace FFLAS{
+
+
+ inline void
+ fger (const Givaro::Modular<Givaro::Integer>& F, const size_t M, const size_t N,
+ const typename Givaro::Integer alpha,
+ typename Givaro::Integer* x, const size_t incx,
+ typename Givaro::Integer* y, const size_t incy,
+ typename Givaro::Integer* A, const size_t lda,
+ MMHelper<Givaro::Modular<Givaro::Integer>, MMHelperAlgo::Classic, ModeCategories::ConvertTo<ElementCategories::RNSElementTag> > & H)
+ {
+ MMHelper<Givaro::Modular<Givaro::Integer>, MMHelperAlgo::Classic, ModeCategories::DefaultTag> H2;
+ FFLAS::fger(F,M,N,alpha,x,incx,y,incy,A,lda,H2);
+ }
+
+ template<typename RNS>
+ inline void
+ fger (const FFPACK::RNSInteger<RNS>& F, const size_t M, const size_t N,
+ const typename FFPACK::RNSInteger<RNS>::Element alpha,
+ typename FFPACK::RNSInteger<RNS>::Element_ptr x, const size_t incx,
+ typename FFPACK::RNSInteger<RNS>::Element_ptr y, const size_t incy,
+ typename FFPACK::RNSInteger<RNS>::Element_ptr A, const size_t lda,
+ MMHelper<FFPACK::RNSInteger<RNS>, MMHelperAlgo::Classic, ModeCategories::DefaultTag> & H)
+ {
+ for(size_t i=0;i<F.size();i++){
+ FFLAS::fger(F.rns()._field_rns[i],M,N,
+ alpha._ptr[i*alpha._stride],
+ x._ptr+i*x._stride,incx,y._ptr+i*y._stride,incy,A._ptr+i*A._stride,lda);
+ }
+ }
+
+ template<typename RNS>
+ inline void
+ fger (const FFPACK::RNSIntegerMod<RNS>& F, const size_t M, const size_t N,
+ const typename FFPACK::RNSIntegerMod<RNS>::Element alpha,
+ typename FFPACK::RNSIntegerMod<RNS>::Element_ptr x, const size_t incx,
+ typename FFPACK::RNSIntegerMod<RNS>::Element_ptr y, const size_t incy,
+ typename FFPACK::RNSIntegerMod<RNS>::Element_ptr A, const size_t lda,
+ MMHelper<FFPACK::RNSIntegerMod<RNS>, MMHelperAlgo::Classic> & H)
+ {
+ typedef FFPACK::RNSInteger<RNS> RnsDomain;
+ MMHelper<RnsDomain, MMHelperAlgo::Classic> H2;
+ RnsDomain Zrns(F.rns());
+ FFLAS::fger(Zrns,M,N,alpha,x,incx,y,incy,A,lda,H2);
+
+ // reduce the result mod p
+ freduce (F, M, N, A, lda);
+ }
+
+
+} // namespace FFLAS
+
+#endif
+
diff --git a/fflas-ffpack/fflas/fflas_freduce.h b/fflas-ffpack/fflas/fflas_freduce.h
new file mode 100644
index 0000000..55a895d
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_freduce.h
@@ -0,0 +1,182 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* fflas/fflas_freduce.inl
+ * Copyright (C) 2014 FFLAS FFPACK group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_freduce_H
+#define __FFLASFFPACK_fflas_freduce_H
+
+#include "fflas-ffpack/fflas/fflas_simd.h"
+#include "fflas-ffpack/field/field-traits.h"
+#include "fflas-ffpack/utils/cast.h"
+
+namespace FFLAS {
+
+ template<class T>
+ struct support_simd_mod : public std::false_type {} ;
+
+#ifdef __FFLASFFPACK_USE_SIMD
+ template<>
+ struct support_simd_mod<float> : public std::true_type {} ;
+ template<>
+ struct support_simd_mod<double> : public std::true_type {} ;
+#ifdef SIMD_INT
+ template<>
+ struct support_simd_mod<int64_t> : public std::true_type {} ;
+#endif // SIMD_INT
+
+#endif // __FFLASFFPACK_USE_SIMD
+
+} // FFLAS
+
+#include "fflas-ffpack/fflas/fflas_freduce.inl"
+
+namespace FFLAS {
+
+ /***************************/
+ /* LEVEL 1 */
+ /***************************/
+
+ template<class Field>
+ void
+ freduce (const Field & F, const size_t m,
+ typename Field::ConstElement_ptr B, const size_t incY,
+ typename Field::Element_ptr A, const size_t incX)
+ {
+ return details::freduce (F,m,B,incY,A,incX,typename FieldTraits<Field>::category());
+ }
+
+ template<class Field>
+ void
+ freduce (const Field & F, const size_t m,
+ typename Field::Element_ptr A, const size_t incX)
+ {
+ return details::freduce (F,m,A,incX,typename FieldTraits<Field>::category());
+ }
+
+ template<class Field>
+ void
+ freduce_constoverride(const Field & F, const size_t m,
+ typename Field::ConstElement_ptr A, const size_t incX)
+ {
+ return freduce(F, m, FFPACK::fflas_const_cast<typename Field::Element_ptr>(A), incX);
+ }
+
+ // OOOPS
+ // CP: to be moved to a fflas_finit field, if ever needed
+ template<class Field, class ConstOtherElement_ptr>
+ void
+ finit (const Field& F, const size_t n,
+ ConstOtherElement_ptr Y, const size_t incY,
+ typename Field::Element_ptr X, const size_t incX)
+ {
+ typename Field::Element_ptr Xi = X ;
+ ConstOtherElement_ptr Yi = Y ;
+
+ if (incX == 1 && incY == 1)
+ for (; Yi < Y + n ; ++Xi, ++Yi)
+ F.init( *Xi , *Yi);
+ else
+ for (; Yi < Y+n*incY; Xi+=incX, Yi += incY )
+ F.init( *Xi , *Yi);
+ }
+
+
+ /***************************/
+ /* LEVEL 2 */
+ /***************************/
+
+
+ template<class Field>
+ void
+ freduce (const Field& F, const size_t m , const size_t n,
+ typename Field::Element_ptr A, const size_t lda)
+ {
+ if (n == lda)
+ freduce (F, n*m, A, 1);
+ else
+ for (size_t i = 0 ; i < m ; ++i)
+ freduce (F, n, A+i*lda, 1);
+ return;
+ }
+ template<class Field>
+ void
+ pfreduce (const Field& F, const size_t m , const size_t n,
+ typename Field::Element_ptr A, const size_t lda, const size_t numths)
+ {
+ SYNCH_GROUP(
+ FORBLOCK1D(iter, m, SPLITTER(numths),
+ size_t rowsize= iter.end()-iter.begin();
+ TASK(MODE(CONSTREFERENCE(F) READWRITE(A[iter.begin()*lda])),
+ freduce (F, rowsize, n, A+iter.begin()*lda, lda);
+ );
+ );
+ );
+ return;
+ }
+
+ template<class Field>
+ void
+ freduce (const Field& F, const size_t m , const size_t n,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr A, const size_t lda)
+ {
+ for (size_t i = 0 ; i < m ; ++i) {
+ freduce(F,n,B+i*ldb,1,A+i*lda,1);
+ }
+ }
+
+
+ template<class Field>
+ void
+ freduce_constoverride(const Field & F, const size_t m, const size_t n,
+ typename Field::ConstElement_ptr A, const size_t lda)
+ {
+ return freduce(F, m, n,
+ FFPACK::fflas_const_cast<typename Field::Element_ptr>(A), lda);
+ }
+
+ // CP: to be moved to a fflas_finit field, if ever needed
+ template<class Field, class OtherElement_ptr>
+ void
+ finit (const Field& F, const size_t m , const size_t n,
+ const OtherElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr A, const size_t lda)
+ {
+ if (n == lda && n == ldb)
+ finit (F, n*m, B, 1, A, 1);
+ else
+ for (size_t i = 0 ; i < m ; ++i)
+ finit (F, n, B + i*ldb, 1, A + i*lda, 1);
+ return;
+ }
+
+} // end of namespace FFLAS
+
+//#include "fflas_freduce_mp.inl" moved to fflas.h
+
+#endif // __FFLASFFPACK_fflas_freduce_H
diff --git a/fflas-ffpack/fflas/fflas_freduce.inl b/fflas-ffpack/fflas/fflas_freduce.inl
new file mode 100644
index 0000000..e89d40e
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_freduce.inl
@@ -0,0 +1,794 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* fflas/fflas_freduce.inl
+ * Copyright (C) 2014 Pascal Giorgi
+ *
+ * Written by Pascal Giorgi <Pascal.Giorgi at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ * Part of this code is taken from http://libdivide.com/
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_freduce_INL
+#define __FFLASFFPACK_fflas_freduce_INL
+
+#include <givaro/udl.h>
+
+#include "fflas-ffpack/fflas/fflas_fassign.h"
+#include "fflas-ffpack/utils/bit_manipulation.h"
+
+#define FFLASFFPACK_COPY_REDUCE 32 /* TO BENCMARK LATER */
+
+
+
+namespace FFLAS { namespace vectorised { /* for casts (?) */
+
+ template<class T>
+ inline typename std::enable_if< ! std::is_integral<T>::value, T>::type
+ monfmod(T A, T B)
+ {
+ return fmod(A,B);
+ }
+
+ template<class T>
+ inline typename std::enable_if< std::is_integral<T>::value, T>::type
+ monfmod(T A, T B)
+ {
+ return A % B; // B > 0
+ }
+
+ template<>
+ inline Givaro::Integer monfmod(Givaro::Integer A, Givaro::Integer B) // @bug B is not integer, but uint64_t usually
+ {
+ return A % B; // B > 0
+ }
+
+ template<>
+ inline float monfmod(float A, float B)
+ {
+ return fmodf(A,B);
+ }
+
+ template<>
+ inline double monfmod(double A, double B)
+ {
+ //std::cerr<<"fmod"<<std::endl;
+ return fmod(A,B);
+ }
+
+ template<size_t K, size_t MG>
+ inline RecInt::rmint<K,MG>& monfmod(RecInt::rmint<K,MG>& A, RecInt::rmint<K,MG>& B)
+ {
+ return RecInt::rmint<K>::mod_n(A, B);
+ }
+
+ template<class T>
+ inline typename std::enable_if< ! std::is_integral<T>::value, T>::type
+ monrint(T A)// @bug pass by reference ?
+ {
+ return rint(A);
+ }
+
+ template<class T>
+ inline typename std::enable_if< std::is_integral<T>::value, T>::type
+ monrint( T A)
+ {
+ return A ;
+ }
+
+ template<>
+ inline double monrint(double A)
+ {
+ return rint(A);
+ }
+
+
+ template<>
+ inline float monrint(float A)
+ {
+ return rintf(A);
+ }
+
+ template<>
+ inline Givaro::Integer monrint(Givaro::Integer A) // @bug B is not integer, but uint64_t usually
+ {
+ return A ; // B > 0
+ }
+
+
+ template<bool overflow, bool poweroftwo>
+ inline int64_t monfmod(int64_t A, int64_t p, int8_t shifter, int64_t magic)
+ {
+ if (poweroftwo) { //shift path
+ int64_t q = A + ((A >> 63) & ((1_i64 << shifter) - 1));
+ q = A - ((q>>shifter)<< shifter) ;
+ return (q<0)?(q+p):q ;
+ }
+ else {
+ int64_t q = mulhi_64(magic, A);
+ if (overflow) {
+ q += A ;
+ }
+ q >>= shifter;
+ A = A - q * p ;
+ if (A >= p) A-= p ; // because of mulhi_fast
+ return A ;
+
+ }
+ }
+
+} // vectorised
+} // FFLAS
+
+namespace FFLAS { namespace vectorised {
+
+
+ template<class T>
+ inline void fast_mod_generate(bool & overflow, bool & poweroftwo, int8_t & shift, T & magic, T denom)
+ {
+ overflow = false ;
+ poweroftwo = false ;
+ shift = 0 ;
+ magic = 0 ;
+ }
+
+ //! @pre d > 0
+ template<>
+ inline void fast_mod_generate(bool & overflow, bool & poweroftwo, int8_t & shift, int64_t & magic, int64_t denom)
+ {
+
+ // overflow = false ;
+ // poweroftwo = false ;
+ // shift = 0 ;
+ // magic = 0 ;
+ if ((denom & (denom- 1)) == 0) {
+ shift = (int8_t)ctz((uint64_t)denom) ;
+ magic = 0;
+ poweroftwo = true ;
+ }
+ else {
+ const uint32_t floor_log_2_d = 63 - clz((uint64_t)denom);
+
+ /*the dividend here is 2**(floor_log_2_d + 63), so the low 64 bit word is 0 and the high word is floor_log_2_d - 1 */
+ uint64_t rem, proposed_m;
+
+ proposed_m = getpoweroftwoden_128(floor_log_2_d, denom, &rem);
+
+ const uint64_t e = denom- rem;
+
+ /* We are going to start with a power of floor_log_2_d - 1. This works if works if e < 2**floor_log_2_d. */
+ if (e < (1_ui64 << floor_log_2_d)) {
+ /* This power works */
+ shift = (int8_t)(floor_log_2_d - 1);
+ }
+ else {
+ /* We need to go one higher. This should not make proposed_m overflow, but it will make it negative when interpreted as an int32_t. */
+ proposed_m += proposed_m;
+ const uint64_t twice_rem = rem + rem;
+ if (twice_rem >= (uint64_t)denom || twice_rem < rem) proposed_m += 1;
+ shift = (int8_t) floor_log_2_d ;
+ overflow = true ;
+ }
+ proposed_m += 1;
+ magic = (int64_t)proposed_m ;
+ }
+ }
+
+ template<class Field, class ElementTraits = typename ElementTraits<typename Field::Element>::value>
+ struct HelperMod ;
+
+
+ template<class Field>
+ struct HelperMod<Field, ElementCategories::MachineIntTag> {
+ bool overflow = false ;
+ bool poweroftwo = false ;
+ int8_t shift = 0 ;
+ typename Field::Element magic = (typename Field::Element)0 ;
+ typename Field::Element p;
+
+ HelperMod()
+ {
+ // std::cout << "empty cstor called" << std::endl;
+ } ;
+
+ HelperMod( const Field & F)
+ {
+ // std::cout << "field cstor called" << std::endl;
+ p = (typename Field::Element) F.characteristic();
+ fast_mod_generate(overflow, poweroftwo, shift, magic, p);
+ // std::cout << overflow << ',' << poweroftwo << std::endl;
+ // std::cout << (int) shift << ',' << magic << std::endl;
+ // std::cout << this->shift << std::endl;
+ }
+
+ int getAlgo() const
+ {
+ // std::cout << "will be " << (2*overflow + poweroftwo) << std::endl;
+ return 2* (int)overflow + (int) poweroftwo ;
+ // return overflow << 1 | poweroftwo ;
+ }
+
+
+ } ;
+
+ template<class Field>
+ struct HelperMod<Field, FFLAS::ElementCategories::MachineFloatTag> {
+ typename Field::Element p;
+ typename Field::Element invp;
+ // typename Field::Elmeent min ;
+ // typename Field::Elmeent max ;
+
+ HelperMod() {} ;
+
+ HelperMod( const Field & F)
+ {
+ p = (typename Field::Element) F.characteristic();
+ invp = (typename Field::Element)1/p;
+ // min = F.minElement();
+ // max = F.maxElement();
+ }
+
+ int getAlgo() const
+ {
+ return 0;
+ }
+ } ;
+
+ template<class Field>
+ struct HelperMod<Field, FFLAS::ElementCategories::ArbitraryPrecIntTag> {
+ typename Field::Element p;
+ // typename Field::Element invp;
+ // typename Field::Elmeent min ;
+ // typename Field::Elmeent max ;
+
+ HelperMod() {} ;
+
+ HelperMod( const Field & F)
+ {
+ p = (typename Field::Element) F.characteristic();
+ // invp = (typename Field::Element)1/p;
+ // min = F.minElement();
+ // max = F.maxElement();
+ }
+
+ int getAlgo() const
+ {
+ return 0;
+ }
+ } ;
+
+ template<class Field>
+ struct HelperMod<Field, FFLAS::ElementCategories::FixedPrecIntTag> {
+ typename Field::Element p;
+ // typename Field::Element invp;
+ // typename Field::Elmeent min ;
+ // typename Field::Elmeent max ;
+
+ HelperMod() {} ;
+
+ HelperMod( const Field & F)
+ {
+ p = (typename Field::Element) F.characteristic();
+ // invp = (typename Field::Element)1/p;
+ // min = F.minElement();
+ // max = F.maxElement();
+ }
+
+ int getAlgo() const
+ {
+ return 0;
+ }
+ } ;
+
+
+#ifdef __FFLASFFPACK_USE_SIMD
+ template<class Field, class SimdT, class ElementTraits = typename ElementTraits<typename Field::Element>::value>
+ struct HelperModSimd ;
+
+ template<class Field, class SimdT>
+ struct HelperModSimd<Field, SimdT, ElementCategories::MachineIntTag> : public HelperMod<Field> {
+ typedef typename SimdT::vect_t vect_t ;
+ // bool overflow ;
+ // int8_t shift ;
+ // typename Field::Element p;
+ typename Field::Element magic ;
+ vect_t M ;
+ vect_t P ;
+ vect_t MIN ;
+ vect_t MAX ;
+ vect_t NEGP ;
+ vect_t Q ;
+ vect_t T ;
+
+ HelperModSimd ( const Field & F) :
+ HelperMod<Field>(F)
+ {
+ // std::cout << "HelperMod constructed " << this->shift << std::endl;
+ // p = F.characteristic();
+ P = SimdT::set1(this->p);
+ NEGP = SimdT::set1(-this->p);
+ MIN = SimdT::set1(F.minElement());
+ MAX = SimdT::set1(F.maxElement());
+ // fast_mod_generate(overflow, shift, magic, p);
+ M = SimdT::set1(magic);
+ }
+
+ HelperModSimd( const Field & F, const HelperMod<Field> & G)
+ {
+ this->overflow=G.overflow;
+ this->poweroftwo=G.poweroftwo;
+ this->shift=G.shift;
+ this->magic=G.magic;
+ this->p=G.p;
+ // std::cout << "magic is = " << this->magic<< ',' << G.magic<< std::endl;
+ P = SimdT::set1(this->p);
+ NEGP = SimdT::set1(-(this->p));
+ MIN = SimdT::set1(F.minElement());
+ MAX = SimdT::set1(F.maxElement());
+ // fast_mod_generate(overflow, shift, magic, p);
+ M = SimdT::set1(magic);
+ }
+
+ } ;
+
+ template<class Field, class SimdT>
+ struct HelperModSimd<Field, SimdT, ElementCategories::MachineFloatTag> : public HelperMod<Field> {
+ typedef typename SimdT::vect_t vect_t ;
+ vect_t INVP;
+ vect_t MIN ;
+ vect_t MAX ;
+ vect_t NEGP ;
+ vect_t P ;
+ vect_t Q ;
+ vect_t T ;
+
+ HelperModSimd( const Field & F) :
+ HelperMod<Field>(F)
+ {
+ P = SimdT::set1(this->p);
+ NEGP = SimdT::set1(-(this->p));
+ // MIN = SimdT::set1(max);
+ MIN = SimdT::set1(F.minElement());
+ // MAX = SimdT::set1(min);
+ MAX = SimdT::set1(F.maxElement());
+ INVP = SimdT::set1(this->invp);
+ }
+
+ HelperModSimd( const Field & F, const HelperMod<Field> & G)
+ {
+ this->p = G.p;
+ this->invp = G.invp ;
+ P = SimdT::set1(this->p);
+ NEGP = SimdT::set1(-this->p);
+ // MIN = SimdT::set1(max);
+ MIN = SimdT::set1(F.minElement());
+ // MAX = SimdT::set1(min);
+ MAX = SimdT::set1(F.maxElement());
+ INVP = SimdT::set1(this->invp);
+
+
+ }
+ } ;
+#endif // __FFLASFFPACK_USE_SIMD
+
+
+#ifdef __x86_64__
+ template<class Field, int ALGO>
+ typename std::enable_if< std::is_same<typename Field::Element,int64_t>::value , int64_t>::type
+ monfmod (typename Field::Element A, HelperMod<Field,ElementCategories::MachineIntTag> & H)
+ {
+ switch(ALGO) {
+ case 3 :
+ // std::cout << 3 << std::endl;
+ return monfmod<true,true> (A,H.p,H.shift,H.magic);
+ case 2 :
+ // std::cout << 2 << std::endl;
+ return monfmod<true,false> (A,H.p,H.shift,H.magic);
+ case 1 :
+ // std::cout << 1 << std::endl;
+ return monfmod<false,true> (A,H.p,H.shift,H.magic);
+ case 0 :
+ // std::cout << "using " << 0 << std::endl;
+ return monfmod<false,false>(A,H.p,H.shift,H.magic);
+ default :
+ FFLASFFPACK_abort("unknown algo");
+ }
+ }
+#endif // __x86_64__
+
+
+ template<class Field, int ALGO>
+#ifdef __x86_64__
+ typename std::enable_if< ! std::is_same<typename Field::Element,int64_t>::value , typename Field::Element>::type
+#else
+ typename Field::Element
+#endif // __x86_64__
+ monfmod (typename Field::Element A, HelperMod<Field,ElementCategories::MachineIntTag> & H)
+ {
+ return monfmod(A,H.p);
+ }
+
+ template<class Field, int ALGO>
+ typename Field::Element monfmod (typename Field::Element A, HelperMod<Field,ElementCategories::MachineFloatTag> & H)
+ {
+ return monfmod(A,H.p);
+ }
+
+ template<class Field, int ALGO>
+ typename Field::Element monfmod (typename Field::Element A, HelperMod<Field,ElementCategories::ArbitraryPrecIntTag> & H)
+ {
+ return monfmod(A,H.p);
+ }
+
+
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+ template<class Field, class SimdT, int ALGO>
+ inline void
+ VEC_MOD(typename SimdT::vect_t & C, HelperModSimd<Field,SimdT,ElementCategories::MachineFloatTag> & H)
+ {
+ C = SimdT::mod( C, H.P, H.INVP, H.NEGP, H.MIN, H.MAX, H.Q, H.T );
+ }
+
+ template<class Field, class SimdT, int ALGO>
+ inline void
+ VEC_MOD(typename SimdT::vect_t & C, HelperModSimd<Field,SimdT,ElementCategories::MachineIntTag> & H)
+ {
+ // std::cout << "magic " << H.magic<< std::endl;
+ // std::cout << H.P << std::endl;
+ switch (ALGO) {
+ case 0 :
+ C = SimdT::template mod<false,false>( C, H.P, H.shift, H.M, H.NEGP, H.MIN, H.MAX, H.Q, H.T );
+ break;
+ case 1 :
+ C = SimdT::template mod<true,false> ( C, H.P, H.shift, H.M, H.NEGP, H.MIN, H.MAX, H.Q, H.T );
+ break;
+ case 2 :
+ C = SimdT::template mod<false,true> ( C, H.P, H.shift, H.M, H.NEGP, H.MIN, H.MAX, H.Q, H.T );
+ break;
+ case 3 :
+ C = SimdT::template mod<true,true> ( C, H.P, H.shift, H.M, H.NEGP, H.MIN, H.MAX, H.Q, H.T );
+ break;
+ }
+ }
+
+#endif // __FFLASFFPACK_USE_SIMD
+
+} // vectorised
+} // FFLAS
+
+namespace FFLAS { namespace vectorised { namespace unswitch {
+
+#ifdef __FFLASFFPACK_USE_SIMD
+ template<class Field, bool round, int algo>
+ inline typename std::enable_if<FFLAS::support_simd_mod<typename Field::Element>::value, void>::type
+ modp(const Field &F, typename Field::ConstElement_ptr U, const size_t & n,
+ typename Field::Element_ptr T
+ , HelperMod<Field> & G
+ )
+ {
+
+// std::cerr<<"modp vectorized"<<std::endl;
+ typedef typename Field::Element Element;
+ Element min = (Element)F.minElement(), max = (Element)F.maxElement();
+ using simd = Simd<Element>;
+ using vect_t = typename simd::vect_t;
+ bool positive = ! FieldTraits<Field>::balanced ; // known at compile time
+ HelperModSimd<Field,simd> H(F,G);
+
+ size_t i = 0;
+ if (n < simd::vect_size)
+ {
+// std::cerr<< n<< " < "<<simd::vect_size<<std::endl;
+ for (; i < n ; i++)
+ {
+ if (round)
+ {
+ T[i] = monrint(U[i]);
+ T[i] = monfmod<Field,algo>(T[i],H);
+ }
+ else
+ {
+ T[i]=monfmod<Field,algo>(U[i],H);
+ }
+ if (!positive)
+ {
+ T[i]-=(T[i]>max)?H.p:0;
+ }
+ T[i]+=(T[i]<min)?H.p:0;
+ }
+ return;
+ }
+
+ long st = long(T) % simd::alignment;
+
+ // the array T is not 32 byte aligned (process few elements s.t. (T+i) is 32 bytes aligned)
+
+ if (st)
+ {
+// std::cerr<< st << " not aligned on "<<simd::alignment<<std::endl;
+
+ for (size_t j = static_cast<size_t>(st) ; j < simd::alignment ; j += sizeof(Element), i++)
+ {
+ if (round)
+ {
+ T[i] = monrint(U[i]);
+ T[i] = monfmod<Field,algo>(T[i],H);
+ }
+ else
+ {
+ T[i] = monfmod<Field,algo>(U[i],H);
+ }
+ if (!positive)
+ {
+ T[i] -= (T[i] > max) ? H.p : 0;
+ }
+ T[i] += (T[i] < min) ? H.p : 0;
+ }
+ }
+
+ FFLASFFPACK_check((long(T+i) % simd::alignment == 0));
+
+ vect_t C ;
+
+ if((long(U+i) % simd::alignment == 0))
+ {
+ // perform the loop using 256 bits SIMD
+ for (; i<= n - simd::vect_size ; i += simd::vect_size)
+ {
+ C = simd::load(U + i);
+
+ if (round)
+ {
+ C = simd::round(C);
+ }
+
+ VEC_MOD<Field,simd,algo>(C,H);
+ simd::store(T+i, C);
+ }
+ }
+
+ // perform the last elt from T without SIMD
+// std::cerr<< n-i<< " unaligned elements left "<<std::endl;
+ for (;i<n;i++)
+ {
+
+ if (round)
+ {
+ T[i] = monrint(U[i]);
+ T[i] = monfmod<Field,algo>(T[i],H);
+ }
+ else
+ {
+ T[i] = monfmod<Field,algo>(U[i],H);
+ }
+ if (!positive)
+ {
+ T[i] -= (T[i] > max) ? H.p : 0;
+ }
+ T[i] += (T[i] < min) ? H.p : 0;
+ }
+ }
+#endif
+
+ // not vectorised but allows better code than % or fmod via helper
+ template<class Field, bool round, int algo>
+ inline typename std::enable_if< !FFLAS::support_simd_mod<typename Field::Element>::value, void>::type
+ modp(const Field &F, typename Field::ConstElement_ptr U, const size_t & n,
+ typename Field::Element_ptr T
+ , HelperMod<Field> & H
+ )
+ {
+// std::cerr<<"modp not vectorized"<<std::endl;
+ typedef typename Field::Element Element;
+ Element min = (Element)F.minElement(), max = (Element)F.maxElement();
+ bool positive = ! FieldTraits<Field>::balanced ;
+
+ size_t i = 0;
+ for (; i < n ; i++)
+ {
+ if (round)
+ {
+ T[i] = monrint(U[i]);
+ T[i] = monfmod<Field,algo>(T[i],H);
+ }
+ else
+ {
+ T[i]=monfmod<Field,algo>(U[i],H);
+ }
+ if (!positive)
+ {
+ T[i]-=(T[i]>max)?H.p:(typename Field::Element)0;
+ }
+ T[i]+=(T[i]<min)?H.p:(typename Field::Element)0;
+ }
+ }
+
+} // unswitch
+} // vectorised
+} // FFLAS
+
+namespace FFLAS { namespace vectorised {
+
+
+ template<class Field, bool round>
+ //inline typename std::enable_if<FFLAS::support_simd_mod<typename Field::Element>::value, void>::type
+ void
+ modp(const Field &F, typename Field::ConstElement_ptr U, const size_t & n,
+ typename Field::Element_ptr T)
+ {
+ HelperMod<Field> H(F);
+
+ int ALGO = H.getAlgo();
+
+ switch (ALGO) {
+ case 0 :
+ unswitch::modp<Field,round,0>(F,U,n,T,H);
+ break;
+ case 1 :
+ unswitch::modp<Field,round,1>(F,U,n,T,H);
+ break;
+ case 2 :
+ unswitch::modp<Field,round,2>(F,U,n,T,H);
+ break;
+ case 3 :
+ unswitch::modp<Field,round,3>(F,U,n,T,H);
+ break;
+ }
+ }
+
+} // vectorised
+} // FFLAS
+
+
+namespace FFLAS { namespace details {
+
+
+ // specialised
+ template<class Field>
+ typename std::enable_if<FFLAS::support_simd_mod<typename Field::Element>::value, void>::type
+ freduce (const Field & F, const size_t m,
+ typename Field::Element_ptr A, const size_t incX, FieldCategories::ModularTag)
+ {
+ if(incX == 1) {
+ vectorised::modp<Field,false>(F,A,m,A);
+ }
+ else { /* faster with copy, use incX=1, copy back ? */
+ if (m < FFLASFFPACK_COPY_REDUCE) {
+ typename Field::Element_ptr Xi = A ;
+ for (; Xi < A+m*incX; Xi+=incX )
+ F.reduce(*Xi);
+ }
+ else {
+ typename Field::Element_ptr Ac = fflas_new (F,m,1) ;
+ fassign (F,m,A,incX,Ac,1);
+ freduce (F,m,Ac,1,FieldCategories::ModularTag());
+ fassign (F,m,Ac,1,A,incX);
+ fflas_delete (Ac);
+ }
+ }
+ }
+
+ template<class Field>
+ typename std::enable_if< ! FFLAS::support_simd_mod<typename Field::Element>::value, void>::type
+ freduce (const Field & F, const size_t m,
+ typename Field::Element_ptr A, const size_t incX, FieldCategories::ModularTag)
+ { /* ??? ( faster with copy, use incX=1, copy back ? */
+ // CP: no SIMD supported here!
+ // if(incX == 1) {
+ // vectorised::modp<Field,false>(F,A,m,A);
+ // }
+ // else {
+ typename Field::Element_ptr Xi = A ;
+ for (; Xi < A+m*incX; Xi+=incX )
+ F.reduce(*Xi);
+ // }
+ }
+
+ template<class Field>
+ void
+ freduce (const Field & F, const size_t m,
+ typename Field::Element_ptr A, const size_t incX,
+ FieldCategories::GenericTag)
+ {
+ typename Field::Element_ptr Xi = A ;
+ for (; Xi < A+m*incX; Xi+=incX )
+ F.reduce (*Xi);
+ }
+
+ template<class Field>
+ void
+ freduce (const Field & F, const size_t m,
+ typename Field::Element_ptr A, const size_t incX,
+ FieldCategories::UnparametricTag)
+ {
+ typename Field::Element_ptr Xi = A ;
+ for (; Xi < A+m*incX; Xi+=incX )
+ F.reduce (*Xi);
+ }
+
+ template<class Field>
+ typename std::enable_if< FFLAS::support_simd_mod<typename Field::Element>::value, void>::type
+ freduce (const Field & F, const size_t m,
+ typename Field::ConstElement_ptr B, const size_t incY,
+ typename Field::Element_ptr A, const size_t incX,
+ FieldCategories::ModularTag)
+ {
+
+ if(incX == 1 && incY == 1) {
+ vectorised::modp<Field,false>(F,B,m,A);
+ }
+ else {
+ typename Field::Element_ptr Xi = A ;
+ typename Field::ConstElement_ptr Yi = B ;
+ for (; Xi < A+m*incX; Xi+=incX, Yi += incY )
+ F.reduce (*Xi , *Yi);
+ }
+ }
+
+ template<class Field>
+ typename std::enable_if< ! FFLAS::support_simd_mod<typename Field::Element>::value, void>::type
+ freduce (const Field & F, const size_t m,
+ typename Field::ConstElement_ptr B, const size_t incY,
+ typename Field::Element_ptr A, const size_t incX,
+ FieldCategories::ModularTag)
+ {
+
+ typename Field::Element_ptr Xi = A ;
+ typename Field::ConstElement_ptr Yi = B ;
+ for (; Xi < A+m*incX; Xi+=incX, Yi += incY )
+ F.reduce (*Xi , *Yi);
+ }
+
+ template<class Field>
+ void
+ freduce (const Field & F, const size_t m,
+ typename Field::ConstElement_ptr B, const size_t incY,
+ typename Field::Element_ptr A, const size_t incX,
+ FieldCategories::GenericTag)
+ {
+ typename Field::Element_ptr Xi = A ;
+ typename Field::ConstElement_ptr Yi = B ;
+ for (; Xi < A+m*incX; Xi+=incX, Yi += incY )
+ F.reduce (*Xi , *Yi);
+ }
+ template<class Field>
+ void
+ freduce (const Field & F, const size_t m,
+ typename Field::ConstElement_ptr B, const size_t incY,
+ typename Field::Element_ptr A, const size_t incX,
+ FieldCategories::UnparametricTag)
+ {
+ typename Field::Element_ptr Xi = A ;
+ typename Field::ConstElement_ptr Yi = B ;
+ for (; Xi < A+m*incX; Xi+=incX, Yi += incY )
+ F.reduce (*Xi , *Yi);
+ }
+
+} // details
+} // FFLAS
+
+
+#endif // __FFLASFFPACK_fflas_freduce_INL
+
diff --git a/fflas-ffpack/fflas/fflas_freduce_mp.inl b/fflas-ffpack/fflas/fflas_freduce_mp.inl
new file mode 100644
index 0000000..c757166
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_freduce_mp.inl
@@ -0,0 +1,67 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* fflas/fflas_freduce_mp.inl
+ * Copyright (C) 2014 FFLAS FFPACK group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_freduce_mp_INL
+#define __FFLASFFPACK_fflas_freduce_mp_INL
+
+#include "fflas-ffpack/field/rns-integer-mod.h"
+
+namespace FFLAS {
+
+ // specialization of the level1 freduce function for the field RNSInteger<rns_double>
+ template<>
+ inline void freduce (const FFPACK::RNSIntegerMod<FFPACK::rns_double> &F,
+ const size_t n, FFPACK::RNSIntegerMod<FFPACK::rns_double>::Element_ptr A, size_t inc)
+ {
+ if (n==0) return;
+ //cout<<"freduce: "<<n<<" with "<<inc<<endl;
+ if (inc==1)
+ F.reduce_modp(n,A);
+ else
+ F.reduce_modp(n,1,A,inc);
+ }
+ // specialization of the level2 freduce function for the field RNSInteger<rns_double>
+ template<>
+ inline void freduce (const FFPACK::RNSIntegerMod<FFPACK::rns_double> &F,
+ const size_t m, const size_t n, FFPACK::rns_double::Element_ptr A, size_t lda)
+ {
+ if (n==0||m==0) return;
+ //cout<<"freduce: "<<m<<" x "<<n<<" "<<lda<<endl;
+ if (lda == n)
+ F.reduce_modp(m*n,A);
+ else
+ F.reduce_modp(m,n,A,lda);
+ }
+
+
+
+} // end of namespace FFLAS
+
+#endif
+
diff --git a/fflas-ffpack/fflas/fflas_freivalds.inl b/fflas-ffpack/fflas/fflas_freivalds.inl
new file mode 100644
index 0000000..dd011f6
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_freivalds.inl
@@ -0,0 +1,121 @@
+/* fflas/fflas_freivalds.inl
+ * Copyright (C) 2014 Jean-Guillaume Dumas
+ *
+ * Written by Jean-Guillaume Dumas <Jean-Guillaume.Dumas at imag.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+#ifndef __FFLASFFPACK_freivalds_INL
+#define __FFLASFFPACK_freivalds_INL
+
+// #include "fflas-ffpack/utils/Matio.h"
+
+namespace FFLAS{
+
+ /** @brief freivalds: <b>F</b>reivalds <b>GE</b>neral <b>M</b>atrix <b>M</b>ultiply <b>R</b>andom <b>C</b>heck.
+ *
+ * Randomly Checks \f$C = \alpha \mathrm{op}(A) \times \mathrm{op}(B)\f$
+ * \param F field.
+ * \param ta if \c ta==FflasTrans then \f$\mathrm{op}(A)=A^t\f$, else \f$\mathrm{op}(A)=A\f$,
+ * \param tb same for matrix \p B
+ * \param m see \p A
+ * \param n see \p B
+ * \param k see \p A
+ * \param alpha scalar
+ * \param A \f$\mathrm{op}(A)\f$ is \f$m \times k\f$
+ * \param B \f$\mathrm{op}(B)\f$ is \f$k \times n\f$
+ * \param C \f$C\f$ is \f$m \times n\f$
+ * \param lda leading dimension of \p A
+ * \param ldb leading dimension of \p B
+ * \param ldc leading dimension of \p C
+ */
+ template<class Field> inline bool
+ freivalds (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n, const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::ConstElement_ptr C, const size_t ldc) {
+
+ typename Field::Element_ptr v, y, x;
+
+ v = FFLAS::fflas_new(F,n,1);
+ y = FFLAS::fflas_new(F,k,1);
+ x = FFLAS::fflas_new(F,m,1);
+
+ typename Field::RandIter G(F);
+ for(size_t j=0; j<n; ++j)
+ G.random(v[j]);
+
+// F.write(std::cerr<< "alpha:=", alpha) << ';' << std::endl;
+// F.write(std::cerr<< "moinsun:=", F.mOne) << ';' << std::endl;
+// std::cerr<< "p:=" << F.characteristic() << ';' << std::endl;
+// write_field(F,std::cerr<<"v:=",v,n,1,1,true) << ';' << std::endl;
+// write_field(F,std::cerr<<"A:=",A,m,k,lda,true) << ';' << std::endl;
+// write_field(F,std::cerr<<"B:=",B,k,n,ldb,true) << ';' << std::endl;
+// write_field(F,std::cerr<<"C:=",C,m,n,ldc,true) << ';' << std::endl;
+
+ bool pass=true;
+
+ // y <-- 1.\mathrm{op}(B).v
+ size_t Bnrows = (tb == FflasNoTrans)? k : n;
+ size_t Bncols = (tb == FflasNoTrans)? n : k;
+ size_t Anrows = (ta == FflasNoTrans)? m : k;
+ size_t Ancols = (ta == FflasNoTrans)? k : m;
+
+ FFLAS::fgemv(F, tb, Bnrows, Bncols, F.one, B, ldb, v, 1, F.zero, y, 1);
+// write_field(F,std::cerr<<"y:=",y,k,1,1,true) << ';' << std::endl;
+ // x <-- alpha.\mathrm{op}(A).y
+ // x <-- alpha.\mathrm{op}(A).\mathrm{op}(B).v
+ FFLAS::fgemv(F, ta, Anrows, Ancols, alpha, A, lda, y, 1, F.zero, x, 1);
+// write_field(F,std::cerr<<"x:=",x,m,1,1,true) << ';' << std::endl;
+
+// // x <-- -C.v+x =?= 0
+ FFLAS::fgemv(F, FFLAS::FflasNoTrans,m,n, F.mOne, C, ldc, v, 1, F.one, x, 1);
+// write_field(F,std::cerr<<"t:=",x,m,1,1,true) << ';' << std::endl;
+
+ for(size_t j=0; j<m; ++j)
+ pass &= F.isZero (x[j]);
+
+// // z <-- C.v
+// typename Field::Element_ptr z = FFLAS::fflas_new(F,m,1);
+// FFLAS::fgemv(F, FFLAS::FflasNoTrans, m,n , F.one, C, ldc, v, 1, F.zero, z, 1);
+// // write_field(F,std::cerr<<"z:=",z,m,1,1,true) << ';' << std::endl;
+
+// for(size_t j=0; j<m; ++j)
+// pass &= F.areEqual(z[j],x[j]);
+
+ FFLAS::fflas_delete(y);
+ FFLAS::fflas_delete(v);
+ FFLAS::fflas_delete(x);
+// FFLAS::fflas_delete(z);
+ return pass;
+ }
+
+}
+
+
+
+#endif // __FFLASFFPACK_freivalds_INL
+
+
diff --git a/fflas-ffpack/fflas-ffpack.h b/fflas-ffpack/fflas/fflas_fscal.h
similarity index 71%
copy from fflas-ffpack/fflas-ffpack.h
copy to fflas-ffpack/fflas/fflas_fscal.h
index ceeb9c0..64d1c1f 100644
--- a/fflas-ffpack/fflas-ffpack.h
+++ b/fflas-ffpack/fflas/fflas_fscal.h
@@ -1,7 +1,11 @@
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2011 FFLAS-FFPACK
- * Written by <brice.boyer at imag.fr>
+
+/*
+ * Copyright (C) 2014 FFLAS-FFPACK group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
*
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
@@ -20,20 +24,16 @@
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* ========LICENCE========
- *
+ *.
*/
-/*! @file fflas-ffpack/fflas-ffpack.h
- * @ingroup fflas-ffpack
- * @brief Includes FFLAS and FFPACK
- */
+#ifndef __FFLASFFPACK_fscal_H
+#define __FFLASFFPACK_fscal_H
+//! @todo traits
-#ifndef __FFLASFFPACK_fflas_ffpack_H
-#define __FFLASFFPACK_fflas_ffpack_H
+#include "fflas_fscal.inl"
+//#include "fflas_fscal_mp.inl" moved to fflas.h
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
-#include "fflas/fflas.h"
-#include "ffpack/ffpack.h"
+#endif // __FFLASFFPACK_fscal_H
-#endif // __FFLASFFPACK_fflas_ffpack_H
diff --git a/fflas-ffpack/fflas/fflas_fscal.inl b/fflas-ffpack/fflas/fflas_fscal.inl
new file mode 100644
index 0000000..fa8d2d9
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fscal.inl
@@ -0,0 +1,418 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* fflas/fflas_faxpy.inl
+ * Copyright (C) 2014 FFLAS-FFPACK group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fscal_INL
+#define __FFLASFFPACK_fscal_INL
+
+namespace FFLAS { namespace vectorised {
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+ template<class SimdT, class Element>
+ inline typename std::enable_if<is_simd<SimdT>::value, void>::type
+ VEC_SCAL(SimdT & C, SimdT & ALPHA, SimdT & Q, SimdT & T, SimdT & P, SimdT & NEGP, SimdT & INVP, SimdT & MIN, SimdT & MAX)
+ {
+ using simd = Simd<Element>;
+ Q = simd::mul(C,INVP);
+ C = simd::mul(C,ALPHA);
+ Q = simd::floor(Q);
+ C = simd::fnmadd(C,Q,P);
+ Q = simd::greater(C,MAX);
+ T = simd::lesser(C,MIN);
+ Q = simd::vand(Q,NEGP);
+ T = simd::vand(T,P);
+ Q = simd::vor(Q,T);
+ C = simd::add(C,Q);
+ }
+
+ template<class Element, class T1, class T2>
+ inline typename std::enable_if<std::is_floating_point<Element>::value, void>::type
+ scalp(Element *T, const Element alpha, const Element * U, const size_t n, const Element p, const Element invp, const T1 min_, const T2 max_)
+ {
+ Element min = (Element)min_, max=(Element)max_;
+ using simd = Simd<Element>;
+ using vect_t = typename simd::vect_t;
+
+ size_t i = 0;
+
+ if (n < simd::vect_size)
+ {
+ for (; i < n ; i++)
+ {
+ T[i]=monfmod(alpha*U[i], p);
+ T[i] -= (T[i] > max) ? p : 0;
+ T[i] += (T[i] < min) ? p : 0;
+ }
+ return;
+
+ }
+ vect_t C,Q,P,NEGP,INVP,TMP,MIN,MAX,ALPHA;
+ ALPHA = simd::set1(alpha);
+ P = simd::set1(p);
+ NEGP = simd::set1(-p);
+ INVP = simd::set1(invp);
+ MIN = simd::set1(min);
+ MAX = simd::set1(max);
+ long st = long(T) % simd::alignment;
+ if (st)
+ { // the array T is not 32 byte aligned (process few elements s.t. (T+i) is 32 bytes aligned)
+ for (size_t j = static_cast<size_t>(st) ; j < simd::alignment ; j+=sizeof(Element), i++)
+ {
+ T[i] = monfmod(alpha*U[i], p);
+ T[i] -= (T[i] > max) ? p : 0;
+ T[i] += (T[i] < min) ? p : 0;
+ }
+ }
+ FFLASFFPACK_check((long(T+i) % simd::alignment == 0));
+ if ((long(U+i)%simd::alignment==0))
+ {
+ // perform the loop using 256 bits SIMD
+ for (;i <= n - simd::vect_size ; i += simd::vect_size)
+ {
+ C = simd::load(U+i);
+ VEC_SCAL<vect_t,Element>(C, ALPHA, Q, TMP, P, NEGP, INVP, MIN, MAX);
+ simd::store(T+i,C);
+ }
+ }
+ // perform the last elt from T without SIMD
+ for (; i < n ; i++)
+ {
+ T[i] = monfmod(alpha*U[i],p);
+ T[i] -= (T[i] > max) ? p : 0;
+ T[i] += (T[i] < min) ? p : 0;
+ }
+ }
+
+#else
+
+ template<class Element, class T1, class T2>
+ void
+ scalp(Element *T, const Element alpha, const Element * U, const size_t n, const Element p, const Element invp, const T1 min_, const T2 max_)
+ {
+ Element min = (Element)min_, max=(Element)max_;
+
+ size_t i = 0;
+
+ {
+ for (; i < n ; i++)
+ {
+ T[i]=monfmod(alpha*U[i], p);
+ T[i] -= (T[i] > max) ? p : 0;
+ T[i] += (T[i] < min) ? p : 0;
+ }
+ return;
+
+ }
+
+ }
+
+#endif // __FFLASFFPACK_USE_SIMD
+} // vectorised
+} // FFLAS
+
+namespace FFLAS {
+
+ /***************************/
+ /* LEVEL 1 */
+ /***************************/
+
+
+ template<class Field>
+ inline void
+ fscal( const Field& F, const size_t N,
+ const typename Field::Element a,
+ typename Field::ConstElement_ptr X, const size_t incX,
+ typename Field::Element_ptr Y, const size_t incY )
+ {
+ // details::fscal(F,N,a,X,incX,Y,incY, typename FieldTraits<Field>::value() );
+ if (F.isOne(a)) {
+ fassign(F,N,X,incX,Y,incY);
+ return ;
+ }
+
+ typename Field::ConstElement_ptr Xi = X;
+ typename Field::Element_ptr Yi = Y;
+ if (F.areEqual(a,F.mOne)){
+ fneg(F,N,X,incX,Y,incY);
+ return;
+ }
+
+ if (F.isZero(a)){
+ fzero(F,N,Y,incY);
+ return;
+ }
+
+ if (incX == 1 && incY == 1)
+ for (size_t i = 0 ; i < N ; ++i)
+ F.mul( Y[i], a, X[i] );
+ else
+ for (; Xi < X+N*incX; Xi+=incX, Yi+=incY )
+ F.mul( *Yi, a, *Xi );
+ }
+
+ template<class Field>
+ inline void
+ fscalin (const Field& F, const size_t n, const typename Field::Element a,
+ typename Field::Element_ptr X, const size_t incX)
+ {
+ if (F.isOne(a))
+ return ;
+
+ if (F.isMOne(a)){
+ fnegin(F,n,X,incX);
+ return;
+ }
+
+ if (F.isZero(a)){
+ fzero(F,n,X,incX);
+ return;
+ }
+
+ typename Field::Element_ptr Xi = X ;
+
+ if ( incX == 1)
+ for (size_t i = 0 ; i < n ; ++i)
+ F.mulin( X[i], a);
+ else
+
+ for (; Xi < X+n*incX; Xi+=incX )
+ F.mulin( *Xi, a);
+ }
+
+ template<>
+ inline void
+ fscal( const Givaro::DoubleDomain& , const size_t N,
+ const Givaro::DoubleDomain::Element a,
+ Givaro::DoubleDomain::ConstElement_ptr x, const size_t incx,
+ Givaro::DoubleDomain::Element_ptr y, const size_t incy )
+ {
+ cblas_dcopy( (int)N, x, (int)incy, y, (int)incy);
+ cblas_dscal( (int)N, a, y, (int)incy);
+ }
+
+ template<>
+ inline void
+ fscal( const Givaro::FloatDomain& , const size_t N,
+ const Givaro::FloatDomain::Element a,
+ Givaro::FloatDomain::ConstElement_ptr x, const size_t incx,
+ Givaro::FloatDomain::Element_ptr y, const size_t incy )
+ {
+ cblas_scopy( (int)N, x, (int)incy, y, (int)incy);
+ cblas_sscal( (int)N, a, y, (int)incy);
+ }
+
+ template<>
+ inline void
+ fscalin( const Givaro::DoubleDomain& , const size_t N,
+ const Givaro::DoubleDomain::Element a,
+ Givaro::DoubleDomain::Element_ptr y, const size_t incy )
+ {
+
+ cblas_dscal( (int)N, a, y, (int)incy);
+ }
+
+ template<>
+ inline void
+ fscalin( const Givaro::FloatDomain& , const size_t N,
+ const Givaro::FloatDomain::Element a,
+ Givaro::FloatDomain::Element_ptr y, const size_t incy )
+ {
+
+ cblas_sscal( (int)N, a, y, (int)incy);
+ }
+
+
+ template<>
+ inline void
+ fscalin( const Givaro::Modular<float>& F , const size_t N,
+ const float a,
+ float * X, const size_t incX )
+ {
+ if(incX == 1) {
+ float p, invp;
+ p=(float)F.cardinality();
+ invp=a/p;
+ vectorised::scalp(X,a,X,N,p,invp,0,p-1);
+ }
+ else {
+ float * Xi = X ;
+ for (; Xi < X+N*incX; Xi+=incX )
+ F.mulin( *Xi , a);
+
+ }
+ }
+
+ template<>
+ inline void
+ fscalin( const Givaro::ModularBalanced<float>& F , const size_t N,
+ const float a,
+ float * X, const size_t incX )
+ {
+ if(incX == 1) {
+ float p, invp;
+ p=(float)F.cardinality();
+ invp=a/p;
+ vectorised::scalp(X,a,X,N,p,invp,F.minElement(),F.maxElement());
+ }
+ else {
+ float * Xi = X ;
+ for (; Xi < X+N*incX; Xi+=incX )
+ F.mulin( *Xi , a);
+
+ }
+ }
+
+ template<>
+ inline void
+ fscalin( const Givaro::Modular<double>& F , const size_t N,
+ const double a,
+ double * X, const size_t incX )
+ {
+ if(incX == 1) {
+ double p, invp;
+ p=(double)F.cardinality();
+ invp=a/p;
+ vectorised::scalp(X,a,X,N,p,invp,0,p-1);
+ }
+ else {
+ double * Xi = X ;
+ for (; Xi < X+N*incX; Xi+=incX )
+ F.mulin( *Xi , a);
+
+ }
+ }
+
+ template<>
+ inline void
+ fscal( const Givaro::Modular<double>& F , const size_t N,
+ const double a,
+ const double * X, const size_t incX,
+ double * Y, const size_t incY )
+ {
+ if(incX == 1 && incY==1) {
+ double p, invp;
+ p=(double)F.cardinality();
+ invp=a/p;
+ vectorised::scalp(Y,a,X,N,p,invp,0,p-1);
+ }
+ else {
+ const double * Xi = X ;
+ double * Yi = Y ;
+ for (; Xi < X+N*incX; Xi+=incX,Yi+=incY )
+ F.mul(*Yi, *Xi , a);
+
+ }
+ }
+
+ template<>
+ inline void
+ fscalin( const Givaro::ModularBalanced<double>& F , const size_t N,
+ const double a,
+ double * X, const size_t incX )
+ {
+ if(incX == 1) {
+ double p, invp;
+ p=(double)F.cardinality();
+ invp=a/p;
+ vectorised::scalp(X,a,X,N,p,invp,F.minElement(),F.maxElement());
+ }
+ else {
+ double * Xi = X ;
+ for (; Xi < X+N*incX; Xi+=incX )
+ F.mulin( *Xi , a);
+
+ }
+ }
+
+
+ /***************************/
+ /* LEVEL 2 */
+ /***************************/
+
+
+
+ template<class Field>
+ void
+ fscalin (const Field& F, const size_t m , const size_t n,
+ const typename Field::Element a,
+ typename Field::Element_ptr A, const size_t lda)
+ {
+ if (F.isOne(a)) {
+ return ;
+ }
+ else if (F.isZero(a)) {
+ fzero(F,m,n,A,lda);
+ }
+ else if (F.isMOne(a)) {
+ fnegin(F,m,n,A,lda);
+ }
+ else {
+ if (lda == n) {
+ fscalin(F,n*m,a,A,1);
+ }
+ else {
+ for (size_t i = 0 ; i < m ; ++i)
+ fscalin(F,n,a,A+i*lda,1);
+ }
+
+ return;
+ }
+ }
+
+ template<class Field>
+ void
+ fscal (const Field& F, const size_t m , const size_t n,
+ const typename Field::Element a,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb)
+ {
+ if (F.isOne(a)) {
+ fassign(F,m,n,A,lda,B,ldb) ;
+ }
+ else if (F.isZero(a)) {
+ fzero(F,m,n,B,ldb);
+ }
+ else if (F.isMOne(a)) {
+ fneg(F,m,n,A,lda,B,ldb);
+ }
+ else {
+ if (n == lda && m == lda)
+ fscal(F,m*n,a,A,lda,B,ldb);
+ else {
+ for (size_t i = 0; i < m ; ++i)
+ fscal(F,n,a,A+i*lda,1,B+i*ldb,1);
+ }
+ }
+
+ return;
+ }
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fscal_INL
diff --git a/fflas-ffpack/fflas/fflas_fscal_mp.inl b/fflas-ffpack/fflas/fflas_fscal_mp.inl
new file mode 100644
index 0000000..e27d94f
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_fscal_mp.inl
@@ -0,0 +1,131 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2014 FFLAS-FFPACK group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fscal_mp_INL
+#define __FFLASFFPACK_fscal_mp_INL
+
+#include "fflas-ffpack/field/rns-integer.h"
+#include "fflas_fscal.h"
+#include "fflas_fgemm.inl"
+namespace FFLAS {
+
+ /*
+ * specialization for the field RNSInteger<rns_double>
+ */
+
+ // level 1 : fscalin
+ template<>
+ inline void fscalin(const FFPACK::RNSInteger<FFPACK::rns_double> &F, const size_t n,
+ const FFPACK::rns_double::Element alpha,
+ FFPACK::rns_double::Element_ptr A, const size_t inc)
+ {
+ for (size_t i=0;i<F.size();i++)
+ fscalin(F.rns()._field_rns[i], n, alpha._ptr[i*alpha._stride], A._ptr+i*A._stride,inc);
+ }
+ // level 1 : fscal
+ template<>
+ inline void fscal(const FFPACK::RNSInteger<FFPACK::rns_double> &F, const size_t n,
+ const FFPACK::rns_double::Element alpha,
+ FFPACK::rns_double::ConstElement_ptr A, const size_t Ainc,
+ FFPACK::rns_double::Element_ptr B, const size_t Binc)
+ {
+ for (size_t i=0;i<F.size();i++)
+ fscal(F.rns()._field_rns[i], n, alpha._ptr[i*alpha._stride], A._ptr+i*A._stride,Ainc, B._ptr+i*B._stride,Binc);
+ }
+ // level 2 : fscalin
+ template<>
+ inline void fscalin(const FFPACK::RNSInteger<FFPACK::rns_double> &F, const size_t m, const size_t n,
+ const FFPACK::rns_double::Element alpha,
+ FFPACK::rns_double::Element_ptr A, const size_t lda) {
+ for (size_t i=0;i<F.size();i++)
+ fscalin(F.rns()._field_rns[i], m, n, alpha._ptr[i*alpha._stride], A._ptr+i*A._stride,lda);
+ }
+ // level 2 : fscal
+ template<>
+ inline void fscal(const FFPACK::RNSInteger<FFPACK::rns_double> &F, const size_t m, const size_t n,
+ const FFPACK::rns_double::Element alpha,
+ FFPACK::rns_double::ConstElement_ptr A, const size_t lda,
+ FFPACK::rns_double::Element_ptr B, const size_t ldb) {
+ for (size_t i=0;i<F.size();i++)
+ fscal(F.rns()._field_rns[i], m, n, alpha._ptr[i*alpha._stride], A._ptr+i*A._stride, lda, B._ptr+i*B._stride, ldb);
+
+ }
+}
+
+#include "fflas-ffpack/fflas/fflas_freduce_mp.inl"
+
+namespace FFLAS {
+ /*
+ * specialization for the field RNSIntegerMod<rns_double>
+ */
+
+ // level 1 : fscalin
+ template<>
+ inline void fscalin(const FFPACK::RNSIntegerMod<FFPACK::rns_double> &F, const size_t n,
+ const typename FFPACK::RNSIntegerMod<FFPACK::rns_double>::Element alpha,
+ typename FFPACK::RNSIntegerMod<FFPACK::rns_double>::Element_ptr A, const size_t inc)
+ {
+ fscalin(F.delayed(),n,alpha,A,inc);
+ freduce (F, n, A, inc);
+ }
+ // level 1 : fscal
+ template<>
+ inline void fscal(const FFPACK::RNSIntegerMod<FFPACK::rns_double> &F, const size_t n,
+ const FFPACK::rns_double::Element alpha,
+ FFPACK::rns_double::ConstElement_ptr A, const size_t Ainc,
+ FFPACK::rns_double::Element_ptr B, const size_t Binc)
+ {
+ fscal(F.delayed(),n,alpha,A,Ainc,B,Binc);
+ freduce (F, n, B, Binc);
+ }
+ // level 2 : fscalin
+ template<>
+ inline void fscalin(const FFPACK::RNSIntegerMod<FFPACK::rns_double> &F, const size_t m, const size_t n,
+ const FFPACK::rns_double::Element alpha,
+ FFPACK::rns_double::Element_ptr A, const size_t lda)
+ {
+ fscalin(F.delayed(),m,n,alpha,A,lda);
+ freduce (F, m, n, A, lda);
+ }
+ // level 2 : fscal
+ template<>
+ inline void fscal(const FFPACK::RNSIntegerMod<FFPACK::rns_double> &F, const size_t m, const size_t n,
+ const FFPACK::rns_double::Element alpha,
+ FFPACK::rns_double::ConstElement_ptr A, const size_t lda,
+ FFPACK::rns_double::Element_ptr B, const size_t ldb)
+ {
+ fscal(F.delayed(),m,n,alpha,A,lda,B,ldb);
+ freduce (F, m, n, B, ldb);
+ }
+
+} //end of namespace FFLAS
+
+
+#endif
+
diff --git a/fflas-ffpack/fflas/fflas_ftrmm.inl b/fflas-ffpack/fflas/fflas_ftrmm.inl
index 28e2de8..cc80481 100644
--- a/fflas-ffpack/fflas/fflas_ftrmm.inl
+++ b/fflas-ffpack/fflas/fflas_ftrmm.inl
@@ -5,20 +5,20 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -35,7 +35,6 @@ namespace FFLAS {
// ftrmm: TRiangular Matrix Multiply
// Computes B <- alpha.op(A).B, B <- alpha.B.op(A)
// B is M*N, A is M*M if Side==FflasLeft, N*N if Side==FflasRight
-// Warning : unsafe with Trans == FflasTrans (debugging in progress)
// //---------------------------------------------------------------------
template<class Field>
inline void
@@ -45,8 +44,8 @@ ftrmm (const Field& F, const FFLAS_SIDE Side,
const FFLAS_DIAG Diag,
const size_t M, const size_t N,
const typename Field::Element alpha,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb)
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb)
{
if (!M || !N ) return;
@@ -104,9 +103,7 @@ ftrmm (const Field& F, const FFLAS_SIDE Side,
}
}
if (!F.isOne(alpha))
- for (size_t i=0; i< M; ++i)
- for (size_t j=0; j<N; ++j)
- F.mulin(*(B+i*ldb+j),alpha);
+ fscalin(F,M,N,alpha,B,ldb);
}
diff --git a/fflas-ffpack/fflas/fflas_ftrmm_src.inl b/fflas-ffpack/fflas/fflas_ftrmm_src.inl
index efb2fc4..58659eb 100644
--- a/fflas-ffpack/fflas/fflas_ftrmm_src.inl
+++ b/fflas-ffpack/fflas/fflas_ftrmm_src.inl
@@ -30,7 +30,17 @@
#ifdef __FFLAS__TRANSPOSE
#define __FFLAS__Acolinc lda
#define __FFLAS__Arowinc 1
+ #ifdef __FFLAS__LOW
+ #define __FFLAS__UPPER
+ #else
+ #define __FFLAS__LOWER
+ #endif
#else
+ #ifdef __FFLAS__LOW
+ #define __FFLAS__LOWER
+ #else
+ #define __FFLAS__UPPER
+ #endif
#define __FFLAS__Acolinc 1
#define __FFLAS__Arowinc lda
#endif
@@ -48,7 +58,7 @@
#define __FFLAS__Nupdate N
#define __FFLAS__Bdim N
#define __FFLAS__Bnorminc 1
- #ifdef __FFLAS__LOW
+ #ifdef __FFLAS__LOWER
#define __FFLAS__Atriang A + (nbblocsplit - (i + 1)) * nsplit * (lda + 1)
#define __FFLAS__Aupdate __FFLAS__Atriang + nsplit * __FFLAS__Arowinc
#define __FFLAS__Arest A + nbblocsplit * nsplit * (lda+1)
@@ -86,7 +96,7 @@
#define __FFLAS__Nupdate nrestsplit + i * nsplit
#define __FFLAS__Bdim M
#define __FFLAS__Bnorminc ldb
- #ifdef __FFLAS__UP
+ #ifdef __FFLAS__UPPER
#define __FFLAS__Atriang A + (nbblocsplit - (i + 1)) * nsplit * (lda + 1)
#define __FFLAS__Aupdate __FFLAS__Atriang + nsplit * __FFLAS__Acolinc
#define __FFLAS__Arest A + nbblocsplit * nsplit * (lda+1)
@@ -133,13 +143,13 @@
#ifdef __FFLAS__DOUBLE
#define __FFLAS__ELEMENT double
- #define __FFLAS__DOMAIN DoubleDomain
+ #define __FFLAS__DOMAIN Givaro::DoubleDomain
#define __FFLAS__BLAS_PREFIX d
#endif
#ifdef __FFLAS__FLOAT
#define __FFLAS__ELEMENT float
- #define __FFLAS__DOMAIN FloatDomain
+ #define __FFLAS__DOMAIN Givaro::FloatDomain
#define __FFLAS__BLAS_PREFIX s
#endif
@@ -156,8 +166,8 @@ public:
template <class Field>
void delayed (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb)
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb)
{
Mjoin(cblas_,Mjoin(__FFLAS__BLAS_PREFIX,trmm))
(CblasRowMajor,
@@ -166,27 +176,18 @@ void delayed (const Field& F, const size_t M, const size_t N,
Mjoin (Cblas, __FFLAS__TRANS),
Mjoin (Cblas, __FFLAS__DIAG),
(int)M, (int)N, 1.0, A, (int)lda, B, (int)ldb );
- for (size_t i = 0; i < M; ++i)
- for (size_t j = 0; j < N; ++j)
- F.init (*(B + i*ldb + j), *(B + i*ldb + j));
+ freduce(F, M, N, B, ldb);
}
template <class Field>
void operator () (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb)
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb)
{
if (!M || !N ) return;
- size_t nsplit = DotProdBound (F, 0, F.one,
-#ifdef __FFLAS__DOUBLE
- FflasDouble
-#else
- FflasFloat
-#endif
- );
-
+ size_t nsplit = DotProdBoundClassic (F, F.one);
size_t nbblocsplit = (__FFLAS__Na-1) / nsplit;
size_t nrestsplit = ((__FFLAS__Na-1) % nsplit) +1;
FFLASFFPACK_check(__FFLAS__Na == nsplit*nbblocsplit+nrestsplit);
@@ -224,13 +225,13 @@ public:
template<class Field>
void operator() (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb)
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb)
{
if (__FFLAS__Na == 1)
#ifdef __FFLAS__NONUNIT
- fscal(F, __FFLAS__Bdim, *A, B, __FFLAS__Bnorminc);
+ fscalin(F, __FFLAS__Bdim, *A, B, __FFLAS__Bnorminc);
#else
;
#endif
@@ -256,6 +257,11 @@ void operator() (const Field& F, const size_t M, const size_t N,
#endif // __FFLAS__GENERIC
+#ifdef __FFLAS__LOWER
+ #undef __FFLAS__LOWER
+#else
+ #undef __FFLAS__UPPER
+#endif
#undef __FFLAS__UPLO
#undef __FFLAS__DIAG
#undef __FFLAS__SIDE
diff --git a/fflas-ffpack/fflas/fflas_ftrsm.inl b/fflas-ffpack/fflas/fflas_ftrsm.inl
index 3a8dabe..853b645 100644
--- a/fflas-ffpack/fflas/fflas_ftrsm.inl
+++ b/fflas-ffpack/fflas/fflas_ftrsm.inl
@@ -6,20 +6,20 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -32,11 +32,11 @@
namespace FFLAS {
+
//---------------------------------------------------------------------
// ftrsm: TRiangular System solve with matrix
// Computes B <- alpha.op(A^-1).B, B <- alpha.B.op(A^-1)
// B is M*N, A is M*M if Side==FflasLeft, N*N if Side==FflasRight
- // Warning : unsafe with Trans == FflasTrans (debugging in progress)
//---------------------------------------------------------------------
template<class Field>
inline void
@@ -46,69 +46,135 @@ namespace FFLAS {
const FFLAS_DIAG Diag,
const size_t M, const size_t N,
const typename Field::Element alpha,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb)
+#ifdef __FFLAS__TRSM_READONLY
+ typename Field::ConstElement_ptr
+#else
+ typename Field::Element_ptr
+#endif
+ A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb)
{
- if (!M || !N ) return;
+ ParSeqHelper::Sequential PSH;
+ TRSMHelper<StructureHelper::Recursive, ParSeqHelper::Sequential> H(PSH);
+ ftrsm(F, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, H);
+ }
+ template<class Field>
+ inline void
+ ftrsm (const Field& F, const FFLAS_SIDE Side,
+ const FFLAS_UPLO Uplo,
+ const FFLAS_TRANSPOSE TransA,
+ const FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ const typename Field::Element alpha,
+#ifdef __FFLAS__TRSM_READONLY
+ typename Field::ConstElement_ptr
+#else
+ typename Field::Element_ptr
+#endif
+ A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb,
+ const ParSeqHelper::Sequential& PSH)
+ {
+ TRSMHelper<StructureHelper::Recursive, ParSeqHelper::Sequential> H(PSH);
+ ftrsm(F, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, H);
+ }
+
+ template<class Field, class Cut, class Param>
+ inline void
+ ftrsm (const Field& F, const FFLAS_SIDE Side,
+ const FFLAS_UPLO Uplo,
+ const FFLAS_TRANSPOSE TransA,
+ const FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ const typename Field::Element alpha,
+#ifdef __FFLAS__TRSM_READONLY
+ typename Field::ConstElement_ptr
+#else
+ typename Field::Element_ptr
+#endif
+ A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb,
+ const ParSeqHelper::Parallel<Cut,Param>& PSH)
+ {
+ TRSMHelper<StructureHelper::Iterative, ParSeqHelper::Parallel<Cut,Param> > H(PSH);
+ ftrsm(F, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, H);
+ }
+
+ template<class Field, class ParSeqTrait=ParSeqHelper::Sequential>
+ inline void
+ ftrsm (const Field& F, const FFLAS_SIDE Side,
+ const FFLAS_UPLO Uplo,
+ const FFLAS_TRANSPOSE TransA,
+ const FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ const typename Field::Element alpha,
+#ifdef __FFLAS__TRSM_READONLY
+ typename Field::ConstElement_ptr
+#else
+ typename Field::Element_ptr
+#endif
+ A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb,
+ TRSMHelper<StructureHelper::Recursive, ParSeqTrait> & H)
+ {
+ if (!M || !N ) return;
if ( Side==FflasLeft ){
if ( Uplo==FflasUpper){
if (TransA == FflasNoTrans){
if (Diag == FflasUnit)
- Protected::ftrsmLeftUpperNoTransUnit<typename Field::Element> ()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmLeftUpperNoTransUnit<typename Field::Element> ()(F,M,N,A,lda,B,ldb,H);
else
- Protected::ftrsmLeftUpperNoTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmLeftUpperNoTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
} else {
if (Diag == FflasUnit)
- Protected::ftrsmLeftUpperTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmLeftUpperTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
else
- Protected::ftrsmLeftUpperTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmLeftUpperTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
}
} else {
if (TransA == FflasNoTrans){
if (Diag == FflasUnit)
- Protected::ftrsmLeftLowerNoTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmLeftLowerNoTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
else
- Protected::ftrsmLeftLowerNoTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmLeftLowerNoTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
} else {
if (Diag == FflasUnit)
- Protected::ftrsmLeftLowerTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmLeftLowerTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
else
- Protected::ftrsmLeftLowerTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmLeftLowerTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
}
}
} else {
if ( Uplo == FflasUpper){
if (TransA == FflasNoTrans){
if (Diag == FflasUnit)
- Protected::ftrsmRightUpperNoTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmRightUpperNoTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
else
- Protected::ftrsmRightUpperNoTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmRightUpperNoTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
} else {
if (Diag == FflasUnit)
- Protected::ftrsmRightUpperTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmRightUpperTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
else
- Protected::ftrsmRightUpperTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmRightUpperTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
}
} else {
if (TransA == FflasNoTrans){
if (Diag == FflasUnit)
- Protected::ftrsmRightLowerNoTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmRightLowerNoTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
else
- Protected::ftrsmRightLowerNoTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmRightLowerNoTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
} else {
if (Diag == FflasUnit)
- Protected::ftrsmRightLowerTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmRightLowerTransUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
else
- Protected::ftrsmRightLowerTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb);
+ Protected::ftrsmRightLowerTransNonUnit<typename Field::Element>()(F,M,N,A,lda,B,ldb,H);
}
}
}
if (!F.isOne(alpha))
- for (size_t i=0; i< M; ++i)
- for (size_t j=0; j<N; ++j)
- F.mulin(*(B+i*ldb+j),alpha);
+ fscalin(F,M,N,alpha,B,ldb);
}
diff --git a/fflas-ffpack/fflas/fflas_ftrsm_mp.inl b/fflas-ffpack/fflas/fflas_ftrsm_mp.inl
new file mode 100644
index 0000000..268cd03
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_ftrsm_mp.inl
@@ -0,0 +1,357 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+/** @file fflas/fflas_ftrsm_mp.inl
+ * @brief triangular system with matrix right hand side over multiprecision domain (either over Z or over Z/pZ)
+ */
+#ifndef __FFPACK_ftrsm_mp_INL
+#define __FFPACK_ftrsm_mp_INL
+
+#include <cmath>
+#include <givaro/modular-integer.h>
+#include <givaro/givinteger.h>
+
+#include "fflas-ffpack/fflas/fflas_bounds.inl"
+#include "fflas-ffpack/fflas/fflas_level3.inl"
+#include "fflas-ffpack/field/rns-integer-mod.h"
+#include "fflas-ffpack/field/rns-integer.h"
+
+namespace FFLAS {
+
+
+ inline void ftrsm (const Givaro::Modular<Givaro::Integer> & F,
+ const FFLAS_SIDE Side,
+ const FFLAS_UPLO Uplo,
+ const FFLAS_TRANSPOSE TransA,
+ const FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ const Givaro::Integer alpha,
+ const Givaro::Integer * A, const size_t lda,
+ Givaro::Integer * B, const size_t ldb){
+
+
+#ifdef BENCH_PERF_TRSM_MP
+ double t_init=0, t_trsm=0, t_mod=0, t_rec=0;
+ FFLAS::Timer chrono;
+ chrono.start();
+#endif
+ Givaro::Integer p;
+ F.cardinality(p);
+ size_t logp=p.bitsize();
+ size_t K;
+ if (Side == FFLAS::FflasLeft)
+ K=M;
+ else
+ K=N;
+
+ if (K==0) return;
+
+ // compute bit size of feasible prime
+ size_t _k=std::max(K,logp/20), lk=0;
+ while ( _k ) {_k>>=1; ++lk;}
+ size_t prime_bitsize= (53-lk)>>1;
+
+ // construct rns basis
+ Givaro::Integer maxC= (p-1)*(p-1)*(p-1)*uint64_t(K);
+ size_t n_pr =maxC.bitsize()/prime_bitsize;
+ maxC=(p-1)*(p-1)*uint64_t(K)*(1<<prime_bitsize)*uint64_t(n_pr);
+ FFPACK::rns_double RNS(maxC, prime_bitsize, true);
+ FFPACK::RNSIntegerMod<FFPACK::rns_double> Zp(p, RNS);
+#ifdef BENCH_PERF_TRSM_MP
+ chrono.stop();
+ t_init+=chrono.usertime();
+ chrono.clear();chrono.start();
+#endif
+ // compute A and B in RNS
+ FFPACK::rns_double::Element_ptr Ap,Bp;
+ Ap = FFLAS::fflas_new(Zp,K,K);
+ Bp = FFLAS::fflas_new(Zp,M,N);
+
+ if (Side == FFLAS::FflasLeft){
+ finit_rns(Zp,K,K,(logp/16)+(logp%16?1:0),A,lda,Ap);
+ finit_rns(Zp,M,N,(logp/16)+(logp%16?1:0),B,ldb,Bp);
+ }
+ else {
+ finit_trans_rns(Zp,K,K,(logp/16)+(logp%16?1:0),A,lda,Ap);
+ finit_trans_rns(Zp,M,N,(logp/16)+(logp%16?1:0),B,ldb,Bp);
+ }
+#ifdef BENCH_PERF_TRSM_MP
+ chrono.stop();
+ t_mod+=chrono.usertime();
+ chrono.clear();chrono.start();
+#endif
+
+ // call ftrsm in rns
+ //ftrsm(Zp, Side, Uplo, TransA, Diag, M, N, Zp.one, Ap, K, Bp, N);
+ if (Side == FFLAS::FflasLeft)
+ ftrsm(Zp, Side, Uplo, TransA, Diag, M, N, Zp.one, Ap, K, Bp, N);
+ else {
+ if (Uplo == FFLAS::FflasUpper)
+ ftrsm(Zp, FFLAS::FflasLeft, FFLAS::FflasLower, TransA, Diag, N, M, Zp.one, Ap, K, Bp, M);
+ else
+ ftrsm(Zp, FFLAS::FflasLeft, FFLAS::FflasUpper, TransA, Diag, N, M, Zp.one, Ap, K, Bp, M);
+ }
+#ifdef BENCH_PERF_TRSM_MP
+ chrono.stop();
+ t_trsm+=chrono.usertime();
+ chrono.clear();chrono.start();
+#endif
+ // reconstruct the result
+ if (Side == FFLAS::FflasLeft)
+ fconvert_rns(Zp,M,N,F.zero,B,ldb,Bp);
+ else{
+ fconvert_trans_rns(Zp,M,N,F.zero,B,ldb,Bp);
+ }
+
+ // reduce it modulo p
+ freduce (F, M, N, B, ldb);
+ // scale it with alpha
+ if (!F.isOne(alpha))
+ fscalin(F,M,N,alpha,B,ldb);
+
+#ifdef BENCH_PERF_TRSM_MP
+ chrono.stop();
+ t_rec+=chrono.usertime();
+ cout<<"FTRSM RNS PERF:"<<endl;
+ cout<<" *** init : "<<t_init<<endl;
+ cout<<" *** rns mod : "<<t_mod<<endl;
+ cout<<" *** rns trsm : "<<t_trsm<<" ( igemm="<<Zp.t_igemm<<" scal="<<Zp.t_scal<<" modp="<<Zp.t_modp<<endl;;
+ cout<<" *** rns rec : "<<t_rec<<endl;
+#endif
+
+ FFLAS::fflas_delete(Ap);
+ FFLAS::fflas_delete(Bp);
+ }
+
+ /* bb: do not use CBLAS_ORDER, or make it compatible with MLK */
+
+ inline void cblas_imptrsm(const enum FFLAS_ORDER Order,
+ const enum FFLAS_SIDE Side,
+ const enum FFLAS_UPLO Uplo,
+ const enum FFLAS_TRANSPOSE TransA,
+ const enum FFLAS_DIAG Diag,
+ const int M, const int N, const FFPACK::rns_double_elt alpha,
+ FFPACK::rns_double_elt_cstptr A, const int lda,
+ FFPACK::rns_double_elt_ptr B, const int ldb) {}
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+ namespace Protected {
+
+ template<>
+ inline size_t TRSMBound (const FFPACK::RNSIntegerMod<FFPACK::rns_double> &F)
+ {
+ return 1;
+ }
+
+ template <>
+ inline size_t DotProdBoundClassic (const FFPACK::RNSIntegerMod<FFPACK::rns_double>& F,
+ const FFPACK::rns_double_elt& beta)
+ {
+ Givaro::Integer p,b,M;
+ F.cardinality(p);
+ p--;
+ F.convert(b,beta);
+ M=F.rns()._M;
+ uint64_t kmax= (M-b*p)/(p*p);
+ return (size_t)std::max(uint64_t(1),kmax);
+ //return kmax;
+ }
+
+#ifndef __FTRSM_MP_FAST
+#define __FFLAS_MULTIPRECISION
+
+#define __FFLAS__LEFT
+#define __FFLAS__UP
+#define __FFLAS__NOTRANSPOSE
+#define __FFLAS__NONUNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__LEFT
+#undef __FFLAS__UP
+#undef __FFLAS__NOTRANSPOSE
+#undef __FFLAS__NONUNIT
+
+
+
+#define __FFLAS__LEFT
+#define __FFLAS__UP
+#define __FFLAS__NOTRANSPOSE
+#define __FFLAS__UNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__LEFT
+#undef __FFLAS__UP
+#undef __FFLAS__NOTRANSPOSE
+#undef __FFLAS__UNIT
+
+#define __FFLAS__LEFT
+#define __FFLAS__UP
+#define __FFLAS__TRANSPOSE
+#define __FFLAS__NONUNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__LEFT
+#undef __FFLAS__UP
+#undef __FFLAS__TRANSPOSE
+#undef __FFLAS__NONUNIT
+
+#define __FFLAS__LEFT
+#define __FFLAS__UP
+#define __FFLAS__TRANSPOSE
+#define __FFLAS__UNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__LEFT
+#undef __FFLAS__UP
+#undef __FFLAS__TRANSPOSE
+#undef __FFLAS__UNIT
+
+
+#define __FFLAS__LEFT
+#define __FFLAS__LOW
+#define __FFLAS__NOTRANSPOSE
+#define __FFLAS__NONUNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__LEFT
+#undef __FFLAS__LOW
+#undef __FFLAS__NOTRANSPOSE
+#undef __FFLAS__NONUNIT
+
+#define __FFLAS__LEFT
+#define __FFLAS__LOW
+#define __FFLAS__NOTRANSPOSE
+#define __FFLAS__UNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__LEFT
+#undef __FFLAS__LOW
+#undef __FFLAS__NOTRANSPOSE
+#undef __FFLAS__UNIT
+
+#define __FFLAS__LEFT
+#define __FFLAS__LOW
+#define __FFLAS__TRANSPOSE
+#define __FFLAS__NONUNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__LEFT
+#undef __FFLAS__LOW
+#undef __FFLAS__TRANSPOSE
+#undef __FFLAS__NONUNIT
+
+#define __FFLAS__LEFT
+#define __FFLAS__LOW
+#define __FFLAS__TRANSPOSE
+#define __FFLAS__UNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__LEFT
+#undef __FFLAS__LOW
+#undef __FFLAS__TRANSPOSE
+#undef __FFLAS__UNIT
+
+#define __FFLAS__RIGHT
+#define __FFLAS__UP
+#define __FFLAS__NOTRANSPOSE
+#define __FFLAS__NONUNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__RIGHT
+#undef __FFLAS__UP
+#undef __FFLAS__NOTRANSPOSE
+#undef __FFLAS__NONUNIT
+
+#define __FFLAS__RIGHT
+#define __FFLAS__UP
+#define __FFLAS__NOTRANSPOSE
+#define __FFLAS__UNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__RIGHT
+#undef __FFLAS__UP
+#undef __FFLAS__NOTRANSPOSE
+#undef __FFLAS__UNIT
+
+#define __FFLAS__RIGHT
+#define __FFLAS__UP
+#define __FFLAS__TRANSPOSE
+#define __FFLAS__NONUNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__RIGHT
+#undef __FFLAS__UP
+#undef __FFLAS__TRANSPOSE
+#undef __FFLAS__NONUNIT
+
+#define __FFLAS__RIGHT
+#define __FFLAS__UP
+#define __FFLAS__TRANSPOSE
+#define __FFLAS__UNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__RIGHT
+#undef __FFLAS__UP
+#undef __FFLAS__TRANSPOSE
+#undef __FFLAS__UNIT
+
+
+#define __FFLAS__RIGHT
+#define __FFLAS__LOW
+#define __FFLAS__NOTRANSPOSE
+#define __FFLAS__NONUNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__RIGHT
+#undef __FFLAS__LOW
+#undef __FFLAS__NOTRANSPOSE
+#undef __FFLAS__NONUNIT
+
+#define __FFLAS__RIGHT
+#define __FFLAS__LOW
+#define __FFLAS__NOTRANSPOSE
+#define __FFLAS__UNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__RIGHT
+#undef __FFLAS__LOW
+#undef __FFLAS__NOTRANSPOSE
+#undef __FFLAS__UNIT
+
+#define __FFLAS__RIGHT
+#define __FFLAS__LOW
+#define __FFLAS__TRANSPOSE
+#define __FFLAS__NONUNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__RIGHT
+#undef __FFLAS__LOW
+#undef __FFLAS__TRANSPOSE
+#undef __FFLAS__NONUNIT
+
+#define __FFLAS__RIGHT
+#define __FFLAS__LOW
+#define __FFLAS__TRANSPOSE
+#define __FFLAS__UNIT
+#include "fflas_ftrsm_src.inl"
+#undef __FFLAS__RIGHT
+#undef __FFLAS__LOW
+#undef __FFLAS__TRANSPOSE
+#undef __FFLAS__UNIT
+#endif // #ifdef __FTRSM_MP_FAST
+
+ } // end of namespace protected
+#endif // #ifndef DOXYGEN_SHOULD_SKIP_THIS
+} // END OF NAMESPACE FFLAS
+
+#endif
+
diff --git a/fflas-ffpack/fflas/fflas_ftrsm_src.inl b/fflas-ffpack/fflas/fflas_ftrsm_src.inl
index feb4532..faf3708 100644
--- a/fflas-ffpack/fflas/fflas_ftrsm_src.inl
+++ b/fflas-ffpack/fflas/fflas_ftrsm_src.inl
@@ -27,7 +27,17 @@
#ifdef __FFLAS__TRANSPOSE
#define __FFLAS__Acolinc lda
#define __FFLAS__Arowinc 1
+ #ifdef __FFLAS__LOW
+ #define __FFLAS__UPPER
+ #else
+ #define __FFLAS__LOWER
+ #endif
#else
+ #ifdef __FFLAS__LOW
+ #define __FFLAS__LOWER
+ #else
+ #define __FFLAS__UPPER
+ #endif
#define __FFLAS__Acolinc 1
#define __FFLAS__Arowinc lda
#endif
@@ -36,6 +46,13 @@
#define __FFLAS__SIDE Left
#define __FFLAS__Na M
#define __FFLAS__Nb N
+ #ifdef __FFLAS__TRANSPOSE
+ #define __FFLAS__Acopcolinc __FFLAS__Na
+ #define __FFLAS__Acoprowinc 1
+ #else // __FFLAS__NOTRANSPOSE
+ #define __FFLAS__Acopcolinc 1
+ #define __FFLAS__Acoprowinc __FFLAS__Na
+ #endif
#define __FFLAS__Mb nsplit
#define __FFLAS__Nb2 N
#define __FFLAS__Mb2 M-nsplit
@@ -44,14 +61,16 @@
#define __FFLAS__Mupdate M-(i+1)*nsplit
#define __FFLAS__Nupdate N
#define __FFLAS__Anorminc __FFLAS__Acolinc
+ #define __FFLAS__Acopnorminc __FFLAS__Acopcolinc
#define __FFLAS__Bnorminc 1
#define __FFLAS__Bnormnext ldb
#define __FFLAS__Bdim N
- #ifdef __FFLAS__LOW
+ #ifdef __FFLAS__LOWER
#define __FFLAS__Atriang A + i * nsplit * (lda + 1)
#define __FFLAS__Aupdate A + i * nsplit * (lda + 1) + nsplit*__FFLAS__Arowinc
#define __FFLAS__Arest A + (__FFLAS__Na - nrestsplit) * (lda + 1)
#define __FFLAS__Anormnext __FFLAS__Arowinc
+ #define __FFLAS__Acopnormnext __FFLAS__Acoprowinc
#define __FFLAS__Bupdate B + (i+1)*nsplit*ldb
#define __FFLAS__Brec B + i * nsplit * ldb
#define __FFLAS__Brest B + (M - nrestsplit) * ldb
@@ -61,11 +80,12 @@
#define __FFLAS__B1 B
#define __FFLAS__B2 B + nsplit * ldb
#define __FFLAS__Normdim i
-#else
+#else // __FFLAS__UPPER
#define __FFLAS__Atriang A + (__FFLAS__Na - (i + 1) * nsplit) * (lda + 1)
#define __FFLAS__Aupdate A + (__FFLAS__Na - (i + 1) * nsplit) * __FFLAS__Acolinc
#define __FFLAS__Arest A
#define __FFLAS__Anormnext lda + 1
+ #define __FFLAS__Acopnormnext __FFLAS__Na + 1
#define __FFLAS__Bupdate B
#define __FFLAS__Brec B + (M - (i + 1) * nsplit) * ldb
#define __FFLAS__Brest B
@@ -76,10 +96,17 @@
#define __FFLAS__B2 B
#define __FFLAS__Normdim __FFLAS__Na-i-1
#endif
-#else
+#else // __FFLAS__RIGHT
#define __FFLAS__SIDE Right
#define __FFLAS__Na N
#define __FFLAS__Nb nsplit
+ #ifdef __FFLAS__TRANSPOSE
+ #define __FFLAS__Acopcolinc __FFLAS__Na
+ #define __FFLAS__Acoprowinc 1
+ #else // __FFLAS__NOTRANSPOSE
+ #define __FFLAS__Acopcolinc 1
+ #define __FFLAS__Acoprowinc __FFLAS__Na
+ #endif
#define __FFLAS__Mb M
#define __FFLAS__Mb2 M
#define __FFLAS__Nb2 N-nsplit
@@ -88,14 +115,16 @@
#define __FFLAS__Mupdate M
#define __FFLAS__Nupdate N - (i + 1) * nsplit
#define __FFLAS__Anorminc __FFLAS__Arowinc
+ #define __FFLAS__Acopnorminc __FFLAS__Acoprowinc
#define __FFLAS__Bnorminc ldb
#define __FFLAS__Bnormnext 1
#define __FFLAS__Bdim M
- #ifdef __FFLAS__UP
+ #ifdef __FFLAS__UPPER
#define __FFLAS__Atriang A + i * nsplit * (lda + 1)
#define __FFLAS__Aupdate A + i * nsplit * (lda + 1) + nsplit * __FFLAS__Acolinc
#define __FFLAS__Arest A + (__FFLAS__Na - nrestsplit) * (lda + 1)
#define __FFLAS__Anormnext __FFLAS__Acolinc
+ #define __FFLAS__Acopnormnext __FFLAS__Acopcolinc
#define __FFLAS__Bupdate B + (i + 1) * nsplit
#define __FFLAS__Brec B + i * nsplit
#define __FFLAS__Brest B + (N - nrestsplit)
@@ -105,18 +134,19 @@
#define __FFLAS__B1 B
#define __FFLAS__B2 B + nsplit
#define __FFLAS__Normdim i
-#else
+#else // __FFLAS__LOWER
#define __FFLAS__Atriang A + (__FFLAS__Na - (i + 1) * nsplit) * (lda + 1)
#define __FFLAS__Aupdate A + (__FFLAS__Na - (i + 1) * nsplit) * __FFLAS__Arowinc
#define __FFLAS__Arest A
#define __FFLAS__Anormnext lda + 1
+ #define __FFLAS__Acopnormnext __FFLAS__Na + 1
#define __FFLAS__Bupdate B
- #define __FFLAS__Brec B + N - (i + 1) * nsplit
+#define __FFLAS__Brec B + (N - (i + 1) * nsplit)
#define __FFLAS__Brest B
#define __FFLAS__A1 A + (__FFLAS__Na - nsplit) * (lda + 1)
#define __FFLAS__A2 A + (__FFLAS__Na - nsplit) * __FFLAS__Arowinc
#define __FFLAS__A3 A
- #define __FFLAS__B1 B + N - nsplit
+#define __FFLAS__B1 B + (N - nsplit)
#define __FFLAS__B2 B
#define __FFLAS__Normdim __FFLAS__Na - i -1
#endif
@@ -142,13 +172,13 @@
#ifdef __FFLAS__DOUBLE
#define __FFLAS__ELEMENT double
- #define __FFLAS__DOMAIN DoubleDomain
+ #define __FFLAS__DOMAIN Givaro::DoubleDomain
#define __FFLAS__BLAS_PREFIX d
#endif
#ifdef __FFLAS__FLOAT
#define __FFLAS__ELEMENT float
- #define __FFLAS__DOMAIN FloatDomain
+ #define __FFLAS__DOMAIN Givaro::FloatDomain
#define __FFLAS__BLAS_PREFIX s
#endif
@@ -156,7 +186,11 @@
#define __FFLAS__ELEMENT Element
#endif
-
+#ifdef __FFLAS_MULTIPRECISION
+#define __FFLAS__ELEMENT FFPACK::rns_double_elt
+#define __FFLAS__DOMAIN FFPACK::RNSInteger<FFPACK::rns_double>
+ #define __FFLAS__BLAS_PREFIX imp
+#endif
#ifndef __FFLAS__GENERIC
template <>
@@ -165,109 +199,166 @@ public:
// TRSM with delayed updates: assumes input in Zp and ensures output in Zp.
// The multiple MatMul updates (recursive sequence) are done over Z
-template<class Field>
+template<class Field, class ParSeqTrait>
void delayed (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb,
- const size_t nblas, size_t nbblocsblas)
-{
+#ifdef __FFLAS__TRSM_READONLY
+ typename Field::ConstElement_ptr
+#else //__FFLAS__TRSM_READONLY
+ typename Field::Element_ptr
+#endif //__FFLAS__TRSM_READONLY
+ A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb,
+ const size_t nblas, size_t nbblocsblas,
+ TRSMHelper<StructureHelper::Recursive, ParSeqTrait> & H)
- static __FFLAS__DOMAIN D; // is this safe ??
+{
+
+ //static __FFLAS__DOMAIN D(F); // is this safe ??
+ __FFLAS__DOMAIN D(F); // is this safe ??
if ( __FFLAS__Na <= nblas ){
- for (size_t i=0; i < M; ++i)
- for (size_t j = 0; j < N; ++j)
- F.init( *(B + i*ldb + j), *( B + i*ldb + j));
+ freduce (F, M, N, B, ldb);
+
+#define __FFLAS__Atrsm A
+#define __FFLAS__Atrsm_lda lda
#ifndef __FFLAS__UNIT
+#ifdef __FFLAS__TRSM_READONLY
+ //! @warning this is C99 (-Wno-vla)
+ //typename Field::Element Acop[__FFLAS__Na*__FFLAS__Na];
+ typename Field::Element_ptr Acop = FFLAS::fflas_new(F,__FFLAS__Na,__FFLAS__Na);
+ typename Field::Element_ptr Acopi = Acop;
+#undef __FFLAS__Atrsm
+#undef __FFLAS__Atrsm_lda
+ #define __FFLAS__Atrsm Acop
+ #define __FFLAS__Atrsm_lda __FFLAS__Na
+#endif //__FFLAS__TRSM_READONLY
typename Field::Element inv;
- typename Field::Element * Ai = A, * Bi = B;
+#ifdef __FFLAS__TRSM_READONLY
+ typename Field::ConstElement_ptr
+#else //__FFLAS__TRSM_READONLY
+ typename Field::Element_ptr
+#endif //__FFLAS__TRSM_READONLY
+ Ai = A;
+ typename Field::Element_ptr Bi = B;
#ifdef __FFLAS__LEFT
#ifdef __FFLAS__UP
Ai += __FFLAS__Acolinc;
-#endif
-#endif
+#ifdef __FFLAS__TRSM_READONLY
+ Acopi += __FFLAS__Acopcolinc;
+#endif //__FFLAS__TRSM_READONLY
+#endif //__FFLAS__UP
+#endif //__FFLAS__LEFT
#ifdef __FFLAS__RIGHT
#ifdef __FFLAS__LOW
Ai += __FFLAS__Arowinc;
-#endif
-#endif
+#ifdef __FFLAS__TRSM_READONLY
+ Acopi += __FFLAS__Acoprowinc;
+#endif //__FFLAS__TRSM_READONLY
+#endif //__FFLAS__LOW
+#endif //__FFLAS__RIGHT
for (size_t i = 0; i < __FFLAS__Na; ++i){
#ifdef _FF_DEBUG
if ( F.isZero(*(A+i*(lda+1))) ) throw PreconditionFailed(__func__,__FILE__,__LINE__,"Triangular matrix not invertible");
-#endif
+#endif //_FF_DEBUG
F.inv (inv, *(A + i * (lda+1)));
- fscal (F, __FFLAS__Normdim, inv, Ai, __FFLAS__Anorminc);
- fscal (F, __FFLAS__Bdim, inv, Bi, __FFLAS__Bnorminc);
+#ifndef __FFLAS_MULTIPRECISION
+#ifdef __FFLAS__TRSM_READONLY
+ fscal (F, __FFLAS__Normdim, inv, Ai, __FFLAS__Anorminc, Acopi, __FFLAS__Acopnorminc);
+ Acopi += __FFLAS__Acopnormnext;
+#else //__FFLAS__TRSM_READONLY
+ fscalin (F, __FFLAS__Normdim, inv, Ai, __FFLAS__Anorminc);
+#endif //__FFLAS__TRSM_READONLY
+#endif //__FFLAS_MULTIPRECISION
+ FFLAS::fscalin (F, __FFLAS__Bdim, inv, Bi, __FFLAS__Bnorminc);
Ai += __FFLAS__Anormnext;
Bi += __FFLAS__Bnormnext;
+
}
#endif // __FFLAS__UNIT
+#ifndef __FFLAS_MULTIPRECISION
Mjoin(cblas_,Mjoin(__FFLAS__BLAS_PREFIX,trsm))
(CblasRowMajor,
Mjoin (Cblas, __FFLAS__SIDE),
Mjoin (Cblas, __FFLAS__UPLO),
Mjoin (Cblas, __FFLAS__TRANS),
CblasUnit,
- (int)M, (int)N, 1.0, A, (int)lda, B, (int)ldb );
- for (size_t i = 0; i < M; ++i)
- for (size_t j = 0; j < N; ++j)
- F.init (*(B + i*ldb + j), *(B + i*ldb + j));
+ (int)M, (int)N, D.one, __FFLAS__Atrsm, (int)__FFLAS__Atrsm_lda, B, (int)ldb );
+ freduce (F, M, N, B, ldb);
+#endif //__FFLAS_MULTIPRECISION
#ifndef __FFLAS__UNIT
Ai = A;
#ifdef __FFLAS__LEFT
#ifdef __FFLAS__UP
Ai += __FFLAS__Acolinc;
-#endif
-#endif
+#endif //__FFLAS__UP
+#endif //__FFLAS__LEFT
#ifdef __FFLAS__RIGHT
#ifdef __FFLAS__LOW
Ai += __FFLAS__Arowinc;
-#endif
-#endif
+#endif //__FFLAS__LOW
+#endif //__FFLAS__RIGHT
+
+#ifndef __FFLAS__TRSM_READONLY
+#ifndef __FFLAS_MULTIPRECISION
for (size_t i = 0; i < __FFLAS__Na; ++i){
- fscal( F, __FFLAS__Normdim, *(A + i * (lda+1)) , Ai, __FFLAS__Anorminc);
+ fscalin( F, __FFLAS__Normdim, *(A + i * (lda+1)) , Ai, __FFLAS__Anorminc);
Ai += __FFLAS__Anormnext;
}
+#endif //__FFLAS_MULTIPRECISION
+#endif //__FFLAS__TRSM_READONLY
+
+#ifdef __FFLAS__TRSM_READONLY
+ FFLAS::fflas_delete(Acop);
+#endif //__FFLAS__TRSM_READONLY
#endif // __FFLAS__UNIT
} else { // __FFLAS__Na <= nblas
size_t nbblocsup = (nbblocsblas + 1) / 2;
size_t nsplit = nbblocsup * nblas;
this->delayed (F, __FFLAS__Mb, __FFLAS__Nb,
- __FFLAS__A1, lda, __FFLAS__B1, ldb, nblas, nbblocsup);
+ __FFLAS__A1, lda, __FFLAS__B1, ldb, nblas, nbblocsup, H);
+
+
#ifdef __FFLAS__RIGHT
- fgemm (D, FflasNoTrans, Mjoin (Fflas, __FFLAS__TRANS), __FFLAS__Mb2, __FFLAS__Nb2, nsplit,
- -1.0, __FFLAS__B1, ldb, __FFLAS__A2, lda, F.one, __FFLAS__B2, ldb);
+ fgemm (D, FflasNoTrans, Mjoin (Fflas, __FFLAS__TRANS),
+ __FFLAS__Mb2, __FFLAS__Nb2, nsplit, D.mOne,
+ __FFLAS__B1, ldb, __FFLAS__A2, lda,
+ F.one, __FFLAS__B2, ldb, H.parseq);
#else
- fgemm (D, Mjoin (Fflas, __FFLAS__TRANS), FflasNoTrans, __FFLAS__Mb2, __FFLAS__Nb2, nsplit,
- -1.0, __FFLAS__A2, lda, __FFLAS__B1, ldb, F.one, __FFLAS__B2, ldb);
-#endif
+ fgemm (D, Mjoin (Fflas, __FFLAS__TRANS), FflasNoTrans,
+ __FFLAS__Mb2, __FFLAS__Nb2, nsplit, D.mOne,
+ __FFLAS__A2, lda, __FFLAS__B1, ldb,
+ F.one, __FFLAS__B2, ldb, H.parseq);
+#endif //__FFLAS__RIGHT
this->delayed (F, __FFLAS__Mb2, __FFLAS__Nb2,
- __FFLAS__A3, lda, __FFLAS__B2, ldb, nblas, nbblocsblas - nbblocsup);
+ __FFLAS__A3, lda, __FFLAS__B2, ldb, nblas, nbblocsblas - nbblocsup, H);
}
}
-template <class Field>
+template <class Field, class ParSeqTrait>
void operator () (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb)
+#ifdef __FFLAS__TRSM_READONLY
+ typename Field::ConstElement_ptr
+#else
+ typename Field::Element_ptr
+#endif //__FFLAS__TRSM_READONLY
+ A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb,
+ TRSMHelper<StructureHelper::Recursive, ParSeqTrait> & H)
{
+#if defined(__FFLAS_MULTIPRECISION) && defined(BENCH_PERF_FTRSM_MP)
+ FFLAS::Timer chrono;chrono.start();
+#endif
+
if (!M || !N ) return;
-
- static __FFLAS__DOMAIN D;
- size_t nblas = TRSMBound<Field> (F);
-
- size_t ndel = DotProdBound (F, 0, F.one,
-#ifdef __FFLAS__DOUBLE
- FflasDouble
-#else
- FflasFloat
-#endif
- );
+ //static __FFLAS__DOMAIN D(F);
+ __FFLAS__DOMAIN D(F);
+ size_t nblas = TRSMBound<Field> (F);
+ size_t ndel = DotProdBoundClassic (F, F.one);
ndel = (ndel / nblas)*nblas;
size_t nsplit = ndel;
size_t nbblocsplit = (__FFLAS__Na-1) / nsplit;
@@ -275,21 +366,29 @@ void operator () (const Field& F, const size_t M, const size_t N,
for ( size_t i = 0; i < nbblocsplit; ++i) {
this->delayed (F, __FFLAS__Mb, __FFLAS__Nb,
- __FFLAS__Atriang, lda, __FFLAS__Brec, ldb, nblas, nsplit / nblas);
+ __FFLAS__Atriang, lda, __FFLAS__Brec, ldb, nblas, nsplit / nblas, H);
#ifdef __FFLAS__RIGHT
fgemm (F, FflasNoTrans, Mjoin (Fflas, __FFLAS__TRANS),
__FFLAS__Mupdate, __FFLAS__Nupdate, nsplit, F.mOne,
- __FFLAS__Brec, ldb, __FFLAS__Aupdate, lda, F.one, __FFLAS__Bupdate, ldb);
+ __FFLAS__Brec, ldb, __FFLAS__Aupdate, lda,
+ F.one, __FFLAS__Bupdate, ldb, H.parseq);
#else
fgemm (F, Mjoin (Fflas, __FFLAS__TRANS), FflasNoTrans,
__FFLAS__Mupdate, __FFLAS__Nupdate, nsplit, F.mOne,
- __FFLAS__Aupdate, lda, __FFLAS__Brec, ldb, F.one, __FFLAS__Bupdate, ldb);
-#endif
+ __FFLAS__Aupdate, lda, __FFLAS__Brec, ldb,
+ F.one, __FFLAS__Bupdate, ldb, H.parseq);
+#endif //__FFLAS__RIGHT
}
if (nrestsplit)
this->delayed (F, __FFLAS__Mbrest, __FFLAS__Nbrest,
- __FFLAS__Arest, lda, __FFLAS__Brest, ldb, nblas, nrestsplit / nblas);
+ __FFLAS__Arest, lda, __FFLAS__Brest, ldb, nblas, nrestsplit / nblas, H);
+
+#if defined(__FFLAS_MULTIPRECISION) && defined(BENCH_PERF_FTRSM_MP)
+ chrono.stop();
+ F.t_trsm+=chrono.usertime();
+#endif
+
}
@@ -300,44 +399,56 @@ void operator () (const Field& F, const size_t M, const size_t N,
template <class Element>
class Mjoin(ftrsm, Mjoin(__FFLAS__SIDE, Mjoin(__FFLAS__UPLO, Mjoin(__FFLAS__TRANS, __FFLAS__DIAG)))) {
public:
-
-template<class Field>
+
+template<class Field, class ParSeqTrait>
void operator() (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * B, const size_t ldb)
+#ifdef __FFLAS__TRSM_READONLY
+ typename Field::ConstElement_ptr
+#else
+ typename Field::Element_ptr
+#endif
+ A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb,
+ TRSMHelper<StructureHelper::Recursive, ParSeqTrait> & H)
{
-
if (__FFLAS__Na == 1){
#ifndef __FFLAS__UNIT
typename Field::Element inv;
+ F.init(inv);
#ifdef _FF_DEBUG
if ( F.isZero(*A) ) throw PreconditionFailed(__func__,__FILE__,__LINE__,"Triangular matrix not invertible");
-#endif
+#endif //_FF_DEBUG
F.inv(inv, *A);
- FFLAS::fscal(F, __FFLAS__Bdim, inv, B, __FFLAS__Bnorminc);
+ FFLAS::fscalin(F, __FFLAS__Bdim, inv, B, __FFLAS__Bnorminc);
+
#endif //__FFLAS__UNIT
} else { // __FFLAS__Na > 1
size_t nsplit = __FFLAS__Na >> 1;
- this->operator() (F, __FFLAS__Mb, __FFLAS__Nb, __FFLAS__A1, lda, __FFLAS__B1, ldb);
-
+ this->operator() (F, __FFLAS__Mb, __FFLAS__Nb, __FFLAS__A1, lda, __FFLAS__B1, ldb, H);
#ifdef __FFLAS__RIGHT
fgemm (F, FflasNoTrans , Mjoin (Fflas, __FFLAS__TRANS),
__FFLAS__Mb2, __FFLAS__Nb2, nsplit, F.mOne,
- __FFLAS__B1, ldb, __FFLAS__A2, lda, F.one, __FFLAS__B2, ldb);
-#else
+ __FFLAS__B1, ldb, __FFLAS__A2, lda,
+ F.one, __FFLAS__B2, ldb, H.parseq);
+#else //__FFLAS__RIGHT
fgemm (F, Mjoin (Fflas, __FFLAS__TRANS), FFLAS::FflasNoTrans,
__FFLAS__Mb2, __FFLAS__Nb2, nsplit, F.mOne,
- __FFLAS__A2, lda, __FFLAS__B1, ldb, F.one, __FFLAS__B2, ldb);
-#endif
- this->operator() (F, __FFLAS__Mb2, __FFLAS__Nb2, __FFLAS__A3, lda, __FFLAS__B2, ldb);
+ __FFLAS__A2, lda, __FFLAS__B1, ldb,
+ F.one, __FFLAS__B2, ldb, H.parseq);
+#endif //__FFLAS__RIGHT
+ this->operator() (F, __FFLAS__Mb2, __FFLAS__Nb2, __FFLAS__A3, lda, __FFLAS__B2, ldb, H);
}
}
};
#endif // __FFLAS__GENERIC
-
+#ifdef __FFLAS__LOWER
+ #undef __FFLAS__LOWER
+#else
+ #undef __FFLAS__UPPER
+#endif
#undef __FFLAS__UPLO
#undef __FFLAS__DIAG
#undef __FFLAS__SIDE
@@ -358,7 +469,9 @@ void operator() (const Field& F, const size_t M, const size_t N,
#undef __FFLAS__Bnorminc
#undef __FFLAS__Bnormnext
#undef __FFLAS__Anormnext
+#undef __FFLAS__Acopnormnext
#undef __FFLAS__Anorminc
+#undef __FFLAS__Acopnorminc
#undef __FFLAS__ELEMENT
#undef __FFLAS__BLAS_PREFIX
#undef __FFLAS__DOMAIN
@@ -373,5 +486,9 @@ void operator() (const Field& F, const size_t M, const size_t N,
#undef __FFLAS__Normdim
#undef __FFLAS__Acolinc
#undef __FFLAS__Arowinc
+#undef __FFLAS__Acopcolinc
+#undef __FFLAS__Acoprowinc
+#undef __FFLAS__Atrsm_lda
+#undef __FFLAS__Atrsm
#undef Mjoin
#undef my_join
diff --git a/fflas-ffpack/fflas/fflas_ftrsv.inl b/fflas-ffpack/fflas/fflas_ftrsv.inl
index 8320f52..0857a21 100644
--- a/fflas-ffpack/fflas/fflas_ftrsv.inl
+++ b/fflas-ffpack/fflas/fflas_ftrsv.inl
@@ -6,20 +6,20 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -39,16 +39,16 @@ template<class Field>
inline void
ftrsv (const Field& F, const FFLAS_UPLO Uplo,
const FFLAS_TRANSPOSE TransA, const FFLAS_DIAG Diag,
- const size_t N,const typename Field::Element * A, size_t lda,
- typename Field::Element * X, int incX)
+ const size_t N,typename Field::ConstElement_ptr A, size_t lda,
+ typename Field::Element_ptr X, int incX)
{
- typename Field::Element * Xi,* Xj, * Ximax;
- const typename Field::Element * Ai, * Aj;
+ typename Field::Element_ptr Xi, Xj, Ximax;
+ typename Field::ConstElement_ptr Ai, Aj;
if ( Uplo == FflasLower ){
if ( TransA == FflasTrans){
Ai = A+(N-1)*(lda+1); // bottom right entry of A
- Ximax = Xi = X+(N-1)*incX;
+ Ximax = Xi = X+(int)(N-1)*incX;
for( ; Xi>=X; Ai-=lda+1,Xi-=incX ){
F.negin( *Xi );
for ( Xj = Xi+incX, Aj=Ai+lda; Xj<=Ximax;
@@ -64,7 +64,7 @@ ftrsv (const Field& F, const FFLAS_UPLO Uplo,
else{
Ai = A;
Xi = X;
- for( ; Xi<X+incX*N; Ai+=lda+1,Xi+=incX ){
+ for( ; Xi<X+incX*(int)N; Ai+=lda+1,Xi+=incX ){
F.negin( *Xi );
for ( Xj = Xi-incX, Aj=Ai-1; Xj>=X;
Xj-=incX, Aj--){
@@ -80,7 +80,7 @@ ftrsv (const Field& F, const FFLAS_UPLO Uplo,
if ( TransA == FflasTrans){
Ai = A;
Xi = X;
- for( ; Xi<X+N*incX; Ai+=lda+1,Xi+=incX ){
+ for( ; Xi<X+(int)N*incX; Ai+=lda+1,Xi+=incX ){
F.negin( *Xi );
for ( Xj = Xi-incX, Aj=Ai-lda; Xj>=X;
Xj-=incX, Aj-=lda){
@@ -95,7 +95,7 @@ ftrsv (const Field& F, const FFLAS_UPLO Uplo,
} // FflasTrans
else{
Ai = A+(lda+1)*(N-1);
- Ximax = Xi = X+incX*(N-1);
+ Ximax = Xi = X+incX*(int)(N-1);
for( ; Xi>=X; Ai-=lda+1,Xi-=incX ){
F.negin( *Xi );
for ( Xj = Xi+incX, Aj=Ai+1; Xj<=Ximax;
diff --git a/fflas-ffpack/fflas/fflas_helpers.inl b/fflas-ffpack/fflas/fflas_helpers.inl
new file mode 100755
index 0000000..cfe0ca9
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_helpers.inl
@@ -0,0 +1,368 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_mmhelper.h
+ * @brief Matrix-Matrix Helper class
+ */
+
+#ifndef __FFLASFFPACK_fflas_fflas_mmhelper_INL
+#define __FFLASFFPACK_fflas_fflas_mmhelper_INL
+
+#include "fflas-ffpack/field/field-traits.h"
+#include "fflas-ffpack/paladin/parallel.h"
+#include "fflas-ffpack/utils/flimits.h"
+
+#include <algorithm> // std::max
+
+namespace FFLAS{ namespace Protected{
+ /** \brief Computes the number of recursive levels to perform.
+ *
+ * \param m the common dimension in the product AxB
+ */
+ template<class Field>
+ int WinogradSteps (const Field & F, const size_t & m);
+
+}//Protected
+}//FFLAS
+
+namespace FFLAS {
+
+ namespace Protected{
+ template <class T>
+ inline bool unfit(T x){return false;}
+ template <>
+ inline bool unfit(int64_t x){return (x>limits<int32_t>::max());}
+ template <size_t K>
+ inline bool unfit(RecInt::rint<K> x){return (x > RecInt::rint<K>(limits<RecInt::rint<K-1>>::max()));}
+ template <>
+ inline bool unfit(RecInt::rint<6> x){return (x > limits<int32_t>::max());}
+ }
+
+ namespace MMHelperAlgo{
+ struct Auto{};
+ struct Classic{};
+ struct Winograd{};
+ struct WinogradPar{};
+ struct Bini{};
+ }
+
+ template<class ModeT, class ParSeq>
+ struct AlgoChooser{typedef MMHelperAlgo::Winograd value;};
+ template<class ParSeq>
+ struct AlgoChooser<ModeCategories::ConvertTo<ElementCategories::RNSElementTag>, ParSeq>{typedef MMHelperAlgo::Classic value;};
+
+ template<class Field,
+ typename AlgoTrait = MMHelperAlgo::Auto,
+ typename ModeTrait = typename ModeTraits<Field>::value,
+ typename ParSeqTrait = ParSeqHelper::Sequential >
+ struct MMHelper;
+ /*! FGEMM Helper for Default and ConvertTo modes of operation
+ */
+ template<class Field,
+ typename AlgoTrait,
+ typename ParSeqTrait >
+ struct MMHelper<Field, AlgoTrait, ModeCategories::DefaultTag, ParSeqTrait>
+ {
+ typedef MMHelper<Field,AlgoTrait, ModeCategories::DefaultTag,ParSeqTrait> Self_t;
+ int recLevel ;
+ ParSeqTrait parseq;
+
+ MMHelper(){}
+ MMHelper(const Field& F, size_t m, size_t k, size_t n, ParSeqTrait _PS) : recLevel(-1), parseq(_PS) {}
+ MMHelper(const Field& F, int w, ParSeqTrait _PS=ParSeqTrait()) : recLevel(w), parseq(_PS) {}
+
+ // copy constructor from other Field and Algo Traits
+ template<class F2, typename AlgoT2, typename FT2, typename PS2>
+ MMHelper(MMHelper<F2, AlgoT2, FT2, PS2>& WH) : recLevel(WH.recLevel), parseq(WH.parseq) {}
+
+ friend std::ostream& operator<<(std::ostream& out, const Self_t& M)
+ {
+ return out <<"Helper: "
+ <<typeid(AlgoTrait).name()<<' '
+ <<typeid(ModeCategories::DefaultTag).name()<< ' '
+ << M.parseq <<std::endl
+ <<" recLevel = "<<M.recLevel<<std::endl;
+ }
+ };
+ template<class Field,
+ typename AlgoTrait,
+ typename Dest,
+ typename ParSeqTrait>
+ struct MMHelper<Field, AlgoTrait, ModeCategories::ConvertTo<Dest>, ParSeqTrait>
+ {
+ typedef MMHelper<Field,AlgoTrait, ModeCategories::ConvertTo<Dest>,ParSeqTrait> Self_t;
+ int recLevel ;
+ ParSeqTrait parseq;
+
+ MMHelper(){}
+ MMHelper(const Field& F, size_t m, size_t k, size_t n, ParSeqTrait _PS) : recLevel(-1), parseq(_PS) {}
+ MMHelper(const Field& F, int w, ParSeqTrait _PS=ParSeqTrait()) : recLevel(w), parseq(_PS) {}
+
+ // copy constructor from other Field and Algo Traits
+ template<class F2, typename AlgoT2, typename FT2, typename PS2>
+ MMHelper(MMHelper<F2, AlgoT2, FT2, PS2>& WH) : recLevel(WH.recLevel), parseq(WH.parseq) {}
+
+ friend std::ostream& operator<<(std::ostream& out, const Self_t& M)
+ {
+ return out <<"Helper: "
+ <<typeid(AlgoTrait).name()<<' '
+ <<typeid(ModeCategories::ConvertTo<Dest>).name()<< ' '
+ << M.parseq <<std::endl
+ <<" recLevel = "<<M.recLevel<<std::endl;
+ }
+ };
+ // MMHelper for Delayed and Lazy Modes of operation
+ template<class Field,
+ typename AlgoTrait,
+ typename ModeTrait,
+ typename ParSeqTrait>
+ struct MMHelper {
+ typedef MMHelper<Field,AlgoTrait,ModeTrait,ParSeqTrait> Self_t;
+ typedef typename associatedDelayedField<const Field>::type DelayedField_t;
+ typedef typename associatedDelayedField<const Field>::field DelayedField;
+ typedef typename DelayedField::Element DFElt;
+ int recLevel ;
+ DFElt FieldMin, FieldMax, Amin, Amax, Bmin, Bmax, Cmin, Cmax, Outmin, Outmax;
+ DFElt MaxStorableValue;
+
+ const DelayedField_t delayedField;
+ ParSeqTrait parseq;
+ void initC(){Cmin = FieldMin; Cmax = FieldMax;}
+ void initA(){Amin = FieldMin; Amax = FieldMax;}
+ void initB(){Bmin = FieldMin; Bmax = FieldMax;}
+ void initOut(){Outmin = FieldMin; Outmax = FieldMax;}
+
+
+ size_t MaxDelayedDim(DFElt beta)
+ {
+ DFElt absbeta;
+ delayedField.init(absbeta,beta);
+ if (beta < 0) absbeta = -beta;
+ // This cast is needed when Cmin base type is int8/16_t,
+ // getting -Cmin returns a int, not the same base type.
+ DFElt diff = MaxStorableValue - absbeta
+ * std::max(static_cast<const DFElt&>(-Cmin), Cmax);
+ DFElt AB = std::max(static_cast<const DFElt&>(-Amin), Amax)
+ * std::max(static_cast<const DFElt&>(-Bmin), Bmax);
+ return static_cast<size_t>(((diff < DFElt(0u))||(AB<DFElt(0u)))? DFElt(0u) : diff / AB);
+ }
+ bool Aunfit(){ return Protected::unfit(std::max(static_cast<const DFElt&>(-Amin),Amax));}
+ bool Bunfit(){ return Protected::unfit(std::max(static_cast<const DFElt&>(-Bmin),Bmax));}
+ void setOutBounds(const size_t k, const DFElt alpha, const DFElt beta)
+ {
+ if (beta<0){
+ Outmin = beta*Cmax;
+ Outmax = beta*Cmin;
+ } else {
+ Outmin = beta*Cmin;
+ Outmax = beta*Cmax;
+ }
+ if (alpha >0){
+ Outmin += DFElt(k)*alpha*std::min(Amin*Bmax, Amax*Bmin);
+ Outmax += DFElt(k)*alpha*std::max(Amin*Bmin, Amax*Bmax);
+ }else{
+ Outmin += DFElt(k)*alpha*std::max(Amin*Bmin, Amax*Bmax);
+ Outmax += DFElt(k)*alpha*std::min(Amin*Bmax, Amax*Bmin);
+ }
+ }
+
+ bool checkA(const Field& F, const FFLAS::FFLAS_TRANSPOSE ta, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda )
+ {
+#ifdef DEBUG
+ for (size_t i=0; i<M;++i)
+ for (size_t j=0; j<N;++j){
+ const typename Field::Element x = (ta == FFLAS::FflasNoTrans)? A[i*lda+j] : A[i+j*lda];
+ if (x > Amax || x < Amin){
+ std::cerr<<"Error in "<<Amin<<" < = A["<<i<<", "<<j<<"] ="<<x<<" <= "<<Amax<<std::endl;
+ return false;
+ }
+ }
+#endif
+ return true;
+ }
+
+ bool checkB(const Field& F, const FFLAS::FFLAS_TRANSPOSE tb, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr B, const size_t ldb)
+ {
+#ifdef DEBUG
+ for (size_t i=0; i<M;++i)
+ for (size_t j=0; j<N;++j){
+ const typename Field::Element x = (tb == FFLAS::FflasNoTrans)? B[i*ldb+j] : B[i+j*ldb];
+ if (x > Bmax || x < Bmin){
+ std::cerr<<"Error in "<<Bmin<<" < = B["<<i<<", "<<j<<"] ="<<B[i*ldb+j]<<" <= "<<Bmax<<std::endl;
+ return false;
+ }
+ }
+#endif
+ return true;
+ }
+
+ bool checkOut(const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda ){
+#ifdef DEBUG
+ for (size_t i=0; i<M;++i)
+ for (size_t j=0; j<N;++j)
+ if ((A[i*lda+j]>Outmax) || (A[i*lda+j]<Outmin)){
+ std::cerr<<"Error in "<<Outmin<<" <= Out["<<i<<", "<<j<<"] = "<<A[i*lda+j]<<" <= "<<Outmax<<std::endl;
+ return false;
+ }
+#endif
+ return true;
+ }
+
+ MMHelper(){}
+ //TODO: delayedField constructor has a >0 characteristic even when it is a Double/FloatDomain
+ // correct but semantically not satisfactory
+ MMHelper(const Field& F, size_t m, size_t k, size_t n, ParSeqTrait _PS) :
+ recLevel(-1),
+ FieldMin((DFElt)F.minElement()), FieldMax((DFElt)F.maxElement()),
+ Amin(FieldMin), Amax(FieldMax),
+ Bmin(FieldMin), Bmax(FieldMax),
+ Cmin(FieldMin), Cmax(FieldMax),
+ Outmin(0), Outmax(0),
+ MaxStorableValue ((DFElt)(limits<typename DelayedField::Element>::max())),
+ delayedField(F),
+ // delayedField((typename Field::Element)F.characteristic()),
+ parseq(_PS)
+ {
+ }
+
+ MMHelper(const Field& F, int w, ParSeqTrait _PS=ParSeqTrait()) :
+ recLevel(w),
+ FieldMin((DFElt)F.minElement()), FieldMax((DFElt)F.maxElement()),
+ Amin(FieldMin), Amax(FieldMax),
+ Bmin(FieldMin), Bmax(FieldMax),
+ Cmin(FieldMin), Cmax(FieldMax),
+ Outmin(0), Outmax(0),
+ MaxStorableValue ((DFElt)(limits<typename DelayedField::Element>::max())),
+ delayedField(F),
+ parseq(_PS)
+ {
+ }
+
+ // copy constructor from other Field and Algo Traits
+ template<class F2, typename AlgoT2, typename FT2, typename PS2>
+ MMHelper(MMHelper<F2, AlgoT2, FT2, PS2>& WH) :
+ recLevel(WH.recLevel),
+ FieldMin(WH.FieldMin), FieldMax(WH.FieldMax),
+ Amin(WH.Amin), Amax(WH.Amax),
+ Bmin(WH.Bmin), Bmax(WH.Bmax),
+ Cmin(WH.Cmin), Cmax(WH.Cmax),
+ Outmin(WH.Outmin), Outmax(WH.Outmax),
+ MaxStorableValue(WH.MaxStorableValue),
+ delayedField(WH.delayedField),
+ parseq(WH.parseq)
+ {
+ }
+
+ MMHelper(const Field& F, int w,
+ DFElt _Amin, DFElt _Amax,
+ DFElt _Bmin, DFElt _Bmax,
+ DFElt _Cmin, DFElt _Cmax,
+ ParSeqTrait _PS=ParSeqTrait()):
+ recLevel(w), FieldMin((DFElt)F.minElement()), FieldMax((DFElt)F.maxElement()),
+ Amin(_Amin), Amax(_Amax),
+ Bmin(_Bmin), Bmax(_Bmax),
+ Cmin(_Cmin), Cmax(_Cmax),
+ Outmin(0),Outmax(0),
+ MaxStorableValue(limits<typename DelayedField::Element>::max()),
+ delayedField(F),
+ parseq(_PS)
+ {
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const Self_t& M)
+ {
+ return out <<"Helper: "
+ <<typeid(AlgoTrait).name()<<' '
+ <<typeid(ModeTrait).name()<< ' '
+ << M.parseq <<std::endl
+ <<" DelayedField = "<<typeid(DelayedField).name()<<std::endl
+ <<" recLevel = "<<M.recLevel<<std::endl
+ <<" FieldMin = "<<M.FieldMin<<" FieldMax = "<<M.FieldMax<<std::endl
+ <<" MaxStorableValue = "<< M.MaxStorableValue <<std::endl
+ <<" Amin = "<<M.Amin<<" Amax = "<<M.Amax<<std::endl
+ <<" Bmin = "<<M.Bmin<<" Bmax = "<<M.Bmax<<std::endl
+ <<" Cmin = "<<M.Cmin<<" Cmax = "<<M.Cmax<<std::endl
+ <<" Outmin = "<<M.Outmin<<" Outmax = "<<M.Outmax<<std::endl;
+ }
+ }; // MMHelper
+
+
+ // to be used in the future, when Winograd's algorithm will be made generic wrt the ModeTrait
+ // template <class Field, class AlgoT, class ParSeqH>
+ // void copyOutBounds(const MMHelper<Field,AlgoT,ModeCategories::DelayedTag, ParSeqH> &Source,
+ // MMHelper<Field,AlgoT,ModeCategories::DelayedTag, ParSeqH> & Dest){
+ // Dest.Outmax = Source.Outmax;
+ // Dest.Outmin = Source.Outmin;
+ // }
+ // template <class Field, class AlgoT, class ParSeqH>
+ // void copyOutBounds(const MMHelper<Field,AlgoT,ModeCategories::LazyTag, ParSeqH> &Source,
+ // MMHelper<Field,AlgoT,ModeCategories::LazyTag, ParSeqH> & Dest){
+ // Dest.Outmax = Source.Outmax;
+ // Dest.Outmin = Source.Outmin;
+ // }
+ // template <class MMH1, class MMH2>
+ // void copyOutBounds(const MMH1 &Source, MMH2 & Dest){}
+ /*! StructureHelper for ftrsm
+ */
+ namespace StructureHelper {
+ struct Recursive{};
+ struct Iterative{};
+ struct Hybrid{};
+ }
+
+ /*! TRSM Helper
+ */
+ template<typename RecIterTrait = StructureHelper::Recursive, typename ParSeqTrait = ParSeqHelper::Sequential>
+ struct TRSMHelper {
+ ParSeqTrait parseq;
+ template<class Cut,class Param>
+ TRSMHelper(ParSeqHelper::Parallel<Cut,Param> _PS):parseq(_PS){}
+ TRSMHelper(ParSeqHelper::Sequential _PS):parseq(_PS){}
+ template<typename RIT, typename PST>
+ TRSMHelper(TRSMHelper<RIT,PST>& _TH):parseq(_TH.parseq){}
+
+ template<class Dom, class Algo=FFLAS::MMHelperAlgo::Winograd, class ModeT=typename FFLAS::ModeTraits<Dom>::value>
+ FFLAS::MMHelper<Dom, Algo, ModeT, ParSeqTrait> pMMH (Dom& D, size_t m, size_t k, size_t n, ParSeqTrait p) const {
+ return FFLAS::MMHelper<Dom, Algo, ModeT, ParSeqTrait>(D,m,k,n,p);
+ }
+
+ template<class Dom, class Algo=FFLAS::MMHelperAlgo::Winograd, class ModeT=typename FFLAS::ModeTraits<Dom>::value>
+ FFLAS::MMHelper<Dom, Algo, ModeT, ParSeqTrait> pMMH (Dom& D, size_t m, size_t k, size_t n) const {
+ return pMMH(D,m,k,n,this->parseq);
+ }
+
+ };
+
+
+
+
+} // FFLAS
+#endif
diff --git a/benchmark/Makefile.am b/fflas-ffpack/fflas/fflas_igemm/Makefile.am
similarity index 73%
copy from benchmark/Makefile.am
copy to fflas-ffpack/fflas/fflas_igemm/Makefile.am
index 31793b2..9b2dc8e 100644
--- a/benchmark/Makefile.am
+++ b/fflas-ffpack/fflas/fflas_igemm/Makefile.am
@@ -1,5 +1,7 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+#
+#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -19,10 +21,16 @@
# ========LICENCE========
#/
-#
-# Nothing yet
-SUBDIRS=graph src html test-src
-#
-EXTRA_DIST=run.sh
+pkgincludesubdir=$(pkgincludedir)/fflas/fflas_igemm
+
+EXTRA_DIST=igemm.doxy
+
+pkgincludesub_HEADERS= \
+ igemm_kernels.h \
+ igemm_kernels.inl \
+ igemm_tools.h \
+ igemm_tools.inl \
+ igemm.h \
+ igemm.inl
diff --git a/fflas-ffpack/fflas/fflas_igemm/igemm.doxy b/fflas-ffpack/fflas/fflas_igemm/igemm.doxy
new file mode 100644
index 0000000..b773fd1
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_igemm/igemm.doxy
@@ -0,0 +1,36 @@
+// Copyright (c) 2014 FFLAS-FFPACK
+// written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+//
+// ========LICENCE========
+// This file is part of the library FFLAS-FFPACK.
+//
+// FFLAS-FFPACK is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+// ========LICENCE========
+//
+
+
+
+
+/** \ingroup fflas-ffpack
+ * \ingroup gemm
+ *
+ * \brief gemm for \c int64_t
+ * \details same as dgemm, sgemm, zgemm, but igemm
+ *
+ * @todo biblio
+ *
+ */
+
+// vim:syn=doxygen
diff --git a/fflas-ffpack/fflas/fflas_igemm/igemm.h b/fflas-ffpack/fflas/fflas_igemm/igemm.h
new file mode 100644
index 0000000..1057a71
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_igemm/igemm.h
@@ -0,0 +1,96 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2013,2014 Pascal Giorgi
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ * the code is inspired and adapted from the Eigen library
+ * modified by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_igemm_igemm_H
+#define __FFLASFFPACK_fflas_igemm_igemm_H
+
+namespace FFLAS {
+
+ enum number_kind {
+ zero =0,
+ one =1,
+ mone =-1,
+ other =2
+ } ;
+
+} // FFLAS
+
+#if defined(__AVX2__) or defined(__AVX__) or defined(__SSE4_1__)
+#include "igemm_kernels.h"
+#include "igemm_tools.h"
+#endif
+#include "fflas-ffpack/utils/fflas_memory.h"
+
+namespace FFLAS { namespace Protected {
+
+
+
+ template<enum FFLAS_TRANSPOSE tA, enum FFLAS_TRANSPOSE tB>
+ void igemm_colmajor( size_t rows, size_t cols, size_t depth
+ , const int64_t alpha
+ , const int64_t* A, size_t lda, const int64_t* B, size_t ldb
+ , int64_t* C, size_t ldc
+ ) ;
+
+ template<enum FFLAS_TRANSPOSE tA, enum FFLAS_TRANSPOSE tB, enum number_kind alpha_kind>
+ void igemm_colmajor( size_t rows, size_t cols, size_t depth
+ , const int64_t alpha
+ , const int64_t* A, size_t lda, const int64_t* B, size_t ldb
+ , int64_t* C, size_t ldc
+ ) ;
+
+
+
+ inline void igemm(const enum FFLAS_TRANSPOSE TransA, const enum FFLAS_TRANSPOSE TransB
+ , size_t rows, size_t cols, size_t depth
+ , const int64_t alpha
+ , const int64_t* A, size_t lda, const int64_t* B, size_t ldb
+ , const int64_t beta
+ , int64_t* C, size_t ldc
+ ) ;
+
+} // Protected
+} // FFLAS
+
+namespace FFLAS { /* igemm */
+
+ inline void igemm_(const enum FFLAS_ORDER Order, const enum FFLAS_TRANSPOSE TransA, const enum FFLAS_TRANSPOSE TransB
+ , const size_t M, const size_t N, const size_t K
+ , const int64_t alpha
+ , const int64_t *A, const size_t lda, const int64_t *B, const size_t ldb
+ , const int64_t beta
+ , int64_t *C, const size_t ldc);
+
+
+} // FFLAS
+#if defined(__AVX2__) or defined(__AVX__) or defined(__SSE4_1__)
+#include "igemm.inl"
+#endif
+#endif // __FFLASFFPACK_fflas_igemm_igemm_H
+
diff --git a/fflas-ffpack/fflas/fflas_igemm/igemm.inl b/fflas-ffpack/fflas/fflas_igemm/igemm.inl
new file mode 100644
index 0000000..d82885c
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_igemm/igemm.inl
@@ -0,0 +1,194 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2013,2014 Pascal Giorgi
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ * the code is inspired and adapted from the Eigen library
+ * modified by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_igemm_igemm_INL
+#define __FFLASFFPACK_fflas_igemm_igemm_INL
+
+#include "fflas-ffpack/utils/fflas_memory.h"
+
+namespace FFLAS { namespace Protected {
+
+
+ // Assume matrices A,B,C are stored in column major order
+ template<enum FFLAS_TRANSPOSE tA, enum FFLAS_TRANSPOSE tB>
+ void igemm_colmajor(size_t rows, size_t cols, size_t depth,
+ const int64_t alpha,
+ const int64_t* A, size_t lda, const int64_t* B, size_t ldb,
+ int64_t* C, size_t ldc)
+ {
+ FFLASFFPACK_check(alpha != 0);
+ switch(alpha) {
+ case 1:
+ igemm_colmajor<tA,tB,number_kind::one>(rows,cols,depth,
+ alpha,A,lda,B,ldb,
+ C,ldc);
+ break;
+ case -1:
+ igemm_colmajor<tA,tB,number_kind::mone>(rows,cols,depth,
+ alpha,A,lda,B,ldb,
+ C,ldc);
+ break;
+ default:
+ igemm_colmajor<tA,tB,number_kind::other>(rows,cols,depth,
+ alpha,A,lda,B,ldb,
+ C,ldc);
+
+
+ }
+ }
+
+ template<enum FFLAS_TRANSPOSE tA, enum FFLAS_TRANSPOSE tB, enum number_kind alpha_kind>
+ void igemm_colmajor(size_t rows, size_t cols, size_t depth,
+ const int64_t alpha,
+ const int64_t* A, size_t lda, const int64_t* B, size_t ldb,
+ int64_t* C, size_t ldc)
+ {
+
+ using simd = Simd<int64_t> ;
+ size_t mc,kc,nc;
+ mc=rows;
+ nc=cols;
+ kc=depth;
+ FFLAS::details::BlockingFactor(mc,nc,kc);
+ size_t sizeA = mc*kc;
+ size_t sizeB = kc*cols;
+ size_t sizeW = simd::vect_size*kc*_nr; // store data duplicated by the number of elements fitting in vector register
+
+ // these data must be simd::alignment byte aligned
+ int64_t *blockA, *blockB, *blockW;
+
+
+ blockA = fflas_new<int64_t>(sizeA, (Alignment)simd::alignment);
+ blockB = fflas_new<int64_t>(sizeB, (Alignment)simd::alignment);
+ blockW = fflas_new<int64_t>(sizeW, (Alignment)simd::alignment);
+
+ // For each horizontal panel of B, and corresponding vertical panel of A
+ for(size_t k2=0; k2<depth; k2+=kc){
+
+ const size_t actual_kc = std::min(k2+kc,depth)-k2;
+ FFLASFFPACK_check(kc <= depth);
+
+ // pack horizontal panel of B into sequential memory (L2 cache)
+ if (tB == FflasNoTrans)
+ FFLAS::details::pack_rhs<_nr,false>(blockB, B+k2, ldb, actual_kc, cols);
+ else
+ FFLAS::details::pack_lhs<_nr,true>(blockB, B+k2*ldb, ldb, cols, actual_kc);
+
+ // For each mc x kc block of the lhs's vertical panel...
+ for(size_t i2=0; i2<rows; i2+=mc){
+
+ const size_t actual_mc = std::min(i2+mc,rows)-i2;
+
+
+ FFLASFFPACK_check(mc <= rows);
+ // pack a chunk of the vertical panel of A into a sequential memory (L1 cache)
+ if (tA == FflasNoTrans)
+ FFLAS::details::pack_lhs<_mr,false>(blockA, A+i2+k2*lda, lda, actual_mc, actual_kc);
+ else
+ FFLAS::details::pack_rhs<_mr,true>(blockA, A+i2*lda+k2, lda, actual_kc, actual_mc);
+
+ // call block*panel kernel
+ FFLAS::details::igebp<alpha_kind>(actual_mc, cols, actual_kc
+ , alpha
+ , blockA, actual_kc, blockB, actual_kc
+ , C+i2, ldc
+ , blockW);
+ }
+ }
+
+ fflas_delete(blockA);
+ fflas_delete(blockB);
+ fflas_delete(blockW);
+ }
+
+ void igemm( const enum FFLAS_TRANSPOSE TransA, const enum FFLAS_TRANSPOSE TransB,
+ size_t rows, size_t cols, size_t depth
+ , const int64_t alpha
+ , const int64_t* A, size_t lda, const int64_t* B, size_t ldb
+ , const int64_t beta
+ , int64_t* C, size_t ldc
+ )
+ {
+ if (!rows || !cols) {
+ return ;
+ }
+
+ //! @todo use primitive (no Field()) and specialise for int64.
+ // CP: fscalin assumes C in row major mode and we are here in col major mode
+ // hence let's transpose the arguments.
+ fscalin(Givaro::ZRing<int64_t>(),cols,rows, beta,C,ldc);
+ if (!depth || alpha == 0) {
+ return ;
+ }
+ if (TransA == FflasNoTrans) {
+ if (TransB == FflasNoTrans) {
+ igemm_colmajor<FflasNoTrans,FflasNoTrans>(rows, cols, depth, alpha, A, lda, B, ldb, C, ldc);
+ }
+ else {
+ igemm_colmajor<FflasNoTrans,FflasTrans>(rows, cols, depth, alpha, A, lda, B, ldb, C, ldc);
+ }
+ }
+ else {
+ if (TransB == FflasNoTrans) {
+ igemm_colmajor<FflasTrans,FflasNoTrans>(rows, cols, depth, alpha, A, lda, B, ldb, C, ldc);
+ }
+ else {
+ igemm_colmajor<FflasTrans,FflasTrans>(rows, cols, depth, alpha, A, lda, B, ldb, C, ldc);
+ }
+ }
+ }
+
+} // Protected
+} // FFLAS
+
+
+// igemm
+
+namespace FFLAS {
+ inline void igemm_(const enum FFLAS_ORDER Order, const enum FFLAS_TRANSPOSE TransA, const enum FFLAS_TRANSPOSE TransB,
+ const size_t M, const size_t N, const size_t K,
+ const int64_t alpha,
+ const int64_t *A, const size_t lda,
+ const int64_t *B, const size_t ldb,
+ const int64_t beta,
+ int64_t *C, const size_t ldc)
+ {
+
+
+ if (Order == FflasColMajor)
+ Protected::igemm(TransA,TransB,M,N,K,alpha,A,lda,B,ldb,beta,C,ldc);
+ else
+ Protected::igemm(TransB,TransA,N,M,K,alpha,B,ldb,A,lda,beta,C,ldc);
+ }
+
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_igemm_igemm_INL
+
diff --git a/fflas-ffpack/fflas/fflas_igemm/igemm_kernels.h b/fflas-ffpack/fflas/fflas_igemm/igemm_kernels.h
new file mode 100644
index 0000000..e323ebb
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_igemm/igemm_kernels.h
@@ -0,0 +1,100 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2013,2014 Pascal Giorgi
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ * the code is inspired and adapted from the Eigen library
+ * modified by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_igemm_igemm_kernels_H
+#define __FFLASFFPACK_fflas_igemm_igemm_kernels_H
+
+namespace FFLAS { namespace details {
+
+
+ /* ************* */
+ /* GEBP KERNELS */
+ /* ************* */
+
+ template<enum number_kind K>
+ inline void igebb44(size_t i, size_t j, size_t depth, size_t pdeth
+ , const int64_t alpha
+ , const int64_t *blA, const int64_t* blB
+ , int64_t* C, size_t ldc
+ );
+
+ template<enum number_kind K>
+ inline void igebb24(size_t i, size_t j, size_t depth, size_t pdeth
+ , const int64_t alpha
+ , const int64_t *blA, const int64_t* blB
+ , int64_t* C, size_t ldc
+ );
+
+ template<enum number_kind K>
+ inline void igebb14(size_t i, size_t j, size_t depth, size_t pdeth
+ , const int64_t alpha
+ , const int64_t *blA, const int64_t* blB
+ , int64_t* C, size_t ldc
+ );
+
+ template<enum number_kind K>
+ inline void igebb41(size_t i, size_t j, size_t depth, size_t pdeth
+ , const int64_t alpha
+ , const int64_t *blA, const int64_t* blB
+ , int64_t* C, size_t ldc
+ );
+
+ template<enum number_kind K>
+ inline void igebb21(size_t i, size_t j, size_t depth, size_t pdeth
+ , const int64_t alpha
+ , const int64_t *blA, const int64_t* blB
+ , int64_t* C, size_t ldc
+ );
+
+ template<enum number_kind K>
+ inline void igebb11(size_t i, size_t j, size_t depth, size_t pdeth
+ , const int64_t alpha
+ , const int64_t *blA, const int64_t* blB
+ , int64_t* C, size_t ldc
+ );
+
+
+ /*************************
+ * MAIN GEBP OPERATION *
+ ************************/
+
+ template<enum number_kind K>
+ void igebp( size_t rows, size_t cols, size_t depth
+ , const int64_t alpha
+ , const int64_t* blockA, size_t lda,
+ const int64_t* blockB, size_t ldb,
+ int64_t* C, size_t ldc,
+ int64_t* blockW);
+
+} // details
+} // FFLAS
+
+#include "igemm_kernels.inl" // could be .C
+
+#endif // __FFLASFFPACK_fflas_igemm_igemm_kernels_H
diff --git a/fflas-ffpack/fflas/fflas_igemm/igemm_kernels.inl b/fflas-ffpack/fflas/fflas_igemm/igemm_kernels.inl
new file mode 100644
index 0000000..567d0f8
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_igemm/igemm_kernels.inl
@@ -0,0 +1,578 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2013,2014 Pascal Giorgi
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ * the code is inspired and adapted from the Eigen library
+ * modified by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_igemm_igemm_kernels_INL
+#define __FFLASFFPACK_fflas_igemm_igemm_kernels_INL
+
+
+#ifdef __AVX2__
+#define _nr 4
+#define _mr 8
+#define StepA 4
+#define StepB 4
+#elif defined(__SSE4_1__) or defined(__AVX__)
+#define _nr 4
+#define _mr 4
+#define StepA 2
+#define StepB 2
+#else
+#error "kernels not supported"
+#endif
+
+#include "fflas-ffpack/utils/fflas_memory.h"
+#include "igemm_tools.h"
+
+/********************************************************
+ * KERNEL FOR MATMUL USING SIMD OPERATION AND REGISTERS *
+ ********************************************************/
+
+namespace FFLAS { namespace details { /* kernels */
+
+ template<enum number_kind K>
+ inline void igebb44(size_t i, size_t j, size_t depth, size_t pdepth
+ , const int64_t alpha
+ , const int64_t *blA, const int64_t* blB
+ , int64_t* C, size_t ldc
+ )
+ {
+ using simd = Simd<int64_t>;
+ using vect_t = typename simd::vect_t;
+ size_t k;
+ vect_t C0,C1,C2,C3,C4,C5,C6,C7;
+ C0 = simd::zero();
+ C1 = simd::zero();
+ C2 = simd::zero();
+ C3 = simd::zero();
+ C4 = simd::zero();
+ C5 = simd::zero();
+ C6 = simd::zero();
+ C7 = simd::zero();
+ int64_t *r0 = C+j*ldc+i;
+ int64_t *r1 = r0+ldc;
+ int64_t *r2 = r1+ldc;
+ int64_t *r3 = r2+ldc;
+ prefetch(r0+simd::vect_size);
+ prefetch(r1+simd::vect_size);
+ prefetch(r2+simd::vect_size);
+ prefetch(r3+simd::vect_size);
+ // process the loop by (_mrx4) by (4x4) matrix mul
+ for (k=0;k<pdepth;k+=4){
+ vect_t A0,A1;
+ vect_t B0,B1,B2,B3;
+ A0 = simd::load( blA+0*StepA);
+ A1 = simd::load( blA+1*StepA);
+ B0 = simd::load( blB+0*StepB);
+ B1 = simd::load( blB+1*StepB);
+ simd::fmaddxin(C0,A0,B0);
+ B2 = simd::load( blB+2*StepB);
+ simd::fmaddxin(C4,A1,B0); // B0
+ B3 = simd::load( blB+3*StepB);
+ B0 = simd::load( blB+4*StepB);
+ simd::fmaddxin(C1,A0,B1);
+ simd::fmaddxin(C5,A1,B1); // B1
+ B1 = simd::load( blB+5*StepB);
+ simd::fmaddxin(C2,A0,B2);
+ simd::fmaddxin(C6,A1,B2); // B2
+ B2 = simd::load( blB+6*StepB);
+ simd::fmaddxin(C3,A0,B3);
+ A0 = simd::load( blA+2*StepA);
+ simd::fmaddxin(C7,A1,B3); // B3
+ A1 = simd::load( blA+3*StepA);
+ B3 = simd::load( blB+7*StepB);
+ simd::fmaddxin(C0,A0,B0);
+ simd::fmaddxin(C4,A1,B0); // B0
+ B0 = simd::load( blB+8*StepB);
+ simd::fmaddxin(C1,A0,B1);
+ simd::fmaddxin(C5,A1,B1); // B1
+ B1 = simd::load( blB+9*StepB);
+ simd::fmaddxin(C2,A0,B2);
+ simd::fmaddxin(C6,A1,B2); // B2
+ B2 = simd::load( blB+10*StepB);
+ simd::fmaddxin(C3,A0,B3);
+ A0 = simd::load( blA+4*StepA);
+ simd::fmaddxin(C7,A1,B3); // B3
+ A1 = simd::load( blA+5*StepA);
+ B3 = simd::load( blB+11*StepB);
+ simd::fmaddxin(C0,A0,B0);
+ simd::fmaddxin(C4,A1,B0); // B0
+ B0 = simd::load( blB+12*StepB);
+ simd::fmaddxin(C1,A0,B1);
+ simd::fmaddxin(C5,A1,B1); // B1
+ B1 = simd::load( blB+13*StepB);
+ simd::fmaddxin(C2,A0,B2);
+ simd::fmaddxin(C6,A1,B2); // B2
+ B2 = simd::load( blB+14*StepB);
+ simd::fmaddxin(C3,A0,B3);
+ A0 = simd::load( blA+6*StepA);
+ simd::fmaddxin(C7,A1,B3); // B3
+ A1 = simd::load( blA+7*StepA);
+ B3 = simd::load( blB+15*StepB);
+ simd::fmaddxin(C0,A0,B0);
+ simd::fmaddxin(C4,A1,B0); // B0
+ simd::fmaddxin(C1,A0,B1);
+ simd::fmaddxin(C5,A1,B1); // B1
+ simd::fmaddxin(C2,A0,B2);
+ simd::fmaddxin(C6,A1,B2); // B2
+ simd::fmaddxin(C3,A0,B3);
+ simd::fmaddxin(C7,A1,B3); // B3
+ blA+= 8*StepA;
+ blB+=16*StepB;
+ }
+ // process (depth mod 4) remaining entries by (_mrx1) by (1x4) matrix mul
+ for(;k<depth;k++){
+ vect_t A0,A1;
+ vect_t B0,B1,B2,B3;
+ A0 = simd::load( blA+0*StepA);
+ A1 = simd::load( blA+1*StepA);
+ B0 = simd::load( blB+0*StepB);
+ B1 = simd::load( blB+1*StepB);
+ simd::fmaddxin(C0,A0,B0);
+ B2 = simd::load( blB+2*StepB);
+ simd::fmaddxin(C4,A1,B0); // B0
+ B3 = simd::load( blB+3*StepB);
+ simd::fmaddxin(C1,A0,B1);
+ simd::fmaddxin(C5,A1,B1); // B1
+ simd::fmaddxin(C2,A0,B2);
+ simd::fmaddxin(C6,A1,B2); // B2
+ simd::fmaddxin(C3,A0,B3);
+ simd::fmaddxin(C7,A1,B3); // B3
+ blA+=2*StepA;
+ blB+=4*StepB;
+ }
+ vect_t R0, R1, R2, R3, R4, R5, R6;
+ vect_t A0 ;
+ A0 = simd::set1(alpha);
+ R0 = simd::loadu( r0);
+ R1 = simd::loadu( r1);
+ R2 = simd::loadu( r2);
+ R3 = simd::loadu( r3);
+ R4 = simd::loadu( r0+simd::vect_size);
+ R5 = simd::loadu( r1+simd::vect_size);
+ R6 = simd::loadu( r2+simd::vect_size);
+ if (K == number_kind::one) {
+ simd::addin(R0,C0);
+ }
+ if (K == number_kind::mone) {
+ simd::subin(R0,C0);
+ }
+ if (K == number_kind::other) {
+ simd::fmaddxin(R0,A0,C0);
+ }
+ simd::storeu(r0,R0);
+ R0 = simd::loadu( r3+simd::vect_size);
+ if (K == number_kind::one) {
+ simd::addin(R1,C1);
+ simd::addin(R2,C2);
+ simd::addin(R3,C3);
+ simd::addin(R4,C4);
+ simd::addin(R5,C5);
+ simd::addin(R6,C6);
+ simd::addin(R0,C7);
+ }
+ if (K == number_kind::mone) {
+ simd::subin(R1,C1);
+ simd::subin(R2,C2);
+ simd::subin(R3,C3);
+ simd::subin(R4,C4);
+ simd::subin(R5,C5);
+ simd::subin(R6,C6);
+ simd::subin(R0,C7);
+ }
+ if (K == number_kind::other) {
+ simd::fmaddxin(R1,A0,C1);
+ simd::fmaddxin(R2,A0,C2);
+ simd::fmaddxin(R3,A0,C3);
+ simd::fmaddxin(R4,A0,C4);
+ simd::fmaddxin(R5,A0,C5);
+ simd::fmaddxin(R6,A0,C6);
+ simd::fmaddxin(R0,A0,C7);
+ }
+ simd::storeu(r1,R1);
+ simd::storeu(r2,R2);
+ simd::storeu(r3,R3);
+ simd::storeu(r0+simd::vect_size,R4);
+ simd::storeu(r1+simd::vect_size,R5);
+ simd::storeu(r2+simd::vect_size,R6);
+ simd::storeu(r3+simd::vect_size,R0);
+
+ }
+
+
+ template<enum number_kind K>
+ inline void igebb24(size_t i, size_t j, size_t depth, size_t pdepth
+ , const int64_t alpha
+ , const int64_t *blA, const int64_t* blB
+ , int64_t* C, size_t ldc
+ )
+ {
+ using simd = Simd<int64_t>;
+ using vect_t = typename simd::vect_t;
+
+ //cout<<"aligned 32:"<< int64_t( blA)% 32 <<endl;
+ size_t k;
+ vect_t C0,C1,C2,C3;
+ C0 = simd::zero();
+ C1 = simd::zero();
+ C2 = simd::zero();
+ C3 = simd::zero();
+ int64_t *r0 = C+j*ldc+i;
+ int64_t *r1 = r0+ldc;
+ int64_t *r2 = r1+ldc;
+ int64_t *r3 = r2+ldc;
+ // process the loop by (1/2_mrx4) by (4x4) matrix mul
+ for (k=0;k<pdepth;k+=4){
+ vect_t A0;
+ vect_t B0,B1,B2,B3;
+ A0 = simd::load( blA+0*StepA);
+ B0 = simd::load( blB+0*StepB);
+ B1 = simd::load( blB+1*StepB);
+ simd::fmaddxin(C0,A0,B0);
+ B2 = simd::load( blB+2*StepB);
+ B3 = simd::load( blB+3*StepB);
+ B0 = simd::load( blB+4*StepB);
+ simd::fmaddxin(C1,A0,B1);
+ B1 = simd::load( blB+5*StepB);
+ simd::fmaddxin(C2,A0,B2);
+ B2 = simd::load( blB+6*StepB);
+ simd::fmaddxin(C3,A0,B3);
+ A0 = simd::load( blA+1*StepA);
+ B3 = simd::load( blB+7*StepB);
+ simd::fmaddxin(C0,A0,B0);
+ B0 = simd::load( blB+8*StepB);
+ simd::fmaddxin(C1,A0,B1);
+ B1 = simd::load( blB+9*StepB);
+ simd::fmaddxin(C2,A0,B2);
+ B2 = simd::load( blB+10*StepB);
+ simd::fmaddxin(C3,A0,B3);
+ A0 = simd::load( blA+2*StepA);
+ B3 = simd::load( blB+11*StepB);
+ simd::fmaddxin(C0,A0,B0);
+ B0 = simd::load( blB+12*StepB);
+ simd::fmaddxin(C1,A0,B1);
+ B1 = simd::load( blB+13*StepB);
+ simd::fmaddxin(C2,A0,B2);
+ B2 = simd::load( blB+14*StepB);
+ simd::fmaddxin(C3,A0,B3);
+ A0 = simd::load( blA+3*StepA);
+ B3 = simd::load( blB+15*StepB);
+ simd::fmaddxin(C0,A0,B0);
+ simd::fmaddxin(C1,A0,B1);
+ simd::fmaddxin(C2,A0,B2);
+ simd::fmaddxin(C3,A0,B3);
+ blA+= 4*StepA;
+ blB+=16*StepB;
+ }
+ // process (depth mod 4) remaining entries by (1/2_mrx1) by (1x4) matrix mul
+ for(;k<depth;k++){
+ vect_t A0;
+ vect_t B0,B1,B2,B3;
+ A0 = simd::load( blA+0*StepA);
+ B0 = simd::load( blB+0*StepB);
+ B1 = simd::load( blB+1*StepB);
+ simd::fmaddxin(C0,A0,B0);
+ B2 = simd::load( blB+2*StepB);
+ B3 = simd::load( blB+3*StepB);
+ simd::fmaddxin(C1,A0,B1);
+ simd::fmaddxin(C2,A0,B2);
+ simd::fmaddxin(C3,A0,B3);
+ blA+=StepA;
+ blB+=4*StepB;
+ }
+ vect_t R0, R1, R2, R3;
+ vect_t A0 ;
+ A0 = simd::set1(alpha);
+ R0 = simd::loadu( r0);
+ R1 = simd::loadu( r1);
+ R2 = simd::loadu( r2);
+ R3 = simd::loadu( r3);
+ if ( K == number_kind::one) {
+ simd::addin(R0,C0);
+ simd::addin(R1,C1);
+ simd::addin(R2,C2);
+ simd::addin(R3,C3);
+ }
+ if ( K == number_kind::mone) {
+ simd::subin(R0,C0);
+ simd::subin(R1,C1);
+ simd::subin(R2,C2);
+ simd::subin(R3,C3);
+ }
+ if ( K == number_kind::other) {
+ simd::fmaddxin(R0,A0,C0);
+ simd::fmaddxin(R1,A0,C1);
+ simd::fmaddxin(R2,A0,C2);
+ simd::fmaddxin(R3,A0,C3);
+ }
+ simd::storeu(r0,R0);
+ simd::storeu(r1,R1);
+ simd::storeu(r2,R2);
+ simd::storeu(r3,R3);
+
+ }
+
+
+ template<enum number_kind K>
+ inline void igebb14(size_t i, size_t j, size_t depth, size_t pdepth
+ , const int64_t alpha
+ , const int64_t *blA, const int64_t* blB
+ , int64_t* C, size_t ldc
+ )
+ {
+ // using simd = Simd<int64_t>;
+ // using vect_t = typename simd::vect_t;
+
+ size_t k;
+ int64_t *r0 = C+j*ldc+i;
+ int64_t *r1 = r0+ldc;
+ int64_t *r2 = r1+ldc;
+ int64_t *r3 = r2+ldc;
+ for(k=0;k<depth;k++){
+ if (K == number_kind::one) {
+ r0[0]+=blA[0]*blB[0];
+ r1[0]+=blA[0]*blB[1];
+ r2[0]+=blA[0]*blB[2];
+ r3[0]+=blA[0]*blB[3];
+ }
+ if (K == number_kind::mone) {
+ r0[0]-=blA[0]*blB[0];
+ r1[0]-=blA[0]*blB[1];
+ r2[0]-=blA[0]*blB[2];
+ r3[0]-=blA[0]*blB[3];
+ }
+ if (K == number_kind::other) {
+ int64_t abla = alpha*blA[0];
+ r0[0]+=abla*blB[0];
+ r1[0]+=abla*blB[1];
+ r2[0]+=abla*blB[2];
+ r3[0]+=abla*blB[3];
+ }
+
+ blA++;
+ blB+=4;
+ }
+ }
+
+
+ template<enum number_kind K>
+ inline void igebb41(size_t i, size_t j, size_t depth, size_t pdepth
+ , const int64_t alpha
+ , const int64_t *blA, const int64_t* blB
+ , int64_t* C, size_t ldc
+ )
+ {
+ using simd = Simd<int64_t>;
+ using vect_t = typename simd::vect_t;
+
+ size_t k;
+ vect_t C0,C4;
+ C0 = simd::zero();
+ C4 = simd::zero();
+ int64_t *r0 = C+j*ldc+i;
+ int64_t *r4 = r0+simd::vect_size;
+
+ // process the loop by (_mrx1) by (1x1) matrix mul
+ for (k=0;k<depth;k++){
+ vect_t A0,A1;
+ vect_t B0;
+ A0 = simd::load( blA+0*StepA);
+ A1 = simd::load( blA+1*StepA);
+ B0 = simd::load( blB+0*StepB);
+ simd::fmaddxin(C0,A0,B0);
+ simd::fmaddxin(C4,A1,B0); //! bug ,B0 dans VEC_MADD_32 ?
+ blA+= 2*StepA;
+ blB+= 1*StepB;
+ }
+ vect_t R0, R4;
+ R0 = simd::loadu( r0);
+ R4 = simd::loadu( r4);
+ vect_t A0 ;
+ A0 = simd::set1(alpha);
+ if (K == number_kind::one) {
+ simd::addin(R0,C0);
+ simd::addin(R4,C4);
+ }
+ if (K == number_kind::mone) {
+ simd::subin(R0,C0);
+ simd::subin(R4,C4);
+ }
+ if (K == number_kind::other) {
+ simd::fmaddxin(R0,A0,C0);
+ simd::fmaddxin(R4,A0,C4);
+ }
+ simd::storeu(r0,R0);
+ simd::storeu(r4,R4);
+ }
+
+
+ template<enum number_kind K>
+ inline void igebb21(size_t i, size_t j, size_t depth, size_t pdepth
+ , const int64_t alpha
+ , const int64_t *blA, const int64_t* blB
+ , int64_t* C, size_t ldc
+ )
+ {
+ using simd = Simd<int64_t>;
+ using vect_t = typename simd::vect_t;
+
+ size_t k;
+ vect_t C0;
+ C0 = simd::zero();
+ int64_t *r0 = C+j*ldc+i;
+
+ // process the loop by (1/2_mrx1) by (1x1) matrix mul
+ for (k=0;k<depth;k++){
+ vect_t A0;
+ vect_t B0;
+ A0 = simd::load( blA+0*StepA);
+ B0 = simd::load( blB+0*StepB);
+ simd::fmaddxin(C0,A0,B0);
+ blA+= 1*StepA;
+ blB+= 1*StepB;
+ }
+ vect_t R0;
+ vect_t A0 ;
+ A0 = simd::set1(alpha);
+
+ R0 = simd::loadu( r0);
+ if ( K == number_kind::one)
+ simd::addin(R0,C0);
+ if ( K == number_kind::mone)
+ simd::subin(R0,C0);
+ if ( K == number_kind::other)
+ simd::fmaddxin(R0,A0,C0);
+ simd::storeu(r0,R0);
+ }
+
+
+ template<enum number_kind K>
+ inline void igebb11(size_t i, size_t j, size_t depth, size_t pdepth
+ , const int64_t alpha
+ , const int64_t *blA, const int64_t* blB
+ , int64_t* C, size_t ldc
+ )
+ {
+ // using simd = Simd<int64_t>;
+ // using vect_t = typename simd::vect_t;
+ size_t k;
+ int64_t *r0 = C+j*ldc+i;
+ for(k=0;k<depth;k++){
+ if (K == number_kind::one)
+ r0[0]+=blA[k]*blB[k];
+ if ( K == number_kind::mone)
+ r0[0]-=blA[k]*blB[k];
+ if ( K == number_kind::other)
+ r0[0]+=alpha*blA[k]*blB[k];
+ }
+ }
+
+
+} // details
+} // FFLAS
+
+
+/*************************
+ * MAIN GEBP OPERATION *
+ ************************/
+
+namespace FFLAS { namespace details { /* main */
+
+ template<enum number_kind K>
+ void igebp( size_t rows, size_t cols, size_t depth
+ , const int64_t alpha
+ , const int64_t* blockA, size_t lda,
+ const int64_t* blockB, size_t ldb,
+ int64_t* C, size_t ldc,
+ int64_t* blockW)
+ {
+ using simd = Simd<int64_t>;
+ // using vect_t = typename simd::vect_t;
+ size_t i,j;
+ size_t prows,pcols,pdepth;
+ prows=(rows/_mr)*_mr;
+ pcols=(cols/_nr)*_nr;
+ pdepth=(depth/4)*4;
+ // process columns by pack of _nr
+ for(j=0;j<pcols;j+=_nr){
+ duplicate_vect<simd::vect_size>(blockW, blockB+j*ldb,depth*_nr);
+ prefetch(blockW);
+ // process rows by pack of _mr
+ for (i=0;i<prows;i+=_mr){
+ const int64_t* blA = blockA+i*lda;
+ prefetch(blA);
+ igebb44<K>(i, j, depth, pdepth, alpha, blA, blockW, C, ldc);
+ }
+ i=prows;
+ // process the (rows%_mr) remainings rows
+ int rem=(int)(rows-prows);
+ while (rem >0) {
+ if (rem>=(int)simd::vect_size){
+ igebb24<K>(i ,j,depth, pdepth, alpha , blockA+i*lda, blockW, C, ldc);
+ i+=simd::vect_size;
+ rem-=(int)simd::vect_size;
+ }
+ else{ // use blockB since no vectorization
+ igebb14<K>(i,j,depth, pdepth, alpha, blockA+i*lda, blockB+j*ldb, C, ldc);
+ i++;
+ rem--;
+ }
+ }
+ }
+ // process the (columns%_nr) remaining columns one by one
+ for (;j<cols;j++){
+ duplicate_vect<simd::vect_size>(blockW, blockB+j*ldb,depth);
+ prefetch(blockW);
+ // process rows by pack of _mr
+ for (i=0;i<prows;i+=_mr){
+ const int64_t* blA = blockA+i*lda;
+ prefetch(blA);
+ igebb41<K>(i, j, depth, pdepth, alpha, blA, blockW, C, ldc);
+ }
+ i=prows;
+ // process the (rows%_mr) remainings rows
+ int rem=(int)(rows-prows);
+ while (rem >0) {
+ if (rem>=(int)simd::vect_size){
+ igebb21<K>(i ,j,depth, pdepth, alpha, blockA+i*lda, blockW, C, ldc);
+ i+=simd::vect_size;
+ rem-=(int)(simd::vect_size);
+ }
+ else{ // use blockB since no vectorization
+ igebb11<K>(i,j,depth, pdepth, alpha, blockA+i*lda, blockB+j*ldb, C, ldc);
+ i++;
+ rem--;
+ }
+ }
+ }
+ }
+
+
+} // details
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_igemm_igemm_kernels_INL
diff --git a/fflas-ffpack/fflas/fflas_igemm/igemm_tools.h b/fflas-ffpack/fflas/fflas_igemm/igemm_tools.h
new file mode 100644
index 0000000..3a415cf
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_igemm/igemm_tools.h
@@ -0,0 +1,62 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2013,2014 Pascal Giorgi
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ * the code is inspired and adapted from the Eigen library
+ * modified by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_igemm_igemm_tools_H
+#define __FFLASFFPACK_fflas_igemm_igemm_tools_H
+
+
+/* ***** */
+/* TOOLS */
+/* ***** */
+
+namespace FFLAS { namespace details { /* tools */
+
+ // duplicate each entry into vector register
+ template<size_t N>
+ inline void duplicate_vect (int64_t* XX, const int64_t* X, size_t n){}
+
+ template<size_t k,bool transpose>
+ void pack_lhs(int64_t* XX, const int64_t* X, size_t ldx, size_t rows, size_t cols);
+
+ template<size_t k, bool transpose>
+ void pack_rhs(int64_t* XX, const int64_t* X, size_t ldx, size_t rows, size_t cols);
+
+ void gebp(size_t rows, size_t cols, size_t depth,int64_t* C, size_t ldc, const int64_t* blockA, size_t lda,
+ const int64_t* BlockB, size_t ldb, int64_t* BlockW);
+
+ void BlockingFactor(size_t& m, size_t& n, size_t& k);
+
+
+} // details
+} // FFLAS
+
+#include "igemm_tools.inl"
+
+#endif // __FFLASFFPACK_fflas_igemm_igemm_tools_H
+
diff --git a/fflas-ffpack/fflas/fflas_igemm/igemm_tools.inl b/fflas-ffpack/fflas/fflas_igemm/igemm_tools.inl
new file mode 100644
index 0000000..59dad33
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_igemm/igemm_tools.inl
@@ -0,0 +1,167 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2013,2014 Pascal Giorgi
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ * the code is inspired and adapted from the Eigen library
+ * modified by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_igemm_igemm_tools_INL
+#define __FFLASFFPACK_fflas_igemm_igemm_tools_INL
+
+#include "fflas-ffpack/fflas/fflas_simd.h"
+
+namespace FFLAS { namespace details {
+
+ template<>
+ inline void duplicate_vect<2>(int64_t* XX, const int64_t* X, size_t n)
+ {
+ int64_t *p=XX;
+ for(size_t i=0;i<n;i++){
+ p[0]=X[i];
+ p[1]=X[i];
+ p+=2;
+ }
+ }
+
+ template<>
+ inline void duplicate_vect<4>(int64_t* XX, const int64_t* X, size_t n)
+ {
+ int64_t *p=XX;
+ for(size_t i=0;i<n;i++){
+ p[0]=X[i];
+ p[1]=X[i];
+ p[2]=X[i];
+ p[3]=X[i];
+ p+=4;
+ }
+ }
+
+ // store each rows x k submatrices of Rhs in row major mode
+ // if k does not divide cols, the remaining column are not packed
+ template<size_t k, bool transpose>
+ void pack_rhs(int64_t* XX, const int64_t* X, size_t ldx, size_t rows, size_t cols)
+ {
+ size_t cols_by_k=(cols/k)*k;
+ size_t p=0;
+ // pack by k columns
+ for(size_t j=0;j<cols_by_k;j+=k){
+ for(size_t i=0;i<rows;i++)
+ //! @bug this is fassign
+ for (size_t l=0;l<k;l++,p++) {
+ XX[p]=X[i+(j+l)*ldx];
+ }
+ }
+ if (transpose){
+ if (cols-cols_by_k>=StepA){
+ for(size_t i=0;i<rows;i++) {
+ for (size_t l=0;l<StepA;l++,p++)
+ XX[p]=X[i+(cols_by_k+l)*ldx];
+ }
+ cols_by_k+=StepA;
+ }
+ }
+ // the remaining columns are not packed
+ for(size_t j=cols_by_k;j<cols;j++)
+ //! @bug this is fassign
+ for(size_t i=0;i<rows;i++,p++) {
+ XX[p]=X[i+j*ldx];
+ }
+ }
+
+
+ // store each k x cols submatrices of Lhs in column major mode
+ // if k does not divide rows, the remaining rows are not packed
+ template<size_t k, bool transpose>
+ void pack_lhs(int64_t* XX, const int64_t* X, size_t ldx, size_t rows, size_t cols)
+ {
+ using simd = Simd<int64_t> ;
+ size_t p=0;
+ size_t rows_by_k=(rows/k)*k;
+ // pack rows by group of k
+ for(size_t i=0;i<rows_by_k;i+=k)
+ for(size_t j=0;j<cols;j++) {
+ // for (size_t l=0;l<k;l++,p++) XX[p]=X[i+l+j*ldx];
+ FFLASFFPACK_check(k%simd::vect_size == 0);
+ //! @bug this is fassign
+ for (size_t l=0;l<k;l+= simd::vect_size, p+=simd::vect_size){
+ simd::store(&XX[p],simd::loadu(&X[i+l+j*ldx]));
+ }
+ }
+ // the remaining rows are packed by group of StepA (if possible)
+ if (!transpose) {
+ if (rows-rows_by_k>=StepA){
+ for(size_t j=0;j<cols;j++) {
+ // for (size_t l=0;l<StepA;l++,p++) XX[p]=X[rows_by_k+l+j*ldx];
+ FFLASFFPACK_check(StepA%simd::vect_size == 0);
+ for (size_t l=0;l<StepA;l+=simd::vect_size,p+=simd::vect_size){
+ simd::store(&XX[p],simd::loadu(&X[rows_by_k+l+j*ldx]));
+ }
+ }
+ rows_by_k+=StepA;
+ }
+ }
+ for(size_t i=rows_by_k;i<rows;i++) {
+ //! @bug this is fassign
+ for(size_t j=0;j<cols;j++,p++){
+ XX[p]=X[i+j*ldx];
+ }
+ }
+
+ }
+
+ inline void BlockingFactor(size_t& m, size_t& n, size_t& k)
+ {
+ int l1, l2, l3, tlb;
+ queryCacheSizes(l1,l2,l3);
+ getTLBSize(tlb);
+ /*
+ cout<<"Cache size: ";
+ cout<<"L1 ("<<l1<<") ";
+ cout<<"L2 ("<<l2<<") ";
+ cout<<"L3 ("<<l3<<") ";
+ cout<<"TLB ("<<tlb<<") ";
+ cout<<endl;
+ */
+ l2=std::max(l2,l3);
+ if (tlb)
+ l2=std::min(l2,tlb);
+ size_t kc,mc;
+ // kc * 2*(_mr+_nr) must fit in L1 cache
+ // kc * (n+mc) must fit in L2 cache and in TLB
+ size_t kdiv= 2*(_nr+_mr)*sizeof(int64_t);
+ kc = std::min(k, l1/kdiv);
+ mc = std::min(m, l2/(sizeof(int64_t) * kc));
+ k=kc;
+ m=mc;
+ //cout<<"mc="<<m<<endl;
+ //cout<<"kc="<<k<<endl;
+ }
+
+
+} // details
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_igemm_igemm_tools_INL
+
diff --git a/fflas-ffpack/fflas/fflas_level1.inl b/fflas-ffpack/fflas/fflas_level1.inl
new file mode 100644
index 0000000..811a9de
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_level1.inl
@@ -0,0 +1,431 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_level1.h
+ * @brief Vector operations
+ * or anything of \f$n\f$ complexity
+ */
+
+#ifndef __FFLASFFPACK_fflas_fflas_level1_INL
+#define __FFLASFFPACK_fflas_fflas_level1_INL
+
+namespace FFLAS {
+
+
+
+ //---------------------------------------------------------------------
+ // Level 1 routines
+ //---------------------------------------------------------------------
+
+ /** freduce
+ * \f$x \gets x mod F\f$.
+ * @param F field
+ * @param n size of the vectors
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * @bug use cblas_(d)scal when possible
+ */
+ template<class Field>
+ void
+ freduce (const Field& F, const size_t n,
+ typename Field::Element_ptr X, const size_t incX);
+
+ /** freduce
+ * \f$x \gets y mod F\f$.
+ * @param F field
+ * @param n size of the vectors
+ * \param Y vector of \p Element
+ * \param incY stride of \p Y
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * @bug use cblas_(d)scal when possible
+ */
+ template<class Field>
+ void
+ freduce (const Field& F, const size_t n,
+ typename Field::ConstElement_ptr Y, const size_t incY,
+ typename Field::Element_ptr X, const size_t incX);
+
+ /** finit
+ * \f$x \gets y mod F\f$.
+ * @param F field
+ * @param n size of the vectors
+ * \param Y vector of \p OtherElement
+ * \param incY stride of \p Y
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * @bug use cblas_(d)scal when possible
+ */
+ template<class Field, class OtherElement_ptr>
+ void
+ finit (const Field& F, const size_t n,
+ const OtherElement_ptr Y, const size_t incY,
+ typename Field::Element_ptr X, const size_t incX);
+
+ /** fconvert
+ * \f$x \gets y mod F\f$.
+ * @param F field
+ * @param n size of the vectors
+ * \param Y vector of \p F
+ * \param incY stride of \p Y
+ * \param X vector in \p OtherElement
+ * \param incX stride of \p X
+ * @bug use cblas_(d)scal when possible
+ */
+ template<class Field, class OtherElement_ptr>
+ void
+ fconvert (const Field& F, const size_t n,
+ OtherElement_ptr X, const size_t incX,
+ typename Field::ConstElement_ptr Y, const size_t incY)
+ {
+ OtherElement_ptr Xi = X ;
+ typename Field::ConstElement_ptr Yi = Y ;
+ for (; Xi < X+n*incX; Xi+=incX, Yi += incY )
+ F.convert( *Xi , *Yi);
+ }
+
+ /** fnegin
+ * \f$x \gets - x\f$.
+ * @param F field
+ * @param n size of the vectors
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * @bug use cblas_(d)scal when possible
+ */
+ template<class Field>
+ void
+ fnegin (const Field& F, const size_t n,
+ typename Field::Element_ptr X, const size_t incX)
+ {
+ typename Field::Element_ptr Xi = X ;
+ for (; Xi < X+n*incX; Xi+=incX )
+ F.negin( *Xi );
+ }
+
+ /** fneg
+ * \f$x \gets - y\f$.
+ * @param F field
+ * @param n size of the vectors
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * \param Y vector in \p F
+ * \param incY stride of \p Y
+ * @bug use cblas_(d)scal when possible
+ */
+ template<class Field>
+ void
+ fneg (const Field& F, const size_t n,
+ typename Field::ConstElement_ptr Y, const size_t incY,
+ typename Field::Element_ptr X, const size_t incX)
+ {
+ typename Field::Element_ptr Xi = X ;
+ typename Field::ConstElement_ptr Yi = Y ;
+ for (; Xi < X+n*incX; Xi+=incX,Yi+=incY )
+ F.neg( *Xi, *Yi );
+ }
+
+ /** \brief fzero : \f$A \gets 0 \f$.
+ * @param F field
+ * @param n number of elements to zero
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ */
+ template<class Field>
+ void
+ fzero (const Field& F, const size_t n,
+ typename Field::Element_ptr X, const size_t incX)
+ {
+ if (incX == 1) { // contigous data
+ // memset(X,(int)F.zero,n); // might be bogus ?
+ for (size_t i = 0 ; i < n ; ++i)
+ F.assign(*(X+i), F.zero);
+
+ }
+ else { // not contiguous (strided)
+ for (size_t i = 0 ; i < n ; ++i)
+ F.assign(*(X+i*incX), F.zero);
+ }
+ }
+
+ /** \brief frand : \f$A \gets random \f$.
+ * @param F field
+ * @param G randomiterator
+ * @param n number of elements to randomize
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ */
+ template<class Field, class RandIter>
+ void
+ frand (const Field& F, RandIter& G, const size_t n,
+ typename Field::Element_ptr X, const size_t incX)
+ {
+ if (incX == 1) { // contigous data
+ // memset(X,(int)F.zero,n); // might be bogus ?
+ for (size_t i = 0 ; i < n ; ++i)
+ G.random(*(X+i));
+
+ }
+ else { // not contiguous (strided)
+ for (size_t i = 0 ; i < n ; ++i)
+ G.random(*(X+i*incX));
+ }
+ }
+
+ /** \brief fiszero : test \f$X = 0 \f$.
+ * @param F field
+ * @param n vector dimension
+ * \param X vector in \p F
+ * \param incX increment of \p X
+ */
+ template<class Field>
+ bool
+ fiszero (const Field& F, const size_t n,
+ typename Field::ConstElement_ptr X, const size_t incX)
+ {
+ bool res=true;
+ for (size_t i = 0 ; i < n ; ++i)
+ res &= F.isZero (X [i*incX]);
+ return res;
+ }
+
+ /** \brief fequal : test \f$X = Y \f$.
+ * @param F field
+ * @param n vector dimension
+ * \param X vector in \p F
+ * \param incX increment of \p X
+ * \param Y vector in \p F
+ * \param incY increment of \p Y
+ */
+ template<class Field>
+ bool
+ fequal (const Field& F, const size_t n,
+ typename Field::ConstElement_ptr X, const size_t incX,
+ typename Field::ConstElement_ptr Y, const size_t incY)
+ {
+ bool res=true;
+ for (size_t i = 0 ; i < n ; ++i)
+ res &= F.areEqual (X [i*incX], Y [i*incY]);
+ return res;
+ }
+
+ /** \brief fassign : \f$x \gets y \f$.
+ * X is preallocated
+ * @todo variant for triagular matrix
+ * @param F field
+ * @param N size of the vectors
+ * \param [out] X vector in \p F
+ * \param incX stride of \p X
+ * \param [in] Y vector in \p F
+ * \param incY stride of \p Y
+ */
+ template<class Field>
+ void
+ fassign (const Field& F, const size_t N,
+ typename Field::ConstElement_ptr Y, const size_t incY ,
+ typename Field::Element_ptr X, const size_t incX);
+
+
+ /** fscalin
+ * \f$x \gets \alpha \cdot x\f$.
+ * @param F field
+ * @param n size of the vectors
+ * @param alpha scalar
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * @bug use cblas_(d)scal when possible
+ * @internal
+ * @todo check if comparison with +/-1,0 is necessary.
+ */
+ template<class Field>
+ void
+ fscalin (const Field& F, const size_t n, const typename Field::Element alpha,
+ typename Field::Element_ptr X, const size_t incX);
+
+
+ /** fscal
+ * \f$y \gets \alpha \cdot x\f$.
+ * @param F field
+ * @param n size of the vectors
+ * @param alpha scalar
+ * \param[in] X vector in \p F
+ * \param incX stride of \p X
+ * \param[out] Y vector in \p F
+ * \param incY stride of \p Y
+ * @bug use cblas_(d)scal when possible
+ * @internal
+ * @todo check if comparison with +/-1,0 is necessary.
+ */
+ template<class Field>
+ void
+ fscal (const Field& F, const size_t n
+ , const typename Field::Element alpha
+ , typename Field::ConstElement_ptr X, const size_t incX
+ , typename Field::Element_ptr Y, const size_t incY);
+
+
+
+ /** \brief faxpy : \f$y \gets \alpha \cdot x + y\f$.
+ * @param F field
+ * @param N size of the vectors
+ * @param alpha scalar
+ * \param[in] X vector in \p F
+ * \param incX stride of \p X
+ * \param[in,out] Y vector in \p F
+ * \param incY stride of \p Y
+ */
+ template<class Field>
+ void
+ faxpy (const Field& F, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr X, const size_t incX,
+ typename Field::Element_ptr Y, const size_t incY );
+
+ /** \brief faxpby : \f$y \gets \alpha \cdot x + \beta \cdot y\f$.
+ * @param F field
+ * @param N size of the vectors
+ * @param alpha scalar
+ * \param[in] X vector in \p F
+ * \param incX stride of \p X
+ * \param beta scalar
+ * \param[in,out] Y vector in \p F
+ * \param incY stride of \p Y
+ * \note this is a catlas function
+ */
+ template<class Field>
+ void
+ faxpby (const Field& F, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr X, const size_t incX,
+ const typename Field::Element beta,
+ typename Field::Element_ptr Y, const size_t incY );
+
+
+ /** \brief fdot: dot product \f$x^T y\f$.
+ * @param F field
+ * @param N size of the vectors
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * \param Y vector in \p F
+ * \param incY stride of \p Y
+ */
+ template<class Field>
+ typename Field::Element
+ fdot (const Field& F, const size_t N,
+ typename Field::ConstElement_ptr X, const size_t incX,
+ typename Field::ConstElement_ptr Y, const size_t incY );
+
+ /** \brief fswap: \f$ X \leftrightarrow Y\f$.
+ * @bug use cblas_dswap when double
+ * @param F field
+ * @param N size of the vectors
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * \param Y vector in \p F
+ * \param incY stride of \p Y
+ */
+ template<class Field>
+ void
+ fswap (const Field& F, const size_t N, typename Field::Element_ptr X, const size_t incX,
+ typename Field::Element_ptr Y, const size_t incY )
+ {
+
+ typename Field::Element tmp; F.init(tmp);
+ typename Field::Element_ptr Xi = X;
+ typename Field::Element_ptr Yi=Y;
+ for (; Xi < X+N*incX; Xi+=incX, Yi+=incY ){
+ F.assign( tmp, *Xi );
+ F.assign( *Xi, *Yi );
+ F.assign( *Yi, tmp );
+ }
+ }
+
+ template <class Field>
+ void
+ pfadd (const Field & F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc, const size_t numths);
+
+ template <class Field>
+ void
+ pfsub (const Field & F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc, const size_t numths);
+
+ template <class Field>
+ void
+ pfaddin (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc, size_t numths);
+
+ template <class Field>
+ void
+ pfsubin (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc, size_t numths);
+
+
+ template <class Field>
+ void
+ fadd (const Field& F, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t inca,
+ typename Field::ConstElement_ptr B, const size_t incb,
+ typename Field::Element_ptr C, const size_t incc);
+
+ template <class Field>
+ void
+ fsub (const Field& F, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t inca,
+ typename Field::ConstElement_ptr B, const size_t incb,
+ typename Field::Element_ptr C, const size_t incc);
+
+ template <class Field>
+ void
+ faddin (const Field& F, const size_t N,
+ typename Field::ConstElement_ptr B, const size_t incb,
+ typename Field::Element_ptr C, const size_t incc);
+
+ template <class Field>
+ void
+ fsubin (const Field& F, const size_t N,
+ typename Field::Element_ptr C, const size_t incc);
+
+
+ template <class Field>
+ void
+ fadd (const Field& F, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t inca,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr B, const size_t incb,
+ typename Field::Element_ptr C, const size_t incc);
+
+} // FFLAS
+
+
+#endif // __FFLASFFPACK_fflas_fflas_level1_INL
diff --git a/fflas-ffpack/fflas/fflas_level2.inl b/fflas-ffpack/fflas/fflas_level2.inl
new file mode 100644
index 0000000..637289b
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_level2.inl
@@ -0,0 +1,516 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_level2.h
+ * @brief Matrix-Vector operations
+ * or anything of \f$n^2\f$ complexity
+ */
+
+#ifndef __FFLASFFPACK_fflas_fflas_level2_INL
+#define __FFLASFFPACK_fflas_fflas_level2_INL
+
+namespace FFLAS {
+
+ //---------------------------------------------------------------------
+ // Level 2 routines
+ //---------------------------------------------------------------------
+
+ /** \brief fassign : \f$A \gets B \f$.
+ * @param F field
+ * @param m number of rows to copy
+ * @param n number of cols to copy
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * \param B vector in \p F
+ * \param ldb stride of \p B
+ */
+ template<class Field>
+ void
+ fassign (const Field& F, const size_t m, const size_t n,
+ typename Field::ConstElement_ptr B, const size_t ldb ,
+ typename Field::Element_ptr A, const size_t lda );
+
+ /** \brief fzero : \f$A \gets 0 \f$.
+ * @param F field
+ * @param m number of rows to zero
+ * @param n number of cols to zero
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * @warning may be buggy if Element is larger than int
+ */
+
+ template<class Field>
+ void
+ fzero (const Field& F, const size_t m, const size_t n,
+ typename Field::Element_ptr A, const size_t lda)
+ {
+ /* use memset only with Elements that are ok */
+ if (n == lda) { // contigous data
+ // memset(A,(int) F.zero,m*n); // might be bogus ?
+ fzero(F,m*n,A,1);
+ }
+ else { // not contiguous (strided)
+ for (size_t i = 0 ; i < m ; ++i)
+ // memset(A+i*lda,(int) F.zero,n) ; // might be bogus ?
+ fzero(F,n,A+i*lda,1);
+ }
+ }
+ /** \brief frand : \f$A \gets random \f$.
+ * @param F field
+ * @param G randomiterator
+ * @param m number of rows to randomize
+ * @param n number of cols to randomize
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ */
+ template<class Field, class RandIter>
+ void
+ frand (const Field& F, RandIter& G, const size_t m, const size_t n,
+ typename Field::Element_ptr A, const size_t lda)
+ {
+ /* use memset only with Elements that are ok */
+ if (n == lda) { // contigous data
+ // memset(A,(int) F.zero,m*n); // might be bogus ?
+ frand(F,G,m*n,A,1);
+ }
+ else { // not contiguous (strided)
+ for (size_t i = 0 ; i < m ; ++i)
+ // memset(A+i*lda,(int) F.zero,n) ; // might be bogus ?
+ frand(F,G,n,A+i*lda,1);
+ }
+ }
+ /** \brief fequal : test \f$A = B \f$.
+ * @param F field
+ * @param m row dimension
+ * @param n column dimension
+ * \param A m x n matrix in \p F
+ * \param lda leading dimension of A
+ * \param B m x n matrix in \p F
+ * \param ldb leading dimension of B
+ */
+ template<class Field>
+ bool
+ fequal (const Field& F, const size_t m, const size_t n,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb)
+ {
+ bool res=true;
+ for (size_t i = 0 ; i < m ; ++i)
+ res &= fequal (F, n, A + i*lda, 1, B + i*ldb, 1);
+ return res;
+ }
+ /** \brief fiszero : test \f$A = 0 \f$.
+ * @param F field
+ * @param m row dimension
+ * @param n column dimension
+ * \param A m x n matrix in \p F
+ * \param lda leading dimension of A
+ */
+ template<class Field>
+ bool
+ fiszero (const Field& F, const size_t m, const size_t n,
+ typename Field::ConstElement_ptr A, const size_t lda)
+ {
+ bool res=true;
+ for (size_t i = 0 ; i < m ; ++i)
+ res &= fiszero (F, n, A + i*lda, 1);
+ return res;
+ }
+
+ //! creates a diagonal matrix
+ template<class Field>
+ void
+ fidentity (const Field& F, const size_t m, const size_t n,
+ typename Field::Element_ptr A, const size_t lda, const typename Field::Element & d) // =F.one...
+ {
+ fzero(F,m,n,A,lda);
+ for (size_t i = 0 ; i < std::min(m,n) ; ++i)
+ F.assign(A[i*lda+i],d);
+ }
+
+ //! creates a diagonal matrix
+ template<class Field>
+ void
+ fidentity (const Field& F, const size_t m, const size_t n,
+ typename Field::Element_ptr A, const size_t lda)
+ {
+ fzero(F,m,n,A,lda);
+ for (size_t i = 0 ; i < std::min(m,n) ; ++i)
+ F.assign(A[i*lda+i],F.one);
+ }
+
+ /** freduce
+ * \f$A \gets A mod F\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * @internal
+ */
+ template<class Field>
+ void
+ freduce (const Field& F, const size_t m , const size_t n,
+ typename Field::Element_ptr A, const size_t lda);
+
+ /** freduce
+ * \f$A \gets B mod F\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * \param B matrix in \p Element
+ * \param ldb stride of \p B
+ * @internal
+ */
+ template<class Field>
+ void
+ freduce (const Field& F, const size_t m , const size_t n,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr A, const size_t lda);
+
+ /** finit
+ * \f$A \gets B mod F\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * \param B matrix in \p OtherElement
+ * \param ldb stride of \p B
+ * @internal
+ */
+ template<class Field, class OtherElement_ptr>
+ void
+ finit (const Field& F, const size_t m , const size_t n,
+ const OtherElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr A, const size_t lda);
+
+ /** fconvert
+ * \f$A \gets B mod F\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * \param A matrix in \p OtherElement
+ * \param lda stride of \p A
+ * \param B matrix in \p F
+ * \param ldb stride of \p B
+ * @internal
+ */
+ template<class Field, class OtherElement_ptr>
+ void
+ fconvert (const Field& F, const size_t m , const size_t n,
+ OtherElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb)
+ {
+ //!@todo check if n == lda
+ for (size_t i = 0 ; i < m ; ++i)
+ fconvert(F,n,A+i*lda,1,B+i*ldb,1);
+ return;
+ }
+
+ /** fnegin
+ * \f$A \gets - A\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * @internal
+ */
+ template<class Field>
+ void
+ fnegin (const Field& F, const size_t m , const size_t n,
+ typename Field::Element_ptr A, const size_t lda)
+ {
+ //!@todo check if n == lda
+ for (size_t i = 0 ; i < m ; ++i)
+ fnegin(F,n,A+i*lda,1);
+ return;
+ }
+
+ /** fneg
+ * \f$A \gets - B\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * @internal
+ */
+ template<class Field>
+ void
+ fneg (const Field& F, const size_t m , const size_t n,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr A, const size_t lda)
+ {
+ //!@todo check if n == lda
+ for (size_t i = 0 ; i < m ; ++i)
+ fneg(F,n,B+i*ldb,1,A+i*lda,1);
+ return;
+ }
+
+ /** fscalin
+ * \f$A \gets a \cdot A\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * @param alpha homotecie scalar
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * @internal
+ */
+ template<class Field>
+ void
+ fscalin (const Field& F, const size_t m , const size_t n,
+ const typename Field::Element alpha,
+ typename Field::Element_ptr A, const size_t lda);
+
+ /** fscal
+ * \f$B \gets a \cdot A\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * @param alpha homotecie scalar
+ * \param[in] A matrix in \p F
+ * \param lda stride of \p A
+ * \param[out] B matrix in \p F
+ * \param ldb stride of \p B
+ * @internal
+ */
+ template<class Field>
+ void
+ fscal (const Field& F, const size_t m , const size_t n,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb);
+
+ /** \brief faxpy : \f$y \gets \alpha \cdot x + y\f$.
+ * @param F field
+ * @param m row dimension
+ * @param n column dimension
+ * @param alpha scalar
+ * \param[in] X vector in \p F
+ * \param ldx leading dimension of \p X
+ * \param[in,out] Y vector in \p F
+ * \param ldy leading dimension of \p Y
+ */
+ template<class Field>
+ void
+ faxpy (const Field& F, const size_t m, const size_t n
+ , const typename Field::Element alpha,
+ typename Field::ConstElement_ptr X, const size_t ldx,
+ typename Field::Element_ptr Y, const size_t ldy );
+
+ /** \brief faxpby : \f$y \gets \alpha \cdot x + \beta \cdot y\f$.
+ * @param F field
+ * @param m row dimension
+ * @param n column dimension
+ * @param alpha scalar
+ * \param[in] X vector in \p F
+ * \param ldx leading dimension of \p X
+ * \param beta scalar
+ * \param[in,out] Y vector in \p F
+ * \param ldy leading dimension of \p Y
+ * \note this is a catlas function
+ */
+ template<class Field>
+ void
+ faxpby (const Field& F, const size_t m, const size_t n,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr X, const size_t ldx,
+ const typename Field::Element beta,
+ typename Field::Element_ptr Y, const size_t ldy );
+
+ /** \brief fmove : \f$A \gets B \f$ and \f$ B \gets 0\f$.
+ * @param F field
+ * @param m number of rows to copy
+ * @param n number of cols to copy
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * \param B vector in \p F
+ * \param ldb stride of \p B
+ */
+ template<class Field>
+ void
+ fmove (const Field& F, const size_t m, const size_t n,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb )
+ {
+ fassign(F,m,n,A,lda,B,ldb);
+ fzero(F,m,n,B,ldb);
+ }
+
+ /** fadd : matrix addition.
+ * Computes \p C = \p A + \p B.
+ * @param F field
+ * @param M rows
+ * @param N cols
+ * @param A dense matrix of size \c MxN
+ * @param lda leading dimension of \p A
+ * @param B dense matrix of size \c MxN
+ * @param ldb leading dimension of \p B
+ * @param C dense matrix of size \c MxN
+ * @param ldc leading dimension of \p C
+ */
+ template <class Field>
+ void
+ fadd (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc);
+
+
+
+ /** fsub : matrix subtraction.
+ * Computes \p C = \p A - \p B.
+ * @param F field
+ * @param M rows
+ * @param N cols
+ * @param A dense matrix of size \c MxN
+ * @param lda leading dimension of \p A
+ * @param B dense matrix of size \c MxN
+ * @param ldb leading dimension of \p B
+ * @param C dense matrix of size \c MxN
+ * @param ldc leading dimension of \p C
+ */
+ template <class Field>
+ void
+ fsub (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc);
+
+ //! fsubin
+ //! C = C - B
+ template <class Field>
+ void
+ fsubin (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc);
+
+ /** fadd : matrix addition with scaling.
+ * Computes \p C = \p A + alpha \p B.
+ * @param F field
+ * @param M rows
+ * @param N cols
+ * @param A dense matrix of size \c MxN
+ * @param lda leading dimension of \p A
+ * @param alpha some scalar
+ * @param B dense matrix of size \c MxN
+ * @param ldb leading dimension of \p B
+ * @param C dense matrix of size \c MxN
+ * @param ldc leading dimension of \p C
+ */
+ template <class Field>
+ void
+ fadd (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc);
+
+ //! faddin
+ template <class Field>
+ void
+ faddin (const Field& F, const size_t M, const size_t N,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ typename Field::Element_ptr C, const size_t ldc);
+
+
+ /** @brief finite prime Field GEneral Matrix Vector multiplication.
+ *
+ * Computes \f$Y \gets \alpha \mathrm{op}(A) X + \beta Y \f$.
+ * @param F field
+ * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
+ * @param M rows
+ * @param N cols
+ * @param alpha scalar
+ * @param A dense matrix of size \c MxN
+ * @param lda leading dimension of \p A
+ * @param X dense vector of size \c N
+ * @param incX stride of \p X
+ * @param beta scalar
+ * @param[out] Y dense vector of size \c M
+ * @param incY stride of \p Y
+ */
+ template<class Field>
+ typename Field::Element_ptr
+ fgemv (const Field& F, const FFLAS_TRANSPOSE TransA,
+ const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr X, const size_t incX,
+ const typename Field::Element beta,
+ typename Field::Element_ptr Y, const size_t incY);
+
+ /** @brief fger: rank one update of a general matrix
+ *
+ * Computes \f$A \gets \alpha x . y^T + A\f$
+ * @param F field
+ * @param M rows
+ * @param N cols
+ * @param alpha scalar
+ * @param[in,out] A dense matrix of size \c MxN and leading dimension \p lda
+ * @param lda leading dimension of \p A
+ * @param x dense vector of size \c M
+ * @param incx stride of \p X
+ * @param y dense vector of size \c N
+ * @param incy stride of \p Y
+ */
+ template<class Field>
+ void
+ fger (const Field& F, const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr x, const size_t incx,
+ typename Field::ConstElement_ptr y, const size_t incy,
+ typename Field::Element_ptr A, const size_t lda);
+
+ /** @brief ftrsv: TRiangular System solve with Vector
+ * Computes \f$ X \gets \mathrm{op}(A^{-1}) X\f$
+ * @param F field
+ * @param X vector of size \p N on a field \p F
+ * @param incX stride of \p X
+ * @param A a matrix of leading dimension \p lda and size \p N
+ * @param lda leading dimension of \p A
+ * @param N number of rows or columns of \p A according to \p TransA
+ * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
+ * \param Diag if \c Diag==FflasUnit then \p A is unit.
+ * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
+ */
+ template<class Field>
+ void
+ ftrsv (const Field& F, const FFLAS_UPLO Uplo,
+ const FFLAS_TRANSPOSE TransA, const FFLAS_DIAG Diag,
+ const size_t N,typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr X, int incX);
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_fflas_level2_INL
diff --git a/fflas-ffpack/fflas/fflas_level3.inl b/fflas-ffpack/fflas/fflas_level3.inl
new file mode 100644
index 0000000..96011ee
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_level3.inl
@@ -0,0 +1,399 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_level3.h
+ * @brief Matrix-Matrix operations
+ * or anything of \f$>n^2\f$ complexity.
+ */
+
+#ifndef __FFLASFFPACK_fflas_fflas_level3_INL
+#define __FFLASFFPACK_fflas_fflas_level3_INL
+
+//#include <givaro/zring.h>
+
+#include "fflas_bounds.inl"
+#include "fflas_helpers.inl"
+
+namespace FFLAS { namespace Protected {
+ //-----------------------------------------------------------------------------
+ // Some conversion functions
+ //-----------------------------------------------------------------------------
+
+
+ //---------------------------------------------------------------------
+ // Finite Field matrix => double matrix
+ // Special design for upper-triangular matrices
+ //---------------------------------------------------------------------
+ template<class Field>
+ void MatF2MatD_Triangular (const Field& F,
+ Givaro::DoubleDomain::Element_ptr S, const size_t lds,
+ typename Field::ConstElement_ptr const E,
+ const size_t lde,
+ const size_t m, const size_t n)
+ {
+
+ typename Field::ConstElement_ptr Ei = E;
+ Givaro::DoubleDomain::Element_ptr Si = S;
+ size_t i=0, j;
+ for ( ; i<m;++i, Ei+=lde, Si+=lds)
+ for ( j=i; j<n;++j)
+ F.convert(*(Si+j),*(Ei+j));
+ }
+
+ //---------------------------------------------------------------------
+ // Finite Field matrix => float matrix
+ // Special design for upper-triangular matrices
+ //---------------------------------------------------------------------
+ //! @todo do finit(...,FFLAS_TRANS,FFLAS_DIAG)
+ //! @todo do fconvert(...,FFLAS_TRANS,FFLAS_DIAG)
+ template<class Field>
+ void MatF2MatFl_Triangular (const Field& F,
+ Givaro::FloatDomain::Element_ptr S, const size_t lds,
+ typename Field::ConstElement_ptr const E,
+ const size_t lde,
+ const size_t m, const size_t n)
+ {
+
+ typename Field::ConstElement_ptr Ei = E;
+ Givaro::FloatDomain::Element_ptr Si = S;
+ size_t i=0, j;
+ for ( ; i<m;++i, Ei+=lde, Si+=lds)
+ for ( j=i; j<n;++j)
+ F.convert(*(Si+j),*(Ei+j));
+ }
+
+ /**
+ * Computes the maximal size for delaying the modular reduction
+ * in a triangular system resolution.
+ *
+ * Compute the maximal dimension k, such that a unit diagonal triangular
+ * system of dimension k can be solved over Z without overflow of the
+ * underlying floating point representation.
+ *
+ * @bib
+ * - Dumas, Giorgi, Pernet 06, arXiv:cs/0601133.
+ *
+ * \param F Finite Field/Ring of the computation
+ *
+ */
+ // Specialized routines for ftrsm
+ template <class Element> class ftrsmLeftUpperNoTransNonUnit;
+ template <class Element> class ftrsmLeftUpperNoTransUnit;
+ template <class Element> class ftrsmLeftUpperTransNonUnit;
+ template <class Element> class ftrsmLeftUpperTransUnit;
+ template <class Element> class ftrsmLeftLowerNoTransNonUnit;
+ template <class Element> class ftrsmLeftLowerNoTransUnit;
+ template <class Element> class ftrsmLeftLowerTransNonUnit;
+ template <class Element> class ftrsmLeftLowerTransUnit;
+ template <class Element> class ftrsmRightUpperNoTransNonUnit;
+ template <class Element> class ftrsmRightUpperNoTransUnit;
+ template <class Element> class ftrsmRightUpperTransNonUnit;
+ template <class Element> class ftrsmRightUpperTransUnit;
+ template <class Element> class ftrsmRightLowerNoTransNonUnit;
+ template <class Element> class ftrsmRightLowerNoTransUnit;
+ template <class Element> class ftrsmRightLowerTransNonUnit;
+ template <class Element> class ftrsmRightLowerTransUnit;
+
+ // Specialized routines for ftrmm
+ template <class Element> class ftrmmLeftUpperNoTransNonUnit;
+ template <class Element> class ftrmmLeftUpperNoTransUnit;
+ template <class Element> class ftrmmLeftUpperTransNonUnit;
+ template <class Element> class ftrmmLeftUpperTransUnit;
+ template <class Element> class ftrmmLeftLowerNoTransNonUnit;
+ template <class Element> class ftrmmLeftLowerNoTransUnit;
+ template <class Element> class ftrmmLeftLowerTransNonUnit;
+ template <class Element> class ftrmmLeftLowerTransUnit;
+ template <class Element> class ftrmmRightUpperNoTransNonUnit;
+ template <class Element> class ftrmmRightUpperNoTransUnit;
+ template <class Element> class ftrmmRightUpperTransNonUnit;
+ template <class Element> class ftrmmRightUpperTransUnit;
+ template <class Element> class ftrmmRightLowerNoTransNonUnit;
+ template <class Element> class ftrmmRightLowerNoTransUnit;
+ template <class Element> class ftrmmRightLowerTransNonUnit;
+ template <class Element> class ftrmmRightLowerTransUnit;
+
+} // protected
+} // FFLAS
+
+namespace FFLAS {
+
+ //---------------------------------------------------------------------
+ // Level 3 routines
+ //---------------------------------------------------------------------
+ // set by default for ftrsm to be thread safe
+ // undef it at your own risk, and only if you run it in sequential
+ #define __FFLAS__TRSM_READONLY
+
+ /** @brief ftrsm: <b>TR</b>iangular <b>S</b>ystem solve with <b>M</b>atrix.
+ * Computes \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ or \f$B \gets \alpha B \mathrm{op}(A^{-1})\f$.
+ * \param F field
+ * \param Side if \c Side==FflasLeft then \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ is computed.
+ * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
+ * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
+ * \param Diag if \c Diag==FflasUnit then \p A is unit.
+ * \param M rows of \p B
+ * \param N cols of \p B
+ * @param alpha scalar
+ * \param A triangular invertible matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$
+ * @param lda leading dim of \p A
+ * @param B matrix of size \p MxN
+ * @param ldb leading dim of \p B
+ * @bug \f$\alpha\f$ must be non zero.
+ */
+ template<class Field>
+ void
+ ftrsm (const Field& F, const FFLAS_SIDE Side,
+ const FFLAS_UPLO Uplo,
+ const FFLAS_TRANSPOSE TransA,
+ const FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ const typename Field::Element alpha,
+#ifdef __FFLAS__TRSM_READONLY
+ typename Field::ConstElement_ptr A,
+#else
+ typename Field::Element_ptr A,
+#endif
+ const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb);
+
+ /** @brief ftrmm: <b>TR</b>iangular <b>M</b>atrix <b>M</b>ultiply.
+ * Computes \f$ B \gets \alpha \mathrm{op}(A) B\f$ or \f$B \gets \alpha B \mathrm{op}(A)\f$.
+ * @param F field
+ * \param Side if \c Side==FflasLeft then \f$ B \gets \alpha \mathrm{op}(A) B\f$ is computed.
+ * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
+ * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
+ * \param Diag if \c Diag==FflasUnit then \p A is implicitly unit.
+ * \param M rows of \p B
+ * \param N cols of \p B
+ * @param alpha scalar
+ * \param A triangular matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$
+ * @param lda leading dim of \p A
+ * @param B matrix of size \p MxN
+ * @param ldb leading dim of \p B
+ */
+ template<class Field>
+ void
+ ftrmm (const Field& F, const FFLAS_SIDE Side,
+ const FFLAS_UPLO Uplo,
+ const FFLAS_TRANSPOSE TransA,
+ const FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb);
+
+ /** @brief fgemm: <b>F</b>ield <b>GE</b>neral <b>M</b>atrix <b>M</b>ultiply.
+ *
+ * Computes \f$C = \alpha \mathrm{op}(A) \times \mathrm{op}(B) + \beta C\f$
+ * Automatically set Winograd recursion level
+ * \param F field.
+ * \param ta if \c ta==FflasTrans then \f$\mathrm{op}(A)=A^t\f$, else \f$\mathrm{op}(A)=A\f$,
+ * \param tb same for matrix \p B
+ * \param m see \p A
+ * \param n see \p B
+ * \param k see \p A
+ * \param alpha scalar
+ * \param beta scalar
+ * \param A \f$\mathrm{op}(A)\f$ is \f$m \times k\f$
+ * \param B \f$\mathrm{op}(B)\f$ is \f$k \times n\f$
+ * \param C \f$C\f$ is \f$m \times n\f$
+ * \param lda leading dimension of \p A
+ * \param ldb leading dimension of \p B
+ * \param ldc leading dimension of \p C
+ * \param w recursive levels of Winograd's algorithm are used. No argument (or -1) does auto computation of \p w.
+ * @warning \f$\alpha\f$ \e must be invertible
+ */
+ template<class Field>
+ typename Field::Element_ptr
+ fgemm( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc);
+
+ template<typename Field>
+ typename Field::Element_ptr
+ fgemm( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ const ParSeqHelper::Sequential seq);
+
+ template<typename Field, class Cut, class Param>
+ typename Field::Element_ptr
+ fgemm( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ const ParSeqHelper::Parallel<Cut,Param> par);
+
+ template<class Field>
+ typename Field::Element*
+ pfgemm_1D_rec( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ const typename Field::Element_ptr A, const size_t lda,
+ const typename Field::Element_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element * C, const size_t ldc, size_t seuil);
+
+ template<class Field>
+ typename Field::Element*
+ pfgemm_2D_rec( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ const typename Field::Element_ptr A, const size_t lda,
+ const typename Field::Element_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element * C, const size_t ldc, size_t seuil);
+
+ template<class Field>
+ typename Field::Element*
+ pfgemm_3D_rec( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ const typename Field::Element_ptr A, const size_t lda,
+ const typename Field::Element_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc, size_t seuil, size_t * x);
+
+ template<class Field>
+ typename Field::Element_ptr
+ pfgemm_3D_rec2( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ const typename Field::Element_ptr A, const size_t lda,
+ const typename Field::Element_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc, size_t seuil, size_t *x);
+
+ /** @brief fgemm: <b>F</b>ield <b>GE</b>neral <b>M</b>atrix <b>M</b>ultiply.
+ *
+ * Computes \f$C = \alpha \mathrm{op}(A) \times \mathrm{op}(B) + \beta C\f$
+ * Version with Helper. Input and Output are not supposed to be reduced.
+ * \param F field.
+ * \param ta if \c ta==FflasTrans then \f$\mathrm{op}(A)=A^t\f$, else \f$\mathrm{op}(A)=A\f$,
+ * \param tb same for matrix \p B
+ * \param m see \p A
+ * \param n see \p B
+ * \param k see \p A
+ * \param alpha scalar
+ * \param beta scalar
+ * \param A \f$\mathrm{op}(A)\f$ is \f$m \times k\f$
+ * \param B \f$\mathrm{op}(B)\f$ is \f$k \times n\f$
+ * \param C \f$C\f$ is \f$m \times n\f$
+ * \param lda leading dimension of \p A
+ * \param ldb leading dimension of \p B
+ * \param ldc leading dimension of \p C
+ * \param H helper, driving the computation (algorithm, delayed modular reduction, switch of base type, etc)
+ * @warning \f$\alpha\f$ \e must be invertible
+ */
+ // template<class Field, class AlgoT, class FieldTrait, class ParSeqTrait>
+ // inline typename Field::Element_ptr
+ // fgemm (const Field& F,
+ // const FFLAS_TRANSPOSE ta,
+ // const FFLAS_TRANSPOSE tb,
+ // const size_t m, const size_t n, const size_t k,
+ // const typename Field::Element alpha,
+ // typename Field::Element_ptr A, const size_t lda,
+ // typename Field::Element_ptr B, const size_t ldb,
+ // const typename Field::Element beta,
+ // typename Field::Element_ptr C, const size_t ldc,
+ // MMHelper<Field, AlgoT, FieldTrait, ParSeqTrait> & H);
+
+} // FFLAS
+
+#include "fflas-ffpack/paladin/parallel.h"
+
+namespace FFLAS {
+
+ /** @brief fsquare: Squares a matrix.
+ * compute \f$ C \gets \alpha \mathrm{op}(A) \mathrm{op}(A) + \beta C\f$ over a Field \p F
+ * Avoid the conversion of B
+ * @param ta if \c ta==FflasTrans, \f$\mathrm{op}(A)=A^T\f$.
+ * @param F field
+ * @param n size of \p A
+ * @param alpha scalar
+ * @param beta scalar
+ * @param A dense matrix of size \c nxn
+ * @param lda leading dimension of \p A
+ * @param C dense matrix of size \c nxn
+ * @param ldc leading dimension of \p C
+ */
+ template<class Field>
+ typename Field::Element_ptr fsquare (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const size_t n,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A,
+ const size_t lda,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C,
+ const size_t ldc);
+
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_fflas_level3_INL
diff --git a/fflas-ffpack/fflas/fflas_pfgemm.inl b/fflas-ffpack/fflas/fflas_pfgemm.inl
new file mode 100644
index 0000000..4e04ec7
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_pfgemm.inl
@@ -0,0 +1,93 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* fflas/fflas_pfgemm.inl
+ * Copyright (C) 2013 Jean Guillaume Dumas Clement Pernet Ziad Sultan
+ *
+ * Written by Jean Guillaume Dumas Clement Pernet Ziad Sultan
+ * Time-stamp: <27 Nov 15 14:07:46 Jean-Guillaume.Dumas at imag.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_pfgemm_INL
+#define __FFLASFFPACK_fflas_pgemm_INL
+
+#define __FFLASFFPACK_SEQPARTHRESHOLD 220
+#define __FFLASFFPACK_DIMKPENALTY 1
+
+#ifdef __FFLASFFPACK_USE_KAAPI
+#include <kaapi++>
+#include "fflas-ffpack/fflas/kaapi_routines.inl"
+#endif
+#ifdef __FFLASFFPACK_USE_OPENMP
+#include <omp.h>
+#endif
+
+
+#include "fflas-ffpack/paladin/blockcuts.inl"
+#include "fflas-ffpack/paladin/parallel.h"
+#include "fflas-ffpack/utils/timer.h"
+
+
+#include "fflas-ffpack/paladin/pfgemm_variants.inl"
+
+namespace FFLAS {
+
+ template<class Field, class ModeTrait, class Strat, class Param>
+ inline typename std::enable_if<!std::is_same<ModeTrait,ModeCategories::ConvertTo<ElementCategories::RNSElementTag> >::value,typename Field::Element_ptr>::type
+ fgemm( const Field& F,
+ const FFLAS::FFLAS_TRANSPOSE ta,
+ const FFLAS::FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, MMHelperAlgo::Winograd, ModeTrait, ParSeqHelper::Parallel<Strat,Param> > & H)
+ {
+ return pfgemm (F, ta, tb, m, n, k ,alpha, A, lda, B, ldb, beta, C, ldc, H);
+ }
+
+ // template<class Field, class ModeTrait, class Strat, class Param>
+ // inline typename std::enable_if<!std::is_same<ModeTrait,ModeCategories::ConvertTo<ElementCategories::RNSElementTag> >::value,typename Field::Element_ptr>::type
+ // fgemm( const Field& F,
+ // const FFLAS::FFLAS_TRANSPOSE ta,
+ // const FFLAS::FFLAS_TRANSPOSE tb,
+ // const size_t m,
+ // const size_t n,
+ // const size_t k,
+ // const typename Field::Element alpha,
+ // typename Field::ConstElement_ptr A, const size_t lda,
+ // typename Field::ConstElement_ptr B, const size_t ldb,
+ // const typename Field::Element beta,
+ // typename Field::Element_ptr C, const size_t ldc,
+ // MMHelper<Field, MMHelperAlgo::WinogradPar, ModeTrait, ParSeqHelper::Parallel<Strat,Param> > & H)
+ // {
+ // std::cerr<<"coucou"<<std::endl;
+ // return BLAS3::WinoPar(F, ta, tb, m, n, k ,alpha, A, lda, B, ldb, beta, C, ldc, H);
+ // }
+}
+
+#endif // __FFLASFFPACK_fflas_pfgemm_INL
+
diff --git a/fflas-ffpack/fflas/fflas_pftrsm.inl b/fflas-ffpack/fflas/fflas_pftrsm.inl
new file mode 100644
index 0000000..72bb8d5
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_pftrsm.inl
@@ -0,0 +1,164 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* fflas/fflas_pftrsm.inl
+ * Copyright (C) 2013 Ziad Sultan
+ *
+ * Written by Ziad Sultan < Ziad.Sultan at imag.fr >
+ * Time-stamp: <18 Dec 15 16:09:24 Jean-Guillaume.Dumas at imag.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+#ifndef __FFLASFFPACK_fflas_pftrsm_INL
+#define __FFLASFFPACK_fflas_pftrsm_INL
+
+#define PTRSM_HYBRID_THRESHOLD 256
+
+#include "fflas-ffpack/paladin/parallel.h"
+
+namespace FFLAS {
+
+ template<class Field, class Cut, class Param>
+ inline typename Field::Element_ptr
+ ftrsm( const Field& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const FFLAS::FFLAS_UPLO UpLo,
+ const FFLAS::FFLAS_TRANSPOSE TA,
+ const FFLAS::FFLAS_DIAG Diag,
+ const size_t m,
+ const size_t n,
+ const typename Field::Element alpha,
+#ifdef __FFLAS__TRSM_READONLY
+ typename Field::ConstElement_ptr
+#else
+ typename Field::Element_ptr
+#endif
+ A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb,
+ TRSMHelper <StructureHelper::Iterative, ParSeqHelper::Parallel<Cut,Param> > & H)
+ // const FFLAS::CuttingStrategy method,
+ // const size_t numThreads)
+ {
+ typedef TRSMHelper<StructureHelper::Recursive,ParSeqHelper::Sequential> seqRecHelper;
+ SYNCH_GROUP(
+ seqRecHelper SeqH(H);
+ if(Side == FflasRight){
+ FORBLOCK1D(iter, m, H.parseq,
+ TASK(MODE(READ(A[0]) CONSTREFERENCE(F, A, B, SeqH,H) READWRITE(B[iter.begin()*ldb])), ftrsm( F, Side, UpLo, TA, Diag, iter.end()-iter.begin(), n, alpha, A, lda, B + iter.begin()*ldb, ldb, SeqH));
+ );
+ } else {
+ FORBLOCK1D(iter, n, H.parseq,
+// seqRecHelper SeqH(H);
+ TASK(MODE(READ(A[0]) CONSTREFERENCE(F, A, B, SeqH,H) READWRITE(B[iter.begin()])), ftrsm(F, Side, UpLo, TA, Diag, m, iter.end()-iter.begin(), alpha, A , lda, B + iter.begin(), ldb, SeqH));
+ );
+ }
+ );
+ return B;
+ }
+ template<class Field, class Cut, class Param>
+ inline typename Field::Element_ptr
+ ftrsm( const Field& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const FFLAS::FFLAS_UPLO UpLo,
+ const FFLAS::FFLAS_TRANSPOSE TA,
+ const FFLAS::FFLAS_DIAG Diag,
+ const size_t m,
+ const size_t n,
+ const typename Field::Element alpha,
+#ifdef __FFLAS__TRSM_READONLY
+ typename Field::ConstElement_ptr
+#else
+ typename Field::Element_ptr
+#endif
+ A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb,
+ TRSMHelper <StructureHelper::Hybrid, ParSeqHelper::Parallel<Cut, Param> > & H)
+ // const FFLAS::CuttingStrategy method,
+ // const size_t numThreads)
+ {
+
+ if(Side == FflasRight){
+
+ size_t nt = H.parseq.numthreads();
+ size_t nt_it,nt_rec;
+ if (m/PTRSM_HYBRID_THRESHOLD < nt){
+ nt_it = (int)ceil(double(m)/PTRSM_HYBRID_THRESHOLD);
+ nt_rec = (int)ceil(double(nt)/double(nt_it));
+ } else { nt_it = nt; nt_rec = 1;}
+// ForStrategy1D<size_t> iter(m, ParSeqHelper::Parallel((size_t)nt_it,H.parseq.method));
+// for (iter.begin(); ! iter.end(); ++iter) {
+ // SYNCH_GROUP(H.parseq.numthreads(),
+ SYNCH_GROUP(
+ ParSeqHelper::Parallel<Cut,Param> psh(nt_rec);
+ TRSMHelper<StructureHelper::Recursive, ParSeqHelper::Parallel<Cut,Param> > SeqH (psh);
+ H.parseq.set_numthreads(nt_it);
+ FORBLOCK1D(iter, m, H.parseq,
+// std::cerr<<"trsm_rec nt = "<<nt_rec<<std::endl;
+ TASK(MODE(READ(A) CONSTREFERENCE(F, A, B, SeqH) READWRITE(B[iter.begin()*ldb])),
+ ftrsm( F, Side, UpLo, TA, Diag, iter.end()-iter.begin(), n, alpha, A, lda, B + iter.begin()*ldb, ldb, SeqH));
+ );
+ );
+
+ } else {
+
+ size_t nt = H.parseq.numthreads();
+ size_t nt_it=nt;
+ size_t nt_rec=1;
+ if (nt_it*PTRSM_HYBRID_THRESHOLD >= n){
+ nt_it>>=1;
+ nt_rec<<=1;
+ while(nt_it*PTRSM_HYBRID_THRESHOLD >= n){
+ nt_it>>=1;
+ nt_rec<<=1;
+ }
+ nt_it<<=1;
+ nt_rec>>=1;
+ }
+
+ // if ((int)n/PTRSM_HYBRID_THRESHOLD < nt){
+ // nt_it = std::min(nt,(int)ceil(double(n)/PTRSM_HYBRID_THRESHOLD));
+ // nt_rec = ceil(double(nt)/nt_it);
+ // } else { nt_it = nt; nt_rec = 1;}
+
+ // ForStrategy1D<size_t> iter(n, ParSeqHelper::Parallel((size_t)nt_it,H.parseq.method));
+// for (iter.begin(); ! iter.end(); ++iter) {
+
+
+// std::cerr<<"trsm_rec nt_it = "<<nt_it<<std::endl;
+// std::cerr<<"trsm_rec nt_rec = "<<nt_rec<<std::endl;
+
+ SYNCH_GROUP(
+ ParSeqHelper::Parallel<Cut,Param> psh(nt_rec);
+ TRSMHelper<StructureHelper::Recursive, ParSeqHelper::Parallel<Cut,Param> > SeqH (psh);
+ H.parseq.set_numthreads(nt_it);
+ FORBLOCK1D(iter, n, H.parseq,
+ TASK(MODE(READ(A) CONSTREFERENCE(F, A, B, SeqH) READWRITE(B[iter.begin()])), ftrsm( F, Side, UpLo, TA, Diag, m, iter.end()-iter.begin(), alpha, A , lda, B + iter.begin(), ldb, SeqH));
+ );
+ );
+ }
+ return B;
+ }
+
+} // FFLAS
+
+
+#endif // __FFLASFFPACK_fflas_pftrsm_INL
diff --git a/fflas-ffpack/fflas/fflas_simd.h b/fflas-ffpack/fflas/fflas_simd.h
new file mode 100644
index 0000000..29e6756
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_simd.h
@@ -0,0 +1,357 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_utils_simd_H
+#define __FFLASFFPACK_utils_simd_H
+
+#define SIMD_INT 1
+
+
+//#include <x86intrin.h>
+//#include <immintrin.h> -> only define for AVX
+#include "fflas-ffpack/utils/fflas_intrinsic.h"
+#include <iostream>
+#include <type_traits>
+#include <limits>
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include "fflas-ffpack/utils/debug.h"
+
+#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
+#define INLINE __attribute__((always_inline)) inline
+#else
+#define INLINE inline
+#endif
+
+#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
+#define CONST __attribute__((const))
+#else
+#define CONST
+#endif
+
+#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
+#define PURE __attribute__((pure))
+#else
+#define PURE
+#endif
+
+#ifdef __FFLASFFPACK_USE_SIMD
+namespace std { // Why? - A.B. 2015-04-30
+
+inline
+std::ostream &operator<<(std::ostream &o, const __m128 &v) {
+ const float *vArray = (const float *)(&v);
+ o << '<';
+ o << vArray[0] << ',' << vArray[1];
+ o << ',';
+ o << vArray[2] << ',' << vArray[3];
+ o << '>';
+ return o;
+}
+
+inline
+std::ostream &operator<<(std::ostream &o, const __m128i &v) {
+ const int64_t *vArray = (const int64_t *)(&v);
+ o << '<';
+ o << vArray[0] << ',' << vArray[1];
+ o << '>';
+ return o;
+}
+
+inline
+std::ostream &operator<<(std::ostream &o, const __m128d &v) {
+ const double *vArray = (const double *)(&v);
+ o << '<';
+ o << vArray[0] << ',' << vArray[1];
+ o << '>';
+ return o;
+}
+} // std
+
+#ifdef __FFLASFFPACK_USE_AVX
+namespace std {
+
+inline
+std::ostream &operator<<(std::ostream &o, const __m256 &v) {
+ const float *vArray = (const float *)(&v);
+ o << '<';
+ o << vArray[0] << ',' << vArray[1] << ',' << vArray[2] << ',' << vArray[3];
+ o << ',';
+ o << vArray[4] << ',' << vArray[5] << ',' << vArray[6] << ',' << vArray[7];
+ o << '>';
+ return o;
+}
+
+inline
+std::ostream &operator<<(std::ostream &o, const __m256i &v) {
+ const int64_t *vArray = (const int64_t *)(&v);
+ o << '<';
+ o << vArray[0] << ',' << vArray[1] << ',' << vArray[2] << ',' << vArray[3];
+ o << '>';
+ return o;
+}
+
+inline
+std::ostream &operator<<(std::ostream &o, const __m256d &v) {
+ const double *vArray = (const double *)(&v);
+ o << '<';
+ o << vArray[0] << ',' << vArray[1] << ',' << vArray[2] << ',' << vArray[3];
+ o << '>';
+ return o;
+}
+} // std
+#endif // __FFLASFFPACK_USE_AVX
+
+#endif // __FFLASFFPACK_USE_SIMD
+
+namespace FFLAS {
+template <class T> struct support_simd : public std::false_type {};
+
+#if defined(__FFLASFFPACK_USE_SIMD)
+template <> struct support_simd<float> : public std::true_type {};
+template <> struct support_simd<double> : public std::true_type {};
+#ifdef SIMD_INT
+template <> struct support_simd<int64_t> : public std::true_type {};
+template <> struct support_simd<int32_t> : public std::true_type {};
+template <> struct support_simd<int16_t> : public std::true_type {};
+#endif
+#endif
+
+} // FFLAS
+
+#define NORML_MOD(C, P, NEGP, MIN, MAX, Q, T) \
+ { \
+ Q = greater(C, MAX); \
+ T = lesser(C, MIN); \
+ Q = vand(Q, NEGP); \
+ T = vand(T, P); \
+ Q = vor(Q, T); \
+ C = add(C, Q); \
+ }
+
+#define FLOAT_MOD(C, P, INVP, Q) \
+ { \
+ Q = mul(C, INVP); \
+ Q = floor(Q); \
+ C = fnmadd(C, Q, P); \
+ }
+
+// to activate SIMD with integers
+//#define SIMD_INT
+
+template <class T> struct simdToType;
+
+/*
+ * is_simd trait
+ */
+
+template <class T> struct is_simd {
+ static const constexpr bool value = false;
+ using type = std::integral_constant<bool, false>;
+};
+
+// SSE
+#if defined(__FFLASFFPACK_USE_SIMD) // SSE or better
+#include "fflas-ffpack/fflas/fflas_simd/simd128.inl"
+
+template <> struct simdToType<__m128d> { using type = double; };
+
+template <> struct simdToType<__m128> { using type = float; };
+
+template <> struct is_simd<__m128d> {
+ static const constexpr bool value = true;
+ using type = std::integral_constant<bool, true>;
+};
+
+template <> struct is_simd<__m128> {
+ static const constexpr bool value = true;
+ using type = std::integral_constant<bool, true>;
+};
+
+#ifdef SIMD_INT
+template <> struct is_simd<__m128i> {
+ static const constexpr bool value = true;
+ using type = std::integral_constant<bool, true>;
+};
+#endif
+
+#endif // SSE
+
+// AVX
+#if defined(__FFLASFFPACK_USE_AVX) or defined(__FFLASFFPACK_USE_AVX2)
+#include "fflas-ffpack/fflas/fflas_simd/simd256.inl"
+
+template <> struct simdToType<__m256d> { using type = double; };
+
+template <> struct simdToType<__m256> { using type = float; };
+
+template <> struct is_simd<__m256d> {
+ static const constexpr bool value = true;
+ using type = std::integral_constant<bool, true>;
+};
+
+template <> struct is_simd<__m256> {
+ static const constexpr bool value = true;
+ using type = std::integral_constant<bool, true>;
+};
+
+#ifdef SIMD_INT
+template <> struct is_simd<__m256i> {
+ static const constexpr bool value = true;
+ using type = std::integral_constant<bool, true>;
+};
+#endif
+#endif // AVX
+
+/*
+ * Simd functors
+ */
+
+struct NoSimd {
+ // Test if the pointer p is multiple of alignment
+ template <class T> static constexpr bool valid(T p) { return false; }
+
+ // Test if n is multiple of vect_size
+ template <class T> static constexpr bool compliant(T n) { return false; }
+};
+
+// #if defined(__FFLASFFPACK_USE_AVX)
+
+template <class T, bool = std::is_arithmetic<T>::value, bool = std::is_integral<T>::value> struct SimdChooser {};
+
+template <class T, bool b> struct SimdChooser<T, false, b> { using value = NoSimd; };
+
+template <class T>
+struct SimdChooser<T, true, false> // floating number
+ {
+#ifdef __FFLASFFPACK_USE_AVX
+ using value = Simd256<T>;
+#elif defined(__FFLASFFPACK_USE_SSE)
+ using value = Simd128<T>;
+#else
+ using value = NoSimd;
+#endif
+};
+
+template <class T>
+struct SimdChooser<T, true, true> // integral number
+ {
+#ifdef __FFLASFFPACK_USE_AVX2
+ using value = Simd256<T>;
+#elif __FFLASFFPACK_USE_SSE
+ using value = Simd128<T>;
+#else
+ using value = NoSimd;
+#endif
+};
+
+template <class T> using Simd = typename SimdChooser<T>::value;
+
+// template <class T> struct SimdChooser<T, true> {
+// #if defined(__FFLASFFPACK_USE_AVX2)
+// typedef Simd256<T> value;
+// #else
+// typedef Simd128<T> value;
+// #endif // __FFLASFFPACK_USE_AVX2
+// };
+
+// #elif defined(__FFLASFFPACK_USE_SSE) // not AVX
+
+// template <class T> using Simd = Simd128<T>;
+
+// #endif // __FFLASFFPACK_USE_AVX
+
+#if defined(__FFLASFFPACK_USE_SIMD) // SSE or better
+
+// template <class T> struct floating_simd;
+
+// template <> struct floating_simd<float> { typedef Simd<float> value; };
+
+// template <> struct floating_simd<double> { typedef Simd<double> value; };
+
+// template <> struct floating_simd<int64_t> {
+// #if defined(__FFLASFFPACK_USE_AVX2)
+// // typedef Simd256<double> value;
+// #else
+// typedef Simd128<double> value;
+// #endif
+// };
+
+#endif
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+namespace FFLAS { /* print helper */
+
+// need friend ?
+template <class simdT>
+inline std::ostream &print(std::ostream &os, const typename simdT::vect_t &P) {
+ typename simdT::scalar_t p[simdT::vect_size];
+ os << '<';
+ simdT::store(p, P);
+ for (size_t i = 0; i < simdT::vect_size; ++i) {
+ os << p[i];
+ if (i < simdT::vect_size - 1)
+ os << '|';
+ }
+ os << '>';
+
+ return os;
+}
+
+} // FFLAS
+
+namespace std {
+// cannot be instanciated, T is not déductible
+template <class T>
+inline std::ostream &operator<<(std::ostream &o, const typename Simd128<T>::vect_t &v) {
+ FFLAS::print<Simd128<T>>(o, v);
+ return o;
+}
+} // std
+
+#ifdef __FFLASFFPACK_USE_AVX
+namespace std {
+// cannot be instanciated, T is not déductible
+template <class T>
+inline std::ostream &operator<<(std::ostream &o, const typename Simd256<T>::vect_t &v) {
+ FFLAS::print(o, v);
+ return o;
+}
+}
+#endif // __FFLASFFPACK_USE_AVX
+
+#endif // __FFLASFFPACK_USE_SIMD
+
+#undef INLINE
+#undef PURE
+#undef CONST
+#undef SIMD_INT
+
+#endif /* __FFLASFFPACK_utils_simd_H */
diff --git a/benchmark/Makefile.am b/fflas-ffpack/fflas/fflas_simd/Makefile.am
similarity index 60%
copy from benchmark/Makefile.am
copy to fflas-ffpack/fflas/fflas_simd/Makefile.am
index 31793b2..a6aa08f 100644
--- a/benchmark/Makefile.am
+++ b/fflas-ffpack/fflas/fflas_simd/Makefile.am
@@ -1,5 +1,7 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+#
+#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -19,10 +21,29 @@
# ========LICENCE========
#/
-#
-# Nothing yet
+pkgincludesubdir=$(pkgincludedir)/fflas/fflas_simd
-SUBDIRS=graph src html test-src
-#
-EXTRA_DIST=run.sh
+EXTRA_DIST=simd.doxy
+
+SIMD128= simd128.inl \
+ simd128_double.inl \
+ simd128_float.inl \
+ simd128_int16.inl \
+ simd128_int32.inl \
+ simd128_int64.inl
+
+SIMD256= simd256.inl \
+ simd256_double.inl \
+ simd256_float.inl \
+ simd256_int16.inl \
+ simd256_int32.inl \
+ simd256_int64.inl
+
+SIMD_MOD= simd_modular.inl
+
+
+pkgincludesub_HEADERS= \
+ $(SIMD128) \
+ $(SIMD256)\
+ $(SIMD_MOD)
diff --git a/fflas-ffpack/fflas/fflas_simd/simd.doxy b/fflas-ffpack/fflas/fflas_simd/simd.doxy
new file mode 100644
index 0000000..0c8f5b0
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_simd/simd.doxy
@@ -0,0 +1,35 @@
+// Copyright (c) 2014 FFLAS-FFPACK
+// written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+//
+// ========LICENCE========
+// This file is part of the library FFLAS-FFPACK.
+//
+// FFLAS-FFPACK is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+// ========LICENCE========
+//
+
+
+
+/** \ingroup fflas-ffpack
+ * \defgroup simd SIMD wrapper
+ *
+ * \brief wraps SIMD functions
+ * Supporst SSE4.1, AVX, AVX2.
+ *
+ * @todo biblio
+ *
+ */
+
+// vim:syn=doxygen
diff --git a/fflas-ffpack/field/modular-positive.h b/fflas-ffpack/fflas/fflas_simd/simd128.inl
similarity index 55%
copy from fflas-ffpack/field/modular-positive.h
copy to fflas-ffpack/fflas/fflas_simd/simd128.inl
index 9e12653..81bffef 100644
--- a/fflas-ffpack/field/modular-positive.h
+++ b/fflas-ffpack/fflas/fflas_simd/simd128.inl
@@ -1,28 +1,25 @@
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/* fflas-ffpack/modular-positive.h
- * Copyright (C) 2003 Pascal Giorgi
- * 2008 Clement Pernet
- * Written by Clement Pernet <clement.pernet at gmail.com>
- * Pascal Giorgi <pascal.giorgi at ens-lyon.fr>
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
*
- * ------------------------------------
*
- *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -30,25 +27,25 @@
*.
*/
-#ifndef __FFLASFFPACK_modular_positive_H
-#define __FFLASFFPACK_modular_positive_H
+#ifndef __FFLASFFPACK_fflas_ffpack_utils_simd128_INL
+#define __FFLASFFPACK_fflas_ffpack_utils_simd128_INL
+
+template <bool ArithType, bool Int, bool Signed, int Size> struct Simd128_impl;
-#include <math.h>
-#include "fflas-ffpack/field/modular-randiter.h"
-#include "fflas-ffpack/field/nonzero-randiter.h"
+#include "simd128_float.inl"
+#include "simd128_double.inl"
-namespace FFPACK {
+#ifdef SIMD_INT
+// Trop d'instructions SSE manquantes pour les int8_t
- template <class Element>
- class Modular;
+#include "simd128_int16.inl"
+#include "simd128_int32.inl"
+#include "simd128_int64.inl"
-} // FFPACK
+#endif //#ifdef SIMD_INT
-#include "fflas-ffpack/field/modular-float.h"
-#include "fflas-ffpack/field/modular-double.h"
-#include "fflas-ffpack/field/modular-int32.h"
-#ifdef __x86_64__
-#include "fflas-ffpack/field/modular-int64.h"
-#endif
+template <class T>
+using Simd128 =
+ Simd128_impl<std::is_arithmetic<T>::value, std::is_integral<T>::value, std::is_signed<T>::value, sizeof(T)>;
-#endif // __FFLASFFPACK_modular_positive_H
+#endif // __FFLASFFPACK_fflas_ffpack_utils_simd128_INL
diff --git a/fflas-ffpack/fflas/fflas_simd/simd128_double.inl b/fflas-ffpack/fflas/fflas_simd/simd128_double.inl
new file mode 100644
index 0000000..ecf68a6
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_simd/simd128_double.inl
@@ -0,0 +1,354 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_ffpack_utils_simd128_double_INL
+#define __FFLASFFPACK_fflas_ffpack_utils_simd128_double_INL
+
+/*
+ * Simd128 specialized for double
+ */
+template <> struct Simd128_impl<true, false, true, 8> {
+#if defined(__FFLASFFPACK_USE_SIMD)
+
+ /*
+ * alias to 128 bit simd register
+ */
+ using vect_t = __m128d;
+
+ /*
+ * define the scalar type corresponding to the specialization
+ */
+ using scalar_t = double;
+
+ /*
+ * number of scalar_t in a simd register
+ */
+ static const constexpr size_t vect_size = 2;
+
+ /*
+ * alignement required by scalar_t pointer to be loaded in a vect_t
+ */
+ static const constexpr size_t alignment = 16;
+
+ /*
+ * Check if the pointer p is a multiple of alignemnt
+ */
+ template <class T> static constexpr bool valid(T *p) { return (int64_t)p % alignment == 0; }
+
+ /*
+ * Check if the number n is a multiple of vect_size
+ */
+ template <class T> static constexpr bool compliant(T n) { return n % vect_size == 0; }
+
+ /*
+ * Return vector of type vect_t with all elements set to zero.
+ * Return [0,0]
+ */
+ static INLINE CONST vect_t zero() { return _mm_setzero_pd(); }
+
+ /*
+ * Broadcast double-precision (64-bit) floating-point value a to all elements of vect_t.
+ * Return [x,x]
+ */
+ static INLINE CONST vect_t set1(const scalar_t x) { return _mm_set1_pd(x); }
+
+ /*
+ * Set packed double-precision (64-bit) floating-point elements in vect_t with the supplied values.
+ * Return [x1,x2]
+ */
+ static INLINE CONST vect_t set(const scalar_t x1, const scalar_t x2) { return _mm_set_pd(x2, x1); }
+
+ /*
+ * Gather double-precision (64-bit) floating-point elements with indexes idx[0], ..., idx[3] from the address p in
+ * vect_t.
+ * Return [p[idx[0]], p[idx[1]]]
+ */
+ template <class T> static INLINE PURE vect_t gather(const scalar_t *const p, const T *const idx) {
+ return _mm_set_pd(p[idx[1]], p[idx[0]]);
+ }
+
+ /*
+ * Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from memory into vect_t.
+ * p must be aligned on a 16-byte boundary or a general-protection exception will be generated.
+ * Return [p[0], p[1]]
+ */
+ static INLINE PURE vect_t load(const scalar_t *const p) { return _mm_load_pd(p); }
+
+ /*
+ * Load 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from memory into vect_t.
+ * p does not need to be aligned on any particular boundary.
+ * Return [p[0], p[1]]
+ */
+ static INLINE PURE vect_t loadu(const scalar_t *const p) { return _mm_loadu_pd(p); }
+
+ /*
+ * Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from p into memory.
+ * p must be aligned on a 16-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void store(const scalar_t *p, const vect_t v) { _mm_store_pd(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from p into memory.
+ * p must be aligned on a 16-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void storeu(const scalar_t *p, const vect_t v) { _mm_storeu_pd(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a into memory using
+ * a non-temporal memory hint.
+ * p must be aligned on a 16-byte boundary or a general-protection exception may be generated.
+ */
+ static INLINE void stream(const scalar_t *p, const vect_t v) { _mm_stream_pd(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in vect_t.
+ * Args : [a0, a1], [b0, b1]
+ * Return : [a0+b0, a1+b1]
+ */
+ static INLINE CONST vect_t add(const vect_t a, const vect_t b) { return _mm_add_pd(a, b); }
+
+ static INLINE vect_t addin(vect_t &a, const vect_t b) { return a = add(a, b); }
+
+ /*
+ * Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit)
+ * floating-point elements in a, and store the results in vect_t.
+ * Args : [a0, a1], [b0, b1]
+ * Return : [a0-b0, a1-b1]
+ */
+ static INLINE CONST vect_t sub(const vect_t a, const vect_t b) { return _mm_sub_pd(a, b); }
+
+ static INLINE CONST vect_t subin(vect_t &a, const vect_t b) { return a = sub(a, b); }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in vect_t.
+ * Args : [a0, a1], [b0, b1]
+ * Return : [a0*b0, a1*b1]
+ */
+ static INLINE CONST vect_t mul(const vect_t a, const vect_t b) { return _mm_mul_pd(a, b); }
+
+ static INLINE CONST vect_t mulin(vect_t &a, const vect_t b) { return a = mul(a, b); }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to
+ * packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1], [b0, b1], [c0, c1]
+ * Return : [a0*b0+c0, a1*b1+c1]
+ */
+ static INLINE CONST vect_t fmadd(const vect_t c, const vect_t a, const vect_t b) {
+#ifdef __FMA__
+ return _mm_fmadd_pd(a, b, c);
+#else
+ return add(c, mul(a, b));
+#endif
+ }
+
+ static INLINE CONST vect_t fmaddin(vect_t &c, const vect_t a, const vect_t b) { return c = fmadd(c, a, b); }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result
+ * to packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1], [b0, b1], [c0, c1]
+ * Return : [-(a0*b0)+c0, -(a1*b1)+c1]
+ */
+ static INLINE CONST vect_t fnmadd(const vect_t c, const vect_t a, const vect_t b) {
+#ifdef __FMA__
+ return _mm_fnmadd_pd(a, b, c);
+#else
+ return sub(c, mul(a, b));
+#endif
+ }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result
+ * to packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1], [b0, b1], [c0, c1]
+ * Return : [-(a0*b0)+c0, -(a1*b1)+c1]
+ */
+ static INLINE CONST vect_t nmadd(const vect_t c, const vect_t a, const vect_t b) { return fnmadd(c, a, b); }
+
+ static INLINE CONST vect_t fnmaddin(vect_t &c, const vect_t a, const vect_t b) { return c = fnmadd(c, a, b); }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from
+ * the intermediate result, and store the results in vect_t.
+ * Args : [a0, a1], [b0, b1], [c0, c1]
+ * Return : [a0*b0-c0, a1*b1-c1]
+ */
+ static INLINE CONST vect_t fmsub(const vect_t c, const vect_t a, const vect_t b) {
+#ifdef __FMA__
+ return _mm_fmsub_pd(a, b, c);
+#else
+ return sub(mul(a, b), c);
+#endif
+ }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from
+ * the intermediate result, and store the results in vect_t.
+ * Args : [a0, a1], [b0, b1], [c0, c1]
+ * Return : [a0*b0-c0, a1*b1-c1]
+ */
+ static INLINE CONST vect_t msub(const vect_t c, const vect_t a, const vect_t b) { return fmsub(c, a, b); }
+
+ static INLINE CONST vect_t fmsubin(vect_t &c, const vect_t a, const vect_t b) { return c = fmsub(c, a, b); }
+
+ /*
+ * Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results
+ in vect_t.
+ * Args : [a0, a1], [b0, b1]
+ * Return : [(a0==b0) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a1==b1) ? 0xFFFFFFFFFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t eq(const vect_t a, const vect_t b) { return _mm_cmpeq_pd(a, b); }
+
+ /*
+ * Compare packed double-precision (64-bit) floating-point elements in a and b for lesser-than, and store the
+ results in vect_t.
+ * Args : [a0, a1], [b0, b1]
+ * Return : [(a0<b0) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a1<b1) ? 0xFFFFFFFFFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t lesser(const vect_t a, const vect_t b) { return _mm_cmplt_pd(a, b); }
+
+ /*
+ * Compare packed double-precision (64-bit) floating-point elements in a and b for lesser or equal than, and store
+ the results in vect_t.
+ * Args : [a0, a1], [b0, b1]
+ * Return : [(a0<=b0) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a1<=b1) ? 0xFFFFFFFFFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return _mm_cmple_pd(a, b); }
+
+ /*
+ * Compare packed double-precision (64-bit) floating-point elements in a and b for greater-than, and store the
+ results in vect_t.
+ * Args : [a0, a1], [b0, b1]
+ * Return : [(a0>b0) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a1>b1) ? 0xFFFFFFFFFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t greater(const vect_t a, const vect_t b) { return _mm_cmpgt_pd(a, b); }
+
+ /*
+ * Compare packed double-precision (64-bit) floating-point elements in a and b for greater or equal than, and store
+ the results in vect_t.
+ * Args : [a0, a1], [b0, b1]
+ * Return : [(a0>=b0) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a1>=b1) ? 0xFFFFFFFFFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return _mm_cmpge_pd(a, b); }
+
+ /*
+ * Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1], [b0, b1]
+ * Return : [a0 AND b0, a1 AND b1]
+ */
+ static INLINE CONST vect_t vand(const vect_t a, const vect_t b) { return _mm_and_pd(a, b); }
+
+ /*
+ * Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1], [b0, b1]
+ * Return : [a0 OR b0, a1 OR b1]
+ */
+ static INLINE CONST vect_t vor(const vect_t a, const vect_t b) { return _mm_or_pd(a, b); }
+
+ /*
+ * Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1], [b0, b1]
+ * Return : [a0 XOR b0, a1 XOR b1]
+ */
+ static INLINE CONST vect_t vxor(const vect_t a, const vect_t b) { return _mm_xor_pd(a, b); }
+
+ /*
+ * Compute the bitwise AND NOT of packed double-precision (64-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1], [b0, b1]
+ * Return : [a0 AND NOT b0, a1 AND NOT b1]
+ */
+ static INLINE CONST vect_t vandnot(const vect_t a, const vect_t b) { return _mm_andnot_pd(a, b); }
+
+ /*
+ * Round the packed double-precision (64-bit) floating-point elements in a down to an integer value, and store the
+ * results as packed double-precision floating-point elements in vect_t.
+ * Args : [a0, a1]
+ * Return : [floor(a0), floor(a1)]
+ */
+ static INLINE CONST vect_t floor(const vect_t a) { return _mm_floor_pd(a); }
+
+ /*
+ * Round the packed double-precision (64-bit) floating-point elements in a up to an integer value, and store the
+ * results as packed double-precision floating-point elements in vect_t.
+ * Args : [a0, a1]
+ * Return : [ceil(a0), ceil(a1)]
+ */
+ static INLINE CONST vect_t ceil(const vect_t a) { return _mm_ceil_pd(a); }
+
+ /*
+ * Round the packed double-precision (64-bit) floating-point elements in a, and store the results as packed
+ * double-precision floating-point elements in vect_t.
+ * Args : [a0, a1]
+ * Return : [round(a0), round(a1)]
+ */
+ static INLINE CONST vect_t round(const vect_t a) {
+ return _mm_round_pd(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ }
+
+ /*
+ * Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in a and b, and pack the
+ * results in vect_t.
+ * Args : [a0, a1], [b0, b1]
+ * Return : [a0+a1, b0+b1]
+ */
+ static INLINE CONST vect_t hadd(const vect_t a, const vect_t b) { return _mm_hadd_pd(a, b); }
+
+ /*
+ * Horizontally add double-precision (64-bit) floating-point elements in a.
+ * Args : [a0, a1]
+ * Return : a0+a1
+ */
+ static INLINE CONST scalar_t hadd_to_scal(const vect_t a) {
+ return ((const scalar_t *)&a)[0] + ((const scalar_t *)&a)[1];
+ }
+
+ static INLINE vect_t mod(vect_t &C, const vect_t &P, const vect_t &INVP, const vect_t &NEGP, const vect_t &MIN,
+ const vect_t &MAX, vect_t &Q, vect_t &T) {
+ FLOAT_MOD(C, P, INVP, Q);
+ NORML_MOD(C, P, NEGP, MIN, MAX, Q, T);
+
+ return C;
+ }
+
+#else // __AVX__
+#error "You need SSE instructions to perform 128bits operations on double"
+#endif
+};
+
+#endif // __FFLASFFPACK_fflas_ffpack_utils_simd128_double_INL
diff --git a/fflas-ffpack/fflas/fflas_simd/simd128_float.inl b/fflas-ffpack/fflas/fflas_simd/simd128_float.inl
new file mode 100644
index 0000000..04315a1
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_simd/simd128_float.inl
@@ -0,0 +1,373 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_ffpack_utils_simd128_float_INL
+#define __FFLASFFPACK_fflas_ffpack_utils_simd128_float_INL
+
+/*
+ * Simd128 specialized for float
+ */
+template <> struct Simd128_impl<true, false, true, 4> {
+#if defined(__FFLASFFPACK_USE_SIMD)
+
+ /*
+ * alias to 128 bit simd register
+ */
+ using vect_t = __m128;
+
+ /*
+ * define the scalar type corresponding to the specialization
+ */
+ using scalar_t = float;
+
+ /*
+ * number of scalar_t in a simd register
+ */
+ static const constexpr size_t vect_size = 4;
+
+ /*
+ * alignement required by scalar_t pointer to be loaded in a vect_t
+ */
+ static const constexpr size_t alignment = 16;
+
+ /*
+ * Check if the pointer p is a multiple of alignemnt
+ */
+ template <class T> static constexpr bool valid(T *p) { return (int64_t)p % alignment == 0; }
+
+ /*
+ * Check if the number n is a multiple of vect_size
+ */
+ template <class T> static constexpr bool compliant(T n) { return n % vect_size == 0; }
+
+ /*
+ * Return vector of type vect_t with all elements set to zero
+ * Return [0,0,0,0]
+ */
+ static INLINE CONST vect_t zero() { return _mm_setzero_ps(); }
+
+ /*
+ * Broadcast single-precision (32-bit) floating-point value x to all elements of vect_t.
+ * Return [x,x,x,x]
+ */
+ static INLINE CONST vect_t set1(const scalar_t x) {
+#ifdef __AVX__
+ // return _mm_broadcast_ss(&x);
+ return _mm_set1_ps(x);
+#else
+ return _mm_set1_ps(x);
+#endif
+ }
+
+ /*
+ * Set packed single-precision (32-bit) floating-point elements in vect_t with the supplied values.
+ * Return [x1,x2,x3,x4]
+ */
+ static INLINE CONST vect_t set(const scalar_t x1, const scalar_t x2, const scalar_t x3, const scalar_t x4) {
+ return _mm_set_ps(x4, x3, x2, x1);
+ }
+
+ /*
+ * Gather single-precision (32-bit) floating-point elements with indexes idx[0], ..., idx[3] from the address p in
+ * vect_t.
+ * Return [p[idx[0]], p[idx[1]], p[idx[2]], p[idx[3]]]
+ */
+ template <class T> static INLINE PURE vect_t gather(const scalar_t *const p, const T *const idx) {
+ return _mm_set_ps(p[idx[3]], p[idx[2]], p[idx[1]], p[idx[0]]);
+ }
+
+ /*
+ * Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from memory into vect_t.
+ * p must be aligned on a 16-byte boundary or a general-protection exception will be generated.
+ * Return [p[0], p[1], p[2], p[3]]
+ */
+ static INLINE PURE vect_t load(const scalar_t *const p) { return _mm_load_ps(p); }
+
+ /*
+ * Load 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from memory into vect_t.
+ * p does not need to be aligned on any particular boundary.
+ * Return [p[0], p[1], p[2], p[3]]
+ */
+ static INLINE PURE vect_t loadu(const scalar_t *const p) { return _mm_loadu_ps(p); }
+
+ /*
+ * Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a into memory.
+ * p must be aligned on a 16-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void store(const scalar_t *p, const vect_t v) { _mm_store_ps(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Store 128-bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a into memory.
+ * p does not need to be aligned on any particular boundary.
+ */
+ static INLINE void storeu(const scalar_t *p, const vect_t v) { _mm_storeu_ps(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Store 128-bits (composed of 4 packed double-precision (32-bit) floating-point elements) from a into memory using
+ * a non-temporal memory hint.
+ * p must be aligned on a 16-byte boundary or a general-protection exception may be generated.
+ */
+ static INLINE void stream(const scalar_t *p, const vect_t v) { _mm_stream_ps(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0+b0, a1+b1, a2+b2, a3+b3]
+ */
+ static INLINE CONST vect_t add(const vect_t a, const vect_t b) { return _mm_add_ps(a, b); }
+
+ static INLINE vect_t addin(vect_t &a, const vect_t b) { return a = add(a, b); }
+
+ /*
+ * Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit)
+ * floating-point elements in a, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0-b0, a1-b1, a2-b2, a3-b3]
+ */
+ static INLINE CONST vect_t sub(const vect_t a, const vect_t b) { return _mm_sub_ps(a, b); }
+
+ static INLINE CONST vect_t subin(vect_t &a, const vect_t b) { return a = sub(a, b); }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0*b0, a1*b1, a2*b2, a3*b3]
+ */
+ static INLINE CONST vect_t mul(const vect_t a, const vect_t b) { return _mm_mul_ps(a, b); }
+
+ static INLINE CONST vect_t mulin(vect_t &a, const vect_t b) { return a = mul(a, b); }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to
+ * packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3], [c0, c1, c2, c3]
+ * Return : [a0*b0+c0, a1*b1+c1, a2*b2+c2, a3*b3+c3]
+ */
+ static INLINE CONST vect_t fmadd(const vect_t c, const vect_t a, const vect_t b) {
+#ifdef __FMA__
+ return _mm_fmadd_ps(a, b, c);
+#else
+ return add(c, mul(a, b));
+#endif
+ }
+
+ static INLINE CONST vect_t fmaddin(vect_t &c, const vect_t a, const vect_t b) { return c = fmadd(c, a, b); }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to
+ * packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3], [c0, c1, c2, c3]
+ * Return : [a0*b0+c0, a1*b1+c1, a2*b2+c2, a3*b3+c3, a4*b4+c4]
+ */
+ static INLINE CONST vect_t fnmadd(const vect_t c, const vect_t a, const vect_t b) {
+#ifdef __FMA__
+ return _mm_fnmadd_ps(a, b, c);
+#else
+ return sub(c, mul(a, b));
+#endif
+ }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to
+ * packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3], [c0, c1, c2, c3]
+ * Return : [a0*b0+c0, a1*b1+c1, a2*b2+c2, a3*b3+c3, a4*b4+c4]
+ */
+ static INLINE CONST vect_t nmadd(const vect_t c, const vect_t a, const vect_t b) { return fnmadd(c, a, b); }
+
+ static INLINE CONST vect_t fnmaddin(vect_t &c, const vect_t a, const vect_t b) { return c = fnmadd(c, a, b); }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from
+ * the intermediate result, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3], [c0, c1, c2, c3]
+ * Return : [a0*b0-c0, a1*b1-c1, a2*b2-c2, a3*b3-c3]
+ */
+ static INLINE CONST vect_t fmsub(const vect_t c, const vect_t a, const vect_t b) {
+#ifdef __FMA__
+ return _mm_fmsub_ps(a, b, c);
+#else
+ return sub(mul(a, b), c);
+#endif
+ }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from
+ * the intermediate result, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3], [c0, c1, c2, c3]
+ * Return : [a0*b0-c0, a1*b1-c1, a2*b2-c2, a3*b3-c3]
+ */
+ static INLINE CONST vect_t msub(const vect_t c, const vect_t a, const vect_t b) { return fmsub(c, a, b); }
+
+ static INLINE CONST vect_t fmsubin(vect_t &c, const vect_t a, const vect_t b) { return c = fmsub(c, a, b); }
+
+ /*
+ * Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results
+ in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [(a0==b0) ? 0xFFFFFFFF : 0,
+ (a1==b1) ? 0xFFFFFFFF : 0,
+ (a2==b2) ? 0xFFFFFFFF : 0,
+ (a3==b3) ? 0xFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t eq(const vect_t a, const vect_t b) { return _mm_cmpeq_ps(a, b); }
+
+ /*
+ * Compare packed single-precision (32-bit) floating-point elements in a and b for lesser-than, and store the
+ results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [(a0<b0) ? 0xFFFFFFFF : 0,
+ (a1<b1) ? 0xFFFFFFFF : 0,
+ (a2<b2) ? 0xFFFFFFFF : 0,
+ (a3<b3) ? 0xFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t lesser(const vect_t a, const vect_t b) { return _mm_cmplt_ps(a, b); }
+
+ /*
+ * Compare packed single-precision (32-bit) floating-point elements in a and b for lesser or equal than, and store
+ the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [(a0<=b0) ? 0xFFFFFFFF : 0,
+ (a1<=b1) ? 0xFFFFFFFF : 0,
+ (a2<=b2) ? 0xFFFFFFFF : 0,
+ (a3<=b3) ? 0xFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return _mm_cmple_ps(a, b); }
+
+ /*
+ * Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the
+ results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [(a0>b0) ? 0xFFFFFFFF : 0,
+ (a1>b1) ? 0xFFFFFFFF : 0,
+ (a2>b2) ? 0xFFFFFFFF : 0,
+ (a3>b3) ? 0xFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t greater(const vect_t a, const vect_t b) { return _mm_cmpgt_ps(a, b); }
+
+ /*
+ * Compare packed single-precision (32-bit) floating-point elements in a and b for greater or equal than, and store
+ the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [(a0>=b0) ? 0xFFFFFFFF : 0,
+ (a1>=b1) ? 0xFFFFFFFF : 0,
+ (a2>=b2) ? 0xFFFFFFFF : 0,
+ (a3>=b3) ? 0xFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return _mm_cmpge_ps(a, b); }
+
+ /*
+ * Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0 AND b0, a1 AND b1, a2 AND b2, a3 AND b3]
+ */
+ static INLINE CONST vect_t vand(const vect_t a, const vect_t b) { return _mm_and_ps(a, b); }
+
+ /*
+ * Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0 OR b0, a1 OR b1, a2 OR b2, a3 OR b3]
+ */
+ static INLINE CONST vect_t vor(const vect_t a, const vect_t b) { return _mm_or_ps(a, b); }
+
+ /*
+ * Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0 XOR b0, a1 XOR b1, a2 XOR b2, a3 XOR b3]
+ */
+ static INLINE CONST vect_t vxor(const vect_t a, const vect_t b) { return _mm_xor_ps(a, b); }
+
+ /*
+ * Compute the bitwise AND NOT of packed single-precision (32-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0 ANDNOT b0, a1 ANDNOT b1, a2 ANDNOT b2, a3 ANDNOT b3]
+ */
+ static INLINE CONST vect_t vandnot(const vect_t a, const vect_t b) { return _mm_andnot_ps(a, b); }
+
+ /*
+ * Round the packed single-precision (32-bit) floating-point elements in a down to an integer value, and store the
+ * results as packed double-precision floating-point elements in vect_t.
+ * Args : [a0, a1, a2, a3]
+ * Return : [floor(a0), floor(a1), floor(a2), floor(a3)]
+ */
+ static INLINE CONST vect_t floor(const vect_t a) { return _mm_floor_ps(a); }
+
+ /*
+ * Round the packed single-precision (32-bit) floating-point elements in a up to an integer value, and store the
+ * results as packed single-precision floating-point elements in vect_t.
+ * Args : [a0, a1, a2, a3]
+ * Return : [ceil(a0), ceil(a1), ceil(a2), ceil(a3)]
+ */
+ static INLINE CONST vect_t ceil(const vect_t a) { return _mm_ceil_ps(a); }
+
+ /*
+ * Round the packed single-precision (32-bit) floating-point elements in a, and store the results as packed
+ * single-precision floating-point elements in vect_t.
+ * Args : [a0, a1, a2, a3]
+ * Return : [round(a0), round(a1), round(a2), round(a3)]
+ */
+ static INLINE CONST vect_t round(const vect_t a) {
+ return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ }
+
+ /*
+ * Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in a and b, and pack the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0+a1, b0+b1, a2+a3, b2+b3]
+ */
+ static INLINE CONST vect_t hadd(const vect_t a, const vect_t b) { return _mm_hadd_ps(a, b); }
+
+ /*
+ * Horizontally add single-precision (32-bit) floating-point elements in a.
+ * Args : [a0, a1, a2, a3]
+ * Return : a0+a1+a2+a3
+ */
+ static INLINE CONST scalar_t hadd_to_scal(const vect_t a) {
+ return ((const scalar_t *)&a)[0] + ((const scalar_t *)&a)[1] + ((const scalar_t *)&a)[2] +
+ ((const scalar_t *)&a)[3];
+ }
+
+ static INLINE vect_t mod(vect_t &C, const vect_t &P, const vect_t &INVP, const vect_t &NEGP, const vect_t &MIN,
+ const vect_t &MAX, vect_t &Q, vect_t &T) {
+ FLOAT_MOD(C, P, INVP, Q);
+ NORML_MOD(C, P, NEGP, MIN, MAX, Q, T);
+
+ return C;
+ }
+#else // __AVX__
+#error "You need SSE instructions to perform 128bits operations on double"
+#endif // __FFLASFFPACK_USE_SIMD
+};
+
+#endif // __FFLASFFPACK_fflas_ffpack_utils_simd128_float_INL
diff --git a/fflas-ffpack/fflas/fflas_simd/simd128_int16.inl b/fflas-ffpack/fflas/fflas_simd/simd128_int16.inl
new file mode 100644
index 0000000..924cc57
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_simd/simd128_int16.inl
@@ -0,0 +1,425 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_ffpack_utils_simd128_int16_INL
+#define __FFLASFFPACK_fflas_ffpack_utils_simd128_int16_INL
+
+/*
+ * Simd128 specialized for int16_t
+ */
+template <> struct Simd128_impl<true, true, true, 2> {
+#if defined(__FFLASFFPACK_USE_SIMD)
+ /*
+ * alias to 128 bit simd register
+ */
+ using vect_t = __m128i;
+
+ /*
+ * define the scalar type corresponding to the specialization
+ */
+ using scalar_t = int16_t;
+
+ /*
+ * number of scalar_t in a simd register
+ */
+ static const constexpr size_t vect_size = 8;
+
+ /*
+ * alignement required by scalar_t pointer to be loaded in a vect_t
+ */
+ static const constexpr size_t alignment = 16;
+
+ /*
+ * Check if the pointer p is a multiple of alignemnt
+ */
+ template <class T> static constexpr bool valid(T *p) { return (int64_t)p % alignment == 0; }
+
+ /*
+ * Check if the number n is a multiple of vect_size
+ */
+ template <class T> static constexpr bool compliant(T n) { return n % vect_size == 0; }
+
+ /*
+ * Converter from vect_t to a tab.
+ * exple:
+ * Converter conv;
+ * conv.v = a;
+ * scalart_t x = conv.t[1]
+ */
+ union Converter {
+ vect_t v;
+ scalar_t t[vect_size];
+ };
+
+ /*
+ * Return vector of type vect_t with all elements set to zero
+ * Return [0,0,0,0,0,0,0,0] int16_t
+ */
+ static INLINE CONST vect_t zero() { return _mm_setzero_si128(); }
+
+ /*
+ * Broadcast 16-bit integer a to all all elements of dst. This intrinsic may generate the vpbroadcastw.
+ * Return [x,x,x,x,x,x,x,x] int16_t
+ */
+ static INLINE CONST vect_t set1(const scalar_t x) { return _mm_set1_epi16(x); }
+
+ /*
+ * Broadcast 16-bit integer a to all all elements of dst. This intrinsic may generate the vpbroadcastw.
+ * Return [x0,x1,x2,x3,x4,x5,x6,x7] int16_t
+ */
+ static INLINE CONST vect_t set(const scalar_t x0, const scalar_t x1, const scalar_t x2, const scalar_t x3,
+ const scalar_t x4, const scalar_t x5, const scalar_t x6, const scalar_t x7) {
+ return _mm_set_epi16(x7, x6, x5, x4, x3, x2, x1, x0);
+ }
+
+ /*
+ * Gather 16-bit integer elements with indexes idx[0], ..., idx[7] from the address p in vect_t.
+ * Return [p[idx[0]], p[idx[1]], p[idx[2]], p[idx[3]],
+ p[idx[4]], p[idx[5]], p[idx[6]], p[idx[7]]] int16_t
+ */
+ template <class T> static INLINE PURE vect_t gather(const scalar_t *const p, const T *const idx) {
+ return set(p[idx[0]], p[idx[1]], p[idx[2]], p[idx[3]], p[idx[4]], p[idx[5]], p[idx[6]], p[idx[7]]);
+ }
+
+ /*
+ * Load 128-bits of integer data from memory into dst.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ * Return [p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7]] int16_t
+ */
+ static INLINE PURE vect_t load(const scalar_t *const p) {
+ return _mm_load_si128(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Load 128-bits of integer data from memory into dst.
+ * p does not need to be aligned on any particular boundary.
+ * Return [p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7]] int16_t
+ */
+ static INLINE PURE vect_t loadu(const scalar_t *const p) {
+ return _mm_loadu_si128(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Store 128-bits of integer data from a into memory.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void store(const scalar_t *p, vect_t v) {
+ _mm_store_si128(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Store 128-bits of integer data from a into memory.
+ * p does not need to be aligned on any particular boundary.
+ */
+ static INLINE void storeu(const scalar_t *p, vect_t v) {
+ _mm_storeu_si128(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Store 128-bits of integer data from a into memory using a non-temporal memory hint.
+ * p must be aligned on a 16-byte boundary or a general-protection exception may be generated.
+ */
+ // static INLINE void stream(const scalar_t *p, const vect_t v) { _mm_stream_si128(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Shift packed 32-bit integers in a left by s while shifting in zeros, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int16_t
+ * Return : [a0 << s, a1 << s, a2 << s, a3 << s, a4 << s, a5 << s, a6 << s, a7 << s] int16_t
+ */
+ static INLINE CONST vect_t sll(const vect_t a, const int s) { return _mm_slli_epi16(a, s); }
+
+ /*
+ * Shift packed 32-bit integers in a right by s while shifting in zeros, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int16_t
+ * Return : [a0 >> s, a1 >> s, a2 >> s, a3 >> s, a4 >> s, a5 >> s, a6 >> s, a7 >> s] int16_t
+ */
+ static INLINE CONST vect_t srl(const vect_t a, const int s) { return _mm_srli_epi16(a, s); }
+
+
+ static INLINE CONST vect_t sra(const vect_t a, const scalar_t s) { return _mm_sra_epi16(a, set1(s)); }
+
+ /*
+ * Add packed 16-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int16_t
+ * Return : [a0+b0, a1+b1, a2+b2, a3+b3, a4+b4, a5+b5, a6+b6, a7+b7] int16_t
+ */
+ static INLINE CONST vect_t add(const vect_t a, const vect_t b) { return _mm_add_epi16(a, b); }
+
+ static INLINE vect_t addin(vect_t &a, const vect_t b) { return a = add(a, b); }
+
+ /*
+ * Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int16_t
+ * Return : [a0-b0, a1-b1, a2-b2, a3-b3, a4-b4, a5-b5, a6-b6, a7-b7] int16_t
+ */
+ static INLINE CONST vect_t sub(const vect_t a, const vect_t b) { return _mm_sub_epi16(a, b); }
+
+ static INLINE CONST vect_t subin(vect_t &a, const vect_t b) { return a = sub(a, b); }
+
+ /*
+ * Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits
+ of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int16_t
+ * Return : [a0*b0 mod 2^16-1, a1*b1 mod 2^16-1, a2*b2 mod 2^16-1, a3*b3 mod 2^16-1,
+ a4*b4 mod 2^16-1, a5*b5 mod 2^16-1, a6*b6 mod 2^16-1, a7*b7 mod 2^16-1] int16_t
+ */
+ static INLINE CONST vect_t mullo(const vect_t a, const vect_t b) { return _mm_mullo_epi16(a, b); }
+
+ static INLINE CONST vect_t mul(const vect_t a, const vect_t b) { return mullo(a, b); }
+
+ /*
+ * Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16
+ bits of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int16_t
+ * Return :
+ */
+ static INLINE CONST vect_t mulhi(const vect_t a, const vect_t b) { return _mm_mulhi_epi16(a, b); }
+
+ /*
+ * Multiply the low 8-bit integers from each packed 16-bit element in a and b, and store the signed 16-bit results
+ in dst.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int16_t
+ * Return : [a0*b0, a1*b1, a2*b2, a3*b3, a4*b4, a5*b5, a6*b6, a7*b7] int16_t
+ */
+ static INLINE CONST vect_t mulx(vect_t a, vect_t b) {
+ vect_t mask = set1(0x00FF); // ???
+ a = vand(a, mask);
+ b = vand(b, mask);
+ return _mm_mullo_epi16(a, b);
+ }
+
+ /*
+ *
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int16_t
+ [c0, c1, c2, c3, c4, c5, c6, c7] int16_t
+ * Return : [a0*b0+c0, a1*b1+c1, a2*b2+c2, a3*b3+c3, a4*b4+c4, a5*b5+c5, a6*b6+c6, a7*b7+c7] int16_t
+ */
+ static INLINE CONST vect_t fmaddx(vect_t c, const vect_t a, const vect_t b) { return add(c, mulx(a, b)); }
+
+ static INLINE CONST vect_t fmadd(vect_t c, const vect_t a, const vect_t b) { return add(c, mul(a, b)); }
+
+ /*
+ * Compare packed 16-bits in a and b for equality, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int16_t
+ * Return : [(a0==b0) ? 0xFFFF : 0, (a1==b1) ? 0xFFFF : 0,
+ (a2==b2) ? 0xFFFF : 0, (a3==b3) ? 0xFFFF : 0,
+ (a4==b4) ? 0xFFFF : 0, (a5==b5) ? 0xFFFF : 0,
+ (a6==b6) ? 0xFFFF : 0, (a7==b7) ? 0xFFFF : 0] int16_t
+ */
+ static INLINE CONST vect_t eq(const vect_t a, const vect_t b) { return _mm_cmpeq_epi16(a, b); }
+
+ /*
+ * Compare packed 16-bits in a and b for greater-than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int16_t
+ * Return : [(a0>b0) ? 0xFFFF : 0, (a1>b1) ? 0xFFFF : 0,
+ (a2>b2) ? 0xFFFF : 0, (a3>b3) ? 0xFFFF : 0,
+ (a4>b4) ? 0xFFFF : 0, (a5>b5) ? 0xFFFF : 0,
+ (a6>b6) ? 0xFFFF : 0, (a7>b7) ? 0xFFFF : 0] int16_t
+ */
+ static INLINE CONST vect_t greater(const vect_t a, const vect_t b) { return _mm_cmpgt_epi16(a, b); }
+
+ /*
+ * Compare packed 16-bits in a and b for lesser-than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int16_t
+ * Return : [(a0<b0) ? 0xFFFF : 0, (a1<b1) ? 0xFFFF : 0,
+ (a2<b2) ? 0xFFFF : 0, (a3<b3) ? 0xFFFF : 0,
+ (a4<b4) ? 0xFFFF : 0, (a5<b5) ? 0xFFFF : 0,
+ (a6<b6) ? 0xFFFF : 0, (a7<b7) ? 0xFFFF : 0] int16_t
+ */
+ static INLINE CONST vect_t lesser(const vect_t a, const vect_t b) { return _mm_cmpgt_epi16(b, a); }
+
+ /*
+ * Compare packed 16-bits in a and b for greater or equal than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int16_t
+ * Return : [(a0>=b0) ? 0xFFFF : 0, (a1>=b1) ? 0xFFFF : 0,
+ (a2>=b2) ? 0xFFFF : 0, (a3>=b3) ? 0xFFFF : 0,
+ (a4>=b4) ? 0xFFFF : 0, (a5>=b5) ? 0xFFFF : 0,
+ (a6>=b6) ? 0xFFFF : 0, (a7>=b7) ? 0xFFFF : 0] int16_t
+ */
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return vor(greater(a, b), eq(a, b)); }
+
+ /*
+ * Compare packed 16-bits in a and b for lesser or equal than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int16_t
+ * Return : [(a0<=b0) ? 0xFFFF : 0, (a1<=b1) ? 0xFFFF : 0,
+ (a2<=b2) ? 0xFFFF : 0, (a3<=b3) ? 0xFFFF : 0,
+ (a4<=b4) ? 0xFFFF : 0, (a5<=b5) ? 0xFFFF : 0,
+ (a6<=b6) ? 0xFFFF : 0, (a7<=b7) ? 0xFFFF : 0] int16_t
+ */
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return vor(lesser(a, b), eq(a, b)); }
+
+ /*
+ * Compute the bitwise AND of packed 16-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 AND b0, a1 AND b1, a2 AND b2, a3 AND b3, a4 AND b4, a5 AND b5, a6 AND b6, a7 AND b7]
+ */
+ static INLINE CONST vect_t vand(const vect_t a, const vect_t b) { return _mm_and_si128(b, a); }
+
+ /*
+ * Compute the bitwise OR of packed 16-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 OR b0, a1 OR b1, a2 OR b2, a3 OR b3, a4 OR b4, a5 OR b5, a6 OR b6, a7 OR b7]
+ */
+ static INLINE CONST vect_t vor(const vect_t a, const vect_t b) { return _mm_or_si128(b, a); }
+
+ /*
+ * Compute the bitwise XOR of packed 16-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 XOR b0, a1 XOR b1, a2 XOR b2, a3 XOR b3, a4 XOR b4, a5 XOR b5, a6 XOR b6, a7 XOR b7]
+ */
+ static INLINE CONST vect_t vxor(const vect_t a, const vect_t b) { return _mm_xor_si128(b, a); }
+
+ /*
+ * Compute the bitwise AND NOT of packed 16-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 ANDNOT b0, a1 ANDNOT b1, a2 ANDNOT b2, a3 ANDNOT b3, a4 ANDNOT b4, a5 ANDNOT b5, a6 ANDNOT b6, a7
+ ANDNOT b7]
+ */
+ static INLINE CONST vect_t vandnot(const vect_t a, const vect_t b) { return _mm_andnot_si128(b, a); }
+
+ /*
+ * Horizontally add 16-bits elements of a.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ * Return : a0+a1+a2+a3
+ */
+ static INLINE CONST scalar_t hadd_to_scal(const vect_t a) {
+ Converter conv;
+ conv.v = a;
+ return scalar_t(conv.t[0] + conv.t[1] + conv.t[2] + conv.t[3] + conv.t[4] + conv.t[5] + conv.t[6] + conv.t[7]);
+ }
+
+ static INLINE CONST vect_t round(const vect_t a) { return a; }
+
+ static INLINE vect_t fmaddxin(vect_t &c, const vect_t a, const vect_t b) { return c = fmaddx(c, a, b); }
+
+ static INLINE CONST vect_t fnmaddx(const vect_t c, const vect_t a, const vect_t b) { return sub(c, mulx(a, b)); }
+
+ static INLINE vect_t fnmaddxin(vect_t &c, const vect_t a, const vect_t b) { return c = fnmaddx(c, a, b); }
+
+ static INLINE CONST vect_t signbits(const vect_t x) {
+ vect_t signBits = sub(zero(), srl(x, 4*sizeof(scalar_t)-1));
+ return signBits;
+ }
+
+ static INLINE vect_t mod(vect_t &C, const vect_t &P, const __m64 &INVP, const vect_t &NEGP, const vect_t &MIN,
+ const vect_t &MAX, vect_t &Q, vect_t &T)
+
+ {
+#ifdef __INTEL_COMPILER
+ C = _mm_rem_epi16(C, P);
+#else
+ FFLASFFPACK_abort("not implemented");
+#endif
+ NORML_MOD(C, P, NEGP, MIN, MAX, Q, T);
+ return C;
+ }
+
+#else
+#error "You need SSE instructions to perform 128 bits operations on int16"
+#endif // defined(__FFLASFFPACK_USE_AVX2)
+};
+
+/*
+ * Simd128 specialized for uint16_t
+ */
+template <> struct Simd128_impl<true, true, false, 2> : public Simd128_impl<true, true, true, 2> {
+ using scalar_t = uint16_t;
+
+ /*
+ * Load 128-bits of unsigned integer data from memory into dst.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ * Return [p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7]] int16_t
+ */
+ static INLINE PURE vect_t load(const scalar_t *const p) {
+ return _mm_load_si128(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Load 128-bits of unsigned integer data from memory into dst.
+ * p does not need to be aligned on any particular boundary.
+ * Return [p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7]] int16_t
+ */
+ static INLINE PURE vect_t loadu(const scalar_t *const p) {
+ return _mm_loadu_si128(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Store 128-bits of unsigned integer data from a into memory.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void store(const scalar_t *p, vect_t v) {
+ _mm_store_si128(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Store 128-bits of integer data from a into memory.
+ * p does not need to be aligned on any particular boundary.
+ */
+ static INLINE void storeu(const scalar_t *p, vect_t v) {
+ _mm_storeu_si128(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ static INLINE CONST vect_t greater(vect_t a, vect_t b) {
+
+ vect_t x;
+ x = set1(-(static_cast<scalar_t>(1) << (sizeof(scalar_t) * 8 - 1)));
+ a = sub(x, a);
+ b = sub(x, b);
+ return _mm_cmpgt_epi16(a, b);
+ }
+
+ static INLINE CONST vect_t lesser(vect_t a, vect_t b) {
+ vect_t x;
+ x = set1(-(static_cast<scalar_t>(1) << (sizeof(scalar_t) * 8 - 1)));
+ a = sub(x, a);
+ b = sub(x, b);
+ return _mm_cmpgt_epi16(a, b);
+ }
+
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return vor(greater(a, b), eq(a, b)); }
+
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return vor(lesser(a, b), eq(a, b)); }
+};
+
+#endif
diff --git a/fflas-ffpack/fflas/fflas_simd/simd128_int32.inl b/fflas-ffpack/fflas/fflas_simd/simd128_int32.inl
new file mode 100644
index 0000000..c99f450
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_simd/simd128_int32.inl
@@ -0,0 +1,455 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_ffpack_utils_simd128_int32_INL
+#define __FFLASFFPACK_fflas_ffpack_utils_simd128_int32_INL
+
+// int32_t
+template <> struct Simd128_impl<true, true, true, 4> {
+
+#if defined(__FFLASFFPACK_USE_SIMD)
+
+ /*
+ * alias to 256 bit simd register
+ */
+ using vect_t = __m128i;
+
+ /*
+ * define the scalar type corresponding to the specialization
+ */
+ using scalar_t = int32_t;
+
+ /*
+ * number of scalar_t in a simd register
+ */
+ static const constexpr size_t vect_size = 4;
+
+ /*
+ * alignement required by scalar_t pointer to be loaded in a vect_t
+ */
+ static const constexpr size_t alignment = 16;
+
+ /*
+ * Check if the pointer p is a multiple of alignemnt
+ */
+ template <class T> static constexpr bool valid(T *p) { return (int64_t)p % alignment == 0; }
+
+ /*
+ * Check if the number n is a multiple of vect_size
+ */
+ template <class T> static constexpr bool compliant(T n) { return n % vect_size == 0; }
+
+ /*
+ * Converter from vect_t to a tab.
+ * exple:
+ * Converter conv;
+ * conv.v = a;
+ * scalart_t x = conv.t[1]
+ */
+ union Converter {
+ vect_t v;
+ scalar_t t[vect_size];
+ };
+
+ /*
+ * Return vector of type vect_t with all elements set to zero
+ * Return [0,0,0,0] int32_t
+ */
+ static INLINE CONST vect_t zero() { return _mm_setzero_si128(); }
+
+ /*
+ * Broadcast 32-bit integer a to all all elements of dst. This intrinsic may generate the vpbroadcastw.
+ * Return [x,x,x,x] int32_t
+ */
+ static INLINE CONST vect_t set1(const scalar_t x) { return _mm_set1_epi32(x); }
+
+ /*
+ * Broadcast 32-bit integer a to all all elements of dst. This intrinsic may generate the vpbroadcastw.
+ * Return [x0,x1,x2,x3] int32_t
+ */
+ static INLINE CONST vect_t set(const scalar_t x0, const scalar_t x1, const scalar_t x2, const scalar_t x3) {
+ return _mm_set_epi32(x3, x2, x1, x0);
+ }
+
+ /*
+ * Gather 32-bit integer elements with indexes idx[0], ..., idx[3] from the address p in vect_t.
+ * Return [p[idx[0]], p[idx[1]], p[idx[2]], p[idx[3]]] int32_t
+ */
+ template <class T> static INLINE PURE vect_t gather(const scalar_t *const p, const T *const idx) {
+ return set(p[idx[0]], p[idx[1]], p[idx[2]], p[idx[3]]);
+ }
+
+ /*
+ * Load 128-bits of integer data from memory into dst.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ * Return [p[0],p[1],p[2],p[3]] int32_t
+ */
+ static INLINE PURE vect_t load(const scalar_t *const p) {
+ return _mm_load_si128(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Load 128-bits of integer data from memory into dst.
+ * p does not need to be aligned on any particular boundary.
+ * Return [p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7]] int32_t
+ */
+ static INLINE PURE vect_t loadu(const scalar_t *const p) {
+ return _mm_loadu_si128(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Store 128-bits of integer data from a into memory.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void store(const scalar_t *p, vect_t v) {
+ _mm_store_si128(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Store 128-bits of integer data from a into memory.
+ * p does not need to be aligned on any particular boundary.
+ */
+ static INLINE void storeu(const scalar_t *p, vect_t v) {
+ _mm_storeu_si128(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Store 128-bits of integer data from a into memory using a non-temporal memory hint.
+ * p must be aligned on a 16-byte boundary or a general-protection exception may be generated.
+ */
+ // static INLINE void stream(const scalar_t *p, const vect_t v) { _mm_stream_si128(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Shift packed 64-bit integers in a left by s while shifting in zeros, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int32_t
+ * Return : [a0 << s, a1 << s, a2 << s, a3 << s] int32_t
+ */
+ static INLINE CONST vect_t sll(const vect_t a, const int s) { return _mm_slli_epi32(a, s); }
+
+ /*
+ * Shift packed 64-bit integers in a right by s while shifting in zeros, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int32_t
+ * Return : [a0 >> s, a1 >> s, a2 >> s, a3 >> s] int32_t
+ */
+ static INLINE CONST vect_t srl(const vect_t a, const int s) { return _mm_srli_epi32(a, s); }
+
+ static INLINE CONST vect_t sra(const vect_t a, const int s) { return _mm_sra_epi32(a, set1(s)); }
+
+ /*
+ * Add packed 32-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ * Return : [a0+b0, a1+b1, a2+b2, a3+b3] int32_t
+ */
+ static INLINE CONST vect_t add(const vect_t a, const vect_t b) { return _mm_add_epi32(a, b); }
+
+ static INLINE vect_t addin(vect_t &a, const vect_t b) { return a = add(a, b); }
+
+ /*
+ * Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ * Return : [a0-b0, a1-b1, a2-b2, a3-b3] int32_t
+ */
+ static INLINE CONST vect_t sub(const vect_t a, const vect_t b) { return _mm_sub_epi32(a, b); }
+
+ static INLINE vect_t subin(vect_t &a, const vect_t b) { return a = sub(a, b); }
+
+ /*
+ * Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits
+ of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ * Return : [a0*b0 mod 2^16-1, a1*b1 mod 2^16-1, a2*b2 mod 2^16-1, a3*b3 mod 2^16-1] int32_t
+ */
+ static INLINE CONST vect_t mullo(const vect_t a, const vect_t b) { return _mm_mullo_epi32(a, b); }
+
+ /*
+ * Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the high 32
+ bits of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ * Return :
+ */
+ static INLINE CONST vect_t mulhi(const vect_t a, const vect_t b) {
+ // _mm_mulhi_epi32 emul
+ //#pragma warning "The simd mulhi function is emulate, it may impact the performances."
+ vect_t a1, a2, b1, b2;
+ a1 = set(0, _mm_extract_epi32(a, 0), 0, _mm_extract_epi32(a, 1));
+ a2 = set(0, _mm_extract_epi32(a, 1), 0, _mm_extract_epi32(a, 3));
+ b1 = set(0, _mm_extract_epi32(b, 0), 0, _mm_extract_epi32(b, 1));
+ b2 = set(0, _mm_extract_epi32(b, 1), 0, _mm_extract_epi32(b, 3));
+ a1 = _mm_mul_epi32(a1, b1);
+ a2 = _mm_mul_epi32(a2, b2);
+ return set(_mm_extract_epi32(a1, 0), _mm_extract_epi32(a1, 2), _mm_extract_epi32(b1, 0),
+ _mm_extract_epi32(b2, 0));
+ }
+
+ /*
+ * Multiply the low 16-bit integers from each packed 32-bit element in a and b, and store the signed 32-bit results
+ in vect_t.
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ * Return : [a0*b0, a1*b1, a2*b2, a3*b3] int32_t
+ */
+ static INLINE CONST vect_t mulx(const vect_t a, const vect_t b) { return _mm_mul_epi32(a, b); }
+
+ /*
+ * Multiply the packed 32-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits
+ of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ * Return : [a0*b0 mod 2^16-1, a1*b1 mod 2^16-1, a2*b2 mod 2^16-1, a3*b3 mod 2^16-1] int32_t
+ */
+ static INLINE CONST vect_t mul(const vect_t a, const vect_t b) { return mullo(a, b); }
+
+ /*
+ *
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ [c0, c1, c2, c3] int32_t
+ * Return : [(a0*b0 mod 2^16-1)+c0, (a1*b1 mod 2^16-1)+c1, (a2*b2 mod 2^16-1)+c2, (a3*b3 mod 2^16-1)+c3] int32_t
+ */
+ static INLINE CONST vect_t fmadd(const vect_t c, const vect_t a, const vect_t b) { return add(c, mul(a, b)); }
+
+ static INLINE vect_t fmaddin(vect_t &c, const vect_t a, const vect_t b) { return c = fmadd(c, a, b); }
+
+ /*
+ *
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ [c0, c1, c2, c3] int32_t
+ * Return : [-(a0*b0 mod 2^16-1)+c0, -(a1*b1 mod 2^16-1)+c1, -(a2*b2 mod 2^16-1)+c2, -(a3*b3 mod 2^16-1)+c3] int32_t
+ */
+ static INLINE CONST vect_t fnmadd(const vect_t c, const vect_t a, const vect_t b) { return sub(c, mul(a, b)); }
+
+ static INLINE CONST vect_t fnmaddin(vect_t c, const vect_t a, const vect_t b) { return c = sub(c, mul(a, b)); }
+
+ /*
+ *
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ [c0, c1, c2, c3] int32_t
+ * Return : [(a0*b0 mod 2^16-1)-c0, (a1*b1 mod 2^16-1)-c1, (a2*b2 mod 2^16-1)-c2, (a3*b3 mod 2^16-1)-c3] int32_t
+ */
+ static INLINE CONST vect_t fmsub(const vect_t c, const vect_t a, const vect_t b) { return sub(mul(a, b), c); }
+
+ /*
+ *
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ [c0, c1, c2, c3] int32_t
+ * Return : [(a0*b0 mod 2^16-1)-c0, (a1*b1 mod 2^16-1)-c1, (a2*b2 mod 2^16-1)-c2, (a3*b3 mod 2^16-1)-c3] int32_t
+ */
+ static INLINE CONST vect_t fmsubin(vect_t c, const vect_t a, const vect_t b) { return c = sub(mul(a, b), c); }
+
+ /*
+ * Compare packed 32-bits in a and b for equality, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [(a0==b0) ? 0xFFFFFFFF : 0, (a1==b1) ? 0xFFFFFFFF : 0,
+ (a2==b2) ? 0xFFFFFFFF : 0, (a3==b3) ? 0xFFFFFFFF : 0,
+ (a4==b4) ? 0xFFFFFFFF : 0, (a5==b5) ? 0xFFFFFFFF : 0,
+ (a6==b6) ? 0xFFFFFFFF : 0, (a7==b7) ? 0xFFFFFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t eq(const vect_t a, const vect_t b) { return _mm_cmpeq_epi32(a, b); }
+
+ /*
+ * Compare packed 32-bits in a and b for greater-than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [(a0>b0) ? 0xFFFFFFFF : 0, (a1>b1) ? 0xFFFFFFFF : 0,
+ (a2>b2) ? 0xFFFFFFFF : 0, (a3>b3) ? 0xFFFFFFFF : 0,
+ (a4>b4) ? 0xFFFFFFFF : 0, (a5>b5) ? 0xFFFFFFFF : 0,
+ (a6>b6) ? 0xFFFFFFFF : 0, (a7>b7) ? 0xFFFFFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t greater(const vect_t a, const vect_t b) { return _mm_cmpgt_epi32(a, b); }
+
+ /*
+ * Compare packed 32-bits in a and b for lesser-than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [(a0<b0) ? 0xFFFFFFFF : 0, (a1<b1) ? 0xFFFFFFFF : 0,
+ (a2<b2) ? 0xFFFFFFFF : 0, (a3<b3) ? 0xFFFFFFFF : 0,
+ (a4<b4) ? 0xFFFFFFFF : 0, (a5<b5) ? 0xFFFFFFFF : 0,
+ (a6<b6) ? 0xFFFFFFFF : 0, (a7<b7) ? 0xFFFFFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t lesser(const vect_t a, const vect_t b) { return _mm_cmpgt_epi32(b, a); }
+
+ /*
+ * Compare packed 32-bits in a and b for greater or equal than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [(a0>=b0) ? 0xFFFFFFFF : 0, (a1>=b1) ? 0xFFFFFFFF : 0,
+ (a2>=b2) ? 0xFFFFFFFF : 0, (a3>=b3) ? 0xFFFFFFFF : 0,
+ (a4>=b4) ? 0xFFFFFFFF : 0, (a5>=b5) ? 0xFFFFFFFF : 0,
+ (a6>=b6) ? 0xFFFFFFFF : 0, (a7>=b7) ? 0xFFFFFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return vor(greater(a, b), eq(a, b)); }
+
+ /*
+ * Compare packed 32-bits in a and b for lesser or equal than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [(a0<=b0) ? 0xFFFFFFFF : 0, (a1<=b1) ? 0xFFFFFFFF : 0,
+ (a2<=b2) ? 0xFFFFFFFF : 0, (a3<=b3) ? 0xFFFFFFFF : 0,
+ (a4<=b4) ? 0xFFFFFFFF : 0, (a5<=b5) ? 0xFFFFFFFF : 0,
+ (a6<=b6) ? 0xFFFFFFFF : 0, (a7<=b7) ? 0xFFFFFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return vor(lesser(a, b), eq(a, b)); }
+
+ /*
+ * Compute the bitwise AND of packed 32-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 AND b0, a1 AND b1, a2 AND b2, a3 AND b3, a4 AND b4, a5 AND b5, a6 AND b6, a7 AND b7]
+ */
+ static INLINE CONST vect_t vand(const vect_t a, const vect_t b) { return _mm_and_si128(b, a); }
+
+ /*
+ * Compute the bitwise OR of packed 32-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 OR b0, a1 OR b1, a2 OR b2, a3 OR b3, a4 OR b4, a5 OR b5, a6 OR b6, a7 OR b7]
+ */
+ static INLINE CONST vect_t vor(const vect_t a, const vect_t b) { return _mm_or_si128(b, a); }
+
+ /*
+ * Compute the bitwise XOR of packed 32-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 XOR b0, a1 XOR b1, a2 XOR b2, a3 XOR b3, a4 XOR b4, a5 XOR b5, a6 XOR b6, a7 XOR b7]
+ */
+ static INLINE CONST vect_t vxor(const vect_t a, const vect_t b) { return _mm_xor_si128(b, a); }
+
+ /*
+ * Compute the bitwise AND NOT of packed 32-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 ANDNOT b0, a1 ANDNOT b1, a2 ANDNOT b2, a3 ANDNOT b3, a4 ANDNOT b4, a5 ANDNOT b5, a6 ANDNOT b6, a7
+ ANDNOT b7]
+ */
+ static INLINE CONST vect_t vandnot(const vect_t a, const vect_t b) { return _mm_andnot_si128(b, a); }
+
+ /*
+ * Horizontally add 32-bits elements of a.
+ * Args : [a0, a1, a2, a3]
+ * Return : a0+a1+a2+a3
+ */
+ static INLINE CONST scalar_t hadd_to_scal(const vect_t a) {
+ Converter conv;
+ conv.v = a;
+ return conv.t[0] + conv.t[1] + conv.t[2] + conv.t[3];
+ }
+
+ static INLINE CONST vect_t fmaddx(const vect_t c, const vect_t a, const vect_t b) { return add(mulx(a, b), c); }
+
+ static INLINE vect_t fmaddxin(vect_t &c, const vect_t a, const vect_t b) { return c = fmaddx(c, a, b); }
+
+ static INLINE CONST vect_t fnmaddx(const vect_t c, const vect_t a, const vect_t b) { return sub(c, mulx(a, b)); }
+
+ static INLINE vect_t fnmaddxin(vect_t &c, const vect_t a, const vect_t b) { return c = fnmaddx(c, a, b); }
+
+ static INLINE CONST vect_t round(const vect_t a) { return a; }
+
+ static INLINE CONST vect_t signbits(const vect_t x) {
+ vect_t signBits = sub(zero(), srl(x, 4*sizeof(scalar_t)-1));
+ return signBits;
+ }
+
+ static INLINE vect_t mod(vect_t &C, const vect_t &P, const vect_t &INVP, const vect_t &NEGP, const vect_t &MIN,
+ const vect_t &MAX, vect_t &Q, vect_t &T) {
+#ifdef __INTEL_COMPILER
+ C = _mm_rem_epi32(C, P);
+#else
+ FFLASFFPACK_abort("pas implementé");
+// C = fnmadd(C,_mm_castps_si128(_mm_floor_ps(_mm_mul_ps(INVP,_mm_castsi128_ps(C)))),P);
+#endif
+ NORML_MOD(C, P, NEGP, MIN, MAX, Q, T);
+ return C;
+ }
+
+#else
+#error "You need SSE instructions to perform 128 bits operations on int32"
+#endif
+};
+
+// uint32_t
+template <> struct Simd128_impl<true, true, false, 4> : public Simd128_impl<true, true, true, 4> {
+ using scalar_t = uint32_t;
+
+ /*
+ * Load 128-bits of unsigned integer data from memory into dst.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ * Return [p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7]] int16_t
+ */
+ static INLINE PURE vect_t load(const scalar_t *const p) {
+ return _mm_load_si128(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Load 128-bits of unsigned integer data from memory into dst.
+ * p does not need to be aligned on any particular boundary.
+ * Return [p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7]] int16_t
+ */
+ static INLINE PURE vect_t loadu(const scalar_t *const p) {
+ return _mm_loadu_si128(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Store 128-bits of unsigned integer data from a into memory.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void store(const scalar_t *p, vect_t v) {
+ _mm_store_si128(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ static INLINE CONST vect_t greater(vect_t a, vect_t b) {
+
+ vect_t x;
+ x = set1(-(static_cast<scalar_t>(1) << (sizeof(scalar_t) * 8 - 1)));
+ a = sub(x, a);
+ b = sub(x, b);
+ return _mm_cmpgt_epi32(a, b);
+ }
+
+ static INLINE CONST vect_t lesser(vect_t a, vect_t b) {
+ vect_t x;
+ x = set1(-(static_cast<scalar_t>(1) << (sizeof(scalar_t) * 8 - 1)));
+ a = sub(x, a);
+ b = sub(x, b);
+ return _mm_cmpgt_epi32(a, b);
+ }
+
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return vor(greater(a, b), eq(a, b)); }
+
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return vor(lesser(a, b), eq(a, b)); }
+};
+
+#endif // __FFLASFFPACK_fflas_ffpack_utils_simd128_int32_INL
diff --git a/fflas-ffpack/fflas/fflas_simd/simd128_int64.inl b/fflas-ffpack/fflas/fflas_simd/simd128_int64.inl
new file mode 100644
index 0000000..d154c3b
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_simd/simd128_int64.inl
@@ -0,0 +1,493 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_ffpack_utils_simd128_int64_INL
+#define __FFLASFFPACK_fflas_ffpack_utils_simd128_int64_INL
+
+/*
+ * Simd128 specialized for int64_t
+ */
+template <> struct Simd128_impl<true, true, true, 8> {
+
+#if defined(__FFLASFFPACK_USE_SIMD)
+ /*
+ * alias to 128 bit simd register
+ */
+ using vect_t = __m128i;
+
+ /*
+ * define the scalar type corresponding to the specialization
+ */
+ using scalar_t = int64_t;
+
+ /*
+ * number of scalar_t in a simd register
+ */
+ static const constexpr size_t vect_size = 2;
+
+ /*
+ * alignement required by scalar_t pointer to be loaded in a vect_t
+ */
+ static const constexpr size_t alignment = 16;
+
+ /*
+ * Check if the pointer p is a multiple of alignemnt
+ */
+ template <class T> static constexpr bool valid(T *p) { return (int64_t)p % alignment == 0; }
+
+ /*
+ * Check if the number n is a multiple of vect_size
+ */
+ template <class T> static constexpr bool compliant(T n) { return n % vect_size == 0; }
+
+ /*
+ * Converter from vect_t to a tab.
+ * exple:
+ * Converter conv;
+ * conv.v = a;
+ * scalart_t x = conv.t[1]
+ */
+ union Converter {
+ vect_t v;
+ scalar_t t[vect_size];
+ };
+
+ /*
+ * Return vector of type vect_t with all elements set to zero
+ * Return [0,0] int64_t
+ */
+ static INLINE CONST vect_t zero() { return _mm_setzero_si128(); }
+
+ /*
+ * Broadcast 64-bit integer a to all all elements of dst. This intrinsic may generate the vpbroadcastw.
+ * Return [x,x] int64_t
+ */
+ static INLINE CONST vect_t set1(const scalar_t x) { return _mm_set1_epi64x(x); }
+
+ /*
+ * Broadcast 64-bit integer a to all all elements of dst. This intrinsic may generate the vpbroadcastw.
+ * Return [x0,x1] int64_t
+ */
+ static INLINE CONST vect_t set(const scalar_t x0, const scalar_t x1) { return _mm_set_epi64x(x1, x0); }
+
+ /*
+ * Gather 64-bit integer elements with indexes idx[0], ..., idx[1] from the address p in vect_t.
+ * Return [p[idx[0]], p[idx[1]]] int64_t
+ */
+ template <class T> static INLINE PURE vect_t gather(const scalar_t *const p, const T *const idx) {
+ return set(p[idx[0]], p[idx[1]]);
+ }
+
+ /*
+ * Load 128-bits of integer data from memory into dst.
+ * p must be aligned on a 16-byte boundary or a general-protection exception will be generated.
+ * Return [p[0],p[1]] int64_t
+ */
+ static INLINE PURE vect_t load(const scalar_t *const p) {
+ return _mm_load_si128(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Load 128-bits of integer data from memory into dst.
+ * p does not need to be aligned on any particular boundary.
+ * Return [p[0],p[1]] int64_t
+ */
+ static INLINE PURE vect_t loadu(const scalar_t *const p) {
+ return _mm_loadu_si128(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Store 128-bits of integer data from a into memory.
+ * p must be aligned on a 16-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void store(const scalar_t *p, vect_t v) {
+ _mm_store_si128(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Store 128-bits of integer data from a into memory.
+ * p does not need to be aligned on any particular boundary.
+ */
+ static INLINE void storeu(const scalar_t *p, vect_t v) {
+ _mm_storeu_si128(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Store 128-bits of integer data from a into memory using a non-temporal memory hint.
+ * p must be aligned on a 16-byte boundary or a general-protection exception may be generated.
+ */
+ // static INLINE void stream(scalar_t *p, const vect_t v) { _mm_stream_si128(static_cast<vect_t *>(p), v); }
+
+ /*
+ * Add packed 64-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1] int64_t
+ [b0, b1] int64_t
+ * Return : [a0+b0, a1+b1] int64_t
+ */
+ static INLINE CONST vect_t add(const vect_t a, const vect_t b) { return _mm_add_epi64(a, b); }
+
+ static INLINE vect_t addin(vect_t &a, const vect_t b) { return a = add(a, b); }
+
+ /*
+ * Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in vect_t.
+ * Args : [a0, a1] int64_t
+ [b0, b1] int64_t
+ * Return : [a0-b0, a1-b1] int64_t
+ */
+ static INLINE CONST vect_t sub(const vect_t a, const vect_t b) { return _mm_sub_epi64(a, b); }
+
+ static INLINE vect_t subin(vect_t &a, const vect_t b) { return a = sub(a, b); }
+
+ /*
+ * Shift packed 64-bit integers in a left by s while shifting in zeros, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int64_t
+ * Return : [a0 << s, a1 << s, a2 << s, a3 << s] int64_t
+ */
+ static INLINE CONST vect_t sll(const vect_t a, const int s) { return _mm_slli_epi64(a, s); }
+
+ /*
+ * Shift packed 64-bit integers in a right by s while shifting in zeros, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int64_t
+ * Return : [a0 >> s, a1 >> s, a2 >> s, a3 >> s] int64_t
+ */
+ static INLINE CONST vect_t srl(const vect_t a, const int s) { return _mm_srli_epi64(a, s); }
+
+ static INLINE CONST vect_t sra(const vect_t a, const int s) {
+#ifdef __AVX512__
+ return _mm_sra_epi64(a, set1(s));
+#else
+ const int b = 63 - s;
+ vect_t m = sll(set1(1), b);
+ vect_t x = srl(a, s);
+ vect_t result = sub(vxor(x, m), m); // result = x^m - m
+ return result;
+#endif // 512
+ }
+
+ /*
+ * Multiply the packed 64-bit integers in a and b, producing intermediate 128-bit integers, and store the low 64
+ bits of the intermediate integers in vect_t.
+ * Args : [a0, a1] int64_t
+ [b0, b1] int64_t
+ * Return : [a0*b0 mod 2^16-1, a1*b1 mod 2^16-1] int64_t
+ */
+ static INLINE CONST vect_t mullo(const vect_t x0, const vect_t x1) {
+ // _mm_mullo_epi32 emul
+ // #pragma warning "The simd mullo function is emulate, it may impact the performances."
+
+ Converter c0, c1;
+ c0.v = x0;
+ c1.v = x1;
+ return set((scalar_t)(c0.t[0] * c1.t[0]), (scalar_t)(c0.t[1] * c1.t[1]));
+ }
+
+ static INLINE CONST vect_t mullox(const vect_t x0, const vect_t x1) { return _mm_mullo_epi32(x0, x1); }
+
+ /*
+ * Multiply the packed 64-bit integers in a and b, producing intermediate 128-bit integers, and store the low 64
+ bits of the intermediate integers in vect_t.
+ * Args : [a0, a1] int64_t
+ [b0, b1] int64_t
+ * Return : [a0*b0 mod 2^16-1, a1*b1 mod 2^16-1] int64_t
+ */
+ static INLINE CONST vect_t mul(const vect_t a, const vect_t b) { return mullo(a, b); }
+
+ static INLINE CONST vect_t mulhi(const vect_t a, const vect_t b) {
+// #pragma warning "The simd mulhi function is emulate, it may impact the performances."
+#ifdef __X86_64__
+ Converter c0, c1;
+ c0.v = a;
+ c1.v = b;
+ return set((scalar_t)((int128_t(c0.t[0]) * c1.t[0]) >> 64), (scalar_t)((int128_t(c0.t[1]) * c1.t[1]) >> 64));
+#else
+ return zero();
+#endif
+ }
+
+ static INLINE CONST vect_t fmadd(const vect_t c, const vect_t a, const vect_t b) { return add(c, mul(a, b)); }
+
+ static INLINE vect_t fmaddin(vect_t &c, const vect_t a, const vect_t b) { return c = fmadd(c, a, b); }
+
+ static INLINE CONST vect_t fnmadd(const vect_t c, const vect_t a, const vect_t b) { return sub(c, mul(a, b)); }
+
+ static INLINE CONST vect_t fmsub(const vect_t c, const vect_t a, const vect_t b) { return sub(mul(a, b), c); }
+
+ static INLINE CONST vect_t mulx(const vect_t a, const vect_t b) { return _mm_mul_epi32(a, b); }
+
+ static INLINE CONST vect_t mulux(const vect_t a, const vect_t b) { return _mm_mul_epu32(a, b); }
+
+ static INLINE CONST vect_t eq(const vect_t a, const vect_t b) { return _mm_cmpeq_epi64(a, b); }
+
+ static INLINE CONST vect_t greater(const vect_t a, const vect_t b) {
+#ifdef __SSE4_2__
+ return _mm_cmpgt_epi64(a, b);
+#else
+#warning "The simd greater function is emulate, it may impact the performances."
+ Converter ca, cb;
+ ca.v = a;
+ cb.v = b;
+ return set((ca.t[0] > cb.t[0]) ? 0xFFFFFFFFFFFFFFFF : 0, (ca.t[1] > cb.t[1]) ? 0xFFFFFFFFFFFFFFFF : 0);
+#endif // __SSE4_2__
+ }
+
+ static INLINE CONST vect_t lesser(const vect_t a, const vect_t b) {
+#ifdef __SSE4_2__
+ return _mm_cmpgt_epi64(b, a);
+#else
+#warning "The simd lesser function is emulate, it may impact the performances."
+ Converter ca, cb;
+ ca.v = a;
+ cb.v = b;
+ return set((ca.t[0] < cb.t[0]) ? 0xFFFFFFFFFFFFFFFF : 0, (ca.t[1] < cb.t[1]) ? 0xFFFFFFFFFFFFFFFF : 0);
+#endif // __SSE4_2__
+ }
+
+ /*
+ * Compare packed 64-bits in a and b for greater or equal than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int64_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int64_t
+ * Return : [(a0>=b0) ? 0xFFFFFFFFFFFFFFFF : 0, (a1>=b1) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a2>=b2) ? 0xFFFFFFFFFFFFFFFF : 0, (a3>=b3) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a4>=b4) ? 0xFFFFFFFFFFFFFFFF : 0, (a5>=b5) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a6>=b6) ? 0xFFFFFFFFFFFFFFFF : 0, (a7>=b7) ? 0xFFFFFFFFFFFFFFFF : 0] int64_t
+ */
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return vor(greater(a, b), eq(a, b)); }
+
+ /*
+ * Compare packed 64-bits in a and b for lesser or equal than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int64_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int64_t
+ * Return : [(a0<=b0) ? 0xFFFFFFFFFFFFFFFF : 0, (a1<=b1) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a2<=b2) ? 0xFFFFFFFFFFFFFFFF : 0, (a3<=b3) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a4<=b4) ? 0xFFFFFFFFFFFFFFFF : 0, (a5<=b5) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a6<=b6) ? 0xFFFFFFFFFFFFFFFF : 0, (a7<=b7) ? 0xFFFFFFFFFFFFFFFF : 0] int64_t
+ */
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return vor(lesser(a, b), eq(a, b)); }
+
+ /*
+ * Compute the bitwise AND of packed 64-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 AND b0, a1 AND b1, a2 AND b2, a3 AND b3, a4 AND b4, a5 AND b5, a6 AND b6, a7 AND b7]
+ */
+ static INLINE CONST vect_t vand(const vect_t a, const vect_t b) { return _mm_and_si128(a, b); }
+
+ /*
+ * Compute the bitwise OR of packed 64-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 OR b0, a1 OR b1, a2 OR b2, a3 OR b3, a4 OR b4, a5 OR b5, a6 OR b6, a7 OR b7]
+ */
+ static INLINE CONST vect_t vor(const vect_t a, const vect_t b) { return _mm_or_si128(a, b); }
+
+ /*
+ * Compute the bitwise XOR of packed 64-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 XOR b0, a1 XOR b1, a2 XOR b2, a3 XOR b3, a4 XOR b4, a5 XOR b5, a6 XOR b6, a7 XOR b7]
+ */
+ static INLINE CONST vect_t vxor(const vect_t a, const vect_t b) { return _mm_xor_si128(b, a); }
+
+ /*
+ * Compute the bitwise AND NOT of packed 64-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 ANDNOT b0, a1 ANDNOT b1, a2 ANDNOT b2, a3 ANDNOT b3, a4 ANDNOT b4, a5 ANDNOT b5, a6 ANDNOT b6, a7
+ ANDNOT b7]
+ */
+ static INLINE CONST vect_t vandnot(const vect_t a, const vect_t b) { return _mm_andnot_si128(b, a); }
+
+ /*
+ * Horizontally add 64-bits elements of a.
+ * Args : [a0, a1, a2, a3]
+ * Return : a0+a1+a2+a3
+ */
+ static INLINE CONST scalar_t hadd_to_scal(const vect_t a) {
+ Converter c;
+ c.v = a;
+ return c.t[0] + c.t[1];
+ }
+
+ static INLINE CONST vect_t fmaddx(const vect_t c, const vect_t a, const vect_t b) { return add(c, mulx(a, b)); }
+
+ static INLINE vect_t fmaddxin(vect_t &c, const vect_t a, const vect_t b) { return c = fmaddx(c, a, b); }
+
+ static INLINE CONST vect_t fnmaddx(const vect_t c, const vect_t a, const vect_t b) { return sub(c, mulx(a, b)); }
+
+ static INLINE vect_t fnmaddxin(vect_t &c, const vect_t a, const vect_t b) { return c = fnmaddx(c, a, b); }
+
+ static INLINE CONST vect_t round(const vect_t a) { return a; }
+
+ // mask the high 32 bits of a 64 bits, that is 00000000FFFFFFFF
+ static INLINE CONST vect_t mask_high() { return srl(_mm_set1_epi8(-1), 32); }
+
+ static INLINE CONST vect_t signbits(const vect_t x) {
+ vect_t signBits = sub(zero(), srl(x, 4*sizeof(scalar_t)-1));
+ return signBits;
+ }
+
+ // warning : may be off by 1 multiple, but we save a mul...
+ static INLINE CONST vect_t mulhi_fast(vect_t x, vect_t y) {
+ // unsigned mulhi starts:
+ // x1 = xy_high = mulhiu_fast(x,y)
+ const vect_t mask = mask_high();
+
+ vect_t x0 = vand(x, mask), x1 = srl(x, 32);
+ vect_t y0 = vand(y, mask), y1 = srl(y, 32);
+
+ x0 = mulux(x0, y1); // x0y1
+ y0 = mulux(x1, y0); // x1y0
+ y1 = mulux(x1, y1); // x1y1
+
+ x1 = vand(y0, mask);
+ y0 = srl(y0, 32); // x1y0_lo = x1 // y1yo_hi = y0
+ x1 = srl(add(x1, x0), 32);
+ y0 = add(y1, y0);
+
+ x1 = add(x1, y0);
+ // unsigned mulhi ends
+
+ // fixing signs
+ x0 = vand(signbits(x), y);
+ x1 = sub(x1, x0);
+ x0 = vand(signbits(y), x);
+ x1 = sub(x1, x0);
+ // end fixing
+ return x1;
+ }
+
+ template <bool overflow, bool poweroftwo>
+ static INLINE vect_t mod(vect_t &C, const vect_t &P, const int8_t &shifter, const vect_t &magic, const vect_t &NEGP,
+ const vect_t &MIN, const vect_t &MAX, vect_t &Q, vect_t &T) {
+#ifdef __INTEL_COMPILER
+ // Works fine with ICC 15.0.1 - A.B.
+ // #warning "not tested"
+ C = _mm_rem_epi64(C, P);
+#else
+ if (poweroftwo) {
+ Q = srl(C, 63);
+ vect_t un = set1(1);
+ T = sub(sll(un, shifter), un);
+ Q = add(C, vand(Q, T));
+ Q = sll(srl(Q, shifter), shifter);
+ C = sub(C, Q);
+ Q = vand(greater(zero(), Q), P);
+ C = add(C, Q);
+ } else {
+ Q = mulhi_fast(C, magic);
+ if (overflow) {
+ Q = add(Q, C);
+ }
+ Q = sra(Q, shifter);
+ vect_t q1 = mulux(Q, P);
+ vect_t q2 = sll(mulux(srl(Q, 32), P), 32);
+ C = sub(C, add(q1, q2));
+ T = greater_eq(C, P);
+ C = sub(C, vand(T, P));
+ }
+#endif
+ NORML_MOD(C, P, NEGP, MIN, MAX, Q, T);
+ return C;
+ }
+
+#else
+
+#error "You need SSE instructions to perform 128 bits operations on int64"
+
+#endif // __FFLASFFPACK_USE_SIMD
+};
+
+// uint64_t
+template <> struct Simd128_impl<true, true, false, 8> : public Simd128_impl<true, true, true, 8> {
+ using scalar_t = uint64_t;
+
+ /*
+ * Load 128-bits of unsigned integer data from memory into dst.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ * Return [p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7]] int16_t
+ */
+ static INLINE PURE vect_t load(const scalar_t *const p) {
+ return _mm_load_si128(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Load 128-bits of unsigned integer data from memory into dst.
+ * p does not need to be aligned on any particular boundary.
+ * Return [p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7]] int16_t
+ */
+ static INLINE PURE vect_t loadu(const scalar_t *const p) {
+ return _mm_loadu_si128(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Store 128-bits of unsigned integer data from a into memory.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void store(const scalar_t *p, vect_t v) {
+ _mm_store_si128(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ static INLINE CONST vect_t greater(vect_t a, vect_t b) {
+#ifdef __SSE4_2__
+ vect_t x;
+ x = set1(-(static_cast<scalar_t>(1) << (sizeof(scalar_t) * 8 - 1)));
+ a = sub(x, a);
+ b = sub(x, b);
+ return _mm_cmpgt_epi64(a, b);
+#else
+#warning "The simd greater function is emulate, it may impact the performances."
+ Converter ca, cb;
+ ca.v = a;
+ cb.v = b;
+ return set((ca.t[0] > cb.t[0]) ? 0xFFFFFFFFFFFFFFFF : 0, (ca.t[1] > cb.t[1]) ? 0xFFFFFFFFFFFFFFFF : 0);
+#endif
+ }
+
+ static INLINE CONST vect_t lesser(vect_t a, vect_t b) {
+#ifdef __SSE4_2__
+ vect_t x;
+ x = set1(-(static_cast<scalar_t>(1) << (sizeof(scalar_t) * 8 - 1)));
+ a = sub(x, a);
+ b = sub(x, b);
+ return _mm_cmpgt_epi64(a, b);
+#else
+#warning "The simd greater function is emulate, it may impact the performances."
+ Converter ca, cb;
+ ca.v = a;
+ cb.v = b;
+ return set((ca.t[0] < cb.t[0]) ? 0xFFFFFFFFFFFFFFFF : 0, (ca.t[1] < cb.t[1]) ? 0xFFFFFFFFFFFFFFFF : 0);
+#endif
+ }
+
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return vor(greater(a, b), eq(a, b)); }
+
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return vor(lesser(a, b), eq(a, b)); }
+};
+
+#endif // __FFLASFFPACK_fflas_ffpack_utils_simd128_int64_INL
diff --git a/fflas-ffpack/field/modular-positive.h b/fflas-ffpack/fflas/fflas_simd/simd256.inl
similarity index 54%
rename from fflas-ffpack/field/modular-positive.h
rename to fflas-ffpack/fflas/fflas_simd/simd256.inl
index 9e12653..028ea9d 100644
--- a/fflas-ffpack/field/modular-positive.h
+++ b/fflas-ffpack/fflas/fflas_simd/simd256.inl
@@ -1,28 +1,25 @@
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/* fflas-ffpack/modular-positive.h
- * Copyright (C) 2003 Pascal Giorgi
- * 2008 Clement Pernet
- * Written by Clement Pernet <clement.pernet at gmail.com>
- * Pascal Giorgi <pascal.giorgi at ens-lyon.fr>
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
*
- * ------------------------------------
*
- *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -30,25 +27,27 @@
*.
*/
-#ifndef __FFLASFFPACK_modular_positive_H
-#define __FFLASFFPACK_modular_positive_H
-
-#include <math.h>
-#include "fflas-ffpack/field/modular-randiter.h"
-#include "fflas-ffpack/field/nonzero-randiter.h"
+#ifndef __FFLASFFPACK_fflas_ffpack_utils_simd256_INL
+#define __FFLASFFPACK_fflas_ffpack_utils_simd256_INL
-namespace FFPACK {
+template <bool ArithType, bool Int, bool Signed, int Size> struct Simd256_impl;
- template <class Element>
- class Modular;
+#include "simd256_float.inl"
+#include "simd256_double.inl"
-} // FFPACK
+#ifdef SIMD_INT
+// Trop d'instructions SSE manquantes pour les int8_t
-#include "fflas-ffpack/field/modular-float.h"
-#include "fflas-ffpack/field/modular-double.h"
-#include "fflas-ffpack/field/modular-int32.h"
-#ifdef __x86_64__
-#include "fflas-ffpack/field/modular-int64.h"
+#if defined(__FFLASFFPACK_USE_AVX2)
+#include "simd256_int16.inl"
+#include "simd256_int32.inl"
+#include "simd256_int64.inl"
#endif
-#endif // __FFLASFFPACK_modular_positive_H
+#endif //#ifdef SIMD_INT
+
+template <class T>
+using Simd256 =
+ Simd256_impl<std::is_arithmetic<T>::value, std::is_integral<T>::value, std::is_signed<T>::value, sizeof(T)>;
+
+#endif // __FFLASFFPACK_fflas_ffpack_utils_simd256_INL
diff --git a/fflas-ffpack/fflas/fflas_simd/simd256_double.inl b/fflas-ffpack/fflas/fflas_simd/simd256_double.inl
new file mode 100644
index 0000000..e8f176c
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_simd/simd256_double.inl
@@ -0,0 +1,384 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_ffpack_utils_simd256_double_INL
+#define __FFLASFFPACK_fflas_ffpack_utils_simd256_double_INL
+
+/*
+ * Simd256 specialized for double
+ */
+template <> struct Simd256_impl<true, false, true, 8> {
+#if defined(__FFLASFFPACK_USE_AVX) or defined(__FFLASFFPACK_USE_AVX2)
+
+ /*
+ * alias to 256 bit simd register
+ */
+ using vect_t = __m256d;
+
+ /*
+ * define the scalar type corresponding to the specialization
+ */
+ using scalar_t = double;
+
+ /*
+ * number of scalar_t in a simd register
+ */
+ static const constexpr size_t vect_size = 4;
+
+ /*
+ * alignement required by scalar_t pointer to be loaded in a vect_t
+ */
+ static const constexpr size_t alignment = 32;
+
+ /*
+ * Check if the pointer p is a multiple of alignemnt
+ */
+ template <class T> static constexpr bool valid(T *p) { return (int64_t)p % alignment == 0; }
+
+ /*
+ * Check if the number n is a multiple of vect_size
+ */
+ template <class T> static constexpr bool compliant(T n) { return n % vect_size == 0; }
+
+ /*
+ * Return vector of type vect_t with all elements set to zero
+ * Return [0,0,0,0]
+ */
+ static INLINE CONST vect_t zero() { return _mm256_setzero_pd(); }
+
+ /*
+ * Broadcast double-precision (64-bit) floating-point value x to all elements of vect_t.
+ * Return [x,x,x,x]
+ */
+ static INLINE CONST vect_t set1(const scalar_t x) { return _mm256_set1_pd(x); }
+
+ /*
+ * Set packed double-precision (64-bit) floating-point elements in vect_t with the supplied values.
+ * Return [x1,x2,x3,x4]
+ */
+ static INLINE CONST vect_t set(const scalar_t x1, const scalar_t x2, const scalar_t x3, const scalar_t x4) {
+ return _mm256_set_pd(x4, x3, x2, x1);
+ }
+
+ /*
+ * Gather double-precision (64-bit) floating-point elements with indexes idx[0], ..., idx[3] from the address p in
+ *vect_t.
+ * Return [p[idx[0]], p[idx[1]], p[idx[2]], p[idx[3]]]
+ */
+ template <class T> static INLINE PURE vect_t gather(const scalar_t *const p, const T *const idx) {
+ // TODO AVX2 Gather
+ return _mm256_set_pd(p[idx[3]], p[idx[2]], p[idx[1]], p[idx[0]]);
+ }
+
+ /*
+ * Load 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into vect_t.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ * Return [p[0], p[1], p[2], p[3]]
+ */
+ static INLINE PURE vect_t load(const scalar_t *const p) { return _mm256_load_pd(p); }
+
+ /*
+ * Load 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from memory into vect_t.
+ * p does not need to be aligned on any particular boundary.
+ * Return [p[0], p[1], p[2], p[3]]
+ */
+ static INLINE PURE vect_t loadu(const scalar_t *const p) { return _mm256_loadu_pd(p); }
+
+ /*
+ * Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from p into memory.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void store(const scalar_t *p, const vect_t v) { _mm256_store_pd(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from p into memory.
+ * p does not need to be aligned on any particular boundary.
+ */
+ static INLINE void storeu(const scalar_t *p, const vect_t v) { _mm256_storeu_pd(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Store 256-bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a into memory using
+ * a non-temporal memory hint.
+ * p must be aligned on a 32-byte boundary or a general-protection exception may be generated.
+ */
+ static INLINE void stream(const scalar_t *p, const vect_t v) { _mm256_stream_pd(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0+b0, a1+b1, a2+b2, a3+b3]
+ */
+ static INLINE CONST vect_t add(const vect_t a, const vect_t b) { return _mm256_add_pd(a, b); }
+
+ static INLINE vect_t addin(vect_t &a, const vect_t b) { return a = add(a, b); }
+
+ /*
+ * Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit)
+ * floating-point elements in a, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0-b0, a1-b1, a2-b2, a3-b3]
+ */
+ static INLINE CONST vect_t sub(const vect_t a, const vect_t b) { return _mm256_sub_pd(a, b); }
+
+ static INLINE CONST vect_t subin(vect_t &a, const vect_t b) { return a = sub(a, b); }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0*b0, a1*b1, a2*b2, a3*b3]
+ */
+ static INLINE CONST vect_t mul(const vect_t a, const vect_t b) { return _mm256_mul_pd(a, b); }
+
+ static INLINE CONST vect_t mulin(vect_t &a, const vect_t b) { return a = mul(a, b); }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to
+ * packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3], [c0, c1, c2, c3]
+ * Return : [a0*b0+c0, a1*b1+c1, a2*b2+c2, a3*b3+c3]
+ */
+ static INLINE CONST vect_t fmadd(const vect_t c, const vect_t a, const vect_t b) {
+#ifdef __FMA__
+ return _mm256_fmadd_pd(a, b, c);
+#else
+ return add(c, mul(a, b));
+#endif
+ }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to
+ * packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3], [c0, c1, c2, c3]
+ * Return : [a0*b0+c0, a1*b1+c1, a2*b2+c2, a3*b3+c3]
+ */
+ static INLINE CONST vect_t madd(const vect_t c, const vect_t a, const vect_t b) { return fmadd(c, a, b); }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to
+ * packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3], [c0, c1, c2, c3]
+ * Return : [a0*b0+c0, a1*b1+c1, a2*b2+c2, a3*b3+c3]
+ */
+ static INLINE CONST vect_t maddx(const vect_t c, const vect_t a, const vect_t b) { return fmadd(c, a, b); }
+
+ static INLINE CONST vect_t fmaddin(vect_t &c, const vect_t a, const vect_t b) { return c = fmadd(c, a, b); }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result
+ * to packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3], [c0, c1, c2, c3]
+ * Return : [-(a0*b0)+c0, -(a1*b1)+c1, -(a2*b2)+c2, -(a3*b3)+c3]
+ */
+ static INLINE CONST vect_t fnmadd(const vect_t c, const vect_t a, const vect_t b) {
+#ifdef __FMA__
+ return _mm256_fnmadd_pd(a, b, c);
+#else
+ return sub(c, mul(a, b));
+#endif
+ }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result
+ * to packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3], [c0, c1, c2, c3]
+ * Return : [-(a0*b0)+c0, -(a1*b1)+c1, -(a2*b2)+c2, -(a3*b3)+c3]
+ */
+ static INLINE CONST vect_t nmadd(const vect_t c, const vect_t a, const vect_t b) { return fnmadd(c, a, b); }
+
+ static INLINE CONST vect_t fnmaddin(vect_t &c, const vect_t a, const vect_t b) { return c = fnmadd(c, a, b); }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from
+ * the intermediate result, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3], [c0, c1, c2, c3]
+ * Return : [a0*b0-c0, a1*b1-c1, a2*b2-c2, a3*b3-c3]
+ */
+ static INLINE CONST vect_t fmsub(const vect_t c, const vect_t a, const vect_t b) {
+#ifdef __FMA__
+ return _mm256_fmsub_pd(a, b, c);
+#else
+ return sub(mul(a, b), c);
+#endif
+ }
+
+ /*
+ * Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from
+ * the intermediate result, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3], [c0, c1, c2, c3]
+ * Return : [a0*b0-c0, a1*b1-c1, a2*b2-c2, a3*b3-c3]
+ */
+ static INLINE CONST vect_t msub(const vect_t c, const vect_t a, const vect_t b) { return fmsub(c, a, b); }
+
+ static INLINE CONST vect_t fmsubin(vect_t &c, const vect_t a, const vect_t b) { return c = fmsub(c, a, b); }
+
+ /*
+ * Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results
+ in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [(a0==b0) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a1==b1) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a2==b2) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a3==b3) ? 0xFFFFFFFFFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t eq(const vect_t a, const vect_t b) { return _mm256_cmp_pd(a, b, _CMP_EQ_OQ); }
+
+ /*
+ * Compare packed double-precision (64-bit) floating-point elements in a and b for lesser-than, and store the
+ results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [(a0<b0) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a1<b1) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a2<b2) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a3<b3) ? 0xFFFFFFFFFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t lesser(const vect_t a, const vect_t b) { return _mm256_cmp_pd(a, b, _CMP_LT_OS); }
+
+ /*
+ * Compare packed double-precision (64-bit) floating-point elements in a and b for lesser or equal than, and store
+ the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [(a0<=b0) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a1<=b1) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a2<=b2) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a3<=b3) ? 0xFFFFFFFFFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return _mm256_cmp_pd(a, b, _CMP_LE_OS); }
+
+ /*
+ * Compare packed double-precision (64-bit) floating-point elements in a and b for greater-than, and store the
+ results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [(a0>b0) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a1>b1) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a2>b2) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a3>b3) ? 0xFFFFFFFFFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t greater(const vect_t a, const vect_t b) { return _mm256_cmp_pd(a, b, _CMP_GT_OS); }
+
+ /*
+ * Compare packed double-precision (64-bit) floating-point elements in a and b for greater or equal than, and store
+ the results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [(a0>=b0) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a1>=b1) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a2>=b2) ? 0xFFFFFFFFFFFFFFFF : 0,
+ (a3>=b3) ? 0xFFFFFFFFFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return _mm256_cmp_pd(a, b, _CMP_GE_OS); }
+
+ /*
+ * Compute the bitwise AND of packed double-precision (64-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0 AND b0, a1 AND b1, a2 AND b2, a3 AND b3]
+ */
+ static INLINE CONST vect_t vand(const vect_t a, const vect_t b) { return _mm256_and_pd(a, b); }
+
+ /*
+ * Compute the bitwise OR of packed double-precision (64-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0 OR b0, a1 OR b1, a2 OR b2, a3 OR b3]
+ */
+ static INLINE CONST vect_t vor(const vect_t a, const vect_t b) { return _mm256_or_pd(a, b); }
+
+ /*
+ * Compute the bitwise XOR of packed double-precision (64-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0 XOR b0, a1 XOR b1, a2 XOR b2, a3 XOR b3]
+ */
+ static INLINE CONST vect_t vxor(const vect_t a, const vect_t b) { return _mm256_xor_pd(a, b); }
+
+ /*
+ * Compute the bitwise AND NOT of packed double-precision (64-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0 AND NOT b0, a1 AND NOT b1, a2 AND NOT b2, a3 AND NOT b3]
+ */
+ static INLINE CONST vect_t vandnot(const vect_t a, const vect_t b) { return _mm256_andnot_pd(a, b); }
+
+ /*
+ * Round the packed double-precision (64-bit) floating-point elements in a down to an integer value, and store the
+ * results as packed double-precision floating-point elements in vect_t.
+ * Args : [a0, a1, a2, a3]
+ * Return : [floor(a0), floor(a1), floor(a2), floor(a3)]
+ */
+ static INLINE CONST vect_t floor(const vect_t a) { return _mm256_floor_pd(a); }
+
+ /*
+ * Round the packed double-precision (64-bit) floating-point elements in a up to an integer value, and store the
+ * results as packed double-precision floating-point elements in vect_t.
+ * Args : [a0, a1, a2, a3]
+ * Return : [ceil(a0), ceil(a1), ceil(a2), ceil(a3)]
+ */
+ static INLINE CONST vect_t ceil(const vect_t a) { return _mm256_ceil_pd(a); }
+
+ /*
+ * Round the packed double-precision (64-bit) floating-point elements in a, and store the results as packed
+ * double-precision floating-point elements in vect_t.
+ * Args : [a0, a1, a2, a3]
+ * Return : [round(a0), round(a1), round(a2), round(a3)]
+ */
+ static INLINE CONST vect_t round(const vect_t a) {
+ return _mm256_round_pd(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ }
+
+ /*
+ * Horizontally add adjacent pairs of double-precision (64-bit) floating-point elements in a and b, and pack the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3], [b0, b1, b2, b3]
+ * Return : [a0+a1, b0+b1, a2+a3, b2+b3]
+ */
+ static INLINE CONST vect_t hadd(const vect_t a, const vect_t b) { return _mm256_hadd_pd(a, b); }
+
+ /*
+ * Horizontally add double-precision (64-bit) floating-point elements in a.
+ * Args : [a0, a1, a2, a3]
+ * Return : a0+a1+a2+a3
+ */
+ static INLINE CONST scalar_t hadd_to_scal(const vect_t a) {
+ return ((const scalar_t *)&a)[0] + ((const scalar_t *)&a)[1] + ((const scalar_t *)&a)[2] +
+ ((const scalar_t *)&a)[3];
+ }
+
+ static INLINE vect_t mod(vect_t &C, const vect_t &P, const vect_t &INVP, const vect_t &NEGP, const vect_t &MIN,
+ const vect_t &MAX, vect_t &Q, vect_t &T) {
+ FLOAT_MOD(C, P, INVP, Q);
+ NORML_MOD(C, P, NEGP, MIN, MAX, Q, T);
+
+ return C;
+ }
+
+#else // __AVX__
+#error "You need AVX instructions to perform 256bits operations on double"
+#endif
+};
+
+#endif // __FFLASFFPACK_fflas_ffpack_utils_simd256_double_INL
diff --git a/fflas-ffpack/fflas/fflas_simd/simd256_float.inl b/fflas-ffpack/fflas/fflas_simd/simd256_float.inl
new file mode 100644
index 0000000..98c3d2d
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_simd/simd256_float.inl
@@ -0,0 +1,406 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_ffpack_utils_simd256_float_INL
+#define __FFLASFFPACK_fflas_ffpack_utils_simd256_float_INL
+
+/*
+ * Simd256 specialized for float
+ */
+template <> struct Simd256_impl<true, false, true, 4> {
+#if defined(__FFLASFFPACK_USE_AVX) or defined(__FFLASFFPACK_USE_AVX2)
+ /*
+ * alias to 256 bit simd register
+ */
+ using vect_t = __m256;
+
+ /*
+ * define the scalar type corresponding to the specialization
+ */
+ using scalar_t = float;
+
+ /*
+ * number of scalar_t in a simd register
+ */
+ static const constexpr size_t vect_size = 8;
+
+ /*
+ * alignement required by scalar_t pointer to be loaded in a vect_t
+ */
+ static const constexpr size_t alignment = 32;
+
+ /*
+ * Check if the pointer p is a multiple of alignemnt
+ */
+ template <class T> static constexpr bool valid(T *p) { return (int64_t)p % alignment == 0; }
+
+ /*
+ * Check if the number n is a multiple of vect_size
+ */
+ template <class T> static constexpr bool compliant(T n) { return n % vect_size == 0; }
+
+ /*
+ * Return vector of type vect_t with all elements set to zero
+ * Return [0,0,0,0,0,0,0,0]
+ */
+ static INLINE CONST vect_t zero() { return _mm256_setzero_ps(); }
+
+ /*
+ * Broadcast single-precision (32-bit) floating-point value x to all elements of vect_t.
+ * Return [x,x,x,x,x,x,x,x]
+ */
+ static INLINE CONST vect_t set1(const scalar_t x) { return _mm256_set1_ps(x); }
+
+ /*
+ * Set packed single-precision (32-bit) floating-point elements in vect_t with the supplied values.
+ * Return [x1,x2,x3,x4,x5,x6,x7,x8]
+ */
+ static INLINE CONST vect_t set(const scalar_t x1, const scalar_t x2, const scalar_t x3, const scalar_t x4,
+ const scalar_t x5, const scalar_t x6, const scalar_t x7, const scalar_t x8) {
+ return _mm256_set_ps(x8, x7, x6, x5, x4, x3, x2, x1);
+ }
+
+ /*
+ * Gather single-precision (32-bit) floating-point elements with indexes idx[0], ..., idx[3] from the address p in
+ *vect_t.
+ * Return [p[idx[0]], p[idx[1]], p[idx[2]], p[idx[3]], p[idx[4]], p[idx[5]], p[idx[6]], p[idx[7]]]
+ */
+ template <class T> static INLINE PURE vect_t gather(const scalar_t *const p, const T *const idx) {
+ // TODO AVX2 Gather
+ return _mm256_set_ps(p[idx[7]], p[idx[6]], p[idx[5]], p[idx[4]], p[idx[3]], p[idx[2]], p[idx[1]], p[idx[0]]);
+ }
+
+ /*
+ * Load 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into vect_t.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ * Return [p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]]
+ */
+ static INLINE PURE vect_t load(const scalar_t *const p) { return _mm256_load_ps(p); }
+
+ /*
+ * Load 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from memory into vect_t.
+ * p does not need to be aligned on any particular boundary.
+ * Return [p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]]
+ */
+ static INLINE PURE vect_t loadu(const scalar_t *const p) { return _mm256_loadu_ps(p); }
+
+ /*
+ * Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a into memory.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void store(const scalar_t *p, const vect_t v) { _mm256_store_ps(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Store 256-bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a into memory.
+ * p does not need to be aligned on any particular boundary.
+ */
+ static INLINE void storeu(const scalar_t *p, const vect_t v) { _mm256_storeu_ps(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Store 256-bits (composed of 8 packed double-precision (32-bit) floating-point elements) from a into memory using
+ * a non-temporal memory hint.
+ * p must be aligned on a 32-byte boundary or a general-protection exception may be generated.
+ */
+ static INLINE void stream(const scalar_t *p, const vect_t v) { _mm256_stream_ps(const_cast<scalar_t *>(p), v); }
+
+ /*
+ * Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0+b0, a1+b1, a2+b2, a3+b3, a4+b4, a5+b5, a6+b6, a7+b7]
+ */
+ static INLINE CONST vect_t add(const vect_t a, const vect_t b) { return _mm256_add_ps(a, b); }
+
+ static INLINE vect_t addin(vect_t &a, const vect_t b) { return a = add(a, b); }
+
+ /*
+ * Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit)
+ * floating-point elements in a, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0-b0, a1-b1, a2-b2, a3-b3, a4-b4, a5-b5, a6-b6, a7-b7]
+ */
+ static INLINE CONST vect_t sub(const vect_t a, const vect_t b) { return _mm256_sub_ps(a, b); }
+
+ static INLINE CONST vect_t subin(vect_t &a, const vect_t b) { return a = sub(a, b); }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0*b0, a1*b1, a2*b2, a3*b3, a4*b4, a5*b5, a6*b6, a7*b7]
+ */
+ static INLINE CONST vect_t mul(const vect_t a, const vect_t b) { return _mm256_mul_ps(a, b); }
+
+ static INLINE CONST vect_t mulin(vect_t &a, const vect_t b) { return a = mul(a, b); }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to
+ * packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7], [c0, c1, c2, c3, c4, c5, c6, c7]
+ * Return : [a0*b0+c0, a1*b1+c1, a2*b2+c2, a3*b3+c3, a4*b4+c4, a5*b5+c5, a6*b6+c6, a7*b7+c7]
+ */
+ static INLINE CONST vect_t fmadd(const vect_t c, const vect_t a, const vect_t b) {
+#ifdef __FMA__
+ return _mm256_fmadd_ps(a, b, c);
+#else
+ return add(c, mul(a, b));
+#endif
+ }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to
+ * packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7], [c0, c1, c2, c3, c4, c5, c6, c7]
+ * Return : [a0*b0+c0, a1*b1+c1, a2*b2+c2, a3*b3+c3, a4*b4+c4, a5*b5+c5, a6*b6+c6, a7*b7+c7]
+ */
+ static INLINE CONST vect_t madd(const vect_t c, const vect_t a, const vect_t b) { return fmadd(c, a, b); }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to
+ * packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7], [c0, c1, c2, c3, c4, c5, c6, c7]
+ * Return : [a0*b0+c0, a1*b1+c1, a2*b2+c2, a3*b3+c3, a4*b4+c4, a5*b5+c5, a6*b6+c6, a7*b7+c7]
+ */
+ static INLINE CONST vect_t maddx(const vect_t c, const vect_t a, const vect_t b) { return fmadd(c, a, b); }
+
+ static INLINE CONST vect_t fmaddin(vect_t &c, const vect_t a, const vect_t b) { return c = fmadd(c, a, b); }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result
+ * to packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7], [c0, c1, c2, c3, c4, c5, c6, c7]
+ * Return : [-(a0*b0)+c0, -(a1*b1)+c1, -(a2*b2)+c2, -(a3*b3)+c3, -(a4*b4)+c4, -(a5*b5)+c5, -(a6*b6)+c6, -(a7*b7)+c7]
+ */
+ static INLINE CONST vect_t fnmadd(const vect_t c, const vect_t a, const vect_t b) {
+#ifdef __FMA__
+ return _mm256_fnmadd_ps(a, b, c);
+#else
+ return sub(c, mul(a, b));
+#endif
+ }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result
+ * to packed elements in c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7], [c0, c1, c2, c3, c4, c5, c6, c7]
+ * Return : [-(a0*b0)+c0, -(a1*b1)+c1, -(a2*b2)+c2, -(a3*b3)+c3, -(a4*b4)+c4, -(a5*b5)+c5, -(a6*b6)+c6, -(a7*b7)+c7]
+ */
+ static INLINE CONST vect_t nmadd(const vect_t c, const vect_t a, const vect_t b) { return fnmadd(c, a, b); }
+
+ static INLINE CONST vect_t fnmaddin(vect_t &c, const vect_t a, const vect_t b) { return c = fnmadd(c, a, b); }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from
+ * the intermediate result, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7], [c0, c1, c2, c3, c4, c5, c6, c7]
+ * Return : [a0*b0-c0, a1*b1-c1, a2*b2-c2, a3*b3-c3, a4*b4-c4, a5*b5-c5, a6*b6-c6, a7*b7-c7]
+ */
+ static INLINE CONST vect_t fmsub(const vect_t c, const vect_t a, const vect_t b) {
+#ifdef __FMA__
+ return _mm256_fmsub_ps(a, b, c);
+#else
+ return sub(mul(a, b), c);
+#endif
+ }
+
+ /*
+ * Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from
+ * the intermediate result, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7], [c0, c1, c2, c3, c4, c5, c6, c7]
+ * Return : [a0*b0-c0, a1*b1-c1, a2*b2-c2, a3*b3-c3, a4*b4-c4, a5*b5-c5, a6*b6-c6, a7*b7-c7]
+ */
+ static INLINE CONST vect_t msub(const vect_t c, const vect_t a, const vect_t b) { return fmsub(c, a, b); }
+
+ static INLINE CONST vect_t fmsubin(vect_t &c, const vect_t a, const vect_t b) { return c = fmsub(c, a, b); }
+
+ /*
+ * Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results
+ in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [(a0==b0) ? 0xFFFFFFFF : 0,
+ (a1==b1) ? 0xFFFFFFFF : 0,
+ (a2==b2) ? 0xFFFFFFFF : 0,
+ (a3==b3) ? 0xFFFFFFFF : 0,
+ (a4==b4) ? 0xFFFFFFFF : 0,
+ (a5==b5) ? 0xFFFFFFFF : 0,
+ (a6==b6) ? 0xFFFFFFFF : 0,
+ (a7==b7) ? 0xFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t eq(const vect_t a, const vect_t b) { return _mm256_cmp_ps(a, b, _CMP_EQ_OQ); }
+
+ /*
+ * Compare packed single-precision (32-bit) floating-point elements in a and b for lesser-than, and store the
+ results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [(a0<b0) ? 0xFFFFFFFF : 0,
+ (a1<b1) ? 0xFFFFFFFF : 0,
+ (a2<b2) ? 0xFFFFFFFF : 0,
+ (a3<b3) ? 0xFFFFFFFF : 0,
+ (a4<b4) ? 0xFFFFFFFF : 0,
+ (a5<b5) ? 0xFFFFFFFF : 0,
+ (a6<b6) ? 0xFFFFFFFF : 0,
+ (a7<b7) ? 0xFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t lesser(const vect_t a, const vect_t b) { return _mm256_cmp_ps(a, b, _CMP_LT_OS); }
+
+ /*
+ * Compare packed single-precision (32-bit) floating-point elements in a and b for lesser or equal than, and store
+ the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [(a0<=b0) ? 0xFFFFFFFF : 0,
+ (a1<=b1) ? 0xFFFFFFFF : 0,
+ (a2<=b2) ? 0xFFFFFFFF : 0,
+ (a3<=b3) ? 0xFFFFFFFF : 0,
+ (a4<=b4) ? 0xFFFFFFFF : 0,
+ (a5<=b5) ? 0xFFFFFFFF : 0,
+ (a6<=b6) ? 0xFFFFFFFF : 0,
+ (a7<=b7) ? 0xFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return _mm256_cmp_ps(a, b, _CMP_LE_OS); }
+
+ /*
+ * Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the
+ results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [(a0>b0) ? 0xFFFFFFFF : 0,
+ (a1>b1) ? 0xFFFFFFFF : 0,
+ (a2>b2) ? 0xFFFFFFFF : 0,
+ (a3>b3) ? 0xFFFFFFFF : 0,
+ (a4>b4) ? 0xFFFFFFFF : 0,
+ (a5>b5) ? 0xFFFFFFFF : 0,
+ (a6>b6) ? 0xFFFFFFFF : 0,
+ (a7>b7) ? 0xFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t greater(const vect_t a, const vect_t b) { return _mm256_cmp_ps(a, b, _CMP_GT_OS); }
+
+ /*
+ * Compare packed single-precision (32-bit) floating-point elements in a and b for greater or equal than, and store
+ the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [(a0>=b0) ? 0xFFFFFFFF : 0,
+ (a1>=b1) ? 0xFFFFFFFF : 0,
+ (a2>=b2) ? 0xFFFFFFFF : 0,
+ (a3>=b3) ? 0xFFFFFFFF : 0,
+ (a4>=b4) ? 0xFFFFFFFF : 0,
+ (a5>=b5) ? 0xFFFFFFFF : 0,
+ (a6>=b6) ? 0xFFFFFFFF : 0,
+ (a7>=b7) ? 0xFFFFFFFF : 0]
+ */
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return _mm256_cmp_ps(a, b, _CMP_GE_OS); }
+
+ /*
+ * Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 AND b0, a1 AND b1, a2 AND b2, a3 AND b3, a4 AND b4, a5 AND b5, a6 AND b6, a7 AND b7]
+ */
+ static INLINE CONST vect_t vand(const vect_t a, const vect_t b) { return _mm256_and_ps(a, b); }
+
+ /*
+ * Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 OR b0, a1 OR b1, a2 OR b2, a3 OR b3, a4 OR b4, a5 OR b5, a6 OR b6, a7 OR b7]
+ */
+ static INLINE CONST vect_t vor(const vect_t a, const vect_t b) { return _mm256_or_ps(a, b); }
+
+ /*
+ * Compute the bitwise XOR of packed single-precision (32-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 XOR b0, a1 XOR b1, a2 XOR b2, a3 XOR b3, a4 XOR b4, a5 XOR b5, a6 XOR b6, a7 XOR b7]
+ */
+ static INLINE CONST vect_t vxor(const vect_t a, const vect_t b) { return _mm256_xor_ps(a, b); }
+
+ /*
+ * Compute the bitwise AND NOT of packed single-precision (32-bit) floating-point elements in a and b, and store the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 ANDNOT b0, a1 ANDNOT b1, a2 ANDNOT b2, a3 ANDNOT b3, a4 ANDNOT b4, a5 ANDNOT b5, a6 ANDNOT b6, a7
+ * ANDNOT b7]
+ */
+ static INLINE CONST vect_t vandnot(const vect_t a, const vect_t b) { return _mm256_andnot_ps(a, b); }
+
+ /*
+ * Round the packed single-precision (32-bit) floating-point elements in a down to an integer value, and store the
+ * results as packed double-precision floating-point elements in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ * Return : [floor(a0), floor(a1), floor(a2), floor(a3), floor(a4), floor(a5), floor(a6), floor(a7)]
+ */
+ static INLINE CONST vect_t floor(const vect_t a) { return _mm256_floor_ps(a); }
+
+ /*
+ * Round the packed single-precision (32-bit) floating-point elements in a up to an integer value, and store the
+ * results as packed single-precision floating-point elements in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ * Return : [ceil(a0), ceil(a1), ceil(a2), ceil(a3), ceil(a4), ceil(a5), ceil(a6), ceil(a7)]
+ */
+ static INLINE CONST vect_t ceil(const vect_t a) { return _mm256_ceil_ps(a); }
+
+ /*
+ * Round the packed single-precision (32-bit) floating-point elements in a, and store the results as packed
+ * single-precision floating-point elements in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ * Return : [round(a0), round(a1), round(a2), round(a3), round(a4), round(a5), round(a6), round(a7)]
+ */
+ static INLINE CONST vect_t round(const vect_t a) {
+ return _mm256_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ }
+
+ /*
+ * Horizontally add adjacent pairs of single-precision (32-bit) floating-point elements in a and b, and pack the
+ * results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7], [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0+a1, b0+b1, a2+a3, b2+b3, a4+a5, b4+b5, a6+a7, b6+b7]
+ */
+ static INLINE CONST vect_t hadd(const vect_t a, const vect_t b) { return _mm256_hadd_ps(a, b); }
+
+ /*
+ * Horizontally add single-precision (32-bit) floating-point elements in a.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ * Return : a0+a1+a2+a3+a4+a5+a6+a7
+ */
+ static INLINE CONST scalar_t hadd_to_scal(const vect_t a) {
+ return ((const scalar_t *)&a)[0] + ((const scalar_t *)&a)[1] + ((const scalar_t *)&a)[2] +
+ ((const scalar_t *)&a)[3] + ((const scalar_t *)&a)[4] + ((const scalar_t *)&a)[5] +
+ ((const scalar_t *)&a)[6] + ((const scalar_t *)&a)[7];
+ }
+
+ static INLINE vect_t mod(vect_t &C, const vect_t &P, const vect_t &INVP, const vect_t &NEGP, const vect_t &MIN,
+ const vect_t &MAX, vect_t &Q, vect_t &T) {
+ FLOAT_MOD(C, P, INVP, Q);
+ NORML_MOD(C, P, NEGP, MIN, MAX, Q, T);
+
+ return C;
+ }
+
+#else // __AVX__
+#error "You need AVX instructions to perform 256bits operations on float"
+#endif
+};
+
+#endif // __FFLASFFPACK_fflas_ffpack_utils_simd256_float_INL
diff --git a/fflas-ffpack/fflas/fflas_simd/simd256_int16.inl b/fflas-ffpack/fflas/fflas_simd/simd256_int16.inl
new file mode 100644
index 0000000..44596d9
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_simd/simd256_int16.inl
@@ -0,0 +1,516 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_ffpack_utils_simd256_int16_INL
+#define __FFLASFFPACK_fflas_ffpack_utils_simd256_int16_INL
+
+/*
+ * Simd256 specialized for int16_t
+ */
+template <> struct Simd256_impl<true, true, true, 2> {
+#if defined(__FFLASFFPACK_USE_AVX2)
+ /*
+ * alias to 256 bit simd register
+ */
+ using vect_t = __m256i;
+
+ /*
+ * alias to 256 bit simd register
+ */
+ using half_t = __m128i;
+
+ /*
+ * define the scalar type corresponding to the specialization
+ */
+ using scalar_t = int16_t;
+
+ /*
+ * Simd128 for scalar_t, to deal half_t
+ */
+ using simdHalf = Simd128<scalar_t>;
+
+ /*
+ * number of scalar_t in a simd register
+ */
+ static const constexpr size_t vect_size = 16;
+
+ /*
+ * alignement required by scalar_t pointer to be loaded in a vect_t
+ */
+ static const constexpr size_t alignment = 32;
+
+ /*
+ * Check if the pointer p is a multiple of alignemnt
+ */
+ template <class T> static constexpr bool valid(T *p) { return (int64_t)p % alignment == 0; }
+
+ /*
+ * Check if the number n is a multiple of vect_size
+ */
+ template <class T> static constexpr bool compliant(T n) { return n % vect_size == 0; }
+
+ /*
+ * Converter from vect_t to a tab.
+ * exple:
+ * Converter conv;
+ * conv.v = a;
+ * scalart_t x = conv.t[1]
+ */
+ union Converter {
+ vect_t v;
+ scalar_t t[vect_size];
+ };
+
+ /*
+ * Return vector of type vect_t with all elements set to zero
+ * Return [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] int16_t
+ */
+ static INLINE CONST vect_t zero() { return _mm256_setzero_si256(); }
+
+ /*
+ * Broadcast 16-bit integer a to all all elements of dst. This intrinsic may generate the vpbroadcastw.
+ * Return [x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x] int16_t
+ */
+ static INLINE CONST vect_t set1(const scalar_t x) { return _mm256_set1_epi16(x); }
+
+ /*
+ * Broadcast 16-bit integer a to all all elements of dst. This intrinsic may generate the vpbroadcastw.
+ * Return [x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15] int16_t
+ */
+ static INLINE CONST vect_t set(const scalar_t x0, const scalar_t x1, const scalar_t x2, const scalar_t x3,
+ const scalar_t x4, const scalar_t x5, const scalar_t x6, const scalar_t x7,
+ const scalar_t x8, const scalar_t x9, const scalar_t x10, const scalar_t x11,
+ const scalar_t x12, const scalar_t x13, const scalar_t x14, const scalar_t x15) {
+ return _mm256_set_epi16(x15, x14, x13, x12, x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1, x0);
+ }
+
+ /*
+ * Gather 16-bit integer elements with indexes idx[0], ..., idx[15] from the address p in vect_t.
+ * Return [p[idx[0]], p[idx[1]], p[idx[2]], p[idx[3]],
+ p[idx[4]], p[idx[5]], p[idx[6]], p[idx[7]],
+ p[idx[8]], p[idx[9]], p[idx[10]], p[idx[11]],
+ p[idx[12]], p[idx[13]], p[idx[14]], p[idx[15]]] int16_t
+ */
+ template <class T> static INLINE PURE vect_t gather(const scalar_t *const p, const T *const idx) {
+ return set(p[idx[0]], p[idx[1]], p[idx[2]], p[idx[3]], p[idx[4]], p[idx[5]], p[idx[6]], p[idx[7]], p[idx[8]],
+ p[idx[9]], p[idx[10]], p[idx[11]], p[idx[12]], p[idx[13]], p[idx[14]], p[idx[15]]);
+ }
+
+ /*
+ * Load 256-bits of integer data from memory into dst.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ * Return [p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7],p[8],p[9],p[10],p[11]p[12],p[13],p[14],p[15]] int16_t
+ */
+ static INLINE PURE vect_t load(const scalar_t *const p) {
+ return _mm256_load_si256(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Load 256-bits of integer data from memory into dst.
+ * p does not need to be aligned on any particular boundary.
+ * Return [p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7],p[8],p[9],p[10],p[11]p[12],p[13],p[14],p[15]] int16_t
+ */
+ static INLINE PURE vect_t loadu(const scalar_t *const p) {
+ return _mm256_loadu_si256(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Store 256-bits of integer data from a into memory.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void store(const scalar_t *p, vect_t v) {
+ _mm256_store_si256(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Store 256-bits of integer data from a into memory.
+ * p does not need to be aligned on any particular boundary.
+ */
+ static INLINE void storeu(const scalar_t *p, vect_t v) {
+ _mm256_storeu_si256(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Store 256-bits of integer data from a into memory using a non-temporal memory hint.
+ * p must be aligned on a 32-byte boundary or a general-protection exception may be generated.
+ */
+ static INLINE void stream(const scalar_t *p, const vect_t v) {
+ _mm256_stream_si256(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Shift packed 16-bit integers in a left by s while shifting in zeros, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ * Return : [a0 << s, a1 << s, a2 << s, a3 << s, a4 << s, a5 << s, a6 << s, a7 << s,
+ * a8 << s, a9 << s, a10 << s, a11 << s, a12 << s, a13 << s, a14 << s, a15 << s] int16_t
+ */
+ static INLINE CONST vect_t sll(const vect_t a, const int s) { return _mm256_slli_epi16(a, s); }
+
+ /*
+ * Shift packed 16-bit integers in a right by s while shifting in zeros, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ * Return : [a0 >> s, a1 >> s, a2 >> s, a3 >> s, a4 >> s, a5 >> s, a6 >> s, a7 >> s,
+ * a8 >> s, a9 >> s, a10 >> s, a11 >> s, a12 >> s, a13 >> s, a14 >> s, a15 >> s] int16_t
+ */
+ static INLINE CONST vect_t srl(const vect_t a, const int s) { return _mm256_srli_epi16(a, s); }
+
+
+ static INLINE CONST vect_t sra(const vect_t a, const int s) { return _mm256_sra_epi16(a, Simd128<int>::set1(s)); }
+
+ /*
+ * Add packed 16-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ * Return : [a0+b0, a1+b1, a2+b2, a3+b3, a4+b4, a5+b5, a6+b6, a7+b7,
+ a8+b8, a9+b9, a10+b10, a11+b11, a12+b12, a13+b13, a14+b14, a15+b15] int16_t
+ */
+ static INLINE CONST vect_t add(const vect_t a, const vect_t b) { return _mm256_add_epi16(a, b); }
+
+ static INLINE vect_t addin(vect_t &a, const vect_t b) { return a = add(a, b); }
+
+ /*
+ * Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ * Return : [a0-b0, a1-b1, a2-b2, a3-b3, a4-b4, a5-b5, a6-b6, a7-b7,
+ a8-b8, a9-b9, a10-b10, a11-b11, a12-b12, a13-b13, a14-b14, a15-b15] int16_t
+ */
+ static INLINE CONST vect_t sub(const vect_t a, const vect_t b) { return _mm256_sub_epi16(a, b); }
+
+ static INLINE CONST vect_t subin(vect_t &a, const vect_t b) { return a = sub(a, b); }
+
+ /*
+ * Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits
+ of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ * Return : [a0*b0 mod 2^16-1, a1*b1 mod 2^16-1, a2*b2 mod 2^16-1, a3*b3 mod 2^16-1,
+ a4*b4 mod 2^16-1, a5*b5 mod 2^16-1, a6*b6 mod 2^16-1, a7*b7 mod 2^16-1,
+ a8*b8 mod 2^16-1, a9*b9 mod 2^16-1, a10*b10 mod 2^16-1, a11*b11 mod 2^16-1,
+ a12*b12 mod 2^16-1, a13*b13 mod 2^16-1, a14-b14 mod 2^16-1, a15*b15 mod 2^16-1] int16_t
+ */
+ static INLINE CONST vect_t mullo(const vect_t a, const vect_t b) { return _mm256_mullo_epi16(a, b); }
+
+ /*
+ * Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits
+ of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ * Return : [a0*b0 mod 2^16-1, a1*b1 mod 2^16-1, a2*b2 mod 2^16-1, a3*b3 mod 2^16-1,
+ a4*b4 mod 2^16-1, a5*b5 mod 2^16-1, a6*b6 mod 2^16-1, a7*b7 mod 2^16-1,
+ a8*b8 mod 2^16-1, a9*b9 mod 2^16-1, a10*b10 mod 2^16-1, a11*b11 mod 2^16-1,
+ a12*b12 mod 2^16-1, a13*b13 mod 2^16-1, a14-b14 mod 2^16-1, a15*b15 mod 2^16-1] int16_t
+ */
+ static INLINE CONST vect_t mul(const vect_t a, const vect_t b) { return mullo(a, b); }
+
+ /*
+ * Multiply packed 16-bit integers in a and b, producing intermediate 32-bit integers, and add the low 16-bits of
+ the intermediate with c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ [c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15] int16_t
+ * Return : [(a0*b0 mod 2^16-1)+c0, (a1*b1 mod 2^16-1)+c1, (a2*b2 mod 2^16-1)+c2, (a3*b3 mod 2^16-1)+c3,
+ (a4*b4 mod 2^16-1)+c4, (a5*b5 mod 2^16-1)+c5, (a6*b6 mod 2^16-1)+c6, (a7*b7 mod 2^16-1)+c7,
+ (a8*b8 mod 2^16-1)+c8, (a9*b9 mod 2^16-1)+c9, (a10*b10 mod 2^16-1)+c10, (a11*b11 mod 2^16-1)+c11,
+ (a12*b12 mod 2^16-1)+c12, (a13*b13 mod 2^16-1)+c13, (a14*b14 mod 2^16-1)+c14, (a15*b15 mod 2^16-1)+c15]
+ */
+ static INLINE CONST vect_t fmadd(const vect_t c, const vect_t a, const vect_t b) { return add(c, mul(a, b)); }
+
+ static INLINE CONST vect_t fmaddin(vect_t c, const vect_t a, const vect_t b) { return c = fmadd(c, a, b); }
+
+ /*
+ * Multiply packed 16-bit integers in a and b, producing intermediate 32-bit integers, and substract elements of c
+ to the low 16-bit of the intermiate result, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ [c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15] int16_t
+ * Return : [-(a0*b0 mod 2^16-1)+c0, -(a1*b1 mod 2^16-1)+c1, -(a2*b2 mod 2^16-1)+c2, -(a3*b3 mod 2^16-1)+c3,
+ -(a4*b4 mod 2^16-1)+c4, -(a5*b5 mod 2^16-1)+c5, -(a6*b6 mod 2^16-1)+c6, -(a7*b7 mod 2^16-1)+c7,
+ -(a8*b8 mod 2^16-1)+c8, -(a9*b9 mod 2^16-1)+c9, -(a10*b10 mod 2^16-1)+c10, -(a11*b11 mod 2^16-1)+c11,
+ -(a12*b12 mod 2^16-1)+c12, -(a13*b13 mod 2^16-1)+c13, -(a14*b14 mod 2^16-1)+c14, -(a15*b15 mod 2^16-1)+c15]
+ */
+ static INLINE CONST vect_t fnmadd(const vect_t c, const vect_t a, const vect_t b) { return sub(c, mul(a, b)); }
+
+ static INLINE CONST vect_t fnmaddin(vect_t c, const vect_t a, const vect_t b) { return c = fnmadd(c, a, b); }
+
+ /*
+ * Multiply packed 16-bit integers in a and b, producing intermediate 32-bit integers, and substract the low 16-bits
+ of the intermediate with c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ [c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15] int16_t
+ * Return : [(a0*b0 mod 2^16-1)-c0, (a1*b1 mod 2^16-1)-c1, (a2*b2 mod 2^16-1)-c2, (a3*b3 mod 2^16-1)-c3,
+ (a4*b4 mod 2^16-1)-c4, (a5*b5 mod 2^16-1)-c5, (a6*b6 mod 2^16-1)-c6, (a7*b7 mod 2^16-1)-c7,
+ (a8*b8 mod 2^16-1)-c8, (a9*b9 mod 2^16-1)-c9, (a10*b10 mod 2^16-1)-c10, (a11*b11 mod 2^16-1)-c11,
+ (a12*b12 mod 2^16-1)-c12, (a13*b13 mod 2^16-1)-c13, (a14*b14 mod 2^16-1)-c14, (a15*b15 mod 2^16-1)-c15]
+ */
+ static INLINE CONST vect_t fmsub(const vect_t c, const vect_t a, const vect_t b) { return sub(c, mul(a, b)); }
+
+ static INLINE CONST vect_t fsubin(vect_t c, const vect_t a, const vect_t b) { return c = fmsub(c, a, b); }
+
+ /*
+ * Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16
+ bits of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ * Return :
+ */
+ static INLINE CONST vect_t mulhi(const vect_t a, const vect_t b) { return _mm256_mulhi_epi16(a, b); }
+
+ /*
+ * Multiply the low 8-bit integers from each packed 16-bit element in a and b, and store the signed 16-bit results
+ in dst.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ * Return : [a0*b0, a1*b1, a2*b2, a3*b3, a4*b4, a5*b5, a6*b6, a7*b7, a8*b8, a9*b9, a10*b10, a11*b11, a12*b12,
+ a13*b13, a14*b14, a15*b15] int16_t
+ */
+ static INLINE CONST vect_t mulx(vect_t a, vect_t b) {
+ vect_t mask = set1(0x00FF);
+ a = vand(a, mask);
+ b = vand(b, mask);
+ return mullo(a, b);
+ }
+
+ /*
+ * Compare packed 16-bits in a and b for equality, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ * Return : [(a0==b0) ? 0xFFFF : 0, (a1==b1) ? 0xFFFF : 0,
+ (a2==b2) ? 0xFFFF : 0, (a3==b3) ? 0xFFFF : 0,
+ (a4==b4) ? 0xFFFF : 0, (a5==b5) ? 0xFFFF : 0,
+ (a6==b6) ? 0xFFFF : 0, (a7==b7) ? 0xFFFF : 0,
+ (a8==b8) ? 0xFFFF : 0, (a9==b9) ? 0xFFFF : 0,
+ (a10==b10) ? 0xFFFF : 0, (a11==b11) ? 0xFFFF : 0,
+ (a12==b12) ? 0xFFFF : 0, (a13==b13) ? 0xFFFF : 0,
+ (a14==b14) ? 0xFFFF : 0, (a15==b15) ? 0xFFFF : 0] int16_t
+ */
+ static INLINE CONST vect_t eq(const vect_t a, const vect_t b) { return _mm256_cmpeq_epi16(a, b); }
+
+ /*
+ * Compare packed 16-bits in a and b for greater-than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ * Return : [(a0>b0) ? 0xFFFF : 0, (a1>b1) ? 0xFFFF : 0,
+ (a2>b2) ? 0xFFFF : 0, (a3>b3) ? 0xFFFF : 0,
+ (a4>b4) ? 0xFFFF : 0, (a5>b5) ? 0xFFFF : 0,
+ (a6>b6) ? 0xFFFF : 0, (a7>b7) ? 0xFFFF : 0,
+ (a8>b8) ? 0xFFFF : 0, (a9>b9) ? 0xFFFF : 0,
+ (a10>b10) ? 0xFFFF : 0, (a11>b11) ? 0xFFFF : 0,
+ (a12>b12) ? 0xFFFF : 0, (a13>b13) ? 0xFFFF : 0,
+ (a14>b14) ? 0xFFFF : 0, (a15>b15) ? 0xFFFF : 0] int16_t
+ */
+ static INLINE CONST vect_t greater(const vect_t a, const vect_t b) { return _mm256_cmpgt_epi16(a, b); }
+
+ /*
+ * Compare packed 16-bits in a and b for lesser-than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ * Return : [(a0<b0) ? 0xFFFF : 0, (a1<b1) ? 0xFFFF : 0,
+ (a2<b2) ? 0xFFFF : 0, (a3<b3) ? 0xFFFF : 0,
+ (a4<b4) ? 0xFFFF : 0, (a5<b5) ? 0xFFFF : 0,
+ (a6<b6) ? 0xFFFF : 0, (a7<b7) ? 0xFFFF : 0,
+ (a8<b8) ? 0xFFFF : 0, (a9<b9) ? 0xFFFF : 0,
+ (a10<b10) ? 0xFFFF : 0, (a11<b11) ? 0xFFFF : 0,
+ (a12<b12) ? 0xFFFF : 0, (a13<b13) ? 0xFFFF : 0,
+ (a14<b14) ? 0xFFFF : 0, (a15>b15) ? 0xFFFF : 0] int16_t
+ */
+ static INLINE CONST vect_t lesser(const vect_t a, const vect_t b) { return _mm256_cmpgt_epi16(b, a); }
+
+ /*
+ * Compare packed 16-bits in a and b for greater or equal than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ * Return : [(a0>=b0) ? 0xFFFF : 0, (a1>=b1) ? 0xFFFF : 0,
+ (a2>=b2) ? 0xFFFF : 0, (a3>=b3) ? 0xFFFF : 0,
+ (a4>=b4) ? 0xFFFF : 0, (a5>=b5) ? 0xFFFF : 0,
+ (a6>=b6) ? 0xFFFF : 0, (a7>=b7) ? 0xFFFF : 0,
+ (a8>=b8) ? 0xFFFF : 0, (a9>=b9) ? 0xFFFF : 0,
+ (a10>=b10) ? 0xFFFF : 0, (a11>=b11) ? 0xFFFF : 0,
+ (a12>=b12) ? 0xFFFF : 0, (a13>=b13) ? 0xFFFF : 0,
+ (a14>=b14) ? 0xFFFF : 0, (a15>=b15) ? 0xFFFF : 0] int16_t
+ */
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return vor(greater(a, b), eq(a, b)); }
+
+ /*
+ * Compare packed 16-bits in a and b for lesser or equal than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ * Return : [(a0<=b0) ? 0xFFFF : 0, (a1<=b1) ? 0xFFFF : 0,
+ (a2<=b2) ? 0xFFFF : 0, (a3<=b3) ? 0xFFFF : 0,
+ (a4<=b4) ? 0xFFFF : 0, (a5<=b5) ? 0xFFFF : 0,
+ (a6<=b6) ? 0xFFFF : 0, (a7<=b7) ? 0xFFFF : 0,
+ (a8<=b8) ? 0xFFFF : 0, (a9<=b9) ? 0xFFFF : 0,
+ (a10<=b10) ? 0xFFFF : 0, (a11<=b11) ? 0xFFFF : 0,
+ (a12<=b12) ? 0xFFFF : 0, (a13<=b13) ? 0xFFFF : 0,
+ (a14<=b14) ? 0xFFFF : 0, (a15<=b15) ? 0xFFFF : 0] int16_t
+ */
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return vor(lesser(a, b), eq(a, b)); }
+
+ /*
+ * Compute the bitwise AND of packed 16-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15]
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15]
+ * Return : [a0 AND b0, a1 AND b1, a2 AND b2, a3 AND b3, a4 AND b4, a5 AND b5, a6 AND b6, a7 AND b7,
+ a8 AND b8, a9 AND b9, a10 AND b10, a11 AND b11, a12 AND b12, a13 AND b13, a14 AND b14, a15 AND b15]
+ */
+ static INLINE CONST vect_t vand(const vect_t a, const vect_t b) { return _mm256_and_si256(b, a); }
+
+ /*
+ * Compute the bitwise OR of packed 16-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15]
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15]
+ * Return : [a0 OR b0, a1 OR b1, a2 OR b2, a3 OR b3, a4 OR b4, a5 OR b5, a6 OR b6, a7 OR b7,
+ a8 OR b8, a9 OR b9, a10 OR b10, a11 OR b11, a12 OR b12, a13 OR b13, a14 OR b14, a15 OR b15]
+ */
+ static INLINE CONST vect_t vor(const vect_t a, const vect_t b) { return _mm256_or_si256(b, a); }
+
+ /*
+ * Compute the bitwise XOR of packed 16-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15]
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15]
+ * Return : [a0 XOR b0, a1 XOR b1, a2 XOR b2, a3 XOR b3, a4 XOR b4, a5 XOR b5, a6 XOR b6, a7 XOR b7,
+ a8 XOR b8, a9 XOR b9, a10 XOR b10, a11 XOR b11, a12 XOR b12, a13 XOR b13, a14 XOR b14, a15 XOR b15]
+ */
+ static INLINE CONST vect_t vxor(const vect_t a, const vect_t b) { return _mm256_xor_si256(b, a); }
+
+ /*
+ * Compute the bitwise AND NOT of packed 16-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15]
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15]
+ * Return : [a0 ANDNOT b0, a1 ANDNOT b1, a2 ANDNOT b2, a3 ANDNOT b3, a4 ANDNOT b4, a5 ANDNOT b5, a6 ANDNOT b6, a7
+ ANDNOT b7,
+ a8 ANDNOT b8, a9 ANDNOT b9, a10 ANDNOT b10, a11 ANDNOT b11, a12 ANDNOT b12, a13 ANDNOT b13, a14 ANDNOT b14, a15
+ ANDNOT b15]
+ */
+ static INLINE CONST vect_t vandnot(const vect_t a, const vect_t b) { return _mm256_andnot_si256(b, a); }
+
+ /*
+ * Horizontally add 16-bits elements of a.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15]
+ * Return : a0+a1+a2+a3+a4+a5+a6+a7+a8+a9+a10+a11+a12+a13+a14+a15
+ */
+ static INLINE CONST scalar_t hadd_to_scal(const vect_t a) {
+ Converter ca;
+ ca.v = a;
+ return ca.t[0] + ca.t[1] + ca.t[2] + ca.t[3] + ca.t[4] + ca.t[5] + ca.t[6] + ca.t[7] + ca.t[8] + ca.t[9] +
+ ca.t[10] + ca.t[11] + ca.t[12] + ca.t[13] + ca.t[14] + ca.t[15];
+ }
+
+ static INLINE PURE half_t load_half(const scalar_t *const p) {
+ return _mm_load_si128(reinterpret_cast<const half_t *>(p));
+ }
+
+ static INLINE PURE half_t loadu_half(const scalar_t *const p) {
+ return _mm_loadu_si128(reinterpret_cast<const half_t *>(p));
+ }
+
+ static INLINE void store_half(const scalar_t *p, half_t v) {
+ _mm_store_si128(reinterpret_cast<half_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ static INLINE void storeu_half(const scalar_t *p, half_t v) {
+ _mm_storeu_si128(reinterpret_cast<half_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ *
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15] int16_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15] int16_t
+ [c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15] int16_t
+ * Return : [a0*b0+c0, a1*b1+c1, a2*b2+c2, a3*b3+c3, a4*b4+c4, a5*b5+c5, a6*b6+c6, a7*b7+c7, a8*b8+c8, a9*b9+c9,
+ a10*b10+c10, a11*b11+c11, a12*b12+c12, a13*b13+c13, a14*b14+c14, a15*b15+c15] int16_t
+ */
+ static INLINE CONST vect_t fmaddx(const vect_t c, const vect_t a, const vect_t b) { return add(c, mulx(a, b)); }
+
+ static INLINE vect_t fmaddxin(vect_t &c, const vect_t a, const vect_t b) { return c = fmaddx(c, a, b); }
+
+ static INLINE CONST vect_t fnmaddx(const vect_t c, const vect_t a, const vect_t b) { return sub(c, mulx(a, b)); }
+
+ static INLINE vect_t fnmaddxin(vect_t &c, const vect_t a, const vect_t b) { return c = fnmaddx(c, a, b); }
+
+ static INLINE CONST vect_t round(const vect_t a) { return a; }
+
+ static INLINE CONST vect_t signbits(const vect_t x) {
+ vect_t signBits = sub(zero(), srl(x, 4*sizeof(scalar_t)-1));
+ return signBits;
+ }
+
+ static INLINE vect_t mod(vect_t &C, const vect_t &P, const vect_t &INVP, const vect_t &NEGP, const vect_t &MIN,
+ const vect_t &MAX, vect_t &Q, vect_t &T) {
+#ifdef __INTEL_COMPILER
+ C = _mm256_rem_epi16(C, P);
+#else
+ FFLASFFPACK_abort("pas implementé");
+#endif
+ NORML_MOD(C, P, NEGP, MIN, MAX, Q, T);
+ return C;
+ }
+
+#else
+
+#error "You need AVX2 instructions to perform 256bits operations on int16_t"
+
+#endif // defined(__FFLASFFPACK_USE_AVX2)
+};
+
+// uint16_t
+template <> struct Simd256_impl<true, true, false, 2> : public Simd256_impl<true, true, true, 2> {
+ using scalar_t = uint16_t;
+
+#if defined(__FFLASFFPACK_USE_AVX2)
+
+ static INLINE CONST vect_t greater(vect_t a, vect_t b) {
+
+ vect_t x;
+ x = set1(-(static_cast<scalar_t>(1) << (sizeof(scalar_t) * 8 - 1)));
+ a = sub(x, a);
+ b = sub(x, b);
+ return _mm256_cmpgt_epi16(a, b);
+ }
+
+ static INLINE CONST vect_t lesser(vect_t a, vect_t b) {
+ vect_t x;
+ x = set1(-(static_cast<scalar_t>(1) << (sizeof(scalar_t) * 8 - 1)));
+ a = sub(x, a);
+ b = sub(x, b);
+ return _mm256_cmpgt_epi16(a, b);
+ }
+
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return vor(greater(a, b), eq(a, b)); }
+
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return vor(lesser(a, b), eq(a, b)); }
+#else
+
+#error "You need AVX2 instructions to perform 256bits operations on uint16_t"
+
+#endif // defined(__FFLASFFPACK_USE_AVX2)
+};
+
+#endif // __FFLASFFPACK_fflas_ffpack_utils_simd256_int16_INL
diff --git a/fflas-ffpack/fflas/fflas_simd/simd256_int32.inl b/fflas-ffpack/fflas/fflas_simd/simd256_int32.inl
new file mode 100644
index 0000000..ffe0b7e
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_simd/simd256_int32.inl
@@ -0,0 +1,484 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_ffpack_utils_simd256_int32_INL
+#define __FFLASFFPACK_fflas_ffpack_utils_simd256_int32_INL
+
+/*
+ * Simd256 specialized for int32_t
+ */
+template <> struct Simd256_impl<true, true, true, 4> {
+#if defined(__FFLASFFPACK_USE_AVX2)
+ /*
+ * alias to 256 bit simd register
+ */
+ using vect_t = __m256i;
+
+ /*
+ * alias to 256 bit simd register
+ */
+ using half_t = __m128i;
+
+ /*
+ * define the scalar type corresponding to the specialization
+ */
+ using scalar_t = int32_t;
+
+ /*
+ * Simd128 for scalar_t, to deal half_t
+ */
+ using simdHalf = Simd128<scalar_t>;
+
+ /*
+ * number of scalar_t in a simd register
+ */
+ static const constexpr size_t vect_size = 8;
+
+ /*
+ * alignement required by scalar_t pointer to be loaded in a vect_t
+ */
+ static const constexpr size_t alignment = 32;
+
+ /*
+ * Check if the pointer p is a multiple of alignemnt
+ */
+ template <class T> static constexpr bool valid(T *p) { return (int64_t)p % alignment == 0; }
+
+ /*
+ * Check if the number n is a multiple of vect_size
+ */
+ template <class T> static constexpr bool compliant(T n) { return n % vect_size == 0; }
+
+ /*
+ * Converter from vect_t to a tab.
+ * exple:
+ * Converter conv;
+ * conv.v = a;
+ * scalart_t x = conv.t[1]
+ */
+ union Converter {
+ vect_t v;
+ scalar_t t[vect_size];
+ };
+
+ /*
+ * Return vector of type vect_t with all elements set to zero
+ * Return [0,0,0,0,0,0,0,0] int32_t
+ */
+ static INLINE CONST vect_t zero() { return _mm256_setzero_si256(); }
+
+ /*
+ * Broadcast 32-bit integer a to all all elements of dst. This intrinsic may generate the vpbroadcastw.
+ * Return [x,x,x,x,x,x,x,x] int32_t
+ */
+ static INLINE CONST vect_t set1(const scalar_t x) { return _mm256_set1_epi32(x); }
+
+ /*
+ * Broadcast 32-bit integer a to all all elements of dst. This intrinsic may generate the vpbroadcastw.
+ * Return [x0,x1,x2,x3,x4,x5,x6,x7] int32_t
+ */
+ static INLINE CONST vect_t set(const scalar_t x0, const scalar_t x1, const scalar_t x2, const scalar_t x3,
+ const scalar_t x4, const scalar_t x5, const scalar_t x6, const scalar_t x7) {
+ return _mm256_set_epi32(x7, x6, x5, x4, x3, x2, x1, x0);
+ }
+
+ /*
+ * Gather 32-bit integer elements with indexes idx[0], ..., idx[7] from the address p in vect_t.
+ * Return [p[idx[0]], p[idx[1]], p[idx[2]], p[idx[3]],
+ p[idx[4]], p[idx[5]], p[idx[6]], p[idx[7]]] int32_t
+ */
+ template <class T> static INLINE PURE vect_t gather(const scalar_t *const p, const T *const idx) {
+ return set(p[idx[0]], p[idx[1]], p[idx[2]], p[idx[3]], p[idx[4]], p[idx[5]], p[idx[6]], p[idx[7]]);
+ }
+
+ /*
+ * Load 256-bits of integer data from memory into dst.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ * Return [p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7]] int32_t
+ */
+ static INLINE PURE vect_t load(const scalar_t *const p) {
+ return _mm256_load_si256(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Load 256-bits of integer data from memory into dst.
+ * p does not need to be aligned on any particular boundary.
+ * Return [p[0],p[1],p[2],p[3],p[4],p[5],p[6],p[7]] int32_t
+ */
+ static INLINE PURE vect_t loadu(const scalar_t *const p) {
+ return _mm256_loadu_si256(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Store 256-bits of integer data from a into memory.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void store(const scalar_t *p, vect_t v) {
+ _mm256_store_si256(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Store 256-bits of integer data from a into memory.
+ * p does not need to be aligned on any particular boundary.
+ */
+ static INLINE void storeu(const scalar_t *p, vect_t v) {
+ _mm256_storeu_si256(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Store 256-bits of integer data from a into memory using a non-temporal memory hint.
+ * p must be aligned on a 32-byte boundary or a general-protection exception may be generated.
+ */
+ static INLINE void stream(const scalar_t *p, const vect_t v) {
+ _mm256_stream_si256(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+
+ /*
+ * Shift packed 32-bit integers in a left by s while shifting in zeros, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ * Return : [a0 << s, a1 << s, a2 << s, a3 << s, a4 << s, a5 << s, a6 << s, a7 << s] int32_t
+ */
+ static INLINE CONST vect_t sll(const vect_t a, const int s) { return _mm256_slli_epi32(a, s); }
+
+ /*
+ * Shift packed 32-bit integers in a right by s while shifting in zeros, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ * Return : [a0 >> s, a1 >> s, a2 >> s, a3 >> s, a4 >> s, a5 >> s, a6 >> s, a7 >> s] int32_t
+ */
+ static INLINE CONST vect_t srl(const vect_t a, const int s) { return _mm256_srli_epi32(a, s); }
+
+
+ static INLINE CONST vect_t sra(const vect_t a, const int s) { return _mm256_sra_epi32(a, Simd128<int>::set1(s)); }
+
+ /*
+ * Add packed 32-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [a0+b0, a1+b1, a2+b2, a3+b3, a4+b4, a5+b5, a6+b6, a7+b7] int32_t
+ */
+ static INLINE CONST vect_t add(const vect_t a, const vect_t b) { return _mm256_add_epi32(a, b); }
+
+ static INLINE vect_t addin(vect_t &a, const vect_t b) { return a = add(a, b); }
+
+ /*
+ * Subtract packed 32-bits integers in b from packed 32-bits integers in a, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [a0-b0, a1-b1, a2-b2, a3-b3, a4-b4, a5-b5, a6-b6, a7-b7] int32_t
+ */
+ static INLINE CONST vect_t sub(const vect_t a, const vect_t b) { return _mm256_sub_epi32(a, b); }
+
+ static INLINE vect_t subin(vect_t &a, const vect_t b) { return a = sub(a, b); }
+
+ /*
+ * Multiply the packed 32-bits integers in a and b, producing intermediate 64-bit integers, and store the low 32
+ bits of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7, a8] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7, b8] int32_t
+ * Return : [a0*b0 mod 2^32-1, a1*b1 mod 2^32-1, a2*b2 mod 2^32-1, a3*b3 mod 2^32-1,
+ a4*b4 mod 2^32-1, a5*b5 mod 2^32-1, a6*b6 mod 2^32-1, a7*b7 mod 2^32-1] int32_t
+ */
+ static INLINE CONST vect_t mullo(const vect_t a, const vect_t b) { return _mm256_mullo_epi32(a, b); }
+
+ /*
+ * Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 16 bits
+ of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [a0*b0 mod 2^32-1, a1*b1 mod 2^32-1, a2*b2 mod 2^32-1, a3*b3 mod 2^32-1,
+ a4*b4 mod 2^32-1, a5*b5 mod 2^32-1, a6*b6 mod 2^32-1, a7*b7 mod 2^32-1] int32_t
+ */
+ static INLINE CONST vect_t mul(const vect_t a, const vect_t b) { return mullo(a, b); }
+
+ /*
+ * Multiply packed 32-bit integers in a and b, producing intermediate 64-bit integers, and add the low 32-bits of
+ the intermediate with c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ [c0, c1, c2, c3, c4, c5, c6, c7] int32_t
+ * Return : [(a0*b0 mod 2^32-1)+c0, (a1*b1 mod 2^32-1)+c1, (a2*b2 mod 2^32-1)+c2, (a3*b3 mod 2^32-1)+c3,
+ (a4*b4 mod 2^32-1)+c4, (a5*b5 mod 2^32-1)+c5, (a6*b6 mod 2^32-1)+c6, (a7*b7 mod 2^32-1)+c7]
+ */
+ static INLINE CONST vect_t fmadd(const vect_t c, const vect_t a, const vect_t b) { return add(c, mul(a, b)); }
+
+ static INLINE CONST vect_t fmaddin(vect_t c, const vect_t a, const vect_t b) { return c = fmadd(c, a, b); }
+
+ /*
+ * Multiply packed 32-bit integers in a and b, producing intermediate 64-bit integers, and substract elements of c
+ to the low 32-bit of the intermiate result, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ [c0, c1, c2, c3, c4, c5, c6, c7] int32_t
+ * Return : [-(a0*b0 mod 2^32-1)+c0, -(a1*b1 mod 2^32-1)+c1, -(a2*b2 mod 2^32-1)+c2, -(a3*b3 mod 2^32-1)+c3,
+ -(a4*b4 mod 2^32-1)+c4, -(a5*b5 mod 2^32-1)+c5, -(a6*b6 mod 2^32-1)+c6, -(a7*b7 mod 2^32-1)+c7]
+ */
+ static INLINE CONST vect_t fnmadd(const vect_t c, const vect_t a, const vect_t b) { return sub(c, mul(a, b)); }
+
+ static INLINE CONST vect_t fnmaddin(vect_t c, const vect_t a, const vect_t b) { return c = fnmadd(c, a, b); }
+
+ /*
+ * Multiply packed 32-bit integers in a and b, producing intermediate 64-bit integers, and substract the low 32-bits
+ of the intermediate with c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ [c0, c1, c2, c3, c4, c5, c6, c7] int32_t
+ * Return : [(a0*b0 mod 2^32-1)-c0, (a1*b1 mod 2^32-1)-c1, (a2*b2 mod 2^32-1)-c2, (a3*b3 mod 2^32-1)-c3,
+ (a4*b4 mod 2^32-1)-c4, (a5*b5 mod 2^32-1)-c5, (a6*b6 mod 2^32-1)-c6, (a7*b7 mod 2^32-1)-c7]
+ */
+ static INLINE CONST vect_t fmsub(const vect_t c, const vect_t a, const vect_t b) { return sub(c, mul(a, b)); }
+
+ static INLINE CONST vect_t fsubin(vect_t c, const vect_t a, const vect_t b) { return c = fmsub(c, a, b); }
+
+ /*
+ * Multiply the packed 32-bits integers in a and b, producing intermediate 64-bit integers, and store the high 32
+ bits of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return :
+ */
+ static INLINE CONST vect_t mulhi(const vect_t a, const vect_t b) {
+ Converter ca, cb;
+ ca.v = a;
+ cb.v = b;
+ vect_t a1, a2, b1, b2, c1, c2;
+ a1 = set(0, ca.t[0], 0, ca.t[1], 0, ca.t[2], 0, ca.t[3]);
+ a2 = set(0, ca.t[4], 0, ca.t[5], 0, ca.t[6], 0, ca.t[7]);
+ b1 = set(0, cb.t[0], 0, cb.t[1], 0, cb.t[2], 0, cb.t[3]);
+ b2 = set(0, cb.t[4], 0, cb.t[5], 0, cb.t[6], 0, cb.t[7]);
+ c1 = mulx(a1, b1);
+ c2 = mulx(a2, b2);
+ ca.v = c1;
+ cb.v = c2;
+ return set(ca.t[0], ca.t[2], ca.t[4], ca.t[6], cb.t[0], cb.t[2], cb.t[4], cb.t[6]);
+ }
+
+ /*
+ * Multiply the low 16-bit integers from each packed 32-bit element in a and b, and store the signed 32-bit results
+ in dst.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [a0*b0, a1*b1, a2*b2, a3*b3, a4*b4, a5*b5, a6*b6, a7*b7] int32_t
+ */
+ static INLINE CONST vect_t mulx(vect_t a, vect_t b) {
+ vect_t mask = set1(0x0000FFFF);
+ a = vand(a, mask);
+ b = vand(b, mask);
+ return mullo(a, b);
+ }
+
+ /*
+ * Compare packed 32-bits in a and b for equality, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [(a0==b0) ? 0xFFFF : 0, (a1==b1) ? 0xFFFF : 0,
+ (a2==b2) ? 0xFFFF : 0, (a3==b3) ? 0xFFFF : 0,
+ (a4==b4) ? 0xFFFF : 0, (a5==b5) ? 0xFFFF : 0,
+ (a6==b6) ? 0xFFFF : 0, (a7==b7) ? 0xFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t eq(const vect_t a, const vect_t b) { return _mm256_cmpeq_epi32(a, b); }
+
+ /*
+ * Compare packed 32-bits in a and b for greater-than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [(a0>b0) ? 0xFFFF : 0, (a1>b1) ? 0xFFFF : 0,
+ (a2>b2) ? 0xFFFF : 0, (a3>b3) ? 0xFFFF : 0,
+ (a4>b4) ? 0xFFFF : 0, (a5>b5) ? 0xFFFF : 0,
+ (a6>b6) ? 0xFFFF : 0, (a7>b7) ? 0xFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t greater(const vect_t a, const vect_t b) { return _mm256_cmpgt_epi32(a, b); }
+
+ /*
+ * Compare packed 32-bits in a and b for lesser-than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [(a0<b0) ? 0xFFFF : 0, (a1<b1) ? 0xFFFF : 0,
+ (a2<b2) ? 0xFFFF : 0, (a3<b3) ? 0xFFFF : 0,
+ (a4<b4) ? 0xFFFF : 0, (a5<b5) ? 0xFFFF : 0,
+ (a6<b6) ? 0xFFFF : 0, (a7<b7) ? 0xFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t lesser(const vect_t a, const vect_t b) { return _mm256_cmpgt_epi32(b, a); }
+
+ /*
+ * Compare packed 32-bits in a and b for greater or equal than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [(a0>=b0) ? 0xFFFF : 0, (a1>=b1) ? 0xFFFF : 0,
+ (a2>=b2) ? 0xFFFF : 0, (a3>=b3) ? 0xFFFF : 0,
+ (a4>=b4) ? 0xFFFF : 0, (a5>=b5) ? 0xFFFF : 0,
+ (a6>=b6) ? 0xFFFF : 0, (a7>=b7) ? 0xFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return vor(greater(a, b), eq(a, b)); }
+
+ /*
+ * Compare packed 32-bits in a and b for lesser or equal than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [(a0<=b0) ? 0xFFFF : 0, (a1<=b1) ? 0xFFFF : 0,
+ (a2<=b2) ? 0xFFFF : 0, (a3<=b3) ? 0xFFFF : 0,
+ (a4<=b4) ? 0xFFFF : 0, (a5<=b5) ? 0xFFFF : 0,
+ (a6<=b6) ? 0xFFFF : 0, (a7<=b7) ? 0xFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return vor(lesser(a, b), eq(a, b)); }
+
+ /*
+ * Compute the bitwise AND of packed 32-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 AND b0, a1 AND b1, a2 AND b2, a3 AND b3, a4 AND b4, a5 AND b5, a6 AND b6, a7 AND b7]
+ */
+ static INLINE CONST vect_t vand(const vect_t a, const vect_t b) { return _mm256_and_si256(b, a); }
+
+ /*
+ * Compute the bitwise OR of packed 32-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 OR b0, a1 OR b1, a2 OR b2, a3 OR b3, a4 OR b4, a5 OR b5, a6 OR b6, a7 OR b7]
+ */
+ static INLINE CONST vect_t vor(const vect_t a, const vect_t b) { return _mm256_or_si256(b, a); }
+
+ /*
+ * Compute the bitwise XOR of packed 32-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 XOR b0, a1 XOR b1, a2 XOR b2, a3 XOR b3, a4 XOR b4, a5 XOR b5, a6 XOR b6, a7 XOR b7]
+ */
+ static INLINE CONST vect_t vxor(const vect_t a, const vect_t b) { return _mm256_xor_si256(b, a); }
+
+ /*
+ * Compute the bitwise AND NOT of packed 32-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ [b0, b1, b2, b3, b4, b5, b6, b7]
+ * Return : [a0 ANDNOT b0, a1 ANDNOT b1, a2 ANDNOT b2, a3 ANDNOT b3, a4 ANDNOT b4, a5 ANDNOT b5, a6 ANDNOT b6, a7
+ ANDNOT b7]
+ */
+ static INLINE CONST vect_t vandnot(const vect_t a, const vect_t b) { return _mm256_andnot_si256(b, a); }
+
+ /*
+ * Horizontally add 32-bits elements of a.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7]
+ * Return : a0+a1+a2+a3+a4+a5+a6+a7
+ */
+ static INLINE CONST scalar_t hadd_to_scal(const vect_t a) {
+ Converter ca;
+ ca.v = a;
+ return ca.t[0] + ca.t[1] + ca.t[2] + ca.t[3] + ca.t[4] + ca.t[5] + ca.t[6] + ca.t[7];
+ }
+
+ static INLINE PURE half_t load_half(const scalar_t *const p) {
+ return _mm_load_si128(reinterpret_cast<const half_t *>(p));
+ }
+
+ static INLINE PURE half_t loadu_half(const scalar_t *const p) {
+ return _mm_loadu_si128(reinterpret_cast<const half_t *>(p));
+ }
+
+ static INLINE void store_half(const scalar_t *p, half_t v) {
+ _mm_store_si128(reinterpret_cast<half_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ static INLINE void storeu_half(const scalar_t *p, half_t v) {
+ _mm_storeu_si128(reinterpret_cast<half_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ *
+ * Args : [0, a1, 0, a3, 0, a5, 0, a7] int32_t
+ [0, b1, 0, b3, 0, b5, 0, b7] int32_t
+ [c0, c1, c2, c3] int64_t
+ * Return : [c0+a1*b1, c1+a3*b2, c2+a5*b5, c3+a7*b7] int64_t
+ */
+ static INLINE CONST vect_t fmaddx(vect_t c, const vect_t a, const vect_t b) { return add(c, mulx(a, b)); }
+ static INLINE vect_t fmaddxin(vect_t &c, const vect_t a, const vect_t b) { return c = fmaddx(c, a, b); }
+
+ static INLINE CONST vect_t fnmaddx(const vect_t c, const vect_t a, const vect_t b) { return sub(c, mulx(a, b)); }
+
+ static INLINE vect_t fnmaddxin(vect_t &c, const vect_t a, const vect_t b) { return c = fnmaddx(c, a, b); }
+
+ static INLINE CONST vect_t round(const vect_t a) { return a; }
+
+ static INLINE CONST vect_t signbits(const vect_t x) {
+ vect_t signBits = sub(zero(), srl(x, 4*sizeof(scalar_t)-1));
+ return signBits;
+ }
+
+ static INLINE vect_t mod(vect_t &C, const vect_t &P, const vect_t &INVP, const vect_t &NEGP, const vect_t &MIN,
+ const vect_t &MAX, vect_t &Q, vect_t &T) {
+#ifdef __INTEL_COMPILER
+ C = _mm256_rem_epi32(C, P);
+#else
+ FFLASFFPACK_abort("pas implementé");
+// C = fnmadd(C,_mm256_castps_si128(_mm256_floor_ps(_mm256_mul_ps(INVP,_mm256_castsi128_ps(C)))),P);
+#endif
+ NORML_MOD(C, P, NEGP, MIN, MAX, Q, T);
+ return C;
+ }
+
+#else
+
+#error "You need AVX2 instructions to perform 256bits operations on int32_t"
+
+#endif // defined(__FFLASFFPACK_USE_AVX2)
+};
+
+// uint16_t
+template <> struct Simd256_impl<true, true, false, 4> : public Simd256_impl<true, true, true, 4> {
+#if defined(__FFLASFFPACK_USE_AVX2)
+
+ using scalar_t = uint32_t;
+
+ static INLINE CONST vect_t greater(vect_t a, vect_t b) {
+
+ vect_t x;
+ x = set1(-(static_cast<scalar_t>(1) << (sizeof(scalar_t) * 8 - 1)));
+ a = sub(x, a);
+ b = sub(x, b);
+ return _mm256_cmpgt_epi32(a, b);
+ }
+
+ static INLINE CONST vect_t lesser(vect_t a, vect_t b) {
+ vect_t x;
+ x = set1(-(static_cast<scalar_t>(1) << (sizeof(scalar_t) * 8 - 1)));
+ a = sub(x, a);
+ b = sub(x, b);
+ return _mm256_cmpgt_epi32(a, b);
+ }
+
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return vor(greater(a, b), eq(a, b)); }
+
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return vor(lesser(a, b), eq(a, b)); }
+#else
+
+#error "You need AVX2 instructions to perform 256bits operations on uint32_t"
+
+#endif // defined(__FFLASFFPACK_USE_AVX2)
+};
+
+#endif // __FFLASFFPACK_fflas_ffpack_utils_simd256_int32_INL
diff --git a/fflas-ffpack/fflas/fflas_simd/simd256_int64.inl b/fflas-ffpack/fflas/fflas_simd/simd256_int64.inl
new file mode 100644
index 0000000..6f5b829
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_simd/simd256_int64.inl
@@ -0,0 +1,520 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_ffpack_utils_simd256_int64_INL
+#define __FFLASFFPACK_fflas_ffpack_utils_simd256_int64_INL
+
+/*
+ * Simd256 specialized for int64_t
+ */
+template <> struct Simd256_impl<true, true, true, 8> {
+
+#if defined(__FFLASFFPACK_USE_AVX2)
+ /*
+ * alias to 256 bit simd register
+ */
+ using vect_t = __m256i;
+
+ /*
+ * alias to 256 bit simd register
+ */
+ using half_t = __m128i;
+
+ /*
+ * define the scalar type corresponding to the specialization
+ */
+ using scalar_t = int64_t;
+
+ /*
+ * Simd128 for scalar_t, to deal half_t
+ */
+ using simdHalf = Simd128<scalar_t>;
+
+ /*
+ * number of scalar_t in a simd register
+ */
+ static const constexpr size_t vect_size = 4;
+
+ /*
+ * alignement required by scalar_t pointer to be loaded in a vect_t
+ */
+ static const constexpr size_t alignment = 32;
+
+ /*
+ * Check if the pointer p is a multiple of alignemnt
+ */
+ template <class T> static constexpr bool valid(T *p) { return (int64_t)p % alignment == 0; }
+
+ /*
+ * Check if the number n is a multiple of vect_size
+ */
+ template <class T> static constexpr bool compliant(T n) { return n % vect_size == 0; }
+
+ /*
+ * Converter from vect_t to a tab.
+ * exple:
+ * Converter conv;
+ * conv.v = a;
+ * scalar_t x = conv.t[i]
+ */
+ union Converter {
+ vect_t v;
+ scalar_t t[vect_size];
+ };
+
+ /*
+ * Return vector of type vect_t with all elements set to zero
+ * Return [0,0,0,0] int64_t
+ */
+ static INLINE CONST vect_t zero() { return _mm256_setzero_si256(); }
+
+ /*
+ * Broadcast 64-bit integer a to all all elements of dst. This intrinsic may generate the vpbroadcastw.
+ * Return [x,x,x,x] int64_t
+ */
+ static INLINE CONST vect_t set1(const scalar_t x) { return _mm256_set1_epi64x(x); }
+
+ /*
+ * Broadcast 64-bit integer a to all all elements of dst. This intrinsic may generate the vpbroadcastw.
+ * Return [x0,x1,x2,x3] int64_t
+ */
+ static INLINE CONST vect_t set(const scalar_t x0, const scalar_t x1, const scalar_t x2, const scalar_t x3) {
+ return _mm256_set_epi64x(x3, x2, x1, x0);
+ }
+
+ /*
+ * Gather 64-bit integer elements with indexes idx[0], ..., idx[3] from the address p in vect_t.
+ * Return [p[idx[0]], p[idx[1]], p[idx[2]], p[idx[3]]] int64_t
+ */
+ template <class T> static INLINE PURE vect_t gather(const scalar_t *const p, const T *const idx) {
+ return set(p[idx[0]], p[idx[1]], p[idx[2]], p[idx[3]]);
+ }
+
+ /*
+ * Load 256-bits of integer data from memory into dst.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ * Return [p[0],p[1],p[2],p[3]] int32_t
+ */
+ static INLINE PURE vect_t load(const scalar_t *const p) {
+ return _mm256_load_si256(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Load 256-bits of integer data from memory into dst.
+ * p does not need to be aligned on any particular boundary.
+ * Return [p[0],p[1],p[2],p[3]] int64_t
+ */
+ static INLINE PURE vect_t loadu(const scalar_t *const p) {
+ return _mm256_loadu_si256(reinterpret_cast<const vect_t *>(p));
+ }
+
+ /*
+ * Store 256-bits of integer data from a into memory.
+ * p must be aligned on a 32-byte boundary or a general-protection exception will be generated.
+ */
+ static INLINE void store(const scalar_t *p, vect_t v) {
+ _mm256_store_si256(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Store 256-bits of integer data from a into memory.
+ * p does not need to be aligned on any particular boundary.
+ */
+ static INLINE void storeu(const scalar_t *p, vect_t v) {
+ _mm256_storeu_si256(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Store 256-bits of integer data from a into memory using a non-temporal memory hint.
+ * p must be aligned on a 32-byte boundary or a general-protection exception may be generated.
+ */
+ static INLINE void stream(const scalar_t *p, const vect_t v) {
+ _mm256_stream_si256(reinterpret_cast<vect_t *>(const_cast<scalar_t *>(p)), v);
+ }
+
+ /*
+ * Add packed 64-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int64_t
+ [b0, b1, b2, b3] int64_t
+ * Return : [a0+b0, a1+b1, a2+b2, a3+b3] int64_t
+ */
+ static INLINE CONST vect_t add(const vect_t a, const vect_t b) { return _mm256_add_epi64(a, b); }
+
+ static INLINE vect_t addin(vect_t &a, const vect_t b) { return a = add(a, b); }
+
+ /*
+ * Subtract packed 64-bits integers in b from packed 64-bits integers in a, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int64_t
+ [b0, b1, b2, b3] int64_t
+ * Return : [a0-b0, a1-b1, a2-b2, a3-b3] int64_t
+ */
+ static INLINE CONST vect_t sub(const vect_t a, const vect_t b) { return _mm256_sub_epi64(a, b); }
+
+ static INLINE vect_t subin(vect_t &a, const vect_t b) { return a = sub(a, b); }
+
+ /*
+ * Shift packed 64-bit integers in a left by s while shifting in zeros, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int64_t
+ * Return : [a0 << s, a1 << s, a2 << s, a3 << s] int64_t
+ */
+ static INLINE CONST vect_t sll(const vect_t a, const int s) { return _mm256_slli_epi64(a, s); }
+
+ /*
+ * Shift packed 64-bit integers in a right by s while shifting in zeros, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int64_t
+ * Return : [a0 >> s, a1 >> s, a2 >> s, a3 >> s] int64_t
+ */
+ static INLINE CONST vect_t srl(const vect_t a, const int s) { return _mm256_srli_epi64(a, s); }
+
+ static INLINE CONST vect_t sra(const vect_t a, const int s) {
+#ifdef __AVX512__
+ return _mm256_sra_epi64(a, set1(s));
+#else
+ const int b = 63 - s;
+ vect_t m = sll(set1(1), b);
+ vect_t x = srl(a, s);
+ vect_t result = sub(vxor(x, m), m); // result = x^m - m
+ return result;
+#endif
+ }
+
+ /*
+ * Multiply the packed 64-bits integers in a and b, producing intermediate 128-bit integers, and store the low 64
+ bits of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3] int64_t
+ [b0, b1, b2, b3] int64_t
+ * Return : [a0*b0 mod 2^64-1, a1*b1 mod 2^64-1, a2*b2 mod 2^64-1, a3*b3 mod 2^64-1] int64_t
+ */
+ static INLINE CONST vect_t mullo(vect_t a, vect_t b) {
+//#warning "The simd mullo function is emulate, it may impact the performances."
+ Converter ca, cb;
+ ca.v = a;
+ cb.v = b;
+ return set(ca.t[0] * cb.t[0], ca.t[1] * cb.t[1], ca.t[2] * cb.t[2], ca.t[3] * cb.t[3]);
+ }
+
+ static INLINE CONST vect_t mullox(const vect_t x0, const vect_t x1) { return _mm256_mullo_epi32(x0, x1); }
+
+ /*
+ * Multiply the packed 64-bits integers in a and b, producing intermediate 128-bit integers, and store the low 64
+ bits of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3] int64_t
+ [b0, b1, b2, b3] int64_t
+ * Return : [a0*b0 mod 2^64-1, a1*b1 mod 2^64-1, a2*b2 mod 2^64-1, a3*b3 mod 2^64-1] int64_t
+ */
+ static INLINE CONST vect_t mul(const vect_t a, const vect_t b) { return mullo(a, b); }
+
+ /*
+ * Multiply the packed 64-bits integers in a and b, producing intermediate 128-bit integers, and store the high 64
+ bits of the intermediate integers in vect_t.
+ * Args : [a0, a1, a2, a3] int64_t
+ [b0, b1, b2, b3] int64_t
+ * Return :
+ */
+ static INLINE CONST vect_t mulhi(vect_t a, vect_t b) {
+ // ugly solution, but it works.
+ // tested with gcc, clang, icc
+ Converter ca, cb;
+ ca.v = a;
+ cb.v = b;
+ return set((int128_t(ca.t[0]) * cb.t[0]) >> 64, (int128_t(ca.t[1]) * cb.t[1]) >> 64,
+ (int128_t(ca.t[2]) * cb.t[2]) >> 64, (int128_t(ca.t[3]) * cb.t[3]) >> 64);
+ }
+
+ /*
+ * Multiply packed 64-bit integers in a and b, producing intermediate 128-bit integers, and add the low 64-bits of
+ the intermediate with c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int64_t
+ [b0, b1, b2, b3] int64_t
+ [c0, c1, c2, c3] int64_t
+ * Return : [(a0*b0 mod 2^64-1)+c0, (a1*b1 mod 2^64-1)+c1, (a2*b2 mod 2^64-1)+c2, (a3*b3 mod 2^64-1)+c3]
+ */
+ static INLINE CONST vect_t fmadd(const vect_t c, const vect_t a, const vect_t b) { return add(c, mul(a, b)); }
+
+ static INLINE vect_t fmaddin(vect_t &c, const vect_t a, const vect_t b) { return c = fmadd(c, a, b); }
+
+ /*
+ * Multiply packed 64-bit integers in a and b, producing intermediate 128-bit integers, and substract elements of c
+ to the low 64-bit of the intermiate result, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int64_t
+ [b0, b1, b2, b3] int64_t
+ [c0, c1, c2, c3] int64_t
+ * Return : [-(a0*b0 mod 2^64-1)+c0, -(a1*b1 mod 2^64-1)+c1, -(a2*b2 mod 2^64-1)+c2, -(a3*b3 mod 2^64-1)+c3]
+ */
+ static INLINE CONST vect_t fnmadd(const vect_t c, const vect_t a, const vect_t b) { return sub(c, mul(a, b)); }
+
+ /*
+ * Multiply packed 64-bit integers in a and b, producing intermediate 128-bit integers, and substract the low
+ 64-bits of the intermediate with c, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int64_t
+ [b0, b1, b2, b3] int64_t
+ [c0, c1, c2, c3] int64_t
+ * Return : [(a0*b0 mod 2^64-1)-c0, (a1*b1 mod 2^64-1)-c1, (a2*b2 mod 2^64-1)-c2, (a3*b3 mod 2^64-1)-c3]
+ */
+ static INLINE CONST vect_t fmsub(const vect_t c, const vect_t a, const vect_t b) { return sub(mul(a, b), c); }
+
+ /*
+ * Multiply the low 32-bits integers from each packed 64-bit element in a and b, and store the signed 64-bit results
+ in dst.
+ * Args : [a0, a1, a2, a3] int64_t
+ [b0, b1, b2, b3] int64_t
+ * Return : [a0*b0, a1*b1, a2*b2, a3*b3] int64_t
+ */
+ static INLINE CONST vect_t mulx(const vect_t a, const vect_t b) { return _mm256_mul_epi32(a, b); }
+
+ /*
+ * Multiply the low 32-bits integers from each packed 64-bit element in a and b, and store the unsigned 64-bit
+ results in dst.
+ * Args : [a0, a1, a2, a3] int64_t
+ [b0, b1, b2, b3] int64_t
+ * Return : [a0*b0, a1*b1, a2*b2, a3*b3] uint64_t
+ */
+ static INLINE CONST vect_t mulux(const vect_t a, const vect_t b) { return _mm256_mul_epu32(a, b); }
+
+ /*
+ * Compare packed 64-bits in a and b for equality, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ * Return : [(a0==b0) ? 0xFFFF : 0, (a1==b1) ? 0xFFFF : 0,
+ (a2==b2) ? 0xFFFF : 0, (a3==b3) ? 0xFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t eq(const vect_t a, const vect_t b) { return _mm256_cmpeq_epi64(a, b); }
+
+ /*
+ * Compare packed 64-bits in a and b for greater-than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ * Return : [(a0>b0) ? 0xFFFF : 0, (a1>b1) ? 0xFFFF : 0,
+ (a2>b2) ? 0xFFFF : 0, (a3>b3) ? 0xFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t greater(const vect_t a, const vect_t b) { return _mm256_cmpgt_epi64(a, b); }
+
+ /*
+ * Compare packed 64-bits in a and b for lesser-than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ * Return : [(a0<b0) ? 0xFFFF : 0, (a1<b1) ? 0xFFFF : 0,
+ (a2<b2) ? 0xFFFF : 0, (a3<b3) ? 0xFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t lesser(const vect_t a, const vect_t b) { return _mm256_cmpgt_epi64(b, a); }
+
+ /*
+ * Compare packed 64-bits in a and b for greater or equal than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3] int32_t
+ [b0, b1, b2, b3] int32_t
+ * Return : [(a0>=b0) ? 0xFFFF : 0, (a1>=b1) ? 0xFFFF : 0,
+ (a2>=b2) ? 0xFFFF : 0, (a3>=b3) ? 0xFFFF : 0,
+ (a4>=b4) ? 0xFFFF : 0, (a5>=b5) ? 0xFFFF : 0,
+ (a6>=b6) ? 0xFFFF : 0, (a7>=b7) ? 0xFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return vor(greater(a, b), eq(a, b)); }
+
+ /*
+ * Compare packed 64-bits in a and b for lesser or equal than, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3, a4, a5, a6, a7] int32_t
+ [b0, b1, b2, b3, b4, b5, b6, b7] int32_t
+ * Return : [(a0<=b0) ? 0xFFFF : 0, (a1<=b1) ? 0xFFFF : 0,
+ (a2<=b2) ? 0xFFFF : 0, (a3<=b3) ? 0xFFFF : 0] int32_t
+ */
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return vor(lesser(a, b), eq(a, b)); }
+
+ /*
+ * Compute the bitwise AND of packed 64-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3]
+ [b0, b1, b2, b3]
+ * Return : [a0 AND b0, a1 AND b1, a2 AND b2, a3 AND b3]
+ */
+ static INLINE CONST vect_t vand(const vect_t a, const vect_t b) { return _mm256_and_si256(b, a); }
+
+ /*
+ * Compute the bitwise OR of packed 64-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3]
+ [b0, b1, b2, b3]
+ * Return : [a0 OR b0, a1 OR b1, a2 OR b2, a3 OR b3]
+ */
+ static INLINE CONST vect_t vor(const vect_t a, const vect_t b) { return _mm256_or_si256(b, a); }
+
+ /*
+ * Compute the bitwise XOR of packed 64-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3]
+ [b0, b1, b2, b3]
+ * Return : [a0 XOR b0, a1 XOR b1, a2 XOR b2, a3 XOR b3]
+ */
+ static INLINE CONST vect_t vxor(const vect_t a, const vect_t b) { return _mm256_xor_si256(b, a); }
+
+ /*
+ * Compute the bitwise AND NOT of packed 64-bits integer in a and b, and store the results in vect_t.
+ * Args : [a0, a1, a2, a3]
+ [b0, b1, b2, b3]
+ * Return : [a0 ANDNOT b0, a1 ANDNOT b1, a2 ANDNOT b2, a3 ANDNOT b3]
+ */
+ static INLINE CONST vect_t vandnot(const vect_t a, const vect_t b) { return _mm256_andnot_si256(b, a); }
+
+ /*
+ * Horizontally add 64-bits elements of a.
+ * Args : [a0, a1, a2, a3]
+ * Return : a0+a1+a2+a3
+ */
+ static INLINE CONST scalar_t hadd_to_scal(const vect_t a) {
+ Converter ca;
+ ca.v = a;
+ return ca.t[0] + ca.t[1] + ca.t[2] + ca.t[3];
+ }
+
+ /*
+ *
+ * Args : [a0, a1, a2, a3] int64_t
+ [b0, b1, b2, b3] int64_t
+ [c0, c1, c2, c3] int64_t
+ * Return : [c0+a1*b1, c1+a3*b2, c2+a5*b5, c3+a7*b7] int64_t
+ */
+
+ static INLINE CONST vect_t fmaddx(const vect_t c, const vect_t a, const vect_t b) { return add(c, mulx(a, b)); }
+
+ static INLINE vect_t fmaddxin(vect_t &c, const vect_t a, const vect_t b) { return c = fmaddx(c, a, b); }
+
+ static INLINE CONST vect_t fnmaddx(const vect_t c, const vect_t a, const vect_t b) { return sub(c, mulx(a, b)); }
+
+ static INLINE vect_t fnmaddxin(vect_t &c, const vect_t a, const vect_t b) { return c = fnmaddx(c, a, b); }
+
+ static INLINE CONST vect_t round(const vect_t a) { return a; }
+
+ // mask the high 32 bits of a 64 bits, that is 00000000FFFFFFFF
+ static INLINE CONST vect_t mask_high() { return srl(_mm256_set1_epi8(-1), 32); }
+
+ static INLINE CONST vect_t signbits(const vect_t x) {
+ vect_t signBits = sub(zero(), srl(x, 4*sizeof(scalar_t)-1));
+ return signBits;
+ }
+
+ // warning : may be off by 1 multiple, but we save a mul...
+ static INLINE CONST vect_t mulhi_fast(vect_t x, vect_t y) {
+ // unsigned mulhi starts:
+ // x1 = xy_high = mulhiu_fast(x,y)
+ const vect_t mask = mask_high();
+
+ vect_t x0 = vand(x, mask), x1 = srl(x, 32);
+ vect_t y0 = vand(y, mask), y1 = srl(y, 32);
+
+ x0 = mulux(x0, y1); // x0y1
+ y0 = mulux(x1, y0); // x1y0
+ y1 = mulux(x1, y1); // x1y1
+
+ x1 = vand(y0, mask);
+ y0 = srl(y0, 32); // x1y0_lo = x1 // y1yo_hi = y0
+ x1 = srl(add(x1, x0), 32);
+ y0 = add(y1, y0);
+
+ x1 = add(x1, y0);
+ // unsigned mulhi ends
+
+ // fixing signs
+ x0 = vand(signbits(x), y);
+ x1 = sub(x1, x0);
+ x0 = vand(signbits(y), x);
+ x1 = sub(x1, x0);
+ // end fixing
+ return x1;
+ }
+
+ template <bool overflow, bool poweroftwo>
+ static INLINE vect_t mod(vect_t &C, const vect_t &P, const int8_t &shifter, const vect_t &magic, const vect_t &NEGP,
+ const vect_t &MIN, const vect_t &MAX, vect_t &Q, vect_t &T) {
+#ifdef __INTEL_COMPILER
+ // Works fine with ICC 15.0.1 - A.B.
+ C = _mm256_rem_epi64(C, P);
+#else
+ if (poweroftwo) {
+ Q = srl(C, 63);
+ vect_t un = set1(1);
+ T = sub(sll(un, shifter), un);
+ Q = add(C, vand(Q, T));
+ Q = sll(srl(Q, shifter), shifter);
+ C = sub(C, Q);
+ Q = vand(greater(zero(), Q), P);
+ C = add(C, Q);
+ } else {
+ Q = mulhi_fast(C, magic);
+ if (overflow) {
+ Q = add(Q, C);
+ }
+ Q = sra(Q, shifter);
+ vect_t q1 = mulux(Q, P);
+ vect_t q2 = sll(mulux(srl(Q, 32), P), 32);
+ C = sub(C, add(q1, q2));
+ T = greater_eq(C, P);
+ C = sub(C, vand(T, P));
+ }
+#endif
+ NORML_MOD(C, P, NEGP, MIN, MAX, Q, T);
+ return C;
+ }
+
+#else
+
+#error "You need AVX2 instructions to perform 256bits operations on int64_t"
+
+#endif // defined(__FFLASFFPACK_USE_AVX2)
+};
+
+// uint64_t
+template <> struct Simd256_impl<true, true, false, 8> : public Simd256_impl<true, true, true, 8> {
+ using scalar_t = uint64_t;
+
+#if defined(__FFLASFFPACK_USE_AVX2)
+
+ static INLINE CONST vect_t greater(vect_t a, vect_t b) {
+
+ vect_t x;
+ x = set1(-(static_cast<scalar_t>(1) << (sizeof(scalar_t) * 8 - 1)));
+ a = sub(x, a);
+ b = sub(x, b);
+ return _mm256_cmpgt_epi64(a, b);
+ }
+
+ static INLINE CONST vect_t lesser(vect_t a, vect_t b) {
+ vect_t x;
+ x = set1(-(static_cast<scalar_t>(1) << (sizeof(scalar_t) * 8 - 1)));
+ a = sub(x, a);
+ b = sub(x, b);
+ return _mm256_cmpgt_epi64(a, b);
+ }
+
+ static INLINE CONST vect_t greater_eq(const vect_t a, const vect_t b) { return vor(greater(a, b), eq(a, b)); }
+
+ static INLINE CONST vect_t lesser_eq(const vect_t a, const vect_t b) { return vor(lesser(a, b), eq(a, b)); }
+
+#else
+
+#error "You need AVX2 instructions to perform 256bits operations on uint64_t"
+
+#endif // defined(__FFLASFFPACK_USE_AVX2)
+};
+
+#endif // __FFLASFFPACK_fflas_ffpack_utils_simd256_int64_INL
diff --git a/fflas-ffpack/fflas/fflas_simd/simd_modular.inl b/fflas-ffpack/fflas/fflas_simd/simd_modular.inl
new file mode 100644
index 0000000..6daaa85
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_simd/simd_modular.inl
@@ -0,0 +1,179 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla<bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+// functions wih _r are relaxed, meaning no modular reduction
+
+template <class _Field> class FieldSimd {
+ public:
+ using Field = _Field;
+ using Element = typename Field::Element;
+ using simd = Simd<typename _Field::Element>;
+ using vect_t = typename simd::vect_t;
+ using scalar_t = typename simd::scalar_t;
+
+ static const constexpr size_t vect_size = simd::vect_size;
+
+ static const constexpr size_t alignment = simd::alignment;
+
+ private:
+ using Self = FieldSimd<Field>;
+
+ const Field *_field;
+ vect_t _modulus;
+ vect_t _invmod;
+ vect_t _negmod;
+ vect_t _mask;
+ vect_t _min;
+ vect_t _max;
+
+ public:
+ FieldSimd(const Field &f) : _field(&f) { init(); }
+
+ private:
+ void init() {
+ _modulus = simd::set1((Element)_field->characteristic());
+ _min = simd::set1(_field->minElement());
+ _max = simd::set1(_field->maxElement());
+ _negmod = simd::set1(-(Element)_field->characteristic());
+ if (std::is_floating_point<Element>::value) {
+ _invmod = simd::set1(1 / ((Element)_field->characteristic()));
+ }
+ }
+
+ public:
+ FieldSimd(const Self &) = default;
+ FieldSimd(Self &&) = default;
+
+ Self &operator=(const Self &) = default;
+ Self &operator=(Self &&) = default;
+
+ INLINE vect_t init(vect_t &x, const vect_t a) const { return x = mod(a); }
+
+ INLINE vect_t init(const vect_t a) const { return mod(a); }
+
+ INLINE vect_t add(vect_t &c, const vect_t a, const vect_t b) const {
+ c = simd::add(a, b);
+ _mask = simd::greater(c, _max);
+ _mask = simd::vand(_mask, _modulus);
+ return c = simd::sub(c, _mask);
+ }
+
+ INLINE vect_t add(const vect_t a, const vect_t b) const {
+ vect_t c;
+ c = simd::add(a, b);
+ _mask = simd::greater(c, _max);
+ _mask = simd::vand(_mask, _modulus);
+ return c = simd::sub(c, _mask);
+ }
+
+ INLINE vect_t addin(vect_t &a, const vect_t b) const { return a = add(a, b); }
+
+ INLINE vect_t add_r(vect_t &c, const vect_t a, const vect_t b) const { return c = simd::add(a, b); }
+
+ INLINE vect_t add_r(const vect_t a, const vect_t b) const { return simd::add(a, b); }
+
+ INLINE vect_t addin_r(vect_t &a, const vect_t b) const { return a = add_r(a, b); }
+
+ INLINE vect_t sub(vect_t &c, const vect_t a, const vect_t b) const {
+ c = simd::sub(a, b);
+ _mask = simd::lesser(c, _min);
+ _mask = simd::vand(_mask, _modulus);
+ return c = simd::add(c, _mask);
+ }
+
+ INLINE vect_t sub(const vect_t a, const vect_t b) const {
+ vect_t c;
+ c = simd::sub(a, b);
+ _mask = simd::greater(c, _max);
+ _mask = simd::vand(_mask, _modulus);
+ return c = simd::add(c, _mask);
+ }
+
+ INLINE vect_t subin(vect_t &a, const vect_t b) const { return a = sub(a, b); }
+
+ INLINE vect_t sub_r(vect_t &c, const vect_t a, const vect_t b) const { return c = simd::sub(a, b); }
+
+ INLINE vect_t sub_r(const vect_t a, const vect_t b) const { return simd::sub(a, b); }
+
+ INLINE vect_t subin_r(vect_t &a, const vect_t b) const { return a = sub_r(a, b); }
+
+ INLINE vect_t zero(vect_t &x) const { return x = simd::zero(); }
+
+ INLINE vect_t zero() const { return simd::zero(); }
+
+ INLINE vect_t mod(vect_t &c) const {
+ if (std::is_floating_point<Element>::value) {
+ vect_t q, t;
+ q = simd::mul(c, _invmod);
+ q = simd::floor(q);
+ c = simd::fnmadd(c, q, _modulus);
+ q = simd::greater(c, _max);
+ t = simd::lesser(c, _min);
+ q = simd::vand(q, _negmod);
+ t = simd::vand(t, _modulus);
+ q = simd::vor(q, t);
+ return c = simd::add(c, q);
+ } else {
+ FFLASFFPACK_abort("pas implementé");
+ }
+ }
+
+ INLINE vect_t mul(vect_t &c, const vect_t a, const vect_t b) const { return c = mod(simd::mul(a, b)); }
+
+ INLINE vect_t mul(const vect_t a, const vect_t b) const { return mod(simd::mul(a, b)); }
+
+ INLINE vect_t mulin(vect_t &a, const vect_t b) const { return mul(a, a, b); }
+
+ INLINE vect_t mul_r(vect_t &c, const vect_t a, const vect_t b) const { return c = simd::mul(a, b); }
+
+ INLINE vect_t mul_r(const vect_t a, const vect_t b) const { return simd::mul(a, b); }
+
+ INLINE vect_t axpy(vect_t &r, const vect_t a, const vect_t b, const vect_t c) const {
+ return r = mod(simd::fmadd(c, a, b));
+ }
+
+ INLINE vect_t axpy(const vect_t c, const vect_t a, const vect_t b) const { return mod(simd::fmadd(c, a, b)); }
+
+ INLINE vect_t axpyin(vect_t &c, const vect_t a, const vect_t b) const { return c = axpy(c, a, b); }
+
+ INLINE vect_t axpy_r(vect_t &r, const vect_t a, const vect_t b, const vect_t c) const {
+ return r = simd::fmadd(c, a, b);
+ }
+
+ INLINE vect_t axpy_r(const vect_t c, const vect_t a, const vect_t b) const { return simd::fmadd(c, a, b); }
+
+ INLINE vect_t axpyin_r(vect_t &c, const vect_t a, const vect_t b) const { return c = axpy_r(c, a, b); }
+
+ INLINE vect_t maxpy(vect_t &r, const vect_t a, const vect_t b, const vect_t c) const {
+ return r = mod(simd::fmsub(c, a, b));
+ }
+
+ INLINE vect_t maxpy(const vect_t c, const vect_t a, const vect_t b) const { return mod(simd::fmsub(c, a, b)); }
+
+ INLINE vect_t maxpyin(vect_t &c, const vect_t a, const vect_t b) const { return c = maxpy(c, a, b); }
+};
diff --git a/fflas-ffpack/fflas/fflas_sparse.h b/fflas-ffpack/fflas/fflas_sparse.h
new file mode 100644
index 0000000..6425b75
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse.h
@@ -0,0 +1,455 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ * Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_sparse.h
+*/
+
+#ifndef __FFLASFFPACK_fflas_fflas_sparse_H
+#define __FFLASFFPACK_fflas_fflas_sparse_H
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include "fflas-ffpack/config.h"
+#include "fflas-ffpack/config-blas.h"
+#include "fflas-ffpack/paladin/parallel.h"
+
+#include <recint/recint.h>
+#include <givaro/udl.h>
+
+#ifndef index_t
+#define index_t uint32_t
+#endif
+
+#ifdef __FFLASFFPACK_HAVE_MKL
+#ifndef _MKL_H_ // temporary
+#error "MKL (mkl.h) not present, while you have MKL enabled"
+#endif
+#undef index_t
+#define index_t MKL_INT
+
+#endif // __FFLASFFPACK_HAVE_MKL
+
+// Bigger multiple of s lesser or equal than x, s must be a power of two
+#ifndef ROUND_DOWN
+#define ROUND_DOWN(x, s) ((x) & ~((s)-1))
+#endif
+
+#ifndef __FFLASFFPACK_CACHE_LINE_SIZE
+#define __FFLASFFPACK_CACHE_LINE_SIZE 64
+#endif
+
+#if (__GNUC_MAJOR > 4 || (__GNUC_MAJOR == 4 &&__GNUC_MINOR__ >= 7)) || defined(__clang__)
+ #define assume_aligned(pout, pin, v) decltype(pin) __restrict__ pout = static_cast<decltype(pin)>(__builtin_assume_aligned(pin, v));
+#elif defined(__INTEL_COMPILER)
+ #define assume_aligned(pout, pin, v) \
+ decltype(pin) __restrict pout = pin; \
+ __assume_aligned(pout)
+#else
+ #define assume_aligned(pout, pin, v) decltype(pin) pout = pin;
+#endif
+
+#define DENSE_THRESHOLD 0.5
+
+#include "fflas-ffpack/fflas/fflas.h"
+
+#include "fflas-ffpack/field/field-traits.h"
+#include "fflas-ffpack/fflas/fflas_bounds.inl"
+#include "fflas-ffpack/utils/fflas_memory.h"
+#include "fflas-ffpack/paladin/parallel.h"
+
+#ifdef __FFLASFFPACK_USE_SIMD
+#include "fflas-ffpack/fflas/fflas_simd.h"
+#endif
+
+#include <type_traits>
+#include <vector>
+#include <iostream>
+
+
+namespace MKL_CONFIG {
+ static const double dalpha = 1;
+ static const float salpha = 1;
+ static const double dbeta = 0;
+ static const float sbeta = 0;
+ static const char metaChar[4] = {'G', ' ', ' ', 'C'};
+ static const char trans[1] = {'N'};
+}
+
+namespace FFLAS {
+
+enum class SparseMatrix_t {
+ CSR,
+ CSR_ZO,
+ CSC,
+ CSC_ZO,
+ COO,
+ COO_ZO,
+ ELL,
+ ELL_ZO,
+ SELL,
+ SELL_ZO,
+ ELL_simd,
+ ELL_simd_ZO,
+ CSR_HYB,
+ HYB_ZO
+};
+
+template <class Field, SparseMatrix_t, class IdxT = index_t, class PtrT = index_t> struct Sparse;
+
+} // FFLAS
+#include "fflas-ffpack/fflas/fflas_sparse/sparse_matrix_traits.h"
+#include "fflas-ffpack/fflas/fflas_sparse/utils.h"
+#include "fflas-ffpack/fflas/fflas_sparse/csr.h"
+#include "fflas-ffpack/fflas/fflas_sparse/coo.h"
+#include "fflas-ffpack/fflas/fflas_sparse/ell.h"
+#include "fflas-ffpack/fflas/fflas_sparse/csr_hyb.h"
+#include "fflas-ffpack/fflas/fflas_sparse/ell_simd.h"
+#include "fflas-ffpack/fflas/fflas_sparse/hyb_zo.h"
+// #include "fflas-ffpack/fflas/fflas_sparse/sparse_matrix.h"
+
+namespace FFLAS {
+
+/*********************************************************************************************************************
+ *
+ * Sparse Details
+ *
+ *********************************************************************************************************************/
+
+namespace sparse_details {
+
+template <class Field>
+inline void init_y(const Field &F, const size_t m, const typename Field::Element b, typename Field::Element_ptr y);
+
+template <class Field>
+inline void init_y(const Field &F, const size_t m, const size_t n, const typename Field::Element b,
+ typename Field::Element_ptr y, const int ldy);
+
+/*************************************
+ fspmv
+**************************************/
+
+template <class Field, class SM, class FC, class MZO>
+inline typename std::enable_if<
+ !(std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineFloatTag>::value ||
+ std::is_same<typename ElementTraits<typename Field::Element>::value,
+ ElementCategories::MachineIntTag>::value)>::type
+fspmv_dispatch(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y, FC fc,
+ MZO mzo);
+
+template <class Field, class SM, class FC, class MZO>
+inline typename std::enable_if<
+ std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineFloatTag>::value ||
+ std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineIntTag>::value>::type
+fspmv_dispatch(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y, FC fc,
+ MZO mzo);
+
+// non ZO matrix
+template <class Field, class SM>
+inline void fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::GenericTag, NotZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<!isSparseMatrixSimdFormat<Field, SM>::value>::type
+fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::UnparametricTag, NotZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<isSparseMatrixSimdFormat<Field, SM>::value>::type
+fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::UnparametricTag, NotZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<!isSparseMatrixSimdFormat<Field, SM>::value>::type
+fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::ModularTag, NotZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<isSparseMatrixSimdFormat<Field, SM>::value>::type
+fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::ModularTag, NotZOSparseMatrix);
+
+// ZO matrix
+
+template <class Field, class SM>
+inline void fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::GenericTag, ZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<!isSparseMatrixSimdFormat<Field, SM>::value>::type
+fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::UnparametricTag, ZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<isSparseMatrixSimdFormat<Field, SM>::value>::type
+fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::UnparametricTag, ZOSparseMatrix);
+
+template <class Field, class SM>
+inline void fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::ModularTag, std::true_type);
+
+/*************************************
+ fspmm
+**************************************/
+
+template <class Field, class SM, class FCat, class MZO>
+inline typename std::enable_if<
+ !(std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineFloatTag>::value ||
+ std::is_same<typename ElementTraits<typename Field::Element>::value,
+ ElementCategories::MachineIntTag>::value)>::type
+fspmm_dispatch(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FCat, MZO);
+
+template <class Field, class SM, class FCat, class MZO>
+inline typename std::enable_if<
+ std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineFloatTag>::value ||
+ std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineIntTag>::value>::type
+fspmm_dispatch(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FCat, MZO);
+
+template <class Field, class SM>
+inline void fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::GenericTag, NotZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<support_simd<typename Field::Element>::value>::type
+fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, NotZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<!support_simd<typename Field::Element>::value>::type
+fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, NotZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<support_simd<typename Field::Element>::value>::type
+fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, NotZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<!support_simd<typename Field::Element>::value>::type
+fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, NotZOSparseMatrix);
+
+// ZO matrix
+template <class Field, class SM>
+inline void fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::GenericTag, ZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<support_simd<typename Field::Element>::value>::type
+fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, ZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<!support_simd<typename Field::Element>::value>::type
+fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, ZOSparseMatrix);
+
+template <class Field, class SM>
+inline void fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, ZOSparseMatrix);
+
+/*************************************
+ pfspmm
+**************************************/
+
+template <class Field, class SM, class FCat, class MZO>
+inline typename std::enable_if<
+ !(std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineFloatTag>::value ||
+ std::is_same<typename ElementTraits<typename Field::Element>::value,
+ ElementCategories::MachineIntTag>::value)>::type
+pfspmm_dispatch(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FCat, MZO);
+
+template <class Field, class SM, class FCat, class MZO>
+inline typename std::enable_if<
+ std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineFloatTag>::value ||
+ std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineIntTag>::value>::type
+pfspmm_dispatch(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FCat, MZO);
+
+template <class Field, class SM>
+inline void pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::GenericTag, NotZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<support_simd<typename Field::Element>::value>::type
+pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, NotZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<!support_simd<typename Field::Element>::value>::type
+pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, NotZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<support_simd<typename Field::Element>::value>::type
+pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, NotZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<!support_simd<typename Field::Element>::value>::type
+pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, NotZOSparseMatrix);
+
+// ZO matrix
+template <class Field, class SM>
+inline void pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::GenericTag, ZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<support_simd<typename Field::Element>::value>::type
+pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, ZOSparseMatrix);
+
+template <class Field, class SM>
+inline typename std::enable_if<!support_simd<typename Field::Element>::value>::type
+pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, ZOSparseMatrix);
+
+template <class Field, class SM>
+inline void pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, ZOSparseMatrix);
+
+/*************************************
+ pfspmv
+**************************************/
+template <class Field, class SM>
+inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::GenericTag, std::false_type);
+
+template <class Field, class SM>
+inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::UnparametricTag, std::false_type);
+
+template <class Field, class SM>
+inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::ModularTag, std::false_type);
+
+template <class Field, class SM>
+inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::GenericTag, std::true_type);
+
+template <class Field, class SM>
+inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::UnparametricTag, std::true_type);
+
+template <class Field, class SM>
+inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::ModularTag, std::true_type);
+
+} // sparse_details
+
+/*********************************************************************************************************************
+ *
+ * SpMV, SpMM, pSpMV, pSpMM
+ *
+ *********************************************************************************************************************/
+
+template <class Field, class SM>
+inline void fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, const typename Field::Element &beta,
+ typename Field::Element_ptr y);
+
+template <class Field, class SM>
+inline void fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ const typename Field::Element &beta, typename Field::Element_ptr y, int ldy);
+
+#if defined(__FFLASFFPACK_USE_OPENMP)
+template <class Field, class SM>
+inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, const typename Field::Element &beta,
+ typename Field::Element_ptr y);
+
+template <class Field, class SM>
+inline void pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ const typename Field::Element &beta, typename Field::Element_ptr y, int ldy);
+#endif
+}
+
+#include "fflas-ffpack/fflas/fflas_sparse.inl"
+
+#include "fflas-ffpack/fflas/fflas_sparse/read_sparse.h"
+
+
+namespace FFLAS {
+ struct HelperFlag {
+ static constexpr uint64_t none = 0_ui64;
+ static constexpr uint64_t coo = 1_ui64;
+ static constexpr uint64_t csr = 1_ui64 << 1;
+ static constexpr uint64_t ell = 1_ui64 << 2;
+ static constexpr uint64_t aut = 1_ui64 << 32;
+ static constexpr uint64_t pm1 = 1_ui64 << 33;
+ };
+
+ template<class Field>
+ struct CsrMat {
+ typename FFLAS::Sparse<Field,SparseMatrix_t::CSR,int16_t> * _csr16 = nullptr;
+ typename FFLAS::Sparse<Field,SparseMatrix_t::CSR,int32_t> * _csr32 = nullptr ;
+ typename FFLAS::Sparse<Field,SparseMatrix_t::CSR,int64_t> * _csr64 = nullptr ;
+
+ typename FFLAS::Sparse<Field,SparseMatrix_t::CSR_ZO,int16_t> * _csr16_zo = nullptr ;
+ typename FFLAS::Sparse<Field,SparseMatrix_t::CSR_ZO,int32_t> * _csr32_zo = nullptr ;
+ typename FFLAS::Sparse<Field,SparseMatrix_t::CSR_ZO,int64_t> * _csr64_zo = nullptr ;
+ };
+
+ template<class Field>
+ struct CooMat {
+ typename FFLAS::Sparse<Field,SparseMatrix_t::COO,int16_t> * _coo16 = nullptr;
+ typename FFLAS::Sparse<Field,SparseMatrix_t::COO,int32_t> * _coo32 = nullptr ;
+ typename FFLAS::Sparse<Field,SparseMatrix_t::COO,int64_t> * _coo64 = nullptr ;
+
+ typename FFLAS::Sparse<Field,SparseMatrix_t::COO_ZO,int16_t> * _coo16_zo = nullptr ;
+ typename FFLAS::Sparse<Field,SparseMatrix_t::COO_ZO,int32_t> * _coo32_zo = nullptr ;
+ typename FFLAS::Sparse<Field,SparseMatrix_t::COO_ZO,int64_t> * _coo64_zo = nullptr ;
+ };
+
+ template<class Field>
+ struct EllMat {
+ typename FFLAS::Sparse<Field,SparseMatrix_t::ELL,int16_t> * _ell16 = nullptr;
+ typename FFLAS::Sparse<Field,SparseMatrix_t::ELL,int32_t> * _ell32 = nullptr ;
+ typename FFLAS::Sparse<Field,SparseMatrix_t::ELL,int64_t> * _ell64 = nullptr ;
+
+ typename FFLAS::Sparse<Field,SparseMatrix_t::ELL_ZO,int16_t> * _ell16_zo = nullptr ;
+ typename FFLAS::Sparse<Field,SparseMatrix_t::ELL_ZO,int32_t> * _ell32_zo = nullptr ;
+ typename FFLAS::Sparse<Field,SparseMatrix_t::ELL_ZO,int64_t> * _ell64_zo = nullptr ;
+ };
+
+
+ template<class Field, int flag = HelperFlag::none >
+ struct SpMat {
+ typename FFLAS::CooMat<Field> * _coo = nullptr ;
+ typename FFLAS::CsrMat<Field> * _csr = nullptr ;
+ typename FFLAS::EllMat<Field> * _ell = nullptr ;
+ };
+}
+
+#undef ROUND_DOWN
+#undef DENSE_THRESHOLD
+#undef assume_aligned
+
+#endif // __FFLASFFPACK_fflas_fflas_sparse_H
diff --git a/fflas-ffpack/fflas/fflas_sparse.inl b/fflas-ffpack/fflas/fflas_sparse.inl
new file mode 100644
index 0000000..1848afd
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse.inl
@@ -0,0 +1,892 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ * Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_sparse.inl
+ */
+
+#ifndef __FFLASFFPACK_fflas_fflas_sparse_INL
+#define __FFLASFFPACK_fflas_fflas_sparse_INL
+
+namespace FFLAS {
+ namespace sparse_details {
+ template <class Field>
+ inline void init_y(const Field &F, const size_t m, const typename Field::Element b, typename Field::Element_ptr y) {
+ if (!F.isOne(b)) {
+ if (F.isZero(b)) {
+ fzero(F, m, y, 1);
+ } else if (F.isMOne(b)) {
+ fnegin(F, m, y, 1);
+ } else {
+ fscalin(F, m, b, y, 1);
+ }
+ }
+ }
+
+ template <class Field>
+ inline void init_y(const Field &F, const size_t m, const size_t n, const typename Field::Element b,
+ typename Field::Element_ptr y, const int ldy) {
+ if (!F.isOne(b)) {
+ if (F.isZero(b)) {
+ fzero(F, m, n, y, ldy);
+ } else if (F.isMOne(b)) {
+ fnegin(F, m, n, y, 1);
+ } else {
+ fscalin(F, m, n, b, y, 1);
+ }
+ }
+ }
+
+ } // sparse_details
+
+ namespace sparse_details {
+
+ /*************************************************************************************
+ *
+ * fspmv dispatch
+ *
+ *************************************************************************************/
+
+ template <class Field, class SM, class FC, class MZO>
+ inline typename std::enable_if<
+ !(std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineFloatTag>::value ||
+ std::is_same<typename ElementTraits<typename Field::Element>::value,
+ ElementCategories::MachineIntTag>::value)>::type
+ fspmv_dispatch(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y, FC fc,
+ MZO mzo) {
+ sparse_details::fspmv(F, A, x, y, FieldCategories::GenericTag(), MZO());
+ }
+
+ template <class Field, class SM, class FC, class MZO>
+ inline typename std::enable_if<
+ std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineFloatTag>::value ||
+ std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineIntTag>::value>::type
+ fspmv_dispatch(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y, FC fc,
+ MZO mzo) {
+ sparse_details::fspmv(F, A, x, y, FC(), MZO());
+ }
+
+ // non ZO matrix
+ template <class Field, class SM>
+ inline void
+ fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::GenericTag, NotZOSparseMatrix) {
+ sparse_details_impl::fspmv(F, A, x, y, FieldCategories::GenericTag());
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<!isSparseMatrixSimdFormat<Field, SM>::value>::type
+ fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::UnparametricTag, NotZOSparseMatrix) {
+ sparse_details_impl::fspmv(F, A, x, y, FieldCategories::UnparametricTag());
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<isSparseMatrixSimdFormat<Field, SM>::value &&
+ support_simd<typename Field::Element>::value
+ >::type
+ fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::UnparametricTag, NotZOSparseMatrix) {
+ // #ifdef __FFLASFFPACK_USE_SIMD
+ sparse_details_impl::fspmv_simd(F, A, x, y, FieldCategories::UnparametricTag());
+ // #else
+ // sparse_details_impl::fspmv(F, A, x, y, FieldCategories::UnparametricTag());
+ // #endif
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<!isSparseMatrixSimdFormat<Field, SM>::value>::type
+ fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::ModularTag, NotZOSparseMatrix) {
+ if (A.delayed) {
+ sparse_details::fspmv(F, A, x, y, FieldCategories::UnparametricTag(), std::false_type());
+ freduce(F, A.m, y, 1);
+ } else {
+ sparse_details_impl::fspmv(F, A, x, y, A.kmax);
+ }
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<isSparseMatrixSimdFormat<Field, SM>::value &&
+ support_simd<typename Field::Element>::value
+ >::type
+ fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::ModularTag, NotZOSparseMatrix) {
+ // #ifdef __FFLASFFPACK_USE_SIMD
+ if (A.delayed) {
+ sparse_details::fspmv(F, A, x, y, FieldCategories::UnparametricTag(), std::false_type());
+ freduce(F, A.m, y, 1);
+ } else {
+ sparse_details_impl::fspmv_simd(F, A, x, y, A.kmax);
+ }
+ // #else
+ // if (A.delayed) {
+ // sparse_details::fspmv(F, A, x, y, FieldCategories::UnparametricTag(), std::false_type());
+ // freduce(F, A.m, y, 1);
+ // } else {
+ // sparse_details_impl::fspmv(F, A, x, y, A.kmax);
+ // }
+ // #endif
+ }
+
+ // ZO matrix
+ template <class Field, class SM>
+ inline void
+ fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::GenericTag, ZOSparseMatrix) {
+ if (A.cst == 1) {
+ sparse_details_impl::fspmv_one(F, A, x, y, FieldCategories::GenericTag());
+ } else if (A.cst == -1) {
+ sparse_details_impl::fspmv_mone(F, A, x, y, FieldCategories::GenericTag());
+ } else {
+ auto x1 = fflas_new(F, A.n, 1, Alignment::CACHE_LINE);
+ fscal(F, A.n, A.cst, x, 1, x1, 1);
+ sparse_details_impl::fspmv_one(F, A, x, y, FieldCategories::GenericTag());
+ fflas_delete(x1);
+ }
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<!isSparseMatrixSimdFormat<Field, SM>::value>::type
+ fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::UnparametricTag, ZOSparseMatrix) {
+ if (A.cst == 1) {
+ sparse_details_impl::fspmv_one(F, A, x, y, FieldCategories::UnparametricTag());
+ } else if (A.cst == -1) {
+ sparse_details_impl::fspmv_mone(F, A, x, y, FieldCategories::UnparametricTag());
+ } else {
+ auto x1 = fflas_new(F, A.n, 1, Alignment::CACHE_LINE);
+ fscal(F, A.n, A.cst, x, 1, x1, 1);
+ sparse_details_impl::fspmv_one(F, A, x, y, FieldCategories::UnparametricTag());
+ fflas_delete(x1);
+ }
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<isSparseMatrixSimdFormat<Field, SM>::value &&
+ support_simd<typename Field::Element>::value
+ >::type
+ fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::UnparametricTag, ZOSparseMatrix) {
+ // #ifdef __FFLASFFPACK_USE_SIMD
+ if (A.cst == 1) {
+ sparse_details_impl::fspmv_one_simd(F, A, x, y, FieldCategories::UnparametricTag());
+ } else if (A.cst == -1) {
+ sparse_details_impl::fspmv_mone_simd(F, A, x, y, FieldCategories::UnparametricTag());
+ } else {
+ auto x1 = fflas_new(F, A.n, 1, Alignment::CACHE_LINE);
+ fscal(F, A.n, A.cst, x, 1, x1, 1);
+ sparse_details_impl::fspmv_one_simd(F, A, x, y, FieldCategories::UnparametricTag());
+ fflas_delete(x1);
+ }
+ // #else
+ // if (A.cst == 1) {
+ // sparse_details_impl::fspmv_one(F, A, x, y, FieldCategories::UnparametricTag());
+ // } else if (A.cst == -1) {
+ // sparse_details_impl::fspmv_mone(F, A, x, y, FieldCategories::UnparametricTag());
+ // } else {
+ // auto x1 = fflas_new(F, A.n, 1, Alignment::CACHE_LINE);
+ // fscal(F, A.n, A.cst, x, 1, x1, 1);
+ // sparse_details_impl::fspmv_one(F, A, x, y, FieldCategories::UnparametricTag());
+ // fflas_delete(x1);
+ // }
+ // #endif // SIMD
+ }
+
+ template <class Field, class SM>
+ inline void
+ fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::ModularTag, std::true_type) {
+ sparse_details::fspmv<Field, SM>(F, A, x, y, FieldCategories::UnparametricTag(), std::true_type());
+ freduce(F, A.m, y, 1);
+ }
+
+ /*************************************************************************************
+ *
+ * fspmm dispatch
+ *
+ *************************************************************************************/
+
+ template <class Field, class SM, class FCat, class MZO>
+ inline typename std::enable_if<
+ !(std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineFloatTag>::value ||
+ std::is_same<typename ElementTraits<typename Field::Element>::value,
+ ElementCategories::MachineIntTag>::value)>::type
+ fspmm_dispatch(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FCat, MZO) {
+ sparse_details::fspmm(F, A, blockSize, x, ldx, y, ldy, typename FieldCategories::GenericTag(), MZO());
+ }
+
+ template <class Field, class SM, class FCat, class MZO>
+ inline typename std::enable_if<
+ std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineFloatTag>::value ||
+ std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineIntTag>::value>::type
+ fspmm_dispatch(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FCat, MZO) {
+ sparse_details::fspmm(F, A, blockSize, x, ldx, y, ldy, FCat(), MZO());
+ }
+
+ template <class Field, class SM>
+ inline void
+ fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::GenericTag, NotZOSparseMatrix) {
+ sparse_details_impl::fspmm(F, A, blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<support_simd<typename Field::Element>::value>::type
+ fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, NotZOSparseMatrix) {
+ using simd = Simd<typename Field::Element>;
+ if (simd::valid(y) && simd::valid(x) && simd::compliant(blockSize)) {
+ sparse_details_impl::fspmm_simd_aligned(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ } else {
+ sparse_details_impl::fspmm_simd_unaligned(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ }
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<!support_simd<typename Field::Element>::value>::type
+ fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, NotZOSparseMatrix) {
+ sparse_details_impl::fspmm(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<support_simd<typename Field::Element>::value>::type
+ fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, NotZOSparseMatrix) {
+ if (A.delayed) {
+ sparse_details::fspmm(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag(),
+ typename std::false_type());
+ freduce(F, A.m, blockSize, y, ldy);
+ } else {
+ using simd = Simd<typename Field::Element>;
+ if (simd::valid(y) && simd::valid(x) && simd::compliant(blockSize)) {
+ sparse_details_impl::fspmm_simd_aligned(F, A, blockSize, x, ldx, y, ldy, A.kmax);
+ } else {
+ sparse_details_impl::fspmm_simd_unaligned(F, A, blockSize, x, ldx, y, ldy, A.kmax);
+ }
+ }
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<!support_simd<typename Field::Element>::value>::type
+ fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, NotZOSparseMatrix) {
+ if (A.delayed) {
+ sparse_details::fspmm(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag(), NotZOSparseMatrix());
+ freduce(F, A.m, blockSize, y, ldy);
+ } else {
+ sparse_details_impl::fspmm(F, A, blockSize, x, ldx, y, ldy, A.kmax);
+ }
+ }
+
+ // ZO matrix
+ template <class Field, class SM>
+ inline void
+ fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::GenericTag, ZOSparseMatrix) {
+ if (F.isOne(A.cst)) {
+ sparse_details_impl::fspmm_one(F, A, blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ } else if (F.isMOne(A.cst)) {
+ sparse_details_impl::fspmm_mone(F, A, blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ } else {
+ auto x1 = fflas_new(F, A.m, blockSize, Alignment::CACHE_LINE);
+ fscal(F, A.m, blockSize, A.cst, x, ldx, x1, 1);
+ sparse_details_impl::fspmm_one(F, A, blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ fflas_delete(x1);
+ }
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<support_simd<typename Field::Element>::value>::type
+ fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, ZOSparseMatrix) {
+ using simd = Simd<typename Field::Element>;
+ if (F.isOne(A.cst)) {
+ if (simd::valid(x) && simd::valid(y) && simd::compliant(blockSize)) {
+ sparse_details_impl::fspmm_one_simd_aligned(F, A, blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ } else {
+ sparse_details_impl::fspmm_one_simd_unaligned(F, A, blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ }
+ } else if (F.isMOne(A.cst)) {
+ if (simd::valid(x) && simd::valid(y) && simd::compliant(blockSize)) {
+ sparse_details_impl::fspmm_mone_simd_aligned(F, A, blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ } else {
+ sparse_details_impl::fspmm_mone_simd_unaligned(F, A, blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ }
+ } else {
+ auto x1 = fflas_new(F, A.m, blockSize, Alignment::CACHE_LINE);
+ fscal(F, A.m, blockSize, A.cst, x, ldx, x1, 1);
+ if (simd::valid(x) && simd::valid(y) && simd::compliant(blockSize)) {
+ sparse_details_impl::fspmm_one_simd_aligned(F, A, blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ } else {
+ sparse_details_impl::fspmm_one_simd_unaligned(F, A, blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ }
+ fflas_delete(x1);
+ }
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<!support_simd<typename Field::Element>::value>::type
+ fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, ZOSparseMatrix) {
+ if (F.isOne(A.cst)) {
+ sparse_details_impl::fspmm_one(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ } else if (F.isMOne(A.cst)) {
+ sparse_details_impl::fspmm_mone(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ } else {
+ auto x1 = fflas_new(F, A.m, blockSize, Alignment::CACHE_LINE);
+ fscal(F, A.m, blockSize, A.cst, x, ldx, x1, 1);
+ sparse_details_impl::fspmm_one(F, A, blockSize, x1, ldx, y, ldy, FieldCategories::UnparametricTag());
+ fflas_delete(x1);
+ }
+ }
+
+ template <class Field, class SM>
+ inline void
+ fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, ZOSparseMatrix) {
+ sparse_details::fspmm(F, A, blockSize, x, ldx, y, ldy, typename FieldCategories::UnparametricTag(),
+ ZOSparseMatrix());
+ freduce(F, blockSize, A.m, y, ldy);
+ }
+
+#if defined(__FFLASFFPACK_USE_OPENMP)
+
+ /*************************************************************************************
+ *
+ * pfspmm dispatch
+ *
+ *************************************************************************************/
+
+ template <class Field, class SM, class FCat, class MZO>
+ inline typename std::enable_if<
+ !(std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineFloatTag>::value ||
+ std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineIntTag>::value)>::type
+ pfspmm_dispatch(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FCat, MZO) {
+ sparse_details::pfspmm(F, A, blockSize, x, ldx, y, ldy, typename FieldCategories::GenericTag(), MZO());
+ }
+
+ template <class Field, class SM, class FCat, class MZO>
+ inline typename std::enable_if<
+ std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineFloatTag>::value ||
+ std::is_same<typename ElementTraits<typename Field::Element>::value, ElementCategories::MachineIntTag>::value>::type
+ pfspmm_dispatch(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FCat, MZO) {
+ sparse_details::pfspmm(F, A, blockSize, x, ldx, y, ldy, FCat(), MZO());
+ }
+
+ template <class Field, class SM>
+ inline void
+ pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::GenericTag, NotZOSparseMatrix) {
+ sparse_details_impl::pfspmm(F, A, blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ }
+
+#if defined(__FFLASFFPACK_USE_SIMD)
+
+ template <class Field, class SM>
+ inline typename std::enable_if<support_simd<typename Field::Element>::value>::type
+ pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, NotZOSparseMatrix) {
+ using simd = Simd<typename Field::Element>;
+ if (simd::valid(y) && simd::valid(x) && simd::compliant(blockSize)) {
+ sparse_details_impl::pfspmm_simd_aligned(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ } else {
+ sparse_details_impl::pfspmm_simd_unaligned(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ }
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<!support_simd<typename Field::Element>::value>::type
+ pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, NotZOSparseMatrix) {
+ sparse_details_impl::pfspmm(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<support_simd<typename Field::Element>::value>::type
+ pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, NotZOSparseMatrix) {
+ if (A.delayed) {
+ sparse_details::pfspmm(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag(),
+ typename std::false_type());
+ freduce(F, A.m, blockSize, y, ldy);
+ } else {
+ using simd = Simd<typename Field::Element>;
+ if (simd::valid(y) && simd::valid(x) && simd::compliant(blockSize)) {
+ sparse_details_impl::pfspmm_simd_aligned(F, A, blockSize, x, ldx, y, ldy, A.kmax);
+ } else {
+ sparse_details_impl::pfspmm_simd_unaligned(F, A, blockSize, x, ldx, y, ldy, A.kmax);
+ }
+ }
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<!support_simd<typename Field::Element>::value>::type
+ pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, NotZOSparseMatrix) {
+ if (A.delayed) {
+ sparse_details::pfspmm(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag(), NotZOSparseMatrix());
+ freduce(F, A.m, blockSize, y, ldy);
+ } else {
+ sparse_details_impl::pfspmm(F, A, blockSize, x, ldx, y, ldy, A.kmax);
+ }
+ }
+
+#endif // __FFLASFFPACK_USE_SIMD
+
+ // ZO matrix
+ template <class Field, class SM>
+ inline void
+ pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::GenericTag, ZOSparseMatrix) {
+ if (F.isOne(A.cst)) {
+ sparse_details_impl::pfspmm_one(F, A, blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ } else if (F.isMOne(A.cst)) {
+ sparse_details_impl::pfspmm_mone(F, A, blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ } else {
+ auto x1 = fflas_new(F, A.m, blockSize, Alignment::CACHE_LINE);
+ fscal(F, A.m, blockSize, A.cst, x, ldx, x1, 1);
+ sparse_details_impl::pfspmm_one(F, A, blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ fflas_delete(x1);
+ }
+ }
+
+#if defined(__FFLASFFPACK_USE_SIMD)
+
+ template <class Field, class SM>
+ inline typename std::enable_if<support_simd<typename Field::Element>::value>::type
+ pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, ZOSparseMatrix) {
+ using simd = Simd<typename Field::Element>;
+ if (F.isOne(A.cst)) {
+ if (simd::valid(x) && simd::valid(y) && simd::compliant(blockSize)) {
+ sparse_details_impl::pfspmm_one_simd_aligned(F, A, blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ } else {
+ sparse_details_impl::pfspmm_one_simd_unaligned(F, A, blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ }
+ } else if (F.isMOne(A.cst)) {
+ if (simd::valid(x) && simd::valid(y) && simd::compliant(blockSize)) {
+ sparse_details_impl::pfspmm_mone_simd_aligned(F, A, blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ } else {
+ sparse_details_impl::pfspmm_mone_simd_unaligned(F, A, blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ }
+ } else {
+ auto x1 = fflas_new(F, A.m, blockSize, Alignment::CACHE_LINE);
+ fscal(F, A.m, blockSize, A.cst, x, ldx, x1, 1);
+ if (simd::valid(x) && simd::valid(y) && simd::compliant(blockSize)) {
+ sparse_details_impl::pfspmm_one_simd_aligned(F, A, blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ } else {
+ sparse_details_impl::pfspmm_one_simd_unaligned(F, A, blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ }
+ fflas_delete(x1);
+ }
+ }
+
+ template <class Field, class SM>
+ inline typename std::enable_if<!support_simd<typename Field::Element>::value>::type
+ pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, ZOSparseMatrix) {
+ if (F.isOne(A.cst)) {
+ sparse_details_impl::pfspmm_one(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ } else if (F.isMOne(A.cst)) {
+ sparse_details_impl::pfspmm_mone(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ } else {
+ auto x1 = fflas_new(F, A.m, blockSize, Alignment::CACHE_LINE);
+ fscal(F, A.m, blockSize, A.cst, x, ldx, x1, 1);
+ sparse_details_impl::pfspmm_one(F, A, blockSize, x1, ldx, y, ldy, FieldCategories::UnparametricTag());
+ fflas_delete(x1);
+ }
+ }
+
+ template <class Field, class SM>
+ inline void
+ pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, ZOSparseMatrix) {
+ sparse_details::pfspmm(F, A, blockSize, x, ldx, y, ldy, typename FieldCategories::UnparametricTag(),
+ ZOSparseMatrix());
+ freduce(F, blockSize, A.m, y, ldy);
+ }
+
+#endif // __FFLASFFPACK_USE_SIMD
+
+ // /***************************** pfspmv ******************************/
+
+ // #if defined(__FFLASFFPACK_USE_OPENMP)
+
+ // template <class Field, class SM, class FC, class MZO>
+ // inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ // FieldCategories::MultiPrecisionTag ,FC fc, MZO mzo) {
+ // sparse_details::pfspmv(F, A, x, y, FieldCategories::GenericTag(), MZO());
+ // }
+
+ // template <class Field, class SM, class FC, class MZO>
+ // inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ // FieldCategories::GenericTag ,FC fc, MZO mzo) {
+ // sparse_details::pfspmv(F, A, x, y, FC(), MZO());
+ // }
+
+ template <class Field, class SM>
+ inline void pfspmv(const Field &F, const SM &A,
+ typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y,
+ FieldCategories::GenericTag tag, std::false_type) {
+ sparse_details_impl::pfspmv(F, A, x, y, tag);
+ }
+
+ // template <class Field, class SM>
+ // inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ // FieldCategories::UnparametricTag, std::false_type) {
+ // sparse_details_impl::pfspmv(F, A, x, y, FieldCategories::UnparametricTag());
+ // }
+
+ // template <class Field, class SM>
+ // inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ // FieldCategories::ModularTag, std::false_type) {
+ // if (A.delayed) {
+ // sparse_details::pfspmv(F, A, x, y, FieldCategories::UnparametricTag(), std::false_type());
+ // freduce(F, A.m, y, 1);
+ // } else {
+ // sparse_details_impl::pfspmv(F, A, x, y, A.kmax);
+ // }
+ // }
+
+ // // ZO matrix
+ // template <class Field, class SM>
+ // inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ // FieldCategories::GenericTag, std::true_type) {
+ // if (A.cst == 1) {
+ // sparse_details_impl::pfspmv_one(F, A, x, y, FieldCategories::GenericTag());
+ // } else if (A.cst == -1) {
+ // sparse_details_impl::pfspmv_mone(F, A, x, y, FieldCategories::GenericTag());
+ // } else {
+ // auto x1 = fflas_new(F, A.n, 1, Alignment::CACHE_LINE);
+ // fscal(F, A.n, A.cst, x, 1, x1, 1);
+ // sparse_details_impl::pfspmv_one(F, A, x, y, FieldCategories::GenericTag());
+ // fflas_delete(x1);
+ // }
+ // }
+
+ // template <class Field>
+ // inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::SELL> &A, typename Field::ConstElement_ptr x,
+ // typename Field::Element_ptr y, FieldCategories::UnparametricTag, std::true_type) {
+ // #ifdef __FFLASFFPACK_USE_SIMD
+ // if (A.cst == 1) {
+ // sparse_details_impl::pfspmv_one_simd(F, A, x, y, FieldCategories::UnparametricTag());
+ // } else if (A.cst == -1) {
+ // sparse_details_impl::pfspmv_mone_simd(F, A, x, y, FieldCategories::UnparametricTag());
+ // } else {
+ // auto x1 = fflas_new(F, A.n, 1, Alignment::CACHE_LINE);
+ // fscal(F, A.n, A.cst, x, 1, x1, 1);
+ // sparse_details_impl::pfspmv_one_simd(F, A, x, y, FieldCategories::UnparametricTag());
+ // fflas_delete(x1);
+ // }
+ // #else
+ // if (A.cst == 1) {
+ // sparse_details_impl::pfspmv_one(F, A, x, y, FieldCategories::UnparametricTag());
+ // } else if (A.cst == -1) {
+ // sparse_details_impl::pfspmv_mone(F, A, x, y, FieldCategories::UnparametricTag());
+ // } else {
+ // auto x1 = fflas_new(F, A.n, 1, Alignment::CACHE_LINE);
+ // fscal(F, A.n, A.cst, x, 1, x1, 1);
+ // sparse_details_impl::pfspmv_one(F, A, x, y, FieldCategories::UnparametricTag());
+ // fflas_delete(x1);
+ // }
+ // #endif // SIMD
+ // }
+
+ // template <class Field>
+ // inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd> &A, typename Field::ConstElement_ptr
+ // x,
+ // typename Field::Element_ptr y, FieldCategories::UnparametricTag, std::true_type) {
+ // #ifdef __FFLASFFPACK_USE_SIMD
+ // if (A.cst == 1) {
+ // sparse_details_impl::pfspmv_one_simd(F, A, x, y, FieldCategories::UnparametricTag());
+ // } else if (A.cst == -1) {
+ // sparse_details_impl::pfspmv_mone_simd(F, A, x, y, FieldCategories::UnparametricTag());
+ // } else {
+ // auto x1 = fflas_new(F, A.n, 1, Alignment::CACHE_LINE);
+ // fscal(F, A.n, A.cst, x, 1, x1, 1);
+ // sparse_details_impl::pfspmv_one_simd(F, A, x, y, FieldCategories::UnparametricTag());
+ // fflas_delete(x1);
+ // }
+ // #else
+ // if (A.cst == 1) {
+ // sparse_details_impl::pfspmv_one(F, A, x, y, FieldCategories::UnparametricTag());
+ // } else if (A.cst == -1) {
+ // sparse_details_impl::pfspmv_mone(F, A, x, y, FieldCategories::UnparametricTag());
+ // } else {
+ // auto x1 = fflas_new(F, A.n, 1, Alignment::CACHE_LINE);
+ // fscal(F, A.n, A.cst, x, 1, x1, 1);
+ // sparse_details_impl::pfspmv_one(F, A, x, y, FieldCategories::UnparametricTag());
+ // fflas_delete(x1);
+ // }
+ // #endif // SIMD
+ // }
+
+ // template <class Field, class SM>
+ // inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ // FieldCategories::UnparametricTag, std::true_type) {
+ // if (A.cst == 1) {
+ // sparse_details_impl::pfspmv_one(F, A, x, y, FieldCategories::UnparametricTag());
+ // } else if (A.cst == -1) {
+ // sparse_details_impl::pfspmv_mone(F, A, x, y, FieldCategories::UnparametricTag());
+ // } else {
+ // auto x1 = fflas_new(F, A.n, 1, Alignment::CACHE_LINE);
+ // fscal(F, A.n, A.cst, x, 1, x1, 1);
+ // sparse_details_impl::pfspmv_one(F, A, x, y, FieldCategories::UnparametricTag());
+ // fflas_delete(x1);
+ // }
+ // }
+
+ // template <class Field, class SM>
+ // inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ // FieldCategories::ModularTag, std::true_type) {
+ // sparse_details::pfspmv<Field, SM>(F, A, x, y, FieldCategories::UnparametricTag(), std::true_type());
+ // freduce(F, A.m, y, 1);
+ // }
+ // #endif
+
+ // /***************************** pfspmm *****************************/
+
+ // template<class Field, class SM, class FCat, class MZO>
+ // inline void pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ // typename Field::Element_ptr y, int ldy, FieldCategories::MultiPrecisionTag, FCat fc, MZO mz) {
+ // sparse_details::pfspmm(F, A, blockSize, x, ldx, y, ldy, typename FieldCategories::GenericTag(), MZO());
+ // }
+
+ // template<class Field, class SM, class FCat, class MZO>
+ // inline void pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ // typename Field::Element_ptr y, int ldy, FieldCategories::GenericTag, FCat fc, MZO mz) {
+ // sparse_details::pfspmm(F, A, blockSize, x, ldx, y, ldy, FCat(), MZO());
+ // }
+
+ // template <class Field, class SM>
+ // inline void pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ // typename Field::Element_ptr y, int ldy, FieldCategories::GenericTag, std::false_type) {
+ // // std::cout << "no ZO Generic" << std::endl;
+ // /*sparse_details_impl::*/pfspmm(F, A, blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ // }
+
+ // template <class Field, class SM>
+ // inline void pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ // typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, std::false_type) {
+ // // std::cout << "no ZO Unparametric" << std::endl;
+ // #ifdef __FFLASFFPACK_USE_SIMD
+ // using simd = Simd<typename Field::Element>;
+ // if (((uint64_t)y % simd::alignment == 0) && ((uint64_t)x % simd::alignment == 0) &&
+ // (blockSize % simd::vect_size == 0)) {
+ // // std::cout << "no ZO Unparametric algined" << std::endl;
+ // sparse_details_impl::pfspmm_simd_aligned(F, A, blockSize, x, ldx, y, ldy,
+ // FieldCategories::UnparametricTag());
+ // }
+ // else{
+ // sparse_details_impl::fspmm_simd_unaligned(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ // }
+ // #else
+ // sparse_details_impl::pfspmm(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ // #endif
+ // }
+
+ // template <class Field, class SM>
+ // inline void pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ // typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, std::false_type) {
+ // // std::cout << "no ZO Modular" << std::endl;
+ // if (A.delayed) {
+ // sparse_details::pfspmm(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag(),
+ // typename std::false_type());
+ // freduce(F, A.m, blockSize, y, ldy);
+ // } else {
+ // #ifdef __FFLASFFPACK_USE_SIMD
+ // using simd = Simd<typename Field::Element>;
+ // if (((uint64_t)y % simd::alignment == 0) && ((uint64_t)x % simd::alignment == 0) &&
+ // (blockSize % simd::vect_size == 0)) {
+ // sparse_details_impl::pfspmm_simd_aligned(F, A, blockSize, x, ldx, y, ldy, A.kmax);
+ // } else {
+ // sparse_details_impl::pfspmm_simd_unaligned(F, A, blockSize, x, ldx, y, ldy, A.kmax);
+ // }
+ // #else
+ // sparse_details_impl::pfspmm(F, A, blockSize, x, ldx, y, ldy, A.kmax);
+ // #endif
+ // }
+ // }
+
+ // // ZO matrix
+ // template <class Field, class SM>
+ // inline void pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ // typename Field::Element_ptr y, int ldy, FieldCategories::GenericTag, std::true_type) {
+ // // std::cout << "ZO Generic" << std::endl;
+ // if (F.isOne(A.cst)) {
+ // sparse_details_impl::pfspmm_one(F, A, blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ // } else if (F.isMOne(A.cst)) {
+ // sparse_details_impl::pfspmm_mone(F, A, blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ // } else {
+ // auto x1 = fflas_new(F, A.m, blockSize, Alignment::CACHE_LINE);
+ // fscal(F, A.m, blockSize, A.cst, x, ldx, x1, 1);
+ // sparse_details_impl::pfspmm_one(F, A, blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ // fflas_delete(x1);
+ // }
+ // }
+
+ // template <class Field, class SM>
+ // inline void pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ // typename Field::Element_ptr y, int ldy, FieldCategories::UnparametricTag, std::true_type) {
+ // // std::cout << "ZO Unparametric" << std::endl;
+ // #ifdef __FFLASFFPACK_USE_SIMD
+ // using simd = Simd<typename Field::Element>;
+ // if (F.isOne(A.cst)) {
+ // if (((uint64_t)y % simd::alignment == 0) && ((uint64_t)x % simd::alignment == 0) &&
+ // (blockSize % simd::vect_size == 0)) {
+ // // std::cout << "ZO Unparametric aligned" << std::endl;
+ // sparse_details_impl::pfspmm_one_simd_aligned(F, A, blockSize, x, ldx, y, ldy,
+ // FieldCategories::UnparametricTag());
+ // } else {
+ // // std::cout << "ZO Unparametric unaligned" << std::endl;
+ // sparse_details_impl::pfspmm_one_simd_unaligned(F, A, blockSize, x, ldx, y, ldy,
+ // FieldCategories::UnparametricTag());
+ // }
+ // } else if (F.isMOne(A.cst)) {
+ // if (((uint64_t)y % simd::alignment == 0) && ((uint64_t)x % simd::alignment == 0) &&
+ // (blockSize % simd::vect_size == 0)) {
+ // sparse_details_impl::pfspmm_mone_simd_aligned(F, A, blockSize, x, ldx, y, ldy,
+ // FieldCategories::UnparametricTag());
+ // } else {
+ // sparse_details_impl::pfspmm_mone_simd_unaligned(F, A, blockSize, x, ldx, y, ldy,
+ // FieldCategories::UnparametricTag());
+ // }
+ // } else {
+ // auto x1 = fflas_new(F, A.m, blockSize, Alignment::CACHE_LINE);
+ // fscal(F, A.m, blockSize, A.cst, x, ldx, x1, 1);
+ // if (((uint64_t)y % simd::alignment == 0) && ((uint64_t)x % simd::alignment == 0) &&
+ // (blockSize % simd::vect_size == 0)) {
+ // sparse_details_impl::pfspmm_one_simd_aligned(F, A, blockSize, x, ldx, y, ldy,
+ // FieldCategories::UnparametricTag());
+ // } else {
+ // sparse_details_impl::pfspmm_one_simd_unaligned(F, A, blockSize, x, ldx, y, ldy,
+ // FieldCategories::UnparametricTag());
+ // }
+ // fflas_delete(x1);
+ // }
+ // #else
+ // if (F.isOne(A.cst)) {
+ // sparse_details_impl::pfspmm_one(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ // } else if (F.isMOne(A.cst)) {
+ // sparse_details_impl::pfspmm_mone(F, A, blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ // } else {
+ // auto x1 = fflas_new(F, A.m, blockSize, Alignment::CACHE_LINE);
+ // fscal(F, A.m, blockSize, A.cst, x, ldx, x1, 1);
+ // sparse_details_impl::pfspmm_one(F, A, blockSize, x1, ldx, y, ldy, FieldCategories::UnparametricTag());
+ // fflas_delete(x1);
+ // }
+ // #endif
+ // }
+
+ // template <class Field, class SM>
+ // inline void pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ // typename Field::Element_ptr y, int ldy, FieldCategories::ModularTag, std::true_type) {
+ // // std::cout << "ZO Modular" << std::endl;
+ // if (A.delayed) {
+ // sparse_details::pfspmm(F, A, blockSize, x, ldx, y, ldy, typename FieldCategories::UnparametricTag(),
+ // typename std::true_type());
+ // freduce(F, blockSize, A.m, y, ldy);
+ // } else {
+ // sparse_details_impl::pfspmm(F, A, blockSize, x, ldx, y, ldy, A.kmax);
+ // }
+ // }
+
+#endif // __FFLASFFPACK_USE_OPENMP
+
+ } // sparse details
+
+ template <class Field, class SM>
+ inline void fspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, const typename Field::Element &beta,
+ typename Field::Element_ptr y) {
+ sparse_details::init_y(F, A.m, beta, y);
+ sparse_details::fspmv_dispatch(F, A, x, y, typename FieldTraits<Field>::category(),
+ typename isZOSparseMatrix<Field, SM>::type());
+ }
+
+ template <class Field, class SM>
+ inline void fspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ const typename Field::Element &beta, typename Field::Element_ptr y, int ldy) {
+ sparse_details::init_y(F, A.m, blockSize, beta, y, ldy);
+ sparse_details::fspmm_dispatch<Field, SM>(F, A, blockSize, x, ldx, y, ldy, typename FieldTraits<Field>::category(),
+ typename isZOSparseMatrix<Field, SM>::type());
+ }
+
+#if defined(__FFLASFFPACK_USE_OPENMP)
+
+ template <class Field, class SM>
+ inline void pfspmv(const Field &F, const SM &A, typename Field::ConstElement_ptr x, const typename Field::Element &beta,
+ typename Field::Element_ptr y) {
+ sparse_details::init_y(F, A.m, beta, y);
+ sparse_details::pfspmv<Field, SM>(F, A, x, y,
+ typename FieldTraits<Field>::category(),
+ typename isZOSparseMatrix<Field, SM>::type());
+ }
+
+ template <class Field, class SM>
+ inline void pfspmm(const Field &F, const SM &A, size_t blockSize, typename Field::ConstElement_ptr x, int ldx,
+ const typename Field::Element &beta, typename Field::Element_ptr y, int ldy) {
+ sparse_details::init_y(F, A.m, blockSize, beta, y, ldy);
+ sparse_details::pfspmm_dispatch<Field, SM>(F, A, blockSize, x, ldx, y, ldy, typename FieldTraits<Field>::category(),
+ typename isZOSparseMatrix<Field, SM>::type());
+ }
+
+#endif // __FFLASFFPACK_USE_OPENMP
+
+ // template <class Field, class SM>
+ // inline void pfspmm(const Field &F, const SM &A, size_t blockSize,
+ // typename Field::ConstElement_ptr x, int ldx,
+ // const typename Field::Element &beta,
+ // typename Field::Element_ptr y, int ldy) {
+ // sparse_details::init_y(F, A.m, blockSize, beta, y, ldy);
+ // sparse_details::pfspmm<Field, SM>(
+ // F, A, blockSize, x, ldx, y, ldy, typename FieldTraits<Field>::value(),
+ // typename FieldTraits<Field>::category(),
+ // typename isZOSparseMatrix<Field, SM>::type());
+ // }
+}
+
+#endif // __FFLASFFPACK_fflas_fflas_sparse_INL
diff --git a/benchmark/Makefile.am b/fflas-ffpack/fflas/fflas_sparse/Makefile.am
similarity index 67%
copy from benchmark/Makefile.am
copy to fflas-ffpack/fflas/fflas_sparse/Makefile.am
index 31793b2..7bb4322 100644
--- a/benchmark/Makefile.am
+++ b/fflas-ffpack/fflas/fflas_sparse/Makefile.am
@@ -1,5 +1,7 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by Bastien Vialla <bastien.vialla at lirmm.fr>
+#
+#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -19,10 +21,21 @@
# ========LICENCE========
#/
-#
-# Nothing yet
-SUBDIRS=graph src html test-src
-#
-EXTRA_DIST=run.sh
+pkgincludesubdir=$(pkgincludedir)/fflas/fflas_sparse
+
+SUBDIRS=coo csr csr_hyb ell ell_simd hyb_zo sell
+
+
+pkgincludesub_HEADERS= \
+ sparse_matrix_traits.h \
+ read_sparse.h \
+ utils.h \
+ coo.h \
+ csr.h \
+ ell.h \
+ ell_simd.h \
+ sell.h \
+ csr_hyb.h \
+ hyb_zo.h
diff --git a/fflas-ffpack/fflas/fflas_sparse/coo.h b/fflas-ffpack/fflas/fflas_sparse/coo.h
new file mode 100644
index 0000000..ebe48e4
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/coo.h
@@ -0,0 +1,83 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_fspmv_coo.inl
+ * NO DOC
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_coo_H
+#define __FFLASFFPACK_fflas_sparse_coo_H
+
+namespace FFLAS { /* COO */
+
+template <class _Field> struct Sparse<_Field, SparseMatrix_t::COO> {
+ using Field = _Field;
+ index_t *col = nullptr;
+ index_t *row = nullptr;
+ typename _Field::Element_ptr dat;
+ bool delayed = false;
+ uint64_t kmax = 0;
+ index_t m = 0;
+ index_t n = 0;
+ uint64_t nnz = 0;
+ uint64_t nElements = 0;
+ uint64_t maxrow = 0;
+};
+
+template <class _Field>
+struct Sparse<_Field, SparseMatrix_t::COO_ZO>
+ : public Sparse<_Field, SparseMatrix_t::COO> {
+ using Field = _Field;
+ typename _Field::Element cst = 1;
+};
+
+template <class Field, class IndexT>
+void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::COO> &A,
+ const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim,
+ uint64_t coldim, uint64_t nnz);
+
+template <class Field, class IndexT>
+void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::COO_ZO> &A,
+ const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim,
+ uint64_t coldim, uint64_t nnz);
+
+template <class Field>
+void sparse_delete(const Sparse<Field, SparseMatrix_t::COO> &A);
+
+template <class Field>
+void sparse_delete(const Sparse<Field, SparseMatrix_t::COO_ZO> &A);
+
+} // FFLAS
+
+#include "fflas-ffpack/fflas/fflas_sparse/coo/coo_utils.inl"
+#include "fflas-ffpack/fflas/fflas_sparse/coo/coo_spmv.inl"
+#include "fflas-ffpack/fflas/fflas_sparse/coo/coo_spmm.inl"
+
+#endif // __FFLASFFPACK_fflas_sparse_coo_H
\ No newline at end of file
diff --git a/benchmark/Makefile.am b/fflas-ffpack/fflas/fflas_sparse/coo/Makefile.am
similarity index 77%
copy from benchmark/Makefile.am
copy to fflas-ffpack/fflas/fflas_sparse/coo/Makefile.am
index 31793b2..f74741c 100644
--- a/benchmark/Makefile.am
+++ b/fflas-ffpack/fflas/fflas_sparse/coo/Makefile.am
@@ -1,5 +1,7 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by Bastien Vialla <bastien.vialla at lirmm.fr>
+#
+#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -19,10 +21,10 @@
# ========LICENCE========
#/
-#
-# Nothing yet
-SUBDIRS=graph src html test-src
-#
-EXTRA_DIST=run.sh
+pkgincludesubdir=$(pkgincludedir)/fflas/fflas_sparse/coo
+pkgincludesub_HEADERS= \
+ coo_spmv.inl \
+ coo_spmm.inl \
+ coo_utils.inl
diff --git a/fflas-ffpack/fflas/fflas_sparse/coo/coo_spmm.inl b/fflas-ffpack/fflas/fflas_sparse/coo/coo_spmm.inl
new file mode 100644
index 0000000..5da2769
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/coo/coo_spmm.inl
@@ -0,0 +1,338 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_coo_spmm_INL
+#define __FFLASFFPACK_fflas_sparse_coo_spmm_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::COO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::GenericTag) {
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nnz; ++i) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.axpyin(y[row[i] * ldy + k], dat[i], x[col[i] * ldx + k]);
+ F.axpyin(y[row[i] * ldy + k + 1], dat[i], x[col[i] * ldx + k + 1]);
+ F.axpyin(y[row[i] * ldy + k + 2], dat[i], x[col[i] * ldx + k + 2]);
+ F.axpyin(y[row[i] * ldy + k + 3], dat[i], x[col[i] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.axpyin(y[row[i] * ldy + k], dat[i], x[col[i] * ldx + k]);
+ }
+}
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::COO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nnz; ++i) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[row[i] * ldy + k] += dat[i] * x[col[i] * ldx + k];
+ y[row[i] * ldy + k + 1] += dat[i] * x[col[i] * ldx + k + 1];
+ y[row[i] * ldy + k + 2] += dat[i] * x[col[i] * ldx + k + 2];
+ y[row[i] * ldy + k + 3] += dat[i] * x[col[i] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[row[i] * ldy + k] += dat[i] * x[col[i] * ldx + k];
+ }
+}
+
+
+#ifdef __FFLASFFPACK_HAVE_MKL
+inline void fspmm_mkl(const Givaro::DoubleDomain &F, const Sparse<Givaro::DoubleDomain, SparseMatrix_t::COO> &A,
+ index_t blockSize ,
+ Givaro::DoubleDomain::ConstElement_ptr x_, index_t ldx,
+ Givaro::DoubleDomain::Element_ptr y_, index_t ldy,
+ FieldCategories::UnparametricTag) {
+ // assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ // assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ MKL_INT A_nnz = A.nnz ;
+ mkl_dcoomm(MKL_CONFIG::trans, &A.m , &blockSize, &A.n, &MKL_CONFIG::dalpha, MKL_CONFIG::metaChar,
+ A.dat, A.row, A.col, &A_nnz, x_, &ldx, &MKL_CONFIG::dbeta, y_, &ldy );
+
+ // void mkl_dcoomv (char *transa, MKL_INT *m, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *rowind, MKL_INT *colind, MKL_INT *nnz, double *x, double *beta, double *y);
+
+}
+
+inline void fspmm_mkl(const Givaro::FloatDomain &F, const Sparse<Givaro::FloatDomain, SparseMatrix_t::COO> &A,
+ index_t blockSize ,
+ Givaro::FloatDomain::ConstElement_ptr x_, index_t ldx,
+ Givaro::FloatDomain::Element_ptr y_, index_t ldy,
+ FieldCategories::UnparametricTag) {
+ // assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ // assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ MKL_INT A_nnz = A.nnz ;
+ mkl_scoomm(MKL_CONFIG::trans, &A.m , &blockSize, &A.n, &MKL_CONFIG::salpha, MKL_CONFIG::metaChar,
+ A.dat, A.row, A.col, &A_nnz, x_, &ldx, &MKL_CONFIG::sbeta, y_, &ldy );
+
+ // void mkl_scoomm (char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, float *alpha, char *matdescra, float *val, MKL_INT *rowind, MKL_INT *colind, MKL_INT *nnz, float *b, MKL_INT *ldb, float *beta, float *c, MKL_INT *ldc);
+
+}
+#endif // __FFLASFFPACK_HAVE_MKL
+
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::COO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nnz; ++i) {
+ vect_t vy, vx, vdat;
+ vdat = simd::set1(dat[i]);
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy = simd::load(y + row[i] * ldy + k);
+ vx = simd::load(x + col[i] * ldx + k);
+ simd::store(y + row[i] * ldy + k, simd::fmadd(vy, vdat, vx));
+ }
+ for (; k < blockSize; ++k)
+ y[row[i] * ldy + k] += dat[i] * x[col[i] * ldx + k];
+ }
+}
+
+template <class Field>
+inline void fspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::COO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ auto x = x_;
+ auto y = y_;
+ for (index_t i = 0; i < A.nnz; ++i) {
+ vect_t vy, vx, vdat;
+ vdat = simd::set1(dat[i]);
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy = simd::loadu(y + row[i] * ldy + k);
+ vx = simd::loadu(x + col[i] * ldx + k);
+ simd::storeu(y + row[i] * ldy + k, simd::fmadd(vy, vdat, vx));
+ }
+ for (; k < blockSize; ++k)
+ y[row[i] * ldy + k] += dat[i] * x[col[i] * ldx + k];
+ }
+}
+
+#endif // SIMD
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::COO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ const int64_t kmax) {
+ // TODO
+}
+
+template <class Field>
+inline void fspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::COO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ const int64_t kmax) {
+ // TODO
+}
+
+template <class Field>
+inline void fspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::COO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ const int64_t kmax) {
+ // TODO
+}
+
+template <class Field>
+inline void fspmm_one(const Field &F, const Sparse<Field, SparseMatrix_t::COO_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::GenericTag) {
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nnz; ++i) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.addin(y[row[i] * ldy + k], x[col[i] * ldx + k]);
+ F.addin(y[row[i] * ldy + k + 1], x[col[i] * ldx + k + 1]);
+ F.addin(y[row[i] * ldy + k + 2], x[col[i] * ldx + k + 2]);
+ F.addin(y[row[i] * ldy + k + 3], x[col[i] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.addin(y[row[i] * ldy + k], x[col[i] * ldx + k]);
+ }
+}
+
+template <class Field>
+inline void fspmm_mone(const Field &F, const Sparse<Field, SparseMatrix_t::COO_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::GenericTag) {
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nnz; ++i) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.subin(y[row[i] * ldy + k], x[col[i] * ldx + k]);
+ F.subin(y[row[i] * ldy + k + 1], x[col[i] * ldx + k + 1]);
+ F.subin(y[row[i] * ldy + k + 2], x[col[i] * ldx + k + 2]);
+ F.subin(y[row[i] * ldy + k + 3], x[col[i] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.subin(y[row[i] * ldy + k], x[col[i] * ldx + k]);
+ }
+}
+
+// #ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmm_one_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::COO_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.nnz; ++i) {
+ vect_t vy, vx;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy = simd::load(y + row[i] * ldy + k);
+ vx = simd::load(x + col[i] * ldx + k);
+ simd::store(y + row[i] * ldy + k, simd::add(vy, vx));
+ }
+ for (; k < blockSize; ++k)
+ y[row[i] * ldy + k] += x[col[i] * ldx + k];
+ }
+}
+
+template <class Field>
+inline void fspmm_one_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::COO_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.nnz; ++i) {
+ vect_t vy, vx;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy = simd::loadu(y + row[i] * ldy + k);
+ vx = simd::loadu(x + col[i] * ldx + k);
+ simd::storeu(y + row[i] * ldy + k, simd::add(vy, vx));
+ }
+ for (; k < blockSize; ++k)
+ y[row[i] * ldy + k] += x[col[i] * ldx + k];
+ }
+}
+
+template <class Field>
+inline void fspmm_mone_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::COO_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.nnz; ++i) {
+ vect_t vy, vx;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy = simd::load(y + row[i] * ldy + k);
+ vx = simd::load(x + col[i] * ldx + k);
+ simd::store(y + row[i] * ldy + k, simd::sub(vy, vx));
+ }
+ for (; k < blockSize; ++k)
+ y[row[i] * ldy + k] -= x[col[i] * ldx + k];
+ }
+}
+
+template <class Field>
+inline void fspmm_mone_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::COO_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.nnz; ++i) {
+ vect_t vy, vx;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy = simd::loadu(y + row[i] * ldy + k);
+ vx = simd::loadu(x + col[i] * ldx + k);
+ simd::storeu(y + row[i] * ldy + k, simd::sub(vy, vx));
+ }
+ for (; k < blockSize; ++k)
+ y[row[i] * ldy + k] -= x[col[i] * ldx + k];
+ }
+}
+
+// #endif /* __FFLASFFPACK_USE_SIMD */
+
+} // coo_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_coo_spmm_INL
diff --git a/fflas-ffpack/fflas/fflas_sparse/coo/coo_spmv.inl b/fflas-ffpack/fflas/fflas_sparse/coo/coo_spmv.inl
new file mode 100644
index 0000000..4312e80
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/coo/coo_spmv.inl
@@ -0,0 +1,231 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Barowien Vialla <barowien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redirowribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is dirowributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin rowreet, Fifth Floor, Borowon, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_coo_spmv_INL
+#define __FFLASFFPACK_fflas_sparse_coo_spmv_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::COO> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::GenericTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t j = 0;
+ for (; j < ROUND_DOWN(A.nnz, 4); j += 4) {
+ F.axpyin(y[row[j]], dat[j], x[col[j]]);
+ F.axpyin(y[row[j + 1]], dat[j + 1], x[col[j + 1]]);
+ F.axpyin(y[row[j + 2]], dat[j + 2], x[col[j + 2]]);
+ F.axpyin(y[row[j + 3]], dat[j + 3], x[col[j + 3]]);
+ }
+ for (; j < A.nnz; ++j) {
+ F.axpyin(y[row[j]], dat[j], x[col[j]]);
+ }
+}
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::COO> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t j = 0;
+ for (; j < ROUND_DOWN(A.nnz, 4); j += 4) {
+ y[row[j]] += dat[j] * x[col[j]];
+ y[row[j + 1]] += dat[j + 1] * x[col[j + 1]];
+ y[row[j + 2]] += dat[j + 2] * x[col[j + 2]];
+ y[row[j + 3]] += dat[j + 3] * x[col[j + 3]];
+ }
+ for (; j < A.nnz; ++j) {
+ y[row[j]] += dat[j] * x[col[j]];
+ }
+}
+
+#ifdef __FFLASFFPACK_HAVE_MKL
+inline void fspmv_mkl(const Givaro::DoubleDomain &F, const Sparse<Givaro::DoubleDomain, SparseMatrix_t::COO> &A,
+ Givaro::DoubleDomain::ConstElement_ptr x_,
+ Givaro::DoubleDomain::Element_ptr y_, FieldCategories::UnparametricTag) {
+ // assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ // assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ MKL_INT A_nnz = A.nnz ;
+ mkl_dcoomv(MKL_CONFIG::trans, &A.m , &A.n, &MKL_CONFIG::dalpha, MKL_CONFIG::metaChar,
+ A.dat, A.row, A.col, &A_nnz, x_, &MKL_CONFIG::dbeta, y_ );
+
+ // void mkl_dcoomv (char *transa, MKL_INT *m, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *rowind, MKL_INT *colind, MKL_INT *nnz, double *x, double *beta, double *y);
+
+}
+
+inline void fspmv_mkl(const Givaro::FloatDomain &F, const Sparse<Givaro::FloatDomain, SparseMatrix_t::COO> &A,
+ Givaro::FloatDomain::ConstElement_ptr x_,
+ Givaro::FloatDomain::Element_ptr y_, FieldCategories::UnparametricTag) {
+ // assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ // assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ MKL_INT A_nnz = A.nnz ;
+ mkl_scoomv(MKL_CONFIG::trans, &A.m , &A.n, &MKL_CONFIG::salpha, MKL_CONFIG::metaChar,
+ A.dat, A.row, A.col, &A_nnz, x_, &MKL_CONFIG::sbeta, y_ );
+
+ // void mkl_scoomv (char *transa, MKL_INT *m, MKL_INT *k, float *alpha, char *matdescra, float *val, MKL_INT *rowind, MKL_INT *colind, MKL_INT *nnz, float *x, float *beta, float *y);
+
+}
+#endif // __FFLASFFPACK_HAVE_MKL
+
+
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::COO> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, const uint64_t kmax) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ size_t w = 0;
+ index_t larow_i = 0;
+ typename Field::Element e;
+ F.init(e, y[larow_i]);
+ size_t accu = 0;
+
+ while (w < A.nnz) {
+ if (row[w] == larow_i) { // same line
+ if (accu < (size_t)kmax) {
+ e += dat[w] * x[col[w]];
+ accu += 1;
+ } else {
+ F.axpyin(e, dat[w], x[col[w]]);
+ accu = 0;
+ }
+ } else { // new line
+ F.init(y[larow_i], e);
+ larow_i = row[w];
+ F.init(e, y[larow_i]);
+ e += dat[w] * x[col[w]];
+ accu = 1;
+ }
+ ++w;
+ }
+ F.init(y[larow_i], e);
+}
+
+template <class Field>
+inline void fspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::COO_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t j = 0;
+ for (; j < ROUND_DOWN(A.nnz, 4); j += 4) {
+ F.addin(y[row[j]], x[col[j]]);
+ F.addin(y[row[j + 1]], x[col[j + 1]]);
+ F.addin(y[row[j + 2]], x[col[j + 2]]);
+ F.addin(y[row[j + 3]], x[col[j + 3]]);
+ }
+ for (; j < A.nnz; ++j) {
+ F.addin(y[row[j]], x[col[j]]);
+ }
+}
+
+template <class Field>
+inline void fspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::COO_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t j = 0;
+ for (; j < ROUND_DOWN(A.nnz, 4); j += 4) {
+ F.subin(y[row[j]], x[col[j]]);
+ F.subin(y[row[j + 1]], x[col[j + 1]]);
+ F.subin(y[row[j + 2]], x[col[j + 2]]);
+ F.subin(y[row[j + 3]], x[col[j + 3]]);
+ }
+ for (; j < A.nnz; ++j) {
+ F.subin(y[row[j]], x[col[j]]);
+ }
+}
+
+template <class Field>
+inline void fspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::COO_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t j = 0;
+ for (; j < ROUND_DOWN(A.nnz, 4); j += 4) {
+ y[row[j]] += x[col[j]];
+ y[row[j + 1]] += x[col[j + 1]];
+ y[row[j + 2]] += x[col[j + 2]];
+ y[row[j + 3]] += x[col[j + 3]];
+ }
+ for (; j < A.nnz; ++j) {
+ y[row[j]] += x[col[j]];
+ }
+}
+
+template <class Field>
+inline void fspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::COO_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(row, A.row, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t j = 0;
+ for (; j < ROUND_DOWN(A.nnz, 4); j += 4) {
+ y[row[j]] -= x[col[j]];
+ y[row[j + 1]] -= x[col[j + 1]];
+ y[row[j + 2]] -= x[col[j + 2]];
+ y[row[j + 3]] -= x[col[j + 3]];
+ }
+ for (; j < A.nnz; ++j) {
+ y[row[j]] -= x[col[j]];
+ }
+}
+
+} // coo_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_coo_spmv_INL
diff --git a/fflas-ffpack/fflas/fflas_sparse/coo/coo_utils.inl b/fflas-ffpack/fflas/fflas_sparse/coo/coo_utils.inl
new file mode 100644
index 0000000..74c923e
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/coo/coo_utils.inl
@@ -0,0 +1,95 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_coo_utils_INL
+#define __FFLASFFPACK_fflas_sparse_coo_utils_INL
+
+namespace FFLAS {
+
+template <class Field> inline void sparse_delete(const Sparse<Field, SparseMatrix_t::COO> &A) {
+ fflas_delete(A.dat);
+ fflas_delete(A.col);
+ fflas_delete(A.row);
+}
+
+template <class Field> inline void sparse_delete(const Sparse<Field, SparseMatrix_t::COO_ZO> &A) {
+ fflas_delete(A.col);
+ fflas_delete(A.row);
+}
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::COO> &A, const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz) {
+ A.kmax = Protected::DotProdBoundClassic(F, F.one);
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ A.nElements = nnz;
+ std::vector<uint64_t> rows(rowdim, 0);
+ for (uint64_t i = 0; i < A.nnz; ++i)
+ rows[row[i]]++;
+ A.maxrow = *(std::max_element(rows.begin(), rows.end()));
+ if (A.kmax > A.maxrow)
+ A.delayed = true;
+ A.col = fflas_new<index_t>(nnz, Alignment::CACHE_LINE);
+ A.row = fflas_new<index_t>(nnz, Alignment::CACHE_LINE);
+ A.dat = fflas_new(F, nnz, 1, Alignment::CACHE_LINE);
+
+ for (uint64_t i = 0; i < nnz; ++i) {
+ A.col[i] = (index_t)col[i];
+ A.row[i] = (index_t)row[i];
+ A.dat[i] = dat[i];
+ }
+}
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::COO_ZO> &A, const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz) {
+ A.kmax = Protected::DotProdBoundClassic(F, F.one);
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ A.nElements = nnz;
+ std::vector<uint64_t> rows(A.m, 0);
+ for (uint64_t i = 0; i < A.nnz; ++i)
+ rows[row[i]]++;
+ A.maxrow = *(std::max_element(rows.begin(), rows.end()));
+ if (A.kmax > A.maxrow)
+ A.delayed = true;
+
+ A.col = fflas_new<index_t>(nnz, Alignment::CACHE_LINE);
+ A.row = fflas_new<index_t>(nnz, Alignment::CACHE_LINE);
+
+ for (uint64_t i = 0; i < nnz; ++i) {
+ A.col[i] = (index_t)col[i];
+ A.row[i] = (index_t)row[i];
+ }
+}
+}
+
+#endif // __FFLASFFPACK_fflas_sparse_coo_spmv_INL
diff --git a/fflas-ffpack/fflas/fflas_sparse/csr.h b/fflas-ffpack/fflas/fflas_sparse/csr.h
new file mode 100644
index 0000000..73252d8
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/csr.h
@@ -0,0 +1,93 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_fspmv_CSR.inl
+ * NO DOC
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_CSR_H
+#define __FFLASFFPACK_fflas_sparse_CSR_H
+
+namespace FFLAS { /* CSR */
+
+template <class _Field> struct Sparse<_Field, SparseMatrix_t::CSR> {
+ using Field = _Field;
+ bool delayed = false;
+ uint64_t kmax = 0;
+ index_t m = 0;
+ index_t n = 0;
+ uint64_t nnz = 0;
+ uint64_t nElements = 0;
+ uint64_t maxrow = 0;
+ index_t *col = nullptr;
+ index_t *st = nullptr;
+ index_t *stend = nullptr;
+ typename _Field::Element_ptr dat;
+};
+
+template <class _Field>
+struct Sparse<_Field, SparseMatrix_t::CSR_ZO>
+ : public Sparse<_Field, SparseMatrix_t::CSR> {
+ using Field = _Field;
+ int64_t cst = 1;
+ bool delayed = false;
+};
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::CSR> &A,
+ const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim,
+ uint64_t coldim, uint64_t nnz);
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F,
+ Sparse<Field, SparseMatrix_t::CSR_ZO> &A,
+ const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim,
+ uint64_t coldim, uint64_t nnz);
+
+template <class Field>
+inline void sparse_delete(const Sparse<Field, SparseMatrix_t::CSR> &A);
+
+template <class Field>
+inline void sparse_delete(const Sparse<Field, SparseMatrix_t::CSR_ZO> &A);
+
+} // FFLAS
+
+#include "fflas-ffpack/fflas/fflas_sparse/csr/csr_utils.inl"
+#include "fflas-ffpack/fflas/fflas_sparse/csr/csr_spmv.inl"
+#include "fflas-ffpack/fflas/fflas_sparse/csr/csr_spmm.inl"
+
+#if defined(__FFLASFFPACK_USE_OPENMP) || defined(__FFLASFFPACK_USE_TBB)
+
+#include "fflas-ffpack/fflas/fflas_sparse/csr/csr_pspmv.inl"
+#include "fflas-ffpack/fflas/fflas_sparse/csr/csr_pspmm.inl"
+
+#endif
+
+#endif // __FFLASFFPACK_fflas_sparse_CSR_H
\ No newline at end of file
diff --git a/benchmark/Makefile.am b/fflas-ffpack/fflas/fflas_sparse/csr/Makefile.am
similarity index 73%
copy from benchmark/Makefile.am
copy to fflas-ffpack/fflas/fflas_sparse/csr/Makefile.am
index 31793b2..7a030e6 100644
--- a/benchmark/Makefile.am
+++ b/fflas-ffpack/fflas/fflas_sparse/csr/Makefile.am
@@ -1,5 +1,7 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by Bastien Vialla <bastien.vialla at lirmm.fr>
+#
+#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -19,10 +21,12 @@
# ========LICENCE========
#/
-#
-# Nothing yet
-SUBDIRS=graph src html test-src
-#
-EXTRA_DIST=run.sh
+pkgincludesubdir=$(pkgincludedir)/fflas/fflas_sparse/csr
+pkgincludesub_HEADERS= \
+ csr_spmv.inl \
+ csr_spmm.inl \
+ csr_pspmv.inl \
+ csr_pspmm.inl \
+ csr_utils.inl
diff --git a/fflas-ffpack/fflas/fflas_sparse/csr/csr_pspmm.inl b/fflas-ffpack/fflas/fflas_sparse/csr/csr_pspmm.inl
new file mode 100644
index 0000000..468b16e
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/csr/csr_pspmm.inl
@@ -0,0 +1,939 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_CSR_pspmm_INL
+#define __FFLASFFPACK_fflas_sparse_CSR_pspmm_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::GenericTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ size_t m = A.m;
+ SYNCH_GROUP(
+ FORBLOCK1D(it, m, SPLITTER(NUM_THREADS),
+ TASK(CONSTREFERENCE(F) MODE(READ(dat, col, st, x) READWRITE(y)),
+ {
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ for (index_t j = st[i]; j < st[i + 1]; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.axpyin(y[i * ldy + k], dat[j], x[col[j] * ldx + k]);
+ F.axpyin(y[i * ldy + k + 1], dat[j], x[col[j] * ldx + k + 1]);
+ F.axpyin(y[i * ldy + k + 2], dat[j], x[col[j] * ldx + k + 2]);
+ F.axpyin(y[i * ldy + k + 3], dat[j], x[col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.axpyin(y[i * ldy + k], dat[j], x[col[j] * ldx + k]);
+ }
+ }
+ }
+ );
+ );
+ );
+}
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ size_t m = A.m;
+ SYNCH_GROUP(
+ FORBLOCK1D(it, m, SPLITTER(NUM_THREADS),
+ TASK(MODE(READ(dat, col, st, x) READWRITE(y)),
+ {
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ for (index_t j = st[i]; j < st[i + 1]; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ y[i * ldy + k + 1] += dat[j] * x[col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] += dat[j] * x[col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] += dat[j] * x[col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ }
+ );
+ );
+ );
+ /*
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ */
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void pfspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+
+ size_t m = A.m;
+ vect_t y1, x1, y2, x2, vdat;
+ SYNCH_GROUP(
+ FORBLOCK1D(it, m, SPLITTER(NUM_THREADS),
+ TASK(MODE(READ(dat, col, st, x) READWRITE(y)),
+ {
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ for (index_t j = st[i]; j < st[i + 1]; ++j) {
+ uint32_t k = 0;
+ vdat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ y2 = simd::load(y+i*ldy+k+simd::vect_size);
+ x1 = simd::load(x + col[j] * ldx + k);
+ x2 = simd::load(x + col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, vdat);
+ y2 = simd::fmadd(y2, x2, vdat);
+ simd::store(y + i * ldy + k, y1);
+ simd::store(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ x1 = simd::load(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, vdat);
+ simd::store(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ }
+ }
+ );
+ );
+ );
+ /*
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ vect_t y1, x1, y2, x2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ y2 = simd::load(y+i*ldy+k+simd::vect_size);
+ x1 = simd::load(x + col[j] * ldx + k);
+ x2 = simd::load(x + col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, vdat);
+ y2 = simd::fmadd(y2, x2, vdat);
+ simd::store(y + i * ldy + k, y1);
+ simd::store(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ x1 = simd::load(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, vdat);
+ simd::store(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ }
+ //*/
+}
+
+template <class Field>
+inline void pfspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+
+ size_t m = A.m;
+ vect_t y1, x1, y2, x2, vdat;
+
+ SYNCH_GROUP(
+ FORBLOCK1D(it, m, SPLITTER(NUM_THREADS),
+ TASK(MODE(READ(dat, col, st, x) READWRITE(y)),
+ {
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ for (index_t j = st[i]; j < st[i + 1]; ++j) {
+ uint32_t k = 0;
+ vdat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ y2 = simd::loadu(y+i*ldy+k+simd::vect_size);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ x2 = simd::loadu(x + col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, vdat);
+ y2 = simd::fmadd(y2, x2, vdat);
+ simd::storeu(y + i * ldy + k, y1);
+ simd::storeu(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, vdat);
+ simd::storeu(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ }
+ }
+ );
+ );
+ );
+/*
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ vect_t y1, x1, y2, x2, dat;
+ size_t k = 0;
+ dat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ y2 = simd::loadu(y+i*ldy+k+simd::vect_size);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ x2 = simd::loadu(x + col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, dat);
+ y2 = simd::fmadd(y2, x2, dat);
+ simd::storeu(y + i * ldy + k, y1);
+ simd::storeu(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, dat);
+ simd::storeu(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ }
+ */
+}
+#endif
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ const int64_t kmax) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = st[i];
+ index_t j_loc = j;
+ index_t j_end = st[i + 1];
+ index_t block = (j_end - j_loc) / kmax;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ for (size_t k = 0; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ // TODO : replace with freduce
+ FFLAS::freduce(F,blockSize,y+i*ldy,1);
+ // for (size_t k = 0; k < blockSize; ++k) {
+ // F.reduce(y[i * ldy + k]);
+ // }
+ }
+ for (; j < j_end; ++j) {
+ for (size_t k = 0; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ FFLAS::freduce(F,blockSize,y+i*ldy,1);
+ // for (size_t k = 0; k < blockSize; ++k) {
+ // F.reduce(y[i * ldy + k]);
+ // }
+ }
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void pfspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ const int64_t kmax) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = st[i];
+ index_t j_loc = j;
+ index_t j_end = st[i + 1];
+ index_t block = (j_end - j_loc) / kmax;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ vect_t y1, x1, y2, x2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ y2 = simd::loadu(y+i*ldy+k+simd::vect_size);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ x2 = simd::loadu(x + col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, vdat);
+ y2 = simd::fmadd(y2, x2, vdat);
+ simd::storeu(y + i * ldy + k, y1);
+ simd::storeu(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, vdat);
+ simd::storeu(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ // TODO : replace with freduce
+ FFLAS::freduce(F,blockSize,y+i*ldy,1);
+ // for (size_t k = 0; k < blockSize; ++k) {
+ // F.reduce(y[i * ldy + k]);
+ // }
+ }
+ for (; j < j_end; ++j) {
+ vect_t y1, x1, y2, x2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ y2 = simd::loadu(y+i*ldy+k+simd::vect_size);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ x2 = simd::loadu(x + col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, vdat);
+ y2 = simd::fmadd(y2, x2, vdat);
+ simd::storeu(y + i * ldy + k, y1);
+ simd::storeu(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, vdat);
+ simd::storeu(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ FFLAS::freduce(F,blockSize,y+i*ldy,1);
+ // for (size_t k = 0; k < blockSize; ++k) {
+ // F.reduce(y[i * ldy + k]);
+ // }
+ }
+}
+
+template <class Field>
+inline void pfspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ const int64_t kmax) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = st[i];
+ index_t j_loc = j;
+ index_t j_end = st[i + 1];
+ index_t block = (j_end - j_loc) / kmax;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ vect_t y1, x1, y2, x2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ y2 = simd::load(y+i*ldy+k+simd::vect_size);
+ x1 = simd::load(x + col[j] * ldx + k);
+ x2 = simd::load(x + col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, vdat);
+ y2 = simd::fmadd(y2, x2, vdat);
+ simd::store(y + i * ldy + k, y1);
+ simd::store(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ x1 = simd::load(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, vdat);
+ simd::store(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ // TODO : replace with freduce
+ FFLAS::freduce(F,blockSize,y+i*ldy,1);
+ // for (size_t k = 0; k < blockSize; ++k) {
+ // F.reduce(y[i * ldy + k]);
+ // }
+ }
+ for (; j < j_end; ++j) {
+ vect_t y1, x1, y2, x2, vdat;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ size_t k = 0;
+ vdat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ y2 = simd::load(y+i*ldy+k+simd::vect_size);
+ x1 = simd::load(x + col[j] * ldx + k);
+ x2 = simd::load(x + col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, vdat);
+ y2 = simd::fmadd(y2, x2, vdat);
+ simd::store(y + i * ldy + k, y1);
+ simd::store(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ x1 = simd::load(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, vdat);
+ simd::store(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ FFLAS::freduce(F,blockSize,y+i*ldy,1);
+ // for (size_t k = 0; k < blockSize; ++k) {
+ // F.reduce(y[i * ldy + k]);
+ // }
+ }
+}
+
+#endif // SIMD
+
+template <class Field>
+inline void pfspmm_one(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::GenericTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+// for (index_t i = 0; i < A.m; ++i) {
+ index_t am=A.m;
+ SYNCH_GROUP(
+ FORBLOCK1D(it, am,
+ SPLITTER(NUM_THREADS),
+ TASK(MODE(CONSTREFERENCE(F) READ(/*dat,*/ col, st, x) READWRITE(y)),
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ auto start = st[i];
+ auto stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.addin(y[i * ldy + k], x[col[j] * ldx + k]);
+ F.addin(y[i * ldy + k + 1], x[col[j] * ldx + k + 1]);
+ F.addin(y[i * ldy + k + 2], x[col[j] * ldx + k + 2]);
+ F.addin(y[i * ldy + k + 3], x[col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.addin(y[i * ldy + k], x[col[j] * ldx + k]);
+ }
+ }
+ );
+ );
+ );
+// }
+
+}
+
+template <class Field>
+inline void pfspmm_mone(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::GenericTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+// #pragma omp parallel for schedule(static, 32)
+// for (index_t i = 0; i < A.m; ++i) {
+ index_t am=A.m;
+ SYNCH_GROUP(
+ FORBLOCK1D(it, am,
+ SPLITTER(NUM_THREADS),
+ TASK(MODE(CONSTREFERENCE(F) READ(/*dat,*/ col, st, x) READWRITE(y)),
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ auto start = st[i];
+ auto stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.subin(y[i * ldy + k], x[col[j] * ldx + k]);
+ F.subin(y[i * ldy + k + 1], x[col[j] * ldx + k + 1]);
+ F.subin(y[i * ldy + k + 2], x[col[j] * ldx + k + 2]);
+ F.subin(y[i * ldy + k + 3], x[col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.subin(y[i * ldy + k], x[col[j] * ldx + k]);
+ }
+ }
+ );
+ );
+ );
+// }
+}
+
+template <class Field>
+inline void pfspmm_one(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ size_t m = A.m;
+ SYNCH_GROUP(
+ FORBLOCK1D(it, m, SPLITTER(NUM_THREADS),
+ TASK(MODE(READ(/*dat,*/ col, st, x) READWRITE(y)),
+ {
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ for (index_t j = st[i]; j < st[i + 1]; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ y[i * ldy + k + 1] += x[col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] += x[col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] += x[col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ }
+ }
+ }
+ );
+ );
+ );
+ /*
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ y[i * ldy + k + 1] += x[col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] += x[col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] += x[col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ }
+ }
+ */
+}
+
+template <class Field>
+inline void pfspmm_mone(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ size_t m = A.m;
+ SYNCH_GROUP(
+ FORBLOCK1D(it, m, SPLITTER(NUM_THREADS),
+ TASK(MODE(READ(/*dat,*/ col, st, x) READWRITE(y)),
+ {
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ for (index_t j = st[i]; j < st[i + 1]; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] -= x[col[j] * ldx + k];
+ y[i * ldy + k + 1] -= x[col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] -= x[col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] -= x[col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= x[col[j] * ldx + k];
+ }
+ }
+ }
+ );
+ );
+ );
+ /*
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] -= x[col[j] * ldx + k];
+ y[i * ldy + k + 1] -= x[col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] -= x[col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] -= x[col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= x[col[j] * ldx + k];
+ }
+ }
+ */
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void pfspmm_one_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ //*
+ size_t m = A.m;
+ vect_t y1, x1, y2, x2, vdat;
+
+ SYNCH_GROUP(
+ FORBLOCK1D(it, m, SPLITTER(NUM_THREADS),
+ TASK(MODE(READ(/*dat,*/ col, st, x) READWRITE(y)),
+ {
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ for (index_t j = st[i]; j < st[i + 1]; ++j) {
+ uint32_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ y2 = simd::load(y+i*ldy+k+simd::vect_size);
+ x1 = simd::load(x + col[j] * ldx + k);
+ x2 = simd::load(x + col[j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldy + k, simd::add(y1, x1));
+ simd::store(y + i * ldy + k + simd::vect_size, simd::add(y2, x2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ x1 = simd::load(x + col[j] * ldx + k);
+ simd::store(y + i * ldy + k, simd::add(y1, x1));
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ }
+ }
+ }
+ }
+ );
+ );
+ );
+ //*/
+ /*
+#pragma omp parallel for schedule(static, 256)
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ vect_t y1, x1, y2, x2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ y2 = simd::load(y+i*ldy+k+simd::vect_size);
+ x1 = simd::load(x + col[j] * ldx + k);
+ x2 = simd::load(x + col[j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldy + k, simd::add(y1, x1));
+ simd::store(y + i * ldy + k + simd::vect_size, simd::add(y2, x2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ x1 = simd::load(x + col[j] * ldx + k);
+ simd::store(y + i * ldy + k, simd::add(y1, x1));
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ }
+ }
+ }
+ //*/
+}
+
+template <class Field>
+inline void pfspmm_one_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+
+ vect_t y1, x1, y2, x2, vdat;
+ size_t m = A.m;
+ SYNCH_GROUP(
+ FORBLOCK1D(it, m, SPLITTER(NUM_THREADS),
+ TASK(MODE(READ(/*dat,*/ col, st, x) READWRITE(y)),
+ {
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ for (index_t j = st[i]; j < st[i + 1]; ++j) {
+ uint32_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ y2 = simd::loadu(y+i*ldy+k+simd::vect_size);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ x2 = simd::loadu(x + col[j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldy + k, simd::add(y1, x1));
+ simd::storeu(y + i * ldy + k + simd::vect_size, simd::add(y2, x2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ simd::storeu(y + i * ldy + k, simd::add(y1, x1));
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ }
+ }
+ }
+ }
+ );
+ );
+ );
+ /*
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ vect_t y1, x1, y2, x2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ y2 = simd::loadu(y+i*ldy+k+simd::vect_size);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ x2 = simd::loadu(x + col[j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldy + k, simd::add(y1, x1));
+ simd::storeu(y + i * ldy + k + simd::vect_size, simd::add(y2, x2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ simd::storeu(y + i * ldy + k, simd::add(y1, x1));
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ }
+ }
+ }
+ */
+}
+
+template <class Field>
+inline void pfspmm_mone_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ //*
+ size_t m = A.m;
+ vect_t y1, x1, y2, x2, vdat;
+
+ SYNCH_GROUP(
+ FORBLOCK1D(it, m, SPLITTER(NUM_THREADS),
+ TASK(MODE(READ(/*dat,*/ col, st, x) READWRITE(y)),
+ {
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ for (index_t j = st[i]; j < st[i + 1]; ++j) {
+ uint32_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ y2 = simd::load(y+i*ldy+k+simd::vect_size);
+ x1 = simd::load(x + col[j] * ldx + k);
+ x2 = simd::load(x + col[j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldy + k, simd::sub(y1, x1));
+ simd::store(y + i * ldy + k + simd::vect_size, simd::sub(y2, x2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ x1 = simd::load(x + col[j] * ldx + k);
+ simd::store(y + i * ldy + k, simd::sub(y1, x1));
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ }
+ }
+ }
+ }
+ );
+ );
+ );
+ //*/
+ /*
+#pragma omp parallel for schedule(static, 256)
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ vect_t y1, x1, y2, x2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ y2 = simd::load(y+i*ldy+k+simd::vect_size);
+ x1 = simd::load(x + col[j] * ldx + k);
+ x2 = simd::load(x + col[j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldy + k, simd::sub(y1, x1));
+ simd::store(y + i * ldy + k + simd::vect_size, simd::sub(y2, x2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ x1 = simd::load(x + col[j] * ldx + k);
+ simd::store(y + i * ldy + k, simd::sub(y1, x1));
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] -= x[col[j] * ldx + k];
+ }
+ }
+ }
+ //*/
+}
+
+template <class Field>
+inline void pfspmm_mone_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+
+ size_t m = A.m;
+ vect_t y1, x1, y2, x2, vdat;
+
+ SYNCH_GROUP(
+ FORBLOCK1D(it, m, SPLITTER(NUM_THREADS),
+ TASK(MODE(READ(/*dat,*/ col, st, x) READWRITE(y)),
+ {
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ for (index_t j = st[i]; j < st[i + 1]; ++j) {
+ uint32_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ y2 = simd::loadu(y+i*ldy+k+simd::vect_size);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ x2 = simd::loadu(x + col[j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldy + k, simd::sub(y1, x1));
+ simd::storeu(y + i * ldy + k + simd::vect_size, simd::sub(y2, x2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ simd::storeu(y + i * ldy + k, simd::sub(y1, x1));
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ }
+ }
+ }
+ }
+ );
+ );
+ );
+ /*
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ vect_t y1, x1, y2, x2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ y2 = simd::loadu(y+i*ldy+k+simd::vect_size);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ x2 = simd::loadu(x + col[j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldy + k, simd::sub(y1, x1));
+ simd::storeu(y + i * ldy + k + simd::vect_size, simd::sub(y2, x2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ simd::storeu(y + i * ldy + k, simd::sub(y1, x1));
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] -= x[col[j] * ldx + k];
+ }
+ }
+ }
+ */
+}
+
+#endif //__FFLASFFPACK_USE_SIMD
+
+} // CSR_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_CSR_spmm_INL
diff --git a/fflas-ffpack/fflas/fflas_sparse/csr/csr_pspmv.inl b/fflas-ffpack/fflas/fflas_sparse/csr/csr_pspmv.inl
new file mode 100644
index 0000000..4fc16f2
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/csr/csr_pspmv.inl
@@ -0,0 +1,429 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_CSR_pspmv_INL
+#define __FFLASFFPACK_fflas_sparse_CSR_pspmv_INL
+
+#ifdef __FFLASFFPACK_USE_TBB
+#include "tbb/parallel_for.h"
+#include "tbb/blocked_range.h"
+#endif
+
+ #include <thread>
+
+namespace FFLAS {
+namespace sparse_details_impl {
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::GenericTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#if defined(__FFLASFFPACK_USE_TBB)
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, &A, x, y, dat, col, st](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ F.axpyin(y1, dat[start + j], x[col[start + j]]);
+ F.axpyin(y2, dat[start + j + 1], x[col[start + j + 1]]);
+ F.axpyin(y3, dat[start + j + 2], x[col[start + j + 2]]);
+ F.axpyin(y4, dat[start + j + 3], x[col[start + j + 3]]);
+ }
+ for (; j < diff; ++j) {
+ F.axpyin(y1, dat[start + j], x[col[start + j]]);
+ }
+ F.addin(y[i], y1);
+ F.addin(y[i], y2);
+ F.addin(y[i], y3);
+ F.addin(y[i], y4);
+ }
+ });
+#else
+// The minimum size has to be a multiple of cache_line/sizeof(Element) to avoid
+// cache coherency problem (ex: 8 for double, 16 for float)
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ F.axpyin(y1, dat[start + j], x[col[start + j]]);
+ F.axpyin(y2, dat[start + j + 1], x[col[start + j + 1]]);
+ F.axpyin(y3, dat[start + j + 2], x[col[start + j + 2]]);
+ F.axpyin(y4, dat[start + j + 3], x[col[start + j + 3]]);
+ }
+ for (; j < diff; ++j) {
+ F.axpyin(y1, dat[start + j], x[col[start + j]]);
+ }
+ F.addin(y[i], y1);
+ F.addin(y[i], y2);
+ F.addin(y[i], y3);
+ F.addin(y[i], y4);
+ }
+#endif
+}
+
+template<class Field>
+inline void pfspmv_task(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, const index_t iStart, const index_t iStop, FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for(index_t i = iStart ; i < iStop ; ++i){
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += dat[start + j] * x[col[start + j]];
+ y2 += dat[start + j + 1] * x[col[start + j + 1]];
+ y3 += dat[start + j + 2] * x[col[start + j + 2]];
+ y4 += dat[start + j + 3] * x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += dat[start + j] * x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+}
+
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#if defined(__FFLASFFPACK_USE_TBB)
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, &A, x, y, dat, col, st](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += dat[start + j] * x[col[start + j]];
+ y2 += dat[start + j + 1] * x[col[start + j + 1]];
+ y3 += dat[start + j + 2] * x[col[start + j + 2]];
+ y4 += dat[start + j + 3] * x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += dat[start + j] * x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+ });
+#else
+// #pragma omp parallel for schedule(static, 8)
+// for (index_t i = 0; i < A.m; ++i) {
+// auto start = st[i], stop = st[i + 1];
+// index_t j = 0;
+// index_t diff = stop - start;
+// typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+// for (; j < ROUND_DOWN(diff, 4); j += 4) {
+// y1 += dat[start + j] * x[col[start + j]];
+// y2 += dat[start + j + 1] * x[col[start + j + 1]];
+// y3 += dat[start + j + 2] * x[col[start + j + 2]];
+// y4 += dat[start + j + 3] * x[col[start + j + 3]];
+// }
+// for (; j < diff; ++j) {
+// y1 += dat[start + j] * x[col[start + j]];
+// }
+// y[i] += y1 + y2 + y3 + y4;
+// }
+ std::vector<std::thread> pool(6);
+
+#endif
+}
+
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, const int64_t kmax) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#if defined(__FFLASFFPACK_USE_TBB)
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, &A, x, y, kmax, dat, col, st](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = st[i];
+ index_t j_loc = j;
+ index_t j_end = st[i + 1];
+ index_t block = (j_end - j_loc) / kmax;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ y[i] += dat[j] * x[col[j]];
+ }
+ F.reduce(y[i]);
+ }
+ for (; j < j_end; ++j) {
+ y[i] += dat[j] * x[col[j]];
+ }
+ F.reduce(y[i]);
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = st[i];
+ index_t j_loc = j;
+ index_t j_end = st[i + 1];
+ index_t block = (j_end - j_loc) / kmax;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ y[i] += dat[j] * x[col[j]];
+ }
+ F.reduce(y[i]);
+ }
+ for (; j < j_end; ++j) {
+ y[i] += dat[j] * x[col[j]];
+ }
+ F.reduce(y[i]);
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ size_t am = A.m;
+ SYNCH_GROUP(
+ FORBLOCK1D(it, am, SPLITTER(NUM_THREADS),
+ TASK(MODE(CONSTREFERENCE(F) READ(col, st, x) READWRITE(y)),
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ auto start = st[i];
+ auto stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1;
+ typename Field::Element y2;
+ typename Field::Element y3;
+ typename Field::Element y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ F.addin(y1, x[col[start + j]]);
+ F.addin(y2, x[col[start + j + 1]]);
+ F.addin(y3, x[col[start + j + 2]]);
+ F.addin(y4, x[col[start + j + 3]]);
+ }
+ for (; j < diff; ++j) {
+ F.addin(y1, x[col[start + j]]);
+ }
+ F.addin(y[i], y1);
+ F.addin(y[i], y2);
+ F.addin(y[i], y3);
+ F.addin(y[i], y4);
+ }
+ );
+ );
+ );
+}
+
+template <class Field>
+inline void pfspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ size_t am = A.m;
+ SYNCH_GROUP(
+ FORBLOCK1D(it, am, SPLITTER(NUM_THREADS),
+ TASK(MODE(CONSTREFERENCE(F) READ(col, st, x) READWRITE(y)),
+ for (index_t i = it.begin(); i < it.end(); ++i) {
+ auto start = st[i];
+ auto stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1;
+ typename Field::Element y2;
+ typename Field::Element y3;
+ typename Field::Element y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ F.addin(y1, x[col[start + j]]);
+ F.addin(y2, x[col[start + j + 1]]);
+ F.addin(y3, x[col[start + j + 2]]);
+ F.addin(y4, x[col[start + j + 3]]);
+ }
+ for (; j < diff; ++j) {
+ F.addin(y1, x[col[start + j]]);
+ }
+ F.subin(y[i], y1);
+ F.subin(y[i], y2);
+ F.subin(y[i], y3);
+ F.subin(y[i], y4);
+ }
+ );
+ );
+ );
+}
+
+template <class Field>
+inline void pfspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#if defined(__FFLASFFPACK_USE_TBB)
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, &A, x, y, col, st](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#if defined(__FFLASFFPACK_USE_TBB)
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, &A, x, y, col, st](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] -= y1 + y2 + y3 + y4;
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] -= y1 + y2 + y3 + y4;
+ }
+#endif
+}
+
+} // CSR_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_CSR_pspmv_INL
diff --git a/fflas-ffpack/fflas/fflas_sparse/csr/csr_spmm.inl b/fflas-ffpack/fflas/fflas_sparse/csr/csr_spmm.inl
new file mode 100644
index 0000000..467f265
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/csr/csr_spmm.inl
@@ -0,0 +1,611 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_CSR_spmm_INL
+#define __FFLASFFPACK_fflas_sparse_CSR_spmm_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::GenericTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.axpyin(y[i * ldy + k], dat[j], x[col[j] * ldx + k]);
+ F.axpyin(y[i * ldy + k + 1], dat[j], x[col[j] * ldx + k + 1]);
+ F.axpyin(y[i * ldy + k + 2], dat[j], x[col[j] * ldx + k + 2]);
+ F.axpyin(y[i * ldy + k + 3], dat[j], x[col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.axpyin(y[i * ldy + k], dat[j], x[col[j] * ldx + k]);
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A,
+ index_t blockSize,
+ typename Field::ConstElement_ptr x_, index_t ldx,
+ typename Field::Element_ptr y_, index_t ldy,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ y[i * ldy + k + 1] += dat[j] * x[col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] += dat[j] * x[col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] += dat[j] * x[col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+}
+
+#ifdef __FFLASFFPACK_HAVE_MKL
+inline void fspmm_mkl(const Givaro::DoubleDomain &F, const Sparse<Givaro::DoubleDomain, SparseMatrix_t::CSR> &A,
+ index_t blockSize,
+ Givaro::DoubleDomain::ConstElement_ptr x_, index_t ldx,
+ Givaro::DoubleDomain::Element_ptr y_, index_t ldy,
+ FieldCategories::UnparametricTag) {
+ // assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ // assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ mkl_dcsrmm(MKL_CONFIG::trans, &A.m , &blockSize, &A.n, &MKL_CONFIG::dalpha, MKL_CONFIG::metaChar,
+ A.dat, A.col, A.st, A.st+1, x_, &ldx, &MKL_CONFIG::dbeta, y_ , &ldy);
+
+ // void mkl_dcsrmm (char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, double *b, MKL_INT *ldb, double *beta, double *c, MKL_INT *ldc);
+
+}
+inline void fspmm_mkl(const Givaro::FloatDomain &F, const Sparse<Givaro::FloatDomain, SparseMatrix_t::CSR> &A,
+ index_t blockSize,
+ Givaro::FloatDomain::ConstElement_ptr x_, index_t ldx,
+ Givaro::FloatDomain::Element_ptr y_, index_t ldy,
+ FieldCategories::UnparametricTag) {
+ // assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ // assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ mkl_scsrmm(MKL_CONFIG::trans, &A.m , &blockSize, &A.n, &MKL_CONFIG::salpha, MKL_CONFIG::metaChar,
+ A.dat, A.col, A.st, A.st+1, x_, &ldx, &MKL_CONFIG::sbeta, y_ , &ldy);
+
+ // void mkl_scsrmm (char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, float *alpha, char *matdescra, float *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, float *b, MKL_INT *ldb, float *beta, float *c, MKL_INT *ldc);i
+
+}
+#endif // __FFLASFFPACK_HAVE_MKL
+
+
+
+// #ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ // std::cout << "spmm simd Unparam aligned" << std::endl;
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ vect_t y1, x1, y2, x2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ y2 = simd::load(y+i*ldy+k+simd::vect_size);
+ x1 = simd::load(x + col[j] * ldx + k);
+ x2 = simd::load(x + col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, vdat);
+ y2 = simd::fmadd(y2, x2, vdat);
+ simd::store(y + i * ldy + k, y1);
+ simd::store(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ x1 = simd::load(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, vdat);
+ simd::store(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ // std::cout << "spmm simd Unparam unaligned" << std::endl;
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ vect_t y1, x1, y2, x2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ y2 = simd::loadu(y+i*ldy+k+simd::vect_size);
+ x1 = simd::loadu(x + A.col[j] * ldx + k);
+ x2 = simd::loadu(x + A.col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, vdat);
+ y2 = simd::fmadd(y2, x2, vdat);
+ simd::storeu(y + i * ldy + k, y1);
+ simd::storeu(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, vdat);
+ simd::storeu(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ }
+}
+// #endif
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ const int64_t kmax) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = st[i];
+ index_t j_loc = j;
+ index_t j_end = st[i + 1];
+ index_t block = (j_end - j_loc) / kmax;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ for (size_t k = 0; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ // TODO : replace with freduce
+ FFLAS::freduce(F,blockSize,y+i*ldy,1);
+ // for (size_t k = 0; k < blockSize; ++k) {
+ // F.reduce(y[i * ldy + k]);
+ // }
+ }
+ for (; j < j_end; ++j) {
+ for (size_t k = 0; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ FFLAS::freduce(F,blockSize,y+i*ldy,1);
+ // for (size_t k = 0; k < blockSize; ++k) {
+ // F.reduce(y[i * ldy + k]);
+ // }
+ }
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ const int64_t kmax) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = st[i];
+ index_t j_loc = j;
+ index_t j_end = st[i + 1];
+ index_t block = (j_end - j_loc) / kmax;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ vect_t y1, x1, y2, x2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ y2 = simd::loadu(y+i*ldy+k+simd::vect_size);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ x2 = simd::loadu(x + col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, vdat);
+ y2 = simd::fmadd(y2, x2, vdat);
+ simd::storeu(y + i * ldy + k, y1);
+ simd::storeu(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, vdat);
+ simd::storeu(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ // TODO : replace with freduce
+ //
+ FFLAS::freduce(F,blockSize,y+i*ldy,1);
+ // for (size_t k = 0; k < blockSize; ++k) {
+ // F.reduce(y[i * ldy + k]);
+ // }
+ }
+ for (; j < j_end; ++j) {
+ vect_t y1, x1, y2, x2, vdat;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ size_t k = 0;
+ vdat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ y2 = simd::loadu(y+i*ldy+k+simd::vect_size);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ x2 = simd::loadu(x + col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, vdat);
+ y2 = simd::fmadd(y2, x2, vdat);
+ simd::storeu(y + i * ldy + k, y1);
+ simd::storeu(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, vdat);
+ simd::storeu(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ FFLAS::freduce(F,blockSize,y+i*ldy,1);
+ // for (size_t k = 0; k < blockSize; ++k) {
+ // F.reduce(y[i * ldy + k]);
+ // }
+ }
+}
+
+template <class Field>
+inline void fspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ const int64_t kmax) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = st[i];
+ index_t j_loc = j;
+ index_t j_end = st[i + 1];
+ index_t block = (j_end - j_loc) / kmax;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ vect_t y1, x1, y2, x2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ y2 = simd::load(y+i*ldy+k+simd::vect_size);
+ x1 = simd::load(x + col[j] * ldx + k);
+ x2 = simd::load(x + col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, vdat);
+ y2 = simd::fmadd(y2, x2, vdat);
+ simd::store(y + i * ldy + k, y1);
+ simd::store(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ x1 = simd::load(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, vdat);
+ simd::store(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ // TODO : replace with freduce
+ FFLAS::freduce(F,blockSize,y+i*ldy,1);
+ // for (size_t k = 0; k < blockSize; ++k) {
+ // F.reduce(y[i * ldy + k]);
+ // }
+ }
+ for (; j < j_end; ++j) {
+ vect_t y1, x1, y2, x2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ y2 = simd::load(y+i*ldy+k+simd::vect_size);
+ x1 = simd::load(x + col[j] * ldx + k);
+ x2 = simd::load(x + col[j] * ldx + k + simd::vect_size);
+ y1 = simd::fmadd(y1, x1, vdat);
+ y2 = simd::fmadd(y2, x2, vdat);
+ simd::store(y + i * ldy + k, y1);
+ simd::store(y + i * ldy + k + simd::vect_size, y2);
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ x1 = simd::load(x + col[j] * ldx + k);
+ y1 = simd::fmadd(y1, x1, vdat);
+ simd::store(y + i * ldy + k, y1);
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[j] * x[col[j] * ldx + k];
+ }
+ }
+ FFLAS::freduce(F,blockSize,y+i*ldy,1);
+ // for (size_t k = 0; k < blockSize; ++k) {
+ // F.reduce(y[i * ldy + k]);
+ // }
+ }
+}
+
+#endif // SIMD
+
+template <class Field>
+inline void fspmm_one(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::GenericTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.addin(y[i * ldy + k], x[col[j] * ldx + k]);
+ F.addin(y[i * ldy + k + 1], x[col[j] * ldx + k + 1]);
+ F.addin(y[i * ldy + k + 2], x[col[j] * ldx + k + 2]);
+ F.addin(y[i * ldy + k + 3], x[col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.addin(y[i * ldy + k], x[col[j] * ldx + k]);
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_mone(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::GenericTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.subin(y[i * ldy + k], x[col[j] * ldx + k]);
+ F.subin(y[i * ldy + k + 1], x[col[j] * ldx + k + 1]);
+ F.subin(y[i * ldy + k + 2], x[col[j] * ldx + k + 2]);
+ F.subin(y[i * ldy + k + 3], x[col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.subin(y[i * ldy + k], x[col[j] * ldx + k]);
+ }
+ }
+}
+
+// #ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmm_one_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ vect_t y1, x1, y2, x2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ y2 = simd::load(y+i*ldy+k+simd::vect_size);
+ x1 = simd::load(x + col[j] * ldx + k);
+ x2 = simd::load(x + col[j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldy + k, simd::add(y1, x1));
+ simd::store(y + i * ldy + k + simd::vect_size, simd::add(y2, x2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ x1 = simd::load(x + col[j] * ldx + k);
+ simd::store(y + i * ldy + k, simd::add(y1, x1));
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ }
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_one_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ vect_t y1, x1, y2, x2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ y2 = simd::loadu(y+i*ldy+k+simd::vect_size);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ x2 = simd::loadu(x + col[j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldy + k, simd::add(y1, x1));
+ simd::storeu(y + i * ldy + k + simd::vect_size, simd::add(y2, x2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ simd::storeu(y + i * ldy + k, simd::add(y1, x1));
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ }
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_mone_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ vect_t y1, x1, y2, x2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ y2 = simd::load(y+i*ldy+k+simd::vect_size);
+ x1 = simd::load(x + col[j] * ldx + k);
+ x2 = simd::load(x + col[j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldy + k, simd::sub(y1, x1));
+ simd::store(y + i * ldy + k + simd::vect_size, simd::sub(y2, x2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::load(y+i*ldy+k);
+ x1 = simd::load(x + col[j] * ldx + k);
+ simd::store(y + i * ldy + k, simd::sub(y1, x1));
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] -= x[col[j] * ldx + k];
+ }
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_mone_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ for (index_t j = start; j < stop; ++j) {
+ vect_t y1, x1, y2, x2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ y2 = simd::loadu(y+i*ldy+k+simd::vect_size);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ x2 = simd::loadu(x + col[j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldy + k, simd::sub(y1, x1));
+ simd::storeu(y + i * ldy + k + simd::vect_size, simd::sub(y2, x2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ y1 = simd::loadu(y+i*ldy+k);
+ x1 = simd::loadu(x + col[j] * ldx + k);
+ simd::storeu(y + i * ldy + k, simd::sub(y1, x1));
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] -= x[col[j] * ldx + k];
+ }
+ }
+ }
+}
+
+// #endif //__FFLASFFPACK_USE_SIMD
+
+} // CSR_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_CSR_spmm_INL
diff --git a/fflas-ffpack/fflas/fflas_sparse/csr/csr_spmv.inl b/fflas-ffpack/fflas/fflas_sparse/csr/csr_spmv.inl
new file mode 100644
index 0000000..b066d5e
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/csr/csr_spmv.inl
@@ -0,0 +1,330 @@
+ /* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_CSR_spmv_INL
+#define __FFLASFFPACK_fflas_sparse_CSR_spmv_INL
+
+
+namespace FFLAS {
+namespace sparse_details_impl {
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::GenericTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ F.axpyin(y1, dat[start + j], x[col[start + j]]);
+ F.axpyin(y2, dat[start + j + 1], x[col[start + j + 1]]);
+ F.axpyin(y3, dat[start + j + 2], x[col[start + j + 2]]);
+ F.axpyin(y4, dat[start + j + 3], x[col[start + j + 3]]);
+ }
+ for (; j < diff; ++j) {
+ F.axpyin(y1, dat[start + j], x[col[start + j]]);
+ }
+ F.addin(y[i], y1);
+ F.addin(y[i], y2);
+ F.addin(y[i], y3);
+ F.addin(y[i], y4);
+ }
+}
+
+#if 0
+template <class Field>
+inline void fspmv_task(const Field &F, const index_t start_, const index_t size_ const Sparse<Field, SparseMatrix_t::CSR> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::GenericTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = start_; i < start_+size_; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ F.axpyin(y1, dat[start + j], x[col[start + j]]);
+ F.axpyin(y2, dat[start + j + 1], x[col[start + j + 1]]);
+ F.axpyin(y3, dat[start + j + 2], x[col[start + j + 2]]);
+ F.axpyin(y4, dat[start + j + 3], x[col[start + j + 3]]);
+ }
+ for (; j < diff; ++j) {
+ F.axpyin(y1, dat[start + j], x[col[start + j]]);
+ }
+ F.addin(y[i], y1);
+ F.addin(y[i], y2);
+ F.addin(y[i], y3);
+ F.addin(y[i], y4);
+ }
+}
+#endif
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += dat[start + j] * x[col[start + j]];
+ y2 += dat[start + j + 1] * x[col[start + j + 1]];
+ y3 += dat[start + j + 2] * x[col[start + j + 2]];
+ y4 += dat[start + j + 3] * x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += dat[start + j] * x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+}
+
+#ifdef __FFLASFFPACK_HAVE_MKL
+inline void fspmv_mkl(const Givaro::DoubleDomain &F, const Sparse<Givaro::DoubleDomain, SparseMatrix_t::CSR> &A,
+ Givaro::DoubleDomain::ConstElement_ptr x_,
+ Givaro::DoubleDomain::Element_ptr y_, FieldCategories::UnparametricTag) {
+ // assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ // assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ mkl_dcsrmv(MKL_CONFIG::trans, &A.m , &A.n, &MKL_CONFIG::dalpha, MKL_CONFIG::metaChar,
+ A.dat, A.col, A.st, A.st+1, x_, &MKL_CONFIG::dbeta, y_ );
+
+ // void mkl_dcsrmv (char *transa, MKL_INT *m, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, double *x, double *beta, double *y);
+
+}
+
+inline void fspmv_mkl(const Givaro::FloatDomain &F, const Sparse<Givaro::FloatDomain, SparseMatrix_t::CSR> &A,
+ Givaro::FloatDomain::ConstElement_ptr x_,
+ Givaro::FloatDomain::Element_ptr y_, FieldCategories::UnparametricTag) {
+ // assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ // assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ // assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ mkl_scsrmv(MKL_CONFIG::trans, &A.m , &A.n, &MKL_CONFIG::salpha, MKL_CONFIG::metaChar,
+ A.dat, A.col, A.st, A.st+1, x_, &MKL_CONFIG::sbeta, y_ );
+
+ // void mkl_scsrmv (char *transa, MKL_INT *m, MKL_INT *k, float *alpha, char *matdescra, float *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, float *x, float *beta, float *y);
+
+}
+#endif // __FFLASFFPACK_HAVE_MKL
+
+#if 0
+template <class Field>
+inline void fspmv_task(const Field &F, const index_t start_, const index_t size_, const Sparse<Field, SparseMatrix_t::CSR> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ for (index_t i = start_; i < start_+size_; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += dat[start + j] * x[col[start + j]];
+ y2 += dat[start + j + 1] * x[col[start + j + 1]];
+ y3 += dat[start + j + 2] * x[col[start + j + 2]];
+ y4 += dat[start + j + 3] * x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += dat[start + j] * x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+}
+#endif
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::CSR> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, const int64_t kmax) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = st[i];
+ index_t j_loc = j;
+ index_t j_end = st[i + 1];
+ index_t block = (j_end - j_loc) / kmax;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ y[i] += dat[j] * x[col[j]];
+ }
+ F.reduce(y[i]);
+ }
+ for (; j < j_end; ++j) {
+ y[i] += dat[j] * x[col[j]];
+ }
+ F.reduce(y[i]);
+ }
+}
+
+template <class Field>
+inline void fspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ F.addin(y1, x[col[start + j]]);
+ F.addin(y2, x[col[start + j + 1]]);
+ F.addin(y3, x[col[start + j + 2]]);
+ F.addin(y4, x[col[start + j + 3]]);
+ }
+ for (; j < diff; ++j) {
+ F.addin(y1, x[col[start + j]]);
+ }
+ F.addin(y[i], y1 + y2 + y3 + y4);
+ }
+}
+
+template <class Field>
+inline void fspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ F.addin(y1, x[col[start + j]]);
+ F.addin(y2, x[col[start + j + 1]]);
+ F.addin(y3, x[col[start + j + 2]]);
+ F.addin(y4, x[col[start + j + 3]]);
+ }
+ for (; j < diff; ++j) {
+ F.addin(y1, x[col[start + j]]);
+ }
+ F.subin(y[i], y1 + y2 + y3 + y4);
+ }
+}
+
+template <class Field>
+inline void fspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+}
+
+template <class Field>
+inline void fspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = st[i], stop = st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] -= y1 + y2 + y3 + y4;
+ }
+}
+
+} // sparse_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_CSR_spmv_INL
diff --git a/fflas-ffpack/fflas/fflas_sparse/csr/csr_utils.inl b/fflas-ffpack/fflas/fflas_sparse/csr/csr_utils.inl
new file mode 100755
index 0000000..d40b4df
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/csr/csr_utils.inl
@@ -0,0 +1,251 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+namespace FFLAS {
+
+template <class Field> inline void sparse_delete(const Sparse<Field, SparseMatrix_t::CSR> &A) {
+ fflas_delete(A.dat);
+ fflas_delete(A.col);
+ fflas_delete(A.st);
+}
+
+template <class Field> inline void sparse_delete(const Sparse<Field, SparseMatrix_t::CSR_ZO> &A) {
+ fflas_delete(A.col);
+ fflas_delete(A.st);
+}
+
+template <class Field> inline std::ostream& sparse_print(std::ostream& os, const Sparse<Field, SparseMatrix_t::CSR> &A) {
+ // for (size_t i = 0; i <= A.m; ++i)
+ // std::cout << A.st[i] << " ";
+ // std::cout << std::endl;
+ for (index_t i = 0; i < A.m; ++i) {
+ auto start = A.st[i], stop = A.st[i + 1];
+ index_t j = 0;
+ index_t diff = stop - start;
+ os << i << " : ";
+ for (; j < diff; ++j) {
+ os << '(' << A.col[start + j] << ',' << A.dat[start+j] << ") ";
+ }
+ os << std::endl;
+ }
+ return os;
+}
+
+template <class IndexT>
+inline void sparse_init(const Givaro::Modular<Givaro::Integer> &F, Sparse<Givaro::Modular<Givaro::Integer>, SparseMatrix_t::CSR> &A, const IndexT *row, const IndexT *col,
+ Givaro::Integer* dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz) {
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ A.nElements = nnz;
+ std::vector<uint64_t> rows(rowdim, 0);
+ for (uint64_t i = 0; i < A.nnz; ++i)
+ rows[row[i]]++;
+
+ A.delayed = true;
+
+ A.col = fflas_new<index_t>(nnz, Alignment::CACHE_LINE);
+ A.st = fflas_new<index_t>(rowdim + 1, Alignment::CACHE_LINE);
+ A.dat = fflas_new(F, nnz, 1, Alignment::CACHE_LINE);
+
+ for(size_t i = 0 ; i < nnz ; ++i){
+ if(col[i] >= coldim){
+ std::cout << "Error col index too big" << std::endl;
+ }
+ }
+
+ for (size_t i = 0; i < nnz; ++i) {
+ A.col[i] = static_cast<index_t>(col[i]);
+ A.dat[i] = dat[i];
+ }
+
+ A.st[0] = 0;
+ for (size_t i = 1; i <= rowdim; ++i) {
+ A.st[i] = A.st[i - 1] + rows[i - 1];
+ }
+}
+
+template <class IndexT>
+inline void sparse_init(const Givaro::ZRing<Givaro::Integer> &F, Sparse<Givaro::ZRing<Givaro::Integer>, SparseMatrix_t::CSR_ZO> &A, const IndexT *row, const IndexT *col,
+ Givaro::Integer* dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz) {
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ A.nElements = nnz;
+ std::vector<uint64_t> rows(rowdim, 0);
+ for (uint64_t i = 0; i < A.nnz; ++i)
+ rows[row[i]]++;
+
+ A.delayed = true;
+
+ A.col = fflas_new<index_t>(nnz, Alignment::CACHE_LINE);
+ A.st = fflas_new<index_t>(rowdim + 1, Alignment::CACHE_LINE);
+
+ for(size_t i = 0 ; i < nnz ; ++i){
+ if(col[i] >= coldim){
+ std::cout << "Error col index too big" << std::endl;
+ }
+ }
+
+ for (size_t i = 0; i < nnz; ++i) {
+ A.col[i] = static_cast<index_t>(col[i]);
+ }
+
+ A.st[0] = 0;
+ for (size_t i = 1; i <= rowdim; ++i) {
+ A.st[i] = A.st[i - 1] + rows[i - 1];
+ }
+}
+
+template <class IndexT, size_t RECINT_SIZE>
+inline void sparse_init(const Givaro::ZRing<RecInt::rmint<RECINT_SIZE>> &F, Sparse<Givaro::ZRing<RecInt::rmint<RECINT_SIZE>>, SparseMatrix_t::CSR_ZO> &A, const IndexT *row, const IndexT *col,
+ typename Givaro::ZRing<RecInt::rmint<RECINT_SIZE>>::Element_ptr dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz) {
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ A.nElements = nnz;
+ std::vector<uint64_t> rows(rowdim, 0);
+ for (uint64_t i = 0; i < A.nnz; ++i)
+ rows[row[i]]++;
+
+ A.delayed = true;
+
+ A.col = fflas_new<index_t>(nnz, Alignment::CACHE_LINE);
+ A.st = fflas_new<index_t>(rowdim + 1, Alignment::CACHE_LINE);
+
+ for(size_t i = 0 ; i < nnz ; ++i){
+ if(col[i] >= coldim){
+ std::cout << "Error col index too big" << std::endl;
+ }
+ }
+
+ for (size_t i = 0; i < nnz; ++i) {
+ A.col[i] = static_cast<index_t>(col[i]);
+ }
+
+ A.st[0] = 0;
+ for (size_t i = 1; i <= rowdim; ++i) {
+ A.st[i] = A.st[i - 1] + rows[i - 1];
+ }
+}
+
+template <class IndexT, size_t RECINT_SIZE>
+inline void sparse_init(const Givaro::ZRing<RecInt::rmint<RECINT_SIZE>> &F, Sparse<Givaro::ZRing<RecInt::rmint<RECINT_SIZE>>, SparseMatrix_t::CSR> &A, const IndexT *row, const IndexT *col,
+ typename Givaro::ZRing<RecInt::rmint<RECINT_SIZE>>::Element_ptr dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz) {
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ A.nElements = nnz;
+ std::vector<uint64_t> rows(rowdim, 0);
+ for (uint64_t i = 0; i < A.nnz; ++i)
+ rows[row[i]]++;
+
+ A.delayed = true;
+
+ A.col = fflas_new<index_t>(nnz, Alignment::CACHE_LINE);
+ A.st = fflas_new<index_t>(rowdim + 1, Alignment::CACHE_LINE);
+
+ for(size_t i = 0 ; i < nnz ; ++i){
+ if(col[i] >= coldim){
+ std::cout << "Error col index too big" << std::endl;
+ }
+ }
+
+ for (size_t i = 0; i < nnz; ++i) {
+ A.col[i] = static_cast<index_t>(col[i]);
+ }
+
+ A.st[0] = 0;
+ for (size_t i = 1; i <= rowdim; ++i) {
+ A.st[i] = A.st[i - 1] + rows[i - 1];
+ }
+}
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::CSR> &A, const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz) {
+ A.kmax = Protected::DotProdBoundClassic(F, F.one);
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ A.nElements = nnz;
+ std::vector<uint64_t> rows(rowdim, 0);
+ for (uint64_t i = 0; i < A.nnz; ++i)
+ rows[row[i]]++;
+
+ A.maxrow = *(std::max_element(rows.begin(), rows.end()));
+
+ if (A.kmax > A.maxrow)
+ A.delayed = true;
+
+ A.col = fflas_new<index_t>(nnz, Alignment::CACHE_LINE);
+ A.st = fflas_new<index_t>(rowdim + 1, Alignment::CACHE_LINE);
+ A.stend = fflas_new<index_t>(rowdim + 1, Alignment::CACHE_LINE);
+ A.dat = fflas_new(F, nnz, 1, Alignment::CACHE_LINE);
+
+ for (size_t i = 0; i < nnz; ++i) {
+ A.col[i] = static_cast<index_t>(col[i]);
+ A.dat[i] = dat[i];
+ }
+ A.st[0] = 0;
+ for (size_t i = 1; i <= rowdim; ++i) {
+ A.st[i] = A.st[i - 1] + rows[i - 1];
+ }
+ for(size_t i = 0 ; i < rowdim ; ++i){
+ A.stend[i] = A.st[i+1];
+ }
+}
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::CSR_ZO> &A, const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz) {
+ A.delayed = true;
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ A.nElements = nnz;
+ std::vector<uint64_t> rows(A.m, 0);
+ for (uint64_t i = 0; i < A.nnz; ++i)
+ rows[row[i]]++;
+ A.maxrow = *(std::max_element(rows.begin(), rows.end()));
+ A.col = fflas_new<index_t>(nnz, Alignment::CACHE_LINE);
+ A.st = fflas_new<index_t>(rowdim + 1, Alignment::CACHE_LINE);
+ for (size_t i = 0; i < nnz; ++i) {
+ A.col[i] = static_cast<index_t>(col[i]);
+ }
+ for (size_t i = 0; i <= rowdim; ++i) {
+ A.st[i] = 0;
+ }
+ for (size_t i = 0; i < nnz; ++i) {
+ A.st[row[i] + 1]++;
+ }
+ for (size_t i = 1; i <= rowdim; ++i) {
+ A.st[i] += A.st[i - 1];
+ }
+}
+}
diff --git a/fflas-ffpack/fflas/fflas_sparse/csr_hyb.h b/fflas-ffpack/fflas/fflas_sparse/csr_hyb.h
new file mode 100644
index 0000000..23c80cc
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/csr_hyb.h
@@ -0,0 +1,73 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_fspmv_CSR_HYB.inl
+ * NO DOC
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_CSR_HYB_H
+#define __FFLASFFPACK_fflas_sparse_CSR_HYB_H
+
+namespace FFLAS { /* CSR_HYB */
+
+template <class _Field> struct Sparse<_Field, SparseMatrix_t::CSR_HYB> {
+ using Field = _Field;
+ bool delayed = false;
+ index_t *col = nullptr;
+ index_t *st = nullptr;
+ typename _Field::Element_ptr dat;
+ uint64_t kmax = 0;
+ index_t m = 0;
+ index_t n = 0;
+ uint64_t nnz = 0;
+ uint64_t nElements = 0;
+ uint64_t maxrow = 0;
+ uint64_t nOnes = 0;
+ uint64_t nMOnes = 0;
+ uint64_t nOthers = 0;
+};
+
+template <class Field>
+void sparse_delete(const Sparse<Field, SparseMatrix_t::CSR_HYB> &A);
+
+template <class Field, class IndexT>
+void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::CSR_HYB> &A,
+ const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim,
+ uint64_t coldim, uint64_t nnz);
+} // FFLAS
+
+#include "fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_utils.inl"
+#include "fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_spmv.inl"
+#if defined(__FFLASFFPACK_USE_OPENMP)
+#include "fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_pspmv.inl"
+#endif
+#include "fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_spmm.inl"
+// #include "fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_pspmm.inl"
+
+#endif // __FFLASFFPACK_fflas_sparse_CSR_HYB_H
\ No newline at end of file
diff --git a/benchmark/Makefile.am b/fflas-ffpack/fflas/fflas_sparse/csr_hyb/Makefile.am
similarity index 72%
copy from benchmark/Makefile.am
copy to fflas-ffpack/fflas/fflas_sparse/csr_hyb/Makefile.am
index 31793b2..9725d45 100644
--- a/benchmark/Makefile.am
+++ b/fflas-ffpack/fflas/fflas_sparse/csr_hyb/Makefile.am
@@ -1,5 +1,7 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by Bastien Vialla <bastien.vialla at lirmm.fr>
+#
+#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -19,10 +21,12 @@
# ========LICENCE========
#/
-#
-# Nothing yet
-SUBDIRS=graph src html test-src
-#
-EXTRA_DIST=run.sh
+pkgincludesubdir=$(pkgincludedir)/fflas/fflas_sparse/csr_hyb
+pkgincludesub_HEADERS= \
+ csr_hyb_spmv.inl \
+ csr_hyb_spmm.inl \
+ csr_hyb_pspmv.inl \
+ csr_hyb_pspmm.inl \
+ csr_hyb_utils.inl
diff --git a/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_pspmm.inl b/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_pspmm.inl
new file mode 100644
index 0000000..a7e9062
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_pspmm.inl
@@ -0,0 +1,703 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_CSR_HYB_pspmm_INL
+#define __FFLASFFPACK_fflas_sparse_CSR_HYB_pspmm_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, typename Field::Element_ptr y, FieldCategories::GenericTag) {
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = A.st[4 * i], stop = A.st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.subin(y[i * blockSize + k], x[A.col[j] * blockSize + k]);
+ F.subin(y[i * blockSize + k + 1], x[A.col[j] * blockSize + k + 1]);
+ F.subin(y[i * blockSize + k + 2], x[A.col[j] * blockSize + k + 2]);
+ F.subin(y[i * blockSize + k + 3], x[A.col[j] * blockSize + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.subin(y[i * blockSize + k], x[A.col[j] * blockSize + k]);
+ }
+ start = A.st[4 * i + 1], stop = A.st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.addin(y[i * blockSize + k], x[A.col[j] * blockSize + k]);
+ F.addin(y[i * blockSize + k + 1], x[A.col[j] * blockSize + k + 1]);
+ F.addin(y[i * blockSize + k + 2], x[A.col[j] * blockSize + k + 2]);
+ F.addin(y[i * blockSize + k + 3], x[A.col[j] * blockSize + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.addin(y[i * blockSize + k], x[A.col[j] * blockSize + k]);
+ }
+ start = A.st[4 * i + 2], stop = A.st[4 * (i + 1)];
+ index_t startDat = A.st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.axpyin(y[i * blockSize + k], A.dat[startDat + k], x[A.col[j] * blockSize + k]);
+ F.axpyin(y[i * blockSize + k + 1], A.dat[startDat + k], x[A.col[j] * blockSize + k + 1]);
+ F.axpyin(y[i * blockSize + k + 2], A.dat[startDat + k], x[A.col[j] * blockSize + k + 2]);
+ F.axpyin(y[i * blockSize + k + 3], A.dat[startDat + k], x[A.col[j] * blockSize + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.axpyin(y[i * blockSize + k], A.dat[startDat + k], x[A.col[j] * blockSize + k]);
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (uint64_t i = 0; i < A.m; ++i) {
+ index_t start = A.st[4 * i], stop = A.st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.subin(y[i * blockSize + k], x[A.col[j] * blockSize + k]);
+ F.subin(y[i * blockSize + k + 1], x[A.col[j] * blockSize + k + 1]);
+ F.subin(y[i * blockSize + k + 2], x[A.col[j] * blockSize + k + 2]);
+ F.subin(y[i * blockSize + k + 3], x[A.col[j] * blockSize + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.subin(y[i * blockSize + k], x[A.col[j] * blockSize + k]);
+ }
+ start = A.st[4 * i + 1], stop = A.st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.addin(y[i * blockSize + k], x[A.col[j] * blockSize + k]);
+ F.addin(y[i * blockSize + k + 1], x[A.col[j] * blockSize + k + 1]);
+ F.addin(y[i * blockSize + k + 2], x[A.col[j] * blockSize + k + 2]);
+ F.addin(y[i * blockSize + k + 3], x[A.col[j] * blockSize + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.addin(y[i * blockSize + k], x[A.col[j] * blockSize + k]);
+ }
+ start = A.st[4 * i + 2], stop = A.st[4 * (i + 1)];
+ index_t startDat = A.st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.axpyin(y[i * blockSize + k], A.dat[startDat + k], x[A.col[j] * blockSize + k]);
+ F.axpyin(y[i * blockSize + k + 1], A.dat[startDat + k], x[A.col[j] * blockSize + k + 1]);
+ F.axpyin(y[i * blockSize + k + 2], A.dat[startDat + k], x[A.col[j] * blockSize + k + 2]);
+ F.axpyin(y[i * blockSize + k + 3], A.dat[startDat + k], x[A.col[j] * blockSize + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.axpyin(y[i * blockSize + k], A.dat[startDat + k], x[A.col[j] * blockSize + k]);
+ }
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ FieldCategories::GenericTag) {
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, ldx, ldy](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = A.st[4 * i], stop = A.st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.subin(y[i * ldy + k], x[A.col[j] * ldx + k]);
+ F.subin(y[i * ldy + k + 1], x[A.col[j] * ldx + k + 1]);
+ F.subin(y[i * ldy + k + 2], x[A.col[j] * ldx + k + 2]);
+ F.subin(y[i * ldy + k + 3], x[A.col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.subin(y[i * ldy + k], x[A.col[j] * ldx + k]);
+ }
+ start = A.st[4 * i + 1], stop = A.st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.addin(y[i * ldy + k], x[A.col[j] * ldx + k]);
+ F.addin(y[i * ldy + k + 1], x[A.col[j] * ldx + k + 1]);
+ F.addin(y[i * ldy + k + 2], x[A.col[j] * ldx + k + 2]);
+ F.addin(y[i * ldy + k + 3], x[A.col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.addin(y[i * ldy + k], x[A.col[j] * ldx + k]);
+ }
+ start = A.st[4 * i + 2], stop = A.st[4 * (i + 1)];
+ index_t startDat = A.st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.axpyin(y[i * ldy + k], A.dat[startDat + k], x[A.col[j] * ldx + k]);
+ F.axpyin(y[i * ldy + k + 1], A.dat[startDat + k], x[A.col[j] * ldx + k + 1]);
+ F.axpyin(y[i * ldy + k + 2], A.dat[startDat + k], x[A.col[j] * ldx + k + 2]);
+ F.axpyin(y[i * ldy + k + 3], A.dat[startDat + k], x[A.col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.axpyin(y[i * ldy + k], A.dat[startDat + k], x[A.col[j] * ldx + k]);
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (uint64_t i = 0; i < A.m; ++i) {
+ index_t start = A.st[4 * i], stop = A.st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.subin(y[i * ldy + k], x[A.col[j] * ldx + k]);
+ F.subin(y[i * ldy + k + 1], x[A.col[j] * ldx + k + 1]);
+ F.subin(y[i * ldy + k + 2], x[A.col[j] * ldx + k + 2]);
+ F.subin(y[i * ldy + k + 3], x[A.col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.subin(y[i * ldy + k], x[A.col[j] * ldx + k]);
+ }
+ start = A.st[4 * i + 1], stop = A.st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.addin(y[i * ldy + k], x[A.col[j] * ldx + k]);
+ F.addin(y[i * ldy + k + 1], x[A.col[j] * ldx + k + 1]);
+ F.addin(y[i * ldy + k + 2], x[A.col[j] * ldx + k + 2]);
+ F.addin(y[i * ldy + k + 3], x[A.col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.addin(y[i * ldy + k], x[A.col[j] * ldx + k]);
+ }
+ start = A.st[4 * i + 2], stop = A.st[4 * (i + 1)];
+ index_t startDat = A.st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.axpyin(y[i * ldy + k], A.dat[startDat + k], x[A.col[j] * ldx + k]);
+ F.axpyin(y[i * ldy + k + 1], A.dat[startDat + k], x[A.col[j] * ldx + k + 1]);
+ F.axpyin(y[i * ldy + k + 2], A.dat[startDat + k], x[A.col[j] * ldx + k + 2]);
+ F.axpyin(y[i * ldy + k + 3], A.dat[startDat + k], x[A.col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.axpyin(y[i * ldy + k], A.dat[startDat + k], x[A.col[j] * ldx + k]);
+ }
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::UnparametricTag) {
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = A.st[4 * i], stop = A.st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * blockSize + k] -= x[A.col[j] * blockSize + k];
+ y[i * blockSize + k + 1] -= x[A.col[j] * blockSize + k + 1];
+ y[i * blockSize + k + 2] -= x[A.col[j] * blockSize + k + 2];
+ y[i * blockSize + k + 3] -= x[A.col[j] * blockSize + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] -= x[A.col[j] * blockSize + k];
+ }
+ start = A.st[4 * i + 1], stop = A.st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * blockSize + k] += x[A.col[j] * blockSize + k];
+ y[i * blockSize + k + 1] += x[A.col[j] * blockSize + k + 1];
+ y[i * blockSize + k + 2] += x[A.col[j] * blockSize + k + 2];
+ y[i * blockSize + k + 3] += x[A.col[j] * blockSize + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] += x[A.col[j] * blockSize + k];
+ }
+ start = A.st[4 * i + 2], stop = A.st[4 * (i + 1)];
+ index_t startDat = A.st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * blockSize + k] += A.dat[startDat + j] * x[A.col[j] * blockSize + k];
+ y[i * blockSize + k + 1] += A.dat[startDat + j] * x[A.col[j] * blockSize + k + 1];
+ y[i * blockSize + k + 2] += A.dat[startDat + j] * x[A.col[j] * blockSize + k + 2];
+ y[i * blockSize + k + 3] += A.dat[startDat + j] * x[A.col[j] * blockSize + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] += A.dat[startDat + j] * x[A.col[j] * blockSize + k];
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (uint64_t i = 0; i < A.m; ++i) {
+ index_t start = A.st[4 * i], stop = A.st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * blockSize + k] -= x[A.col[j] * blockSize + k];
+ y[i * blockSize + k + 1] -= x[A.col[j] * blockSize + k + 1];
+ y[i * blockSize + k + 2] -= x[A.col[j] * blockSize + k + 2];
+ y[i * blockSize + k + 3] -= x[A.col[j] * blockSize + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] -= x[A.col[j] * blockSize + k];
+ }
+ start = A.st[4 * i + 1], stop = A.st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * blockSize + k] += x[A.col[j] * blockSize + k];
+ y[i * blockSize + k + 1] += x[A.col[j] * blockSize + k + 1];
+ y[i * blockSize + k + 2] += x[A.col[j] * blockSize + k + 2];
+ y[i * blockSize + k + 3] += x[A.col[j] * blockSize + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] += x[A.col[j] * blockSize + k];
+ }
+ start = A.st[4 * i + 2], stop = A.st[4 * (i + 1)];
+ index_t startDat = A.st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * blockSize + k] += A.dat[startDat + j] * x[A.col[j] * blockSize + k];
+ y[i * blockSize + k + 1] += A.dat[startDat + j] * x[A.col[j] * blockSize + k + 1];
+ y[i * blockSize + k + 2] += A.dat[startDat + j] * x[A.col[j] * blockSize + k + 2];
+ y[i * blockSize + k + 3] += A.dat[startDat + j] * x[A.col[j] * blockSize + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] += A.dat[startDat + j] * x[A.col[j] * blockSize + k];
+ }
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ FieldCategories::UnparametricTag) {
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, ldx, ldy](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = A.st[4 * i], stop = A.st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] -= x[A.col[j] * ldx + k];
+ y[i * ldy + k + 1] -= x[A.col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] -= x[A.col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] -= x[A.col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= x[A.col[j] * ldx + k];
+ }
+ start = A.st[4 * i + 1], stop = A.st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += x[A.col[j] * ldx + k];
+ y[i * ldy + k + 1] += x[A.col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] += x[A.col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] += x[A.col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += x[A.col[j] * ldx + k];
+ }
+ start = A.st[4 * i + 2], stop = A.st[4 * (i + 1)];
+ index_t startDat = A.st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += A.dat[startDat + j] * x[A.col[j] * ldx + k];
+ y[i * ldy + k + 1] += A.dat[startDat + j] * x[A.col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] += A.dat[startDat + j] * x[A.col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] += A.dat[startDat + j] * x[A.col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += A.dat[startDat + j] * x[A.col[j] * ldx + k];
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (uint64_t i = 0; i < A.m; ++i) {
+ index_t start = A.st[4 * i], stop = A.st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] -= x[A.col[j] * ldx + k];
+ y[i * ldy + k + 1] -= x[A.col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] -= x[A.col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] -= x[A.col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= x[A.col[j] * ldx + k];
+ }
+ start = A.st[4 * i + 1], stop = A.st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += x[A.col[j] * ldx + k];
+ y[i * ldy + k + 1] += x[A.col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] += x[A.col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] += x[A.col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += x[A.col[j] * ldx + k];
+ }
+ start = A.st[4 * i + 2], stop = A.st[4 * (i + 1)];
+ index_t startDat = A.st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += A.dat[startDat + j] * x[A.col[j] * ldx + k];
+ y[i * ldy + k + 1] += A.dat[startDat + j] * x[A.col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] += A.dat[startDat + j] * x[A.col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] += A.dat[startDat + j] * x[A.col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += A.dat[startDat + j] * x[A.col[j] * ldx + k];
+ }
+ }
+#endif
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+template <class Field, class LFunc, class SFunc>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, typename Field::Element_ptr y, LFunc &&lfunc, SFunc &&sfunc,
+ FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, lfunc, sfunc](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ vect_t vx1, vx2, vy1, vy2, vdat;
+ index_t start = A.st[4 * i], stop = A.st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy2 = lfunc(y + i * blockSize + k + simd::vect_size);
+ vx1 = lfunc(y + A.col[j] * blockSize + k);
+ vx2 = lfunc(y + A.col[j] * blockSize + k + simd::vect_size);
+ sfunc(y + i * blockSize + k, simd::sub(vy1, vx1));
+ sfunc(y + i * blockSize + k + simd::vect_size, simd::sub(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vx1 = lfunc(y + A.col[j] * blockSize + k);
+ sfunc(y + i * blockSize + k, simd::sub(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] -= x[A.col[j] * blockSize + k];
+ }
+ start = A.st[4 * i + 1], stop = A.st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy2 = lfunc(y + i * blockSize + k + simd::vect_size);
+ vx1 = lfunc(y + A.col[j] * blockSize + k);
+ vx2 = lfunc(y + A.col[j] * blockSize + k + simd::vect_size);
+ sfunc(y + i * blockSize + k, simd::add(vy1, vx1));
+ sfunc(y + i * blockSize + k + simd::vect_size, simd::add(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vx1 = lfunc(y + A.col[j] * blockSize + k);
+ sfunc(y + i * blockSize + k, simd::add(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] += x[A.col[j] * blockSize + k];
+ }
+ start = A.st[4 * i + 2], stop = A.st[4 * (i + 1)];
+ index_t startDat = A.st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ vdat = simd::set1(A.dat[startDat + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy2 = lfunc(y + i * blockSize + k + simd::vect_size);
+ vx1 = lfunc(y + A.col[j] * blockSize + k);
+ vx2 = lfunc(y + A.col[j] * blockSize + k + simd::vect_size);
+ sfunc(y + i * blockSize + k, simd::fmadd(vy1, vdat, vx1));
+ sfunc(y + i * blockSize + k + simd::vect_size, simd::fmadd(vy2, vdat, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vx1 = lfunc(y + A.col[j] * blockSize + k);
+ sfunc(y + i * blockSize + k, simd::fmadd(vy1, vdat, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] -= A.dat[startDat + j] * x[A.col[j] * blockSize + k];
+ }
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (uint64_t i = 0; i < A.m; ++i) {
+ vect_t vx1, vx2, vy1, vy2, vdat;
+ index_t start = A.st[4 * i], stop = A.st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy2 = lfunc(y + i * blockSize + k + simd::vect_size);
+ vx1 = lfunc(y + A.col[j] * blockSize + k);
+ vx2 = lfunc(y + A.col[j] * blockSize + k + simd::vect_size);
+ sfunc(y + i * blockSize + k, simd::sub(vy1, vx1));
+ sfunc(y + i * blockSize + k + simd::vect_size, simd::sub(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vx1 = lfunc(y + A.col[j] * blockSize + k);
+ sfunc(y + i * blockSize + k, simd::sub(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] -= x[A.col[j] * blockSize + k];
+ }
+ start = A.st[4 * i + 1], stop = A.st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy2 = lfunc(y + i * blockSize + k + simd::vect_size);
+ vx1 = lfunc(y + A.col[j] * blockSize + k);
+ vx2 = lfunc(y + A.col[j] * blockSize + k + simd::vect_size);
+ sfunc(y + i * blockSize + k, simd::add(vy1, vx1));
+ sfunc(y + i * blockSize + k + simd::vect_size, simd::add(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vx1 = lfunc(y + A.col[j] * blockSize + k);
+ sfunc(y + i * blockSize + k, simd::add(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] += x[A.col[j] * blockSize + k];
+ }
+ start = A.st[4 * i + 2], stop = A.st[4 * (i + 1)];
+ index_t startDat = A.st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ vdat = simd::set1(A.dat[startDat + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy2 = lfunc(y + i * blockSize + k + simd::vect_size);
+ vx1 = lfunc(y + A.col[j] * blockSize + k);
+ vx2 = lfunc(y + A.col[j] * blockSize + k + simd::vect_size);
+ sfunc(y + i * blockSize + k, simd::fmadd(vy1, vdat, vx1));
+ sfunc(y + i * blockSize + k + simd::vect_size, simd::fmadd(vy2, vdat, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vx1 = lfunc(y + A.col[j] * blockSize + k);
+ sfunc(y + i * blockSize + k, simd::fmadd(vy1, vdat, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] -= A.dat[startDat + j] * x[A.col[j] * blockSize + k];
+ }
+ }
+ }
+#endif
+}
+
+template <class Field, class LFunc, class SFunc>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy, LFunc &&lfunc,
+ SFunc &&sfunc, FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, ldx, ldy, lfunc, sfunc](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ vect_t vx1, vx2, vy1, vy2, vdat;
+ index_t start = A.st[4 * i], stop = A.st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy2 = lfunc(y + i * ldy + k + simd::vect_size);
+ vx1 = lfunc(y + A.col[j] * ldx + k);
+ vx2 = lfunc(y + A.col[j] * ldx + k + simd::vect_size);
+ sfunc(y + i * ldy + k, simd::sub(vy1, vx1));
+ sfunc(y + i * ldy + k + simd::vect_size, simd::sub(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vx1 = lfunc(y + A.col[j] * ldx + k);
+ sfunc(y + i * ldy + k, simd::sub(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= x[A.col[j] * ldx + k];
+ }
+ start = A.st[4 * i + 1], stop = A.st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy2 = lfunc(y + i * ldy + k + simd::vect_size);
+ vx1 = lfunc(y + A.col[j] * ldx + k);
+ vx2 = lfunc(y + A.col[j] * ldx + k + simd::vect_size);
+ sfunc(y + i * ldy + k, simd::add(vy1, vx1));
+ sfunc(y + i * ldy + k + simd::vect_size, simd::add(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vx1 = lfunc(y + A.col[j] * ldx + k);
+ sfunc(y + i * ldy + k, simd::add(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += x[A.col[j] * ldx + k];
+ }
+ start = A.st[4 * i + 2], stop = A.st[4 * (i + 1)];
+ index_t startDat = A.st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ vdat = simd::set1(A.dat[startDat + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy2 = lfunc(y + i * ldy + k + simd::vect_size);
+ vx1 = lfunc(y + A.col[j] * ldx + k);
+ vx2 = lfunc(y + A.col[j] * ldx + k + simd::vect_size);
+ sfunc(y + i * ldy + k, simd::fmadd(vy1, vdat, vx1));
+ sfunc(y + i * ldy + k + simd::vect_size, simd::fmadd(vy2, vdat, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vx1 = lfunc(y + A.col[j] * ldx + k);
+ sfunc(y + i * ldy + k, simd::fmadd(vy1, vdat, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= A.dat[startDat + j] * x[A.col[j] * ldx + k];
+ }
+ }
+ }
+ });
+
+#else
+#pragma omp parallel for
+ for (uint64_t i = 0; i < A.m; ++i) {
+ vect_t vx1, vx2, vy1, vy2, vdat;
+ index_t start = A.st[4 * i], stop = A.st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy2 = lfunc(y + i * ldy + k + simd::vect_size);
+ vx1 = lfunc(y + A.col[j] * ldx + k);
+ vx2 = lfunc(y + A.col[j] * ldx + k + simd::vect_size);
+ sfunc(y + i * ldy + k, simd::sub(vy1, vx1));
+ sfunc(y + i * ldy + k + simd::vect_size, simd::sub(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vx1 = lfunc(y + A.col[j] * ldx + k);
+ sfunc(y + i * ldy + k, simd::sub(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= x[A.col[j] * ldx + k];
+ }
+ start = A.st[4 * i + 1], stop = A.st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy2 = lfunc(y + i * ldy + k + simd::vect_size);
+ vx1 = lfunc(y + A.col[j] * ldx + k);
+ vx2 = lfunc(y + A.col[j] * ldx + k + simd::vect_size);
+ sfunc(y + i * ldy + k, simd::add(vy1, vx1));
+ sfunc(y + i * ldy + k + simd::vect_size, simd::add(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vx1 = lfunc(y + A.col[j] * ldx + k);
+ sfunc(y + i * ldy + k, simd::add(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += x[A.col[j] * ldx + k];
+ }
+ start = A.st[4 * i + 2], stop = A.st[4 * (i + 1)];
+ index_t startDat = A.st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ vdat = simd::set1(A.dat[startDat + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy2 = lfunc(y + i * ldy + k + simd::vect_size);
+ vx1 = lfunc(y + A.col[j] * ldx + k);
+ vx2 = lfunc(y + A.col[j] * ldx + k + simd::vect_size);
+ sfunc(y + i * ldy + k, simd::fmadd(vy1, vdat, vx1));
+ sfunc(y + i * ldy + k + simd::vect_size, simd::fmadd(vy2, vdat, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vx1 = lfunc(y + A.col[j] * ldx + k);
+ sfunc(y + i * ldy + k, simd::fmadd(vy1, vdat, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= A.dat[startDat + j] * x[A.col[j] * ldx + k];
+ }
+ }
+ }
+#endif
+}
+#endif
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, typename Field::Element_ptr y, const int64_t kmax) {
+ // TODO
+}
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ const int64_t kmax) {
+ // TODO
+}
+
+} // csr_hyb_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_CSR_HYB_pspmm_INL
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_pspmv.inl b/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_pspmv.inl
new file mode 100644
index 0000000..414f794
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_pspmv.inl
@@ -0,0 +1,217 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_CSR_HYB_pspmv_INL
+#define __FFLASFFPACK_fflas_sparse_CSR_HYB_pspmv_INL
+
+#ifdef __FFLASFFPACK_USE_TBB
+#include "tbb/parallel_for.h"
+#include "tbb/blocked_range.h"
+#endif
+
+namespace FFLAS {
+namespace sparse_details_impl {
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::GenericTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, &A, x, y, dat, st, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = st[4 * i], stop = st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ F.subin(y[i], x[col[j]]);
+ }
+ start = st[4 * i + 1], stop = st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ F.addin(y[i], x[col[j]]);
+ }
+ start = st[4 * i + 2], stop = st[4 * (i + 1)];
+ index_t startDat = st[4 * i + 3];
+ for (uint64_t j = start, k = 0; j < stop; ++j, ++k) {
+ F.axpyin(y[i], dat[startDat + k], x[col[j]]);
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (uint64_t i = 0; i < A.m; ++i) {
+ index_t start = st[4 * i], stop = st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ F.subin(y[i], x[col[j]]);
+ }
+ start = st[4 * i + 1], stop = st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ F.addin(y[i], x[col[j]]);
+ }
+ start = st[4 * i + 2], stop = st[4 * (i + 1)];
+ index_t startDat = st[4 * i + 3];
+ for (uint64_t j = start, k = 0; j < stop; ++j, ++k) {
+ F.axpyin(y[i], dat[startDat + k], x[col[j]]);
+ }
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, &A, x, y, col, dat, st](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = st[4 * i], stop = st[4 * i + 1];
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ uint64_t j = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] -= y1 + y2 + y3 + y4;
+ y1 = 0;
+ y2 = 0;
+ y3 = 0;
+ y4 = 0;
+ start = st[4 * i + 1], stop = st[4 * i + 2];
+ diff = stop - start;
+ j = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ y1 = 0;
+ y2 = 0;
+ y3 = 0;
+ y4 = 0;
+ start = st[4 * i + 2], stop = st[4 * (i + 1)];
+ diff = stop - start;
+ index_t startDat = st[4 * i + 3];
+ j = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += dat[startDat + j] * x[col[start + j]];
+ y2 += dat[startDat + j + 1] * x[col[start + j + 1]];
+ y3 += dat[startDat + j + 2] * x[col[start + j + 2]];
+ y4 += dat[startDat + j + 3] * x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += dat[startDat + j] * x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+ });
+#else
+#pragma omp parallel for
+ for (uint64_t i = 0; i < A.m; ++i) {
+ index_t start = st[4 * i], stop = st[4 * i + 1];
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ uint64_t j = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] -= y1 + y2 + y3 + y4;
+ y1 = 0;
+ y2 = 0;
+ y3 = 0;
+ y4 = 0;
+ start = st[4 * i + 1], stop = st[4 * i + 2];
+ diff = stop - start;
+ j = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ y1 = 0;
+ y2 = 0;
+ y3 = 0;
+ y4 = 0;
+ start = st[4 * i + 2], stop = st[4 * (i + 1)];
+ diff = stop - start;
+ index_t startDat = st[4 * i + 3];
+ j = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += dat[startDat + j] * x[col[start + j]];
+ y2 += dat[startDat + j + 1] * x[col[start + j + 1]];
+ y3 += dat[startDat + j + 2] * x[col[start + j + 2]];
+ y4 += dat[startDat + j + 3] * x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += dat[startDat + j] * x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, const int64_t kmax) {
+ // TODO
+}
+
+} // CSR_HYB_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_CSR_HYB_pspmv_INL
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_spmm.inl b/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_spmm.inl
new file mode 100644
index 0000000..bcb127f
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_spmm.inl
@@ -0,0 +1,317 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_CSR_HYB_spmm_INL
+#define __FFLASFFPACK_fflas_sparse_CSR_HYB_spmm_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::GenericTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (uint64_t i = 0; i < A.m; ++i) {
+ index_t start = st[4 * i], stop = st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.subin(y[i * ldy + k], x[col[j] * ldx + k]);
+ F.subin(y[i * ldy + k + 1], x[col[j] * ldx + k + 1]);
+ F.subin(y[i * ldy + k + 2], x[col[j] * ldx + k + 2]);
+ F.subin(y[i * ldy + k + 3], x[col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.subin(y[i * ldy + k], x[col[j] * ldx + k]);
+ }
+ start = st[4 * i + 1], stop = st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.addin(y[i * ldy + k], x[col[j] * ldx + k]);
+ F.addin(y[i * ldy + k + 1], x[col[j] * ldx + k + 1]);
+ F.addin(y[i * ldy + k + 2], x[col[j] * ldx + k + 2]);
+ F.addin(y[i * ldy + k + 3], x[col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.addin(y[i * ldy + k], x[col[j] * ldx + k]);
+ }
+ start = st[4 * i + 2], stop = st[4 * (i + 1)];
+ index_t startDat = st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.axpyin(y[i * ldy + k], dat[startDat + k], x[col[j] * ldx + k]);
+ F.axpyin(y[i * ldy + k + 1], dat[startDat + k], x[col[j] * ldx + k + 1]);
+ F.axpyin(y[i * ldy + k + 2], dat[startDat + k], x[col[j] * ldx + k + 2]);
+ F.axpyin(y[i * ldy + k + 3], dat[startDat + k], x[col[j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.axpyin(y[i * ldy + k], dat[startDat + k], x[col[j] * ldx + k]);
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (uint64_t i = 0; i < A.m; ++i) {
+ index_t start = st[4 * i], stop = st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] -= x[col[j] * ldx + k];
+ y[i * ldy + k + 1] -= x[col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] -= x[col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] -= x[col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= x[col[j] * ldx + k];
+ }
+ start = st[4 * i + 1], stop = st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ y[i * ldy + k + 1] += x[col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] += x[col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] += x[col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ }
+ start = st[4 * i + 2], stop = st[4 * (i + 1)];
+ index_t startDat = st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += dat[startDat + j] * x[col[j] * ldx + k];
+ y[i * ldy + k + 1] += dat[startDat + j] * x[col[j] * ldx + k + 1];
+ y[i * ldy + k + 2] += dat[startDat + j] * x[col[j] * ldx + k + 2];
+ y[i * ldy + k + 3] += dat[startDat + j] * x[col[j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += dat[startDat + j] * x[col[j] * ldx + k];
+ }
+ }
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ vect_t vx1, vx2, vy1, vy2, vdat;
+ for (uint64_t i = 0; i < A.m; ++i) {
+ index_t start = st[4 * i], stop = st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vy2 = simd::load(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::load(y + col[j] * ldx + k);
+ vx2 = simd::load(y + col[j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldy + k, simd::sub(vy1, vx1));
+ simd::store(y + i * ldy + k + simd::vect_size, simd::sub(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vx1 = simd::load(y + col[j] * ldx + k);
+ simd::store(y + i * ldy + k, simd::sub(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= x[col[j] * ldx + k];
+ }
+ start = st[4 * i + 1], stop = st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vy2 = simd::load(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::load(y + col[j] * ldx + k);
+ vx2 = simd::load(y + col[j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldy + k, simd::add(vy1, vx1));
+ simd::store(y + i * ldy + k + simd::vect_size, simd::add(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vx1 = simd::load(y + col[j] * ldx + k);
+ simd::store(y + i * ldy + k, simd::add(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ }
+ start = st[4 * i + 2], stop = st[4 * (i + 1)];
+ index_t startDat = st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ vdat = simd::set1(dat[startDat + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vy2 = simd::load(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::load(y + col[j] * ldx + k);
+ vx2 = simd::load(y + col[j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldy + k, simd::fmadd(vy1, vdat, vx1));
+ simd::store(y + i * ldy + k + simd::vect_size, simd::fmadd(vy2, vdat, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vx1 = simd::load(y + col[j] * ldx + k);
+ simd::store(y + i * ldy + k, simd::fmadd(vy1, vdat, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= dat[startDat + j] * x[col[j] * ldx + k];
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ vect_t vx1, vx2, vy1, vy2, vdat;
+ for (uint64_t i = 0; i < A.m; ++i) {
+ index_t start = st[4 * i], stop = st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vy2 = simd::loadu(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::loadu(y + col[j] * ldx + k);
+ vx2 = simd::loadu(y + col[j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldy + k, simd::sub(vy1, vx1));
+ simd::storeu(y + i * ldy + k + simd::vect_size, simd::sub(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vx1 = simd::loadu(y + col[j] * ldx + k);
+ simd::storeu(y + i * ldy + k, simd::sub(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= x[col[j] * ldx + k];
+ }
+ start = st[4 * i + 1], stop = st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vy2 = simd::loadu(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::loadu(y + col[j] * ldx + k);
+ vx2 = simd::loadu(y + col[j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldy + k, simd::add(vy1, vx1));
+ simd::storeu(y + i * ldy + k + simd::vect_size, simd::add(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vx1 = simd::loadu(y + col[j] * ldx + k);
+ simd::storeu(y + i * ldy + k, simd::add(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += x[col[j] * ldx + k];
+ }
+ start = st[4 * i + 2], stop = st[4 * (i + 1)];
+ index_t startDat = st[4 * i + 3];
+ for (uint64_t j = start; j < stop; ++j) {
+ size_t k = 0;
+ vdat = simd::set1(dat[startDat + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vy2 = simd::loadu(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::loadu(y + col[j] * ldx + k);
+ vx2 = simd::loadu(y + col[j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldy + k, simd::fmadd(vy1, vdat, vx1));
+ simd::storeu(y + i * ldy + k + simd::vect_size, simd::fmadd(vy2, vdat, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vx1 = simd::loadu(y + col[j] * ldx + k);
+ simd::storeu(y + i * ldy + k, simd::fmadd(vy1, vdat, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= dat[startDat + j] * x[col[j] * ldx + k];
+ }
+ }
+}
+#endif
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ const int64_t kmax) {
+ // TODO
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ uint64_t kmax) {
+ // TODO
+}
+
+template <class Field>
+inline void fspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ uint64_t kmax) {
+ // TODO
+}
+
+#endif
+
+} // csr_hyb_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_CSR_HYB_spmm_INL
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_spmv.inl b/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_spmv.inl
new file mode 100644
index 0000000..5f1ad0b
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_spmv.inl
@@ -0,0 +1,131 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_CSR_HYB_spmv_INL
+#define __FFLASFFPACK_fflas_sparse_CSR_HYB_spmv_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::GenericTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (uint64_t i = 0; i < A.m; ++i) {
+ index_t start = st[4 * i], stop = st[4 * i + 1];
+ for (uint64_t j = start; j < stop; ++j) {
+ F.subin(y[i], x[col[j]]);
+ }
+ start = st[4 * i + 1], stop = st[4 * i + 2];
+ for (uint64_t j = start; j < stop; ++j) {
+ F.addin(y[i], x[col[j]]);
+ }
+ start = st[4 * i + 2], stop = st[4 * (i + 1)];
+ index_t startDat = st[4 * i + 3];
+ for (uint64_t j = start, k = 0; j < stop; ++j, ++k) {
+ F.axpyin(y[i], dat[startDat + k], x[col[j]]);
+ }
+ }
+}
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (uint64_t i = 0; i < A.m; ++i) {
+ index_t start = st[4 * i], stop = st[4 * i + 1];
+ index_t diff = stop - start;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ uint64_t j = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] -= y1 + y2 + y3 + y4;
+ y1 = 0;
+ y2 = 0;
+ y3 = 0;
+ y4 = 0;
+ start = st[4 * i + 1], stop = st[4 * i + 2];
+ diff = stop - start;
+ j = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ y1 = 0;
+ y2 = 0;
+ y3 = 0;
+ y4 = 0;
+ start = st[4 * i + 2], stop = st[4 * (i + 1)];
+ diff = stop - start;
+ index_t startDat = st[4 * i + 3];
+ j = 0;
+ for (; j < ROUND_DOWN(diff, 4); j += 4) {
+ y1 += dat[startDat + j] * x[col[start + j]];
+ y2 += dat[startDat + j + 1] * x[col[start + j + 1]];
+ y3 += dat[startDat + j + 2] * x[col[start + j + 2]];
+ y4 += dat[startDat + j + 3] * x[col[start + j + 3]];
+ }
+ for (; j < diff; ++j) {
+ y1 += dat[startDat + j] * x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+}
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::CSR_HYB> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, const uint64_t kmax) {
+ return;
+ // TODO
+}
+
+} // CSR_HYB_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_CSR_HYB_spmv_INL
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_utils.inl b/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_utils.inl
new file mode 100644
index 0000000..97cd9cf
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/csr_hyb/csr_hyb_utils.inl
@@ -0,0 +1,214 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_CSR_HYB_utils_INL
+#define __FFLASFFPACK_fflas_sparse_CSR_HYB_utils_INL
+
+// #define CSR_HYB_DEBUG 1
+
+namespace FFLAS {
+
+template <class Field> inline void sparse_delete(const Sparse<Field, SparseMatrix_t::CSR_HYB> &A) {
+ fflas_delete(A.dat);
+ fflas_delete(A.col);
+ fflas_delete(A.st);
+}
+
+namespace csr_hyb_details {
+
+struct Info {
+ uint64_t size = 0;
+ uint64_t perm = 0;
+ uint64_t begin = 0;
+
+ Info(uint64_t it, uint64_t s, uint64_t p) : size(s), perm(p), begin(it) {}
+ Info() = default;
+ Info(const Info &) = default;
+ Info(Info &&) = default;
+
+ Info &operator=(const Info &) = default;
+ Info &operator=(Info &&) = default;
+};
+
+template <class ValT, class IdxT> struct Coo {
+ using Self = Coo<ValT, IdxT>;
+
+ ValT val = 0;
+ IdxT row = 0;
+ IdxT col = 0;
+
+ Coo(ValT v, IdxT r, IdxT c) : val(v), row(r), col(c) {}
+ Coo() = default;
+ Coo(const Self &) = default;
+ Coo(Self &&) = default;
+
+ Self &operator=(const Self &) = default;
+ Self &operator=(Self &&) = default;
+};
+}
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::CSR_HYB> &A, const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz) {
+ using namespace csr_hyb_details;
+ using coo = Coo<typename Field::Element, index_t>;
+
+ A.kmax = Protected::DotProdBoundClassic(F, F.one);
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ A.nElements = nnz;
+ std::vector<coo> data(nnz);
+ for (uint64_t i = 0; i < nnz; ++i) {
+ // data.emplace_back(dat[i], col[i], row[i]);
+ data[i].val = dat[i];
+ data[i].col = col[i];
+ data[i].row = row[i];
+ }
+
+ std::vector<uint64_t> rows(rowdim, 0);
+
+ for (uint64_t i = 0; i < A.nnz; ++i)
+ rows[row[i]]++;
+
+ A.maxrow = *(std::max_element(rows.begin(), rows.end()));
+
+ if (A.kmax > A.maxrow)
+ A.delayed = true;
+
+ rows.resize(3 * (rowdim + 1));
+ for (auto &x : rows)
+ x = 0;
+
+ for (uint64_t i = 0; i < data.size(); ++i) {
+ auto x = data[i];
+ if (F.isOne(x.val)) {
+ rows[3 * x.row + 1]++;
+ A.nOnes++;
+ } else if (F.isMOne(x.val)) {
+ rows[3 * x.row]++;
+ A.nMOnes++;
+ } else {
+ rows[3 * x.row + 2]++;
+ A.nOthers++;
+ }
+ }
+
+ A.col = fflas_new<index_t>(nnz, Alignment::CACHE_LINE);
+ A.st = fflas_new<index_t>(4 * (rowdim + 1), Alignment::CACHE_LINE);
+ A.dat = fflas_new(F, A.nOthers, 1, Alignment::CACHE_LINE);
+
+ for (uint64_t i = 0; i < 4 * (rowdim + 1); ++i)
+ A.st[i] = 0;
+
+ for (size_t i = 0; i < nnz; ++i) {
+ A.col[i] = static_cast<index_t>(data[i].col);
+ }
+
+ data.shrink_to_fit();
+
+ // sort nnz by row with order -1 1 L
+
+ std::sort(data.begin(), data.end(), [&F](const coo &a, const coo &b) {
+ return (a.row < b.row) || ((a.row == b.row) && (F.isMOne(a.val) && !F.isMOne(b.val))) ||
+ ((a.row == b.row) && (F.isMOne(a.val) && F.isMOne(b.val) && (a.col < b.col))) ||
+ ((a.row == b.row) && (F.isOne(a.val) && !F.isOne(b.val) && !F.isMOne(b.val))) ||
+ ((a.row == b.row) && (F.isOne(a.val) && F.isOne(b.val) && (a.col < b.col))) ||
+ ((a.row == b.row) && (!F.isOne(a.val) && !F.isMOne(a.val) && !F.isOne(b.val) && !F.isMOne(b.val)) &&
+ (a.col < b.col));
+ });
+
+#ifdef CSR_HYB_DEBUG
+ for (auto &x : data) {
+ cout << "(" << x.row << "," << x.col << "," << x.val << ") ";
+ }
+ cout << endl;
+#endif
+
+ uint64_t it = 0;
+ for (size_t i = 0; i < data.size(); ++i) {
+ if (!F.isOne(data[i].val) && !F.isMOne(data[i].val)) {
+ A.dat[it] = data[i].val;
+ ++it;
+ }
+ }
+
+ A.st[1] = rows[0];
+ A.st[2] = rows[1] + A.st[1];
+ A.st[3] = 0;
+ A.st[4] = rows[2] + A.st[2];
+
+ for (uint64_t i = 1; i < rowdim; ++i) {
+ A.st[4 * i + 1] = rows[3 * i] + A.st[4 * i];
+ A.st[4 * i + 2] = rows[3 * i + 1] + A.st[4 * i + 1];
+ A.st[4 * i + 3] = rows[3 * (i - 1) + 2] + A.st[4 * (i - 1) + 3];
+ A.st[4 * (i + 1)] = rows[3 * i + 2] + A.st[4 * i + 2];
+ }
+
+#ifdef CSR_HYB_DEBUG
+ for (uint64_t i = 0; i < it; ++i)
+ cout << A.dat[i] << " ";
+ cout << endl;
+ for (uint64_t i = 0; i < nnz; ++i)
+ cout << A.col[i] << " ";
+ cout << endl;
+
+ for (uint64_t i = 0; i < rowdim; ++i)
+ cout << "(" << A.st[4 * i] << " , " << A.st[4 * i + 1] << " , " << A.st[4 * i + 2] << " , " << A.st[4 * i + 3]
+ << ") " << endl;
+ cout << endl;
+ cout << endl;
+ for (uint64_t i = 0; i < rowdim; ++i) {
+ index_t start = A.st[4 * i], stop = A.st[4 * i + 1];
+ index_t diff = stop - start;
+ cout << i << endl;
+ cout << " -1 : ";
+ for (uint64_t j = 0; j < diff; ++j) {
+ cout << A.col[start + j] << " ";
+ }
+ cout << endl;
+ start = A.st[4 * i + 1], stop = A.st[4 * i + 2];
+ diff = stop - start;
+ cout << " 1 : ";
+ for (uint64_t j = 0; j < diff; ++j) {
+ cout << A.col[start + j] << " ";
+ }
+ cout << endl;
+ start = A.st[4 * i + 2], stop = A.st[4 * (i + 1)];
+ diff = stop - start;
+ index_t startDat = A.st[4 * i + 3];
+ cout << " l : ";
+ for (uint64_t j = 0; j < diff; ++j) {
+ cout << "(" << A.col[start + j] << " , " << A.dat[startDat + j] << ") ";
+ }
+ cout << endl;
+ }
+#endif
+}
+} // FFLAS
+#endif
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/ell.h b/fflas-ffpack/fflas/fflas_sparse/ell.h
new file mode 100644
index 0000000..e18de4f
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/ell.h
@@ -0,0 +1,90 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_fspmv_ell.inl
+ * NO DOC
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_ell_H
+#define __FFLASFFPACK_fflas_sparse_ell_H
+
+namespace FFLAS { /* ELL */
+
+template <class _Field> struct Sparse<_Field, SparseMatrix_t::ELL> {
+ using Field = _Field;
+ bool delayed = false;
+ uint64_t kmax = 0;
+ index_t m = 0;
+ index_t n = 0;
+ index_t ld = 0;
+ uint64_t nnz = 0;
+ uint64_t nElements = 0;
+ uint64_t maxrow = 0;
+ index_t *col = nullptr;
+ typename _Field::Element_ptr dat;
+};
+
+template <class _Field>
+struct Sparse<_Field, SparseMatrix_t::ELL_ZO>
+ : public Sparse<_Field, SparseMatrix_t::ELL> {
+ using Field = _Field;
+ typename _Field::Element cst = 1;
+};
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::ELL> &A,
+ const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim,
+ uint64_t coldim, uint64_t nnz);
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F,
+ Sparse<Field, SparseMatrix_t::ELL_ZO> &A,
+ const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim,
+ uint64_t coldim, uint64_t nnz);
+
+template <class Field>
+inline void sparse_delete(const Sparse<Field, SparseMatrix_t::ELL> &A);
+
+template <class Field>
+inline void sparse_delete(const Sparse<Field, SparseMatrix_t::ELL_ZO> &A);
+} // FFLAS
+
+#include "fflas-ffpack/fflas/fflas_sparse/ell/ell_utils.inl"
+#include "fflas-ffpack/fflas/fflas_sparse/ell/ell_spmv.inl"
+#include "fflas-ffpack/fflas/fflas_sparse/ell/ell_spmm.inl"
+
+#if defined(__FFLASFFPACK_USE_OPENMP) || defined(__FFLASFFPACK_USE_TBB)
+
+#include "fflas-ffpack/fflas/fflas_sparse/ell/ell_pspmv.inl"
+// #include "fflas-ffpack/fflas/fflas_sparse/ell/ell_pspmm.inl"
+
+#endif
+
+#endif // __FFLASFFPACK_fflas_sparse_ELL_H
\ No newline at end of file
diff --git a/benchmark/Makefile.am b/fflas-ffpack/fflas/fflas_sparse/ell/Makefile.am
similarity index 73%
copy from benchmark/Makefile.am
copy to fflas-ffpack/fflas/fflas_sparse/ell/Makefile.am
index 31793b2..611b644 100644
--- a/benchmark/Makefile.am
+++ b/fflas-ffpack/fflas/fflas_sparse/ell/Makefile.am
@@ -1,5 +1,7 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by Bastien Vialla <bastien.vialla at lirmm.fr>
+#
+#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -19,10 +21,12 @@
# ========LICENCE========
#/
-#
-# Nothing yet
-SUBDIRS=graph src html test-src
-#
-EXTRA_DIST=run.sh
+pkgincludesubdir=$(pkgincludedir)/fflas/fflas_sparse/ell
+pkgincludesub_HEADERS= \
+ ell_spmv.inl \
+ ell_spmm.inl \
+ ell_pspmv.inl \
+ ell_pspmm.inl \
+ ell_utils.inl
diff --git a/fflas-ffpack/fflas/fflas_sparse/ell/ell_pspmm.inl b/fflas-ffpack/fflas/fflas_sparse/ell/ell_pspmm.inl
new file mode 100644
index 0000000..f7c42fd
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/ell/ell_pspmm.inl
@@ -0,0 +1,697 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_ELL_pspmm_INL
+#define __FFLASFFPACK_fflas_sparse_ELL_pspmm_INL
+
+#ifdef __FFLASFFPACK_USE_TBB
+#include "tbb/parallel_for.h"
+#include "tbb/blocked_range.h"
+#endif
+
+namespace FFLAS {
+namespace sparse_details_impl {
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, typename Field::Element_ptr y, FieldCategories::GenericTag) {
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.axpyin(y[i * blockSize + k], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * blockSize + k]);
+ F.axpyin(y[i * blockSize + k + 1], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * blockSize + k + 1]);
+ F.axpyin(y[i * blockSize + k + 2], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * blockSize + k + 2]);
+ F.axpyin(y[i * blockSize + k + 3], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * blockSize + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.axpyin(y[i * blockSize + k], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * blockSize + k]);
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.axpyin(y[i * blockSize + k], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * blockSize + k]);
+ F.axpyin(y[i * blockSize + k + 1], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * blockSize + k + 1]);
+ F.axpyin(y[i * blockSize + k + 2], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * blockSize + k + 2]);
+ F.axpyin(y[i * blockSize + k + 3], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * blockSize + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.axpyin(y[i * blockSize + k], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * blockSize + k]);
+ }
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ FieldCategories::GenericTag) {
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, ldx, ldy](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.axpyin(y[i * ldy + k], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * ldx + k]);
+ F.axpyin(y[i * ldy + k + 1], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * ldx + k + 1]);
+ F.axpyin(y[i * ldy + k + 2], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * ldx + k + 2]);
+ F.axpyin(y[i * ldy + k + 3], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.axpyin(y[i * ldy + k], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * ldx + k]);
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.axpyin(y[i * ldy + k], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * ldx + k]);
+ F.axpyin(y[i * ldy + k + 1], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * ldx + k + 1]);
+ F.axpyin(y[i * ldy + k + 2], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * ldx + k + 2]);
+ F.axpyin(y[i * ldy + k + 3], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.axpyin(y[i * ldy + k], A.dat[i * A.ld + j], x[A.col[i * A.ld + j] * ldx + k]);
+ }
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, typename Field::Element_ptr y,
+ FieldCategories::UnparametricTag) {
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, ldx, ldy](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ y[i * blockSize + k + 1] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 1];
+ y[i * blockSize + k + 2] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 2];
+ y[i * blockSize + k + 3] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ y[i * blockSize + k + 1] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 1];
+ y[i * blockSize + k + 2] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 2];
+ y[i * blockSize + k + 3] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ }
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ FieldCategories::UnparametricTag) {
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, ldx, ldy](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ y[i * ldy + k + 1] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 1];
+ y[i * ldy + k + 2] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 2];
+ y[i * ldy + k + 3] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ y[i * ldy + k + 1] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 1];
+ y[i * ldy + k + 2] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 2];
+ y[i * ldy + k + 3] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ }
+ }
+#endif
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field, class LFunc, class SFunc>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, typename Field::Element_ptr y, LFunc &&lfunc, SFunc &&sfunc,
+ FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vec_t = typename simd::vec_t;
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, lfunc, sfunc](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vec_t vx1, vx2, vy1, vy2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(A.dat[i * A.ld + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy2 = lfunc(y + i * blockSize + k + simd::vect_size);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * blockSize + k);
+ vy2 = lfunc(x + A.col[i * A.ld + j] * blockSize + k + simd::vect_size);
+ sfunc(y + i * blockSize + k, simd::fmadd(vy1, vx1, vdat));
+ sfunc(y + i * blockSize + k + simd::vect_size, simd::fmadd(vy2, vx2, vdat));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * blockSize + k);
+ sfunc(y + i * blockSize + k, simd::fmadd(vy1, vx1, vdat));
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vec_t vx1, vx2, vy1, vy2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(A.dat[i * A.ld + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy2 = lfunc(y + i * blockSize + k + simd::vect_size);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * blockSize + k);
+ vy2 = lfunc(x + A.col[i * A.ld + j] * blockSize + k + simd::vect_size);
+ sfunc(y + i * blockSize + k, simd::fmadd(vy1, vx1, vdat));
+ sfunc(y + i * blockSize + k + simd::vect_size, simd::fmadd(vy2, vx2, vdat));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * blockSize + k);
+ sfunc(y + i * blockSize + k, simd::fmadd(vy1, vx1, vdat));
+ }
+ for (; k < blockSize; ++k)
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ }
+ }
+#endif
+}
+
+template <class Field, class LFunc, class SFunc>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy, LFunc &&lfunc,
+ SFunc &&sfunc, FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vec_t = typename simd::vec_t;
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, ldx, ldy, lfunc, sfunc](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vec_t vx1, vx2, vy1, vy2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(A.dat[i * A.ld + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy2 = lfunc(y + i * ldy + k + simd::vect_size);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * ldx + k);
+ vy2 = lfunc(x + A.col[i * A.ld + j] * ldx + k + simd::vect_size);
+ sfunc(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ sfunc(y + i * ldy + k + simd::vect_size, simd::fmadd(vy2, vx2, vdat));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * ldx + k);
+ sfunc(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vec_t vx1, vx2, vy1, vy2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(A.dat[i * A.ld + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy2 = lfunc(y + i * ldy + k + simd::vect_size);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * ldx + k);
+ vy2 = lfunc(x + A.col[i * A.ld + j] * ldx + k + simd::vect_size);
+ sfunc(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ sfunc(y + i * ldy + k + simd::vect_size, simd::fmadd(vy2, vx2, vdat));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * ldx + k);
+ sfunc(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ }
+ }
+#endif
+}
+
+#endif
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, typename Field::Element_ptr y, const int64_t kmax) {
+ index_t block = (A.ld) / kmax;
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, kmax](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j_loc = 0, j = 0;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ y[i * blockSize + k + 1] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 1];
+ y[i * blockSize + k + 2] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 2];
+ y[i * blockSize + k + 3] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 3];
+ }
+ for (; k < blockSize; ++k) {
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ }
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * blockSize + k]);
+ }
+ }
+ for (; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ y[i * blockSize + k + 1] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 1];
+ y[i * blockSize + k + 2] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 2];
+ y[i * blockSize + k + 3] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 3];
+ }
+ for (; k < blockSize; ++k) {
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ }
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * blockSize + k]);
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j_loc = 0, j = 0;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ y[i * blockSize + k + 1] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 1];
+ y[i * blockSize + k + 2] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 2];
+ y[i * blockSize + k + 3] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 3];
+ }
+ for (; k < blockSize; ++k) {
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ }
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * blockSize + k]);
+ }
+ }
+ for (; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ y[i * blockSize + k + 1] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 1];
+ y[i * blockSize + k + 2] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 2];
+ y[i * blockSize + k + 3] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k + 3];
+ }
+ for (; k < blockSize; ++k) {
+ y[i * blockSize + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * blockSize + k];
+ }
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * blockSize + k]);
+ }
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ const int64_t kmax) {
+ index_t block = (A.ld) / kmax;
+
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, ldx, ldy, kmax](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j_loc = 0, j = 0;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ y[i * ldy + k + 1] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 1];
+ y[i * ldy + k + 2] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 2];
+ y[i * ldy + k + 3] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ }
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * ldy + k]);
+ }
+ }
+ for (; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ y[i * ldy + k + 1] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 1];
+ y[i * ldy + k + 2] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 2];
+ y[i * ldy + k + 3] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ }
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * ldy + k]);
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j_loc = 0, j = 0;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ y[i * ldy + k + 1] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 1];
+ y[i * ldy + k + 2] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 2];
+ y[i * ldy + k + 3] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ }
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * ldy + k]);
+ }
+ }
+ for (; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ y[i * ldy + k + 1] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 1];
+ y[i * ldy + k + 2] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 2];
+ y[i * ldy + k + 3] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += A.dat[i * A.ld + j] * x[A.col[i * A.ld + j] * ldx + k];
+ }
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * ldy + k]);
+ }
+ }
+#endif
+}
+
+template <class Field, class Func>
+inline void pfspmm_zo(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, typename Field::Element_ptr y, Func &&func) {
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, func](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ func(y[i * blockSize + k], x[A.col[i * A.ld + j] * blockSize + k]);
+ func(y[i * blockSize + k + 1], x[A.col[i * A.ld + j] * blockSize + k + 1]);
+ func(y[i * blockSize + k + 2], x[A.col[i * A.ld + j] * blockSize + k + 2]);
+ func(y[i * blockSize + k + 3], x[A.col[i * A.ld + j] * blockSize + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ func(y[i * blockSize + k], x[A.col[i * A.ld + j] * blockSize + k]);
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ func(y[i * blockSize + k], x[A.col[i * A.ld + j] * blockSize + k]);
+ func(y[i * blockSize + k + 1], x[A.col[i * A.ld + j] * blockSize + k + 1]);
+ func(y[i * blockSize + k + 2], x[A.col[i * A.ld + j] * blockSize + k + 2]);
+ func(y[i * blockSize + k + 3], x[A.col[i * A.ld + j] * blockSize + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ func(y[i * blockSize + k], x[A.col[i * A.ld + j] * blockSize + k]);
+ }
+ }
+#endif
+}
+
+template <class Field, class Func>
+inline void pfspmm_zo(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ Func &&func) {
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, ldx, ldy, func](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ func(y[i * ldy + k], x[A.col[i * A.ld + j] * ldx + k]);
+ func(y[i * ldy + k + 1], x[A.col[i * A.ld + j] * ldx + k + 1]);
+ func(y[i * ldy + k + 2], x[A.col[i * A.ld + j] * ldx + k + 2]);
+ func(y[i * ldy + k + 3], x[A.col[i * A.ld + j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ func(y[i * ldy + k], x[A.col[i * A.ld + j] * ldx + k]);
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ func(y[i * ldy + k], x[A.col[i * A.ld + j] * ldx + k]);
+ func(y[i * ldy + k + 1], x[A.col[i * A.ld + j] * ldx + k + 1]);
+ func(y[i * ldy + k + 2], x[A.col[i * A.ld + j] * ldx + k + 2]);
+ func(y[i * ldy + k + 3], x[A.col[i * A.ld + j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ func(y[i * ldy + k], x[A.col[i * A.ld + j] * ldx + k]);
+ }
+ }
+#endif
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field, class LFunc, class SFunc, class VectFunc, class ScalFunc>
+inline void pfspmm_zo(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, typename Field::Element_ptr y, VectFunc &&vfunc,
+ ScalFunc &&scalfunc, LFunc &&lfunc, SFunc &&sfunc) {
+ using simd = Simd<typename Field::Element>;
+ using vec_t = typename simd::vec_t;
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, vfunc, scalfunc, lfunc, sfunc](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vec_t vx1, vx2, vy1, vy2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy2 = lfunc(y + i * blockSize + k + simd::vect_size);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * blockSize + k);
+ vy2 = lfunc(x + A.col[i * A.ld + j] * blockSize + k + simd::vect_size);
+ sfunc(y + i * blockSize + k, vfunc(vy1, vx1));
+ sfunc(y + i * blockSize + k + simd::vect_size, vfunc(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * blockSize + k);
+ sfunc(y + i * blockSize + k, vfunc(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ scalfunc(y[i * blockSize + k], x[A.col[i * A.ld + j] * blockSize + k]);
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vec_t vx1, vx2, vy1, vy2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy2 = lfunc(y + i * blockSize + k + simd::vect_size);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * blockSize + k);
+ vy2 = lfunc(x + A.col[i * A.ld + j] * blockSize + k + simd::vect_size);
+ sfunc(y + i * blockSize + k, vfunc(vy1, vx1));
+ sfunc(y + i * blockSize + k + simd::vect_size, vfunc(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * blockSize + k);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * blockSize + k);
+ sfunc(y + i * blockSize + k, vfunc(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ scalfunc(y[i * blockSize + k], x[A.col[i * A.ld + j] * blockSize + k]);
+ }
+ }
+#endif
+}
+
+template <class Field, class LFunc, class SFunc>
+inline void pfspmm_zo(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ LFunc &&lfunc, SFunc &&sfunc, FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vec_t = typename simd::vec_t;
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(
+ tbb::blocked_range<index_t>(0, A.m),
+ [&F, &A, &x, &y, blockSize, ldx, ldy, vfunc, scalfunc, lfunc, sfunc](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vec_t vx1, vx2, vy1, vy2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy2 = lfunc(y + i * ldy + k + simd::vect_size);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * ldx + k);
+ vy2 = lfunc(x + A.col[i * A.ld + j] * ldx + k + simd::vect_size);
+ sfunc(y + i * ldx + k, vfunc(vy1, vx1));
+ sfunc(y + i * ldx + k + simd::vect_size, vfunc(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * ldy + k);
+ sfunc(y + i * ldx + k, vfunc(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ scalfunc(y[i * ldy + k], x[A.col[i * A.ld + j] * ldx + k]);
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vec_t vx1, vx2, vy1, vy2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy2 = lfunc(y + i * ldy + k + simd::vect_size);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * ldx + k);
+ vy2 = lfunc(x + A.col[i * A.ld + j] * ldx + k + simd::vect_size);
+ sfunc(y + i * ldx + k, vfunc(vy1, vx1));
+ sfunc(y + i * ldx + k + simd::vect_size, vfunc(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = lfunc(y + i * ldy + k);
+ vy1 = lfunc(x + A.col[i * A.ld + j] * ldy + k);
+ sfunc(y + i * ldx + k, vfunc(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ scalfunc(y[i * ldy + k], x[A.col[i * A.ld + j] * ldx + k]);
+ }
+ }
+#endif
+}
+
+#endif
+
+} // ell_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_ELL_pspmm_INL
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/ell/ell_pspmv.inl b/fflas-ffpack/fflas/fflas_sparse/ell/ell_pspmv.inl
new file mode 100644
index 0000000..7fe900c
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/ell/ell_pspmv.inl
@@ -0,0 +1,401 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_ELL_pspmv_INL
+#define __FFLASFFPACK_fflas_sparse_ELL_pspmv_INL
+
+#ifdef __FFLASFFPACK_USE_TBB
+#include "tbb/parallel_for.h"
+#include "tbb/blocked_range.h"
+#endif
+
+namespace FFLAS {
+namespace sparse_details_impl {
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::GenericTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#if defined(__FFLASFFPACK_USE_TBB)
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, x, y, dat, col, &A](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ F.axpyin(y1, dat[i * A.ld + j], x[col[i * A.ld + j]]);
+ F.axpyin(y2, dat[i * A.ld + j + 1], x[col[i * A.ld + j + 1]]);
+ F.axpyin(y3, dat[i * A.ld + j + 2], x[col[i * A.ld + j + 2]]);
+ F.axpyin(y4, dat[i * A.ld + j + 3], x[col[i * A.ld + j + 3]]);
+ }
+ for (; j < A.ld; ++j) {
+ F.axpyin(y1, dat[i * A.ld + j], x[col[i * A.ld + j]]);
+ }
+ F.addin(y[i], y1);
+ F.addin(y[i], y2);
+ F.addin(y[i], y3);
+ F.addin(y[i], y4);
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = 0;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ F.axpyin(y1, dat[i * A.ld + j], x[col[i * A.ld + j]]);
+ F.axpyin(y2, dat[i * A.ld + j + 1], x[col[i * A.ld + j + 1]]);
+ F.axpyin(y3, dat[i * A.ld + j + 2], x[col[i * A.ld + j + 2]]);
+ F.axpyin(y4, dat[i * A.ld + j + 3], x[col[i * A.ld + j + 3]]);
+ }
+ for (; j < A.ld; ++j) {
+ F.axpyin(y1, dat[i * A.ld + j], x[col[i * A.ld + j]]);
+ }
+ F.addin(y[i], y1);
+ F.addin(y[i], y2);
+ F.addin(y[i], y3);
+ F.addin(y[i], y4);
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#if defined(__FFLASFFPACK_USE_TBB)
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, x, y, dat, col, &A](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ y1 += dat[i * A.ld + j] * x[col[i * A.ld + j]];
+ y2 += dat[i * A.ld + j + 1] * x[col[i * A.ld + j + 1]];
+ y3 += dat[i * A.ld + j + 2] * x[col[i * A.ld + j + 2]];
+ y4 += dat[i * A.ld + j + 3] * x[col[i * A.ld + j + 3]];
+ }
+ for (; j < A.ld; ++j) {
+ y1 += dat[i * A.ld + j] * x[col[i * A.ld + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = 0;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ y1 += dat[i * A.ld + j] * x[col[i * A.ld + j]];
+ y2 += dat[i * A.ld + j + 1] * x[col[i * A.ld + j + 1]];
+ y3 += dat[i * A.ld + j + 2] * x[col[i * A.ld + j + 2]];
+ y4 += dat[i * A.ld + j + 3] * x[col[i * A.ld + j + 3]];
+ }
+ for (; j < A.ld; ++j) {
+ y1 += dat[i * A.ld + j] * x[col[i * A.ld + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, const int64_t kmax) {
+ index_t block = (A.ld) / kmax;
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#if defined(__FFLASFFPACK_USE_TBB)
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, &A, x, y, kmax, block, dat, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j_loc = 0, j = 0;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ y[i] += dat[i * A.ld + j] * x[col[i * A.ld + j]];
+ }
+ F.reduce(y[i]);
+ }
+ for (; j < A.ld; ++j) {
+ y[i] += dat[i * A.ld + j] * x[col[i * A.ld + j]];
+ }
+ F.reduce(y[i]);
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j_loc = 0, j = 0;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ y[i] += dat[i * A.ld + j] * x[col[i * A.ld + j]];
+ }
+ F.reduce(y[i]);
+ }
+ for (; j < A.ld; ++j) {
+ y[i] += dat[i * A.ld + j] * x[col[i * A.ld + j]];
+ }
+ F.reduce(y[i]);
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#if defined(__FFLASFFPACK_USE_TBB)
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, &A, x, y, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ F.addin(y1, x[col[i * A.ld + j]]);
+ F.addin(y2, x[col[i * A.ld + j + 1]]);
+ F.addin(y3, x[col[i * A.ld + j + 2]]);
+ F.addin(y4, x[col[i * A.ld + j + 3]]);
+ }
+ for (; j < A.ld; ++j) {
+ F.addin(y1, x[col[i * A.ld + j]]);
+ }
+ F.addin(y[i], y1);
+ F.addin(y[i], y2);
+ F.addin(y[i], y3);
+ F.addin(y[i], y4);
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = 0;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ F.addin(y1, x[col[i * A.ld + j]]);
+ F.addin(y2, x[col[i * A.ld + j + 1]]);
+ F.addin(y3, x[col[i * A.ld + j + 2]]);
+ F.addin(y4, x[col[i * A.ld + j + 3]]);
+ }
+ for (; j < A.ld; ++j) {
+ F.addin(y1, x[col[i * A.ld + j]]);
+ }
+ F.addin(y[i], y1);
+ F.addin(y[i], y2);
+ F.addin(y[i], y3);
+ F.addin(y[i], y4);
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#if defined(__FFLASFFPACK_USE_TBB)
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, &A, x, y, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ F.addin(y1, x[col[i * A.ld + j]]);
+ F.addin(y2, x[col[i * A.ld + j + 1]]);
+ F.addin(y3, x[col[i * A.ld + j + 2]]);
+ F.addin(y4, x[col[i * A.ld + j + 3]]);
+ }
+ for (; j < A.ld; ++j) {
+ F.addin(y1, x[col[i * A.ld + j]]);
+ }
+ F.subin(y[i], y1);
+ F.subin(y[i], y2);
+ F.subin(y[i], y3);
+ F.subin(y[i], y4);
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = 0;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ F.addin(y1, x[col[i * A.ld + j]]);
+ F.addin(y2, x[col[i * A.ld + j + 1]]);
+ F.addin(y3, x[col[i * A.ld + j + 2]]);
+ F.addin(y4, x[col[i * A.ld + j + 3]]);
+ }
+ for (; j < A.ld; ++j) {
+ F.addin(y1, x[col[i * A.ld + j]]);
+ }
+ F.subin(y[i], y1);
+ F.subin(y[i], y2);
+ F.subin(y[i], y3);
+ F.subin(y[i], y4);
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#if defined(__FFLASFFPACK_USE_TBB)
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, &A, x, y, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ y1 += x[col[i * A.ld + j]];
+ y2 += x[col[i * A.ld + j + 1]];
+ y3 += x[col[i * A.ld + j + 2]];
+ y4 += x[col[i * A.ld + j + 3]];
+ }
+ for (; j < A.ld; ++j) {
+ y1 += x[col[i * A.ld + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = 0;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ y1 += x[col[i * A.ld + j]];
+ y2 += x[col[i * A.ld + j + 1]];
+ y3 += x[col[i * A.ld + j + 2]];
+ y4 += x[col[i * A.ld + j + 3]];
+ }
+ for (; j < A.ld; ++j) {
+ y1 += x[col[i * A.ld + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#if defined(__FFLASFFPACK_USE_TBB)
+ int step = __FFLASFFPACK_CACHE_LINE_SIZE / sizeof(typename Field::Element);
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.m, step),
+ [&F, &A, x, y, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ y1 += x[col[i * A.ld + j]];
+ y2 += x[col[i * A.ld + j + 1]];
+ y3 += x[col[i * A.ld + j + 2]];
+ y4 += x[col[i * A.ld + j + 3]];
+ }
+ for (; j < A.ld; ++j) {
+ y1 += x[col[i * A.ld + j]];
+ }
+ y[i] -= y1 + y2 + y3 + y4;
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j = 0;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ y1 += x[col[i * A.ld + j]];
+ y2 += x[col[i * A.ld + j + 1]];
+ y3 += x[col[i * A.ld + j + 2]];
+ y4 += x[col[i * A.ld + j + 3]];
+ }
+ for (; j < A.ld; ++j) {
+ y1 += x[col[i * A.ld + j]];
+ }
+ y[i] -= y1 + y2 + y3 + y4;
+ }
+#endif
+}
+
+} // ELL_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_ELL_pspmv_INL
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/ell/ell_spmm.inl b/fflas-ffpack/fflas/fflas_sparse/ell/ell_spmm.inl
new file mode 100644
index 0000000..773120a
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/ell/ell_spmm.inl
@@ -0,0 +1,567 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_ELL_spmm_INL
+#define __FFLASFFPACK_fflas_sparse_ELL_spmm_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::GenericTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.axpyin(y[i * ldy + k], dat[i * A.ld + j], x[col[i * A.ld + j] * ldx + k]);
+ F.axpyin(y[i * ldy + k + 1], dat[i * A.ld + j], x[col[i * A.ld + j] * ldx + k + 1]);
+ F.axpyin(y[i * ldy + k + 2], dat[i * A.ld + j], x[col[i * A.ld + j] * ldx + k + 2]);
+ F.axpyin(y[i * ldy + k + 3], dat[i * A.ld + j], x[col[i * A.ld + j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.axpyin(y[i * ldy + k], dat[i * A.ld + j], x[col[i * A.ld + j] * ldx + k]);
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k];
+ y[i * ldy + k + 1] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k + 1];
+ y[i * ldy + k + 2] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k + 2];
+ y[i * ldy + k + 3] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k];
+ }
+ }
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vect_t vx1, vx2, vy1, vy2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[i * A.ld + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vy2 = simd::load(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::load(x + col[i * A.ld + j] * ldx + k);
+ vx2 = simd::load(x + col[i * A.ld + j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ simd::store(y + i * ldy + k + simd::vect_size, simd::fmadd(vy2, vx2, vdat));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vx1 = simd::load(x + col[i * A.ld + j] * ldx + k);
+ simd::store(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k];
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vect_t vx1, vx2, vy1, vy2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[i * A.ld + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vy2 = simd::loadu(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::loadu(x + col[i * A.ld + j] * ldx + k);
+ vx2 = simd::loadu(x + col[i * A.ld + j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ simd::storeu(y + i * ldy + k + simd::vect_size, simd::fmadd(vy2, vx2, vdat));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vx1 = simd::loadu(x + col[i * A.ld + j] * ldx + k);
+ simd::storeu(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k];
+ }
+ }
+}
+
+#endif
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ const int64_t kmax) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t block = (A.ld) / kmax;
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j_loc = 0, j = 0;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k];
+ y[i * ldy + k + 1] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k + 1];
+ y[i * ldy + k + 2] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k + 2];
+ y[i * ldy + k + 3] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k];
+ }
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * ldy + k]);
+ }
+ }
+ for (; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k];
+ y[i * ldy + k + 1] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k + 1];
+ y[i * ldy + k + 2] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k + 2];
+ y[i * ldy + k + 3] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k) {
+ y[i * ldy + k] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k];
+ }
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * ldy + k]);
+ }
+ }
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ const int64_t kmax) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ index_t block = (A.ld) / kmax;
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j_loc = 0, j = 0;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ vect_t vx1, vx2, vy1, vy2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[i * A.ld + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vy2 = simd::load(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::load(x + col[i * A.ld + j] * ldx + k);
+ vx2 = simd::load(x + col[i * A.ld + j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ simd::store(y + i * ldy + k + simd::vect_size, simd::fmadd(vy2, vx2, vdat));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vx1 = simd::load(x + col[i * A.ld + j] * ldx + k);
+ simd::store(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k];
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * ldy + k]);
+ }
+ }
+ for (; j < A.ld; ++j) {
+ vect_t vx1, vx2, vy1, vy2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[i * A.ld + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vy2 = simd::load(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::load(x + col[i * A.ld + j] * ldx + k);
+ vx2 = simd::load(x + col[i * A.ld + j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ simd::store(y + i * ldy + k + simd::vect_size, simd::fmadd(vy2, vx2, vdat));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vx1 = simd::load(x + col[i * A.ld + j] * ldx + k);
+ simd::store(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k];
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * ldy + k]);
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ const int64_t kmax) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ index_t block = (A.ld) / kmax;
+ for (index_t i = 0; i < A.m; ++i) {
+ index_t j_loc = 0, j = 0;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ vect_t vx1, vx2, vy1, vy2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[i * A.ld + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vy2 = simd::loadu(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::loadu(x + col[i * A.ld + j] * ldx + k);
+ vx2 = simd::loadu(x + col[i * A.ld + j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ simd::storeu(y + i * ldy + k + simd::vect_size, simd::fmadd(vy2, vx2, vdat));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vx1 = simd::loadu(x + col[i * A.ld + j] * ldx + k);
+ simd::storeu(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k];
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * ldy + k]);
+ }
+ }
+ for (; j < A.ld; ++j) {
+ vect_t vx1, vx2, vy1, vy2, vdat;
+ size_t k = 0;
+ vdat = simd::set1(dat[i * A.ld + j]);
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vy2 = simd::loadu(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::loadu(x + col[i * A.ld + j] * ldx + k);
+ vx2 = simd::loadu(x + col[i * A.ld + j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ simd::storeu(y + i * ldy + k + simd::vect_size, simd::fmadd(vy2, vx2, vdat));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vx1 = simd::loadu(x + col[i * A.ld + j] * ldx + k);
+ simd::storeu(y + i * ldy + k, simd::fmadd(vy1, vx1, vdat));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += dat[i * A.ld + j] * x[col[i * A.ld + j] * ldx + k];
+ }
+ // TODO : replace with freduce
+ for (size_t k = 0; k < blockSize; ++k) {
+ F.reduce(y[i * ldy + k]);
+ }
+ }
+}
+
+#endif // SIMD
+
+template <class Field>
+inline void fspmm_mone(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.subin(y[i * ldy + k], x[col[i * A.ld + j] * ldx + k]);
+ F.subin(y[i * ldy + k + 1], x[col[i * A.ld + j] * ldx + k + 1]);
+ F.subin(y[i * ldy + k + 2], x[col[i * A.ld + j] * ldx + k + 2]);
+ F.subin(y[i * ldy + k + 3], x[col[i * A.ld + j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.subin(y[i * ldy + k], x[col[i * A.ld + j] * ldx + k]);
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_one(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ F.addin(y[i * ldy + k], x[col[i * A.ld + j] * ldx + k]);
+ F.addin(y[i * ldy + k + 1], x[col[i * A.ld + j] * ldx + k + 1]);
+ F.addin(y[i * ldy + k + 2], x[col[i * A.ld + j] * ldx + k + 2]);
+ F.addin(y[i * ldy + k + 3], x[col[i * A.ld + j] * ldx + k + 3]);
+ }
+ for (; k < blockSize; ++k)
+ F.addin(y[i * ldy + k], x[col[i * A.ld + j] * ldx + k]);
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_mone(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] -= x[col[i * A.ld + j] * ldx + k];
+ y[i * ldy + k + 1] -= x[col[i * A.ld + j] * ldx + k + 1];
+ y[i * ldy + k + 2] -= x[col[i * A.ld + j] * ldx + k + 2];
+ y[i * ldy + k + 3] -= x[col[i * A.ld + j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= x[col[i * A.ld + j] * ldx + k];
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_one(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_, int ldy,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 4); k += 4) {
+ y[i * ldy + k] += x[col[i * A.ld + j] * ldx + k];
+ y[i * ldy + k + 1] += x[col[i * A.ld + j] * ldx + k + 1];
+ y[i * ldy + k + 2] += x[col[i * A.ld + j] * ldx + k + 2];
+ y[i * ldy + k + 3] += x[col[i * A.ld + j] * ldx + k + 3];
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += x[col[i * A.ld + j] * ldx + k];
+ }
+ }
+}
+
+// #ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmm_one_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vect_t vx1, vx2, vy1, vy2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vy2 = simd::load(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::load(x + col[i * A.ld + j] * ldx + k);
+ vx2 = simd::load(x + col[i * A.ld + j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldx + k, simd::add(vy1, vx1));
+ simd::store(y + i * ldx + k + simd::vect_size, simd::add(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vx1 = simd::load(x + col[i * A.ld + j] * ldy + k);
+ simd::store(y + i * ldx + k, simd::add(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += x[col[i * A.ld + j] * ldx + k];
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_one_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vect_t vx1, vx2, vy1, vy2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vy2 = simd::loadu(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::loadu(x + col[i * A.ld + j] * ldx + k);
+ vx2 = simd::loadu(x + col[i * A.ld + j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldx + k, simd::add(vy1, vx1));
+ simd::storeu(y + i * ldx + k + simd::vect_size, simd::add(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vx1 = simd::loadu(x + col[i * A.ld + j] * ldy + k);
+ simd::storeu(y + i * ldx + k, simd::add(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] += x[col[i * A.ld + j] * ldx + k];
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_mone_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vect_t vx1, vx2, vy1, vy2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vy2 = simd::load(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::load(x + col[i * A.ld + j] * ldx + k);
+ vx2 = simd::load(x + col[i * A.ld + j] * ldx + k + simd::vect_size);
+ simd::store(y + i * ldx + k, simd::sub(vy1, vx1));
+ simd::store(y + i * ldx + k + simd::vect_size, simd::sub(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::load(y + i * ldy + k);
+ vx1 = simd::load(x + col[i * A.ld + j] * ldy + k);
+ simd::store(y + i * ldx + k, simd::sub(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= x[col[i * A.ld + j] * ldx + k];
+ }
+ }
+}
+
+template <class Field>
+inline void fspmm_mone_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x_, int ldx, typename Field::Element_ptr y_,
+ int ldy, FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.m; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ vect_t vx1, vx2, vy1, vy2;
+ size_t k = 0;
+ for (; k < ROUND_DOWN(blockSize, 2 * simd::vect_size); k += 2 * simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vy2 = simd::loadu(y + i * ldy + k + simd::vect_size);
+ vx1 = simd::loadu(x + col[i * A.ld + j] * ldx + k);
+ vx2 = simd::loadu(x + col[i * A.ld + j] * ldx + k + simd::vect_size);
+ simd::storeu(y + i * ldx + k, simd::sub(vy1, vx1));
+ simd::storeu(y + i * ldx + k + simd::vect_size, simd::sub(vy2, vx2));
+ }
+ for (; k < ROUND_DOWN(blockSize, simd::vect_size); k += simd::vect_size) {
+ vy1 = simd::loadu(y + i * ldy + k);
+ vx1 = simd::loadu(x + col[i * A.ld + j] * ldy + k);
+ simd::storeu(y + i * ldx + k, simd::sub(vy1, vx1));
+ }
+ for (; k < blockSize; ++k)
+ y[i * ldy + k] -= x[col[i * A.ld + j] * ldx + k];
+ }
+ }
+}
+
+// #endif /* __FFLASFFPACK_USE_SIMD */
+
+} // ell_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_ELL_spmm_INL
diff --git a/fflas-ffpack/fflas/fflas_sparse/ell/ell_spmv.inl b/fflas-ffpack/fflas/fflas_sparse/ell/ell_spmv.inl
new file mode 100644
index 0000000..c717887
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/ell/ell_spmv.inl
@@ -0,0 +1,264 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_ELL_spmv_INL
+#define __FFLASFFPACK_fflas_sparse_ELL_spmv_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::GenericTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t start = 0;
+ for (index_t i = 0; i < A.m; ++i, start += A.ld) {
+ index_t j = 0;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ F.axpyin(y1, dat[start + j], x[col[start + j]]);
+ F.axpyin(y2, dat[start + j + 1], x[col[start + j + 1]]);
+ F.axpyin(y3, dat[start + j + 2], x[col[start + j + 2]]);
+ F.axpyin(y4, dat[start + j + 3], x[col[start + j + 3]]);
+ }
+ for (; j < A.ld; ++j) {
+ F.axpyin(y1, dat[start + j], x[col[start + j]]);
+ }
+ F.addin(y[i], y1);
+ F.addin(y[i], y2);
+ F.addin(y[i], y3);
+ F.addin(y[i], y4);
+ }
+}
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t start = 0;
+ for (index_t i = 0; i < A.m; ++i, start += A.ld) {
+ index_t j = 0;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ y1 += dat[start + j] * x[col[start + j]];
+ y2 += dat[start + j + 1] * x[col[start + j + 1]];
+ y3 += dat[start + j + 2] * x[col[start + j + 2]];
+ y4 += dat[start + j + 3] * x[col[start + j + 3]];
+ }
+ for (; j < A.ld; ++j) {
+ y1 += dat[start + j] * x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+}
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, const uint64_t kmax) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t start = 0;
+ index_t block = (A.ld) / kmax;
+ for (index_t i = 0; i < A.m; ++i, start += A.ld) {
+ index_t j_loc = 0, j = 0;
+ for (index_t l = 0; l < (index_t)block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ y[i] += dat[start + j] * x[col[start + j]];
+ }
+ F.reduce(y[i]);
+ }
+ for (; j < A.ld; ++j) {
+ y[i] += dat[start + j] * x[col[start + j]];
+ }
+ F.reduce(y[i]);
+ }
+}
+
+// template <class Field, class Func>
+// inline void
+// fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A,
+// typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+// Func &&func, FieldCategories::GenericTag) {
+// index_t start = 0;
+// for (index_t i = 0; i < A.m; ++i, start += A.ld) {
+// index_t j = 0;
+// typename Field::Element y1, y2, y3, y4;
+// F.assign(y1, F.zero);
+// F.assign(y2, F.zero);
+// F.assign(y3, F.zero);
+// F.assign(y4, F.zero);
+// for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+// func(y1, x[col[start + j]]);
+// func(y2, x[col[start + j + 1]]);
+// func(y3, x[col[start + j + 2]]);
+// func(y4, x[col[start + j + 3]]);
+// }
+// for (; j < A.ld; ++j) {
+// func(y1, x[col[start + j]]);
+// }
+// F.addin(y[i], y1);
+// F.addin(y[i], y2);
+// F.addin(y[i], y3);
+// F.addin(y[i], y4);
+// }
+// }
+
+template <class Field>
+inline void fspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t start = 0;
+ for (index_t i = 0; i < A.m; ++i, start += A.ld) {
+ index_t j = 0;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ F.addin(y1, x[col[start + j]]);
+ F.addin(y2, x[col[start + j + 1]]);
+ F.addin(y3, x[col[start + j + 2]]);
+ F.addin(y4, x[col[start + j + 3]]);
+ }
+ for (; j < A.ld; ++j) {
+ F.addin(y1, x[col[start + j]]);
+ }
+ F.addin(y[i], y1);
+ F.addin(y[i], y2);
+ F.addin(y[i], y3);
+ F.addin(y[i], y4);
+ }
+}
+
+template <class Field>
+inline void fspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t start = 0;
+ for (index_t i = 0; i < A.m; ++i, start += A.ld) {
+ index_t j = 0;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ F.addin(y1, x[col[start + j]]);
+ F.addin(y2, x[col[start + j + 1]]);
+ F.addin(y3, x[col[start + j + 2]]);
+ F.addin(y4, x[col[start + j + 3]]);
+ }
+ for (; j < A.ld; ++j) {
+ F.addin(y1, x[col[start + j]]);
+ }
+ F.subin(y[i], y1);
+ F.subin(y[i], y2);
+ F.subin(y[i], y3);
+ F.subin(y[i], y4);
+ }
+}
+
+template <class Field>
+inline void fspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t start = 0;
+ for (index_t i = 0; i < A.m; ++i, start += A.ld) {
+ index_t j = 0;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < A.ld; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] += y1 + y2 + y3 + y4;
+ }
+}
+
+template <class Field>
+inline void fspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t start = 0;
+ for (index_t i = 0; i < A.m; ++i, start += A.ld) {
+ index_t j = 0;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for (; j < ROUND_DOWN(A.ld, 4); j += 4) {
+ y1 += x[col[start + j]];
+ y2 += x[col[start + j + 1]];
+ y3 += x[col[start + j + 2]];
+ y4 += x[col[start + j + 3]];
+ }
+ for (; j < A.ld; ++j) {
+ y1 += x[col[start + j]];
+ }
+ y[i] -= y1 + y2 + y3 + y4;
+ }
+}
+
+} // ELL_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_ELL_spmv_INL
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/ell/ell_utils.inl b/fflas-ffpack/fflas/fflas_sparse/ell/ell_utils.inl
new file mode 100644
index 0000000..e6bc8c3
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/ell/ell_utils.inl
@@ -0,0 +1,116 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_ELL_utils_INL
+#define __FFLASFFPACK_fflas_sparse_ELL_utils_INL
+
+#include <vector>
+
+namespace FFLAS {
+template <class Field> inline void sparse_delete(const Sparse<Field, SparseMatrix_t::ELL> &A) {
+ fflas_delete(A.dat);
+ fflas_delete(A.col);
+}
+
+template <class Field> inline void sparse_delete(const Sparse<Field, SparseMatrix_t::ELL_ZO> &A) {
+ fflas_delete(A.col);
+}
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::ELL> &A, const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz) {
+ A.kmax = Protected::DotProdBoundClassic(F, F.one);
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ // cout << A.m << " " << A.n << endl;
+ std::vector<uint64_t> rows(A.m, 0);
+ for (uint64_t i = 0; i < A.nnz; ++i)
+ rows[row[i]]++;
+ A.maxrow = *(std::max_element(rows.begin(), rows.end()));
+ // cout << "maxrow : " << A.maxrow << endl;
+ A.ld = A.maxrow;
+ if (A.kmax > A.maxrow)
+ A.delayed = true;
+ // cout << A.ld << " " << rowdim << " " << nnz << " " << A.ld*rowdim << endl;
+ A.nElements = A.m * A.ld;
+ A.col = fflas_new<index_t>(rowdim * A.ld, Alignment::CACHE_LINE);
+ A.dat = fflas_new(F, rowdim * A.ld, 1, Alignment::CACHE_LINE);
+
+ for (size_t i = 0; i < rowdim * A.ld; ++i) {
+ A.col[i] = 0;
+ F.assign(A.dat[i], F.zero);
+ }
+
+ size_t currow = row[0], it = 0;
+ for (size_t i = 0; i < nnz; ++i) {
+ if (row[i] != currow) {
+ it = 0;
+ currow = row[i];
+ }
+ A.col[row[i] * A.ld + it] = col[i];
+ A.dat[row[i] * A.ld + it] = dat[i];
+ ++it;
+ }
+}
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::ELL_ZO> &A, const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz) {
+ A.kmax = Protected::DotProdBoundClassic(F, F.one);
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ std::vector<uint64_t> rows(A.m, 0);
+ for (uint64_t i = 0; i < A.nnz; ++i)
+ rows[row[i]]++;
+ A.maxrow = *(std::max_element(rows.begin(), rows.end()));
+ A.ld = A.maxrow;
+ if (A.kmax > A.maxrow)
+ A.delayed = true;
+ A.nElements = A.m * A.ld;
+ A.col = fflas_new<index_t>(rowdim * A.ld, Alignment::CACHE_LINE);
+
+ for (size_t i = 0; i < rowdim * A.ld; ++i) {
+ A.col[i] = 0;
+ }
+
+ size_t currow = row[0], it = 0;
+
+ for (size_t i = 0; i < nnz; ++i) {
+ if (row[i] != currow) {
+ it = 0;
+ currow = row[i];
+ }
+ A.col[row[i] * A.ld + it] = col[i];
+ ++it;
+ }
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/ell_r.h b/fflas-ffpack/fflas/fflas_sparse/ell_r.h
new file mode 100644
index 0000000..58a76a0
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/ell_r.h
@@ -0,0 +1,84 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_fspmv_ELL_R.inl
+ * NO DOC
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_ELL_R_H
+#define __FFLASFFPACK_fflas_sparse_ELL_R_H
+
+namespace FFLAS { /* ELL_R */
+
+template <class _Field> struct Sparse<_Field, SparseMatrix_t::ELL_R> {
+ bool delayed = false;
+ uint64_t kmax = 0;
+ index_t m = 0;
+ index_t n = 0;
+ index_t ld = 0;
+ uint64_t nnz = 0;
+ uint64_t maxrow = 0;
+ uint64_t mRow = 0;
+ index_t *col = nullptr;
+ index_t *row = nullptr;
+ typename _Field::Element_ptr dat;
+};
+
+template <class _Field>
+struct Sparse<_Field, SparseMatrix_t::ELL_R_ZO>
+ : public Sparse<_Field, SparseMatrix_t::ELL_R> {
+ typename _Field::Element cst = 1;
+};
+
+template <class Field>
+void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_R> &A,
+ typename Field::ConstElement_ptr x,
+ const typename Field::Element &beta, typename Field::Element_ptr y);
+
+template <class Field>
+void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_R_ZO> &A,
+ typename Field::ConstElement_ptr x,
+ const typename Field::Element &beta, typename Field::Element_ptr y);
+
+template <class Field>
+void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_R> &A,
+ const size_t blockSize, const typename Field::Element_ptr &x,
+ const int ldx, const typename Field::Element &beta,
+ typename Field::Element_ptr &y, const int ldy);
+
+template <class Field>
+void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_R_ZO> &A,
+ const size_t blockSize, const typename Field::Element_ptr &x,
+ const int ldx, const typename Field::Element &beta,
+ typename Field::Element_ptr &y, const int ldy);
+} // FFLAS
+
+#include "fflas-ffpack/fflas/fflas_sparse/ell_r_spmv.inl"
+// #include "fflas-ffpack/fflas/fflas_sparse/ell_r_spmm.inl"
+
+#endif // __FFLASFFPACK_fflas_sparse_ELL_R_H
\ No newline at end of file
diff --git a/benchmark/test-src/Makefile.am b/fflas-ffpack/fflas/fflas_sparse/ell_r/Makefile.am
similarity index 80%
rename from benchmark/test-src/Makefile.am
rename to fflas-ffpack/fflas/fflas_sparse/ell_r/Makefile.am
index 5e3f1d1..5985c66 100644
--- a/benchmark/test-src/Makefile.am
+++ b/fflas-ffpack/fflas/fflas_sparse/ell_r/Makefile.am
@@ -1,5 +1,7 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by Bastien Vialla <bastien.vialla at lirmm.fr>
+#
+#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -19,10 +21,8 @@
# ========LICENCE========
#/
-#
-# Nothing yet
-#
-EXTRA_DIST=mesure-BLAS_LAPACK.sh \
- mesure-FFLAS_FFPACK.sh \
- mesure.sh \
- parameter.in
+
+pkgincludesubdir=$(pkgincludedir)/fflas/fflas_sparse/ell_r
+
+pkgincludesub_HEADERS= \
+ ell_r_spmv.inl
diff --git a/fflas-ffpack/fflas/fflas_sparse/ell_r/ell_r_spmv.inl b/fflas-ffpack/fflas/fflas_sparse/ell_r/ell_r_spmv.inl
new file mode 100644
index 0000000..f207c89
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/ell_r/ell_r_spmv.inl
@@ -0,0 +1,319 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ * Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_ELL_R_spmv_INL
+#define __FFLASFFPACK_fflas_sparse_ELL_R_spmv_INL
+
+namespace FFLAS{
+ namespace ell_r_details{
+ template<class Field>
+ inline void fspmv(const Field & F, const Sparse<Field, SparseMatrix_t::ELL_R> & A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, FieldCategories::GenericTag){
+ index_t start = 0;
+ for(index_t i = 0 ; i < A.mRow ; ++i, start+=A.ld){
+ index_t j = 0;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for(; j < ROUND_DOWN(A.ld, 4) ; j+=4){
+ F.axpyin(y1,A.dat[start+j],x[A.col[start+j]]);
+ F.axpyin(y2,A.dat[start+j+1],x[A.col[start+j+1]]);
+ F.axpyin(y3,A.dat[start+j+2],x[A.col[start+j+2]]);
+ F.axpyin(y4,A.dat[start+j+3],x[A.col[start+j+3]]);
+ }
+ for(; j < A.ld ; ++j){
+ F.axpyin(y1,A.dat[start+j],x[A.col[start+j]]);
+ }
+ F.addin(y[A.row[i]], y1);
+ F.addin(y[A.row[i]], y2);
+ F.addin(y[A.row[i]], y3);
+ F.addin(y[A.row[i]], y4);
+ }
+ }
+
+ template<class Field>
+ inline void fspmv(const Field & F, const Sparse<Field, SparseMatrix_t::ELL_R> & A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, FieldCategories::UnparametricTag){
+ index_t start = 0;
+ for(index_t i = 0 ; i < A.mRow ; ++i, start+=A.ld){
+ index_t j = 0;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for(; j < ROUND_DOWN(A.ld, 4) ; j+=4){
+ y1 += A.dat[start+j] * x[A.col[start+j]];
+ y2 += A.dat[start+j+1] * x[A.col[start+j+1]];
+ y3 += A.dat[start+j+2] * x[A.col[start+j+2]];
+ y4 += A.dat[start+j+3] * x[A.col[start+j+3]];
+ }
+ for(; j < A.ld ; ++j){
+ y1 += A.dat[start+j] * x[A.col[start+j]];
+ }
+ y[A.row[i]] += y1+y2+y3+y4;
+ }
+ }
+
+ template<class Field>
+ inline void fspmv(const Field & F, const Sparse<Field, SparseMatrix_t::ELL_R> & A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, const int64_t kmax){
+ index_t start = 0;
+ index_t block = (A.ld)/kmax ;
+ for (index_t i = 0 ; i < A.mRow ; ++i, start+=A.ld) {
+ index_t j_loc = 0, j = 0;
+ for (index_t l = 0 ; l < (index_t) block ; ++l) {
+ j_loc += kmax ;
+ for ( ; j < j_loc ; ++j) {
+ y[A.row[i]] += A.dat[start+j] * x[A.col[start+j]];
+ }
+ F.reduce(y[A.row[i]]);
+ }
+ for ( ; j < A.ld ; ++j) {
+ y[A.row[i]] += A.dat[start+j] * x[A.col[start+j]];
+ }
+ F.reduce(y[A.row[i]];
+ }
+ }
+
+ template<class Field, class Func>
+ inline void fspmv(const Field & F, const Sparse<Field, SparseMatrix_t::ELL_R_ZO> & A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, Func && func, FieldCategories::GenericTag){
+ index_t start = 0;
+ for(index_t i = 0 ; i < A.mRow ; ++i, start+=A.ld){
+ index_t j = 0;
+ typename Field::Element y1, y2, y3, y4;
+ F.assign(y1, F.zero);
+ F.assign(y2, F.zero);
+ F.assign(y3, F.zero);
+ F.assign(y4, F.zero);
+ for(; j < ROUND_DOWN(A.ld, 4) ; j+=4){
+ func(y1,x[A.col[start+j]]);
+ func(y2,x[A.col[start+j+1]]);
+ func(y3,x[A.col[start+j+2]]);
+ func(y4,x[A.col[start+j+3]]);
+ }
+ for(; j < A.ld ; ++j){
+ func(y1,x[A.col[start+j]]);
+ }
+ F.addin(y[A.row[i]], y1);
+ F.addin(y[A.row[i]], y2);
+ F.addin(y[A.row[i]], y3);
+ F.addin(y[A.row[i]], y4);
+ }
+ }
+
+ template<class Field, class Func>
+ inline void fspmv(const Field & F, const Sparse<Field, SparseMatrix_t::ELL_R_ZO> & A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, Func && func, FieldCategories::UnparametricTag){
+ index_t start = 0;
+ for(index_t i = 0 ; i < A.mRow ; ++i, start+=A.ld){
+ index_t j = 0;
+ typename Field::Element y1 = 0, y2 = 0, y3 = 0, y4 = 0;
+ for(; j < ROUND_DOWN(A.ld, 4) ; j+=4){
+ func(y1,x[A.col[start+j]]);
+ func(y2,x[A.col[start+j+1]]);
+ func(y3,x[A.col[start+j+2]]);
+ func(y4,x[A.col[start+j+3]]);
+ }
+ for(; j < A.ld ; ++j){
+ func(y1,x[A.col[start+j]]);
+ }
+ y[A.row[i]] += y1+y2+y3+y4;
+ }
+ }
+ }// ELL_R_details
+
+ template<class Field>
+ inline void fspmv(const Field& F, const Sparse<Field, SparseMatrix_t::ELL_R> & A, typename Field::ConstElement_ptr x,
+ const typename Field::Element & beta, typename Field::Element_ptr y){
+ sparse_details::init_y(F, A.m, beta, y, typename FieldTraits<Field>::category());
+ fspmv(F, A, x, y, typename FieldTraits<Field>::category());
+ }
+
+ template<class Field>
+ inline void fspmv(const Field& F, const Sparse<Field, SparseMatrix_t::ELL_R> & A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, FieldCategories::GenericTag){
+ ell_r_details::fspmv(F, A, x, y, FieldCategories::GenericTag());
+ }
+
+ template<class Field>
+ inline void fspmv(const Field& F, const Sparse<Field, SparseMatrix_t::ELL_R> & A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, FieldCategories::UnparametricTag){
+ ell_r_details::fspmv(F, A, x, y, FieldCategories::UnparametricTag());
+ }
+
+ template<class Field>
+ inline void fspmv(const Field& F, const Sparse<Field, SparseMatrix_t::ELL_R> & A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, FieldCategories::ModularTag){
+ if(A.delayed){
+ ell_r_details::fspmv(F, A, x, y, FieldCategories::UnparametricTag());
+ freduce(F, A.m, y, 1);
+ }else{
+ ell_r_details::fspmv(F, A, x, y, A.kmax);
+ }
+ }
+
+ template<class Field>
+ inline void fspmv(const Field& F, const Sparse<Field, SparseMatrix_t::ELL_R_ZO> & A, typename Field::ConstElement_ptr x,
+ const typename Field::Element & beta, typename Field::Element_ptr y){
+ sparse_details::init_y(F, A.m, beta, y, typename FieldTraits<Field>::category());
+ fspmv(F, A, x, y, typename FieldTraits<Field>::category());
+ }
+
+ template<class Field>
+ inline void fspmv(const Field& F, const Sparse<Field, SparseMatrix_t::ELL_R_ZO> & A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, FieldCategories::GenericTag){
+ using Element = typename Field::Element;
+ if(A.cst == 1){
+ ell_r_details::fspmv(F, A, x, y, [&F](Element & a, const Element & b){F.addin(a, b);}, FieldCategories::GenericTag());
+ }else if(A.cst == -1){
+ ell_r_details::fspmv(F, A, x, y, [&F](Element & a, const Element & b){F.subin(a, b);}, FieldCategories::GenericTag());
+ }else{
+ auto x1 = fflas_new(F, A.n, 1, Alignment::CACHE_LINE);
+ fscal(F, A.n, A.cst, x, 1, x1, 1);
+ ell_r_details::fspmv(F, A, x, y, [&F](Element & a, const Element & b){F.addin(a, b);}, FieldCategories::GenericTag());
+ fflas_delete(x1);
+ }
+ }
+
+ template<class Field>
+ inline void fspmv(const Field& F, const Sparse<Field, SparseMatrix_t::ELL_R_ZO> & A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, FieldCategories::UnparametricTag){
+ using Element = typename Field::Element;
+ if(A.cst == 1){
+ ell_r_details::fspmv(F, A, x, y, [](Element & a, const Element & b){a += b;}, FieldCategories::UnparametricTag());
+ }else if(A.cst == -1){
+ ell_r_details::fspmv(F, A, x, y, [](Element & a, const Element & b){a -= b;}, FieldCategories::UnparametricTag());
+ }else{
+ auto x1 = fflas_new(F, A.n, 1, Alignment::CACHE_LINE);
+ fscal(F, A.n, A.cst, x, 1, x1, 1);
+ ell_r_details::fspmv(F, A, x, y, [](Element & a, const Element & b){a += b;}, FieldCategories::UnparametricTag());
+ fflas_delete(x1);
+ }
+ }
+
+ template<class Field>
+ inline void fspmv(const Field& F, const Sparse<Field, SparseMatrix_t::ELL_R_ZO> & A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, FieldCategories::ModularTag){
+ fspmv(F, A, x, y, FieldCategories::UnparametricTag());
+ freduce(F, A.m, y, 1);
+ }
+
+ template<class Field>
+ inline void sparse_delete(const Sparse<Field, SparseMatrix_t::ELL_R> & A){
+ fflas_delete(A.dat);
+ fflas_delete(A.col);
+ }
+
+ template<class Field>
+ inline void sparse_delete(const Sparse<Field, SparseMatrix_t::ELL_R_ZO> & A){
+ fflas_delete(A.col);
+ }
+
+ template<class Field, class IndexT>
+ inline void sparse_init(const Field & F, Sparse<Field, SparseMatrix_t::ELL_R> & A,
+ const IndexT * row, const IndexT * col, typename Field::ConstElement_ptr dat,
+ uint64_t rowdim, uint64_t coldim, uint64_t nnz){
+ // TODO
+
+ // A.kmax = Protected::DotProdBoundClassic(F,F.one);
+ // A.m = rowdim;
+ // A.n = coldim;
+ // A.nnz = nnz;
+ // std::vector<uint64_t> rows(A.m, 0);
+ // for(uint64_t i = 0 ; i < A.nnz ; ++i)
+ // rows[row[i]]++;
+ // A.maxrow = *(std::max_element(rows.begin(), rows.end()));
+ // A.ld = A.maxrow;
+ // for(auto & x : rows)
+ // if(x != 0)
+ // A.mRow++;
+
+ // if(A.kmax > A.maxrow)
+ // A.delayed = true;
+
+ // A.col = fflas_new<index_t>(A.mRow*A.ld, Alignment::CACHE_LINE);
+ // A.dat = fflas_new(F, rowdim*A.ld, 1, Alignment::CACHE_LINE);
+
+ // for(size_t i = 0 ; i < rowdim*A.ld ; ++i){
+ // A.col[i] = 0;
+ // F.assign(A.dat[i], F.zero);
+ // }
+
+ // size_t currow = row[0], it = 0;
+
+ // for(size_t i = 0 ; i < nnz ; ++i){
+ // if(row[i] != currow){
+ // it = 0;
+ // currow = row[i];
+ // }
+ // A.col[row[i]*A.ld + it] = col[i];
+ // A.dat[row[i]*A.ld + it] = dat[i];
+ // ++it;
+ // }
+ }
+
+ template<class Field, class IndexT>
+ inline void sparse_init(const Field & F, Sparse<Field, SparseMatrix_t::ELL_R_ZO> & A,
+ const IndexT * row, const IndexT * col, typename Field::ConstElement_ptr dat,
+ uint64_t rowdim, uint64_t coldim, uint64_t nnz){
+ // TODO
+
+ // A.kmax = Protected::DotProdBoundClassic(F,F.one);
+ // A.m = rowdim;
+ // A.n = coldim;
+ // A.nnz = nnz;
+ // std::vector<uint64_t> rows(A.m, 0);
+ // for(uint64_t i = 0 ; i < A.nnz ; ++i)
+ // rows[row[i]]++;
+ // A.maxrow = *(std::max_element(rows.begin(), rows.end()));
+ // A.ld = A.maxrow;
+ // if(A.kmax > A.maxrow)
+ // A.delayed = true;
+
+ // A.col = fflas_new<index_t>(rowdim*A.ld, Alignment::CACHE_LINE);
+
+ // for(size_t i = 0 ; i < rowdim*A.ld ; ++i){
+ // A.col[i] = 0;
+ // }
+
+ // size_t currow = row[0], it = 0;
+
+ // for(size_t i = 0 ; i < nnz ; ++i){
+ // if(row[i] != currow){
+ // it = 0;
+ // currow = row[i];
+ // }
+ // A.col[row[i]*A.ld + it] = col[i];
+ // ++it;
+ // }
+ }
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_ELL_R_spmv_INL
diff --git a/fflas-ffpack/fflas/fflas_sparse/ell_simd.h b/fflas-ffpack/fflas/fflas_sparse/ell_simd.h
new file mode 100644
index 0000000..df8167a
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/ell_simd.h
@@ -0,0 +1,87 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_fspmv_ELL_simd.inl
+ * NO DOC
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_ELL_simd_H
+#define __FFLASFFPACK_fflas_sparse_ELL_simd_H
+
+namespace FFLAS { /* ELL_simd */
+
+template <class _Field> struct Sparse<_Field, SparseMatrix_t::ELL_simd> {
+ bool delayed = false;
+ int chunk = 0;
+ index_t m = 0;
+ index_t n = 0;
+ index_t ld = 0;
+ uint64_t kmax = 0;
+ uint64_t nnz = 0;
+ uint64_t nElements = 0;
+ uint64_t maxrow = 0;
+ uint64_t nChunks = 0;
+ index_t *col = nullptr;
+ typename _Field::Element_ptr dat;
+};
+
+template <class _Field>
+struct Sparse<_Field, SparseMatrix_t::ELL_simd_ZO>
+ : public Sparse<_Field, SparseMatrix_t::ELL_simd> {
+ typename _Field::Element cst = 1;
+};
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F,
+ Sparse<Field, SparseMatrix_t::ELL_simd> &A,
+ const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim,
+ uint64_t coldim, uint64_t nnz);
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F,
+ Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A,
+ const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim,
+ uint64_t coldim, uint64_t nnz);
+
+template <class Field>
+inline void sparse_delete(const Sparse<Field, SparseMatrix_t::ELL_simd> &A);
+
+template <class Field>
+inline void sparse_delete(const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A);
+} // FFLAS
+
+#include "fflas-ffpack/fflas/fflas_sparse/ell_simd/ell_simd_utils.inl"
+#include "fflas-ffpack/fflas/fflas_sparse/ell_simd/ell_simd_spmv.inl"
+#if defined(__FFLASFFPACK_USE_OPENMP)
+#include "fflas-ffpack/fflas/fflas_sparse/ell_simd/ell_simd_pspmv.inl"
+#endif
+// #include "fflas-ffpack/fflas/fflas_sparse/ell_simd_spmm.inl"
+
+#endif // __FFLASFFPACK_fflas_sparse_ELL_simd_H
\ No newline at end of file
diff --git a/benchmark/Makefile.am b/fflas-ffpack/fflas/fflas_sparse/ell_simd/Makefile.am
similarity index 75%
copy from benchmark/Makefile.am
copy to fflas-ffpack/fflas/fflas_sparse/ell_simd/Makefile.am
index 31793b2..4f74f1e 100644
--- a/benchmark/Makefile.am
+++ b/fflas-ffpack/fflas/fflas_sparse/ell_simd/Makefile.am
@@ -1,5 +1,7 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by Bastien Vialla <bastien.vialla at lirmm.fr>
+#
+#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -19,10 +21,10 @@
# ========LICENCE========
#/
-#
-# Nothing yet
-SUBDIRS=graph src html test-src
-#
-EXTRA_DIST=run.sh
+pkgincludesubdir=$(pkgincludedir)/fflas/fflas_sparse/ell_simd
+pkgincludesub_HEADERS= \
+ ell_simd_spmv.inl \
+ ell_simd_pspmv.inl \
+ ell_simd_utils.inl
diff --git a/fflas-ffpack/fflas/fflas_sparse/ell_simd/ell_simd_pspmv.inl b/fflas-ffpack/fflas/fflas_sparse/ell_simd/ell_simd_pspmv.inl
new file mode 100644
index 0000000..5b7aee6
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/ell_simd/ell_simd_pspmv.inl
@@ -0,0 +1,628 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_ELL_simd_pspmv_INL
+#define __FFLASFFPACK_fflas_sparse_ELL_simd_pspmv_INL
+
+#ifdef __FFLASFFPACK_USE_TBB
+#include "tbb/parallel_for.h"
+#include "tbb/blocked_range.h"
+#endif
+
+namespace FFLAS {
+namespace sparse_details_impl {
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_, FieldCategories::GenericTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nbChunks, 2),
+ [&F, &A, x, y, dat, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ for (index_t k = 0; k < A.chunk; ++k) {
+ F.axpyin(y[i * A.chunk + k], dat[i * A.ld * A.chunk + j * A.chunk + k],
+ x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ }
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ for (index_t k = 0; k < A.chunk; ++k) {
+ F.axpyin(y[i * A.chunk + k], dat[i * A.ld * A.chunk + j * A.chunk + k],
+ x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ }
+ }
+ }
+#endif
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+template <class Field>
+inline void pfspmv_simd(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nChunks, 2),
+ [&F, &A, x, y, dat, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ vect_t y1, y2, x1, x2, dat1, dat2, yy;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ for (; j < ROUND_DOWN(A.ld, 2); j += 2) {
+ dat1 = simd::load(dat + i * A.ld * A.chunk + j * A.chunk);
+ dat2 = simd::load(dat + i * A.ld * A.chunk + (j + 1) * A.chunk);
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ x2 = simd::gather(x, col + i * A.ld * A.chunk + (j + 1) * A.chunk);
+ y1 = simd::fmadd(y1, dat1, x1);
+ y2 = simd::fmadd(y2, dat2, x2);
+ }
+ for (; j < A.ld; ++j) {
+ dat1 = simd::load(dat + i * A.ld * A.chunk + j * A.chunk);
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ y1 = simd::fmadd(y1, dat1, x1);
+ }
+ yy = simd::load(y + i * A.chunk);
+ simd::store(y + i * A.chunk, simd::add(yy, simd::add(y1, y2)));
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ vect_t y1, y2, x1, x2, dat1, dat2, yy;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ for (; j < ROUND_DOWN(A.ld, 2); j += 2) {
+ dat1 = simd::load(dat + i * A.ld * A.chunk + j * A.chunk);
+ dat2 = simd::load(dat + i * A.ld * A.chunk + (j + 1) * A.chunk);
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ x2 = simd::gather(x, col + i * A.ld * A.chunk + (j + 1) * A.chunk);
+ y1 = simd::fmadd(y1, dat1, x1);
+ y2 = simd::fmadd(y2, dat2, x2);
+ }
+ for (; j < A.ld; ++j) {
+ dat1 = simd::load(dat + i * A.ld * A.chunk + j * A.chunk);
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ y1 = simd::fmadd(y1, dat1, x1);
+ }
+ yy = simd::load(y + i * A.chunk);
+ simd::store(y + i * A.chunk, simd::add(yy, simd::add(y1, y2)));
+ }
+#endif
+}
+#endif // SIMD
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nChunks, 2),
+ [&F, &A, x, y, dat, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k] * x[col[i * A.ld * A.chunk + J * A.chunk + k]];
+ y[i * A.chunk + k + 1] += dat[i * A.ld * A.chunk + j * A.chunk + k + 1] *
+ x[col[i * A.ld * A.chunk + J * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] += dat[i * A.ld * A.chunk + j * A.chunk + k + 2] *
+ x[col[i * A.ld * A.chunk + J * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] += dat[i * A.ld * A.chunk + j * A.chunk + k + 3] *
+ x[col[i * A.ld * A.chunk + J * A.chunk + k + 3]];
+ }
+ for (; k < A.chunk; ++k)
+ y[i * A.chunk + k] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k] * x[col[i * A.ld * A.chunk + J * A.chunk + k]];
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k] * x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k + 1] * x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k + 2] * x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k + 3] * x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]];
+ }
+ for (; k < A.chunk; ++k)
+ y[i * A.chunk + k] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k] * x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+#endif // TBB
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+template <class Field>
+inline void pfspmv_simd(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_, const uint64_t kmax) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t block = (A.ld) / kmax; // use DIVIDE_INTO from pfspmvgpu
+ index_t chunk = A.chunk;
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+
+ vect_t X, Y, D, C, Q, TMP, NEGP, INVP, MIN, MAX, P;
+ double p = (typename Field::Element)F.characteristic();
+
+ P = simd::set1(p);
+ NEGP = simd::set1(-p);
+ INVP = simd::set1(1 / p);
+ MIN = simd::set1(F.minElement());
+ MAX = simd::set1(F.maxElement());
+
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nChunks, 2),
+ [&F, &A, x, y, P, NEGP, INVP, MAX, MIN, dat, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ index_t j_loc = 0;
+ Y = simd::load(y + i * chunk);
+ for (size_t l = 0; l < block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ D = simd::load(dat + i * A.chunk * A.ld + j * A.chunk);
+ X = simd::gather(x, col + i * A.chunk * A.ld + j * A.chunk);
+ Y = simd::fmadd(Y, D, X);
+ }
+ simd::mod(Y, P, INVP, NEGP, MIN, MAX, Q, TMP);
+ }
+ for (; j < A.ld; ++j) {
+ D = simd::load(dat + i * A.chunk * A.ld + j * A.chunk);
+ X = simd::gather(x, col + i * A.chunk * A.ld + j * A.chunk);
+ Y = simd::fmadd(Y, D, X);
+ }
+ simd::mod(Y, P, INVP, NEGP, MIN, MAX, Q, TMP);
+ simd::store(y + i * A.chunk, Y);
+ }
+ });
+#else
+
+#pragma omp parallel for
+ for (size_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ index_t j_loc = 0;
+ Y = simd::load(y + i * chunk);
+ for (size_t l = 0; l < block; ++l) {
+ j_loc += kmax;
+
+ for (; j < j_loc; ++j) {
+ D = simd::load(dat + i * A.chunk * A.ld + j * A.chunk);
+ X = simd::gather(x, col + i * A.chunk * A.ld + j * A.chunk);
+ Y = simd::fmadd(Y, D, X);
+ }
+ simd::mod(Y, P, INVP, NEGP, MIN, MAX, Q, TMP);
+ }
+ for (; j < A.ld; ++j) {
+ D = simd::load(dat + i * A.chunk * A.ld + j * A.chunk);
+ X = simd::gather(x, col + i * A.chunk * A.ld + j * A.chunk);
+ Y = simd::fmadd(Y, D, X);
+ }
+ simd::mod(Y, P, INVP, NEGP, MIN, MAX, Q, TMP);
+ simd::store(y + i * A.chunk, Y);
+ }
+#endif // TBB
+}
+#endif
+
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_, const uint64_t kmax) {
+ index_t block = (A.ld) / kmax; // use DIVIDE_INTO from pfspmvgpu
+ // index_t chunk = A.chunk;
+ // size_t end = (A.m % chunk == 0) ? A.m : A.m + A.m % chunk;
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nChunks, 2),
+ [&F, &A, x, y, P, NEGP, INVP, MAX, MIN, dat, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ index_t j_loc = 0;
+ for (size_t l = 0; l < block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ for (size_t k = 0; k < A.chunk; ++k) {
+ y[i * A.chunk + k] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k] * x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+ for (size_t k = 0; k < A.chunk; ++k)
+ F.reduce(y[i * A.chunk + k], y[i * A.chunk + k]);
+ }
+ for (; j < A.ld; ++j) {
+ for (size_t k = 0; k < A.chunk; ++k) {
+ y[i * A.chunk + k] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k] * x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+ for (size_t k = 0; k < A.chunk; ++k)
+ F.reduce(y[i * A.chunk + k], y[i * A.chunk + k]);
+ }
+ });
+#else
+#pragma omp parallel for
+ for (size_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ index_t j_loc = 0;
+ for (size_t l = 0; l < block; ++l) {
+ j_loc += kmax;
+
+ for (; j < j_loc; ++j) {
+ for (size_t k = 0; k < A.chunk; ++k) {
+ y[i * A.chunk + k] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k] * x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+ for (size_t k = 0; k < A.chunk; ++k)
+ F.reduce(y[i * A.chunk + k], y[i * A.chunk + k]);
+ }
+ for (; j < A.ld; ++j) {
+ for (size_t k = 0; k < A.chunk; ++k) {
+ y[i * A.chunk + k] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k] * x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+ for (size_t k = 0; k < A.chunk; ++k)
+ F.reduce(y[i * A.chunk + k], y[i * A.chunk + k]);
+ }
+#endif // TBB
+}
+
+template <class Field>
+inline void pfspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nChunks, 2),
+ [&F, &A, x, y, dat, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ index_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ F.addin(y[i * A.chunk + k], x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ F.addin(y[i * A.chunk + k + 1], x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]]);
+ F.addin(y[i * A.chunk + k + 2], x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]]);
+ F.addin(y[i * A.chunk + k + 3], x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]]);
+ }
+ for (; k < A.chunk; ++k)
+ F.addin(y[i * A.chunk + k], x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ index_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ F.addin(y[i * A.chunk + k], x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ F.addin(y[i * A.chunk + k + 1], x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]]);
+ F.addin(y[i * A.chunk + k + 2], x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]]);
+ F.addin(y[i * A.chunk + k + 3], x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]]);
+ }
+ for (; k < A.chunk; ++k)
+ F.addin(y[i * A.chunk + k], x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ }
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nChunks, 2),
+ [&F, &A, x, y, dat, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ index_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ F.subin(y[i * A.chunk + k], x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ F.subin(y[i * A.chunk + k + 1], x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]]);
+ F.subin(y[i * A.chunk + k + 2], x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]]);
+ F.subin(y[i * A.chunk + k + 3], x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]]);
+ }
+ for (; k < A.chunk; ++k)
+ F.subin(y[i * A.chunk + k], x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ index_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ F.subin(y[i * A.chunk + k], x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ F.subin(y[i * A.chunk + k + 1], x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]]);
+ F.subin(y[i * A.chunk + k + 2], x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]]);
+ F.subin(y[i * A.chunk + k + 3], x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]]);
+ }
+ for (; k < A.chunk; ++k)
+ F.subin(y[i * A.chunk + k], x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ }
+ }
+#endif
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+template <class Field>
+inline void pfspmv_one_simd(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nChunks, 2),
+ [&F, &A, x, y, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ vect_t y1, y2, x1, x2, dat1, dat2, yy;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ for (; j < ROUND_DOWN(A.ld, 2); j += 2) {
+ dat1 = simd::load(dat + i * A.ld * A.chunk + j * A.chunk);
+ dat2 = simd::load(dat + i * A.ld * A.chunk + (j + 1) * A.chunk);
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ x2 = simd::gather(x, col + i * A.ld * A.chunk + (j + 1) * A.chunk);
+ y1 = simd::add(y1, x1);
+ y1 = simd::add(y2, x2);
+ }
+ for (; j < A.ld; ++j) {
+
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ y1 = simd::add(y1, dat1, x1);
+ }
+ yy = simd::load(y + i * A.chunk);
+ simd::store(y + i * A.chunk, simd::add(yy, simd::add(y1, y2)));
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ vect_t y1, y2, x1, x2, dat1, dat2, yy;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ for (; j < ROUND_DOWN(A.ld, 2); j += 2) {
+
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ x2 = simd::gather(x, col + i * A.ld * A.chunk + (j + 1) * A.chunk);
+ y1 = simd::add(y1, x1);
+ y1 = simd::add(y2, x2);
+ }
+ for (; j < A.ld; ++j) {
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ y1 = simd::add(y1, dat1, x1);
+ }
+ yy = simd::load(y + i * A.chunk);
+ simd::store(y + i * A.chunk, simd::add(yy, simd::add(y1, y2)));
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmv_mone_simd(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nChunks, 2),
+ [&F, &A, x, y, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ vect_t y1, y2, x1, x2, dat1, dat2, yy;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ for (; j < ROUND_DOWN(A.ld, 2); j += 2) {
+
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ x2 = simd::gather(x, col + i * A.ld * A.chunk + (j + 1) * A.chunk);
+ y1 = simd::add(y1, x1);
+ y1 = simd::add(y2, x2);
+ }
+ for (; j < A.ld; ++j) {
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ y1 = simd::add(y1, dat1, x1);
+ }
+ yy = simd::load(y + i * A.chunk);
+ simd::store(y + i * A.chunk, simd::sub(yy, simd::add(y1, y2)));
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ vect_t y1, y2, x1, x2, dat1, dat2, yy;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ for (; j < ROUND_DOWN(A.ld, 2); j += 2) {
+
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ x2 = simd::gather(x, col + i * A.ld * A.chunk + (j + 1) * A.chunk);
+ y1 = simd::add(y1, x1);
+ y1 = simd::add(y2, x2);
+ }
+ for (; j < A.ld; ++j) {
+
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ y1 = simd::add(y1, dat1, x1);
+ }
+ yy = simd::load(y + i * A.chunk);
+ simd::store(y + i * A.chunk, simd::sub(yy, simd::add(y1, y2)));
+ }
+#endif
+}
+
+#endif // SIMD
+
+template <class Field>
+inline void pfspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nChunks, 2),
+ [&F, &A, x, y, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ index_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] += x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] += x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] += x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]];
+ }
+ for (; k < A.chunk; ++k)
+ y[i * A.chunk + k] += x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ index_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] += x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] += x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] += x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]];
+ }
+ for (; k < A.chunk; ++k)
+ y[i * A.chunk + k] += x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+#endif // TBB
+}
+
+template <class Field>
+inline void pfspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nChunks, 2),
+ [&F, &A, x, y, col](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ index_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] -= x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] -= x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] -= x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] -= x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]];
+ }
+ for (; k < A.chunk; ++k)
+ y[i * A.chunk + k] -= x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ index_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] -= x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] -= x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] -= x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] -= x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]];
+ }
+ for (; k < A.chunk; ++k)
+ y[i * A.chunk + k] -= x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+#endif // TBB
+}
+
+} // ELL_simd_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_ELL_simd_pspmv_INL
diff --git a/fflas-ffpack/fflas/fflas_sparse/ell_simd/ell_simd_spmv.inl b/fflas-ffpack/fflas/fflas_sparse/ell_simd/ell_simd_spmv.inl
new file mode 100644
index 0000000..28e06b5
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/ell_simd/ell_simd_spmv.inl
@@ -0,0 +1,368 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_ELL_simd_spmv_INL
+#define __FFLASFFPACK_fflas_sparse_ELL_simd_spmv_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::GenericTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ for (index_t k = 0; k < A.chunk; ++k) {
+ F.axpyin(y[i * A.chunk + k], dat[i * A.ld * A.chunk + j * A.chunk + k],
+ x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ }
+ }
+ }
+}
+
+// #ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmv_simd(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ index_t chunk = A.chunk;
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ vect_t y1, y2, x1, x2, dat1, dat2, yy;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ for (; j < ROUND_DOWN(A.ld, 2); j += 2) {
+ dat1 = simd::load(dat + i * A.ld * A.chunk + j * chunk);
+ dat2 = simd::load(dat + i * A.ld * A.chunk + (j + 1) * chunk);
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * chunk);
+ x2 = simd::gather(x, col + i * A.ld * A.chunk + (j + 1) * chunk);
+ y1 = simd::fmadd(y1, dat1, x1);
+ y2 = simd::fmadd(y2, dat2, x2);
+ }
+ for (; j < A.ld; ++j) {
+ dat1 = simd::load(dat + i * A.ld * A.chunk + j * chunk);
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * chunk);
+ y1 = simd::fmadd(y1, dat1, x1);
+ }
+ yy = simd::load(y + i * chunk);
+ simd::store(y + i * chunk, simd::add(yy, simd::add(y1, y2)));
+ }
+}
+// #endif
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ for (index_t j = 0; j < A.ld; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k] * x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k + 1] * x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k + 2] * x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k + 3] * x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]];
+ }
+ for (; k < A.chunk; ++k)
+ y[i * A.chunk + k] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k] * x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+}
+
+// #ifdef __FFLASFFPACK_USE_SIMD
+template <class Field>
+inline void fspmv_simd(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_, const uint64_t kmax) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t block = (A.ld) / kmax; // use DIVIDE_INTO from fspmvgpu
+ index_t chunk = A.chunk;
+ size_t end = (A.m % chunk == 0) ? A.m : A.m + A.m % chunk;
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+
+ vect_t X, Y, D, C, Q, TMP, NEGP, INVP, MIN, MAX, P;
+ double p = (typename Field::Element)F.characteristic();
+
+ P = simd::set1(p);
+ NEGP = simd::set1(-p);
+ INVP = simd::set1(1 / p);
+ MIN = simd::set1(F.minElement());
+ MAX = simd::set1(F.maxElement());
+
+ for (size_t i = 0; i < end / chunk; ++i) {
+ index_t j = 0;
+ index_t j_loc = 0;
+ Y = simd::load(y + i * chunk);
+ for (size_t l = 0; l < block; ++l) {
+ j_loc += kmax;
+
+ for (; j < j_loc; ++j) {
+ D = simd::load(dat + i * A.chunk * A.ld + j * A.chunk);
+ X = simd::gather(x, col + i * A.chunk * A.ld + j * A.chunk);
+ Y = simd::fmadd(Y, D, X);
+ }
+ simd::mod(Y, P, INVP, NEGP, MIN, MAX, Q, TMP);
+ }
+ for (; j < A.ld; ++j) {
+ D = simd::load(dat + i * A.chunk * A.ld + j * A.chunk);
+ X = simd::gather(x, col + i * A.chunk * A.ld + j * A.chunk);
+ Y = simd::fmadd(Y, D, X);
+ }
+ simd::mod(Y, P, INVP, NEGP, MIN, MAX, Q, TMP);
+ simd::store(y + i * A.chunk, Y);
+ }
+}
+// #endif
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, const uint64_t kmax) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t block = (A.ld) / kmax; // use DIVIDE_INTO from fspmvgpu
+ index_t chunk = A.chunk;
+ size_t end = (A.m % chunk == 0) ? A.m : A.m + A.m % chunk;
+ for (size_t i = 0; i < end / chunk; ++i) {
+ index_t j = 0;
+ index_t j_loc = 0;
+ for (size_t l = 0; l < block; ++l) {
+ j_loc += kmax;
+
+ for (; j < j_loc; ++j) {
+ for (size_t k = 0; k < A.chunk; ++k) {
+ y[i * A.chunk + k] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k] * x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+ for (size_t k = 0; k < A.chunk; ++k)
+ F.reduce(y[i * A.chunk + k], y[i * A.chunk + k]);
+ }
+ for (; j < A.ld; ++j) {
+ for (size_t k = 0; k < A.chunk; ++k) {
+ y[i * A.chunk + k] +=
+ dat[i * A.ld * A.chunk + j * A.chunk + k] * x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+ for (size_t k = 0; k < A.chunk; ++k)
+ F.reduce(y[i * A.chunk + k], y[i * A.chunk + k]);
+ }
+}
+
+template <class Field>
+inline void fspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ index_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ F.addin(y[i * A.chunk + k], x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ F.addin(y[i * A.chunk + k + 1], x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]]);
+ F.addin(y[i * A.chunk + k + 2], x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]]);
+ F.addin(y[i * A.chunk + k + 3], x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]]);
+ }
+ for (; k < A.chunk; ++k)
+ F.addin(y[i * A.chunk + k], x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ }
+ }
+}
+
+template <class Field>
+inline void fspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ index_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ F.subin(y[i * A.chunk + k], x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ F.subin(y[i * A.chunk + k + 1], x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]]);
+ F.subin(y[i * A.chunk + k + 2], x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]]);
+ F.subin(y[i * A.chunk + k + 3], x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]]);
+ }
+ for (; k < A.chunk; ++k)
+ F.subin(y[i * A.chunk + k], x[col[i * A.ld * A.chunk + j * A.chunk + k]]);
+ }
+ }
+}
+
+template <class Field>
+inline void fspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ index_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] += x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] += x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] += x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]];
+ }
+ for (; k < A.chunk; ++k)
+ y[i * A.chunk + k] += x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+}
+
+template <class Field>
+inline void fspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ for (; j < A.ld; ++j) {
+ index_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] -= x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] -= x[col[i * A.ld * A.chunk + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] -= x[col[i * A.ld * A.chunk + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] -= x[col[i * A.ld * A.chunk + j * A.chunk + k + 3]];
+ }
+ for (; k < A.chunk; ++k)
+ y[i * A.chunk + k] -= x[col[i * A.ld * A.chunk + j * A.chunk + k]];
+ }
+ }
+}
+
+// #ifdef __FFLASFFPACK_USE_SIMD
+template <class Field>
+inline void fspmv_one_simd(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ vect_t y1, y2, x1, x2, dat1, dat2, yy;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ for (; j < ROUND_DOWN(A.ld, 2); j += 2) {
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ x2 = simd::gather(x, col + i * A.ld * A.chunk + (j + 1) * A.chunk);
+ y1 = simd::add(y1, x1);
+ y1 = simd::add(y2, x2);
+ }
+ for (; j < A.ld; ++j) {
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ y1 = simd::add(y1, x1);
+ }
+ yy = simd::load(y + i * A.chunk);
+ simd::store(y + i * A.chunk, simd::add(yy, simd::add(y1, y2)));
+ }
+}
+
+template <class Field>
+inline void fspmv_mone_simd(const Field &F, const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ vect_t y1, y2, x1, x2, dat1, dat2, yy;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ for (; j < ROUND_DOWN(A.ld, 2); j += 2) {
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ x2 = simd::gather(x, col + i * A.ld * A.chunk + (j + 1) * A.chunk);
+ y1 = simd::add(y1, x1);
+ y1 = simd::add(y2, x2);
+ }
+ for (; j < A.ld; ++j) {
+ x1 = simd::gather(x, col + i * A.ld * A.chunk + j * A.chunk);
+ y1 = simd::add(y1, x1);
+ }
+ yy = simd::load(y + i * A.chunk);
+ simd::store(y + i * A.chunk, simd::sub(yy, simd::add(y1, y2)));
+ }
+}
+// #endif
+
+} // ELL_simd_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_ELL_simd_spmv_INL
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/ell_simd/ell_simd_utils.inl b/fflas-ffpack/fflas/fflas_sparse/ell_simd/ell_simd_utils.inl
new file mode 100644
index 0000000..b5325c5
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/ell_simd/ell_simd_utils.inl
@@ -0,0 +1,160 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_ELL_simd_utils_INL
+#define __FFLASFFPACK_fflas_sparse_ELL_simd_utils_INL
+
+namespace FFLAS {
+
+template <class Field> inline void sparse_delete(const Sparse<Field, SparseMatrix_t::ELL_simd> &A) {
+ fflas_delete(A.dat);
+ fflas_delete(A.col);
+}
+
+template <class Field> inline void sparse_delete(const Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A) {
+ fflas_delete(A.col);
+}
+
+template <class Field> inline void sparse_print(const Sparse<Field, SparseMatrix_t::ELL_simd> &A) {
+ for (size_t i = 0; i < A.nChunks; ++i) {
+ for (size_t k = 0; k < A.chunk; ++k) {
+ std::cout << i *A.chunk + k << " : ";
+ for (size_t j = 0; j < A.ld; ++j) {
+ std::cout << A.dat[i * A.ld * A.chunk + j * A.chunk + k] << " ";
+ }
+ std::cout << std::endl;
+ }
+ }
+}
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::ELL_simd> &A, const IndexT *row,
+ const IndexT *col, typename Field::ConstElement_ptr dat, uint64_t rowdim, uint64_t coldim,
+ uint64_t nnz) {
+#ifdef __FFLASFFPACK_USE_SIMD
+ using simd = Simd<typename Field::Element>;
+ A.chunk = simd::vect_size;
+#else
+ A.chunk = 8;
+#endif
+ A.kmax = Protected::DotProdBoundClassic(F, F.one);
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ std::vector<uint64_t> rows(A.m + 1, 0);
+ for (uint64_t i = 0; i < A.nnz; ++i)
+ rows[row[i] + 1]++;
+ A.maxrow = *(std::max_element(rows.begin(), rows.end()));
+ A.ld = A.maxrow;
+ if (A.kmax > A.maxrow)
+ A.delayed = true;
+ for (size_t i = 1; i <= A.m; ++i) {
+ rows[i] += rows[i - 1];
+ }
+
+ index_t m = (A.m % A.chunk == 0) ? A.m : ROUND_DOWN(A.m, A.chunk) + A.chunk;
+ // cout << A.m << " " << ROUND_DOWN(A.m, simd::vect_size)+simd::vect_size <<
+ // " " << m/A.chunk << endl;
+ A.nChunks = m / A.chunk;
+
+ A.col = fflas_new<index_t>(A.nChunks * A.chunk * A.ld, Alignment::CACHE_LINE);
+ A.dat = fflas_new(F, A.nChunks * A.chunk * A.ld, 1, Alignment::CACHE_LINE);
+
+ A.nElements = A.nChunks * A.chunk * A.ld;
+
+ for (size_t i = 0; i < A.nChunks * A.chunk * A.ld; ++i) {
+ A.col[i] = 0;
+ F.assign(A.dat[i], F.zero);
+ }
+
+ for (size_t i = 0; i < A.nChunks; ++i) {
+ for (size_t k = 0; k < A.chunk; ++k) {
+ if (i * A.chunk + k < rowdim) {
+ uint64_t start = rows[i * A.chunk + k], stop = rows[i * A.chunk + k + 1];
+ // cout << "start " << start << " stop " << stop << endl;
+ for (size_t j = 0; j < stop - start; ++j) {
+ A.dat[i * A.chunk * A.ld + j * A.chunk + k] = dat[start + j];
+ A.col[i * A.chunk * A.ld + j * A.chunk + k] = col[start + j];
+ }
+ }
+ }
+ }
+}
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::ELL_simd_ZO> &A, const IndexT *row,
+ const IndexT *col, typename Field::ConstElement_ptr dat, uint64_t rowdim, uint64_t coldim,
+ uint64_t nnz) {
+#ifdef __FFLASFFPACK_USE_SIMD
+ using simd = Simd<typename Field::Element>;
+ A.chunk = simd::vect_size;
+#else
+ A.chunk = 8;
+#endif
+ A.kmax = Protected::DotProdBoundClassic(F, F.one);
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ std::vector<uint64_t> rows(A.m + 1, 0);
+ for (uint64_t i = 0; i < A.nnz; ++i)
+ rows[row[i] + 1]++;
+ A.maxrow = *(std::max_element(rows.begin(), rows.end()));
+ A.ld = A.maxrow;
+ if (A.kmax > A.maxrow)
+ A.delayed = true;
+ for (size_t i = 1; i <= A.m; ++i) {
+ rows[i] += rows[i - 1];
+ }
+
+ index_t m = (A.m % A.chunk == 0) ? A.m : ROUND_DOWN(A.m, A.chunk) + A.chunk;
+ // cout << A.m << " " << ROUND_DOWN(A.m, simd::vect_size)+simd::vect_size <<
+ // " " << m/A.chunk << endl;
+ A.nChunks = m / A.chunk;
+
+ A.col = fflas_new<index_t>(A.nChunks * A.chunk * A.ld, Alignment::CACHE_LINE);
+
+ A.nElements = A.nChunks * A.chunk * A.ld;
+
+ for (size_t i = 0; i < A.nChunks * A.chunk * A.ld; ++i) {
+ A.col[i] = 0;
+ }
+
+ for (size_t i = 0; i < A.nChunks; ++i) {
+ for (size_t k = 0; k < A.chunk; ++k) {
+ if (i * A.chunk + k < rowdim) {
+ uint64_t start = rows[i * A.chunk + k], stop = rows[i * A.chunk + k + 1];
+ // cout << "start " << start << " stop " << stop << endl;
+ for (size_t j = 0; j < stop - start; ++j) {
+ A.col[i * A.chunk * A.ld + j * A.chunk + k] = col[start + j];
+ }
+ }
+ }
+ }
+}
+}
+#endif
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/hyb_zo.h b/fflas-ffpack/fflas/fflas_sparse/hyb_zo.h
new file mode 100755
index 0000000..6d5e1d7
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/hyb_zo.h
@@ -0,0 +1,65 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_fspmv_HYB_ZO.inl
+ * NO DOC
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_HYB_ZO_H
+#define __FFLASFFPACK_fflas_sparse_HYB_ZO_H
+
+namespace FFLAS { /* HYB_ZO */
+
+template <class _Field> struct Sparse<_Field, SparseMatrix_t::HYB_ZO> {
+ using Field = _Field;
+ typedef Sparse<_Field, SparseMatrix_t::HYB_ZO> Self_t;
+ bool delayed = false;
+ uint64_t kmax = 0;
+ index_t m = 0;
+ index_t n = 0;
+ uint64_t nnz = 0;
+ uint64_t maxrow = 0;
+ uint64_t nElements = 0;
+ Sparse<_Field, SparseMatrix_t::CSR> *dat = nullptr;
+ Sparse<_Field, SparseMatrix_t::CSR_ZO> *one = nullptr;
+ Sparse<_Field, SparseMatrix_t::CSR_ZO> *mone = nullptr;
+
+};
+
+} // FFLAS
+
+#include "fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_utils.inl"
+#include "fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_spmv.inl"
+#include "fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_spmm.inl"
+#if defined(__FFLASFFPACK_USE_OPENMP)
+#include "fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_pspmv.inl"
+#include "fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_pspmm.inl"
+#endif
+
+
+#endif // __FFLASFFPACK_fflas_sparse_HYB_ZO_H
diff --git a/benchmark/Makefile.am b/fflas-ffpack/fflas/fflas_sparse/hyb_zo/Makefile.am
similarity index 72%
copy from benchmark/Makefile.am
copy to fflas-ffpack/fflas/fflas_sparse/hyb_zo/Makefile.am
index 31793b2..e022d14 100644
--- a/benchmark/Makefile.am
+++ b/fflas-ffpack/fflas/fflas_sparse/hyb_zo/Makefile.am
@@ -1,5 +1,7 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by Bastien Vialla <bastien.vialla at lirmm.fr>
+#
+#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -19,10 +21,13 @@
# ========LICENCE========
#/
-#
-# Nothing yet
-SUBDIRS=graph src html test-src
-#
-EXTRA_DIST=run.sh
+pkgincludesubdir=$(pkgincludedir)/fflas/fflas_sparse/hyb_zo
+
+pkgincludesub_HEADERS= \
+ hyb_zo_spmv.inl \
+ hyb_zo_spmm.inl \
+ hyb_zo_pspmm.inl \
+ hyb_zo_pspmv.inl \
+ hyb_zo_utils.inl
diff --git a/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_pspmm.inl b/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_pspmm.inl
new file mode 100644
index 0000000..42bc4ab
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_pspmm.inl
@@ -0,0 +1,141 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_HYB_ZO_pspmm_INL
+#define __FFLASFFPACK_fflas_sparse_HYB_ZO_pspmm_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ FieldCategories::GenericTag) {
+ if (A.one != nullptr)
+ sparse_details_impl::pfspmm_one(F, *(A.one), blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::pfspmm_mone(F, *(A.mone), blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::pfspmm(F, *(A.dat), blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+}
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ FieldCategories::UnparametricTag) {
+ if (A.one != nullptr)
+ sparse_details_impl::pfspmm_one(F, *(A.one), blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::pfspmm_mone(F, *(A.mone), blockSize, x, ldx, y, FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::pfspmm(F, *(A.dat), blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void pfspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ FieldCategories::UnparametricTag) {
+ if (A.one != nullptr)
+ sparse_details_impl::pfspmm_one_simd_aligned(F, *(A.one), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::pfspmm_mone_simd_aligned(F, *(A.mone), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::pfspmm_simd_aligned(F, *(A.dat), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+}
+
+template <class Field>
+inline void pfspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ FieldCategories::UnparametricTag) {
+ if (A.one != nullptr)
+ sparse_details_impl::pfspmm_one_simd_unaligned(F, *(A.one), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::pfspmm_mone_simd_unaligned(F, *(A.mone), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::pfspmm_simd_unaligned(F, *(A.dat), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+}
+
+#endif
+
+template <class Field>
+inline void pfspmm(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy, uint64_t kmax) {
+ if (A.one != nullptr)
+ sparse_details_impl::pfspmm_one(F, *(A.one), blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::pfspmm_mone(F, *(A.mone), blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::pfspmm(F, *(A.dat), blockSize, x, ldx, y, ldy, kmax);
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void pfspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ uint64_t kmax) {
+ if (A.one != nullptr)
+ sparse_details_impl::pfspmm_one_simd_aligned(F, *(A.one), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::pfspmm_mone_simd_aligned(F, *(A.mone), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::pfspmm_simd_aligned(F, *(A.dat), blockSize, x, ldx, y, ldy, kmax);
+}
+
+template <class Field>
+inline void pfspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ uint64_t kmax) {
+ if (A.one != nullptr)
+ sparse_details_impl::pfspmm_one_simd_unaligned(F, *(A.one), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::pfspmm_mone_simd_unaligned(F, *(A.mone), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::pfspmm_simd_unaligned(F, *(A.dat), blockSize, x, ldx, y, ldy, kmax);
+}
+
+#endif
+
+} // HYB_ZO_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_HYB_ZO_pspmm_INL
diff --git a/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_pspmv.inl b/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_pspmv.inl
new file mode 100644
index 0000000..84a1ba2
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_pspmv.inl
@@ -0,0 +1,72 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_HYB_ZO_pspmv_INL
+#define __FFLASFFPACK_fflas_sparse_HYB_ZO_pspmv_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, FieldCategories::GenericTag) {
+ if (A.one != nullptr)
+ sparse_details_impl::pfspmv_one(F, *(A.one), x, y, FieldCategories::GenericTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::pfspmv_mone(F, *(A.mone), x, y, FieldCategories::GenericTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::pfspmv(F, *(A.dat), x, y, FieldCategories::GenericTag());
+}
+
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, FieldCategories::UnparametricTag) {
+ if (A.one != nullptr)
+ sparse_details_impl::pfspmv_one(F, *(A.one), x, y, FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::pfspmv_mone(F, *(A.mone), x, y, FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::pfspmv(F, *(A.dat), x, y, FieldCategories::UnparametricTag());
+}
+
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, uint64_t kmax) {
+ if (A.one != nullptr)
+ sparse_details_impl::pfspmv_one(F, *(A.one), x, y, FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::pfspmv_mone(F, *(A.mone), x, y, FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::pfspmv(F, *(A.dat), x, y, kmax);
+}
+
+} // HYB_ZO_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_HYB_ZO_pspmv_INL
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_spmm.inl b/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_spmm.inl
new file mode 100644
index 0000000..0c0585f
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_spmm.inl
@@ -0,0 +1,140 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_HYB_ZO_spmm_INL
+#define __FFLASFFPACK_fflas_sparse_HYB_ZO_spmm_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ FieldCategories::GenericTag) {
+ if (A.one != nullptr)
+ sparse_details_impl::fspmm_one(F, *(A.one), blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::fspmm_mone(F, *(A.mone), blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::fspmm(F, *(A.dat), blockSize, x, ldx, y, ldy, FieldCategories::GenericTag());
+}
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ FieldCategories::UnparametricTag) {
+ if (A.one != nullptr)
+ sparse_details_impl::fspmm_one(F, *(A.one), blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::fspmm_mone(F, *(A.mone), blockSize, x, ldx, y, FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::fspmm(F, *(A.dat), blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ FieldCategories::UnparametricTag) {
+ if (A.one != nullptr)
+ sparse_details_impl::fspmm_one_simd_aligned(F, *(A.one), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::fspmm_mone_simd_aligned(F, *(A.mone), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::fspmm_simd_aligned(F, *(A.dat), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+}
+
+template <class Field>
+inline void fspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ FieldCategories::UnparametricTag) {
+ if (A.one != nullptr)
+ sparse_details_impl::fspmm_one_simd_unaligned(F, *(A.one), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::fspmm_mone_simd_unaligned(F, *(A.mone), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::fspmm_simd_unaligned(F, *(A.dat), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+}
+
+#endif
+
+template <class Field>
+inline void fspmm(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy, uint64_t kmax) {
+ if (A.one != nullptr)
+ sparse_details_impl::fspmm_one(F, *(A.one), blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::fspmm_mone(F, *(A.mone), blockSize, x, ldx, y, ldy, FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::fspmm(F, *(A.dat), blockSize, x, ldx, y, ldy, kmax);
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmm_simd_aligned(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ uint64_t kmax) {
+ if (A.one != nullptr)
+ sparse_details_impl::fspmm_one_simd_aligned(F, *(A.one), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::fspmm_mone_simd_aligned(F, *(A.mone), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::fspmm_simd_aligned(F, *(A.dat), blockSize, x, ldx, y, ldy, kmax);
+}
+
+template <class Field>
+inline void fspmm_simd_unaligned(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, size_t blockSize,
+ typename Field::ConstElement_ptr x, int ldx, typename Field::Element_ptr y, int ldy,
+ uint64_t kmax) {
+ if (A.one != nullptr)
+ sparse_details_impl::fspmm_one_simd_unaligned(F, *(A.one), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::fspmm_mone_simd_unaligned(F, *(A.mone), blockSize, x, ldx, y, ldy,
+ FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::fspmm_simd_unaligned(F, *(A.dat), blockSize, x, ldx, y, ldy, kmax);
+}
+
+#endif
+
+} // HYB_ZO_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_HYB_ZO_spmm_INL
diff --git a/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_spmv.inl b/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_spmv.inl
new file mode 100644
index 0000000..07d16e0
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_spmv.inl
@@ -0,0 +1,72 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_HYB_ZO_spmv_INL
+#define __FFLASFFPACK_fflas_sparse_HYB_ZO_spmv_INL
+
+namespace FFLAS {
+namespace sparse_details_impl {
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, FieldCategories::GenericTag) {
+ if (A.one != nullptr)
+ sparse_details_impl::fspmv_one(F, *(A.one), x, y, FieldCategories::GenericTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::fspmv_mone(F, *(A.mone), x, y, FieldCategories::GenericTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::fspmv(F, *(A.dat), x, y, FieldCategories::GenericTag());
+}
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, FieldCategories::UnparametricTag) {
+ if (A.one != nullptr)
+ sparse_details_impl::fspmv_one(F, *(A.one), x, y, FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::fspmv_mone(F, *(A.mone), x, y, FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::fspmv(F, *(A.dat), x, y, FieldCategories::UnparametricTag());
+}
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::HYB_ZO> &A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, uint64_t kmax) {
+ if (A.one != nullptr)
+ sparse_details_impl::fspmv_one(F, *(A.one), x, y, FieldCategories::UnparametricTag());
+ if (A.mone != nullptr)
+ sparse_details_impl::fspmv_mone(F, *(A.mone), x, y, FieldCategories::UnparametricTag());
+ if (A.dat != nullptr)
+ sparse_details_impl::fspmv(F, *(A.dat), x, y, kmax);
+}
+
+} // HYB_ZO_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_HYB_ZO_spmv_INL
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_utils.inl b/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_utils.inl
new file mode 100755
index 0000000..9d2b8ef
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/hyb_zo/hyb_zo_utils.inl
@@ -0,0 +1,136 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_HYB_ZO_utils_INL
+#define __FFLASFFPACK_fflas_sparse_HYB_ZO_utils_INL
+
+namespace FFLAS {
+
+// #define HYB_ZO_DEBUG 1
+
+template <class Field> inline void sparse_delete(const Sparse<Field, SparseMatrix_t::HYB_ZO> &A) {
+ if (A.dat != nullptr)
+ sparse_delete(*(A.dat));
+ if (A.one != nullptr)
+ sparse_delete(*(A.one));
+ if (A.mone != nullptr)
+ sparse_delete(*(A.mone));
+}
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::HYB_ZO> &A, const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz) {
+
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+ A.delayed = true;
+ A.nElements = nnz;
+ uint64_t nOnes = 0, nMOnes = 0, nOthers = 0;
+ for (uint64_t i = 0; i < nnz; ++i) {
+ if (F.isOne(dat[i]))
+ nOnes++;
+ else if (F.isMOne(dat[i]))
+ nMOnes++;
+ else
+ nOthers++;
+ }
+
+ typename Field::Element_ptr dat2;
+ index_t *colOne = nullptr, *colMOne = nullptr, *colOther = nullptr, *rowOne = nullptr, *rowMOne = nullptr,
+ *rowOther = nullptr;
+ if (nOnes) {
+ colOne = fflas_new<index_t>(nOnes, Alignment::CACHE_LINE);
+ rowOne = fflas_new<index_t>(nOnes, Alignment::CACHE_LINE);
+ }
+ if (nMOnes) {
+ colMOne = fflas_new<index_t>(nMOnes, Alignment::CACHE_LINE);
+ rowMOne = fflas_new<index_t>(nMOnes, Alignment::CACHE_LINE);
+ }
+ if (nOthers) {
+ dat2 = fflas_new(F, nOthers, 1, Alignment::CACHE_LINE);
+ colOther = fflas_new<index_t>(nOthers, Alignment::CACHE_LINE);
+ rowOther = fflas_new<index_t>(nOthers, Alignment::CACHE_LINE);
+ }
+
+ uint64_t itOne = 0, itMOne = 0, itOther = 0;
+ for (uint64_t i = 0; i < nnz; ++i) {
+ if (F.isOne(dat[i])) {
+ colOne[itOne] = col[i];
+ rowOne[itOne] = row[i];
+ ++itOne;
+ } else if (F.isMOne(dat[i])) {
+ colMOne[itMOne] = col[i];
+ rowMOne[itMOne] = row[i];
+ ++itMOne;
+ } else {
+ dat2[itOther] = dat[i];
+ colOther[itOther] = col[i];
+ rowOther[itOther] = row[i];
+ ++itOther;
+ }
+ }
+
+ if (nOnes) {
+ A.one = new Sparse<Field, SparseMatrix_t::CSR_ZO>();
+ sparse_init(F, *(A.one), rowOne, colOne, nullptr, rowdim, coldim, nOnes);
+ }
+ if (nMOnes) {
+ A.mone = new Sparse<Field, SparseMatrix_t::CSR_ZO>();
+ sparse_init(F, *(A.mone), rowMOne, colMOne, nullptr, rowdim, coldim, nMOnes);
+ A.mone->cst = -1;
+ }
+ if (nOthers) {
+ A.dat = new Sparse<Field, SparseMatrix_t::CSR>();
+ sparse_init(F, *(A.dat), rowOther, colOther, dat2, rowdim, coldim, nOthers);
+ }
+
+ if (nOnes) {
+ fflas_delete(colOne);
+ fflas_delete(rowOne);
+ }
+ if (nMOnes) {
+ fflas_delete(colMOne);
+ fflas_delete(rowMOne);
+ }
+ if (nOthers) {
+ fflas_delete(colOther);
+ fflas_delete(rowOther);
+ fflas_delete(dat2);
+ }
+}
+
+template<typename _Field>
+std::ostream& operator<<(std::ostream& os, const Sparse<_Field, SparseMatrix_t::HYB_ZO>& A) {
+ return sparse_print(os << "non-ones: ", *(A.dat));
+}
+
+
+}
+
+#endif
diff --git a/fflas-ffpack/fflas/fflas_sparse/read_sparse.h b/fflas-ffpack/fflas/fflas_sparse/read_sparse.h
new file mode 100644
index 0000000..29c25a4
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/read_sparse.h
@@ -0,0 +1,460 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2015 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_sparse/read_sparse.h
+*/
+
+#ifndef __FFLASFFPACK_fflas_fflas_sparse_read_sparse_H
+#define __FFLASFFPACK_fflas_fflas_sparse_read_sparse_H
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+
+// #include <sstream>
+// #include <iostream>
+#include <fstream> /* getline */
+#include <string>
+// #include <cstdio>
+#include <cstdlib>
+#include <iterator> /* istream_iterator */
+
+
+
+namespace FFLAS { namespace details_spmv {
+ template <class Field> struct Coo {
+ private:
+ using Self = Coo<Field>;
+
+ public:
+ typename Field::Element val = 0;
+ index_t col = 0;
+ index_t row = 0;
+ bool deleted = false;
+
+ Coo() = default;
+ Coo(typename Field::Element v, index_t r, index_t c) : val(v), col(c), row(r) {}
+ Coo(const Self &) = default;
+ Coo(Self &&) = default;
+
+ Self &operator=(const Self &) = default;
+ Self &operator=(Self &&) = default;
+ };
+} // details_spmv
+} // FFLAS
+
+namespace FFLAS {
+
+ template <class Field, bool sorted=true, bool read_integer = false>
+ void readSmsFormat(const std::string &path, const Field &f, index_t *&row, index_t *&col,
+ typename Field::Element_ptr &val, index_t &rowdim, index_t &coldim, uint64_t &nnz)
+ {
+ using namespace details_spmv;
+ std::ifstream file(path, std::ios::in);
+ std::string line;
+ std::vector<Coo<Field>> data;
+ while (true) { /* comments ? */
+ std::getline(file,line);
+ if (line.empty()) {
+ continue ;
+ }
+ std::istringstream ligne (line);
+ std::string comm ;
+ if (ligne >> comm ) {
+ if (comm[0] != '%') {
+ break;
+ }
+ }
+ else {
+ std::cerr << " the impossible happened, continuing for now " << std::endl;
+ break;
+ }
+ }
+ bool sms = false ;
+ std::istringstream ligne (line);
+ std::string nbnz_s ;
+ if (ligne >> rowdim >> coldim >> nbnz_s) {
+ if (nbnz_s == "M") {
+ sms = true ;
+ // nnz = 0;
+ }
+ else
+ nnz = std::strtoull(nbnz_s.c_str(),NULL,0);
+ }
+ else {
+ std::cerr << "file " << path << " is not in sms/smf format " << line << std::endl;
+ exit(1);
+ }
+
+ row = fflas_new<index_t>(rowdim+1);
+ std::memset(row,0, sizeof(index_t)*(rowdim+1));
+ assert(!row[0] && !row[rowdim]);
+ std::vector<index_t> colid((sms)?0:nnz);
+ std::vector<typename Field::Element> dat((sms)?0:nnz);
+
+
+ /* got header */
+ if (!rowdim || !coldim)
+ exit(-1) ;
+ if (!sms && !nnz)
+ exit(-1) ;
+
+ size_t i=0,l,c ;
+ int64_t d ;
+ while (true) {
+ if ((!sms) && (i == nnz)){
+ break ;
+ }
+ std::getline(file,line);
+ // std::cout << i << ',' << nnz << std::endl;
+ if (file.bad() || file.eof())
+ exit(-3);
+ if (line.empty()){
+ continue;
+ }
+
+ std::istringstream lign (line);
+ if (lign >> l >> c >> d){
+ // std::cout << l << ' ' << c << ' ' << d << std::endl;
+ if (sms) {
+ if (l == 0 && c == 0 && d == 0)
+ break ;
+ // nnz ++;
+ }
+ typename Field::Element v;
+
+ assert(l && c);
+ f.init(v, d);
+ if (!f.isZero(v)) {
+ if (!sorted) {
+ data.emplace_back(v, l-1, c-1);
+ }
+ else {
+ row[l] += 1 ;
+ if (!sms) {
+ colid[i] = c-1 ;
+ dat[i] = v ;
+ }
+ else {
+ colid.push_back(c-1);
+ dat.push_back(v);
+ }
+ }
+ }
+ }
+ else {
+ exit(1);
+ }
+ ++i ;
+ }
+ if (sms) {
+ nnz=dat.size();
+ }
+ assert(i == nnz);
+
+ col = fflas_new<index_t>(nnz);
+ val = fflas_new(f, nnz, 1);
+
+ if (!sorted) {
+ assert(nnz == data.size());
+
+ std::sort(data.begin(), data.end(), [](const Coo<Field> &a, const Coo<Field> &b) {
+ return (a.row < b.row) || ((a.row == b.row) && (a.col < b.col));
+ });
+ auto rowmax = (std::max_element(data.begin(), data.end(),
+ [](const Coo<Field> &a, const Coo<Field> &b) { return a.row < b.row; }))->row;
+ if (rowdim != rowmax + 1) {
+ std::cout << "Matrix row dimension change : " << rowdim << " -> " << rowmax << std::endl;
+ rowdim = rowmax;
+ }
+
+
+ for (size_t j = 0, end = data.size(); j < end; ++j) {
+ val[j] = data[j].val;
+ col[j] = data[j].col;
+ row[data[j].row+1]+=1;
+ }
+ }
+ else {
+ assert(nnz==dat.size());
+ for (size_t j = 0, end = nnz; j < end; ++j) {
+ val[j] = dat[j];
+ col[j] = colid[j];
+ }
+
+ }
+
+ for (size_t j = 0, end = rowdim ; j < end; ++j) {
+ row[j+1] += row[j] ;
+ }
+ }
+
+ template <class Field>
+ void readSprFormat(const std::string &path, const Field &f, index_t *&row, index_t *&col,
+ typename Field::Element_ptr &val, index_t &rowdim, index_t &coldim, uint64_t &nnz)
+ {
+ using namespace details_spmv;
+ std::ifstream file(path, std::ios::in);
+ std::vector<std::string> tokens;
+ std::string line;
+ // while(std::getline(file, line) && line.size()!=0);
+ std::getline(file, line);
+ std::istringstream is(line);
+ // std::cout << "line : " << line << std::endl;
+ std::copy(std::istream_iterator<std::string>(is), std::istream_iterator<std::string>(),
+ std::back_inserter<std::vector<std::string>>(tokens));
+ // std::cout << tokens.size() << std::endl;
+ // std::cout << " " << std::stoull(tokens[0]) << " " << std::stoull(tokens[1]) << std::endl;
+ rowdim = static_cast<index_t>(std::stoull(tokens[0]));
+ coldim = static_cast<index_t>(std::stoull(tokens[1]));
+ std::vector<Coo<Field>> data;
+ nnz = 0;
+ uint64_t itLine = 0;
+ while (std::getline(file, line)) {
+ tokens.resize(0);
+ std::istringstream iss(line);
+
+ std::copy(std::istream_iterator<std::string>(iss), std::istream_iterator<std::string>(),
+ std::back_inserter<std::vector<std::string>>(tokens));
+
+ // if (!(tokens[0] == "0" && tokens[1] == "0" && tokens[2] == "0")) {
+ uint64_t nElements = std::stoull(tokens[0]);
+ for (uint64_t i = 0; i < nElements; ++i) {
+ index_t c = std::stoull(tokens[2 * i + 1]) - 1;
+ typename Field::Element v;
+ int64_t vtmp = std::stoll(tokens[2 * (i + 1)]);
+ f.init(v, vtmp);
+ data.emplace_back(v, itLine, c);
+ }
+ // typename Field::Element v;
+ // f.init(v, std::stol(tokens[2]));
+ // index_t r = (index_t)(std::stoull(tokens[0])) - 1;
+ // index_t c = (index_t)(std::stoull(tokens[1])) - 1;
+ // data.emplace_back(v, r, c);
+ // }
+ ++itLine;
+ }
+ std::sort(data.begin(), data.end(), [](const Coo<Field> &a, const Coo<Field> &b) {
+ return (a.row < b.row) || ((a.row == b.row) && (a.col < b.col));
+ });
+ auto rowmax = (std::max_element(data.begin(), data.end(),
+ [](const Coo<Field> &a, const Coo<Field> &b) { return a.row < b.row; }))->row;
+ if (rowdim != rowmax + 1) {
+ std::cout << "Matrix row dimension change : " << rowdim << " -> " << rowmax << std::endl;
+ rowdim = rowmax;
+ }
+ row = fflas_new<index_t>(data.size());
+ col = fflas_new<index_t>(data.size());
+ val = fflas_new(f, data.size(), 1);
+ nnz = data.size();
+ std::cout << "nnz : " << nnz << std::endl;
+ for (size_t i = 0, end = data.size(); i < end; ++i) {
+ val[i] = data[i].val;
+ col[i] = data[i].col;
+ row[i] = data[i].row;
+ }
+ }
+
+#define DNS_BIN_VER 0
+#define mask_t uint64_t
+
+ template<class Field, class T>
+ struct readMyMachineType {
+ typedef typename Field::Element Element ;
+ typedef typename Field::Element_ptr Element_ptr ;
+ void operator() (const Field &F,
+ Element & modulo,
+ Element_ptr val,
+ std::ifstream & file,
+ const uint64_t dims,
+ const mask_t data_type,
+ const mask_t field_desc);
+ };
+
+ template<class Field>
+ struct readMyMachineType<Field,mpz_t> {
+ typedef typename Field::Element Element ;
+ typedef typename Field::Element_ptr Element_ptr ;
+ void operator() (const Field &F,
+ Element & modulo,
+ Element_ptr val,
+ std::ifstream & file,
+ const uint64_t dims,
+ const mask_t data_type,
+ const mask_t field_desc);
+ };
+
+
+
+ template<class Field, typename T>
+ void readMyMachineType<Field,T>:: operator() (const Field &F,
+ Element & modulo,
+ Element_ptr val,
+ std::ifstream & file,
+ const uint64_t dims,
+ const mask_t data_type,
+ const mask_t field_desc)
+ {
+ if (field_desc ==1) { /* modulo */
+ T modulo_read ;
+ file.read((char*) &modulo_read, sizeof(T));
+ F.init(modulo,modulo_read);
+ }
+ /* do something with field_desc and multiprec... */
+ T * data_read = fflas_new<T>(dims);
+ file.read((char*)data_read,sizeof(T));
+ /* TODO freduce ? */
+ for (size_t i = 0 ; i< dims ; ++i) {
+ F.init(val[i],data_read[i]);
+ }
+ }
+
+ template<class Field>
+ void readMyMachineType<Field,mpz_t>:: operator() (const Field &F,
+ typename Field::Element & modulo,
+ typename Field::Element_ptr val,
+ std::ifstream & file,
+ const uint64_t dims,
+ const mask_t data_type,
+ const mask_t field_desc)
+ {
+ /* need to use FILE * instead of std::ifstream */
+ throw("not implemented, use mpz_in_raw, but FILE*...");
+ }
+
+
+ template<class T>
+ std::enable_if<std::is_integral<T>::value,int>
+ getDataType()
+ {
+ return (1<<(sizeof(T)-1))+ std::is_unsigned<T>::value ;
+ }
+
+ template<class T>
+ std::enable_if<std::is_floating_point<T>::value,int>
+ getDataType()
+ {
+ return (1<<8)+std::is_same<T,double>::value ;
+ }
+
+ template<class T>
+ std::enable_if<std::is_same<T,mpz_t>::value,int>
+ getDataType()
+ {
+ return (1<<16) ;
+ }
+
+ template<class T>
+ int getDataType()
+ {
+ return -1 ;
+ }
+
+
+ template<class Field>
+ void readMachineType(const Field &F,
+ typename Field::Element & modulo,
+ typename Field::Element_ptr val,
+ std::ifstream & file,
+ const uint64_t dims,
+ const mask_t data_type,
+ const mask_t field_desc)
+ {
+ // switch(data_type) {
+ // case (1<<0) + 0 :
+ // readMyMachineType<Field,int8_t >() (F,val, modulo, file,dims,data_type,field_desc);
+ // case (1<<0) + 1 :
+ // readMyMachineType<Field,uint8_t >() (F,val, modulo, file,dims,data_type,field_desc);
+ // case (1<<1) + 0 :
+ // readMyMachineType<Field,int16_t >() (F,val, modulo, file,dims,data_type,field_desc);
+ // case (1<<1) + 1 :
+ // readMyMachineType<Field,uint16_t >() (F,val, modulo, file,dims,data_type,field_desc);
+ // case (1<<2) + 0 :
+ // readMyMachineType<Field,int32_t >() (F,val, modulo, file,dims,data_type,field_desc);
+ // case (1<<2) + 0 :
+ // readMyMachineType<Field,uint32_t >() (F,val, modulo, file,dims,data_type,field_desc);
+ // case (1<<3) + 0 :
+ // readMyMachineType<Field,int64_t >() (F,val, modulo, file,dims,data_type,field_desc);
+ // case (1<<3) + 0 :
+ // readMyMachineType<Field,uint64_t >() (F,val, modulo, file,dims,data_type,field_desc);
+ // case (1<<8) :
+ // readMyMachineType<Field,float >() (F,val, modulo, file,dims,data_type,field_desc);
+ // case (1<<8)+1 :
+ // readMyMachineType<Field,double >() (F,val, modulo, file,dims,data_type,field_desc);
+ // case (1<<16) :
+ // readMyMachineType<Field,mpz_t >() (F,val, modulo, file,dims,data_type,field_desc);
+ // default :
+ // throw("bad data type descriptor");
+ // }
+
+ }
+
+ template<class Field>
+ void readDnsFormat(const std::string &path, const Field &F, index_t &rowdim, index_t &coldim,
+ typename Field::Element_ptr &val)
+ {
+ std::ifstream file(path, std::ifstream::binary);
+ mask_t magic, field_desc, data_type ;
+ typename Field::Element modulo ;
+
+ file.read((char*) &magic , sizeof(int64_t)) ;
+ if (magic != DNS_BIN_VER) {
+ throw("bad version");
+ }
+ file.read((char*) &field_desc, sizeof(int64_t)) ;
+ file.read((char*) &data_type , sizeof(int64_t)) ;
+ file.read((char*) &rowdim , sizeof(int64_t)) ;
+ file.read((char*) &coldim , sizeof(int64_t)) ;
+ val = fflas_new(F,rowdim*coldim,1);
+ readMachineType(F,val, modulo, file,rowdim*coldim,field_desc,data_type);
+
+ }
+
+ template<class Field>
+ void writeDnsFormat(const std::string &path, const Field &F, const index_t &rowdim, const index_t &coldim,
+ typename Field::Element_ptr A, index_t ldA)
+ {
+ typedef typename Field::Element Element ;
+ std::ofstream file(path, std::ofstream::binary);
+ mask_t field_desc = getFieldDesc(F);
+ mask_t magic = DNS_BIN_VER ;
+ mask_t data_type = getDataType<Element>(F);
+ Element modulo ;
+
+ file.write((char*) &magic , sizeof(int64_t)) ;
+ file.write((char*) &field_desc, sizeof(int64_t)) ;
+ file.write((char*) &data_type , sizeof(int64_t)) ;
+ file.write((char*) &rowdim , sizeof(int64_t)) ;
+ file.write((char*) &coldim , sizeof(int64_t)) ;
+ // writeMachineType(F,A, modulo, file,rowdim,coldim,lda,field_desc,data_type);
+
+ }
+
+}// FFLAS
+
+#endif /* __FFLASFFPACK_fflas_fflas_sparse_read_sparse_H */
+
diff --git a/fflas-ffpack/fflas/fflas_sparse/sell.h b/fflas-ffpack/fflas/fflas_sparse/sell.h
new file mode 100644
index 0000000..56ad03e
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/sell.h
@@ -0,0 +1,74 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_fspmv_sell.inl
+ * NO DOC
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_sell_H
+#define __FFLASFFPACK_fflas_sparse_sell_H
+
+namespace FFLAS { /* SELL */
+
+template <class _Field> struct Sparse<_Field, SparseMatrix_t::SELL> {
+ using Field = _Field;
+ bool delayed = false;
+ int chunk = 0;
+ index_t kmax = 0;
+ index_t m = 0;
+ index_t n = 0;
+ index_t maxrow = 0;
+ index_t sigma = 0;
+ index_t nChunks = 0;
+ uint64_t nnz = 0;
+ uint64_t nElements = 0;
+ index_t *perm = nullptr;
+ uint64_t *st = nullptr;
+ index_t *chunkSize = nullptr;
+ index_t *col = nullptr;
+ typename _Field::Element_ptr dat;
+};
+
+template <class _Field>
+struct Sparse<_Field, SparseMatrix_t::SELL_ZO>
+ : public Sparse<_Field, SparseMatrix_t::SELL> {
+ using Field = _Field;
+ typename _Field::Element cst = 1;
+};
+
+} // FFLAS
+
+#include "fflas-ffpack/fflas/fflas_sparse/sell/sell_utils.inl"
+#include "fflas-ffpack/fflas/fflas_sparse/sell/sell_spmv.inl"
+#if defined(__FFLASFFPACK_USE_OPENMP)
+#include "fflas-ffpack/fflas/fflas_sparse/sell/sell_pspmv.inl"
+#endif
+// #include "fflas-ffpack/fflas/fflas_sparse/sell/sell_spmm.inl"
+// #include "fflas-ffpack/fflas/fflas_sparse/sell/sell_pspmm.inl"
+
+#endif // __FFLASFFPACK_fflas_sparse_SELL_H
\ No newline at end of file
diff --git a/benchmark/Makefile.am b/fflas-ffpack/fflas/fflas_sparse/sell/Makefile.am
similarity index 76%
copy from benchmark/Makefile.am
copy to fflas-ffpack/fflas/fflas_sparse/sell/Makefile.am
index 31793b2..012854d 100644
--- a/benchmark/Makefile.am
+++ b/fflas-ffpack/fflas/fflas_sparse/sell/Makefile.am
@@ -1,5 +1,7 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2014 FFLAS-FFPACK
+# written by Bastien Vialla <bastien.vialla at lirmm.fr>
+#
+#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -19,10 +21,10 @@
# ========LICENCE========
#/
-#
-# Nothing yet
-SUBDIRS=graph src html test-src
-#
-EXTRA_DIST=run.sh
+pkgincludesubdir=$(pkgincludedir)/fflas/fflas_sparse/sell
+pkgincludesub_HEADERS= \
+ sell_spmv.inl \
+ sell_utils.inl \
+ sell_pspmv.inl
diff --git a/fflas-ffpack/fflas/fflas_sparse/sell/sell_pspmv.inl b/fflas-ffpack/fflas/fflas_sparse/sell/sell_pspmv.inl
new file mode 100644
index 0000000..20b53a6
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/sell/sell_pspmv.inl
@@ -0,0 +1,681 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_sell_pspmv_INL
+#define __FFLASFFPACK_fflas_sparse_sell_pspmv_INL
+
+#ifdef __FFLASFFPACK_USE_TBB
+#include "tbb/parallel_for.h"
+#include "tbb/blocked_range.h"
+#endif
+
+namespace FFLAS {
+namespace sparse_details_impl {
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::SELL> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::GenericTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nbChunks, 2),
+ [&F, &A, x, y, col, st, dat, chunkSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ index_t j = 0;
+ for (; j < size; j++) {
+ for (index_t k = 0; k < A.chunk; ++k) {
+ F.axpyin(y[i * A.chunk + k], dat[start + j * A.chunk + k], x[col[start + j * A.chunk + k]]);
+ }
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ index_t j = 0;
+ for (; j < size; j++) {
+ for (index_t k = 0; k < A.chunk; ++k) {
+ F.axpyin(y[i * A.chunk + k], dat[start + j * A.chunk + k], x[col[start + j * A.chunk + k]]);
+ }
+ }
+ }
+#endif
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void pfspmv_simd(const Field &F, const Sparse<Field, SparseMatrix_t::SELL> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nbChunks, 2),
+ [&F, &A, x, y, st, col, dat, chunkSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ vect_t x1, x2, y1, y2, dat1, dat2;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ index_t j = 0;
+ for (; j < ROUND_DOWN(size, 2); j += 2) {
+ dat1 = simd::load(dat + start + j * A.chunk);
+ dat2 = simd::load(dat + start + (j + 1) * A.chunk);
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ x2 = simd::gather(x, col + start + (j + 1) * A.chunk);
+ y1 = simd::fmadd(y1, dat1, x1);
+ y2 = simd::fmadd(y2, dat2, x2);
+ }
+ if (size % 2 != 0) {
+ dat1 = simd::load(dat + start + j * A.chunk);
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ y1 = simd::fmadd(y1, dat1, x1);
+ }
+ simd::store(y + i * A.chunk, simd::add(simd::load(y + i * A.chunk), simd::add(y1, y2)));
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ vect_t x1, x2, y1, y2, dat1, dat2;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ index_t j = 0;
+ for (; j < ROUND_DOWN(size, 2); j += 2) {
+ dat1 = simd::load(dat + start + j * A.chunk);
+ dat2 = simd::load(dat + start + (j + 1) * A.chunk);
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ x2 = simd::gather(x, col + start + (j + 1) * A.chunk);
+ y1 = simd::fmadd(y1, dat1, x1);
+ y2 = simd::fmadd(y2, dat2, x2);
+ }
+ if (size % 2 != 0) {
+ dat1 = simd::load(dat + start + j * A.chunk);
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ y1 = simd::fmadd(y1, dat1, x1);
+ }
+ simd::store(y + i * A.chunk, simd::add(simd::load(y + i * A.chunk), simd::add(y1, y2)));
+ }
+#endif // TBB
+}
+
+#endif // SIMD
+
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::SELL> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nbChunks, 2),
+ [&F, &A, x, y, st, col, dat, chunkSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ for (index_t j = 0; j < size; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] += dat[start + j * A.chunk + k + 1] * x[col[start + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] += dat[start + j * A.chunk + k + 2] * x[col[start + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] += dat[start + j * A.chunk + k + 3] * x[col[start + j * A.chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ }
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ for (index_t j = 0; j < size; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] += dat[start + j * A.chunk + k + 1] * x[col[start + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] += dat[start + j * A.chunk + k + 2] * x[col[start + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] += dat[start + j * A.chunk + k + 3] * x[col[start + j * A.chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ }
+ }
+ }
+#endif // TBB
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+template <class Field>
+inline void pfspmv_simd(const Field &F, const Sparse<Field, SparseMatrix_t::SELL> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_, const int64_t kmax) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t chunk = A.chunk;
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+
+ vect_t X, Y, D, C, Q, TMP, NEGP, INVP, MIN, MAX, P;
+ double p = (typename Field::Element)F.characteristic();
+
+ P = simd::set1(p);
+ NEGP = simd::set1(-p);
+ INVP = simd::set1(1 / p);
+ MIN = simd::set1(F.minElement());
+ MAX = simd::set1(F.maxElement());
+
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nbChunks, 2),
+ [&F, &A, x, y, st, col, dat, chunkSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ index_t j_loc = 0;
+ Y = simd::load(y + i * A.chunk);
+ index_t size = chunkSize[i];
+ index_t start = st[i];
+ index_t block = size / kmax;
+ for (size_t l = 0; l < block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ D = simd::load(dat + start + j * A.chunk);
+ X = simd::gather(x, col + start + j * A.chunk);
+ Y = simd::fmadd(Y, D, X);
+ }
+ simd::mod(Y, P, INVP, NEGP, MIN, MAX, Q, TMP);
+ }
+ for (; j < size; ++j) {
+ D = simd::load(dat + start + j * A.chunk);
+ X = simd::gather(x, col + start + j * A.chunk);
+ Y = simd::fmadd(Y, D, X);
+ }
+ simd::mod(Y, P, INVP, NEGP, MIN, MAX, Q, TMP);
+ simd::store(y + i * A.chunk, Y);
+ }
+ });
+#else
+#pragma omp parallel for
+ for (size_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ index_t j_loc = 0;
+ Y = simd::load(y + i * A.chunk);
+ index_t size = chunkSize[i];
+ index_t start = st[i];
+ index_t block = size / kmax;
+ for (size_t l = 0; l < block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ D = simd::load(dat + start + j * A.chunk);
+ X = simd::gather(x, col + start + j * A.chunk);
+ Y = simd::fmadd(Y, D, X);
+ }
+ simd::mod(Y, P, INVP, NEGP, MIN, MAX, Q, TMP);
+ }
+ for (; j < size; ++j) {
+ D = simd::load(dat + start + j * A.chunk);
+ X = simd::gather(x, col + start + j * A.chunk);
+ Y = simd::fmadd(Y, D, X);
+ }
+ simd::mod(Y, P, INVP, NEGP, MIN, MAX, Q, TMP);
+ simd::store(y + i * A.chunk, Y);
+ }
+#endif // TBB
+}
+
+#endif // SIMD
+
+template <class Field>
+inline void pfspmv(const Field &F, const Sparse<Field, SparseMatrix_t::SELL> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, const int64_t kmax) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t chunk = A.chunk;
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nbChunks, 2),
+ [&F, &A, &x, &y, st, col, dat, chunkSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t j = 0;
+ index_t j_loc = 0;
+ index_t size = chunkSize[i];
+ index_t start = st[i];
+ index_t block = size / kmax;
+ for (size_t l = 0; l < block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] +=
+ dat[start + j * A.chunk + k + 1] * x[col[start + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] +=
+ dat[start + j * A.chunk + k + 2] * x[col[start + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] +=
+ dat[start + j * A.chunk + k + 3] * x[col[start + j * A.chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ }
+ }
+ for (size_t k = 0; k < size; ++k) {
+ F.reduce(y[i * A.chunk + k]);
+ }
+ }
+ for (; j < size; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] += dat[start + j * A.chunk + k + 1] * x[col[start + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] += dat[start + j * A.chunk + k + 2] * x[col[start + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] += dat[start + j * A.chunk + k + 3] * x[col[start + j * A.chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ }
+ }
+ for (size_t k = 0; k < size; ++k) {
+ F.reduce(y[i * A.chunk + k]);
+ }
+ }
+ });
+
+#else
+#pragma omp parallel for
+ for (size_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ index_t j_loc = 0;
+ index_t size = chunkSize[i];
+ index_t start = st[i];
+ index_t block = size / kmax;
+ for (size_t l = 0; l < block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] += dat[start + j * A.chunk + k + 1] * x[col[start + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] += dat[start + j * A.chunk + k + 2] * x[col[start + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] += dat[start + j * A.chunk + k + 3] * x[col[start + j * A.chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ }
+ }
+ for (size_t k = 0; k < size; ++k) {
+ F.reduce(y[i * A.chunk + k]);
+ }
+ }
+ for (; j < size; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] += dat[start + j * A.chunk + k + 1] * x[col[start + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] += dat[start + j * A.chunk + k + 2] * x[col[start + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] += dat[start + j * A.chunk + k + 3] * x[col[start + j * A.chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ }
+ }
+ for (size_t k = 0; k < size; ++k) {
+ F.reduce(y[i * A.chunk + k]);
+ }
+ }
+#endif // TBB
+}
+
+template <class Field>
+inline void pfspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::SELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nbChunks, 2),
+ [&F, &A, x, y, st, col, chunkSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ index_t j = 0;
+ for (; j < size; j++) {
+ for (index_t k = 0; k < A.chunk; ++k) {
+ F.addin(y[i * A.chunk + k], x[col[start + j * A.chunk + k]]);
+ }
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ index_t j = 0;
+ for (; j < size; j++) {
+ for (index_t k = 0; k < A.chunk; ++k) {
+ F.addin(y[i * A.chunk + k], x[col[start + j * A.chunk + k]]);
+ }
+ }
+ }
+#endif
+}
+
+template <class Field>
+inline void pfspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::SELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nbChunks, 2),
+ [&F, &A, x, y, st, col, chunkSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ index_t j = 0;
+ for (; j < size; j++) {
+ for (index_t k = 0; k < A.chunk; ++k) {
+ F.subin(y[i * A.chunk + k], x[col[start + j * A.chunk + k]]);
+ }
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ index_t j = 0;
+ for (; j < size; j++) {
+ for (index_t k = 0; k < A.chunk; ++k) {
+ F.subin(y[i * A.chunk + k], x[col[start + j * A.chunk + k]]);
+ }
+ }
+ }
+#endif
+}
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void pfspmv_one_simd(const Field &F, const Sparse<Field, SparseMatrix_t::SELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nbChunks, 2),
+ [&F, &A, x, y, st, col, chunkSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ vect_t x1, x2, y1, y2;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ index_t j = 0;
+ for (; j < ROUND_DOWN(size, 2); j += 2) {
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ x2 = simd::gather(x, col + start + (j + 1) * A.chunk);
+ y1 = simd::add(y1, x1);
+ y2 = simd::add(y2, x2);
+ }
+ if (size % 2 != 0) {
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ y1 = simd::add(y1, x1);
+ }
+ simd::store(y + i * A.chunk, simd::add(simd::load(y + i * A.chunk), simd::add(y1, y2)));
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ vect_t x1, x2, y1, y2;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ index_t j = 0;
+ for (; j < ROUND_DOWN(size, 2); j += 2) {
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ x2 = simd::gather(x, col + start + (j + 1) * A.chunk);
+ y1 = simd::add(y1, x1);
+ y2 = simd::add(y2, x2);
+ }
+ if (size % 2 != 0) {
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ y1 = simd::add(y1, x1);
+ }
+ simd::store(y + i * A.chunk, simd::add(simd::load(y + i * A.chunk), simd::add(y1, y2)));
+ }
+#endif // TBB
+}
+
+template <class Field>
+inline void pfspmv_mone_simd(const Field &F, const Sparse<Field, SparseMatrix_t::SELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nbChunks, 2),
+ [&F, &A, x, y, st, col, chunkSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ vect_t x1, x2, y1, y2;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ index_t j = 0;
+ for (; j < ROUND_DOWN(size, 2); j += 2) {
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ x2 = simd::gather(x, col + start + (j + 1) * A.chunk);
+ y1 = simd::add(y1, x1);
+ y2 = simd::add(y2, x2);
+ }
+ if (size % 2 != 0) {
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ y1 = simd::add(y1, x1);
+ }
+ simd::store(y + i * A.chunk, simd::sub(simd::load(y + i * A.chunk), simd::add(y1, y2)));
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ vect_t x1, x2, y1, y2;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ index_t j = 0;
+ for (; j < ROUND_DOWN(size, 2); j += 2) {
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ x2 = simd::gather(x, col + start + (j + 1) * A.chunk);
+ y1 = simd::add(y1, x1);
+ y2 = simd::add(y2, x2);
+ }
+ if (size % 2 != 0) {
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ y1 = simd::add(y1, x1);
+ }
+ simd::store(y + i * A.chunk, simd::sub(simd::load(y + i * A.chunk), simd::add(y1, y2)));
+ }
+#endif // TBB
+}
+
+#endif // SIMD
+
+template <class Field>
+inline void pfspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::SELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nbChunks, 2),
+ [&F, &A, x, y, st, col, chunkSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ for (index_t j = 0; j < size; j++) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += x[col[start + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] += x[col[start + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] += x[col[start + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] += x[col[start + j * A.chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] += x[col[start + j * A.chunk + k]];
+ }
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ for (index_t j = 0; j < size; j++) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += x[col[start + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] += x[col[start + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] += x[col[start + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] += x[col[start + j * A.chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] += x[col[start + j * A.chunk + k]];
+ }
+ }
+ }
+#endif // TBB
+}
+
+template <class Field>
+inline void pfspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::SELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+#ifdef __FFLASFFPACK_USE_TBB
+ tbb::parallel_for(tbb::blocked_range<index_t>(0, A.nbChunks, 2),
+ [&F, &A, x, y, st, col, chunkSize](const tbb::blocked_range<index_t> &r) {
+ for (index_t i = r.begin(), end = r.end(); i < end; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ for (index_t j = 0; j < size; j++) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] -= x[col[start + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] -= x[col[start + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] -= x[col[start + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] -= x[col[start + j * A.chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] -= x[col[start + j * A.chunk + k]];
+ }
+ }
+ }
+ });
+#else
+#pragma omp parallel for
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ for (index_t j = 0; j < size; j++) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] -= x[col[start + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] -= x[col[start + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] -= x[col[start + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] -= x[col[start + j * A.chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] -= x[col[start + j * A.chunk + k]];
+ }
+ }
+ }
+#endif // TBB
+}
+
+} // SELL_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_SELL_spmv_INL
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/sell/sell_spmv.inl b/fflas-ffpack/fflas/fflas_sparse/sell/sell_spmv.inl
new file mode 100644
index 0000000..0f529ee
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/sell/sell_spmv.inl
@@ -0,0 +1,396 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_sell_spmv_INL
+#define __FFLASFFPACK_fflas_sparse_sell_spmv_INL
+
+// #define SELL_DEBUG 1
+
+namespace FFLAS {
+namespace sparse_details_impl {
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::SELL> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::GenericTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ index_t j = 0;
+ for (; j < size; j++) {
+ for (index_t k = 0; k < A.chunk; ++k) {
+ F.axpyin(y[i * A.chunk + k], dat[start + j * A.chunk + k], x[col[start + j * A.chunk + k]]);
+ }
+ }
+ }
+}
+
+// #ifdef __FFLASFFPACK_USE_SIMD
+
+template <class Field>
+inline void fspmv_simd(const Field &F, const Sparse<Field, SparseMatrix_t::SELL> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ vect_t x1, x2, y1, y2, dat1, dat2;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ index_t j = 0;
+ for (; j < ROUND_DOWN(size, 2); j += 2) {
+ dat1 = simd::load(dat + start + j * A.chunk);
+ dat2 = simd::load(dat + start + (j + 1) * A.chunk);
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ x2 = simd::gather(x, col + start + (j + 1) * A.chunk);
+ y1 = simd::fmadd(y1, dat1, x1);
+ y2 = simd::fmadd(y2, dat2, x2);
+ }
+ if (size % 2 != 0) {
+ dat1 = simd::load(dat + start + j * A.chunk);
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ y1 = simd::fmadd(y1, dat1, x1);
+ }
+ simd::store(y + i * A.chunk, simd::add(simd::load(y + i * A.chunk), simd::add(y1, y2)));
+ }
+}
+
+// #endif
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::SELL> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, FieldCategories::UnparametricTag) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ for (index_t j = 0; j < size; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ y[i * A.chunk + k + 1] += dat[start + j * A.chunk + k + 1] * x[col[start + j * A.chunk + k + 1]];
+ y[i * A.chunk + k + 2] += dat[start + j * A.chunk + k + 2] * x[col[start + j * A.chunk + k + 2]];
+ y[i * A.chunk + k + 3] += dat[start + j * A.chunk + k + 3] * x[col[start + j * A.chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] += dat[start + j * A.chunk + k] * x[col[start + j * A.chunk + k]];
+ }
+ }
+ }
+}
+
+// #ifdef __FFLASFFPACK_USE_SIMD
+template <class Field>
+inline void fspmv_simd(const Field &F, const Sparse<Field, SparseMatrix_t::SELL> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_, const uint64_t kmax) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t chunk = A.chunk;
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+
+ vect_t X, Y, D, C, Q, TMP, NEGP, INVP, MIN, MAX, P;
+ double p = (typename Field::Element)F.characteristic();
+
+ P = simd::set1(p);
+ NEGP = simd::set1(-p);
+ INVP = simd::set1(1 / p);
+ MIN = simd::set1(F.minElement());
+ MAX = simd::set1(F.maxElement());
+
+ for (size_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ index_t j_loc = 0;
+ Y = simd::load(y + i * chunk);
+ index_t size = chunkSize[i];
+ index_t start = st[i];
+ index_t block = size / kmax;
+ for (size_t l = 0; l < block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ D = simd::load(dat + start + j * A.chunk);
+ X = simd::gather(x, col + start + j * A.chunk);
+ Y = simd::fmadd(Y, D, X);
+ }
+ simd::mod(Y, P, INVP, NEGP, MIN, MAX, Q, TMP);
+ }
+ for (; j < size; ++j) {
+ D = simd::load(dat + start + j * A.chunk);
+ X = simd::gather(x, col + start + j * A.chunk);
+ Y = simd::fmadd(Y, D, X);
+ }
+ simd::mod(Y, P, INVP, NEGP, MIN, MAX, Q, TMP);
+ simd::store(y + i * A.chunk, Y);
+ }
+}
+// #endif
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::SELL> &A, typename Field::ConstElement_ptr x_,
+ typename Field::Element_ptr y_, const uint64_t kmax) {
+ assume_aligned(dat, A.dat, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ index_t chunk = A.chunk;
+ for (size_t i = 0; i < A.nChunks; ++i) {
+ index_t j = 0;
+ index_t j_loc = 0;
+ index_t size = chunkSize[i];
+ index_t start = st[i];
+ index_t block = size / kmax;
+ for (size_t l = 0; l < block; ++l) {
+ j_loc += kmax;
+ for (; j < j_loc; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += dat[start + j * chunk + k] * x[col[start + j * chunk + k]];
+ y[i * A.chunk + k + 1] += dat[start + j * chunk + k + 1] * x[col[start + j * chunk + k + 1]];
+ y[i * A.chunk + k + 2] += dat[start + j * chunk + k + 2] * x[col[start + j * chunk + k + 2]];
+ y[i * A.chunk + k + 3] += dat[start + j * chunk + k + 3] * x[col[start + j * chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] += dat[start + j * chunk + k] * x[col[start + j * chunk + k]];
+ }
+ }
+ for (size_t k = 0; k < size; ++k) {
+ F.reduce(y[i * A.chunk + k]);
+ }
+ }
+ for (; j < size; ++j) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += dat[start + j * chunk + k] * x[col[start + j * chunk + k]];
+ y[i * A.chunk + k + 1] += dat[start + j * chunk + k + 1] * x[col[start + j * chunk + k + 1]];
+ y[i * A.chunk + k + 2] += dat[start + j * chunk + k + 2] * x[col[start + j * chunk + k + 2]];
+ y[i * A.chunk + k + 3] += dat[start + j * chunk + k + 3] * x[col[start + j * chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] += dat[start + j * chunk + k] * x[col[start + j * chunk + k]];
+ }
+ }
+ for (size_t k = 0; k < size; ++k) {
+ F.reduce(y[i * A.chunk + k]);
+ }
+ }
+}
+
+template <class Field>
+inline void fspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::SELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ index_t j = 0;
+ for (; j < size; j++) {
+ for (index_t k = 0; k < A.chunk; ++k) {
+ F.addin(y[i * A.chunk + k], x[col[start + j * A.chunk + k]]);
+ }
+ }
+ }
+}
+
+template <class Field>
+inline void fspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::SELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::GenericTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ index_t j = 0;
+ for (; j < size; j++) {
+ for (index_t k = 0; k < A.chunk; ++k) {
+ F.subin(y[i * A.chunk + k], x[col[start + j * A.chunk + k]]);
+ }
+ }
+ }
+}
+
+// #ifdef __FFLASFFPACK_USE_SIMD
+template <class Field>
+inline void fspmv_one_simd(const Field &F, const Sparse<Field, SparseMatrix_t::SELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ vect_t x1, x2, y1, y2;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ index_t j = 0;
+ for (; j < ROUND_DOWN(size, 2); j += 2) {
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ x2 = simd::gather(x, col + start + (j + 1) * A.chunk);
+ y1 = simd::add(y1, x1);
+ y2 = simd::add(y2, x2);
+ }
+ if (size % 2 != 0) {
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ y1 = simd::add(y1, x1);
+ }
+ simd::store(y + i * A.chunk, simd::add(simd::load(y + i * A.chunk), simd::add(y1, y2)));
+ }
+}
+
+template <class Field>
+inline void fspmv_mone_simd(const Field &F, const Sparse<Field, SparseMatrix_t::SELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+
+ using simd = Simd<typename Field::Element>;
+ using vect_t = typename simd::vect_t;
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ vect_t x1, x2, y1, y2;
+ y1 = simd::zero();
+ y2 = simd::zero();
+ index_t j = 0;
+ for (; j < ROUND_DOWN(size, 2); j += 2) {
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ x2 = simd::gather(x, col + start + (j + 1) * A.chunk);
+ y1 = simd::add(y1, x1);
+ y2 = simd::add(y2, x2);
+ }
+ if (size % 2 != 0) {
+ x1 = simd::gather(x, col + start + j * A.chunk);
+ y1 = simd::add(y1, x1);
+ }
+ simd::store(y + i * A.chunk, simd::sub(simd::load(y + i * A.chunk), simd::add(y1, y2)));
+ }
+}
+
+// #endif
+
+template <class Field>
+inline void fspmv_one(const Field &F, const Sparse<Field, SparseMatrix_t::SELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ auto chunk = A.chunk;
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ for (index_t j = 0; j < size; j++) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] += x[col[start + j * chunk + k]];
+ y[i * A.chunk + k + 1] += x[col[start + j * chunk + k + 1]];
+ y[i * A.chunk + k + 2] += x[col[start + j * chunk + k + 2]];
+ y[i * A.chunk + k + 3] += x[col[start + j * chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] += x[col[start + j * chunk + k]];
+ }
+ }
+ }
+}
+
+template <class Field>
+inline void fspmv_mone(const Field &F, const Sparse<Field, SparseMatrix_t::SELL_ZO> &A,
+ typename Field::ConstElement_ptr x_, typename Field::Element_ptr y_,
+ FieldCategories::UnparametricTag) {
+ assume_aligned(col, A.col, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(st, A.st, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(chunkSize, A.chunkSize, (size_t)Alignment::CACHE_LINE);
+ assume_aligned(x, x_, (size_t)Alignment::DEFAULT);
+ assume_aligned(y, y_, (size_t)Alignment::DEFAULT);
+ auto chunk = A.chunk;
+ for (index_t i = 0; i < A.nChunks; ++i) {
+ index_t start = st[i];
+ index_t size = chunkSize[i];
+ for (index_t j = 0; j < size; j++) {
+ size_t k = 0;
+ for (; k < ROUND_DOWN(A.chunk, 4); k += 4) {
+ y[i * A.chunk + k] -= x[col[start + j * chunk + k]];
+ y[i * A.chunk + k + 1] -= x[col[start + j * chunk + k + 1]];
+ y[i * A.chunk + k + 2] -= x[col[start + j * chunk + k + 2]];
+ y[i * A.chunk + k + 3] -= x[col[start + j * chunk + k + 3]];
+ }
+ for (; k < size; ++k) {
+ y[i * A.chunk + k] -= x[col[start + j * chunk + k]];
+ }
+ }
+ }
+}
+
+} // SELL_details
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_fflas_SELL_spmv_INL
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/sell/sell_utils.inl b/fflas-ffpack/fflas/fflas_sparse/sell/sell_utils.inl
new file mode 100644
index 0000000..aef50c4
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/sell/sell_utils.inl
@@ -0,0 +1,281 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_sparse_sell_utils_INL
+#define __FFLASFFPACK_fflas_sparse_sell_utils_INL
+
+namespace FFLAS {
+
+template <class Field>
+inline void fspmv(const Field &F, const Sparse<Field, SparseMatrix_t::SELL_ZO> &A, typename Field::ConstElement_ptr x,
+ typename Field::Element_ptr y, FieldCategories::ModularTag) {
+ fspmv(F, A, x, y, FieldCategories::UnparametricTag());
+ freduce(F, A.m, y, 1);
+}
+
+template <class Field> inline void sparse_delete(const Sparse<Field, SparseMatrix_t::SELL> &A) {
+ fflas_delete(A.dat);
+ fflas_delete(A.col);
+ fflas_delete(A.st);
+ fflas_delete(A.chunkSize);
+}
+
+template <class Field> inline void sparse_delete(const Sparse<Field, SparseMatrix_t::SELL_ZO> &A) {
+ fflas_delete(A.col);
+ fflas_delete(A.st);
+ fflas_delete(A.chunkSize);
+}
+
+namespace sell_details {
+
+struct Info {
+ uint64_t size = 0;
+ uint64_t perm = 0;
+ uint64_t begin = 0;
+
+ Info(uint64_t it, uint64_t s, uint64_t p) : size(s), perm(p), begin(it) {}
+ Info() = default;
+ Info(const Info &) = default;
+ Info(Info &&) = default;
+
+ Info &operator=(const Info &) = default;
+ Info &operator=(Info &&) = default;
+};
+
+template <class ValT, class IdxT> struct Coo {
+ using Self = Coo<ValT, IdxT>;
+
+ ValT val = 0;
+ IdxT row = 0;
+ IdxT col = 0;
+
+ Coo(ValT v, IdxT r, IdxT c) : val(v), row(r), col(c) {}
+ Coo() = default;
+ Coo(const Self &) = default;
+ Coo(Self &&) = default;
+
+ Self &operator=(const Self &) = default;
+ Self &operator=(Self &&) = default;
+};
+}
+
+template <class Field> inline void sparse_print(const Sparse<Field, SparseMatrix_t::SELL> &A) {
+ uint64_t it = 0;
+ for (size_t i = 0; i < A.nChunks; ++i) {
+ for (size_t k = 0; k < A.chunk; ++k) {
+ std::cout << i *A.chunk + k << " : ";
+ for (size_t j = 0; j < A.chunkSize[i]; ++j) {
+ std::cout << A.dat[it + j * A.chunk + k] << " ";
+ }
+ std::cout << std::endl;
+ }
+ it += A.chunkSize[i] * A.chunk;
+ }
+}
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::SELL> &A, const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz,
+ uint64_t sigma = 0) {
+ using namespace sell_details;
+ using coo = Coo<typename Field::Element, IndexT>;
+ if (!sigma)
+ sigma = rowdim;
+ A.kmax = Protected::DotProdBoundClassic(F, F.one);
+ A.m = rowdim;
+ A.n = coldim;
+ A.nnz = nnz;
+#ifdef __FFLASFFPACK_USE_SIMD
+ using simd = Simd<typename Field::Element>;
+ A.chunk = simd::vect_size;
+#else
+ A.chunk = 8;
+#endif
+ index_t m = (A.m % A.chunk == 0) ? A.m : ROUND_DOWN(A.m, A.chunk) + A.chunk;
+ A.nChunks = (m / A.chunk);
+
+ std::vector<coo> data;
+ std::vector<Info> infos(A.nChunks * A.chunk);
+
+ for (uint64_t i = 0; i < nnz; ++i) {
+ data.emplace_back(dat[i], row[i], col[i]);
+ }
+
+ IndexT currow = row[0];
+
+ infos[currow].begin = 0;
+ for (uint64_t i = 0; i < nnz; ++i) {
+ if (row[i] != currow) {
+ currow = row[i];
+ infos[currow].begin = i;
+ }
+ infos[row[i]].size++;
+ }
+
+ A.maxrow = (std::max_element(infos.begin(), infos.end(),
+ [](const Info &a, const Info &b) { return a.size >= b.size; }))->size;
+
+ // cout << "maxrow : " << A.maxrow << endl;
+
+ if (A.maxrow < A.kmax)
+ A.delayed = true;
+
+ for (uint64_t i = 0; i < rowdim; ++i) {
+ infos[i].perm = i;
+ }
+
+#ifdef SELL_DEBUG
+ for (auto &x : infos) {
+ cout << x.size << " ";
+ }
+ std::cout << std::endl;
+#endif
+
+ uint64_t it = 0;
+ for (; it < ROUND_DOWN(rowdim, sigma); it += sigma) {
+ std::sort(infos.begin() + it, infos.begin() + it + sigma,
+ [](const Info &a, const Info &b) { return a.size >= b.size; });
+ }
+ if (it != rowdim) {
+ std::sort(infos.begin() + it, infos.end(), [](Info a, Info b) { return a.size >= b.size; });
+ }
+
+ // cout << "sorted : " << std::is_sorted(infos.begin(), infos.end(), [](Info
+ // a, Info b){
+ // return a.size >= b.size;
+ // }) << endl;
+
+ for (size_t i = 0; i < infos.size(); ++i) {
+ if (infos[i].begin > nnz)
+ std::cout << "ERROR sort " << i << " size : " << infos[i].size << " begin : " << infos[i].begin
+ << " perm : " << infos[i].perm << std::endl;
+ }
+
+#ifdef SELL_DEBUG
+ for (auto &x : infos) {
+ cout << x.size << " ";
+ }
+ std::cout << std::endl;
+#endif
+
+ A.perm = fflas_new<index_t>(rowdim, Alignment::CACHE_LINE);
+
+ // cout << "perm : ";
+ for (uint64_t i = 0; i < rowdim; ++i) {
+ // cout << "(" << i << " , " << infos[i].perm << ") ";
+ A.perm[infos[i].perm] = i;
+ }
+
+ // for(size_t i = 0 ; i < A.m ; ++i)
+ // cout << A.perm[i] << " ";
+ // cout << endl;
+ // cout << endl;
+
+ // add info if rowdim%chunk != 0, with empty infos (size = 0, begin = 0)
+ // infos.resize(A.nChunks*A.chunk);
+
+ // for(auto & x:infos)
+ // if(x.begin > nnz)
+ // cout << "ERROR resize" << endl;
+
+ A.chunkSize = fflas_new<index_t>(A.nChunks, Alignment::CACHE_LINE);
+
+ for (uint64_t i = 0; i < A.nChunks; ++i)
+ A.chunkSize[i] = 0;
+
+ for (uint64_t i = 0; i < A.nChunks; ++i) {
+ for (uint64_t j = 0; j < A.chunk; ++j) {
+ if (infos[i * A.chunk + j].size >= A.chunkSize[i])
+ A.chunkSize[i] = infos[i * A.chunk + j].size;
+ }
+ }
+
+#ifdef SELL_DEBUG
+ for (uint64_t i = 0; i < A.nChunks; ++i)
+ cout << "chunk " << i << " : " << A.chunkSize[i] << endl;
+ ;
+#endif
+ uint64_t sum = 0;
+ for (uint64_t i = 0; i < A.nChunks; ++i)
+ sum += A.chunkSize[i];
+#ifdef SELL_DEBUG
+ cout << "sum : " << sum << " chunk : " << A.chunk << endl;
+#endif
+ A.col = fflas_new<index_t>(sum * A.chunk, Alignment::CACHE_LINE);
+ A.dat = fflas_new(F, sum * A.chunk, 1, Alignment::CACHE_LINE);
+ A.nElements = sum * A.chunk;
+
+ for (uint64_t i = 0; i < sum * A.chunk; ++i) {
+ A.col[i] = 0;
+ F.assign(A.dat[i], F.zero);
+ }
+
+ it = 0;
+ for (uint64_t i = 0; i < A.nChunks; ++i) {
+ for (uint64_t k = 0; k < A.chunk; ++k) {
+ uint64_t start = infos[i * A.chunk + k].begin;
+#ifdef SELL_DEBUG
+ cout << it << " " << start << " " << infos[i * A.chunk + k].size << endl;
+ cout << " ";
+#endif
+ for (uint64_t j = 0; j < infos[i * A.chunk + k].size; ++j) {
+ if (it + k + j * A.chunk >= sum * A.chunk)
+ std::cout << "error : " << it + k + j *A.chunk << " " << sum *A.chunk << std::endl;
+ A.dat[it + k + j * A.chunk] = data[start + j].val;
+ A.col[it + k + j * A.chunk] = data[start + j].col;
+#ifdef SELL_DEBUG
+ cout << data[start + j].val << " ";
+#endif
+ }
+#ifdef SELL_DEBUG
+ cout << endl;
+#endif
+ }
+ it += A.chunkSize[i] * A.chunk;
+ }
+ A.st = fflas_new<uint64_t>(A.nChunks, Alignment::CACHE_LINE);
+ A.st[0] = 0;
+ for (uint64_t i = 1; i < A.nChunks; ++i) {
+ A.st[i] = A.chunkSize[i - 1] * A.chunk;
+ }
+ for (uint64_t i = 1; i < A.nChunks; ++i) {
+ A.st[i] += A.st[i - 1];
+ }
+#ifdef SELL_DEBUG
+ cout << "st : ";
+ for (uint64_t i = 0; i < A.nChunks; ++i)
+ cout << A.st[i] << " ";
+ cout << endl;
+#endif
+}
+
+template <class Field, class IndexT>
+inline void sparse_init(const Field &F, Sparse<Field, SparseMatrix_t::SELL_ZO> &A, const IndexT *row, const IndexT *col,
+ typename Field::ConstElement_ptr dat, uint64_t rowdim, uint64_t coldim, uint64_t nnz) {}
+} // FFLAS
+#endif
\ No newline at end of file
diff --git a/fflas-ffpack/fflas/fflas_sparse/sparse_matrix_traits.h b/fflas-ffpack/fflas/fflas_sparse/sparse_matrix_traits.h
new file mode 100644
index 0000000..90b16d3
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/sparse_matrix_traits.h
@@ -0,0 +1,338 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_SPARSEMATRIX_TRAITS_H
+#define __FFLASFFPACK_SPARSEMATRIX_TRAITS_H
+
+#include <type_traits>
+
+namespace FFLAS {
+
+/****************************************************************************************************************
+ *
+ * SparseMatrix Traits
+ *
+ ****************************************************************************************************************/
+
+template <class Field, class M> struct isSparseMatrix : public std::false_type {};
+
+template <class Field> struct isSparseMatrix<Field, Sparse<Field, SparseMatrix_t::CSR>> : public std::true_type {};
+
+template <class Field> struct isSparseMatrix<Field, Sparse<Field, SparseMatrix_t::CSR_ZO>> : public std::true_type {};
+
+template <class Field> struct isSparseMatrix<Field, Sparse<Field, SparseMatrix_t::COO>> : public std::true_type {};
+
+template <class Field> struct isSparseMatrix<Field, Sparse<Field, SparseMatrix_t::COO_ZO>> : public std::true_type {};
+
+template <class Field> struct isSparseMatrix<Field, Sparse<Field, SparseMatrix_t::ELL>> : public std::true_type {};
+
+template <class Field> struct isSparseMatrix<Field, Sparse<Field, SparseMatrix_t::ELL_ZO>> : public std::true_type {};
+
+template <class Field> struct isSparseMatrix<Field, Sparse<Field, SparseMatrix_t::SELL>> : public std::true_type {};
+
+template <class Field> struct isSparseMatrix<Field, Sparse<Field, SparseMatrix_t::SELL_ZO>> : public std::true_type {};
+
+template <class Field> struct isSparseMatrix<Field, Sparse<Field, SparseMatrix_t::ELL_simd>> : public std::true_type {};
+
+template <class Field>
+struct isSparseMatrix<Field, Sparse<Field, SparseMatrix_t::ELL_simd_ZO>> : public std::true_type {};
+
+template <class Field> struct isSparseMatrix<Field, Sparse<Field, SparseMatrix_t::CSR_HYB>> : public std::true_type {};
+
+template <class Field> struct isSparseMatrix<Field, Sparse<Field, SparseMatrix_t::HYB_ZO>> : public std::true_type {};
+
+
+template <class F, class M> struct isZOSparseMatrix : public std::false_type {};
+
+template <class Field> struct isZOSparseMatrix<Field, Sparse<Field, SparseMatrix_t::CSR_ZO>> : public std::true_type {};
+
+template <class Field> struct isZOSparseMatrix<Field, Sparse<Field, SparseMatrix_t::COO_ZO>> : public std::true_type {};
+
+template <class Field> struct isZOSparseMatrix<Field, Sparse<Field, SparseMatrix_t::ELL_ZO>> : public std::true_type {};
+
+template <class Field>
+struct isZOSparseMatrix<Field, Sparse<Field, SparseMatrix_t::SELL_ZO>> : public std::true_type {};
+
+template <class Field>
+struct isZOSparseMatrix<Field, Sparse<Field, SparseMatrix_t::ELL_simd_ZO>> : public std::true_type {};
+
+using ZOSparseMatrix = std::true_type;
+using NotZOSparseMatrix = std::false_type;
+
+
+template<class F, class M> struct isSparseMatrixSimdFormat : public std::false_type {};
+
+#ifdef __FFLASFFPACK_USE_SIMD
+
+template<class Field> struct isSparseMatrixSimdFormat<Field, Sparse<Field, SparseMatrix_t::SELL>> : public support_simd<typename Field::Element>::type {};
+
+template<class Field> struct isSparseMatrixSimdFormat<Field, Sparse<Field, SparseMatrix_t::ELL_simd>> : public support_simd<typename Field::Element>::type {};
+
+#endif // __FFLASFFPACK_USE_SIMD
+
+using SimdSparseMatrix = std::true_type;
+using NoSimdSparseMatrix = std::false_type;
+
+
+template<class F, class M> struct isSparseMatrixMKLFormat : public std::false_type {};
+
+#ifdef __FFLASFFPACK_HAVE_MKL
+
+template<class Field> struct isSparseMatrixMKLFormat<Field, Sparse<Field, SparseMatrix_t::CSR>> : public std::true_type {};
+template<class Field> struct isSparseMatrixMKLFormat<Field, Sparse<Field, SparseMatrix_t::CSC>> : public std::true_type {};
+
+#endif // __FFLASFFPACK_HAVE_MKL
+
+using MKLSparseMatrixFormat = std::true_type;
+using NotMKLSparseMatrixFormat = std::false_type;
+
+/********************************************************************************************************
+ *
+ * Traits to test if operator +, -, *, =, +=, -=, *= exists
+ *
+ ********************************************************************************************************/
+
+#if 0
+#define function_to_functor(X) \
+ struct tfn_##X { \
+ template <typename... Args> \
+ auto operator()(Args&&... args) const \
+ ->decltype(X(std::forward<Args>(args)...)){ \
+ return X(std::forward<Args>(args)...); } }
+#endif
+
+struct tfn_plus {
+ template <typename... Args>
+ auto operator()(Args&&... args) const ->decltype(operator+(std::forward<Args>(args)...))
+ {
+ return operator+(std::forward<Args>(args)...);
+ }
+};
+
+struct tfn_mul {
+ template <typename... Args>
+ auto operator()(Args&&... args) const ->decltype(operator+(std::forward<Args>(args)...))
+ {
+ return operator*(std::forward<Args>(args)...);
+ }
+};
+
+struct tfn_mul_eq {
+ template <typename... Args>
+ auto operator()(Args&&... args) const ->decltype(operator+(std::forward<Args>(args)...))
+ {
+ return operator*=(std::forward<Args>(args)...);
+ }
+};
+
+struct tfn_minus {
+ template <typename... Args>
+ auto operator()(Args&&... args) const ->decltype(operator+(std::forward<Args>(args)...))
+ {
+ return operator-(std::forward<Args>(args)...);
+ }
+};
+
+struct tfn_plus_eq {
+ template <typename... Args>
+ auto operator()(Args&&... args) const ->decltype(operator+(std::forward<Args>(args)...))
+ {
+ return operator+=(std::forward<Args>(args)...);
+ }
+};
+
+struct tfn_minus_eq {
+ template <typename... Args>
+ auto operator()(Args&&... args) const ->decltype(operator+(std::forward<Args>(args)...))
+ {
+ return operator+=(std::forward<Args>(args)...);
+ }
+};
+
+template<typename C>
+struct has_plus_impl {
+private:
+ // Test for non member operator
+ template<typename T>
+ static constexpr auto check(T*)
+ -> typename std::is_same<typename std::result_of<tfn_plus(const T&, const T&)>::type, T>::type;
+
+ // Test for member operator
+ template<typename T>
+ static constexpr auto check(T*)
+ -> typename std::is_same<decltype(std::declval<T>().operator+(std::declval<T>())), T>::type;
+
+ template<typename>
+ static constexpr std::false_type check(...);
+
+ typedef decltype(check<C>(0)) type;
+
+public:
+ static constexpr bool value = type::value;
+};
+
+template<typename C>
+struct has_mul_impl {
+private:
+ // Test for non member operator
+ template<typename T>
+ static constexpr auto check(T*)
+ -> typename std::is_same<typename std::result_of<tfn_mul(const T&, const T&)>::type, T>::type;
+
+ // Test for member operator
+ template<typename T>
+ static constexpr auto check(T*)
+ -> typename std::is_same<decltype(std::declval<T>().operator*(std::declval<T>())), T>::type;
+
+ template<typename>
+ static constexpr std::false_type check(...);
+
+ typedef decltype(check<C>(0)) type;
+
+public:
+ static constexpr bool value = type::value;
+};
+
+template<typename C>
+struct has_mul_eq_impl {
+private:
+ // Test for non member operator
+ template<typename T>
+ static constexpr auto check(T*)
+ -> typename std::is_same<typename std::result_of<tfn_mul_eq(const T&, const T&)>::type, T>::type;
+
+ // Test for member operator
+ template<typename T>
+ static constexpr auto check(T*)
+ -> typename std::is_same<decltype(std::declval<T>().operator*=(std::declval<T>())), typename std::add_lvalue_reference<T>::type>::type;
+
+ template<typename>
+ static constexpr std::false_type check(...);
+
+ typedef decltype(check<C>(0)) type;
+
+public:
+ static constexpr bool value = type::value;
+};
+
+template<typename C>
+struct has_plus_eq_impl {
+private:
+ // Test for non member operator
+ template<typename T>
+ static constexpr auto check(T*)
+ -> typename std::is_same<typename std::result_of<tfn_plus_eq(const T&, const T&)>::type, T>::type;
+
+ // Test for member operator
+ template<typename T>
+ static constexpr auto check(T*)
+ -> typename std::is_same<decltype(std::declval<T>().operator+=(std::declval<T>())), typename std::add_lvalue_reference<T>::type>::type;
+
+ template<typename>
+ static constexpr std::false_type check(...);
+
+ typedef decltype(check<C>(0)) type;
+
+public:
+ static constexpr bool value = type::value;
+};
+
+template<typename C>
+struct has_minus_eq_impl {
+private:
+ // Test for non member operator
+ template<typename T>
+ static constexpr auto check(T*)
+ -> typename std::is_same<typename std::result_of<tfn_minus_eq(const T&, const T&)>::type, T>::type;
+
+ // Test for member operator
+ template<typename T>
+ static constexpr auto check(T*)
+ -> typename std::is_same<decltype(std::declval<T>().operator-=(std::declval<T>())), typename std::add_lvalue_reference<T>::type>::type;
+
+ template<typename>
+ static constexpr std::false_type check(...);
+
+ typedef decltype(check<C>(0)) type;
+
+public:
+ static constexpr bool value = type::value;
+};
+
+template<typename C>
+struct has_minus_impl {
+private:
+ // Test for non member operator
+ template<typename T>
+ static constexpr auto check(T*)
+ -> typename std::is_same<typename std::result_of<tfn_minus(const T&, const T&)>::type, T>::type;
+
+ // Test for member operator
+ template<typename T>
+ static constexpr auto check(T*)
+ -> typename std::is_same<decltype(std::declval<T>().operator-(std::declval<T>())), T>::type;
+
+ template<typename>
+ static constexpr std::false_type check(...);
+
+ typedef decltype(check<C>(0)) type;
+
+public:
+ static constexpr bool value = type::value;
+};
+
+template<class T>
+using has_plus = typename std::conditional<std::is_arithmetic<T>::value, std::true_type, has_plus_impl<T>>::type;
+
+template<class T>
+using has_minus = typename std::conditional<std::is_arithmetic<T>::value, std::true_type, has_minus_impl<T>>::type;
+
+template<class T>
+using has_equal = typename std::conditional<std::is_arithmetic<T>::value, std::true_type, std::is_copy_assignable<T>>::type;
+
+template<class T>
+using has_plus_eq = typename std::conditional<std::is_arithmetic<T>::value, std::true_type, has_plus_eq_impl<T>>::type;
+
+template<class T>
+using has_minus_eq = typename std::conditional<std::is_arithmetic<T>::value, std::true_type, has_minus_eq_impl<T>>::type;
+
+template<class T>
+using has_mul = typename std::conditional<std::is_arithmetic<T>::value, std::true_type, has_mul_impl<T>>::type;
+
+template<class T>
+using has_mul_eq = typename std::conditional<std::is_arithmetic<T>::value, std::true_type, has_mul_eq_impl<T>>::type;
+
+template<class T>
+struct has_operation{
+ static constexpr bool value = (has_plus<T>::value && has_minus<T>::value && has_equal<T>::value &&
+ has_plus_eq<T>::value && has_minus_eq<T>::value && has_mul<T>::value && has_mul_eq<T>::value);
+};
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_SPARSEMATRIX_TRAITS_H
diff --git a/fflas-ffpack/fflas/fflas_sparse/utils.h b/fflas-ffpack/fflas/fflas_sparse/utils.h
new file mode 100644
index 0000000..a667eca
--- /dev/null
+++ b/fflas-ffpack/fflas/fflas_sparse/utils.h
@@ -0,0 +1,130 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas/fflas_sparse.h
+*/
+
+#ifndef __FFLASFFPACK_fflas_fflas_sparse_utils_H
+#define __FFLASFFPACK_fflas_fflas_sparse_utils_H
+
+#include <algorithm>
+#include <numeric>
+#include <vector>
+
+namespace FFLAS{
+
+struct StatsMatrix {
+ uint64_t rowdim = 0;
+ uint64_t coldim = 0;
+ uint64_t nOnes = 0;
+ uint64_t nMOnes = 0;
+ uint64_t nOthers = 0;
+ uint64_t nnz = 0;
+ uint64_t maxRow = 0;
+ uint64_t minRow = 0;
+ uint64_t averageRow = 0;
+ uint64_t deviationRow = 0;
+ uint64_t maxCol = 0;
+ uint64_t minCol = 0;
+ uint64_t averageCol = 0;
+ uint64_t deviationCol = 0;
+ uint64_t minColDifference = 0;
+ uint64_t maxColDifference = 0;
+ uint64_t averageColDifference = 0;
+ uint64_t deviationColDifference = 0;
+ uint64_t minRowDifference = 0;
+ uint64_t maxRowDifference = 0;
+ uint64_t averageRowDifference = 0;
+ uint64_t deviationRowDifference = 0;
+ uint64_t nDenseRows = 0;
+ uint64_t nDenseCols = 0;
+ uint64_t nEmptyRows = 0;
+ uint64_t nEmptyCols = 0;
+ uint64_t nEmptyColsEnd = 0;
+ std::vector<uint64_t> denseRows;
+ std::vector<uint64_t> denseCols;
+};
+
+template <class It> double computeDeviation(It begin, It end) {
+ using T = typename std::decay<decltype(*begin)>::type;
+ T average = 0;
+ average = std::accumulate(begin, end, 0) / (end - begin);
+ T sum = 0;
+ for (It i = begin; i != end; ++i) {
+ sum += ((*(i)) - average) * ((*(i)) - average);
+ }
+ return std::sqrt(sum / (end - begin));
+}
+
+template <class Field>
+StatsMatrix getStat(const Field &F, const index_t *row, const index_t *col, typename Field::ConstElement_ptr val,
+ uint64_t rowdim, uint64_t coldim, uint64_t nnz) {
+ StatsMatrix stats;
+ stats.nnz = nnz;
+ stats.rowdim = rowdim;
+ stats.coldim = coldim;
+ std::vector<int64_t> rows(rowdim+1);
+ std::vector<int64_t> cols(coldim);
+ std::fill(rows.begin(), rows.end(), 0);
+ std::fill(cols.begin(), cols.end(), 0);
+ for (uint64_t i = 0; i < nnz; ++i) {
+ cols[col[i]]++;
+ if (F.isOne(val[i])) {
+ stats.nOnes++;
+ } else if (F.isMOne(val[i])) {
+ stats.nMOnes++;
+ } else {
+ stats.nOthers++;
+ }
+ }
+ rows[0] = row[0];
+ for(size_t i = 1 ; i < rowdim+1 ; ++i){
+ rows[i] = row[i] - row[i-1];
+ }
+ stats.nEmptyRows = std::count(rows.begin(), rows.end(), 0);
+ stats.nEmptyCols = std::count(cols.begin(), cols.end(), 0);
+ auto rowMinMax = std::minmax_element(rows.begin(), rows.end());
+ auto colMinMax = std::minmax_element(cols.begin(), cols.end());
+ stats.minRow = (*(rowMinMax.first));
+ stats.maxRow = (*(rowMinMax.second));
+ stats.minCol = (*(colMinMax.first));
+ stats.maxCol = (*(colMinMax.second));
+ stats.averageRow = std::accumulate(rows.begin(), rows.end(), 0) / rowdim;
+ stats.averageCol = std::accumulate(cols.begin(), cols.end(), 0) / coldim;
+ stats.deviationRow = (uint64_t)computeDeviation(rows.begin(), rows.end());
+ stats.deviationCol = (uint64_t)computeDeviation(cols.begin(), cols.end());
+ stats.nDenseRows = std::count_if(rows.begin(), rows.begin(),
+ [rowdim](uint64_t &x) { return x >= DENSE_THRESHOLD * rowdim; });
+ stats.nDenseCols = std::count_if(cols.begin(), cols.begin(),
+ [coldim](uint64_t &x) { return x >= DENSE_THRESHOLD * coldim; });
+ return stats;
+}
+
+}
+
+#endif
diff --git a/fflas-ffpack/ffpack/Makefile.am b/fflas-ffpack/ffpack/Makefile.am
index 33b74ff..fd18112 100644
--- a/fflas-ffpack/ffpack/Makefile.am
+++ b/fflas-ffpack/ffpack/Makefile.am
@@ -1,5 +1,5 @@
# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# adapted from LinBox configuration
#
# ========LICENCE========
@@ -24,6 +24,8 @@
pkgincludesubdir=$(pkgincludedir)/ffpack
+multiprecision= ffpack_ludivine_mp.inl ffpack_pluq_mp.inl
+
pkgincludesub_HEADERS= \
ffpack_charpoly_danilevski.inl \
ffpack_charpoly.inl \
@@ -34,7 +36,19 @@ pkgincludesub_HEADERS= \
ffpack_charpoly_kglu.inl \
ffpack_echelonforms.inl \
ffpack_ludivine.inl \
+ ffpack_pluq.inl \
+ ffpack_ppluq.inl \
ffpack_frobenius.inl \
ffpack_minpoly_construct.inl \
- ffpack_minpoly.inl
+ ffpack_minpoly.inl \
+ ffpack.inl\
+ ffpack_invert.inl\
+ ffpack_fgesv.inl\
+ ffpack_fgetrs.inl\
+ ffpack_permutation.inl\
+ ffpack_ftrtr.inl\
+ ffpack_rankprofiles.inl\
+ $(multiprecision)
+
+EXTRA_DIST=ffpack.doxy
diff --git a/fflas-ffpack/ffpack/Makefile.in b/fflas-ffpack/ffpack/Makefile.in
deleted file mode 100644
index 1d25f2e..0000000
--- a/fflas-ffpack/ffpack/Makefile.in
+++ /dev/null
@@ -1,559 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# adapted from LinBox configuration
-#
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = fflas-ffpack/ffpack
-DIST_COMMON = $(pkgincludesub_HEADERS) $(srcdir)/Makefile.am \
- $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-am__vpath_adj = case $$p in \
- $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
- *) f=$$p;; \
- esac;
-am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
-am__install_max = 40
-am__nobase_strip_setup = \
- srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
-am__nobase_strip = \
- for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
-am__nobase_list = $(am__nobase_strip_setup); \
- for p in $$list; do echo "$$p $$p"; done | \
- sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
- $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
- if (++n[$$2] == $(am__install_max)) \
- { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
- END { for (dir in files) print dir, files[dir] }'
-am__base_list = \
- sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
- sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-am__uninstall_files_from_dir = { \
- test -z "$$files" \
- || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
- || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
- $(am__cd) "$$dir" && rm -f $$files; }; \
- }
-am__installdirs = "$(DESTDIR)$(pkgincludesubdir)"
-HEADERS = $(pkgincludesub_HEADERS)
-ETAGS = etags
-CTAGS = ctags
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-pkgincludesubdir = $(pkgincludedir)/ffpack
-pkgincludesub_HEADERS = \
- ffpack_charpoly_danilevski.inl \
- ffpack_charpoly.inl \
- ffpack_charpoly_kgfastgeneralized.inl \
- ffpack.h \
- ffpack_charpoly_kgfast.inl \
- ffpack_krylovelim.inl \
- ffpack_charpoly_kglu.inl \
- ffpack_echelonforms.inl \
- ffpack_ludivine.inl \
- ffpack_frobenius.inl \
- ffpack_minpoly_construct.inl \
- ffpack_minpoly.inl
-
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps fflas-ffpack/ffpack/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps fflas-ffpack/ffpack/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-install-pkgincludesubHEADERS: $(pkgincludesub_HEADERS)
- @$(NORMAL_INSTALL)
- @list='$(pkgincludesub_HEADERS)'; test -n "$(pkgincludesubdir)" || list=; \
- if test -n "$$list"; then \
- echo " $(MKDIR_P) '$(DESTDIR)$(pkgincludesubdir)'"; \
- $(MKDIR_P) "$(DESTDIR)$(pkgincludesubdir)" || exit 1; \
- fi; \
- for p in $$list; do \
- if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
- echo "$$d$$p"; \
- done | $(am__base_list) | \
- while read files; do \
- echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(pkgincludesubdir)'"; \
- $(INSTALL_HEADER) $$files "$(DESTDIR)$(pkgincludesubdir)" || exit $$?; \
- done
-
-uninstall-pkgincludesubHEADERS:
- @$(NORMAL_UNINSTALL)
- @list='$(pkgincludesub_HEADERS)'; test -n "$(pkgincludesubdir)" || list=; \
- files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
- dir='$(DESTDIR)$(pkgincludesubdir)'; $(am__uninstall_files_from_dir)
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile $(HEADERS)
-installdirs:
- for dir in "$(DESTDIR)$(pkgincludesubdir)"; do \
- test -z "$$dir" || $(MKDIR_P) "$$dir"; \
- done
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am: install-pkgincludesubHEADERS
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am: uninstall-pkgincludesubHEADERS
-
-.MAKE: install-am install-strip
-
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
- clean-libtool ctags distclean distclean-generic \
- distclean-libtool distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-pdf install-pdf-am \
- install-pkgincludesubHEADERS install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- tags uninstall uninstall-am uninstall-pkgincludesubHEADERS
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/fflas-ffpack/ffpack/ffpack.doxy b/fflas-ffpack/ffpack/ffpack.doxy
new file mode 100644
index 0000000..b7f8428
--- /dev/null
+++ b/fflas-ffpack/ffpack/ffpack.doxy
@@ -0,0 +1,31 @@
+// Copyright (c) 2011 FFLAS-FFPACK
+// written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+//
+// ========LICENCE========
+// This file is part of the library FFLAS-FFPACK.
+//
+// FFLAS-FFPACK is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+// ========LICENCE========
+//
+
+
+/** \ingroup fflas-ffpack
+ * \defgroup ffpack FFPACK
+ *
+ * \brief Class FFPACK provides functions using fflas much as Lapack uses BLAS.
+ *
+ */
+
+// vim:syn=doxygen
diff --git a/fflas-ffpack/ffpack/ffpack.h b/fflas-ffpack/ffpack/ffpack.h
index 8fd81fd..aa71fa3 100644
--- a/fflas-ffpack/ffpack/ffpack.h
+++ b/fflas-ffpack/ffpack/ffpack.h
@@ -1,9 +1,11 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
/* ffpack.h
* Copyright (C) 2005 Clement Pernet
+ * 2014 FFLAS-FFPACK group
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
*
*
* ========LICENCE========
@@ -35,12 +37,18 @@
#ifndef __FFLASFFPACK_ffpack_H
#define __FFLASFFPACK_ffpack_H
-#include "fflas-ffpack/fflas/fflas.h"
+#include <fflas-ffpack/fflas-ffpack-config.h>
+
+#ifdef __FFLASFFPACK_USE_OPENMP
+#include <omp.h>
+#endif
+#include "fflas-ffpack/fflas/fflas.h"
+//#include "parallel.h"
#include <list>
#include <vector>
#include <iostream> // std::cout
-
+#include <algorithm>
// The use of the small size LQUP is currently disabled:
// need for a better handling of element base (double, float, generic) combined
@@ -53,6 +61,7 @@
#ifndef __FFPACK_CHARPOLY_THRESHOLD
#define __FFPACK_CHARPOLY_THRESHOLD 30
#endif
+
/** @brief <b>F</b>inite <b>F</b>ield <b>PACK</b>
* Set of elimination based routines for dense linear algebra.
*
@@ -60,14 +69,13 @@
* level routines based on elimination.
\ingroup ffpack
*/
-namespace FFPACK {
+namespace FFPACK { /* tags */
-
- // public:
- enum FFPACK_LUDIVINE_TAG
+ enum FFPACK_LU_TAG
{
- FfpackLQUP=1,
- FfpackSingular=2
+ FfpackSlabRecursive = 1,
+ FfpackTileRecursive = 2,
+ FfpackSingular = 3
};
enum FFPACK_CHARPOLY_TAG
@@ -89,6 +97,61 @@ namespace FFPACK {
FfpackKGF=2
};
+}
+namespace FFPACK { /* Permutations */
+
+ /*****************/
+ /* PERMUTATIONS */
+ /*****************/
+
+
+ void LAPACKPerm2MathPerm (size_t * MathP, const size_t * LapackP,
+ const size_t N);
+
+ void MathPerm2LAPACKPerm (size_t * LapackP, const size_t * MathP,
+ const size_t N);
+
+ template <class Field>
+ void MatrixApplyS (const Field& F, typename Field::Element_ptr A, const size_t lda, const size_t width,
+ const size_t M2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4);
+
+ template <class Element>
+ void PermApplyS (Element* A, const size_t lda, const size_t width,
+ const size_t M2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4);
+
+ template <class Field>
+ void MatrixApplyT (const Field& F, typename Field::Element_ptr A, const size_t lda, const size_t width,
+ const size_t N2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4);
+
+ template <class Element>
+ void PermApplyT (Element* A, const size_t lda, const size_t width,
+ const size_t N2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4);
+
+ void composePermutationsP (size_t * MathP,
+ const size_t * P1,
+ const size_t * P2,
+ const size_t R, const size_t N);
+ void composePermutationsQ (size_t * MathP,
+ const size_t * Q1,
+ const size_t * Q2,
+ const size_t R, const size_t N);
+
+ void cyclic_shift_mathPerm (size_t * P, const size_t s);
+ template<typename Base_t>
+ void cyclic_shift_row_col(Base_t * A, size_t m, size_t n, size_t lda);
+ template<class Field>
+ void cyclic_shift_row(const Field& F, typename Field::Element_ptr A, size_t m, size_t n, size_t lda);
+ template<class Field>
+ void cyclic_shift_col(const Field& F, typename Field::Element_ptr A, size_t m, size_t n, size_t lda);
+
/** Apply a permutation submatrix of P (between ibeg and iend) to a matrix
* to (iend-ibeg) vectors of size M stored in A (as column for NoTrans
@@ -112,161 +175,52 @@ namespace FFPACK {
applyP( const Field& F,
const FFLAS::FFLAS_SIDE Side,
const FFLAS::FFLAS_TRANSPOSE Trans,
- const size_t M, const int ibeg, const int iend,
- typename Field::Element * A, const size_t lda, const size_t * P )
- {
+ const size_t M, const size_t ibeg, const size_t iend,
+ typename Field::Element_ptr A, const size_t lda, const size_t * P );
- if ( Side == FFLAS::FflasRight ) {
- typename Field::Element tmp;
- if ( Trans == FFLAS::FflasTrans )
- for (size_t j = 0 ; j < M ; ++j){
- for ( size_t i=(size_t)ibeg; i<(size_t) iend; ++i)
- if ( P[i]> i ) {
- F.assign(tmp,A[j*lda+P[i]]);
- F.assign(A[j*lda+P[i]],A[j*lda+i]);
- F.assign(A[j*lda+i],tmp);
- // std::swap(A[j*lda+P[i]],A[j*lda+i]);
- }
- //FFLAS::fswap( F, M, A + P[i]*1, lda, A + i*1, lda );
- }
- else // Trans == FFLAS::FflasNoTrans
- for (size_t j = 0 ; j < M ; ++j){
- for (int i=iend; i-->ibeg; )
- if ( P[i]>(size_t)i ) {
- F.assign(tmp,A[j*lda+P[i]]);
- F.assign(A[j*lda+P[i]],A[j*lda+(size_t)i]);
- F.assign(A[j*lda+(size_t)i],tmp);
- // std::swap(A[j*lda+P[i]],A[j*lda+(size_t)i]);
- }
- //FFLAS::fswap( F, M, A + P[i]*1, lda, A + i*1, lda );
- }
- }
- else { // Side == FFLAS::FflasLeft
- if ( Trans == FFLAS::FflasNoTrans )
- for (size_t i=(size_t)ibeg; i<(size_t)iend; ++i){
- if ( P[i]> (size_t) i )
- FFLAS::fswap( F, M,
- A + P[i]*lda, 1,
- A + i*lda, 1 );
- }
- else // Trans == FFLAS::FflasTrans
- for (int i=iend; i-->ibeg; ){
- if ( P[i]> (size_t) i ){
- FFLAS::fswap( F, M,
- A + P[i]*lda, 1,
- A + (size_t)i*lda, 1 );
- }
- }
- }
-
- }
+//#ifdef __FFLASFFPACK_USE_OPENMP
- /** Computes the rank of the given matrix using a LQUP factorization.
- * The input matrix is modified.
- * @param F field
- * @param M row dimension of the matrix
- * @param N column dimension of the matrix
- * @param A input matrix
- * @param lda leading dimension of A
- */
- template <class Field>
- size_t
- Rank( const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda)
- {
- size_t *P = new size_t[N];
- size_t *Q = new size_t[M];
- size_t R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N,
- A, lda, P, Q, FfpackLQUP);
- delete[] Q;
- delete[] P;
- return R;
- }
+ //! Parallel applyP with OPENMP tasks
+ template<class Field>
+ void
+ papplyP( const Field& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const FFLAS::FFLAS_TRANSPOSE Trans,
+ const size_t m, const size_t ibeg, const size_t iend,
+ typename Field::Element_ptr A, const size_t lda, const size_t * P );
- /** Returns true if the given matrix is singular.
- * The method is a block elimination with early termination
- *
- * using LQUP factorization with early termination.
- * @warning The input matrix is modified.
- * @param F field
- * @param M row dimension of the matrix
- * @param N column dimension of the matrix
- * @param A input matrix
- * @param lda leading dimension of A
- */
+ //! Parallel applyT with OPENMP tasks
template <class Field>
- bool
- IsSingular( const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda)
- {
- size_t *P = new size_t[N];
- size_t *Q = new size_t[M];
- bool singular = !LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N,
- A, lda, P, Q, FfpackSingular);
+ void pMatrixApplyT (const Field& F, typename Field::Element_ptr A, const size_t lda,
+ const size_t width, const size_t N2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4) ;
- delete[] P;
- delete[] Q;
- return singular;
- }
- /** Returns the determinant of the given matrix.
- * The method is a block elimination with early termination
- * @warning The input matrix is modified.
- * @param F field
- * @param M row dimension of the matrix
- * @param N column dimension of the matrix
- * @param A input matrix
- * @param lda leading dimension of A
- */
- /// using LQUP factorization with early termination.
+ //! Parallel applyS tasks with OPENMP tasks
template <class Field>
- typename Field::Element
- Det( const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda)
- {
+ void pMatrixApplyS (const Field& F, typename Field::Element_ptr A, const size_t lda,
+ const size_t width, const size_t M2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4) ;
- typename Field::Element det; F.init(det);
- bool singular;
- size_t *P = new size_t[N];
- size_t *Q = new size_t[M];
- singular = !LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N,
- A, lda, P, Q, FfpackSingular);
- if (singular){
- F.assign(det,F.zero);
- delete[] P;
- delete[] Q;
- return det;
- }
- else{
- F.assign(det,F.one);
- typename Field::Element *Ai=A;
- for (; Ai < A+ M*lda+N; Ai+=lda+1 )
- F.mulin( det, *Ai );
- int count=0;
- for (size_t i=0;i<N;++i)
- if (P[i] != i) ++count;
-
- if ((count&1) == 1)
- F.negin(det);
- }
- delete[] P;
- delete[] Q;
- return det;
- }
-
- // forward declaration
template<class Field>
- void
- solveLB2( const Field& F, const FFLAS::FFLAS_SIDE Side,
- const size_t M, const size_t N, const size_t R,
- typename Field::Element * L, const size_t ldl,
- const size_t * Q,
- typename Field::Element * B, const size_t ldb ) ;
+ size_t
+ pPLUQ(const Field& Fi, const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Q, int nt);
+
+
+//#endif
+} // FFPACK permutations
+// #include "ffpack_permutation.inl"
+namespace FFPACK { /* fgetrs, fgesv */
/** Solve the system \f$A X = B\f$ or \f$X A = B\f$.
* Solving using the \c LQUP decomposition of \p A
@@ -293,71 +247,10 @@ namespace FFPACK {
fgetrs (const Field& F,
const FFLAS::FFLAS_SIDE Side,
const size_t M, const size_t N, const size_t R,
- typename Field::Element *A, const size_t lda,
+ typename Field::Element_ptr A, const size_t lda,
const size_t *P, const size_t *Q,
- typename Field::Element *B, const size_t ldb,
- int * info)
- {
-
- *info =0;
- if (Side == FFLAS::FflasLeft) { // Left looking solve A X = B
-
- solveLB2 (F, FFLAS::FflasLeft, M, N, R, A, lda, Q, B, ldb);
-
- applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
- N, 0,(int) R, B, ldb, Q);
-
- bool consistent = true;
- for (size_t i = R; i < M; ++i)
- for (size_t j = 0; j < N; ++j)
- if (!F.isZero (*(B + i*ldb + j)))
- consistent = false;
- if (!consistent) {
- std::cerr<<"System is inconsistent"<<std::endl;
- *info = 1;
- }
- // The last rows of B are now supposed to be 0
-#if 0
- for (size_t i = R; i < M; ++i)
- for (size_t j = 0; j < N; ++j)
- *(B + i*ldb + j) = F.zero;
-#endif
-
- ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
- R, N, F.one, A, lda , B, ldb);
-
- applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- N, 0,(int) R, B, ldb, P);
-
- }
-else { // Right Looking X A = B
-
- applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
- M, 0,(int) R, B, ldb, P);
-
- ftrsm (F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
- M, R, F.one, A, lda , B, ldb);
-
- fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M, N-R, R, F.one,
- B, ldb, A+R, lda, F.mOne, B+R, ldb);
-
- bool consistent = true;
- for (size_t i = 0; i < M; ++i)
- for (size_t j = R; j < N; ++j)
- if (!F.isZero (*(B + i*ldb + j)))
- consistent = false;
- if (!consistent) {
- std::cerr<<"System is inconsistent"<<std::endl;
- *info = 1;
- }
- // The last cols of B are now supposed to be 0
-
- applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- M, 0,(int) R, B, ldb, Q);
-
- solveLB2 (F, FFLAS::FflasRight, M, N, R, A, lda, Q, B, ldb);
- }
- }
+ typename Field::Element_ptr B, const size_t ldb,
+ int * info);
/** Solve the system A X = B or X A = B.
* Solving using the LQUP decomposition of A
@@ -383,170 +276,15 @@ else { // Right Looking X A = B
* @param info Succes of the computation: 0 if successfull, >0 if system is inconsistent
*/
template <class Field>
- typename Field::Element *
+ typename Field::Element_ptr
fgetrs (const Field& F,
const FFLAS::FFLAS_SIDE Side,
const size_t M, const size_t N, const size_t NRHS, const size_t R,
- typename Field::Element *A, const size_t lda,
+ typename Field::Element_ptr A, const size_t lda,
const size_t *P, const size_t *Q,
- typename Field::Element *X, const size_t ldx,
- const typename Field::Element *B, const size_t ldb,
- int * info)
- {
-
- *info =0;
-
- typename Field::Element* W;
- size_t ldw;
-
- if (Side == FFLAS::FflasLeft) { // Left looking solve A X = B
-
- // Initializing X to 0 (to be optimized)
- for (size_t i = 0; i <N; ++i)
- for (size_t j=0; j< NRHS; ++j)
- F.assign (*(X+i*ldx+j), F.zero);
-
- if (M > N){ // Cannot copy B into X
- W = new typename Field::Element [M*NRHS];
- ldw = NRHS;
- for (size_t i=0; i < M; ++i)
- FFLAS::fcopy (F, NRHS, W + i*ldw, 1, B + i*ldb, 1);
-
- solveLB2 (F, FFLAS::FflasLeft, M, NRHS, R, A, lda, Q, W, ldw);
-
- applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
- NRHS, 0,(int) R, W, ldw, Q);
-
- bool consistent = true;
- for (size_t i = R; i < M; ++i)
- for (size_t j = 0; j < NRHS; ++j)
- if (!F.isZero (*(W + i*ldw + j)))
- consistent = false;
- if (!consistent) {
- std::cerr<<"System is inconsistent"<<std::endl;
- *info = 1;
- delete[] W;
- return X;
- }
- // Here the last rows of W are supposed to be 0
-
- ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
- R, NRHS, F.one, A, lda , W, ldw);
-
- for (size_t i=0; i < R; ++i)
- FFLAS::fcopy (F, NRHS, X + i*ldx, 1, W + i*ldw, 1);
-
- delete[] W;
- applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- NRHS, 0,(int) R, X, ldx, P);
-
- }
-else { // Copy B to X directly
- for (size_t i=0; i < M; ++i)
- FFLAS::fcopy (F, NRHS, X + i*ldx, 1, B + i*ldb, 1);
-
- solveLB2 (F, FFLAS::FflasLeft, M, NRHS, R, A, lda, Q, X, ldx);
-
- applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
- NRHS, 0,(int) R, X, ldx, Q);
-
- bool consistent = true;
- for (size_t i = R; i < M; ++i)
- for (size_t j = 0; j < NRHS; ++j)
- if (!F.isZero (*(X + i*ldx + j)))
- consistent = false;
- if (!consistent) {
- std::cerr<<"System is inconsistent"<<std::endl;
- *info = 1;
- return X;
- }
- // Here the last rows of W are supposed to be 0
-
- ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
- R, NRHS, F.one, A, lda , X, ldx);
-
- applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- NRHS, 0,(int) R, X, ldx, P);
- }
- return X;
-
- }
-else { // Right Looking X A = B
-
- for (size_t i = 0; i <NRHS; ++i)
- for (size_t j=0; j< M; ++j)
- F.assign (*(X+i*ldx+j), F.zero);
-
- if (M < N) {
- W = new typename Field::Element [NRHS*N];
- ldw = N;
- for (size_t i=0; i < NRHS; ++i)
- FFLAS::fcopy (F, N, W + i*ldw, 1, B + i*ldb, 1);
-
- applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
- NRHS, 0,(int) R, W, ldw, P);
-
- ftrsm (F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
- NRHS, R, F.one, A, lda , W, ldw);
-
- fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, NRHS, N-R, R, F.one,
- W, ldw, A+R, lda, F.mOne, W+R, ldw);
-
- bool consistent = true;
- for (size_t i = 0; i < NRHS; ++i)
- for (size_t j = R; j < N; ++j)
- if (!F.isZero (*(W + i*ldw + j)))
- consistent = false;
- if (!consistent) {
- std::cerr<<"System is inconsistent"<<std::endl;
- *info = 1;
- delete[] W;
- return X;
- }
- // The last N-R cols of W are now supposed to be 0
- for (size_t i=0; i < NRHS; ++i)
- FFLAS::fcopy (F, R, X + i*ldx, 1, W + i*ldb, 1);
- delete[] W;
- applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- NRHS, 0,(int) R, X, ldx, Q);
-
- solveLB2 (F, FFLAS::FflasRight, NRHS, M, R, A, lda, Q, X, ldx);
-
- }
-else {
- for (size_t i=0; i < NRHS; ++i)
- FFLAS::fcopy (F, N, X + i*ldx, 1, B + i*ldb, 1);
-
- applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
- NRHS, 0,(int) R, X, ldx, P);
-
- ftrsm (F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
- NRHS, R, F.one, A, lda , X, ldx);
-
- fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, NRHS, N-R, R, F.one,
- X, ldx, A+R, lda, F.mOne, X+R, ldx);
-
- bool consistent = true;
- for (size_t i = 0; i < NRHS; ++i)
- for (size_t j = R; j < N; ++j)
- if (!F.isZero (*(X + i*ldx + j)))
- consistent = false;
- if (!consistent) {
- std::cerr<<"System is inconsistent"<<std::endl;
- *info = 1;
- return X;
- }
- // The last N-R cols of W are now supposed to be 0
-
- applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- NRHS, 0,(int) R, X, ldx, Q);
-
- solveLB2 (F, FFLAS::FflasRight, NRHS, M, R, A, lda, Q, X, ldx);
-
- }
- return X;
- }
- }
+ typename Field::Element_ptr X, const size_t ldx,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ int * info);
/** @brief Square system solver
* @param F The computation domain
@@ -570,29 +308,9 @@ else {
fgesv (const Field& F,
const FFLAS::FFLAS_SIDE Side,
const size_t M, const size_t N,
- typename Field::Element *A, const size_t lda,
- typename Field::Element *B, const size_t ldb,
- int * info)
- {
-
- size_t Na;
- if (Side == FFLAS::FflasLeft)
- Na = M;
- else
- Na = N;
-
- size_t* P = new size_t[Na];
- size_t* Q = new size_t[Na];
-
- size_t R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, Na, Na, A, lda, P, Q, FfpackLQUP);
-
- fgetrs (F, Side, M, N, R, A, lda, P, Q, B, ldb, info);
-
- delete[] P;
- delete[] Q;
-
- return R;
- }
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb,
+ int * info);
/** @brief Rectangular system solver
* @param F The computation domain
@@ -619,24 +337,10 @@ else {
fgesv (const Field& F,
const FFLAS::FFLAS_SIDE Side,
const size_t M, const size_t N, const size_t NRHS,
- typename Field::Element *A, const size_t lda,
- typename Field::Element *X, const size_t ldx,
- const typename Field::Element *B, const size_t ldb,
- int * info)
- {
-
- size_t* P = new size_t[N];
- size_t* Q = new size_t[M];
-
- size_t R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Q, FfpackLQUP);
-
- fgetrs (F, Side, M, N, NRHS, R, A, lda, P, Q, X, ldx, B, ldb, info);
-
- delete[] P;
- delete[] Q;
-
- return R;
- }
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t ldx,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ int * info);
/** Solve the system Ax=b.
* Solving using LQUP factorization and
@@ -651,593 +355,214 @@ else {
* @param b right hand side vector
* @param incb increment of b
*/
- /// Solve linear system using LQUP factorization.
- template <class Field>
- typename Field::Element*
- Solve( const Field& F, const size_t M,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * x, const int incx,
- const typename Field::Element * b, const int incb )
- {
-
- size_t *P = new size_t[M];
- size_t *rowP = new size_t[M];
- if (LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, M, A, lda, P, rowP, FfpackLQUP) < M){
- std::cerr<<"SINGULAR MATRIX"<<std::endl;
- delete[] P;
- delete[] rowP;
- return x;
- }
- else{
- FFLAS::fcopy( F, M, x, incx, b, incb );
+} // FFPACK fgesv, fgetrs
+// #include "ffpack_fgesv.inl"
+// #include "ffpack_fgetrs.inl"
- ftrsv(F, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, M,
- A, lda , x, incx);
- ftrsv(F, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, M,
- A, lda , x, incx);
- applyP( F, FFLAS::FflasRight, FFLAS::FflasTrans,
- M, 0,(int) M, x, incx, P );
- delete[] rowP;
- delete[] P;
+namespace FFPACK { /* ftrtr */
- return x;
- }
- }
-
-
- /** Computes a basis of the Left/Right nullspace of the matrix A.
- * return the dimension of the nullspace.
- *
- * @param F The computation domain
- * @param Side
- * @param M
- * @param N
- * @param A input matrix of dimension M x N, A is modified
- * @param lda
- * @param NS output matrix of dimension N x NSdim (allocated here)
- * @param ldn
- * @param NSdim the dimension of the Nullspace (N-rank(A))
- *
- */
- template <class Field>
- size_t NullSpaceBasis (const Field& F, const FFLAS::FFLAS_SIDE Side,
- const size_t M, const size_t N,
- typename Field::Element* A, const size_t lda,
- typename Field::Element*& NS, size_t& ldn,
- size_t& NSdim)
- {
- if (Side == FFLAS::FflasRight) { // Right NullSpace
- size_t* P = new size_t[N];
- size_t* Qt = new size_t[M];
-
- size_t R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Qt);
-
- ldn = N-R;
- NSdim = ldn;
- NS = new typename Field::Element [N*ldn];
-
- for (size_t i=0; i<R; ++i)
- FFLAS::fcopy (F, ldn, NS + i*ldn, 1, A + R + i*lda, 1);
-
- ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, R, ldn,
- F.mOne, A, lda, NS, ldn);
-
- for (size_t i=R; i<N; ++i){
- for (size_t j=0; j < ldn; ++j)
- F.assign (*(NS+i*ldn+j), F.zero);
- F.assign (*(NS + i*ldn + i-R), F.one);
- }
- applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- NSdim, 0,(int) R, NS, ldn, P);
- delete [] P;
- delete [] Qt;
- return N-R;
- }
-else { // Left NullSpace
- size_t* P = new size_t[M];
- size_t* Qt = new size_t[N];
-
- size_t R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasTrans, M, N, A, lda, P, Qt);
-
- ldn = M;
- NSdim = M-R;
- NS = new typename Field::Element [NSdim*ldn];
- for (size_t i=0; i<NSdim; ++i)
- FFLAS::fcopy (F, R, NS + i*ldn, 1, A + (R + i)*lda, 1);
- ftrsm (F, FFLAS::FflasRight, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, NSdim, R,
- F.mOne, A, lda, NS, ldn);
-
- for (size_t i=0; i<NSdim; ++i){
- for (size_t j=R; j < M; ++j)
- F.assign (*(NS+i*ldn+j), F.zero);
- F.assign (*(NS + i*ldn + i+R), F.one);
- }
- applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- NSdim, 0,(int) R, NS, ldn, P);
- delete [] P;
- delete [] Qt;
- return N-R;
- }
- }
-
- /** Computes the row rank profile of A.
- *
+ /** Compute the inverse of a triangular matrix.
* @param F
- * @param M
+ * @param Uplo whether the matrix is upper of lower triangular
+ * @param Diag whether the matrix if unit diagonal
* @param N
- * @param A input matrix of dimension M x N
+ * @param A
* @param lda
- * @param rkprofile return the rank profile as an array of row indexes, of dimension r=rank(A)
*
- * rkprofile is allocated during the computation.
- * @returns R
*/
- template <class Field>
- size_t RowRankProfile (const Field& F, const size_t M, const size_t N,
- typename Field::Element* A, const size_t lda,
- size_t* &rkprofile)
- {
- size_t *P = new size_t[N];
- size_t *Q = new size_t[M];
- size_t R;
+ template<class Field>
+ void
+ ftrtri (const Field& F, const FFLAS::FFLAS_UPLO Uplo, const FFLAS::FFLAS_DIAG Diag,
+ const size_t N, typename Field::Element_ptr A, const size_t lda);
- R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Q);
- rkprofile = new size_t[R];
- for (size_t i=0; i<R; ++i)
- rkprofile[i] = Q[i];
- delete[] P;
- delete[] Q;
- return R;
- }
+ template<class Field>
+ void trinv_left( const Field& F, const size_t N, typename Field::ConstElement_ptr L, const size_t ldl,
+ typename Field::Element_ptr X, const size_t ldx );
- /** Computes the column rank profile of A.
- *
+ /** Compute the product UL.
+ * Product UL of the upper, resp lower triangular matrices U and L
+ * stored one above the other in the square matrix A.
+ * Diag == Unit if the matrix U is unit diagonal
* @param F
- * @param M
+ * @param diag
* @param N
- * @param A input matrix of dimension
+ * @param A
* @param lda
- * @param rkprofile return the rank profile as an array of row indexes, of dimension r=rank(A)
*
- * A is modified
- * rkprofile is allocated during the computation.
- * @returns R
*/
- template <class Field>
- size_t ColumnRankProfile (const Field& F, const size_t M, const size_t N,
- typename Field::Element* A, const size_t lda,
- size_t* &rkprofile)
- {
- size_t *P = new size_t[N];
- size_t *Q = new size_t[M];
- size_t R;
+ template<class Field>
+ void
+ ftrtrm (const Field& F, const FFLAS::FFLAS_DIAG diag, const size_t N,
+ typename Field::Element_ptr A, const size_t lda);
- R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasTrans, M, N, A, lda, P, Q);
- rkprofile = new size_t[R];
+} // FFPACK ftrtr
+// #include "ffpack_ftrtr.inl"
- for (size_t i=0; i<R; ++i)
- rkprofile[i] = Q[i];
- delete[] P;
- delete[] Q;
- return R;
- }
+namespace FFPACK { /* PLUQ */
- /** RowRankProfileSubmatrixIndices.
- * Computes the indices of the submatrix r*r X of A whose rows correspond to
- * the row rank profile of A.
- *
- * @param F
- * @param M
- * @param N
- * @param A input matrix of dimension
- * @param rowindices array of the row indices of X in A
- * @param colindices array of the col indices of X in A
- * @param lda
- * @param[out] R
- *
- * rowindices and colindices are allocated during the computation.
- * A is modified
- * @returns R
+ /** @brief Compute the PLUQ factorization of the given matrix.
+ * Using a block algorithm and return its rank.
+ * The permutations P and Q are represented
+ * using LAPACK's convention.
+ * @param F field
+ * @param Diag whether U should have a unit diagonal or not
+ * @param trans, \c LU of \f$A^t\f$
+ * @param M matrix row dimension
+ * @param N matrix column dimension
+ * @param A input matrix
+ * @param lda leading dimension of \p A
+ * @param P the row permutation
+ * @param Q the column permutation
+
+ * @return the rank of \p A
+ * @bib
+ * - Dumas J-G., Pernet C., and Sultan Z. <i>\c Simultaneous computation of the row and column rank profiles </i>, ISSAC'13, 2013
+ * .
*/
- template <class Field>
- size_t RowRankProfileSubmatrixIndices (const Field& F,
- const size_t M, const size_t N,
- typename Field::Element* A,
- const size_t lda,
- size_t*& rowindices,
- size_t*& colindices,
- size_t& R)
- {
- size_t *P = new size_t[N];
- size_t *Q = new size_t[M];
-
- R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Q);
- rowindices = new size_t[M];
- colindices = new size_t[N];
- for (size_t i=0; i<R; ++i){
- rowindices [i] = Q [i];
- }
- for (size_t i=0; i<N; ++i)
- colindices [i] = i;
- size_t tmp;
- for (size_t i=0; i<R; ++i){
- if (i != P[i]){
- tmp = colindices[i];
- colindices[i] = colindices[P[i]];
- colindices[P[i]] = tmp;
- }
- }
-
- delete[] P;
- delete[] Q;
-
- return R;
- }
+ template<class Field>
+ size_t
+ PLUQ (const Field& F, const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t*P, size_t *Q);
- /** Computes the indices of the submatrix r*r X of A whose columns correspond to
- * the column rank profile of A.
- *
- * @param F
- * @param M
- * @param N
- * @param A input matrix of dimension
- * @param rowindices array of the row indices of X in A
- * @param colindices array of the col indices of X in A
- * @param lda
- * @param[out] R
+} // FFPACK PLUQ
+// #include "ffpack_pluq.inl"
+
+namespace FFPACK { /* ludivine */
+
+ /** @brief Compute the CUP factorization of the given matrix.
+ * Using
+ * a block algorithm and return its rank.
+ * The permutations P and Q are represented
+ * using LAPACK's convention.
+ * @param F field
+ * @param Diag whether U should have a unit diagonal or not
+ * @param trans \c LU of \f$A^t\f$
+ * @param M matrix row dimension
+ * @param N matrix column dimension
+ * @param A input matrix
+ * @param lda leading dimension of \p A
+ * @param P the column permutation
+ * @param Qt the transpose of the row permutation \p Q
+ * @param LuTag flag for setting the earling termination if the matrix
+ * is singular
+ * @param cutoff UNKOWN TAG, probably a switch to a faster algo below \c cutoff
*
- * rowindices and colindices are allocated during the computation.
- * @warning A is modified
- * \return R
+ * @return the rank of \p A
+ * @bib
+ * - Jeannerod C-P, Pernet, C. and Storjohann, A. <i>\c Rank-profile revealing Gaussian elimination and the CUP matrix decomposition </i>, J. of Symbolic Comp., 2013
+ * - Pernet C, Brassel M <i>\c LUdivine, une divine factorisation \c LU</i>, 2002
+ * .
*/
template <class Field>
- size_t ColRankProfileSubmatrixIndices (const Field& F,
- const size_t M, const size_t N,
- typename Field::Element* A,
- const size_t lda,
- size_t*& rowindices,
- size_t*& colindices,
- size_t& R)
- {
- size_t *P = new size_t[M];
- size_t *Q = new size_t[N];
-
- R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasTrans, M, N, A, lda, P, Q);
- rowindices = new size_t[M];
- colindices = new size_t[N];
- for (size_t i=0; i<R; ++i)
- colindices [i] = Q [i];
-
- for (size_t i=0; i<N; ++i)
- rowindices [i] = i;
-
- size_t tmp;
- for (size_t i=0; i<R; ++i){
- if (i != P[i]){
- tmp = rowindices[i];
- rowindices[i] = rowindices[P[i]];
- rowindices[P[i]] = tmp;
- }
- }
- delete[] P;
- delete[] Q;
-
- return R;
- }
-
- /** Compute the r*r submatrix X of A, by picking the row rank profile rows of A.
- *
- * @param F
- * @param M
- * @param N
- * @param A input matrix of dimension M x N
- * @param lda
- * @param X the output matrix
- * @param[out] R
- *
- * A is not modified
- * X is allocated during the computation.
- * @return R
- */
+ size_t
+ LUdivine (const Field& F, const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Qt,
+ const FFPACK_LU_TAG LuTag = FfpackSlabRecursive,
+ const size_t cutoff=__FFPACK_LUDIVINE_CUTOFF);
+
+ template<class Element>
+ class callLUdivine_small;
+
+ //! LUdivine small case
template <class Field>
- size_t RowRankProfileSubmatrix (const Field& F,
- const size_t M, const size_t N,
- typename Field::Element* A,
- const size_t lda,
- typename Field::Element*& X, size_t& R)
- {
+ size_t
+ LUdivine_small (const Field& F, const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Q,
+ const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
- size_t * rowindices, * colindices;
+ //! LUdivine gauss
+ template <class Field>
+ size_t
+ LUdivine_gauss (const Field& F, const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Q,
+ const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
- typename Field::Element * A2 = FFLAS::MatCopy (F, M, N, A, lda);
+ namespace Protected {
- RowRankProfileSubmatrixIndices (F, M, N, A2, N, rowindices, colindices, R);
- X = new typename Field::Element[R*R];
- for (size_t i=0; i<R; ++i)
- for (size_t j=0; j<R; ++j)
- F.assign (*(X + i*R + j), *(A + rowindices[i]*lda + colindices[j]));
- delete[] A2;
- delete[] rowindices;
- delete[] colindices;
- return R;
- }
- /** Compute the \f$ r\times r\f$ submatrix X of A, by picking the row rank profile rows of A.
- *
- *
- * @param F
- * @param M
- * @param N
- * @param A input matrix of dimension M x N
- * @param lda
- * @param X the output matrix
- * @param[out] R
- *
- * A is not modified
- * X is allocated during the computation.
- * \returns R
- */
- template <class Field>
- size_t ColRankProfileSubmatrix (const Field& F, const size_t M, const size_t N,
- typename Field::Element* A, const size_t lda,
- typename Field::Element*& X, size_t& R)
- {
-
- size_t * rowindices, * colindices;
-
- typename Field::Element * A2 = FFLAS::MatCopy (F, M, N, A, lda);
-
- ColRankProfileSubmatrixIndices (F, M, N, A2, N, rowindices, colindices, R);
-
- X = new typename Field::Element[R*R];
- for (size_t i=0; i<R; ++i)
- for (size_t j=0; j<R; ++j)
- F.assign (*(X + i*R + j), *(A + rowindices[i]*lda + colindices[j]));
- delete[] A2;
- delete[] colindices;
- delete[] rowindices;
- return R;
- }
-
- /** LQUPtoInverseOfFullRankMinor.
- * Suppose A has been factorized as L.Q.U.P, with rank r.
- * Then Qt.A.Pt has an invertible leading principal r x r submatrix
- * This procedure efficiently computes the inverse of this minor and puts it into X.
- * @note It changes the lower entries of A_factors in the process (NB: unless A was nonsingular and square)
- *
- * @param F
- * @param rank rank of the matrix.
- * @param A_factors matrix containing the L and U entries of the factorization
- * @param lda
- * @param QtPointer theLQUP->getQ()->getPointer() (note: getQ returns Qt!)
- * @param X desired location for output
- * @param ldx
- */
- template <class Field>
- typename Field::Element*
- LQUPtoInverseOfFullRankMinor( const Field& F, const size_t rank,
- typename Field::Element * A_factors, const size_t lda,
- const size_t* QtPointer,
- typename Field::Element * X, const size_t ldx)
- {
-
- // upper entries are okay, just need to move up bottom ones
- const size_t* srcRow = QtPointer;
- for (size_t row=0; row<rank; row++, srcRow++)
- if (*srcRow != row) {
- typename Field::Element* oldRow = A_factors + (*srcRow) * lda;
- typename Field::Element* newRow = A_factors + row * lda;
- for (size_t col=0; col<row; col++, oldRow++, newRow++)
- F.assign(*newRow, *oldRow);
- }
-
- // X <- (Qt.L.Q)^(-1)
- //invL( F, rank, A_factors, lda, X, ldx);
- ftrtri (F, FFLAS::FflasLower, FFLAS::FflasUnit, rank, A_factors, lda);
- for (size_t i=0; i<rank; ++i)
- FFLAS::fcopy (F, rank, A_factors+i*lda, 1, X+i*ldx,1);
-
- // X = U^-1.X
- ftrsm( F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans,
- FFLAS::FflasNonUnit, rank, rank, F.one, A_factors, lda, X, ldx);
-
- return X;
-
- }
-
- //---------------------------------------------------------------------
- // TURBO: rank computation algorithm
- //---------------------------------------------------------------------
- template <class Field>
- size_t
- TURBO (const Field& F, const size_t M, const size_t N,
- typename Field::Element* A, const size_t lda, size_t * P, size_t * Q, const size_t cutoff);
-
- /** @brief Compute the LQUP factorization of the given matrix.
- * Using
- * a block algorithm and return its rank.
- * The permutations P and Q are represented
- * using LAPACK's convention.
- * @param F field
- * @param Diag precise whether U should have a unit diagonal or not
- * @param trans UNKOWN TAG, probably the \c LU of \f$A^t\f$
- * @param M matrix row dimension
- * @param N matrix column dimension
- * @param A input matrix
- * @param lda leading dimension of \p A
- * @param P the column permutation
- * @param Qt the transpose of the row permutation \p Q
- * @param LuTag flag for setting the earling termination if the matrix
- * is singular
- * @param cutoff UNKOWN TAG, probably a switch to a faster algo below \c cutoff
- *
- * @return the rank of \p A
- * @bib
- * - Jeannerod CP, <i>\c LSP Matrix Decomposition Revisited</i>, 2006
- * - Pernet C, Brassel M <i>\c LUdivine, une divine factorisation \c LU</i>, 2002
- * .
- */
- template <class Field>
- size_t
- LUdivine (const Field& F, const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
- const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t* P, size_t* Qt
- , const FFPACK_LUDIVINE_TAG LuTag=FfpackLQUP
- , const size_t cutoff=__FFPACK_LUDIVINE_CUTOFF
- );
-
- //! LUpdate
- template <class Field>
- size_t LUpdate (const Field& F,
- const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
- const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- const size_t R,
- const size_t K,
- typename Field::Element * B, const size_t ldb,
- size_t*P, size_t *Q
- , const FFPACK::FFPACK_LUDIVINE_TAG LuTag =FFPACK::FfpackLQUP
- , const size_t cutoff =__FFPACK_LUDIVINE_CUTOFF
- );
-
- template<class Element>
- class callLUdivine_small;
-
- //! LUdivine small case
- template <class Field>
- size_t
- LUdivine_small (const Field& F, const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
- const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t* P, size_t* Q,
- const FFPACK_LUDIVINE_TAG LuTag=FfpackLQUP);
-
- //! LUdivine gauss
- template <class Field>
- size_t
- LUdivine_gauss (const Field& F, const FFLAS::FFLAS_DIAG Diag,
- const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t* P, size_t* Q,
- const FFPACK_LUDIVINE_TAG LuTag=FfpackLQUP);
-
-
-
-
- /** Compute the inverse of a triangular matrix.
- * @param F
- * @param Uplo whether the matrix is upper of lower triangular
- * @param Diag whether the matrix if unit diagonal
- * @param N
- * @param A
- * @param lda
- *
- */
- template<class Field>
- void
- ftrtri (const Field& F, const FFLAS::FFLAS_UPLO Uplo, const FFLAS::FFLAS_DIAG Diag,
- const size_t N, typename Field::Element * A, const size_t lda)
- {
- if (N == 1){
- if (Diag == FFLAS::FflasNonUnit)
- F.invin (*A);
- }
- else {
- size_t N1 = N/2;
- size_t N2 = N - N1;
- ftrtri (F, Uplo, Diag, N1, A, lda);
- ftrtri (F, Uplo, Diag, N2, A + N1*(lda+1), lda);
- if (Uplo == FFLAS::FflasUpper){
- ftrmm (F, FFLAS::FflasLeft, Uplo, FFLAS::FflasNoTrans, Diag, N1, N2,
- F.one, A, lda, A + N1, lda);
- ftrmm (F, FFLAS::FflasRight, Uplo, FFLAS::FflasNoTrans, Diag, N1, N2,
- F.mOne, A + N1*(lda+1), lda, A + N1, lda);
- }
- else {
- ftrmm (F, FFLAS::FflasLeft, Uplo, FFLAS::FflasNoTrans, Diag, N2, N1,
- F.one, A + N1*(lda+1), lda, A + N1*lda, lda);
- ftrmm (F, FFLAS::FflasRight, Uplo, FFLAS::FflasNoTrans, Diag, N2, N1,
- F.mOne, A, lda, A + N1*lda, lda);
- }
- }
- }
-
-
- /** Compute the product UL.
- * Product UL of the upper, resp lower triangular matrices U and L
- * stored one above the other in the square matrix A.
- * Diag == Unit if the matrix U is unit diagonal
- * @param F
- * @param diag
- * @param N
- * @param A
- * @param lda
- *
- */
- template<class Field>
- void
- ftrtrm (const Field& F, const FFLAS::FFLAS_DIAG diag, const size_t N,
- typename Field::Element * A, const size_t lda)
- {
-
- if (N == 1)
- return;
- size_t N1 = N/2;
- size_t N2 = N-N1;
-
- ftrtrm (F, diag, N1, A, lda);
-
- fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, N1, N1, N2, F.one,
- A+N1, lda, A+N1*lda, lda, F.one, A, lda);
-
- ftrmm (F, FFLAS::FflasRight, FFLAS::FflasLower, FFLAS::FflasNoTrans,
- (diag == FFLAS::FflasUnit) ? FFLAS::FflasNonUnit : FFLAS::FflasUnit,
- N1, N2, F.one, A + N1*(lda+1), lda, A + N1, lda);
+ //---------------------------------------------------------------------
+ // LUdivine_construct: (Specialisation of LUdivine)
+ // LUP factorisation of X, the Krylov base matrix of A^t and v, in A.
+ // X contains the nRowX first vectors v, vA, .., vA^{nRowX-1}
+ // A contains the LUP factorisation of the nUsedRowX first row of X.
+ // When all rows of X have been factorized in A, and rank is full,
+ // then X is updated by the following scheme: X <= ( X; X.B ), where
+ // B = A^2^i.
+ // This enables to make use of Matrix multiplication, and stop computing
+ // Krylov vector, when the rank is not longer full.
+ // P is the permutation matrix stored in an array of indexes
+ //---------------------------------------------------------------------
- ftrmm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, diag, N2, N1,
- F.one, A + N1*(lda+1), lda, A + N1*lda, lda);
+ template <class Field>
+ size_t
+ LUdivine_construct( const Field& F, const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t ldx,
+ typename Field::Element_ptr u, size_t* P,
+ bool computeX, const FFPACK_MINPOLY_TAG MinTag= FfpackDense
+ , const size_t kg_mc =0
+ , const size_t kg_mb =0
+ , const size_t kg_j =0
+ );
- ftrtrm (F, diag, N2, A + N1*(lda+1), lda);
+ } // Protected
- }
+} //FFPACK ludivine, turbo
+// #include "ffpack_ludivine.inl"
+namespace FFPACK { /* echelon */
/*****************/
/* ECHELON FORMS */
/*****************/
/** Compute the Column Echelon form of the input matrix in-place.
*
- * After the computation A = [ M \ V ] such that AU = C is a column echelon
- * decomposition of A, with U = P^T [ V ] and C = M + Q [ Ir ]
- * [ 0 In-r ] [ 0 ]
+ * If LuTag == FfpackTileRecursive, then after the computation A = [ M \ V ]
+ * such that AU = C is a column echelon decomposition of A,
+ * with U = P^T [ V ] and C = M + Q [ Ir ]
+ * [ 0 In-r ] [ 0 ]
+ * If LuTag == FfpackTileRecursive then A = [ N \ V ] such that the same holds with M = Q N
+ *
* Qt = Q^T
- * If transform=false, the matrix U is not computed.
+ * If transform=false, the matrix V is not computed.
* See also test-colechelon for an example of use
* @param F
* @param M
* @param N
* @param A
* @param lda
- * @param P
- * @param Qt
+ * @param P the column permutation
+ * @param Qt the row position of the pivots in the echelon form
* @param transform
*/
template <class Field>
size_t
ColumnEchelonForm (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t* P, size_t* Qt, bool transform = true);
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Qt, bool transform = false,
+ const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
+
/** Compute the Row Echelon form of the input matrix in-place.
*
- * After the computation A = [ L \ M ] such that L A = R is a row echelon
- * decomposition of A, with L = [ L 0 ] P and R = M + [Ir 0] Q^T
- * [ In-r]
+ * If LuTag == FfpackTileRecursive, then after the computation A = [ L \ M ]
+ * such that X A = R is a row echelon decomposition of A,
+ * with X = [ L 0 ] P and R = M + [Ir 0] Q^T
+ * [ In-r]
+ * If LuTag == FfpackTileRecursive then A = [ L \ N ] such that the same holds with M = N Q^T
* Qt = Q^T
* If transform=false, the matrix L is not computed.
* See also test-rowechelon for an example of use
@@ -1246,24 +571,25 @@ else { // Left NullSpace
* @param N
* @param A
* @param lda
- * @param P
- * @param Qt
+ * @param P the row permutation
+ * @param Qt the column position of the pivots in the echelon form
* @param transform
*/
template <class Field>
size_t
RowEchelonForm (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t* P, size_t* Qt, const bool transform = false);
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform = false,
+ const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
/** Compute the Reduced Column Echelon form of the input matrix in-place.
*
- * After the computation A = [ V ] such that AU = R is a reduced col echelon
+ * After the computation A = [ V ] such that AX = R is a reduced col echelon
* [ M 0 ]
- * decomposition of A, where U = P^T [ V ] and R = Q [ Ir ]
+ * decomposition of A, where X = P^T [ V ] and R = Q [ Ir ]
* [ 0 In-r ] [ M 0 ]
* Qt = Q^T
- * If transform=false, the matrix U is not computed and the matrix A = R
+ * If transform=false, the matrix X is not computed and the matrix A = R
*
* @param F
* @param M
@@ -1277,17 +603,18 @@ else { // Left NullSpace
template <class Field>
size_t
ReducedColumnEchelonForm (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t* P, size_t* Qt, const bool transform = true);
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform = false,
+ const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
/** Compute the Reduced Row Echelon form of the input matrix in-place.
*
- * After the computation A = [ V1 M ] such that L A = R is a reduced row echelon
+ * After the computation A = [ V1 M ] such that X A = R is a reduced row echelon
* [ V2 0 ]
- * decomposition of A, where L = [ V1 0 ] P and R = [ Ir M ] Q^T
+ * decomposition of A, where X = [ V1 0 ] P and R = [ Ir M ] Q^T
* [ V2 In-r ] [ 0 ]
* Qt = Q^T
- * If transform=false, the matrix U is not computed and the matrix A = R
+ * If transform=false, the matrix X is not computed and the matrix A = R
* @param F
* @param M
* @param N
@@ -1300,8 +627,9 @@ else { // Left NullSpace
template <class Field>
size_t
ReducedRowEchelonForm (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t* P, size_t* Qt, const bool transform = true);
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform = false,
+ const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
/** Variant by the block recursive algorithm.
* (See A. Storjohann Thesis 2000)
@@ -1318,173 +646,98 @@ else { // Left NullSpace
template <class Field>
size_t
ReducedRowEchelonForm2 (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t* P, size_t* Qt, const bool transform = true){
- for (size_t i=0; i<N; ++i)
- Qt[i] = i;
- return REF (F, M, N, A, lda, 0, 0, N, P, Qt);
-
- }
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform = true);
//! REF
template <class Field>
size_t
REF (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
+ typename Field::Element_ptr A, const size_t lda,
const size_t colbeg, const size_t rowbeg, const size_t colsize,
size_t* Qt, size_t* P);
+} // FFPACK
+// #include "ffpack_echelonforms.inl"
+
+namespace FFPACK { /* invert */
/*****************/
/* INVERSION */
/*****************/
- /** Invert the given matrix in place
+ /** @brief Invert the given matrix in place
* or computes its nullity if it is singular.
- * An inplace 2n^3 algorithm is used.
+ *
+ * An inplace \f$2n^3\f$ algorithm is used.
* @param F The computation domain
* @param M order of the matrix
* @param [in,out] A input matrix (\f$M \times M\f$)
* @param lda leading dimension of A
* @param nullity dimension of the kernel of A
- * @return pointer to \f$A \gets A^{-1}\f$
+ * @return pointer to \f$A\f$ and \f$A \gets A^{-1}\f$
*/
template <class Field>
- typename Field::Element*
+ typename Field::Element_ptr
Invert (const Field& F, const size_t M,
- typename Field::Element * A, const size_t lda,
- int& nullity)
- {
+ typename Field::Element_ptr A, const size_t lda,
+ int& nullity);
- size_t * P = new size_t[M];
- size_t * Q = new size_t[M];
- size_t R = ReducedColumnEchelonForm (F, M, M, A, lda, P, Q);
- nullity = (int)(M - R);
- applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- M, 0, (int)R, A, lda, P);
- delete [] P;
- delete [] Q;
- return A;
- }
-
- /** Invert the given matrix in place
+ /** @brief Invert the given matrix in place
* or computes its nullity if it is singular.
*
- * X is preallocated.
+ * @pre \p X is preallocated and should be large enough to store the
+ * \f$ m \times m\f$ matrix \p A.
*
* @param F The computation domain
* @param M order of the matrix
* @param [in] A input matrix (\f$M \times M\f$)
* @param lda leading dimension of \p A
- * @param [out] X output matrix
+ * @param [out] X this is the inverse of \p A if \p A is invertible
+ * (non \c NULL and \f$ \mathtt{nullity} = 0\f$). It is untouched
+ * otherwise.
* @param ldx leading dimension of \p X
* @param nullity dimension of the kernel of \p A
* @return pointer to \f$X = A^{-1}\f$
*/
template <class Field>
- typename Field::Element*
+ typename Field::Element_ptr
Invert (const Field& F, const size_t M,
- const typename Field::Element * A, const size_t lda,
- typename Field::Element * X, const size_t ldx,
- int& nullity)
- {
-
- FFLAS::fcopy(F,M,M,X,ldx,A,lda);
- Invert (F, M, X, lda, nullity);
- return X;
- }
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t ldx,
+ int& nullity);
- /** Invert the given matrix or computes its nullity if it is singular.
- * An 2n^3 algorithm is used.
- * This routine can be \% faster than Invert but is not totally inplace.
- * X is preallocated.
+ /** @brief Invert the given matrix or computes its nullity if it is singular.
+ *
+ * An \f$2n^3f\f$ algorithm is used.
+ * This routine can be \% faster than FFPACK::Invert but is not totally inplace.
+ *
+ * @pre \p X is preallocated and should be large enough to store the
+ * \f$ m \times m\f$ matrix \p A.
+ *
* @warning A is overwritten here !
- * @warning not tested.
+ * @bug not tested.
* @param F
* @param M order of the matrix
- * @param [in,out] A input matrix (\f$M \times M\f$)
+ * @param [in,out] A input matrix (\f$M \times M\f$). On output, \p A
+ * is modified and represents a "psycological" factorisation \c LU.
* @param lda leading dimension of A
- * @param [out] X output matrix
- * @param ldx leading dimension of X
- * @param nullity dimension of the kernel of A
+ * @param [out] X this is the inverse of \p A if \p A is invertible
+ * (non \c NULL and \f$ \mathtt{nullity} = 0\f$). It is untouched
+ * otherwise.
+ * @param ldx leading dimension of \p X
+ * @param nullity dimension of the kernel of \p A
* @return pointer to \f$X = A^{-1}\f$
*/
template <class Field>
- typename Field::Element*
+ typename Field::Element_ptr
Invert2( const Field& F, const size_t M,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * X, const size_t ldx,
- int& nullity)
- {
-
- size_t *P = new size_t[M];
- size_t *rowP = new size_t[M];
-
-#if 0 /* timer remnants */
- Timer t1;
- t1.clear();
- t1.start();
-#endif
-
- nullity = int(M - LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, M, A, lda, P, rowP, FfpackLQUP));
-
-#if 0/* timer remnants */
- t1.stop();
- cerr<<"LU --> "<<t1.usertime()<<endl;
-#endif
-
- if (nullity > 0){
- delete[] P;
- delete[] rowP;
- return NULL;
- }
- else {
- // Initializing X to 0
-#if 0/* timer remnants */
- t1.clear();
- t1.start();
-#endif
- //! @todo this init is not necessary (done after ftrtri)
- for (size_t i=0; i<M; ++i)
- for (size_t j=0; j<M;++j)
- F.assign(*(X+i*ldx+j), F.zero);
-
- // X = L^-1 in n^3/3
- ftrtri (F, FFLAS::FflasLower, FFLAS::FflasUnit, M, A, lda);
- for (size_t i=0; i<M; ++i){
- for (size_t j=i; j<M; ++j)
- F.assign(*(X +i*ldx+j), F.zero);
- F.assign (*(X+i*(ldx+1)), F.one);
- }
- for (size_t i=1; i<M; ++i)
- FFLAS::fcopy (F, i, (X+i*ldx), 1, (A+i*lda), 1);
-#if 0/* timer remnants */
- t1.stop();
- cerr<<"U^-1 --> "<<t1.usertime()<<endl;
-
- invL( F, M, A, lda, X, ldx );
- // X = Q^-1.X is not necessary since Q = Id
-
- // X = U^-1.X
- t1.clear();
- t1.start();
-#endif
- ftrsm( F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
- M, M, F.one, A, lda , X, ldx);
-#if 0/* timer remnants */
- t1.stop();
- cerr<<"ftrsm --> "<<t1.usertime()<<endl;
-#endif
-
- // X = P^-1.X
- applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- M, 0,(int) M, X, ldx, P );
-
- delete[] P;
- delete[] rowP;
- return X;
- }
- }
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t ldx,
+ int& nullity);
+} // FFPACK invert
+// #include "ffpack_invert.inl"
+namespace FFPACK { /* charpoly */
/*****************************/
/* CHARACTERISTIC POLYNOMIAL */
/*****************************/
@@ -1497,49 +750,83 @@ else { // Left NullSpace
template <class Field, class Polynomial>
std::list<Polynomial>&
CharPoly( const Field& F, std::list<Polynomial>& charp, const size_t N,
- typename Field::Element * A, const size_t lda,
- const FFPACK_CHARPOLY_TAG CharpTag= FfpackArithProg);
+ typename Field::Element_ptr A, const size_t lda,
+ const FFPACK_CHARPOLY_TAG CharpTag= FfpackArithProg);
template<class Polynomial, class Field>
- Polynomial & mulpoly(const Field& F, Polynomial &res, const Polynomial & P1, const Polynomial & P2)
- {
- size_t i,j;
- // Warning: assumes that res is allocated to the size of the product
- res.resize(P1.size()+P2.size()-1);
- for (i=0;i<res.size();i++)
- F.assign(res[i], F.zero);
- for ( i=0;i<P1.size();i++)
- for ( j=0;j<P2.size();j++)
- F.axpyin(res[i+j],P1[i],P2[j]);
- return res;
- }
+ Polynomial & mulpoly(const Field& F, Polynomial &res, const Polynomial & P1, const Polynomial & P2);
template <class Field, class Polynomial>
- std::list<Polynomial>&
+ Polynomial&
CharPoly( const Field& F, Polynomial& charp, const size_t N,
- typename Field::Element * A, const size_t lda,
- const FFPACK_CHARPOLY_TAG CharpTag= FfpackArithProg)
- {
+ typename Field::Element_ptr A, const size_t lda,
+ const FFPACK_CHARPOLY_TAG CharpTag= FfpackArithProg);
+
+
+ namespace Protected {
+ template <class Field, class Polynomial>
+ std::list<Polynomial>&
+ KellerGehrig( const Field& F, std::list<Polynomial>& charp, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda );
+
+ template <class Field, class Polynomial>
+ int
+ KGFast ( const Field& F, std::list<Polynomial>& charp, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t * kg_mc, size_t* kg_mb, size_t* kg_j );
+
+ template <class Field, class Polynomial>
+ std::list<Polynomial>&
+ KGFast_generalized (const Field& F, std::list<Polynomial>& charp,
+ const size_t N,
+ typename Field::Element_ptr A, const size_t lda);
+
- std::list<Polynomial> factor_list;
- CharPoly (F, factor_list, N, A, lda, CharpTag);
- typename std::list<std::vector<typename Field::Element> >::const_iterator it;
- it = factor_list.begin();
- // std::vector<Element>* tmp = new std::vector<Element> (n+1);
- charp.resize(N+1);
+ template<class Field>
+ void
+ fgemv_kgf( const Field& F, const size_t N,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr X, const size_t incX,
+ typename Field::Element_ptr Y, const size_t incY,
+ const size_t kg_mc, const size_t kg_mb, const size_t kg_j );
+
+ template <class Field, class Polynomial>
+ std::list<Polynomial>&
+ LUKrylov( const Field& F, std::list<Polynomial>& charp, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr U, const size_t ldu);
+
+ template <class Field, class Polynomial>
+ std::list<Polynomial>&
+ Danilevski (const Field& F, std::list<Polynomial>& charp,
+ const size_t N, typename Field::Element_ptr A, const size_t lda);
+
+ template <class Field, class Polynomial>
+ std::list<Polynomial>&
+ LUKrylov_KGFast( const Field& F, std::list<Polynomial>& charp, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t ldx);
+ } // Protected
+} // FFPACK charpoly
+// #include "ffpack_charpoly_kglu.inl"
+// #include "ffpack_charpoly_kgfast.inl"
+// #include "ffpack_charpoly_kgfastgeneralized.inl"
+// #include "ffpack_charpoly_danilevski.inl"
+// #include "ffpack_charpoly.inl"
+
+namespace FFPACK { /* frobenius, charpoly */
+
+ template <class Field, class Polynomial>
+ std::list<Polynomial>&
+ CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
+ const size_t N, typename Field::Element_ptr A, const size_t lda, const size_t c);
- Polynomial P = *(it++);
- while( it!=factor_list.end() ){
- mulpoly (F,charp, P, *it);
- P = charp;
- // delete &(*it);
- ++it;
- }
- return charp;
+} // FFPACK frobenius
+// #include "ffpack_frobenius.inl"
+namespace FFPACK { /* minpoly */
- }
/**********************/
/* MINIMAL POLYNOMIAL */
@@ -1554,291 +841,610 @@ else { // Left NullSpace
template <class Field, class Polynomial>
Polynomial&
MinPoly( const Field& F, Polynomial& minP, const size_t N,
- const typename Field::Element *A, const size_t lda,
- typename Field::Element* X, const size_t ldx, size_t* P,
- const FFPACK::FFPACK_MINPOLY_TAG MinTag= FFPACK::FfpackDense,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t ldx, size_t* P,
+ const FFPACK_MINPOLY_TAG MinTag= FFPACK::FfpackDense,
const size_t kg_mc=0, const size_t kg_mb=0, const size_t kg_j=0 );
+} // FFPACK minpoly
+// #include "ffpack_minpoly.inl"
- //! Solve L X = B or X L = B in place.
- //! L is M*M if Side == FFLAS::FflasLeft and N*N if Side == FFLAS::FflasRight, B is M*N.
- //! Only the R non trivial column of L are stored in the M*R matrix L
- //! Requirement : so that L could be expanded in-place
- template<class Field>
- void
- solveLB( const Field& F, const FFLAS::FFLAS_SIDE Side,
- const size_t M, const size_t N, const size_t R,
- typename Field::Element * L, const size_t ldl,
- const size_t * Q,
- typename Field::Element * B, const size_t ldb )
- {
-
- size_t LM = (Side == FFLAS::FflasRight)?N:M;
- int i = (int)R ;
- for (; i--; ){ // much faster for
- if ( Q[i] > (size_t) i){
- //for (size_t j=0; j<=Q[i]; ++j)
- //F.init( *(L+Q[i]+j*ldl), 0 );
- //std::cerr<<"1 deplacement "<<i<<"<-->"<<Q[i]<<endl;
- FFLAS::fcopy( F, LM-Q[i]-1, L+Q[i]*(ldl+1)+ldl,ldl, L+(Q[i]+1)*ldl+i, ldl );
- for ( size_t j=Q[i]*ldl; j<LM*ldl; j+=ldl)
- F.assign( *(L+i+j), F.zero );
- }
- }
- ftrsm( F, Side, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, M, N, F.one, L, ldl , B, ldb);
- //write_field(F,std::cerr<<"dans solveLB "<<endl,L,N,N,ldl);
- // Undo the permutation of L
- for (size_t ii=0; ii<R; ++ii){
- if ( Q[ii] > (size_t) ii){
- //for (size_t j=0; j<=Q[ii]; ++j)
- //F.init( *(L+Q[ii]+j*ldl), 0 );
- FFLAS::fcopy( F, LM-Q[ii]-1, L+(Q[ii]+1)*ldl+ii, ldl, L+Q[ii]*(ldl+1)+ldl,ldl );
- for ( size_t j=Q[ii]*ldl; j<LM*ldl; j+=ldl)
- F.assign( *(L+Q[ii]+j), F.zero );
- }
- }
- }
-
- //! Solve L X = B in place.
- //! L is M*M or N*N, B is M*N.
- //! Only the R non trivial column of L are stored in the M*R matrix L
- template<class Field>
- void
- solveLB2( const Field& F, const FFLAS::FFLAS_SIDE Side,
- const size_t M, const size_t N, const size_t R,
- typename Field::Element * L, const size_t ldl,
- const size_t * Q,
- typename Field::Element * B, const size_t ldb )
- {
- typename Field::Element * Lcurr,* Rcurr,* Bcurr;
- size_t ib, Ldim;
- int k;
- if ( Side == FFLAS::FflasLeft ){
- size_t j = 0;
- while ( j<R ) {
- ib = Q[j];
- k = (int)ib ;
- while ((j<R) && ( (int) Q[j] == k) ) {k++;j++;}
- Ldim = (size_t)k-ib;
- Lcurr = L + j-Ldim + ib*ldl;
- Bcurr = B + ib*ldb;
- Rcurr = Lcurr + Ldim*ldl;
-
- ftrsm( F, Side, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, Ldim, N, F.one,
- Lcurr, ldl , Bcurr, ldb );
-
- fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M-k, N, Ldim, F.mOne,
- Rcurr , ldl, Bcurr, ldb, F.one, Bcurr+Ldim*ldb, ldb);
- }
- }
- else{ // Side == FFLAS::FflasRight
- int j=(int)R-1;
- while ( j >= 0 ) {
- ib = Q[j];
- k = (int) ib;
- while ( (j >= 0) && ( (int)Q[j] == k) ) {--k;--j;}
- Ldim = ib-(size_t)k;
- Lcurr = L + j+1 + (k+1)*ldl;
- Bcurr = B + ib+1;
- Rcurr = Lcurr + Ldim*ldl;
-
- fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M, Ldim, N-ib-1, F.mOne,
- Bcurr, ldb, Rcurr, ldl, F.one, Bcurr-Ldim, ldb);
-
- ftrsm (F, Side, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, M, Ldim, F.one,
- Lcurr, ldl , Bcurr-Ldim, ldb );
- }
- }
- }
-
-
- template<class Field>
- void trinv_left( const Field& F, const size_t N, const typename Field::Element * L, const size_t ldl,
- typename Field::Element * X, const size_t ldx )
- {
- for (size_t i=0; i<N; ++i)
- FFLAS::fcopy (F, N, X+i*ldx, 1, L+i*ldl, 1);
- ftrtri (F, FFLAS::FflasLower, FFLAS::FflasUnit, N, X, ldx);
- //invL(F,N,L,ldl,X,ldx);
- }
+namespace FFPACK { /* Krylov Elim */
template <class Field>
size_t KrylovElim( const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda, size_t*P,
- size_t *Q, const size_t deg, size_t *iterates, size_t * inviterates, const size_t maxit,size_t virt);
+ typename Field::Element_ptr A, const size_t lda, size_t*P,
+ size_t *Q, const size_t deg, size_t *iterates, size_t * inviterates, const size_t maxit,size_t virt);
template <class Field>
size_t SpecRankProfile (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda, const size_t deg, size_t *rankProfile);
- template <class Field, class Polynomial>
- std::list<Polynomial>&
- CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
- const size_t N, typename Field::Element * A, const size_t lda, const size_t c);
-
- template <class Field>
- void CompressRows (Field& F, const size_t M,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * tmp, const size_t ldtmp,
- const size_t * d, const size_t nb_blocs);
-
- template <class Field>
- void CompressRowsQK (Field& F, const size_t M,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * tmp, const size_t ldtmp,
- const size_t * d,const size_t deg, const size_t nb_blocs);
-
- template <class Field>
- void DeCompressRows (Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * tmp, const size_t ldtmp,
- const size_t * d, const size_t nb_blocs);
- template <class Field>
- void DeCompressRowsQK (Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * tmp, const size_t ldtmp,
- const size_t * d, const size_t deg, const size_t nb_blocs);
-
- template <class Field>
- void CompressRowsQA (Field& F, const size_t M,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * tmp, const size_t ldtmp,
- const size_t * d, const size_t nb_blocs);
- template <class Field>
- void DeCompressRowsQA (Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * tmp, const size_t ldtmp,
- const size_t * d, const size_t nb_blocs);
+ typename Field::Element_ptr A, const size_t lda, const size_t deg, size_t *rankProfile);
+} // FFPACK KrylovElim
+// #include "ffpack_krylovelim.inl"
- namespace Protected {
+namespace FFPACK { /* Solutions */
+ /********/
+ /* RANK */
+ /********/
- // Subroutine for Keller-Gehrig charpoly algorithm
- // Compute the new d after a LSP ( d[i] can be zero )
- template<class Field>
- size_t
- newD( const Field& F, size_t * d, bool& KeepOn,
- const size_t l, const size_t N,
- typename Field::Element * X,
- const size_t* Q,
- std::vector<std::vector<typename Field::Element> >& minpt);
- template<class Field>
- size_t
- updateD(const Field& F, size_t * d, size_t k,
- std::vector<std::vector<typename Field::Element> >& minpt );
+ /** Computes the rank of the given matrix using a LQUP factorization.
+ * The input matrix is modified.
+ * @param F field
+ * @param M row dimension of the matrix
+ * @param N column dimension of the matrix
+ * @param A input matrix
+ * @param lda leading dimension of A
+ */
+ template <class Field>
+ size_t
+ Rank( const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda) ;
- //---------------------------------------------------------------------
- // RectangleCopyTURBO: Copy A to T, with respect to the row permutation
- // defined by the lsp factorization of located in
- // A-dist2pivot
- //---------------------------------------------------------------------
- template <class Field>
- void
- RectangleCopyTURBO( const Field& F, const size_t M, const size_t N,
- const size_t dist2pivot, const size_t rank,
- typename Field::Element * T, const size_t ldt,
- const typename Field::Element * A, const size_t lda )
- {
-
- const typename Field::Element * Ai = A;
- typename Field::Element * T1i = T, T2i = T + rank*ldt;
- size_t x = dist2pivot;
- for (; Ai<A+M*lda; Ai+=lda){
- while ( F.isZero(*(Ai-x)) ) { // test if the pivot is 0
- FFLAS::fcopy( F, N, T2i, 1, Ai, 1);
- Ai += lda;
- T2i += ldt;
- }
- FFLAS::fcopy( F, N, T1i, 1, Ai, 1);
- T1i += ldt;
- x--;
- }
- }
+ /********/
+ /* DET */
+ /********/
+ /** Returns true if the given matrix is singular.
+ * The method is a block elimination with early termination
+ *
+ * using LQUP factorization with early termination.
+ * If <code>M != N</code>,
+ * then the matrix is virtually padded with zeros to make it square and
+ * it's determinant is zero.
+ * @warning The input matrix is modified.
+ * @param F field
+ * @param M row dimension of the matrix
+ * @param N column dimension of the matrix.
+ * @param [in,out] A input matrix
+ * @param lda leading dimension of A
+ */
+ template <class Field>
+ bool
+ IsSingular( const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda);
- //---------------------------------------------------------------------
- // LUdivine_construct: (Specialisation of LUdivine)
- // LUP factorisation of X, the Krylov base matrix of A^t and v, in A.
- // X contains the nRowX first vectors v, vA, .., vA^{nRowX-1}
- // A contains the LUP factorisation of the nUsedRowX first row of X.
- // When all rows of X have been factorized in A, and rank is full,
- // then X is updated by the following scheme: X <= ( X; X.B ), where
- // B = A^2^i.
- // This enables to make use of Matrix multiplication, and stop computing
- // Krylov vector, when the rank is not longer full.
- // P is the permutation matrix stored in an array of indexes
- //---------------------------------------------------------------------
+ /** @brief Returns the determinant of the given matrix.
+ * @details The method is a block elimination with early termination
+ * using LQUP factorization with early termination.
+ * If <code>M != N</code>,
+ * then the matrix is virtually padded with zeros to make it square and
+ * it's determinant is zero.
+ * @warning The input matrix is modified.
+ * @param F field
+ * @param M row dimension of the matrix
+ * @param N column dimension of the matrix.
+ * @param [in,out] A input matrix
+ * @param lda leading dimension of A
+ */
+ template <class Field>
+ typename Field::Element
+ Det( const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda);
- template <class Field>
- size_t
- LUdivine_construct( const Field& F, const FFLAS::FFLAS_DIAG Diag,
- const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- typename Field::Element * X, const size_t ldx,
- typename Field::Element * u, size_t* P,
- bool computeX, const FFPACK_MINPOLY_TAG MinTag= FFPACK::FfpackDense
- , const size_t kg_mc =0
- , const size_t kg_mb =0
- , const size_t kg_j =0
- );
+ /*********/
+ /* SOLVE */
+ /*********/
- template <class Field, class Polynomial>
- std::list<Polynomial>&
- KellerGehrig( const Field& F, std::list<Polynomial>& charp, const size_t N,
- const typename Field::Element * A, const size_t lda );
- template <class Field, class Polynomial>
- int
- KGFast ( const Field& F, std::list<Polynomial>& charp, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t * kg_mc, size_t* kg_mb, size_t* kg_j );
+ /// Solve linear system using LQUP factorization.
+ template <class Field>
+ typename Field::Element_ptr
+ Solve( const Field& F, const size_t M,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr x, const int incx,
+ typename Field::ConstElement_ptr b, const int incb );
- template <class Field, class Polynomial>
- std::list<Polynomial>&
- KGFast_generalized (const Field& F, std::list<Polynomial>& charp,
- const size_t N,
- typename Field::Element * A, const size_t lda);
+ //! Solve L X = B or X L = B in place.
+ //! L is M*M if Side == FFLAS::FflasLeft and N*N if Side == FFLAS::FflasRight, B is M*N.
+ //! Only the R non trivial column of L are stored in the M*R matrix L
+ //! Requirement : so that L could be expanded in-place
+ template<class Field>
+ void
+ solveLB( const Field& F, const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ typename Field::Element_ptr L, const size_t ldl,
+ const size_t * Q,
+ typename Field::Element_ptr B, const size_t ldb );
+
+ //! Solve L X = B in place.
+ //! L is M*M or N*N, B is M*N.
+ //! Only the R non trivial column of L are stored in the M*R matrix L
+ template<class Field>
+ void
+ solveLB2( const Field& F, const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ typename Field::Element_ptr L, const size_t ldl,
+ const size_t * Q,
+ typename Field::Element_ptr B, const size_t ldb );
- template<class Field>
- void
- fgemv_kgf( const Field& F, const size_t N,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * X, const size_t incX,
- typename Field::Element * Y, const size_t incY,
- const size_t kg_mc, const size_t kg_mb, const size_t kg_j );
- template <class Field, class Polynomial>
- std::list<Polynomial>&
- LUKrylov( const Field& F, std::list<Polynomial>& charp, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * U, const size_t ldu);
+ /*************/
+ /* NULLSPACE */
+ /*************/
- template <class Field, class Polynomial>
- std::list<Polynomial>&
- Danilevski (const Field& F, std::list<Polynomial>& charp,
- const size_t N, typename Field::Element * A, const size_t lda);
+ /** Computes a vector of the Left/Right nullspace of the matrix A.
+ *
+ * @param F The computation domain
+ * @param Side
+ * @param M
+ * @param N
+ * @param[in,out] A input matrix of dimension M x N, A is modified to its LU version
+ * @param lda
+ * @param[out] X output vector
+ * @param incX
+ *
+ */
+ template <class Field>
+ void RandomNullSpaceVector (const Field& F, const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t incX);
+
+ /** Computes a basis of the Left/Right nullspace of the matrix A.
+ * return the dimension of the nullspace.
+ *
+ * @param F The computation domain
+ * @param Side
+ * @param M
+ * @param N
+ * @param[in,out] A input matrix of dimension M x N, A is modified
+ * @param lda
+ * @param[out] NS output matrix of dimension N x NSdim (allocated here)
+ * @param[out] ldn
+ * @param[out] NSdim the dimension of the Nullspace (N-rank(A))
+ *
+ */
+ template <class Field>
+ size_t NullSpaceBasis (const Field& F, const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr& NS, size_t& ldn,
+ size_t& NSdim);
+
+ /*****************/
+ /* RANK PROFILES */
+ /*****************/
+
+ /** @brief Computes the row rank profile of A.
+ *
+ * @param F
+ * @param M
+ * @param N
+ * @param A input matrix of dimension M x N
+ * @param lda
+ * @param rkprofile return the rank profile as an array of row indexes, of dimension r=rank(A)
+ * @param LuTag: chooses the elimination algorithm. SlabRecursive for LUdivine, TileRecursive for PLUQ
+ *
+ * A is modified
+ * rkprofile is allocated during the computation.
+ * @returns R
+ */
+ template <class Field>
+ size_t RowRankProfile (const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* &rkprofile,
+ const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
+
+
+ /** @brief Computes the column rank profile of A.
+ *
+ * @param F
+ * @param M
+ * @param N
+ * @param A input matrix of dimension
+ * @param lda
+ * @param rkprofile return the rank profile as an array of row indexes, of dimension r=rank(A)
+ * @param LuTag: chooses the elimination algorithm. SlabRecursive for LUdivine, TileRecursive for PLUQ
+ *
+ * A is modified
+ * rkprofile is allocated during the computation.
+ * @returns R
+ */
+ template <class Field>
+ size_t ColumnRankProfile (const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* &rkprofile,
+ const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
+
+ /** @brief Recovers the column/row rank profile from the permutation of an LU decomposition.
+ *
+ * Works with both the CUP/PLE decompositions (obtained by LUdivine) or the PLUQ decomposition
+ * Assumes that the output vector containing the rank profile is already allocated.
+ * @param P the permutation carrying the rank profile information
+ * @param N the row/col dimension for a row/column rank profile
+ * @param R the rank of the matrix (
+ * @param rkprofile return the rank profile as an array of indices
+ * @param LuTag: chooses the elimination algorithm. SlabRecursive for LUdivine, TileRecursive for PLUQ
+ *
+ * A is modified
+ *
+ */
+ void RankProfileFromLU (const size_t* P, const size_t N, const size_t R,
+ size_t* rkprofile, const FFPACK_LU_TAG LuTag);
+
+ /** @brief Recovers the row and column rank profiles of any leading submatrix from the PLUQ decomposition.
+ *
+ * Only works with the PLUQ decomposition
+ * Assumes that the output vectors containing the rank profiles are already allocated.
+ *
+ * @param P the permutation carrying the rank profile information
+ * @param M the row dimension of the initial matrix
+ * @param N the column dimension of the initial matrix
+ * @param R the rank of the initial matrix
+ * @param LSm the row dimension of the leading submatrix considered
+ * @param LSn the column dimension of the leading submatrix considered
+ * @param P the row permutation of the PLUQ decomposition
+ * @param Q the column permutation of the PLUQ decomposition
+ * @param RRP return the row rank profile of the leading
+ * @param LuTag: chooses the elimination algorithm. SlabRecursive for LUdivine, TileRecursive for PLUQ
+ * @return the rank of the LSm x LSn leading submatrix
+ *
+ * A is modified
+ * @bib
+ * - Dumas J-G., Pernet C., and Sultan Z. <i>\c Simultaneous computation of the row and column rank profiles </i>, ISSAC'13.
+ */
+ size_t LeadingSubmatrixRankProfiles (const size_t M, const size_t N, const size_t R,
+ const size_t LSm, const size_t LSn,
+ const size_t* P, const size_t* Q,
+ size_t* RRP, size_t* CRP);
+ /** RowRankProfileSubmatrixIndices.
+ * Computes the indices of the submatrix r*r X of A whose rows correspond to
+ * the row rank profile of A.
+ *
+ * @param F
+ * @param M
+ * @param N
+ * @param A input matrix of dimension
+ * @param rowindices array of the row indices of X in A
+ * @param colindices array of the col indices of X in A
+ * @param lda
+ * @param[out] R
+ *
+ * rowindices and colindices are allocated during the computation.
+ * A is modified
+ * @returns R
+ */
+ template <class Field>
+ size_t RowRankProfileSubmatrixIndices (const Field& F,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A,
+ const size_t lda,
+ size_t*& rowindices,
+ size_t*& colindices,
+ size_t& R);
+
+ /** Computes the indices of the submatrix r*r X of A whose columns correspond to
+ * the column rank profile of A.
+ *
+ * @param F
+ * @param M
+ * @param N
+ * @param A input matrix of dimension
+ * @param rowindices array of the row indices of X in A
+ * @param colindices array of the col indices of X in A
+ * @param lda
+ * @param[out] R
+ *
+ * rowindices and colindices are allocated during the computation.
+ * @warning A is modified
+ * \return R
+ */
+ template <class Field>
+ size_t ColRankProfileSubmatrixIndices (const Field& F,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A,
+ const size_t lda,
+ size_t*& rowindices,
+ size_t*& colindices,
+ size_t& R);
+
+ /** Computes the r*r submatrix X of A, by picking the row rank profile rows of A.
+ *
+ * @param F
+ * @param M
+ * @param N
+ * @param A input matrix of dimension M x N
+ * @param lda
+ * @param X the output matrix
+ * @param[out] R
+ *
+ * A is not modified
+ * X is allocated during the computation.
+ * @return R
+ */
+ template <class Field>
+ size_t RowRankProfileSubmatrix (const Field& F,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A,
+ const size_t lda,
+ typename Field::Element_ptr& X, size_t& R);
+
+ /** Compute the \f$ r\times r\f$ submatrix X of A, by picking the row rank profile rows of A.
+ *
+ *
+ * @param F
+ * @param M
+ * @param N
+ * @param A input matrix of dimension M x N
+ * @param lda
+ * @param X the output matrix
+ * @param[out] R
+ *
+ * A is not modified
+ * X is allocated during the computation.
+ * \returns R
+ */
+ template <class Field>
+ size_t ColRankProfileSubmatrix (const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr& X, size_t& R);
+
+ /*********************************************/
+ /* Accessors to Triangular and Echelon forms */
+ /*********************************************/
+
+ /** Extracts a triangular matrix from a compact storage A=L\U of rank R.
+ * if OnlyNonZeroVectors is false, then T and A have the same dimensions
+ * Otherwise, T is R x N if UpLo = FflasUpper, else T is M x R
+ * @param F: base field
+ * @param UpLo: selects if the upper or lower triangular matrix is returned
+ * @param diag: selects if the triangular matrix unit-diagonal
+ * @param M: row dimension of T
+ * @param N: column dimension of T
+ * @param R: rank of the triangular matrix (how many rows/columns need to be copied)
+ * @param A: input matrix
+ * @param lda: leading dimension of A
+ * @param T: output matrix
+ * @param ldt: leading dimension of T
+ * @param OnlyNonZeroVectors: decides whether the last zero rows/columns should be ignored
+ */
+ template <class Field>
+ void
+ getTriangular (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr T, const size_t ldt,
+ const bool OnlyNonZeroVectors = false);
+
+ /** Cleans up a compact storage A=L\U to reveal a triangular matrix of rank R.
+ * @param F: base field
+ * @param UpLo: selects if the upper or lower triangular matrix is revealed
+ * @param diag: selects if the triangular matrix unit-diagonal
+ * @param M: row dimension of A
+ * @param N: column dimension of A
+ * @param R: rank of the triangular matrix
+ * @param A: input/output matrix
+ * @param lda: leading dimension of A
+ */
+ template <class Field>
+ void
+ getTriangular (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R,
+ typename Field::Element_ptr A, const size_t lda);
+
+ /** Extracts a matrix in echelon form from a compact storage A=L\U of rank R obtained by
+ * RowEchelonForm or ColumnEchelonForm.
+ * Either L or U is in Echelon form (depending on Uplo)
+ * The echelon structure is defined by the first R values of the array P.
+ * row and column dimension of T are greater or equal to that of A
+ * @param F: base field
+ * @param UpLo: selects if the upper or lower triangular matrix is returned
+ * @param diag: selects if the echelon matrix has unit pivots
+ * @param M: row dimension of T
+ * @param N: column dimension of T
+ * @param R: rank of the triangular matrix (how many rows/columns need to be copied)
+ * @param P: positions of the R pivots
+ * @param A: input matrix
+ * @param lda: leading dimension of A
+ * @param T: output matrix
+ * @param ldt: leading dimension of T
+ * @param OnlyNonZeroVectors: decides whether the last zero rows/columns should be ignored
+ * @param LuTag: which factorized form (CUP/PLE if FfpackSlabRecursive, PLUQ if FfpackTileRecursive)
+ */
+ template <class Field>
+ void
+ getEchelonForm (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr T, const size_t ldt,
+ const bool OnlyNonZeroVectors = false,
+ const FFPACK_LU_TAG LuTag = FfpackSlabRecursive);
+
+ /** Cleans up a compact storage A=L\U obtained by RowEchelonForm or ColumnEchelonForm
+ * to reveal an echelon form of rank R.
+ * Either L or U is in Echelon form (depending on Uplo)
+ * The echelon structure is defined by the first R values of the array P.
+ * @param F: base field
+ * @param UpLo: selects if the upper or lower triangular matrix is returned
+ * @param diag: selects if the echelon matrix has unit pivots
+ * @param M: row dimension of A
+ * @param N: column dimension of A
+ * @param R: rank of the triangular matrix (how many rows/columns need to be copied)
+ * @param P: positions of the R pivots
+ * @param A: input/output matrix
+ * @param lda: leading dimension of A
+ * @param LuTag: which factorized form (CUP/PLE if FfpackSlabRecursive, PLUQ if FfpackTileRecursive)
+ */
+ template <class Field>
+ void
+ getEchelonForm (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ typename Field::Element_ptr A, const size_t lda,
+ const FFPACK_LU_TAG LuTag = FfpackSlabRecursive);
+
+ /** Extracts a transformation matrix to echelon form from a compact storage A=L\U
+ * of rank R obtained by RowEchelonForm or ColumnEchelonForm.
+ * If Uplo == FflasLower:
+ * T is N x N (already allocated) such that A T = C is a transformation of A in
+ * Column echelon form
+ * Else
+ * T is M x M (already allocated) such that T A = E is a transformation of A in
+ * Row Echelon form
+ * @param F: base field
+ * @param UpLo: Lower means Transformation to Column Echelon Form, Upper, to Row Echelon Form
+ * @param diag: selects if the echelon matrix has unit pivots
+ * @param M: row dimension of A
+ * @param N: column dimension of A
+ * @param R: rank of the triangular matrix
+ * @param P: permutation matrix
+ * @param A: input matrix
+ * @param lda: leading dimension of A
+ * @param T: output matrix
+ * @param ldt: leading dimension of T
+ * @param LuTag: which factorized form (CUP/PLE if FfpackSlabRecursive, PLUQ if FfpackTileRecursive)
+ */
+ template <class Field>
+ void
+ getEchelonTransform (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P, const size_t* Q,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr T, const size_t ldt,
+ const FFPACK_LU_TAG LuTag = FfpackSlabRecursive);
+ /** Extracts a matrix in echelon form from a compact storage A=L\U of rank R obtained by
+ * ReducedRowEchelonForm or ReducedColumnEchelonForm with transform = true.
+ * Either L or U is in Echelon form (depending on Uplo)
+ * The echelon structure is defined by the first R values of the array P.
+ * row and column dimension of T are greater or equal to that of A
+ * @param F: base field
+ * @param UpLo: selects if the upper or lower triangular matrix is returned
+ * @param diag: selects if the echelon matrix has unit pivots
+ * @param M: row dimension of T
+ * @param N: column dimension of T
+ * @param R: rank of the triangular matrix (how many rows/columns need to be copied)
+ * @param P: positions of the R pivots
+ * @param A: input matrix
+ * @param lda: leading dimension of A
+ * @param ldt: leading dimension of T
+ * @param LuTag: which factorized form (CUP/PLE if FfpackSlabRecursive, PLUQ if FfpackTileRecursive)
+ * @param OnlyNonZeroVectors: decides whether the last zero rows/columns should be ignored
+ */
+ template <class Field>
+ void
+ getReducedEchelonForm (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr T, const size_t ldt,
+ const bool OnlyNonZeroVectors = false,
+ const FFPACK_LU_TAG LuTag = FfpackSlabRecursive);
+
+ /** Cleans up a compact storage A=L\U of rank R obtained by ReducedRowEchelonForm or
+ * ReducedColumnEchelonForm with transform = true.
+ * Either L or U is in Echelon form (depending on Uplo)
+ * The echelon structure is defined by the first R values of the array P.
+ * @param F: base field
+ * @param UpLo: selects if the upper or lower triangular matrix is returned
+ * @param diag: selects if the echelon matrix has unit pivots
+ * @param M: row dimension of A
+ * @param N: column dimension of A
+ * @param R: rank of the triangular matrix (how many rows/columns need to be copied)
+ * @param P: positions of the R pivots
+ * @param A: input/output matrix
+ * @param lda: leading dimension of A
+ * @param LuTag: which factorized form (CUP/PLE if FfpackSlabRecursive, PLUQ if FfpackTileRecursive)
+ */
+ template <class Field>
+ void
+ getReducedEchelonForm (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ typename Field::Element_ptr A, const size_t lda,
+ const FFPACK_LU_TAG LuTag = FfpackSlabRecursive);
+
+ /** Extracts a transformation matrix to echelon form from a compact storage A=L\U
+ * of rank R obtained by RowEchelonForm or ColumnEchelonForm.
+ * If Uplo == FflasLower:
+ * T is N x N (already allocated) such that A T = C is a transformation of A in
+ * Column echelon form
+ * Else
+ * T is M x M (already allocated) such that T A = E is a transformation of A in
+ * Row Echelon form
+ * @param F: base field
+ * @param UpLo: selects Col or Row Echelon Form
+ * @param diag: selects if the echelon matrix has unit pivots
+ * @param M: row dimension of A
+ * @param N: column dimension of A
+ * @param R: rank of the triangular matrix
+ * @param P: permutation matrix
+ * @param A: input matrix
+ * @param lda: leading dimension of A
+ * @param T: output matrix
+ * @param ldt: leading dimension of T
+ * @param LuTag: which factorized form (CUP/PLE if FfpackSlabRecursive, PLUQ if FfpackTileRecursive)
+ */
+ template <class Field>
+ void
+ getReducedEchelonTransform (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P, const size_t* Q,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr T, const size_t ldt,
+ const FFPACK_LU_TAG LuTag = FfpackSlabRecursive);
+ /** Auxiliary routine: determines the permutation that changes a PLUQ decomposition
+ * into a echelon form revealing PLUQ decomposition
+ */
+ void
+ PLUQtoEchelonPermutation (const size_t N, const size_t R, const size_t * P, size_t * outPerm);
- template <class Field, class Polynomial>
- std::list<Polynomial>&
- LUKrylov_KGFast( const Field& F, std::list<Polynomial>& charp, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * X, const size_t ldx);
- } // Protected
} // FFPACK
+// #include "ffpack.inl"
+namespace FFPACK { /* not used */
+
+ /** LQUPtoInverseOfFullRankMinor.
+ * Suppose A has been factorized as L.Q.U.P, with rank r.
+ * Then Qt.A.Pt has an invertible leading principal r x r submatrix
+ * This procedure efficiently computes the inverse of this minor and puts it into X.
+ * @note It changes the lower entries of A_factors in the process (NB: unless A was nonsingular and square)
+ *
+ * @param F
+ * @param rank rank of the matrix.
+ * @param A_factors matrix containing the L and U entries of the factorization
+ * @param lda
+ * @param QtPointer theLQUP->getQ()->getPointer() (note: getQ returns Qt!)
+ * @param X desired location for output
+ * @param ldx
+ */
+ template <class Field>
+ typename Field::Element_ptr
+ LQUPtoInverseOfFullRankMinor( const Field& F, const size_t rank,
+ typename Field::Element_ptr A_factors, const size_t lda,
+ const size_t* QtPointer,
+ typename Field::Element_ptr X, const size_t ldx);
+
+} // FFPACK
+// include precompiled instantiation headers (avoiding to recompile them)
+#ifdef FFPACK_COMPILED
+#include "fflas-ffpack/interfaces/libs/ffpack_inst.h"
+#endif
+
+#include "ffpack_fgesv.inl"
+#include "ffpack_fgetrs.inl"
+#include "ffpack_ftrtr.inl"
+#include "ffpack_pluq.inl"
+#include "ffpack_pluq_mp.inl"
+#include "ffpack_ppluq.inl"
#include "ffpack_ludivine.inl"
-#include "ffpack_minpoly.inl"
+#include "ffpack_ludivine_mp.inl"
+#include "ffpack_echelonforms.inl"
+#include "ffpack_invert.inl"
#include "ffpack_charpoly_kglu.inl"
#include "ffpack_charpoly_kgfast.inl"
#include "ffpack_charpoly_kgfastgeneralized.inl"
#include "ffpack_charpoly_danilevski.inl"
#include "ffpack_charpoly.inl"
-#include "ffpack_krylovelim.inl"
#include "ffpack_frobenius.inl"
-#include "ffpack_echelonforms.inl"
-
+#include "ffpack_minpoly.inl"
+#include "ffpack_krylovelim.inl"
+#include "ffpack_permutation.inl"
+#include "ffpack_rankprofiles.inl"
+#include "ffpack.inl"
#endif // __FFLASFFPACK_ffpack_H
diff --git a/fflas-ffpack/ffpack/ffpack.inl b/fflas-ffpack/ffpack/ffpack.inl
new file mode 100644
index 0000000..936808d
--- /dev/null
+++ b/fflas-ffpack/ffpack/ffpack.inl
@@ -0,0 +1,401 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* ffpack.inl
+ * Copyright (C) 2014 FFLAS-FFACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_ffpack_INL
+#define __FFLASFFPACK_ffpack_INL
+
+namespace FFPACK {
+
+
+template <class Field>
+size_t
+Rank (const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda)
+{
+ if (M == 0 and N == 0)
+ return 0 ;
+
+ size_t *P = FFLAS::fflas_new<size_t>(N);
+ size_t *Q = FFLAS::fflas_new<size_t>(M);
+ size_t R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Q);
+ FFLAS::fflas_delete( Q);
+ FFLAS::fflas_delete( P);
+ return R;
+}
+
+template <class Field>
+bool
+IsSingular (const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda)
+{
+ if ( (M==0) and (N==0) ) return false;
+ if ( (M==0) or (N==0) ) return true;
+ if ( M != N ) return true ;
+
+
+ size_t *P = FFLAS::fflas_new<size_t>(N);
+ size_t *Q = FFLAS::fflas_new<size_t>(M);
+ bool singular = !LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Q, FfpackSingular);
+
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ return singular;
+ }
+
+template <class Field>
+typename Field::Element
+Det( const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda)
+{
+ if ( (M==0) and (N==0) )
+ return F.one ;
+ if ( (M==0) or (N==0) )
+ return F.zero ;
+ if ( M != N )
+ return F.zero ;
+
+ typename Field::Element det; F.init(det);
+ bool singular;
+ size_t *P = FFLAS::fflas_new<size_t>(N);
+ size_t *Q = FFLAS::fflas_new<size_t>(M);
+ singular = !LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N,
+ A, lda, P, Q, FfpackSingular);
+ if (singular){
+ F.assign(det,F.zero);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ return det;
+ }
+ else{
+ F.assign(det,F.one);
+ typename Field::Element_ptr Ai=A;
+ for (; Ai < A+ M*lda+N; Ai+=lda+1 )
+ F.mulin( det, *Ai );
+ int count=0;
+ for (size_t i=0;i<N;++i)
+ if (P[i] != i) ++count;
+
+ if ((count&1) == 1)
+ F.negin(det);
+ }
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ return det;
+}
+
+template <class Field>
+typename Field::Element_ptr
+Solve( const Field& F, const size_t M,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr x, const int incx,
+ typename Field::ConstElement_ptr b, const int incb )
+{
+
+ size_t *P = FFLAS::fflas_new<size_t>(M);
+ size_t *rowP = FFLAS::fflas_new<size_t>(M);
+
+ if (LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, M, A, lda, P, rowP) < M){
+ std::cerr<<"SINGULAR MATRIX"<<std::endl;
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( rowP);
+ return x;
+ }
+ else{
+ FFLAS::fassign( F, M, b, incb, x, incx );
+
+ ftrsv(F, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, M,
+ A, lda , x, incx);
+ ftrsv(F, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, M,
+ A, lda , x, incx);
+ applyP( F, FFLAS::FflasRight, FFLAS::FflasTrans,
+ M, 0,(int) M, x, incx, P );
+ FFLAS::fflas_delete( rowP);
+ FFLAS::fflas_delete( P);
+
+ return x;
+
+ }
+}
+
+template <class Field>
+void RandomNullSpaceVector (const Field& F, const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t incX)
+{
+ // Right kernel vector: X s.t. AX == 0
+ if (Side == FFLAS::FflasRight) {
+ size_t* P = FFLAS::fflas_new<size_t>(N);
+ size_t* Qt = FFLAS::fflas_new<size_t>(M);
+
+ size_t R = LUdivine(F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Qt);
+ FFLAS::fflas_delete(Qt);
+
+ // Nullspace is {0}
+ if (N == R) {
+ FFLAS::fzero(F, N, X, incX);
+ FFLAS::fflas_delete(P);
+ return;
+ }
+
+ // We create t (into X) not null such that U * t == 0, i.e. U1 * t1 == -U2 * t2
+
+ // Random after rank is passed (t2)
+ typename Field::RandIter g(F);
+ for (size_t i = R; i < N; ++i)
+ g.random(*(X + i * incX));
+
+ // Nullspace is total, any random vector would do
+ if (R == 0) {
+ FFLAS::fflas_delete(P);
+ return;
+ }
+
+ // Compute -U2 * t2 (into t1 as temporary)
+ FFLAS::fgemv(F, FFLAS::FflasNoTrans, R, N - R,
+ F.mOne, A + R, lda, X + R * incX, incX, 0u, X, incX);
+
+ // Now get t1 such that U1 * t1 == -U2 * t2
+ FFLAS::ftrsv(F, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, R,
+ A, lda, X, (int)incX);
+
+ applyP(F, FFLAS::FflasLeft, FFLAS::FflasTrans, 1u, 0u, (int) R, X, 1u, P);
+
+ FFLAS::fflas_delete(P);
+ }
+
+ // Left kernel vector
+ else {
+ size_t* P = FFLAS::fflas_new<size_t>(M);
+ size_t* Qt = FFLAS::fflas_new<size_t>(N);
+
+ size_t R = LUdivine(F, FFLAS::FflasNonUnit, FFLAS::FflasTrans, M, N, A, lda, P, Qt);
+ FFLAS::fflas_delete(Qt);
+
+ // Nullspace is {0}
+ if (M == R) {
+ FFLAS::fzero(F, M, X, incX);
+ FFLAS::fflas_delete(P);
+ return;
+ }
+
+ // We create t (into X) not null such that t * L == 0, i.e. t1 * L1 == -t2 * L2
+
+ // Random after rank is passed (t2)
+ typename Field::RandIter g(F);
+ for (size_t i = R; i < M; ++i)
+ g.random(*(X + i * incX));
+
+ // Nullspace is total, any random vector would do
+ if (R == 0) {
+ FFLAS::fflas_delete(P);
+ return;
+ }
+
+ // Compute -t2 * L2 (into t1 as temporary)
+ FFLAS::fgemv(F, FFLAS::FflasTrans, M - R, R,
+ F.mOne, A + R * lda, lda, X + R * incX, incX, 0u, X, incX);
+
+ // Now get t1 such that t1 * L1 == -t2 * L2
+ FFLAS::ftrsv(F, FFLAS::FflasLower, FFLAS::FflasTrans, FFLAS::FflasNonUnit, R,
+ A, lda, X, (int)incX);
+
+ applyP(F, FFLAS::FflasRight, FFLAS::FflasNoTrans, 1u, 0u, (int) R, X, 1u, P);
+
+ FFLAS::fflas_delete(P);
+ }
+}
+
+template <class Field>
+size_t NullSpaceBasis (const Field& F, const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr& NS, size_t& ldn,
+ size_t& NSdim)
+{
+ if (Side == FFLAS::FflasRight) { // Right NullSpace
+ size_t* P = FFLAS::fflas_new<size_t>(N);
+ size_t* Qt = FFLAS::fflas_new<size_t>(M);
+
+ size_t R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Qt);
+ delete [] Qt;
+
+ ldn = N-R;
+ NSdim = ldn;
+
+ if (NSdim == 0) {
+ FFLAS::fflas_delete( P);
+ NS = NULL ;
+ return NSdim ;
+ }
+
+ NS = FFLAS::fflas_new (F, N, ldn);
+
+ if (R == 0) {
+ FFLAS::fflas_delete( P);
+ FFLAS::fidentity(F,N,ldn,NS,ldn);
+ return NSdim;
+ }
+
+ FFLAS::fassign (F, R, ldn, A + R, lda, NS , ldn );
+
+ ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, R, ldn,
+ F.mOne, A, lda, NS, ldn);
+
+ FFLAS::fidentity(F,NSdim,NSdim,NS+R*ldn,ldn);
+
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+ NSdim, 0,(int) R, NS, ldn, P);
+
+ delete [] P;
+
+ return NSdim;
+ }
+ else { // Left NullSpace
+ size_t* P = FFLAS::fflas_new<size_t>(M);
+ size_t* Qt = FFLAS::fflas_new<size_t>(N);
+
+ size_t R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasTrans, M, N, A, lda, P, Qt);
+ delete [] Qt;
+
+ ldn = M;
+ NSdim = M-R;
+
+ if (NSdim == 0) {
+ FFLAS::fflas_delete( P);
+ NS = NULL;
+ return NSdim;
+ }
+
+ NS = FFLAS::fflas_new (F, NSdim, ldn);
+
+ if (R == 0) {
+ FFLAS::fflas_delete( P);
+ FFLAS::fidentity(F,NSdim,ldn,NS,ldn);
+ return NSdim;
+ }
+
+ FFLAS::fassign (F, NSdim, R, A + R *lda, lda, NS, ldn);
+ ftrsm (F, FFLAS::FflasRight, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, NSdim, R, F.mOne, A, lda, NS, ldn);
+
+ FFLAS::fidentity(F,NSdim,NSdim,NS+R,ldn);
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, NSdim, 0,(int) R, NS, ldn, P);
+
+ delete [] P;
+ return NSdim;
+ }
+}
+
+template<class Field>
+void
+solveLB( const Field& F, const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ typename Field::Element_ptr L, const size_t ldl,
+ const size_t * Q,
+ typename Field::Element_ptr B, const size_t ldb )
+{
+
+ size_t LM = (Side == FFLAS::FflasRight)?N:M;
+ int i = (int)R ;
+ for (; i--; ){ // much faster for
+ if ( Q[i] > (size_t) i){
+ //for (size_t j=0; j<=Q[i]; ++j)
+ //F.init( *(L+Q[i]+j*ldl), 0 );
+ //std::cerr<<"1 deplacement "<<i<<"<-->"<<Q[i]<<endl;
+ FFLAS::fassign( F, LM-Q[i]-1, L+(Q[i]+1)*ldl+i, ldl , L+Q[i]*(ldl+1)+ldl,ldl);
+ for ( size_t j=Q[i]*ldl; j<LM*ldl; j+=ldl)
+ F.assign( *(L+i+j), F.zero );
+ }
+ }
+ ftrsm( F, Side, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, M, N, F.one, L, ldl , B, ldb);
+ //write_field(F,std::cerr<<"dans solveLB "<<endl,L,N,N,ldl);
+ // Undo the permutation of L
+ for (size_t ii=0; ii<R; ++ii){
+ if ( Q[ii] > (size_t) ii){
+ //for (size_t j=0; j<=Q[ii]; ++j)
+ //F.init( *(L+Q[ii]+j*ldl), 0 );
+ FFLAS::fassign( F, LM-Q[ii]-1, L+Q[ii]*(ldl+1)+ldl,ldl, L+(Q[ii]+1)*ldl+ii, ldl );
+ for ( size_t j=Q[ii]*ldl; j<LM*ldl; j+=ldl)
+ F.assign( *(L+Q[ii]+j), F.zero );
+ }
+ }
+}
+
+template<class Field>
+void
+solveLB2( const Field& F, const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ typename Field::Element_ptr L, const size_t ldl,
+ const size_t * Q,
+ typename Field::Element_ptr B, const size_t ldb )
+{
+ typename Field::Element_ptr Lcurr, Rcurr, Bcurr;
+ size_t ib, Ldim;
+ int k;
+ if ( Side == FFLAS::FflasLeft ){
+ size_t j = 0;
+ while ( j<R ) {
+ ib = Q[j];
+ k = (int)ib ;
+ while ((j<R) && ( (int) Q[j] == k) ) {k++;j++;}
+ Ldim = (size_t)k-ib;
+ Lcurr = L + j-Ldim + ib*ldl;
+ Bcurr = B + ib*ldb;
+ Rcurr = Lcurr + Ldim*ldl;
+
+ ftrsm( F, Side, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, Ldim, N, F.one,
+ Lcurr, ldl , Bcurr, ldb );
+
+ fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M-(size_t)k, N, Ldim, F.mOne,
+ Rcurr , ldl, Bcurr, ldb, F.one, Bcurr+Ldim*ldb, ldb);
+ }
+ }
+ else{ // Side == FFLAS::FflasRight
+ int j=(int)R-1;
+ while ( j >= 0 ) {
+ ib = Q[j];
+ k = (int) ib;
+ while ( (j >= 0) && ( (int)Q[j] == k) ) {--k;--j;}
+ Ldim = ib-(size_t)k;
+ Lcurr = L + j+1 + (k+1)*(int)ldl;
+ Bcurr = B + ib+1;
+ Rcurr = Lcurr + Ldim*ldl;
+
+ fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M, Ldim, N-ib-1, F.mOne,
+ Bcurr, ldb, Rcurr, ldl, F.one, Bcurr-Ldim, ldb);
+
+ ftrsm (F, Side, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, M, Ldim, F.one,
+ Lcurr, ldl , Bcurr-Ldim, ldb );
+ }
+ }
+}
+
+} // FFPACK
+
+#endif // __FFLASFFPACK_ffpack_INL
diff --git a/fflas-ffpack/ffpack/ffpack_charpoly.inl b/fflas-ffpack/ffpack/ffpack_charpoly.inl
index 3af6437..c5765f3 100644
--- a/fflas-ffpack/ffpack/ffpack_charpoly.inl
+++ b/fflas-ffpack/ffpack/ffpack_charpoly.inl
@@ -6,20 +6,20 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -32,82 +32,151 @@
namespace FFPACK {
+// template <class FloatElement, class Field, class Polynomial>
+// std::list<typename Polynomial<Element> >&
+// CharPoly_convert (const Field& F, std::list<typename Polynomial<Element> >& charp, const size_t N,
+// typename Field::Element_ptr A, const size_t lda,
+// const FFPACK_CHARPOLY_TAG CharpTag)
+// {
+// Givaro::ModularBalanced<FloatElement> G((FloatElement) F.cardinality());
+// FloatElement* Af = FFLAS::fflas_new<FloatElement>(N*N);
+// typename std::list< Polynomial<FloatElement> > charp_float;
+// fconvert(F, M, N, Af, N, A, lda);
+// //convertir aussi le poly
+// CharPoly (G, charp_float, N, Af, N, CharpTag);
+
+// finit(F, ma, Yf, 1, Y, incY);
+// fflas_delete (Af);
+// return charp;
+// }
template <class Field, class Polynomial>
std::list<Polynomial>&
CharPoly (const Field& F, std::list<Polynomial>& charp, const size_t N,
- typename Field::Element * A, const size_t lda,
+ typename Field::Element_ptr A, const size_t lda,
const FFPACK_CHARPOLY_TAG CharpTag)
{
+ // if (Protected::AreEqual<Field, Givaro::Modular<double> >::value ||
+ // Protected::AreEqual<Field, Givaro::ModularBalanced<double> >::value){
+ // if (F.characteristic() < DOUBLE_TO_FLOAT_CROSSOVER)
+ // return CharPoly_convert <float,Field> (F, charp, N, A, lda, CharpTag);
+ // }
switch (CharpTag) {
- case FfpackLUK:{
- typename Field::Element * X = new typename Field::Element[N*(N+1)];
- Protected::LUKrylov (F, charp, N, A, lda, X, N);
- delete[] X;
- return charp;
- }
- case FfpackKG:{
- return Protected::KellerGehrig (F, charp, N, A, lda);
- // break;
- }
- case FfpackDanilevski:{
- return Danilevski (F, charp, N, A, lda);
- // break;
- }
- case FfpackKGFast:{
- size_t mc, mb, j;
- if (Protected::KGFast (F, charp, N, A, lda, &mc, &mb, &j)){
- std::cerr<<"NON GENERIC MATRIX PROVIDED TO KELLER-GEHRIG-FAST"<<std::endl;
- }
- return charp;
- // break;
- }
- case FfpackKGFastG:{
- return Protected::KGFast_generalized (F, charp, N, A, lda);
- }
- case FfpackHybrid:{
- typename Field::Element * X = new typename Field::Element[N*(N+1)];
- Protected::LUKrylov_KGFast (F, charp, N, A, lda, X, N);
- delete[] X;
- return charp;
- }
- case FfpackArithProg:{
- size_t attempts=0;
- bool cont = false;
- FFLAS_INT_TYPE p;
- F.characteristic(p);
- // Heuristic condition (the pessimistic theoretical one being p<2n^2.
- if ((unsigned long) (p) < N)
- return CharPoly (F, charp, N, A, lda, FfpackLUK);
-
- do{
- try {
- CharpolyArithProg (F, charp, N, A, lda, __FFPACK_CHARPOLY_THRESHOLD);
- }
- catch (CharpolyFailed){
- if (attempts++ < 2)
- cont = true;
- else
- return CharPoly(F, charp, N, A, lda, FfpackLUK);
-
- }
- } while (cont);
- return charp;
- }
- default:{
- typename Field::Element * X = new typename Field::Element[N*(N+1)];
+ case FfpackLUK:
+ {
+ typename Field::Element_ptr X = FFLAS::fflas_new (F, N, N+1);
Protected::LUKrylov (F, charp, N, A, lda, X, N);
- delete[] X;
+ FFLAS::fflas_delete (X);
return charp;
}
+ case FfpackKG:
+ {
+ return Protected::KellerGehrig (F, charp, N, A, lda);
+ // break;
+ }
+ case FfpackDanilevski:
+ {
+ return Danilevski (F, charp, N, A, lda);
+ // break;
+ }
+ case FfpackKGFast:
+ {
+ size_t mc, mb, j;
+ if (Protected::KGFast (F, charp, N, A, lda, &mc, &mb, &j)){
+ std::cerr<<"NON GENERIC MATRIX PROVIDED TO KELLER-GEHRIG-FAST"<<std::endl;
+ }
+ return charp;
+ // break;
+ }
+ case FfpackKGFastG:
+ {
+ return Protected::KGFast_generalized (F, charp, N, A, lda);
+ }
+ case FfpackHybrid:
+ {
+ typename Field::Element_ptr X = FFLAS::fflas_new (F, N, N+1);
+ Protected::LUKrylov_KGFast (F, charp, N, A, lda, X, N);
+ FFLAS::fflas_delete (X);
+ return charp;
+ }
+ case FfpackArithProg:
+ {
+ size_t attempts=0;
+ bool cont = false;
+ const uint64_t p = static_cast<uint64_t>(F.characteristic());
+ // Heuristic condition (the pessimistic theoretical one being p<2n^2.
+ if (p < static_cast<uint64_t>(N)){
+ return CharPoly (F, charp, N, A, lda, FfpackLUK);
+ }
+
+ do{
+ try {
+ CharpolyArithProg (F, charp, N, A, lda, __FFPACK_CHARPOLY_THRESHOLD);
+ }
+ catch (CharpolyFailed){
+ if (attempts++ < 2)
+ cont = true;
+ else
+ return CharPoly(F, charp, N, A, lda, FfpackLUK);
+
+ }
+ } while (cont);
+ return charp;
+ }
+ default:
+ {
+ typename Field::Element_ptr X = FFLAS::fflas_new (F, N, N+1);
+ Protected::LUKrylov (F, charp, N, A, lda, X, N);
+ FFLAS::fflas_delete (X);
+ return charp;
+ }
+ }
+ }
+
+ template<class Polynomial, class Field>
+ Polynomial & mulpoly(const Field& F, Polynomial &res, const Polynomial & P1, const Polynomial & P2)
+ {
+ size_t i,j;
+ // Warning: assumes that res is allocated to the size of the product
+ res.resize(P1.size()+P2.size()-1);
+ FFLAS::fzero(F,res.size(),&res[0],1);
+ for ( i=0;i<P1.size();i++)
+ for ( j=0;j<P2.size();j++)
+ F.axpyin(res[i+j],P1[i],P2[j]);
+ return res;
+ }
+
+ template <class Field, class Polynomial>
+ Polynomial&
+ CharPoly( const Field& F, Polynomial& charp, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ const FFPACK_CHARPOLY_TAG CharpTag/*= FfpackArithProg*/)
+ {
+
+ std::list<Polynomial> factor_list;
+ CharPoly (F, factor_list, N, A, lda, CharpTag);
+ typename std::list<Polynomial >::const_iterator it;
+ it = factor_list.begin();
+
+ charp.resize(N+1);
+
+ Polynomial P = charp = *(it++);
+
+ while( it!=factor_list.end() ){
+ mulpoly (F,charp, P, *it);
+ P = charp;
+ ++it;
}
+
+ return charp;
}
+
namespace Protected {
template <class Field, class Polynomial>
std::list<Polynomial>&
LUKrylov (const Field& F, std::list<Polynomial>& charp, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * X, const size_t ldx)
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t ldx)
{
typedef typename Field::Element elt;
@@ -116,46 +185,48 @@ namespace FFPACK {
charp.clear();
int nbfac = 0;
while (Ncurr > 0){
- size_t *P = new size_t[(size_t)Ncurr];
+ size_t *P = FFLAS::fflas_new<size_t>((size_t)Ncurr);
Polynomial minP;//=new Polynomial();
- FFPACK::MinPoly (F, minP, Ncurr, A, lda, X2, ldx, P);
+ FFPACK::MinPoly (F, minP, (size_t)Ncurr, A, lda, X2, ldx, P);
int k = int(minP.size()-1); // degre of minpoly
if ((k==1) && F.isZero ((minP)[0])){ // minpoly is X
Ai = A;
int j = Ncurr*Ncurr;
while (j-- && F.isZero(*(Ai++))) ;
if (!j){ // A is 0, CharPoly=X^n
- minP.resize(Ncurr+1);
+ minP.resize((size_t)Ncurr+1);
(minP)[1] = F.zero;
- (minP)[Ncurr] = F.one;
+ (minP)[(size_t)Ncurr] = F.one;
k=Ncurr;
}
}
nbfac++;
charp.push_front (minP);
if (k==Ncurr){
+ FFLAS::fflas_delete( P);
return charp;
}
size_t Nrest = (size_t)(Ncurr-k);
- elt * X21 = X2 + k*ldx;
+ elt * X21 = X2 + k*(int)ldx;
elt * X22 = X21 + k;
// Compute the n-k last rows of A' = PA^tP^t in X2_
// A = A . P^t
applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
- Ncurr, 0, (int)k, A, lda, P);
+ (size_t)Ncurr, 0, (int)k, A, lda, P);
// Copy X2_ = (A'_2)^t
- for (Xi = X21, Ai = A+k; Xi != X21 + Nrest*ldx; Ai++, Xi+=ldx-Ncurr)
+ for (Xi = X21, Ai = A+k; Xi != X21 + Nrest*ldx; Ai++, Xi+=ldx-(size_t)Ncurr)
for (size_t jj=0; jj<(size_t)Ncurr*lda; jj+=lda)
*(Xi++) = *(Ai+jj);
// A = A . P : Undo the permutation on A
applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- Ncurr, 0, (int)k, A, lda, P);
+ (size_t)Ncurr, 0, (int)k, A, lda, P);
// X2_ = X2_ . P^t (= (P A^t P^t)2_)
applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
Nrest, 0, (int)k, X21, ldx, P);
- delete[] P ;
+ FFLAS::fflas_delete( P );
// X21 = X21 . S1^-1
- ftrsm(F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasUnit, Nrest, k,
+ ftrsm(F, FFLAS::FflasRight, FFLAS::FflasUpper,
+ FFLAS::FflasNoTrans, FFLAS::FflasUnit, Nrest, (size_t)k,
F.one, X2, ldx, X21, ldx);
// Creation of the matrix A2 for recurise call
for (Xi = X22, Ai = A;
@@ -163,7 +234,7 @@ namespace FFPACK {
Xi += (ldx-Nrest), Ai += (lda-Nrest))
for (size_t jj=0; jj<Nrest; ++jj)
*(Ai++) = *(Xi++);
- fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Nrest, Nrest, k, F.mOne,
+ fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Nrest, Nrest, (size_t)k, F.mOne,
X21, ldx, X2+k, ldx, F.one, A, lda);
X2 = X22;
Ncurr = int(Nrest);
@@ -175,21 +246,19 @@ namespace FFPACK {
template <class Field, class Polynomial>
std::list<Polynomial>&
LUKrylov_KGFast (const Field& F, std::list<Polynomial>& charp, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * X, const size_t ldx)
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t ldx)
{
- typedef typename Field::Element elt;
-
size_t kg_mc, kg_mb, kg_j;
if (!KGFast (F, charp, N, A, lda, &kg_mc, &kg_mb, &kg_j))
return charp;
else{// Matrix A is not generic
Polynomial *minP = new Polynomial();
- const elt* Ai;
- elt* A2i, *Xi;
- size_t *P = new size_t[N];
+ typename Field::ConstElement_ptr Ai;
+ typename Field::Element_ptr A2i, Xi;
+ size_t *P = FFLAS::fflas_new<size_t>(N);
FFPACK::MinPoly (F, *minP, N, A, lda, X, ldx, P, FfpackKGF, kg_mc, kg_mb, kg_j);
size_t k = minP->size()-1; // degre of minpoly
@@ -198,7 +267,7 @@ namespace FFPACK {
int j = int(N*N);
while (j-- && F.isZero(*(Ai++))) ;
if (!j){ // A is 0, CharPoly=X^n
- minP->resize(N+1);
+ minP->resize((size_t)N+1);
(*minP)[1] = F.zero;
(*minP)[N] = F.one;
k=N;
@@ -208,16 +277,16 @@ namespace FFPACK {
if (k==N){
charp.clear();
charp.push_front(*minP); // CharPoly = MinPoly
- delete[] P;
+ FFLAS::fflas_delete( P);
return charp;
}
size_t Nrest = N-k;
- elt * X21 = X + k*ldx;
- elt * X22 = X21 + k;
+ typename Field::Element_ptr X21 = X + k*ldx;
+ typename Field::Element_ptr X22 = X21 + k;
// Creates the matrix A
- //size_t lambda = MAX(0,N - kg_mc*(kg_j+1) - kg_mb); // uint >= 0 !!!
+ //size_t lambda = std::max(0,N - kg_mc*(kg_j+1) - kg_mb); // uint >= 0 !!!
size_t lambda = kg_mc*(kg_j+1) + kg_mb;
if (lambda > N)
lambda = 0 ;
@@ -235,8 +304,8 @@ namespace FFPACK {
++imax;
}
// Column block B
- for (typename Field::Element* Aj=A; Aj<A+N*lda; Aj+=lda)
- FFLAS::fcopy (F, kg_mb, Aj+lambda, 1, Aj+N-kg_mc-kg_mb, 1);
+ for (typename Field::Element_ptr Aj=A; Aj<A+N*lda; Aj+=lda)
+ FFLAS::fassign (F, kg_mb, Aj+N-kg_mc-kg_mb, 1, Aj+lambda, 1);
// Second Id block
imax = N- kg_j*kg_mc;
@@ -254,19 +323,19 @@ namespace FFPACK {
// A = P . A
applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
N, 0,(int) k,
- const_cast<typename Field::Element* &>(A), lda, P);
+ /*const_cast<typename Field::Element_ptr &>*/(A), lda, P);
// Copy X2_ = (A'2_)
for (Xi = X21, Ai = A+k*lda; Xi != X21 + Nrest*ldx; Ai+=lda-N, Xi+=ldx-N){
for (size_t jj=0; jj<N; ++jj){
- *(Xi++) = *(Ai++);
+ F.assign(*(Xi++), *(Ai++));
}
}
// A = P^t . A : Undo the permutation on A
applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
N, 0,(int) k,
- const_cast<typename Field::Element* &>(A), lda, P);
+ /*const_cast<typename Field::Element_ptr &>*/(A), lda, P);
// X2_ = X2_ . P^t (= (P A P^t)2_)
applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
@@ -277,7 +346,7 @@ namespace FFPACK {
F.one, X, ldx, X21, ldx);
// Creation of the matrix A2 for recurise call
- elt * A2 = new elt[Nrest*Nrest];
+ typename Field::Element_ptr A2 = FFLAS::fflas_new (F, Nrest, Nrest);
for (Xi = X22, A2i = A2;
Xi != X22 + Nrest*ldx;
@@ -292,8 +361,8 @@ namespace FFPACK {
// Recursive call on X22
LUKrylov_KGFast (F, charp, Nrest, A2, Nrest, X22, ldx);
charp.push_front (*minP);
- delete[] P;
- delete[] A2;
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete (A2);
return charp;
}
}
diff --git a/fflas-ffpack/ffpack/ffpack_charpoly_danilevski.inl b/fflas-ffpack/ffpack/ffpack_charpoly_danilevski.inl
index 0b297e6..623a434 100644
--- a/fflas-ffpack/ffpack/ffpack_charpoly_danilevski.inl
+++ b/fflas-ffpack/ffpack/ffpack_charpoly_danilevski.inl
@@ -6,30 +6,28 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* ========LICENCE========
*.
*/
-
-#ifndef MIN
-#define MIN(a,b) (a<b)?a:b
-#endif
+#ifndef __FFLASFFPACK_ffpack_charpoly_danilveski_INL
+#define __FFLASFFPACK_ffpack_charpoly_danilveski_INL
namespace FFPACK {
@@ -41,15 +39,15 @@ namespace FFPACK {
template <class Field, class Polynomial>
std::list<Polynomial>&
Danilevski (const Field& F, std::list<Polynomial>& charp,
- const size_t N, typename Field::Element * A,
+ const size_t N, typename Field::Element_ptr A,
const size_t lda)
{
charp.clear();
size_t dtot=0;
- typename Field::Element *pivot,*e,*u1,invp;
+ typename Field::Element_ptr pivot,e,u1;
+ typename Field::Element invp;
for (size_t k=0; k<N; ++k){
size_t i = k+1;
- size_t d;
e = pivot = A + (k+1) * lda + k; // coef
while ((i<N) && F.isZero(*e)) { e += lda; i++; }
if (i < N){
@@ -58,8 +56,8 @@ Danilevski (const Field& F, std::list<Polynomial>& charp,
FFLAS::fswap (F, N, A+i, lda, A+k+1, lda);
}
F.inv (invp, *pivot);
- FFLAS::fscal (F, N-k-1, invp, pivot+1, 1);
- FFLAS::fscal (F, N-dtot, *pivot, A+dtot*lda+k+1, lda);
+ FFLAS::fscalin (F, N-k-1, invp, pivot+1, 1);
+ FFLAS::fscalin (F, N-dtot, *pivot, A+dtot*lda+k+1, lda);
// X <- X - uw
FFLAS::fger (F, k + 1-dtot, N - k -1, F.mOne,
A + dtot*lda + k, lda, pivot+1, 1,
@@ -82,8 +80,9 @@ Danilevski (const Field& F, std::list<Polynomial>& charp,
}
}
if (i==N){// completed one companion block
+ size_t d;
d = k+1-dtot;
- typename Field::Element *Ai = A+k+dtot*lda;
+ typename Field::Element_ptr Ai = A+k+dtot*lda;
Polynomial * P = new Polynomial(d+1);
for (i = 0; i < d; ++i){
F.neg (P->operator[](i), *(Ai+i*lda));
@@ -97,3 +96,5 @@ Danilevski (const Field& F, std::list<Polynomial>& charp,
}
} // FFPACK
+
+#endif // __FFLASFFPACK_ffpack_charpoly_danilveski_INL
diff --git a/fflas-ffpack/ffpack/ffpack_charpoly_kgfast.inl b/fflas-ffpack/ffpack/ffpack_charpoly_kgfast.inl
index 707f914..8485974 100644
--- a/fflas-ffpack/ffpack/ffpack_charpoly_kgfast.inl
+++ b/fflas-ffpack/ffpack/ffpack_charpoly_kgfast.inl
@@ -6,20 +6,20 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -30,10 +30,6 @@
#ifndef __FFLASFFPACK_ffpack_charpoly_kgfast_INL
#define __FFLASFFPACK_ffpack_charpoly_kgfast_INL
-#ifndef MIN
-#define MIN(a,b) (a<b)?a:b
-#endif
-
namespace FFPACK {
namespace Protected {
//---------------------------------------------------------------------
@@ -45,18 +41,18 @@ namespace FFPACK {
int
KGFast ( const Field& F, std::list<Polynomial>& charp,
const size_t N,
- typename Field::Element * A, const size_t lda,
+ typename Field::Element_ptr A, const size_t lda,
size_t * kg_mc, size_t* kg_mb, size_t* kg_j )
{
//std::cerr<<"Dans KGFast"<<std::endl;
size_t mc=N>>1; // Matrix A is transformed into a mc_Frobenius form
size_t mb=N-mc;
- size_t r;
- typename Field::Element * C, *B;
+ typename Field::Element_ptr C, B;
while ( mc > 0 ) {
+ // size_t r;
#if 0
std::cerr<<"Boucle1: mc,mb,N="<<mc<<" "<<mb<<" "<<N<<std::endl;
write_field( F, std::cerr, A, N, N, lda );
@@ -65,7 +61,7 @@ namespace FFPACK {
C = A + (N-mc);
//std::cerr<<std::endl<<"mc="<<mc<<":";
while ( (j+1)*mc < N ) {
- mb = MIN ( mb, N-(j+1)*mc );
+ mb = std::min ( mb, N-(j+1)*mc );
#if 0
std::cerr<<"Boucle2: j,mb="<<j<<" "<<mb<<std::endl;
write_field( F, std::cerr, A, N, N, lda );
@@ -73,20 +69,20 @@ namespace FFPACK {
B = A + (N-mc-mb);
// B1 <- C1^-1.B1
- typename Field::Element * LUP = new typename Field::Element[mc*mc];
- for (size_t i=0; i<mc; ++i)
- FFLAS::fcopy( F, mc, LUP+i*mc, 1, C+i*lda, 1);
- size_t * P = new size_t[mc];
- size_t * Q = new size_t[mc];
-
- if ( (r = LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, mc, mc,
- LUP, mc, P, Q, FfpackLQUP)) < mc ){
+ typename Field::Element_ptr LUP = FFLAS::fflas_new (F, mc, mc);
+ // for (size_t i=0; i<mc; ++i)
+ // FFLAS::fassign( F, mc, C+i*lda, 1, LUP+i*mc, 1);
+ FFLAS::fassign(F,mc,mc,C,lda,LUP,mc);
+ size_t * P = FFLAS::fflas_new<size_t>(mc);
+ size_t * Q = FFLAS::fflas_new<size_t>(mc);
+
+ if ( (LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, mc, mc, LUP, mc, P, Q)) < mc ){
* kg_mc = mc;
* kg_mb = mb;
* kg_j = j;
- delete[] P;
- delete[] Q;
- delete[] LUP;
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ FFLAS::fflas_delete (LUP);
return -1;
}
@@ -99,11 +95,11 @@ namespace FFPACK {
mc, mb, F.one, LUP, mc , B, lda);
ftrsm(F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
mc, mb, F.one, LUP, mc , B, lda);
- delete[] LUP;
+ FFLAS::fflas_delete (LUP);
applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, mb, 0, (int)mc, B, lda, P );
- delete[] P;
- delete[] Q;
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
#if 0
std::cerr<<"Apres B1<-C1^-1"<<std::endl;
write_field( F, std::cerr, A, N, N, lda );
@@ -120,14 +116,17 @@ namespace FFPACK {
#endif
// Shifting B: B1;B2 -> B2;B1
- typename Field::Element * tmp = new typename Field::Element[mc*mb];
- for (size_t i=0; i<mc; ++i)
- FFLAS::fcopy( F, mb, tmp+i*mb, 1, B+i*lda, 1);
- for (size_t i=mc; i<N; ++i)
- FFLAS::fcopy( F, mb, B+(i-mc)*lda, 1, B+i*lda, 1);
- for (size_t i=0; i<mc; ++i)
- FFLAS::fcopy( F, mb, B+(i+N-mc)*lda, 1, tmp+i*mb, 1);
- delete[] tmp;
+ typename Field::Element_ptr tmp = FFLAS::fflas_new (F, mc, mb);
+ // for (size_t i=0; i<mc; ++i)
+ // FFLAS::fassign( F, mb, B+i*lda, 1, tmp+i*mb, 1);
+ FFLAS::fassign(F,mc,mb,B,lda,tmp,mb);
+ // for (size_t i=mc; i<N; ++i)
+ // FFLAS::fassign( F, mb, B+i*lda, 1, B+(i-mc)*lda, 1);
+ FFLAS::fassign(F,N-mc,mb,B+mc*lda,lda,B,lda);
+ // for (size_t i=0; i<mc; ++i)
+ // FFLAS::fassign( F, mb, tmp+i*mb, 1, B+(i+N-mc)*lda, 1);
+ FFLAS::fassign(F,mc,mb,tmp,mb,B+(N-mc)*lda,lda);
+ FFLAS::fflas_delete (tmp);
#if 0
std::cerr<<"Apres shift de B"<<std::endl;
write_field( F, std::cerr, A, N, N, lda );
@@ -153,58 +152,62 @@ namespace FFPACK {
#if 0
std::cerr<<"mb<lambda"<<std::endl;
#endif
- typename Field::Element * tmp2 = new typename Field::Element[lambda*mc];
+ typename Field::Element_ptr tmp2 = FFLAS::fflas_new (F, (size_t)lambda, mc);
// tmp2 <- C1
- for (int i=0; i<lambda; ++i)
- FFLAS::fcopy( F, mc, tmp2+i*mc, 1, C+i*lda, 1);
+ // for (int i=0; i<lambda; ++i)
+ // FFLAS::fassign( F, mc, C+i*(int)lda, 1, tmp2+i*(int)mc, 1);
+ FFLAS::fassign(F,(size_t)lambda,mc,C,lda,tmp2,mc);
// C1' <- B1.C2
fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, mb, mc, mb,
- F.one, B, lda, C+lambda*lda, lda,
+ F.one, B, lda, C+lambda*(int)lda, lda,
F.zero, C, lda);
// tmp2 <- B2.C2 + tmp2
- fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, lambda, mc, mb,
- F.one, B+mb*lda, lda, C+lambda*lda, lda,
+ fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, (size_t)lambda, mc, mb,
+ F.one, B+mb*lda, lda, C+lambda*(int)lda, lda,
F.one, tmp2, mc);
// C2' <- tmp2
- for (int i=0; i<lambda; ++i)
- FFLAS::fcopy( F, mc, C+mb*lda+i*lda, 1, tmp2+i*mc, 1);
- delete[] tmp2;
+ // for (int i=0; i<lambda; ++i)
+ // FFLAS::fassign( F, mc, tmp2+(size_t)i*mc, 1, C+(mb+(size_t)i)*lda, 1);
+ FFLAS::fassign(F,(size_t)lambda,mc,tmp2,mc,C+mb*lda,lda);
+ FFLAS::fflas_delete (tmp2);
}
else if ( lambda > 0 ){
#if 0
std::cerr<<"lambda>0"<<std::endl;
#endif
- typename Field::Element * tmp2 = new typename Field::Element[mb*mc];
+ typename Field::Element_ptr tmp2 = FFLAS::fflas_new (F, mb, mc);
// C1 <- B2.C2 + C1
- fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, lambda, mc, mb,
- F.one, B+mb*lda, lda, C+lambda*lda, lda,
+ fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, (size_t)lambda, mc, mb,
+ F.one, B+mb*lda, lda, C+lambda*(int)lda, lda,
F.one, C, lda);
// tmp2 <-B1.C2
fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, mb, mc, mb,
- F.one, B, lda, C+lambda*lda, lda,
+ F.one, B, lda, C+lambda*(int)lda, lda,
F.zero, tmp2, mc);
// C2' <- C1
- for (int i=0; i<lambda; ++i)
- FFLAS::fcopy( F, mc, C+mb*lda+i*lda, 1, C+i*lda, 1);
+ // for (int i=0; i<lambda; ++i)
+ // FFLAS::fassign( F, mc, C+i*(int)lda, 1, C+(mb+(size_t)i)*lda, 1);
+ FFLAS::fassign(F,(size_t)lambda,mc,C,lda,C+mb*lda,lda);
// C1' <- tmp2
- for (size_t i=0; i<mb; ++i)
- FFLAS::fcopy( F, mc, C+i*lda, 1, tmp2+i*mc, 1);
- delete[] tmp2;
+ // for (size_t i=0; i<mb; ++i)
+ // FFLAS::fassign( F, mc, tmp2+i*mc, 1, C+i*lda, 1);
+ FFLAS::fassign(F,mb,mc,tmp2,mc,C,lda);
+ FFLAS::fflas_delete (tmp2);
}
else{
#if 0
std::cerr<<"lambda<0"<<std::endl;
#endif
mb = N - (j+1)*mc;
- typename Field::Element * tmp2 = new typename Field::Element[mb*mc];
+ typename Field::Element_ptr tmp2 = FFLAS::fflas_new (F, mb, mc);
// tmp2 <-B1.C1
fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, mb, mc, mb,
@@ -212,9 +215,10 @@ namespace FFPACK {
F.zero, tmp2, mc);
// C1' <- tmp2
- for (size_t i=0; i<mb; ++i)
- FFLAS::fcopy( F, mc, C+i*lda, 1, tmp2+i*mc, 1);
- delete[] tmp2;
+ // for (size_t i=0; i<mb; ++i)
+ // FFLAS::fassign( F, mc, tmp2+i*mc, 1, C+i*lda, 1);
+ FFLAS::fassign(F,mb,mc,tmp2,mc,C,lda);
+ FFLAS::fflas_delete (tmp2);
}
j++;
@@ -239,20 +243,20 @@ namespace FFPACK {
template<class Field>
void
fgemv_kgf( const Field& F, const size_t N,
- const typename Field::Element * A, const size_t lda,
- const typename Field::Element * X, const size_t incX,
- typename Field::Element * Y, const size_t incY,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::ConstElement_ptr X, const size_t incX,
+ typename Field::Element_ptr Y, const size_t incY,
const size_t kg_mc, const size_t kg_mb, const size_t kg_j )
{
size_t big_truc =kg_mb-kg_mc*(kg_j+1) ;
size_t lambda = (N<big_truc)?(0):(N-big_truc);
// Y1 <- X2
- FFLAS::fcopy ( F, lambda, Y, incY, X+(kg_mb+kg_mc)*incX, incX );
+ FFLAS::fassign ( F, lambda, X+(kg_mb+kg_mc)*incX, incX, Y, incY );
// Y2 <- X.B
fgemv( F, FFLAS::FflasTrans, N, kg_mb, F.one, A+N-kg_mc-kg_mb, lda, X, incX, F.zero, Y+lambda*incY, incY );
// Y3 <- X3
- FFLAS::fcopy ( F, kg_j*kg_mc, Y+(lambda+kg_mb)*incY, incY, X+(lambda+kg_mb+kg_mc)*incX, incX );
+ FFLAS::fassign ( F, kg_j*kg_mc, X+(lambda+kg_mb+kg_mc)*incX, incX, Y+(lambda+kg_mb)*incY, incY );
// Y4 <- X.C
fgemv( F, FFLAS::FflasTrans, N, kg_mc, F.one, A+N-kg_mc, lda, X, incX, F.zero, Y+(N-kg_mc)*incY, incY );
}
diff --git a/fflas-ffpack/ffpack/ffpack_charpoly_kgfastgeneralized.inl b/fflas-ffpack/ffpack/ffpack_charpoly_kgfastgeneralized.inl
index 347b781..3c7d74d 100644
--- a/fflas-ffpack/ffpack/ffpack_charpoly_kgfastgeneralized.inl
+++ b/fflas-ffpack/ffpack/ffpack_charpoly_kgfastgeneralized.inl
@@ -5,20 +5,20 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -29,9 +29,6 @@
#ifndef __FFLASFFPACK_ffpack_charpoly_kgfastgeneralized_INL
#define __FFLASFFPACK_ffpack_charpoly_kgfastgeneralized_INL
-#ifndef MIN
-#define MIN(a,b) (a<b)?a:b
-#endif
//---------------------------------------------------------------------
// CharPoly: Compute the characteristic polynomial of A using
@@ -48,27 +45,27 @@ namespace FFPACK {
template <class Field>
void printA(const Field& F,
std::ostream& os,
- const typename Field::Element * E,
- const typename Field::Element * C,
+ typename Field::ConstElement_ptr E,
+ typename Field::ConstElement_ptr C,
const size_t lda,
const size_t*B,
const size_t*T,
const size_t me,const size_t mc, const size_t lambda, const size_t mu)
{
- typename Field::Element * A = buildMatrix(F,E,C,lda,B,T,me,mc,lambda,mu);
+ typename Field::Element_ptr A = buildMatrix(F,E,C,lda,B,T,me,mc,lambda,mu);
size_t N = mc+me+lambda+mu;
write_field(F,os,A,N,N,N);
- delete[] A;
+ FFLAS::fflas_delete (A);
}
} // FFPACK
#endif
namespace FFPACK {
template <class Field>
- typename Field::Element * buildMatrix (const Field& F,
- const typename Field::Element * E,
- const typename Field::Element * C,
+ typename Field::Element_ptr buildMatrix (const Field& F,
+ typename Field::ConstElement_ptr E,
+ typename Field::ConstElement_ptr C,
const size_t lda,
const size_t*B,
const size_t*T,
@@ -79,14 +76,14 @@ namespace FFPACK {
{
size_t N = mc+me+lambda+mu;
- typename Field::Element * A = new typename Field::Element[N*N];
+ typename Field::Element_ptr A = FFLAS::fflas_new (F, N, N);
for (size_t j=0; j<lambda+me;++j)
if (B[j] < N){
for (size_t i=0;i<N;++i)
F.assign( *(A+i*N+j), F.zero);
F.assign( *(A+B[j]*lda+j), F.one);
} else {
- FFLAS::fcopy (F, N, A+j, N, E+B[j]-N, lda);
+ FFLAS::fassign (F, N, E+B[j]-N, lda, A+j, N);
}
for (size_t j=lambda+me; j<lambda+me+mu; ++j)
for (size_t i=0;i<N;++i)
@@ -94,7 +91,9 @@ namespace FFPACK {
for (size_t i=0; i<mu; ++i)
F.assign( *(A+(lambda+me+mc+i)*lda+lambda+me+T[i]), F.one);
for (size_t j=0; j<mc; ++j)
- FFLAS::fcopy(F,N,A+N-mc+j,N,C+j,lda);
+ FFLAS::fassign(F,N,C+j,lda,A+N-mc+j,N);
+ //! @bug is this :
+ // FFLAS::fassign(F,N,mc,C,lda,A+N-mc,N);
return A;
}
@@ -104,7 +103,7 @@ namespace FFPACK {
std::list<Polynomial>&
KGFast_generalized (const Field& F, std::list<Polynomial>& charp,
const size_t N,
- typename Field::Element * A, const size_t lda)
+ typename Field::Element_ptr A, const size_t lda)
{
//std::cerr<<"Dans KGFast"<<std::endl;
@@ -112,21 +111,22 @@ namespace FFPACK {
size_t me=N-mc;
// B[i] = j, the row of the 1 if the col Ai is sparse;
// B[i] = n+k, if the col Ai is the kth col of E
- size_t * B = new size_t[N];
- bool * allowedRows = new bool[N];
+ size_t * B = FFLAS::fflas_new<size_t>(N);
+ bool * allowedRows = FFLAS::fflas_new<bool>(N);
for (size_t i=0;i<(N+1)/2;++i)
allowedRows[i]=true;
// T[i] = j si T_i,j = 1
- size_t * T = new size_t[N];
+ size_t * T = FFLAS::fflas_new<size_t>(N);
for (size_t i=0;i<N;++i)
T[i]=i;
size_t lambda=0;
- typename Field::Element * C, *E = A;
+ typename Field::Element_ptr C, E = A;
#ifdef LB_DEBUG
std::cerr<<"Debut KGFG"<<std::endl
<<" ----------------------------"<<std::endl;
#endif
+ int exit_value = 0 ;
while (mc > 0) {
#ifdef LB_DEBUG
std::cerr<<"Boucle1: mc,me,lambda="<<mc<<" "<<me<<" "<<lambda<<std::endl;
@@ -141,7 +141,7 @@ namespace FFPACK {
std::cerr<<"B["<<i<<"] = "<<B[i]<<std::endl;
//std::cerr<<std::endl<<"mc="<<mc<<":";
#endif
- while (mu < N-mc) {
+ while (mu < N-mc && !exit_value) {
#ifdef LB_DEBUG
std::cerr<<"Boucle2: mu,me,lambda="<<mu<<" "<<me<<" "<<lambda<<std::endl;
printA(F,std::cerr<<"A="<<std::endl,E,C,lda,B,T,me,mc,lambda,mu);
@@ -150,10 +150,10 @@ namespace FFPACK {
std::cerr<<"Forming LUP";
#endif
size_t ncols = ((mu==0)||(mc<=mu))?mc:mc-mu;
- typename Field::Element * LUP = new typename Field::Element[(lambda+me)*ncols];
+ typename Field::Element_ptr LUP = FFLAS::fflas_new (F, lambda+me, ncols);
for (size_t i=0;i < lambda + me; ++i)
if (allowedRows[i])
- FFLAS::fcopy (F, ncols, LUP+i*ncols, 1, C+i*lda, 1);
+ FFLAS::fassign (F, ncols, C+i*lda, 1, LUP+i*ncols, 1);
else
for (size_t j = 0; j < ncols; ++j)
F.assign (*(LUP+i*ncols+j), F.zero);
@@ -162,15 +162,14 @@ namespace FFPACK {
write_field (F,std::cerr<<"LUP="<<std::endl,LUP,lambda+me,ncols,ncols);
std::cerr<<"LQUP(C1)";
#endif
- size_t * P = new size_t[ncols];
- size_t * Q = new size_t[lambda+me];
+ size_t * P = FFLAS::fflas_new<size_t>(ncols);
+ size_t * Q = FFLAS::fflas_new<size_t>(lambda+me);
for (size_t i=0; i<ncols;++i)
P[i]=0;
for (size_t i=0; i<lambda+me;++i)
Q[i]=0;
- size_t r = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, lambda + me, ncols, LUP, ncols,
- P, Q, FfpackLQUP);
+ size_t r = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, lambda + me, ncols, LUP, ncols, P, Q);
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
#endif
@@ -182,14 +181,16 @@ namespace FFPACK {
KGFast_generalized (F, charp, me, A, lda);
//Rec call on the trailing block
- typename Field::Element * At = buildMatrix(F,E,C,lda,B,T,me,mc,lambda,mu);
+ typename Field::Element_ptr At = buildMatrix(F,E,C,lda,B,T,me,mc,lambda,mu);
KGFast_generalized (F, charp, N-me, At+me*(lda+1), lda);
- delete[] At;
- exit(-1);
+ FFLAS::fflas_delete (At);
+ exit_value = -1;
+ break;
} else if (me != 0) {
std::cerr<<"BLOCAGE me!=0!!!"<<std::endl;
- exit(-1);
+ exit_value = -1;
+ break ;
}
else {
@@ -245,7 +246,8 @@ namespace FFPACK {
}
else if (mu){
std::cerr<<"CAS MU < MC - k"<<std::endl;
- exit(-1);
+ exit_value = -1;
+ break;
}
// Updating B to be improved (tabulated B^-1)
for (size_t i=0; i<lambda+me; ++i){
@@ -256,6 +258,8 @@ namespace FFPACK {
}
}
+ if (exit_value)
+ break;
#ifdef LB_DEBUG
std::cerr<<".";
//printA(F,std::cerr<<"A="<<std::endl,E,C,lda,B,T,me,mc,lambda,mu);
@@ -274,7 +278,7 @@ namespace FFPACK {
std::cerr<<".";
#endif
// F <- Q^T F
- size_t * tempP = new size_t[lambda+me+mc];
+ size_t * tempP = FFLAS::fflas_new<size_t>(lambda+me+mc);
for (size_t i=0; i< lambda+me+mc; ++i)
tempP[i] = i;
@@ -299,7 +303,7 @@ namespace FFPACK {
#ifdef LB_DEBUG
std::cerr<<".";
#endif
- delete[] tempP;
+ FFLAS::fflas_delete( tempP);
#ifdef LB_DEBUG
std::cerr<<std::endl<<"Avant B<-BQ"<<std::endl;
@@ -325,7 +329,7 @@ namespace FFPACK {
// grouping the bloc L in LUP
for (size_t i=0; i<r; ++i)
if (Q[i]>i)
- FFLAS::fcopy(F, i, LUP+i*mc, 1, LUP+Q[i]*mc,1);
+ FFLAS::fassign(F, i, LUP+Q[i]*mc,1, LUP+i*mc, 1);
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
@@ -355,9 +359,9 @@ namespace FFPACK {
r, mc-r, F.one, LUP, mc , C+r, lda);
ftrsm(F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
r, mc-r, F.one, LUP, mc , C+r, lda);
- delete[] LUP;
- delete[] P;
- delete[] Q;
+ FFLAS::fflas_delete (LUP);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
printA(F,std::cerr<<"A="<<std::endl,E,C,lda,B,T,me,mc,lambda,mu);
@@ -393,28 +397,28 @@ namespace FFPACK {
// Shifting E: E1;E2 -> E2;E1
std::cerr<<"// Shifting E: E1;E2 -> E2;E1";
#endif
- typename Field::Element * tmp = new typename Field::Element[r*me];
+ typename Field::Element_ptr tmp = FFLAS::fflas_new (F, r, me);
for (size_t i=0; i<r; ++i)
- FFLAS::fcopy (F, me, tmp+i*me, 1, E+i*lda, 1);
+ FFLAS::fassign (F, me, E+i*lda, 1, tmp+i*me, 1);
for (size_t i=r; i< N; ++i)
- FFLAS::fcopy (F, me, E+(i-r)*lda, 1, E+i*lda, 1);
+ FFLAS::fassign (F, me, E+i*lda, 1, E+(i-r)*lda, 1);
for (size_t i=0; i<r; ++i)
- FFLAS::fcopy (F, me, E+(i+N-r)*lda, 1, tmp+i*me, 1);
- delete[] tmp;
+ FFLAS::fassign (F, me, tmp+i*me, 1, E+(i+N-r)*lda, 1);
+ FFLAS::fflas_delete (tmp);
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
// Shifting C_{*,2}: C_{1,2};C_{2,2} -> C_{2,2};C_{1,2}
std::cerr<<"// Shifting C_{*,2}: C_{1,2};C_{2,2} -> C_{2,2};C_{1,2}";
#endif
- tmp = new typename Field::Element[r*(mc-r)];
+ tmp = FFLAS::fflas_new (F, r, mc-r);
for (size_t i=0; i<r; ++i)
- FFLAS::fcopy (F, mc-r, tmp+i*(mc-r), 1, C+r+i*lda, 1);
+ FFLAS::fassign (F, mc-r, C+r+i*lda, 1, tmp+i*(mc-r), 1);
for (size_t i=r; i< N; ++i)
- FFLAS::fcopy (F, mc-r, C+r+(i-r)*lda, 1, C+r+i*lda, 1);
+ FFLAS::fassign (F, mc-r, C+r+i*lda, 1, C+r+(i-r)*lda, 1);
for (size_t i=0; i<r; ++i)
- FFLAS::fcopy (F, mc-r, C+r+(i+N-r)*lda, 1, tmp+i*(mc-r), 1);
- delete[] tmp;
+ FFLAS::fassign (F, mc-r, tmp+i*(mc-r), 1, C+r+(i+N-r)*lda, 1);
+ FFLAS::fflas_delete (tmp);
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
@@ -427,42 +431,42 @@ namespace FFPACK {
std::cerr<<"// C'2 <- T C2";
#endif
// To be improved!!!
- tmp = new typename Field::Element[mu*r];
- typename Field::Element * C2 = C+(N-mu-mc)*lda;
+ tmp = FFLAS::fflas_new (F, mu, r);
+ typename Field::Element_ptr C2 = C+(N-mu-mc)*lda;
for (size_t i=0; i<mu; ++i)
- FFLAS::fcopy (F, r, tmp+i*r, 1, C2+T[i]*lda, 1);
+ FFLAS::fassign (F, r, C2+T[i]*lda, 1, tmp+i*r, 1);
for (size_t i=0; i<mu; ++i)
- FFLAS::fcopy (F, r, C2+i*lda, 1, tmp+i*r, 1);
- delete[] tmp;
+ FFLAS::fassign (F, r, tmp+i*r, 1, C2+i*lda, 1);
+ FFLAS::fflas_delete (tmp);
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
// [C'2;C'3] += [E2;E3].C
std::cerr<<"// [C'2;C'3] += [E2;E3].C";
#endif
- tmp = new typename Field::Element[me*r];
+ tmp = FFLAS::fflas_new (F, me, r);
for (size_t i=0; i<lambda+me; ++i)
if (B[i] >= N){
- FFLAS::fcopy (F, r, tmp+(B[i]-N)*r, 1, C+i*lda, 1);
+ FFLAS::fassign (F, r, C+i*lda, 1, tmp+(B[i]-N)*r, 1);
}
fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, mu + r, r, me,
F.one, E+(N-mu-r)*lda, lda, tmp, r,
F.one, C+(N-mu-mc)*lda, lda);
- delete[] tmp;
+ FFLAS::fflas_delete (tmp);
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
// shifting [C'2;C'3]
std::cerr<<"// shifting [C'2;C'3]";
#endif
- tmp = new typename Field::Element[(mc-r)*r];
- typename Field::Element * C4 = C + (N-mc+r)*lda;
+ tmp = FFLAS::fflas_new (F, mc-r, r);
+ typename Field::Element_ptr C4 = C + (N-mc+r)*lda;
for (size_t i=0; i < (mc-r); ++i){
- FFLAS::fcopy (F, r, tmp+i*r, 1, C4 + i*lda, 1);
+ FFLAS::fassign (F, r, C4 + i*lda, 1, tmp+i*r, 1);
}
for (int i = int(N-1); i >= (int) (N -mu-r); --i)
- FFLAS::fcopy (F, r, C+i*lda, 1, C+(i-mc+r)*lda, 1);
+ FFLAS::fassign (F, r, C+((size_t)i-mc+r)*lda, 1, C+i*(int)lda, 1);
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
@@ -472,13 +476,13 @@ namespace FFPACK {
// tmp2 <- C'1 (the rows corresponding to E)
std::cerr<<"// tmp2 <- C'1 (the rows corresponding to E)";
#endif
- typename Field::Element * tmp2 = new typename Field::Element[me*r];
+ typename Field::Element_ptr tmp2 = FFLAS::fflas_new (F, me, r);
for (size_t i = 0; i < lambda+me; ++i)
if (B[i] >= N){
#ifdef LB_DEBUG
std::cerr<<"saving in row "<<B[i]-N<<std::endl;
#endif
- FFLAS::fcopy (F, r, tmp2+(B[i]-N)*r, 1, C+i*lda, 1);
+ FFLAS::fassign (F, r, C+i*lda, 1, tmp2+(B[i]-N)*r, 1);
}
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
@@ -487,14 +491,14 @@ namespace FFPACK {
std::cerr<<"// C'_F[i] <- C_i";
std::cerr<<"lambda,r,me = "<<lambda<<" "<<r<<" "<<me<<std::endl;
#endif
- typename Field::Element * tmp3 = new typename Field::Element[(lambda+me)*r];
+ typename Field::Element_ptr tmp3 = FFLAS::fflas_new (F, lambda+me,r);
for (size_t i = 0; i < lambda+me; ++i)
if (B[i] < N){
#ifdef LB_DEBUG
std::cerr<<"copie de la ligne "<<i<<std::endl;
#endif
- FFLAS::fcopy (F, r, tmp3 + i*r, 1, C + i*lda, 1);
+ FFLAS::fassign (F, r, C + i*lda, 1, tmp3 + i*r, 1);
}
#ifdef LB_DEBUG
std::cerr<<"1"<<std::endl;
@@ -510,12 +514,12 @@ namespace FFPACK {
std::cerr<<"B["<<i<<"] = "<<B[i]<<std::endl;
#endif
if (B[i] < N)
- FFLAS::fcopy (F, r, C+(B[i]-r)*lda, 1, tmp3+i*r, 1);
+ FFLAS::fassign (F, r, tmp3+i*r, 1, C+(B[i]-r)*lda, 1);
}
#ifdef LB_DEBUG
std::cerr<<"3"<<std::endl;
#endif
- delete[] tmp3;
+ FFLAS::fflas_delete (tmp3);
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
@@ -526,7 +530,7 @@ namespace FFPACK {
#endif
fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, N-mu-r, r, me,
F.one, E, lda, tmp2, r, F.one, C, lda);
- delete[] tmp2;
+ FFLAS::fflas_delete (tmp2);
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
@@ -537,7 +541,7 @@ namespace FFPACK {
#endif
fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, N, r, mc-r,
F.one, C+r, lda, tmp, r, F.one, C, lda);
- delete[] tmp;
+ FFLAS::fflas_delete (tmp);
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
@@ -546,14 +550,14 @@ namespace FFPACK {
// switching C_1 <-> C_2
std::cerr<<"// switching C_1 <-> C_2";
#endif
- tmp = new typename Field::Element[N*r];
+ tmp = FFLAS::fflas_new (F, N, r);
for (size_t j = 0; j<r; ++j)
- FFLAS::fcopy (F, N, tmp+j, r, C+j, lda);
+ FFLAS::fassign (F, N, C+j, lda, tmp+j, r);
for (size_t j = r; j<mc; ++j)
- FFLAS::fcopy (F, N, C+j-r, lda, C+j, lda);
+ FFLAS::fassign (F, N, C+j, lda, C+j-r, lda);
for (size_t j = 0; j<r; ++j)
- FFLAS::fcopy (F, N, C+mc-r+j, lda, tmp+j, r);
- delete[] tmp;
+ FFLAS::fassign (F, N, tmp+j, r, C+mc-r+j, lda);
+ FFLAS::fflas_delete (tmp);
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
@@ -565,7 +569,7 @@ namespace FFPACK {
std::cerr<<"// update the datastructure:";
#endif
mu += r;
- tmp2 = new typename Field::Element[N*me];
+ tmp2 = FFLAS::fflas_new (F, N, me);
size_t nlambda= 0, nme=0;
for (size_t i=0;i<lambda+me;++i)
allowedRows[i]=true;
@@ -574,7 +578,7 @@ namespace FFPACK {
#ifdef LB_DEBUG
std::cerr<<"B["<<j-r<<"] = "<<N+nme<<std::endl;
#endif
- FFLAS::fcopy (F, N, tmp2+nme, me, E+(B[j]-N), lda);
+ FFLAS::fassign (F, N, E+(B[j]-N), lda, tmp2+nme, me);
B[j-r] = N + nme;
nme++;
} else {
@@ -587,13 +591,13 @@ namespace FFPACK {
}
}
for (size_t j=0; j<nme; ++j)
- FFLAS::fcopy (F, N, E+j, lda, tmp2+j, me);
+ FFLAS::fassign (F, N, tmp2+j, me, E+j, lda);
lambda = nlambda;
me = nme;
#ifdef LB_DEBUG
std::cerr<<"..done"<<std::endl;
#endif
- delete[] tmp2;
+ FFLAS::fflas_delete (tmp2);
}
// update the datastructure: F <- T
for (size_t i=0; i<mu; ++i){
@@ -616,6 +620,12 @@ namespace FFPACK {
}
+ FFLAS::fflas_delete( B );
+ FFLAS::fflas_delete( T );
+ FFLAS::fflas_delete( allowedRows );
+
+ if (exit_value)
+ exit(exit_value);
Polynomial *minP = new Polynomial();
minP->resize(N+1);
minP->operator[](N) = F.one;
diff --git a/fflas-ffpack/ffpack/ffpack_charpoly_kglu.inl b/fflas-ffpack/ffpack/ffpack_charpoly_kglu.inl
index 0eac33e..e98ec0b 100644
--- a/fflas-ffpack/ffpack/ffpack_charpoly_kglu.inl
+++ b/fflas-ffpack/ffpack/ffpack_charpoly_kglu.inl
@@ -9,20 +9,20 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -55,11 +55,12 @@ namespace FFPACK {
return ind;
}
+ // Subroutine for Keller-Gehrig charpoly algorithm
// Compute the new d after a LSP ( d[i] can be zero )
template<class Field>
size_t newD( const Field& F, size_t * d, bool& KeepOn,
const size_t l, const size_t N,
- typename Field::Element * X,
+ typename Field::Element_ptr X,
const size_t * Q,
std::vector<std::vector<typename Field::Element> >& minpt)
{
@@ -67,11 +68,12 @@ namespace FFPACK {
//const elt * Xi = X; // Xi points to the begining of each block
elt *Li=X, *Xminp=X;
KeepOn = false;
- size_t nr, s, i, j, jtot=0, dtot = 0, nrtot=0;
+ size_t i, jtot=0, dtot = 0, nrtot=0;
for ( i=0; dtot<N; ++i){ // for each block
- j = 0;
- nr = s = ( d[i]==l )? 2*l : d[i];
+ size_t j = 0;
+ size_t s ;
+ size_t nr = s = ( d[i]==l )? 2*l : d[i];
if (s > N-dtot)
s= N-dtot;
nrtot += nr;
@@ -108,27 +110,26 @@ namespace FFPACK {
template <class Field, class Polynomial>
std::list<Polynomial>&
KellerGehrig( const Field& F, std::list<Polynomial>& charp, const size_t N,
- const typename Field::Element * A, const size_t lda )
+ typename Field::ConstElement_ptr A, const size_t lda )
{
- typedef typename Field::Element elt;
- const elt * Ai=A;
- elt * U = new elt[N*N]; // to store A^2^i
- elt * B = new elt[N*N]; // to store A^2^i
- elt * V = new elt[N*N]; // to store A^2^i.U
- elt * X = new elt[2*N*N]; // to compute the LSP factorization
- elt *Ui, *Uj, *Uk, *Ukp1, *Ukp1new, *Bi, *Vi, *Vk, *Xi=X, *Xj;
- size_t * P = new size_t[N]; // Column Permutation for LQUP
- size_t * Q = new size_t[2*N]; // Row Permutation for LQUP
+ typename Field::ConstElement_ptr Ai = A;
+ typename Field::Element_ptr U = FFLAS::fflas_new (F, N, N); // to store A^2^i
+ typename Field::Element_ptr B = FFLAS::fflas_new (F, N, N); // to store A^2^i
+ typename Field::Element_ptr V = FFLAS::fflas_new (F, N, N); // to store A^2^i.U
+ typename Field::Element_ptr X = FFLAS::fflas_new (F, 2*N, N); // to compute the LSP factorization
+ typename Field::Element_ptr Ui, Uj, Uk, Ukp1, Ukp1new, Bi, Vi, Vk, Xi=X, Xj;
+ size_t * P = FFLAS::fflas_new<size_t>(N); // Column Permutation for LQUP
+ size_t * Q = FFLAS::fflas_new<size_t>(2*N); // Row Permutation for LQUP
- size_t * d= new size_t[N]; // dimensions of Vect(ei, Aei...)
- size_t * dv = new size_t[N];
- size_t * dold = new size_t[N]; // copy of d
+ size_t * d= FFLAS::fflas_new<size_t>(N); // dimensions of Vect(ei, Aei...)
+ size_t * dv = FFLAS::fflas_new<size_t>(N);
+ size_t * dold = FFLAS::fflas_new<size_t>(N); // copy of d
// vector of the opposite of the coefficient of computed minpolys
- std::vector< std::vector< elt > > m(N);
+ std::vector< std::vector< typename Field::Element > > m(N);
typename Polynomial::iterator it;
- size_t i=0, l=1, j, k=N, cpt, newRowNb, nrowX, ind;
+ size_t i=0, l=1, j, k=N, cpt, newRowNb;
bool KeepOn;
for ( i=0; i<N; ++i)
@@ -155,11 +156,12 @@ namespace FFPACK {
P[i]=0;
for ( i=0;i<2*N;++i)
Q[i]=0;
- LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, 2*N, N, X, N, P, Q, FfpackLQUP);
+ LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, 2*N, N, X, N, P, Q);
k = Protected::newD( F,d, KeepOn, l, N, X, Q, m);
while(KeepOn){ // Main loop, until each subspace dimension has been found
+ size_t nrowX, ind ;
// Updating U:
Uk = U;
// Firstly, removing extra rows
@@ -246,18 +248,18 @@ namespace FFPACK {
P[i]=0;
for ( i=0;i<2*N;++i)
Q[i]=0;
- LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, nrowX, N, X, N, P, Q, FfpackLQUP);
+ LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, nrowX, N, X, N, P, Q);
// Recompute the degrees of the list factors
k = Protected::newD(F, d, KeepOn, l, N, X,Q, m);
}
- delete[] U;
- delete[] V;
- delete[] B;
- delete[] P;
- delete[] Q;
- delete[] dv;
- delete[] dold;
+ FFLAS::fflas_delete (U);
+ FFLAS::fflas_delete (V);
+ FFLAS::fflas_delete (B);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ FFLAS::fflas_delete( dv);
+ FFLAS::fflas_delete( dold);
k = Protected::updateD( F, d, k, m);
// Constructing the CharPoly
@@ -269,8 +271,8 @@ namespace FFPACK {
F.neg(*it, m[i][j]);
charp.push_back( *minP );
}
- delete[] X;
- delete[] d;
+ FFLAS::fflas_delete (X);
+ FFLAS::fflas_delete( d);
return charp;
}
diff --git a/fflas-ffpack/ffpack/ffpack_echelonforms.inl b/fflas-ffpack/ffpack/ffpack_echelonforms.inl
index c41caee..87a8ee0 100644
--- a/fflas-ffpack/ffpack/ffpack_echelonforms.inl
+++ b/fflas-ffpack/ffpack/ffpack_echelonforms.inl
@@ -1,25 +1,25 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
/* ffpack_echelon.h
* Copyright (C) 2009, 2010 Clement Pernet
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -31,130 +31,121 @@
#define __FFLASFFPACK_ffpack_echelon_forms_INL
template <class Field>
-size_t FFPACK::ColumnEchelonForm (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t* P, size_t* Qt, const bool transform)
+inline size_t FFPACK::ColumnEchelonForm (const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const FFPACK_LU_TAG LuTag)
{
-
size_t r;
- r = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Qt);
+ if (LuTag == FFPACK::FfpackSlabRecursive)
+ r = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Qt);
+ else
+ r = PLUQ (F, FFLAS::FflasNonUnit, M, N, A, lda, Qt, P);
if (transform){
ftrtri (F, FFLAS::FflasUpper, FFLAS::FflasNonUnit, r, A, lda);
- ftrmm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, r, N-r,
- F.mOne, A, lda, A+r, lda);
+ ftrmm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, r, N-r, F.mOne, A, lda, A+r, lda);
}
return r;
}
template <class Field>
-size_t FFPACK::RowEchelonForm (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t* P, size_t* Qt, const bool transform)
+inline size_t FFPACK::RowEchelonForm (const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const FFPACK_LU_TAG LuTag)
{
-
size_t r;
- r = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasTrans, M, N, A, lda, P, Qt);
+ if (LuTag == FFPACK::FfpackSlabRecursive)
+ r = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasTrans, M, N, A, lda, P, Qt);
+ else
+ r = PLUQ (F, FFLAS::FflasUnit, M, N, A, lda, P, Qt);
if (transform){
-
ftrtri (F, FFLAS::FflasLower, FFLAS::FflasNonUnit, r, A, lda);
- ftrmm (F, FFLAS::FflasRight, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, M-r, r,
- F.mOne, A, lda, A+r*lda, lda);
+ ftrmm (F, FFLAS::FflasRight, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, M-r, r, F.mOne, A, lda, A+r*lda, lda);
}
return r;
}
template <class Field>
-size_t
+inline size_t
FFPACK::ReducedColumnEchelonForm (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t* P, size_t* Qt, const bool transform)
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const FFPACK_LU_TAG LuTag)
{
-
size_t r;
- r = ColumnEchelonForm (F, M, N, A, lda, P, Qt, transform);
- // M = Q^T M
- for (size_t i=0; i<r; ++i){
- if ( Qt[i]> (size_t) i ){
- FFLAS::fswap( F, i,
- A + Qt[i]*lda, 1,
- A + i*lda, 1 );
+ r = ColumnEchelonForm (F, M, N, A, lda, P, Qt, transform, LuTag);
+
+ if (LuTag == FfpackSlabRecursive){
+ // Putting Echelon in compressed triangular form : M = Q^T M
+ for (size_t i=0; i<r; ++i){
+ if ( Qt[i]> (size_t) i ){
+ FFLAS::fswap( F, i,
+ A + Qt[i]*lda, 1,
+ A + i*lda, 1 );
+ }
}
}
if (transform){
ftrtri (F, FFLAS::FflasLower, FFLAS::FflasUnit, r, A, lda);
- ftrmm (F, FFLAS::FflasRight, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, M-r, r,
- F.one, A, lda, A+r*lda, lda);
+ ftrmm (F, FFLAS::FflasRight, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, M-r, r, F.one, A, lda, A+r*lda, lda);
ftrtrm (F, FFLAS::FflasNonUnit, r, A, lda);
} else {
- ftrsm (F, FFLAS::FflasRight, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, M-r, r,
- F.one, A, lda, A+r*lda, lda);
- for (size_t i=0; i<r; i++){
- for (size_t j=0; j<N; j++)
- F.assign (*(A+i*lda+j), F.zero);
- F.assign (*(A + i*(lda+1)), F.one);
- }
- applyP(F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- r, 0,(int) r, A, lda, Qt);
+ ftrsm (F, FFLAS::FflasRight, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, M-r, r, F.one, A, lda, A+r*lda, lda);
+ //FFLAS::fidentity (F, r, r, A, lda);
+ //applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans, r, 0,(int) r, A, lda, Qt);
}
-
return r;
}
template <class Field>
-size_t
+inline size_t
FFPACK::ReducedRowEchelonForm (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t* P, size_t* Qt, const bool transform)
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const FFPACK_LU_TAG LuTag)
{
size_t r;
- r = RowEchelonForm (F, M, N, A, lda, P, Qt, transform);
- // M = M Q
- for (size_t i=0; i<r; ++i){
- if ( Qt[i]> i ){
- FFLAS::fswap( F, i,
- A + Qt[i], lda,
- A + i, lda );
- }
+ r = RowEchelonForm (F, M, N, A, lda, P, Qt, transform, LuTag);
+ if (LuTag == FfpackSlabRecursive){
+ // Putting Echelon in compressed triangular form : M = M Q
+ for (size_t i=0; i<r; ++i)
+ if ( Qt[i]> i )
+ FFLAS::fswap (F, i, A + Qt[i], lda, A + i, lda );
}
+
if (transform){
ftrtri (F, FFLAS::FflasUpper, FFLAS::FflasUnit, r, A, lda);
- ftrmm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasUnit, r, N-r,
- F.one, A, lda, A+r, lda);
+ ftrmm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasUnit, r, N-r, F.one, A, lda, A+r, lda);
+
ftrtrm (F, FFLAS::FflasUnit, r, A, lda);
} else {
- ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasUnit, r, N-r,
- F.one, A, lda, A+r, lda);
- for (size_t i=0; i<r; i++){
- for (size_t j=0; j<M; j++)
- F.assign (*(A+j*lda+i), F.zero);
- F.assign (*(A + i*(lda+1)), F.one);
- }
- applyP(F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- r, 0, (int)r, A, lda, Qt);
+ ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasUnit, r, N-r, F.one, A, lda, A+r, lda);
+ //FFLAS::fidentity (F, r, r, A, lda);
+ //applyP(F, FFLAS::FflasRight, FFLAS::FflasNoTrans, r, 0, (int)r, A, lda, Qt);
}
return r;
}
/*
- * Warning, this implementation is currently broken:
+ * @bug Warning, this implementation is currently broken:
* the LAPACK permutation mechanism can not be used here as is
* More work required on the construction of the permutation P...
+ * LapackPermToMathPerm systems of ffpack_pluq could be used here.
+ * Need work.
*/
template <class Field>
-size_t
+inline size_t
FFPACK::REF (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
+ typename Field::Element_ptr A, const size_t lda,
const size_t colbeg, const size_t rowbeg, const size_t colsize,
size_t* Qt, size_t* P)
{
-
- typedef typename Field::Element Element;
-
if (colsize == 1){
for (size_t i=rowbeg; i<M; ++i){
if (!F.isZero(*(A+i*lda+colbeg))){
@@ -163,14 +154,16 @@ FFPACK::REF (const Field& F, const size_t M, const size_t N,
F.assign(*(A+rowbeg*lda+colbeg),*(A+i*lda+colbeg));
F.assign(*(A+i*lda+colbeg), F.zero);
}
- Element invpiv;
+ typename Field::Element invpiv;
F.inv(invpiv, *(A+rowbeg*lda + colbeg));
F.assign(*(A+rowbeg*lda+colbeg), invpiv);
F.negin(invpiv);
- for (size_t j=0; j<rowbeg; ++j)
- F.mulin (*(A+j*lda+colbeg), invpiv);
- for (size_t j=rowbeg+1; j<M; ++j)
- F.mulin (*(A+j*lda+colbeg), invpiv);
+ // for (size_t j=0; j<rowbeg; ++j)
+ // F.mulin (*(A+j*lda+colbeg), invpiv);
+ FFLAS::fscalin(F,rowbeg,invpiv,A+colbeg,lda);
+ // for (size_t j=rowbeg+1; j<M; ++j)
+ // F.mulin (*(A+j*lda+colbeg), invpiv);
+ FFLAS::fscalin(F,M-rowbeg-1,invpiv,A+colbeg,lda);
return 1;
}
}
@@ -182,12 +175,12 @@ FFPACK::REF (const Field& F, const size_t M, const size_t N,
// Recurive call on slice A*1
size_t r1 = REF(F, M, N, A, lda, colbeg, rowbeg, recsize, Qt, P);
- Element* A11 = A+colbeg;
- Element* A12 = A11+recsize;
- Element* A22 = A12+rowbeg*lda;
- Element* A21 = A11+rowbeg*lda;
- Element* A31 = A21+r1*lda;
- Element* A32 = A22+r1*lda;
+ typename Field::Element_ptr A11 = A+colbeg;
+ typename Field::Element_ptr A12 = A11+recsize;
+ typename Field::Element_ptr A22 = A12+rowbeg*lda;
+ typename Field::Element_ptr A21 = A11+rowbeg*lda;
+ typename Field::Element_ptr A31 = A21+r1*lda;
+ typename Field::Element_ptr A32 = A22+r1*lda;
/**
* ---------------------
@@ -213,12 +206,12 @@ FFPACK::REF (const Field& F, const size_t M, const size_t N,
F.one, A31, lda, A22, lda, F.one, A32, lda);
// A22 <- A21*A22
- Element* tmp = new Element [r1*(colsize-recsize)];
+ typename Field::Element_ptr tmp = FFLAS::fflas_new (F, r1, colsize-recsize);
for (size_t i = 0; i < r1; ++i)
- fcopy (F, colsize-recsize, tmp+i*(colsize-recsize), 1, A22+i*lda, 1);
+ FFLAS::fassign (F, colsize-recsize, A22+i*lda, 1, tmp+i*(colsize-recsize), 1);
fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, r1, colsize-recsize, r1,
F.one, A21, lda, tmp, colsize-recsize, F.zero, A22, lda);
- delete[] tmp;
+ FFLAS::fflas_delete (tmp);
// Recurive call on slice A*2
size_t r2 = REF(F, M, N, A, lda, colbeg + recsize, rowbeg + r1,
@@ -227,12 +220,12 @@ FFPACK::REF (const Field& F, const size_t M, const size_t N,
// Apply permutation on A*1
applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans, r1, rowbeg+r1, rowbeg+r1+r2, A11, lda, Qt);
- Element * U11 = A11;
- Element * U12 = A12;
- Element * U21 = A31;
- Element * U22 = A32;
- Element * U31 = U21+r2*lda;
- Element * U32 = U31+recsize;
+ typename Field::Element_ptr U11 = A11;
+ typename Field::Element_ptr U12 = A12;
+ typename Field::Element_ptr U21 = A31;
+ typename Field::Element_ptr U22 = A32;
+ typename Field::Element_ptr U31 = U21+r2*lda;
+ typename Field::Element_ptr U32 = U31+recsize;
// U11 <- U11 + U12 * U21
fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, rowbeg+r1, r1, r2,
@@ -243,36 +236,41 @@ FFPACK::REF (const Field& F, const size_t M, const size_t N,
F.one, U32, lda, U21, lda, F.one, U31, lda);
// U21 <- U22*U21
- tmp = new Element [r2*r1];
+ tmp = FFLAS::fflas_new (F, r2, r1);
for (size_t i = 0; i < r2; ++i)
- fcopy (F, r1, tmp+i*r1, 1, U21+i*lda, 1);
+ FFLAS::fassign (F, r1, U21+i*lda, 1, tmp+i*r1, 1);
fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, r2, r1, r2,
F.one, U22, lda, tmp, r1, F.zero, U21, lda);
- delete[] tmp;
+ FFLAS::fflas_delete(tmp);
//Permute the non pivot columns to the end
if (r1 < recsize){
size_t ncol = recsize -r1;
size_t nrow = rowbeg + r1;
- Element * NZ1 = A11+r1;
+ typename Field::Element_ptr NZ1 = A11+r1;
- tmp = new Element [nrow*ncol];
+ tmp = FFLAS::fflas_new (F, nrow, ncol);
for (size_t i=0; i < nrow; ++i)
- fcopy (F, ncol, tmp+i*ncol, 1, NZ1 + i*lda, 1);
+ FFLAS::fassign (F, ncol, NZ1 + i*lda, 1, tmp+i*ncol, 1);
for (size_t i=0; i < M; ++i)
// Risky copy with overlap, but safe with the naive
- // implementation of fcopy
- fcopy (F, r2, NZ1+i*lda, 1, A12 + i*lda, 1);
+ // implementation of fassign
+ //! @bug safe ???
+ FFLAS::fassign (F, r2, A12 + i*lda, 1, NZ1+i*lda, 1);
NZ1 += r2;
for (size_t i=0; i<nrow; ++i)
- fcopy (F, ncol, NZ1 + i*lda, 1, tmp+i*ncol,1);
- delete[] tmp;
+ FFLAS::fassign (F, ncol, tmp+i*ncol,1, NZ1 + i*lda, 1);
+ FFLAS::fflas_delete (tmp);
+#if 0
for (size_t i=rowbeg+r1; i<M; ++i)
for (size_t j=0; j<recsize-r1; ++j)
F.assign(*(NZ1+i*lda+j), F.zero);
- // size_t * temp = new size_t[recsize-r1];
+#else
+ FFLAS::fzero(F,M-rowbeg-r1,recsize-r1,NZ1+(rowbeg+r1)*lda,lda);
+#endif
+ // size_t * temp = FFLAS::fflas_new<size_t>(recsize-r1);
// for (size_t i=0,j = colbeg+r1; j<colbeg+recsize; ++i,++j)
// temp[i] = P[j];
// for (size_t i = colbeg+recsize, j = colbeg+r1; i<colbeg+recsize+r2; ++i,++j)
@@ -292,4 +290,406 @@ FFPACK::REF (const Field& F, const size_t M, const size_t N,
return r1+r2;
}
+namespace FFPACK {
+template <class Field>
+inline size_t
+ReducedRowEchelonForm2 (const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform /*= true */)
+{
+ for (size_t i=0; i<N; ++i)
+ Qt[i] = i;
+ return REF (F, M, N, A, lda, 0, 0, N, P, Qt);
+
+}
+} // FFPACK
+
+namespace FFPACK{
+
+ /*********************************************/
+ /* Accessors to Triangular and Echelon forms */
+ /*********************************************/
+template <class Field>
+inline void
+getTriangular (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr T, const size_t ldt,
+ const bool OnlyNonZeroVectors)
+{
+ typename Field::ConstElement_ptr Ai = A;
+ typename Field::Element_ptr Ti = T;
+ if (Uplo == FFLAS::FflasUpper){
+ for (size_t i=0; i<R; i++, Ai += lda, Ti += ldt){
+ //!@todo just one triangular fzero+fassign ?
+ if (diag == FFLAS::FflasNonUnit){
+ FFLAS::fzero(F,i,Ti,1);
+ FFLAS::fassign (F, N-i, Ai+i, 1, Ti+i, 1);
+ }
+ else {
+ FFLAS::fzero(F,i,Ti,1);
+ F.assign (*(Ti+i), F.one);
+ FFLAS::fassign (F, N-i-1, Ai+i+1, 1, Ti+i+1, 1);
+ }
+ }
+ if (!OnlyNonZeroVectors)
+ FFLAS::fzero(F,M-R,N,Ti,ldt);
+ } else {
+ size_t maxcol = (OnlyNonZeroVectors ? R : N);
+ for (size_t i=0; i<R; i++, Ai += lda, Ti += ldt){
+ if (diag == FFLAS::FflasNonUnit){
+ FFLAS::fassign (F, i+1, Ai, 1, Ti, 1);
+ FFLAS::fzero(F,maxcol-i-1,Ti+i+1,1);
+ }
+ else {
+ FFLAS::fassign (F, i, Ai, 1, Ti, 1);
+ F.assign (Ti[i], F.one);
+ FFLAS::fzero(F,maxcol-i-1,Ti+i+1,1);
+ }
+ }
+ //Ti = T+R*ldt;
+ FFLAS::fassign(F, M-R, R, Ai, lda, Ti, ldt);
+ if (!OnlyNonZeroVectors)
+ FFLAS::fzero(F, M-R, N-R, Ti+R, ldt);
+ }
+}
+template <class Field>
+inline void
+getTriangular (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R,
+ typename Field::Element_ptr A, const size_t lda)
+{
+ typename Field::Element_ptr Ai = A;
+ if (Uplo == FFLAS::FflasUpper){
+ for (size_t i=0; i<R; i++, Ai += lda){
+ //!@todo just one triangular fzero+fassign ?
+ FFLAS::fzero(F,i,Ai,1);
+ if (diag == FFLAS::FflasUnit)
+ F.assign (Ai[i], F.one);
+ }
+ FFLAS::fzero(F,M-R,N,Ai,lda);
+ } else {
+ for (size_t i=0; i<R; i++, Ai += lda){
+ if (diag == FFLAS::FflasUnit)
+ F.assign (Ai[i], F.one);
+ FFLAS::fzero(F,N-i-1,Ai+i+1,1);
+ }
+ FFLAS::fzero(F, M-R, N-R, Ai+R, lda);
+ }
+}
+
+inline void
+PLUQtoEchelonPermutation (const size_t N, const size_t R, const size_t * P, size_t * outPerm)
+{
+ size_t * MP = new size_t[N];
+ size_t * invMP = new size_t[N];
+ LAPACKPerm2MathPerm (MP, P, N);
+ for (size_t i=0; i<N; ++i)
+ invMP[MP[i]] = i;
+ std::sort(MP,MP+R);
+ for (size_t i=0;i<R; ++i)
+ MP[i] = invMP [MP [i]];
+ MathPerm2LAPACKPerm (outPerm, MP, R);
+ delete[] MP;
+ delete[] invMP;
+}
+
+template <class Field>
+inline void
+getEchelonForm (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr T, const size_t ldt,
+ const bool OnlyNonZeroVectors,
+ const FFPACK_LU_TAG LuTag)
+{
+ if (LuTag == FfpackSlabRecursive){
+ typename Field::ConstElement_ptr Ai = A;
+ typename Field::Element_ptr Ti = T;
+ if (Uplo == FFLAS::FflasUpper){ // Extracting a row echelon form
+ for (size_t i=0; i<R; i++, Ai += lda, Ti += ldt){
+ size_t piv = P[i];
+ FFLAS::fzero(F,piv,Ti,1);
+ if (diag == FFLAS::FflasNonUnit)
+ FFLAS::fassign (F, N-piv, Ai+piv, 1, Ti+piv, 1);
+ else {
+ F.assign (Ti[piv], F.one);
+ FFLAS::fassign (F, N-piv-1, Ai+piv+1, 1, Ti+piv+1, 1);
+ }
+ }
+ if (!OnlyNonZeroVectors)
+ FFLAS::fzero(F,M-R,N,Ti,ldt);
+ } else { // Extracting a column echelon form
+ for (size_t i=0; i<R; i++, Ai++, Ti++){
+ size_t piv = P[i];
+ FFLAS::fzero(F,piv,Ti,ldt);
+ if (diag == FFLAS::FflasNonUnit)
+ FFLAS::fassign (F, M-piv, Ai+piv*lda, lda, Ti+piv*ldt, ldt);
+ else {
+ F.assign (*(Ti+piv*ldt), F.one);
+ FFLAS::fassign (F, M-piv-1, Ai+(piv+1)*lda, lda, Ti+(piv+1)*ldt, ldt);
+ }
+ }
+ if (!OnlyNonZeroVectors)
+ FFLAS::fzero(F,M,N-R,Ti,ldt);
+ }
+ } else { // TileRecursive
+ getTriangular (F, Uplo, diag, M, N, R, A, lda, T, ldt, OnlyNonZeroVectors);
+ if (Uplo == FFLAS::FflasLower){
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans, OnlyNonZeroVectors ? R : N, 0, M, T, ldt, P);
+
+ size_t * LPerm = new size_t[R];
+ PLUQtoEchelonPermutation (M, R, P, LPerm);
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, M, 0, R, T, ldt, LPerm);
+
+ delete[] LPerm;
+ } else{
+ applyP (F, FFLAS::FflasRight,FFLAS::FflasNoTrans, OnlyNonZeroVectors ? R : M, 0, N, T, ldt, P);
+
+ size_t * LPerm = new size_t[R];
+ PLUQtoEchelonPermutation (N, R, P, LPerm);
+
+ applyP (F, FFLAS::FflasLeft,FFLAS::FflasNoTrans, N, 0, R, T, ldt, LPerm);
+
+ delete[] LPerm;
+ }
+ }
+}
+template <class Field>
+inline void
+getEchelonForm (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ typename Field::Element_ptr A, const size_t lda,
+ const FFPACK_LU_TAG LuTag)
+{
+ if (LuTag == FfpackSlabRecursive){
+ typename Field::Element_ptr Ai = A;
+ if (Uplo == FFLAS::FflasUpper){ // row echelon form
+ for (size_t i=0; i<R; i++, Ai += lda){
+ size_t piv = P[i];
+ FFLAS::fzero(F,piv,Ai,1);
+ if (diag == FFLAS::FflasUnit)
+ F.assign (Ai[piv], F.one);
+ }
+ FFLAS::fzero(F,M-R,N,Ai,lda);
+ } else { // Extracting a column echelon form
+ for (size_t i=0; i<R; i++, Ai++){
+ size_t piv = P[i];
+ FFLAS::fzero(F,piv,Ai,lda);
+ if (diag == FFLAS::FflasUnit)
+ F.assign (*(Ai+piv*lda), F.one);
+ }
+ FFLAS::fzero(F,M,N-R,Ai,lda);
+ }
+ } else { // TileRecursive
+ getTriangular (F, Uplo, diag, M, N, R, A, lda);
+
+ if (Uplo == FFLAS::FflasLower){
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans, N, 0, M, A, lda, P);
+
+ size_t * LPerm = new size_t[R];
+ PLUQtoEchelonPermutation (M, R, P, LPerm);
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, M, 0, R, A, lda, LPerm);
+
+ delete[] LPerm;
+ } else {
+ applyP (F, FFLAS::FflasRight,FFLAS::FflasNoTrans, M, 0, N, A, lda, P);
+
+ size_t * LPerm = new size_t[R];
+ PLUQtoEchelonPermutation (N, R, P, LPerm);
+
+ applyP (F, FFLAS::FflasLeft,FFLAS::FflasNoTrans, N, 0, R, A, lda, LPerm);
+
+ delete[] LPerm;
+ }
+ }
+}
+
+template <class Field>
+inline void
+getEchelonTransform (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P, const size_t * Q,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr T, const size_t ldt,
+ const FFPACK_LU_TAG LuTag)
+{
+ FFLAS::FFLAS_DIAG oppDiag = (diag == FFLAS::FflasNonUnit) ? FFLAS::FflasUnit : FFLAS::FflasNonUnit;
+ FFLAS::FFLAS_UPLO oppUpLo = (Uplo == FFLAS::FflasUpper) ? FFLAS::FflasLower: FFLAS::FflasUpper;
+ size_t Tdim = (Uplo == FFLAS::FflasUpper) ? M : N;
+ size_t MaxPidx = (LuTag == FfpackSlabRecursive) ? R : Tdim;
+
+ getTriangular (F, oppUpLo, oppDiag, M, N, R, A, lda, T, ldt, true);
+
+ FFLAS::fidentity (F, Tdim-R, Tdim-R, T + R*ldt +R, ldt);
+
+ if (oppUpLo == FFLAS::FflasUpper){ // Transform is upper triangular
+ FFLAS::fzero (F, Tdim - R, R, T + R*ldt, ldt);
+
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans, Tdim, 0, MaxPidx, T, ldt, P);
+
+ if (LuTag==FfpackTileRecursive){
+ size_t * LPerm = new size_t[R];
+ PLUQtoEchelonPermutation (M, R, Q, LPerm);
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, N, 0, R, T, ldt, LPerm);
+
+ delete[] LPerm;
+ }
+ } else { // Transform is lower triangular
+ FFLAS::fzero (F, R, Tdim - R, T + R, ldt);
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, Tdim, 0, MaxPidx, T, ldt, P);
+
+ if (LuTag==FfpackTileRecursive){
+ size_t * LPerm = new size_t[R];
+ PLUQtoEchelonPermutation (N, R, Q, LPerm);
+
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans, M, 0, R, T, ldt, LPerm);
+
+ delete[] LPerm;
+ }
+ }
+}
+template <class Field>
+inline void
+getReducedEchelonForm (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr T, const size_t ldt,
+ const bool OnlyNonZeroVectors,
+ const FFPACK_LU_TAG LuTag)
+{
+ size_t MaxPidx = (LuTag == FfpackSlabRecursive) ? R : ((Uplo == FFLAS::FflasUpper)?N:M);
+
+ FFLAS::fidentity (F, R, R, T, ldt);
+
+ if (Uplo == FFLAS::FflasUpper){ // Extracting a reduced row echelon form
+ FFLAS::fassign(F, R, N-R, A+R, lda, T+R, ldt);
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, R, 0, MaxPidx, T, ldt, P);
+
+ if (!OnlyNonZeroVectors)
+ FFLAS::fzero (F, M-R, N, T + R*ldt, ldt);
+
+ if (LuTag==FfpackTileRecursive){
+ size_t * LPerm = new size_t[R];
+ PLUQtoEchelonPermutation (N, R, P, LPerm);
+
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans, N, 0, R, T, ldt, LPerm);
+
+ delete[] LPerm;
+ }
+
+ } else { // Extracting a reduced column echelon form
+ FFLAS::fassign(F, M-R, R, A+R*lda, lda, T+R*ldt, ldt);
+
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans, R, 0, MaxPidx, T, ldt, P);
+
+ if (!OnlyNonZeroVectors)
+ FFLAS::fzero (F, M, N-R, T + R, ldt);
+
+ if (LuTag==FfpackTileRecursive){
+ size_t * LPerm = new size_t[R];
+ PLUQtoEchelonPermutation (M, R, P, LPerm);
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, M, 0, R, T, ldt, LPerm);
+
+ delete[] LPerm;
+ }
+ }
+}
+template <class Field>
+inline void
+getReducedEchelonForm (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ typename Field::Element_ptr A, const size_t lda,
+ const FFPACK_LU_TAG LuTag)
+{
+ size_t MaxPidx = (LuTag == FfpackSlabRecursive) ? R : ((Uplo == FFLAS::FflasUpper)?N:M);
+ FFLAS::fidentity (F, R, R, A, lda);
+ if (Uplo == FFLAS::FflasUpper){ // Extracting a reduced row echelon form
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, R, 0, MaxPidx, A, lda, P);
+
+ FFLAS::fzero (F, M-R, N, A + R*lda, lda);
+
+ if (LuTag==FfpackTileRecursive){
+ size_t * LPerm = new size_t[R];
+ PLUQtoEchelonPermutation (N, R, P, LPerm);
+
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans, N, 0, R, A, lda, LPerm);
+
+ delete[] LPerm;
+ }
+
+ } else { // Extracting a reduced column echelon form
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans, R, 0, MaxPidx, A, lda, P);
+
+ FFLAS::fzero (F, M, N-R, A + R, lda);
+
+ if (LuTag==FfpackTileRecursive){
+ size_t * LPerm = new size_t[R];
+ PLUQtoEchelonPermutation (M, R, P, LPerm);
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, M, 0, R, A, lda, LPerm);
+
+ delete[] LPerm;
+ }
+ }
+}
+
+template <class Field>
+inline void
+getReducedEchelonTransform (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P, const size_t* Q,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr T, const size_t ldt,
+ const FFPACK_LU_TAG LuTag)
+{
+ FFLAS::FFLAS_UPLO oppUpLo = (Uplo == FFLAS::FflasUpper) ? FFLAS::FflasLower: FFLAS::FflasUpper;
+ size_t Tdim = (Uplo == FFLAS::FflasUpper) ? M : N;
+ size_t MaxPidx = (LuTag == FfpackSlabRecursive) ? R : Tdim;
+
+ FFLAS::fidentity (F, Tdim-R, Tdim-R, T + R*ldt +R, ldt);
+
+ if (oppUpLo == FFLAS::FflasUpper){ // Transform is upper triangular
+ FFLAS::fassign (F, R, N, A, lda, T, ldt);
+
+ FFLAS::fzero (F, Tdim - R, R, T + R*ldt, ldt);
+
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans, Tdim, 0, MaxPidx, T, ldt, P);
+
+ if (LuTag==FfpackTileRecursive){
+ size_t * LPerm = new size_t[R];
+ PLUQtoEchelonPermutation (M, R, Q, LPerm);
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, Tdim, 0, R, T, ldt, LPerm);
+
+ delete[] LPerm;
+ }
+ } else { // Triangular is lower triangular
+ FFLAS::fassign (F, M, R, A, lda, T, ldt);
+
+ FFLAS::fzero (F, R, Tdim - R, T + R, ldt);
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, Tdim, 0, MaxPidx, T, ldt, P);
+
+ if (LuTag==FfpackTileRecursive){
+ size_t * LPerm = new size_t[R];
+ PLUQtoEchelonPermutation (N, R, Q, LPerm);
+
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans, Tdim, 0, R, T, ldt, LPerm);
+
+ delete[] LPerm;
+ }
+ }
+}
+
+} // FFPACK
#endif // __FFLASFFPACK_ffpack_echelon_forms_INL
diff --git a/fflas-ffpack/ffpack/ffpack_fgesv.inl b/fflas-ffpack/ffpack/ffpack_fgesv.inl
new file mode 100644
index 0000000..4f69a6a
--- /dev/null
+++ b/fflas-ffpack/ffpack/ffpack_fgesv.inl
@@ -0,0 +1,93 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 FFLAS-FFACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_ffpack_fgesv_INL
+#define __FFLASFFPACK_ffpack_fgesv_INL
+
+
+namespace FFPACK {
+
+
+
+template <class Field>
+ size_t
+ fgesv (const Field& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr B, const size_t ldb,
+ int * info)
+ {
+
+ size_t Na;
+ if (Side == FFLAS::FflasLeft)
+ Na = M;
+ else
+ Na = N;
+
+ size_t* P = FFLAS::fflas_new<size_t>(Na);
+ size_t* Q = FFLAS::fflas_new<size_t>(Na);
+
+ size_t R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, Na, Na, A, lda, P, Q);
+
+ fgetrs (F, Side, M, N, R, A, lda, P, Q, B, ldb, info);
+
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+
+ return R;
+ }
+
+ template <class Field>
+ size_t
+ fgesv (const Field& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t NRHS,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t ldx,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ int * info)
+ {
+
+ size_t* P = FFLAS::fflas_new<size_t>(N);
+ size_t* Q = FFLAS::fflas_new<size_t>(M);
+
+ size_t R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Q);
+
+ fgetrs (F, Side, M, N, NRHS, R, A, lda, P, Q, X, ldx, B, ldb, info);
+
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+
+ return R;
+ }
+
+} //FFPACK
+
+#endif // __FFLASFFPACK_ffpack_fgesv_INL
diff --git a/fflas-ffpack/ffpack/ffpack_fgetrs.inl b/fflas-ffpack/ffpack/ffpack_fgetrs.inl
new file mode 100644
index 0000000..a75e3dc
--- /dev/null
+++ b/fflas-ffpack/ffpack/ffpack_fgetrs.inl
@@ -0,0 +1,271 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 FFLAS-FFACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_ffpack_fgetrs_INL
+#define __FFLASFFPACK_ffpack_fgetrs_INL
+
+
+namespace FFPACK {
+
+
+ template <class Field>
+ void
+ fgetrs (const Field& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ typename Field::Element_ptr A, const size_t lda,
+ const size_t *P, const size_t *Q,
+ typename Field::Element_ptr B, const size_t ldb,
+ int * info)
+ {
+
+ *info =0;
+ if (Side == FFLAS::FflasLeft) { // Left looking solve A X = B
+
+ solveLB2 (F, FFLAS::FflasLeft, M, N, R, A, lda, Q, B, ldb);
+
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
+ N, 0,(int) R, B, ldb, Q);
+
+ bool consistent = true;
+ for (size_t i = R; i < M; ++i)
+ for (size_t j = 0; j < N; ++j)
+ if (!F.isZero (*(B + i*ldb + j)))
+ consistent = false;
+ if (!consistent) {
+ std::cerr<<"System is inconsistent"<<std::endl;
+ *info = 1;
+ }
+ // The last rows of B are now supposed to be 0
+#if 0
+ for (size_t i = R; i < M; ++i)
+ for (size_t j = 0; j < N; ++j)
+ *(B + i*ldb + j) = F.zero;
+#endif
+
+ ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
+ R, N, F.one, A, lda , B, ldb);
+
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+ N, 0,(int) R, B, ldb, P);
+
+ }
+ else { // Right Looking X A = B
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
+ M, 0,(int) R, B, ldb, P);
+
+ ftrsm (F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
+ M, R, F.one, A, lda , B, ldb);
+
+ fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M, N-R, R, F.one,
+ B, ldb, A+R, lda, F.mOne, B+R, ldb);
+
+ bool consistent = true;
+ for (size_t i = 0; i < M; ++i)
+ for (size_t j = R; j < N; ++j)
+ if (!F.isZero (*(B + i*ldb + j)))
+ consistent = false;
+ if (!consistent) {
+ std::cerr<<"System is inconsistent"<<std::endl;
+ *info = 1;
+ }
+ // The last cols of B are now supposed to be 0
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+ M, 0,(int) R, B, ldb, Q);
+
+ solveLB2 (F, FFLAS::FflasRight, M, N, R, A, lda, Q, B, ldb);
+ }
+ }
+
+ template <class Field>
+ typename Field::Element_ptr
+ fgetrs (const Field& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t NRHS, const size_t R,
+ typename Field::Element_ptr A, const size_t lda,
+ const size_t *P, const size_t *Q,
+ typename Field::Element_ptr X, const size_t ldx,
+ typename Field::ConstElement_ptr B, const size_t ldb,
+ int * info)
+ {
+ *info =0;
+ typename Field::Element_ptr W;
+ size_t ldw;
+
+ if (Side == FFLAS::FflasLeft) { // Left looking solve A X = B
+
+ // Initializing X to 0 (to be optimized)
+ FFLAS::fzero(F,N,NRHS,X,ldx);
+ // for (size_t i = 0; i <N; ++i)
+ // for (size_t j=0; j< NRHS; ++j)
+ // F.assign (*(X+i*ldx+j), F.zero);
+
+ if (M > N){ // Cannot copy B into X
+ W = FFLAS::fflas_new (F, M, NRHS);
+ ldw = NRHS;
+ FFLAS::fassign(F,M,NRHS,B,ldb,W,ldw);
+
+ solveLB2 (F, FFLAS::FflasLeft, M, NRHS, R, A, lda, Q, W, ldw);
+
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
+ NRHS, 0,(int) R, W, ldw, Q);
+
+ bool consistent = true;
+ for (size_t i = R; i < M; ++i)
+ for (size_t j = 0; j < NRHS; ++j)
+ if (!F.isZero (*(W + i*ldw + j)))
+ consistent = false;
+ if (!consistent) {
+ std::cerr<<"System is inconsistent"<<std::endl;
+ *info = 1;
+ FFLAS::fflas_delete (W);
+ return X;
+ }
+ // Here the last rows of W are supposed to be 0
+
+ ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
+ R, NRHS, F.one, A, lda , W, ldw);
+
+ FFLAS::fassign(F,R,NRHS,W,ldw,X,ldx);
+
+ FFLAS::fflas_delete (W);
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+ NRHS, 0,(int) R, X, ldx, P);
+
+ }
+ else { // Copy B to X directly
+
+ FFLAS::fassign(F,M,NRHS,B,ldb,X,ldx);
+
+ solveLB2 (F, FFLAS::FflasLeft, M, NRHS, R, A, lda, Q, X, ldx);
+
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
+ NRHS, 0,(int) R, X, ldx, Q);
+
+ bool consistent = true;
+ for (size_t i = R; i < M; ++i)
+ for (size_t j = 0; j < NRHS; ++j)
+ if (!F.isZero (*(X + i*ldx + j)))
+ consistent = false;
+ if (!consistent) {
+ std::cerr<<"System is inconsistent"<<std::endl;
+ *info = 1;
+ return X;
+ }
+ // Here the last rows of W are supposed to be 0
+
+ ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
+ R, NRHS, F.one, A, lda , X, ldx);
+
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+ NRHS, 0,(int) R, X, ldx, P);
+ }
+ return X;
+
+ }
+ else { // Right Looking X A = B
+
+ FFLAS::fzero(F,NRHS,M,X,ldx);
+ // for (size_t i = 0; i <NRHS; ++i)
+ // for (size_t j=0; j< M; ++j)
+ // F.assign (*(X+i*ldx+j), F.zero);
+
+ if (M < N) {
+ W = FFLAS::fflas_new (F, NRHS, N);
+ ldw = N;
+ FFLAS::fassign (F,NRHS, N, B, ldb, W, ldw);
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
+ NRHS, 0,(int) R, W, ldw, P);
+
+ ftrsm (F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
+ NRHS, R, F.one, A, lda , W, ldw);
+
+ fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, NRHS, N-R, R, F.one,
+ W, ldw, A+R, lda, F.mOne, W+R, ldw);
+
+ bool consistent = true;
+ for (size_t i = 0; i < NRHS; ++i)
+ for (size_t j = R; j < N; ++j)
+ if (!F.isZero (*(W + i*ldw + j)))
+ consistent = false;
+ if (!consistent) {
+ std::cerr<<"System is inconsistent"<<std::endl;
+ *info = 1;
+ FFLAS::fflas_delete (W);
+ return X;
+ }
+ // The last N-R cols of W are now supposed to be 0
+ FFLAS::fassign (F, NRHS,R, W , ldb, X ,ldx);
+ FFLAS::fflas_delete (W);
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+ NRHS, 0,(int) R, X, ldx, Q);
+
+ solveLB2 (F, FFLAS::FflasRight, NRHS, M, R, A, lda, Q, X, ldx);
+
+ }
+ else { // M >=N
+ FFLAS::fassign(F,NRHS,N,B,ldb,X,ldx);
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
+ NRHS, 0,(int) R, X, ldx, P);
+
+ ftrsm (F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
+ NRHS, R, F.one, A, lda , X, ldx);
+
+ fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, NRHS, N-R, R, F.one,
+ X, ldx, A+R, lda, F.mOne, X+R, ldx);
+
+ bool consistent = true;
+ for (size_t i = 0; i < NRHS; ++i)
+ for (size_t j = R; j < N; ++j)
+ if (!F.isZero (*(X + i*ldx + j)))
+ consistent = false;
+ if (!consistent) {
+ std::cerr<<"System is inconsistent"<<std::endl;
+ *info = 1;
+ return X;
+ }
+ // The last N-R cols of W are now supposed to be 0
+
+ applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+ NRHS, 0,(int) R, X, ldx, Q);
+
+ solveLB2 (F, FFLAS::FflasRight, NRHS, M, R, A, lda, Q, X, ldx);
+
+ }
+ return X;
+ }
+ }
+
+} // FFPACK
+
+#endif // __FFLASFFPACK_ffpack_fgetrs_INL
diff --git a/fflas-ffpack/ffpack/ffpack_frobenius.inl b/fflas-ffpack/ffpack/ffpack_frobenius.inl
index 0643010..9d29dc9 100644
--- a/fflas-ffpack/ffpack/ffpack_frobenius.inl
+++ b/fflas-ffpack/ffpack/ffpack_frobenius.inl
@@ -6,20 +6,20 @@
*
* Written by Clement Pernet <cpernet at uwaterloo.ca>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -27,44 +27,80 @@
*.
*/
-#include <fflas-ffpack/field/nonzero-randiter.h>
-
-#ifndef MIN
-#define MIN(a,b) (a<b)?a:b
-#endif
+#include <givaro/givranditer.h>
//---------------------------------------------------------------------
// CharpolyArithProg: Las Vegas algorithm to compute the Charpoly
// over a large field (Z/pZ, s.t. p > 2n^2)
//---------------------------------------------------------------------
+//
+//
+namespace FFPACK { namespace Protected {
+ template <class Field>
+ void CompressRows (Field& F, const size_t M,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr tmp, const size_t ldtmp,
+ const size_t * d, const size_t nb_blocs);
+
+ template <class Field>
+ void CompressRowsQK (Field& F, const size_t M,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr tmp, const size_t ldtmp,
+ const size_t * d,const size_t deg, const size_t nb_blocs);
+
+ template <class Field>
+ void DeCompressRows (Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr tmp, const size_t ldtmp,
+ const size_t * d, const size_t nb_blocs);
+ template <class Field>
+ void DeCompressRowsQK (Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr tmp, const size_t ldtmp,
+ const size_t * d, const size_t deg, const size_t nb_blocs);
+
+ template <class Field>
+ void CompressRowsQA (Field& F, const size_t M,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr tmp, const size_t ldtmp,
+ const size_t * d, const size_t nb_blocs);
+ template <class Field>
+ void DeCompressRowsQA (Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr tmp, const size_t ldtmp,
+ const size_t * d, const size_t nb_blocs);
+ } // Protected
+} // FFPACK
+
+
template <class Field, class Polynomial>
std::list<Polynomial>&
FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
- const size_t N, typename Field::Element * A, const size_t lda,
+ const size_t N, typename Field::Element_ptr A, const size_t lda,
const size_t c)
{
FFLASFFPACK_check(c);
- size_t * rp = new size_t[2*N];
+ size_t * rp = FFLAS::fflas_new<size_t>(2*N);
size_t noc = static_cast<size_t>(ceil(double(N)/double(c)));
size_t Nnoc = N*noc;
// Building the workplace matrix
- typename Field::Element *K = new typename Field::Element[Nnoc*c];
- typename Field::Element *K2 = new typename Field::Element[Nnoc*c];
+ typename Field::Element_ptr K = FFLAS::fflas_new (F, Nnoc, c);
+ typename Field::Element_ptr K2 = FFLAS::fflas_new (F, Nnoc, c);
// for (size_t i = 0 ; i < Nnoc*c ; ++i)
// K[i] = F.zero;
size_t ldk = N;
- size_t *dA = new size_t[N]; //PA
- size_t *dK = new size_t[noc*c];
+ size_t *dA = FFLAS::fflas_new<size_t>(N); //PA
+ size_t *dK = FFLAS::fflas_new<size_t>(noc*c);
for (size_t i=0; i<noc; ++i)
dK[i]=0;
// Picking a random noc x N block vector U^T
typename Field::RandIter g (F);
- NonzeroRandIter<Field> nzg (F,g);
+ Givaro::GeneralRingNonZeroRandIter<Field> nzg (g);
for (size_t i = 0; i < noc; ++i)
for (size_t j = 0; j < N; ++j)
g.random( *(K + i*ldk +j) );
@@ -82,20 +118,20 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
size_t w_idx = 0;
for (size_t i=0; i<noc; ++i)
for (size_t j=0; j<c; ++j, w_idx++)
- FFLAS::fcopy(F, N, (K2+(w_idx)*ldk), 1, (K+(i+j*noc)*ldk), 1);
+ FFLAS::fassign(F, N, (K+(i+j*noc)*ldk), 1, (K2+(w_idx)*ldk), 1);
// Copying K <- K2
for (size_t i=0; i<noc*c; ++i)
- FFLAS::fcopy (F, N, (K+i*ldk), 1, K2+i*ldk, 1);
+ FFLAS::fassign (F, N, K2+i*ldk, 1, (K+i*ldk), 1);
- size_t * Pk = new size_t[N];
- size_t * Qk = new size_t[N];
+ size_t * Pk = FFLAS::fflas_new<size_t>(N);
+ size_t * Qk = FFLAS::fflas_new<size_t>(N);
for (size_t i=0; i<N; ++i)
Qk[i] = 0;
for (size_t i=0; i<N; ++i)
Pk[i] = 0;
- size_t R = LUdivine(F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, N, N, K, ldk, Pk, Qk, FfpackLQUP);
+ size_t R = LUdivine(F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, N, N, K, ldk, Pk, Qk);
size_t row_idx = 0;
size_t ii=0;
@@ -110,8 +146,12 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
// std::cerr << "FAIL in preconditionning phase:"
// << " degree sequence is not monotonically not increasing"
// << std::endl;
- delete[] rp; delete[] K;
- delete[] Pk; delete[] Qk; delete[] dA; delete[] dK;
+ FFLAS::fflas_delete( rp);
+ FFLAS::fflas_delete (K);
+ FFLAS::fflas_delete( Pk);
+ FFLAS::fflas_delete( Qk);
+ FFLAS::fflas_delete(dA);
+ FFLAS::fflas_delete( dK);
throw CharpolyFailed();
}
dK[k] = dold = d;
@@ -124,14 +164,14 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
// Selection of the last iterate of each block
- typename Field::Element * K3 = new typename Field::Element[Mk*N];
- typename Field::Element * K4 = new typename Field::Element[Mk*N];
+ typename Field::Element_ptr K3 = FFLAS::fflas_new (F, Mk, N);
+ typename Field::Element_ptr K4 = FFLAS::fflas_new (F, Mk, N);
size_t bk_idx = 0;
for (size_t i = 0; i < Mk; ++i){
- FFLAS::fcopy (F, N, (K3+i*ldk), 1, (K2 + (bk_idx + dK[i]-1)*ldk), 1);
+ FFLAS::fassign (F, N, (K2 + (bk_idx + dK[i]-1)*ldk), 1, (K3+i*ldk), 1);
bk_idx += c;
}
- delete[] K2;
+ FFLAS::fflas_delete (K2);
// K <- K A^T
fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasTrans, Mk, N, N,F.one, K3, ldk, A, lda, F.zero, K4, ldk);
@@ -163,9 +203,14 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
for (size_t j = offset+1; j<R; ++j)
if (!F.isZero(*(K4 + i*ldk + j))){
//std::cerr<<"FAIL C != 0 in preconditionning"<<std::endl;
- delete[] K3; delete[] K4; delete[] K;
- delete[] Pk; delete[] Qk; delete[] rp;
- delete[] dA; delete[] dK;
+ FFLAS::fflas_delete (K3);
+ FFLAS::fflas_delete (K4);
+ FFLAS::fflas_delete (K);
+ FFLAS::fflas_delete( Pk);
+ FFLAS::fflas_delete(Qk);
+ FFLAS::fflas_delete( rp);
+ FFLAS::fflas_delete( dA);
+ FFLAS::fflas_delete( dK);
throw CharpolyFailed();
}
Polynomial P (dK [i]+1);
@@ -184,9 +229,14 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
for (size_t i=0; i<nb_full_blocks + 1; ++i)
for (size_t j=R; j<N; ++j){
if (!F.isZero( *(K4+i*ldk+j) )){
- delete[] K3; delete[] K4; delete[] K;
- delete[] Pk; delete[] Qk; delete[] rp;
- delete[] dA; delete[] dK;
+ FFLAS::fflas_delete (K3);
+ FFLAS::fflas_delete (K4);
+ FFLAS::fflas_delete (K);
+ FFLAS::fflas_delete( Pk);
+ FFLAS::fflas_delete( Qk);
+ FFLAS::fflas_delete( rp);
+ FFLAS::fflas_delete( dA);
+ FFLAS::fflas_delete( dK);
throw CharpolyFailed();
}
}
@@ -195,9 +245,9 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
// <<" completing the Krylov matrix"
// <<std::endl;
size_t Nrest = N-R;
- typename Field::Element * K21 = K + R*ldk;
- typename Field::Element * K22 = K21 + R;
- typename Field::Element * Ki, *Ai;
+ typename Field::Element_ptr K21 = K + R*ldk;
+ typename Field::Element_ptr K22 = K21 + R;
+ typename Field::Element_ptr Ki, Ai;
// Compute the n-k last rows of A' = P A^T P^T in K2_
// A = A . P^t
@@ -221,7 +271,7 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
ftrsm (F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, Nrest, R,
F.one, K, ldk, K21, ldk);
- typename Field::Element * Arec = new typename Field::Element[Nrest*Nrest];
+ typename Field::Element_ptr Arec = FFLAS::fflas_new (F, Nrest, Nrest);
size_t ldarec = Nrest;
// Creation of the matrix A2 for recursive call
@@ -238,35 +288,35 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
// Recursive call on the complementary subspace
CharPoly(F, polyList, Nrest, Arec, ldarec);
- delete[] Arec;
+ FFLAS::fflas_delete (Arec);
frobeniusForm.merge(polyList);
}
- delete[] Pk;
- delete[] Qk;
+ FFLAS::fflas_delete( Pk);
+ FFLAS::fflas_delete( Qk);
size_t deg = c+1;
for (size_t i=0; i<Mk; ++i)
dA[i] = dK[i];
bk_idx = 0;
- typename Field::Element *Arp = new typename Field::Element[Ncurr*Ma];
- typename Field::Element *Ac = new typename Field::Element[Ncurr*Ma];
+ typename Field::Element_ptr Arp = FFLAS::fflas_new (F, Ncurr, Ma);
+ typename Field::Element_ptr Ac = FFLAS::fflas_new (F, Ncurr, Ma);
size_t ldac = Ma;
size_t ldarp = Ncurr;
for (size_t i=0; i < Ncurr; ++i)
for (size_t j=0; j<Ma; ++j)
*(K+i*ldk+j) = *(Ac + i*Ma +j) = *(K4 + i + (j)*ldk);
- delete[] K4;
+ FFLAS::fflas_delete (K4);
- size_t block_idx, it_idx, rp_val;
// Main loop of the arithmetic progession
while ((nb_full_blocks >= 1) && (Mk > 1)) {
- delete[] K;
- delete[] K3;
- K = new typename Field::Element[Ncurr*Ma];
- K3 = new typename Field::Element[Ncurr*Ma];
+ size_t block_idx, it_idx, rp_val;
+ FFLAS::fflas_delete (K);
+ FFLAS::fflas_delete (K3);
+ K = FFLAS::fflas_new (F, Ncurr, Ma);
+ K3 = FFLAS::fflas_new (F, Ncurr, Ma);
ldk = Ma;
// Computation of the rank profile
@@ -279,14 +329,24 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
try{
RR = SpecRankProfile (F, Ma, Ncurr, Arp, ldarp, deg-1, rp);
} catch (CharpolyFailed){
- delete[] Arp; delete[] Ac; delete[] K; delete[] K3;
- delete[] rp; delete[] dA; delete[] dK;
+ FFLAS::fflas_delete (Arp);
+ FFLAS::fflas_delete (Ac);
+ FFLAS::fflas_delete (K);
+ FFLAS::fflas_delete (K3);
+ FFLAS::fflas_delete( rp);
+ FFLAS::fflas_delete( dA);
+ FFLAS::fflas_delete( dK);
throw CharpolyFailed();
}
if (RR < Ncurr){
//std::cerr<<"FAIL RR<Ncurr"<<std::endl;
- delete[] Arp; delete[] Ac; delete[] K; delete[] K3;
- delete[] rp; delete[] dA; delete[] dK;
+ FFLAS::fflas_delete (Arp);
+ FFLAS::fflas_delete (Ac);
+ FFLAS::fflas_delete (K);
+ FFLAS::fflas_delete (K3);
+ FFLAS::fflas_delete( rp);
+ FFLAS::fflas_delete( dA);
+ FFLAS::fflas_delete( dK);
throw CharpolyFailed();
}
@@ -301,8 +361,13 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
do {gg++; rp_val++; it_idx++;}
while ( /*(gg<Ncurr ) &&*/ (rp[gg] == rp_val) && (it_idx < deg ));
if ((block_idx)&&(it_idx > dK[block_idx-1])){
- delete[] Arp; delete[] Ac;delete[] K; delete[] K3;
- delete[] rp; delete[] dA; delete[] dK;
+ FFLAS::fflas_delete (Arp);
+ FFLAS::fflas_delete (Ac);
+ FFLAS::fflas_delete (K);
+ FFLAS::fflas_delete (K3);
+ FFLAS::fflas_delete( rp);
+ FFLAS::fflas_delete( dA);
+ FFLAS::fflas_delete(dK);
throw CharpolyFailed();
//std::cerr<<"FAIL d non decroissant"<<std::endl;
//exit(-1);
@@ -319,7 +384,7 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
// Selection of dense colums of K
for (size_t i=0; i < nb_full_blocks; ++i){
- FFLAS::fcopy (F, Ncurr, K+i, ldk, Ac+i, ldac);
+ FFLAS::fassign (F, Ncurr, Ac+i, ldac, K+i, ldk);
}
// K <- QK K
@@ -333,15 +398,15 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
// Copying K3 <- K
for (size_t i=0; i<Mk; ++i)
- FFLAS::fcopy (F, Ncurr, K3+i, ldk, K+i, ldk);
- CompressRowsQK (F, Mk, K3 + nb_full_blocks*(deg-1)*ldk, ldk,
+ FFLAS::fassign (F, Ncurr, K+i, ldk, K3+i, ldk);
+ Protected::CompressRowsQK (F, Mk, K3 + nb_full_blocks*(deg-1)*ldk, ldk,
Arp, ldarp, dK+nb_full_blocks, deg, Mk-nb_full_blocks);
// K <- PA K
- CompressRows (F, nb_full_blocks, K, ldk, Arp, ldarp, dA, Ma);
+ Protected::CompressRows (F, nb_full_blocks, K, ldk, Arp, ldarp, dA, Ma);
// A <- newQA^T K (compress)
- CompressRowsQA (F, Ma, Ac, ldac, Arp, ldarp, dA, Ma);
+ Protected::CompressRowsQA (F, Ma, Ac, ldac, Arp, ldarp, dA, Ma);
// K <- A K
fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Ncurr-Ma, nb_full_blocks, Ma,F.one,
@@ -349,7 +414,7 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Ma, nb_full_blocks, Ma,F.one,
Ac+(Ncurr-Ma)*ldac, ldac, K+(Ncurr-Ma)*ldk, ldk, F.zero, Arp, ldarp);
for (size_t i=0; i< Ma; ++i)
- FFLAS::fcopy(F, nb_full_blocks, K+(Ncurr-Ma+i)*ldk, 1, Arp+i*ldarp, 1);
+ FFLAS::fassign(F, nb_full_blocks, Arp+i*ldarp, 1, K+(Ncurr-Ma+i)*ldk, 1);
// Copying the last rows of A times K
offset = (deg-2)*nb_full_blocks;
@@ -357,7 +422,7 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
for (size_t j=0; j<Ncurr; ++j)
F.assign(*(K+i+j*ldk), F.zero);
if (dK[i] == dA[i]) // copy the column of A
- FFLAS::fcopy (F, Ncurr, K+i, ldk, Ac+i, ldac);
+ FFLAS::fassign (F, Ncurr, Ac+i, ldac, K+i, ldk);
else{
F.assign (*(K + i + (offset+dK[i]-1)*ldk),F.one);
}
@@ -365,16 +430,16 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
}
// K <- QA K
- DeCompressRowsQA (F, Mk, Ncurr, K, ldk, Arp, ldarp, dA, Ma);
+ Protected::DeCompressRowsQA (F, Mk, Ncurr, K, ldk, Arp, ldarp, dA, Ma);
// K <- QK^T K
- CompressRowsQK (F, Mk, K + nb_full_blocks*(deg-1)*ldk, ldk, Arp, ldarp,
+ Protected::CompressRowsQK (F, Mk, K + nb_full_blocks*(deg-1)*ldk, ldk, Arp, ldarp,
dK+nb_full_blocks, deg, Mk-nb_full_blocks);
// K <- K^-1 K
- size_t *P=new size_t[Mk];
- size_t *Q=new size_t[Mk];
- if (LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, Mk, Mk , K3 + (Ncurr-Mk)*ldk, ldk, P, Q, FfpackLQUP) < Mk){
+ size_t *P=FFLAS::fflas_new<size_t>(Mk);
+ size_t *Q=FFLAS::fflas_new<size_t>(Mk);
+ if (LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, Mk, Mk , K3 + (Ncurr-Mk)*ldk, ldk, P, Q) < Mk){
// should never happen (not a LAS VEGAS check)
//std::cerr<<"FAIL R2 < MK"<<std::endl;
// exit(-1);
@@ -387,11 +452,11 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
Mk, 0,(int) Mk, K+(Ncurr-Mk)*ldk,ldk, P);
fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Ncurr-Mk, Mk, Mk,F.mOne,
K3, ldk, K+(Ncurr-Mk)*ldk,ldk,F.one, K, ldk);
- delete[] P;
- delete[] Q;
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
// K <- PK^T K
- DeCompressRows (F, Mk, Ncurr, K, ldk, Arp, ldarp, dK, Mk);
+ Protected::DeCompressRows (F, Mk, Ncurr, K, ldk, Arp, ldarp, dK, Mk);
// K <- K PK (dA <- dK)
if (nb_full_blocks*deg < Ncurr)
@@ -419,21 +484,26 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
for (size_t j=0; j<nb_full_blocks+1; ++j){
if (!F.isZero( *(K+i*ldk+j) )){
//std::cerr<<"FAIL C != 0"<<std::endl;
- delete[] rp; delete[] Arp; delete[] Ac;
- delete[] K; delete[] K3;
- delete[] dA; delete[] dK;
+ FFLAS::fflas_delete( rp);
+ FFLAS::fflas_delete (Arp);
+ FFLAS::fflas_delete (Ac);
+ FFLAS::fflas_delete (K);
+ FFLAS::fflas_delete (K3);
+ FFLAS::fflas_delete( dA);
+ FFLAS::fflas_delete( dK);
throw CharpolyFailed();
}
}
// A <- K
- delete[] Ac; delete[] Arp;
- Ac = new typename Field::Element[Ncurr*Mk];
+ FFLAS::fflas_delete (Ac);
+ FFLAS::fflas_delete (Arp);
+ Ac = FFLAS::fflas_new (F, Ncurr, Mk);
ldac = Mk;
- Arp = new typename Field::Element[Ncurr*Mk];
+ Arp = FFLAS::fflas_new (F, Ncurr, Mk);
ldarp=Ncurr;
for (size_t i=0; i < Ncurr; ++i )
- FFLAS::fcopy (F, Mk, Ac + i*ldac, 1, K + i*ldk, 1);
+ FFLAS::fassign (F, Mk, K + i*ldk, 1, Ac + i*ldac, 1);
deg++;
@@ -445,15 +515,21 @@ FFPACK::CharpolyArithProg (const Field& F, std::list<Polynomial>& frobeniusForm,
for (size_t j=0; j < dK[0]; ++j)
F.neg( Pl[j], *(K + j*ldk));
frobeniusForm.push_front(Pl);
- delete[] rp; delete[] Arp; delete[] Ac; delete[] K; delete[] K3;
- delete[] dA; delete[] dK;
+ FFLAS::fflas_delete( rp);
+ FFLAS::fflas_delete (Arp);
+ FFLAS::fflas_delete (Ac);
+ FFLAS::fflas_delete (K);
+ FFLAS::fflas_delete (K3);
+ FFLAS::fflas_delete( dA);
+ FFLAS::fflas_delete( dK);
return frobeniusForm;
}
+namespace FFPACK { namespace Protected {
template <class Field>
-void FFPACK::CompressRowsQK (Field& F, const size_t M,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * tmp, const size_t ldtmp,
+void CompressRowsQK (Field& F, const size_t M,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr tmp, const size_t ldtmp,
const size_t * d, const size_t deg,const size_t nb_blocs)
{
@@ -461,62 +537,64 @@ void FFPACK::CompressRowsQK (Field& F, const size_t M,
size_t currw = d[0]-1;
size_t currr = d[0]-1;
for (int i = 0; i< int(nb_blocs)-1; ++i){
+ // FFLAS::fassign(F,deg-d[i],M,A+currr*lda,lda,tmp+(size_t)currtmp*ldtmp);
for (int j = int(d[i]-1); j<int(deg)-1; ++j, ++currr, ++currtmp)
- FFLAS::fcopy(F, M, tmp + (size_t)currtmp*ldtmp, 1, A + currr*lda, 1);
+ FFLAS::fassign(F, M, A + currr*lda, 1, tmp + (size_t)currtmp*ldtmp, 1);
+ // currr += (deg - d[i]);
for (int j=0; j < int(d[i+1]) -1; ++j, ++currr, ++currw){
- FFLAS::fcopy(F, M, A + (currw)*lda, 1, A+(currr)*lda, 1);
+ FFLAS::fassign(F, M, A+(currr)*lda, 1, A + (currw)*lda, 1);
}
}
for (int i=0; i < currtmp; ++i, ++currw){
- FFLAS::fcopy (F, M, A + (currw)*lda, 1, tmp + (size_t)i*ldtmp, 1);
+ FFLAS::fassign (F, M, tmp + (size_t)i*ldtmp, 1, A + (currw)*lda, 1);
}
}
template <class Field>
-void FFPACK::CompressRows (Field& F, const size_t M,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * tmp, const size_t ldtmp,
+void CompressRows (Field& F, const size_t M,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr tmp, const size_t ldtmp,
const size_t * d, const size_t nb_blocs)
{
size_t currd = d[0]-1;
size_t curri = d[0]-1;
for (int i = 0; i< int(nb_blocs)-1; ++i){
- FFLAS::fcopy(F, M, tmp + i*ldtmp, 1, A + currd*lda, 1);
+ FFLAS::fassign(F, M, A + currd*lda, 1, tmp + i*(int)ldtmp, 1);
for (int j=0; j < int(d[i+1]) -1; ++j){
- FFLAS::fcopy(F, M, A + (curri++)*lda, 1, A+(currd+j+1)*lda, 1);
+ FFLAS::fassign(F, M, A+(currd+(size_t)j+1)*lda, 1, A + (curri++)*lda, 1);
}
currd += d[i+1];
}
for (int i=0; i < int(nb_blocs)-1; ++i){
- FFLAS::fcopy (F, M, A + (curri++)*lda, 1, tmp + i*ldtmp, 1);
+ FFLAS::fassign (F, M, tmp + i*(int)ldtmp, 1, A + (curri++)*lda, 1);
}
}
template <class Field>
-void FFPACK::DeCompressRows (Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * tmp, const size_t ldtmp,
+void DeCompressRows (Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr tmp, const size_t ldtmp,
const size_t * d, const size_t nb_blocs)
{
for (int i=0; i<int(nb_blocs)-1; ++i)
- FFLAS::fcopy(F, M, tmp + i*ldtmp, 1, A + (N-nb_blocs+i)*lda, 1);
+ FFLAS::fassign(F, M, A + (N-nb_blocs+(size_t)i)*lda, 1, tmp + i*(int)ldtmp, 1);
size_t w_idx = N - 2;
size_t r_idx = N - nb_blocs - 1;
int i = int(nb_blocs)-1 ;
for (; i--; ){
for (size_t j = 0; j<d[i+1]-1; ++j)
- FFLAS::fcopy (F, M, A + (w_idx--)*lda, 1, A + (r_idx--)*lda, 1);
- FFLAS::fcopy (F, M, A + (w_idx--)*lda, 1, tmp + i*ldtmp, 1);
+ FFLAS::fassign (F, M, A + (r_idx--)*lda, 1, A + (w_idx--)*lda, 1);
+ FFLAS::fassign (F, M, tmp + i*(int)ldtmp, 1, A + (w_idx--)*lda, 1);
}
}
template <class Field>
-void FFPACK::DeCompressRowsQK (Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * tmp, const size_t ldtmp,
+void DeCompressRowsQK (Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr tmp, const size_t ldtmp,
const size_t * d, const size_t deg,const size_t nb_blocs)
{
@@ -525,7 +603,7 @@ void FFPACK::DeCompressRowsQK (Field& F, const size_t M, const size_t N,
for (int i=0; i<int(nb_blocs)-1; ++i)
zeroblockdim += deg - d[i];
for (size_t i=0; i < zeroblockdim - 1; ++i, ++currtmp)
- FFLAS::fcopy(F, M, tmp + currtmp*ldtmp, 1, A + (N - zeroblockdim +i)*lda, 1);
+ FFLAS::fassign(F, M, A + (N - zeroblockdim +i)*lda, 1, tmp + currtmp*ldtmp, 1);
currtmp--;
size_t w_idx = N - 2;
size_t r_idx = N - zeroblockdim - 1;
@@ -533,47 +611,50 @@ void FFPACK::DeCompressRowsQK (Field& F, const size_t M, const size_t N,
int i = int(nb_blocs)-1 ;
for (; i--;){
for (size_t j = 0; j < d [i+1] - 1; ++j)
- FFLAS::fcopy (F, M, A + (w_idx--)*lda, 1, A + (r_idx--)*lda, 1);
+ FFLAS::fassign (F, M, A + (r_idx--)*lda, 1, A + (w_idx--)*lda, 1);
for (size_t j = 0; j < deg - d[i]; ++j)
- FFLAS::fcopy (F, M, A + (w_idx--)*lda, 1, tmp + (currtmp--)*ldtmp, 1);
+ FFLAS::fassign (F, M, tmp + (currtmp--)*ldtmp, 1, A + (w_idx--)*lda, 1);
}
}
template <class Field>
-void FFPACK::CompressRowsQA (Field& F, const size_t M,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * tmp, const size_t ldtmp,
+void CompressRowsQA (Field& F, const size_t M,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr tmp, const size_t ldtmp,
const size_t * d, const size_t nb_blocs)
{
size_t currd = 0;
size_t curri = 0;
for (size_t i = 0; i< nb_blocs; ++i){
- FFLAS::fcopy(F, M, tmp + i*ldtmp, 1, A + currd*lda, 1);
+ FFLAS::fassign(F, M, A + currd*lda, 1, tmp + i*ldtmp, 1);
for (size_t j=0; j < d[i] -1; ++j)
- FFLAS::fcopy(F, M, A + (curri++)*lda, 1, A+(currd+j+1)*lda, 1);
+ FFLAS::fassign(F, M, A+(currd+j+1)*lda, 1, A + (curri++)*lda, 1);
currd += d[i];
}
for (size_t i=0; i < nb_blocs; ++i)
- FFLAS::fcopy (F, M, A + (curri++)*lda, 1, tmp + i*ldtmp, 1);
+ FFLAS::fassign (F, M, tmp + i*ldtmp, 1, A + (curri++)*lda, 1);
}
template <class Field>
-void FFPACK::DeCompressRowsQA (Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- typename Field::Element * tmp, const size_t ldtmp,
+void DeCompressRowsQA (Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr tmp, const size_t ldtmp,
const size_t * d, const size_t nb_blocs)
{
for (size_t i=0; i<nb_blocs; ++i)
- FFLAS::fcopy(F, M, tmp + i*ldtmp, 1, A + (N-nb_blocs+i)*lda, 1);
+ FFLAS::fassign(F, M, A + (N-nb_blocs+i)*lda, 1, tmp + i*ldtmp, 1);
size_t w_idx = N - 1;
size_t r_idx = N - nb_blocs - 1;
int i = int(nb_blocs) ;
for (; i--; ){
for (size_t j = 0; j<d[i]-1; ++j)
- FFLAS::fcopy (F, M, A + (w_idx--)*lda, 1, A + (r_idx--)*lda, 1);
- FFLAS::fcopy (F, M, A + (w_idx--)*lda, 1, tmp + i*ldtmp, 1);
+ FFLAS::fassign (F, M, A + (r_idx--)*lda, 1, A + (w_idx--)*lda, 1);
+ FFLAS::fassign (F, M, tmp + i*(int)ldtmp, 1, A + (w_idx--)*lda, 1);
}
}
+
+} // Protected
+} //FFPACK
diff --git a/fflas-ffpack/ffpack/ffpack_ftrtr.inl b/fflas-ffpack/ffpack/ffpack_ftrtr.inl
new file mode 100644
index 0000000..a01afbd
--- /dev/null
+++ b/fflas-ffpack/ffpack/ffpack_ftrtr.inl
@@ -0,0 +1,105 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 FFLAS-FFACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_ffpack_ftrtr_INL
+#define __FFLASFFPACK_ffpack_ftrtr_INL
+
+
+namespace FFPACK {
+
+
+template<class Field>
+ void
+ ftrtri (const Field& F, const FFLAS::FFLAS_UPLO Uplo, const FFLAS::FFLAS_DIAG Diag,
+ const size_t N, typename Field::Element_ptr A, const size_t lda)
+ {
+ if (N == 1){
+ if (Diag == FFLAS::FflasNonUnit)
+ F.invin (*A);
+ }
+ else {
+ size_t N1 = N/2;
+ size_t N2 = N - N1;
+ ftrtri (F, Uplo, Diag, N1, A, lda);
+ ftrtri (F, Uplo, Diag, N2, A + N1*(lda+1), lda);
+ if (Uplo == FFLAS::FflasUpper){
+ ftrmm (F, FFLAS::FflasLeft, Uplo, FFLAS::FflasNoTrans, Diag, N1, N2,
+ F.one, A, lda, A + N1, lda);
+ ftrmm (F, FFLAS::FflasRight, Uplo, FFLAS::FflasNoTrans, Diag, N1, N2,
+ F.mOne, A + N1*(lda+1), lda, A + N1, lda);
+ }
+ else {
+ ftrmm (F, FFLAS::FflasLeft, Uplo, FFLAS::FflasNoTrans, Diag, N2, N1,
+ F.one, A + N1*(lda+1), lda, A + N1*lda, lda);
+ ftrmm (F, FFLAS::FflasRight, Uplo, FFLAS::FflasNoTrans, Diag, N2, N1,
+ F.mOne, A, lda, A + N1*lda, lda);
+ }
+ }
+ }
+
+
+ template<class Field>
+ void
+ ftrtrm (const Field& F, const FFLAS::FFLAS_DIAG diag, const size_t N,
+ typename Field::Element_ptr A, const size_t lda)
+ {
+
+ if (N == 1)
+ return;
+ size_t N1 = N/2;
+ size_t N2 = N-N1;
+
+ ftrtrm (F, diag, N1, A, lda);
+
+ fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, N1, N1, N2, F.one,
+ A+N1, lda, A+N1*lda, lda, F.one, A, lda);
+
+ ftrmm (F, FFLAS::FflasRight, FFLAS::FflasLower, FFLAS::FflasNoTrans,
+ (diag == FFLAS::FflasUnit) ? FFLAS::FflasNonUnit : FFLAS::FflasUnit,
+ N1, N2, F.one, A + N1*(lda+1), lda, A + N1, lda);
+
+ ftrmm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, diag, N2, N1,
+ F.one, A + N1*(lda+1), lda, A + N1*lda, lda);
+
+ ftrtrm (F, diag, N2, A + N1*(lda+1), lda);
+
+ }
+
+ template<class Field>
+ void trinv_left( const Field& F, const size_t N, typename Field::ConstElement_ptr L, const size_t ldl,
+ typename Field::Element_ptr X, const size_t ldx )
+ {
+ FFLAS::fassign(F,N,N,L,ldl,X,ldx);
+ ftrtri (F, FFLAS::FflasLower, FFLAS::FflasUnit, N, X, ldx);
+ //invL(F,N,L,ldl,X,ldx);
+ }
+
+} // FFPACK
+
+#endif // __FFLASFFPACK_ffpack_ftrtr_INL
diff --git a/fflas-ffpack/ffpack/ffpack_invert.inl b/fflas-ffpack/ffpack/ffpack_invert.inl
new file mode 100644
index 0000000..5472267
--- /dev/null
+++ b/fflas-ffpack/ffpack/ffpack_invert.inl
@@ -0,0 +1,139 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 FFLAS-FFACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_ffpack_invert_INL
+#define __FFLASFFPACK_ffpack_invert_INL
+
+
+namespace FFPACK {
+template <class Field>
+ typename Field::Element_ptr
+ Invert (const Field& F, const size_t M,
+ typename Field::Element_ptr A, const size_t lda,
+ int& nullity)
+ {
+ FFLASFFPACK_check(lda >= M);
+
+ if (M == 0) {
+ nullity = 0 ;
+ return NULL ;
+ }
+
+ size_t * P = FFLAS::fflas_new<size_t>(M);
+ size_t * Q = FFLAS::fflas_new<size_t>(M);
+ size_t R = ReducedColumnEchelonForm (F, M, M, A, lda, P, Q);
+ nullity = (int)(M - R);
+ applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+ M, 0, (int)R, A, lda, P);
+ delete [] P;
+ delete [] Q;
+ return A;
+ }
+
+ template <class Field>
+ typename Field::Element_ptr
+ Invert (const Field& F, const size_t M,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t ldx,
+ int& nullity)
+ {
+ FFLASFFPACK_check(lda >= M);
+ FFLASFFPACK_check(ldx >= M);
+ if (M == 0) {
+ nullity = 0 ;
+ return NULL ;
+ }
+
+
+ FFLAS::fassign(F,M,M,A,lda,X,ldx);
+ Invert (F, M, X, lda, nullity);
+ return X;
+ }
+
+ template <class Field>
+ typename Field::Element_ptr
+ Invert2( const Field& F, const size_t M,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t ldx,
+ int& nullity)
+ {
+ FFLASFFPACK_check(lda >= M);
+ FFLASFFPACK_check(ldx >= M);
+
+ if (M == 0) {
+ nullity = 0 ;
+ return NULL ;
+ }
+
+ size_t *P = FFLAS::fflas_new<size_t>(M);
+ size_t *rowP = FFLAS::fflas_new<size_t>(M);
+
+
+ nullity = int(M - LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, M, A, lda, P, rowP));
+
+ if (nullity > 0){
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( rowP);
+ return NULL;
+ }
+ else {
+ // Initializing X to 0
+#if 0/* timer remnants */
+ t1.clear();
+ t1.start();
+#endif
+ //! @todo this init is not all necessary (done after ftrtri)
+ FFLAS::fzero(F,M,M,X,ldx);
+
+ // X = L^-1 in n^3/3
+ ftrtri (F, FFLAS::FflasLower, FFLAS::FflasUnit, M, A, lda);
+ for (size_t i=0; i<M; ++i){
+ for (size_t j=i; j<M; ++j)
+ F.assign(*(X +i*ldx+j), F.zero);
+ F.assign (*(X+i*(ldx+1)), F.one);
+ }
+ for (size_t i=1; i<M; ++i)
+ FFLAS::fassign (F, i, (A+i*lda), 1, (X+i*ldx), 1);
+
+ ftrsm( F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
+ M, M, F.one, A, lda , X, ldx);
+
+ // X = P^-1.X
+ applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+ M, 0,(int) M, X, ldx, P );
+
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( rowP);
+ return X;
+ }
+ }
+
+} // FFPACK
+
+#endif // __FFLASFFPACK_ffpack_invert_INL
diff --git a/fflas-ffpack/ffpack/ffpack_krylovelim.inl b/fflas-ffpack/ffpack/ffpack_krylovelim.inl
index 92de97c..034adbc 100644
--- a/fflas-ffpack/ffpack/ffpack_krylovelim.inl
+++ b/fflas-ffpack/ffpack/ffpack_krylovelim.inl
@@ -5,20 +5,20 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -29,14 +29,6 @@
#ifndef __FFLASFFPACK_ffpack_krylovelim_INL
#define __FFLASFFPACK_ffpack_krylovelim_INL
-#ifndef MIN
-#define MIN(a,b) (a<b)?a:b
-#endif
-#ifndef MAX
-#define MAX(a,b) (a<b)?b:a
-#endif
-
-//#define LB_DEBUG
// A is m x n with m <= n
// Ensures : rankprof is the row rankprofil of the matrix k x n matrix B formed as follows (k = sum d_i):
@@ -47,13 +39,12 @@
template <class Field>
inline size_t
FFPACK::KrylovElim( const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda, size_t*P,
+ typename Field::Element_ptr A, const size_t lda, size_t*P,
size_t *Q, const size_t deg, size_t *iterates, size_t * inviterates,size_t maxit,
size_t virt)
{
if ( !(M && N) ) return 0;
- typedef typename Field::Element elt;
if (M == 1){
virt += deg;
@@ -112,9 +103,9 @@ FFPACK::KrylovElim( const Field& F, const size_t M, const size_t N,
// Recursive call on NW
size_t R = KrylovElim (F, Nup, N, A, lda, P, Q, deg, iterates, inviterates, maxit, virt);
- typename Field::Element *Ar = A + Nup*lda; // SW
- typename Field::Element *Ac = A + R; // NE
- typename Field::Element *An = Ar + R; // SE
+ typename Field::Element_ptr Ar = A + Nup*lda; // SW
+ typename Field::Element_ptr Ac = A + R; // NE
+ typename Field::Element_ptr An = Ar + R; // SE
if (R){
// Ar <- Ar.P
@@ -129,7 +120,7 @@ FFPACK::KrylovElim( const Field& F, const size_t M, const size_t N,
F.mOne, Ar, lda, Ac, lda, F.one, An, lda);
}
// Recursive call on SE
- size_t R2 = KrylovElim (F, Ndown, N-R, An, lda,P+R, Q+Nup, deg, iterates, inviterates, maxit, MIN(maxit-deg,(virt+Nup*deg)));
+ size_t R2 = KrylovElim (F, Ndown, N-R, An, lda,P+R, Q+Nup, deg, iterates, inviterates, maxit, std::min(maxit-deg,(virt+Nup*deg)));
for (size_t i = R; i < R + R2; ++i)
P[i] += R;
@@ -144,8 +135,8 @@ FFPACK::KrylovElim( const Field& F, const size_t M, const size_t N,
if (R < Nup){
// Permutation of the 0 rows
for ( size_t i = Nup, j = R ; i < Nup + R2; ++i, ++j){
- FFLAS::fcopy( F, N - j, A + j*(lda + 1), 1, A + i*lda + j, 1);
- for (typename Field::Element *Ai = A + i*lda + j;
+ FFLAS::fassign( F, N - j, A + i*lda + j, 1, A + j*(lda + 1), 1);
+ for (typename Field::Element_ptr Ai = A + i*lda + j;
Ai != A + i*lda + N; ++Ai)
F.assign (*Ai, F.zero);
size_t t = Q[j];
@@ -160,15 +151,15 @@ FFPACK::KrylovElim( const Field& F, const size_t M, const size_t N,
template <class Field>
size_t
FFPACK::SpecRankProfile (const Field& F, const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda, const size_t deg,
+ typename Field::Element_ptr A, const size_t lda, const size_t deg,
size_t *rankProfile)
{
//size_t deg = (N-1)/M+1; // Number of trivial iterates per blocs
- size_t * Q = new size_t[M];
- size_t * P = new size_t[N];
- size_t * iterates = new size_t[N];
- size_t * inviterates = new size_t[N+1];
+ size_t * Q = FFLAS::fflas_new<size_t>(M);
+ size_t * P = FFLAS::fflas_new<size_t>(N);
+ size_t * iterates = FFLAS::fflas_new<size_t>(N);
+ size_t * inviterates = FFLAS::fflas_new<size_t>(N+1);
for (size_t i=0; i < N; ++i)
inviterates[i+1] = iterates[i] = i+1;
@@ -195,10 +186,10 @@ FFPACK::SpecRankProfile (const Field& F, const size_t M, const size_t N,
#ifdef LB_DEBUG
std::cerr<<"FAIL itere dependant intercale"<<std::endl;
#endif
- delete[] P;
- delete[] Q;
- delete[] iterates;
- delete[] inviterates;
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ FFLAS::fflas_delete( iterates);
+ FFLAS::fflas_delete( inviterates);
throw CharpolyFailed();
}
#ifdef LB_DEBUG
@@ -228,10 +219,10 @@ FFPACK::SpecRankProfile (const Field& F, const size_t M, const size_t N,
#endif
curr_row++;
}
- delete[] P;
- delete[] Q;
- delete[] inviterates;
- delete[] iterates;
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ FFLAS::fflas_delete( inviterates);
+ FFLAS::fflas_delete( iterates);
return rp_idx;
}
diff --git a/fflas-ffpack/ffpack/ffpack_ludivine.inl b/fflas-ffpack/ffpack/ffpack_ludivine.inl
index 6f47888..d5c6619 100644
--- a/fflas-ffpack/ffpack/ffpack_ludivine.inl
+++ b/fflas-ffpack/ffpack/ffpack_ludivine.inl
@@ -1,24 +1,24 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
/* ffpack/ffpack_ludivine.inl
* Copyright (C) 2005 Clement Pernet
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -29,24 +29,19 @@
#ifndef __FFLASFFPACK_ffpack_ludivine_INL
#define __FFLASFFPACK_ffpack_ludivine_INL
-#ifndef MIN
-#define MIN(a,b) (a<b)?a:b
-#endif
-#ifndef MAX
-#define MAX(a,b) (a<b)?b:a
-#endif
+#include "fflas-ffpack/fflas/fflas_bounds.inl"
//#define LB_DEBUG
namespace FFPACK {
template<class Field>
inline size_t
LUdivine_gauss( const Field& F, const FFLAS::FFLAS_DIAG Diag,
- const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda, size_t*P,
- size_t *Q, const FFPACK::FFPACK_LUDIVINE_TAG LuTag)
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda, size_t*P,
+ size_t *Q, const FFPACK::FFPACK_LU_TAG LuTag)
{
- size_t MN = MIN(M,N);
- typename Field::Element * Acurr = A;
+ size_t MN = std::min(M,N);
+ typename Field::Element_ptr Acurr = A;
size_t r = 0;
for (size_t k = 0; k < MN; ++k){
@@ -57,7 +52,7 @@ namespace FFPACK {
if (p < N){
P[r] = p;
if (r < k){
- FFLAS::fcopy (F, N-r, (A + r*(lda+1)), 1, (A+k*lda+r),1);
+ FFLAS::fassign (F, N-r, (A+k*lda+r),1, (A + r*(lda+1)), 1);
Acurr = A+r+k*lda;
for (size_t i=r; i<N; ++i)
F.assign(*(Acurr++),F.zero);
@@ -87,8 +82,8 @@ namespace FFPACK {
inline size_t
LUdivine_small( const Field& F, const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda, size_t*P,
- size_t *Q, const FFPACK::FFPACK_LUDIVINE_TAG LuTag)
+ typename Field::Element_ptr A, const size_t lda, size_t*P,
+ size_t *Q, const FFPACK::FFPACK_LU_TAG LuTag)
{
return callLUdivine_small <typename Field::Element> ()
(F, Diag, trans, M, N, A, lda, P, Q, LuTag);
@@ -101,25 +96,24 @@ namespace FFPACK {
inline size_t
operator()( const Field& F, const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda, size_t*P,
- size_t *Q, const FFPACK::FFPACK_LUDIVINE_TAG LuTag)
+ typename Field::Element_ptr A, const size_t lda, size_t*P,
+ size_t *Q, const FFPACK::FFPACK_LU_TAG LuTag)
{
-
if ( !(M && N) ) return 0;
typedef typename Field::Element elt;
- elt * Aini = A;
- elt * Acurr;
+ typedef typename Field::Element_ptr elt_ptr;
+ elt_ptr Aini = A;
+ elt_ptr Acurr;
size_t rowp = 0;
- size_t colp;
size_t R = 0;
size_t k = 0;
- //size_t kmax = FFLAS::Protected::DotProdBound (F, 0, one) -1; // the max number of delayed operations
while ((rowp<M) && (k<N)){
+ size_t colp;
//Find non zero pivot
colp = k;
Acurr = Aini;
- while ((F.isZero(*Acurr)) || (F.isZero (F.init (*Acurr, *Acurr))))
+ while ((F.isZero(*Acurr)) || (F.isZero (F.reduce (*Acurr))))
if (++colp == N){
if (rowp==M-1)
break;
@@ -140,26 +134,30 @@ namespace FFPACK {
//Normalization
elt invpiv;
- F.init(*Aini,*Aini);
+ F.init (invpiv);
+ F.reduce (*Aini);
F.inv (invpiv,*Aini);
for (size_t j=1; j<N-k; ++j)
if (!F.isZero(*(Aini+j)))
- F.init(*(Aini+j), *(Aini+j));
+ F.reduce (*(Aini+j));
for (size_t i=lda; i<(M-rowp)*lda; i+=lda)
if (!F.isZero(*(Aini+i)))
- F.init(*(Aini+i), *(Aini+i));
+ F.reduce (*(Aini+i));
if (Diag == FFLAS::FflasUnit) {
- for (size_t j=1; j<N-k; ++j)
- if (!F.isZero(*(Aini+j)))
- F.mulin (*(Aini+j),invpiv);
+ // for (size_t j=1; j<N-k; ++j)
+ // if (!F.isZero(*(Aini+j)))
+ // F.mulin (*(Aini+j),invpiv);
+ FFLAS::fscalin(F,N-k-1,invpiv,Aini+1,1);
+ }
+ else {
+ // for (size_t i=lda; i<(M-rowp)*lda; i+=lda)
+ // if (!F.isZero(*(Aini+i)))
+ // F.mulin (*(Aini+i),invpiv);
+ FFLAS::fscalin(F,M-rowp-1,invpiv,Aini+lda,lda);
}
- else
- for (size_t i=lda; i<(M-rowp)*lda; i+=lda)
- if (!F.isZero(*(Aini+i)))
- F.mulin (*(Aini+i),invpiv);
//Elimination
//Or equivalently, but without delayed ops :
@@ -180,7 +178,7 @@ namespace FFPACK {
}
for (size_t i=0; i<R; ++i, Aini += lda+1) {
if (Q[i] > i){
- FFLAS::fcopy (F, l-i, Aini, 1, Aini+(Q[i]-i)*lda, 1);
+ FFLAS::fassign (F, l-i, Aini+(Q[i]-i)*lda, 1, Aini, 1);
for (size_t j=0; j<l-i; ++j)
F.assign (*(Aini+(Q[i]-i)*lda+j), F.zero);
}
@@ -197,8 +195,8 @@ namespace FFPACK {
operator()( const Field& F,
const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda, size_t*P,
- size_t *Q, const FFPACK::FFPACK_LUDIVINE_TAG LuTag)
+ typename Field::Element_ptr A, const size_t lda, size_t*P,
+ size_t *Q, const FFPACK::FFPACK_LU_TAG LuTag)
{
if ( !(M && N) ) return 0;
@@ -206,17 +204,17 @@ namespace FFPACK {
elt * Aini = A;
elt * Acurr;
size_t rowp = 0;
- size_t colp;
size_t R = 0;
size_t k = 0;
size_t delay =0;
- size_t kmax = FFLAS::Protected::DotProdBound (F, 0, F.one, FFLAS::FflasDouble) -1; // the max number of delayed operations
+ size_t kmax = FFLAS::Protected::DotProdBoundClassic (F, F.one) -1; // the max number of delayed operations
while ((rowp<M) && (k<N)){
+ size_t colp;
//Find non zero pivot
colp = k;
Acurr = Aini;
- while ((F.isZero(*Acurr)) || (F.isZero (F.init (*Acurr, *Acurr))))
+ while ((F.isZero(*Acurr)) || (F.isZero (F.reduce (*Acurr))))
if (++colp == N){
if (rowp==M-1)
break;
@@ -237,32 +235,36 @@ namespace FFPACK {
//Normalization
elt invpiv;
- F.init(*Aini,*Aini);
+ F.reduce (*Aini);
F.inv (invpiv,*Aini);
for (size_t j=1; j<N-k; ++j)
if (!F.isZero(*(Aini+j)))
- F.init(*(Aini+j), *(Aini+j));
+ F.reduce (*(Aini+j));
for (size_t i=lda; i<(M-rowp)*lda; i+=lda)
if (!F.isZero(*(Aini+i)))
- F.init(*(Aini+i), *(Aini+i));
+ F.reduce(*(Aini+i));
if (Diag == FFLAS::FflasUnit) {
- for (size_t j=1; j<N-k; ++j)
- if (!F.isZero(*(Aini+j)))
- F.mulin (*(Aini+j),invpiv);
+ // for (size_t j=1; j<N-k; ++j)
+ // if (!F.isZero(*(Aini+j)))
+ // F.mulin (*(Aini+j),invpiv);
+ FFLAS::fscalin(F,N-k-1,invpiv,Aini+1,1);
+ }
+ else {
+ // for (size_t i=lda; i<(M-rowp)*lda; i+=lda)
+ // if (!F.isZero(*(Aini+i)))
+ // F.mulin (*(Aini+i),invpiv);
+ FFLAS::fscalin(F,M-rowp-1,invpiv,Aini+lda,lda);
}
- else
- for (size_t i=lda; i<(M-rowp)*lda; i+=lda)
- if (!F.isZero(*(Aini+i)))
- F.mulin (*(Aini+i),invpiv);
if (delay++ >= kmax){ // Reduction has to be done
delay = 0;
- for (size_t i=1; i<M-rowp; ++i)
- for (size_t j=1; j<N-k; ++j)
- F.init( *(Aini+i*lda+j),*(Aini+i*lda+j));
+ FFLAS::freduce (F, M-rowp-1,N-k-1, Aini+lda+1, lda);
+ // for (size_t i=1; i<M-rowp; ++i)
+ // for (size_t j=1; j<N-k; ++j)
+ // F.init( *(Aini+i*lda+j),*(Aini+i*lda+j));
}
//Elimination
for (size_t i=1; i<M-rowp; ++i)
@@ -286,7 +288,7 @@ namespace FFPACK {
}
for (size_t i=0; i<R; ++i, Aini += lda+1) {
if (Q[i] > i){
- FFLAS::fcopy (F, l-i, Aini, 1, Aini+(Q[i]-i)*lda, 1);
+ FFLAS::fassign (F, l-i, Aini+(Q[i]-i)*lda, 1, Aini, 1);
for (size_t j=0; j<l-i; ++j)
F.assign (*(Aini+(Q[i]-i)*lda+j), F.zero);
}
@@ -303,8 +305,8 @@ namespace FFPACK {
operator()( const Field& F,
const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda, size_t*P,
- size_t *Q, const FFPACK::FFPACK_LUDIVINE_TAG LuTag)
+ typename Field::Element_ptr A, const size_t lda, size_t*P,
+ size_t *Q, const FFPACK::FFPACK_LU_TAG LuTag)
{
if ( !(M && N) ) return 0;
@@ -312,17 +314,17 @@ namespace FFPACK {
elt * Aini = A;
elt * Acurr;
size_t rowp = 0;
- size_t colp;
size_t R = 0;
size_t k = 0;
size_t delay =0;
- size_t kmax = FFLAS::Protected::DotProdBound (F, 0, F.one, FFLAS::FflasFloat) -1; // the max number of delayed operations
+ size_t kmax = FFLAS::Protected::DotProdBoundClassic (F, F.one) -1; // the max number of delayed operations
while ((rowp<M) && (k<N)){
+ size_t colp;
//Find non zero pivot
colp = k;
Acurr = Aini;
- while ((F.isZero(*Acurr)) || (F.isZero (F.init (*Acurr, *Acurr))))
+ while ((F.isZero(*Acurr)) || (F.isZero (F.reduce (*Acurr))))
if (++colp == N){
if (rowp==M-1)
break;
@@ -343,32 +345,36 @@ namespace FFPACK {
//Normalization
elt invpiv;
- F.init(*Aini,*Aini);
+ F.init(invpiv);
+ F.reduce (*Aini);
F.inv (invpiv,*Aini);
for (size_t j=1; j<N-k; ++j)
if (!F.isZero(*(Aini+j)))
- F.init(*(Aini+j), *(Aini+j));
+ F.reduce (*(Aini+j));
for (size_t i=lda; i<(M-rowp)*lda; i+=lda)
if (!F.isZero(*(Aini+i)))
- F.init(*(Aini+i), *(Aini+i));
-
+ F.reduce (*(Aini+i));
if (Diag == FFLAS::FflasUnit) {
- for (size_t j=1; j<N-k; ++j)
- if (!F.isZero(*(Aini+j)))
- F.mulin (*(Aini+j),invpiv);
+ // for (size_t j=1; j<N-k; ++j)
+ // if (!F.isZero(*(Aini+j)))
+ // F.mulin (*(Aini+j),invpiv);
+ FFLAS::fscalin(F,N-k-1,invpiv,Aini+1,1);
+ }
+ else {
+ // for (size_t i=lda; i<(M-rowp)*lda; i+=lda)
+ // if (!F.isZero(*(Aini+i)))
+ // F.mulin (*(Aini+i),invpiv);
+ FFLAS::fscalin(F,M-rowp-1,invpiv,Aini+lda,lda);
}
- else
- for (size_t i=lda; i<(M-rowp)*lda; i+=lda)
- if (!F.isZero(*(Aini+i)))
- F.mulin (*(Aini+i),invpiv);
if (delay++ >= kmax){ // Reduction has to be done
delay = 0;
- for (size_t i=1; i<M-rowp; ++i)
- for (size_t j=1; j<N-k; ++j)
- F.init( *(Aini+i*lda+j),*(Aini+i*lda+j));
+ FFLAS::freduce (F, M-rowp-1, N-k-1, Aini+lda+1, lda);
+ // for (size_t i=1; i<M-rowp; ++i)
+ // for (size_t j=1; j<N-k; ++j)
+ // F.reduce (*(Aini+i*lda+j));
}
//Elimination
for (size_t i=1; i<M-rowp; ++i)
@@ -392,7 +398,7 @@ namespace FFPACK {
}
for (size_t i=0; i<R; ++i, Aini += lda+1) {
if (Q[i] > i){
- FFLAS::fcopy (F, l-i, Aini, 1, Aini+(Q[i]-i)*lda, 1);
+ FFLAS::fassign (F, l-i, Aini+(Q[i]-i)*lda, 1, Aini, 1);
for (size_t j=0; j<l-i; ++j)
F.assign (*(Aini+(Q[i]-i)*lda+j), F.zero);
}
@@ -404,18 +410,18 @@ namespace FFPACK {
template <class Field>
inline size_t
LUdivine (const Field& F,
- const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
- const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- size_t*P, size_t *Q
- , const FFPACK::FFPACK_LUDIVINE_TAG LuTag // =FFPACK::FfpackLQUP
- , const size_t cutoff // =__FFPACK_LUDIVINE_CUTOFF
+ const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t*P, size_t *Q
+ , const FFPACK::FFPACK_LU_TAG LuTag // =FFPACK::FfpackSlabRecursive
+ , const size_t cutoff // =__FFPACK_LUDIVINE_CUTOFF
)
{
-
+ //std::cout<<"LUDivine ("<<M<<","<<N<<")"<<std::endl;
if ( !(M && N) ) return 0;
typedef typename Field::Element elt;
- size_t MN = MIN(M,N);
+ size_t MN = std::min(M,N);
size_t incRow, incCol, rowDim, colDim;
if (trans == FFLAS::FflasTrans){
@@ -437,7 +443,6 @@ namespace FFPACK {
else { // recursively :
if (MN == 1){
size_t ip=0;
- //while (ip<N && !F.isUnit(*(A+ip)))ip++;
while (F.isZero (*(A+ip*incCol)))
if (++ip == colDim)
break;
@@ -447,7 +452,7 @@ namespace FFPACK {
if (colDim == 1){
//while (ip<M && !F.isUnit(*(A+ip*lda)))
while (ip<rowDim && F.isZero(*(A + ip*incRow))){
- Q[ip]=ip;
+ // Q[ip]=ip;
ip++;
}
if (ip == rowDim) {
@@ -457,13 +462,16 @@ namespace FFPACK {
size_t oldip = ip;
if ( Diag == FFLAS::FflasNonUnit ){
elt invpiv;
+ F.init(invpiv);
F.inv(invpiv,*(A+ip*incRow));
- while(++ip<rowDim)
- F.mulin(*(A + ip*incRow), invpiv);
- elt tmp;
- F.assign(tmp, *(A+oldip*incRow));
- F.assign( *(A+oldip*incRow), *A);
- F.assign( *A, tmp);
+ if (++ip < rowDim)
+ FFLAS::fscalin(F,rowDim-ip,invpiv,A+ip*incRow,incRow);
+ // elt tmp;
+// F.init(tmp);
+// F.assign(tmp, *(A+oldip*incRow));
+// F.assign( *(A+oldip*incRow), *A);
+ F.assign( *A,*(A+oldip*incRow));
+ F.assign( *(A+oldip*incRow), F.zero);
}
*Q=oldip;
@@ -477,21 +485,23 @@ namespace FFPACK {
*P=ip;
if (ip!=0){
// swap the pivot
- typename Field::Element tmp=*A;
- *A = *(A + ip*incCol);
- *(A + ip*incCol) = tmp;
+ typename Field::Element tmp;
+ F.init(tmp);
+ F.assign(tmp,*A);
+ F.assign(*A, *(A + ip*incCol));
+ F.assign(*(A + ip*incCol), tmp);
}
elt invpiv;
+ F.init(invpiv);
F.inv(invpiv, *A);
- if ( Diag == FFLAS::FflasUnit ){
+ if ( Diag == FFLAS::FflasUnit && colDim>1){
// Normalisation of the row
- for (size_t k=1; k<colDim; k++)
- F.mulin(*(A+k*incCol), invpiv);
+ FFLAS::fscalin(F,colDim-1,invpiv,A+incCol,incCol);
}
- else {
- if ( colDim==1 )
- while(++ip<rowDim)
- F.mulin(*(A + ip*incRow), invpiv);
+ else if ( (colDim==1) &&(Diag==FFLAS::FflasNonUnit) ){
+ if (++ip < rowDim){
+ FFLAS::fscalin(F,rowDim-ip,invpiv,A+ip*incRow,incRow);
+ }
}
return 1;
}
@@ -505,9 +515,9 @@ namespace FFPACK {
R = LUdivine (F, Diag, trans, colDim, Nup, A, lda, P, Q,
LuTag, cutoff);
- typename Field::Element *Ar = A + Nup*incRow; // SW
- typename Field::Element *Ac = A + R*incCol; // NE
- typename Field::Element *An = Ar+ R*incCol; // SE
+ typename Field::Element_ptr Ar = A + Nup*incRow; // SW
+ typename Field::Element_ptr Ac = A + R*incCol; // NE
+ typename Field::Element_ptr An = Ar+ R*incCol; // SE
if (!R){
if (LuTag == FFPACK::FfpackSingular )
@@ -517,12 +527,13 @@ namespace FFPACK {
FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
Ndown, 0,(int) R, Ar, lda, P);
// Ar <- L1^-1 Ar
- ftrsm( F, FFLAS::FflasLeft, FFLAS::FflasLower,
+ FFLAS::ftrsm( F, FFLAS::FflasLeft, FFLAS::FflasLower,
FFLAS::FflasNoTrans, Diag, R, Ndown,
F.one, A, lda, Ar, lda);
// An <- An - Ac*Ar
- fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, colDim-R, Ndown, R,
- F.mOne, Ac, lda, Ar, lda, F.one, An, lda);
+ if (colDim>R)
+ fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, colDim-R, Ndown, R,
+ F.mOne, Ac, lda, Ar, lda, F.one, An, lda);
}
// Recursive call on SE
R2 = LUdivine (F, Diag, trans, colDim-R, Ndown, An, lda, P + R, Q + Nup, LuTag, cutoff);
@@ -541,10 +552,9 @@ namespace FFPACK {
}
else { // trans == FFLAS::FflasNoTrans
R = LUdivine (F, Diag, trans, Nup, colDim, A, lda, P, Q, LuTag, cutoff);
- typename Field::Element *Ar = A + Nup*incRow; // SW
- typename Field::Element *Ac = A + R*incCol; // NE
- typename Field::Element *An = Ar+ R*incCol; // SE
-
+ typename Field::Element_ptr Ar = A + Nup*incRow; // SW
+ typename Field::Element_ptr Ac = A + R*incCol; // NE
+ typename Field::Element_ptr An = Ar+ R*incCol; // SE
if (!R){
if (LuTag == FFPACK::FfpackSingular )
@@ -559,8 +569,9 @@ namespace FFPACK {
FFLAS::FflasNoTrans, Diag, Ndown, R,
F.one, A, lda, Ar, lda);
// An <- An - Ar*Ac
- fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Ndown, colDim-R, R,
- F.mOne, Ar, lda, Ac, lda, F.one, An, lda );
+ if (colDim>R)
+ fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Ndown, colDim-R, R,
+ F.mOne, Ar, lda, Ac, lda, F.one, An, lda );
}
// Recursive call on SE
@@ -582,8 +593,8 @@ namespace FFPACK {
// Permutation of the 0 rows
if (Diag == FFLAS::FflasNonUnit){
for ( size_t i = Nup, j = R ; i < Nup + R2; ++i, ++j){
- FFLAS::fcopy( F, colDim - j, A + j * (lda + 1), incCol, A + i*incRow + j*incCol, incCol);
- for (typename Field::Element *Ai = A + i*incRow + j*incCol;
+ FFLAS::fassign( F, colDim - j, A + i*incRow + j*incCol, incCol, A + j * (lda + 1), incCol);
+ for (typename Field::Element_ptr Ai = A + i*incRow + j*incCol;
Ai != A + i*incRow + colDim*incCol; Ai+=incCol)
F.assign (*Ai, F.zero);
///@todo std::swap ?
@@ -594,10 +605,10 @@ namespace FFPACK {
}
else { // Diag == FFLAS::FflasUnit
for ( size_t i = Nup, j = R+1 ; i < Nup + R2; ++i, ++j){
- FFLAS::fcopy( F, colDim - j,
- A + (j-1)*incRow + j*incCol, incCol,
- A + i*incRow + j*incCol, incCol);
- for (typename Field::Element *Ai = A + i*incRow + j*incCol;
+ FFLAS::fassign( F, colDim - j,
+ A + i*incRow + j*incCol, incCol,
+ A + (j-1)*incRow + j*incCol, incCol);
+ for (typename Field::Element_ptr Ai = A + i*incRow + j*incCol;
Ai != A + i*incRow + colDim*incCol; Ai+=incCol)
F.assign (*Ai, F.zero);
size_t t = Q[j-1];
@@ -605,7 +616,7 @@ namespace FFPACK {
Q[i] = t;
}
}
- }
+ }
return R + R2;
}
}
@@ -627,9 +638,9 @@ namespace FFPACK {
size_t
LUdivine_construct( const Field& F, const FFLAS::FFLAS_DIAG Diag,
const size_t M, const size_t N,
- const typename Field::Element * A, const size_t lda,
- typename Field::Element * X, const size_t ldx,
- typename Field::Element * u, size_t* P,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr X, const size_t ldx,
+ typename Field::Element_ptr u, size_t* P,
bool computeX
, const FFPACK::FFPACK_MINPOLY_TAG MinTag //= FFPACK::FfpackDense
, const size_t kg_mc// =0
@@ -638,7 +649,7 @@ namespace FFPACK {
)
{
- size_t MN = MIN(M,N);
+ size_t MN = std::min(M,N);
if (MN == 1){
size_t ip=0;
@@ -650,17 +661,20 @@ namespace FFPACK {
*P=ip;
if (ip!=0){
// swap the pivot
- typename Field::Element tmp=*X;
+ typename Field::Element tmp;
+ F.init(tmp);
+ F.assign(tmp,*X);
*X = *(X+ip);
*(X+ip) = tmp;
}
if ( Diag == FFLAS::FflasUnit ){
typename Field::Element invpiv;
F.inv(invpiv, *X);
-
+
// Normalisation of the row
- for (size_t k=1; k<N; k++)
- F.mulin(*(X+k), invpiv);
+ // for (size_t k=1; k<N; k++)
+ // F.mulin(*(X+k), invpiv);
+ FFLAS::fscalin(F,N-1,invpiv,X+1,1);
}
if (N==1 && M>1 && computeX)// Only appends when A is 1 by 1
F.mul(*(X+ldx),*X, *A);
@@ -675,22 +689,22 @@ namespace FFPACK {
size_t R = LUdivine_construct(F, Diag, Nup, N, A, lda, X, ldx, u,
P, computeX, MinTag, kg_mc, kg_mb, kg_j );
if (R==Nup){
- typename Field::Element * Xr = X + Nup*ldx; // SW
- typename Field::Element * Xc = X + Nup; // NE
- typename Field::Element * Xn = Xr + Nup; // SE
- typename Field::Element * Xi = Xr;
+ typename Field::Element_ptr Xr = X + Nup*ldx; // SW
+ typename Field::Element_ptr Xc = X + Nup; // NE
+ typename Field::Element_ptr Xn = Xr + Nup; // SE
+ typename Field::Element_ptr Xi = Xr;
if ( computeX ){
if (MinTag == FFPACK::FfpackDense)
for (size_t i=0; i< Ndown; ++i, Xi+=ldx){
fgemv(F, FFLAS::FflasNoTrans, N, N, F.one,
A, lda, u, 1, F.zero, Xi,1);
- FFLAS::fcopy(F, N, u,1,Xi, 1);
+ FFLAS::fassign(F, N,Xi, 1, u,1);
}
else // Keller-Gehrig Fast algorithm's matrix
for (size_t i=0; i< Ndown; ++i, Xi+=ldx){
FFPACK::Protected::fgemv_kgf( F, N, A, lda, u, 1, Xi, 1,
kg_mc, kg_mb, kg_j );
- FFLAS::fcopy(F, N, u,1,Xi, 1);
+ FFLAS::fassign(F, N,Xi, 1, u,1);
}
}
// Apply the permutation on SW
@@ -729,769 +743,6 @@ namespace FFPACK {
} // Protected
- //---------------------------------------------------------------------
- // TURBO: rank computation algorithm
- //---------------------------------------------------------------------
-
- template <class Field>
- inline size_t
- TURBO (const Field& F, const size_t M, const size_t N,
- typename Field::Element* A, const size_t lda, size_t * P, size_t * Q, const size_t cutoff)
- {
-
- size_t mo2 = (M>>1);
- size_t no2 = (N>>1);
-
- typename Field::Element * NW = A;
- typename Field::Element * NE = A + no2;
- typename Field::Element * SW = A + mo2*lda;
- typename Field::Element * SE = SW + no2;
-
- size_t ld1, ld2, ld3, ld4;
- ld1 = ld2 = ld3 = ld4 = lda;
-
- if ( !(M && N) ) return 0;
- typedef typename Field::Element elt;
-
- // Column permutation
- size_t * P1 = new size_t[no2];
- size_t * P2 = new size_t[N-no2];
- // Row Permutation
- size_t * Q1 = new size_t[mo2];
- size_t * Q2 = new size_t[M-mo2];
- for (size_t i=0; i<mo2; ++i)
- Q1[i] = 0;
- for (size_t i=0; i<M-mo2; ++i)
- Q2[i] = 0;
- size_t q1,q2,q3,q3b,q4;
- q1=q2=q3=q3b=q4=0;
-
- // Step 1: NW = L1.Q1.U1.P1
- size_t mloc = mo2;
- size_t nloc = no2;
-#if 0
- Timer tim;
- tim.clear();
- tim.start();
-#endif
- q1 = LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, mloc, no2, NW, ld1, P1, Q1, FFPACK::FfpackLQUP, cutoff);
-
-#if 0
- tim.stop();
- cerr<<"LQUP1:"<<tim.realtime()<<std::endl;
- tim.start();
-#endif
-#if LB_DEBUG
- std::cerr<<"NW= L1.Q1.U1.P1"<<std::endl;
- write_field(F,std::cerr,NW,M,N,lda);
-#endif
- // B1 = L^-1.NE
-#ifdef LB_DEBUG
- std::cerr<<"avant B1 = L^-1.NE"<<std::endl;
- write_field(F,std::cerr,NE,mloc,N-no2,ld2);
-#endif
- solveLB( F, FFLAS::FflasLeft, mo2, N-no2, q1, NW, ld1, Q1, NE, ld2);
-#ifdef LB_DEBUG
- std::cerr<<"B1 = L^-1.NE"<<std::endl;
- write_field(F,std::cerr,NE,mloc,N-no2,ld2);
-#endif
-
- // NE = Q^-1.NE
-
- FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
- N-no2, 0,(int) mo2, NE, ld2, Q1);
-#ifdef LB_DEBUG
- std::cerr<<"NE=Q^-1.NE"<<std::endl;
- write_field(F,std::cerr,NE,mloc,N-no2,ld2);
-#endif
-
- // SW = SW.P1
- FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasTrans,
- M-mo2, 0,(int) q1, SW, ld3, P1 );
-#ifdef LB_DEBUG
- std::cerr<<"SW = SW.P1"<<std::endl;
- write_field(F,std::cerr,SW,M-mo2,no2,ld3);
-#endif
-
-#if 0
- tim.stop();
- std::cerr<<"L^-1:"<<tim.realtime()<<std::endl;
- tim.start();
-#endif
-
- // N1 = SW_{1,q1} . U1^-1
- ftrsm( F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, M-mo2, q1, F.one, NW, ld1 , SW, ld3 );
-#ifdef LB_DEBUG
- std::cerr<<" N1 = SW_{1,q1} . U1^-1"<<std::endl;
- write_field(F,std::cerr,SW,M-mo2,no2,ld3);
-#endif
-
-#if 0
- tim.stop();
- std::cerr<<"trsm:"<<tim.realtime()<<std::endl;
- tim.start();
-#endif
-
- // I1 = SW_{q1+1,n} - N1.G1
- fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M-mo2, no2-q1, q1, F.mOne, SW, ld3, NW+q1, ld1, F.one, SW+q1, ld3);
-#ifdef LB_DEBUG
- std::cerr<<" I1 = SW_{q1+1,n} - N1.G1"<<std::endl;
- write_field(F,std::cerr,SW,M-mo2,no2,ld3);
-#endif
-
-#if 0
- tim.stop();
- std::cerr<<"fgemm1:"<<tim.realtime()<<std::endl;
- tim.start();
-#endif
-
- // E1 = SE - N1.B1_{1,q1}
- fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M-mo2, N-no2, q1, F.mOne, SW, ld3, NE, ld2, F.one, SE, ld4);
-#ifdef LB_DEBUG
- std::cerr<<" E1 = SE - N1.B1_{1,q1}"<<std::endl;
- write_field(F,std::cerr,SE,M-mo2,N-no2,ld4);
-#endif
-
-#if 0
- tim.stop();
- std::cerr<<"fgemm2:"<<tim.realtime()<<std::endl;
- tim.start();
-#endif
-
-
- //Step 2: E1 = L2.Q2.U2.P2
- mloc = M-mo2;
- nloc = N-no2;
- q2 = LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, mloc, nloc, SE, ld4, P2, Q2, FFPACK::FfpackLQUP, cutoff);
-#ifdef LB_DEBUG
- std::cerr<<" E1 = L2.Q2.U2.P2"<<std::endl;
- write_field(F,std::cerr,SE,M-mo2,N-no2,ld4);
-#endif
-
-#if 0
- tim.stop();
- std::cerr<<"LQUP2:"<<tim.realtime()<<std::endl;
- tim.start();
-#endif
-
- // [I2;F2] = L2^-1.I1
- solveLB( F, FFLAS::FflasLeft, mloc, no2-q1, q2, SE, ld4, Q2, SW+q1, ld3);
-#ifdef LB_DEBUG
- std::cerr<<" [I2;F2] = L2^-1.I1"<<std::endl;
- write_field(F,std::cerr,SW,M-mo2,no2,ld3);
-#endif
- // I1 = Q2^-1.I1
- FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
- no2-q1, 0,(int) mloc, SW+q1, ld3, Q2 );
-#ifdef LB_DEBUG
- std::cerr<<"I1 = Q2^-1.I1"<<std::endl;
- write_field(F,std::cerr,SW,mloc,no2,ld3);
-#endif
-
- // B1 = B1.P2
- FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasTrans,
- mo2, 0,(int) q2, NE, ld2, P2 );
-#ifdef LB_DEBUG
- std::cerr<<"B1 = B1.P2"<<std::endl;
- write_field(F,std::cerr,NE,mo2,N-no2,ld2);
-#endif
- // Updating P
-#if 0
- for (size_t i=no2;i<N;++i)
- P[i] += no2;
- tim.stop();
- std::cerr<<"L2^-1:"<<tim.realtime()<<std::endl;
- tim.start();
-#endif
-
- //alternative: de 0 a q2 avant
- // N2 = B1_{q1+1,mo2} . V2^-1
- ftrsm(F, FFLAS::FflasRight, FFLAS::FflasUpper,FFLAS::FflasNoTrans,FFLAS::FflasNonUnit, mo2-q1, q2, F.one, SE, ld4, NE+q1*ld2,ld2);
-#if 0
- tim.stop();
- std::cerr<<"trsm2:"<<tim.realtime()<<std::endl;
- tim.start();
-#endif
-
- // H2 = B1_{q1+1,mo2;q2,N-no2} - N2.E2
- fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, mo2-q1, N-no2-q2, q2, F.mOne, NE+q1*ld2, ld2, SE+q2, ld4, F.one, NE+q1*ld2+q2, ld2);
-
-#if 0
- tim.stop();
- std::cerr<<"fgemm12:"<<tim.realtime()<<std::endl;
- tim.start();
- O2 = NW_{q1+1,mo2;q1+1,N-no2} = - N2.I2
- write_field (F,cerr<<"avant O2"<<endl, A, M, N, lda);
-#endif
-
- fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, mo2-q1, no2-q1, q2, F.mOne, NE+q1*ld2, ld2, SW+q1, ld3, F.zero,
- NW+q1*(ld1+1), ld1);
- // write_field (F,cerr<<"apres O2"<<endl, A, M, N, lda);
-#if 0
- tim.stop();
- std::cerr<<"fgemm22:"<<tim.realtime()<<std::endl;
- tim.start();
-#endif
-
-
- //Step 3: F2 = L3.Q3.U3.P3
- mloc = M-mo2-q2;
- nloc = no2-q1;
- q3 = LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, mloc, nloc, SW+q2*ld3+q1, ld3, P1+q1, Q2+q2, FFPACK::FfpackLQUP, cutoff);
-
- // Updating P1,Q2
- for (size_t i=q1;i<no2;++i)
- P1[i] += q1;
- for (size_t i=q2;i<q2+q3;++i)
- Q2[i] += q2;
-
- //Step 3bis: H2 = L3b.Q3b.U3b.P3b
- mloc = mo2-q1;
- nloc = N-no2-q2;
-
- q3b = LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, mloc, nloc, NE+q1*ld2+q2, ld2, P2+q2, Q1+q1, FFPACK::FfpackLQUP, cutoff);
-
- // Updating P2, Q1
- for (size_t i = q2; i < q2+q3b; ++i)
- P2[i] += q2;
-
-#if 0
- tim.stop();
- std::cerr<<"LQUP3et3bis:"<<tim.realtime()<<std::endl;
- tim.start();
-#endif
-
- if (( q3 < no2-q1) && (q3b<mo2-q1)){
-
- // [O3;_] = L3b^-1.O2
- if (q3b>0){
-#if 0
- if ( mo2-q1 < N-no2-q2+q1)
- // L is expanded to a Lower triangular matrix
- solveLB( F, FFLAS::FflasLeft,mloc, no2-q1, q3b, NE+q1*ld2+q2 , ld2, rP3b, NW+q1*(ld1+1), ld1);
- else
-#endif
- //std::cerr<<"USING SOLVELB2"<<std::endl;
- //no modification of L
- solveLB2( F, FFLAS::FflasLeft,mloc, no2-q1, q3b, NE+q1*ld2+q2 , ld2, Q1+q1, NW+q1*(ld1+1), ld1);
-#ifdef LB_DEBUG
- std::cerr<<"O2 avant="<<std::endl;
- write_field(F,std::cerr,NW+q1*(ld1+1),mloc,no2-q1,ld1);
-#endif
-
- // O2 = Q3b^-1.O2
- FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
- no2-q1, 0,(int) mloc, NW+q1*(ld1+1), ld1, Q1+q1 );
-#ifdef LB_DEBUG
- std::cerr<<"O2 apres="<<std::endl;
- write_field(F,std::cerr,NW+q1*(ld1+1),mloc,no2-q1,ld1);
-#endif
-
- //updating Q
-#if 0
- size_t tmp;
- for (size_t j=0;j<mo2-q1;++j)
- if (rP3b[j]!=j){
- // std::cerr<<"(rP3b["<<j<<"]="<<rP3b[j]<<std::endl;
- tmp = Q[j+q1];
- Q[j+q1] = Q[rP3b[j]+q1];
- Q[rP3b[j]+q1] = tmp;
- }
-#endif
-
- // X2 = X2.P3
- // Si plusieurs niveaux rec, remplacer X2 par [NW;I2]
- FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasTrans,
- mo2-q1-q3b,(int) q1, (int)(q1+q3),
- NW/*+(q1+q3b)*ld1*/, ld1, P1);
- FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasTrans,
- q2,(int) q1, (int)(q1+q3),
- SW/*+(q1+q3b)*ld1*/, ld3, P1);
-
-
- // A faire si plusieurs niveaux recursifs
- // B2 = B2.P3b
- FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
- q1,(int) q2, (int)(q2+q3b),
- NW, ld2, P2);
- //flaswp(F,q1,NE,lda,no2+q2,no2+q2+q3b,P,1);
- // E2 = E2.P3b
- FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
- q2,(int) q2, (int)(q2+q3b),
- SE, ld4, P2);
- //flaswp(F,q2,SE+q2,lda,no2+q2,no2+q2+q3b,P,1);
- }
-
- // N3 = X2 . D3^-1
- ftrsm( F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, mo2-q1-q3b, q3, F.one, SW+q2*ld3+q1, ld3 ,NW+(q1+q3b)*ld1+q1,ld1);
-
- // T2 = T2 - N3.F3
- fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, mo2-q1-q3b, no2-q1-q3,q3, F.mOne, NW+(q1+q3b)*ld1+q1, ld1, SW+q2*ld3+q3+q1, ld3, F.one, NW+(q1+q3b)*ld1+q1+q3, ld1 );
-
-
- //Step 4: T2 = L4.Q4.U4.P4
- mloc = mo2-q1-q3b;
- nloc = no2-q1-q3;
-
-#if 0
- size_t * rP4 = new size_t[mloc];
- for (size_t j=0;j<mo2-q1;++j)
- rP4[j]=0;
-#endif
- q4 = LUdivine( F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, mloc, nloc, NW+(q1+q3b)*ld1+q1+q3, ld1, P1+q1+q3, Q1+q1+q3b, FFPACK::FfpackLQUP, cutoff);
-
- // Updating P
- for (size_t i=q1+q3;i<q1+q3+q4;++i)
- P1[i] += q3;
-
-#if 0
- size_t tmp;
- if (rP4[j]!=j){
- // std::cerr<<"(rP3b["<<j<<"]="<<rP3b[j]<<std::endl;
- tmp = Q[j+q1+q3b];
- Q[j+q1+q3b] = Q[rP3b[j]+q1+q3b];
- Q[rP3b[j]+q1+q3b] = tmp;
- }
-#endif
-
- // A faire si plusieurs niveaux recursifs
- // [G1;O3] = [G1;O3].P4
- FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
- q1+q3b, (int)(q1+q3), (int)(q1+q3+q4),
- NW, ld1, P1);
- //flaswp(F,q1+q3b,NE,lda,no2+q2,no2+q2+q3b,P,1);
- // [I2;F3] = [I2;F3].P4
- FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
- q2+q3, (int)(q1+q3),(int) (q1+q3+q4),
- SW, ld3, P1);
- //flaswp(F,q2,SE+q2,lda,no2+q2,no2+q2+q3b,P,1);
- }
- //!!!!!! Attention a appliquer Q4, Q2, Q3, Q3b a gauche !!!!!!!
-
- //updating Q1
- for (size_t i = q1; i < q1+q3b; ++i)
- Q1[i] += q1;
- for (size_t i=q1+q3b;i<q1+q3b+q4;++i)
- Q1[i] += q1 + q3b;
-
- for (size_t i=0; i<q1; ++i)
- P[i] = P1[i];
- for (size_t i=q1; i<q1+q2; ++i)
- P[i] = P2[i-q1] + no2;
- for (size_t i=q1+q2; i<q1+q2+q3; ++i)
- P[i] = P1[i-q2];
- for (size_t i=q1+q2+q3; i<q1+q2+q3+q3b; ++i)
- P[i] = P2[i-q1-q3]+no2;
- for (size_t i=q1+q2+q3+q3b; i<q1+q2+q3+q3b+q4; ++i)
- P[i] = P1[i-q2-q3b];
- delete[] P1;
- delete[] P2;
-
- for (size_t i=0; i<q1; ++i)
- Q[i] = Q1[i];
- for (size_t i=q1; i<q1+q2; ++i)
- Q[i] = Q2[i-q1] + mo2;
- for (size_t i=q1+q2; i<q1+q2+q3; ++i)
- Q[i] = Q2[i-q1] + mo2;
- for (size_t i=q1+q2+q3; i<q1+q2+q3+q3b; ++i)
- Q[i] = Q1[i-q2-q3];
- for (size_t i=q1+q2+q3+q3b; i<q1+q2+q3+q3b+q4; ++i)
- P[i] = Q1[i-q2-q3];
- delete[] Q1;
- delete[] Q2;
-
-
- //write_field (F, cerr<<"avant reordonnancement"<<endl, A, M,N, lda)<<endl;
- typename Field::Element * R = new typename Field::Element[M*N];
- size_t ldr = N;
- // Copying first q1 cols
- for (size_t i=0; i<q1; ++i)
- FFLAS::fcopy (F, q1, R+i*ldr, 1, NW+i*ld1,1);
- for (size_t i=q1; i<q1+q2+q3; ++i)
- FFLAS::fcopy (F, q1, R+i*ldr, 1, SW+(i-q1)*ld3,1);
- for (size_t i=q1+q2+q3; i<q2+q3+mo2; ++i)
- FFLAS::fcopy (F, q1, R+i*ldr, 1, NW+(i-q2-q3)*ld1,1);
- for (size_t i=q2+q3+mo2; i<M; ++i)
- FFLAS::fcopy (F, q1, R+i*ldr, 1, SW+(i-mo2)*ld3,1);
- // Copying q1..q2 cols
- for (size_t i=0; i<q1; ++i)
- FFLAS::fcopy (F, q2, R+q1+i*ldr, 1, NE+i*ld2,1);
- for (size_t i=q1; i<q1+q2+q3; ++i)
- FFLAS::fcopy (F, q2, R+q1+i*ldr, 1, SE+(i-q1)*ld4,1);
- for (size_t i=q1+q2+q3; i<q2+q3+mo2; ++i)
- FFLAS::fcopy (F, q2, R+q1+i*ldr, 1, NE+(i-q2-q3)*ld2,1);
- for (size_t i=q2+q3+mo2; i<M; ++i)
- FFLAS::fcopy (F, q2, R+q1+i*ldr, 1, SE+(i-mo2)*ld4,1);
- // Copying q2..q3 cols
- for (size_t i=0; i<q1; ++i)
- FFLAS::fcopy (F, q3, R+q1+q2+i*ldr, 1, NW+q1+i*ld1,1);
- for (size_t i=q1; i<q1+q2+q3; ++i)
- FFLAS::fcopy (F, q3, R+q1+q2+i*ldr, 1, SW+q1+(i-q1)*ld3,1);
- for (size_t i=q1+q2+q3; i<q2+q3+mo2; ++i)
- FFLAS::fcopy (F, q3, R+q1+q2+i*ldr, 1, NW+q1+(i-q2-q3)*ld1,1);
- for (size_t i=q2+q3+mo2; i<M; ++i)
- FFLAS::fcopy (F, q3, R+q1+q2+i*ldr, 1, SW+q1+(i-mo2)*ld3,1);
- // Copying q3..q3b cols
- for (size_t i=0; i<q1; ++i)
- FFLAS::fcopy (F, q3b, R+q1+q2+q3+i*ldr, 1, NE+q2+i*ld2,1);
- for (size_t i=q1; i<q1+q2+q3; ++i)
- FFLAS::fcopy (F, q3b, R+q1+q2+q3+i*ldr, 1, SE+q2+(i-q1)*ld4,1);
- for (size_t i=q1+q2+q3; i<q2+q3+mo2; ++i)
- FFLAS::fcopy (F, q3b, R+q1+q2+q3+i*ldr, 1, NE+q2+(i-q2-q3)*ld2,1);
- for (size_t i=q2+q3+mo2; i<M; ++i)
- FFLAS::fcopy (F, q3b, R+q1+q2+q3+i*ldr, 1, SE+q2+(i-mo2)*ld4,1);
- // Copying q3b..q4 cols
- for (size_t i=0; i<q1; ++i)
- FFLAS::fcopy (F, q4, R+q1+q2+q3+q3b+i*ldr, 1, NW+q1+q3+i*ld1,1);
- for (size_t i=q1; i<q1+q2+q3; ++i)
- FFLAS::fcopy (F, q4, R+q1+q2+q3+q3b+i*ldr, 1, SW+q1+q3+(i-q1)*ld3,1);
- for (size_t i=q1+q2+q3; i<q2+q3+mo2; ++i)
- FFLAS::fcopy (F, q4, R+q1+q2+q3+q3b+i*ldr, 1, NW+q1+q3+(i-q2-q3)*ld1,1);
- for (size_t i=q2+q3+mo2; i<M; ++i)
- FFLAS::fcopy (F, q4, R+q1+q2+q3+q3b+i*ldr, 1, SW+q1+q3+(i-mo2)*ld3,1);
- // Copying the last cols
- for (size_t i=0; i<q1; ++i)
- FFLAS::fcopy (F, no2-q1-q3-q4, R+q1+q2+q3+q3b+q4+i*ldr, 1, NW+q1+q3+q4+i*ld1,1);
- for (size_t i=q1; i<q1+q2+q3; ++i)
- FFLAS::fcopy (F, no2-q1-q3-q4, R+q1+q2+q3+q3b+q4+i*ldr, 1, SW+q1+q3+q4+(i-q1)*ld3,1);
- for (size_t i=q1+q2+q3; i<q2+q3+mo2; ++i)
- FFLAS::fcopy (F, no2-q1-q3-q4, R+q1+q2+q3+q3b+q4+i*ldr, 1, NW+q1+q3+q4+(i-q2-q3)*ld1,1);
- for (size_t i=q2+q3+mo2; i<M; ++i)
- FFLAS::fcopy (F, no2-q1-q3-q4, R+q1+q2+q3+q3b+q4+i*ldr, 1, SW+q1+q3+q4+(i-mo2)*ld3,1);
- // Copying the last cols
- for (size_t i=0; i<q1; ++i)
- FFLAS::fcopy (F, N-no2-q2-q3b, R+no2+q2+q3b+i*ldr, 1, NE+q2+q3b+i*ld2,1);
- for (size_t i=q1; i<q1+q2+q3; ++i)
- FFLAS::fcopy (F, N-no2-q2-q3b, R+no2+q2+q3b+i*ldr, 1, SE+q2+q3b+(i-q1)*ld4,1);
- for (size_t i=q1+q2+q3; i<q2+q3+mo2; ++i)
- FFLAS::fcopy (F, N-no2-q2-q3b, R+no2+q2+q3b+i*ldr, 1, NE+q2+q3b+(i-q2-q3)*ld2,1);
- for (size_t i=q2+q3+mo2; i<M; ++i)
- FFLAS::fcopy (F, N-no2-q2-q3b, R+no2+q2+q3b+i*ldr, 1, SE+q2+q3b+(i-mo2)*ld4,1);
-
- // A=R : to be improved (avoid allocation of R). To be changed if rec data structure are used
- for (size_t i=0; i<M; ++i)
- FFLAS::fcopy (F, N, A+i*lda, 1, R+i*ldr,1);
-
- delete[] R;
- //delete[] Q;
- // Necessaire:
- // 1 traiter les flaswp manquants
- // Facultatif:
- // 2 permutations de lignes doivent etre coherentes
- // 3 effectuer les dernieres permutations lignes et colonnes
- //std::cerr<<q1<<" "<<q2<<" "<<q3<<" "<<q3b<<" "<<q4<<std::endl;
- return q1+q2+q3+q3b+q4;
- }
-
-
- /*!
- * @brief Updates an existing LU factorisation with more rows.
- *
- * @param F Field on which arithmetic is done
- * @param Diag Is \p L unit ? (if so, \c FFLAS::FflasUnit)
- * @param trans Not used yet, should be \c FFLAS::FflasNoTrans
- * @param M rows in \p A
- * @param N cols in \p A
- * @param A \p A is already in \c LU factorisation
- * @param lda leading dimension of \p A
- * @param R rank of \p A
- * @param K rows in \p B
- * @param B more rows to append to \p A
- * @param ldb leading dimension of \p B (not tested if != lda)
- * @param P permutation for \c LU in \p A. Should be big enough so it can store the permutation for \c LU of \p A and \p B
- * @param Q same as \p P
- * @param LuTag see \c LUdivine.
- * @param cutoff see \c LUdivine.
- *
- * @return rank of <code>A.append(B)</code>
- * @bug may be bogus.
- */
- template <class Field>
- size_t LUpdate (const Field& F,
- const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
- const size_t M, const size_t N,
- typename Field::Element * A, const size_t lda,
- const size_t R,
- const size_t K,
- typename Field::Element * B, const size_t ldb,
- size_t*P, size_t *Q , const FFPACK::FFPACK_LUDIVINE_TAG LuTag , const size_t cutoff)
- {
- if (trans == FFLAS::FflasTrans)
- throw Failure(__func__,__FILE__,__LINE__,"Transposed version is not implemented yet");
- typedef typename Field::Element elt;
- // size_t MN = MIN(M,N);
-
- size_t incRow, incCol, rowDim, colDim;
-#if 0 /* not working */
- if (trans == FFLAS::FflasTrans){
- incRow = 1;
- incCol = lda;
- colDim = M;
- rowDim = N;
- }
- else
-#endif
- { // trans == FFLAS::FflasNoTrans
- incRow = lda;
- incCol = 1;
- colDim = N;
- rowDim = M;
- }
-
- size_t Nup = rowDim;
- size_t Ndown = K;
-
-
- if ( !K ) { // no line to append
- std::cout << "no row to append" << std::endl;
- return R;
- }
- if ( !R ) { // A was null
- // std::cout << "A was 0" << std::endl;
- // LU on B
- size_t R2 = LUdivine(F,Diag,trans,K,N,B,lda,P,Q+Nup,LuTag,cutoff);
- if (!R2)
- return 0 ;
- // Non zero row permutations
- for (size_t i = Nup; i < Nup + R2; ++i)
- Q[i] += Nup;
- { // Permutation of the 0 rows Could probably be improved !
- if (Diag == FFLAS::FflasNonUnit){
- for ( size_t i = 0, j = R ; i < R2; ++i, ++j){
- FFLAS::fcopy( F, colDim - j, A + j * (lda + 1),
- incCol, B + i*incRow + j*incCol, incCol);
- typename Field::Element *Ai = B + i*incRow + j*incCol ;
- typename Field::Element *Aend = B + colDim*incCol ;
- for (; Ai != Aend + i*incRow ; Ai+=incCol)
- F.assign (*Ai, F.zero);
- ///@todo std::swap ?
- size_t t = Q[j];
- Q[j]=Q[Nup+i];
- Q[Nup+i] = t;
- }
- }
- else { // Diag == FFLAS::FflasUnit
- for ( size_t i = 0, ii = R+1 ; i < R2; ++i, ++ii){
- if (ii < M)
- FFLAS::fcopy( F, colDim - ii,
- A + (ii-1)*incRow + ii*incCol, incCol,
- B + i*incRow + ii*incCol, incCol);
- else {
- std::cout << "dangerous zone" << std::endl;
- FFLAS::fcopy( F, colDim - ii,
- B + (ii-M-1)*incRow + ii*incCol, incCol,
- B + i*incRow + ii*incCol, incCol);
- }
-
- typename Field::Element *Ai = B + i*incRow + ii*incCol ;
- typename Field::Element *Aend = B + colDim*incCol ;
- for (; Ai != Aend + i*incRow ; Ai+=incCol)
- F.assign (*Ai, F.zero);
- size_t t = Q[ii-1];
- Q[ii-1]=Q[Nup+i];
- Q[Nup+i] = t;
- }
- }
- }
-#if 0 /* surréaliste ! */
- // move to B and A
- if (K <= M) {
- FFLAS::fmove(F,K,N,A,lda,B,ldb);
- }
- else { // K >M
- FFLAS::fcopy(F,M,N,A,lda,B,ldb);
- FFLAS::fcopy(F,K-M,N,B,ldb,B+M*ldb,ldb);
- FFLAS::fzero(F,M,N,B+(K-M)*ldb,ldb);
- }
- for (size_t i = Nup, ii = 0 ; i < R2 ; ++ii, ++i)
- std::swap(Q[i],Q[ii]);
-#endif
- return R2 ;
- }
-
-
-#if 0 /* not ported */
- if (MN == 1){
- size_t ip=0;
- //while (ip<N && !F.isUnit(*(A+ip)))ip++;
- while (F.isZero (*(A+ip*incCol)))
- if (++ip == colDim)
- break;
- Q[0] = 0;
- if (ip == colDim){ // current row is zero
- P[0] = 0;
- if (colDim == 1){
- while (ip<rowDim && F.isZero(*(A + ip*incRow))){
- Q[ip]=ip;
- ip++;
- }
- if (ip == rowDim) {
- return 0;
- }
- else {
- size_t oldip = ip;
- if ( Diag == FFLAS::FflasNonUnit ){
- elt invpiv;
- F.inv(invpiv,*(A+ip*incRow));
- while(++ip<rowDim)
- F.mulin(*(A + ip*incRow), invpiv);
- elt tmp;
- F.assign(tmp, *(A+oldip*incRow));
- F.assign( *(A+oldip*incRow), *A);
- F.assign( *A, tmp);
- }
- *Q=oldip;
-
- return 1;
- }
- }
- else{ *Q=0; return 0;}
- }
- *P=ip;
- if (ip!=0){
- // swap the pivot
- typename Field::Element tmp=*A;
- *A = *(A + ip*incCol);
- *(A + ip*incCol) = tmp;
- }
- elt invpiv;
- F.inv(invpiv, *A);
- if ( Diag == FFLAS::FflasUnit ){
- // Normalisation of the row
- for (size_t k=1; k<colDim; k++)
- F.mulin(*(A+k*incCol), invpiv);
- }
- else if ( colDim==1 )
- while(++ip<rowDim)
- F.mulin(*(A + ip*incRow), invpiv);
- return 1;
- }
- else // MN>1
-#endif
- {
- // Recursive call on NW
- size_t R2;
-#if 0 /* not working */
- if (trans == FFLAS::FflasTrans){
- size_t Nd = N / 2 ;
- size_t Ng = N-Nd ;
- typename Field::Element *Ar = A + Nd; // SW
- typename Field::Element *Ac_sup = A + R*lda; // NE
- typename Field::Element *An_sup = Ar + R*lda; // SE
- typename Field::Element *Ac_inf = B ; // NE
- typename Field::Element *An_sup = B + Nd; // SE
-
-
-
- // Ar <- P.Ar
- FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
- Ndown, 0,(int) R, Ar, lda, P);
- // Ar <- L1^-1 Ar
- ftrsm( F, FFLAS::FflasLeft, FFLAS::FflasLower,
- FFLAS::FflasNoTrans, Diag, R, Ndown,
- F.one, A, lda, Ar, lda);
- // An <- An - Ac*Ar
- fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, colDim-R, Ndown, R,
- F.mOne, Ac, lda, Ar, lda, F.one, An, lda);
- // LU call on SE
- R2 = LUdivine (F, Diag, trans, colDim-R, Ndown, An, lda, P + R, Q + Nup,
- LuTag, cutoff);
- for (size_t i = R; i < R + R2; ++i)
- P[i] += R;
- if (R2) {
- // An <- An.P2
- FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
- Nup,(int) R, (int)(R+R2), A, lda, P);
- }
- else { // !R2
- if (LuTag == FFPACK::FfpackSingular)
- return 0;
- }
-
- }
- else
-#endif
- { // trans == FFLAS::FflasNoTrans
- typename Field::Element *Ac = A + R*incCol; // NE
- typename Field::Element *Ar = B; // SW
- typename Field::Element *An = Ar + R*incCol; // SE
-
-
- // Ar <- Ar.P
- FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
- Ndown, 0,(int) R, Ar, lda, P);
- // Ar <- Ar.U1^-1
- ftrsm( F, FFLAS::FflasRight, FFLAS::FflasUpper,
- FFLAS::FflasNoTrans, Diag, Ndown, R,
- F.one, A, lda, Ar, lda);
- // An <- An - Ar*Ac
- fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Ndown, colDim-R, R,
- F.mOne, Ar, lda, Ac, lda, F.one, An, lda);
-
- // LU call on SE
- R2=LUdivine (F, Diag, trans, Ndown, N-R, An, lda,P+R, Q+Nup,
- LuTag, cutoff);
- if (R2) {
- for (size_t i = R; i < R + R2; ++i)
- P[i] += R;
- // An <- An.P2
- FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans,
- Nup,(int) R, (int)(R+R2), A, lda, P);
-
- }
- else {
- if (LuTag == FFPACK::FfpackSingular)
- return 0;
- }
-
- }
- // Non zero row permutations
- for (size_t i = Nup; i < Nup + R2; ++i)
- Q[i] += Nup;
- if (R < Nup){ // Permutation of the 0 rows
- if (Diag == FFLAS::FflasNonUnit){
- for ( size_t i = 0, j = R ; i < R2; ++i, ++j){
- FFLAS::fcopy( F, colDim - j, A + j * (lda + 1),
- incCol, B + i*incRow + j*incCol, incCol);
- typename Field::Element *Ai = B + i*incRow + j*incCol ;
- typename Field::Element *Aend = B + colDim*incCol ;
- for (; Ai != Aend + i*incRow ; Ai+=incCol)
- F.assign (*Ai, F.zero);
- ///@todo std::swap ?
- size_t t = Q[j];
- Q[j]=Q[Nup+i];
- Q[Nup+i] = t;
- }
- }
- else { // Diag == FFLAS::FflasUnit
- for ( size_t i = 0, ii = R+1 ; i < R2; ++i, ++ii){
- if (ii < M)
- FFLAS::fcopy( F, colDim - ii,
- A + (ii-1)*incRow + ii*incCol, incCol,
- B + i*incRow + ii*incCol, incCol);
- else {
- // std::cout << "dangerous zone" << std::endl;
- FFLAS::fcopy( F, colDim - ii,
- B + (ii-M-1)*incRow + ii*incCol, incCol,
- B + i*incRow + ii*incCol, incCol);
- }
-
- typename Field::Element *Ai = B + i*incRow + ii*incCol ;
- typename Field::Element *Aend = B + colDim*incCol ;
- for (; Ai != Aend + i*incRow ; Ai+=incCol)
- F.assign (*Ai, F.zero);
- size_t t = Q[ii-1];
- Q[ii-1]=Q[Nup+i];
- Q[Nup+i] = t;
- }
- }
- }
- return R + R2;
- }
- }
-
-
-
} // FFPACK
#undef LB_DEBUG
diff --git a/fflas-ffpack/ffpack/ffpack_ludivine_mp.inl b/fflas-ffpack/ffpack/ffpack_ludivine_mp.inl
new file mode 100644
index 0000000..9688a15
--- /dev/null
+++ b/fflas-ffpack/ffpack/ffpack_ludivine_mp.inl
@@ -0,0 +1,136 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFPACK_ludivine_mp_INL
+#define __FFPACK_ludivine_mp_INL
+
+#include <givaro/modular-integer.h>
+#include <givaro/givinteger.h>
+
+#ifdef BENCH_PERF_LQUP_MP
+#define BENCH_PERF_FGEMM_MP
+#endif
+
+#include "fflas-ffpack/field/rns-integer-mod.h"
+#include "fflas-ffpack/field/rns-integer.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/ffpack/ffpack_ludivine.inl"
+
+namespace FFPACK {
+
+ template <>
+ inline size_t
+ LUdivine (const Givaro::Modular<Givaro::Integer>& F,
+ const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
+ const size_t M, const size_t N,
+ typename Givaro::Integer* A, const size_t lda,
+ size_t*P, size_t *Q,
+ const FFPACK::FFPACK_LU_TAG LuTag,
+ const size_t cutoff
+ )
+ {
+#ifdef BENCH_PERF_LQUP_MP
+ double t_init=0, t_lqup=0, t_mod=0, t_rec=0;
+ FFLAS::Timer chrono;
+ chrono.start();
+#endif
+ Givaro::Integer p;
+ F.cardinality(p);
+ size_t logp=p.bitsize();
+ size_t K = std::max(M,N);
+
+ // compute bit size of feasible prime
+ size_t _k=std::max(K+1,logp/20), lk=0;
+ while ( _k ) {_k>>=1; ++lk;}
+ size_t prime_bitsize= (53-lk)>>1;
+
+ // construct rns basis
+ Givaro::Integer maxC= (p-1)*(p-1)*(p-1)*uint64_t(K);
+ uint64_t n_pr =uint64_t(ceil(double(maxC.bitsize())/double(prime_bitsize)));
+ maxC=(p-1)*(p-1)*uint64_t(K)*(1<<prime_bitsize)*n_pr;
+
+
+ FFPACK::rns_double RNS(maxC, prime_bitsize, true);
+ FFPACK::RNSIntegerMod<FFPACK::rns_double> Zp(p, RNS);
+#ifdef BENCH_PERF_LQUP_MP
+ chrono.stop();
+ t_init+=chrono.usertime();
+ chrono.clear();chrono.start();
+#endif
+ // compute A in RNS
+ FFPACK::rns_double::Element_ptr Ap;
+ Ap = FFLAS::fflas_new(Zp,M,N);
+ FFLAS::finit_rns(Zp,M,N,(logp/16)+(logp%16?1:0),A,lda,Ap);
+
+
+#ifdef BENCH_PERF_LQUP_MP
+ chrono.stop();
+ t_mod+=chrono.usertime();
+ chrono.clear();chrono.start();
+#endif
+
+ // call lqup in rns
+ size_t R=FFPACK::LUdivine(Zp, Diag, trans, M, N, Ap, N, P, Q, LuTag, cutoff);
+
+ //std::cout<<"LUDivine RNS done"<<std::endl;
+#ifdef BENCH_PERF_LQUP_MP
+ chrono.stop();
+ t_lqup+=chrono.usertime();
+ chrono.clear();chrono.start();
+#endif
+ // reconstruct the result
+ FFLAS::fconvert_rns(Zp,M,N,F.zero,A,lda,Ap);
+#ifdef BENCH_PERF_LQUP_MP
+ chrono.stop();
+ t_rec+=chrono.usertime();
+ chrono.clear();chrono.start();
+#endif
+ // reduce it modulo p
+ FFLAS::freduce (F,M,N,A,lda);
+
+#ifdef BENCH_PERF_LQUP_MP
+ chrono.stop();
+ //t_rec+=chrono.usertime();
+ cout<<"LUDIVINE RNS PERF:"<<endl;
+ cout<<" --- RNS basis size: "<<Zp.size() <<endl;
+ cout<<" *** init : "<<t_init<<endl;
+ cout<<" *** rns mod : "<<t_mod<<endl;
+ cout<<" *** rns lqup : "<<t_lqup<<" ( igemm="<<Zp.t_igemm<<" ftrsm="<<Zp.t_trsm<<" scal="<<Zp.t_scal
+ <<" modp="<<Zp.t_modp<<endl;
+ cout<<" *** rns rec : "<<t_rec<<endl;
+ cout<<" *** mod : "<<chrono.usertime()<<endl;
+
+#endif
+ FFLAS::fflas_delete(Ap);
+ return R;
+
+ }
+
+} // namespace FFPACK
+
+#endif
diff --git a/fflas-ffpack/ffpack/ffpack_minpoly.inl b/fflas-ffpack/ffpack/ffpack_minpoly.inl
index 3ef900c..c3f9726 100644
--- a/fflas-ffpack/ffpack/ffpack_minpoly.inl
+++ b/fflas-ffpack/ffpack/ffpack_minpoly.inl
@@ -6,20 +6,20 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -33,8 +33,8 @@ namespace FFPACK {
template <class Field, class Polynomial>
Polynomial&
MinPoly( const Field& F, Polynomial& minP, const size_t N
- ,const typename Field::Element *A, const size_t lda
- ,typename Field::Element* X, const size_t ldx
+ ,typename Field::ConstElement_ptr A, const size_t lda
+ ,typename Field::Element_ptr X, const size_t ldx
,size_t* P
,const FFPACK_MINPOLY_TAG MinTag// = FfpackDense
,const size_t kg_mc// =0
@@ -42,16 +42,14 @@ namespace FFPACK {
,const size_t kg_j //=0
)
{
-
- typedef typename Field::Element elt;
// nRow is the number of row in the krylov base already computed
size_t j, k ;
//size_t nRow = 2;
typename Polynomial::iterator it;
- elt* Xi, *Ui;
+ typename Field::Element_ptr Xi, Ui;
typename Field::RandIter g (F);
bool KeepOn=true;
- elt* U = new elt[N];
+ typename Field::Element_ptr U = FFLAS::fflas_new (F, N, 1);
// Picking a non zero vector
do{
for (Ui=U, Xi = X; Ui<U+N; ++Ui, ++Xi){
@@ -66,24 +64,24 @@ namespace FFPACK {
// LUP factorization of the Krylov Base Matrix
k = Protected::LUdivine_construct (F, FFLAS::FflasUnit, N+1, N, A, lda, X, ldx, U, P, true,
MinTag, kg_mc, kg_mb, kg_j);
- //delete[] U;
+ //FFLAS::fflas_delete( U);
minP.resize(k+1);
minP[k] = F.one;
if ( (k==1) && F.isZero(*(X+ldx))){ // minpoly is X
- delete[] U;
+ FFLAS::fflas_delete (U);
for (size_t i=0; i<k; ++i)
minP[i] = F.zero;
return minP;
}
// U contains the k first coefs of the minpoly
- //elt* m= new elt[k];
- FFLAS::fcopy( F, k, U, 1, X+k*ldx, 1);
+ //typename Field::Element_ptr m= FFLAS::fflas_new<elt>(k);
+ FFLAS::fassign( F, k, X+k*ldx, 1, U, 1);
ftrsv( F, FFLAS::FflasLower, FFLAS::FflasTrans, FFLAS::FflasNonUnit, k, X, ldx, U, 1);
it = minP.begin();
for (j=0; j<k; ++j, it++){
F.neg(*it, U[j]);
}
- delete[] U;
+ FFLAS::fflas_delete (U);
return minP;
}
diff --git a/fflas-ffpack/ffpack/ffpack_minpoly_construct.inl b/fflas-ffpack/ffpack/ffpack_minpoly_construct.inl
index b4018ae..7edcf5f 100644
--- a/fflas-ffpack/ffpack/ffpack_minpoly_construct.inl
+++ b/fflas-ffpack/ffpack/ffpack_minpoly_construct.inl
@@ -5,20 +5,20 @@
*
* Written by Clement Pernet <Clement.Pernet at imag.fr>
*
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -41,21 +41,20 @@ namespace FFPACK {
template <class Field, class Polynomial>
Polynomial&
MinPoly( const Field& F, Polynomial& minP, const size_t N,
- const typename Field::Element *A, const size_t lda,
- typename Field::Element* U, size_t ldu,typename Field::Element* X, size_t ldx,
+ typename Field::ConstElement_ptr A, const size_t lda,
+ typename Field::Element_ptr U, size_t ldu, typename Field::Element_ptr X, size_t ldx,
size_t* P)
{
- typedef typename Field::Element elt;
// nRow is the number of row in the krylov base already computed
size_t j, k, nRow = 2;
- elt* B = new elt[ N*N ];
+ typename Field::Element_ptr B = FFLAS::fflas_new (F, N, N);
typename Polynomial::iterator it;
- elt* Xi, *Ui;
+ typename Field::Element_ptr Xi, *Ui;
typename Field::RandIter g (F);
bool KeepOn=true;
// Creating the Krylov Base copy matrix X where to factorize
- //elt * X = new elt[(N+1)*N];
+ //typename Field::Element_ptr X = FFLAS::fflas_new<elt>((N+1)*N);
#ifdef LB_DEBUG
for (j=0;j<(N+1)*N;j++)
X[j] = zero;
@@ -83,22 +82,22 @@ namespace FFPACK {
k = Protected::LUdivine_construct(F, FflasUnit, N+1, N, B, N, U, ldu, X, N, P,
&nRow, N+1, &nUsedRow );
- delete[] B;
+ FFLAS::fflas_delete (B);
minP.resize(k+1);
minP[k] = one;
if (k==1 && F.isZero(*(X+N))){ // minpoly is X
return minP;
}
// m contains the k first coefs of the minpoly
- elt* m= new elt[k];
- fcopy( F, k, m, 1, X+k*N, 1);
+ typename Field::Element_ptr m= FFLAS::fflas_new (F,k,1);
+ fassign( F, k, X+k*N, 1, m, 1);
ftrsv( F, FflasLower, FflasTrans, FflasNonUnit, k, X, N, m, 1);
- //delete[] X;
+ //FFLAS::fflas_delete( X);
it = minP.begin();
for (j=0; j<k; ++j, it++){
F.neg(*it, m[j]);
}
- delete[] m;
+ FFLAS::fflas_delete (m);
return minP;
}
} // FFPACK
diff --git a/fflas-ffpack/ffpack/ffpack_permutation.inl b/fflas-ffpack/ffpack/ffpack_permutation.inl
new file mode 100644
index 0000000..720e048
--- /dev/null
+++ b/fflas-ffpack/ffpack/ffpack_permutation.inl
@@ -0,0 +1,541 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 FFLAS-FFACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_ffpack_permutation_INL
+#define __FFLASFFPACK_ffpack_permutation_INL
+
+#include <givaro/zring.h>
+
+#include "fflas-ffpack/fflas/fflas_fassign.h"
+
+namespace FFPACK {
+
+ template<class Field>
+ void
+ applyP( const Field& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const FFLAS::FFLAS_TRANSPOSE Trans,
+ const size_t M, const size_t ibeg, const size_t iend,
+ typename Field::Element_ptr A, const size_t lda, const size_t * P )
+ {
+ if ( Side == FFLAS::FflasRight ) {
+ if ( Trans == FFLAS::FflasTrans ){
+ for ( size_t i=(size_t)ibeg; i<(size_t) iend; ++i)
+ if ( P[i]!= i )
+ FFLAS::fswap( F, M, A + P[i]*1, lda, A + i*1, lda);
+ } else { // Trans == FFLAS::FflasNoTrans
+ for (size_t i=iend; i-->ibeg; )
+ if ( P[i]!=(size_t)i )
+ FFLAS::fswap( F, M, A + P[i]*1, lda, A + i*1, lda);
+ }
+ } else { // Side == FFLAS::FflasLeft
+ if ( Trans == FFLAS::FflasNoTrans ) {
+ for (size_t i=(size_t)ibeg; i<(size_t)iend; ++i)
+ if ( P[i]!= (size_t) i )
+ FFLAS::fswap( F, M, A + P[i]*lda, 1, A + i*lda, 1);
+ } else { // Trans == FFLAS::FflasTrans
+ for (size_t i=iend; i-->ibeg; )
+ if ( P[i]!= (size_t) i )
+ FFLAS::fswap( F, M, A + P[i]*lda, 1, A + i*lda, 1);
+ }
+ }
+ }
+
+
+ template<class Field>
+ inline void doApplyS (const Field& F,
+ typename Field::Element_ptr A, const size_t lda, typename Field::Element_ptr tmp,
+ const size_t width, const size_t M2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4)
+ {
+ FFLAS::fassign(F, M2-R1-R2, width, A + (R1+R2)*lda, lda, tmp, width);
+ FFLAS::fassign(F, R3+R4, width, A + M2*lda, lda, A + (R1+R2)*lda, lda);
+ FFLAS::fassign(F, M2-R1-R2, width, tmp, width, A + (R1+R2+R3+R4)*lda, lda);
+ }
+ template <class Field>
+ inline void MatrixApplyS (const Field& F, typename Field::Element_ptr A, const size_t lda,
+ const size_t width, const size_t M2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4)
+ {
+ typename Field::Element_ptr tmp = FFLAS::fflas_new (F, M2-R1-R2, width);
+ doApplyS (F, A, lda, tmp, width, M2, R1, R2, R3, R4);
+ FFLAS::fflas_delete (tmp);
+ }
+ template <class T>
+ inline void PermApplyS (T* A, const size_t lda,
+ const size_t width, const size_t M2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4)
+ {
+ Givaro::ZRing<T> D;
+ T* tmp = FFLAS::fflas_new<T>((M2-R1-R2)*width);
+ doApplyS (D, A, lda, tmp, width, M2, R1, R2, R3, R4);
+ FFLAS::fflas_delete( tmp);
+ }
+
+
+
+ template <class Field>
+ inline void doApplyT (const Field& F, typename Field::Element_ptr A, const size_t lda, typename Field::Element_ptr tmp,
+ const size_t width, const size_t N2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4)
+ {
+ for (size_t k = 0; k < width; ++k){
+ FFLAS::fassign(F, N2-R1, A+R1+k*lda, 1, tmp + k*(N2-R1), 1);
+ FFLAS::fassign(F, R2, A+N2+k*lda, 1, A + R1 + k*lda, 1);
+ FFLAS::fassign(F, R3, tmp + k*(N2-R1), 1, A+R1+R2+k*lda, 1);
+ FFLAS::fassign(F, R4, A + N2 + R2 + k*lda, 1, A + R1+R2+R3 + k*lda, 1);
+ FFLAS::fassign(F, N2-R1-R3, tmp + R3 + k*(N2-R1), 1, A+R1+R2+R3+R4+k*lda, 1);
+ }
+ }
+
+ template <class Field>
+ inline void MatrixApplyT (const Field& F, typename Field::Element_ptr A, const size_t lda,
+ const size_t width, const size_t N2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4)
+ {
+ typename Field::Element_ptr tmp = FFLAS::fflas_new (F, N2-R1, width);
+ doApplyT (F, A, lda, tmp, width, N2, R1, R2, R3, R4);
+ FFLAS::fflas_delete (tmp);
+ }
+ template <class T>
+ inline void PermApplyT (T* A, const size_t lda,
+ const size_t width, const size_t N2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4)
+ {
+ Givaro::ZRing<T> D;
+ T* tmp = FFLAS::fflas_new<T >((N2-R1)*width);
+ doApplyT (D, A, lda, tmp, width, N2, R1, R2, R3, R4);
+ FFLAS::fflas_delete( tmp);
+ }
+
+ /**
+ * Conversion of a permutation from LAPACK format to Math format
+ */
+ inline void LAPACKPerm2MathPerm (size_t * MathP, const size_t * LapackP,
+ const size_t N)
+ {
+ for (size_t i=0; i<N; i++)
+ MathP[i] = i;
+ for (size_t i=0; i<N; i++){
+ if (LapackP[i] != i){
+ std::swap(MathP[i],MathP[LapackP[i]]);
+ // size_t tmp = MathP[i];
+ // MathP[i] = MathP[LapackP[i]];
+ // MathP[LapackP[i]] = tmp;
+ }
+ }
+ }
+
+ /**
+ * Conversion of a permutation from Maths format to LAPACK format
+ */
+ inline void MathPerm2LAPACKPerm (size_t * LapackP, const size_t * MathP,
+ const size_t N)
+ {
+ size_t * T = FFLAS::fflas_new<size_t>(N);
+ size_t * Tinv = FFLAS::fflas_new<size_t>(N);
+ for (size_t i=0; i<N; i++){
+ T[i] =i;
+ Tinv[i] = i;
+ }
+ for (size_t i=0; i<N; i++){
+ size_t j = Tinv [MathP [i]];
+ LapackP [i] = j;
+ size_t tmp = T[j];
+ T[j] = T[i];
+ Tinv[T[i]] = j;
+ T[i] = tmp;
+ Tinv[tmp] = i;
+ }
+ FFLAS::fflas_delete( T);
+ FFLAS::fflas_delete( Tinv);
+ }
+
+ /**
+ * Computes P1 [ I_R ] stored in MathPermutation format
+ * [ P_2 ]
+ */
+ inline void composePermutationsP (size_t * MathP,
+ const size_t * P1,
+ const size_t * P2,
+ const size_t R, const size_t N)
+ {
+ for (size_t i=0; i<N; ++i)
+ MathP[i] = i;
+ LAPACKPerm2MathPerm (MathP, P1, N);
+
+ for (size_t i=R; i<N; i++){
+ if (P2[i-R] != i-R){
+ size_t tmp = MathP[i];
+ MathP[i] = MathP[P2[i-R]+R];
+ MathP[P2[i-R]+R] = tmp;
+ }
+ }
+ }
+
+ inline void composePermutationsQ (size_t * MathP,
+ const size_t * Q1,
+ const size_t * Q2,
+ const size_t R, const size_t N)
+ {
+ for (size_t i=0; i<N; ++i)
+ MathP[i] = i;
+ LAPACKPerm2MathPerm (MathP, Q1, N);
+
+ for (size_t i=R; i<N; i++){
+ if (Q2[i-R] != i-R){
+ size_t tmp = MathP[i];
+ MathP[i] = MathP[Q2[i-R]+R];
+ MathP[Q2[i-R]+R] = tmp;
+ }
+ }
+ }
+
+ inline void
+ cyclic_shift_mathPerm (size_t * P, const size_t s)
+ {
+ size_t tmp;
+ tmp = P[s-1];
+ //memmove(P+1, P, (s)*sizeof(size_t));
+ size_t * Pi = P;
+ std::copy(Pi, Pi+s-1, Pi+1);
+
+ *(P)=tmp;
+ }
+ // @BUG highly not portable to other fields than modular<basis type>
+ // Need a rewrite in order to support RNSModP field
+ template<class Field>
+ inline void cyclic_shift_row_col(const Field & F, typename Field::Element_ptr A, size_t m, size_t n, size_t lda)
+ {
+ typedef typename Field::Element Element;
+ typedef typename Field::Element_ptr Element_ptr;
+#ifdef MEMCOPY
+ // std::cerr << "BEF m: " << m << ", n: " << n << std::endl;
+
+ if (m > 1) {
+ const size_t mun(m-1);
+ if (n > 1) {
+ // std::cerr << "m: " << m << ", n: " << n << std::endl;
+ const size_t nun(n-1);
+ const size_t blo(sizeof(Element));
+ // const size_t bmu(blo*mun);
+ const size_t bnu(blo*nun);
+ Element_ptr b = FFLAS::fflas_new(F,mun);
+ for(size_t i=0; i<mun; ++i) b[i] = A[i*lda+nun];
+ Element_ptr dc = FFLAS::fflas_new (F,n);
+ memcpy(dc+1,A+mun*lda,bnu);
+ *dc = *(A+mun*lda+nun); // this is d
+ // dc = [ d c ]
+
+ for(size_t i=mun; i>0; --i)
+ memcpy(A+1+i*lda, A+(i-1)*lda, bnu);
+
+ memcpy(A, dc, bnu+blo);
+ for(size_t i=0; i<mun; ++i) A[(i+1)*lda] = b[i];
+ delete [] dc;
+ delete [] b;
+
+ } else if (n != 0) {
+ Base_t d = A[mun*lda];
+ for(size_t i=mun; i>0; --i) A[i*lda]=A[(i-1)*lda];
+ *A=d;
+ }
+ } else {
+ if ((m!=0) && (n > 1)) {
+ const size_t nun(n-1);
+ const size_t blo(sizeof(Element));
+ const size_t bnu(blo*nun);
+ Element d = A[nun];
+ // std::cerr << "d: " << d << std::endl;
+ Element_ptr tmp = FFLAS::fflas_new(F,nun);
+ memcpy(tmp,A,bnu);
+ memcpy(A+1,tmp,bnu);
+ // std::copy(A,A+nun,A+1);
+ *A=d;
+ delete [] tmp;
+ }
+ }
+ // std::cerr << "AFT m: " << m << ", n: " << n << std::endl;
+
+#else
+
+ // std::cerr << "BEF m: " << m << ", n: " << n << std::endl;
+ if (m > 1) {
+ const size_t mun(m-1);
+ if (n > 1) {
+ const size_t nun(n-1);
+
+ Element_ptr b = FFLAS::fflas_new (F,mun);
+ Element_ptr Ainun = A+nun;
+ for(size_t i=0; i<mun; ++i, Ainun+=lda) b[i] = *Ainun;
+
+ // dc = [ d c ]
+ Element_ptr dc = FFLAS::fflas_new (F,n);
+ FFLAS::fassign(F,nun,Ainun-nun,1, dc+1,1);
+ //std::copy(Ainun-nun, Ainun, dc+1);
+
+ // this is d
+ *dc = *Ainun;
+
+ Element_ptr Ai = A+(mun-1)*lda;
+ for(size_t i=mun; i>0; --i, Ai-=lda)
+ FFLAS::fassign(F, nun, Ai,1,Ai+1+lda,1);
+// std::copy(Ai, Ai+nun, Ai+1+lda);
+
+ FFLAS::fassign(F, n, dc, 1, A, 1);
+ //std::copy(dc, dc+n, A);
+
+ Element_ptr Aipo = A+lda;
+ for(size_t i=0; i<mun; ++i, Aipo+=lda) *Aipo = b[i];
+
+ FFLAS::fflas_delete(dc);
+ FFLAS::fflas_delete(b);
+ } else if (n != 0) {
+ Element_ptr Ai=A+mun*lda;
+ Element_ptr d = *Ai;
+ for(; Ai != A; Ai-=lda) *Ai= *(Ai-lda);
+ *A=d;
+ }
+ } else {
+ if ((m!=0) && (n > 1)) {
+ const size_t nun(n-1);
+ Element d = A[nun];
+ FFLAS::fassign(F,nun,A,1,A+1,1);
+ //std::copy(A,A+nun,A+1);
+ *A=d;
+ }
+ }
+
+#endif
+ }
+
+ template<class Field>
+ inline void cyclic_shift_row(const Field& F, typename Field::Element_ptr A, size_t m, size_t n, size_t lda)
+ {
+
+#ifdef MEMCOPY
+ if (m > 1) {
+ const size_t mun(m-1);
+
+ typename Field::Element_ptr b = FFLAS::fflas_new (F,n,1);
+ typename Field::Element_ptr Ai = A+mun*lda;
+ //@BUG not safe with RNSModp field
+ memcpy (b,Ai,n*sizeof(typename Field::Element));
+
+ for(typename Field::Element_ptr Ac = A+mun*lda; Ac!=A;Ac-=lda)
+ memcpy (Ac, Ac-lda, n*sizeof(typename Field::Element));
+
+ memcpy ( A, b, n*sizeof(typename Field::Element));
+ FFLAS::fflas_delete (b);
+ }
+
+#else
+ if (m > 1) {
+ const size_t mun(m-1);
+
+ typename Field::Element_ptr b = FFLAS::fflas_new (F, n, 1);
+ typename Field::Element_ptr Ai = A+mun*lda;
+ for(size_t i=0; i<n; ++i, Ai+=1) b[i] = *Ai;
+
+ for(typename Field::Element_ptr Ac = A+mun*lda; Ac!=A;Ac-=lda)
+ FFLAS::fassign(F,n, Ac-lda, 1, Ac, 1);
+ //std::copy(Ac-lda,Ac-lda+n, Ac);
+
+ typename Field::Element_ptr Aii = A;
+ for(size_t i=0; i<n; ++i, Aii+=1) *Aii = b[i];
+
+ FFLAS::fflas_delete (b);
+ }
+
+#endif
+ }
+
+ template<typename T>
+ inline void cyclic_shift_row(const RNSIntegerMod<T>& F, typename T::Element_ptr A, size_t m, size_t n, size_t lda)
+ {
+ if (m > 1) {
+ const size_t mun(m-1);
+
+ typename T::Element_ptr b = FFLAS::fflas_new (F, n, 1);
+ typename T::Element_ptr Ai = A+mun*lda;
+ for(size_t i=0; i<n; ++i, Ai+=1) F.assign(b[i] , *Ai);
+
+ for(typename T::Element_ptr Ac = A+mun*lda; Ac!=A;Ac-=lda)
+ FFLAS::fassign(F, n, Ac-lda, 1, Ac, 1);
+
+ typename T::Element_ptr Aii = A;
+ for(size_t i=0; i<n; ++i, Aii+=1) F.assign(*Aii, b[i]);
+
+ FFLAS::fflas_delete (b);
+ }
+ }
+
+ template<class Field>
+ inline void cyclic_shift_col(const Field& F, typename Field::Element_ptr A, size_t m, size_t n, size_t lda)
+ {
+ if (n > 1) {
+ const size_t nun(n-1);
+ for(typename Field::Element_ptr Ai=A; Ai!= A+m*lda; Ai+=lda)
+ {
+ typename Field::Element tmp;
+ F.init(tmp);
+ F.assign(tmp, Ai[nun]);
+ //@BUG: not safe with RNSModP field
+ std::copy_backward(Ai, Ai+nun, Ai+n);
+ *Ai=tmp;
+ }
+ }
+ }
+
+ template<typename T>
+ inline void cyclic_shift_col(const RNSIntegerMod<T>& F, typename T::Element_ptr A, size_t m, size_t n, size_t lda)
+ {
+ if (n > 1) {
+ const size_t nun(n-1);
+ for(typename T::Element_ptr Ai=A; Ai!= A+m*lda; Ai+=lda)
+ {
+ typename T::Element tmp; F.init(tmp);
+ F.assign(tmp, Ai[nun]);
+ //std::copy_backward(Ai, Ai+nun, Ai+n);
+ typename T::Element_ptr Xi = Ai+nun;
+ typename T::ConstElement_ptr Yi=Ai+nun-1;
+ for (size_t i =0;i<nun;++i, --Xi, --Yi)
+ F.assign(*Xi,*Yi);
+ F.assign(*Ai,tmp);
+ }
+ }
+ }
+
+
+//#if defined(__FFLASFFPACK_USE_OPENMP) and defined(_OPENMP)
+ template<class Field>
+ void
+ papplyP( const Field& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const FFLAS::FFLAS_TRANSPOSE Trans,
+ const size_t m, const size_t ibeg, const size_t iend,
+ typename Field::Element_ptr A, const size_t lda, const size_t * P )
+ {
+ int numthreads = MAX_THREADS;//omp_get_max_threads();
+ size_t BLOCKSIZE=std::max(2*m/numthreads,(size_t)1); // Assume that there is at least 2 ApplyP taking place in parallel
+ size_t NBlocks = m/BLOCKSIZE;
+ size_t LastBlockSize = m % BLOCKSIZE;
+ if (LastBlockSize)
+ NBlocks++;
+ else
+ LastBlockSize=BLOCKSIZE;
+
+ SYNCH_GROUP(
+ for (size_t t = 0; t < NBlocks; ++t)
+ {
+ size_t BlockDim = BLOCKSIZE;
+ if (t == NBlocks-1)
+ BlockDim = LastBlockSize;
+ //#pragma omp task shared (A, P, F) firstprivate(BlockDim)
+
+ TASK(MODE(CONSTREFERENCE(F, A,P) READ(A[BLOCKSIZE*t*((Side == FFLAS::FflasRight)?lda:1)])),
+ applyP(F, Side, Trans, BlockDim, ibeg, iend, A+BLOCKSIZE*t*((Side == FFLAS::FflasRight)?lda:1), lda, P););
+
+ }
+ );
+ //#pragma omp taskwait
+
+ }
+
+ template <class Field>
+ void pMatrixApplyT (const Field& F, typename Field::Element_ptr A, const size_t lda,
+ const size_t width, const size_t N2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4)
+ {
+ int numthreads = MAX_THREADS;//omp_get_max_threads();
+ size_t BLOCKSIZE=std::max(width/numthreads,(size_t)1);
+ size_t NBlocks = width/BLOCKSIZE;
+ size_t LastBlockSize = width % BLOCKSIZE;
+ if (LastBlockSize)
+ NBlocks++;
+ else
+ LastBlockSize=BLOCKSIZE;
+ SYNCH_GROUP(
+ for (size_t t = 0; t < NBlocks; ++t)
+ {
+ size_t BlockDim = BLOCKSIZE;
+ if (t == NBlocks-1)
+ BlockDim = LastBlockSize;
+ TASK(MODE(CONSTREFERENCE(F, A) READWRITE(A[BLOCKSIZE*t*lda])),
+ {MatrixApplyT(F,A+BLOCKSIZE*t*lda, lda, BlockDim, N2, R1, R2, R3, R4);}
+ );
+ }
+ );
+
+ }
+
+
+ template <class Field>
+ void pMatrixApplyS (const Field& F, typename Field::Element_ptr A, const size_t lda,
+ const size_t width, const size_t M2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4)
+ {
+ int numthreads = MAX_THREADS;//omp_get_max_threads();
+ size_t BLOCKSIZE=std::max(width/numthreads,(size_t)1);
+ size_t NBlocks = width/BLOCKSIZE;
+ size_t LastBlockSize = width % BLOCKSIZE;
+ if (LastBlockSize)
+ NBlocks++;
+ else
+ LastBlockSize=BLOCKSIZE;
+
+ SYNCH_GROUP(
+
+ for (size_t t = 0; t < NBlocks; ++t)
+ {
+ size_t BlockDim = BLOCKSIZE;
+ if (t == NBlocks-1)
+ BlockDim = LastBlockSize;
+ //#pragma omp task shared (F, A) firstprivate(BlockDim)
+ TASK(MODE(CONSTREFERENCE(F,A) READ(A[BLOCKSIZE*t])),
+ MatrixApplyS (F, A+BLOCKSIZE*t, lda, BlockDim, M2, R1, R2, R3, R4););
+ }
+ );
+ //#pragma omp taskwait
+
+ }
+
+//#endif // __FFLASFFPACK_USE_OPENMP
+
+} // FFPACK
+
+#endif // __FFLASFFPACK_ffpack_permutation_INL
diff --git a/fflas-ffpack/ffpack/ffpack_pluq.inl b/fflas-ffpack/ffpack/ffpack_pluq.inl
new file mode 100644
index 0000000..badd07b
--- /dev/null
+++ b/fflas-ffpack/ffpack/ffpack_pluq.inl
@@ -0,0 +1,632 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* ffpack/ffpack_pluq.inl
+ * Copyright (C) 2012 Clement Pernet
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_ffpack_pluq_INL
+#define __FFLASFFPACK_ffpack_pluq_INL
+
+//#define BCONLY
+//#define CROUT
+//#define BCV2
+//#define BCV3
+//#define LEFTLOOKING
+#ifndef BASECASE_K
+#define BASECASE_K 256
+#endif
+
+
+namespace FFPACK {
+ template<class Field>
+ inline size_t
+ PLUQ_basecaseV3 (const Field& Fi, const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ typename Field::Element * A, const size_t lda, size_t*P, size_t *Q)
+ {
+ typedef typename Field::Element Element;
+ size_t row = 0;
+ size_t col = 0;
+ size_t rank = 0;
+ size_t * MathP = new size_t[M];
+ size_t * MathQ = new size_t[N];
+
+ for (size_t i=0; i<M; ++i) MathP[i] = i;
+ for (size_t i=0; i<N; ++i) MathQ[i] = i;
+ for (size_t i=0; i<M; ++i) P[i] = i;
+ for (size_t i=0; i<N; ++i) Q[i] = i;
+ while ((col < N)||(row < M)){
+ size_t piv2 = rank;
+ size_t piv3 = rank;
+ Element * A1 = A + rank*lda;
+ Element * A2 = A + col;
+ Element * A3 = A + row*lda;
+ // search for pivot in A2
+ if (row==M){
+ piv3=col;
+ }else
+ while ((piv3 < col) && Fi.isZero (A3 [piv3])) piv3++;
+ if (piv3 == col){
+ if (col==N){
+ row++;
+ continue;
+ }
+#ifdef LEFTLOOKING
+ // Left looking style update
+ ftrsv (Fi, FFLAS::FflasLower, FFLAS::FflasNoTrans,
+ (Diag==FFLAS::FflasUnit)?FFLAS::FflasNonUnit:FFLAS::FflasUnit,
+ rank, A, lda, A2, lda);
+ fgemv (Fi, FFLAS::FflasNoTrans, M-rank, rank, Fi.mOne,
+ A1,lda, A2, lda,
+ Fi.one, A2+rank*lda, lda);
+#endif
+ while ((piv2 < row) && Fi.isZero (A2 [piv2*lda])) piv2++;
+ if (col<N) col++;
+ if (piv2==M)
+ continue;
+ } else
+ piv2 = row;
+ if (row<M) row++;
+ if (Fi.isZero (A [piv2*lda+piv3])){
+ // no pivot found
+ //cerr<<endl;
+ continue;
+ }
+ // At this point the pivot is located at x=piv2 y = piv3
+// P [rank] = piv2;
+// Q [rank] = piv3;
+
+ A2 = A+piv3;
+ A3 = A+piv2*lda;
+
+ // update permutations (cyclic shift)
+
+
+ //if(piv2 > rank)
+ cyclic_shift_mathPerm(MathP+rank, piv2-rank+1);
+
+ //if(piv3 > rank)
+ cyclic_shift_mathPerm(MathQ+rank, piv3-rank+1);
+
+ Element invpiv;
+ Fi.inv (invpiv, A3[piv3]);
+ if (Diag==FFLAS::FflasUnit){
+#ifdef LEFTLOOKING
+ // Normalizing the pivot row
+ for (size_t i=piv3+1; i<N; ++i)
+ Fi.mulin (A3[i], invpiv);
+#endif
+ }
+ else
+ // Normalizing the pivot column
+ for (size_t i=piv2+1; i<M; ++i)
+ Fi.mulin (A2 [i*lda], invpiv);
+ // Update
+#ifndef LEFTLOOKING
+ for (size_t i=piv2+1; i<M; ++i)
+ for (size_t j=piv3+1; j<N; ++j)
+ Fi.maxpyin (A[i*lda+j], A2[i*lda], A3[j]);
+#endif
+
+
+ // cyclic shift pivot column and row
+ if(piv3 > rank || piv2 > rank)
+ {
+
+ cyclic_shift_row_col(A+rank*(1+lda), piv2-rank+1, piv3-rank+1, lda);
+
+ cyclic_shift_row(Fi,A+rank*lda, piv2-rank+1, rank, lda);
+ cyclic_shift_row(Fi,A+rank*lda+piv3+1, piv2-rank+1, N-1-piv3, lda);
+ cyclic_shift_col(Fi,A+rank, rank, piv3-rank+1, lda);
+ cyclic_shift_col(Fi,A+rank+(piv2+1)*lda, M-1-piv2, piv3-rank+1, lda);
+ }
+
+
+/*
+
+ if(piv2 > rank)
+ cyclic_shift_row(A+rank*lda, piv2-rank+1, N, lda);
+ if(piv3 > rank)
+ cyclic_shift_col(A+rank, M, piv3-rank+1, lda);
+*/
+
+#ifdef LEFTLOOKING
+ // Need to update the cols already updated
+ for (size_t i=piv2+1; i<M; ++i)
+ for (size_t j=piv3+1; j<col; ++j)
+ Fi.maxpyin (A[i*lda+j],
+ A[i*lda+rank], A[rank*lda+j]);
+#endif
+ rank++;
+ }
+ MathPerm2LAPACKPerm(P, MathP, M);
+ MathPerm2LAPACKPerm(Q, MathQ, N);
+ delete[] MathP;
+ delete[] MathQ;
+
+ return rank;
+ }
+
+ template<class Field>
+ inline size_t
+ PLUQ_basecaseV2 (const Field& Fi, const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ typename Field::Element * A, const size_t lda, size_t*P, size_t *Q)
+ {
+ typedef typename Field::Element Element;
+ size_t row = 0;
+ size_t col = 0;
+ size_t rank = 0;
+ std::vector<bool> pivotRows(M,false);
+ std::vector<bool> pivotCols(N,false);
+ size_t * MathP = new size_t[M];
+ size_t * MathQ = new size_t[N];
+ // size_t npp=0;
+ // size_t npq=0;
+
+#ifdef LEFTLOOKING
+ Element* Ltemp = new Element[M*N];
+ for (size_t i=0; i<M*N; ++i)
+ Fi.assign(Ltemp[i],Fi.zero);
+ // this is C99 (-Wno-vla)
+ Element vtemp[M];
+#endif
+ while ((col < N)||(row < M)){
+ size_t piv2 = 0;
+ size_t piv3 = 0;
+// Element * A1 = A;
+ Element * A2 = A + col;
+ Element * A3 = A + row*lda;
+ // search for pivot in A2
+ if (row==M){
+ piv3=col;
+ }else
+ while ((piv3 < col) && (pivotCols[piv3] || Fi.isZero (A3 [piv3]))) piv3++;
+ if (piv3 == col){
+ if (col==N){
+ row++;
+ continue;
+ }
+#ifdef LEFTLOOKING
+ for (size_t i=0; i<rank; ++i)
+ Fi.assign (vtemp[i], A2 [MathP[i]*lda]);
+ Element * vtemp_it = vtemp +rank;
+ for (size_t i=0; i<M; ++i)
+ if (!pivotRows[i])
+ Fi.assign (*(vtemp_it++), A2[i*lda]);
+ // Left looking update
+ ftrsv (Fi, FFLAS::FflasLower, FFLAS::FflasNoTrans,
+ (Diag==FFLAS::FflasUnit)?FFLAS::FflasNonUnit:FFLAS::FflasUnit,
+ rank, Ltemp, N, vtemp, 1);
+ fgemv (Fi, FFLAS::FflasNoTrans, M-rank, rank, Fi.mOne,
+ Ltemp + rank*N, N,
+ vtemp, 1, Fi.one, vtemp + rank, 1);
+ for (size_t i=0; i<rank; ++i)
+ Fi.assign (A2 [MathP[i]*lda], vtemp[i]);
+ vtemp_it = vtemp +rank;
+ for (size_t i=0; i<M; ++i)
+ if (!pivotRows[i])
+ Fi.assign (A2[i*lda], *(vtemp_it++));
+#endif
+ while ((piv2 < row) && (pivotRows[piv2] || Fi.isZero (A2 [piv2*lda]))) piv2++;
+ if (col<N) col++;
+ if (piv2==M)
+ continue;
+ } else
+ piv2 = row;
+
+ if (row<M) row++;
+ if (Fi.isZero (A [piv2*lda+piv3])){
+ // no pivot found
+ continue;
+ }
+ // At this point the pivot is located at x=piv2 y = piv3
+ A2 = A+piv3;
+ A3 = A+piv2*lda;
+ MathQ[rank] = piv3;
+ MathP[rank] = piv2;
+ pivotCols[piv3] = true;
+ pivotRows[piv2] = true;
+ Element invpiv;
+ Fi.inv (invpiv, A3[piv3]);
+ if (Diag==FFLAS::FflasUnit){
+#ifndef LEFTLOOKING
+ // Normalizing the pivot row
+ for (size_t i=piv3+1; i<N; ++i)
+ Fi.mulin (A3[i], invpiv);
+#endif
+ }
+ else{
+#ifdef LEFTLOOKING
+ // finding the row idx of row piv2 in Ltemp
+ size_t Lpiv2 = rank;
+ for (size_t i=0; i<piv2; ++i)
+ if (!pivotRows[i])
+ Lpiv2 ++;
+ if (Lpiv2 != rank)
+ cyclic_shift_row(Fi,Ltemp+rank*N, Lpiv2-rank+1, rank, N);
+ // Normalizing the pivot column
+ Element * Lt_it = Ltemp + rank*(N+1) + N;
+ for (size_t i=0; i<row; ++i)
+ if (!pivotRows[i]){
+ Fi.assign (*Lt_it, Fi.mulin (A2 [i*lda], invpiv));
+ Lt_it+= N;
+ }
+
+ for (size_t i=row; i<M; ++i){
+ Fi.assign (*Lt_it,Fi.mulin (A2 [i*lda], invpiv));
+ Lt_it+=N;
+ }
+#else
+ // Normalizing the pivot column
+ for (size_t i=piv2+1; i<row; ++i)
+ if (!pivotRows[i])
+ Fi.mulin (A2 [i*lda], invpiv);
+ for (size_t i=row; i<M; ++i)
+ Fi.mulin (A2 [i*lda], invpiv);
+#endif
+ }
+ // Update
+#ifndef LEFTLOOKING
+ for (size_t i=piv2+1; i<row; ++i)
+ if (!pivotRows[i]){
+ for (size_t j=piv3+1; j<col; ++j)
+ if (!pivotCols[j])
+ Fi.maxpyin (A[i*lda+j], A2[i*lda], A3[j]);
+ for (size_t j=col; j<N; ++j)
+ Fi.maxpyin (A[i*lda+j], A2[i*lda], A3[j]);
+ }
+ for (size_t i=row; i<M; ++i){
+ for (size_t j=piv3+1; j<col; ++j)
+ if (!pivotCols[j])
+ Fi.maxpyin (A[i*lda+j], A2[i*lda], A3[j]);
+ for (size_t j=col; j<N; ++j)
+ Fi.maxpyin (A[i*lda+j], A2[i*lda], A3[j]);
+ }
+#endif
+#ifdef LEFTLOOKING
+ // Need to update the cols already updated
+ if (piv3<col)
+ for (size_t i=piv2+1; i<M; ++i)
+ for (size_t j=piv3+1; j<col; ++j)
+ if (!pivotCols[j])
+ Fi.maxpyin (A[i*lda+j], A2[i*lda], A3[j]);
+#endif
+ rank++;
+ }
+#ifdef LEFTLOOKING
+ delete[] Ltemp;
+#endif
+ // Building permutations
+ size_t nonpiv = rank;
+ for (size_t i = 0; i<M; ++i)
+ if (!pivotRows[i])
+ MathP[nonpiv++] = i;
+ nonpiv = rank;
+ for (size_t i = 0; i<N; ++i)
+ if (!pivotCols[i])
+ MathQ[nonpiv++] = i;
+ MathPerm2LAPACKPerm (Q, MathQ, N);
+ delete[] MathQ;
+ applyP (Fi, FFLAS::FflasRight, FFLAS::FflasTrans, M, 0, N, A, lda, Q);
+
+ MathPerm2LAPACKPerm (P, MathP, M);
+ delete[] MathP;
+ applyP (Fi, FFLAS::FflasLeft, FFLAS::FflasNoTrans, N, 0, M, A, lda, P);
+
+ return rank;
+ }
+
+ // Base Case based on a CUP decomp with rotations
+ template<class Field>
+ inline size_t
+ PLUQ_basecaseCrout (const Field& Fi, const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda, size_t*P, size_t *Q)
+ {
+ size_t row = 0;
+ size_t rank = 0;
+ typename Field::Element_ptr CurrRow=A;
+ size_t * MathP = FFLAS::fflas_new<size_t>(M);
+ size_t * MathQ = FFLAS::fflas_new<size_t>(N);
+ for (size_t i=0; i<M; ++i) MathP[i] = i;
+ for (size_t i=0; i<N; ++i) MathQ[i] = i;
+
+ while (((size_t)row<M) && ((size_t)rank<N)){
+ // Updating row where pivot will be searched for
+ fgemv(Fi, FFLAS::FflasTrans, rank, N-rank, Fi.mOne, A+rank, lda, CurrRow, 1, Fi.one, CurrRow+rank, 1);
+ size_t i = rank;
+ while(Fi.isZero (*(CurrRow+ i++)) && (i<N));
+ i--;
+ if (!Fi.isZero (*(CurrRow+i))){
+ // found pivot
+ // Updating column below pivot
+ // Q [rank] = i;
+ // pivotRows [row] = true;
+ // P [rank] = row;
+ fgemv(Fi, FFLAS::FflasNoTrans, M-row-1, rank, Fi.mOne, CurrRow+lda, lda, A+i, lda, Fi.one, CurrRow+lda+i, lda);
+ // Normalization
+ typename Field::Element invpiv;
+ Fi.init(invpiv);
+ Fi.inv (invpiv, *(CurrRow+i));
+ if (Diag == FFLAS::FflasUnit)
+ FFLAS::fscalin (Fi, N-i-1, invpiv, CurrRow+i+1,1);
+ else
+ FFLAS::fscalin (Fi, M-row-1, invpiv, CurrRow+i+lda,lda);
+
+ if (i > rank){
+ // Column rotation to move pivot on the diagonal
+ // on U
+ cyclic_shift_col(Fi, A+rank, rank, i-rank+1, lda);
+ cyclic_shift_mathPerm(MathQ+rank, (size_t)(i-rank+1));
+ // on A
+ cyclic_shift_col(Fi, CurrRow+lda+rank, M-row-1, i-rank+1, lda);
+ Fi.assign(*(A+rank*(lda+1)), *(CurrRow+i));
+ FFLAS::fzero (Fi, i-rank, A+rank*(lda+1)+1, 1);
+ }
+ if (row > rank){
+ // Row rotation for L
+ // Optimization: delay this to the end
+ cyclic_shift_row(Fi, A+rank*lda, row-rank+1, rank, lda);
+ cyclic_shift_mathPerm(MathP+rank, (size_t) (row-rank+1) );
+ // Row rotation for U (not moving the 0 block)
+ FFLAS::fassign (Fi, N-i-1, CurrRow+i+1, 1, A+rank*lda+i+1, 1);
+ Fi.assign(*(A+rank*(lda+1)), *(CurrRow+i));
+ FFLAS::fzero (Fi, row-rank, A+rank*(lda+1)+lda, lda);
+ Fi.assign(*(CurrRow+i),Fi.zero); // only needed once here
+ }
+ rank++;
+ }
+ CurrRow+=lda;
+ row++;
+ }
+
+ // size_t nonpiv = rank;
+ // for (size_t i = 0; i<M; ++i)
+ // if (!pivotRows[i])
+ // MathP[nonpiv++] = i;
+ // nonpiv = rank;
+ // for (size_t i = 0; i<N; ++i)
+ // if (!pivotCols[i])
+ // MathQ[nonpiv++] = i;
+
+ // std::cerr<<"MathP = ";
+ // for (int i=0; i<M; ++i)
+ // std::cerr<<MathP[i]<<" ";
+ // std::cerr<<std::endl;
+ // std::cerr<<"MathQ = ";
+ // for (int i=0; i<N; ++i)
+ // std::cerr<<MathQ[i]<<" ";
+ // std::cerr<<std::endl;
+
+ MathPerm2LAPACKPerm (Q, MathQ, N);
+ FFLAS::fflas_delete( MathQ);
+ MathPerm2LAPACKPerm (P, MathP, M);
+ FFLAS::fflas_delete( MathP);
+ FFLAS::fzero (Fi, M-rank, N-rank, A+rank*(1+lda), lda);
+
+ return (size_t) rank;
+ }
+
+
+ template<class Field>
+ inline size_t
+ PLUQ (const Field& Fi, const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda, size_t*P, size_t *Q)
+ {
+#ifdef BCONLY
+ #ifdef CROUT
+ return PLUQ_basecaseCrout(Fi,Diag,M,N,A,lda,P,Q);
+ #elif defined BCV2
+ return PLUQ_basecaseV2(Fi,Diag,M,N,A,lda,P,Q);
+ #elif defined BCV3
+ return PLUQ_basecaseV3(Fi,Diag,M,N,A,lda,P,Q);
+ #else
+ return PLUQ_basecase(Fi,Diag,M,N,A,lda,P,Q);
+ #endif
+#endif
+ for (size_t i=0; i<M; ++i) P[i] = i;
+ for (size_t i=0; i<N; ++i) Q[i] = i;
+ if (std::min (M,N) == 0) return 0;
+ if (std::max (M,N) == 1) return (Fi.isZero(*A))? 0 : 1;
+#ifndef BASECASE_K
+ if (M == 1){
+ size_t piv = 0;
+ while ((piv < N) && Fi.isZero (A[piv])) piv++;
+ if (piv == N)
+ return 0;
+ if (piv){
+ Q[0] = piv;
+ Fi.assign (*A, A[piv]);
+ Fi.assign (A[piv], Fi.zero);
+ }
+ if (Diag== FFLAS::FflasUnit){
+ typename Field::Element invpivot;
+ Fi.inv(invpivot, *A);
+ // for (size_t i=piv+1; i<N; ++i)
+ // Fi.mulin (A[i], invpivot);
+ FFLAS::fscalin(Fi,N-piv-1,invpivot,A+piv+1,1);
+ }
+ return 1;
+ }
+ if (N == 1){
+ size_t piv = 0;
+ while ((piv < M) && Fi.isZero (A[piv*lda])) piv++;
+ if (piv == M)
+ return 0;
+ if (piv){
+ P[0] = piv;
+ Fi.assign (*A, *(A+piv*lda));
+ Fi.assign (*(A+piv*lda), Fi.zero);
+ }
+ if (Diag== FFLAS::FflasNonUnit){
+ typename Field::Element invpivot;
+ Fi.inv(invpivot, *A);
+ // for (size_t i=piv+1; i<M; ++i)
+ // Fi.mulin (*(A+i*lda), invpivot);
+ FFLAS::fscalin(Fi,M-piv-1,invpivot,A+(piv+1)*lda,lda);
+ }
+ return 1;
+ }
+#endif
+#ifdef BASECASE_K
+ if (std::min(M,N) < BASECASE_K)
+ return PLUQ_basecaseCrout (Fi, Diag, M, N, A, lda, P, Q);
+#endif
+
+ FFLAS::FFLAS_DIAG OppDiag = (Diag == FFLAS::FflasUnit)? FFLAS::FflasNonUnit : FFLAS::FflasUnit;
+ size_t M2 = M >> 1;
+ size_t N2 = N >> 1;
+ size_t * P1 = FFLAS::fflas_new<size_t >(M2);
+ size_t * Q1 = FFLAS::fflas_new<size_t >(N2);
+ size_t R1,R2,R3,R4;
+
+ // A1 = P1 [ L1 ] [ U1 V1 ] Q1
+ // [ M1 ]
+ R1 = PLUQ (Fi, Diag, M2, N2, A, lda, P1, Q1);
+ typename Field::Element_ptr A2 = A + N2;
+ typename Field::Element_ptr A3 = A + M2*lda;
+ typename Field::Element_ptr A4 = A3 + N2;
+ typename Field::Element_ptr F = A2 + R1*lda;
+ typename Field::Element_ptr G = A3 + R1;
+ // [ B1 ] <- P1^T A2
+ // [ B2 ]
+ applyP (Fi, FFLAS::FflasLeft, FFLAS::FflasNoTrans, N-N2, size_t(0), M2, A2, lda, P1);
+ // [ C1 C2 ] <- A3 Q1^T
+ applyP (Fi, FFLAS::FflasRight, FFLAS::FflasTrans, M-M2, size_t(0), N2, A3, lda, Q1);
+ // D <- L1^-1 B1
+ ftrsm (Fi, FFLAS::FflasLeft, FFLAS::FflasLower, FFLAS::FflasNoTrans, OppDiag, R1, N-N2, Fi.one, A, lda, A2, lda);
+ // E <- C1 U1^-1
+ ftrsm (Fi, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, Diag, M-M2, R1, Fi.one, A, lda, A3, lda);
+ // F <- B2 - M1 D
+ fgemm (Fi, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M2-R1, N-N2, R1, Fi.mOne, A + R1*lda, lda, A2, lda, Fi.one, A2+R1*lda, lda);
+ // G <- C2 - E V1
+ fgemm (Fi, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M-M2, N2-R1, R1, Fi.mOne, A3, lda, A+R1, lda, Fi.one, A3+R1, lda);
+ // H <- A4 - ED
+ fgemm (Fi, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M-M2, N-N2, R1, Fi.mOne, A3, lda, A2, lda, Fi.one, A4, lda);
+ // F = P2 [ L2 ] [ U2 V2 ] Q2
+ // [ M2 ]
+ size_t * P2 = FFLAS::fflas_new<size_t >(M2-R1);
+ size_t * Q2 = FFLAS::fflas_new<size_t >(N-N2);
+ R2 = PLUQ (Fi, Diag, M2-R1, N-N2, F, lda, P2, Q2);
+ // G = P3 [ L3 ] [ U3 V3 ] Q3
+ // [ M3 ]
+ size_t * P3 = FFLAS::fflas_new<size_t >(M-M2);
+ size_t * Q3 = FFLAS::fflas_new<size_t >(N2-R1);
+ R3 = PLUQ (Fi, Diag, M-M2, N2-R1, G, lda, P3, Q3);
+ // [ H1 H2 ] <- P3^T H Q2^T
+ // [ H3 H4 ]
+ applyP (Fi, FFLAS::FflasRight, FFLAS::FflasTrans, M-M2, size_t(0), N-N2, A4, lda, Q2);
+ applyP (Fi, FFLAS::FflasLeft, FFLAS::FflasNoTrans, N-N2, size_t(0), M-M2, A4, lda, P3);
+ // [ E1 ] <- P3^T E
+ // [ E2 ]
+ applyP (Fi, FFLAS::FflasLeft, FFLAS::FflasNoTrans, R1, size_t(0), M-M2, A3, lda, P3);
+ // [ M11 ] <- P2^T M1
+ // [ M12 ]
+ applyP (Fi, FFLAS::FflasLeft, FFLAS::FflasNoTrans, R1, size_t(0), M2-R1, A+R1*lda, lda, P2);
+ // [ D1 D2 ] <- D Q2^T
+ applyP (Fi, FFLAS::FflasRight, FFLAS::FflasTrans, R1, size_t(0), N-N2, A2, lda, Q2);
+ // [ V1 V2 ] <- V1 Q3^T
+ applyP (Fi, FFLAS::FflasRight, FFLAS::FflasTrans, R1, size_t(0), N2-R1, A+R1, lda, Q3);
+ // I <- H U2^-1
+ // K <- H3 U2^-1
+ ftrsm (Fi, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, Diag, M-M2, R2, Fi.one, F, lda, A4, lda);
+ // J <- L3^-1 I (in a temp)
+ typename Field::Element_ptr temp = FFLAS::fflas_new (Fi, R3, R2);
+ // for (size_t i=0; i<R3; ++i)
+ // FFLAS::fassign (Fi, R2, A4 + i*lda, 1, temp + i*R2, 1);
+ FFLAS::fassign (Fi, R3, R2, A4 , lda, temp , R2);
+ ftrsm (Fi, FFLAS::FflasLeft, FFLAS::FflasLower, FFLAS::FflasNoTrans, OppDiag, R3, R2, Fi.one, G, lda, temp, R2);
+ // N <- L3^-1 H2
+ ftrsm (Fi, FFLAS::FflasLeft, FFLAS::FflasLower, FFLAS::FflasNoTrans, OppDiag, R3, N-N2-R2, Fi.one, G, lda, A4+R2, lda);
+ // O <- N - J V2
+ fgemm (Fi, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, R3, N-N2-R2, R2, Fi.mOne, temp, R2, F+R2, lda, Fi.one, A4+R2, lda);
+ FFLAS::fflas_delete (temp);
+ // R <- H4 - K V2 - M3 O
+ typename Field::Element_ptr R = A4 + R2 + R3*lda;
+ fgemm (Fi, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M-M2-R3, N-N2-R2, R2, Fi.mOne, A4+R3*lda, lda, F+R2, lda, Fi.one, R, lda);
+ fgemm (Fi, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M-M2-R3, N-N2-R2, R3, Fi.mOne, G+R3*lda, lda, A4+R2, lda, Fi.one, R, lda);
+ // H4 = P4 [ L4 ] [ U4 V4 ] Q4
+ // [ M4 ]
+ size_t * P4 = FFLAS::fflas_new<size_t >(M-M2-R3);
+ size_t * Q4 = FFLAS::fflas_new<size_t >(N-N2-R2);
+ R4 = PLUQ (Fi, Diag, M-M2-R3, N-N2-R2, R, lda, P4, Q4);
+ // [ E21 M31 0 K1 ] <- P4^T [ E2 M3 0 K ]
+ // [ E22 M32 0 K2 ]
+ applyP (Fi, FFLAS::FflasLeft, FFLAS::FflasNoTrans, N2+R2, size_t(0), M-M2-R3, A3+R3*lda, lda, P4);
+ // [ D21 D22 ] [ D2 ]
+ // [ V21 V22 ] <- [ V2 ] Q4^T
+ // [ 0 0 ] [ 0 ]
+ // [ O1 O2 ] [ O ]
+ applyP (Fi, FFLAS::FflasRight, FFLAS::FflasTrans, M2+R3, size_t(0), N-N2-R2, A2+R2, lda, Q4);
+
+ // P <- Diag (P1 [ I_R1 ] , P3 [ I_R3 ])
+ // [ P2 ] [ P4 ]
+ size_t* MathP = FFLAS::fflas_new<size_t>(M);
+ composePermutationsP (MathP, P1, P2, R1, M2);
+ composePermutationsP (MathP+M2, P3, P4, R3, M-M2);
+ FFLAS::fflas_delete( P1);
+ FFLAS::fflas_delete( P2);
+ FFLAS::fflas_delete( P3);
+ FFLAS::fflas_delete( P4);
+ for (size_t i=M2; i<M; ++i)
+ MathP[i] += M2;
+ if (R1+R2 < M2){
+ // P <- P S
+ PermApplyS (MathP, 1,1,M2, R1, R2, R3, R4);
+ // A <- S^T A
+ MatrixApplyS (Fi, A, lda, N, M2, R1, R2, R3, R4);
+ }
+ MathPerm2LAPACKPerm (P, MathP, M);
+ FFLAS::fflas_delete( MathP);
+
+ // Q<- Diag ( [ I_R1 ] Q1, [ I_R2 ] Q2 )
+ // [ Q3 ] [ P4 ]
+ size_t * MathQ = FFLAS::fflas_new<size_t >(N);
+ composePermutationsQ (MathQ, Q1, Q3, R1, N2);
+ composePermutationsQ (MathQ+N2, Q2, Q4, R2, N-N2);
+ FFLAS::fflas_delete( Q1);
+ FFLAS::fflas_delete( Q2);
+ FFLAS::fflas_delete( Q3);
+ FFLAS::fflas_delete( Q4);
+ for (size_t i=N2; i<N; ++i)
+ MathQ[i] += N2;
+
+ if (R1 < N2){
+ // Q <- T Q
+ PermApplyT (MathQ, 1,1,N2, R1, R2, R3, R4);
+ // A <- A T^T
+ MatrixApplyT (Fi, A, lda, M, N2, R1, R2, R3, R4);
+ }
+ MathPerm2LAPACKPerm (Q, MathQ, N);
+ FFLAS::fflas_delete( MathQ);
+
+ return R1+R2+R3+R4;
+ }
+
+
+} // namespace FFPACK
+#endif // __FFLASFFPACK_ffpack_pluq_INL
diff --git a/fflas-ffpack/ffpack/ffpack_pluq_mp.inl b/fflas-ffpack/ffpack/ffpack_pluq_mp.inl
new file mode 100644
index 0000000..7aa9476
--- /dev/null
+++ b/fflas-ffpack/ffpack/ffpack_pluq_mp.inl
@@ -0,0 +1,130 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFPACK_pluq_mp_INL
+#define __FFPACK_pluq_mp_INL
+
+#ifdef BENCH_PERF_LQUP_MP
+#define BENCH_PERF_FGEMM_MP
+#endif
+#include "fflas-ffpack/field/rns-integer-mod.h"
+#include "fflas-ffpack/field/rns-integer.h"
+#include "fflas-ffpack/fflas-ffpack.h"
+
+#include "givaro/givinteger.h"
+#include "givaro/modular-integer.h"
+namespace FFPACK {
+
+ template <>
+ inline size_t
+ PLUQ (const Givaro::Modular<Givaro::Integer>& F,
+ const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ typename Givaro::Integer* A, const size_t lda,
+ size_t*P, size_t *Q)
+ {
+
+#ifdef BENCH_PERF_LQUP_MP
+ double t_init=0, t_lqup=0, t_mod=0, t_rec=0;
+ FFLAS::Timer chrono;
+ chrono.start();
+#endif
+ Givaro::Integer p;
+ F.cardinality(p);
+ size_t logp=p.bitsize();
+ size_t K = std::max(M,N);
+
+ // compute bit size of feasible prime
+ size_t _k=std::max(K,logp/20), lk=0;
+ while ( _k ) {_k>>=1; ++lk;}
+ size_t prime_bitsize= (53-lk)>>1;
+
+ // construct rns basis
+ Givaro::Integer maxC= (p-1)*(p-1)*(p-1)*uint64_t(K);
+ uint64_t n_pr =uint64_t(ceil(double(maxC.bitsize())/double(prime_bitsize)));
+ maxC=(p-1)*(p-1)*uint64_t(K)*(1<<prime_bitsize)*n_pr;
+
+ FFPACK::rns_double RNS(maxC, prime_bitsize, true);
+ FFPACK::RNSIntegerMod<FFPACK::rns_double> Zp(p, RNS);
+#ifdef BENCH_PERF_LQUP_MP
+ chrono.stop();
+ t_init+=chrono.usertime();
+ chrono.clear();chrono.start();
+#endif
+ // compute A in RNS
+ FFPACK::rns_double::Element_ptr Ap;
+ Ap = FFLAS::fflas_new(Zp,M,N);
+ FFLAS::finit_rns(Zp,M,N,(logp/16)+(logp%16?1:0),A,lda,Ap);
+
+
+#ifdef BENCH_PERF_LQUP_MP
+ chrono.stop();
+ t_mod+=chrono.usertime();
+ chrono.clear();chrono.start();
+#endif
+ // call lqup in rns
+ size_t R=FFPACK::PLUQ(Zp, Diag, M, N, Ap, N, P, Q);
+#ifdef BENCH_PERF_LQUP_MP
+ chrono.stop();
+ t_lqup+=chrono.usertime();
+ chrono.clear();chrono.start();
+#endif
+ //Zp.write(std::cout,*Ap);
+ // reconstruct the result
+ FFLAS::fconvert_rns(Zp,M,N,F.zero,A,lda,Ap);
+#ifdef BENCH_PERF_LQUP_MP
+ chrono.stop();
+ t_rec+=chrono.usertime();
+ chrono.clear();chrono.start();
+#endif
+ // reduce it modulo p
+ FFLAS::freduce (F,M,N,A,lda);
+ //F.write(std::cout,*A);
+
+#ifdef BENCH_PERF_LQUP_MP
+ chrono.stop();
+ //t_rec+=chrono.usertime();
+ cout<<"PLUQ RNS PERF:"<<endl;
+ cout<<" --- RNS basis size: "<<Zp.size() <<endl;
+ cout<<" *** init : "<<t_init<<endl;
+ cout<<" *** rns mod : "<<t_mod<<endl;
+ cout<<" *** rns lqup : "<<t_lqup<<" ( igemm="<<Zp.t_igemm<<" ftrsm="<<Zp.t_trsm<<" scal="<<Zp.t_scal
+ <<" modp="<<Zp.t_modp<<endl;
+ cout<<" *** rns rec : "<<t_rec<<endl;
+ cout<<" *** mod : "<<chrono.usertime()<<endl;
+
+#endif
+ FFLAS::fflas_delete(Ap);
+ return R;
+
+ }
+
+} // namespace FFPACK
+
+#endif
+
diff --git a/fflas-ffpack/ffpack/ffpack_ppluq.inl b/fflas-ffpack/ffpack/ffpack_ppluq.inl
new file mode 100644
index 0000000..434d851
--- /dev/null
+++ b/fflas-ffpack/ffpack/ffpack_ppluq.inl
@@ -0,0 +1,407 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* ffpack/ffpack_ppluq.inl
+ * Copyright (C) 2014 Ziad Sultan
+ *
+ * Written by Ziad.Sultan at imag.fr
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, WRITE to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_ffpack_ppluq_INL
+#define __FFLASFFPACK_ffpack_ppluq_INL
+
+
+//#ifdef __FFLASFFPACK_USE_OPENMP
+
+#define __FFLAS__TRSM_READONLY
+
+#define PBASECASE_K 256
+
+
+namespace FFPACK {
+
+ template<class Field>
+ void threads_fgemm(const size_t m, const size_t n, const size_t r, int nbthreads, size_t * W1, size_t * W2, size_t * W3, size_t gamma)
+ {
+ size_t H1, H2, H3;
+ size_t M2 = m>>1;
+ size_t N2 = n>>1;
+
+ H1 = ((m-N2)*r*(N2-r))<<1;
+ H2 = ((M2-r)*r*(n-N2))<<1;
+ H3 = ((m-M2)*r*(n-N2))<<1;
+
+ // if we take into account 2 concurrent pluq calls....
+ size_t h;
+ size_t z1= h*((m-M2)*(N2-r)*(N2-r)-(N2-r)*(N2-r)*(N2-r)/3);
+ size_t z2= h*((n-N2)*(M2-r)*(M2-r)-(M2-r)*(M2-r)*(M2-r)/3);
+
+ H1+= z1;
+ H2+= z2;
+
+ // compute number of threads for each fgemm call
+ *W1=std::max(H1*nbthreads/(H1+H2+H3),(size_t)1);
+ *W2=std::max(H2*nbthreads/(H1+H2+H3),(size_t)1);
+ *W3=std::max(nbthreads-*W1-*W2,(size_t)1);
+
+ // add gamma factor to change number of threads for pluq calls
+ W1-= gamma*z1/(z1+z2);
+ W2-= gamma*(1-z1/(z1+z2));
+ W3+= gamma;
+
+ }
+
+ template<class Field>
+ void threads_ftrsm(const size_t m, const size_t n, int nbthreads, size_t * t1, size_t * t2)
+ {
+ *t1 = nbthreads*m/(m+n);
+ *t2 = nbthreads-(int)*t1;
+ }
+
+
+ template<class Field>
+ inline size_t
+ pPLUQ(const Field& Fi, const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* P, size_t* Q, int nt)
+ {
+
+
+ for (size_t i=0; i<M; ++i) P[i] = i;
+ for (size_t i=0; i<N; ++i) Q[i] = i;
+ if (std::min(M,N) == 0) return 0;
+ if (std::max (M,N) == 1) return (Fi.isZero(*A))? 0 : 1;
+ if (M == 1){
+ size_t piv = 0;
+ while ((piv < N) && Fi.isZero (A[piv])) piv++;
+ if (piv == N)
+ return 0;
+ if (piv){
+ Q[0] = piv;
+ Fi.assign (*A, A[piv]);
+ Fi.assign (A[piv], Fi.zero);
+ }
+ if (Diag== FFLAS::FflasUnit){
+ typename Field::Element invpivot;
+ Fi.inv(invpivot, *A);
+ // for (size_t i=piv+1; i<N; ++i)
+ // Fi.mulin (A[i], invpivot);
+ FFLAS::fscalin(Fi,N-piv-1,invpivot,A+piv+1,1);
+ }
+ return 1;
+ }
+ if (N == 1){
+ size_t piv = 0;
+ while ((piv < M) && Fi.isZero (A[piv*lda])) piv++;
+ if (piv == M)
+ return 0;
+ if (piv){
+ P[0] = piv;
+ Fi.assign (*A, *(A+piv*lda));
+ Fi.assign (*(A+piv*lda), Fi.zero);
+ }
+ if (Diag== FFLAS::FflasNonUnit){
+ typename Field::Element invpivot;
+ Fi.inv(invpivot, *A);
+ // for (size_t i=piv+1; i<M; ++i)
+ // Fi.mulin (*(A+i*lda), invpivot);
+ FFLAS::fscalin(Fi,M-piv-1,invpivot,A+(piv+1)*lda,lda);
+ }
+ return 1;
+ }
+
+ #ifdef PBASECASE_K
+ // if (std::min(M,N) < PBASECASE_K)
+ if (std::min(M,N) <= lda/NUM_THREADS && lda/NUM_THREADS > PBASECASE_K)
+ return PLUQ_basecaseCrout (Fi, Diag, M, N, A, lda, P, Q);
+ #endif
+ FFLAS::FFLAS_DIAG OppDiag = (Diag == FFLAS::FflasUnit)? FFLAS::FflasNonUnit : FFLAS::FflasUnit;
+
+ size_t M2 = M >> 1;
+ size_t N2 = N >> 1;
+ size_t * P1 = FFLAS::fflas_new<size_t> (M2);
+ size_t * Q1 = FFLAS::fflas_new<size_t> (N2);
+ size_t* MathP = 0;
+ size_t* MathQ = 0;
+ size_t* P2,*P3,*Q2,*Q3,*P4,*Q4;
+ size_t R1,R2,R3,R4;
+
+ // A1 = P1 [ L1 ] [ U1 V1 ] Q1
+ // [ M1 ]
+ R1 = pPLUQ (Fi, Diag, M2, N2, A, lda, P1, Q1,nt);
+
+ typename Field::Element * A2 = A + N2;
+ typename Field::Element * A3 = A + M2*lda;
+ typename Field::Element * A4 = A3 + N2;
+ typename Field::Element * F = A2 + R1*lda;
+ typename Field::Element * G = A3 + R1;
+
+ // const FFLAS::CuttingStrategy meth = FFLAS::RECURSIVE;
+ // const FFLAS::StrategyParameter strat = FFLAS::TWO_D_ADAPT;
+
+ typename FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive,FFLAS::StrategyParameter::TwoDAdaptive> pWH (std::max(nt,1));
+ typename FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads> PH (std::max(nt,1));
+
+ SYNCH_GROUP(
+
+ // [ B1 ] <- P1^T A2
+ // [ B2 ]
+ TASK(MODE(READ(P1) CONSTREFERENCE(Fi, P1, A2) READWRITE(A2[0])),
+ { papplyP( Fi, FFLAS::FflasLeft, FFLAS::FflasNoTrans, N-N2, 0, M2, A2, lda, P1); }
+ );
+ // [ C1 C2 ] <- A3 Q1^T
+ TASK(MODE(READ(Q1) CONSTREFERENCE(Fi, Q1, A3) READWRITE(A3[0])),
+ papplyP( Fi, FFLAS::FflasRight, FFLAS::FflasTrans, M-M2, 0, N2, A3, lda, Q1););
+
+ CHECK_DEPENDENCIES;
+ // D <- L1^-1 B1
+ TASK(MODE(READ(A[0], R1, PH) CONSTREFERENCE(Fi, PH, A2) READWRITE(A2[0])),
+ ftrsm( Fi, FFLAS::FflasLeft, FFLAS::FflasLower, FFLAS::FflasNoTrans, OppDiag, R1, N-N2, Fi.one, A, lda, A2, lda, PH));
+
+ // E <- C1 U1^-1
+ TASK(MODE(READ(R1, A[0], PH) CONSTREFERENCE(A3, Fi, M2, R1, PH) READWRITE(A3[0])),
+ ftrsm(Fi, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, Diag, M-M2, R1, Fi.one, A, lda, A3, lda, PH));
+
+ CHECK_DEPENDENCIES;
+
+ // F <- B2 - M1 D
+ TASK(MODE(READ(A2[0], A[R1*lda], pWH) READWRITE(F[0]) CONSTREFERENCE(A, A2, F, pWH, Fi)),
+ fgemm( Fi, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M2-R1, N-N2, R1, Fi.mOne, A + R1*lda, lda, A2, lda, Fi.one, F, lda, pWH));
+
+ // G <- C2 - E V1
+ TASK(MODE(READ(R1, A[R1], A3[0], pWH) READWRITE(G[0]) CONSTREFERENCE(Fi, A, A3, G, pWH)),
+ fgemm( Fi, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M-M2, N2-R1, R1, Fi.mOne, A3, lda, A+R1, lda, Fi.one, G, lda, pWH));
+
+ CHECK_DEPENDENCIES;
+
+ P2 = FFLAS::fflas_new<size_t>(M2-R1);
+ Q2 = FFLAS::fflas_new<size_t>(N-N2);
+ //typename Field::Element * A4R2 = 0;
+ // F = P2 [ L2 ] [ U2 V2 ] Q2
+ // [ M2 ]
+ TASK(MODE(CONSTREFERENCE(Fi, P2, Q2, F,/* A4R2,*/ R2) WRITE(R2/*, A4R2[0]*/) READWRITE(F[0], P2, Q2) ),
+ R2 = pPLUQ( Fi, Diag, M2-R1, N-N2, F, lda, P2, Q2,nt/2)
+ //A4R2 = A4+R2;
+ );
+
+ //R2 = PLUQ (Fi, Diag, M2-R1, N-N2, F, lda, P2, Q2);
+
+ P3 = FFLAS::fflas_new<size_t>(M-M2);
+ Q3 = FFLAS::fflas_new<size_t>(N2-R1);
+ // G = P3 [ L3 ] [ U3 V3 ] Q3
+ // [ M3 ]
+ TASK(MODE(CONSTREFERENCE(Fi, G, Q3, P3, R3) WRITE(R3, P3, Q3) READWRITE(G[0])),
+ R3 = pPLUQ( Fi, Diag, M-M2, N2-R1, G, lda, P3, Q3,nt/2));
+
+ // H <- A4 - ED
+ TASK(MODE(CONSTREFERENCE(Fi, A3, A2, A4, pWH) READ(M2, N2, R1, A3[0], A2[0]) READWRITE(A4[0])),
+ fgemm( Fi, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M-M2, N-N2, R1, Fi.mOne, A3, lda, A2, lda, Fi.one, A4, lda, pWH));
+
+ CHECK_DEPENDENCIES;
+
+ // [ H1 H2 ] <- P3^T H Q2^T
+ // [ H3 H4 ]
+ TASK(MODE(READ(P3, Q2) CONSTREFERENCE(Fi, A4, Q2, P3) READWRITE(A4[0])),
+ papplyP( Fi, FFLAS::FflasRight, FFLAS::FflasTrans, M-M2, 0, N-N2, A4, lda, Q2);
+ papplyP( Fi, FFLAS::FflasLeft, FFLAS::FflasNoTrans, N-N2, 0, M-M2, A4, lda, P3););
+
+ CHECK_DEPENDENCIES;
+ // [ E1 ] <- P3^T E
+ // [ E2 ]
+ TASK(MODE(READ(P3) CONSTREFERENCE(Fi, P3, A3) READWRITE(A3[0])),
+ papplyP( Fi, FFLAS::FflasLeft, FFLAS::FflasNoTrans, R1, 0, M-M2, A3, lda, P3));
+ //applyP( Fi, FflasLeft, FflasNoTrans, R1, 0, M-M2, A3, lda, P3);
+
+ // [ M11 ] <- P2^T M1
+ // [ M12 ]
+ TASK(MODE(READ(P2) CONSTREFERENCE(P2, A, Fi) READWRITE(A[R1*lda])),
+ papplyP(Fi, FFLAS::FflasLeft, FFLAS::FflasNoTrans, R1, 0, M2-R1, A+R1*lda, lda, P2));
+ //applyP(Fi, FflasLeft, FflasNoTrans, R1, 0, M2-R1, A+R1*lda, lda, P2);
+
+ // [ D1 D2 ] <- D Q2^T
+ TASK(MODE(READ(Q2) CONSTREFERENCE(Fi, Q2, A2) READWRITE(A2[0])),
+ papplyP( Fi, FFLAS::FflasRight, FFLAS::FflasTrans, R1, 0, N-N2, A2, lda, Q2));
+ //papplyP( Fi, FflasRight, FflasTrans, R1, 0, N-N2, A2, lda, Q2);
+
+ // [ V1 V2 ] <- V1 Q3^T
+ TASK(MODE(READ(Q3) CONSTREFERENCE(Fi, Q3, A) READWRITE(A[R1])),
+ papplyP( Fi, FFLAS::FflasRight, FFLAS::FflasTrans, R1, 0, N2-R1, A+R1, lda, Q3));
+ //applyP( Fi, FflasRight, FflasTrans, R1, 0, N2-R1, A+R1, lda, Q3);
+
+ // CHECK_DEPENDENCIES;
+
+ // I <- H1 U2^-1
+ // K <- H3 U2^-1
+ TASK(MODE(READ(R2, F[0], P2) CONSTREFERENCE(Fi, A4, F, PH, R2) READWRITE(A4[0])),
+ ftrsm( Fi, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, Diag, M-M2, R2, Fi.one, F, lda, A4, lda, PH));
+ //pftrsm( Fi, FflasRight, FflasUpper, FflasNoTrans, Diag, M-M2, R2, Fi.one, F, lda, A4, lda, method, NUM);
+ //ftrsm( Fi, FflasRight, FflasUpper, FflasNoTrans, Diag, M-M2, R2, Fi.one, F, lda, A4, lda);
+ CHECK_DEPENDENCIES;
+ typename Field::Element_ptr temp = 0;
+
+ TASK(MODE(READ(A4[0], R3, P2) READWRITE(temp[0], R2) CONSTREFERENCE(Fi, A4, temp, R2, R3)),
+ temp = FFLAS::fflas_new (Fi, R3, R2);
+ FFLAS::fassign (Fi, R3, R2, A4, lda, temp, R2);
+ );
+ CHECK_DEPENDENCIES;
+
+ // J <- L3^-1 I (in a temp)
+ TASK(MODE(READ(R2, R3, G[0]) CONSTREFERENCE(Fi, G, temp, R2, R3, PH) READWRITE(temp[0])),
+ ftrsm( Fi, FFLAS::FflasLeft, FFLAS::FflasLower, FFLAS::FflasNoTrans, OppDiag, R3, R2, Fi.one, G, lda, temp, R2, PH););
+
+ // N <- L3^-1 H2
+ TASK(MODE(READ(R3, R2, G[0]) CONSTREFERENCE(Fi, G, A4, R3, R2, PH) READWRITE(A4[R2])),
+ ftrsm(Fi, FFLAS::FflasLeft, FFLAS::FflasLower, FFLAS::FflasNoTrans, OppDiag, R3, N-N2-R2, Fi.one, G, lda, A4+R2, lda, PH));
+
+ CHECK_DEPENDENCIES;
+
+ // O <- N - J V2
+ TASK(MODE(READ(R2, F[R2]) CONSTREFERENCE(Fi, R2, A4, R3, temp, pWH) READWRITE(A4[R2], temp[0])),
+ fgemm( Fi, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, R3, N-N2-R2, R2, Fi.mOne, temp, R2, F+R2, lda, Fi.one, A4+R2, lda, pWH);
+ FFLAS::fflas_delete (temp);
+ // delete[] temp;
+ temp=0;
+ );
+
+ typename Field::Element_ptr R = 0;
+ // R <- H4 - K V2
+ TASK(MODE(READ(R2, R3, M2, N2, A4[R3*lda], F[R2]) CONSTREFERENCE(Fi, R, F, R2, R3, pWH) READWRITE(R[0])),
+ R = A4 + R2 + R3*lda;
+ fgemm( Fi, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M-M2-R3, N-N2-R2, R2, Fi.mOne, A4+R3*lda, lda, F+R2, lda, Fi.one, R, lda, pWH)
+ );
+ //fgemm( Fi, FflasNoTrans, FflasNoTrans, M-M2-R3, N-N2-R2, R2, Fi.mOne, A4+R3*lda, lda, F+R2, lda, Fi.one, R, lda);
+ CHECK_DEPENDENCIES;
+
+ // R <- R - M3 O
+ TASK(MODE(READ(R3, R2, A4[R2], G[R3*lda]) CONSTREFERENCE(Fi, A4, R, R3, R2, G, pWH) READWRITE(R[0])),
+ fgemm( Fi, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M-M2-R3, N-N2-R2, R3, Fi.mOne, G+R3*lda, lda, A4+R2, lda, Fi.one, R, lda, pWH));
+ //fgemm( Fi, FflasNoTrans, FflasNoTrans, M-M2-R3, N-N2-R2, R3, Fi.mOne, G+R3*lda, lda, A4+R2, lda, Fi.one, R, lda);
+ CHECK_DEPENDENCIES;
+
+ /*
+ size_t * P4 = FFLAS::fflas_new<size_t>(M-M2-R3);
+ size_t * Q4 = FFLAS::fflas_new<size_t>(N-N2-R2);
+ */
+
+ // H4 = P4 [ L4 ] [ U4 V4 ] Q4
+ // [ M4 ]
+ //TASK(READ(Fi), NOWRITE(R4), READWRITE(R, P4, Q4), PPLUQ, R4, Fi, Diag, M-M2-R3, N-N2-R2, R, lda, P4, Q4);
+ TASK(MODE(CONSTREFERENCE(Fi, R4, R, P4, Q4, R2, R3, M2, N2) READWRITE(R[0]) WRITE(R4, P4[0], Q4[0])),
+ P4 = FFLAS::fflas_new<size_t>(M-M2-R3);
+ Q4 = FFLAS::fflas_new<size_t>(N-N2-R2);
+ R4 = pPLUQ (Fi, Diag, M-M2-R3, N-N2-R2, R, lda, P4, Q4,nt);
+ );
+ CHECK_DEPENDENCIES;
+
+ // [ E21 M31 0 K1 ] <- P4^T [ E2 M3 0 K ]
+ // [ E22 M32 0 K2 ]
+ TASK(MODE(READ(P4[0], R2, R3, M2) CONSTREFERENCE(Fi, P4, A3, R2, R3) READWRITE(A3[R3*lda])),
+ papplyP(Fi, FFLAS::FflasLeft, FFLAS::FflasNoTrans, N2+R2, 0, M-M2-R3, A3+R3*lda, lda, P4));
+ //applyP( Fi, FflasLeft, FflasNoTrans, N2+R2, 0, M-M2-R3, A3+R3*lda, lda, P4);
+
+ // [ D21 D22 ] [ D2 ]
+ // [ V21 V22 ] <- [ V2 ] Q4^T
+ // [ 0 0 ] [ 0 ]
+ // [ O1 O2 ] [ O ]
+ TASK(MODE(READ(Q4[0], R2, N2, M2, R3) CONSTREFERENCE(Fi, Q4, A2, R2, R3) READWRITE(A2[R2])),
+ papplyP( Fi, FFLAS::FflasRight, FFLAS::FflasTrans, M2+R3, 0, N-N2-R2, A2+R2, lda, Q4));
+ //applyP( Fi, FflasRight, FflasTrans, M2+R3, 0, N-N2-R2, A2+R2, lda, Q4);
+
+ // P <- Diag (P1 [ I_R1 ] , P3 [ I_R3 ])
+ // [ P2 ] [ P4 ]
+ WAIT;
+ // TASK(MODE(CONSTREFERENCE(P1, P2, P3, P4, R1, R3, MathP, M2) READ(P1, P2, R1, R3, P3, P4, M2) READWRITE(MathP)),
+ MathP = FFLAS::fflas_new<size_t>(M);
+ composePermutationsP (MathP, P1, P2, R1, M2);
+ composePermutationsP (MathP+M2, P3, P4, R3, M-M2);
+ for (size_t i=M2; i<M; ++i)
+ MathP[i] += M2;
+ /* if (R1+R2 < M2)
+ PermApplyS( MathP, 1,1, M2, R1, R2, R3, R4);*/
+ // );
+
+ //CHECK_DEPENDENCIES;
+
+ // WAIT;
+ if (R1+R2 < M2){
+ // P <- P S
+ TASK(MODE(CONSTREFERENCE(R1, R2, R3, R4, MathP, M2) READ(R1, R2, R3, R4, M2) READWRITE(MathP[0])),
+ PermApplyS( MathP, 1,1, M2, R1, R2, R3, R4);
+ );
+
+ // A <- S^T A
+ TASK(MODE(READ(R1, R2, R3, R4) CONSTREFERENCE(Fi, A, R1, R2, R3, R4) READWRITE(A[0])),
+ pMatrixApplyS( Fi, A, lda, N, M2, R1, R2, R3, R4));
+ //MatrixApplyS(Fi, A, lda, N, M2, R1, R2, R3, R4);
+ }
+
+ // Q<- Diag ( [ I_R1 ] Q1, [ I_R2 ] Q2 )
+ // [ Q3 ] [ P4 ]
+ MathQ = FFLAS::fflas_new<size_t>(N);
+ TASK(MODE(CONSTREFERENCE(Q1, Q2, Q3, Q4, R1, R2) READ(Q1[0], Q2[0], Q3[0], Q4[0], R1, R2) READWRITE(MathQ[0])),
+ composePermutationsQ (MathQ, Q1, Q3, R1, N2);
+ composePermutationsQ (MathQ+N2, Q2, Q4, R2, N-N2);
+ for (size_t i=N2; i<N; ++i)
+ MathQ[i] += N2;
+ );
+ CHECK_DEPENDENCIES;
+
+ if (R1 < N2){
+ // Q <- T Q
+ TASK(MODE(CONSTREFERENCE(R1, R2, R3, R4) READ(R1, R2, R3, R4) READWRITE(MathQ[0])),
+ PermApplyT (MathQ, 1,1,N2, R1, R2, R3, R4););
+
+ // A <- A T^T
+ TASK(MODE(READ(R1, R2, R3, R4) CONSTREFERENCE(Fi, A, R1, R2, R3, R4) READWRITE(A[0])),
+ pMatrixApplyT(Fi, A, lda, M, N2, R1, R2, R3, R4));
+ // MatrixApplyT(Fi, A, lda, M, N2, R1, R2, R3, R4);
+ }
+ CHECK_DEPENDENCIES;
+ TASK(MODE(CONSTREFERENCE(MathP, MathQ) READ(MathP[0], MathQ[0]) READWRITE(P[0], Q[0])),
+ MathPerm2LAPACKPerm (Q, MathQ, N);
+ MathPerm2LAPACKPerm (P, MathP, M);
+ );
+ );
+ FFLAS::fflas_delete( MathQ);
+ FFLAS::fflas_delete( MathP);
+ FFLAS::fflas_delete( P1);
+ FFLAS::fflas_delete( P2);
+ FFLAS::fflas_delete( P3);
+ FFLAS::fflas_delete( P4);
+ FFLAS::fflas_delete( Q1);
+ FFLAS::fflas_delete( Q2);
+ FFLAS::fflas_delete( Q3);
+ FFLAS::fflas_delete( Q4);
+
+ //);
+
+
+
+ return R1+R2+R3+R4;
+ //#endif
+ }
+
+}// namespace FFPACK
+
+//#endif // OPENMP
+#endif // __FFLASFFPACK_ffpack_ppluq_INL
diff --git a/fflas-ffpack/ffpack/ffpack_rankprofiles.inl b/fflas-ffpack/ffpack/ffpack_rankprofiles.inl
new file mode 100644
index 0000000..56aece1
--- /dev/null
+++ b/fflas-ffpack/ffpack/ffpack_rankprofiles.inl
@@ -0,0 +1,278 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* ffpack_rankprofiles.inl
+ * Copyright (C) 2015 FFLAS-FFACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_ffpack_rank_profiles_INL
+#define __FFLASFFPACK_ffpack_rank_profiles_INL
+
+namespace FFPACK{
+template <class Field>
+inline size_t RowRankProfile (const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* &rkprofile,
+ const FFPACK_LU_TAG LuTag){
+
+
+ size_t *P = FFLAS::fflas_new<size_t>((LuTag==FfpackSlabRecursive)?N:M);
+ size_t *Q = FFLAS::fflas_new<size_t>((LuTag==FfpackSlabRecursive)?M:N);
+ size_t R;
+
+ if (LuTag == FfpackSlabRecursive){
+ R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Q);
+ std::swap(P,Q);
+ } else
+ R = PLUQ (F, FFLAS::FflasNonUnit, M, N, A, lda, P, Q);
+
+ rkprofile = FFLAS::fflas_new<size_t> (R);
+
+ RankProfileFromLU (P, M, R, rkprofile, LuTag);
+
+ FFLAS::fflas_delete (Q);
+ FFLAS::fflas_delete (P);
+ return R;
+}
+template <class Field>
+inline size_t ColumnRankProfile (const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ size_t* &rkprofile,
+ const FFPACK_LU_TAG LuTag){
+
+
+ size_t *P = FFLAS::fflas_new<size_t>(M);
+ size_t *Q = FFLAS::fflas_new<size_t>(N);
+ size_t R;
+
+ if (LuTag == FfpackSlabRecursive){
+ R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasTrans, M, N, A, lda, P, Q);
+ } else
+ R = PLUQ (F, FFLAS::FflasNonUnit, M, N, A, lda, P, Q);
+
+ rkprofile = FFLAS::fflas_new<size_t> (R);
+
+ RankProfileFromLU (Q, N, R, rkprofile, LuTag);
+
+ FFLAS::fflas_delete (P);
+ FFLAS::fflas_delete (Q);
+ return R;
+}
+
+inline void RankProfileFromLU (const size_t* Q, const size_t N, const size_t R,
+ size_t* rkprofile, const FFPACK_LU_TAG LuTag){
+
+ if (LuTag == FfpackSlabRecursive)
+ std::copy(Q, Q+R, rkprofile);
+ else {
+ size_t * RP = FFLAS::fflas_new<size_t>(N);
+ for (size_t i=0;i < N; ++i)
+ RP [i] = i;
+ for (size_t i=0; i<N; ++i)
+ if (Q[i] != i)
+ std::swap (RP [i], RP [Q [i]]);
+
+ std::copy(RP, RP+R, rkprofile);
+ std::sort (rkprofile, rkprofile + R);
+ FFLAS::fflas_delete(RP);
+ }
+}
+
+inline size_t LeadingSubmatrixRankProfiles (const size_t M, const size_t N, const size_t R,
+ const size_t LSm, const size_t LSn,
+ const size_t* P, const size_t* Q,
+ size_t* RRP, size_t* CRP){
+ size_t LSr=0; // rank of the LSm x LSn leading submatrix
+
+ size_t* MathP = FFLAS::fflas_new<size_t>(M);
+ size_t* MathQ = FFLAS::fflas_new<size_t>(N);
+
+ LAPACKPerm2MathPerm (MathP, P, M);
+ LAPACKPerm2MathPerm (MathQ, Q, N);
+ for (size_t i = 0; i < R; i++)
+ if (MathP[i] < LSm && MathQ[i] < LSn){
+ RRP [LSr] = MathP[i];
+ CRP [LSr] = MathQ[i];
+ LSr++;
+ }
+ std::sort (RRP, RRP+LSr);
+ std::sort (CRP, CRP+LSr);
+ FFLAS::fflas_delete(MathP);
+ FFLAS::fflas_delete(MathQ);
+ return LSr;
+
+}
+
+
+template <class Field>
+size_t RowRankProfileSubmatrixIndices (const Field& F,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A,
+ const size_t lda,
+ size_t*& rowindices,
+ size_t*& colindices,
+ size_t& R)
+{
+ size_t *P = FFLAS::fflas_new<size_t>(N);
+ size_t *Q = FFLAS::fflas_new<size_t>(M);
+
+ R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Q);
+ rowindices = FFLAS::fflas_new<size_t>(M);
+ colindices = FFLAS::fflas_new<size_t>(N);
+ for (size_t i=0; i<R; ++i){
+ rowindices [i] = Q [i];
+ }
+ for (size_t i=0; i<N; ++i)
+ colindices [i] = i;
+ size_t tmp;
+ for (size_t i=0; i<R; ++i){
+ if (i != P[i]){
+ tmp = colindices[i];
+ colindices[i] = colindices[P[i]];
+ colindices[P[i]] = tmp;
+ }
+ }
+
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+
+ return R;
+}
+
+template <class Field>
+size_t ColRankProfileSubmatrixIndices (const Field& F,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A,
+ const size_t lda,
+ size_t*& rowindices,
+ size_t*& colindices,
+ size_t& R)
+{
+ size_t *P = FFLAS::fflas_new<size_t>(M);
+ size_t *Q = FFLAS::fflas_new<size_t>(N);
+
+ R = LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasTrans, M, N, A, lda, P, Q);
+ rowindices = FFLAS::fflas_new<size_t>(M);
+ colindices = FFLAS::fflas_new<size_t>(N);
+ for (size_t i=0; i<R; ++i)
+ colindices [i] = Q [i];
+
+ for (size_t i=0; i<N; ++i)
+ rowindices [i] = i;
+
+ size_t tmp;
+ for (size_t i=0; i<R; ++i){
+ if (i != P[i]){
+ tmp = rowindices[i];
+ rowindices[i] = rowindices[P[i]];
+ rowindices[P[i]] = tmp;
+ }
+ }
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+
+ return R;
+}
+
+template <class Field>
+size_t RowRankProfileSubmatrix (const Field& F,
+ const size_t M, const size_t N,
+ typename Field::Element_ptr A,
+ const size_t lda,
+ typename Field::Element_ptr& X, size_t& R)
+{
+
+ size_t * rowindices, * colindices;
+
+ typename Field::Element_ptr A2 = FFLAS::fflas_new (F, M, N) ;
+ FFLAS::fassign(F,M,N,A,lda,A2,N);
+
+ RowRankProfileSubmatrixIndices (F, M, N, A2, N, rowindices, colindices, R);
+
+ X = FFLAS::fflas_new (F, R, R);
+ for (size_t i=0; i<R; ++i)
+ for (size_t j=0; j<R; ++j)
+ F.assign (*(X + i*R + j), *(A + rowindices[i]*lda + colindices[j]));
+ FFLAS::fflas_delete (A2);
+ FFLAS::fflas_delete( rowindices);
+ FFLAS::fflas_delete( colindices);
+ return R;
+}
+
+template <class Field>
+size_t ColRankProfileSubmatrix (const Field& F, const size_t M, const size_t N,
+ typename Field::Element_ptr A, const size_t lda,
+ typename Field::Element_ptr& X, size_t& R)
+{
+
+ size_t * rowindices, * colindices;
+
+ typename Field::Element_ptr A2 = FFLAS::fflas_new (F, M, N);
+ FFLAS::fassign(F,M,N,A,lda,A2,N);
+
+ ColRankProfileSubmatrixIndices (F, M, N, A2, N, rowindices, colindices, R);
+
+ X = FFLAS::fflas_new (F, R, R);
+ for (size_t i=0; i<R; ++i)
+ for (size_t j=0; j<R; ++j)
+ F.assign (*(X + i*R + j), *(A + rowindices[i]*lda + colindices[j]));
+ FFLAS::fflas_delete (A2);
+ FFLAS::fflas_delete( colindices);
+ FFLAS::fflas_delete( rowindices);
+ return R;
+}
+
+template <class Field>
+typename Field::Element_ptr
+LQUPtoInverseOfFullRankMinor( const Field& F, const size_t rank,
+ typename Field::Element_ptr A_factors, const size_t lda,
+ const size_t* QtPointer,
+ typename Field::Element_ptr X, const size_t ldx)
+{
+
+ // upper entries are okay, just need to move up bottom ones
+ const size_t* srcRow = QtPointer;
+ for (size_t row=0; row<rank; row++, srcRow++)
+ if (*srcRow != row) {
+ typename Field::Element_ptr oldRow = A_factors + (*srcRow) * lda;
+ typename Field::Element_ptr newRow = A_factors + row * lda;
+ for (size_t col=0; col<row; col++, oldRow++, newRow++)
+ F.assign(*newRow, *oldRow);
+ }
+
+ // X <- (Qt.L.Q)^(-1)
+ //invL( F, rank, A_factors, lda, X, ldx);
+ ftrtri (F, FFLAS::FflasLower, FFLAS::FflasUnit, rank, A_factors, lda);
+ FFLAS::fassign(F,rank,rank,X,ldx,A_factors,lda);
+
+ // X = U^-1.X
+ ftrsm( F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans,
+ FFLAS::FflasNonUnit, rank, rank, F.one, A_factors, lda, X, ldx);
+
+ return X;
+
+}
+
+} // namespace FFPACK
+
+#endif // __FFLASFFPACK_ffpack_rank_profiles_INL
diff --git a/fflas-ffpack/field/Makefile.am b/fflas-ffpack/field/Makefile.am
index 1173e93..42ed9a5 100644
--- a/fflas-ffpack/field/Makefile.am
+++ b/fflas-ffpack/field/Makefile.am
@@ -1,5 +1,5 @@
# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# adapted from LinBox configuration
#
# ========LICENCE========
@@ -24,19 +24,17 @@
pkgincludesubdir=$(pkgincludedir)/field
-pkgincludesub_HEADERS= \
- modular-balanced.h \
- modular-balanced-double.h \
- modular-balanced-float.h \
- modular-balanced-int32.h \
- modular-balanced-int64.h \
- modular-positive.h \
- modular-float.h \
- modular-double.h \
- modular-int32.h \
- modular-int64.h \
- nonzero-randiter.h \
- modular-randiter.h \
- unparametric.h \
- field-general.h
+RNS=rns.h \
+ rns.inl \
+ rns-double.h \
+ rns-double-elt.h \
+ rns-double.inl \
+ rns-integer.h \
+ rns-integer-mod.h \
+ modular-extended.h
+pkgincludesub_HEADERS= \
+ field-traits.h \
+ $(RNS)
+
+EXTRA_DIST=field.doxy
diff --git a/fflas-ffpack/field/Makefile.in b/fflas-ffpack/field/Makefile.in
deleted file mode 100644
index 0088f04..0000000
--- a/fflas-ffpack/field/Makefile.in
+++ /dev/null
@@ -1,561 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# adapted from LinBox configuration
-#
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = fflas-ffpack/field
-DIST_COMMON = $(pkgincludesub_HEADERS) $(srcdir)/Makefile.am \
- $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-am__vpath_adj = case $$p in \
- $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
- *) f=$$p;; \
- esac;
-am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
-am__install_max = 40
-am__nobase_strip_setup = \
- srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
-am__nobase_strip = \
- for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
-am__nobase_list = $(am__nobase_strip_setup); \
- for p in $$list; do echo "$$p $$p"; done | \
- sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
- $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
- if (++n[$$2] == $(am__install_max)) \
- { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
- END { for (dir in files) print dir, files[dir] }'
-am__base_list = \
- sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
- sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-am__uninstall_files_from_dir = { \
- test -z "$$files" \
- || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
- || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
- $(am__cd) "$$dir" && rm -f $$files; }; \
- }
-am__installdirs = "$(DESTDIR)$(pkgincludesubdir)"
-HEADERS = $(pkgincludesub_HEADERS)
-ETAGS = etags
-CTAGS = ctags
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-pkgincludesubdir = $(pkgincludedir)/field
-pkgincludesub_HEADERS = \
- modular-balanced.h \
- modular-balanced-double.h \
- modular-balanced-float.h \
- modular-balanced-int32.h \
- modular-balanced-int64.h \
- modular-positive.h \
- modular-float.h \
- modular-double.h \
- modular-int32.h \
- modular-int64.h \
- nonzero-randiter.h \
- modular-randiter.h \
- unparametric.h \
- field-general.h
-
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps fflas-ffpack/field/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps fflas-ffpack/field/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-install-pkgincludesubHEADERS: $(pkgincludesub_HEADERS)
- @$(NORMAL_INSTALL)
- @list='$(pkgincludesub_HEADERS)'; test -n "$(pkgincludesubdir)" || list=; \
- if test -n "$$list"; then \
- echo " $(MKDIR_P) '$(DESTDIR)$(pkgincludesubdir)'"; \
- $(MKDIR_P) "$(DESTDIR)$(pkgincludesubdir)" || exit 1; \
- fi; \
- for p in $$list; do \
- if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
- echo "$$d$$p"; \
- done | $(am__base_list) | \
- while read files; do \
- echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(pkgincludesubdir)'"; \
- $(INSTALL_HEADER) $$files "$(DESTDIR)$(pkgincludesubdir)" || exit $$?; \
- done
-
-uninstall-pkgincludesubHEADERS:
- @$(NORMAL_UNINSTALL)
- @list='$(pkgincludesub_HEADERS)'; test -n "$(pkgincludesubdir)" || list=; \
- files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
- dir='$(DESTDIR)$(pkgincludesubdir)'; $(am__uninstall_files_from_dir)
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile $(HEADERS)
-installdirs:
- for dir in "$(DESTDIR)$(pkgincludesubdir)"; do \
- test -z "$$dir" || $(MKDIR_P) "$$dir"; \
- done
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am: install-pkgincludesubHEADERS
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am: uninstall-pkgincludesubHEADERS
-
-.MAKE: install-am install-strip
-
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
- clean-libtool ctags distclean distclean-generic \
- distclean-libtool distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-pdf install-pdf-am \
- install-pkgincludesubHEADERS install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- tags uninstall uninstall-am uninstall-pkgincludesubHEADERS
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/fflas-ffpack/field/field-traits.h b/fflas-ffpack/field/field-traits.h
new file mode 100644
index 0000000..9c68ac4
--- /dev/null
+++ b/fflas-ffpack/field/field-traits.h
@@ -0,0 +1,348 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file field/field-traits.h
+ * @brief Field Traits
+ */
+
+#ifndef __FFLASFFPACK_field_field_traits_H
+#define __FFLASFFPACK_field_field_traits_H
+
+#include <type_traits> // CXX11
+#include "fflas-ffpack/field/rns-double-elt.h"
+
+// ----- Forward declarations
+
+#include "recint/rmint.h"
+#include "givaro/modular-general.h"
+#include "givaro/zring.h"
+
+namespace RecInt {
+
+ template<size_t K>
+ class rint;
+
+ template<size_t K>
+ class ruint;
+
+}
+
+namespace Givaro {
+
+ template<class T>
+ class ModularBalanced;
+
+ template<class T>
+ class Montgomery;
+
+}
+
+namespace FFPACK {
+
+ template<class T>
+ class RNSInteger;
+
+ template<class T>
+ class RNSIntegerMod;
+
+}
+
+namespace FFLAS { /* Categories */
+
+ //! Traits and categories will need to be placed in a proper file later
+
+ namespace FieldCategories {
+
+ // Classify
+ //! generic ring.
+ struct GenericTag{};
+ //! This is a modular field like e.g. <code>Modular<T></code> or <code>ModularBalanced<T></code>
+ struct ModularTag{};
+ //! If the field uses a representation with infix operators
+ struct UnparametricTag{};
+ }
+
+ //! Specifies the mode of action for an algorithm w.r.t. its field
+ //!
+ namespace ModeCategories {
+ //! No specific mode of action: use standard field operations
+ struct DefaultTag{};
+
+ //! Use standard field operations, but keeps track of bounds on input and output
+ struct DefaultBoundedTag{};
+
+ //! Force conversion to appropriate element type of ElementCategory T.
+ //! e.g.
+ //! - ConvertTo<ElementCategories::MachineFloatTag> tries conversion
+ //! of Modular<int> to Modular<double>
+ //! - ConvertTo<ElementCategories::FixedPrecIntTag> tries conversion
+ //! of Modular<Integer> to Modular<RecInt<K> >
+ //! - ConvertTo<ElementCategories::ArbitraryPrecIntTag> tries conversion
+ //! of Modular<Integer> to RNSInteger
+ //! .
+ template<class T>
+ struct ConvertTo{};
+
+ //! Performs field operations with delayed mod reductions. Ensures result is reduced.
+ struct DelayedTag{};
+
+ //! Performs field operations with delayed mod only when necessary. Result may not be reduced.
+ struct LazyTag{};
+ }
+
+ namespace ElementCategories {
+ //! default is generic
+ struct GenericTag{};
+ //! float or double
+ struct MachineFloatTag{};
+ //! short, int, long, long long, and unsigned variants
+ struct MachineIntTag{};
+ //! Fixed precision integers above machine precision: Givaro::recInt
+ struct FixedPrecIntTag{};
+ //! Arbitrary precision integers: GMP
+ struct ArbitraryPrecIntTag{};
+ //! Representation in a Residue Number System
+ struct RNSElementTag{};
+ //- If it can support SIMD operations (ie \c double or \c int32_t, etc)
+ // struct SIMDTag : public GenericTag{};
+ }
+
+} // FFLAS
+
+namespace FFLAS { /* Traits */
+
+ /*! ElementTraits
+ */
+ template <class Element>
+ struct ElementTraits {typedef typename ElementCategories::GenericTag value;};
+
+ template<> struct ElementTraits<float> {typedef ElementCategories::MachineFloatTag value;};
+ template<> struct ElementTraits<double> {typedef ElementCategories::MachineFloatTag value;};
+ template<> struct ElementTraits<int8_t> {typedef ElementCategories::MachineIntTag value;};
+ template<> struct ElementTraits<int16_t> {typedef ElementCategories::MachineIntTag value;};
+ template<> struct ElementTraits<int32_t> {typedef ElementCategories::MachineIntTag value;};
+ template<> struct ElementTraits<int64_t> {typedef ElementCategories::MachineIntTag value;};
+ template<> struct ElementTraits<uint8_t> {typedef ElementCategories::MachineIntTag value;};
+ template<> struct ElementTraits<uint16_t> {typedef ElementCategories::MachineIntTag value;};
+ template<> struct ElementTraits<uint32_t> {typedef ElementCategories::MachineIntTag value;};
+ template<> struct ElementTraits<uint64_t> {typedef ElementCategories::MachineIntTag value;};
+ template<>
+ struct ElementTraits<Givaro::Integer> {typedef ElementCategories::ArbitraryPrecIntTag value;};
+ template<size_t K>
+ struct ElementTraits<RecInt::rint<K> > {typedef ElementCategories::FixedPrecIntTag value;};
+ template<size_t K>
+ struct ElementTraits<RecInt::ruint<K> > {typedef ElementCategories::FixedPrecIntTag value;};
+ template<size_t K, int MG>
+ struct ElementTraits<RecInt::rmint<K, MG> >{typedef ElementCategories::FixedPrecIntTag value;};
+ template<>
+ struct ElementTraits<FFPACK::rns_double_elt>{typedef ElementCategories::RNSElementTag value;};
+
+
+ /*! ModeTraits
+ */
+ template <class Field>
+ struct ModeTraits {typedef typename ModeCategories::DefaultTag value;};
+
+ template <typename Element, typename Compute>
+ struct ModeTraits<Givaro::Modular<Element,Compute> >{typedef typename ModeCategories::DelayedTag value;};
+
+ template <> template<typename Compute> struct ModeTraits<Givaro::Modular<int8_t,Compute> > {typedef typename ModeCategories::ConvertTo<ElementCategories::MachineFloatTag> value;};
+ template <> template<typename Compute> struct ModeTraits<Givaro::Modular<int16_t,Compute> > {typedef typename ModeCategories::ConvertTo<ElementCategories::MachineFloatTag> value;};
+ template <> template<typename Compute> struct ModeTraits<Givaro::Modular<int32_t,Compute> > {typedef typename ModeCategories::ConvertTo<ElementCategories::MachineFloatTag> value;};
+ template <> template<typename Compute> struct ModeTraits<Givaro::Modular<uint8_t,Compute> > {typedef typename ModeCategories::ConvertTo<ElementCategories::MachineFloatTag> value;};
+ template <> template<typename Compute> struct ModeTraits<Givaro::Modular<uint16_t,Compute> > {typedef typename ModeCategories::ConvertTo<ElementCategories::MachineFloatTag> value;};
+ template <> template<typename Compute> struct ModeTraits<Givaro::Modular<uint32_t,Compute> > {typedef typename ModeCategories::ConvertTo<ElementCategories::MachineFloatTag> value;};
+
+#ifndef INTEGER_NO_RNS
+ template <> template<typename Compute> struct ModeTraits<Givaro::Modular<Givaro::Integer,Compute> > {typedef typename ModeCategories::ConvertTo<ElementCategories::RNSElementTag> value;};
+#endif
+
+ template <typename Element>
+ struct ModeTraits<Givaro::ModularBalanced<Element> >{typedef typename ModeCategories::DelayedTag value;};
+
+ template <> struct ModeTraits<Givaro::ModularBalanced<int8_t> > {typedef typename ModeCategories::ConvertTo<ElementCategories::MachineFloatTag> value;};
+ template <> struct ModeTraits<Givaro::ModularBalanced<int16_t> > {typedef typename ModeCategories::ConvertTo<ElementCategories::MachineFloatTag> value;};
+ template <> struct ModeTraits<Givaro::ModularBalanced<int32_t> > {typedef typename ModeCategories::ConvertTo<ElementCategories::MachineFloatTag> value;};
+
+#ifndef INTEGER_NO_RNS
+ template <> struct ModeTraits<Givaro::ModularBalanced<Givaro::Integer> > {typedef typename ModeCategories::ConvertTo<ElementCategories::RNSElementTag> value;};
+ template <> struct ModeTraits<Givaro::ZRing<Givaro::Integer> > {typedef typename ModeCategories::ConvertTo<ElementCategories::RNSElementTag> value;};
+#endif
+
+ // These ones are here temporarily, to ensure
+ // In the long term ZRing should be in DefaultTag, and forced to be in DefaultBoundedTag be the caller. However this would prevent these rings to use Winograd's algorithm (extensive use of bounded helpers) in the current implementation. Needs work.
+ template <> struct ModeTraits<Givaro::ZRing<float> > {typedef typename ModeCategories::DefaultBoundedTag value;};
+ template <> struct ModeTraits<Givaro::ZRing<double> > {typedef typename ModeCategories::DefaultBoundedTag value;};
+ template <class T> struct ModeTraits<Givaro::Montgomery<T> > {typedef typename ModeCategories::DefaultBoundedTag value;};
+
+ /*! FieldTrait
+ */
+ template <class Field>
+ struct FieldTraits {
+ typedef typename FieldCategories::GenericTag category;
+ // typedef false_type balanced ;
+ static const bool balanced = false ;
+ };
+
+
+ // RecInt
+ template<size_t K>
+ struct FieldTraits<Givaro::ZRing<RecInt::ruint<K> > > {
+ //typedef FieldCategories::FloatingPointConvertibleTag value;
+ typedef FieldCategories::UnparametricTag category;
+ static const bool balanced = false ;
+ };
+
+ // Modular <double|float>
+ // ModularBalanced <double|float>
+ template<class Element>
+ struct FieldTraits<Givaro::Modular<Element> > {
+ typedef FieldCategories::ModularTag category;
+ static const bool balanced = false ;
+ };
+
+ template<class Element>
+ struct FieldTraits<Givaro::ModularBalanced<Element> > {
+ typedef FieldCategories::ModularTag category;
+ static const bool balanced = true ;
+ };
+
+ // ZRing< float|double >
+ template<>
+ struct FieldTraits<Givaro::ZRing<double> > {
+// typedef FieldCategories::FloatingPointTag value;
+ typedef FieldCategories::UnparametricTag category;
+ static const bool balanced = false ;
+ };
+ template<>
+ struct FieldTraits<Givaro::ZRing<float> > {
+// typedef FieldCategories::FloatingPointTag value;
+ typedef FieldCategories::UnparametricTag category;
+ static const bool balanced = false ;
+ };
+
+ // ZRing< intX >
+ template<>
+ struct FieldTraits<Givaro::ZRing<int16_t> > {
+// typedef FieldCategories::FloatingPointConvertibleTag value;
+ typedef FieldCategories::UnparametricTag category;
+ static const bool balanced = false ;
+ };
+ template<>
+ struct FieldTraits<Givaro::ZRing<uint16_t> > {
+// typedef FieldCategories::FloatingPointConvertibleTag value;
+ typedef FieldCategories::UnparametricTag category;
+ static const bool balanced = false ;
+ };
+ template<>
+ struct FieldTraits<Givaro::ZRing<int32_t> > {
+// typedef FieldCategories::FloatingPointConvertibleTag value;
+ typedef FieldCategories::UnparametricTag category;
+ static const bool balanced = false ;
+ };
+ template<>
+ struct FieldTraits<Givaro::ZRing<uint32_t> > {
+// typedef FieldCategories::FloatingPointConvertibleTag value;
+ typedef FieldCategories::UnparametricTag category;
+ static const bool balanced = false ;
+ };
+ template<>
+ struct FieldTraits<Givaro::ZRing<int64_t> > {
+// typedef FieldCategories::FloatingPointConvertibleTag value;
+ typedef FieldCategories::UnparametricTag category;
+ static const bool balanced = false ;
+ };
+ template<>
+ struct FieldTraits<Givaro::ZRing<uint64_t> > {
+// typedef FieldCategories::FloatingPointConvertibleTag value;
+ typedef FieldCategories::UnparametricTag category;
+ static const bool balanced = false ;
+ };
+
+ // ZRing<Integer>
+ template<>
+ struct FieldTraits<Givaro::ZRing<Givaro::Integer> >
+ {
+// typedef FieldCategories::MultiPrecisionTag value;
+ typedef FieldCategories::UnparametricTag category;
+ static const bool balanced = false ;
+ };
+
+ // RNSInteger
+ template<typename T>
+ struct FieldTraits<FFPACK::RNSInteger<T> > {
+// typedef FieldCategories::MultiPrecisionTag value;
+ typedef FieldCategories::UnparametricTag category;
+ // typedef true_type balanced ;
+ static const bool balanced = false ;
+ };
+ // RNSIntegerMod
+ template<typename T>
+ struct FieldTraits<FFPACK::RNSIntegerMod<T> >{
+// typedef FieldCategories::MultiPrecisionTag value;
+ typedef FieldCategories::ModularTag category;
+ // typedef true_type balanced ;
+ static const bool balanced = false ;
+ };
+
+
+} // FFLAS
+
+namespace FFLAS { /* associatedDelayedField */
+
+ template <class Field>
+ struct associatedDelayedField{
+ typedef Field field;
+ typedef Field& type; // reference to avoid copying heavy fields
+ };
+ template <typename T,typename X>
+ struct associatedDelayedField<const Givaro::Modular<T,X>> {
+ typedef Givaro::ZRing<T> field;
+ typedef Givaro::ZRing<T> type;
+ };
+ template <typename T>
+ struct associatedDelayedField<const Givaro::ModularBalanced<T>> {
+ typedef Givaro::ZRing<T> field;
+ typedef Givaro::ZRing<T> type;
+ };
+ template <typename T>
+ struct associatedDelayedField<const Givaro::ZRing<T>> {
+ typedef Givaro::ZRing<T> field;
+ typedef Givaro::ZRing<T> type;
+ };
+ template <typename RNS>
+ struct associatedDelayedField<const FFPACK::RNSIntegerMod<RNS>> {
+ typedef FFPACK::RNSInteger<RNS> field;
+ typedef FFPACK::RNSInteger<RNS> type;
+ };
+
+} // FFLAS
+
+#endif // __FFLASFFPACK_field_field_traits_H
+
diff --git a/fflas-ffpack/field/field.doxy b/fflas-ffpack/field/field.doxy
new file mode 100644
index 0000000..c17e4d8
--- /dev/null
+++ b/fflas-ffpack/field/field.doxy
@@ -0,0 +1,36 @@
+// Copyright (c) 2014 FFLAS-FFPACK
+// written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+//
+// ========LICENCE========
+// This file is part of the library FFLAS-FFPACK.
+//
+// FFLAS-FFPACK is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+// ========LICENCE========
+//
+
+
+
+/** \ingroup fflas-ffpack
+ * \defgroup field FFLAS-FFPACK fields
+ *
+ * \brief fields in the FFLAS-FFPACK library
+ *
+ * Unparametric/Random elements
+ *
+ * @todo biblio
+ *
+ */
+
+// vim:syn=doxygen
diff --git a/fflas-ffpack/field/modular-balanced-double.h b/fflas-ffpack/field/modular-balanced-double.h
deleted file mode 100644
index 742677a..0000000
--- a/fflas-ffpack/field/modular-balanced-double.h
+++ /dev/null
@@ -1,447 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* field/modular-balanced-double.h
- * Copyright (C) 2003 Pascal Giorgi
- * 2005 Clement Pernet
- * Written by Pascal Giorgi <pascal.giorgi at ens-lyon.fr>
- * and Clement Pernet <Clement.Pernet at imag.fr>
- * and Brice Boyer <bboyer at imag.fr>
- *
- * ------------------------------------
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- */
-
-/*! @file field/modular-balanced-double.h
- * @ingroup field
- * @brief Balanced representation of <code>Z/mZ</code> over \c double .
- */
-
-#ifndef __FFLASFFPACK_modular_balanced_double_H
-#define __FFLASFFPACK_modular_balanced_double_H
-
-#include <math.h>
-#include "fflas-ffpack/field/modular-randiter.h"
-#include "fflas-ffpack/field/nonzero-randiter.h"
-#include "fflas-ffpack/utils/debug.h"
-#include <float.h>
-
-namespace FFPACK
-{
-
- //! it is forbiden to have char 2
- template<>
- class ModularBalanced <double>{
-
- protected:
- double modulus;
- double half_mod;
- double mhalf_mod;
- unsigned long lmodulus;
-
- public:
- typedef double Element;
- typedef unsigned long FieldInt;
- typedef ModularBalancedRandIter<double> RandIter;
- typedef NonzeroRandIter<ModularBalanced<double>, RandIter > NonZeroRandIter;
-
- const Element one ;
- const Element zero ;
- const Element mOne ; // except in char 2
-
- static const bool balanced = true;
-
- ModularBalanced (int32_t p, int exp = 1) :
- modulus((double)p),
- half_mod (double((p-1)/2)),
- mhalf_mod(half_mod-modulus+1),
- lmodulus ((unsigned int)p)
- ,one(1),zero(0),mOne(-1)
- {
-#ifdef DEBUG
- if(modulus <= 1)
- throw FFPACK::Failure(__func__,__FILE__,
- __LINE__,
- "modulus must be > 1");
- if( exp != 1 ) throw Failure(__func__,__FILE__,
- __LINE__,
- "exponent must be 1");
- if (modulus > getMaxModulus())
- throw Failure (__func__,__FILE__,
- __LINE__,
- "modulus is too big");
-#endif
- }
-
- ModularBalanced (double p) :
- modulus (p),
- half_mod (double((int)(p-1)/2)),
- mhalf_mod(half_mod-modulus+1),
- lmodulus ((unsigned long)p)
- ,one(1),zero(0),mOne(-1)
- {
-#ifdef DEBUG
- if (modulus <= 1)
- throw Failure(__func__,__FILE__,
- __LINE__,
- "modulus must be > 1");
- if (modulus > getMaxModulus())
- throw Failure (__func__,__FILE__,
- __LINE__,
- "modulus is too big");
-#endif
- }
-
- ModularBalanced (unsigned long p) :
- modulus ((double)p),
- half_mod (double((unsigned long)(p-1)/2)),
- mhalf_mod(half_mod-modulus+1),
- lmodulus (p)
- ,one(1),zero(0),mOne(-1)
- {
-#ifdef DEBUG
- if (modulus <= 1)
- throw Failure(__func__,__FILE__,
- __LINE__,
- "modulus must be > 1");
- if (modulus > getMaxModulus())
- throw Failure (__func__,__FILE__,
- __LINE__,
- "modulus is too big");
-#endif
- }
-
- ModularBalanced (long p) :
- modulus ((double)p),
- half_mod (double((unsigned long)(p-1)/2)),
- mhalf_mod(half_mod-modulus+1),
- lmodulus ((unsigned int)p)
- ,one(1),zero(0),mOne(-1)
- {
-#ifdef DEBUG
- if (modulus <= 1)
- throw Failure(__func__,__FILE__,
- __LINE__,
- "modulus must be > 1");
- if (modulus > getMaxModulus())
- throw Failure (__func__,__FILE__,
- __LINE__,
- "modulus is too big");
-#endif
- }
-
-
- ModularBalanced<double>(const ModularBalanced<double>& mf) :
- modulus(mf.modulus), half_mod(mf.half_mod)
- ,mhalf_mod(mf.mhalf_mod), lmodulus(mf.lmodulus)
- ,one(mf.one),zero(mf.zero),mOne(mf.mOne)
- {}
-
- ModularBalanced<Element> & assign(const ModularBalanced<Element> &F)
- {
- modulus = F.modulus;
- half_mod = F.half_mod;
- mhalf_mod = F.mhalf_mod;
- lmodulus = F.lmodulus;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-
-#if 1
- const ModularBalanced<double> &operator=(const ModularBalanced<double> &F)
- {
- modulus = F.modulus;
- half_mod = F.half_mod;
- mhalf_mod = F.mhalf_mod;
- lmodulus = F.lmodulus;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-#endif
-
-
- FieldInt &cardinality (FieldInt &c) const
- {
- return c = (FieldInt) modulus;
- }
-
- FieldInt cardinality () const
- {
- return (FieldInt) modulus;
- }
-
- unsigned long characteristic() const
- {
- return lmodulus ;
- }
-
- FieldInt &characteristic (FieldInt &c) const
- {
- return c = (FieldInt) modulus;
- }
-
- unsigned long &convert (unsigned long &x, const Element &y) const
- {
- return x = (unsigned long)y;
- }
-
- double &convert (double &x, const Element& y) const
- {
- return x=y;
- }
-
- float &convert (float &x, const Element& y) const
- {
- return x=float(y);
- }
-
- std::ostream &write (std::ostream &os) const
- {
- return os << "balanced double mod " << (long int)modulus;
- }
-
- std::istream &read (std::istream &is)
- {
- is >> modulus;
-#ifdef DEBUG
- if(modulus <= 1)
- throw Failure (__func__,
- __LINE__,
- "modulus must be > 1");
- if(modulus > getMaxModulus())
- throw Failure (__func__,
- __LINE__,
- "modulus is too big");
-#endif
- return is;
- }
-
- std::ostream &write (std::ostream &os, const Element &x) const
- {
- return os << (long) x;
- }
-
- std::istream &read (std::istream &is, Element &x) const
- {
- double tmp;
- is >> tmp;
- init(x,tmp);
- return is;
- }
-
- Element &init (Element &x, const unsigned long &y) const
- {
- x = Element(y % lmodulus);
- if (x > half_mod) return x -= modulus;
- else if (x<mhalf_mod) return x += modulus;
- else return x;
- }
-
- Element& init(Element& x, const double y) const
- {
-
- x = fmod (y, modulus);
- // x =(Element) ((long int)y%(long int)(modulus));
- if (x < mhalf_mod) return x += modulus;
- if (x > half_mod) return x -= modulus;
- return x;
- }
-
- Element& init(Element& x) const
- {
- return x = 0;
- }
-
- template<class T>
- Element& init(Element& x, const T y) const
- {
- return init(x,double(y));
- }
-
-
- Element& assign(Element& x, const Element& y) const
- {
- return x = y;
- }
-
- /*! Tests equality.
- * @param x element
- * @param y element
- * @warning \c x and \c y are supposed to be reduced.
- */
- inline bool areEqual (const Element &x, const Element &y) const
- {
- return x == y;
- }
-
- inline bool isZero (const Element &x) const
- {
- return x == 0.;
- }
-
- inline bool isOne (const Element &x) const
- {
- return x == 1.;
- }
-
- inline Element &add (Element &x,
- const Element &y,
- const Element &z) const
- {
- x = y + z;
- if ( x > half_mod ) return x -= modulus;
- if ( x < mhalf_mod ) return x += modulus;
- return x;
- }
-
- inline Element &sub (Element &x,
- const Element &y,
- const Element &z) const
- {
- x = y - z;
- if (x > half_mod) return x -= modulus;
- if (x < mhalf_mod) return x += modulus;
- return x;
- }
-
- inline Element &mul (Element &x,
- const Element &y, const Element &z) const
- {
- x = y * z;
- return init (x,x);
- }
-
- inline Element &div (Element &x,
- const Element &y, const Element &z) const
- {
- Element temp;
- inv (temp, z);
- return mul (x, y, temp);
- }
-
- inline Element &neg (Element &x,
- const Element &y) const
- {
- return x = -y;
- }
-
- inline Element &inv (Element &x, const Element &y) const
- {
- // The extended Euclidean algoritm
- int x_int, y_int, q, tx, ty, temp;
- x_int = int (modulus);
- y_int = (y < 0.) ? int(y + modulus) : int(y);
- tx = 0;
- ty = 1;
-
- while (y_int != 0) {
- // always: gcd (modulus,residue) = gcd (x_int,y_int)
- // sx*modulus + tx*residue = x_int
- // sy*modulus + ty*residue = y_int
- q = x_int / y_int; // integer quotient
- temp = y_int; y_int = x_int - q * y_int;
- x_int = temp;
- temp = ty; ty = tx - q * ty;
- tx = temp;
- }
-
- if (tx > half_mod ) return x = tx - modulus;
- else if ( tx < mhalf_mod ) return x = tx + modulus;
- else return x = (double) tx;
- }
-
- inline Element &axpy (Element &r,
- const Element &a,
- const Element &x,
- const Element &y) const
- {
- r = a * x + y;
- return init (r, r);
- }
-
- inline Element &addin (Element &x, const Element &y) const
- {
- x += y;
- if ( x > half_mod ) return x -= modulus;
- if ( x < mhalf_mod ) return x += modulus;
- return x;
- }
-
- inline Element &subin (Element &x, const Element &y) const
- {
- x -= y;
- if ( x > half_mod ) return x -= modulus;
- if ( x < mhalf_mod ) return x += modulus;
- return x;
- }
-
- inline Element &mulin (Element &x, const Element &y) const
- {
- return mul(x,x,y);
- }
-
- inline Element &divin (Element &x, const Element &y) const
- {
- return div(x,x,y);
- }
-
- inline Element &negin (Element &x) const
- {
- return x = - x;
- }
-
- inline Element &invin (Element &x) const
- {
- return inv (x, x);
- }
-
- inline Element &axpyin (Element &r, const Element &a, const Element &x) const
- {
- r += a * x;
- return init (r, r);
- }
-
- static inline double getMaxModulus()
- {
- return 67108864.0; // 2^26
- // return 1 << (DBL_MANT_DIG >> 1); // 2^(DBL_MANT_DIG/2)
- // FFLASFFPACK_check(94906266LL*94906267LL>9007199254740991LL);
- // FFLASFFPACK_check(94906265LL*94906266LL<9007199254740991LL);
- // return 189812531 ;
- }
-
- };
-
-
-} // FFPACK
-
-// const double FFPACK::ModularBalanced<double>::one = 1;
-// const double FFPACK::ModularBalanced<double>::mOne = -1;
-// const double FFPACK::ModularBalanced<double>::zero = 0;
-
-#include "field-general.h"
-
-#endif // __FFLASFFPACK_modular_balanced_double_H
-
diff --git a/fflas-ffpack/field/modular-balanced-float.h b/fflas-ffpack/field/modular-balanced-float.h
deleted file mode 100644
index b3c416d..0000000
--- a/fflas-ffpack/field/modular-balanced-float.h
+++ /dev/null
@@ -1,486 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* field/modular-balanced-float.h
- * Copyright (C) 2003 Pascal Giorgi
- * 2005,2008 Clement Pernet
- * Written by Clement Pernet <clement.pernet at gmail.com>
- * Pascal Giorgi <pascal.giorgi at ens-lyon.fr>
- * Modified Brice Boyer <bboyer at imag.fr>
- * ------------------------------------
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- */
-
-/*! @file field/modular-balanced-float.h
- * @ingroup field
- * @brief Balanced representation of <code>Z/mZ</code> over \c float .
- */
-
-#ifndef __FFLASFFPACK_modular_balanced_float_H
-#define __FFLASFFPACK_modular_balanced_float_H
-
-#include <math.h>
-#include "fflas-ffpack/field/modular-randiter.h"
-#include "fflas-ffpack/field/nonzero-randiter.h"
-#include "fflas-ffpack/utils/debug.h"
-#include <float.h>
-
-namespace FFPACK {
-
- template<>
- class ModularBalanced <float>{
-
- public:
- typedef float Element;
-
- protected:
- Element modulus;
- Element half_mod;
- Element mhalf_mod;
- unsigned long lmodulus;
-
-
- public:
- typedef unsigned long FieldInt;
- typedef ModularBalancedRandIter<float> RandIter;
- typedef NonzeroRandIter<ModularBalanced<float>, RandIter> NonZeroRandIter;
-
- const Element one ;
- const Element zero ;
- const Element mOne ;
-
- static const bool balanced = true ;
-
- ModularBalanced (int32_t p, int exp = 1) :
- modulus((Element)p),
- half_mod( Element((p-1)/2)),
- mhalf_mod( (Element) half_mod-modulus+1),
- lmodulus ((unsigned int)p)
- ,one(1),zero(0),mOne(-1)
- {
-#ifdef DEBUG
- if(modulus <= 1)
- throw Failure(__func__,__FILE__,
- __LINE__,
- "modulus must be > 1");
- if( exp != 1 ) throw Failure(__func__,__FILE__,
- __LINE__,
- "exponent must be 1");
- if ((Element) modulus > (Element) getMaxModulus())
- throw Failure (__func__,__FILE__,
- __LINE__,
- "modulus is too big");
-#endif
- }
-
- ModularBalanced (Element p) :
- modulus (p),
- half_mod( Element((p-1)/2)),
- mhalf_mod( half_mod-p+1),
- lmodulus ((unsigned long)p)
- ,one(1),zero(0),mOne(-1)
- {
-#ifdef DEBUG
- if (modulus <= 1)
- throw Failure(__func__,__FILE__,
- __LINE__,
- "modulus must be > 1");
- if ((Element) modulus > (Element) getMaxModulus())
- throw Failure (__func__,__FILE__,
- __LINE__,
- "modulus is too big");
-#endif
- }
-
- ModularBalanced (double p) :
- modulus (Element(p)),
- half_mod( Element((p-1)/2)),
- mhalf_mod( half_mod-Element(p)+1),
- lmodulus ((unsigned long)p)
- ,one(1),zero(0),mOne(-1)
- {
-#ifdef DEBUG
- if (modulus <= 1)
- throw Failure(__func__,__FILE__,
- __LINE__,
- "modulus must be > 1");
- if ((Element) modulus > (Element) getMaxModulus())
- throw Failure (__func__,__FILE__,
- __LINE__,
- "modulus is too big");
-#endif
- }
-
-
- ModularBalanced (FieldInt p) :
- modulus((Element)p),
- half_mod( Element((p-1)/2)),
- mhalf_mod( (Element) half_mod-modulus+1),
- lmodulus(p)
- ,one(1),zero(0),mOne(-1)
- {
-#ifdef DEBUG
- if ((Element) modulus <= 1)
- throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if ((Element) modulus > (Element) getMaxModulus())
- throw Failure (__func__,__FILE__,
- __LINE__,
- "modulus is too big");
-#endif
- }
-
- ModularBalanced<float>(const ModularBalanced<float>& mf) :
- modulus(mf.modulus),
- half_mod(mf.half_mod),
- mhalf_mod(mf.mhalf_mod),
- lmodulus (mf.lmodulus)
- ,one(mf.one),zero(mf.zero),mOne(mf.mOne)
- {}
-
- ModularBalanced<Element> & assign(const ModularBalanced<Element> &F)
- {
- modulus = F.modulus;
- half_mod = F.half_mod;
- mhalf_mod = F.mhalf_mod;
- lmodulus = F.lmodulus;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-
-#if 1
- const ModularBalanced<float> &operator=(const ModularBalanced<float> &F)
- {
- modulus = F.modulus;
- half_mod = F.half_mod;
- mhalf_mod = F.mhalf_mod;
- lmodulus = F.lmodulus;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-#endif
-
- FieldInt &cardinality (FieldInt &c) const
- {
- return c = (FieldInt) modulus;
- }
-
- FieldInt cardinality () const
- {
- return (FieldInt) modulus;
- }
-
-
- long unsigned int &characteristic (long unsigned int &c) const
- {
- return c = (FieldInt) modulus;
- }
-
- FieldInt characteristic () const
- {
- return (FieldInt) modulus;
- }
-
- unsigned long &convert (unsigned long &x, const Element &y) const
- {
- // if ( y < 0. )
- // return x= (unsigned long) (y + modulus) ;
- // else
- return x = (unsigned long)y;
- }
-
- float &convert (float &x, const Element& y) const
- {
- return x=y;
- }
- double &convert (double &x, const Element& y) const
- {
- return x=y;
- }
-
- std::ostream &write (std::ostream &os) const
- {
- return os << "balanced float mod " << int(modulus);
- }
-
- std::istream &read (std::istream &is) {
- is >> modulus;
-#ifdef DEBUG
- if(modulus <= 1)
- throw Failure (__func__,
- __LINE__,
- "modulus must be > 1");
- if(modulus > getMaxModulus())
- throw Failure (__func__,
- __LINE__,
- "modulus is too big");
-#endif
- return is;
- }
-
- std::ostream &write (std::ostream &os, const Element &x) const
- {
- return os << int(x);
- }
-
- std::istream &read (std::istream &is, Element &x) const
- {
- float tmp;
- is >> tmp;
- init(x,tmp);
- return is;
- }
-
- Element &init (Element &x, const unsigned long &y) const
- {
- Element tmp = Element(y % lmodulus);
- if (tmp > half_mod)
-return x = tmp-modulus;
- else if (tmp<mhalf_mod)
-return x = tmp+modulus;
- else
-return x=tmp;
- }
-
- Element &init (Element &x, const long &y) const
- {
- // pas de pbème : float tout petit !
- Element tmp = Element(y % (long) lmodulus);
- if (tmp > half_mod)
-return x = tmp-modulus;
- else if (tmp<mhalf_mod)
-return x = tmp+modulus;
- else
-return x=tmp;
- }
-
- Element &init (Element &x, const int &y) const
- {
- // pas de pbème : float tout petit !
- Element tmp = Element(y % (long) lmodulus);
- if (tmp > half_mod)
- return x = tmp-modulus;
- else if (tmp<mhalf_mod)
- return x = tmp+modulus;
- else
- return x=tmp;
- }
-
- inline Element& init(Element& x, const double y ) const
- {
- x = (Element) fmod (y, double(modulus));
- if ( x > half_mod )
- return x -= modulus;
- else if ( x < mhalf_mod )
- return x += modulus;
- else
- return x ;
- }
-
- inline Element& init(Element& x, const Element y) const
- {
-
- x = fmodf (y, modulus);
-
- if ( x > half_mod )
- return x -= modulus;
- else if ( x < mhalf_mod )
- return x += modulus;
- else
- return x ;
- }
-
- inline Element& init(Element& x) const
- {
-
- return x=0 ;
- }
-
- inline Element& assign(Element& x, const Element& y) const
- {
- return x = y;
- }
-
- inline bool areEqual (const Element &x, const Element &y) const
- {
- return x == y;
- }
-
- inline bool isZero (const Element &x) const
- {
- return x == 0.;
- }
-
- inline bool isOne (const Element &x) const
- {
- return x == 1.;
- }
-
- inline Element &add (Element &x,
- const Element &y, const Element &z) const
- {
- x = y + z;
- if ( x > half_mod )
-return x -= modulus;
- if ( x < mhalf_mod )
-return x += modulus;
- else
-return x;
- }
-
- inline Element &sub (Element &x,
- const Element &y, const Element &z) const
- {
- x = y - z;
- if (x > half_mod)
-return x -= modulus;
- if (x < mhalf_mod)
-return x += modulus;
- else
-return x;
- }
-
- inline Element &mul (Element &x,
- const Element &y, const Element &z) const
- {
- x = y * z;
- return init (x,x);
- }
-
- //! @todo remove temp
- inline Element &div (Element &x, const Element &y, const Element &z) const
- {
- Element temp ;
- inv (temp, z);
- return mul (x, y, temp);
- }
-
- inline Element &neg (Element &x, const Element &y) const
- {
- return x = -y;
- }
-
- inline Element &inv (Element &x, const Element &y) const
- {
- // The extended Euclidean algoritm
- int x_int, y_int, q, tx, ty, temp;
- x_int = int (modulus);
- y_int = (y < 0.) ? int(y + modulus) : int(y);
- tx = 0;
- ty = 1;
-
- while (y_int != 0) {
- // always: gcd (modulus,residue) = gcd (x_int,y_int)
- // sx*modulus + tx*residue = x_int
- // sy*modulus + ty*residue = y_int
- q = x_int / y_int; // integer quotient
- temp = y_int; y_int = x_int - q * y_int;
- x_int = temp;
- temp = ty; ty = tx - q * ty;
- tx = temp;
- }
-
- if (tx > half_mod )
- return x = (Element)tx - modulus;
- else if ( tx < mhalf_mod )
- return x = (Element)tx + modulus;
- return x = (Element) tx;
- }
-
- inline Element &axpy (Element &r,
- const Element &a,
- const Element &x,
- const Element &y) const
- {
- r = a * x + y;
- return init (r, r);
- }
-
- inline Element &addin (Element &x, const Element &y) const
- {
- x += y;
- if ( x > half_mod )
- return x -= modulus;
- else if ( x < mhalf_mod )
- return x += modulus;
- return x;
- }
-
- inline Element &subin (Element &x, const Element &y) const
- {
- x -= y;
- if ( x > half_mod )
-return x -= modulus;
- else if ( x < mhalf_mod )
-return x += modulus;
- return x;
- }
-
- inline Element &mulin (Element &x, const Element &y) const
- {
- return mul(x,x,y);
- }
-
- inline Element &divin (Element &x, const Element &y) const
- {
- return div(x,x,y);
- }
-
- inline Element &negin (Element &x) const
- {
- return x = -x;
- }
-
- inline Element &invin (Element &x) const
- {
- return inv (x, x);
- }
-
- inline Element &axpyin (Element &r, const Element &a, const Element &x) const
- {
- r += a * x;
- return init (r, r);
- }
-
- static inline Element getMaxModulus()
- {
- // FFLASFFPACK_check (4095*4096<16777215)
- // FFLASFFPACK_check (4097*4096>16777215)
- // return 1 << (FLT_MANT_DIG >> 1); // 2^(FLT_MANT_DIG/2)
- return 8191 ;
- }
-
- };
-
-} // FFPACK
-
-#include "field-general.h"
-
-// const float FFPACK::ModularBalanced<float>::one = 1;
-// const float FFPACK::ModularBalanced<float>::mOne = -1;
-// const float FFPACK::ModularBalanced<float>::zero = 0;
-
-
-#endif // __FFLASFFPACK_modular_balanced_double_H
-
diff --git a/fflas-ffpack/field/modular-balanced-int32.h b/fflas-ffpack/field/modular-balanced-int32.h
deleted file mode 100644
index ac9ead9..0000000
--- a/fflas-ffpack/field/modular-balanced-int32.h
+++ /dev/null
@@ -1,473 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2009 LinBox
- * Written by C Pernet
- * updated to compilable condition by <brice.boyer at imag.fr>
- *
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *
- */
-
-
-/*! @file field/modular-balanced-int32.h
- * @ingroup field
- * @brief Balanced representation of <code>Z/mZ</code> over \c int32_t .
- */
-
-#ifndef __FFLASFFPACK_modular_balanced_int32_H
-#define __FFLASFFPACK_modular_balanced_int32_H
-
-#include <math.h>
-#include "fflas-ffpack/field/modular-randiter.h"
-#include "fflas-ffpack/field/nonzero-randiter.h"
-#include "fflas-ffpack/utils/debug.h"
-
-#ifndef LINBOX_MAX_INT
-#define LINBOX_MAX_INT INT32_MAX
-#endif
-
-
-namespace FFPACK
-{
-
-
- /// \ingroup field
- template <>
- class ModularBalanced<int32_t> {
- protected:
- int32_t modulus;
- int32_t half_mod;
- int32_t mhalf_mod;
- double modulusinv;
-
- public:
-
- typedef int32_t Element;
- typedef ModularBalancedRandIter<int32_t> RandIter;
- typedef NonzeroRandIter<ModularBalanced<int32_t>,RandIter> NonZeroRandIter;
-
- const Element one ;
- const Element zero ;
- const Element mOne ;
-
- static const bool balanced = true;
-
- //default modular field,taking 65521 as default modulus
- ModularBalanced () :
- modulus(65521)
- ,one(1),zero(0),mOne(-1)
- {
- modulusinv = 1/(double)65521;
- half_mod = (65521 >> 1);
- mhalf_mod = half_mod-65520;
- }
-
- ModularBalanced (int32_t value, int exp = 1) :
- modulus(value)
- ,one(1),zero(0),mOne(-1)
- {
- half_mod = (modulus >> 1);
- mhalf_mod = half_mod-modulus+1;
- modulusinv = 1 / ((double) value);
-#ifdef DEBUG
- if(exp != 1) throw Failure(__func__,__FILE__,__LINE__,"exponent must be 1");
- if(value <= 1) throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- // std::cout << value << '<' << getMaxModulus() << std::endl;
- if(value > getMaxModulus() ) throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
- if( ! (value % 2) ) throw Failure(__func__,__FILE__,__LINE__,"modulus must be odd");
-#endif
-
- }
-
- ModularBalanced (const ModularBalanced<int32_t>& mf) :
- modulus(mf.modulus),
- half_mod(mf.half_mod),
- mhalf_mod(mf.mhalf_mod),
- modulusinv(mf.modulusinv)
- ,one(mf.one),zero(mf.zero),mOne(mf.mOne)
- { }
-
- ModularBalanced<Element> & assign(const ModularBalanced<Element> &F)
- {
- modulus = F.modulus;
- half_mod = F.half_mod;
- mhalf_mod = F.mhalf_mod;
- // lmodulus = F.lmodulus;
- modulusinv = F.modulusinv;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-
-#if 1
- const ModularBalanced &operator=(const ModularBalanced<int32_t> &F)
- {
- modulus = F.modulus;
- half_mod = F.half_mod;
- mhalf_mod = F.mhalf_mod;
- // lmodulus = F.lmodulus;
- modulusinv = F.modulusinv;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-#endif
-
- size_t characteristic () const
- {
- return (size_t) modulus;
- }
-
- unsigned long &characteristic (unsigned long & c) const
- {
- return c=(unsigned long)modulus;
- }
-
- size_t cardinality () const
- {
- return (size_t) modulus;
- }
-
-
- double & convert(double &x, const Element &y) const
- {
- return x = (double) y;
- }
-
- float & convert(float &x, const Element &y) const
- {
- return x = (float) y;
- }
-
-
-
- std::ostream &write (std::ostream &os) const
- {
- return os << "balanced int32_t mod " << modulus;
- }
-
- std::istream &read (std::istream &is)
- {
- is >> modulus;
- half_mod = modulus/2;
- mhalf_mod = half_mod-modulus+1;
- modulusinv = 1 /((double) modulus );
-#ifdef DEBUG
- if(modulus <= 1) throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
-
- if(modulus > getMaxModulus() ) throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
- if( ! (modulus % 2) ) throw Failure(__func__,__FILE__,__LINE__,"modulus must be oddd");
-#endif
-
- return is;
- }
-
- std::ostream &write (std::ostream &os, const Element &x) const
- {
- return os << x;
- }
-
- std::istream &read (std::istream &is, Element &x) const
- {
- int32_t tmp;
- is >> tmp;
- init(x,tmp);
- return is;
- }
-
-
- Element &init (Element &x, const double &y) const
- {
- x = (Element) fmod(y,(double)modulus);
- if (x < mhalf_mod) x += modulus;
- else if (x > half_mod) x -= modulus;
- return x;
- }
-
-
- Element &init (Element &x, const size_t &y) const
- {
- x = Element((Element)y % (Element)modulus);
- if (x < mhalf_mod)
- x += modulus;
- else if (x > half_mod)
- x -= modulus;
- return x;
- }
-
-
- inline Element& init(Element& x, int y ) const
- {
- x = Element(y % modulus);
-
- if ( x < mhalf_mod )
- x += modulus;
- else if (x > half_mod )
- x -= modulus;
-
- return x;
- }
-
- inline Element& init(Element& x ) const
- {
- return x = 0;
- }
-
- inline Element& init(Element& x, long y) const
- {
- x = Element(y % modulus);
- if ( x < mhalf_mod )
- x += modulus;
- else if ( x > half_mod )
- x -= modulus;
-
- return x;
- }
-
- inline Element& assign(Element& x, const Element& y) const
- {
- return x = y;
- }
-
-
- inline bool areEqual (const Element &x, const Element &y) const
- {
- return x == y;
- }
-
- inline bool isZero (const Element &x) const
- {
- return x == 0;
- }
-
- inline bool isOne (const Element &x) const
- {
- return x == 1;
- }
-
- inline Element &add (Element &x, const Element &y, const Element &z) const
- {
- x = y + z;
- if ( x > half_mod ) x -= modulus;
- else if ( x < mhalf_mod ) x += modulus;
-
- return x;
- }
-
- inline Element &sub (Element &x, const Element &y, const Element &z) const
- {
- x = y - z;
- if (x > half_mod) x -= modulus;
- else if (x < mhalf_mod) x += modulus;
- return x;
- }
-
- inline Element &mul (Element &x, const Element &y, const Element &z) const
- {
- int32_t q;
-
- q = (int32_t) ((((double) y) * ((double) z)) * modulusinv); // q could be off by (+/-) 1
- x = (int32_t) (y*z - q*modulus);
-
- if (x > half_mod)
- x -= modulus;
- else if (x < mhalf_mod)
- x += modulus;
-
- return x;
- }
-
- inline Element &div (Element &x, const Element &y, const Element &z) const
- {
- Element temp;
- inv (temp, z);
- return mul (x, y, temp);
- }
-
- inline Element &neg (Element &x, const Element &y) const
- {
- return x = -y;
- }
-
- inline Element &inv (Element &x, const Element &y) const
- {
- int32_t d, t;
- XGCD(d, x, t, y, modulus);
-#ifdef DEBUG
- if (d != 1)
- throw Failure(__func__,__FILE__,__LINE__,"InvMod: inverse undefined");
-#endif
- if (x > half_mod)
- x -= modulus;
- else if (x < mhalf_mod)
- x += modulus;
-
- return x;
-
- }
-
- inline Element &axpy (Element &r,
- const Element &a,
- const Element &x,
- const Element &y) const
- {
- int32_t q;
-
- q = (int32_t) (((((double) a) * ((double) x)) + (double)y) * modulusinv); // q could be off by (+/-) 1
- r = (int32_t) (a * x + y - q*modulus);
-
-
- if (r > half_mod)
- r -= modulus;
- else if (r < mhalf_mod)
- r += modulus;
-
- return r;
-
- }
-
- inline Element &addin (Element &x, const Element &y) const
- {
- x += y;
- if ( x > half_mod ) x -= modulus;
- else if (x < mhalf_mod) x += modulus;
-
- return x;
- }
-
- inline Element &subin (Element &x, const Element &y) const
- {
- x -= y;
- if (x > half_mod)
- x -= modulus;
- else if (x < mhalf_mod)
- x += modulus;
-
- return x;
- }
-
- inline Element &mulin (Element &x, const Element &y) const
- {
- return mul(x,x,y);
- }
-
- inline Element &divin (Element &x, const Element &y) const
- {
- return div(x,x,y);
- }
-
- inline Element &negin (Element &x) const
- {
- return x = -x;
- }
-
- inline Element &invin (Element &x) const
- {
- return inv (x, x);
- }
-
- inline Element &axpyin (Element &r, const Element &a, const Element &x) const
- {
- int32_t q;
-
- q = (int32_t) (((((double) a)*((double) x)) + (double)r) * modulusinv); // q could be off by (+/-) 1
- r = (int32_t) (a * x + r - q*modulus);
-
-
- if (r > half_mod)
- r -= modulus;
- else if (r < mhalf_mod)
- r += modulus;
-
- return r;
- }
-
- static inline int32_t getMaxModulus()
- {
- // return 1073741824; // 2^30
- return 2147483647; // 2^31 -1
- // return 1 << 15; // 2^15
- // FFLASFFPACK_check(46340LL*46341LL<2147483647LL);
- // FFLASFFPACK_check(46342LL*46341LL>2147483647LL);
- // return 92681 ;
- }
-
- private:
-
- inline static void XGCD(int32_t& d, int32_t& s, int32_t& t, int32_t a, int32_t b) {
- int32_t u, v, u0, v0, u1, v1, u2, v2, q, r;
-
- int32_t aneg = 0, bneg = 0;
-
- if (a < 0) {
-#ifdef DEBUG
- if (a < -LINBOX_MAX_INT) throw Failure(__func__,__FILE__,__LINE__,"XGCD: integer overflow");
-#endif
- a = -a;
- aneg = 1;
- }
-
- if (b < 0) {
-#ifdef DEBUG
- if (b < -LINBOX_MAX_INT) throw Failure(__func__,__FILE__,__LINE__,"XGCD: integer overflow");
-#endif
- b = -b;
- bneg = 1;
- }
-
- u1 = 1; v1 = 0;
- u2 = 0; v2 = 1;
- u = a; v = b;
-
- while (v != 0) {
- q = u / v;
- r = u % v;
- u = v;
- v = r;
- u0 = u2;
- v0 = v2;
- u2 = u1 - q*u2;
- v2 = v1- q*v2;
- u1 = u0;
- v1 = v0;
- }
-
- if (aneg)
- u1 = -u1;
-
- if (bneg)
- v1 = -v1;
-
- d = u;
- s = u1;
- t = v1;
- }
-
- };
-
-}
-
-
-#include "field-general.h"
-#endif // __FFLASFFPACK_modular_balanced_int32_H
-
diff --git a/fflas-ffpack/field/modular-balanced-int64.h b/fflas-ffpack/field/modular-balanced-int64.h
deleted file mode 100644
index ec95830..0000000
--- a/fflas-ffpack/field/modular-balanced-int64.h
+++ /dev/null
@@ -1,486 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2010 LinBox
- * Adapted by B Boyer <brice.boyer at imag.fr>
- * (from other modular-balanced* files)
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *
- */
-
-
-/*! @file field/modular-balanced-int64.h
- * @ingroup field
- * @brief Balanced representation of <code>Z/mZ</code> over \c int64_t .
- */
-
-#ifndef __FFLASFFPACK_modular_balanced_int64_H
-#define __FFLASFFPACK_modular_balanced_int64_H
-
-#include <math.h>
-#include "fflas-ffpack/field/modular-randiter.h"
-#include "fflas-ffpack/field/nonzero-randiter.h"
-#include "fflas-ffpack/utils/debug.h"
-
-
-#ifndef LINBOX_MAX_INT64
-#ifdef __x86_64__
-#define LINBOX_MAX_INT64 INT64_MAX
-#else
-#define LINBOX_MAX_INT64 INT64_MAX
-#endif
-#endif
-
-// todo INT64_MAX
-
-
-namespace FFPACK
-{
-
-
- /// \ingroup field
- template <>
- class ModularBalanced<int64_t> {
- protected:
- int64_t modulus;
- int64_t half_mod;
- int64_t mhalf_mod;
- double modulusinv;
-
- public:
-
-
- typedef int64_t Element;
- typedef ModularBalancedRandIter<int64_t> RandIter;
-
-
- const Element one ;
- const Element zero ;
- const Element mOne ;
-
-
- static const bool balanced = true ;
-
- //default modular field,taking 65521 as default modulus
- ModularBalanced () :
- modulus(65521)
- ,one(1),zero(0),mOne(-1)
- {
- modulusinv = 1/(double)65521;
- half_mod = (65521 >> 1);
- mhalf_mod = half_mod-65520;
- }
-
- ModularBalanced (int64_t value, int exp = 1) :
- modulus(value)
- ,one(1),zero(0),mOne(-1)
- {
- half_mod = (modulus >> 1);
- mhalf_mod = half_mod-modulus+1;
- modulusinv = 1 / ((double) value);
-#ifdef DEBUG
- if(exp != 1) throw Failure(__func__,__FILE__,__LINE__,"exponent must be 1");
- if(value <= 1) throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if(value > getMaxModulus() ) throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
- if( ! (value % 2) ) throw Failure(__func__,__FILE__,__LINE__,"modulus must be odd");
-#endif
-
- }
-
- ModularBalanced (const ModularBalanced<int64_t>& mf) :
- modulus(mf.modulus),
- half_mod(mf.half_mod),
- mhalf_mod(mf.mhalf_mod),
- modulusinv(mf.modulusinv)
- ,one(mf.one),zero(mf.zero),mOne(mf.mOne)
- { }
-
- ModularBalanced<Element> & assign(const ModularBalanced<Element> &F)
- {
- modulus = F.modulus;
- half_mod = F.half_mod;
- mhalf_mod = F.mhalf_mod;
- // lmodulus = F.lmodulus;
- modulusinv = F.modulusinv;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-
-#if 1
- const ModularBalanced &operator=(const ModularBalanced<int64_t> &F)
- {
- modulus = F.modulus;
- half_mod = F.half_mod;
- mhalf_mod = F.mhalf_mod;
- // lmodulus = F.lmodulus;
- modulusinv = F.modulusinv;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-#endif
-
- uint64_t characteristic () const
- {
- return (uint64_t)modulus;
- }
-
- size_t cardinality () const
- {
- return (size_t) modulus;
- }
-
- double & convert(double &x, const Element &y) const
- {
- return x = (double) y;
- }
-
- float & convert(float &x, const Element &y) const
- {
- return x = (float) y;
- }
-
- std::ostream &write (std::ostream &os) const
- {
- return os << "balanced int64_t mod " << modulus;
- }
-
- std::istream &read (std::istream &is)
- {
- is >> modulus;
- half_mod = modulus/2;
- mhalf_mod = half_mod-modulus+1;
- modulusinv = 1 /((double) modulus );
-#ifdef DEBUG
- if(modulus <= 1) throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if(modulus > getMaxModulus() ) throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
- if( ! (modulus % 2) ) throw Failure(__func__,__FILE__,__LINE__,"modulus must be oddd");
-#endif
-
- return is;
- }
-
- std::ostream &write (std::ostream &os, const Element &x) const
- {
- return os << x;
- }
-
- std::istream &read (std::istream &is, Element &x) const
- {
- int64_t tmp;
- is >> tmp;
- init(x,tmp);
- return is;
- }
-
-
- template<class Element1>
- Element &init (Element & x, const Element1 &y) const
- {
- x = y % modulus;
-
- if ( x < mhalf_mod )
- x += modulus;
- else if (x > half_mod )
- x -= modulus;
-
- return x;
- }
-
- Element &init (Element &x, const double &y) const
- {
- x = (Element) fmod(y,(double)modulus);
- if (x < mhalf_mod)
- x += modulus;
- else if (x > half_mod)
- x -= modulus;
- return x;
- }
-
-
- Element &init (Element &x, const size_t &y) const
- {
- x = (Element)y % Element(modulus);
- if (x < mhalf_mod)
- x += modulus;
- else if (x > half_mod)
- x -= modulus;
- return x;
- }
-
-
- inline Element& init(Element& x, int y =0) const
- {
- x = y % modulus;
-
- if ( x < mhalf_mod )
- x += modulus;
- else if (x > half_mod )
- x -= modulus;
-
- return x;
- }
-
- inline Element& init(Element& x, long y) const
- {
- x = y % modulus;
- if ( x < mhalf_mod ) x += modulus;
- else if ( x > half_mod ) x -= modulus;
-
- return x;
- }
-
- inline Element& assign(Element& x, const Element& y) const
- {
- return x = y;
- }
-
-
- inline bool areEqual (const Element &x, const Element &y) const
- {
- return x == y;
- }
-
- inline bool isZero (const Element &x) const
- {
- return x == 0;
- }
-
- inline bool isOne (const Element &x) const
- {
- return x == 1;
- }
-
- inline Element &add (Element &x, const Element &y, const Element &z) const
- {
- x = y + z;
- if ( x > half_mod ) x -= modulus;
- else if ( x < mhalf_mod ) x += modulus;
-
- return x;
- }
-
- inline Element &sub (Element &x, const Element &y, const Element &z) const
- {
- x = y - z;
- if (x > half_mod) x -= modulus;
- else if (x < mhalf_mod) x += modulus;
- return x;
- }
-
- inline Element &mul (Element &x, const Element &y, const Element &z) const
- {
- int64_t q;
-
- q = (int64_t) ((((double) y) * ((double) z)) * modulusinv); // q could be off by (+/-) 1
- x = (int64_t) (y*z - q*modulus);
-
- if (x > half_mod)
- x -= modulus;
- else if (x < mhalf_mod)
- x += modulus;
-
- return x;
- }
-
- inline Element &div (Element &x, const Element &y, const Element &z) const
- {
- Element temp;
- inv (temp, z);
- return mul (x, y, temp);
- }
-
- inline Element &neg (Element &x, const Element &y) const
- {
- return x = -y;
- }
-
- inline Element &inv (Element &x, const Element &y) const
- {
- int64_t d, t;
- XGCD(d, x, t, y, modulus);
-#ifdef DEBUG
- if (d != 1)
- throw Failure(__func__,__FILE__,__LINE__,"InvMod: inverse undefined");
-#endif
- if (x > half_mod)
- x -= modulus;
- else if (x < mhalf_mod)
- x += modulus;
-
- return x;
-
- }
-
- inline Element &axpy (Element &r,
- const Element &a,
- const Element &x,
- const Element &y) const
- {
- int64_t q;
-
- q = (int64_t) (((((double) a) * ((double) x)) + (double)y) * modulusinv); // q could be off by (+/-) 1
- r = (int64_t) (a * x + y - q*modulus);
-
-
- if (r > half_mod)
- r -= modulus;
- else if (r < mhalf_mod)
- r += modulus;
-
- return r;
-
- }
-
- inline Element &addin (Element &x, const Element &y) const
- {
- x += y;
- if ( x > half_mod ) x -= modulus;
- else if (x < mhalf_mod) x += modulus;
-
- return x;
- }
-
- inline Element &subin (Element &x, const Element &y) const
- {
- x -= y;
- if (x > half_mod)
- x -= modulus;
- else if (x < mhalf_mod)
- x += modulus;
-
- return x;
- }
-
- inline Element &mulin (Element &x, const Element &y) const
- {
- return mul(x,x,y);
- }
-
- inline Element &divin (Element &x, const Element &y) const
- {
- return div(x,x,y);
- }
-
- inline Element &negin (Element &x) const
- {
- return x = -x;
- }
-
- inline Element &invin (Element &x) const
- {
- return inv (x, x);
- }
-
- inline Element &axpyin (Element &r, const Element &a, const Element &x) const
- {
- int64_t q;
-
- q = (int64_t) (((((double) a)*((double) x)) + (double)r) * modulusinv); // q could be off by (+/-) 1
- r = (int64_t) (a * x + r - q*modulus);
-
-
- if (r > half_mod)
- r -= modulus;
- else if (r < mhalf_mod)
- r += modulus;
-
- return r;
- }
-
- static inline int64_t getMaxModulus()
- {
-#if 1
-#ifdef __x86_64__
- return 4611686018427387904L; // 2^62
- // return 8589934591L;
-#else
- return 4611686018427387904LL; // 2^62
- // return 8589934591LL;
-#endif
-#endif
- // return 1 << 31;
- }
-
- private:
-
- inline static void XGCD(int64_t& d, int64_t& s, int64_t& t, int64_t a, int64_t b)
- {
- int64_t u, v, u0, v0, u1, v1, u2, v2, q, r;
-
- int64_t aneg = 0, bneg = 0;
-
- if (a < 0) {
-#ifdef DEBUG
- if (a < -LINBOX_MAX_INT64) throw Failure(__func__,__FILE__,__LINE__,"XGCD: integer overflow");
-#endif
- a = -a;
- aneg = 1;
- }
-
- if (b < 0) {
-#ifdef DEBUG
- if (b < -LINBOX_MAX_INT64) throw Failure(__func__,__FILE__,__LINE__,"XGCD: integer overflow");
-#endif
- b = -b;
- bneg = 1;
- }
-
- u1 = 1; v1 = 0;
- u2 = 0; v2 = 1;
- u = a; v = b;
-
- while (v != 0) {
- q = u / v;
- r = u % v;
- u = v;
- v = r;
- u0 = u2;
- v0 = v2;
- u2 = u1 - q*u2;
- v2 = v1- q*v2;
- u1 = u0;
- v1 = v0;
- }
-
- if (aneg)
- u1 = -u1;
-
- if (bneg)
- v1 = -v1;
-
- d = u;
- s = u1;
- t = v1;
- }
-
- };
-
-}
-
-#undef LINBOX_MAX_INT64
-
-#include "field-general.h"
-
-#endif //__FFLASFFPACK_modular_balanced_int64_H
-
diff --git a/fflas-ffpack/field/modular-balanced.h b/fflas-ffpack/field/modular-balanced.h
deleted file mode 100644
index ada887a..0000000
--- a/fflas-ffpack/field/modular-balanced.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* field/modular-balanced-double.h
- * Copyright (C) 2003 Pascal Giorgi
- * 2005 Clement Pernet
- * Written by Pascal Giorgi <pascal.giorgi at ens-lyon.fr>
- * and Clement Pernet <Clement.Pernet at imag.fr>
- * and Brice Boyer <bboyer at imag.fr>
- *
- * ------------------------------------
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- */
-
-/*! @file field/modular-balanced-double.h
- * @ingroup field
- * @brief Balanced representation of <code>Z/mZ</code> over \c double .
- */
-
-#ifndef __FFLASFFPACK_modular_balanced_H
-#define __FFLASFFPACK_modular_balanced_H
-
-#include <math.h>
-#include "fflas-ffpack/field/modular-randiter.h"
-#include "fflas-ffpack/field/nonzero-randiter.h"
-
-namespace FFPACK {
-
-template <class Element>
-class ModularBalanced;
-
-}
-
-
-#include "fflas-ffpack/field/modular-balanced-double.h"
-#include "fflas-ffpack/field/modular-balanced-float.h"
-#include "fflas-ffpack/field/modular-balanced-int32.h"
-#ifdef __x86_64__ // __WORDSIZE == 64
-#include "fflas-ffpack/field/modular-balanced-int64.h"
-#endif
-
-#endif // __FFLASFFPACK_modular_balanced_double_H
-
diff --git a/fflas-ffpack/field/modular-double.h b/fflas-ffpack/field/modular-double.h
deleted file mode 100644
index 3d24d85..0000000
--- a/fflas-ffpack/field/modular-double.h
+++ /dev/null
@@ -1,416 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/* fflas-ffpack/modular-positive.h
- * Copyright (C) 2003 Pascal Giorgi
- * 2008 Clement Pernet
- * Written by Clement Pernet <clement.pernet at gmail.com>
- * Pascal Giorgi <pascal.giorgi at ens-lyon.fr>
- *
- * ------------------------------------
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- */
-
-#ifndef __FFLASFFPACK_modular_double_H
-#define __FFLASFFPACK_modular_double_H
-
-#include <math.h>
-#include "fflas-ffpack/field/modular-randiter.h"
-#include "fflas-ffpack/field/nonzero-randiter.h"
-#include "fflas-ffpack/utils/debug.h"
-#include <float.h>
-
-namespace FFPACK {
-
- template <>
- class Modular<double> {
- public:
- typedef double Element;
-
- protected:
-
- Element modulus;
- unsigned long lmodulus;
-
- //double inv_modulus;
-
- public:
- typedef unsigned long FieldInt;
-
- const Element one ;
- const Element zero ;
- const Element mOne ;
-
-
- static const bool balanced = false ;
-
- typedef ModularRandIter<double> RandIter;
- typedef NonzeroRandIter<Modular<double>, ModularRandIter<double> > NonZeroRandIter;
-
-
- Modular () :
- modulus(0),lmodulus(0),
- one(0),zero(0),mOne(0)
- {}
-
-
- Modular (int32_t p, int exp = 1) :
- modulus((double)p), lmodulus((unsigned long)p)//, inv_modulus(1./(double)p)
- ,one(1),zero(0),mOne(modulus -1)
- {
-#ifdef DEBUG
- if(modulus <= 1)
- throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if( exp != 1 ) throw Failure(__func__,__FILE__,__LINE__,"exponent must be 1");
- if(modulus > getMaxModulus())
- throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
-#endif
- }
-
- Modular (Element p) :
- modulus(p), lmodulus((unsigned long)p)
- ,one(1),zero(0),mOne(modulus -1)
- {
-#ifdef DEBUG
- if( modulus <= 1 )
- throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if( modulus > getMaxModulus())
- throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
-#endif
- }
-
- Modular (unsigned long int p) :
- modulus((Element)p), lmodulus(p)
- ,one(1),zero(0),mOne(modulus -1)
- {
-#ifdef DEBUG
- if( (Element) modulus <= 1 )
- throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if( (Element) modulus > getMaxModulus())
- throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
-#endif
- }
-
-
-
- Modular(const Modular<Element>& mf) :
- modulus(mf.modulus),
- lmodulus(mf.lmodulus)
- ,one(mf.one),zero(mf.zero),mOne(mf.mOne)
- {}
-
- Modular<Element> & assign(const Modular<Element> &F)
- {
- modulus = F.modulus;
- lmodulus= F.lmodulus;
- //inv_modulus = F.inv_modulus;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-
-#if 1
- const Modular &operator=(const Modular<double> &F)
- {
- modulus = F.modulus;
- lmodulus= F.lmodulus;
- //inv_modulus = F.inv_modulus;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-
-#endif
-
-
- unsigned long &cardinality (unsigned long &c) const
- {
- return c = lmodulus ;
- }
-
- unsigned long cardinality () const
- {
- return lmodulus ;
- }
-
- unsigned long & characteristic (unsigned long &c) const
- {
- return c = lmodulus ;
- }
-
- unsigned long characteristic () const
- {
- return lmodulus;
- }
-
- unsigned long &convert (unsigned long &x, const Element &y) const
- {
- return x = (unsigned long)(y);
- }
-
- Element &convert (Element &x, const Element& y) const
- {
- return x=y;
- }
-
- float &convert (float &x, const Element& y) const
- {
- return x=(float)y;
- }
-
- std::ostream &write (std::ostream &os) const
- {
- return os << "double mod " << (int)modulus;
- }
-
- std::istream &read (std::istream &is)
- {
- is >> modulus;
-#ifdef DEBUG
- if(modulus <= 1)
- throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if(modulus > 94906265)
- throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
-#endif
-
- return is;
- }
-
- std::ostream &write (std::ostream &os, const Element &x) const
- {
- return os << (int)x;
- }
-
- std::istream &read (std::istream &is, Element &x) const
- {
- double tmp;
- is >> tmp;
- init(x,tmp);
- return is;
- }
-
- Element &init (Element &x, const unsigned long &y) const
- {
- x = Element(y % lmodulus);
- if (x < 0.) x += modulus;
- return x;
- }
-
- Element &init (Element &x, const long &y) const
- {
- // no problem here because double<long
- x = Element(y % (long)lmodulus);
- if (x < 0.) x += modulus;
- return x;
- }
-
- Element &init (Element &x, const unsigned int &y) const
- {
- x = Element(y % lmodulus);
- if (x < 0.) x += modulus;
- return x;
- }
-
- Element &init (Element &x, const int &y) const
- {
- // no problem here because int<=long
- x = Element(y % (long)lmodulus);
- if (x < 0.) x += modulus;
- return x;
- }
-
- Element& init(Element& x, Element y) const
- {
-
- x = fmod (y, modulus);
- if (x < 0.) x += modulus;
- return x;
- }
-
- Element& init(Element& x, float y) const
- {
-
- x = fmod ((Element)y, modulus);
- if (x < 0.) x += modulus;
- return x;
- }
-
- Element& init(Element& x) const
- {
- return x=0.;
- }
-
- Element& assign(Element& x, const Element& y) const
- {
- return x = y;
- }
-
- bool areEqual (const Element &x, const Element &y) const
- {
- return x == y;
- }
-
- bool isZero (const Element &x) const
- {
- return x == 0.;
- }
-
- bool isOne (const Element &x) const
- {
- return x == 1.;
- }
-
- Element &add (Element &x, const Element &y, const Element &z) const
- {
- x = y + z;
- if ( x >= modulus ) x -= modulus;
- return x;
- }
-
- Element &sub (Element &x, const Element &y, const Element &z) const
- {
- x = y - z;
- if (x < 0) x += modulus;
- return x;
- }
-
- Element &mul (Element &x, const Element &y, const Element &z) const
- {
- x = y*z;
- return init(x,x);
- }
-
- Element &div (Element &x, const Element &y, const Element &z) const
- {
- Element temp;
- inv (temp, z);
- return mul (x, y, temp);
- }
-
- Element &neg (Element &x, const Element &y) const
- {
- if(y == 0) return x = 0;
- else return x = modulus - y;
- }
-
- Element &inv (Element &x, const Element &y) const
- {
- // The extended Euclidean algoritm
- int x_int, y_int, q, tx, ty, temp;
- x_int = int (modulus);
- y_int = int (y);
- tx = 0;
- ty = 1;
-
- while (y_int != 0) {
- // always: gcd (modulus,residue) = gcd (x_int,y_int)
- // sx*modulus + tx*residue = x_int
- // sy*modulus + ty*residue = y_int
- q = x_int / y_int; // integer quotient
- temp = y_int; y_int = x_int - q * y_int;
- x_int = temp;
- temp = ty; ty = tx - q * ty;
- tx = temp;
- }
-
- if (tx < 0) tx += (int)modulus;
-
- // now x_int = gcd (modulus,residue)
- return x = (Element)tx;
-
-
- }
-
- Element &axpy (Element &r,
- const Element &a,
- const Element &x,
- const Element &y) const
- {
- r = a * x + y;
- return init(r,r);
-
- }
-
- Element &addin (Element &x, const Element &y) const
- {
- x += y;
- if ( x >= modulus ) x -= modulus;
- return x;
- }
-
- Element &subin (Element &x, const Element &y) const
- {
- x -= y;
- if (x < 0.) x += modulus;
- return x;
- }
-
- Element &mulin (Element &x, const Element &y) const
- {
- return mul(x,x,y);
- }
-
- Element &divin (Element &x, const Element &y) const
- {
- return div(x,x,y);
- }
-
- Element &negin (Element &x) const
- {
- if (x == 0.) return x;
- else return x = modulus - x;
- }
-
- Element &invin (Element &x) const
- {
- return inv (x, x);
- }
-
- Element &axpyin (Element &r, const Element &a, const Element &x) const
- {
- r = r + a * x;
- return r = fmod(r, modulus);
-
- }
-
- static Element getMaxModulus()
- {
- return 67108864.0; // 2^26
- // return 1 << (DBL_MANT_DIG >> 1); // 2^(DBL_MANT_DIG/2)
- // return 94906265 ;
- }
-
- };
-
-} // FFPACK
-
-// const double FFPACK::Modular<double>::one = 1UL;
-// const double FFPACK::Modular<double>::zero = 0UL;
-
-
-
-
-#include "field-general.h"
-
-#endif
diff --git a/fflas-ffpack/field/modular-extended.h b/fflas-ffpack/field/modular-extended.h
new file mode 100644
index 0000000..0209805
--- /dev/null
+++ b/fflas-ffpack/field/modular-extended.h
@@ -0,0 +1,333 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS group
+ *
+ * Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *
+ */
+
+#ifndef __FFLASFFPACK_MODULAR_EXTENDED_H
+#define __FFLASFFPACK_MODULAR_EXTENDED_H
+
+#include "givaro/givranditer.h"
+#include "givaro/ring-interface.h"
+#include "givaro/modular-general.h"
+
+// namespace Givaro{
+// template<class T>
+// class ModularExtended;// : public RingInterface<double>{};
+// } // Givaro
+
+namespace Givaro{
+/*
+ *
+ * Modular double/float allowing big moduli
+ * !!: RandIter does not works, use your own random
+ *
+ */
+template<class _Element>
+class ModularExtended// : public RingInterface<double>
+{
+public:
+
+ typedef double Element;
+ typedef Element* Element_ptr ;
+ typedef const Element ConstElement;
+ typedef const Element* ConstElement_ptr;
+ // ----- Exported Types and constantes
+ typedef ModularExtended<Element> Self_t;
+ typedef uint64_t Residu_t;
+ enum { size_rep = sizeof(Residu_t) };
+
+private:
+ // Verkampt Split
+ inline void split(const Element x, Element &x_h, Element &x_l) const {
+ Element c;
+ if(std::is_same<Element, double>::value){
+ c = (Element)((1 << 27)+1);
+ }else if(std::is_same<Element, float>::value){
+ c = (Element)((1 << 13)+1);
+ }
+
+ x_h = (c*x)+(x-(c*x));
+ x_l = x - x_h;
+ }
+
+ // Dekker mult, a * b = s + t
+ inline void mult(const Element a, const Element b, Element &s, Element &t) const{
+ s = a*b;
+//#ifdef __FMA__
+ t = std::fma(-a, b, s);
+//#else
+ Element ah, al, bh, bl;
+ split(a, ah, al);
+ split(b, bh, bl);
+ t = ((((-s+ah*bh)+(ah*bl))+(al*bh))+(al*bl));
+//#endif
+ }
+
+public:
+ // ----- Constantes
+ const Element zero = 0.0;
+ const Element one = 1.0;
+ const Element mOne = -1.0;
+
+ // ----- Constructors
+ ModularExtended() = default;
+
+ template<class XXX> ModularExtended(const XXX& p)
+ : zero(0.0), one(1.0), mOne((Element)p - 1.0), _p((Element)p), _invp(1/_p), _negp(-_p), _lp((Residu_t)p)
+ {
+ assert(_p >= getMinModulus());
+ assert(_p <= maxCardinality());
+ }
+
+ //ModularExtended(const Self_t& F) = default;
+ //ModularExtended(Self_t&& F) = default;
+ // : zero(F.zero), one(F.one), mOne(F.mOne), _p(F._p), _lp(F._lp) {}
+
+ // ----- Accessors
+ inline Element minElement() const { return zero; }
+ inline Element maxElement() const { return mOne; }
+
+ // ----- Access to the modulus
+ inline Residu_t residu() const { return _lp; }
+ inline Residu_t size() const { return _lp; }
+ inline Residu_t characteristic() const { return _lp; }
+ template<class T> inline T& characteristic(T& p) const { return p = _lp; }
+ inline Residu_t cardinality() const { return _lp; }
+ template<class T> inline T& cardinality(T& p) const { return p = _lp; }
+ static inline Residu_t maxCardinality() {
+ if(std::is_same<Element, double>::value)
+ return 4503599627370496;
+ else if(std::is_same<Element, float>::value)
+ return 8388608;
+ }
+ static inline Residu_t getMinModulus() { return 2; }
+
+ // ----- Checkers
+ inline bool isZero(const Element& a) const { return a == zero; }
+ inline bool isOne (const Element& a) const { return a == one; }
+ inline bool isMOne(const Element& a) const { return a == mOne; }
+ inline bool areEqual(const Element& a, const Element& b) const { return a == b; }
+ inline size_t length(const Element a) const { return size_rep; }
+
+ // ----- Ring-wise operators
+ inline bool operator==(const Self_t& F) const { return _p == F._p; }
+ inline bool operator!=(const Self_t& F) const { return _p != F._p; }
+ inline Self_t& operator=(const Self_t& F)
+ {
+ F.assign(const_cast<Element&>(one), F.one);
+ F.assign(const_cast<Element&>(zero), F.zero);
+ F.assign(const_cast<Element&>(mOne), F.mOne);
+ _p = F._p;
+ _negp = F._negp;
+ _invp = F._invp;
+ _lp= F._lp;
+ return *this;
+ }
+
+ // ----- Initialisation
+ Element &init (Element &x) const{
+ return x = zero;
+ }
+
+ template<class XXX> Element& init(Element & x, const XXX & y) const{
+ x=Element(y);
+ return reduce(x);
+ }
+
+ Element &assign (Element &x, const Element &y) const{
+ return x = y;
+ }
+
+ // ----- Convert and reduce
+ Integer& convert (Integer &x, const Element &y) const{
+ return x = (Integer)y;
+ }
+ Residu_t& convert (Residu_t &x, const Element &y) const{
+ return x = (Residu_t)y;
+ }
+ Element& convert (Element &x, const Element &y) const{
+ return x = y;
+ }
+ float& convert (float &x, const Element &y) const{
+ return x = (float)y;
+ }
+
+ Element& reduce (Element& x, const Element& y) const{
+ Element q = floor(y*_invp);
+ Element pqh, pql;
+ mult(_p, q, pqh, pql);
+ x = (x-pqh)-pql;
+ if(x >= _p)
+ x -= _p;
+ else if(x < 0)
+ x += _p;
+ return x;
+ }
+ Element& reduce (Element& x) const{
+ Element q = floor(x*_invp);
+ Element pqh, pql;
+ mult(_p, q, pqh, pql);
+ x = (x-pqh)-pql;
+ if(x >= _p)
+ x -= _p;
+ else if(x < zero)
+ x += _p;
+ return x;
+ }
+
+ // ----- Classic arithmetic
+ Element& mul(Element& r, const Element& a, const Element& b) const {
+ Element abh, abl, pqh, pql;
+ mult(a, b, abh, abl);
+ Element q = floor(abh*_invp);
+ mult(_p, q, pqh, pql);
+ r = (abh-pqh)+(abl-pql);
+ if(r > _p)
+ r-= _p;
+ else if(r < 0)
+ r += _p;
+ return r;
+ }
+
+
+ Element& div(Element& r, const Element& a, const Element& b) const{
+ return mulin(inv(r, a), b);
+ }
+ Element& add(Element& r, const Element& a, const Element& b) const {
+ r = a + b;
+ if(r >= _p)
+ r += _negp;
+ return r;
+ }
+ Element& sub(Element& r, const Element& a, const Element& b) const {
+ r = a - b;
+ if(r < 0)
+ r += _p;
+ return r;
+ }
+ Element& neg(Element& r, const Element& a) const {
+ r = -a;
+ if(r < 0)
+ r += _p;
+ return r;
+ }
+ Element& inv(Element& x, const Element& y) const{
+ int64_t x_int, y_int, tx, ty;
+ x_int = int64_t(_lp);
+ y_int = int64_t(y);
+ tx = 0;
+ ty = 1;
+
+ while (y_int != 0) {
+ // always: gcd (modulus,residue) = gcd (x_int,y_int)
+ // sx*modulus + tx*residue = x_int
+ // sy*modulus + ty*residue = y_int
+ int64_t q = x_int / y_int; // integer quotient
+ int64_t temp = y_int; y_int = x_int - q * y_int;
+ x_int = temp;
+ temp = ty; ty = tx - q * ty;
+ tx = temp;
+ }
+
+ if (tx < 0) tx += int64_t(_p);
+
+ // now x_int = gcd (modulus,residue)
+ return x = Element(tx);
+ }
+
+ Element& mulin(Element& r, const Element& a) const {
+ return mul(r, r, a);
+ }
+ Element& divin(Element& r, const Element& y) const{
+ Element iy;
+ return mulin(r, inv(iy, y));
+ }
+ Element& addin(Element& r, const Element& a) const {
+ return add(r, r, a);
+ }
+ Element& subin(Element& r, const Element& a) const {
+ return sub(r, r, a);
+ }
+ Element& negin(Element& r) const {
+ return neg(r, r);
+ }
+ Element& invin(Element& r) const {
+ return inv(r, r);
+ }
+
+ // -- axpy: r <- a * x + y
+ // -- axpyin: r <- a * x + r
+ Element& axpy (Element& r, const Element& a, const Element& x, const Element& y) const {
+ Element tmp;
+ mul(tmp, a, x);
+ return add(r, tmp, y);
+ }
+ Element& axpyin(Element& r, const Element& a, const Element& x) const {
+ Element tmp(r);
+ return axpy(r, a, x, tmp);
+ }
+
+ // -- axmy: r <- a * x - y
+ // -- axmyin: r <- a * x - r
+ Element& axmy (Element& r, const Element& a, const Element& x, const Element& y) const {
+ Element tmp;
+ mul(tmp, a, x);
+ return sub(r, tmp, y);
+ }
+ Element& axmyin(Element& r, const Element& a, const Element& x) const {
+ return axmy(r, a, x, r);
+ }
+
+ // -- maxpy: r <- y - a * x
+ // -- maxpyin: r <- r - a * x
+ Element& maxpy (Element& r, const Element& a, const Element& x, const Element& y) const {
+ Element tmp;
+ mul(tmp, a, x);
+ return sub(r, y, tmp);
+ }
+ Element& maxpyin(Element& r, const Element& a, const Element& x) const {
+ return maxpy(r, a, x, r);
+ }
+
+ // ----- Random generators
+ // typedef ModularRandIter<Self_t> RandIter;
+ // typedef GeneralRingNonZeroRandIter<Self_t> NonZeroRandIter;
+ // template< class Random > Element& random(const Random& g, Element& r) const { return init(r, g()); }
+ // template< class Random > Element& nonzerorandom(const Random& g, Element& a) const
+ // { while (isZero(init(a, g())));
+ // return a; }
+
+protected:
+ double _p = 0;
+ double _invp = 0;
+ double _negp = 0;
+ Residu_t _lp = 0;
+
+};
+
+}// Givaro
+
+#endif //__FFLASFFPACK_MODULAR_EXTENDED_H
diff --git a/fflas-ffpack/field/modular-float.h b/fflas-ffpack/field/modular-float.h
deleted file mode 100644
index 82bdfcb..0000000
--- a/fflas-ffpack/field/modular-float.h
+++ /dev/null
@@ -1,421 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/* fflas-ffpack/modular-positive.h
- * Copyright (C) 2003 Pascal Giorgi
- * 2008 Clement Pernet
- * Written by Clement Pernet <clement.pernet at gmail.com>
- * Pascal Giorgi <pascal.giorgi at ens-lyon.fr>
- *
- * ------------------------------------
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- */
-
-#ifndef __FFLASFFPACK_modular_float_H
-#define __FFLASFFPACK_modular_float_H
-
-#include <math.h>
-#include "fflas-ffpack/field/modular-randiter.h"
-#include "fflas-ffpack/field/nonzero-randiter.h"
-#include "fflas-ffpack/utils/debug.h"
-#include <float.h>
-
-namespace FFPACK {
-
- template <>
- class Modular<float> {
-
- public :
- typedef float Element;
-
- protected:
-
- Element modulus;
- unsigned long lmodulus;
-
- //Element inv_modulus;
-
- public:
- typedef unsigned long FieldInt;
- typedef ModularRandIter<Element> RandIter;
- typedef NonzeroRandIter<Modular<Element>, ModularRandIter<Element> > NonZeroRandIter;
-
- const Element one ;
- const Element zero ;
- const Element mOne ;
-
-
- static const bool balanced = false;
-
- Modular () :
- modulus(0),lmodulus(0)
- ,one(0),zero(0),mOne(0)
- {}
-
- Modular (int32_t p, int exp = 1) :
- modulus((Element)p), lmodulus((unsigned long)p)//, inv_modulus(1./(Element)
- ,one(1),zero(0),mOne(modulus -1)
- {
-#ifdef DEBUG
- if(modulus <= 1)
- throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if( exp != 1 ) throw Failure(__func__,__FILE__,__LINE__,"exponent must be 1");
- if(modulus > getMaxModulus() )
- throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
-#endif
-
- }
- Modular (Element p) :
- modulus(p), lmodulus((unsigned long)p)
- ,one(1),zero(0),mOne(modulus -1)
- {
-#ifdef DEBUG
- if( modulus <= 1 )
- throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if( modulus > getMaxModulus())
- throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
-#endif
- }
-
- Modular (unsigned long int p) :
- modulus((Element)p), lmodulus(p)
- ,one(1),zero(0),mOne(modulus -1)
- {
-#ifdef DEBUG
- if( (Element) modulus <= 1 )
- throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if( (Element) modulus > getMaxModulus())
- throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
-#endif
- }
-
-
- Modular(const Modular<Element>& mf) :
- modulus(mf.modulus), lmodulus(mf.lmodulus)//inv_modulus(mf.inv_modulus)
- ,one(mf.one),zero(mf.zero),mOne(mf.mOne)
- {}
-
- Modular <Element>& assign(const Modular<Element> &F)
- {
- modulus = F.modulus;
- lmodulus= F.lmodulus;
- //inv_modulus = F.inv_modulus;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-#if 1
- const Modular &operator=(const Modular<Element> &F)
- {
- modulus = F.modulus;
- lmodulus= F.lmodulus;
- //inv_modulus = F.inv_modulus;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-#endif
-
-
- unsigned long &cardinality (unsigned long &c) const
- {
- return c = lmodulus;
- }
-
- unsigned long cardinality() const
- {
- return lmodulus ;
- }
-
- unsigned long int & characteristic (long unsigned int& c) const
- {
- return c = lmodulus ;
- }
-
- unsigned long characteristic () const
- {
- return lmodulus;
- }
-
-
- unsigned long &convert (unsigned long &x, const Element &y) const
- {
- return x = (unsigned long)(y);
- }
-
- double &convert (double &x, const Element &y) const
- {
- return x = y;
- }
-
- Element &convert (Element &x, const Element& y) const
- {
- return x=y;
- }
-
- std::ostream &write (std::ostream &os) const
- {
- return os << "float mod " << (int)modulus;
- }
-
- std::istream &read (std::istream &is)
- {
- is >> modulus;
-#ifdef DEBUG
- if(modulus <= 1)
- throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if(modulus > 94906265)
- throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
-#endif
-
- return is;
- }
-
- std::ostream &write (std::ostream &os, const Element &x) const
- {
- return os << x;
- }
-
- std::istream &read (std::istream &is, Element &x) const
- {
- float tmp;
- is >> tmp;
- init(x,tmp);
- return is;
- }
-
- //!@warning possibly buggy. use % instead ?
- inline Element& init(Element& x, unsigned long int y) const
- {
-
- x = (Element)(y % lmodulus);
-
- if (x < 0) x += modulus;
- return x;
- }
-
- inline Element& init(Element& x, long int y) const
- {
-
- x = (Element)(y % (long)lmodulus);
-
- if (x < 0) x += modulus;
- return x;
- }
-
- inline Element& init(Element& x, Element y ) const
- {
-
- x = fmodf (y, modulus);
- if (x < 0) x += modulus;
- return x;
- }
-
- inline Element& init(Element& x, double y ) const
- {
-
- x = (Element)fmod (y, (double)modulus);
- if (x < 0) x += modulus;
- return x;
- }
-
- inline Element& init(Element& x, int y ) const
- {
-
- x = (Element) (y%(int) lmodulus);
- if (x < 0) x += modulus;
- return x;
- }
-
- inline Element& init(Element& x, unsigned int y ) const
- {
-
- x = (Element)(y % (unsigned int)lmodulus);
- if (x < 0) x += modulus;
- return x;
- }
-
- inline Element &init(Element& x) const
- {
- return x = 0. ;
- }
-
- inline Element& assign(Element& x, const Element& y) const
- {
- return x = y;
- }
-
-
- inline bool areEqual (const Element &x, const Element &y) const
- {
- return x == y;
- }
-
- inline bool isZero (const Element &x) const
- {
- return x == 0.;
- }
-
- inline bool isOne (const Element &x) const
- {
- return x == 1.;
- }
-
- inline Element &add (Element &x, const Element &y, const Element &z) const
- {
- x = y + z;
- if ( x >= modulus ) x -= modulus;
- return x;
- }
-
- inline Element &sub (Element &x, const Element &y, const Element &z) const
- {
- x = y - z;
- if (x < 0) x += modulus;
- return x;
- }
-
- inline Element &mul (Element &x, const Element &y, const Element &z) const
- {
- Element tmp= y*z;
- x= fmodf(tmp, modulus);
- //x= tmp - floor(tmp*inv_modulus)*modulus;
-
- return x;
- }
-
- inline Element &div (Element &x, const Element &y, const Element &z) const
- {
- Element temp;
- inv (temp, z);
- return mul (x, y, temp);
- }
-
- inline Element &neg (Element &x, const Element &y) const
- {
- if(y == 0) return x = 0;
- else return x = modulus - y;
- }
-
- inline Element &inv (Element &x, const Element &y) const
- {
- // The extended Euclidean algoritm
- int x_int, y_int, q, tx, ty, temp;
- x_int = int (modulus);
- y_int = int (y);
- tx = 0;
- ty = 1;
-
- while (y_int != 0) {
- // always: gcd (modulus,residue) = gcd (x_int,y_int)
- // sx*modulus + tx*residue = x_int
- // sy*modulus + ty*residue = y_int
- q = x_int / y_int; // integer quotient
- temp = y_int; y_int = x_int - q * y_int;
- x_int = temp;
- temp = ty; ty = tx - q * ty;
- tx = temp;
- }
-
- if (tx < 0) tx += (int)modulus;
-
- // now x_int = gcd (modulus,residue)
- return x = (Element)tx;
-
-
- }
-
- inline Element &axpy (Element &r,
- const Element &a,
- const Element &x,
- const Element &y) const
- {
- Element tmp = a * x + y;
- return r= fmodf(tmp, modulus);
- //return r= tmp- floor(tmp*inv_modulus)*modulus;
-
- }
-
- inline Element &addin (Element &x, const Element &y) const
- {
- x += y;
- if ( x >= modulus ) x -= modulus;
- return x;
- }
-
- inline Element &subin (Element &x, const Element &y) const
- {
- x -= y;
- if (x < 0.) x += modulus;
- return x;
- }
-
- inline Element &mulin (Element &x, const Element &y) const
- {
- return mul(x,x,y);
- }
-
- inline Element &divin (Element &x, const Element &y) const
- {
- return div(x,x,y);
- }
-
- inline Element &negin (Element &x) const
- {
- if (x == 0.) return x;
- else return x = modulus - x;
- }
-
- inline Element &invin (Element &x) const
- {
- return inv (x, x);
- }
-
- inline Element &axpyin (Element &r, const Element &a, const Element &x) const
- {
- Element tmp = r + a * x;
- return r = fmodf(tmp, modulus);
-
- //return r= tmp- floor(tmp*inv_modulus)*modulus;
- }
-
- static inline Element getMaxModulus()
- {
- return 4096.0; // floor( 2^12 )
- // return 1 << (FLT_MANT_DIG >> 1); // 2^(DBL_MANT_DIG/2)
- }
-
- };
-
-} // FFPACK
-
-
-// const float FFPACK::Modular<float>::one = 1UL;
-// const float FFPACK::Modular<float>::zero = 0UL;
-
-
-
-#include "field-general.h"
-
-#endif // __FFLASFFPACK_modular_float_H
diff --git a/fflas-ffpack/field/modular-int32.h b/fflas-ffpack/field/modular-int32.h
deleted file mode 100644
index 241300d..0000000
--- a/fflas-ffpack/field/modular-int32.h
+++ /dev/null
@@ -1,546 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2010 LinBox
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *
- */
-
-/*! @file field/modular-int32.h
- * @ingroup field
- * @brief representation of <code>Z/mZ</code> over \c int32_t .
- */
-#ifndef __FFLASFFPACK_modular_int32_H
-#define __FFLASFFPACK_modular_int32_H
-
-#include <math.h>
-#include <sys/time.h>
-#include "fflas-ffpack/field/modular-randiter.h"
-#include "fflas-ffpack/field/nonzero-randiter.h"
-#include "fflas-ffpack/utils/debug.h"
-
-#ifndef LINBOX_MAX_INT
-#define LINBOX_MAX_INT INT32_MAX
-#endif
-
-// Namespace in which all LinBox code resides
-namespace FFPACK
-{
-
- template< class Element >
- class Modular;
-
- /** \brief Specialization of Modular to int32_t element type with efficient dot product.
- *
- * Efficient element operations for dot product, mul, axpy, by using floating point
- * inverse of modulus (borrowed from NTL) and some use of non-normalized intermediate values.
- *
- * For some uses this is the most efficient field for primes in the range from half word
- * to 2^30.
- *
- * Requires: Modulus < 2^30.
- * Intended use: 2^15 < prime modulus < 2^30.
- * \ingroup field
- * @todo what about this _two64 not so usefull here ?? (but in linbox)
- */
- template <>
- class Modular<int32_t> {
-
- protected:
-
- int32_t modulus;
- double modulusinv;
- unsigned long lmodulus;
- int32_t _two64;
-
-
- public :
- typedef int32_t Element;
- const Element one ;
- const Element zero ;
- const Element mOne ; // can't be const because of operator=
-
- public:
-
-
- static const bool balanced = false ;
- typedef ModularRandIter<Element> RandIter;
- typedef NonzeroRandIter<Modular<Element>, ModularRandIter<Element> > NonZeroRandIter;
-
- //default modular field,taking 65521 as default modulus
- Modular () :
- modulus(65521),lmodulus((unsigned long) modulus)
- ,one(1),zero(0),mOne(modulus -1)
- {
- modulusinv=1/(double)65521;
-
- _two64 = (int32_t) ((uint64_t) (-1) % (uint64_t) 65521);
- _two64 += 1;
- if (_two64 >= 65521) _two64 -= 65521;
- }
-
- Modular (int32_t value, int32_t exp = 1) :
- modulus(value),lmodulus((unsigned long)value)
- ,one(1),zero(0),mOne(modulus -1)
- {
- modulusinv = 1 / ((double) value);
-#ifdef DEBUG
- if(exp != 1) throw Failure(__func__,__FILE__,__LINE__,"exponent must be 1");
- if(value<=1) throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if(value>getMaxModulus()) {
- std::cerr << value << '>' << getMaxModulus() << std::endl;
- throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
- }
-#endif
- _two64 = (int32_t) ((uint64_t) (-1) % (uint64_t) value);
- _two64 += 1;
- if (_two64 >= value) _two64 -= value;
- }
-
- Modular (unsigned long int value) :
- modulus((Element) value),lmodulus(value)
- ,one(1),zero(0),mOne(modulus -1)
- {
- modulusinv = 1 / ((double) value);
-#ifdef DEBUG
- if(value<=1) throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if (value>INT32_MAX) // stupidly big ?
- throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
- if((Element)value>getMaxModulus()) // we can cast now
- throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
-#endif
- _two64 = (int32_t) ((uint64_t) (-1) % (uint64_t) value);
- _two64 += 1;
- if ((unsigned long)_two64 >= value)
- _two64 = _two64 - (int32_t) value;
- }
-
- Modular (long int value) :
- modulus((Element) value), lmodulus((unsigned long int)value)
- ,one(1),zero(0),mOne(modulus -1)
- {
- modulusinv = 1 / ((double) value);
-#ifdef DEBUG
- if(value<=1) throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if (value>INT32_MAX) // stupidly big ?
- throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
- if((Element)value>getMaxModulus()) // we can cast now
- throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
-#endif
- _two64 = (int32_t) ((uint64_t) (-1) % (uint64_t) value);
- _two64 += 1;
- if ((long int)_two64 >= value)
- _two64 = _two64 - (int32_t) value;
- }
-
-
- Modular(const Modular<int32_t>& mf) :
- modulus(mf.modulus),modulusinv(mf.modulusinv)
- ,lmodulus(mf.lmodulus),_two64(mf._two64)
- ,one(mf.one),zero(mf.zero),mOne(mf.mOne)
- {}
-
- Modular <Element>& assign(const Modular<Element> &F)
- {
- modulus = F.modulus;
- modulusinv = F.modulusinv;
- lmodulus = F.lmodulus;
- _two64 = F._two64;
- //inv_modulus = F.inv_modulus;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-
-#if 1
- const Modular &operator=(const Modular<int32_t> &F)
- {
- modulus = F.modulus;
- modulusinv = F.modulusinv;
- lmodulus = F.lmodulus;
- _two64 = F._two64;
- //inv_modulus = F.inv_modulus;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-#endif
-
-
- unsigned long &cardinality (unsigned long &c) const
- {
- return c = lmodulus;
- }
-
- unsigned long &characteristic (unsigned long &c) const
- {
- return c = lmodulus;
- }
-
- unsigned long characteristic () const
- {
- return lmodulus;
- }
-
- unsigned long cardinality () const
- {
- return lmodulus;
- }
-
-
- int32_t &convert (int32_t &x, const Element &y) const
- {
- return x = y;
- }
-
- double &convert (double &x, const Element &y) const
- {
- return x = (double) y;
- }
-
- float &convert (float &x, const Element &y) const
- {
- return x = (float) y;
- }
-
- std::ostream &write (std::ostream &os) const
- {
- return os << "int32_t mod " << modulus;
- }
-
- std::istream &read (std::istream &is)
- {
- is >> modulus;
- modulusinv = 1 /((double) modulus );
-#ifdef DEBUG
- if(modulus <= 1) throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if(modulus > getMaxModulus()) throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
-#endif
- _two64 = (int32_t) ((uint64_t) (-1) % (uint64_t) modulus);
- _two64 += 1;
- if (_two64 >= modulus) _two64 -= modulus;
-
- return is;
- }
-
- std::ostream &write (std::ostream &os, const Element &x) const
- {
- return os << x;
- }
-
- std::istream &read (std::istream &is, Element &x) const
- {
- int32_t tmp;
- is >> tmp;
- init(x,tmp);
- return is;
- }
-
- Element &init (Element & x, const double &y) const
- {
- double z = fmod(y, (double)modulus);
- if (z < 0)
- z += (double)modulus;
- //z += 0.5; // C Pernet Sounds nasty and not necessary
- return x = static_cast<Element>(z); //rounds towards 0
- }
-
- Element &init (Element & x, const float &y) const
- {
- return init(x , (double) y);
- }
-
- template<class Element1>
- Element &init (Element & x, const Element1 &y) const
- {
- x = Element(y) % modulus;
- if (x < 0) x += modulus;
- return x;
- }
-
- Element& init(Element& x, int y =0) const
- {
- x = y % modulus;
- if ( x < 0 ) x += modulus;
- return x;
- }
-
- Element& init(Element& x, long y) const
- {
- x = Element(y % (long)modulus);
- if ( x < 0 ) x += modulus;
- return x;
- }
-
- Element& assign(Element& x, const Element& y) const
- {
- return x = y;
- }
-
-
- bool areEqual (const Element &x, const Element &y) const
- {
- return x == y;
- }
-
- bool isZero (const Element &x) const
- {
- return x == 0;
- }
-
- bool isOne (const Element &x) const
- {
- return x == 1;
- }
-
- Element &add (Element &x, const Element &y, const Element &z) const
- {
- x = y + z;
- if ( x >= modulus ) x -= modulus;
- return x;
- }
-
- Element &sub (Element &x, const Element &y, const Element &z) const
- {
- x = y - z;
- if (x < 0)
- x += (Element) modulus;
- return x;
- }
-
- Element &mul (Element &x, const Element &y, const Element &z) const
- {
- int32_t q;
-
- q = (int32_t) ((((double) y)*((double) z)) * modulusinv); // q could be off by (+/-) 1
- x = (int32_t) (y*z - q*modulus);
-
-
- if (x >= modulus)
- x -= (Element) modulus;
- else if (x < 0)
- x += (Element) modulus;
-
- return x;
- }
-
- Element &div (Element &x, const Element &y, const Element &z) const
- {
- FFLASFFPACK_check(!isZero(z));
- Element temp;
- inv (temp, z);
- return mul (x, y, temp);
- }
-
- Element &neg (Element &x, const Element &y) const
- {
- if(y == 0) return x=0;
- else return x = modulus-y;
- }
-
- Element &inv (Element &x, const Element &y) const
- {
- FFLASFFPACK_check(!isZero(y));
- int32_t d, t;
- XGCD(d, x, t, y, modulus);
- if (d != 1)
- {
-#ifdef DEBUG
- throw Failure(__func__,__FILE__,__LINE__,"InvMod: Input is not invertible ");
-#endif
- }
- if (x < 0)
- x += modulus;
- return x;
-
- }
-
- Element &axpy (Element &r,
- const Element &a,
- const Element &x,
- const Element &y) const
- {
- int32_t q;
-
- q = (int32_t) (((((double) a) * ((double) x)) + (double)y) * modulusinv); // q could be off by (+/-) 1
- r = (int32_t) (a * x + y - q*modulus);
-
-
- if (r >= modulus)
- r -= modulus;
- else if (r < 0)
- r += modulus;
-
- return r;
-
- }
-
- Element &addin (Element &x, const Element &y) const
- {
- x += y;
- if ( x >= modulus ) x -= modulus;
- return x;
- }
-
- Element &subin (Element &x, const Element &y) const
- {
- x -= y;
- if (x < 0) x += modulus;
- return x;
- }
-
- Element &mulin (Element &x, const Element &y) const
- {
- return mul(x,x,y);
- }
-
- Element &divin (Element &x, const Element &y) const
- {
- return div(x,x,y);
- }
-
- Element &negin (Element &x) const
- {
- if (x == 0) return x;
- else return x = modulus - x;
- }
-
- Element &invin (Element &x) const
- {
- FFLASFFPACK_check(!isZero(x));
- return inv (x, x);
- }
-
- Element &axpyin (Element &r, const Element &a, const Element &x) const
- {
- int32_t q;
-
- q = (int32_t) (((((double) a) * ((double) x)) + (double) r) * modulusinv); // q could be off by (+/-) 1
- r = (int32_t) (a * x + r - q*modulus);
-
-
- if (r >= modulus)
- r -= modulus;
- else if (r < 0)
- r += modulus;
-
- return r;
- }
-
- unsigned long AccBound(const Element&r) const
- {
- // Element one, zero ;
- // init(one,1UL) ;
- // init(zero,0UL);
- double max_double = (double) (INT32_MAX) - modulus ;
- double p = modulus-1 ;
- if (areEqual(zero,r))
- return (unsigned long) (max_double/p) ;
- else if (areEqual(one,r))
- {
- if (modulus>= getMaxModulus())
- return 0 ;
- else
- return (unsigned long) max_double/(unsigned long)(modulus*modulus) ;
- } else
- throw "Bad input, expecting 0 or 1";
- return 0;
- }
-
-
- static int32_t getMaxModulus()
- {
- // return INT32_MAX ; // 2^31-1
- return 1073741824;// 2^30
- // return 46341 ;
- }
-
- private:
-
- static void XGCD(int32_t& d, int32_t& s, int32_t& t, int32_t a, int32_t b)
- {
- int32_t u, v, u0, v0, u1, v1, u2, v2, q, r;
-
- int32_t aneg = 0, bneg = 0;
-
- if (a < 0)
- {
-#ifdef DEBUG
- if (a < -LINBOX_MAX_INT) throw Failure(__func__,__FILE__,__LINE__,"XGCD: integer overflow");
-#endif
- a = -a;
- aneg = 1;
- }
-
- if (b < 0)
- {
-#ifdef DEBUG
- if (b < -LINBOX_MAX_INT) throw Failure(__func__,__FILE__,__LINE__,"XGCD: integer overflow");
-#endif
- b = -b;
- bneg = 1;
- }
-
- u1 = 1; v1 = 0;
- u2 = 0; v2 = 1;
- u = a; v = b;
-
- while (v != 0)
- {
- q = u / v;
- r = u % v;
- u = v;
- v = r;
- u0 = u2;
- v0 = v2;
- u2 = u1 - q*u2;
- v2 = v1- q*v2;
- u1 = u0;
- v1 = v0;
- }
-
- if (aneg)
- u1 = -u1;
-
- if (bneg)
- v1 = -v1;
-
- d = u;
- s = u1;
- t = v1;
- }
-
- };
-
-
-}
-
-// const int32_t FFPACK::Modular<int32_t>::one = 1UL;
-// const int32_t FFPACK::Modular<int32_t>::zero = 0UL;
-
-
-
-
-#include "field-general.h"
-
-#endif //__LINBOX_modular_int32_H
-
diff --git a/fflas-ffpack/field/modular-int64.h b/fflas-ffpack/field/modular-int64.h
deleted file mode 100644
index 4431920..0000000
--- a/fflas-ffpack/field/modular-int64.h
+++ /dev/null
@@ -1,482 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2010 LinBox
- * Adapted by B Boyer <brice.boyer at imag.fr>
- * (from other modular-balanced* files)
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *
- */
-
-/*! @file field/modular-int64.h
- * @ingroup field
- * @brief representation of <code>Z/mZ</code> over \c int64_t .
- */
-#ifndef __FFLASFFPACK_modular_int64_H
-#define __FFLASFFPACK_modular_int64_H
-
-#include <math.h>
-#include <sys/time.h>
-#include "fflas-ffpack/field/modular-randiter.h"
-#include "fflas-ffpack/field/nonzero-randiter.h"
-
-
-#ifndef LINBOX_MAX_INT64
-#ifdef __x86_64__
-#define LINBOX_MAX_INT64 INT64_MAX
-#else
-#define LINBOX_MAX_INT64 INT64_MAX
-#endif
-#endif
-
-// Namespace in which all LinBox code resides
-namespace FFPACK
-{
-
- template< class Element >
- class Modular;
-
- /** \brief Specialization of Modular to int64_t element type with efficient dot product.
- *
- * Efficient element operations for dot product, mul, axpy, by using floating point
- * inverse of modulus (borrowed from NTL) and some use of non-normalized intermediate values.
- *
- * For some uses this is the most efficient field for primes in the range from half word
- * to 2^62.
- *
- * Requires: Modulus < 2^62.
- * Intended use: 2^30 < prime modulus < 2^62.
- \ingroup field
- */
- template <>
- class Modular<int64_t> {
-
- protected:
-
- int64_t modulus;
- double modulusinv;
- unsigned long lmodulus ;
- int64_t _two64 ;
-
- public:
-
- typedef int64_t Element;
- const Element one ;
- const Element zero ;
- const Element mOne ;
-
-
- typedef ModularRandIter<int64_t> RandIter;
-
- static const bool balanced = false ;
-
- //default modular field,taking 65521 as default modulus
- Modular () :
- modulus(65521),lmodulus((unsigned long)modulus)
- ,one(1),zero(0),mOne(modulus -1)
- {
- modulusinv=1/(double)65521;
- _two64 = (int64_t) ((uint64_t) (-1) % (uint64_t) 65521);
- _two64 += 1;
- if (_two64 >= 65521) _two64 -= 65521;
-
- }
-
- Modular (int64_t value, int64_t exp = 1) :
- modulus(value),lmodulus((unsigned long)modulus)
- ,one(1),zero(0),mOne(modulus -1)
- {
- modulusinv = 1 / ((double) value);
-#ifdef DEBUG
- if(exp != 1) throw Failure(__func__,__FILE__,__LINE__,"exponent must be 1");
- if(value<=1) throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if( value > getMaxModulus()) throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
-#endif
- _two64 = (int64_t) ((uint64_t) (-1) % (uint64_t) value);
- _two64 += 1;
- if (_two64 >= value) _two64 -= value;
-
- }
-
- Modular(const Modular<int64_t>& mf) :
- modulus(mf.modulus),modulusinv(mf.modulusinv),lmodulus((unsigned long)modulus),_two64(mf._two64)
- ,one(mf.one),zero(mf.zero),mOne(mf.mOne)
- {}
-
- Modular<Element> & assign(const Modular<Element> &F)
- {
- modulus = F.modulus;
- modulusinv = F.modulusinv;
- lmodulus = F.lmodulus;
- _two64 = F._two64;
- //inv_modulus = F.inv_modulus;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-
-#if 1
- const Modular &operator=(const Modular<int64_t> &F)
- {
- modulus = F.modulus;
- modulusinv = F.modulusinv;
- lmodulus = F.lmodulus;
- _two64 = F._two64;
- //inv_modulus = F.inv_modulus;
- F.assign(const_cast<Element&>(one),F.one);
- F.assign(const_cast<Element&>(zero),F.zero);
- F.assign(const_cast<Element&>(mOne),F.mOne);
- return *this;
- }
-#endif
-
- inline unsigned long &cardinality ( unsigned long &c) const
- {
- return c = lmodulus;
- }
-
- inline unsigned long &characteristic (unsigned long &c) const
- {
- return c = lmodulus;
- }
-
- inline unsigned long characteristic () const
- {
- return lmodulus;
- }
-
- inline unsigned long cardinality () const
- {
- return lmodulus;
- }
-
-
- inline int64_t &convert (int64_t &x, const Element &y) const
- {
- return x = y;
- }
-
- inline double &convert (double &x, const Element &y) const
- {
- return x = (double) y;
- }
-
- inline float &convert (float &x, const Element &y) const
- {
- return x = (float) y;
- }
-
- inline std::ostream &write (std::ostream &os) const
- {
- return os << "int64_t mod " << modulus;
- }
-
- inline std::istream &read (std::istream &is)
- {
- is >> modulus;
- modulusinv = 1 /((double) modulus );
-#ifdef DEBUG
- if(modulus <= 1) throw Failure(__func__,__FILE__,__LINE__,"modulus must be > 1");
- if(modulus > getMaxModulus()) throw Failure(__func__,__FILE__,__LINE__,"modulus is too big");
-#endif
-
- return is;
- }
-
- inline std::ostream &write (std::ostream &os, const Element &x) const
- {
- return os << x;
- }
-
- inline std::istream &read (std::istream &is, Element &x) const
- {
- int64_t tmp;
- is >> tmp;
- init(x,tmp);
- return is;
- }
-
- inline Element &init (Element & x, const double &y) const
- {
- double z = fmod(y, (double)modulus);
- if (z < 0) z += (double)modulus;
- //z += 0.5; // C Pernet Sounds nasty and not necessary
- return x = static_cast<long>(z); //rounds towards 0
- }
-
- inline Element &init (Element & x, const float &y) const
- {
- return init(x , (double) y);
- }
-
- template<class Element1>
- inline Element &init (Element & x, const Element1 &y) const
- {
- x = y % modulus;
- if (x < 0) x += modulus;
- return x;
- }
-
-
- inline Element& init(Element& x, int y =0) const
- {
- x = y % modulus;
- if ( x < 0 ) x += modulus;
- return x;
- }
-
- inline Element& init(Element& x, long y) const
- {
- x = y % modulus;
- if ( x < 0 ) x += modulus;
- return x;
- }
-
- inline Element& assign(Element& x, const Element& y) const
- {
- return x = y;
- }
-
- inline bool areEqual (const Element &x, const Element &y) const
- {
- return x == y;
- }
-
- inline bool isZero (const Element &x) const
- {
- return x == 0;
- }
-
- inline bool isOne (const Element &x) const
- {
- return x == 1;
- }
-
- inline Element &add (Element &x, const Element &y, const Element &z) const
- {
- x = y + z;
- if ( x >= modulus ) x -= modulus;
- return x;
- }
-
- inline Element &sub (Element &x, const Element &y, const Element &z) const
- {
- x = y - z;
- if (x < 0) x += modulus;
- return x;
- }
-
- inline Element &mul (Element &x, const Element &y, const Element &z) const
- {
- int64_t q;
-
- q = (int64_t) ((((double) y)*((double) z)) * modulusinv); // q could be off by (+/-) 1
- x = (int64_t) (y*z - q*modulus);
-
-
- if (x >= modulus)
- x -= modulus;
- else if (x < 0)
- x += modulus;
-
- return x;
- }
-
- inline Element &div (Element &x, const Element &y, const Element &z) const
- {
- Element temp;
- inv (temp, z);
- return mul (x, y, temp);
- }
-
- inline Element &neg (Element &x, const Element &y) const
- {
- if(y == 0) return x=0;
- else return x = modulus-y;
- }
-
- inline Element &inv (Element &x, const Element &y) const
- {
- int64_t d, t;
- XGCD(d, x, t, y, modulus);
- if (d != 1)
- {
-#ifdef DEBUG
- throw Failure(__func__,__FILE__,__LINE__,"InvMod: Input is not invertible ");
-#endif
- }
- if (x < 0)
- x += modulus;
- return x;
-
- }
-
- inline Element &axpy (Element &r,
- const Element &a,
- const Element &x,
- const Element &y) const
- {
- int64_t q;
-
- q = (int64_t) (((((double) a) * ((double) x)) + (double)y) * modulusinv); // q could be off by (+/-) 1
- r = (int64_t) (a * x + y - q*modulus);
-
-
- if (r >= modulus)
- r -= modulus;
- else if (r < 0)
- r += modulus;
-
- return r;
-
- }
-
- inline Element &addin (Element &x, const Element &y) const
- {
- x += y;
- if ( x >= modulus ) x -= modulus;
- return x;
- }
-
- inline Element &subin (Element &x, const Element &y) const
- {
- x -= y;
- if (x < 0) x += modulus;
- return x;
- }
-
- inline Element &mulin (Element &x, const Element &y) const
- {
- return mul(x,x,y);
- }
-
- inline Element &divin (Element &x, const Element &y) const
- {
- return div(x,x,y);
- }
-
- inline Element &negin (Element &x) const
- {
- if (x == 0) return x;
- else return x = modulus - x;
- }
-
- inline Element &invin (Element &x) const
- {
- return inv (x, x);
- }
-
- inline Element &axpyin (Element &r, const Element &a, const Element &x) const
- {
- int64_t q;
-
- q = (int64_t) (((((double) a) * ((double) x)) + (double) r) * modulusinv); // q could be off by (+/-) 1
- r = (int64_t) (a * x + r - q*modulus);
-
-
- if (r >= modulus)
- r -= modulus;
- else if (r < 0)
- r += modulus;
-
- return r;
- }
-
- static inline int64_t getMaxModulus()
- {
-#if 1
-#ifdef __x86_64__
- return 4611686018427387904L; // 2^62 in long long
-#else
- return 4611686018427387904LL; // 2^62 in long
-#endif
-#endif
- // return 1 << 31 ;
- // return 4294967296 ;
- }
-
- private:
-
- static void XGCD(int64_t& d, int64_t& s, int64_t& t, int64_t a, int64_t b)
- {
- int64_t u, v, u0, v0, u1, v1, u2, v2, q, r;
-
- int64_t aneg = 0, bneg = 0;
-
- if (a < 0)
- {
-#ifdef DEBUG
- if (a < -LINBOX_MAX_INT)
- throw Failure(__func__,__FILE__,__LINE__,"XGCD: integer overflow");
-#endif
- a = -a;
- aneg = 1;
- }
-
- if (b < 0)
- {
-#ifdef DEBUG
- if (b < -LINBOX_MAX_INT) throw
- Failure(__func__,__FILE__,__LINE__,"XGCD: integer overflow");
-#endif
- b = -b;
- bneg = 1;
- }
-
- u1 = 1; v1 = 0;
- u2 = 0; v2 = 1;
- u = a; v = b;
-
- while (v != 0)
- {
- q = u / v;
- r = u % v;
- u = v;
- v = r;
- u0 = u2;
- v0 = v2;
- u2 = u1 - q*u2;
- v2 = v1- q*v2;
- u1 = u0;
- v1 = v0;
- }
-
- if (aneg)
- u1 = -u1;
-
- if (bneg)
- v1 = -v1;
-
- d = u;
- s = u1;
- t = v1;
- }
-
- };
-
-
-}
-
-#undef LINBOX_MAX_INT64
-
-#include "field-general.h"
-
-#endif //__LINBOX_modular_int64_H
-
diff --git a/fflas-ffpack/field/modular-randiter.h b/fflas-ffpack/field/modular-randiter.h
deleted file mode 100644
index d38e9e7..0000000
--- a/fflas-ffpack/field/modular-randiter.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/* field/modular-randiter.h
- * Copyright (C) 2008 Clement Pernet
- *
- * Written by Clement Pernet <clement.pernet at gmail.com>
- *
- * ------------------------------------
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- */
-
-#ifndef __FFLASFFPACK_modular_randiter_H
-#define __FFLASFFPACK_modular_randiter_H
-
-#include <sys/time.h>
-#include <stdlib.h>
-#include <limits>
-
-namespace FFPACK {
- template< class Element >
- class Modular;
-
- template <class Element>
- class ModularRandIter {
- public:
- ModularRandIter (const Modular<Element> &F) :
- _F(F)
- {
- struct timeval tp;
- gettimeofday(&tp, 0) ;
- long _seed = (long)(tp.tv_usec);
- srand48(_seed);
- }
-
- ModularRandIter (const ModularRandIter<Element> &R) :
- _F (R._F)
- {}
-
- /*! @bug not so random... (at all) */
- Element &random (Element &a) const
- {
- return _F.init(a,(double)(lrand48()-std::numeric_limits<long>::max()));
- }
-
- private:
- Modular<Element> _F;
-
- };
-
- template <class T>
- class ModularBalanced;
-
- template <class Element>
- class ModularBalancedRandIter {
- public:
- ModularBalancedRandIter (const ModularBalanced<Element> &F):_F(F){}
- ModularBalancedRandIter (const ModularBalancedRandIter<Element> &R) :
- _F (R._F)
- {}
- Element &random (Element &a) const
- {
- return _F.init(a,(double)rand());
- }
- private:
- ModularBalanced<Element> _F;
-
- };
-
-} // FFPACK
-
-#endif // __FFLASFFPACK_modular_randiter_H
diff --git a/fflas-ffpack/field/nonzero-randiter.h b/fflas-ffpack/field/nonzero-randiter.h
deleted file mode 100644
index d4492ff..0000000
--- a/fflas-ffpack/field/nonzero-randiter.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/* fflas-ffpack/nonzero-randiter.h
- * Copyright (C) 2001-2002 Bradford Hovinen
- * 2008 Clement Pernet
- * Written by William J Turner <wjturner at math.ncsu.edu>,
- * Bradford Hovinen <hovinen at cis.udel.edu>
- * Clement Pernet <clement.pernet at gmail.com>
- *
- * taken for LinBox
- *
- * ------------------------------------
- *
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- */
-
-#ifndef __NONZERO_RANDITER_H
-#define __NONZERO_RANDITER_H
-
-#include <sys/time.h>
-#include <stdlib.h>
-
-#include <string>
-
-namespace FFPACK {
-/** Random iterator for nonzero random numbers
- *
- * Wraps around an existing random iterator and ensures that the output
- * is entirely nonzero numbers.
- **/
-template <class Field, class RandIter = typename Field::RandIter>
-class NonzeroRandIter
-{
-public:
-
- typedef typename Field::Element Element;
-
- NonzeroRandIter (const Field &F, const RandIter &r)
- : _F (F), _r (r)
- {}
-
- NonzeroRandIter (const NonzeroRandIter& R)
- : _F (R._F), _r (R._r) {}
-
- ~NonzeroRandIter()
- {}
-
- NonzeroRandIter& operator=(const NonzeroRandIter& R)
- {
- if (this != &R) { // guard against self-assignment
- _F = R._F;
- _r = R._r;
- }
-
- return *this;
- }
-
- Element &random (Element &a) const
- {
- do _r.random (a); while (_F.isZero (a));
- return a;
- }
-
-private:
-
- Field _F;
- RandIter _r;
-
-}; // class NonzeroRandIter
-
-} // FFPACK
-
-#endif // __NONZERO_RANDITER_H
diff --git a/fflas-ffpack/field/rns-double-elt.h b/fflas-ffpack/field/rns-double-elt.h
new file mode 100644
index 0000000..6a34b99
--- /dev/null
+++ b/fflas-ffpack/field/rns-double-elt.h
@@ -0,0 +1,143 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/*! @file field/rns-double-elt.h
+ * @ingroup field
+ * @brief rns elt structure with double support
+ */
+
+
+#ifndef __FFLASFFPACK_field_rns_double_elt_INL
+#define __FFLASFFPACK_field_rns_double_elt_INL
+
+#include "fflas-ffpack/utils/fflas_memory.h"
+#include "fflas-ffpack/utils/cast.h"
+
+namespace FFPACK {
+
+ // forward declaration
+ struct rns_double_elt_ptr;
+ struct rns_double_elt_cstptr;
+
+ // element of the rns structure (allow virtualization of element from an array of double)
+ struct rns_double_elt {
+ double *_ptr;
+ size_t _stride;
+ bool _alloc; // specify wether Element owns its memory; alloc is true only through F.init() and _ptr==NULL (this is to handle Element allocated within a matrix)
+ rns_double_elt(): _ptr(NULL), _stride(0), _alloc(false) {}
+ ~rns_double_elt(){ if (_alloc) FFLAS::fflas_delete(_ptr);}
+ rns_double_elt(double* p, size_t r, size_t a=false) : _ptr(p), _stride(r), _alloc(a) {}
+ inline rns_double_elt_ptr operator&() ;
+ inline rns_double_elt_cstptr operator&()const ;
+ rns_double_elt(const rns_double_elt& x) : _ptr(x._ptr),_stride(x._stride),_alloc(false) {}
+ };
+
+ // pointer to element of the rns structure (allow virtualization of element from an array of double)
+ struct rns_double_elt_ptr : public rns_double_elt {
+ rns_double_elt other;
+ rns_double_elt_ptr(){}
+ rns_double_elt_ptr(double* p, size_t r) : rns_double_elt(p,r,false){}
+ rns_double_elt_ptr(const rns_double_elt_ptr &x) : rns_double_elt(x._ptr,x._stride,false){}
+ rns_double_elt_ptr(const rns_double_elt_cstptr &x);
+ rns_double_elt_ptr(rns_double_elt_ptr &&)=default;
+ //inline operator rns_double_elt_cstptr();
+ inline rns_double_elt_ptr* operator&(){return this;}
+ inline rns_double_elt& operator*() {return static_cast<rns_double_elt&>(*this);}
+ inline rns_double_elt operator[](size_t i) const {return rns_double_elt(_ptr+i,_stride);} // BUGGY
+ inline rns_double_elt& operator[](size_t i) {other=rns_double_elt(_ptr+i,_stride);return other;} // BUGGY
+ inline rns_double_elt_ptr operator++() {return rns_double_elt_ptr(_ptr++,_stride);}
+ inline rns_double_elt_ptr operator--() {return rns_double_elt_ptr(_ptr--,_stride);}
+ inline rns_double_elt_ptr operator+(size_t inc) {return rns_double_elt_ptr(_ptr+inc,_stride);}
+ inline rns_double_elt_ptr operator-(size_t inc) {return rns_double_elt_ptr(_ptr-inc,_stride);}
+ inline rns_double_elt_ptr& operator+=(size_t inc) {_ptr+=inc;return *this;}
+ inline rns_double_elt_ptr& operator-=(size_t inc) {_ptr-=inc;return *this;}
+ inline rns_double_elt_ptr& operator=(const rns_double_elt_ptr& x);
+ bool operator< (const rns_double_elt_ptr& x) {return _ptr < x._ptr;}
+ bool operator!= (const rns_double_elt_ptr& x) {return _ptr != x._ptr;}
+ };
+ struct rns_double_elt_cstptr : public rns_double_elt {
+ rns_double_elt other;
+ rns_double_elt_cstptr(){}
+ rns_double_elt_cstptr(double* p, size_t r) : rns_double_elt(p,r,false){}
+ rns_double_elt_cstptr(const rns_double_elt_ptr& x) : rns_double_elt(x._ptr,x._stride,false){}
+ rns_double_elt_cstptr(const rns_double_elt_cstptr& x) : rns_double_elt(x._ptr,x._stride,false){}
+ rns_double_elt_cstptr(rns_double_elt_cstptr &&)=default;
+ inline rns_double_elt_cstptr* operator&(){return this;}
+ inline rns_double_elt& operator*() const {
+ return *const_cast<rns_double_elt*>(static_cast<const rns_double_elt*>(this));
+ }
+ inline rns_double_elt operator[](size_t i)const {return rns_double_elt(_ptr+i,_stride);}
+ inline rns_double_elt& operator[](size_t i) {other=rns_double_elt(_ptr+i,_stride);return other;} // BUGGY
+
+ //inline rns_double_elt& operator[](size_t i)const {return *((*this)+i);}// BUGGY
+ inline rns_double_elt_cstptr operator++() {return rns_double_elt_cstptr(_ptr++,_stride);}
+ inline rns_double_elt_cstptr operator--() {return rns_double_elt_cstptr(_ptr--,_stride);}
+ inline rns_double_elt_cstptr operator+(size_t inc)const {return rns_double_elt_cstptr(_ptr+inc,_stride);}
+ inline rns_double_elt_cstptr operator-(size_t inc)const {return rns_double_elt_cstptr(_ptr-inc,_stride);}
+ inline rns_double_elt_cstptr& operator+=(size_t inc) {_ptr+=inc;return *this;}
+ inline rns_double_elt_cstptr& operator-=(size_t inc) {_ptr-=inc;return *this;}
+ inline rns_double_elt_cstptr& operator=(const rns_double_elt_cstptr& x);
+ bool operator< (const rns_double_elt_cstptr& x) {return _ptr < x._ptr;}
+ bool operator!= (const rns_double_elt_cstptr& x) {return _ptr != x._ptr;}
+ };
+
+ inline rns_double_elt_ptr& rns_double_elt_ptr::operator=(const rns_double_elt_ptr& x) {
+ if (this != &x){
+ if (_alloc) FFLAS::fflas_delete(_ptr);
+ _ptr= x._ptr;
+ _stride=x._stride;
+ _alloc=false;
+ }
+ return *this;
+ }
+ inline rns_double_elt_cstptr& rns_double_elt_cstptr::operator=(const rns_double_elt_cstptr& x) {
+ if (this != &x){
+ if (_alloc) FFLAS::fflas_delete(_ptr);
+ _ptr= x._ptr;
+ _stride=x._stride;
+ _alloc=false;
+ }
+ return *this;
+ }
+
+ inline rns_double_elt_ptr::rns_double_elt_ptr(const rns_double_elt_cstptr &x)
+ : rns_double_elt(x._ptr,x._stride,false){}
+ //inline rns_double_elt_ptr::operator rns_double_elt_cstptr(){return rns_double_elt_cstptr(_ptr,_stride);}
+ inline rns_double_elt_ptr rns_double_elt::operator&() {return rns_double_elt_ptr(_ptr,_stride);}
+ inline rns_double_elt_cstptr rns_double_elt::operator&() const {return rns_double_elt_cstptr(_ptr,_stride);}
+
+
+ template<>
+ inline rns_double_elt_ptr fflas_const_cast (rns_double_elt_cstptr x){return x;}
+ template<>
+ inline rns_double_elt_cstptr fflas_const_cast (rns_double_elt_ptr x){return x;}
+
+
+} // end namespace FFPACK:
+
+#endif // __FFLASFFPACK_field_rns_double_elt_INL
diff --git a/fflas-ffpack/field/rns-double.h b/fflas-ffpack/field/rns-double.h
new file mode 100644
index 0000000..b99a704
--- /dev/null
+++ b/fflas-ffpack/field/rns-double.h
@@ -0,0 +1,421 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/*! @file field/rns-double.h
+ * @ingroup field
+ * @brief rns structure with double support
+ */
+
+#ifndef __FFPACK_rns_double_H
+#define __FFPACK_rns_double_H
+
+// Bigger multiple of s lesser or equal than x, s must be a power of two
+#ifndef ROUND_DOWN
+#define ROUND_DOWN(x, s) ((x) & ~((s)-1))
+#endif
+
+#include <vector>
+#include <givaro/modular-double.h>
+#include <givaro/givinteger.h>
+#include <givaro/givintprime.h>
+#include <recint/ruint.h>
+#include "fflas-ffpack/config-blas.h"
+#include "fflas-ffpack/utils/fflas_memory.h"
+#include "fflas-ffpack/utils/align-allocator.h"
+#include "fflas-ffpack/field/modular-extended.h"
+#include "fflas-ffpack/field/rns-double-elt.h"
+
+namespace FFPACK {
+
+ /* Structure that handles rns representation given a bound and bitsize for prime moduli
+ * support sign representation (i.e. the bound must be twice larger then ||A||)
+ */
+ struct rns_double {
+ typedef Givaro::Integer integer;
+ typedef Givaro::Modular<double> ModField;
+
+ std::vector<double, AlignedAllocator<double, Alignment::CACHE_LINE>> _basis; // the rns moduli (mi)
+ std::vector<double, AlignedAllocator<double, Alignment::CACHE_LINE>> _basisMax; // (mi-1)
+ std::vector<double, AlignedAllocator<double, Alignment::CACHE_LINE>> _negbasis; // (-mi)
+ std::vector<double, AlignedAllocator<double, Alignment::CACHE_LINE>> _invbasis; // the inverse of rns moduli (1/mi)
+ std::vector<ModField> _field_rns; // the associated prime field for each mi
+ integer _M; // the product of the mi's
+ std::vector<integer> _Mi; // _M/mi
+ std::vector<double> _MMi; // (_Mi)^(-1) mod mi
+ std::vector<double> _crt_in; // 2^(16*j) mod mi
+ std::vector<double> _crt_out; // (_Mi._MMi) written in base 2^16
+ size_t _size; // the size of the rns basis (number of mi's)
+ size_t _pbits; // the size in bit of the mi's
+ size_t _ldm; // log[2^16](_M)
+
+ typedef double BasisElement;
+ typedef rns_double_elt Element;
+ typedef rns_double_elt_ptr Element_ptr;
+ typedef rns_double_elt_cstptr ConstElement_ptr;
+
+ rns_double(const integer& bound, size_t pbits, bool rnsmod=false, long seed=time(NULL))
+ : _M(1), _size(0), _pbits(pbits)
+ {
+ integer::seeding(seed);
+ Givaro::IntPrimeDom IPD;
+ integer prime;
+ integer sum=1;
+ while (_M < bound*sum) {
+ _basis.resize(_size+1);
+ do {
+ integer::random_exact_2exp(prime, _pbits-1);
+ IPD.nextprimein(prime);
+ } while (_M%prime == 0);
+ _basis[_size]=prime;
+ _size++;
+ _M*=prime;
+ if (rnsmod) sum+=prime;
+ }
+ precompute_cst();
+ }
+
+ rns_double(size_t pbits, size_t size, long seed=time(NULL))
+ : _M(1), _size(size), _pbits(pbits)
+ {
+ integer::seeding(seed);
+ Givaro::IntPrimeDom IPD;
+ integer prime;
+ integer sum=1;
+ _basis.resize(size);
+ _negbasis.resize(size);
+ _basisMax.resize(size);
+ for(size_t i = 0 ; i < _size ; ++i){
+ integer::random_exact_2exp(prime, _pbits-1);
+ IPD.nextprimein(prime);
+ _basis[i]=prime;
+ _basisMax[i] = prime-1;
+ _negbasis[i] = 0-prime;
+ _M*=prime;
+ }
+ precompute_cst();
+ }
+
+ template<typename Vect>
+ rns_double(const Vect& basis, bool rnsmod=false, long seed=time(NULL))
+ : _basis(basis.begin(),basis.end()), _basisMax(basis.size()), _negbasis(basis.size()), _M(1), _size(basis.size()), _pbits(0)
+ {
+ for(size_t i=0;i<_size;i++){
+ //std::cout<<"basis["<<i<<"]="<<_basis[i]<<std::endl;
+ _M*=_basis[i];
+ _pbits=std::max(_pbits, integer(_basis[i]).bitsize());
+ }
+ //std::cout<<"M="<<_M<<std::endl;
+ precompute_cst();
+ }
+
+ // can force to reduce integer entries larger than M
+ void precompute_cst(size_t K=0){
+ if (K!=0)
+ _ldm=K;
+ else
+ _ldm = (_M.bitsize()/16) + ((_M.bitsize()%16)?1:0) ;
+ _invbasis.resize(_size);
+ _field_rns.resize(_size);
+ _Mi.resize(_size);
+ _MMi.resize(_size);
+ _basisMax.resize(_size);
+ _negbasis.resize(_size);
+ _crt_in.resize(_size*_ldm);
+ _crt_out.resize(_size*_ldm);
+ //const unsigned int MASK=0xFFFF;
+ //Givaro::Timer chrono;
+ //double t1=0.,t2=0.,t3=0.;
+
+ for (size_t i=0;i<_size;i++){
+ //chrono.start();
+ _invbasis[i] = 1./_basis[i];
+ _basisMax[i] = _basis[i]-1;
+ _negbasis[i] = 0-_basis[i];
+ _field_rns[i] = ModField(_basis[i]);
+ _Mi[i] = _M/(uint64_t)_basis[i];
+ _field_rns[i].init(_MMi[i], _Mi[i] % (double)_basis[i]);
+ _field_rns[i].invin(_MMi[i]);
+ integer tmp= _Mi[i]*(uint64_t)_MMi[i];
+ const mpz_t* m0 = reinterpret_cast<const mpz_t*>(&tmp);
+ const uint16_t* m0_ptr = reinterpret_cast<const uint16_t*>(m0[0]->_mp_d);
+ size_t maxs=std::min(_ldm,(tmp.size())*sizeof(mp_limb_t)/2);// to ensure 32 bits portability
+ //chrono.stop();
+ //t1+=chrono.usertime();
+ //chrono.start();
+ /*
+ for(size_t j=0;j<_ldm;j++){
+ _crt_out[j+i*_ldm]=double(tmp[0]&MASK);
+ tmp>>=16; // Bad idea -> too slow (must get the lowest limb of the integer)
+
+ }
+ */
+ size_t l=0;
+ for(;l<maxs;l++)
+ _crt_out[l+i*_ldm]=m0_ptr[l];
+ for(;l<_ldm;l++)
+ _crt_out[l+i*_ldm]=0.;;
+ // chrono.stop();
+ // t2+=chrono.usertime();
+ // chrono.start();
+ double beta=double(1<<16);
+ double acc=1;
+ for(size_t j=0;j<_ldm;j++){
+ _crt_in[j+i*_ldm]=acc;
+ _field_rns[i].mulin(acc,beta);
+ }
+ // chrono.stop();
+ // t3+=chrono.usertime();
+
+ }
+ // std::cout<<"t1="<<t1<<std::endl;
+ // std::cout<<"t2="<<t2<<std::endl;
+ // std::cout<<"t3="<<t3<<std::endl;
+ }
+
+ // Arns must be an array of m*n*_size
+ // abs(||A||) <= maxA
+ template<typename T>
+ void init(size_t m, size_t n, double* Arns, size_t rda, const T* A, size_t lda,
+ const integer& maxA, bool RNS_MAJOR=false) const
+ {
+ init(m,n,Arns,rda,A,lda, maxA.bitsize()/16 + (maxA.bitsize()%16?1:0),RNS_MAJOR);
+ }
+
+ void init(size_t m, size_t n, double* Arns, size_t rda, const integer* A, size_t lda, size_t k, bool RNS_MAJOR=false) const;
+ void init_transpose(size_t m, size_t n, double* Arns, size_t rda, const integer* A, size_t lda, size_t k, bool RNS_MAJOR=false) const;
+ void convert(size_t m, size_t n, integer gamma, integer* A, size_t lda, const double* Arns, size_t rda, bool RNS_MAJOR=false) const;
+ void convert_transpose(size_t m, size_t n, integer gamma, integer* A, size_t lda, const double* Arns, size_t rda, bool RNS_MAJOR=false) const;
+
+ // reduce entries of Arns to be less than the rns basis elements
+ void reduce(size_t n, double* Arns, size_t rda, bool RNS_MAJOR=false) const;
+
+ template<size_t K>
+ void init(size_t m, size_t n, double* Arns, size_t rda, const RecInt::ruint<K>* A, size_t lda, size_t k, bool RNS_MAJOR=false) const;
+ template<size_t K>
+ void convert(size_t m, size_t n, integer gamma, RecInt::ruint<K>* A, size_t lda, const double* Arns, size_t rda, bool RNS_MAJOR=false) const;
+
+
+ }; // end of struct rns_double
+
+ /* Structure that handles rns representation given a bound and bitsize for prime moduli, allow large moduli
+ * support sign representation (i.e. the bound must be twice larger then ||A||)
+ */
+ struct rns_double_extended {
+ typedef Givaro::Integer integer;
+ typedef Givaro::ModularExtended<double> ModField;
+
+ std::vector<double, AlignedAllocator<double, Alignment::CACHE_LINE>> _basis; // the rns moduli (mi)
+ std::vector<double, AlignedAllocator<double, Alignment::CACHE_LINE>> _basisMax; // (mi-1)
+ std::vector<double, AlignedAllocator<double, Alignment::CACHE_LINE>> _negbasis; // (-mi)
+ std::vector<double, AlignedAllocator<double, Alignment::CACHE_LINE>> _invbasis; // the inverse of rns moduli (1/mi)
+ std::vector<ModField> _field_rns; // the associated prime field for each mi
+ integer _M; // the product of the mi's
+ std::vector<integer> _Mi; // _M/mi
+ std::vector<double> _MMi; // (_Mi)^(-1) mod mi
+ std::vector<double> _crt_in; // 2^(16*j) mod mi
+ std::vector<double> _crt_out; // (_Mi._MMi) written in base 2^16
+ size_t _size; // the size of the rns basis (number of mi's)
+ size_t _pbits; // the size in bit of the mi's
+ size_t _ldm; // log[2^16](_M)
+
+ typedef double BasisElement;
+ typedef rns_double_elt Element;
+ typedef rns_double_elt_ptr Element_ptr;
+ typedef rns_double_elt_cstptr ConstElement_ptr;
+
+ rns_double_extended(const integer& bound, size_t pbits, bool rnsmod=false, long seed=time(NULL))
+ : _M(1), _size(0), _pbits(pbits)
+ {
+ integer::seeding(seed);
+ integer prime; Givaro::IntPrimeDom IPD;
+ integer sum=1;
+ while (_M < bound*sum) {
+ _basis.resize(_size+1);
+ do {
+ integer::random_exact_2exp(prime, _pbits-1);
+ IPD.nextprimein(prime);
+ } while (_M%prime == 0);
+ _basis[_size]=prime;
+ _size++;
+ _M*=prime;
+ if (rnsmod) sum+=prime;
+ }
+ precompute_cst();
+ }
+
+ rns_double_extended(size_t pbits, size_t size, long seed=time(NULL))
+ : _M(1), _size(size), _pbits(pbits)
+ {
+ integer::seeding(seed);
+ integer prime; Givaro::IntPrimeDom IPD;
+ integer sum=1;
+ _basis.resize(size);
+ _negbasis.resize(size);
+ _basisMax.resize(size);
+ for(size_t i = 0 ; i < _size ; ++i){
+ integer::random_exact_2exp(prime, _pbits-1);
+ IPD.nextprimein(prime);
+ _basis[i]=prime;
+ _basisMax[i] = prime-1;
+ _negbasis[i] = 0-prime;
+ _M*=prime;
+ }
+ precompute_cst();
+ }
+
+ template<typename Vect>
+ rns_double_extended(const Vect& basis, bool rnsmod=false, long seed=time(NULL))
+ : _basis(basis.begin(),basis.end()), _basisMax(basis.size()), _negbasis(basis.size()), _M(1), _size(basis.size()), _pbits(0)
+ {
+ for(size_t i=0;i<_size;i++){
+ //std::cout<<"basis["<<i<<"]="<<_basis[i]<<std::endl;
+ _M*=_basis[i];
+ _pbits=std::max(_pbits, integer(_basis[i]).bitsize());
+ }
+ //std::cout<<"M="<<_M<<std::endl;
+ precompute_cst();
+ }
+
+
+ void precompute_cst(){
+ _ldm = (_M.bitsize()/16) + ((_M.bitsize()%16)?1:0) ;
+ _invbasis.resize(_size);
+ _basisMax.resize(_size);
+ _negbasis.resize(_size);
+ _field_rns.resize(_size);
+ _Mi.resize(_size);
+ _MMi.resize(_size);
+ _crt_in.resize(_size*_ldm);
+ _crt_out.resize(_size*_ldm);
+ const unsigned int MASK=0xFFFF;
+ for (size_t i=0;i<_size;i++){
+ _invbasis[i] = 1./_basis[i];
+ _basisMax[i] = _basis[i]-1;
+ _negbasis[i] = 0-_basis[i];
+ _field_rns[i] = ModField(_basis[i]);
+ _Mi[i] = _M/(uint64_t)_basis[i];
+ _field_rns[i].init(_MMi[i], _Mi[i] % (double)_basis[i]);
+ _field_rns[i].invin(_MMi[i]);
+ integer tmp= _Mi[i]*(uint64_t)_MMi[i];
+ for(size_t j=0;j<_ldm;j++){
+ _crt_out[j+i*_ldm]=double(tmp&MASK);
+ tmp>>=16;
+ }
+ double beta=double(1<<16);
+ double acc=1;
+ for(size_t j=0;j<_ldm;j++){
+ _crt_in[j+i*_ldm]=acc;
+ _field_rns[i].mulin(acc,beta);
+ }
+ }
+ }
+
+ // Arns must be an array of m*n*_size
+ // abs(||A||) <= maxA
+ void init(size_t m, size_t n, double* Arns, size_t rda, const integer* A, size_t lda,
+ const integer& maxA, bool RNS_MAJOR=false) const
+ {
+ init(m*n,Arns,A,lda);
+ }
+
+ void init(size_t m, size_t n, double* Arns, size_t rda, const integer* A, size_t lda, size_t k, bool RNS_MAJOR=false){
+ init(m*n,Arns,A,lda);
+ }
+ void convert(size_t m, size_t n, integer gamma, integer* A, size_t lda, const double* Arns, size_t rda, bool RNS_MAJOR=false){
+ convert(m*n, A, Arns);
+ }
+ void init(size_t m, double* Arns, const integer* A, size_t lda) const;
+ void convert(size_t m, integer *A, const double *Arns) const;
+
+#if defined(__FFLASFFPACK_USE_SIMD)
+
+ template<class SimdT>
+ inline void splitSimd(const SimdT x, SimdT & x_h, SimdT & x_l) const {
+ using simd = Simd<double>;
+ using vect_t = typename simd::vect_t;
+ vect_t vc = simd::set1((double)((1 << 27)+1));
+ vect_t tmp = simd::mul(vc, x);
+ x_h = simd::add(tmp, simd::sub(x, tmp));
+ x_l = simd::sub(x, x_h);
+ }
+
+ template<class SimdT>
+ inline void multSimd(const SimdT va, const SimdT vb, SimdT & vs, SimdT & vt) const{
+ using simd = Simd<double>;
+ using vect_t = typename simd::vect_t;
+ vect_t vah, val, vbh, vbl;
+ vs = simd::mul(va, vb);
+//#ifdef __FMA__
+ vt = simd::fnmadd(va, vb, vs);
+//#else
+ splitSimd(va, vah, val);
+ splitSimd(vb, vbh, vbl);
+ vt = simd::add(simd::add(simd::sub(simd::mul(vah, vbh), vs), simd::mul(vah, vbl)), simd::add(simd::mul(val, vbh), simd::mul(val, vbl)));
+//#endif
+ }
+
+ template<class SimdT>
+ inline SimdT modSimd(const SimdT a, const SimdT p, const SimdT ip, const SimdT np) const{
+ using simd = Simd<double>;
+ using vect_t = typename simd::vect_t;
+ vect_t pqh, pql, abl, abh;
+ vect_t q = simd::floor(simd::mul(a, ip));
+ multSimd(p, q, pqh, pql);
+ vect_t r = simd::add(simd::sub(a, pqh), pql);
+ abh = simd::greater_eq(r, p);
+ abl = simd::lesser(r, simd::zero());
+ abh = simd::vand(abh, np);
+ abl = simd::vand(abl, p);
+ abh = simd::vor(abh, abl);
+ return r = simd::add(r, abh);
+ }
+
+#endif // __FFLASFFPACK_USE_SIMD
+
+ // reduce entries of Arns to be less than the rns basis elements
+ void reduce(size_t n, double* Arns, size_t rda, bool RNS_MAJOR=false) const;
+
+
+
+ }; // end of struct rns_double_extended
+
+} // end of namespace FFPACK
+
+#include "rns-double.inl"
+//#include "rns-double-recint.inl"
+namespace FFLAS {
+
+ template<>
+ inline void fflas_delete (FFPACK::rns_double_elt_ptr A) {FFLAS::fflas_delete( A._ptr);}
+ template<>
+ inline void fflas_delete (FFPACK::rns_double_elt_cstptr A) {delete[] A._ptr;}
+
+}
+
+#endif // __FFPACK_rns_double_H
+
diff --git a/fflas-ffpack/field/rns-double.inl b/fflas-ffpack/field/rns-double.inl
new file mode 100644
index 0000000..71a5eed
--- /dev/null
+++ b/fflas-ffpack/field/rns-double.inl
@@ -0,0 +1,568 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+#ifndef __FFLASFFPACK_field_rns_double_INL
+#define __FFLASFFPACK_field_rns_double_INL
+
+#include "fflas-ffpack/fflas/fflas_freduce.h"
+
+namespace FFPACK {
+
+ // Arns must be an array of m*n*_size
+ // abs(||A||) < 2^(16k)
+ inline void rns_double::init(size_t m, size_t n, double* Arns, size_t rda, const integer* A, size_t lda, size_t k, bool RNS_MAJOR) const
+ {
+ if (k>_ldm){
+ FFPACK::failure()(__func__,__FILE__,__LINE__,"rns_struct: init (too large entry)");
+ std::cerr<<"k="<<k<<" _ldm="<<_ldm<<std::endl;
+ }
+ size_t mn=m*n;
+ double *A_beta = FFLAS::fflas_new<double >(mn*k);
+ const integer* Aiter=A;
+ // split A into A_beta according to a Kronecker transform in base 2^16
+// auto sp=SPLITTER(MAX_THREADS,FFLAS::CuttingStrategy::Column,FFLAS::StrategyParameter::Threads);
+
+ Givaro::Timer tkr; tkr.start();
+// #ifndef __FFLASFFPACK_SEQUENTIAL
+// auto sp=SPLITTER(MAX_THREADS);
+// #else
+// auto sp=SPLITTER(1);
+// #endif
+ // FOR2D(i,j,m,n,sp,
+ // TASK(MODE(READ(Aiter[0]) READWRITE(A_beta[0])),
+ //for(size_t i=0;i<m;i++)
+ //PAR_BLOCK{
+// FOR1D(i,m,sp,
+ PARFOR1D(i,m,SPLITTER(NUM_THREADS),
+ for(size_t j=0;j<n;j++){
+ size_t idx=j+i*n;
+ const mpz_t* m0 = reinterpret_cast<const mpz_t*>(Aiter+j+i*lda);
+ const uint16_t* m0_ptr = reinterpret_cast<const uint16_t*>(m0[0]->_mp_d);
+ size_t l=0;
+ //size_t maxs=std::min(k,(Aiter[j+i*lda].size())<<2);
+ size_t maxs=std::min(k,(Aiter[j+i*lda].size())*sizeof(mp_limb_t)/2);// to ensure 32 bits portability
+
+ if (m0[0]->_mp_size >= 0)
+ for (;l<maxs;l++)
+ A_beta[l+idx*k]= m0_ptr[l];
+ else
+ for (;l<maxs;l++)
+ A_beta[l+idx*k]= - double(m0_ptr[l]);
+ for (;l<k;l++)
+ A_beta[l+idx*k]= 0.;
+
+ // );
+ }
+ );
+
+ tkr.stop();
+ //if(m>1 && n>1) std::cerr<<"Kronecker : "<<tkr.realtime()<<std::endl;
+ if (RNS_MAJOR==false) {
+ // Arns = _crt_in x A_beta^T
+ Givaro::Timer tfgemm; tfgemm.start();
+ FFLAS::fgemm (Givaro::ZRing<double>(), FFLAS::FflasNoTrans,FFLAS::FflasTrans,_size,mn,k,1.0,_crt_in.data(),_ldm,A_beta,k,0.,Arns,rda,
+ // FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads>());
+ FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive,FFLAS::StrategyParameter::TwoDAdaptive>());
+
+ tfgemm.stop();
+ //if(m>1 && n>1) std::cerr<<"fgemm : "<<tfgemm.realtime()<<std::endl;
+// cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasTrans,(int)_size,(int)mn,(int)k,1.0,_crt_in.data(),(int)_ldm,A_beta,(int)k,0.,Arns,(int)rda);
+ // reduce each row i of Arns modulo moduli[i]
+ //for(size_t i=0;i<_size;i++)
+ // FFLAS::freduce (_field_rns[i],mn,Arns+i*rda,1);
+ }
+ else {
+ // Arns = A_beta x _crt_in^T
+ cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasTrans,(int)mn,(int)_size,(int)k,1.0,A_beta,(int)k,_crt_in.data(),(int)_ldm,0.,Arns,(int)_size);
+ // reduce each column j of Arns modulo moduli[i]
+ //for(size_t i=0;i<_size;i++)
+ // FFLAS::freduce (_field_rns[i],mn,Arns+i,_size);
+ }
+ Givaro::Timer tred; tred.start();
+
+ reduce(mn,Arns,rda,RNS_MAJOR);
+ tred.stop();
+ //if(m>1 && n>1) std::cerr<<"Reduce : "<<tred.realtime()<<std::endl;
+
+ FFLAS::fflas_delete( A_beta);
+
+#ifdef CHECK_RNS
+ bool ok=true;
+ for (size_t i=0;i<m;i++)
+ for(size_t j=0;j<n;j++)
+ for(size_t k=0;k<_size;k++){
+ ok&= (((A[i*lda+j] % (int64_t) _basis[k])+(A[i*lda+j]<0?(int64_t)_basis[k]:0)) == (int64_t) Arns[i*n+j+k*rda]);
+ if (((A[i*lda+j] % (int64_t) _basis[k])+(A[i*lda+j]<0?(int64_t)_basis[k]:0))
+ != (int64_t) Arns[i*n+j+k*rda])
+ {
+ std::cout<<((A[i*lda+j] % (int64_t) _basis[k])+(A[i*lda+j]<0?(int64_t)_basis[k]:0))
+ <<" != "
+ <<(int64_t) Arns[i*n+j+k*rda]
+ <<std::endl;
+ }
+ }
+ std::cout<<"RNS freduce ... "<<(ok?"OK":"ERROR")<<std::endl;
+#endif
+ }
+
+ // Arns must be an array of m*n*_size
+ // abs(||A||) < 2^(16k)
+ inline void rns_double::init_transpose(size_t m, size_t n, double* Arns, size_t rda, const integer* A, size_t lda, size_t k, bool RNS_MAJOR) const
+ {
+ if (k>_ldm)
+ FFPACK::failure()(__func__,__FILE__,__LINE__,"rns_struct: init (too large entry)");
+
+ size_t mn=m*n;
+ double *A_beta = FFLAS::fflas_new<double >(mn*k);
+ const integer* Aiter=A;
+ // split A into A_beta according to a Kronecker transform in base 2^16
+ for(size_t j=0;j<n;j++){
+ for(size_t i=0;i<m;i++){
+ size_t idx=i+j*m;
+ const mpz_t* m0 = reinterpret_cast<const mpz_t*>(Aiter+j+i*lda);
+ const uint16_t* m0_ptr = reinterpret_cast<const uint16_t*>(m0[0]->_mp_d);
+ size_t l=0;
+ //size_t maxs=std::min(k,(Aiter[j+i*lda].size())<<2);
+ size_t maxs=std::min(k,(Aiter[j+i*lda].size())*sizeof(mp_limb_t)/2); // to ensure 32 bits portability
+ if (m0[0]->_mp_size >= 0)
+ for (;l<maxs;l++)
+ A_beta[l+idx*k]= m0_ptr[l];
+ else
+ for (;l<maxs;l++)
+ A_beta[l+idx*k]= - double(m0_ptr[l]);
+ for (;l<k;l++)
+ A_beta[l+idx*k]= 0.;
+ }
+ }
+ if (RNS_MAJOR==false) {
+ // Arns = _crt_in x A_beta^T
+ cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasTrans,(int)_size,(int)mn,(int)k,1.0,_crt_in.data(),(int)_ldm,A_beta,(int)k,0.,Arns,(int)rda);
+ // reduce each row i of Arns modulo moduli[i]
+ //for(size_t i=0;i<_size;i++)
+ // FFLAS::freduce (_field_rns[i],mn,Arns+i*rda,1);
+ }
+ else {
+ // Arns = A_beta x _crt_in^T
+ cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasTrans,(int)mn,(int)_size,(int)k,1.0,A_beta,(int)k,_crt_in.data(),(int)_ldm,0.,Arns,(int)_size);
+ // reduce each column j of Arns modulo moduli[i]
+ //for(size_t i=0;i<_size;i++)
+ // FFLAS::freduce (_field_rns[i],mn,Arns+i,_size);
+ }
+ reduce(mn,Arns,rda,RNS_MAJOR);
+
+ FFLAS::fflas_delete( A_beta);
+
+#ifdef CHECK_RNS
+ bool ok=true;
+ for (size_t i=0;i<m;i++)
+ for(size_t j=0;j<n;j++)
+ for(size_t k=0;k<_size;k++)
+ ok&= (((A[i*lda+j] % (int64_t) _basis[k])+(A[i*lda+j]<0?(int64_t)_basis[k]:0))
+ == (int64_t) Arns[j*m+i+k*rda]);
+ std::cout<<"RNS freduce ... "<<(ok?"OK":"ERROR")<<std::endl;
+#endif
+ }
+
+ inline void rns_double::convert(size_t m, size_t n, integer gamma, integer* A, size_t lda,
+ const double* Arns, size_t rda, bool RNS_MAJOR) const
+ {
+#ifdef CHECK_RNS
+ integer* Acopy=new integer[m*n];
+ for(size_t i=0;i<m;i++)
+ for(size_t j=0;j<n;j++)
+ Acopy[i*n+j]=A[i*lda+j];
+
+#endif
+
+ integer hM= (_M-1)>>1;
+ size_t mn= m*n;
+ double *A_beta= FFLAS::fflas_new<double>(mn*_ldm);
+ Givaro::Timer tfgemmc;tfgemmc.start();
+ if (RNS_MAJOR==false)
+ // compute A_beta = Ap^T x M_beta
+ FFLAS::fgemm(Givaro::ZRing<double>(),FFLAS::FflasTrans, FFLAS::FflasNoTrans,(int) mn,(int) _ldm,(int) _size, 1.0 , Arns,(int) rda, _crt_out.data(),(int) _ldm, 0., A_beta,(int)_ldm,
+ FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive,FFLAS::StrategyParameter::TwoDAdaptive >());
+// FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads >());
+
+ else // compute A_beta = Ap x M_Beta
+ cblas_dgemm(CblasRowMajor,CblasNoTrans, CblasNoTrans, (int)mn, (int)_ldm, (int)_size, 1.0 , Arns, (int)_size, _crt_out.data(), (int)_ldm, 0., A_beta,(int)_ldm);
+
+ tfgemmc.stop();
+ //if(m>1 && n>1) std::cerr<<"fgemm Convert : "<<tfgemmc.realtime()<<std::endl;
+ // compute A using inverse Kronecker transform of A_beta expressed in base 2^log_beta
+ integer* Aiter= A;
+ size_t k=_ldm;
+ size_t k4=((k+3)>>2)+ (((k+3)%4==0)?0:1);
+ std::vector<uint16_t> A0(k4<<2,0),A1(k4<<2,0),A2(k4<<2,0),A3(k4<<2,0);
+ integer a0,a1,a2,a3,res;
+ mpz_t *m0,*m1,*m2,*m3;
+ m0= reinterpret_cast<mpz_t*>(&a0);
+ m1= reinterpret_cast<mpz_t*>(&a1);
+ m2= reinterpret_cast<mpz_t*>(&a2);
+ m3= reinterpret_cast<mpz_t*>(&a3);
+ mp_limb_t *m0_d,*m1_d,*m2_d,*m3_d;
+ m0_d = m0[0]->_mp_d;
+ m1_d = m1[0]->_mp_d;
+ m2_d = m2[0]->_mp_d;
+ m3_d = m3[0]->_mp_d;
+ m0[0]->_mp_alloc = m1[0]->_mp_alloc = m2[0]->_mp_alloc = m3[0]->_mp_alloc = (int) (k4*8/sizeof(mp_limb_t)); // to ensure 32 bits portability
+ m0[0]->_mp_size = m1[0]->_mp_size = m2[0]->_mp_size = m3[0]->_mp_size = (int) (k4*8/sizeof(mp_limb_t)); // to ensure 32 bits portability
+ Givaro::Timer tkroc;
+ tkroc.start();
+// auto sp=SPLITTER();
+// PARFOR1D(i,m,sp,
+ for(size_t i=0;i<m;i++)
+ for (size_t j=0;j<n;j++){
+ size_t idx=i*n+j;
+ for (size_t l=0;l<k;l++){
+ uint64_t tmp=(uint64_t)A_beta[l+idx*k];
+ uint16_t* tptr= reinterpret_cast<uint16_t*>(&tmp);
+ A0[l ]= tptr[0];
+ A1[l+1]= tptr[1];
+ A2[l+2]= tptr[2];
+ A3[l+3]= tptr[3];
+ }
+ // see A0,A1,A2,A3 as a the gmp integers a0,a1,a2,a3
+ m0[0]->_mp_d= reinterpret_cast<mp_limb_t*>(&A0[0]);
+ m1[0]->_mp_d= reinterpret_cast<mp_limb_t*>(&A1[0]);
+ m2[0]->_mp_d= reinterpret_cast<mp_limb_t*>(&A2[0]);
+ m3[0]->_mp_d= reinterpret_cast<mp_limb_t*>(&A3[0]);
+ res = a0;res+= a1;res+= a2;res+= a3;
+ res%=_M;
+
+ // get the correct result according to the expected sign of A
+ if (res>hM)
+ res-=_M;
+ if (gamma==0)
+ Aiter[j+i*lda]=res;
+ else
+ if (gamma==integer(1))
+ Aiter[j+i*lda]+=res;
+ else
+ if (gamma==integer(-1))
+ Aiter[j+i*lda]=res-Aiter[j+i*lda];
+ else{
+ Aiter[j+i*lda]*=gamma;
+ Aiter[j+i*lda]+=res;
+ }
+
+ }
+ tkroc.stop();
+ //if(m>1 && n>1) std::cerr<<"Kronecker Convert : "<<tkroc.realtime()<<std::endl;
+
+ m0[0]->_mp_d = m0_d;
+ m1[0]->_mp_d = m1_d;
+ m2[0]->_mp_d = m2_d;
+ m3[0]->_mp_d = m3_d;
+ m0[0]->_mp_alloc = m1[0]->_mp_alloc = m2[0]->_mp_alloc= m3[0]->_mp_alloc = 1;
+ m0[0]->_mp_size = m1[0]->_mp_size = m2[0]->_mp_size = m3[0]->_mp_size = 0;
+ FFLAS::fflas_delete( A_beta);
+
+#ifdef CHECK_RNS
+ bool ok=true;
+ for (size_t i=0;i<m;i++)
+ for(size_t j=0;j<n;j++)
+ for(size_t k=0;k<_size;k++){
+ int64_t _p =(int64_t) _basis[k];
+ integer curr=A[i*lda+j] - gamma*Acopy[i*n+j];
+ ok&= ( curr% _p +(curr%_p<0?_p:0) == (int64_t) Arns[i*n+j+k*rda]);
+ //std::cout<<A[i*lda+j]<<" mod "<<(int64_t) _basis[k]<<"="<<(int64_t) Arns[i*n+j+k*rda]<<";"<<std::endl;
+ }
+ std::cout<<"RNS convert ... "<<(ok?"OK":"ERROR")<<std::endl;
+#endif
+
+ }
+
+ inline void rns_double::convert_transpose(size_t m, size_t n, integer gamma, integer* A, size_t lda,
+ const double* Arns, size_t rda, bool RNS_MAJOR) const
+ {
+ integer hM= (_M-1)>>1;
+ size_t mn= m*n;
+ double *A_beta= FFLAS::fflas_new<double>(mn*_ldm);
+
+ if (RNS_MAJOR==false)
+ // compute A_beta = Ap^T x M_beta
+ cblas_dgemm(CblasRowMajor,CblasTrans, CblasNoTrans,(int) mn,(int) _ldm,(int) _size, 1.0 , Arns,(int) rda, _crt_out.data(),(int) _ldm, 0., A_beta,(int)_ldm);
+ else // compute A_beta = Ap x M_Beta
+ cblas_dgemm(CblasRowMajor,CblasNoTrans, CblasNoTrans, (int)mn, (int)_ldm, (int)_size, 1.0 , Arns, (int)_size, _crt_out.data(), (int)_ldm, 0., A_beta,(int)_ldm);
+
+ // compute A using inverse Kronecker transform of A_beta expressed in base 2^log_beta
+ integer* Aiter= A;
+ size_t k=_ldm;
+ size_t k4=((k+3)>>2)+ (((k+3)%4==0)?0:1);
+ std::vector<uint16_t> A0(k4<<2,0),A1(k4<<2,0),A2(k4<<2,0),A3(k4<<2,0);
+ integer a0,a1,a2,a3,res;
+ mpz_t *m0,*m1,*m2,*m3;
+ m0= reinterpret_cast<mpz_t*>(&a0);
+ m1= reinterpret_cast<mpz_t*>(&a1);
+ m2= reinterpret_cast<mpz_t*>(&a2);
+ m3= reinterpret_cast<mpz_t*>(&a3);
+ mp_limb_t *m0_d,*m1_d,*m2_d,*m3_d;
+ m0_d = m0[0]->_mp_d;
+ m1_d = m1[0]->_mp_d;
+ m2_d = m2[0]->_mp_d;
+ m3_d = m3[0]->_mp_d;
+ m0[0]->_mp_alloc = m1[0]->_mp_alloc = m2[0]->_mp_alloc = m3[0]->_mp_alloc = (int32_t)(k4*8/sizeof(mp_limb_t)); // to ensure 32 bits portability
+ m0[0]->_mp_size = m1[0]->_mp_size = m2[0]->_mp_size = m3[0]->_mp_size = (int32_t)(k4*8/sizeof(mp_limb_t)); // to ensure 32 bits portability
+ for (size_t j=0;j<n;j++)
+ for(size_t i=0;i<m;i++){
+
+
+ size_t idx=i+j*m;
+ for (size_t l=0;l<k;l++){
+ uint64_t tmp=(uint64_t)A_beta[l+idx*k];
+ uint16_t* tptr= reinterpret_cast<uint16_t*>(&tmp);
+ A0[l ]= tptr[0];
+ A1[l+1]= tptr[1];
+ A2[l+2]= tptr[2];
+ A3[l+3]= tptr[3];
+ }
+ // see A0,A1,A2,A3 as a the gmp integers a0,a1,a2,a3
+ m0[0]->_mp_d= reinterpret_cast<mp_limb_t*>(&A0[0]);
+ m1[0]->_mp_d= reinterpret_cast<mp_limb_t*>(&A1[0]);
+ m2[0]->_mp_d= reinterpret_cast<mp_limb_t*>(&A2[0]);
+ m3[0]->_mp_d= reinterpret_cast<mp_limb_t*>(&A3[0]);
+ res = a0;res+= a1;res+= a2;res+= a3;
+ res%=_M;
+
+ // get the correct result according to the expected sign of A
+ if (res>hM)
+ res-=_M;
+ if (gamma==0)
+ Aiter[j+i*lda]=res;
+ else
+ if (gamma==integer(1))
+ Aiter[j+i*lda]+=res;
+ else
+ if (gamma==integer(-1))
+ Aiter[j+i*lda]=res-Aiter[j+i*lda];
+ else{
+ Aiter[j+i*lda]*=gamma;
+ Aiter[j+i*lda]+=res;
+ }
+
+ }
+ m0[0]->_mp_d = m0_d;
+ m1[0]->_mp_d = m1_d;
+ m2[0]->_mp_d = m2_d;
+ m3[0]->_mp_d = m3_d;
+ m0[0]->_mp_alloc = m1[0]->_mp_alloc = m2[0]->_mp_alloc= m3[0]->_mp_alloc = 1;
+ m0[0]->_mp_size = m1[0]->_mp_size = m2[0]->_mp_size = m3[0]->_mp_size = 0;
+ FFLAS::fflas_delete( A_beta);
+#ifdef CHECK_RNS
+ bool ok=true;
+ for (size_t i=0;i<m;i++)
+ for(size_t j=0;j<n;j++)
+ for(size_t k=0;k<_size;k++){
+ ok&= (((A[i*lda+j] % (int64_t) _basis[k])+(A[i*lda+j]% (int64_t) _basis[k]<0?(int64_t)_basis[k]:0)) == (int64_t) Arns[i+j*m+k*rda]);
+ //std::cout<<A[i*lda+j]<<" mod "<<(int64_t) _basis[k]<<"="<<(int64_t) Arns[i*n+j+k*rda]<<";"<<std::endl;
+ }
+ std::cout<<"RNS convert ... "<<(ok?"OK":"ERROR")<<std::endl;
+#endif // CHECK_RNS
+
+ }
+
+ // reduce entries of Arns to be less than the rns basis elements
+ inline void rns_double::reduce(size_t n, double* Arns, size_t rda, bool RNS_MAJOR) const{
+
+ if (RNS_MAJOR) {
+#ifdef __FFLASFFPACK_USE_SIMD
+ using simd = Simd<double>;
+ using vect_t = typename simd::vect_t;
+
+ if(_size % simd::vect_size == 0){
+ for(size_t i = 0 ; i < n ; i++){
+ vect_t tmp1, tmp2, tmp3, v, max, basis, inv, neg;
+ for(size_t j = 0 ; j < _size ; j+=simd::vect_size){
+ basis = simd::load(_basis.data()+j);
+ inv = simd::load(_invbasis.data()+j);
+ max = simd::load(_basisMax.data()+j);
+ neg = simd::load(_negbasis.data()+j);
+ v = simd::load(Arns+i*_size+j);
+ tmp1 = simd::floor(simd::mul(v, inv));
+ tmp2 = simd::fnmadd(v, tmp1, basis);
+ tmp1 = simd::greater(tmp2, max);
+ tmp3 = simd::lesser(tmp2, simd::zero());
+ tmp1 = simd::vand(tmp1, neg);
+ tmp3 = simd::vand(tmp3, basis);
+ tmp1 = simd::vor(tmp1, tmp3);
+ tmp2 = simd::add(tmp2, tmp1);
+ simd::store(Arns+i*_size+j, tmp2);
+ }
+ }
+ }else{
+ for(size_t i = 0 ; i < n ; i++){
+ vect_t tmp1, tmp2, tmp3, v, max, basis, inv, neg;
+ size_t j = 0;
+ for( ; j < ROUND_DOWN(_size, simd::vect_size) ; j+=simd::vect_size){
+ basis = simd::load(_basis.data()+j);
+ inv = simd::load(_invbasis.data()+j);
+ max = simd::load(_basisMax.data()+j);
+ neg = simd::load(_negbasis.data()+j);
+ v = simd::loadu(Arns+i*_size+j);
+ tmp1 = simd::floor(simd::mul(v, inv));
+ tmp2 = simd::fnmadd(v, tmp1, basis);
+ tmp1 = simd::greater(tmp2, max);
+ tmp3 = simd::lesser(tmp2, simd::zero());
+ tmp1 = simd::vand(tmp1, neg);
+ tmp3 = simd::vand(tmp3, basis);
+ tmp1 = simd::vor(tmp1, tmp3);
+ tmp2 = simd::add(tmp2, tmp1);
+ simd::storeu(Arns+i*_size+j, tmp2);
+ }
+ for( ; j < _size ; ++j){
+ // std::cout << j << std::endl;
+ // auto x = std::floor(Arns[i*_size+j] * _invbasis[j]);
+ Arns[i*_size+j] -= std::floor(Arns[i*_size+j]*_invbasis[j])*_basis[j];
+ // Arns[i*_size+j] = std::fma(Arns[i*_size+j], -x, _basis[j]);
+ if(Arns[i*_size+j] >= _basis[j]){
+ Arns[i*_size+j] -= _basis[j];
+ }else if(Arns[i*_size+j] < 0){
+ Arns[i*_size+j] += _basis[j];
+ }
+ }
+ }
+ }
+#else
+ for(size_t i = 0 ; i < n ; i+= _size){
+ for(size_t j = 0 ; j < _size ; ++j){
+ //_field_rns.reduce(Arns+i*_size+j);
+ _field_rns[i].reduce(Arns[i*_size+j]);
+ }
+ }
+#endif
+ }
+ else { // NOT IN RNS MAJOR
+// #ifndef __FFLASFFPACK_SEQUENTIAL
+// auto sp=SPLITTER(MAX_THREADS);
+// #else
+// auto sp=SPLITTER(1);
+// #endif
+ PARFOR1D(i,_size,SPLITTER(NUM_THREADS),
+ //for(size_t i=0;i<_size;i++)
+ FFLAS::freduce (_field_rns[i],n,Arns+i*rda,1);
+ );
+ }
+
+ }
+
+
+// TODO: less naive implementation
+ inline void rns_double_extended::init(size_t m, double* Arns, const integer* A, size_t lda) const{
+ for(size_t i = 0 ; i < m ; ++i){
+ for(size_t j = 0 ; j < _size ; ++j){
+ Arns[i*_size+j] = (double)((A[i*lda]%integer(_basis[j]))[0]);
+ }
+ }
+ }
+
+// TODO: less naive implementation
+ inline void rns_double_extended::convert(size_t m, integer *A, const double *Arns) const{
+ integer hM= (_M-1)/2;
+ for(size_t i = 0 ; i < m ; ++i){
+ A[i] = 0;
+ integer tmp;
+ for(size_t j = 0 ; j < _size ; ++j){
+ A[i] += ((integer(Arns[i*_size+j])*integer(_MMi[j]))%integer(_basis[j]))*integer(_Mi[j]);
+ }
+ A[i] %= _M;
+ if(A[i] > hM)
+ A[i] -= _M;
+ }
+ }
+
+ // reduce entries of Arns to be less than the rns basis elements
+ inline void rns_double_extended::reduce(size_t n, double* Arns, size_t rda, bool RNS_MAJOR) const{
+
+#ifdef __FFLASFFPACK_USE_SIMD
+ using simd = Simd<double>;
+ using vect_t = typename simd::vect_t;
+
+ if(_size % simd::vect_size == 0){
+//#pragma omp parallel for schedule(static, 256)
+ for(size_t i = 0 ; i < n ; i++){
+ vect_t tmp1, tmp2, tmp3, v, max, basis, inv, neg;
+ for(size_t j = 0 ; j < _size ; j+=simd::vect_size){
+ basis = simd::load(_basis.data()+j);
+ inv = simd::load(_invbasis.data()+j);
+ max = simd::load(_basisMax.data()+j);
+ neg = simd::load(_negbasis.data()+j);
+ v = simd::load(Arns+i*_size+j);
+ tmp2 = modSimd(v, basis, inv, neg);
+ tmp1 = simd::greater(tmp2, max);
+ tmp3 = simd::lesser(tmp2, simd::zero());
+ tmp1 = simd::vand(tmp1, neg);
+ tmp3 = simd::vand(tmp3, basis);
+ tmp1 = simd::vor(tmp1, tmp3);
+ tmp2 = simd::add(tmp2, tmp1);
+ simd::store(Arns+i*_size+j, tmp2);
+ }
+ }
+ }else{
+//#pragma omp parallel for schedule(static, 256)
+ for(size_t i = 0 ; i < n ; i++){
+ vect_t tmp1, tmp2, tmp3, v, max, basis, inv, neg;
+ size_t j = 0;
+ for( ; j < ROUND_DOWN(_size, simd::vect_size) ; j+=simd::vect_size){
+ basis = simd::load(_basis.data()+j);
+ inv = simd::load(_invbasis.data()+j);
+ max = simd::load(_basisMax.data()+j);
+ neg = simd::load(_negbasis.data()+j);
+ v = simd::loadu(Arns+i*_size+j);
+ tmp2 = modSimd(v, basis, inv, neg);
+ tmp1 = simd::greater(tmp2, max);
+ tmp3 = simd::lesser(tmp2, simd::zero());
+ tmp1 = simd::vand(tmp1, neg);
+ tmp3 = simd::vand(tmp3, basis);
+ tmp1 = simd::vor(tmp1, tmp3);
+ tmp2 = simd::add(tmp2, tmp1);
+ simd::storeu(Arns+i*_size+j, tmp2);
+ }
+ for( ; j < _size ; ++j){
+ _field_rns[j].reduce(Arns[i*_size+j]);
+ }
+ }
+ }
+#else
+
+// TODO : SIMD version
+ for(size_t i = 0 ; i < n ; i+= _size){
+ for(size_t j = 0 ; j < _size ; ++j){
+ //_field_rns.reduce(Arns+i*_size+j);
+ _field_rns[i].reduce(Arns[i*_size+j]);
+ }
+ }
+
+#endif
+
+ }
+
+} // FFPACK
+
+#endif // __FFLASFFPACK_field_rns_double_INL
diff --git a/fflas-ffpack/field/rns-integer-mod.h b/fflas-ffpack/field/rns-integer-mod.h
new file mode 100644
index 0000000..a72118d
--- /dev/null
+++ b/fflas-ffpack/field/rns-integer-mod.h
@@ -0,0 +1,862 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+/*! @file field/rns-integer-mod.h
+ * @ingroup field
+ * @brief representation of <code>Z/pZ</code> using RNS representation (note: fixed precision)
+ */
+
+
+#ifndef __FFPACK_rns_integer_mod_H
+#define __FFPACK_rns_integer_mod_H
+
+#include <vector>
+#include <cmath>
+
+#include <recint/recint.h>
+#include <givaro/modular-integer.h>
+#include <givaro/givinteger.h>
+#include <givaro/udl.h>
+
+#include "fflas-ffpack/field/rns-double.h"
+#include "fflas-ffpack/field/rns-integer.h"
+#include "fflas-ffpack/field/modular-extended.h"
+#include "fflas-ffpack/fflas/fflas_level1.inl"
+#include "fflas-ffpack/fflas/fflas_level2.inl"
+#include "fflas-ffpack/fflas/fflas_level3.inl"
+#include "fflas-ffpack/fflas/fflas_enum.h"
+
+namespace FFPACK {
+
+ template<typename RNS>
+ class RNSIntegerMod;
+}
+#include "fflas-ffpack/fflas/fflas_fscal_mp.inl"
+
+#if defined(BENCH_PERF_FGEMM_MP) || defined(BENCH_PERF_TRSM_MP) || defined(BENCH_PERF_LQUP_MP)
+#define BENCH_PERF_SCAL_MP
+#define BENCH_MODP
+#endif
+
+namespace FFPACK {
+
+
+ template<typename RNS>
+ class RNSIntegerMod {
+ public:
+ typedef typename RNS::Element Element;
+ typedef typename RNS::Element_ptr Element_ptr;
+ typedef typename RNS::ConstElement_ptr ConstElement_ptr;
+
+ protected:
+ typedef typename RNS::BasisElement BasisElement;
+ typedef Givaro::Modular<BasisElement> ModField;
+ typedef Givaro::Integer integer;
+
+ integer _p;
+ std::vector<BasisElement, AlignedAllocator<BasisElement, Alignment::CACHE_LINE>> _Mi_modp_rns;
+ std::vector<BasisElement, AlignedAllocator<BasisElement, Alignment::CACHE_LINE>> _iM_modp_rns;
+ const RNS *_rns;
+ Givaro::Modular<Givaro::Integer> _F;
+ RNSInteger<RNS> _RNSdelayed;
+ public:
+ Element one, mOne,zero;
+
+#ifdef BENCH_MODP
+ mutable double t_modp, t_igemm, t_scal,t_trsm;
+ mutable size_t n_modp;
+#endif
+
+
+ RNSIntegerMod(const integer& p, const RNS& myrns) : _p(p),
+ _Mi_modp_rns(myrns._size*myrns._size),
+ _iM_modp_rns(myrns._size*myrns._size),
+ _rns(&myrns),
+ _F(p),
+ _RNSdelayed(myrns){
+ init(one,1);
+ init(zero,0);
+ init(mOne,-1);
+ integer iM=0;
+ size_t mysize=myrns._size;
+ integer sum=0;
+ //std::cout << "M: " << myrns._M << std::endl;
+ for (size_t i=0;i<mysize;i++){
+ integer Mi = myrns._Mi[i] % _p;
+ for (size_t j=0;j<mysize;j++){
+ _iM_modp_rns[i+j*mysize]= iM % myrns._basis[j];
+ _Mi_modp_rns[i+j*mysize]= Mi % myrns._basis[j];
+ }
+ iM+=myrns._M;iM%=_p;
+ sum+=myrns._basis[i];
+ }
+#ifdef BENCH_MODP
+ t_modp=t_igemm=t_scal=t_trsm=0.;
+ n_modp=0;
+#endif
+ }
+
+ const rns_double& rns() const {return *_rns;}
+ const RNSInteger<RNS>& delayed() const {return _RNSdelayed;}
+
+ size_t size() const {return _rns->_size;}
+
+ bool isOne(const Element& x) const {
+ bool isone=true;
+ for (size_t i=0;i<_rns->_size;i++)
+ isone&= (one._ptr[i]== x._ptr[i]);
+ return isone;
+ }
+
+ bool isMOne(const Element& x) const {
+ bool ismone=true;
+ for (size_t i=0;i<_rns->_size;i++)
+ ismone&= (mOne._ptr[i]== x._ptr[i]);
+ return ismone;
+ }
+
+ bool isZero(const Element& x) const {
+ //write(std::cout,x)<<" == ";
+ //write(std::cout,zero)<<std::endl;
+ integer t1;
+ t1=convert(t1,x)%_p;
+ //std::cout<<"t1="<<t1<<std::endl;
+ bool iszero=true;
+ for (size_t i=0;i<_rns->_size;i++)
+ iszero&= (zero._ptr[i]==x._ptr[i]);
+ //std::cout<<(iszero || (t1==integer(0))?"zero":"nonzero")<<std::endl;
+ return iszero || (t1==integer(0));
+ }
+
+ integer characteristic(integer &p) const { return p=_p;}
+
+ integer cardinality(integer &p) const { return p=_p;}
+
+ Element& init(Element& x) const{
+ if (x._ptr == NULL){
+ x._ptr = FFLAS::fflas_new<BasisElement>(_rns->_size);
+ x._stride=1;
+ x._alloc=true;
+ }
+ return x;
+ }
+ Element& init(Element& x, const Givaro::Integer& y) const{
+ init(x);
+ size_t k =(_p.bitsize())/16+((_p.bitsize())%16?1:0);
+ _rns->init(1,1,x._ptr,x._stride, &y,1,k);
+ return x;
+ }
+
+ // assume this is the mod p operation
+ Element& reduce (Element& x, const Element& y) const{
+ Givaro::Integer tmp;
+ convert(tmp,y);
+ tmp %= _p;
+ init (x,tmp);
+ return x;
+ }
+
+ Element& reduce (Element& x) const{
+ Givaro::Integer tmp;
+ convert (tmp, x);
+ tmp %= _p;
+ return init (x, tmp);
+ }
+
+ Element& init(Element& x, const Element& y) const{
+ return reduce (x, y);
+ }
+
+
+ Givaro::Integer convert(Givaro::Integer& x, const Element& y)const {
+ _rns->convert(1,1,integer(0),&x,1,y._ptr,y._stride);
+ return x;
+ }
+
+ Element& assign(Element& x, const Element& y) const {
+ for(size_t i=0;i<_rns->_size;i++)
+ x._ptr[i*x._stride] = y._ptr[i*y._stride];
+ return x;
+ }
+
+ Element& add(Element& x, const Element& y, const Element& z) const {
+ for(size_t i=0;i<_rns->_size;i++)
+ _rns->_field_rns[i].add((x._ptr)[i*x._stride],
+ (y._ptr)[i*y._stride],
+ (z._ptr)[i*z._stride]);
+ return x;
+ }
+
+ Element& sub(Element& x, const Element& y, const Element& z) const {
+ for(size_t i=0;i<_rns->_size;i++)
+ _rns->_field_rns[i].sub((x._ptr)[i*x._stride],
+ (y._ptr)[i*y._stride],
+ (z._ptr)[i*z._stride]);
+ return x;
+ }
+
+ Element& neg(Element& x, const Element& y) const {
+ for(size_t i=0;i<_rns->_size;i++)
+ _rns->_field_rns[i].neg((x._ptr)[i*x._stride],
+ (y._ptr)[i*y._stride]);
+ return x;
+ }
+
+ Element& mul(Element& x, const Element& y, const Element& z) const {
+ for(size_t i=0;i<_rns->_size;i++)
+ _rns->_field_rns[i].mul((x._ptr)[i*x._stride],
+ (y._ptr)[i*y._stride],
+ (z._ptr)[i*z._stride]);
+ return x;
+ }
+
+
+ Element& axpyin(Element& x, const Element& y, const Element& z) const {
+ for(size_t i=0;i<_rns->_size;i++)
+ _rns->_field_rns[i].axpyin((x._ptr)[i*x._stride],
+ (y._ptr)[i*y._stride],
+ (z._ptr)[i*z._stride]);
+ return x;
+ }
+
+ Element& inv(Element& x, const Element& y) const {
+ Givaro::Integer tmp;
+ convert(tmp,y);
+ _F.invin(tmp);
+ init(x,tmp);
+ return x;
+ }
+
+ bool areEqual(const Element& x, const Element& y) const {
+ for(size_t i=0;i<_rns->_size;i++)
+ if (!_rns->_field_rns[i].areEqual((x._ptr)[i*x._stride],(y._ptr)[i*y._stride]))
+ return false;
+ return true;
+ }
+ std::ostream& write(std::ostream& os, const Element& y) const {
+ os<<"[ "<< (long) (y._ptr)[0];
+ for(size_t i=1;i<_rns->_size;i++)
+ os<<" , "<< (long) ((y._ptr)[i*y._stride]);
+ return os<<" ]";
+ }
+
+
+ std::ostream& write(std::ostream& os) const {
+ os<<"M:=[ "<< (long) _rns->_basis[0];
+ for(size_t i=1;i<_rns->_size;i++)
+ os<<" , "<< (long) _rns->_basis[i];
+ return os<<" ]"<<std::endl;
+ }
+
+
+ void reduce_modp(size_t n, Element_ptr B) const{
+#ifdef BENCH_MODP
+ FFLAS::Timer chrono; chrono.start();
+#endif
+ size_t _size= _rns->_size;
+ BasisElement *Gamma, *alpha, *A;
+ A=B._ptr;
+ size_t rda = B._stride;
+ Givaro::ZRing<BasisElement> D;
+ Gamma = FFLAS::fflas_new(D,_size,n);
+ alpha = FFLAS::fflas_new(D,n,1);
+
+ // compute Gamma
+ //for(size_t i=0;i<_size;i++)
+ // FFLAS::fscal(_rns->_field_rns[i], n, _rns->_MMi[i], A+i*rda, 1, Gamma+i*n,1);
+ typename RNS::Element mmi(const_cast<typename RNS::BasisElement*>(_rns->_MMi.data()),1);
+ FFLAS::fscal(_RNSdelayed, n, mmi, B, 1, typename RNS::Element_ptr(Gamma,n), 1);
+
+ // compute A = _Mi_modp_rns.Gamma (note must be reduced mod m_i, but this is postpone to the end)
+ FFLAS::fgemm(D,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans, _size, n, _size, D.one, _Mi_modp_rns.data(), _size, Gamma, n, D.zero, A, rda);
+
+ //std::cout<<"fgemv (Y)...";
+ //std::cout<<"fgemv (Y)..."<<n<<" -> "<<_size<<endl;;
+ // compute alpha = _invbase.Gamma
+ FFLAS::fgemv(D,FFLAS::FflasTrans, _size, n, D.one, Gamma, n, _rns->_invbasis.data(), 1 , D.zero, alpha, 1);
+ //std::cout<<"done"<<std::endl;
+
+ // compute ((z-(alpha.M mod p)) mod m_i (perform the subtraction over Z and reduce at the end)
+ for(size_t i=0;i<_size;i++){
+ for(size_t j=0;j<n;j++){
+ //long aa=floor(alpha[j]+0.5);
+ long aa= (long)rint(alpha[j]);
+ A[j+i*rda]-=_iM_modp_rns[aa+i*_size];
+ }
+ }
+
+ // reduce each row of A modulo m_i
+ for (size_t i=0;i<_size;i++)
+ FFLAS::freduce (_rns->_field_rns[i], n, A+i*rda, 1);
+
+ FFLAS::fflas_delete(Gamma);
+ FFLAS::fflas_delete(alpha);
+
+#ifdef BENCH_MODP
+ chrono.stop();
+ t_modp+=chrono.usertime();
+#endif
+ }
+
+ std::ostream& write_matrix(std::ostream& c,
+ double* E,
+ int n, int m, int lda) const
+ {
+ c<<std::endl<<"***********************"<<std::endl;
+ for (int i = 0; i<n;++i){
+ for (int j=0; j<m;++j)
+ c << (long)*(E+j+lda*i) << " ";
+ c << std::endl;
+ }
+ c<<"***********************"<<std::endl;
+ return c << std::endl;
+ }
+
+ void reduce_modp(size_t m, size_t n, Element_ptr B, size_t lda) const{
+#ifdef BENCH_MODP
+ FFLAS::Timer chrono; chrono.start();
+#endif
+ //cout<<"REDUCE MOD WITH LDA!=N"<<endl;
+ size_t _size= _rns->_size;
+ size_t mn=m*n;
+ BasisElement *Gamma, *alpha, *z, *A;
+ A=B._ptr;
+ size_t rda=B._stride;
+ Gamma = FFLAS::fflas_new<BasisElement>(mn*_size);
+ alpha = FFLAS::fflas_new<BasisElement>(mn);
+ z = FFLAS::fflas_new<BasisElement>(mn*_size);
+
+ // compute Gamma
+ //for(size_t i=0;i<_size;i++)
+ // FFLAS::fscal(_rns->_field_rns[i], m, n, _rns->_MMi[i], A+i*rda, lda, Gamma+i*mn,n);
+ typename RNS::Element mmi(const_cast<typename RNS::BasisElement*>(_rns->_MMi.data()),1);
+ FFLAS::fscal(_RNSdelayed, m, n, mmi, B, lda, typename RNS::Element_ptr(Gamma,mn), n);
+
+ // compute Gamma = _Mi_modp_rns.Gamma (note must be reduced mod m_i, but this is postpone to the end)
+ Givaro::ZRing<BasisElement> D;
+
+ FFLAS::fgemm(D,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,_size, mn, _size, D.one, _Mi_modp_rns.data(), _size, Gamma, mn, D.zero, z, mn);
+
+ //write_matrix(std::cout,Gamma, mn, _size, mn);
+
+ // compute alpha = _invbase.Gamma
+ //std::cout<<"fgemv (X)..."<<m<<"x"<<n<<" -> "<<_size<<" "<<lda<<endl;;
+ FFLAS::fgemv(D, FFLAS::FflasTrans, _size, mn, D.one, Gamma, mn, _rns->_invbasis.data(), 1 , D.zero, alpha, 1);
+ //std::cout<<"done"<<std::endl;
+
+ // compute A=((Gamma--(alpha.M mod p)) mod m_i (perform the subtraction over Z and reduce at the end)
+ for(size_t k=0;k<_size;k++){
+ for(size_t i=0;i<m;i++)
+ for(size_t j=0;j<n;j++){
+ long aa=(long)floor(alpha[j+i*n]+0.5);
+ A[j+i*lda+k*rda]= z[j+i*n+k*mn]-_iM_modp_rns[aa+k*_size];
+ }
+ }
+
+ // reduce each row of A modulo m_i
+ for (size_t i=0;i<_size;i++)
+ FFLAS::freduce (_rns->_field_rns[i], m, n, A+i*rda, lda);
+ FFLAS::fflas_delete(Gamma);
+ FFLAS::fflas_delete(alpha);
+ FFLAS::fflas_delete(z);
+#ifdef BENCH_MODP
+ chrono.stop();
+ t_modp+=chrono.usertime();
+#endif
+ }
+
+#ifdef __DLP_CHALLENGE
+
+#define DELTA 27
+ template<class SimdT>
+ inline void splitSimd(const SimdT x, SimdT & x_h, SimdT & x_l) const {
+ using simd = Simd<double>;
+ using vect_t = typename simd::vect_t;
+ vect_t vc = simd::set1((double)((1_ui64 << DELTA) + 1_ui64));
+ vect_t tmp = simd::mul(vc, x);
+ x_h = simd::add(tmp, simd::sub(x, tmp));
+ x_l = simd::sub(x, x_h);
+ }
+
+ template<class SimdT>
+ inline void multSimd(const SimdT va, const SimdT vb, SimdT & vs, SimdT & vt) const{
+ using simd = Simd<double>;
+ using vect_t = typename simd::vect_t;
+ vect_t vah, val, vbh, vbl;
+ splitSimd(va, vah, val);
+ splitSimd(vb, vbh, vbl);
+ vs = simd::mul(va, vb);
+ vt = simd::add(simd::add(simd::sub(simd::mul(vah, vbh), vs), simd::mul(vah, vbl)), simd::add(simd::mul(val, vbh), simd::mul(val, vbl)));
+ }
+
+ template<class SimdT>
+ inline SimdT multModSimd(const SimdT a, const SimdT b, const SimdT p, const SimdT ip, const SimdT np) const{
+ using simd = Simd<double>;
+ using vect_t = typename simd::vect_t;
+ vect_t abh, abl, pqh, pql;
+ multSimd(a, b, abh, abl);
+ vect_t q = simd::floor(simd::mul(abh, ip));
+ multSimd(p, q, pqh, pql);
+ vect_t r = simd::add(simd::sub(abh, pqh), simd::sub(abl, pql));
+ abh = simd::greater_eq(r, p);
+ abl = simd::lesser(r, simd::zero());
+ abh = simd::vand(abh, np);
+ abl = simd::vand(abl, p);
+ abh = simd::vor(abh, abl);
+ return r = simd::add(r, abh);
+ }
+
+ inline void split(const double x, const int delta, double &x_h, double &x_l) const {
+ double c = (double)((1_ui64 << delta) + 1_ui64);
+ x_h = (c*x)+(x-(c*x));
+ x_l = x - x_h;
+ }
+
+ inline void mult(const double a, const double b, double &s, double &t) const{
+ double ah, al, bh, bl;
+ s = a*b;
+#ifdef __FMA__
+ t = std::fma(-a, b, s);
+#else
+ split(a, DELTA, ah, al);
+ split(b, DELTA, bh, bl);
+ t = ((((-s+ah*bh)+(ah*bl))+(al*bh))+(al*bl));
+#endif
+ }
+
+ inline double multmod(const double a, const double b, const double p, const double ip, const double np) const{
+ double abh, abl, pqh, pql;
+ mult(a, b, abh, abl);
+ double q = floor(abh*ip);
+ mult(p, q, pqh, pql);
+ double r = (abh-pqh)+(abl-pql);
+ if(r > p)
+ r -= p;
+ else if(r < 0)
+ r += p;
+ return r;
+ }
+
+ void reduce_modp_rnsmajor_scal_quad(size_t n, Element_ptr B) const {
+ // std::cout << "modp scalar quad" << std::endl;
+ // using namespace modp_details;
+ using simd = Simd<BasisElement>;
+ using vect_t = typename simd::vect_t;
+
+ FFLAS::Timer T;
+ size_t _size= _rns->_size;
+
+ Givaro::ZRing<BasisElement> D;
+ std::vector<Givaro::ModularExtended<double>> Fields;
+ for(size_t i = 0 ; i < _size ; ++i){
+ Fields.emplace_back(_rns->_basis[i]);
+ }
+/*
+ if((int64_t)B._ptr%simd::alignment == 0 && _size%simd::vect_size==0){
+ for(size_t j = 0 ; j < n ; ++j){
+ BasisElement *A, *Gamma, *tabTmp;
+ A = FFLAS::fflas_new<BasisElement>(_size, Alignment::CACHE_LINE);
+ Gamma = FFLAS::fflas_new<BasisElement>(_size, Alignment::CACHE_LINE);
+ tabTmp = FFLAS::fflas_new<BasisElement>(_size, Alignment::CACHE_LINE);
+
+ vect_t vA, vB, vG, vp, vnp, vip, vRNS, vtmp;
+
+ // Compute Gamma
+ for(size_t i = 0 ; i < _size ; i+= simd::vect_size){
+ vB = simd::load(B._ptr+j*_size+i);
+ vp = simd::load(_rns->_basis.data()+i);
+ vip = simd::load(_rns->_invbasis.data()+i);
+ vnp = simd::load(_rns->_negbasis.data()+i);
+ vRNS = simd::load(_rns->_MMi.data()+i);
+ vG = multModSimd(vB, vRNS, vp, vip, vnp);
+ simd::store(Gamma+i, vG);
+ }
+
+ // Compute A=Gamma*Mi in rns
+ for(size_t k = 0 ; k < _size ; ++k){
+ for(size_t i = 0 ; i < _size ; i+= simd::vect_size){
+ vG = simd::load(Gamma+i);
+ vp = simd::set1(_rns->_basis[k]);
+ vip = simd::set1(_rns->_invbasis[k]);
+ vnp = simd::set1(_rns->_negbasis[k]);
+ vRNS = simd::load(_Mi_modp_rns.data()+k*_size+i);
+ vtmp = multModSimd(vG, vRNS, vp, vip, vnp);
+ simd::store(tabTmp+i, vtmp);
+ }
+ for(size_t i = 0 ; i < _size ; ++i){
+ Fields[k].addin(A[k], tabTmp[i]);
+ }
+ }
+ double alpha = 0;
+ for(size_t k = 0 ; k < _size ; ++k){
+ alpha += Gamma[k]*_rns->_invbasis[k];
+ }
+ // -= alpha
+ long aa= (long)rint(alpha);
+ for(size_t k = 0; k < _size ; k++){
+ Fields[k].sub(B._ptr[j*_size+k], A[k], _iM_modp_rns[aa+k*_size]);
+ }
+ FFLAS::fflas_delete(Gamma);
+ FFLAS::fflas_delete(A);
+ FFLAS::fflas_delete(tabTmp);
+ }
+ }else{
+ for(size_t j = 0 ; j < n ; ++j){
+ BasisElement *A, *Gamma, *tabTmp;
+ A = FFLAS::fflas_new<BasisElement>(_size, Alignment::CACHE_LINE);
+ Gamma = FFLAS::fflas_new<BasisElement>(_size, Alignment::CACHE_LINE);
+ tabTmp = FFLAS::fflas_new<BasisElement>(_size, Alignment::CACHE_LINE);
+
+ vect_t vA, vB, vG, vp, vnp, vip, vRNS, vtmp;
+
+ // Compute Gamma
+ size_t i = 0;
+ for(; i < ROUND_DOWN(_size, simd::vect_size) ; i+= simd::vect_size){
+ vB = simd::load(B._ptr+j*_size+i);
+ vp = simd::load(_rns->_basis.data()+i);
+ vip = simd::load(_rns->_invbasis.data()+i);
+ vnp = simd::load(_rns->_negbasis.data()+i);
+ vRNS = simd::load(_rns->_MMi.data()+i);
+ vG = multModSimd(vB, vRNS, vp, vip, vnp);
+ simd::store(Gamma+i, vG);
+ }
+ for(; i < _size ; ++i){
+ Fields[i].mul(Gamma[i], B._ptr[j*_size+i], _rns->_MMi[i]);
+ }
+
+ // Compute A=Gamma*Mi in rns
+ for(size_t k = 0 ; k < _size ; ++k){
+ i = 0;
+ A[k] = 0;
+ for( ; i < ROUND_DOWN(_size, simd::vect_size); i+= simd::vect_size){
+ vG = simd::load(Gamma+i);
+ vp = simd::set1(_rns->_basis[k]);
+ vip = simd::set1(_rns->_invbasis[k]);
+ vnp = simd::set1(_rns->_negbasis[k]);
+ vRNS = simd::load(_Mi_modp_rns.data()+k*_size+i);
+ vtmp = multModSimd(vG, vRNS, vp, vip, vnp);
+ simd::store(tabTmp+i, vtmp);
+ }
+ for(; i < _size ; ++i){
+ Fields[k].mul(tabTmp[i], Gamma[i], _Mi_modp_rns[i]);
+ }
+ for(size_t i = 0 ; i < _size ; ++i){
+ Fields[k].addin(A[k], tabTmp[i]);
+ }
+ }
+ double alpha = 0;
+ for(size_t k = 0 ; k < _size ; ++k){
+ alpha += Gamma[k]*_rns->_invbasis[k];
+ }
+ // -= alpha
+ long aa= (long)rint(alpha);
+ for(size_t k = 0; k < _size ; k++){
+ Fields[k].sub(B._ptr[j*_size+k], A[k], _iM_modp_rns[aa+k*_size]);
+ }
+ FFLAS::fflas_delete(Gamma);
+ FFLAS::fflas_delete(A);
+ FFLAS::fflas_delete(tabTmp);
+ }
+ }
+ //*/
+ //*
+#pragma omp parallel for schedule(static, 256)
+ for(size_t i = 0 ; i < n; ++i){
+ double* Ad;
+ BasisElement *Gamma;
+ Gamma = FFLAS::fflas_new<BasisElement>(_size);
+ Ad = FFLAS::fflas_new<double>(_size);
+ // Compute Gamma
+ // std::cout << "B: " << std::endl;
+ // for(size_t j = 0 ; j < _size ; ++j){
+ // std::cout << B._ptr[i*_size+j] << " ";
+ // }
+ // std::cout << std::endl;
+ for(size_t k = 0; k < _size ; ++k){
+ Fields[k].mul(Gamma[k], B._ptr[i*_size+k], _rns->_MMi[k]);
+ }
+ // std::cout << "Gamma: " << std::endl;
+ // for(size_t j = 0 ; j < _size ; ++j){
+ // std::cout << Gamma[j] << " ";
+ // }
+ // std::cout << std::endl;
+
+ // std::cout << "MMi: " << std::endl;
+ // for(size_t j = 0 ; j < _size ; ++j){
+ // std::cout << _rns->_MMi[j] << " ";
+ // }
+ // std::cout << std::endl;
+
+ // FFLAS::fgemm(D,FFLAS::FflasNoTrans,FFLAS::FflasTrans, n, _size, _size, D.one, Gamma, _size, _Mi_modp_rns.data(), _size, D.zero, A, _size);
+ // Mul by Mi_modp
+ for(size_t k = 0 ; k < _size ; ++k){
+ Ad[k] = FFLAS::fdot(Fields[k], _size, Gamma, 1, _Mi_modp_rns.data()+k*_size,1);
+ }
+ // std::cout << "_Mi_modp_rns: " << std::endl;
+ // std::cout << "[";
+ // for(size_t j = 0 ; j < _size ; ++j){
+ // std::cout << "[";
+ // for(size_t k = 0 ; k < _size-1 ; ++k){
+ // std::cout << _Mi_modp_rns[j*_size+k] << " ,";
+ // }
+ // std::cout << _Mi_modp_rns[j*_size+_size-1] << "],";
+ // }
+ // std::cout << "]" << std::endl;
+ // std::cout << "Ad: " << std::endl;
+ // for(size_t j = 0 ; j < _size ; ++j){
+ // std::cout << Ad[j] << " ";
+ // }
+ // std::cout << std::endl;
+ // std::cout << "_iM_modp_rns: " << std::endl;
+ // std::cout << "[";
+ // for(size_t j = 0 ; j < _size ; ++j){
+ // std::cout << "[";
+ // for(size_t k = 0 ; k < _size-1 ; ++k){
+ // std::cout << _iM_modp_rns[j*_size+k] << " ,";
+ // }
+ // std::cout << _iM_modp_rns[j*_size+_size-1] << "],";
+ // }
+ // std::cout << std::endl;
+
+ // compute alpha
+ // FFLAS::fgemv(D,FFLAS::FflasNoTrans, n, _size, D.one, Gamma, _size, _rns->_invbasis.data(), 1 , D.zero, alpha, 1);
+ double alpha = 0;
+ for(size_t k = 0 ; k < _size ; ++k){
+ alpha += Gamma[k]*_rns->_invbasis[k];
+ }
+ // std::cout << "alpha: " << alpha << std::endl;
+ // -= alpha
+ // long aa= (int64_t)alpha;
+ long aa= (long)rint(alpha);
+ //std::cout << "aa: " << aa << std::endl;
+ for(size_t k = 0; k < _size ; k++){
+ // std::cout << Ad[k] << " - " << _iM_modp_rns[aa+k*_size] << " = ";
+ Fields[k].sub(B._ptr[i*_size+k], Ad[k], _iM_modp_rns[aa+k*_size]);
+ // std::cout<<B._ptr[i*_size+k]<<std::endl;
+ }
+ FFLAS::fflas_delete(Gamma);
+
+ FFLAS::fflas_delete(Ad);
+ // std::cout << std::endl;
+ // std::cout << "====================================" << std::endl;
+ }
+ //*/
+ // std::cout << std::endl;
+ // _rns->reduce(n,B._ptr,1,true);
+ }
+
+#endif // __DLP_CHALLENGE
+
+ void reduce_modp_rnsmajor(size_t n, Element_ptr B) const{
+ // std::cout << "modp BLAS" << std::endl;
+#ifdef BENCH_MODP
+ FFLAS::Timer chrono; chrono.start();
+#endif
+ size_t _size= _rns->_size;
+ BasisElement *Gamma, *alpha, *A;
+ A=B._ptr;
+ Givaro::ZRing<BasisElement> D;
+ FFLAS::Timer T;
+ // T.start();
+ Gamma = FFLAS::fflas_new(D,n,_size);
+ alpha = FFLAS::fflas_new(D,n,1);
+ // T.stop();
+ // std::cout << "Alloc: " << T << std::endl;
+ // compute Gamma (NOT EFFICIENT)
+ //for(size_t i=0;i<_size;i++)
+ //
+ // FFLAS::fscal(_rns->_field_rns[i], n, _rns->_MMi[i], A+i, _size, Gamma+i,_size);
+ T.start();
+#ifdef __FFLASFFPACK_USE_SIMD
+ using simd = Simd<BasisElement>;
+ using vect_t = typename simd::vect_t;
+
+ if(((int64_t)A%simd::alignment == 0) && (_size%simd::vect_size==0)){
+ auto MMi = _rns->_MMi.data();
+ for(size_t i = 0 ; i < n ; ++i){
+ vect_t vA1, vA2, vMi1, vMi2, tmp1, tmp2, tmp3, v, max, basis, inv_, neg_;
+ size_t k = 0;
+ for( ; k < ROUND_DOWN(_size, simd::vect_size) ; k+=simd::vect_size){
+ basis = simd::load(_rns->_basis.data()+k);
+ inv_ = simd::load(_rns->_invbasis.data()+k);
+ max = simd::load(_rns->_basisMax.data()+k);
+ neg_ = simd::load(_rns->_negbasis.data()+k);
+ vA1 = simd::load(A+i*_size+k);
+ vMi1 = simd::load(MMi+k);
+ v = simd::mul(vA1, vMi1);
+ tmp1 = simd::floor(simd::mul(v, inv_));
+ tmp2 = simd::fnmadd(v, tmp1, basis);
+ tmp1 = simd::greater(tmp2, max);
+ tmp3 = simd::lesser(tmp2, simd::zero());
+ tmp1 = simd::vand(tmp1, neg_);
+ tmp3 = simd::vand(tmp3, basis);
+ tmp1 = simd::vor(tmp1, tmp3);
+ tmp2 = simd::add(tmp2, tmp1);
+ simd::store(Gamma+i*_size+k, tmp2);
+ }
+ }
+ }else{
+ vect_t vA1, vA2, vMi1, vMi2, tmp1, tmp2, tmp3, v, max, basis, inv_, neg_;
+ auto MMi = _rns->_MMi.data();
+ for(size_t i = 0 ; i < n ; ++i){
+ size_t k = 0;
+ for( ; k < ROUND_DOWN(_size, simd::vect_size) ; k+=simd::vect_size){
+ basis = simd::load(_rns->_basis.data()+k);
+ inv_ = simd::load(_rns->_invbasis.data()+k);
+ max = simd::load(_rns->_basisMax.data()+k);
+ neg_ = simd::load(_rns->_negbasis.data()+k);
+ vA1 = simd::loadu(A+i*_size+k);
+ vMi1 = simd::loadu(MMi+k);
+ v = simd::mul(vA1, vMi1);
+ tmp1 = simd::floor(simd::mul(v, inv_));
+ tmp2 = simd::fnmadd(v, tmp1, basis);
+ tmp1 = simd::greater(tmp2, max);
+ tmp3 = simd::lesser(tmp2, simd::zero());
+ tmp1 = simd::vand(tmp1, neg_);
+ tmp3 = simd::vand(tmp3, basis);
+ tmp1 = simd::vor(tmp1, tmp3);
+ tmp2 = simd::add(tmp2, tmp1);
+ simd::storeu(Gamma+i*_size+k, tmp2);
+ }
+ for(; k < _size ; ++k){
+ Gamma[i*_size+k] = A[i*_size+k]*MMi[k];
+ Gamma[i*_size+k] -= std::floor(Gamma[i*_size+k]*_rns->_invbasis[k])*_rns->_basis[k];
+ if(Gamma[i*_size+k] >= _rns->_basis[k]){
+ Gamma[i*_size+k] -= _rns->_basis[k];
+ }else if(Gamma[i*_size+k] < 0){
+ Gamma[i*_size+k] += _rns->_basis[k];
+ }
+ }
+ }
+ }
+ // _rns->reduce(n,Gamma,1,true);
+#else
+ typename RNS::Element mmi(const_cast<typename RNS::BasisElement*>(_rns->_MMi.data()),1);
+ FFLAS::fscal(_RNSdelayed, n, mmi, B, 1, typename RNS::Element_ptr(Gamma,1), 1);
+#endif
+ T.stop();
+ // std::cout << "Gamma: " << T << std::endl;
+
+
+ // compute A = Gamma._Mi_modp_rns^T (note must be reduced mod m_i, but this is postpone to the end)
+ T.start();
+ FFLAS::fgemm(D,FFLAS::FflasNoTrans,FFLAS::FflasTrans, n, _size, _size, D.one, Gamma, _size, _Mi_modp_rns.data(), _size, D.zero, A, _size);
+ T.stop();
+ // std::cout << "fgemm: " << T << std::endl;
+ // std::cout<<"fgemv (Y)...";
+ //std::cout<<"fgemv (Y)..."<<n<<" -> "<<_size<<endl;;
+ // compute alpha = Gamma._invbasis
+ T.start();
+ FFLAS::fgemv(D,FFLAS::FflasNoTrans, n, _size, D.one, Gamma, _size, _rns->_invbasis.data(), 1 , D.zero, alpha, 1);
+ T.stop();
+ // std::cout << "fgemv: " << T << std::endl;
+ //std::cout<<"done"<<std::endl;
+ T.start();
+ // compute ((z-(alpha.M mod p)) mod m_i (perform the subtraction over Z and reduce at the end)
+ for(size_t j=0;j<n;j++){
+ long aa= (long)rint(alpha[j]);
+ for(size_t i=0;i<_size;i++){
+ //long aa=floor(alpha[j]+0.5);
+ A[j*_size+i]-=_iM_modp_rns[aa+i*_size];
+ }
+ }
+ // vect_t viM;
+ // auto iM = _iM_modp_rns.data();
+ // for(size_t j = 0 ; j < n ; ++j){
+ // long aa= (long)rint(alpha[j]);
+ // for(int i = 0 ; i < ROUND_DOWN(_size, simd::vect_size) ; i+=simd::vect_size){
+ // vA = simd::load(A+j*_size+i);
+ // viM = simd::load(iM+aa)
+ // }
+ // }
+ T.stop();
+ // std::cout << "last: " << T << std::endl;
+ T.start();
+ // reduce each column of A modulo m_i
+ _rns->reduce(n,A,1,true);
+ T.stop();
+ // std::cout << "reduce: "<< T << std::endl;
+
+ // T.start();
+ FFLAS::fflas_delete(Gamma);
+ FFLAS::fflas_delete(alpha);
+ // T.stop();
+ // std::cout << "delete: " << T << std::endl;
+#ifdef BENCH_MODP
+ chrono.stop();
+ t_modp+=chrono.usertime();
+#endif
+
+ }
+
+
+ }; // end of class RNSIntegerMod
+
+} // end of namespace FFPACK
+
+
+namespace FFLAS {
+
+ // specialization for the fflas alloc function
+ template<>
+ inline FFPACK::rns_double_elt_ptr
+ fflas_new(const FFPACK::RNSIntegerMod<FFPACK::rns_double> &F, const size_t m, const size_t n, const Alignment align){
+ return FFPACK::rns_double_elt_ptr(FFLAS::fflas_new<double>(m*n*F.size(),align),m*n);
+ }
+
+ // function to convert from integer to RNS (note: this is not the finit function from FFLAS, extra k)
+ template<typename RNS>
+ void finit_rns(const FFPACK::RNSIntegerMod<RNS> &F, const size_t m, const size_t n, size_t k,
+ const Givaro::Integer *B, const size_t ldb, typename RNS::Element_ptr A)
+ {
+ F.rns().init(m,n,A._ptr,A._stride, B,ldb,k);
+ }
+ template<typename RNS>
+ void finit_trans_rns(const FFPACK::RNSIntegerMod<RNS> &F, const size_t m, const size_t n, size_t k,
+ const Givaro::Integer *B, const size_t ldb, typename RNS::Element_ptr A)
+ {
+ F.rns().init_transpose(m,n,A._ptr,A._stride, B,ldb,k);
+ }
+
+ // function to convert from RNS to integer (note: this is not the fconvert function from FFLAS, extra alpha)
+ template<typename RNS>
+ void fconvert_rns(const FFPACK::RNSIntegerMod<RNS> &F, const size_t m, const size_t n,
+ Givaro::Integer alpha, Givaro::Integer *B, const size_t ldb, typename RNS::ConstElement_ptr A)
+ {
+ F.rns().convert(m,n,alpha,B,ldb,A._ptr,A._stride);
+ }
+ template<typename RNS>
+ void fconvert_trans_rns(const FFPACK::RNSIntegerMod<RNS> &F, const size_t m, const size_t n,
+ Givaro::Integer alpha, Givaro::Integer *B, const size_t ldb, typename RNS::ConstElement_ptr A)
+ {
+ F.rns().convert_transpose(m,n,alpha,B,ldb,A._ptr,A._stride);
+ }
+
+} // end of namespace FFLAS
+#undef DELTA
+#endif
+
diff --git a/fflas-ffpack/field/rns-integer.h b/fflas-ffpack/field/rns-integer.h
new file mode 100644
index 0000000..1762383
--- /dev/null
+++ b/fflas-ffpack/field/rns-integer.h
@@ -0,0 +1,179 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+/*! @file field/rns-integer.h
+ * @ingroup field
+ * @brief representation of <code>Z</code> using RNS representation (note: fixed precision)
+ */
+
+#ifndef __FFPACK_unparametric_rns_integer_H
+#define __FFPACK_unparametric_rns_integer_H
+
+#include <givaro/givinteger.h>
+
+#include "fflas-ffpack/field/rns-double.h"
+
+namespace FFPACK {
+
+
+ template<typename RNS>
+ class RNSInteger {
+ protected:
+ const RNS *_rns; // the rns structure
+ typedef typename RNS::BasisElement BasisElement;
+ typedef Givaro::Integer integer;
+
+ public:
+ typedef typename RNS::Element Element;
+ typedef typename RNS::Element_ptr Element_ptr;
+ typedef typename RNS::ConstElement_ptr ConstElement_ptr;
+
+ Element one, mOne,zero;
+
+ RNSInteger(const RNS& myrns) : _rns(&myrns)
+ {
+ init(one,1);
+ init(zero,0);
+ init(mOne,-1);
+ }
+ template<typename T>
+ RNSInteger(const T &F) : _rns(&(F.rns()))
+ {
+ init(one,1);
+ init(zero,0);
+ init(mOne,-1);
+ }
+
+ const RNS& rns() const {return *_rns;}
+
+ size_t size() const {return _rns->_size;}
+
+ bool isOne(const Element& x) const {
+ bool isone=true;
+ for (size_t i=0;i<_rns->_size;i++)
+ isone&= (one._ptr[i]== x._ptr[i]);
+ return isone;
+ }
+
+ bool isMOne(const Element& x) const {
+ bool ismone=true;
+ for (size_t i=0;i<_rns->_size;i++)
+ ismone&= (mOne._ptr[i]== x._ptr[i]);
+ return ismone;
+ }
+
+ bool isZero(const Element& x) const {
+ bool iszero=true;
+ for (size_t i=0;i<_rns->_size;i++)
+ iszero&= (zero._ptr[i]== x._ptr[i]);
+ return iszero;
+ }
+
+ integer characteristic(integer &p) const { return p=0;}
+
+ integer cardinality(integer &p) const { return p=-1;}
+
+ Element& init(Element& x) const{
+ if (x._ptr == NULL){
+ x._ptr = FFLAS::fflas_new<BasisElement>(_rns->_size);
+ x._stride=1;
+ x._alloc=true;
+ }
+ return x;
+ }
+ Element& init(Element& x, const Givaro::Integer& y) const{
+ init(x);
+ size_t k =(y.bitsize())/16+((y.bitsize())%16?1:0);
+ _rns->init(1,1,x._ptr,x._stride, &y,1,k);
+ return x;
+ }
+ Element& reduce (Element& x, const Element& y) const {return assign (x,y);}
+
+ Element& reduce (Element& x) const {return x;}
+
+ Givaro::Integer convert(Givaro::Integer& x, const Element& y)const {
+ _rns->convert(1,1,integer(0),&x,1,y._ptr,y._stride);
+ return x;
+ }
+
+ Element& assign(Element& x, const Element& y) const {
+ for(size_t i=0;i<_rns->_size;i++)
+ x._ptr[i*x._stride] = y._ptr[i*y._stride];
+ return x;
+ }
+ std::ostream& write(std::ostream& os, const Element& y) const {
+ os<<"[ "<< (long) (y._ptr)[0];
+ for(size_t i=1;i<_rns->_size;i++)
+ os<<" , "<< (long) (y._ptr)[i*y._stride];
+ return os<<" ]";
+ }
+
+
+ std::ostream& write(std::ostream& os) const {
+ os<<"M:=[ "<< (long) _rns->_basis[0];
+ for(size_t i=1;i<_rns->_size;i++)
+ os<<" , "<< (long) _rns->_basis[i];
+ return os<<" ]"<<std::endl;
+ }
+
+
+
+ }; // end of class Unparametric<rns_double>
+
+
+} // end of namespace FFPACK
+
+namespace FFLAS {
+
+ // specialization for the fflas alloc function
+ template<>
+ inline FFPACK::rns_double_elt_ptr
+ fflas_new(const FFPACK::RNSInteger<FFPACK::rns_double> &F, const size_t m, const size_t n, const Alignment align){
+ double *ptr=FFLAS::fflas_new<double>(m*n*F.size(), align);
+ return FFPACK::rns_double_elt_ptr(ptr,m*n);
+ }
+
+ // function to convert from integer to RNS (note: this is not the finit function from FFLAS, extra k)
+ template<typename RNS>
+ void finit_rns(const FFPACK::RNSInteger<RNS> &F, const size_t m, const size_t n, size_t k,
+ const Givaro::Integer *B, const size_t ldb, typename FFPACK::RNSInteger<RNS>::Element_ptr A)
+ {
+ F.rns().init(m,n,A._ptr,A._stride, B,ldb,k);
+ }
+ // function to convert from RNS to integer (note: this is not the fconvert function from FFLAS, extra alpha)
+ template<typename RNS>
+ void fconvert_rns(const FFPACK::RNSInteger<RNS> &F, const size_t m, const size_t n,
+ Givaro::Integer alpha, Givaro::Integer *B, const size_t ldb, typename FFPACK::RNSInteger<RNS>::ConstElement_ptr A)
+ {
+ F.rns().convert(m,n,alpha,B,ldb,A._ptr,A._stride);
+ }
+
+
+} // end of namespace FFLAS
+
+#endif // __FFPACK_unparametric_rns_integer_H
+
diff --git a/fflas-ffpack/fflas-ffpack.h b/fflas-ffpack/field/rns.h
similarity index 71%
copy from fflas-ffpack/fflas-ffpack.h
copy to fflas-ffpack/field/rns.h
index ceeb9c0..b0ec78f 100644
--- a/fflas-ffpack/fflas-ffpack.h
+++ b/fflas-ffpack/field/rns.h
@@ -1,7 +1,10 @@
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2011 FFLAS-FFPACK
- * Written by <brice.boyer at imag.fr>
+
+/*
+ * Copyright (C) FFLAS group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
*
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
@@ -23,17 +26,22 @@
*
*/
-/*! @file fflas-ffpack/fflas-ffpack.h
- * @ingroup fflas-ffpack
- * @brief Includes FFLAS and FFPACK
+
+/*! @file field/rns.h
+ * @ingroup field
+ * @defgroup rns RNS
+ * @brief just include them all
*/
+#ifndef __FFLASFFPACK_field_rns_H
+#define __FFLASFFPACK_field_rns_H
-#ifndef __FFLASFFPACK_fflas_ffpack_H
-#define __FFLASFFPACK_fflas_ffpack_H
+namespace FFPACK{
+ template<typename RNS>
+ class RNSInteger;
+ template<typename RNS>
+ class RNSIntegerMod;
+}
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
-#include "fflas/fflas.h"
-#include "ffpack/ffpack.h"
-#endif // __FFLASFFPACK_fflas_ffpack_H
+#endif // __FFLASFFPACK_field_rns_H
diff --git a/fflas-ffpack/fflas-ffpack.h b/fflas-ffpack/field/rns.inl
similarity index 71%
copy from fflas-ffpack/fflas-ffpack.h
copy to fflas-ffpack/field/rns.inl
index ceeb9c0..3fce111 100644
--- a/fflas-ffpack/fflas-ffpack.h
+++ b/fflas-ffpack/field/rns.inl
@@ -1,7 +1,10 @@
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2011 FFLAS-FFPACK
- * Written by <brice.boyer at imag.fr>
+
+/*
+ * Copyright (C) FFLAS group
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
*
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
@@ -23,17 +26,11 @@
*
*/
-/*! @file fflas-ffpack/fflas-ffpack.h
- * @ingroup fflas-ffpack
- * @brief Includes FFLAS and FFPACK
- */
-
-
-#ifndef __FFLASFFPACK_fflas_ffpack_H
-#define __FFLASFFPACK_fflas_ffpack_H
+#ifndef __FFLASFFPACK_field_rns_INL
+#define __FFLASFFPACK_field_rns_INL
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
-#include "fflas/fflas.h"
-#include "ffpack/ffpack.h"
+#include "rns-double.h"
+#include "rns-integer.h"
+#include "rns-integer-mod.h"
-#endif // __FFLASFFPACK_fflas_ffpack_H
+#endif // __FFLASFFPACK_field_rns_INL
diff --git a/fflas-ffpack/field/unparametric.h b/fflas-ffpack/field/unparametric.h
deleted file mode 100644
index 12db472..0000000
--- a/fflas-ffpack/field/unparametric.h
+++ /dev/null
@@ -1,362 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/* field/unparametric.h
- * Copyright (C) 1999-2005 William J Turner,
- * 2001 Bradford Hovinen
- * 2005 Clement Pernet
- *
- * Written by W. J. Turner <wjturner at acm.org>,
- * Bradford Hovinen <hovinen at cis.udel.edu>
- * Modified By C. Pernet and inserted into Fflas_Ffpack
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *
- */
-
-#ifndef __FFLASFFPACK_field_unparametric_H
-#define __FFLASFFPACK_field_unparametric_H
-
-#include <iostream> // std::cout
-#include <string>
-#include <algorithm>
-#include <typeinfo>
-
-
-namespace FFPACK
-{
- template<class _Element>
- class UnparametricField ;
-
- template <typename Target, typename Source>
- Target& Caster (Target& t, const Source& s)
- {
- return t = static_cast<Target>(s);
- }
-
- /** \brief Unparameterized field adapter.
- * \ingroup field
- * \defgroup UnparametricField UnparametricField
- *
- * A field having an interface similar to that of floats is adapted to LinBox.
- *
- * Used to generate efficient field classes for unparameterized fields (or hidden parameter fields).
- *
- * Some fields are implemented by definition of the C++ arithmetic operators, such as z = x*y,
- * for z, y, z instances of a type K. The LinBox field
- * Unparametric<K> is the adaptation to LinBox.
- *
- * For a typical unparametric field, some of the methods must be defined in a specialization.
- */
- template <class K>
- class UnparametricOperations {
- public:
- typedef K Element;
-
- UnparametricOperations(){}
- //@{
- ~UnparametricOperations () {}
-
- /* Assignment operator.
- * Assigns UnparametricField object F to field.
- * @param F UnparametricField object.
- */
- // I believe this should be virtual -bds
- ///
- //@} Field Object Basics.
-
- /** @name Data Object Management.
- * first argument is set and the value is also returned.
- */
- //@{
-
- Element& init (Element& x) const
- {
- return x;
- }
-
-
- ///
- Element &assign (Element &x, const Element &y) const
- {
- return x = y;
- }
-
-
- //@}
-
- /// @name Comparison Predicates
- //@{
- /// x == y
- bool areEqual (const Element &x, const Element &y) const
- {
- return x == y;
- }
-
- /// x == 0
- bool isZero (const Element &x) const
- {
- return x == Element (0);
- }
-
- /// x == 1
- bool isOne (const Element &x) const
- {
- return x == Element (1);
- }
- //@} Comparison Predicates
-
-
- /** @name Arithmetic Operations
- * The first argument is set and is also the return value.
- */
- //@{
-
- /// x := y + z
- Element &add (Element &x, const Element &y, const Element &z) const
- {
- return x = y + z;
- }
-
- /// x := y - z
- Element &sub (Element &x, const Element &y, const Element &z) const
- {
- return x = y - z;
- }
-
- /// x := y*z
- Element &mul (Element &x, const Element &y, const Element &z) const
- {
- return x = y * z;
- }
-
- /// x := y/z
- Element &div (Element &x, const Element &y, const Element &z) const
- {
- return x = y / z;
- }
-
- /// x := -y
- Element &neg (Element &x, const Element &y) const
- {
- return x = - y;
- }
-
- /// x := 1/y
- Element &inv (Element &x, const Element &y) const
- {
- return x = Element (1) / y;
- }
-
- /// z := a*x + y
- // more optimal implementation, if available, can be defined in a template specialization.
- Element &axpy (Element &z,
- const Element &a,
- const Element &x,
- const Element &y) const
- {
- return z = a * x + y;
- }
-
- //@} Arithmetic Operations
-
- /** @name Inplace Arithmetic Operations
- * The first argument is modified and the result is the return value.
- */
- //@{
-
- /// x := x + y
- Element &addin (Element &x, const Element &y) const
- {
- return x += y;
- }
-
- /// x := x - y
- Element &subin (Element &x, const Element &y) const
- {
- return x -= y;
- }
-
- /// x := x*y
- Element &mulin (Element &x, const Element &y) const
- {
- return x *= y;
- }
-
- /// x := x/y
- Element &divin (Element &x, const Element &y) const
- {
- return x /= y;
- }
-
- /// x := -x
- Element &negin (Element &x) const
- {
- return x = - x;
- }
-
- /// x := 1/x
- Element &invin (Element &x) const
- {
- return x = Element (1) / x;
- }
-
- /// y := a*x + y
- Element &axpyin (Element &y, const Element &a, const Element &x) const
- {
- return y += a * x;
- }
-
- //@} Inplace Arithmetic Operations
-
- /** @name Input/Output Operations */
- //@{
-
- /** Print field.
- * @return output stream to which field is written.
- * @param os output stream to which field is written.
- */
- std::ostream &write (std::ostream &os) const
- {
- return os << "unparameterized field(" << sizeof(Element) <<',' << typeid(Element).name() << ')';
- }
-
- /** Read field.
- * @return input stream from which field is read.
- * @param is input stream from which field is read.
- */
- std::istream &read (std::istream &is) const
- {
- return is;
- }
-
- /** Print field element.
- * @return output stream to which field element is written.
- * @param os output stream to which field element is written.
- * @param x field element.
- */
- std::ostream &write (std::ostream &os, const Element &x) const
- {
- return os << x;
- }
-
- /** Read field element.
- * @return input stream from which field element is read.
- * @param is input stream from which field element is read.
- * @param x field element.
- */
- std::istream &read (std::istream &is, Element &x) const
- {
- return is >> x;
- }
-
- //@}
-
-
-
- };
-
- template<class _Element>
- class UnparametricField : public UnparametricOperations<_Element> {
- protected:
- long int _p ; long int _card ;
- public:
-
- /** The field's element type.
- * Type K must provide a default constructor,
- * a copy constructor, a destructor, and an assignment operator.
- */
-
- typedef typename UnparametricOperations<_Element>::Element Element;
- const Element one ; // peut pas être static... :(
- const Element zero ;
- const Element mOne ;
-
- /** @name Field Object Basics.
- */
- //@{
-
- /** Builds this field to have characteristic q and cardinality q<sup>e</sup>.
- * This constructor must be defined in a specialization.
- */
- UnparametricField(long int q = 0, size_t e = 1) :
- _p(q), _card((long)(q == 0 ? -1 : pow((double)q, (double)e)) )
- // ,one(Element(1L)),zero(Element(0L)),mOne(Element(-1L))
- ,one(1),zero(0),mOne(-one)
- {
- // Caster(one,1);
- } // assuming q is a prime or zero.
- //@}
-
- /// construct this field as copy of F.
- UnparametricField (const UnparametricField &F) :
- _p(F._p), _card(F._card)
- // ,one(1L),zero(0L)
- ,one(F.one),zero(F.zero),mOne(F.mOne)
- {
- // init(mOne,-1L);
- }
-
-
- unsigned long &cardinality (unsigned long &c) const
- {
- return c = _card ;
- }
-
- unsigned long &characteristic (unsigned long &c) const
- {
- return c = _p ;
- // return c = _card ;
- }
-
- unsigned long cardinality () const
- {
- return _card ;
- }
-
- unsigned long characteristic () const
- {
- return _p ;
- // return _card ;
- }
-
- UnparametricField<Element> operator=(const UnparametricField<Element>) { return *this ;}
-
- /// x := y. Caution: it is via cast to long. Good candidate for specialization.
- template <typename Src>
- Element& init (Element& x, const Src& s) const
- {
- return FFPACK::Caster (x, s);
- }
-
-
- /// x := y. Caution: it is via cast to long. Good candidate for specialization. --dpritcha
-
- template <typename T>
- T& convert (T &x, const Element &y) const
- {
- return FFPACK::Caster (x,y);
- }
-
- };
-} // FFPACK
-
-#include "field-general.h"
-
-#endif // __FIELD_UNPARAMETRIC_H_
diff --git a/benchmark/html/Makefile.am b/fflas-ffpack/interfaces/Makefile.am
similarity index 70%
rename from benchmark/html/Makefile.am
rename to fflas-ffpack/interfaces/Makefile.am
index cbf2e81..6d0a04f 100644
--- a/benchmark/html/Makefile.am
+++ b/fflas-ffpack/interfaces/Makefile.am
@@ -1,9 +1,9 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2010 the LinBox group
+# Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
+# This file is part of the library LinBox.
#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
+# LinBox is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
@@ -17,12 +17,8 @@
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# ========LICENCE========
-#/
-#
-# Nothing yet
-EXTRA_DIST=fflas.css \
- html_report.sh \
- html_report.xsl \
- process.sh
+pkgincludesubdir=$(pkgincludedir)/interfaces
+SUBDIRS=libs
+EXTRA_DIST=interfaces.doxy
diff --git a/fflas-ffpack/interfaces/interfaces.doxy b/fflas-ffpack/interfaces/interfaces.doxy
new file mode 100644
index 0000000..d512925
--- /dev/null
+++ b/fflas-ffpack/interfaces/interfaces.doxy
@@ -0,0 +1,32 @@
+// Copyright (c) 2011 FFLAS-FFPACK
+// written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+//
+// ========LICENCE========
+// This file is part of the library FFLAS-FFPACK.
+//
+// FFLAS-FFPACK is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+// ========LICENCE========
+//
+
+/** \ingroup fflasffpack
+ * \defgroup interfaces Interfaces
+ *
+ * \brief Intefaces for FFLAS-FFPACK
+ *
+ * C interface in folder @see libs
+ */
+
+
+// vim:syn=doxygen
diff --git a/fflas-ffpack/interfaces/libs/Makefile.am b/fflas-ffpack/interfaces/libs/Makefile.am
new file mode 100644
index 0000000..9728a7b
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/Makefile.am
@@ -0,0 +1,88 @@
+# Copyright (c) 2010 the LinBox group
+# Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+# ========LICENCE========
+# This file is part of the library LinBox.
+#
+# LinBox is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+# ========LICENCE========
+
+if FFLASFFPACK_PRECOMPILED
+
+pkgincludesubdir=$(pkgincludedir)/interfaces/libs
+
+AM_CPPFLAGS=-I$(top_srcdir)
+AM_CXXFLAGS = @DEFAULT_CFLAGS@
+AM_CPPFLAGS += $(OPTFLAGS) -I$(top_srcdir)/fflas-ffpack/utils/ -I$(top_srcdir)/fflas-ffpack/fflas/ -I$(top_srcdir)/fflas-ffpack/ffpack -I$(top_srcdir)/fflas-ffpack/field $(GIVARO_CFLAGS) $(CBLAS_FLAG) $(CUDA_CFLAGS) $(PARFLAGS)
+LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS) $(PARFLAGS)
+#AM_LDFLAGS=-static
+
+
+
+pkgincludesub_HEADERS=fflas_c.h \
+ ffpack_c.h \
+ fflas_L3_inst.h \
+ fflas_L3_inst_implem.inl \
+ fflas_L2_inst.h \
+ fflas_L2_inst_implem.inl \
+ fflas_L1_inst.h \
+ fflas_L1_inst_implem.inl \
+ ffpack_inst.h \
+ ffpack_inst_implem.inl
+
+
+lib_LTLIBRARIES=libfflas.la \
+ libffpack.la \
+ libfflas_c.la \
+ libffpack_c.la
+
+
+
+libfflas_la_SOURCES= fflas_L1_inst.C \
+ fflas_L1_inst_implem.inl\
+ fflas_L2_inst.C \
+ fflas_L2_inst_implem.inl \
+ fflas_L3_inst.C \
+ fflas_L3_inst_implem.inl
+
+libfflas_la_LDFLAGS= $(LDADD) -version-info 1:0:0 \
+ -no-undefined
+
+libffpack_la_SOURCES= ffpack_inst.C \
+ ffpack_inst_implem.inl
+libffpack_la_LDFLAGS= $(LDADD) -version-info 1:0:0 \
+ -no-undefined -lfflas
+
+EXTRA_libffpack_la_DEPENDENCIES= libfflas.la
+
+libfflas_c_la_SOURCES=fflas_lvl1.C \
+ fflas_lvl2.C \
+ fflas_lvl3.C \
+ fflas_sparse.C
+#libfflas_c_la_CPPFLAGS=$(AM_CPPFLAGS) -DFFLAS_COMPILED -DFFPACK_COMPILED
+libfflas_c_la_LDFLAGS= $(LDADD) -version-info 1:0:0 \
+ -no-undefined -lfflas
+
+EXTRA_libfflas_c_la_DEPENDENCIES=libfflas.la
+
+libffpack_c_la_SOURCES=ffpack.C
+#libffpack_c_la_CPPFLAGS=$(AM_CPPFLAGS) -DFFLAS_COMPILED -DFFPACK_COMPILED
+libffpack_c_la_LDFLAGS= $(LDADD) -version-info 1:0:0 \
+ -no-undefined -lfflas -lffpack
+EXTRA_libffpack_c_la_DEPENDENCIES=libffpack.la
+
+
+EXTRA_DIST=c_libs.doxy
+
+endif
diff --git a/fflas-ffpack/interfaces/libs/c_libs.doxy b/fflas-ffpack/interfaces/libs/c_libs.doxy
new file mode 100644
index 0000000..b461d51
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/c_libs.doxy
@@ -0,0 +1,38 @@
+// Copyright (c) 2011 FFLAS-FFPACK
+// written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+//
+// ========LICENCE========
+// This file is part of the library FFLAS-FFPACK.
+//
+// FFLAS-FFPACK is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+// ========LICENCE========
+//
+
+/** \ingroup interfaces
+ *
+ * \brief C library intefaces for FFLAS-FFPACK
+ *
+ * Routines will look like their C++ counterpart :
+ * <code>
+ * freduce(Modular<double>,m,n, double *)
+ * </code>
+ * becomes
+ * <code>
+ * freduce_modular_double(p,m,n, double *,positive)
+ * </code>
+ */
+
+
+// vim:syn=doxygen
diff --git a/fflas-ffpack/interfaces/libs/fflas_L1_inst.C b/fflas-ffpack/interfaces/libs/fflas_L1_inst.C
new file mode 100755
index 0000000..5387e9d
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/fflas_L1_inst.C
@@ -0,0 +1,66 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* fflas_L1_inst.h
+ * Copyright (C) 2015 FFLAS-FFPACK group
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+
+#ifndef __FFLAS_L1_INST_C
+#define __FFLAS_L1_INST_C
+#include "givaro/modular.h"
+#include "givaro/modular-balanced.h"
+#include "fflas.h"
+#include "fflas_helpers.inl"
+
+#ifdef INST_OR_DECL
+#undef INST_OR_DECL
+#endif
+#define INST_OR_DECL
+
+#define FFLAS_FIELD Givaro::ModularBalanced
+#define FFLAS_ELT double
+#include "fflas_L1_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "fflas_L1_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "fflas_L1_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#define FFLAS_FIELD Givaro::Modular
+#define FFLAS_ELT double
+#include "fflas_L1_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "fflas_L1_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "fflas_L1_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#endif // __FFLAS_L1_INST_C
diff --git a/fflas-ffpack/interfaces/libs/fflas_L1_inst.h b/fflas-ffpack/interfaces/libs/fflas_L1_inst.h
new file mode 100644
index 0000000..513ff69
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/fflas_L1_inst.h
@@ -0,0 +1,64 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* fflas_L1_inst.h
+ * Copyright (C) 2015 FFLAS-FFPACK group
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+#ifndef __FFLAS_L1_INST_H
+#define __FFLAS_L1_INST_H
+
+#include "givaro/modular.h"
+#include "givaro/modular-balanced.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/fflas/fflas_helpers.inl"
+
+#ifdef INST_OR_DECL
+#undef INST_OR_DECL
+#endif
+#define INST_OR_DECL <>
+
+#define FFLAS_FIELD Givaro::ModularBalanced
+#define FFLAS_ELT double
+#include "fflas_L1_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "fflas_L1_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "fflas_L1_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#define FFLAS_FIELD Givaro::Modular
+#define FFLAS_ELT double
+#include "fflas_L1_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "fflas_L1_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "fflas_L1_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#endif //__FFLAS_L1_INST_H
diff --git a/fflas-ffpack/interfaces/libs/fflas_L1_inst_implem.inl b/fflas-ffpack/interfaces/libs/fflas_L1_inst_implem.inl
new file mode 100644
index 0000000..f59c7ad
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/fflas_L1_inst_implem.inl
@@ -0,0 +1,369 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014,2015 the FFLAS-FFPACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+namespace FFLAS {
+ //---------------------------------------------------------------------
+ // Level 1 routines
+ //---------------------------------------------------------------------
+
+ /** freduce
+ * \f$x \gets x mod F\f$.
+ * @param F field
+ * @param n size of the vectors
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * @bug use cblas_(d)scal when possible
+ */
+ template INST_OR_DECL
+ void
+ freduce (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t n,
+ FFLAS_ELT* X, const size_t incX);
+
+ /** freduce
+ * \f$x \gets y mod F\f$.
+ * @param F field
+ * @param n size of the vectors
+ * \param Y vector of \p Element
+ * \param incY stride of \p Y
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * @bug use cblas_(d)scal when possible
+ */
+ template INST_OR_DECL
+ void
+ freduce (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t n,
+ const FFLAS_ELT* Y, const size_t incY,
+ FFLAS_ELT* X, const size_t incX);
+
+ /** finit
+ * \f$x \gets y mod F\f$.
+ * @param F field
+ * @param n size of the vectors
+ * \param Y vector of \p OtherElement
+ * \param incY stride of \p Y
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * @bug use cblas_(d)scal when possible
+ */
+ template INST_OR_DECL
+ void
+ finit (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t n,
+ const FFLAS_ELT* Y, const size_t incY,
+ FFLAS_ELT* X, const size_t incX);
+
+ /** fconvert
+ * \f$x \gets y mod F\f$.
+ * @param F field
+ * @param n size of the vectors
+ * \param Y vector of \p F
+ * \param incY stride of \p Y
+ * \param X vector in \p OtherElement
+ * \param incX stride of \p X
+ * @bug use cblas_(d)scal when possible
+ */
+ template INST_OR_DECL
+ void
+ fconvert (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t n,
+ FFLAS_ELT* X, const size_t incX,
+ const FFLAS_ELT* Y, const size_t incY);
+ // {
+ // OtherElement_ptr Xi = X ;
+ // const FFLAS_ELT* Yi = Y ;
+ // for (; Xi < X+n*incX; Xi+=incX, Yi += incY )
+ // F.convert( *Xi , *Yi);
+ // }
+
+ /** fnegin
+ * \f$x \gets - x\f$.
+ * @param F field
+ * @param n size of the vectors
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * @bug use cblas_(d)scal when possible
+ */
+ template INST_OR_DECL
+ void
+ fnegin (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t n,
+ FFLAS_ELT* X, const size_t incX);
+ // {
+ // FFLAS_ELT* Xi = X ;
+ // for (; Xi < X+n*incX; Xi+=incX )
+ // F.negin( *Xi );
+ // }
+
+ /** fneg
+ * \f$x \gets - y\f$.
+ * @param F field
+ * @param n size of the vectors
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * \param Y vector in \p F
+ * \param incY stride of \p Y
+ * @bug use cblas_(d)scal when possible
+ */
+ template INST_OR_DECL
+ void
+ fneg (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t n,
+ const FFLAS_ELT* Y, const size_t incY,
+ FFLAS_ELT* X, const size_t incX);
+ // {
+ // FFLAS_ELT* Xi = X ;
+ // const FFLAS_ELT* Yi = Y ;
+ // for (; Xi < X+n*incX; Xi+=incX,Yi+=incY )
+ // F.neg( *Xi, *Yi );
+ // }
+
+ /** \brief fzero : \f$A \gets 0 \f$.
+ * @param F field
+ * @param n number of elements to zero
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ */
+ template INST_OR_DECL
+ void
+ fzero (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t n,
+ FFLAS_ELT* X, const size_t incX);
+ // {
+ // if (incX == 1) { // contigous data
+ // // memset(X,(int)F.zero,n); // might be bogus ?
+ // for (size_t i = 0 ; i < n ; ++i)
+ // F.assign(*(X+i), F.zero);
+
+ // }
+ // else { // not contiguous (strided)
+ // for (size_t i = 0 ; i < n ; ++i)
+ // F.assign(*(X+i*incX), F.zero);
+ // }
+ // }
+
+ /** \brief fiszero : test \f$X = 0 \f$.
+ * @param F field
+ * @param n vector dimension
+ * \param X vector in \p F
+ * \param incX increment of \p X
+ */
+ template INST_OR_DECL
+ bool
+ fiszero (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t n,
+ const FFLAS_ELT* X, const size_t incX);
+ // {
+ // bool res=true;
+ // for (size_t i = 0 ; i < n ; ++i)
+ // res &= F.isZero (X [i*incX]);
+ // return res;
+ // }
+
+ /** \brief fequal : test \f$X = Y \f$.
+ * @param F field
+ * @param n vector dimension
+ * \param X vector in \p F
+ * \param incX increment of \p X
+ * \param Y vector in \p F
+ * \param incY increment of \p Y
+ */
+ template INST_OR_DECL
+ bool
+ fequal (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t n,
+ const FFLAS_ELT* X, const size_t incX,
+ const FFLAS_ELT* Y, const size_t incY);
+ // {
+ // bool res=true;
+ // for (size_t i = 0 ; i < n ; ++i)
+ // res &= F.areEqual (X [i*incX], Y [i*incY]);
+ // return res;
+ // }
+
+ /** \brief fassign : \f$x \gets y \f$.
+ * X is preallocated
+ * @todo variant for triagular matrix
+ * @param F field
+ * @param N size of the vectors
+ * \param [out] X vector in \p F
+ * \param incX stride of \p X
+ * \param [in] Y vector in \p F
+ * \param incY stride of \p Y
+ */
+ template INST_OR_DECL
+ void
+ fassign (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t N,
+ const FFLAS_ELT* Y, const size_t incY ,
+ FFLAS_ELT* X, const size_t incX);
+
+
+ /** fscalin
+ * \f$x \gets \alpha \cdot x\f$.
+ * @param F field
+ * @param n size of the vectors
+ * @param alpha scalar
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * @bug use cblas_(d)scal when possible
+ * @internal
+ * @todo check if comparison with +/-1,0 is necessary.
+ */
+ template INST_OR_DECL
+ void
+ fscalin (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t n, const FFLAS_ELT alpha,
+ FFLAS_ELT* X, const size_t incX);
+
+
+ /** fscal
+ * \f$y \gets \alpha \cdot x\f$.
+ * @param F field
+ * @param n size of the vectors
+ * @param alpha scalar
+ * \param[in] X vector in \p F
+ * \param incX stride of \p X
+ * \param[out] Y vector in \p F
+ * \param incY stride of \p Y
+ * @bug use cblas_(d)scal when possible
+ * @internal
+ * @todo check if comparison with +/-1,0 is necessary.
+ */
+ template INST_OR_DECL
+ void
+ fscal (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t n
+ , const FFLAS_ELT alpha
+ , const FFLAS_ELT* X, const size_t incX
+ , FFLAS_ELT* Y, const size_t incY);
+
+
+
+ /** \brief faxpy : \f$y \gets \alpha \cdot x + y\f$.
+ * @param F field
+ * @param N size of the vectors
+ * @param alpha scalar
+ * \param[in] X vector in \p F
+ * \param incX stride of \p X
+ * \param[in,out] Y vector in \p F
+ * \param incY stride of \p Y
+ */
+ template INST_OR_DECL
+ void
+ faxpy (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t N,
+ const FFLAS_ELT alpha,
+ const FFLAS_ELT* X, const size_t incX,
+ FFLAS_ELT* Y, const size_t incY );
+
+ /** \brief faxpby : \f$y \gets \alpha \cdot x + \beta \cdot y\f$.
+ * @param F field
+ * @param N size of the vectors
+ * @param alpha scalar
+ * \param[in] X vector in \p F
+ * \param incX stride of \p X
+ * \param beta scalar
+ * \param[in,out] Y vector in \p F
+ * \param incY stride of \p Y
+ * \note this is a catlas function
+ */
+ // template INST_OR_DECL
+ // void
+ // faxpby (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t N,
+ // const FFLAS_ELT alpha,
+ // const FFLAS_ELT* X, const size_t incX,
+ // const FFLAS_ELT beta,
+ // FFLAS_ELT* Y, const size_t incY );
+
+
+ /** \brief fdot: dot product \f$x^T y\f$.
+ * @param F field
+ * @param N size of the vectors
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * \param Y vector in \p F
+ * \param incY stride of \p Y
+ */
+ template INST_OR_DECL
+ FFLAS_ELT
+ fdot (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t N,
+ const FFLAS_ELT* X, const size_t incX,
+ const FFLAS_ELT* Y, const size_t incY );
+
+ /** \brief fswap: \f$ X \leftrightarrow Y\f$.
+ * @bug use cblas_dswap when double
+ * @param F field
+ * @param N size of the vectors
+ * \param X vector in \p F
+ * \param incX stride of \p X
+ * \param Y vector in \p F
+ * \param incY stride of \p Y
+ */
+ template INST_OR_DECL
+ void
+ fswap (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t N, FFLAS_ELT* X, const size_t incX,
+ FFLAS_ELT* Y, const size_t incY );
+ // {
+
+ // FFLAS_ELT tmp; F.init(tmp);
+ // FFLAS_ELT* Xi = X;
+ // FFLAS_ELT* Yi=Y;
+ // for (; Xi < X+N*incX; Xi+=incX, Yi+=incY ){
+ // F.assign( tmp, *Xi );
+ // F.assign( *Xi, *Yi );
+ // F.assign( *Yi, tmp );
+ // }
+ // }
+
+ template INST_OR_DECL
+ void
+ fadd (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t N,
+ const FFLAS_ELT* A, const size_t inca,
+ const FFLAS_ELT* B, const size_t incb,
+ FFLAS_ELT* C, const size_t incc);
+
+ template INST_OR_DECL
+ void
+ fsub (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t N,
+ const FFLAS_ELT* A, const size_t inca,
+ const FFLAS_ELT* B, const size_t incb,
+ FFLAS_ELT* C, const size_t incc);
+
+ template INST_OR_DECL
+ void
+ faddin (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t N,
+ const FFLAS_ELT* B, const size_t incb,
+ FFLAS_ELT* C, const size_t incc);
+
+ // template INST_OR_DECL
+ // void
+ // fsubin (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t N,
+ // FFLAS_ELT* C, const size_t incc);
+
+
+ template INST_OR_DECL
+ void
+ fadd (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t N,
+ const FFLAS_ELT* A, const size_t inca,
+ const FFLAS_ELT alpha,
+ const FFLAS_ELT* B, const size_t incb,
+ FFLAS_ELT* C, const size_t incc);
+
+} // FFLAS
+
diff --git a/fflas-ffpack/interfaces/libs/fflas_L2_inst.C b/fflas-ffpack/interfaces/libs/fflas_L2_inst.C
new file mode 100755
index 0000000..b90cedb
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/fflas_L2_inst.C
@@ -0,0 +1,66 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* fflas_L2_inst.h
+ * Copyright (C) 2015 FFLAS-FFPACK group
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+
+#ifndef __FFLAS_L2_INST_C
+#define __FFLAS_L2_INST_C
+#include "givaro/modular.h"
+#include "givaro/modular-balanced.h"
+#include "fflas.h"
+#include "fflas_helpers.inl"
+
+#ifdef INST_OR_DECL
+#undef INST_OR_DECL
+#endif
+#define INST_OR_DECL
+
+#define FFLAS_FIELD Givaro::ModularBalanced
+#define FFLAS_ELT double
+#include "fflas_L2_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "fflas_L2_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "fflas_L2_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#define FFLAS_FIELD Givaro::Modular
+#define FFLAS_ELT double
+#include "fflas_L2_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "fflas_L2_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "fflas_L2_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#endif // __FFLAS_L2_INST_C
diff --git a/fflas-ffpack/interfaces/libs/fflas_L2_inst.h b/fflas-ffpack/interfaces/libs/fflas_L2_inst.h
new file mode 100644
index 0000000..d4586c4
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/fflas_L2_inst.h
@@ -0,0 +1,64 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* fflas_L2_inst.h
+ * Copyright (C) 2015 FFLAS-FFPACK group
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+#ifndef __FFLAS_L2_INST_H
+#define __FFLAS_L2_INST_H
+
+#include "givaro/modular.h"
+#include "givaro/modular-balanced.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/fflas/fflas_helpers.inl"
+
+#ifdef INST_OR_DECL
+#undef INST_OR_DECL
+#endif
+#define INST_OR_DECL <>
+
+#define FFLAS_FIELD Givaro::ModularBalanced
+#define FFLAS_ELT double
+#include "fflas_L2_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "fflas_L2_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "fflas_L2_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#define FFLAS_FIELD Givaro::Modular
+#define FFLAS_ELT double
+#include "fflas_L2_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "fflas_L2_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "fflas_L2_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#endif //__FFLAS_L2_INST_H
diff --git a/fflas-ffpack/interfaces/libs/fflas_L2_inst_implem.inl b/fflas-ffpack/interfaces/libs/fflas_L2_inst_implem.inl
new file mode 100644
index 0000000..2a13d5f
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/fflas_L2_inst_implem.inl
@@ -0,0 +1,464 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014,2015 the FFLAS-FFPACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+namespace FFLAS {
+ //---------------------------------------------------------------------
+ // Level 2 routines
+ //---------------------------------------------------------------------
+
+ /** \brief fassign : \f$A \gets B \f$.
+ * @param F field
+ * @param m number of rows to copy
+ * @param n number of cols to copy
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * \param B vector in \p F
+ * \param ldb stride of \p B
+ */
+ template INST_OR_DECL
+ void
+ fassign (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m, const size_t n,
+ const FFLAS_ELT* B, const size_t ldb ,
+ FFLAS_ELT* A, const size_t lda );
+
+ /** \brief fzero : \f$A \gets 0 \f$.
+ * @param F field
+ * @param m number of rows to zero
+ * @param n number of cols to zero
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * @warning may be buggy if Element is larger than int
+ */
+
+ template INST_OR_DECL
+ void
+ fzero (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m, const size_t n,
+ FFLAS_ELT* A, const size_t lda);
+ // {
+ // /* use memset only with Elements that are ok */
+ // if (n == lda) { // contigous data
+ // // memset(A,(int) F.zero,m*n); // might be bogus ?
+ // fzero(F,m*n,A,1);
+ // }
+ // else { // not contiguous (strided)
+ // for (size_t i = 0 ; i < m ; ++i)
+ // // memset(A+i*lda,(int) F.zero,n) ; // might be bogus ?
+ // fzero(F,n,A+i*lda,1);
+ // }
+ // }
+ /** \brief fequal : test \f$A = B \f$.
+ * @param F field
+ * @param m row dimension
+ * @param n column dimension
+ * \param A m x n matrix in \p F
+ * \param lda leading dimension of A
+ * \param B m x n matrix in \p F
+ * \param ldb leading dimension of B
+ */
+ template INST_OR_DECL
+ bool
+ fequal (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m, const size_t n,
+ const FFLAS_ELT* A, const size_t lda,
+ const FFLAS_ELT* B, const size_t ldb);
+ // {
+ // bool res=true;
+ // for (size_t i = 0 ; i < m ; ++i)
+ // res &= fequal (F, n, A + i*lda, 1, B + i*ldb, 1);
+ // return res;
+ // }
+ /** \brief fiszero : test \f$A = 0 \f$.
+ * @param F field
+ * @param m row dimension
+ * @param n column dimension
+ * \param A m x n matrix in \p F
+ * \param lda leading dimension of A
+ */
+ template INST_OR_DECL
+ bool
+ fiszero (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m, const size_t n,
+ const FFLAS_ELT* A, const size_t lda);
+ // {
+ // bool res=true;
+ // for (size_t i = 0 ; i < m ; ++i)
+ // res &= fiszero (F, n, A + i*lda, 1);
+ // return res;
+ // }
+
+ //! creates a diagonal matrix
+ template INST_OR_DECL
+ void
+ fidentity (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m, const size_t n,
+ FFLAS_ELT* A, const size_t lda, const FFLAS_ELT & d);
+ // {
+ // fzero(F,m,n,A,lda);
+ // for (size_t i = 0 ; i < std::min(m,n) ; ++i)
+ // F.assign(A[i*lda+i],d);
+ // }
+
+ //! creates a diagonal matrix
+ template INST_OR_DECL
+ void
+ fidentity (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m, const size_t n,
+ FFLAS_ELT* A, const size_t lda);
+ // {
+ // fzero(F,m,n,A,lda);
+ // for (size_t i = 0 ; i < std::min(m,n) ; ++i)
+ // F.assign(A[i*lda+i],F.one);
+ // }
+
+ /** freduce
+ * \f$A \gets A mod F\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * @internal
+ */
+ template INST_OR_DECL
+ void
+ freduce (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m , const size_t n,
+ FFLAS_ELT* A, const size_t lda);
+
+ /** freduce
+ * \f$A \gets B mod F\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * \param B matrix in \p Element
+ * \param ldb stride of \p B
+ * @internal
+ */
+ template INST_OR_DECL
+ void
+ freduce (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m , const size_t n,
+ const FFLAS_ELT* B, const size_t ldb,
+ FFLAS_ELT* A, const size_t lda);
+
+ /** finit
+ * \f$A \gets B mod F\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * \param B matrix in \p F
+ * \param ldb stride of \p B
+ * @internal
+ */
+ template INST_OR_DECL
+ void
+ finit (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m , const size_t n,
+ const FFLAS_ELT* B, const size_t ldb,
+ FFLAS_ELT* A, const size_t lda);
+
+
+ /** fnegin
+ * \f$A \gets - A\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * @internal
+ */
+ template INST_OR_DECL
+ void
+ fnegin (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m , const size_t n,
+ FFLAS_ELT* A, const size_t lda);
+ // {
+ // //!@todo check if n == lda
+ // for (size_t i = 0 ; i < m ; ++i)
+ // fnegin(F,n,A+i*lda,1);
+ // return;
+ // }
+
+ /** fneg
+ * \f$A \gets - B\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * @internal
+ */
+ template INST_OR_DECL
+ void
+ fneg (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m , const size_t n,
+ const FFLAS_ELT* B, const size_t ldb,
+ FFLAS_ELT* A, const size_t lda);
+ // {
+ // //!@todo check if n == lda
+ // for (size_t i = 0 ; i < m ; ++i)
+ // fneg(F,n,B+i*ldb,1,A+i*lda,1);
+ // return;
+ // }
+
+ /** fscalin
+ * \f$A \gets a \cdot A\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * @param alpha homotecie scalar
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * @internal
+ */
+ template INST_OR_DECL
+ void
+ fscalin (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m , const size_t n,
+ const FFLAS_ELT alpha,
+ FFLAS_ELT* A, const size_t lda);
+
+ /** fscal
+ * \f$B \gets a \cdot A\f$.
+ * @param F field
+ * @param m number of rows
+ * @param n number of cols
+ * @param alpha homotecie scalar
+ * \param[in] A matrix in \p F
+ * \param lda stride of \p A
+ * \param[out] B matrix in \p F
+ * \param ldb stride of \p B
+ * @internal
+ */
+ template INST_OR_DECL
+ void
+ fscal (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m , const size_t n,
+ const FFLAS_ELT alpha,
+ const FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* B, const size_t ldb);
+
+ /** \brief faxpy : \f$y \gets \alpha \cdot x + y\f$.
+ * @param F field
+ * @param m row dimension
+ * @param n column dimension
+ * @param alpha scalar
+ * \param[in] X vector in \p F
+ * \param ldx leading dimension of \p X
+ * \param[in,out] Y vector in \p F
+ * \param ldy leading dimension of \p Y
+ */
+ template INST_OR_DECL
+ void
+ faxpy (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m, const size_t n
+ , const FFLAS_ELT alpha,
+ const FFLAS_ELT* X, const size_t ldx,
+ FFLAS_ELT* Y, const size_t ldy );
+
+ /** \brief faxpby : \f$y \gets \alpha \cdot x + \beta \cdot y\f$.
+ * @param F field
+ * @param m row dimension
+ * @param n column dimension
+ * @param alpha scalar
+ * \param[in] X vector in \p F
+ * \param ldx leading dimension of \p X
+ * \param beta scalar
+ * \param[in,out] Y vector in \p F
+ * \param ldy leading dimension of \p Y
+ * \note this is a catlas function
+ */
+ // template INST_OR_DECL
+ // void
+ // faxpby (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m, const size_t n,
+ // const FFLAS_ELT alpha,
+ // const FFLAS_ELT* X, const size_t ldx,
+ // const FFLAS_ELT beta,
+ // FFLAS_ELT* Y, const size_t ldy );
+
+ /** \brief fmove : \f$A \gets B \f$ and \f$ B \gets 0\f$.
+ * @param F field
+ * @param m number of rows to copy
+ * @param n number of cols to copy
+ * \param A matrix in \p F
+ * \param lda stride of \p A
+ * \param B vector in \p F
+ * \param ldb stride of \p B
+ */
+ template INST_OR_DECL
+ void
+ fmove (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t m, const size_t n,
+ FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* B, const size_t ldb );
+ // {
+ // fassign(F,m,n,A,lda,B,ldb);
+ // fzero(F,m,n,B,ldb);
+ // }
+
+ /** fadd : matrix addition.
+ * Computes \p C = \p A + \p B.
+ * @param F field
+ * @param M rows
+ * @param N cols
+ * @param A dense matrix of size \c MxN
+ * @param lda leading dimension of \p A
+ * @param B dense matrix of size \c MxN
+ * @param ldb leading dimension of \p B
+ * @param C dense matrix of size \c MxN
+ * @param ldc leading dimension of \p C
+ */
+ template INST_OR_DECL
+ void
+ fadd (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ const FFLAS_ELT* A, const size_t lda,
+ const FFLAS_ELT* B, const size_t ldb,
+ FFLAS_ELT* C, const size_t ldc);
+
+
+
+ /** fsub : matrix subtraction.
+ * Computes \p C = \p A - \p B.
+ * @param F field
+ * @param M rows
+ * @param N cols
+ * @param A dense matrix of size \c MxN
+ * @param lda leading dimension of \p A
+ * @param B dense matrix of size \c MxN
+ * @param ldb leading dimension of \p B
+ * @param C dense matrix of size \c MxN
+ * @param ldc leading dimension of \p C
+ */
+ template INST_OR_DECL
+ void
+ fsub (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ const FFLAS_ELT* A, const size_t lda,
+ const FFLAS_ELT* B, const size_t ldb,
+ FFLAS_ELT* C, const size_t ldc);
+
+ //! fsubin
+ //! C = C - B
+ template INST_OR_DECL
+ void
+ fsubin (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ const FFLAS_ELT* B, const size_t ldb,
+ FFLAS_ELT* C, const size_t ldc);
+
+ /** fadd : matrix addition with scaling.
+ * Computes \p C = \p A + alpha \p B.
+ * @param F field
+ * @param M rows
+ * @param N cols
+ * @param A dense matrix of size \c MxN
+ * @param lda leading dimension of \p A
+ * @param alpha some scalar
+ * @param B dense matrix of size \c MxN
+ * @param ldb leading dimension of \p B
+ * @param C dense matrix of size \c MxN
+ * @param ldc leading dimension of \p C
+ */
+ template INST_OR_DECL
+ void
+ fadd (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ const FFLAS_ELT* A, const size_t lda,
+ const FFLAS_ELT alpha,
+ const FFLAS_ELT* B, const size_t ldb,
+ FFLAS_ELT* C, const size_t ldc);
+
+ //! faddin
+ template INST_OR_DECL
+ void
+ faddin (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ const FFLAS_ELT* B, const size_t ldb,
+ FFLAS_ELT* C, const size_t ldc);
+
+
+ /** @brief finite prime FFLAS_FIELD<FFLAS_ELT> GEneral Matrix Vector multiplication.
+ *
+ * Computes \f$Y \gets \alpha \mathrm{op}(A) X + \beta Y \f$.
+ * @param F field
+ * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
+ * @param M rows
+ * @param N cols
+ * @param alpha scalar
+ * @param A dense matrix of size \c MxN
+ * @param lda leading dimension of \p A
+ * @param X dense vector of size \c N
+ * @param incX stride of \p X
+ * @param beta scalar
+ * @param[out] Y dense vector of size \c M
+ * @param incY stride of \p Y
+ */
+ template INST_OR_DECL
+ FFLAS_ELT*
+ fgemv (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS_TRANSPOSE TransA,
+ const size_t M, const size_t N,
+ const FFLAS_ELT alpha,
+ const FFLAS_ELT* A, const size_t lda,
+ const FFLAS_ELT* X, const size_t incX,
+ const FFLAS_ELT beta,
+ FFLAS_ELT* Y, const size_t incY);
+
+ /** @brief fger: rank one update of a general matrix
+ *
+ * Computes \f$A \gets \alpha x . y^T + A\f$
+ * @param F field
+ * @param M rows
+ * @param N cols
+ * @param alpha scalar
+ * @param[in,out] A dense matrix of size \c MxN and leading dimension \p lda
+ * @param lda leading dimension of \p A
+ * @param x dense vector of size \c M
+ * @param incx stride of \p X
+ * @param y dense vector of size \c N
+ * @param incy stride of \p Y
+ */
+ template INST_OR_DECL
+ void
+ fger (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ const FFLAS_ELT alpha,
+ const FFLAS_ELT* x, const size_t incx,
+ const FFLAS_ELT* y, const size_t incy,
+ FFLAS_ELT* A, const size_t lda);
+
+ /** @brief ftrsv: TRiangular System solve with Vector
+ * Computes \f$ X \gets \mathrm{op}(A^{-1}) X\f$
+ * @param F field
+ * @param X vector of size \p N on a field \p F
+ * @param incX stride of \p X
+ * @param A a matrix of leading dimension \p lda and size \p N
+ * @param lda leading dimension of \p A
+ * @param N number of rows or columns of \p A according to \p TransA
+ * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
+ * \param Diag if \c Diag==FflasUnit then \p A is unit.
+ * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
+ */
+ template INST_OR_DECL
+ void
+ ftrsv (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS_UPLO Uplo,
+ const FFLAS_TRANSPOSE TransA, const FFLAS_DIAG Diag,
+ const size_t N,const FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* X, int incX);
+
+
+
+} // FFLAS
+
diff --git a/fflas-ffpack/interfaces/libs/fflas_L3_inst.C b/fflas-ffpack/interfaces/libs/fflas_L3_inst.C
new file mode 100755
index 0000000..92ef986
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/fflas_L3_inst.C
@@ -0,0 +1,65 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* fflas_L3_inst.h
+ * Copyright (C) 2015 FFLAS-FFPACK group
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+#include "fflas-ffpack/fflas-ffpack-config.h"
+
+#ifndef __FFLAS_L3_INST_C
+#define __FFLAS_L3_INST_C
+#include "givaro/modular.h"
+#include "givaro/modular-balanced.h"
+#include "fflas.h"
+#include "fflas_helpers.inl"
+
+#ifdef INST_OR_DECL
+#undef INST_OR_DECL
+#endif
+#define INST_OR_DECL
+
+#define FFLAS_FIELD Givaro::ModularBalanced
+#define FFLAS_ELT double
+#include "fflas_L3_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "fflas_L3_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "fflas_L3_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#define FFLAS_FIELD Givaro::Modular
+#define FFLAS_ELT double
+#include "fflas_L3_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "fflas_L3_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "fflas_L3_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#endif // __FFLAS_L3_INST_C
diff --git a/fflas-ffpack/interfaces/libs/fflas_L3_inst.h b/fflas-ffpack/interfaces/libs/fflas_L3_inst.h
new file mode 100644
index 0000000..fa27e01
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/fflas_L3_inst.h
@@ -0,0 +1,64 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* fflas_L3_inst.h
+ * Copyright (C) 2015 FFLAS-FFPACK group
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+#ifndef __FFLAS_L3_INST_H
+#define __FFLAS_L3_INST_H
+
+#include "givaro/modular.h"
+#include "givaro/modular-balanced.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/fflas/fflas_helpers.inl"
+
+#ifdef INST_OR_DECL
+#undef INST_OR_DECL
+#endif
+#define INST_OR_DECL <>
+
+#define FFLAS_FIELD Givaro::ModularBalanced
+#define FFLAS_ELT double
+#include "fflas_L3_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "fflas_L3_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "fflas_L3_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#define FFLAS_FIELD Givaro::Modular
+#define FFLAS_ELT double
+#include "fflas_L3_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "fflas_L3_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "fflas_L3_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#endif //__FFLAS_L3_INST_H
diff --git a/fflas-ffpack/interfaces/libs/fflas_L3_inst_implem.inl b/fflas-ffpack/interfaces/libs/fflas_L3_inst_implem.inl
new file mode 100644
index 0000000..b56a510
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/fflas_L3_inst_implem.inl
@@ -0,0 +1,199 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+namespace FFLAS {
+ //---------------------------------------------------------------------
+ // Level 3 routines
+ //---------------------------------------------------------------------
+ // set by default for ftrsm to be thread safe
+ // undef it at your own risk, and only if you run it in sequential
+ #define __FFLAS__TRSM_READONLY
+
+ /** @brief ftrsm: <b>TR</b>iangular <b>S</b>ystem solve with <b>M</b>atrix.
+ * Computes \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ or \f$B \gets \alpha B \mathrm{op}(A^{-1})\f$.
+ * \param F field
+ * \param Side if \c Side==FflasLeft then \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ is computed.
+ * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
+ * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
+ * \param Diag if \c Diag==FflasUnit then \p A is unit.
+ * \param M rows of \p B
+ * \param N cols of \p B
+ * @param alpha scalar
+ * \param A triangular invertible matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$
+ * @param lda leading dim of \p A
+ * @param B matrix of size \p MxN
+ * @param ldb leading dim of \p B
+ * @bug \f$\alpha\f$ must be non zero.
+ */
+ template INST_OR_DECL
+ void
+ ftrsm (const FFLAS_FIELD <FFLAS_ELT>& F, const FFLAS_SIDE Side,
+ const FFLAS_UPLO Uplo,
+ const FFLAS_TRANSPOSE TransA,
+ const FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ const FFLAS_ELT alpha,
+#ifdef __FFLAS__TRSM_READONLY
+ const FFLAS_ELT* A,
+#else
+ FFLAS_ELT* A,
+#endif
+ const size_t lda,
+ FFLAS_ELT* B, const size_t ldb);
+
+ /** @brief ftrmm: <b>TR</b>iangular <b>M</b>atrix <b>M</b>ultiply.
+ * Computes \f$ B \gets \alpha \mathrm{op}(A) B\f$ or \f$B \gets \alpha B \mathrm{op}(A)\f$.
+ * @param F field
+ * \param Side if \c Side==FflasLeft then \f$ B \gets \alpha \mathrm{op}(A) B\f$ is computed.
+ * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
+ * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
+ * \param Diag if \c Diag==FflasUnit then \p A is implicitly unit.
+ * \param M rows of \p B
+ * \param N cols of \p B
+ * @param alpha scalar
+ * \param A triangular matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$
+ * @param lda leading dim of \p A
+ * @param B matrix of size \p MxN
+ * @param ldb leading dim of \p B
+ */
+ template INST_OR_DECL
+ void
+ ftrmm (const FFLAS_FIELD <FFLAS_ELT>& F, const FFLAS_SIDE Side,
+ const FFLAS_UPLO Uplo,
+ const FFLAS_TRANSPOSE TransA,
+ const FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ const FFLAS_ELT alpha,
+ FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* B, const size_t ldb);
+
+ /** @brief fgemm: <b>F</b>ield <b>GE</b>neral <b>M</b>atrix <b>M</b>ultiply.
+ *
+ * Computes \f$C = \alpha \mathrm{op}(A) \times \mathrm{op}(B) + \beta C\f$
+ * Automatically set Winograd recursion level
+ * \param F field.
+ * \param ta if \c ta==FflasTrans then \f$\mathrm{op}(A)=A^t\f$, else \f$\mathrm{op}(A)=A\f$,
+ * \param tb same for matrix \p B
+ * \param m see \p A
+ * \param n see \p B
+ * \param k see \p A
+ * \param alpha scalar
+ * \param beta scalar
+ * \param A \f$\mathrm{op}(A)\f$ is \f$m \times k\f$
+ * \param B \f$\mathrm{op}(B)\f$ is \f$k \times n\f$
+ * \param C \f$C\f$ is \f$m \times n\f$
+ * \param lda leading dimension of \p A
+ * \param ldb leading dimension of \p B
+ * \param ldc leading dimension of \p C
+ * \param w recursive levels of Winograd's algorithm are used. No argument (or -1) does auto computation of \p w.
+ * @warning \f$\alpha\f$ \e must be invertible
+ */
+ template INST_OR_DECL
+ FFLAS_ELT* fgemm( const FFLAS_FIELD <FFLAS_ELT>& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m, const size_t n, const size_t k,
+ const FFLAS_ELT alpha,
+ const FFLAS_ELT* A, const size_t lda,
+ const FFLAS_ELT* B, const size_t ldb,
+ const FFLAS_ELT beta,
+ FFLAS_ELT* C, const size_t ldc);
+
+ template INST_OR_DECL
+ FFLAS_ELT*
+ fgemm( const FFLAS_FIELD <FFLAS_ELT>& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const FFLAS_ELT alpha,
+ const FFLAS_ELT* A, const size_t lda,
+ const FFLAS_ELT* B, const size_t ldb,
+ const FFLAS_ELT beta,
+ FFLAS_ELT* C, const size_t ldc,
+ const ParSeqHelper::Sequential seq);
+
+ template INST_OR_DECL
+ FFLAS_ELT*
+ fgemm( const FFLAS_FIELD <FFLAS_ELT>& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const FFLAS_ELT alpha,
+ const FFLAS_ELT* A, const size_t lda,
+ const FFLAS_ELT* B, const size_t ldb,
+ const FFLAS_ELT beta,
+ FFLAS_ELT* C, const size_t ldc,
+ const ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::TwoDAdaptive> par);
+
+ template INST_OR_DECL
+ FFLAS_ELT*
+ fgemm( const FFLAS_FIELD <FFLAS_ELT>& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const FFLAS_ELT alpha,
+ const FFLAS_ELT* A, const size_t lda,
+ const FFLAS_ELT* B, const size_t ldb,
+ const FFLAS_ELT beta,
+ FFLAS_ELT* C, const size_t ldc,
+ const ParSeqHelper::Parallel<CuttingStrategy::Block,StrategyParameter::Threads> par);
+
+
+ /** @brief fsquare: Squares a matrix.
+ * compute \f$ C \gets \alpha \mathrm{op}(A) \mathrm{op}(A) + \beta C\f$ over a FFLAS_FIELD <FFLAS_ELT> \p F
+ * Avoid the conversion of B
+ * @param ta if \c ta==FflasTrans, \f$\mathrm{op}(A)=A^T\f$.
+ * @param F field
+ * @param n size of \p A
+ * @param alpha scalar
+ * @param beta scalar
+ * @param A dense matrix of size \c nxn
+ * @param lda leading dimension of \p A
+ * @param C dense matrix of size \c nxn
+ * @param ldc leading dimension of \p C
+ */
+ template INST_OR_DECL
+ FFLAS_ELT* fsquare (const FFLAS_FIELD <FFLAS_ELT>& F,
+ const FFLAS_TRANSPOSE ta,
+ const size_t n,
+ const FFLAS_ELT alpha,
+ const FFLAS_ELT* A, const size_t lda,
+ const FFLAS_ELT beta,
+ FFLAS_ELT* C, const size_t ldc);
+
+
+} // FFLAS
+
diff --git a/fflas-ffpack/interfaces/libs/fflas_c.h b/fflas-ffpack/interfaces/libs/fflas_c.h
new file mode 100644
index 0000000..29ed7bd
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/fflas_c.h
@@ -0,0 +1,461 @@
+/* -*- mode: C++; tAb-width: 8; indent-tAbs-mode: t; c-basic-offset: 8 -*- */
+/* vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+ */
+/*
+ * Copyright (C) 2015 FFLAS-FFPACK
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more detAils.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas-c.h
+ * @author Brice Boyer
+ * @brief C functions calls for FFLAS
+ * @see fflas/fflas.h
+ */
+
+#ifndef __FFLASFFPACK_interfaces_libs_fflas_c_H
+#define __FFLASFFPACK_interfaces_libs_fflas_c_H
+//#include "fflas-ffpack/fflas-ffpack-config.h"
+
+#ifndef FFLAS_COMPILED
+#define FFLAS_COMPILED
+#endif
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /// Storage by row or col ?
+ enum FFLAS_C_ORDER{
+ FflasRowMajor=101, /**< row major */
+ FflasColMajor=102 /**< col major */
+ };
+ // public:
+ /// Is matrix transposed ?
+ enum FFLAS_C_TRANSPOSE {
+ FflasNoTrans = 111, /**< Matrix is not transposed */
+ FflasTrans = 112 /**< Matrix is transposed */
+ };
+ /// Is triangular matrix's shape upper ?
+ enum FFLAS_C_UPLO {
+ FflasUpper = 121, /**< Triangular matrix is Upper triangular (if \f$i>j\f$ then \f$T_{i,j} = 0\f$)*/
+ FflasLower = 122 /**< Triangular matrix is Lower triangular (if \f$i<j\f$ then \f$T_{i,j} = 0\f$)*/
+ };
+
+ /// Is the triangular matrix implicitly unit diagonal ?
+ enum FFLAS_C_DIAG {
+ FflasNonUnit = 131, /**< Triangular matrix has an explicit arbitrary diagonal */
+ FflasUnit = 132 /**< Triangular matrix has an implicit unit diagonal (\f$T_{i,i} = 1\f$)*/ /**< */
+ };
+
+ /// On what side ?
+ enum FFLAS_C_SIDE {
+ FflasLeft = 141,/**< Operator applied on the left */
+ FflasRight = 142 /**< Operator applied on the rigth*/
+ };
+
+ /** \p FFLAS_C_BASE determines the type of the element representation for Matrix Mult kernel. (deprecated, should not be used) */
+ enum FFLAS_C_BASE {
+ FflasDouble = 151, /**< to use the double precision BLAS */
+ FflasFloat = 152, /**< to use the single precison BLAS */
+ FflasGeneric = 153 /**< for any other domain, that can not be converted to floating point integers */
+ };
+
+/* ******** *
+ * LEVEL1 *
+ * ******** */
+
+/* Modular<double> */
+/* ModularBalanced<double> */
+
+void
+freducein_1_modular_double (const double p, const size_t n,
+ double * X, const size_t incX
+ , bool positive );
+
+void
+freduce_1_modular_double (const double F, const size_t n,
+ const double * Y, const size_t incY,
+ double * X, const size_t incX
+ , bool positive );
+
+
+void
+fnegin_1_modular_double (const double F, const size_t n,
+ double * X, const size_t incX
+ , bool positive );
+
+
+void
+fneg_1_modular_double (const double p, const size_t n,
+ const double * Y, const size_t incY,
+ double * X, const size_t incX
+ , bool positive );
+
+void
+fzero_1_modular_double (const double p, const size_t n,
+ double * X, const size_t incX
+ , bool positive );
+
+
+bool
+fiszero_1_modular_double (const double p, const size_t n,
+ const double * X, const size_t incX
+ , bool positive );
+
+bool
+fequal_1_modular_double (const double p, const size_t n,
+ const double * X, const size_t incX,
+ const double * Y, const size_t incY
+ , bool positive );
+
+
+void
+fassign_1_modular_double (const double p, const size_t n,
+ const double * Y, const size_t incY ,
+ double * X, const size_t incX
+ , bool positive );
+
+
+void
+fscalin_1_modular_double (const double p, const size_t n, const double alpha,
+ double * X, const size_t incX
+ , bool positive );
+
+
+void
+fscal_1_modular_double (const double p, const size_t n
+ , const double alpha
+ , const double * X, const size_t incX
+ , double * Y, const size_t incY
+ , bool positive );
+
+
+void
+faxpy_1_modular_double (const double p, const size_t n,
+ const double alpha,
+ const double * X, const size_t incX,
+ double * Y, const size_t incY
+ , bool positive );
+
+#if 0
+void
+faxpby_1_modular_double (const double p, const size_t n,
+ const double alpha,
+ const double * X, const size_t incX,
+ const double betA,
+ double * Y, const size_t incY
+ , bool positive );
+#endif
+
+
+
+double
+fdot_1_modular_double (const double p, const size_t n,
+ const double * X, const size_t incX,
+ const double * Y, const size_t incY
+ , bool positive );
+
+
+void
+fswap_1_modular_double (const double p, const size_t n,
+ double * X, const size_t incX,
+ double * Y, const size_t incY
+ , bool positive );
+
+
+void
+fadd_1_modular_double (const double p, const size_t n,
+ const double * A, const size_t incA,
+ const double * B, const size_t incB,
+ double * C, const size_t incC
+ , bool positive );
+
+void
+fsub_1_modular_double (const double p, const size_t n,
+ const double * A, const size_t incA,
+ const double * B, const size_t incB,
+ double * C, const size_t incC
+ , bool positive );
+
+void
+faddin_1_modular_double (const double p, const size_t n,
+ const double * B, const size_t incB,
+ double * C, const size_t incC
+ , bool positive );
+
+void
+fsubin_1_modular_double (const double p, const size_t n,
+ const double * B, const size_t incB,
+ double * C, const size_t incC
+ , bool positive );
+
+/* ******** *
+ * LEVEL1.5 *
+ * ******** */
+
+// fspmv
+
+/* ******** *
+ * LEVEL2 *
+ * ******** */
+
+
+/* Modular<double> */
+/* ModularBalanced<double> */
+
+
+void
+fassign_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * B, const size_t ldB ,
+ double * A, const size_t ldA
+ , bool positive );
+
+
+
+void
+fzero_2_modular_double (const double p, const size_t m, const size_t n,
+ double * A, const size_t ldA
+ , bool positive );
+
+
+bool
+fequal_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * A, const size_t ldA,
+ const double * B, const size_t ldB
+ , bool positive );
+
+
+bool
+fiszero_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * A, const size_t ldA
+ , bool positive );
+
+
+void
+fidentity_2_modular_double (const double p, const size_t m, const size_t n,
+ double * A, const size_t ldA,
+ const double d
+ , bool positive );
+
+
+
+void
+freducein_2_modular_double (const double p, const size_t m , const size_t n,
+ double * A, const size_t ldA
+ , bool positive );
+
+
+void
+freduce_2_modular_double (const double p, const size_t m , const size_t n,
+ const double * B, const size_t ldB,
+ double * A, const size_t ldA
+ , bool positive );
+
+void
+fnegin_2_modular_double (const double p, const size_t m , const size_t n,
+ double * A, const size_t ldA
+ , bool positive );
+
+
+void
+fneg_2_modular_double (const double p, const size_t m , const size_t n,
+ const double * B, const size_t ldB,
+ double * A, const size_t ldA
+ , bool positive );
+
+
+void
+fscalin_2_modular_double (const double p, const size_t m , const size_t n,
+ const double alpha,
+ double * A, const size_t ldA
+ , bool positive );
+
+
+void
+fscal_2_modular_double (const double p, const size_t m , const size_t n,
+ const double alpha,
+ const double * A, const size_t ldA,
+ double * B, const size_t ldB
+ , bool positive );
+
+
+void
+faxpy_2_modular_double (const double p, const size_t m, const size_t n
+ , const double alpha,
+ const double * X, const size_t ldX,
+ double * Y, const size_t ldY
+ , bool positive );
+
+
+#if 0
+void
+faxpby_2_modular_double (const double p, const size_t m, const size_t n,
+ const double alpha,
+ const double * X, const size_t ldX,
+ const double betA,
+ double * Y, const size_t ldY
+ , bool positive );
+#endif
+
+
+void
+fmove_2_modular_double (const double p, const size_t m, const size_t n,
+ double * A, const size_t ldA,
+ double * B, const size_t ldB
+ , bool positive );
+
+
+void
+fadd_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * A, const size_t ldA,
+ const double * B, const size_t ldB,
+ double * C, const size_t ldC
+ , bool positive );
+
+
+
+void
+fsub_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * A, const size_t ldA,
+ const double * B, const size_t ldB,
+ double * C, const size_t ldC
+ , bool positive );
+
+
+void
+fsubin_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * B, const size_t ldB,
+ double * C, const size_t ldC
+ , bool positive );
+
+
+
+void
+faddin_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * B, const size_t ldB,
+ double * C, const size_t ldC
+ , bool positive );
+
+
+
+double *
+fgemv_2_modular_double (const double p, const enum FFLAS_C_TRANSPOSE TransA,
+ const size_t m, const size_t n,
+ const double alpha,
+ const double * A, const size_t ldA,
+ const double * X, const size_t incX,
+ const double betA,
+ double * Y, const size_t incY
+ , bool positive );
+
+
+void
+fger_2_modular_double (const double p, const size_t m, const size_t n,
+ const double alpha,
+ const double * x, const size_t incX,
+ const double * y, const size_t incY,
+ double * A, const size_t ldA
+ , bool positive );
+
+
+void
+ftrsv_2_modular_double (const double p, const enum FFLAS_C_UPLO Uplo,
+ const enum FFLAS_C_TRANSPOSE TransA, const enum FFLAS_C_DIAG Diag,
+ const size_t n,const double * A, const size_t ldA,
+ double * X, int incX
+ , bool positive );
+
+/* ******** *
+ * LEVEL2.5 *
+ * ******** */
+
+// fspmm
+
+/* ******** *
+ * LEVEL3 *
+ * ******** */
+
+
+void
+ftrsm_3_modular_double (const double p, const enum FFLAS_C_SIDE Side,
+ const enum FFLAS_C_UPLO Uplo,
+ const enum FFLAS_C_TRANSPOSE TransA,
+ const enum FFLAS_C_DIAG Diag,
+ const size_t m, const size_t n,
+ const double alpha,
+ const double * A,
+ const size_t ldA,
+ double * B, const size_t ldB
+ , bool positive );
+
+
+void
+ftrmm_3_modular_double (const double p, const enum FFLAS_C_SIDE Side,
+ const enum FFLAS_C_UPLO Uplo,
+ const enum FFLAS_C_TRANSPOSE TransA,
+ const enum FFLAS_C_DIAG Diag,
+ const size_t m, const size_t n,
+ const double alpha,
+ double * A, const size_t ldA,
+ double * B, const size_t ldB
+ , bool positive );
+
+
+double *
+fgemm_3_modular_double( const double p,
+ const enum FFLAS_C_TRANSPOSE tA,
+ const enum FFLAS_C_TRANSPOSE tB,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const double alpha,
+ const double * A, const size_t ldA,
+ const double * B, const size_t ldB,
+ const double betA,
+ double * C, const size_t ldC
+ , bool positive );
+
+
+double *
+fsquare_3_modular_double (const double p,
+ const enum FFLAS_C_TRANSPOSE tA,
+ const size_t n,
+ const double alpha,
+ const double * A,
+ const size_t ldA,
+ const double betA,
+ double * C,
+ const size_t ldC
+ , bool positive );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __FFLASFFPACK_interfaces_libs_fflas_c_H
diff --git a/fflas-ffpack/interfaces/libs/fflas_lvl1.C b/fflas-ffpack/interfaces/libs/fflas_lvl1.C
new file mode 100644
index 0000000..3755870
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/fflas_lvl1.C
@@ -0,0 +1,338 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2015 FFLAS-FFPACK
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas_lvl1.C
+ * @author Brice Boyer
+ * @brief C functions calls for level 1 FFLAS in fflas-c.h
+ * @see fflas/fflas_level1.inl
+ */
+
+#include "fflas-ffpack/interfaces/libs/fflas_c.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "givaro//modular-balanced.h"
+#include "givaro//modular.h"
+
+using Givaro::Modular ;
+using Givaro::ModularBalanced ;
+using namespace FFLAS ;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * level 1
+ */
+
+void
+freducein_1_modular_double (const double p, const size_t n,
+ double * X, const size_t incX
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ freduce(F,n,X,incX);
+ } else {
+ ModularBalanced<double> F(p);
+ freduce(F,n,X,incX);
+ }
+}
+
+void
+freduce_1_modular_double (const double p, const size_t n,
+ const double * Y, const size_t incY,
+ double * X, const size_t incX
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ freduce(F,n,Y,incY,X,incX);
+ } else {
+ ModularBalanced<double> F(p);
+ freduce(F,n,Y,incY,X,incX);
+ }
+}
+
+
+void
+fnegin_1_modular_double (const double p, const size_t n,
+ double * X, const size_t incX
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fnegin(F,n,X,incX);
+ } else {
+ ModularBalanced<double> F(p);
+ fnegin(F,n,X,incX);
+ }
+}
+
+
+void
+fneg_1_modular_double (const double p, const size_t n,
+ const double * Y, const size_t incY,
+ double * X, const size_t incX
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fneg(F,n,Y,incY,X,incX);
+ } else {
+ ModularBalanced<double> F(p);
+ fneg(F,n,Y,incY,X,incX);
+ }
+}
+
+void
+fzero_1_modular_double (const double p, const size_t n,
+ double * X, const size_t incX
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fzero(F,n,X,incX);
+ } else {
+ ModularBalanced<double> F(p);
+ fzero(F,n,X,incX);
+ }
+}
+
+
+bool
+fiszero_1_modular_double (const double p, const size_t n,
+ const double * X, const size_t incX
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ return fiszero(F,n,X,incX);
+ } else {
+ ModularBalanced<double> F(p);
+ return fiszero(F,n,X,incX);
+ }
+}
+
+bool
+fequal_1_modular_double (const double p, const size_t n,
+ const double * X, const size_t incX,
+ const double * Y, const size_t incY
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ return fequal(F,n,Y,incY,X,incX);
+ } else {
+ ModularBalanced<double> F(p);
+ return fequal(F,n,Y,incY,X,incX);
+ }
+}
+
+
+void
+fassign_1_modular_double (const double p, const size_t n,
+ const double * Y, const size_t incY ,
+ double * X, const size_t incX
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fassign(F,n,Y,incY,X,incX);
+ } else {
+ ModularBalanced<double> F(p);
+ fassign(F,n,Y,incY,X,incX);
+ }
+}
+
+
+void
+fscalin_1_modular_double (const double p, const size_t n, const double alpha,
+ double * X, const size_t incX
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fscalin(F,n,alpha,X,incX);
+ } else {
+ ModularBalanced<double> F(p);
+ fscalin(F,n,alpha,X,incX);
+ }
+}
+
+
+
+void
+fscal_1_modular_double (const double p, const size_t n
+ , const double alpha
+ , const double * X, const size_t incX
+ , double * Y, const size_t incY
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fscal(F,n,alpha,X,incX,Y,incY);
+ } else {
+ ModularBalanced<double> F(p);
+ fscal(F,n,alpha,X,incX,Y,incY);
+ }
+}
+
+
+void
+faxpy_1_modular_double (const double p, const size_t n,
+ const double alpha,
+ const double * X, const size_t incX,
+ double * Y, const size_t incY
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ faxpy(F,n,alpha,X,incX,Y,incY);
+ } else {
+ ModularBalanced<double> F(p);
+ faxpy(F,n,alpha,X,incX,Y,incY);
+ }
+}
+
+#if 0
+void
+faxpby_1_modular_double (const double p, const size_t n,
+ const double alpha,
+ const double * X, const size_t incX,
+ const double beta,
+ double * Y, const size_t incY
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ faxpby(F,n,alpha,X,incX,beta,Y,incY);
+ } else {
+ ModularBalanced<double> F(p);
+ faxpby(F,n,alpha,X,incX,beta,Y,incY);
+ }
+}
+#endif
+
+
+double
+fdot_1_modular_double (const double p, const size_t n,
+ const double * X, const size_t incX,
+ const double * Y, const size_t incY
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ return fdot(F,n,Y,incY,X,incX);
+ } else {
+ ModularBalanced<double> F(p);
+ return fdot(F,n,Y,incY,X,incX);
+ }
+}
+
+
+void
+fswap_1_modular_double (const double p, const size_t n,
+ double * X, const size_t incX,
+ double * Y, const size_t incY
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fswap(F,n,Y,incY,X,incX);
+ } else {
+ ModularBalanced<double> F(p);
+ fswap(F,n,Y,incY,X,incX);
+ }
+}
+
+
+void
+fadd_1_modular_double (const double p, const size_t n,
+ const double * A, const size_t incA,
+ const double * B, const size_t incB,
+ double * C, const size_t incC
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fadd(F,n,A,incA,B,incB,C,incC);
+ } else {
+ ModularBalanced<double> F(p);
+ fadd(F,n,A,incA,B,incB,C,incC);
+ }
+}
+
+void
+fsub_1_modular_double (const double p, const size_t n,
+ const double * A, const size_t incA,
+ const double * B, const size_t incB,
+ double * C, const size_t incC
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fsub(F,n,A,incA,B,incB,C,incC);
+ } else {
+ ModularBalanced<double> F(p);
+ fsub(F,n,A,incA,B,incB,C,incC);
+ }
+}
+
+void
+faddin_1_modular_double (const double p, const size_t n,
+ const double * B, const size_t incB,
+ double * C, const size_t incC
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ faddin(F,n,B,incB,C,incC);
+ } else {
+ ModularBalanced<double> F(p);
+ faddin(F,n,B,incB,C,incC);
+ }
+}
+
+void
+fsubin_1_modular_double (const double p, const size_t n,
+ const double * B, const size_t incB,
+ double * C, const size_t incC
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fsubin(F,n,B,incB,C,incC);
+ } else {
+ ModularBalanced<double> F(p);
+ fsubin(F,n,B,incB,C,incC);
+ }
+}
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/fflas-ffpack/interfaces/libs/fflas_lvl2.C b/fflas-ffpack/interfaces/libs/fflas_lvl2.C
new file mode 100644
index 0000000..1375f10
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/fflas_lvl2.C
@@ -0,0 +1,382 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright_2_modular_double (C) 2015 FFLAS-FFPACK
+ *
+ * Written by Brice Boyer_2_modular_double (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or_2_modular_double (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas_lvl2.C
+ * @author Brice Boyer
+ * @brief C functions calls for level 2 FFLAS in fflas-c.h
+ * @see fflas/fflas_level2.inl
+ */
+
+#include "fflas-ffpack/interfaces/libs/fflas_c.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "givaro//modular-balanced.h"
+#include "givaro//modular.h"
+
+using Givaro::Modular ;
+using Givaro::ModularBalanced ;
+using namespace FFLAS ;
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void
+fassign_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * A, const size_t lda ,
+ double * B, const size_t ldb
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fassign(F,m,n,A,lda,B,ldb);
+ } else {
+ ModularBalanced<double> F(p);
+ fassign(F,m,n,A,lda,B,ldb);
+ }
+}
+
+
+
+void
+fzero_2_modular_double (const double p, const size_t m, const size_t n,
+ double * A, const size_t lda
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fzero(F,m,n,A,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ fzero(F,m,n,A,lda);
+ }
+}
+
+bool
+fequal_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * A, const size_t lda,
+ const double * B, const size_t ldb
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ return fequal(F,m,n,A,lda,B,ldb);
+ } else {
+ ModularBalanced<double> F(p);
+ return fequal(F,m,n,A,lda,B,ldb);
+ }
+}
+
+bool
+fiszero_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * A, const size_t lda
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ return fiszero(F,m,n,A,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ return fiszero(F,m,n,A,lda);
+ }
+}
+
+void
+fidentity_2_modular_double (const double p, const size_t m, const size_t n,
+ double * A, const size_t lda,
+ const double d
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fidentity(F,m,n,A,lda,d);
+ } else {
+ ModularBalanced<double> F(p);
+ fidentity(F,m,n,A,lda,d);
+ }
+}
+
+
+void
+freducein_2_modular_double (const double p, const size_t m , const size_t n,
+ double * A, const size_t lda
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ freduce(F,m,n,A,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ freduce(F,m,n,A,lda);
+ }
+}
+
+void
+freduce_2_modular_double (const double p, const size_t m , const size_t n,
+ const double * A, const size_t lda,
+ double * B, const size_t ldb
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ freduce(F,m,n,A,lda,B,ldb);
+ } else {
+ ModularBalanced<double> F(p);
+ freduce(F,m,n,A,lda,B,ldb);
+ }
+}
+void
+fnegin_2_modular_double (const double p, const size_t m , const size_t n,
+ double * A, const size_t lda
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fnegin(F,m,n,A,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ fnegin(F,m,n,A,lda);
+ }
+}
+
+void
+fneg_2_modular_double (const double p, const size_t m , const size_t n,
+ const double * A, const size_t lda,
+ double * B, const size_t ldb
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fneg(F,m,n,A,lda,B,ldb);
+ } else {
+ ModularBalanced<double> F(p);
+ fneg(F,m,n,A,lda,B,ldb);
+ }
+}
+
+void
+fscalin_2_modular_double (const double p, const size_t m , const size_t n,
+ const double alpha,
+ double * A, const size_t lda
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fscalin(F,m,n,alpha,A,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ fscalin(F,m,n,alpha,A,lda);
+ }
+}
+
+void
+fscal_2_modular_double (const double p, const size_t m , const size_t n,
+ const double alpha,
+ const double * A, const size_t lda,
+ double * B, const size_t ldb
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fscal(F,m,n,alpha,A,lda,B,ldb);
+ } else {
+ ModularBalanced<double> F(p);
+ fscal(F,m,n,alpha,A,lda,B,ldb);
+ }
+}
+
+void
+faxpy_2_modular_double (const double p, const size_t m, const size_t n
+ , const double alpha,
+ const double * A, const size_t lda,
+ double * B, const size_t ldb
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ faxpy(F,m,n,alpha,A,lda,B,ldb);
+ } else {
+ ModularBalanced<double> F(p);
+ faxpy(F,m,n,alpha,A,lda,B,ldb);
+ }
+}
+
+#if 0
+void
+faxpby_2_modular_double (const double p, const size_t m, const size_t n,
+ const double alpha,
+ const double * A, const size_t lda,
+ const double beta,
+ double * B, const size_t ldb
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ faxpby(F,m,n,alpha,A,lda,beta,B,ldb);
+ } else {
+ ModularBalanced<double> F(p);
+ faxpby(F,m,n,alpha,A,lda,beta,B,ldb);
+ }
+}
+#endif
+
+void
+fmove_2_modular_double (const double p, const size_t m, const size_t n,
+ double * A, const size_t lda,
+ double * B, const size_t ldb
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fmove(F,m,n,A,lda,B,ldb);
+ } else {
+ ModularBalanced<double> F(p);
+ fmove(F,m,n,A,lda,B,ldb);
+ }
+}
+
+void
+fadd_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * A, const size_t lda,
+ const double * B, const size_t ldb,
+ double * C, const size_t ldc
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fadd(F,m,n,A,lda,B,ldb,C,ldc);
+ } else {
+ ModularBalanced<double> F(p);
+ fadd(F,m,n,A,lda,B,ldb,C,ldc);
+ }
+}
+
+
+void
+fsub_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * A, const size_t lda,
+ const double * B, const size_t ldb,
+ double * C, const size_t ldc
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fsub(F,m,n,A,lda,B,ldb,C,ldc);
+ } else {
+ ModularBalanced<double> F(p);
+ fsub(F,m,n,A,lda,B,ldb,C,ldc);
+ }
+}
+
+void
+fsubin_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * B, const size_t ldb,
+ double * C, const size_t ldc
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fsubin(F,m,n,B,ldb,C,ldc);
+ } else {
+ ModularBalanced<double> F(p);
+ fsubin(F,m,n,B,ldb,C,ldc);
+ }
+}
+
+void
+faddin_2_modular_double (const double p, const size_t m, const size_t n,
+ const double * B, const size_t ldb,
+ double * C, const size_t ldc
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ faddin(F,m,n,B,ldb,C,ldc);
+ } else {
+ ModularBalanced<double> F(p);
+ faddin(F,m,n,B,ldb,C,ldc);
+ }
+}
+
+
+double *
+fgemv_2_modular_double (const double p, const enum FFLAS_C_TRANSPOSE TransA,
+ const size_t m, const size_t n,
+ const double alpha,
+ const double * A, const size_t lda,
+ const double * X, const size_t incX,
+ const double beta,
+ double * Y, const size_t incY
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ return fgemv(F,(enum FFLAS::FFLAS_TRANSPOSE)TransA,m,n,alpha,A,lda,X,incX,beta,Y,incY);
+ } else {
+ ModularBalanced<double> F(p);
+ return fgemv(F,(enum FFLAS::FFLAS_TRANSPOSE)TransA,m,n,alpha,A,lda,X,incX,beta,Y,incY);
+ }
+ return nullptr;
+}
+
+void
+fger_2_modular_double (const double p, const size_t m, const size_t n,
+ const double alpha,
+ const double * X, const size_t incX,
+ const double * Y, const size_t incY,
+ double * A, const size_t lda
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ fger(F,m,n,alpha,X,incX,Y,incY,A,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ fger(F,m,n,alpha,X,incX,Y,incY,A,lda);
+ }
+}
+
+void
+ftrsv_2_modular_double (const double p, const enum FFLAS_C_UPLO Uplo,
+ const enum FFLAS_C_TRANSPOSE TransA, const enum FFLAS_C_DIAG Diag,
+ const size_t n,const double * A, const size_t lda,
+ double * X, int incX
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ ftrsv(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_TRANSPOSE)TransA,(enum FFLAS::FFLAS_DIAG)Diag,n,A,lda,X,incX);
+ } else {
+ ModularBalanced<double> F(p);
+ ftrsv(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_TRANSPOSE)TransA,(enum FFLAS::FFLAS_DIAG)Diag,n,A,lda,X,incX);
+ }
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/fflas-ffpack/interfaces/libs/fflas_lvl3.C b/fflas-ffpack/interfaces/libs/fflas_lvl3.C
new file mode 100644
index 0000000..060f1d8
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/fflas_lvl3.C
@@ -0,0 +1,143 @@
+/* -*- mode: C++; tAb-width: 8; indent-tAbs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2015 FFLAS-FFPACK
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more detAils.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file fflas_lvl3.C
+ * @author Brice Boyer
+ * @brief C functions calls for level 3 FFLAS in fflas-c.h
+ * @see fflas/fflas_level3.inl
+ */
+
+#include "fflas-ffpack/interfaces/libs/fflas_c.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "givaro//modular-balanced.h"
+#include "givaro//modular.h"
+
+using Givaro::Modular ;
+using Givaro::ModularBalanced ;
+using namespace FFLAS ;
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void
+ftrsm_3_modular_double (const double p, const enum FFLAS_C_SIDE Side,
+ const enum FFLAS_C_UPLO Uplo,
+ const enum FFLAS_C_TRANSPOSE tA,
+ const enum FFLAS_C_DIAG Diag,
+ const size_t m, const size_t n,
+ const double alpha,
+ const double * A,
+ const size_t ldA,
+ double * B, const size_t ldB
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ ftrsm(F,(enum FFLAS_SIDE)Side,(enum FFLAS_UPLO)Uplo,(FFLAS_TRANSPOSE)tA,(enum FFLAS_DIAG)Diag,m,n,alpha,A,ldA,B,ldB);
+ } else {
+ ModularBalanced<double> F(p);
+ ftrsm(F,(enum FFLAS_SIDE)Side,(enum FFLAS_UPLO)Uplo,(FFLAS_TRANSPOSE)tA,(enum FFLAS_DIAG)Diag,m,n,alpha,A,ldA,B,ldB);
+ }
+}
+
+
+void
+ftrmm_3_modular_double (const double p, const enum FFLAS_C_SIDE Side,
+ const enum FFLAS_C_UPLO Uplo,
+ const enum FFLAS_C_TRANSPOSE tA,
+ const enum FFLAS_C_DIAG Diag,
+ const size_t m, const size_t n,
+ const double alpha,
+ double * A, const size_t ldA,
+ double * B, const size_t ldB
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ ftrmm(F,(enum FFLAS_SIDE)Side,(enum FFLAS_UPLO)Uplo,(FFLAS_TRANSPOSE)tA,(enum FFLAS_DIAG)Diag,m,n,alpha,A,ldA,B,ldB);
+ } else {
+ ModularBalanced<double> F(p);
+ ftrmm(F,(enum FFLAS_SIDE)Side,(enum FFLAS_UPLO)Uplo,(FFLAS_TRANSPOSE)tA,(enum FFLAS_DIAG)Diag,m,n,alpha,A,ldA,B,ldB);
+ }
+}
+
+double *
+fgemm_3_modular_double( const double p,
+ const enum FFLAS_C_TRANSPOSE tA,
+ const enum FFLAS_C_TRANSPOSE tB,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const double alpha,
+ const double * A, const size_t ldA,
+ const double * B, const size_t ldB,
+ const double betA,
+ double * C, const size_t ldC,
+ bool positive )
+
+{
+ if (positive) {
+ Modular<double> F(p);
+ return fgemm(F,(FFLAS_TRANSPOSE)tA,(FFLAS_TRANSPOSE)tB,m,n,k,alpha,A,ldA,B,ldB,betA,C,ldC);
+ } else {
+ ModularBalanced<double> F(p);
+ return fgemm(F,(FFLAS_TRANSPOSE)tA,(FFLAS_TRANSPOSE)tB,m,n,k,alpha,A,ldA,B,ldB,betA,C,ldC);
+ }
+ return nullptr;
+}
+
+
+double *
+fsquare_3_modular_double (const double p,
+ const enum FFLAS_C_TRANSPOSE tA,
+ const size_t n,
+ const double alpha,
+ const double * A,
+ const size_t ldA,
+ const double betA,
+ double * C,
+ const size_t ldC
+ , bool positive )
+{
+ if (positive) {
+ Modular<double> F(p);
+ return fsquare(F,(FFLAS_TRANSPOSE)tA,n,alpha,A,ldA,betA,C,ldC);
+ } else {
+ ModularBalanced<double> F(p);
+ return fsquare(F,(FFLAS_TRANSPOSE)tA,n,alpha,A,ldA,betA,C,ldC);
+ }
+ return nullptr;
+}
+
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/fflas-ffpack/fflas-ffpack.h b/fflas-ffpack/interfaces/libs/fflas_sparse.C
similarity index 72%
copy from fflas-ffpack/fflas-ffpack.h
copy to fflas-ffpack/interfaces/libs/fflas_sparse.C
index ceeb9c0..39667ec 100644
--- a/fflas-ffpack/fflas-ffpack.h
+++ b/fflas-ffpack/interfaces/libs/fflas_sparse.C
@@ -1,7 +1,10 @@
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2011 FFLAS-FFPACK
- * Written by <brice.boyer at imag.fr>
+/*
+ * Copyright (C) 2015 FFLAS-FFPACK
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
*
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
@@ -20,20 +23,25 @@
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* ========LICENCE========
- *
+ *.
*/
-/*! @file fflas-ffpack/fflas-ffpack.h
- * @ingroup fflas-ffpack
- * @brief Includes FFLAS and FFPACK
+/** @file fflas_sparse.C
+ * @author Brice Boyer
+ * @brief C functions calls for level 1.5 and 2.5 FFLAS in fflas-c.h
+ * @see fflas/fflas_sparse.h
*/
+// struct COO {
+// };
+
+// fspmv
+// COO
+// CSR
+
+// fspmm
+// COO
+// CSR
-#ifndef __FFLASFFPACK_fflas_ffpack_H
-#define __FFLASFFPACK_fflas_ffpack_H
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
-#include "fflas/fflas.h"
-#include "ffpack/ffpack.h"
-#endif // __FFLASFFPACK_fflas_ffpack_H
diff --git a/fflas-ffpack/interfaces/libs/ffpack.C b/fflas-ffpack/interfaces/libs/ffpack.C
new file mode 100644
index 0000000..3da6920
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/ffpack.C
@@ -0,0 +1,1209 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2015 FFLAS-FFPACK
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file ffpack.C
+ * @author Brice Boyer
+ * @brief C functions calls for FFPACK in ffpack-c.h
+ * @see ffpack/ffpack.h
+ */
+
+#include "fflas-ffpack/interfaces/libs/ffpack_c.h"
+
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+#include "givaro//modular-balanced.h"
+#include "givaro//modular.h"
+
+using Givaro::Modular ;
+using Givaro::ModularBalanced ;
+using namespace FFLAS ;
+using namespace FFPACK;
+
+/*****************/
+/* PERMUTATIONS */
+/*****************/
+
+
+void LAPACKPerm2MathPerm (size_t * MathP, const size_t * LapackP,
+ const size_t N)
+{
+ FFPACK::LAPACKPerm2MathPerm(MathP,LapackP,N);
+}
+
+void MathPerm2LAPACKPerm (size_t * LapackP, const size_t * MathP,
+ const size_t N)
+{
+ FFPACK::MathPerm2LAPACKPerm(LapackP, MathP, N);
+}
+
+void MatrixApplyS_modular_double (const double p, double * A, const size_t lda, const size_t width,
+ const size_t M2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ MatrixApplyS(F,A,lda,width,M2,R1,R2,R3,R4);
+ } else {
+ ModularBalanced<double> F(p);
+ MatrixApplyS(F,A,lda,width,M2,R1,R2,R3,R4);
+ }
+}
+
+void PermApplyS_double (double * A, const size_t lda, const size_t width,
+ const size_t M2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4)
+{
+ PermApplyS<double>(A,lda,width,M2,R1,R2,R3,R4);
+}
+
+
+void MatrixApplyT_modular_double (const double p, double * A, const size_t lda, const size_t width,
+ const size_t N2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ MatrixApplyT(F,A,lda,width,N2,R1,R2,R3,R4);
+ } else {
+ ModularBalanced<double> F(p);
+ MatrixApplyT(F,A,lda,width,N2,R1,R2,R3,R4);
+ }
+}
+
+
+void PermApplyT_double (double * A, const size_t lda, const size_t width,
+ const size_t N2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4)
+{
+ PermApplyT<double>(A,lda,width,N2,R1,R2,R3,R4);
+}
+
+void composePermutationsP (size_t * MathP,
+ const size_t * P1,
+ const size_t * P2,
+ const size_t R, const size_t N)
+{
+ FFPACK::composePermutationsP(MathP,P1,P2,R,N);
+}
+
+
+void composePermutationsQ (size_t * MathP,
+ const size_t * Q1,
+ const size_t * Q2,
+ const size_t R, const size_t N)
+{
+ FFPACK::composePermutationsQ(MathP,Q1,Q2,R,N);
+}
+
+void cyclic_shift_mathPerm (size_t * P, const size_t s)
+{
+ FFPACK::cyclic_shift_mathPerm(P,s);
+}
+
+#if 0
+template<typename Base_t>
+void cyclic_shift_row_col(Base_t * A, size_t m, size_t n, size_t lda);
+#endif
+
+
+void cyclic_shift_row_modular_double(const double p, double * A, size_t m, size_t n, size_t lda
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ cyclic_shift_row(F,A,m,n,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ cyclic_shift_row(F,A,m,n,lda);
+ }
+}
+
+
+void cyclic_shift_col_modular_double(const double p, double * A, size_t m, size_t n, size_t lda
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ cyclic_shift_col(F,A,m,n,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ cyclic_shift_col(F,A,m,n,lda);
+ }
+}
+
+
+
+
+void
+applyP_modular_double( const double p,
+ const enum FFLAS::FFLAS_SIDE Side,
+ const enum FFLAS::FFLAS_TRANSPOSE Trans,
+ const size_t M, const size_t ibeg, const size_t iend,
+ double * A, const size_t lda, const size_t * P
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ applyP(F,(enum FFLAS::FFLAS_SIDE)Side,(enum FFLAS::FFLAS_TRANSPOSE)Trans,M,ibeg,iend,A,lda,P);
+ } else {
+ ModularBalanced<double> F(p);
+ applyP(F,(enum FFLAS::FFLAS_SIDE)Side,(enum FFLAS::FFLAS_TRANSPOSE)Trans,M,ibeg,iend,A,lda,P);
+ }
+}
+
+/* fgetrs, fgesv */
+
+void
+fgetrsin_modular_double (const double p,
+ const enum FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ double * A, const size_t lda,
+ const size_t *P, const size_t *Q,
+ double * B, const size_t ldb,
+ int * info
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ fgetrs(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,R,A,lda,P,Q,B,ldb,info);
+ } else {
+ ModularBalanced<double> F(p);
+ fgetrs(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,R,A,lda,P,Q,B,ldb,info);
+ }
+}
+
+
+double *
+fgetrsv_modular_double (const double p,
+ const enum FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t NRHS, const size_t R,
+ double * A, const size_t lda,
+ const size_t *P, const size_t *Q,
+ double * X, const size_t ldx,
+ const double * B, const size_t ldb,
+ int * info
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return fgetrs(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,NRHS,R,A,lda,P,Q,X,ldx,B,ldb,info);
+ } else {
+ ModularBalanced<double> F(p);
+ return fgetrs(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,NRHS,R,A,lda,P,Q,X,ldx,B,ldb,info);
+ }
+}
+
+
+
+size_t
+fgesvin_modular_double (const double p,
+ const enum FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ double * B, const size_t ldb,
+ int * info
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return fgesv(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,A,lda,B,ldb,info);
+ } else {
+ ModularBalanced<double> F(p);
+ return fgesv(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,A,lda,B,ldb,info);
+ }
+}
+
+
+
+size_t
+fgesv_modular_double (const double p,
+ const enum FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t NRHS,
+ double * A, const size_t lda,
+ double * X, const size_t ldx,
+ const double * B, const size_t ldb,
+ int * info
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return fgesv(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,NRHS,A,lda,X,ldx,B,ldb,info);
+ } else {
+ ModularBalanced<double> F(p);
+ return fgesv(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,NRHS,A,lda,X,ldx,B,ldb,info);
+ }
+}
+
+/* ftrtr */
+
+
+void
+ftrtri_modular_double (const double p, const enum FFLAS::FFLAS_UPLO Uplo, const enum FFLAS::FFLAS_DIAG Diag,
+ const size_t N, double * A, const size_t lda
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ ftrtri(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_DIAG)Diag,N,A,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ ftrtri(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_DIAG)Diag,N,A,lda);
+ }
+}
+
+
+void trinv_left_modular_double( const double p, const size_t N, const double * L, const size_t ldl,
+ double * X, const size_t ldx
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ trinv_left(F,N,L,ldl,X,ldx);
+ } else {
+ ModularBalanced<double> F(p);
+ trinv_left(F,N,L,ldl,X,ldx);
+ }
+}
+
+void
+ftrtrm_modular_double (const double p, const enum FFLAS::FFLAS_DIAG Diag, const size_t N,
+ double * A, const size_t lda
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ ftrtrm(F,(enum FFLAS::FFLAS_DIAG)Diag,N,A,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ ftrtrm(F,(enum FFLAS::FFLAS_DIAG)Diag,N,A,lda);
+ }
+}
+
+
+
+/* PLUQ */
+
+
+size_t
+PLUQ_modular_double (const double p, const enum FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t*P, size_t *Q
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return PLUQ(F,(enum FFLAS::FFLAS_DIAG)Diag,M,N,A,lda,P,Q);
+ } else {
+ ModularBalanced<double> F(p);
+ return PLUQ(F,(enum FFLAS::FFLAS_DIAG)Diag,M,N,A,lda,P,Q);
+ }
+}
+
+
+
+
+size_t
+LUdivine_modular_double (const double p, const enum FFLAS::FFLAS_DIAG Diag, const enum FFLAS::FFLAS_TRANSPOSE Trans,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Qt,
+ const enum FFPACK_C_LU_TAG LuTag,
+ const size_t cutoff
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return LUdivine(F,(enum FFLAS::FFLAS_DIAG)Diag,(enum FFLAS::FFLAS_TRANSPOSE)Trans,M,N,A,lda,P,Qt,(enum FFPACK::FFPACK_LU_TAG)LuTag,cutoff);
+ } else {
+ ModularBalanced<double> F(p);
+ return LUdivine(F,(enum FFLAS::FFLAS_DIAG)Diag,(enum FFLAS::FFLAS_TRANSPOSE)Trans,M,N,A,lda,P,Qt,(enum FFPACK::FFPACK_LU_TAG)LuTag,cutoff);
+ }
+}
+
+
+#if 0 /* UTILE ?? */
+
+size_t
+LUdivine_small_modular_double (const double p, const enum FFLAS::FFLAS_DIAG Diag, const enum FFLAS::FFLAS_TRANSPOSE Trans,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Q,
+ const enum FFPACK_C_LU_TAG LuTag);
+
+
+size_t
+LUdivine_gauss_modular_double (const double p, const enum FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Q,
+ const enum FFPACK_C_LU_TAG LuTag);
+#endif
+
+
+
+/*****************/
+/* ECHELON FORMS */
+/*****************/
+
+
+size_t
+ColumnEchelonForm_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Qt, bool transform ,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return ColumnEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<double> F(p);
+ return ColumnEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+size_t
+RowEchelonForm_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return RowEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<double> F(p);
+ return RowEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+
+size_t
+ReducedColumnEchelonForm_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return ReducedColumnEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<double> F(p);
+ return ReducedColumnEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+
+size_t
+ReducedRowEchelonForm_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return ReducedRowEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<double> F(p);
+ return ReducedRowEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+size_t
+ColumnEchelonForm_modular_float (const float p, const size_t M, const size_t N,
+ float * A, const size_t lda,
+ size_t* P, size_t* Qt, bool transform ,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<float> F(p);
+ return ColumnEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<float> F(p);
+ return ColumnEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+size_t
+RowEchelonForm_modular_float (const float p, const size_t M, const size_t N,
+ float * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<float> F(p);
+ return RowEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<float> F(p);
+ return RowEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+
+size_t
+ReducedColumnEchelonForm_modular_float (const float p, const size_t M, const size_t N,
+ float * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<float> F(p);
+ return ReducedColumnEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<float> F(p);
+ return ReducedColumnEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+
+size_t
+ReducedRowEchelonForm_modular_float (const float p, const size_t M, const size_t N,
+ float * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<float> F(p);
+ return ReducedRowEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<float> F(p);
+ return ReducedRowEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+size_t
+ColumnEchelonForm_modular_int32_t (const int32_t p, const size_t M, const size_t N,
+ int32_t * A, const size_t lda,
+ size_t* P, size_t* Qt, bool transform ,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<int32_t> F(p);
+ return ColumnEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<int32_t> F(p);
+ return ColumnEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+size_t
+RowEchelonForm_modular_int32_t (const int32_t p, const size_t M, const size_t N,
+ int32_t * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<int32_t> F(p);
+ return RowEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<int32_t> F(p);
+ return RowEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+
+size_t
+ReducedColumnEchelonForm_modular_int32_t (const int32_t p, const size_t M, const size_t N,
+ int32_t * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<int32_t> F(p);
+ return ReducedColumnEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<int32_t> F(p);
+ return ReducedColumnEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+
+size_t
+ReducedRowEchelonForm_modular_int32_t (const int32_t p, const size_t M, const size_t N,
+ int32_t * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<int32_t> F(p);
+ return ReducedRowEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<int32_t> F(p);
+ return ReducedRowEchelonForm(F,M,N,A,lda,P,Qt,transform,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+size_t
+ReducedRowEchelonForm2_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return ReducedRowEchelonForm2(F,M,N,A,lda,P,Qt,transform);
+ } else {
+ ModularBalanced<double> F(p);
+ return ReducedRowEchelonForm2(F,M,N,A,lda,P,Qt,transform);
+ }
+}
+
+
+size_t
+REF_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ const size_t colbeg, const size_t rowbeg, const size_t colsize,
+ size_t* Qt, size_t* P
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return REF(F,M,N,A,lda,colbeg,rowbeg,colsize,Qt,P);
+ } else {
+ ModularBalanced<double> F(p);
+ return REF(F,M,N,A,lda,colbeg,rowbeg,colsize,Qt,P);
+ }
+}
+
+
+
+/*****************/
+/* INVERSION */
+/*****************/
+
+
+double *
+Invertin_modular_double (const double p, const size_t M,
+ double * A, const size_t lda,
+ int * nullity
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return Invert(F,M,A,lda,*nullity);
+ } else {
+ ModularBalanced<double> F(p);
+ return Invert(F,M,A,lda,*nullity);
+ }
+}
+
+
+
+double *
+Invert_modular_double (const double p, const size_t M,
+ const double * A, const size_t lda,
+ double * X, const size_t ldx,
+ int* nullity
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return Invert(F,M,A,lda,X,ldx,*nullity);
+ } else {
+ ModularBalanced<double> F(p);
+ return Invert(F,M,A,lda,X,ldx,*nullity);
+ }
+}
+
+
+double *
+Invert2_modular_double( const double p, const size_t M,
+ double * A, const size_t lda,
+ double * X, const size_t ldx,
+ int* nullity
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return Invert2(F,M,A,lda,X,ldx,*nullity);
+ } else {
+ ModularBalanced<double> F(p);
+ return Invert2(F,M,A,lda,X,ldx,*nullity);
+ }
+}
+
+
+/*****************************/
+/* CHARACTERISTIC POLYNOMIAL */
+/*****************************/
+
+
+#if 0 /* pas pour le moment */
+template <class Polynomial>
+std::list<Polynomial>&
+CharPoly( const double p, std::list<Polynomial>& charp, const size_t N,
+ double * A, const size_t lda,
+ const enum FFPACK_C_CHARPOLY_TAG CharpTag= FfpackArithProg);
+
+template<class Polynomial>
+Polynomial & mulpoly_modular_double(const double p, Polynomial &res, const Polynomial & P1, const Polynomial & P2);
+
+template <class Polynomial>
+Polynomial&
+CharPoly_modular_double( const double p, Polynomial& charp, const size_t N,
+ double * A, const size_t lda,
+ const enum FFPACK_C_CHARPOLY_TAG CharpTag= FfpackArithProg);
+
+
+
+template <class Polynomial>
+std::list<Polynomial>&
+CharpolyArithProg_modular_double (const double p, std::list<Polynomial>& frobeniusForm,
+ const size_t N, double * A, const size_t lda, const size_t c);
+#endif
+
+
+
+/**********************/
+/* MINIMAL POLYNOMIAL */
+/**********************/
+
+#if 0 /* pas pour le moment */
+template <class Polynomial>
+Polynomial&
+MinPoly_modular_double( const double p, Polynomial& minP, const size_t N,
+ const double * A, const size_t lda,
+ double * X, const size_t ldx, size_t* P,
+ const enum FFPACK_C_MINPOLY_TAG MinTag= FFPACK::FfpackDense,
+ const size_t kg_mc=0, const size_t kg_mb=0, const size_t kg_j=0 );
+#endif
+
+
+/* Krylov Elim */
+
+
+size_t KrylovElim_modular_double( const double p, const size_t M, const size_t N,
+ double * A, const size_t lda, size_t*P,
+ size_t *Q, const size_t deg, size_t *iterates, size_t * inviterates, const size_t maxit,size_t virt
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return KrylovElim(F,M,N,A,lda,P,Q,deg,iterates,inviterates,maxit, virt);
+ } else {
+ ModularBalanced<double> F(p);
+ return KrylovElim(F,M,N,A,lda,P,Q,deg,iterates,inviterates,maxit, virt);
+ }
+}
+
+
+
+size_t SpecRankProfile_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda, const size_t deg, size_t *rankProfile
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return SpecRankProfile(F,M,N,A,lda,deg,rankProfile);
+ } else {
+ ModularBalanced<double> F(p);
+ return SpecRankProfile(F,M,N,A,lda,deg,rankProfile);
+ }
+}
+
+
+
+/********/
+/* RANK */
+/********/
+
+
+size_t
+Rank_modular_double( const double p, const size_t M, const size_t N,
+ double * A, const size_t lda
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return Rank(F,M,N,A,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ return Rank(F,M,N,A,lda);
+ }
+}
+
+
+/********/
+/* DET */
+/********/
+
+
+bool
+IsSingular_modular_double( const double p, const size_t M, const size_t N,
+ double * A, const size_t lda
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return IsSingular(F,M,N,A,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ return IsSingular(F,M,N,A,lda);
+ }
+}
+
+
+
+double
+Det_modular_double( const double p, const size_t M, const size_t N,
+ double * A, const size_t lda
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return Det(F,M,N,A,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ return Det(F,M,N,A,lda);
+ }
+}
+
+
+
+/*********/
+/* SOLVE */
+/*********/
+
+
+
+double *
+Solve_modular_double( const double p, const size_t M,
+ double * A, const size_t lda,
+ double * x, const int incx,
+ const double * b, const int incb
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return Solve(F,M,A,lda,x,incx,b,incb);
+ } else {
+ ModularBalanced<double> F(p);
+ return Solve(F,M,A,lda,x,incx,b,incb);
+ }
+}
+
+
+
+
+void
+solveLB_modular_double( const double p, const enum FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ double * L, const size_t ldl,
+ const size_t * Q,
+ double * B, const size_t ldb
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ solveLB(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,R,L,ldl,Q,B,ldb);
+ } else {
+ ModularBalanced<double> F(p);
+ solveLB(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,R,L,ldl,Q,B,ldb);
+ }
+}
+
+
+
+void
+solveLB2_modular_double( const double p, const enum FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ double * L, const size_t ldl,
+ const size_t * Q,
+ double * B, const size_t ldb
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ solveLB2(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,R,L,ldl,Q,B,ldb);
+ } else {
+ ModularBalanced<double> F(p);
+ solveLB2(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,R,L,ldl,Q,B,ldb);
+ }
+}
+
+
+
+/*************/
+/* NULLSPACE */
+/*************/
+
+
+void RandomNullSpaceVector_modular_double (const double p, const enum FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ double * X, const size_t incX
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ RandomNullSpaceVector(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,A,lda,X,incX);
+ } else {
+ ModularBalanced<double> F(p);
+ RandomNullSpaceVector(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,A,lda,X,incX);
+ }
+}
+
+
+
+size_t NullSpaceBasis_modular_double (const double p, const enum FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ double ** NS, size_t* ldn,
+ size_t * NSdim
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return NullSpaceBasis(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,A,lda,*NS,*ldn,*NSdim);
+ } else {
+ ModularBalanced<double> F(p);
+ return NullSpaceBasis(F,(enum FFLAS::FFLAS_SIDE)Side,M,N,A,lda,*NS,*ldn,*NSdim);
+ }
+}
+
+
+/*****************/
+/* RANK PROFILES */
+/*****************/
+
+
+size_t RowRankProfile_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t ** rkprofile,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return RowRankProfile(F,M,N,A,lda,*rkprofile,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<double> F(p);
+ return RowRankProfile(F,M,N,A,lda,*rkprofile,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+
+
+size_t ColumnRankProfile_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t ** rkprofile,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return ColumnRankProfile(F,M,N,A,lda,*rkprofile,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<double> F(p);
+ return ColumnRankProfile(F,M,N,A,lda,*rkprofile,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+void RankProfileFromLU (const size_t* P, const size_t N, const size_t R,
+ size_t* rkprofile, const enum FFPACK_C_LU_TAG LuTag)
+{
+ FFPACK::RankProfileFromLU(P,N,R,rkprofile,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+}
+
+size_t LeadingSubmatrixRankProfiles (const size_t M, const size_t N, const size_t R,
+ const size_t LSm, const size_t LSn,
+ const size_t* P, const size_t* Q,
+ size_t* RRP, size_t* CRP)
+{
+ return FFPACK::LeadingSubmatrixRankProfiles(M,N,R,LSm,LSn,P,Q,RRP,CRP);
+}
+
+
+
+size_t RowRankProfileSubmatrixIndices_modular_double (const double p,
+ const size_t M, const size_t N,
+ double * A,
+ const size_t lda,
+ size_t ** rowindices,
+ size_t ** colindices,
+ size_t * R
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return RowRankProfileSubmatrixIndices(F,M,N,A,lda,*rowindices,*colindices,*R);
+ } else {
+ ModularBalanced<double> F(p);
+ return RowRankProfileSubmatrixIndices(F,M,N,A,lda,*rowindices,*colindices,*R);
+ }
+}
+
+
+
+size_t ColRankProfileSubmatrixIndices_modular_double (const double p,
+ const size_t M, const size_t N,
+ double * A,
+ const size_t lda,
+ size_t** rowindices,
+ size_t** colindices,
+ size_t* R
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return ColRankProfileSubmatrixIndices(F,M,N,A,lda,*rowindices,*colindices,*R);
+ } else {
+ ModularBalanced<double> F(p);
+ return ColRankProfileSubmatrixIndices(F,M,N,A,lda,*rowindices,*colindices,*R);
+ }
+}
+
+
+
+size_t RowRankProfileSubmatrix_modular_double (const double p,
+ const size_t M, const size_t N,
+ double * A,
+ const size_t lda,
+ double ** X, size_t* R
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return RowRankProfileSubmatrix(F,M,N,A,lda,*X,*R);
+ } else {
+ ModularBalanced<double> F(p);
+ return RowRankProfileSubmatrix(F,M,N,A,lda,*X,*R);
+ }
+}
+
+
+
+size_t ColRankProfileSubmatrix_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ double ** X, size_t* R
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ return ColRankProfileSubmatrix(F,M,N,A,lda,*X,*R);
+ } else {
+ ModularBalanced<double> F(p);
+ return ColRankProfileSubmatrix(F,M,N,A,lda,*X,*R);
+ }
+}
+
+
+/*********************************************/
+/* Accessors to Triangular and Echelon forms */
+/*********************************************/
+
+
+void
+getTriangular_modular_double (const double p, const enum FFLAS::FFLAS_UPLO Uplo,
+ const enum FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N, const size_t R,
+ const double * A, const size_t lda,
+ double * T, const size_t ldt,
+ const bool OnlyNonZeroVectors
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ getTriangular(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_DIAG)Diag,M,N,R,A,lda,T,ldt,OnlyNonZeroVectors);
+ } else {
+ ModularBalanced<double> F(p);
+ getTriangular(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_DIAG)Diag,M,N,R,A,lda,T,ldt,OnlyNonZeroVectors);
+ }
+}
+
+
+
+void
+getTriangularin_modular_double (const double p, const enum FFLAS::FFLAS_UPLO Uplo,
+ const enum FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N, const size_t R,
+ double * A, const size_t lda
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ getTriangular(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_DIAG)Diag,M,N,R,A,lda);
+ } else {
+ ModularBalanced<double> F(p);
+ getTriangular(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_DIAG)Diag,M,N,R,A,lda);
+ }
+}
+
+
+
+void
+getEchelonForm_modular_double (const double p, const enum FFLAS::FFLAS_UPLO Uplo,
+ const enum FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ const double * A, const size_t lda,
+ double * T, const size_t ldt,
+ const bool OnlyNonZeroVectors,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ getEchelonForm(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_DIAG)Diag,M,N,R,P,A,lda,T,ldt,OnlyNonZeroVectors,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<double> F(p);
+ getEchelonForm(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_DIAG)Diag,M,N,R,P,A,lda,T,ldt,OnlyNonZeroVectors,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+
+void
+getEchelonFormin_modular_double (const double p, const enum FFLAS::FFLAS_UPLO Uplo,
+ const enum FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ double * A, const size_t lda,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ getEchelonForm(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_DIAG)Diag,M,N,R,P,A,lda,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<double> F(p);
+ getEchelonForm(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_DIAG)Diag,M,N,R,P,A,lda,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+void
+getEchelonTransform_modular_double (const double p, const enum FFLAS::FFLAS_UPLO Uplo,
+ const enum FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P, const size_t* Q,
+ const double * A, const size_t lda,
+ double * T, const size_t ldt,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ getEchelonTransform(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_DIAG)Diag,M,N,R,P,Q,A,lda,T,ldt,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<double> F(p);
+ getEchelonTransform(F,(enum FFLAS::FFLAS_UPLO)Uplo,(enum FFLAS::FFLAS_DIAG)Diag,M,N,R,P,Q,A,lda,T,ldt,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+
+
+void
+getReducedEchelonForm_modular_double (const double p, const enum FFLAS::FFLAS_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ const double * A, const size_t lda,
+ double * T, const size_t ldt,
+ const bool OnlyNonZeroVectors,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ getReducedEchelonForm(F,(enum FFLAS::FFLAS_UPLO)Uplo,M,N,R,P,A,lda,T,ldt,OnlyNonZeroVectors,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<double> F(p);
+ getReducedEchelonForm(F,(enum FFLAS::FFLAS_UPLO)Uplo,M,N,R,P,A,lda,T,ldt,OnlyNonZeroVectors,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+
+void
+getReducedEchelonFormin_modular_double (const double p, const enum FFLAS::FFLAS_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ double * A, const size_t lda,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ getReducedEchelonForm(F,(enum FFLAS::FFLAS_UPLO)Uplo,M,N,R,P,A,lda,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<double> F(p);
+ getReducedEchelonForm(F,(enum FFLAS::FFLAS_UPLO)Uplo,M,N,R,P,A,lda,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+
+void
+getReducedEchelonTransform_modular_double (const double p, const enum FFLAS::FFLAS_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P, const size_t* Q,
+ const double * A, const size_t lda,
+ double * T, const size_t ldt,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive)
+{
+ if (positive) {
+ Modular<double> F(p);
+ getReducedEchelonTransform(F,(enum FFLAS::FFLAS_UPLO)Uplo,M,N,R,P,Q,A,lda,T,ldt,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ } else {
+ ModularBalanced<double> F(p);
+ getReducedEchelonTransform(F,(enum FFLAS::FFLAS_UPLO)Uplo,M,N,R,P,Q,A,lda,T,ldt,(enum FFPACK::FFPACK_LU_TAG)LuTag);
+ }
+}
+
+
+void
+PLUQtoEchelonPermutation (const size_t N, const size_t R, const size_t * P, size_t * outPerm)
+{
+ FFPACK::PLUQtoEchelonPermutation(N,R,P,outPerm);
+}
+
diff --git a/fflas-ffpack/interfaces/libs/ffpack_c.h b/fflas-ffpack/interfaces/libs/ffpack_c.h
new file mode 100644
index 0000000..3250033
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/ffpack_c.h
@@ -0,0 +1,707 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+ */
+/*
+ * Copyright (C) 2015 FFLAS-FFPACK
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/** @file ffpack-c.h
+ * @author Brice Boyer
+ * @brief C functions calls for FFPACK
+ * @see ffpack/ffpack.h
+ */
+
+#ifndef __FFLASFFPACK_interfaces_libs_ffpack_c_H
+#define __FFLASFFPACK_interfaces_libs_ffpack_c_H
+//#include "fflas-ffpack/fflas-ffpack-config.h"
+
+#ifndef FFPACK_COMPILED
+#define FFPACK_COMPILED
+#endif
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#ifndef __FFLASFFPACK_interfaces_libs_fflas_c_H
+
+enum FFLAS_C_ORDER {
+ FflasRowMajor=101,
+ FflasColMajor=102
+};
+enum FFLAS_C_TRANSPOSE {
+ FflasNoTrans = 111,
+ FflasTrans = 112
+};
+enum FFLAS_C_UPLO {
+ FflasUpper = 121,
+ FflasLower = 122
+};
+enum FFLAS_C_DIAG {
+ FflasNonUnit = 131,
+ FflasUnit = 132
+};
+enum FFLAS_C_SIDE {
+ FflasLeft = 141,
+ FflasRight = 142
+};
+
+#endif // __FFLASFFPACK_interfaces_libs_fflas_c_H
+
+ enum FFPACK_C_LU_TAG
+ {
+ FfpackSlabRecursive = 1,
+ FfpackTileRecursive = 2,
+ FfpackSingular = 3
+ };
+
+ enum FFPACK_C_CHARPOLY_TAG
+ {
+ FfpackLUK=1,
+ FfpackKG=2,
+ FfpackHybrid=3,
+ FfpackKGFast=4,
+ FfpackDanilevski=5,
+ FfpackArithProg=6,
+ FfpackKGFastG=7
+ };
+
+ enum FFPACK_C_MINPOLY_TAG
+ {
+ FfpackDense=1,
+ FfpackKGF=2
+ };
+
+
+
+/*****************/
+/* PERMUTATIONS */
+/*****************/
+
+
+void LAPACKPerm2MathPerm (size_t * MathP, const size_t * LapackP,
+ const size_t N);
+
+void MathPerm2LAPACKPerm (size_t * LapackP, const size_t * MathP,
+ const size_t N);
+
+void MatrixApplyS_modular_double (const double p, double * A, const size_t lda, const size_t width,
+ const size_t M2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4
+ , bool positive );
+
+void PermApplyS_double (double * A, const size_t lda, const size_t width,
+ const size_t M2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4);
+
+
+void MatrixApplyT_modular_double (const double p, double * A, const size_t lda, const size_t width,
+ const size_t N2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4
+ , bool positive );
+
+void PermApplyT_double (double * A, const size_t lda, const size_t width,
+ const size_t N2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4);
+
+void composePermutationsP (size_t * MathP,
+ const size_t * P1,
+ const size_t * P2,
+ const size_t R, const size_t N);
+
+void composePermutationsQ (size_t * MathP,
+ const size_t * Q1,
+ const size_t * Q2,
+ const size_t R, const size_t N);
+
+void cyclic_shift_mathPerm (size_t * P, const size_t s);
+
+#if 0
+template<typename Base_t>
+void cyclic_shift_row_col(Base_t * A, size_t m, size_t n, size_t lda);
+#endif
+
+
+void cyclic_shift_row_modular_double(const double p, double * A, size_t m, size_t n, size_t lda
+ , bool positive );
+
+
+void cyclic_shift_col_modular_double(const double p, double * A, size_t m, size_t n, size_t lda
+ , bool positive );
+
+
+
+void
+applyP_modular_double( const double p,
+ const enum FFLAS_C_SIDE Side,
+ const enum FFLAS_C_TRANSPOSE Trans,
+ const size_t M, const size_t ibeg, const size_t iend,
+ double * A, const size_t lda, const size_t * P
+ , bool positive );
+
+
+
+
+
+/* fgetrs, fgesv */
+
+void
+fgetrsin_modular_double (const double p,
+ const enum FFLAS_C_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ double * A, const size_t lda,
+ const size_t *P, const size_t *Q,
+ double * B, const size_t ldb,
+ int * info
+ , bool positive );
+
+
+double *
+fgetrs_modular_double (const double p,
+ const enum FFLAS_C_SIDE Side,
+ const size_t M, const size_t N, const size_t NRHS, const size_t R,
+ double * A, const size_t lda,
+ const size_t *P, const size_t *Q,
+ double * X, const size_t ldx,
+ const double * B, const size_t ldb,
+ int * info
+ , bool positive );
+
+
+size_t
+fgesvin_modular_double (const double p,
+ const enum FFLAS_C_SIDE Side,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ double * B, const size_t ldb,
+ int * info
+ , bool positive );
+
+
+size_t
+fgesv_modular_double (const double p,
+ const enum FFLAS_C_SIDE Side,
+ const size_t M, const size_t N, const size_t NRHS,
+ double * A, const size_t lda,
+ double * X, const size_t ldx,
+ const double * B, const size_t ldb,
+ int * info);
+
+/* ftrtr */
+
+
+void
+ftrtri_modular_double (const double p, const enum FFLAS_C_UPLO Uplo, const enum FFLAS_C_DIAG Diag,
+ const size_t N, double * A, const size_t lda
+ , bool positive );
+
+
+void trinv_left_modular_double( const double p, const size_t N, const double * L, const size_t ldl,
+ double * X, const size_t ldx
+ , bool positive );
+
+
+void
+ftrtrm_modular_double (const double p, const enum FFLAS_C_DIAG diag, const size_t N,
+ double * A, const size_t lda
+ , bool positive );
+
+
+
+/* PLUQ */
+
+
+size_t
+PLUQ_modular_double (const double p, const enum FFLAS_C_DIAG Diag,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t*P, size_t *Q
+ , bool positive );
+
+
+
+size_t
+LUdivine_modular_double (const double p, const enum FFLAS_C_DIAG Diag, const enum FFLAS_C_TRANSPOSE trans,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Qt,
+ const enum FFPACK_C_LU_TAG LuTag,
+ const size_t cutoff
+ , bool positive );
+
+
+size_t
+LUdivine_small_modular_double (const double p, const enum FFLAS_C_DIAG Diag, const enum FFLAS_C_TRANSPOSE trans,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Q,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+size_t
+LUdivine_gauss_modular_double (const double p, const enum FFLAS_C_DIAG Diag,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Q,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+
+/*****************/
+/* ECHELON FORMS */
+/*****************/
+
+size_t
+ColumnEchelonForm_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Qt, bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+size_t
+RowEchelonForm_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+size_t
+ColumnEchelonForm_modular_float (const float p, const size_t M, const size_t N,
+ float * A, const size_t lda,
+ size_t* P, size_t* Qt, bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+size_t
+RowEchelonForm_modular_float (const float p, const size_t M, const size_t N,
+ float * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+size_t
+ColumnEchelonForm_modular_int32_t (const int32_t p, const size_t M, const size_t N,
+ int32_t * A, const size_t lda,
+ size_t* P, size_t* Qt, bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+size_t
+RowEchelonForm_modular_int32_t (const int32_t p, const size_t M, const size_t N,
+ int32_t * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+size_t
+ReducedColumnEchelonForm_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+size_t
+ReducedRowEchelonForm_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+size_t
+ReducedColumnEchelonForm_modular_float (const float p, const size_t M, const size_t N,
+ float * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+size_t
+ReducedRowEchelonForm_modular_float (const float p, const size_t M, const size_t N,
+ float * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+size_t
+ReducedColumnEchelonForm_modular_int32_t (const int32_t p, const size_t M, const size_t N,
+ int32_t * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+size_t
+ReducedRowEchelonForm_modular_int32_t (const int32_t p, const size_t M, const size_t N,
+ int32_t * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+size_t
+ReducedRowEchelonForm2_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform
+ , bool positive );
+
+
+size_t
+REF_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ const size_t colbeg, const size_t rowbeg, const size_t colsize,
+ size_t* Qt, size_t* P
+ , bool positive );
+
+
+/*****************/
+/* INVERSION */
+/*****************/
+
+
+double *
+Invertin_modular_double (const double p, const size_t M,
+ double * A, const size_t lda,
+ int * nullity
+ , bool positive );
+
+
+double *
+Invert_modular_double (const double p, const size_t M,
+ const double * A, const size_t lda,
+ double * X, const size_t ldx,
+ int* nullity
+ , bool positive );
+
+
+double *
+Invert2_modular_double( const double p, const size_t M,
+ double * A, const size_t lda,
+ double * X, const size_t ldx,
+ int* nullity
+ , bool positive );
+
+/*****************************/
+/* CHARACTERISTIC POLYNOMIAL */
+/*****************************/
+
+
+#if 0 /* pas pour le moment */
+template <class Polynomial>
+std::list<Polynomial>&
+CharPoly( const double p, std::list<Polynomial>& charp, const size_t N,
+ double * A, const size_t lda,
+ const enum FFPACK_C_CHARPOLY_TAG CharpTag= FfpackArithProg);
+
+template<class Polynomial>
+Polynomial & mulpoly_modular_double(const double p, Polynomial &res, const Polynomial & P1, const Polynomial & P2);
+
+template <class Polynomial>
+Polynomial&
+CharPoly_modular_double( const double p, Polynomial& charp, const size_t N,
+ double * A, const size_t lda,
+ const enum FFPACK_C_CHARPOLY_TAG CharpTag= FfpackArithProg);
+
+
+
+template <class Polynomial>
+std::list<Polynomial>&
+CharpolyArithProg_modular_double (const double p, std::list<Polynomial>& frobeniusForm,
+ const size_t N, double * A, const size_t lda, const size_t c);
+#endif
+
+
+
+/**********************/
+/* MINIMAL POLYNOMIAL */
+/**********************/
+
+#if 0 /* pas pour le moment */
+template <class Polynomial>
+Polynomial&
+MinPoly_modular_double( const double p, Polynomial& minP, const size_t N,
+ const double * A, const size_t lda,
+ double * X, const size_t ldx, size_t* P,
+ const enum FFPACK_C_MINPOLY_TAG MinTag= FfpackDense,
+ const size_t kg_mc=0, const size_t kg_mb=0, const size_t kg_j=0 );
+#endif
+
+
+/* Krylov Elim */
+
+
+size_t KrylovElim_modular_double( const double p, const size_t M, const size_t N,
+ double * A, const size_t lda, size_t*P,
+ size_t *Q, const size_t deg, size_t *iterates, size_t * inviterates, const size_t maxit,size_t virt
+ , bool positive );
+
+
+size_t SpecRankProfile_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda, const size_t deg, size_t *rankProfile
+ , bool positive );
+
+
+/********/
+/* RANK */
+/********/
+
+
+size_t
+Rank_modular_double( const double p, const size_t M, const size_t N,
+ double * A, const size_t lda
+ , bool positive ) ;
+
+/********/
+/* DET */
+/********/
+
+
+bool
+IsSingular_modular_double( const double p, const size_t M, const size_t N,
+ double * A, const size_t lda
+ , bool positive );
+
+
+double
+Det_modular_double( const double p, const size_t M, const size_t N,
+ double * A, const size_t lda
+ , bool positive );
+
+/*********/
+/* SOLVE */
+/*********/
+
+
+
+double *
+Solve_modular_double( const double p, const size_t M,
+ double * A, const size_t lda,
+ double * x, const int incx,
+ const double * b, const int incb
+ , bool positive );
+
+
+
+void
+solveLB_modular_double( const double p, const enum FFLAS_C_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ double * L, const size_t ldl,
+ const size_t * Q,
+ double * B, const size_t ldb );
+
+
+void
+solveLB2_modular_double( const double p, const enum FFLAS_C_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ double * L, const size_t ldl,
+ const size_t * Q,
+ double * B, const size_t ldb
+ , bool positive );
+
+
+/*************/
+/* NULLSPACE */
+/*************/
+
+
+void RandomNullSpaceVector_modular_double (const double p, const enum FFLAS_C_SIDE Side,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ double * X, const size_t incX
+ , bool positive );
+
+
+size_t NullSpaceBasis_modular_double (const double p, const enum FFLAS_C_SIDE Side,
+ const size_t M, const size_t N,
+ double * A, const size_t lda,
+ double ** NS, size_t* ldn,
+ size_t * NSdim
+ , bool positive );
+
+/*****************/
+/* RANK PROFILES */
+/*****************/
+
+
+size_t RowRankProfile_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t ** rkprofile,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+
+size_t ColumnRankProfile_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ size_t ** rkprofile,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+void RankProfileFromLU (const size_t* P, const size_t N, const size_t R,
+ size_t* rkprofile, const enum FFPACK_C_LU_TAG LuTag);
+
+size_t LeadingSubmatrixRankProfiles (const size_t M, const size_t N, const size_t R,
+ const size_t LSm, const size_t LSn,
+ const size_t* P, const size_t* Q,
+ size_t* RRP, size_t* CRP);
+
+
+size_t RowRankProfileSubmatrixIndices_modular_double (const double p,
+ const size_t M, const size_t N,
+ double * A,
+ const size_t lda,
+ size_t ** rowindices,
+ size_t ** colindices,
+ size_t * R
+ , bool positive );
+
+
+size_t ColRankProfileSubmatrixIndices_modular_double (const double p,
+ const size_t M, const size_t N,
+ double * A,
+ const size_t lda,
+ size_t** rowindices,
+ size_t** colindices,
+ size_t* R
+ , bool positive );
+
+
+size_t RowRankProfileSubmatrix_modular_double (const double p,
+ const size_t M, const size_t N,
+ double * A,
+ const size_t lda,
+ double ** X, size_t* R
+ , bool positive );
+
+
+size_t ColRankProfileSubmatrix_modular_double (const double p, const size_t M, const size_t N,
+ double * A, const size_t lda,
+ double ** X, size_t* R
+ , bool positive );
+
+/*********************************************/
+/* Accessors to Triangular and Echelon forms */
+/*********************************************/
+
+
+void
+getTriangular_modular_double (const double p, const enum FFLAS_C_UPLO Uplo,
+ const enum FFLAS_C_DIAG diag,
+ const size_t M, const size_t N, const size_t R,
+ const double * A, const size_t lda,
+ double * T, const size_t ldt,
+ const bool OnlyNonZeroVectors
+ , bool positive );
+
+
+void
+getTriangularin_modular_double (const double p, const enum FFLAS_C_UPLO Uplo,
+ const enum FFLAS_C_DIAG diag,
+ const size_t M, const size_t N, const size_t R,
+ double * A, const size_t lda
+ , bool positive );
+
+
+void
+getEchelonForm_modular_double (const double p, const enum FFLAS_C_UPLO Uplo,
+ const enum FFLAS_C_DIAG diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ const double * A, const size_t lda,
+ double * T, const size_t ldt,
+ const bool OnlyNonZeroVectors,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+void
+getEchelonFormin_modular_double (const double p, const enum FFLAS_C_UPLO Uplo,
+ const enum FFLAS_C_DIAG diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ double * A, const size_t lda,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+void
+getEchelonTransform_modular_double (const double p, const enum FFLAS_C_UPLO Uplo,
+ const enum FFLAS_C_DIAG diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P, const size_t* Q,
+ const double * A, const size_t lda,
+ double * T, const size_t ldt,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+void
+getReducedEchelonForm_modular_double (const double p, const enum FFLAS_C_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ const double * A, const size_t lda,
+ double * T, const size_t ldt,
+ const bool OnlyNonZeroVectors,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+void
+getReducedEchelonFormin_modular_double (const double p, const enum FFLAS_C_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ double * A, const size_t lda,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+
+void
+getReducedEchelonTransform_modular_double (const double p, const enum FFLAS_C_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P, const size_t* Q,
+ const double * A, const size_t lda,
+ double * T, const size_t ldt,
+ const enum FFPACK_C_LU_TAG LuTag
+ , bool positive );
+
+void
+PLUQtoEchelonPermutation (const size_t N, const size_t R, const size_t * P, size_t * outPerm);
+
+#ifdef __cplusplus
+}
+
+#endif
+
+
+#endif // __FFLASFFPACK_interfaces_libs_ffpack_c_H
+
diff --git a/fflas-ffpack/interfaces/libs/ffpack_compiled_spec.inl b/fflas-ffpack/interfaces/libs/ffpack_compiled_spec.inl
new file mode 100644
index 0000000..bc96d69
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/ffpack_compiled_spec.inl
@@ -0,0 +1,49 @@
+#include "givaro//modular-balanced.h"
+#include "givaro//modular.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+#define PASTER(x,y) x ## _ ## y
+#define EVALUATOR(x,y) PASTER(x,y)
+#define NAME(fun) EVALUATOR(fun, FFLAS_TYPE)
+
+#if FFLAS_FIELD == Modular
+ #define FFLAS_POSITIVE true
+#else
+ #define FFLAS_POSITIVE false
+#endif
+
+ namespace FFPACK{
+template <>
+size_t ColumnEchelonForm (const Givaro::FFLAS_FIELD<FFLAS_TYPE>& F, const size_t M, const size_t N,
+ FFLAS_TYPE* A, const size_t lda,
+ size_t* P, size_t* Qt, bool transform,
+ const FFPACK::FFPACK_LU_TAG LuTag){
+ return NAME(ColumnEchelonForm_modular) (F.cardinality(), M, N, A, lda, P, Qt, transform, LuTag, FFLAS_POSITIVE);
+}
+template <>
+size_t RowEchelonForm (const Givaro::FFLAS_FIELD<FFLAS_TYPE>& F, const size_t M, const size_t N,
+ FFLAS_TYPE* A, const size_t lda,
+ size_t* P, size_t* Qt, bool transform,
+ const FFPACK::FFPACK_LU_TAG LuTag){
+ return NAME(RowEchelonForm_modular) (F.cardinality(), M, N, A, lda, P, Qt, transform, LuTag, FFLAS_POSITIVE);
+}
+template <>
+size_t ReducedColumnEchelonForm (const Givaro::FFLAS_FIELD<FFLAS_TYPE>& F, const size_t M, const size_t N,
+ FFLAS_TYPE* A, const size_t lda,
+ size_t* P, size_t* Qt, bool transform,
+ const FFPACK::FFPACK_LU_TAG LuTag){
+ return NAME(ReducedColumnEchelonForm_modular) (F.cardinality(), M, N, A, lda, P, Qt, transform, LuTag, FFLAS_POSITIVE);
+}
+template <>
+size_t ReducedRowEchelonForm (const Givaro::FFLAS_FIELD<FFLAS_TYPE>& F, const size_t M, const size_t N,
+ FFLAS_TYPE* A, const size_t lda,
+ size_t* P, size_t* Qt, bool transform,
+ const FFPACK::FFPACK_LU_TAG LuTag){
+ return NAME(ReducedRowEchelonForm_modular) (F.cardinality(), M, N, A, lda, P, Qt, transform, LuTag, FFLAS_POSITIVE);
+}
+}
+
+#undef FFLAS_POSITIVE
+#undef PASTER
+#undef EVALUATOR
+#undef NAME
diff --git a/fflas-ffpack/interfaces/libs/ffpack_inst.C b/fflas-ffpack/interfaces/libs/ffpack_inst.C
new file mode 100755
index 0000000..4dcb014
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/ffpack_inst.C
@@ -0,0 +1,72 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* ffpack_inst.C
+ * Copyright (C) 2015 FFLAS-FFPACK group
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFPACK_INST_C
+#define __FFPACK_INST_C
+
+// The ffpack lib should link to the compiled fflas lib
+#ifndef FFLAS_COMPILED
+#define FFLAS_COMPILED
+#endif
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+
+#include "givaro/modular.h"
+#include "givaro/modular-balanced.h"
+#include "ffpack.h"
+
+// This is a C file: we do template instantiations
+#ifdef INST_OR_DECL
+#undef INST_OR_DECL
+#endif
+#define INST_OR_DECL
+
+#define FFLAS_FIELD Givaro::ModularBalanced
+#define FFLAS_ELT double
+#include "ffpack_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "ffpack_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "ffpack_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#define FFLAS_FIELD Givaro::Modular
+#define FFLAS_ELT double
+#include "ffpack_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "ffpack_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "ffpack_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#endif // __FFPACK_INST_C
diff --git a/fflas-ffpack/interfaces/libs/ffpack_inst.h b/fflas-ffpack/interfaces/libs/ffpack_inst.h
new file mode 100644
index 0000000..bc9aae3
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/ffpack_inst.h
@@ -0,0 +1,71 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* ffpack_inst.h
+ * Copyright (C) 2015 FFLAS-FFPACK group
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFPACK_INST_H
+#define __FFPACK_INST_H
+
+// The ffpack lib should link to the compiled fflas lib
+#ifndef FFLAS_COMPILED
+#define FFLAS_COMPILED
+#endif
+
+#include "givaro/modular.h"
+#include "givaro/modular-balanced.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+
+// This is a H file: we do template declarations
+#ifdef INST_OR_DECL
+#undef INST_OR_DECL
+#endif
+#define INST_OR_DECL <>
+
+#define FFLAS_FIELD Givaro::ModularBalanced
+#define FFLAS_ELT double
+#include "ffpack_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "ffpack_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "ffpack_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#define FFLAS_FIELD Givaro::Modular
+#define FFLAS_ELT double
+#include "ffpack_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT float
+#include "ffpack_inst_implem.inl"
+#undef FFLAS_ELT
+#define FFLAS_ELT int32_t
+#include "ffpack_inst_implem.inl"
+#undef FFLAS_ELT
+#undef FFLAS_FIELD
+
+#endif //__FFPACK_INST_H
diff --git a/fflas-ffpack/interfaces/libs/ffpack_inst_implem.inl b/fflas-ffpack/interfaces/libs/ffpack_inst_implem.inl
new file mode 100644
index 0000000..399a183
--- /dev/null
+++ b/fflas-ffpack/interfaces/libs/ffpack_inst_implem.inl
@@ -0,0 +1,396 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* ffpack_inst_implem.inl
+ * Copyright (C) 2005 Clement Pernet
+ * 2014 FFLAS-FFPACK group
+ * 2015 FFLAS-FFPACK group
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+namespace FFPACK {
+
+ template INST_OR_DECL
+ void MatrixApplyS (const FFLAS_FIELD<FFLAS_ELT>& F, FFLAS_ELT* A, const size_t lda, const size_t width,
+ const size_t M2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4);
+
+ template INST_OR_DECL
+ void MatrixApplyT (const FFLAS_FIELD<FFLAS_ELT>& F, FFLAS_ELT* A, const size_t lda, const size_t width,
+ const size_t N2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4);
+
+ void composePermutationsP (size_t * MathP,
+ const size_t * P1,
+ const size_t * P2,
+ const size_t R, const size_t N);
+ void composePermutationsQ (size_t * MathP,
+ const size_t * Q1,
+ const size_t * Q2,
+ const size_t R, const size_t N);
+
+ void cyclic_shift_mathPerm (size_t * P, const size_t s);
+ template<typename Base_t>
+ void cyclic_shift_row_col(Base_t * A, size_t m, size_t n, size_t lda);
+ template INST_OR_DECL
+ void cyclic_shift_row(const FFLAS_FIELD<FFLAS_ELT>& F, FFLAS_ELT* A, size_t m, size_t n, size_t lda);
+ template INST_OR_DECL
+ void cyclic_shift_col(const FFLAS_FIELD<FFLAS_ELT>& F, FFLAS_ELT* A, size_t m, size_t n, size_t lda);
+
+
+ template INST_OR_DECL
+ void applyP( const FFLAS_FIELD<FFLAS_ELT>& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const FFLAS::FFLAS_TRANSPOSE Trans,
+ const size_t M, const size_t ibeg, const size_t iend,
+ FFLAS_ELT* A, const size_t lda, const size_t * P );
+
+
+ template INST_OR_DECL
+ void papplyP( const FFLAS_FIELD<FFLAS_ELT>& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const FFLAS::FFLAS_TRANSPOSE Trans,
+ const size_t m, const size_t ibeg, const size_t iend,
+ FFLAS_ELT* A, const size_t lda, const size_t * P );
+
+ template INST_OR_DECL
+ void pMatrixApplyT (const FFLAS_FIELD<FFLAS_ELT>& F, FFLAS_ELT* A, const size_t lda,
+ const size_t width, const size_t N2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4) ;
+
+
+ template INST_OR_DECL
+ void pMatrixApplyS (const FFLAS_FIELD<FFLAS_ELT>& F, FFLAS_ELT* A, const size_t lda,
+ const size_t width, const size_t M2,
+ const size_t R1, const size_t R2,
+ const size_t R3, const size_t R4) ;
+
+ template INST_OR_DECL
+ size_t pPLUQ(const FFLAS_FIELD<FFLAS_ELT>& Fi, const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ size_t* P, size_t* Q, int nt);
+
+ template INST_OR_DECL
+ void fgetrs (const FFLAS_FIELD<FFLAS_ELT>& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ FFLAS_ELT* A, const size_t lda,
+ const size_t *P, const size_t *Q,
+ FFLAS_ELT* B, const size_t ldb,
+ int * info);
+
+ template INST_OR_DECL
+ FFLAS_ELT* fgetrs (const FFLAS_FIELD<FFLAS_ELT>& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t NRHS, const size_t R,
+ FFLAS_ELT* A, const size_t lda,
+ const size_t *P, const size_t *Q,
+ FFLAS_ELT* X, const size_t ldx,
+ const FFLAS_ELT* B, const size_t ldb,
+ int * info);
+ template INST_OR_DECL
+ size_t fgesv (const FFLAS_FIELD<FFLAS_ELT>& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* B, const size_t ldb,
+ int * info);
+
+ template INST_OR_DECL
+ size_t fgesv (const FFLAS_FIELD<FFLAS_ELT>& F,
+ const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t NRHS,
+ FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* X, const size_t ldx,
+ const FFLAS_ELT* B, const size_t ldb,
+ int * info);
+
+ template INST_OR_DECL
+ void ftrtri (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_UPLO Uplo, const FFLAS::FFLAS_DIAG Diag,
+ const size_t N, FFLAS_ELT* A, const size_t lda);
+
+
+ template INST_OR_DECL
+ void trinv_left( const FFLAS_FIELD<FFLAS_ELT>& F, const size_t N, const FFLAS_ELT* L, const size_t ldl,
+ FFLAS_ELT* X, const size_t ldx );
+
+ template INST_OR_DECL
+ void ftrtrm (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_DIAG diag, const size_t N,
+ FFLAS_ELT* A, const size_t lda);
+
+ template INST_OR_DECL
+ size_t PLUQ (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ size_t*P, size_t *Q);
+
+ template INST_OR_DECL
+ size_t LUdivine (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
+ const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ size_t* P, size_t* Qt,
+ const FFPACK_LU_TAG LuTag,
+ const size_t cutoff);
+
+ template INST_OR_DECL
+ size_t LUdivine_small (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
+ const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ size_t* P, size_t* Q,
+ const FFPACK_LU_TAG LuTag);
+
+ template INST_OR_DECL
+ size_t LUdivine_gauss (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ size_t* P, size_t* Q,
+ const FFPACK_LU_TAG LuTag);
+
+ template INST_OR_DECL
+ size_t RowEchelonForm (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const FFPACK_LU_TAG LuTag);
+
+ template INST_OR_DECL
+ size_t ReducedRowEchelonForm (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const FFPACK_LU_TAG LuTag);
+
+ template INST_OR_DECL
+ size_t ColumnEchelonForm (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const FFPACK_LU_TAG LuTag);
+ template INST_OR_DECL
+ size_t ReducedColumnEchelonForm (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ size_t* P, size_t* Qt, const bool transform,
+ const FFPACK_LU_TAG LuTag);
+
+ template INST_OR_DECL
+ FFLAS_ELT* Invert (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M,
+ FFLAS_ELT* A, const size_t lda,
+ int& nullity);
+
+ template INST_OR_DECL
+ FFLAS_ELT* Invert (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M,
+ const FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* X, const size_t ldx,
+ int& nullity);
+
+ template INST_OR_DECL
+ FFLAS_ELT* Invert2( const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M,
+ FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* X, const size_t ldx,
+ int& nullity);
+
+ template INST_OR_DECL
+ std::list<std::vector<FFLAS_ELT> >& CharPoly (const FFLAS_FIELD<FFLAS_ELT>& F,
+ std::list<std::vector<FFLAS_ELT> >& charp,
+ const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ const FFPACK_CHARPOLY_TAG CharpTag);
+
+ template INST_OR_DECL
+ std::vector<FFLAS_ELT> & mulpoly(const FFLAS_FIELD<FFLAS_ELT>& F,
+ std::vector<FFLAS_ELT> &res,
+ const std::vector<FFLAS_ELT> & P1,
+ const std::vector<FFLAS_ELT> & P2);
+
+ template INST_OR_DECL
+ std::vector<FFLAS_ELT>& CharPoly( const FFLAS_FIELD<FFLAS_ELT>& F, std::vector<FFLAS_ELT>& charp, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ const FFPACK_CHARPOLY_TAG CharpTag);
+ template INST_OR_DECL
+ std::list<std::vector<FFLAS_ELT>>& CharpolyArithProg (const FFLAS_FIELD<FFLAS_ELT>& F,
+ std::list<std::vector<FFLAS_ELT>>& frobeniusForm,
+ const size_t N,
+ FFLAS_ELT* A, const size_t lda, const size_t c);
+ template INST_OR_DECL
+ std::vector<FFLAS_ELT>& MinPoly( const FFLAS_FIELD<FFLAS_ELT>& F, std::vector<FFLAS_ELT>& minP, const size_t N,
+ const FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* X, const size_t ldx, size_t* P,
+ const FFPACK::FFPACK_MINPOLY_TAG MinTag,
+ const size_t kg_mc, const size_t kg_mb, const size_t kg_j );
+
+ template INST_OR_DECL
+ size_t KrylovElim( const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda, size_t*P,
+ size_t *Q, const size_t deg, size_t *iterates, size_t * inviterates, const size_t maxit,size_t virt);
+
+ template INST_OR_DECL
+ size_t SpecRankProfile (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda, const size_t deg, size_t *rankProfile);
+
+ template INST_OR_DECL
+ size_t Rank (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda);
+
+ template INST_OR_DECL
+ bool IsSingular (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda);
+ template INST_OR_DECL
+ FFLAS_ELT Det (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda);
+
+ template INST_OR_DECL
+ FFLAS_ELT* Solve( const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M,
+ FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* x, const int incx,
+ const FFLAS_ELT* b, const int incb );
+ template INST_OR_DECL
+ void solveLB( const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ FFLAS_ELT* L, const size_t ldl,
+ const size_t * Q,
+ FFLAS_ELT* B, const size_t ldb );
+
+ template INST_OR_DECL
+ void solveLB2( const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N, const size_t R,
+ FFLAS_ELT* L, const size_t ldl,
+ const size_t * Q,
+ FFLAS_ELT* B, const size_t ldb );
+
+ template INST_OR_DECL
+ void RandomNullSpaceVector (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* X, const size_t incX);
+ template INST_OR_DECL
+ size_t NullSpaceBasis (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_SIDE Side,
+ const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT*& NS, size_t& ldn,
+ size_t& NSdim);
+ template INST_OR_DECL
+ size_t RowRankProfile (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ size_t* &rkprofile,
+ const FFPACK_LU_TAG LuTag);
+
+ template INST_OR_DECL
+ size_t ColumnRankProfile (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ size_t* &rkprofile,
+ const FFPACK_LU_TAG LuTag);
+
+ void RankProfileFromLU (const size_t* P, const size_t N, const size_t R,
+ size_t* rkprofile, const FFPACK_LU_TAG LuTag);
+
+ size_t LeadingSubmatrixRankProfiles (const size_t M, const size_t N, const size_t R,
+ const size_t LSm, const size_t LSn,
+ const size_t* P, const size_t* Q,
+ size_t* RRP, size_t* CRP);
+ template INST_OR_DECL
+ size_t RowRankProfileSubmatrixIndices (const FFLAS_FIELD<FFLAS_ELT>& F,
+ const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ size_t*& rowindices, size_t*& colindices,
+ size_t& R);
+
+ template INST_OR_DECL
+ size_t ColRankProfileSubmatrixIndices (const FFLAS_FIELD<FFLAS_ELT>& F,
+ const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ size_t*& rowindices, size_t*& colindices,
+ size_t& R);
+ template INST_OR_DECL
+ size_t RowRankProfileSubmatrix (const FFLAS_FIELD<FFLAS_ELT>& F,
+ const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT*& X, size_t& R);
+ template INST_OR_DECL
+ size_t ColRankProfileSubmatrix (const FFLAS_FIELD<FFLAS_ELT>& F, const size_t M, const size_t N,
+ FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT*& X, size_t& R);
+
+ template INST_OR_DECL
+ void getTriangular <FFLAS_FIELD<FFLAS_ELT> > (const FFLAS_FIELD<FFLAS_ELT> & F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R,
+ const FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* T, const size_t ldt,
+ const bool OnlyNonZeroVectors);
+
+ template INST_OR_DECL
+ void getTriangular <FFLAS_FIELD<FFLAS_ELT> >(const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R,
+ FFLAS_ELT* A, const size_t lda);
+
+ template INST_OR_DECL
+ void getEchelonForm <FFLAS_FIELD<FFLAS_ELT> > (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ const FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* T, const size_t ldt,
+ const bool OnlyNonZeroVectors,
+ const FFPACK_LU_TAG LuTag);
+ template INST_OR_DECL
+ void getEchelonForm <FFLAS_FIELD<FFLAS_ELT> > (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ FFLAS_ELT* A, const size_t lda,
+ const FFPACK_LU_TAG LuTag);
+
+ template INST_OR_DECL
+ void getEchelonTransform <FFLAS_FIELD<FFLAS_ELT> > (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_DIAG diag,
+ const size_t M, const size_t N, const size_t R, const size_t* P, const size_t* Q,
+ const FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* T, const size_t ldt,
+ const FFPACK_LU_TAG LuTag);
+ template INST_OR_DECL
+ void getReducedEchelonForm<FFLAS_FIELD<FFLAS_ELT> > (const FFLAS_FIELD<FFLAS_ELT> & F, const FFLAS::FFLAS_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ const FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* T, const size_t ldt,
+ const bool OnlyNonZeroVectors,
+ const FFPACK_LU_TAG LuTag);
+
+ template INST_OR_DECL
+ void getReducedEchelonForm<FFLAS_FIELD<FFLAS_ELT> > (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P,
+ FFLAS_ELT* A, const size_t lda,
+ const FFPACK_LU_TAG LuTag);
+ template INST_OR_DECL
+ void getReducedEchelonTransform<FFLAS_FIELD<FFLAS_ELT> > (const FFLAS_FIELD<FFLAS_ELT>& F, const FFLAS::FFLAS_UPLO Uplo,
+ const size_t M, const size_t N, const size_t R, const size_t* P, const size_t* Q,
+ const FFLAS_ELT* A, const size_t lda,
+ FFLAS_ELT* T, const size_t ldt,
+ const FFPACK_LU_TAG LuTag);
+ void PLUQtoEchelonPermutation (const size_t N, const size_t R, const size_t * P, size_t * outPerm);
+
+ template INST_OR_DECL
+ FFLAS_ELT* LQUPtoInverseOfFullRankMinor( const FFLAS_FIELD<FFLAS_ELT>& F, const size_t rank,
+ FFLAS_ELT* A_factors, const size_t lda,
+ const size_t* QtPointer,
+ FFLAS_ELT* X, const size_t ldx);
+} // FFPACK
diff --git a/benchmark/src/Makefile.am b/fflas-ffpack/paladin/Makefile.am
similarity index 81%
rename from benchmark/src/Makefile.am
rename to fflas-ffpack/paladin/Makefile.am
index 18d987b..447aeef 100644
--- a/benchmark/src/Makefile.am
+++ b/fflas-ffpack/paladin/Makefile.am
@@ -1,5 +1,4 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2011 FFLAS-FFPACK
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -17,10 +16,13 @@
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# ========LICENCE========
-#/
-#
-# Nothing yet
-SUBDIRS=BLAS_LAPACK BLOCKING FFLAS_FFPACK
+pkgincludesubdir=$(pkgincludedir)/paladin
+
+pkgincludesub_HEADERS= fflas_pfinit.h \
+ blockcuts.inl \
+ pfgemm_variants.inl \
+ parallel.h \
+ kaapi_routines.inl
diff --git a/fflas-ffpack/paladin/blockcuts.inl b/fflas-ffpack/paladin/blockcuts.inl
new file mode 100755
index 0000000..106cb7e
--- /dev/null
+++ b/fflas-ffpack/paladin/blockcuts.inl
@@ -0,0 +1,481 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* fflas/fflas_bounds.inl
+ * Copyright (C) 2013 Jean-Guillaume Dumas
+ *
+ * Written by Jean-Guillaume Dumas <Jean-Guillaume.Dumas at imag.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_blockcuts_INL
+#define __FFLASFFPACK_fflas_blockcuts_INL
+
+#include <fflas-ffpack/fflas/fflas_enum.h>
+
+#define __FFLASFFPACK_MINBLOCKCUTS ((size_t)256)
+
+namespace FFLAS {
+ // enum CuttingStrategy {
+ // SINGLE ,
+ // ROW ,
+ // COLUMN ,
+ // BLOCK ,
+ // RECURSIVE
+ // };
+
+ // enum StrategyParameter {
+ // FIXED ,
+ // THREADS ,
+ // GRAIN ,
+ // TWO_D ,
+ // THREE_D_INPLACE ,
+ // THREE_D_ADAPT ,
+ // TWO_D_ADAPT ,
+ // THREE_D
+ // };
+ namespace CuttingStrategy{
+ struct Single{};
+ struct Row{};
+ struct Column{};
+ struct Block{};
+ struct Recursive{};
+ }
+
+ namespace StrategyParameter{
+ struct Fixed{};
+ struct Threads{};
+ struct Grain{};
+ struct TwoD{};
+ struct TwoDAdaptive{};
+ struct ThreeD{};
+ struct ThreeDInPlace{};
+ struct ThreeDAdaptive{};
+ }
+
+ /*! ParSeqHelper for both fgemm and ftrsm
+ */
+ /*! ParSeqHelper for both fgemm and ftrsm
+ */
+ namespace ParSeqHelper {
+ template <typename C=CuttingStrategy::Block, typename P=StrategyParameter::Threads>
+ struct Parallel{
+ typedef C Cut;
+ typedef P Param;
+
+ Parallel(size_t n=NUM_THREADS):_numthreads(n){}
+
+ friend std::ostream& operator<<(std::ostream& out, const Parallel& p) {
+ return out << "Parallel: " << p.numthreads();
+ }
+ size_t numthreads() const { return _numthreads; }
+ size_t& set_numthreads(size_t n) { return _numthreads=n; }
+ // CuttingStrategy method() const { return _method; }
+ // StrategyParameter strategy() const { return _param; }
+ private:
+ size_t _numthreads;
+ // CuttingStrategy _method;
+ // StrategyParameter _param;
+
+ };
+ struct Sequential{
+ Sequential() {}
+ template<class Cut,class Param>
+ Sequential(Parallel<Cut,Param>& ) {}
+ friend std::ostream& operator<<(std::ostream& out, const Sequential&) {
+ return out << "Sequential";
+ }
+ size_t numthreads() const { return 1; }
+ // CuttingStrategy method() const { return SINGLE; }
+ // // numthreads==1 ==> a single block
+ // StrategyParameter strategy() const { return THREADS; }
+ };
+ }
+
+
+ template<class Cut=CuttingStrategy::Block, class Strat=StrategyParameter::Threads>
+ inline void BlockCuts(size_t& RBLOCKSIZE, size_t& CBLOCKSIZE,
+ const size_t m, const size_t n,
+ const size_t numthreads);
+
+ template<>
+ inline void BlockCuts<CuttingStrategy::Single,StrategyParameter::Threads>(size_t& RBLOCKSIZE,
+ size_t& CBLOCKSIZE,
+ const size_t m, const size_t n,
+ const size_t numthreads) {
+ assert(numthreads==1);
+ RBLOCKSIZE = std::max(m,(size_t)1);
+ CBLOCKSIZE = std::max(n,(size_t)1);
+ }
+
+
+ template<>
+ inline void BlockCuts<CuttingStrategy::Row,StrategyParameter::Fixed>(size_t& RBLOCKSIZE,
+ size_t& CBLOCKSIZE,
+ const size_t m, const size_t n,
+ const size_t numthreads) {
+ RBLOCKSIZE = std::max(std::min(m,__FFLASFFPACK_MINBLOCKCUTS),(size_t)1);
+ CBLOCKSIZE = std::max(n,(size_t)1);
+ }
+
+
+ template<>
+ inline void BlockCuts<CuttingStrategy::Row,StrategyParameter::Grain>(size_t& RBLOCKSIZE,
+ size_t& CBLOCKSIZE,
+ const size_t m, const size_t n,
+ const size_t grainsize) {
+ RBLOCKSIZE = std::max(std::min(m,grainsize),(size_t)1);
+ CBLOCKSIZE = std::max(n,(size_t)1);
+ }
+
+ template<>
+ inline void BlockCuts<CuttingStrategy::Block,StrategyParameter::Grain>(size_t& RBLOCKSIZE,
+ size_t& CBLOCKSIZE,
+ const size_t m, const size_t n,
+ const size_t grainsize) {
+ RBLOCKSIZE = std::max(std::min(m,grainsize),(size_t)1);
+ CBLOCKSIZE = std::max(std::min(n,grainsize),(size_t)1);
+ }
+
+
+ template<>
+ inline void BlockCuts<CuttingStrategy::Column,StrategyParameter::Fixed>(size_t& RBLOCKSIZE,
+ size_t& CBLOCKSIZE,
+ const size_t m, const size_t n,
+ const size_t numthreads) {
+ RBLOCKSIZE = std::max(m,(size_t)1);
+ CBLOCKSIZE = std::max(std::min(n,__FFLASFFPACK_MINBLOCKCUTS),(size_t)1);
+ }
+
+
+ template<>
+ inline void BlockCuts<CuttingStrategy::Column,StrategyParameter::Grain>(size_t& RBLOCKSIZE,
+ size_t& CBLOCKSIZE,
+ const size_t m, const size_t n,
+ const size_t grainsize) {
+ RBLOCKSIZE = std::max(m,(size_t)1);
+ CBLOCKSIZE = std::max(std::min(n,grainsize),(size_t)1);
+ }
+
+
+ template<>
+ inline void BlockCuts<CuttingStrategy::Block,StrategyParameter::Fixed>(size_t& RBLOCKSIZE,
+ size_t& CBLOCKSIZE,
+ const size_t m, const size_t n,
+ const size_t numthreads) {
+ RBLOCKSIZE = std::max(std::min(m,__FFLASFFPACK_MINBLOCKCUTS),(size_t)1);
+ CBLOCKSIZE = std::max(std::min(n,__FFLASFFPACK_MINBLOCKCUTS),(size_t)1);
+ }
+
+ template<>
+ inline void BlockCuts<CuttingStrategy::Row,StrategyParameter::Threads>(size_t& RBLOCKSIZE,
+ size_t& CBLOCKSIZE,
+ const size_t m, const size_t n,
+ const size_t numthreads) {
+ RBLOCKSIZE = std::max(m/numthreads,(size_t)1);
+ CBLOCKSIZE = std::max(n,(size_t)1);
+ }
+
+
+ template<>
+ inline void BlockCuts<CuttingStrategy::Column,StrategyParameter::Threads>(size_t& RBLOCKSIZE,
+ size_t& CBLOCKSIZE,
+ const size_t m, const size_t n,
+ const size_t numthreads) {
+ RBLOCKSIZE = std::max(m,(size_t)1);
+ CBLOCKSIZE = std::max(n/numthreads,(size_t)1);
+ }
+
+ template<>
+ inline void BlockCuts<CuttingStrategy::Block,StrategyParameter::Threads>(size_t& RBLOCKSIZE,
+ size_t& CBLOCKSIZE,
+ const size_t m, const size_t n,
+ const size_t numthreads) {
+ if (numthreads<65) {
+ //CP: Let's not compute these values all the time
+ const short maxtc[64] = {1,2,3,2,5,3,7,4,3,5,11,4,13,7,5,4,17,6,19,5,7,11,23,6,5,13,9,7,29,6,31,8,11,17,7,6,37,19,13,8,41,7,43,11,9,23,47,8,7,10,17,13,53,9,11,8,19,29,59,10,61,31,9,8};
+ const short maxtr[64] = {1,1,1,2,1,2,1,2,3,2,1,3,1,2,3,4,1,3,1,4,3,2,1,4,5,2,3,4,1,5,1,4,3,2,5,6,1,2,3,5,1,6,1,4,5,2,1,6,7,5,3,4,1,6,5,7,3,2,1,6,1,2,7,8};
+
+ RBLOCKSIZE=std::max(m/(size_t)maxtr[numthreads-1],(size_t)1);
+ CBLOCKSIZE=std::max(n/(size_t)maxtc[numthreads-1],(size_t)1);
+ } else {
+ const size_t maxt = (size_t)sqrt((double)numthreads);
+ size_t maxtr=maxt,maxtc=maxt;
+ for(size_t i=maxt; i>=1; --i) {
+ size_t j=maxt;
+ size_t newpr = i*j;
+ for( ; newpr < numthreads; ++j, newpr+=i ) {}
+ if (newpr == numthreads) {
+ maxtc = j;
+ maxtr = i;
+ break;
+ }
+ }
+ RBLOCKSIZE=std::max(m/maxtr,(size_t)1);
+ CBLOCKSIZE=std::max(n/maxtc,(size_t)1);
+ }
+ }
+
+ // inline void BlockCuts(size_t& r, size_t& c,
+ // size_t m, size_t n,
+ // const CuttingStrategy method,
+ // const StrategyParameter strategy,
+ // const size_t t) {
+ // switch(method) {
+ // case CuttingStrategy::Block:
+ // switch(strategy) {
+ // case StrategyParameter::Threads: BlockCuts<CuttingStrategy::Block,StrategyParameter::Threads>(r,c,m,n,t); break;
+ // case StrategyParameter::Grain: BlockCuts<CuttingStrategy::Block,StrategyParameter::Grain>(r,c,m,n,t); break;
+ // case StrategyParameter::Fixed: BlockCuts<CuttingStrategy::Block,StrategyParameter::Fixed>(r,c,m,n,t); break;
+ // default: BlockCuts<CuttingStrategy::Block,StrategyParameter::Threads>(r,c,m,n,t);
+ // }
+ // break;
+ // case CuttingStrategy::Row:
+ // switch(strategy) {
+ // case StrategyParameter::Threads: BlockCuts<CuttingStrategy::Row,StrategyParameter::Threads>(r,c,m,n,t); break;
+ // case StrategyParameter::Grain: BlockCuts<CuttingStrategy::Row,StrategyParameter::Grain>(r,c,m,n,t); break;
+ // case StrategyParameter::Fixed: BlockCuts<CuttingStrategy::Row,StrategyParameter::Fixed>(r,c,m,n,t); break;
+ // default: BlockCuts<CuttingStrategy::Row,StrategyParameter::Threads>(r,c,m,n,t);
+ // }
+ // break;
+ // case CuttingStrategy::Column:
+ // switch(strategy) {
+ // case StrategyParameter::Threads: BlockCuts<CuttingStrategy::Column,StrategyParameter::Threads>(r,c,m,n,t); break;
+ // case StrategyParameter::Grain: BlockCuts<CuttingStrategy::Column,StrategyParameter::Grain>(r,c,m,n,t); break;
+ // case StrategyParameter::Fixed: BlockCuts<CuttingStrategy::Column,StrategyParameter::Fixed>(r,c,m,n,t); break;
+ // default: BlockCuts<CuttingStrategy::Column,StrategyParameter::Threads>(r,c,m,n,t);
+ // }
+ // break;
+ // default: BlockCuts<CuttingStrategy::Block,StrategyParameter::Threads>(r,c,m,n,t);
+ // };
+ // }
+
+
+ template<class Cut=CuttingStrategy::Block, class Param=StrategyParameter::Threads>
+ inline void BlockCuts(size_t& rowBlockSize, size_t& colBlockSize,
+ size_t& lastRBS, size_t& lastCBS,
+ size_t& changeRBS, size_t& changeCBS,
+ size_t& numRowBlock, size_t& numColBlock,
+ size_t m, size_t n,
+// const CuttingStrategy method,
+// const StrategyParameter strategy,
+ const size_t numthreads) {
+ BlockCuts<Cut,Param>(rowBlockSize, colBlockSize, m, n, numthreads);
+ numRowBlock = m/rowBlockSize;
+ numColBlock = n/colBlockSize;
+
+ changeRBS = m-rowBlockSize*numRowBlock;
+ lastRBS = rowBlockSize;
+ if (changeRBS) ++rowBlockSize;
+
+ changeCBS = n-colBlockSize*numColBlock;
+ lastCBS = colBlockSize;
+ if (changeCBS) ++colBlockSize;
+
+
+ /*
+ // Better preserve numRowBlock and numColBlock
+ if (lastRBS) {
+ lastRBS = m-rowBlockSize*numRowBlock;
+ ++rowBlockSize;
+ } else lastRBS = rowBlockSize;
+ if (lastCBS) {
+ lastCBS = n-colBlockSize*numColBlock;
+ ++colBlockSize;
+ } else lastCBS = colBlockSize;
+*/
+
+
+
+// // Better preserve rowBlockSize and colBlockSize
+// lastRBS = m % rowBlockSize;
+// lastCBS = n % colBlockSize;
+// if (lastRBS) ++numRowBlock; else lastRBS = rowBlockSize;
+// if (lastCBS) ++numColBlock; else lastCBS = colBlockSize;
+ }
+
+
+}
+
+
+
+namespace FFLAS {
+ template <typename blocksize_t=size_t, typename Cut=CuttingStrategy::Block, typename Param=StrategyParameter::Threads>
+ struct ForStrategy1D {
+ ForStrategy1D(const blocksize_t n, const ParSeqHelper::Parallel<Cut,Param> H) {
+ build(n,H);
+ }
+ ForStrategy1D(const blocksize_t b, const blocksize_t e, const ParSeqHelper::Parallel<Cut,Param> H) {
+ build(e-b,H);
+ }
+
+ void build(const blocksize_t n, const ParSeqHelper::Parallel<Cut,Param> H) {
+// std::cout<<"FS1D n : "<<n<<std::endl;
+// std::cout<<"FS1D method : "<<method<<std::endl;
+// std::cout<<"FS1D numthreads : "<<numthreads<<std::endl;
+
+ if ( Protected::AreEqual<Param, StrategyParameter::Threads>::value ) {
+ numBlock = std::max((blocksize_t)(H.numthreads()),(blocksize_t)1);
+ } else if ( Protected::AreEqual<Param,StrategyParameter::Grain>::value ) {
+ numBlock = std::max(n/ (blocksize_t)(H.numthreads()), (blocksize_t)1);
+ } else {
+ numBlock = std::max(n/(blocksize_t)(__FFLASFFPACK_MINBLOCKCUTS),(blocksize_t)1);
+ }
+ firstBlockSize = n/numBlock;
+ if (firstBlockSize<1) {
+ firstBlockSize = (blocksize_t)1;
+ numBlock = n;
+ }
+ changeBS = n - numBlock*firstBlockSize;
+ lastBlockSize = firstBlockSize;
+ if (changeBS) ++firstBlockSize;
+
+// std::cout<<"FS1D 1BLOCKSIZE : "<<firstBlockSize<<std::endl;
+// std::cout<<"FS1D 2BLOCKSIZE : "<<lastBlockSize<<std::endl;
+// std::cout<<"FS1D changeBS : "<<changeBS<<std::endl;
+// std::cout<<"FS1D NBlocks : "<<numBlock<<std::endl;
+ }
+
+ blocksize_t initialize() {
+ ibeg = 0; iend = firstBlockSize;
+// std::cout << "FS1D 0 : " << 0 << std::endl;
+// std::cout << "FS1D ibeg: " << ibeg << std::endl;
+// std::cout << "FS1D iend: " << iend << std::endl;
+
+ return current = 0;
+ }
+ bool isTerminated() const { return current == numBlock; }
+
+ blocksize_t begin() const { return ibeg; }
+ blocksize_t end() const { return iend; }
+
+ blocksize_t blocksize() const { return firstBlockSize; }
+ blocksize_t numblocks() const { return numBlock; }
+
+
+ blocksize_t operator++() {
+ ibeg = iend;
+ iend += (++current<changeBS?firstBlockSize:lastBlockSize);
+
+// std::cout << "FS1D i : " << current << std::endl;
+// std::cout << "FS1D ibeg: " << ibeg << std::endl;
+// std::cout << "FS1D iend: " << iend << std::endl;
+
+
+ return current;
+ }
+
+ protected:
+ blocksize_t ibeg, iend;
+
+ blocksize_t current;
+ blocksize_t firstBlockSize,lastBlockSize;
+ blocksize_t changeBS;
+ blocksize_t numBlock;
+
+ };
+
+ template <typename blocksize_t=size_t, typename Cut=CuttingStrategy::Block, typename Param=StrategyParameter::Threads>
+ struct ForStrategy2D {
+ ForStrategy2D(const blocksize_t m, const blocksize_t n, const ParSeqHelper::Parallel<Cut,Param> H)
+ {
+ BlockCuts<Cut,Param>(rowBlockSize, colBlockSize,
+ lastRBS, lastCBS,
+ changeRBS, changeCBS,
+ numRowBlock, numColBlock,
+ m, n,
+// H.method(), H.strategy(),
+ H.numthreads());
+
+ BLOCKS = numRowBlock * numColBlock;
+ }
+
+
+ blocksize_t initialize() {
+ _ibeg = 0; _iend = rowBlockSize;
+ _jbeg = 0; _jend = colBlockSize;
+ return current = 0;
+ }
+ bool isTerminated() const { return current == BLOCKS; }
+
+ blocksize_t ibegin() const { return _ibeg; }
+ blocksize_t jbegin() const { return _jbeg; }
+ blocksize_t iend() const { return _iend; }
+ blocksize_t jend() const { return _jend; }
+
+
+ blocksize_t operator++() {
+ ++current;
+ blocksize_t icurr = current/numColBlock;
+ blocksize_t jcurr = current%numColBlock;
+ if (jcurr) {
+ _jbeg = _jend;
+ _jend += (jcurr<changeCBS?colBlockSize:lastCBS);
+ } else {
+ _ibeg = _iend;
+ _iend += (icurr<changeRBS?rowBlockSize:lastRBS);
+ _jbeg = 0;
+ _jend = colBlockSize;
+ }
+ return current;
+ }
+
+ friend std::ostream& operator<<(std::ostream& out, const ForStrategy2D& FS2D) {
+ out<<"RBLOCKSIZE: "<<FS2D.rowBlockSize<<std::endl;
+ out<<"CBLOCKSIZE: "<<FS2D.colBlockSize<<std::endl;
+ out<<"changeRBS : "<<FS2D.changeRBS<<std::endl;
+ out<<"changeCBS : "<<FS2D.changeCBS<<std::endl;
+ out<<"lastRBS : "<<FS2D.lastRBS<<std::endl;
+ out<<"lastCBS : "<<FS2D.lastCBS<<std::endl;
+ out<<"NrowBlocks: "<<FS2D.numRowBlock<<std::endl;
+ out<<"NcolBlocks: "<<FS2D.numColBlock<<std::endl;
+ out<<"curr: " << FS2D.current << '/' << FS2D.BLOCKS << std::endl;
+ out<<"_ibeg: " << FS2D._ibeg << std::endl;
+ out<<"_iend: " << FS2D._iend << std::endl;
+ out<<"_jbeg: " << FS2D._jbeg << std::endl;
+ out<<"_jend: " << FS2D._jend << std::endl;
+ return out;
+ }
+
+ blocksize_t rowblocksize() const { return rowBlockSize; }
+ blocksize_t rownumblocks() const { return numRowBlock; }
+ blocksize_t colblocksize() const { return colBlockSize; }
+ blocksize_t colnumblocks() const { return numColBlock; }
+
+
+ protected:
+ blocksize_t _ibeg, _iend, _jbeg, _jend;
+ blocksize_t rowBlockSize, colBlockSize;
+
+ blocksize_t current;
+ blocksize_t lastRBS; blocksize_t lastCBS;
+ blocksize_t changeRBS; blocksize_t changeCBS;
+ blocksize_t numRowBlock; blocksize_t numColBlock;
+ blocksize_t BLOCKS;
+
+ };
+
+}
+
+
+
+#endif
+
diff --git a/fflas-ffpack/paladin/fflas_pfinit.h b/fflas-ffpack/paladin/fflas_pfinit.h
new file mode 100755
index 0000000..6c8056d
--- /dev/null
+++ b/fflas-ffpack/paladin/fflas_pfinit.h
@@ -0,0 +1,87 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* fflas/fflas_pfinit.inl
+ * Copyright (C) 2015 Jean Guillaume Dumas Clement Pernet Ziad Sultan
+ *<ziad.sultan at imag.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#include "fflas-ffpack/paladin/parallel.h"
+
+namespace FFLAS
+{
+
+ template<class Field>
+ void pfzero(const Field& F,
+ size_t m, size_t n,
+ typename Field::Element_ptr C,
+ size_t BS=0)
+ {
+ using FFLAS::CuttingStrategy::Block;
+ using FFLAS::StrategyParameter::Grain;
+
+ BS=std::max(BS, (size_t)Protected::WinogradThreshold(F) );
+
+ SYNCH_GROUP(
+ FORBLOCK2D(iter, m, n, SPLITTER(BS, Block, Grain),
+ TASK(MODE(CONSTREFERENCE(F)),
+ {
+ fzero(F,
+ iter.iend()-iter.ibegin(),
+ iter.jend()-iter.jbegin(),
+ C+iter.ibegin()*n+iter.jbegin(),
+ n);
+ }
+ );
+ );
+ );
+ }
+
+ template<class Field, class RandIter>
+ void pfrand(const Field& F,
+ RandIter& G,
+ size_t m, size_t n,
+ typename Field::Element_ptr C,
+ size_t BS=0)
+ {
+ using FFLAS::CuttingStrategy::Block;
+ using FFLAS::StrategyParameter::Grain;
+
+ BS=std::max(BS, (size_t)Protected::WinogradThreshold(F) );
+ SYNCH_GROUP(
+ FORBLOCK2D(iter, m, n, SPLITTER(BS, Block, Grain),
+ TASK(MODE(CONSTREFERENCE(F,G)),
+ {
+ frand(F, G,
+ iter.iend()-iter.ibegin(),
+ iter.jend()-iter.jbegin(),
+ C+iter.ibegin()*n+iter.jbegin(),
+ n);
+ }
+ );
+ );
+ );
+ }
+
+
+
+} // FFLAS
diff --git a/fflas-ffpack/paladin/kaapi_routines.inl b/fflas-ffpack/paladin/kaapi_routines.inl
new file mode 100644
index 0000000..efe4615
--- /dev/null
+++ b/fflas-ffpack/paladin/kaapi_routines.inl
@@ -0,0 +1,192 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* fflas/fflas_pftrsm.inl
+ * Copyright (C) 2013 Ziad Sultan
+ *
+ * Written by Ziad Sultan < Ziad.Sultan at imag.fr >
+ * Time-stamp: <17 Jun 14 14:32:29 Jean-Guillaume.Dumas at imag.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+#ifndef __FFLASFFPACK_KAAPI_ROUTINES_INL
+#define __FFLASFFPACK_KAAPI_ROUTINES_INL
+
+
+
+
+
+#ifdef __FFLASFFPACK_USE_KAAPI
+namespace FFLAS {
+
+ template<class Field, class Helper>
+ struct Taskfgemm15 : public ka::Task<15>::Signature<
+ Field,
+ FFLAS_TRANSPOSE,
+ FFLAS_TRANSPOSE,
+ size_t ,
+ size_t ,
+ size_t ,
+ typename Field::Element,
+ ka::R<typename Field::Element>,
+ size_t ,
+ ka::R<typename Field::Element>,
+ size_t ,
+ typename Field::Element,
+ ka::RW<typename Field::Element>,
+ size_t ,
+ Helper
+ //size_t // winograd
+ >{};
+/*
+ template<class Field>
+ struct Taskfgemm14 : public ka::Task<14>::Signature<
+ Field,
+ FFLAS_TRANSPOSE,
+ FFLAS_TRANSPOSE,
+ size_t ,
+ size_t ,
+ size_t ,
+ typename Field::Element,
+ ka::R<typename Field::Element>,
+ size_t ,
+ ka::R<typename Field::Element>,
+ size_t ,
+ typename Field::Element,
+ ka::RW<typename Field::Element>,
+ size_t
+ >{};
+
+
+*/
+ template<class Field>
+ struct Taskftrsm12: public ka::Task<12>::Signature<
+ Field , /* Field F */
+ FFLAS::FFLAS_SIDE ,
+ FFLAS::FFLAS_UPLO ,
+ FFLAS::FFLAS_TRANSPOSE ,
+ FFLAS::FFLAS_DIAG ,
+ size_t , /* size : M */
+ size_t , /* size : N */
+ typename Field::Element ,
+ ka::R<typename Field::Element >, /* Matrix A */
+ size_t , /* lda */
+ ka::RW<typename Field::Element >, /* Matrix B */
+ size_t /* ldb */
+ >{};
+
+
+
+template<class Field, class Helper>
+void spawnerfgemm(const Field& F,
+ const FFLAS::FFLAS_TRANSPOSE ta,
+ const FFLAS::FFLAS_TRANSPOSE tb,
+ size_t BlockRowDim,
+ size_t BlockColDim,
+ size_t k,
+ const typename Field::Element alpha,
+ ka::pointer_r<typename Field::Element> A,
+ const size_t lda,
+ ka::pointer_r<typename Field::Element> B,
+ const size_t ldb,
+ const typename Field::Element beta,
+ ka::pointer_rw<typename Field::Element> C, const size_t ldc,
+ Helper WH){
+ ka::Spawn<Taskfgemm15<Field, Helper> >()( F, ta, tb, BlockRowDim, BlockColDim, k, alpha, A.ptr(), lda, B.ptr() , ldb,
+ beta, C.ptr(), ldc, WH);
+}
+
+}
+
+template<class Field, class Helper>
+struct TaskBodyCPU<FFLAS::Taskfgemm15<Field, Helper> >{
+ void operator()(const Field& F,
+ const FFLAS::FFLAS_TRANSPOSE ta,
+ const FFLAS::FFLAS_TRANSPOSE tb,
+ size_t BlockRowDim,
+ size_t BlockColDim,
+ size_t k,
+ const typename Field::Element alpha,
+ ka::pointer_r<typename Field::Element> A,
+ const size_t lda,
+ ka::pointer_r<typename Field::Element> B,
+ const size_t ldb,
+ const typename Field::Element beta,
+ ka::pointer_rw<typename Field::Element> C, const size_t ldc,
+ Helper WH
+ // Helper & WH
+ // size_t w
+ )
+ {
+ FFLAS::MMHelper<Field, FFLAS::MMHelperAlgo::Winograd, typename FFLAS::FieldTraits<Field>::value> W(WH);
+ /*
+ FFLAS::MMHelper<Field, FFLAS::MMHelperAlgo::Winograd> WH;
+ WH(F,w);*/
+ FFLAS::fgemm( F, ta, tb, BlockRowDim, BlockColDim, k, alpha, A.ptr(), lda, B.ptr() , ldb,
+ beta, C.ptr(), ldc, W);
+ }
+ };
+
+
+/*
+ template<class Field>
+ struct TaskBodyCPU<FFLAS::Taskfgemm14<Field> >{
+ void operator()(const Field& F,
+ const FFLAS::FFLAS_TRANSPOSE ta,
+ const FFLAS::FFLAS_TRANSPOSE tb,
+ size_t BlockRowDim,
+ size_t BlockColDim,
+ size_t k,
+ const typename Field::Element alpha,
+ ka::pointer_r<typename Field::Element> A,
+ const size_t lda,
+ ka::pointer_r<typename Field::Element> B,
+ const size_t ldb,
+ const typename Field::Element beta,
+ ka::pointer_rw<typename Field::Element> C, const size_t ldc)
+ {
+ FFLAS::fgemm( F, ta, tb, BlockRowDim, BlockColDim, k, alpha, A.ptr(), lda, B.ptr() , ldb,
+ beta, C.ptr(), ldc);
+ }
+ };
+*/
+ template<class Field>
+ struct TaskBodyCPU<FFLAS::Taskftrsm12<Field> > {
+ void operator()(const Field & F, const FFLAS::FFLAS_SIDE Side,
+ const FFLAS::FFLAS_UPLO Uplo,
+ const FFLAS::FFLAS_TRANSPOSE TransA,
+ const FFLAS::FFLAS_DIAG Diag,
+ const size_t M, const size_t N,
+ const typename Field::Element alpha,
+ ka::pointer_r<typename Field::Element > A, const size_t lda,
+ ka::pointer_rw<typename Field::Element > B, const size_t ldb )
+ {
+
+ FFLAS::ftrsm(F, Side, Uplo, TransA, Diag, M, N, alpha, A.ptr(), lda, B.ptr(), ldb);
+ }
+ };
+
+
+#endif
+
+
+#endif // __FFLASFFPACK_KAAPI_ROUTINES_INL
diff --git a/fflas-ffpack/paladin/parallel.h b/fflas-ffpack/paladin/parallel.h
new file mode 100755
index 0000000..f74d3e8
--- /dev/null
+++ b/fflas-ffpack/paladin/parallel.h
@@ -0,0 +1,566 @@
+/* fflas/parallel.h
+ * Copyright (C) 2013 Jean Guillaume Dumas Clement Pernet Ziad Sultan
+ *
+ * Written by Jean Guillaume Dumas Clement Pernet Ziad Sultan
+ *<ziad.sultan at imag.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_fflas_parallel_H
+#define __FFLASFFPACK_fflas_parallel_H
+
+
+
+#include "fflas-ffpack/config.h"
+
+#ifndef __FFLASFFPACK_USE_OPENMP
+#define __FFLASFFPACK_SEQUENTIAL
+#else
+#include "omp.h"
+#endif
+
+
+#ifdef __FFLASFFPACK_SEQUENTIAL
+ #undef __FFLASFFPACK_USE_OPENMP
+ #undef __FFLASFFPACK_USE_TBB
+ #undef __FFLASFFPACK_USE_KAAPI
+#elif defined (__FFLASFFPACK_USE_KAAPI)
+ #undef __FFLASFFPACK_SEQUENTIAL
+ #undef __FFLASFFPACK_USE_TBB
+ #undef __FFLASFFPACK_USE_OPENMP
+ #include "kaapi++"
+ #include "fflas-ffpack/fflas/kaapi_routines.inl"
+#elif defined __FFLASFFPACK_USE_TBB
+ #undef __FFLASFFPACK_SEQUENTIAL
+ #undef __FFLASFFPACK_USE_OPENMP
+ #undef __FFLASFFPACK_USE_KAAPI
+ #include </usr/include/tbb/tbb.h>
+ #include </usr/include/tbb/task.h>
+ #include </usr/include/tbb/parallel_for.h>
+ #include </usr/include/tbb/task_group.h>
+/*
+ extern "C"
+ {
+ tbb::task_group g;
+ }
+*/
+#ifdef __FFLASFFPACK_HAVE_MKL
+#ifndef _MKL_H_ // temporary
+#error "MKL (mkl.h) not present, while you have MKL enabled"
+#endif
+#undef index_t
+#define index_t MKL_INT
+#endif // __FFLASFFPACK_HAVE_MKL
+
+
+#endif
+
+#ifdef __FFLASFFPACK_FORCE_SEQ
+
+ #undef __FFLASFFPACK_USE_OPENMP
+ #undef __FFLASFFPACK_USE_KAAPI
+ #undef __FFLASFFPACK_USE_TBB
+ #define __FFLASFFPACK_SEQUENTIAL
+
+#endif
+
+#ifndef index_t
+#define index_t size_t
+#endif
+
+
+
+/*********************************************************/
+/*********************** SEQUENTIAL***********************/
+/*********************************************************/
+
+#ifdef __FFLASFFPACK_SEQUENTIAL // MACRO for sequential execution
+
+// TASK is a function call
+#define TASK(M, I) {I;}
+
+#define WAIT
+#define CHECK_DEPENDENCIES
+#define BARRIER
+#define PAR_BLOCK
+
+
+#define SYNCH_GROUP(Args...) {{Args};}
+
+
+#define NUM_THREADS 1
+#define MAX_THREADS 1
+
+#define READ(Args...)
+#define WRITE(Args...)
+#define READWRITE(Args...)
+#define CONSTREFERENCE(...)
+#define VALUE(...)
+
+#define BEGIN_PARALLEL_MAIN(Args...) int main(Args) {
+#define END_PARALLEL_MAIN(void) return 0; }
+
+// for 1D with iterator control and range access through iter (strategy 1D)
+#define FORBLOCK1D(iter, m, Helper, Args...) \
+ { FFLAS::ForStrategy1D<std::remove_const<decltype(m)>::type, typename decltype(Helper)::Cut, typename decltype(Helper)::Param> iter(m, Helper); \
+ for(iter.initialize(); !iter.isTerminated(); ++iter) \
+ {Args;} }
+
+// for strategy 1D
+#define FOR1D(i, m, Helper, Args...) \
+ FORBLOCK1D(_internal_iterator, m, Helper, \
+ for(auto i=_internal_iterator.begin(); i!=_internal_iterator.end(); ++i) \
+ { Args; })
+
+// PARFOR1D does normal execution of the loop
+#define PARFORBLOCK1D(iter, m, Helper, Args...) \
+ for(std::remove_const<decltype(m)>::type iter=0; iter<m; ++iter) \
+ { Args; }
+
+// PARFOR1D does normal execution of the loop
+#define PARFOR1D(iter, m, Helper, Args...) \
+ for(std::remove_const<decltype(m)>::type iter=0; iter<m; ++iter) \
+ { Args; }
+
+
+//////////////////// CUTTING LOOP MACROS 2D //////////////////////
+
+// for strategy 2D with access to the range and control of iterator
+#define FORBLOCK2D(iter, m, n, Helper, Args...) \
+ { FFLAS::ForStrategy2D<std::remove_const<decltype(m)>::type, typename decltype(Helper)::Cut, typename decltype(Helper)::Param> iter(m,n,Helper); \
+ for(iter.initialize(); !iter.isTerminated(); ++iter) \
+ { Args; } }
+
+// for strategy 2D
+#define FOR2D(i, j, m, n, Helper, Args...) \
+ FORBLOCK2D(_internal_iterator, m, n, Helper, \
+ for(auto i=_internal_iterator.ibegin(); i!=_internal_iterator.iend(); ++i) \
+ for(auto j=_internal_iterator.jbegin(); j!=_internal_iterator.jend(); ++j) \
+ { Args; })
+
+// parallel for strategy 2D with access to the range and control of iterator
+#define PARFORBLOCK2D(iter, m, n, Helper, Args...) \
+ FORBLOCK2D(iter, m, n, Helper, Args)
+
+// parallel for strategy 2D
+#define PARFOR2D(i, j, m, n, Helper, Args...) \
+ FOR2D(i, j, m, n, Helper, Args)
+
+
+#endif // Macro for sequential
+
+
+/*********************************************************/
+/************************* OPENMP ************************/
+/*********************************************************/
+
+#ifdef __FFLASFFPACK_USE_OPENMP //OpenMP macros
+
+#define PRAGMA_OMP_IMPL(Args...) _Pragma(#Args)
+
+#define TASK(M, I) \
+ PRAGMA_OMP_IMPL(omp task M) \
+ {I;}
+
+
+#define SYNCH_GROUP(Args...) {{Args};}\
+ WAIT;
+
+
+// macro omp taskwait (waits for all childs of current task)
+#define WAIT PRAGMA_OMP_IMPL(omp taskwait)
+#define GLOBALSHARED(a, Args...) shared(Args)
+#define CONSTREFERENCE(Args...) shared(Args)
+#define VALUE(Args...) firstprivate(Args)
+#define BARRIER PRAGMA_OMP_IMPL(omp barrier)
+
+//////////////////// CUTTING LOOP MACROS 1D //////////////////////
+
+
+// for with iterator control and range access through iter (strategy 1D)
+// Warning: by default we assume that there is no dependency between each iteration, hence we pass an empty MODE() to the tasks.
+// TODO: add an optional MODE argument to the parameter list of FORBLOCK1D
+#define FORBLOCK1D(iter, m, Helper, Args...) \
+ { FFLAS::ForStrategy1D<std::remove_const<decltype(m)>::type, typename decltype(Helper)::Cut, typename decltype(Helper)::Param > iter(m, Helper); \
+ for(iter.initialize(); !iter.isTerminated(); ++iter){ {Args;} } }
+
+
+// for strategy 1D
+// WARNING: the inner code Args should not contain any coma outside parenthesis (e.g. declaration lists, and template param list)
+#define FOR1D(i, m, Helper, Args...) \
+ FORBLOCK1D(_internal_iterator, m, Helper, \
+ TASK( , \
+ {for(auto i=_internal_iterator.begin(); i!=_internal_iterator.end(); ++i) \
+ { Args; } });) \
+ WAIT;
+
+
+
+/*
+#define PARFORBLOCK1D(iter, m, Helper, Args...) \
+ { FFLAS::ForStrategy1D<std::remove_const<decltype(m)>::type > iter(m, Helper); \
+ PRAGMA_OMP_IMPL(omp parallel for num_threads(iter.numblocks()) schedule(runtime)) \
+ for(iter.initialize(); !iter.isTerminated(); ++iter) \
+ {Args;} }
+*/
+
+//parallel for 1D with iterator control and range access cannot be done with openmp: syntax of openmp does not allow the use of the iterator syntax
+// Thus, PARFORBLOCK1D and PARFOR1D have the same implementation with no cutting. If using OpenMP the user can specify the cutting in runtime using the environmental variable: (see OpenMP spec for more details)
+// export OMP_SCHEDULE="DYNAMIC"
+// or export OMP_SCHEDULE="GUIDED,4"
+// or export OMP_SCHEDULE="STATIC"
+// or export OMP_SCHEDULE="AUTO"
+
+#define PARFORBLOCK1D(iter, m, Helper, Args...) \
+ { FFLAS::ForStrategy1D<std::remove_const<decltype(m)>::type, typename decltype(Helper)::Cut, typename decltype(Helper)::Param > OMPstrategyIterator(m, Helper); \
+ PRAGMA_OMP_IMPL(omp parallel for num_threads(OMPstrategyIterator.numblocks()) schedule(runtime)) \
+ for(std::remove_const<decltype(m)>::type iter=0; iter<m; ++iter) \
+ { Args; } }
+
+
+// parallel for 1D
+#define PARFOR1D(iter, m, Helper, Args...) \
+ { auto h = Helper; \
+ PARFORBLOCK1D(iter, m, h, { Args; } ) \
+ }
+
+
+//////////////////// CUTTING LOOP MACROS 2D //////////////////////
+
+// for strategy 2D with access to the range and control of iterator
+#define FORBLOCK2D(iter, m, n, Helper, Args...) \
+ { auto h=Helper; \
+ FFLAS::ForStrategy2D<std::remove_const<decltype(m)>::type, typename decltype(h)::Cut, typename decltype(h)::Param > iter(m,n,h); \
+ for(iter.initialize(); !iter.isTerminated(); ++iter) \
+ {Args;} }
+
+// for strategy 2D
+// WARNING: the inner code Args should not contain any coma outside parenthesis (e.g. declaration lists, and template param list)
+#define FOR2D(i, j, m, n, Helper, Args...) \
+ FORBLOCK2D(_internal_iterator, m, n, Helper, \
+ TASK(, \
+ for(auto i=_internal_iterator.ibegin(); i!=_internal_iterator.iend(); ++i) \
+ for(auto j=_internal_iterator.jbegin(); j!=_internal_iterator.jend(); ++j) \
+ { Args; });) \
+ WAIT;
+
+// parallel for strategy 2D with access to the range and control of iterator
+// WARNING: This is not doable : OMP requires an iteration over an interval of ints.
+/* #define PARFORBLOCK2D(iter, m, n, Helper, Args...) \
+ * { FFLAS::ForStrategy2D<std::remove_const<decltype(m)>::type, typename decltype(Helper)::Cut, typename decltype(Helper)::Param > iter(m,n,Helper); \
+ * PRAGMA_OMP_IMPL(omp parallel for num_threads(iter.rownumblocks()*iter.colnumblocks()) schedule(runtime)) \
+ * for(iter.initialize(); !iter.isTerminated(); ++iter) \
+ * {Args;} }
+ */
+
+// parallel for strategy 2D
+/* #define PARFOR2D(i, j, m, n, Helper, Args...) \
+ * PARFORBLOCK2D(_internal_iterator, m, n, Helper, \
+ * for(auto i=_internal_iterator.ibegin(); i!=_internal_iterator.iend(); ++i) \
+ * for(auto j=_internal_iterator.jbegin(); j!=_internal_iterator.jend(); ++j) \
+ * { Args; })
+ */
+
+// parallel region
+#define PAR_BLOCK PRAGMA_OMP_IMPL(omp parallel) \
+ PRAGMA_OMP_IMPL(omp single)
+// get the number of threads in the parallel region
+# define NUM_THREADS omp_get_num_threads()
+// get the number of threads specified with the global variable OMP_NUM_THREADS
+# define MAX_THREADS omp_get_max_threads()
+
+
+#define BEGIN_PARALLEL_MAIN(Args...) int main(Args) {
+#define END_PARALLEL_MAIN(void) return 0; }
+
+
+//////////////////////////////////////////////
+/////////////// dataflow macros //////////////
+#ifdef __FFLASFFPACK_USE_DATAFLOW // OMP dataflow synch DSL features
+
+ #define READ(Args...) depend(in: Args)
+ #define WRITE(Args...) depend(out: Args)
+ #define READWRITE(Args...) depend(inout: Args)
+ //computes dependencies (no wait here)
+ #define CHECK_DEPENDENCIES
+
+#else // OPENMP3.1 (explicit synch mode)
+
+ #define CHECK_DEPENDENCIES PRAGMA_OMP_IMPL(omp taskwait)
+
+ #define READ(Args...)
+ #define WRITE(Args...)
+ #define READWRITE(Args...)
+
+#endif // end DATAFLOW FLAG
+///////////////////////////////////////////////
+///////////////////////////////////////////////
+
+
+
+#endif // OpenMP macros
+
+
+/*********************************************************/
+/*************************** TBB ************************/
+/*********************************************************/
+#ifdef __FFLASFFPACK_USE_TBB
+
+// workaround to overload macro CONSTREFERENCE
+
+// CONSTREFERENCE macro
+/* #define REF1(a) =,&a */
+/* #define REF2(a,b) =,&a, &b */
+/* #define REF3(a,b,c) =,&a,&b,&c */
+/* #define REF4(a,b,c,d) =,&a,&b,&c,&d */
+/* #define REF5(a,b,c,d,e) =,&a,&b,&c,&d,&e */
+/* #define REF6(a,b,c,d,e,f) =,&a,&b,&c,&d,&e,&f */
+/* #define REF7(a,b,c,d,e,f,g) =,&a,&b,&c,&d,&e,&f,&g */
+/* #define REF8(a,b,c,d,e,f,g,h) =,&a,&b,&c,&d,&e,&f,&g,&h */
+/* #define REF9(a,b,c,d,e,f,g,h,i) =,&a,&b,&c,&d,&e,&f,&g,&h,&i */
+/* #define REF10(a,b,c,d,e,f,g,h,i,enough) =,&a,&b,&c,&d,&e,&f,&g,&h,&i,&enough */
+/* #define GET_REF(_1,_2,_3,_4,_5,_6,_7,_8,_9,_10, NAME,...) NAME */
+/* #define CONSTREFERENCE(...) GET_REF(__VA_ARGS__, REF10,REF9,REF8,REF7,REF6,REF5,REF4,REF3,REF2,REF1)(__VA_ARGS__) */
+
+
+#define REF1(a) ,&a
+#define REF2(a,b) ,&a, &b
+#define REF3(a,b,c) ,&a,&b,&c
+#define REF4(a,b,c,d) ,&a,&b,&c,&d
+#define REF5(a,b,c,d,e) ,&a,&b,&c,&d,&e
+#define REF6(a,b,c,d,e,f) ,&a,&b,&c,&d,&e,&f
+#define REF7(a,b,c,d,e,f,g) ,&a,&b,&c,&d,&e,&f,&g
+#define REF8(a,b,c,d,e,f,g,h) ,&a,&b,&c,&d,&e,&f,&g,&h
+#define REF9(a,b,c,d,e,f,g,h,i) ,&a,&b,&c,&d,&e,&f,&g,&h,&i
+#define REF10(a,b,c,d,e,f,g,h,i,enough) ,&a,&b,&c,&d,&e,&f,&g,&h,&i,&enough
+#define GET_REF(_1,_2,_3,_4,_5,_6,_7,_8,_9,_10, NAME,...) NAME
+#define CONSTREFERENCE(...) GET_REF(__VA_ARGS__, REF10,REF9,REF8,REF7,REF6,REF5,REF4,REF3,REF2,REF1)(__VA_ARGS__)
+
+
+// workaround to overload macro VALUE
+#define VAL1(a) ,a
+#define VAL2(a,b) ,a, b
+#define VAL3(a,b,c) ,a,b,c
+#define VAL4(a,b,c,d) ,a,b,c,d
+#define VAL5(a,b,c,d,e) ,a,b,c,d,e
+
+#define GET_VAL(_1,_2,_3,_4,_5, NAME,...) NAME
+#define VALUE(...) GET_VAL(__VA_ARGS__, VAL5,VAL4,VAL3,VAL2,VAL1)(__VA_ARGS__)
+
+// need task_group to lunch a group of tasks in parallel
+#define SYNCH_GROUP(Args...) \
+ {tbb::task_group g; \
+ {{Args};} \
+ g.wait();}
+
+
+// TBB task
+#define TASK(M, I) \
+ { \
+ g.run([=M](){I;}); \
+ }
+
+//#define MODE(Args...) Args
+#define WAIT g.wait()
+#define CHECK_DEPENDENCIES g.wait()
+#define BARRIER
+#define PAR_BLOCK
+
+#define NUM_THREADS tbb::task_scheduler_init::default_num_threads()
+#define MAX_THREADS tbb::task_scheduler_init::default_num_threads()
+#define READ(Args...)
+#define WRITE(Args...)
+#define READWRITE(Args...)
+
+#define BEGIN_PARALLEL_MAIN(Args...) int main(Args) {
+#define END_PARALLEL_MAIN(void) return 0; }
+#define CAPTURE(Args...) [Args]
+
+// for strategy 1D with access to the iterator
+#define FORBLOCK1D(iter, m, Helper, Args...) \
+ { FFLAS::ForStrategy1D<std::remove_const<decltype(m)>::type, typename decltype(Helper)::Cut, typename decltype(Helper)::Param > iter(m, Helper); \
+ for(iter.initialize(); !iter.isTerminated(); ++iter) \
+ {Args;} }
+
+// for strategy 1D
+#define FOR1D(i, m, Helper, Args...) \
+ FORBLOCK1D(_internal_iterator, m, Helper, \
+ for(auto i=_internal_iterator.begin(); i!=_internal_iterator.end(); ++i) \
+ { Args; } )
+
+
+// tbb parallel for 1D
+#define PARFORBLOCK1D(iter, m, Helper, Args...) \
+ { FFLAS::ForStrategy1D<std::remove_const<decltype(m)>::type, typename decltype(Helper)::Cut, typename decltype(Helper)::Param> iter(m, Helper); \
+ tbb::parallel_for( \
+ tbb::blocked_range<std::remove_const<decltype(m)>::type >(0, m, iter.blocksize() ), \
+ [=, &iter](const tbb::blocked_range<std::remove_const<decltype(m)>::type > &iter) { \
+ {Args;} }); \
+ }
+
+// tbb parallel for 1D
+/*
+#define PARFOR1D(i, m, Helper, Args...) \
+ PARFORBLOCK1D(_internal_iterator, m, Helper, \
+ for(auto i=_internal_iterator.begin(); i!=_internal_iterator.end(); ++i) \
+ { Args; } )
+*/
+
+#define PARFOR1D(i, m, Helper, Args...) \
+ { FFLAS::ForStrategy1D<std::remove_const<decltype(m)>::type, typename decltype(Helper)::Cut, typename decltype(Helper)::Param> TBBstrategyIterator(m, Helper); \
+ tbb::parallel_for( \
+ tbb::blocked_range<std::remove_const<decltype(m)>::type >(0, m, TBBstrategyIterator.blocksize() ), \
+ [=](const tbb::blocked_range<std::remove_const<decltype(m)>::type > &TBBblockrangeIterator) { \
+ for(auto i = TBBblockrangeIterator.begin(); \
+ i < TBBblockrangeIterator.end() ; ++i){ \
+ {Args;} }}); \
+ }
+
+
+// for strategy 2D with access to the iterator
+#define FORBLOCK2D(iter, m, n, Helper, Args...) \
+ { FFLAS::ForStrategy2D<std::remove_const<decltype(m)>::type, typename decltype(Helper)::Cut, typename decltype(Helper)::Param> iter(m,n,Helper); \
+ for(iter.initialize(); !iter.isTerminated(); ++iter) \
+ {Args;} }
+
+// for strategy 2D
+#define FOR2D(i, j, m, n, Helper, Args...) \
+ FORBLOCK2D(_internal_iterator, m, n, Helper, \
+ for(auto i=_internal_iterator.ibegin(); i!=_internal_iterator.iend(); ++i) \
+ for(auto j=_internal_iterator.jbegin(); j!=_internal_iterator.jend(); ++j) \
+ { Args; })
+
+// parallel for strategy 2D with access to the range and control of iterator
+#define PARFORBLOCK2D(iter, m, n, Helper, Args...) \
+ { FFLAS::ForStrategy2D<std::remove_const<decltype(m)>::type, typename decltype(Helper)::Cut, typename decltype(Helper)::Param> iter(m,n,Helper); \
+ tbb::parallel_for( \
+ tbb::blocked_range2d<std::remove_const<decltype(m)>::type >(0, m, iter.rowblocksize(), 0, n, iter.colblocksize() ), \
+ [=, &i](const tbb::blocked_range2d<std::remove_const<decltype(m)>::type > &iter) { \
+ {Args;} }); \
+ }
+
+// parallel for strategy 2D
+#define PARFOR2D(i, j, m, n, Helper, Args...) \
+ PARFORBLOCK2D(_internal_iterator, m, n, Helper, \
+ for(auto i=_internal_iterator.ibegin(); i!=_internal_iterator.iend(); ++i) \
+ for(auto j=_internal_iterator.jbegin(); j!=_internal_iterator.jend(); ++j) \
+ { Args; })
+
+#endif // end TBB macros
+
+/*********************************************************/
+/************************* KAAPI *************************/
+/*********************************************************/
+
+#ifdef __FFLASFFPACK_USE_KAAPI // KAAPI
+
+#define SPAWN(f,N) CONCATENATE_ARGS(ka::Spawn<Task ## f, N)
+#define CONCATENATE_ARGS(f, N) f ## N
+
+// TASK definition
+#define TASK(r, w, rw, f, Args...) CONCATENATE_ARGS(spawner,f)(Args)
+
+// WAIT do nothing in kaapi
+#define WAIT
+
+// BARRIER means synchronization in kaapi (waits for the execution of all tasks)
+#define BARRIER do{ \
+ ka::Sync(); \
+ }while(0)
+
+#define PAR_BLOCK
+#define PARFOR1D for
+
+// Number of threads
+# define NUM_THREADS kaapi_getconcurrency_cpu()
+
+# define MAX_THREADS kaapi_getconcurrency_cpu()
+
+// Begin parallel main
+#define BEGIN_PARALLEL_MAIN(Args...) \
+ struct doit { \
+ void operator()(int argc, char** argv)
+//end parallel main
+#define END_PARALLEL_MAIN(void) \
+ }; int main(int argc, char** argv) { \
+ try { ka::Community com = ka::System::join_community( argc, argv ); \
+ ka::SpawnMain<doit>()(argc, argv); \
+ com.leave(); \
+ ka::System::terminate();} \
+ catch (const std::exception& E) { ka::logfile() << "Catch : " << E.what() << std::endl;} \
+ catch (...) { ka::logfile() << "Catch unknown exception: " << std::endl;} \
+ return 0;}
+
+#define SYNCH_GROUP(Args...) {{Args};}
+
+
+
+#endif // KAAPI macros
+
+
+
+/*********************************************************/
+/********************* common macros *********************/
+/*********************************************************/
+
+#define COMMA ,
+#define MODE(...) __VA_ARGS__
+#define RETURNPARAM(f, P1, Args...) P1=f(Args)
+
+// Macro computes number of Arguments
+#define NUMARGS(...) \
+ PP_NARG_(__VA_ARGS__,PP_RSEQ_N())
+#define PP_NARG_(...) \
+ PP_ARG_N(__VA_ARGS__)
+#define PP_ARG_N( \
+ _1, _2, _3, _4, _5, _6, _7, _8, _9,_10, \
+ _11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \
+ _21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \
+ _31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \
+ _41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \
+ _51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \
+ _61,_62,_63,N,...) N
+#define PP_RSEQ_N() \
+ 63,62,61,60, \
+ 59,58,57,56,55,54,53,52,51,50, \
+ 49,48,47,46,45,44,43,42,41,40, \
+ 39,38,37,36,35,34,33,32,31,30, \
+ 29,28,27,26,25,24,23,22,21,20, \
+ 19,18,17,16,15,14,13,12,11,10, \
+ 9,8,7,6,5,4,3,2,1,0
+
+#define NOSPLIT() FFLAS::ParSeqHelper::Sequential()
+
+// overload of SPLITTER
+#define splitting_0() FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads>()
+#define splitting_1(a) FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads>(a)
+#define splitting_2(a,c) FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,c>(a)
+#define splitting_3(a,b,c) FFLAS::ParSeqHelper::Parallel<b,c>(a)
+
+#define splitt(_1,_2,_3, NAME,...) NAME
+
+#define SPLITTER(...) splitt(__VA_ARGS__, splitting_3, splitting_2, splitting_1, splitting_0)(__VA_ARGS__)
+
+
+#include "fflas-ffpack/paladin/blockcuts.inl"
+
+#endif //__FFLASFFPACK_fflas_parallel_H
+
diff --git a/fflas-ffpack/paladin/pfgemm_variants.inl b/fflas-ffpack/paladin/pfgemm_variants.inl
new file mode 100644
index 0000000..68cce1f
--- /dev/null
+++ b/fflas-ffpack/paladin/pfgemm_variants.inl
@@ -0,0 +1,486 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* fflas/fflas_pfgemm.inl
+ * Copyright (C) 2013 Jean Guillaume Dumas Clement Pernet Ziad Sultan
+ *<ziad.sultan at imag.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+
+namespace FFLAS
+{
+
+
+ template<class Field, class AlgoT, class FieldTrait>
+ typename Field::Element*
+ pfgemm(const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ const typename Field::ConstElement_ptr A, const size_t lda,
+ const typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element * C, const size_t ldc,
+ MMHelper<Field, AlgoT, FieldTrait, ParSeqHelper::Parallel<CuttingStrategy::Block, StrategyParameter::Threads> > & H){
+ {
+ H.parseq.set_numthreads( std::min(H.parseq.numthreads(), std::max((size_t)1,(size_t)(m*n/(__FFLASFFPACK_SEQPARTHRESHOLD*__FFLASFFPACK_SEQPARTHRESHOLD)))) );
+
+ MMHelper<Field, AlgoT, FieldTrait, ParSeqHelper::Sequential> SeqH (H);
+ size_t sa = (ta==FFLAS::FflasNoTrans)?lda:1;
+ size_t sb = (tb==FFLAS::FflasNoTrans)?1:ldb;
+ SYNCH_GROUP({FORBLOCK2D(iter,m,n,H.parseq,
+ TASK( MODE(
+ READ(A[iter.ibegin()*sa],B[iter.jbegin()*sb])
+ CONSTREFERENCE(F, SeqH)
+ READWRITE(C[iter.ibegin()*ldc+iter.jbegin()])),
+ fgemm( F, ta, tb, iter.iend()-iter.ibegin(), iter.jend()-iter.jbegin(), k, alpha, A+iter.ibegin()*sa, lda, B+iter.jbegin()*sb, ldb, beta, C+iter.ibegin()*ldc+iter.jbegin(), ldc, SeqH););
+ );
+ });
+ }
+ return C;
+
+
+ }
+
+ template<class Field, class AlgoT, class FieldTrait>
+ typename Field::Element*
+ pfgemm(const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ const typename Field::ConstElement_ptr AA, const size_t lda,
+ const typename Field::ConstElement_ptr BB, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element * C, const size_t ldc,
+ MMHelper<Field, AlgoT, FieldTrait, ParSeqHelper::Parallel<CuttingStrategy::Recursive, StrategyParameter::ThreeDAdaptive> > & H){
+
+ typename Field::Element a = alpha;
+ typename Field::Element b = beta;
+ typename Field::ConstElement_ptr B = BB;
+ typename Field::ConstElement_ptr A = AA;
+ if (!m || !n) {return C;}
+ if (!k || F.isZero (alpha)){
+ fscalin(F, m, n, beta, C, ldc);
+ return C;
+ }
+
+ if (H.parseq.numthreads()<=1 || std::min(m*n,std::min(m*k,k*n))<=__FFLASFFPACK_SEQPARTHRESHOLD*__FFLASFFPACK_SEQPARTHRESHOLD){
+ MMHelper<Field,AlgoT,FieldTrait,ParSeqHelper::Sequential> SeqH(H);
+ return fgemm(F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, SeqH);
+ }
+
+ typedef MMHelper<Field,AlgoT,FieldTrait,ParSeqHelper::Parallel<CuttingStrategy::Recursive, StrategyParameter::ThreeDAdaptive> > MMH_t;
+ MMH_t H1(H);
+ MMH_t H2(H);
+ if(__FFLASFFPACK_DIMKPENALTY*m > k && m >= n) {
+ SYNCH_GROUP(size_t M2= m>>1;
+ H1.parseq.set_numthreads(H1.parseq.numthreads() >> 1);
+ H2.parseq.set_numthreads(H.parseq.numthreads() - H1.parseq.numthreads());
+
+ typename Field::ConstElement_ptr A1= A;
+ typename Field::ConstElement_ptr A2= A+M2*((ta==FFLAS::FflasTrans)?1:lda);
+ typename Field::Element_ptr C1= C;
+ typename Field::Element_ptr C2= C+M2*ldc;
+
+ // 2 multiply (1 split on dimension m)
+
+ TASK(MODE(CONSTREFERENCE(F, H1) READ(A1,B) READWRITE(C1)),
+ {pfgemm( F, ta, tb, M2, n, k, alpha, A1, lda, B, ldb, beta, C1, ldc, H1);}
+ );
+
+ TASK(MODE(CONSTREFERENCE(F,H2) READ(A2,B) READWRITE(C2)),
+ {pfgemm(F, ta, tb, m-M2, n, k, alpha, A2, lda, B, ldb, beta, C2, ldc, H2);}
+ );
+ );
+
+ } else if (__FFLASFFPACK_DIMKPENALTY*n > k) {
+ SYNCH_GROUP(
+ size_t N2 = n>>1;
+ H1.parseq.set_numthreads( H1.parseq.numthreads() >> 1);
+ H2.parseq.set_numthreads(H.parseq.numthreads() - H1.parseq.numthreads());
+ typename Field::ConstElement_ptr B1= B;
+ typename Field::ConstElement_ptr B2= B+N2*((tb==FFLAS::FflasTrans)?ldb:1);
+
+ typename Field::Element_ptr C1= C;
+ typename Field::Element_ptr C2= C+N2;
+
+ TASK(MODE(CONSTREFERENCE(F,H1) READ(A,B1) READWRITE(C1)), pfgemm(F, ta, tb, m, N2, k, a, A, lda, B1, ldb, b, C1, ldc, H1));
+ TASK(MODE(CONSTREFERENCE(F,H2) READ(A,B2) READWRITE(C2)), pfgemm(F, ta, tb, m, n-N2, k, a, A, lda, B2, ldb, b,C2, ldc, H2));
+ );
+
+ } else {
+
+ size_t K2 = k>>1;
+
+ typename Field::ConstElement_ptr B1= B;
+ typename Field::ConstElement_ptr B2= B+K2*((tb==FFLAS::FflasTrans)?1:ldb);
+ typename Field::ConstElement_ptr A1= A;
+ typename Field::ConstElement_ptr A2= A+K2*((ta==FFLAS::FflasTrans)?lda:1);
+ typename Field::Element_ptr C2 = fflas_new (F, m, n,Alignment::CACHE_PAGESIZE);
+
+ H1.parseq.set_numthreads(H1.parseq.numthreads() >> 1);
+ H2.parseq.set_numthreads(H.parseq.numthreads()-H1.parseq.numthreads());
+ SYNCH_GROUP(
+ TASK(MODE(CONSTREFERENCE(F,H1) READ(A1,B1) READWRITE(C)), pfgemm(F, ta, tb, m, n, K2, a, A1, lda, B1, ldb, b, C, ldc, H1));
+
+ TASK(MODE(CONSTREFERENCE(F,H2) READ(A2,B2) READWRITE(C2)), pfgemm(F, ta, tb, m, n, k-K2, a, A2, lda, B2, ldb, F.zero, C2, n, H2));
+ CHECK_DEPENDENCIES;
+
+ TASK(MODE(CONSTREFERENCE(F) READ(C2) READWRITE(C)),faddin(F, n, m, C2, n, C, ldc));
+
+ );
+ fflas_delete(C2);
+ }
+
+ return C;
+ }
+
+ template<class Field, class AlgoT, class FieldTrait>
+ typename Field::Element*
+ pfgemm (const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ const typename Field::ConstElement_ptr AA, const size_t lda,
+ const typename Field::ConstElement_ptr BB, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element * C, const size_t ldc,
+ MMHelper<Field, AlgoT, FieldTrait, ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::TwoDAdaptive> > & H){
+
+ typename Field::Element a = alpha;
+ typename Field::Element b = beta;
+ typename Field::ConstElement_ptr B = BB;
+ typename Field::ConstElement_ptr A = AA;
+ if (!m || !n) {return C;}
+ if (!k || F.isZero (alpha)){
+ fscalin(F, m, n, beta, C, ldc);
+ return C;
+ }
+ if (H.parseq.numthreads()<=1 || m*n<=__FFLASFFPACK_SEQPARTHRESHOLD*__FFLASFFPACK_SEQPARTHRESHOLD){
+ MMHelper<Field,AlgoT,FieldTrait,ParSeqHelper::Sequential> SeqH(H);
+ return fgemm(F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, SeqH);
+
+ }
+ typedef MMHelper<Field, AlgoT, FieldTrait, ParSeqHelper::Parallel<CuttingStrategy::Recursive, StrategyParameter::TwoDAdaptive> > MMH_t;
+ MMH_t H1(H);
+ MMH_t H2(H);
+ H1.parseq.set_numthreads(H1.parseq.numthreads() >> 1);
+ H2.parseq.set_numthreads(H.parseq.numthreads() - H1.parseq.numthreads());
+ if(m >= n) {
+ size_t M2= m>>1;
+ typename Field::ConstElement_ptr A1= A;
+ typename Field::ConstElement_ptr A2= A+M2*((ta==FFLAS::FflasTrans)?1:lda);
+ typename Field::Element_ptr C1= C;
+ typename Field::Element_ptr C2= C+M2*ldc;
+ SYNCH_GROUP(
+ TASK(MODE(CONSTREFERENCE(F,H1, A1, B) READ(M2, A1[0],B[0]) READWRITE(C1[0])), pfgemm(F, ta, tb, M2, n, k, alpha, A1, lda, B, ldb, beta, C1, ldc, H1));
+ TASK(MODE(CONSTREFERENCE(F,H2, A2, B) READ(M2, A2[0],B[0]) READWRITE(C2[0])), pfgemm(F, ta, tb, m-M2, n, k, alpha, A2, lda, B, ldb, beta, C2, ldc, H2));
+
+ );
+
+ } else {
+ size_t N2 = n>>1;
+ typename Field::ConstElement_ptr B1= B;
+ typename Field::ConstElement_ptr B2= B+N2*((tb==FFLAS::FflasTrans)?ldb:1);
+ typename Field::Element_ptr C1= C;
+ typename Field::Element_ptr C2= C+N2;
+ SYNCH_GROUP(
+ TASK(MODE(CONSTREFERENCE(F,H1, A, B1) READ(N2, A[0], B1[0]) READWRITE(C1[0])), pfgemm(F, ta, tb, m, N2, k, a, A, lda, B1, ldb, b, C1, ldc, H1));
+ TASK(MODE(CONSTREFERENCE(F,H2, A, B2) READ(N2, A[0], B2[0]) READWRITE(C2[0])), pfgemm(F, ta, tb, m, n-N2, k, a, A, lda, B2, ldb, b,C2, ldc, H2));
+ );
+ }
+ return C;
+ }
+
+ template<class Field, class AlgoT, class FieldTrait>
+ typename Field::Element*
+ pfgemm( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ const typename Field::ConstElement_ptr AA, const size_t lda,
+ const typename Field::ConstElement_ptr BB, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element * C, const size_t ldc,
+ MMHelper<Field, AlgoT, FieldTrait, ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::TwoD> > & H){
+
+ typename Field::Element a = alpha;
+ typename Field::Element b = beta;
+ typename Field::ConstElement_ptr B = BB;
+ typename Field::ConstElement_ptr A = AA;
+ if (!m || !n) {return C;}
+ if (!k || F.isZero (alpha)){
+ fscalin(F, m, n, beta, C, ldc);
+ return C;
+ }
+
+ if(H.parseq.numthreads()<=1|| m*n<=__FFLASFFPACK_SEQPARTHRESHOLD*__FFLASFFPACK_SEQPARTHRESHOLD){
+ MMHelper<Field,AlgoT,FieldTrait,ParSeqHelper::Sequential> SeqH(H);
+ return fgemm(F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, SeqH);
+ } else
+ {
+ size_t M2= m>>1;
+ size_t N2= n>>1;
+
+ typename Field::ConstElement_ptr A1= A;
+ typename Field::ConstElement_ptr A2= A+M2*((ta==FFLAS::FflasTrans)?1:lda);
+ typename Field::ConstElement_ptr B1= B;
+ typename Field::ConstElement_ptr B2= B+N2*((tb==FFLAS::FflasTrans)?ldb:1);
+
+ typename Field::Element_ptr C11= C;
+ typename Field::Element_ptr C21= C+M2*ldc;
+ typename Field::Element_ptr C12= C+N2;
+ typename Field::Element_ptr C22= C+N2+M2*ldc;
+
+ typedef MMHelper<Field, AlgoT, FieldTrait, ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::TwoD> > MMH_t;
+ MMH_t H1(H);
+ MMH_t H2(H);
+ MMH_t H3(H);
+ MMH_t H4(H);
+ size_t nt = H.parseq.numthreads();
+ size_t nt_rec = nt/4;
+ size_t nt_mod = nt%4;
+ H1.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H2.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H3.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H4.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ SYNCH_GROUP(
+ TASK(MODE(CONSTREFERENCE(F,H1) READ(A1,B1) READWRITE(C11)), pfgemm(F, ta, tb, M2, N2, k, alpha, A1, lda, B1, ldb, beta, C11, ldc, H1));
+
+ TASK(MODE(CONSTREFERENCE(F,H2) READ(A1,B2) READWRITE(C12)), pfgemm(F, ta, tb, M2, n-N2, k, alpha, A1, lda, B2, ldb, beta, C12, ldc, H2));
+
+ TASK(MODE(CONSTREFERENCE(F,H3) READ(A2,B1) READWRITE(C21)), pfgemm(F, ta, tb, m-M2, N2, k, a, A2, lda, B1, ldb, b, C21, ldc, H3));
+
+ TASK(MODE(CONSTREFERENCE(F,H4) READ(A2,B2) READWRITE(C22)), pfgemm(F, ta, tb, m-M2, n-N2, k, a, A2, lda, B2, ldb, b,C22, ldc, H4));
+ );
+ }
+ return C;
+ }
+
+
+
+ template<class Field, class AlgoT, class FieldTrait>
+ typename Field::Element_ptr
+ pfgemm(const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ const typename Field::ConstElement_ptr A, const size_t lda,
+ const typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, AlgoT, FieldTrait, ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::ThreeD> > & H){
+
+
+ if (!m || !n) {return C;}
+ if (!k || F.isZero (alpha)){
+ fscalin(F, m, n, beta, C, ldc);
+ return C;
+ }
+ if(H.parseq.numthreads() <= 1|| std::min(m*n,std::min(m*k,k*n))<=__FFLASFFPACK_SEQPARTHRESHOLD*__FFLASFFPACK_SEQPARTHRESHOLD){
+ FFLAS::MMHelper<Field, AlgoT, FieldTrait,FFLAS::ParSeqHelper::Sequential> WH (H);
+ return fgemm(F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, WH);
+ }
+ else
+ {
+ typename Field::Element a = alpha;
+ typename Field::Element b = 0;
+
+ size_t M2= m>>1;
+ size_t N2= n>>1;
+ size_t K2= k>>1;
+ typename Field::ConstElement_ptr A11= A;
+ typename Field::ConstElement_ptr A12= A+K2*((ta==FFLAS::FflasTrans)?lda:1);
+ typename Field::ConstElement_ptr A21= A+M2*((ta==FFLAS::FflasTrans)?1:lda);
+ typename Field::ConstElement_ptr A22= A12+M2*((ta==FFLAS::FflasTrans)?1:lda);
+
+ typename Field::ConstElement_ptr B11= B;
+ typename Field::ConstElement_ptr B12= B+N2*((tb==FFLAS::FflasTrans)?ldb:1);
+ typename Field::ConstElement_ptr B21= B+K2*((tb==FFLAS::FflasTrans)?1:ldb);
+ typename Field::ConstElement_ptr B22= B12+K2*((tb==FFLAS::FflasTrans)?1:ldb);
+
+ typename Field::Element_ptr C11= C;
+ typename Field::Element_ptr C_11 = fflas_new (F, M2, N2,Alignment::CACHE_PAGESIZE);
+
+ typename Field::Element_ptr C12= C+N2;
+ typename Field::Element_ptr C_12 = fflas_new (F, M2, n-N2,Alignment::CACHE_PAGESIZE);
+
+ typename Field::Element_ptr C21= C+M2*ldc;
+ typename Field::Element_ptr C_21 = fflas_new (F, m-M2, N2,Alignment::CACHE_PAGESIZE);
+
+ typename Field::Element_ptr C22= C+N2+M2*ldc;
+ typename Field::Element_ptr C_22 = fflas_new (F, m-M2, n-N2,Alignment::CACHE_PAGESIZE);
+
+ // 1/ 8 multiply in parallel
+ //omp_set_task_affinity(omp_get_locality_domain_num_for( C11));
+
+ typedef MMHelper<Field, AlgoT, FieldTrait, ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::ThreeD> > MMH_t;
+ MMH_t H1(H);
+ MMH_t H2(H);
+ MMH_t H3(H);
+ MMH_t H4(H);
+ MMH_t H5(H);
+ MMH_t H6(H);
+ MMH_t H7(H);
+ MMH_t H8(H);
+ size_t nt = H.parseq.numthreads();
+ size_t nt_rec = nt/8;
+ size_t nt_mod = nt % 8 ;
+ H1.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H2.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H3.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H4.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H5.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H6.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H7.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H8.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+
+ SYNCH_GROUP(
+ TASK(MODE(CONSTREFERENCE(F,H1) READ(A11,B11) READWRITE(C11)), pfgemm(F, ta, tb, M2, N2, K2, alpha, A11, lda, B11, ldb, beta, C11, ldc, H1));
+ //omp_set_task_affinity(omp_get_locality_domain_num_for( C_11));
+ TASK(MODE(CONSTREFERENCE(F,H2) READ(A12,B21) WRITE(C_11)), pfgemm(F, ta, tb, M2, N2, k-K2, a, A12, lda, B21, ldb, b,C_11, N2, H2));
+ //omp_set_task_affinity(omp_get_locality_domain_num_for( C12));
+ TASK(MODE(CONSTREFERENCE(F,H3) READ(A12,B22) READWRITE(C12)), pfgemm(F, ta, tb, M2, n-N2, k-K2, alpha, A12, lda, B22, ldb, beta, C12, ldc, H3));
+ //omp_set_task_affinity(omp_get_locality_domain_num_for( C_12));
+ TASK(MODE(CONSTREFERENCE(F,H4) READ(A11,B12) WRITE(C_12)), pfgemm(F, ta, tb, M2, n-N2, K2, a, A11, lda, B12, ldb, b, C_12, n-N2, H4));
+ //omp_set_task_affinity(omp_get_locality_domain_num_for( C21));
+ TASK(MODE(CONSTREFERENCE(F,H5) READ(A22,B21) READWRITE(C21)), pfgemm(F, ta, tb, m-M2, N2, k-K2, alpha, A22, lda, B21, ldb, beta, C21, ldc, H5));
+ //omp_set_task_affinity(omp_get_locality_domain_num_for( C_21));
+ TASK(MODE(CONSTREFERENCE(F,H6) READ(A21,B11) WRITE(C_21)), pfgemm(F, ta, tb, m-M2, N2, K2, a, A21, lda, B11, ldb, b,C_21, N2, H6));
+ //omp_set_task_affinity(omp_get_locality_domain_num_for( C22));
+ TASK(MODE(CONSTREFERENCE(F,H7) READ(A21,B12) READWRITE(C22)), pfgemm(F, ta, tb, m-M2, n-N2, K2, alpha, A21, lda, B12, ldb, beta, C22, ldc, H7));
+ //omp_set_task_affinity(omp_get_locality_domain_num_for( C_22));
+ TASK(MODE(CONSTREFERENCE(F,H8) READ(A22,B22) WRITE(C_22)), pfgemm(F, ta, tb, m-M2, n-N2, k-K2, a, A22, lda, B22, ldb, b,C_22, n-N2, H8));
+
+ CHECK_DEPENDENCIES;
+ // 2/ final add
+ //omp_set_task_affinity(omp_get_locality_domain_num_for( C11));
+ TASK(MODE(CONSTREFERENCE(F) READ(C_11) READWRITE(C11)), faddin(F, M2, N2, C_11, N2, C11, ldc));
+ //omp_set_task_affinity(omp_get_locality_domain_num_for( C12));
+ TASK(MODE(CONSTREFERENCE(F) READ(C_12) READWRITE(C12)),faddin(F, M2, n-N2, C_12, n-N2, C12, ldc));
+ //omp_set_task_affinity(omp_get_locality_domain_num_for( C21));
+ TASK(MODE(CONSTREFERENCE(F) READ(C_21) READWRITE(C21)), faddin(F, m-M2, N2, C_21, N2, C21, ldc));
+ //omp_set_task_affinity(omp_get_locality_domain_num_for( C22));
+ TASK(MODE(CONSTREFERENCE(F) READ(C_22) READWRITE(C22)), faddin(F, m-M2, n-N2, C_22, n-N2, C22, ldc));
+
+ );
+ FFLAS::fflas_delete (C_11);
+ FFLAS::fflas_delete (C_12);
+ FFLAS::fflas_delete (C_21);
+ FFLAS::fflas_delete (C_22);
+ }
+ return C;
+}
+
+ template<class Field, class AlgoT, class FieldTrait>
+ typename Field::Element*
+ pfgemm( const Field& F,
+ const FFLAS_TRANSPOSE ta,
+ const FFLAS_TRANSPOSE tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ const typename Field::ConstElement_ptr A, const size_t lda,
+ const typename Field::ConstElement_ptr B, const size_t ldb,
+ const typename Field::Element beta,
+ typename Field::Element_ptr C, const size_t ldc,
+ MMHelper<Field, AlgoT, FieldTrait, ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::ThreeDInPlace> > & H){
+
+
+ if (!m || !n) {return C;}
+ if (!k || F.isZero (alpha)){
+ fscalin(F, m, n, beta, C, ldc);
+ return C;
+ }
+
+ if(H.parseq.numthreads() <= 1|| std::min(m*n,std::min(m*k,k*n))<=__FFLASFFPACK_SEQPARTHRESHOLD*__FFLASFFPACK_SEQPARTHRESHOLD){ // threshold
+ FFLAS::MMHelper<Field, AlgoT, FieldTrait,FFLAS::ParSeqHelper::Sequential> WH (H);
+ return fgemm(F, ta, tb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc, WH);
+ }else{
+ size_t M2= m>>1;
+ size_t N2= n>>1;
+ size_t K2= k>>1;
+ typename Field::ConstElement_ptr A11= A;
+ typename Field::ConstElement_ptr A12= A+K2*((ta==FFLAS::FflasTrans)?lda:1);
+ typename Field::ConstElement_ptr A21= A+M2*((ta==FFLAS::FflasTrans)?1:lda);
+ typename Field::ConstElement_ptr A22= A12+M2*((ta==FFLAS::FflasTrans)?1:lda);
+
+ typename Field::ConstElement_ptr B11= B;
+ typename Field::ConstElement_ptr B12= B+N2*((tb==FFLAS::FflasTrans)?ldb:1);
+ typename Field::ConstElement_ptr B21= B+K2*((tb==FFLAS::FflasTrans)?1:ldb);
+ typename Field::ConstElement_ptr B22= B12+K2*((tb==FFLAS::FflasTrans)?1:ldb);
+
+
+ typename Field::Element_ptr C11= C;
+ typename Field::Element_ptr C12= C+N2;
+ typename Field::Element_ptr C21= C+M2*ldc;
+ typename Field::Element_ptr C22= C+N2+M2*ldc;
+ typedef MMHelper<Field, AlgoT, FieldTrait, ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::ThreeDInPlace> > MMH_t;
+ MMH_t H1(H);
+ MMH_t H2(H);
+ MMH_t H3(H);
+ MMH_t H4(H);
+ size_t nt = H.parseq.numthreads();
+ size_t nt_rec = nt/4;
+ size_t nt_mod = nt%4;
+ H1.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H2.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H3.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ H4.parseq.set_numthreads(std::max(size_t(1),nt_rec + ((nt_mod-- > 0)?1:0)));
+ SYNCH_GROUP(
+ // 1/ 4 multiply
+ TASK(MODE(CONSTREFERENCE(F,H1) READ(A11,B11) READWRITE(C11)), pfgemm(F, ta, tb, M2, N2, K2, alpha, A11, lda, B11, ldb, beta, C11, ldc, H1));
+ TASK(MODE(CONSTREFERENCE(F,H2) READ(A12,B22) READWRITE(C12)), pfgemm(F, ta, tb, M2, n-N2, k-K2, alpha, A12, lda, B22, ldb, beta, C12, ldc, H2));
+ TASK(MODE(CONSTREFERENCE(F,H3) READ(A22,B21) READWRITE(C21)), pfgemm(F, ta, tb, m-M2, N2, k-K2, alpha, A22, lda, B21, ldb, beta, C21, ldc, H3));
+ TASK(MODE(CONSTREFERENCE(F,H4) READ(A21,B12) READWRITE(C22)), pfgemm(F, ta, tb, m-M2, n-N2, K2, alpha, A21, lda, B12, ldb, beta, C22, ldc, H4));
+
+ CHECK_DEPENDENCIES;
+ // 2/ 4 add+multiply
+ TASK(MODE(CONSTREFERENCE(F,H1) READ(A12,B21) READWRITE(C11)), pfgemm(F, ta, tb, M2, N2, k-K2, alpha, A12, lda, B21, ldb, F.one, C11, ldc, H1));
+ TASK(MODE(CONSTREFERENCE(F,H2) READ(A11,B12) READWRITE(C12)), pfgemm(F, ta, tb, M2, n-N2, K2, alpha, A11, lda, B12, ldb, F.one, C12, ldc, H2));
+ TASK(MODE(CONSTREFERENCE(F,H3) READ(A21,B11) READWRITE(C21)), pfgemm(F, ta, tb, m-M2, N2, K2, alpha, A21, lda, B11, ldb, F.one, C21, ldc, H3));
+ TASK(MODE(CONSTREFERENCE(F,H4) READ(A22,B22) READWRITE(C22)), pfgemm(F, ta, tb, m-M2, n-N2, k-K2, alpha, A22, lda, B22, ldb, F.one, C22, ldc, H4));
+ );
+ }
+ return C;
+}
+
+
+
+} // FFLAS
diff --git a/fflas-ffpack/utils/Makefile.am b/fflas-ffpack/utils/Makefile.am
index c401de8..77e2788 100644
--- a/fflas-ffpack/utils/Makefile.am
+++ b/fflas-ffpack/utils/Makefile.am
@@ -1,5 +1,5 @@
# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -20,11 +20,24 @@
pkgincludesubdir=$(pkgincludedir)/utils
-pkgincludesub_HEADERS= \
- args-parser.h \
- print-utils.h \
- debug.h \
- Matio.h
-EXTRA_DIST=timer.h timer.C
+
+pkgincludesub_HEADERS= \
+ align-allocator.h \
+ args-parser.h \
+ debug.h \
+ fflas_memory.h \
+ fflas_randommatrix.h \
+ flimits.h \
+ Matio.h \
+ bit_manipulation.h \
+ print-utils.h \
+ timer.h \
+ cast.h \
+ fflas_intrinsic.h
+
+### is this really extra dist ?
+### this is wrong in a header only lib
+# EXTRA_DIST=timer.h
+
diff --git a/fflas-ffpack/utils/Makefile.in b/fflas-ffpack/utils/Makefile.in
deleted file mode 100644
index a6bb6dc..0000000
--- a/fflas-ffpack/utils/Makefile.in
+++ /dev/null
@@ -1,549 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = utils
-DIST_COMMON = $(pkgincludesub_HEADERS) $(srcdir)/Makefile.am \
- $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-am__vpath_adj = case $$p in \
- $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
- *) f=$$p;; \
- esac;
-am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
-am__install_max = 40
-am__nobase_strip_setup = \
- srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
-am__nobase_strip = \
- for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
-am__nobase_list = $(am__nobase_strip_setup); \
- for p in $$list; do echo "$$p $$p"; done | \
- sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
- $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
- if (++n[$$2] == $(am__install_max)) \
- { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
- END { for (dir in files) print dir, files[dir] }'
-am__base_list = \
- sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
- sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-am__uninstall_files_from_dir = { \
- test -z "$$files" \
- || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
- || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
- $(am__cd) "$$dir" && rm -f $$files; }; \
- }
-am__installdirs = "$(DESTDIR)$(pkgincludesubdir)"
-HEADERS = $(pkgincludesub_HEADERS)
-ETAGS = etags
-CTAGS = ctags
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-pkgincludesubdir = $(pkgincludedir)/utils
-pkgincludesub_HEADERS = \
- args-parser.h \
- print-utils.h \
- debug.h \
- Matio.h
-
-EXTRA_DIST = timer.h timer.C
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps utils/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps utils/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-install-pkgincludesubHEADERS: $(pkgincludesub_HEADERS)
- @$(NORMAL_INSTALL)
- @list='$(pkgincludesub_HEADERS)'; test -n "$(pkgincludesubdir)" || list=; \
- if test -n "$$list"; then \
- echo " $(MKDIR_P) '$(DESTDIR)$(pkgincludesubdir)'"; \
- $(MKDIR_P) "$(DESTDIR)$(pkgincludesubdir)" || exit 1; \
- fi; \
- for p in $$list; do \
- if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
- echo "$$d$$p"; \
- done | $(am__base_list) | \
- while read files; do \
- echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(pkgincludesubdir)'"; \
- $(INSTALL_HEADER) $$files "$(DESTDIR)$(pkgincludesubdir)" || exit $$?; \
- done
-
-uninstall-pkgincludesubHEADERS:
- @$(NORMAL_UNINSTALL)
- @list='$(pkgincludesub_HEADERS)'; test -n "$(pkgincludesubdir)" || list=; \
- files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
- dir='$(DESTDIR)$(pkgincludesubdir)'; $(am__uninstall_files_from_dir)
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile $(HEADERS)
-installdirs:
- for dir in "$(DESTDIR)$(pkgincludesubdir)"; do \
- test -z "$$dir" || $(MKDIR_P) "$$dir"; \
- done
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am: install-pkgincludesubHEADERS
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am: uninstall-pkgincludesubHEADERS
-
-.MAKE: install-am install-strip
-
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
- clean-libtool ctags distclean distclean-generic \
- distclean-libtool distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-pdf install-pdf-am \
- install-pkgincludesubHEADERS install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- tags uninstall uninstall-am uninstall-pkgincludesubHEADERS
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/fflas-ffpack/utils/Matio.h b/fflas-ffpack/utils/Matio.h
index ffd7a48..6cc5b0c 100644
--- a/fflas-ffpack/utils/Matio.h
+++ b/fflas-ffpack/utils/Matio.h
@@ -27,255 +27,90 @@
#include <cstring>
#include <stdio.h>
#include <stdlib.h>
-#include "fflas-ffpack/fflas/fflas.h"
+//#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas_memory.h"
-// Reading and writing matrices over double
-
-#if 0
-// Reading a matrice from a (eventually zipped) file
-double * read_dbl(char * mat_file,int* tni,int* tnj)
-{
- char *UT, *File_Name;
- int is_gzipped = 0;
- size_t s = strlen(mat_file);
- double* X;
- if ((mat_file[--s] == 'z') &&
- (mat_file[--s] == 'g') &&
- (mat_file[--s] == '.')) {
- is_gzipped = 1;
- File_Name = "/tmp/bbXXXXXX_";
- mkstemp(File_Name);
- UT = new char[s+34+strlen(File_Name)];
- sprintf(UT,"gunzip -c %s > %s", mat_file, File_Name);
- system(UT);
- sprintf(UT,"\\rm %s", File_Name);
- } else
- File_Name = mat_file;
-
- FILE* FileDes = fopen(File_Name, "r");
- if (FileDes != NULL) {
- char * tmp = new char[200];// usigned long tni, tnj;
- fscanf(FileDes,"%d %d %s\n",tni, tnj, &tmp) ;
- int n=*tni;
- int p=*tnj;
- X = new double[n*p];
- for (int i=0;i<n*p;++i)
- X[i] = (double) 0;
- long i,j; long val;
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- while(i && j) {
- X[p*(i-1)+j-1] = (double) val;
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- }
- }
-
- fclose(FileDes);
- if (is_gzipped) system(UT);
- return X;
-}
-
-// Displays a matrix
-std::ostream& write_dbl(std::ostream& c,
- double* E,
- int n, int m, int id)
-{
-
- for (int i = 0; i<n;++i){
- for (int j=0; j<m;++j)
- c << *(E+j+id*i) << " ";
- c << std::endl;
- }
- return c << std::endl;
-}
-#endif
// Reading and writing matrices over field
// Reading a matrice from a (eventually zipped) file
template<class Field>
-typename Field::Element * read_field(const Field& F,char * mat_file,int* tni,int* tnj)
+typename Field::Element_ptr read_field(const Field& F, const char * mat_file,int* tni,int* tnj)
{
- char *UT, *File_Name;
+ char *UT = NULL;
+ const char* File_Name;
int is_gzipped = 0;
size_t s = strlen(mat_file);
- typename Field::Element zero;
- F.init(zero,0UL);
- typename Field::Element * X = NULL;
+ typename Field::Element_ptr X = NULL;
if ((mat_file[--s] == 'z') &&
(mat_file[--s] == 'g') &&
(mat_file[--s] == '.')) {
is_gzipped = 1;
char tmp_nam[] = "/tmp/bbXXXXXX_";
+ if (mkstemp(tmp_nam))
+ printf("Error opening file]\n");
File_Name = tmp_nam;
- mkstemp(File_Name);
- UT = new char[s+34+strlen(File_Name)];
+
+ UT = FFLAS::fflas_new<char>(s+34+strlen(File_Name));
sprintf(UT,"gunzip -c %s > %s", mat_file, File_Name);
- system(UT);
+ if (system(UT))
+ printf("Error uncompressing file\n");
sprintf(UT,"\\rm %s", File_Name);
- } else
+ } else {
File_Name = mat_file;
+ }
+
FILE* FileDes = fopen(File_Name, "r");
if (FileDes != NULL) {
- char tmp [200];// usigned long tni, tnj;
- fscanf(FileDes,"%d %d %s\n",tni, tnj, tmp) ;
+ char tmp [200];// unsigned long tni, tnj;
+ if (fscanf(FileDes,"%d %d %199s\n",tni, tnj, tmp)<0)
+ printf("Error Reading first line of file \n");
int n=*tni;
int p=*tnj;
- X = new typename Field::Element[n*p];
+ X = FFLAS::fflas_new<typename Field::Element>(n*p);
for (int i=0;i<n*p;++i)
- X[i] = zero;
+ F.assign(X[i], F.zero);
long i,j; long val;
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
+ if(fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val)<0)
+ printf("Read Error\n");
while(i && j) {
F.init(X[p*(i-1)+j-1],val);
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
+ if(fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val)<0)
+ printf("Read Error\n");
}
+ fclose(FileDes);
}
- fclose(FileDes);
- if (is_gzipped) system(UT);
+ if (is_gzipped)
+ if (system(UT))
+ printf("Error uncompressing file\n");
+ if (UT != NULL)
+ FFLAS::fflas_delete( UT);
return X;
}
-template<class Field>
-void read_field4(const Field& F,char * mat_file,int* tni,int* tnj,
- typename Field::Element *& NW,typename Field::Element *& NE,
- typename Field::Element *& SW,typename Field::Element *& SE)
-{
- char *UT, *File_Name;
- int is_gzipped = 0;
- size_t s = strlen(mat_file);
- typename Field::Element zero;
- F.init(zero,0);
- typename Field::Element * X;
- if ((mat_file[--s] == 'z') &&
- (mat_file[--s] == 'g') &&
- (mat_file[--s] == '.')) {
- is_gzipped = 1;
- // XXX on fait pas ça !
- File_Name = "/tmp/bbXXXXXX_";
- mkstemp(File_Name);
- UT = new char[s+34+strlen(File_Name)];
- sprintf(UT,"gunzip -c %s > %s", mat_file, File_Name);
- system(UT);
- sprintf(UT,"\\rm %s", File_Name);
- } else
- File_Name = mat_file;
- FILE* FileDes = fopen(File_Name, "r");
- if (FileDes != NULL) {
- char * tmp = new char[200];// usigned long tni, tnj;
- fscanf(FileDes,"%d %d %s\n",tni, tnj, &tmp) ;
- int n=*tni;
- int p=*tnj;
- int no2= n>>1;
- int po2 = p>>1;
- NW = new typename Field::Element[no2*po2];
- NE = new typename Field::Element[no2*(p-po2)];
- SW = new typename Field::Element[(n-no2)*po2];
- SE = new typename Field::Element[(n-no2)*(p-po2)];
-
- for (int i=0;i<no2*po2;++i)
- NW[i] = zero;
- for (int i=0;i<no2*(p-po2);++i)
- NE[i] = zero;
- for (int i=0;i<(n-no2)*po2;++i)
- SW[i] = zero;
- for (int i=0;i<(n-no2)*(p-po2);++i)
- SE[i] = zero;
- long i,j; long val;
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- while(i && j) {
- if (i<=no2){
- if (j<=po2){
- F.init(NW[po2*(i-1)+j-1],val);
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- }
- else{
- F.init(NE[po2*(i-1)+j-1-po2],val);
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- }
- }
- else{
- if (j<=po2){
- F.init(SW[(p-po2)*(i-1-no2)+j-1],val);
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- }
- else{
- F.init(SE[(p-po2)*(i-1-no2)+j-1-po2],val);
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- }
- }
- }
- // *A1 = NW;
- //*A2 = NE;
- //*A3 = SW;
- //*A4 = SE;
-
- }
-
- fclose(FileDes);
- if (is_gzipped) system(UT);
-}
-
// Displays a matrix
template<class Field>
std::ostream& write_field(const Field& F,std::ostream& c,
- const typename Field::Element* E,
- int n, int m, int id, bool mapleFormat = false)
+ typename Field::ConstElement_ptr E,
+ int n, int m, int id, bool mapleFormat = false, bool column_major=false)
{
- double tmp;
- if (mapleFormat) c << "Matrix(" << n <<',' << m << ", [" ;
+ //typename Field::Element tmp;
+ // double tmp;
+// Givaro::Integer tmp;
+ typename Field::Element tmp;
+ F.init(tmp);
+ if (mapleFormat) c << "Matrix(" << n <<',' << m << ",\n[" ;
for (int i = 0; i<n;++i){
if (mapleFormat) c << '[';
for (int j=0; j<m;++j){
- F.convert(tmp,*(E+j+id*i));
- c << tmp;
- if (mapleFormat && j<m-1) c << ',';
- c << ' ';
- }
- if (mapleFormat) c << ']';
- if (mapleFormat && i<n-1) c << ',';
- if (!mapleFormat) c << std::endl;
- }
- if (mapleFormat) c << "])";
- return c ;
-}
-
-// Displays a triangular matrix
-//! @todo let the user choose to convert to a non destructive format (not double but long or Integer...)
-template<class Field>
-std::ostream& write_field(const Field& F,std::ostream& c,
- const FFLAS::FFLAS_UPLO uplo, const FFLAS::FFLAS_DIAG unit,
- const typename Field::Element* E,
- int n, int m, int id, bool mapleFormat = false)
-{
-
- double tmp;
- if (mapleFormat) c << "Matrix(" << n <<',' << m << ",[";
- for (int i = 0; i<n;++i){
- if (mapleFormat) c << '[';
- // under diag
- for (int j=0; j<i ;++j){
- if (uplo == FFLAS::FflasLower)
- F.convert(tmp,*(E+j+id*i));
- else tmp = 0 ;
- c << tmp;
- if (mapleFormat && j<m-1) c << ',';
- c << ' ';
- }
- // on diag
- if (unit == FFLAS::FflasNonUnit)
- F.convert(tmp,*(E+i+id*i));
- else
- tmp = 1.;
- c << tmp;
- if (mapleFormat && i<m-1) c << ',';
- c << ' ';
- // over diag
- for (int j=i+1; j<m;++j){
- if (uplo == FFLAS::FflasUpper)
- F.convert(tmp,*(E+j+id*i));
+ if (column_major)
+ //F.convert(tmp,*(E+i+id*j));
+ tmp = *(E+i+id*j);
+
else
- tmp = 0 ;
+// F.convert(tmp,*(E+j+id*i));
+ tmp =*(E+j+id*i);
c << tmp;
if (mapleFormat && j<m-1) c << ',';
c << ' ';
@@ -284,8 +119,15 @@ std::ostream& write_field(const Field& F,std::ostream& c,
if (mapleFormat && i<n-1) c << ',';
if (!mapleFormat) c << std::endl;
}
- if (mapleFormat) c << "])";
+ if (mapleFormat) c << "]);";
return c ;
}
+inline std::ostream& write_perm (std::ostream& c, const size_t* P, size_t N){
+ c<<"[ ";
+ for (size_t i=0; i<N; ++i)
+ c<<P[i]<<" ";
+ c<<"]"<<std::endl;
+ return c;
+}
#endif //__FFLASFFPACK_matio_H
diff --git a/fflas-ffpack/utils/align-allocator.h b/fflas-ffpack/utils/align-allocator.h
new file mode 100644
index 0000000..08c6464
--- /dev/null
+++ b/fflas-ffpack/utils/align-allocator.h
@@ -0,0 +1,255 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by <bastien.vialla at lirmm.fr>
+ *
+ * STL align allocator inspired by MAlloc from Stephan T. Lavavej, Visual C++ Libraries Developer
+ * (http://blogs.msdn.com/b/vcblog/archive/2008/08/28/the-mallocator.aspx)
+ * Update to c++11
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_align_allocator_H
+#define __FFLASFFPACK_align_allocator_H
+
+#include "fflas-ffpack/config.h"
+
+#ifdef __FFLASFFPACK_HAVE_CXX11
+
+#include <memory>
+#include <utility>
+#include <assert.h>
+#include <cstddef>
+#include <iostream>
+
+#include "fflas-ffpack/utils/fflas_intrinsic.h"
+//#include <immintrin.h>
+// Alignment Type
+enum class Alignment : size_t {
+ NONE = 0,
+ Normal = sizeof(void*),
+ SSE = 16,
+ AVX = 32,
+ XEON_PHI = 64,
+ CACHE_LINE = 64,
+ CACHE_PAGESIZE = 4096,
+ DEFAULT =
+#ifdef __FFLASFFPACK_USE_AVX
+ 32
+#else
+ 16
+#endif
+};
+
+/*
+ * Allocate T[size] with address aligned to alignement
+ * ex : int* tab = malloc_align<int>(100, Alignment::AVX)
+ */
+template<class T>
+T* malloc_align(size_t size, Alignment alignment = Alignment::DEFAULT) noexcept
+{
+ void* p = nullptr;
+ int err = 0;
+ err = posix_memalign(&p, (size_t) alignment, size*sizeof(T));
+ if(err)
+ std::cout << "posix_memalign error" << std::endl;
+ //return new(p) T[size];
+ return static_cast<T*>(p);
+}
+
+namespace detail
+{
+inline void* allocate(size_t align, size_t size)
+{
+ assert(align >= sizeof(void*));
+
+ if (size == 0) {
+ return nullptr;
+ }
+
+ void* ptr = nullptr;
+ int rc = posix_memalign(&ptr, align, size);
+
+ if (rc != 0) {
+ return nullptr;
+ }
+
+ return ptr;
+}
+
+inline void deallocate(void* ptr) noexcept
+{
+ return free(ptr);
+}
+}
+
+
+/* STL Aligned Allocator
+ * ex : std::vector<T, AlignedAllocator<T, Alignment::AVX>>;
+ *
+ * template<class T> using vector = std::vector<T, AlignedAllocator<T, Alignment::AVX>>;
+ */
+
+template <class T, Alignment Align = Alignment::SSE> class AlignedAllocator;
+
+template <class T, Alignment Align> class AlignedAllocator {
+public:
+ using value_type = T;
+ using pointer = T*;
+ using const_pointer = const T*;
+ using reference = T&;
+ using const_reference = const T&;
+ using size_type = std::size_t;
+ using difference_type = ptrdiff_t;
+
+ using propagate_on_container_move_assignment = std::true_type;
+
+ template <class U> struct rebind {
+ using other = AlignedAllocator<U, Align>;
+ };
+
+public:
+ AlignedAllocator() noexcept {}
+
+ template <class U>
+ AlignedAllocator(const AlignedAllocator<U, Align>&) noexcept {}
+
+ size_type max_size() const noexcept {
+ return (size_type(~0) - size_type(Align)) / sizeof(T);
+ }
+
+ pointer address(reference x) const noexcept { return std::addressof(x); }
+
+ const_pointer address(const_reference x) const noexcept {
+ return std::addressof(x);
+ }
+
+ pointer allocate(size_type n,
+ typename AlignedAllocator<void, Align>::const_pointer = 0) {
+ const size_type alignment = static_cast<size_type>(Align);
+ void* ptr = detail::allocate(alignment, n * sizeof(T));
+ if (ptr == nullptr) {
+ throw std::bad_alloc();
+ }
+
+ return reinterpret_cast<pointer>(ptr);
+ }
+
+ void deallocate(pointer p, size_type) noexcept {
+ return detail::deallocate(p);
+ }
+
+ template <class U, class... Args> void construct(U* p, Args&&... args) {
+ ::new (reinterpret_cast<void*>(p)) U(std::forward<Args>(args)...);
+ }
+
+ void destroy(pointer p) { p->~T(); }
+};
+
+/*
+ * Specialization for void*
+ */
+template <Alignment Align> class AlignedAllocator<void, Align> {
+public:
+ using pointer = void*;
+ using const_pointer = const void*;
+ using value_type = void;
+
+ template <class U> struct rebind {
+ using other = AlignedAllocator<U, Align>;
+ };
+};
+
+/*
+ * Specialization for const T
+ */
+template <class T, Alignment Align> class AlignedAllocator<const T, Align> {
+public:
+ using value_type = T;
+ using pointer = const T*;
+ using const_pointer = const T*;
+ using reference = const T&;
+ using const_reference = const T&;
+ using size_type = std::size_t;
+ using difference_type = ptrdiff_t;
+
+ using propagate_on_container_move_assignment = std::true_type;
+
+ template <class U> struct rebind {
+ using other = AlignedAllocator<U, Align>;
+ };
+
+public:
+ AlignedAllocator() noexcept {}
+
+ template <class U>
+ AlignedAllocator(const AlignedAllocator<U, Align>&) noexcept {}
+
+ size_type max_size() const noexcept {
+ return (size_type(~0) - size_type(Align)) / sizeof(T);
+ }
+
+ const_pointer address(const_reference x) const noexcept {
+ return std::addressof(x);
+ }
+
+ pointer allocate(size_type n,
+ typename AlignedAllocator<void, Align>::const_pointer = 0) {
+ const size_type alignment = static_cast<size_type>(Align);
+ void* ptr = detail::allocate(alignment, n * sizeof(T));
+ if (ptr == nullptr) {
+ throw std::bad_alloc();
+ }
+
+ return reinterpret_cast<pointer>(ptr);
+ }
+
+ void deallocate(pointer p, size_type) noexcept {
+ return detail::deallocate(p);
+ }
+
+ template <class U, class... Args> void construct(U* p, Args&&... args) {
+ ::new (reinterpret_cast<void*>(p)) U(std::forward<Args>(args)...);
+ }
+
+ void destroy(pointer p) { p->~T(); }
+};
+
+template <class T, Alignment TAlign, class U, Alignment UAlign>
+inline bool operator==(const AlignedAllocator<T, TAlign>&,
+ const AlignedAllocator<U, UAlign>&) noexcept {
+ return TAlign == UAlign;
+}
+
+template <class T, Alignment TAlign, class U, Alignment UAlign>
+inline bool operator!=(const AlignedAllocator<T, TAlign>&,
+ const AlignedAllocator<U, UAlign>&) noexcept {
+ return TAlign != UAlign;
+}
+
+#else // C++11
+#error "You need a c++11 compiler."
+#endif // C++11
+
+#endif /* _FFLASFFPACK_align_allocator_h */
diff --git a/fflas-ffpack/utils/args-parser.h b/fflas-ffpack/utils/args-parser.h
index 059fa4f..3d58061 100644
--- a/fflas-ffpack/utils/args-parser.h
+++ b/fflas-ffpack/utils/args-parser.h
@@ -34,6 +34,8 @@
#ifndef __FFLASFFPACK_args_parser_H
#define __FFLASFFPACK_args_parser_H
+#include <fflas-ffpack/fflas-ffpack-config.h>
+#include <givaro/givinteger.h>
#include <iostream>
#include <fstream>
#include <vector>
@@ -44,13 +46,19 @@
#include "fflas-ffpack/utils/print-utils.h"
enum ArgumentType {
- TYPE_NONE, TYPE_INT, TYPE_INTEGER, TYPE_DOUBLE, TYPE_INTLIST, TYPE_STR
+ TYPE_NONE, TYPE_INT, TYPE_LONGLONG, TYPE_INTEGER, TYPE_DOUBLE, TYPE_INTLIST, TYPE_STR
};
#define TYPE_BOOL TYPE_NONE
#define END_OF_ARGUMENTS \
{ '\0', "\0", "\0", TYPE_NONE, NULL }
+#ifdef _GIVARO_CONFIG_H
+#define type_integer Givaro::Integer
+#else
+#define type_integer long int
+#endif
+
struct Argument
{
char c;
@@ -66,10 +74,6 @@ namespace FFLAS {
void parseArguments (int argc, char **argv, Argument *args, bool printDefaults = true);
}
-
-/** writes the values of all arguments, preceded by the programName */
-std::ostream& writeCommandString (std::ostream& os, Argument *args, char* programName);
-
void printHelpMessage (const char *program, Argument *args, bool printDefaults = false)
{
int i, l;
@@ -111,8 +115,11 @@ void printHelpMessage (const char *program, Argument *args, bool printDefaults =
case TYPE_INT:
std::cout << *(int *) args[i].data;
break;
+ case TYPE_LONGLONG:
+ std::cout << *(long long *) args[i].data;
+ break;
case TYPE_INTEGER:
- std::cout << *(long int *) args[i].data;
+ std::cout << *(type_integer *) args[i].data;
break;
case TYPE_DOUBLE:
std::cout << *(double *) args[i].data;
@@ -121,7 +128,7 @@ void printHelpMessage (const char *program, Argument *args, bool printDefaults =
std::cout << *(std::list<int> *) args[i].data ;
break;
case TYPE_STR:
- std::cout << *(std::string *) args[i].data ;
+ std::cout << "\"" << *(std::string *) args[i].data << "\"" ;
break;
}
std::cout << ")";
@@ -130,15 +137,15 @@ void printHelpMessage (const char *program, Argument *args, bool printDefaults =
}
std::cout << " -h or -? Display this message" << std::endl;
- if (messageboolean)
+ if (messageboolean)
std::cout << "For boolean switches, the argument may be omitted, meaning the switch should be ON" << std::endl;
std::cout << std::endl;
std::cout << "If <report file> is '-' the report is written to std output. If <report file> is" << std::endl;
std::cout << "not given, then no detailed reporting is done. This is suitable if you wish only" << std::endl;
std::cout << "to determine whether the tests succeeded." << std::endl;
std::cout << std::endl;
- if (messageprimality)
- std::cout << "[1] N.B. This program does not verify the primality of Q, and does not use a" << std::endl
+ if (messageprimality)
+ std::cout << "[1] N.B. This program does not verify the primality of Q, and does not use a" << std::endl
<< " field extension in the event that Q=p^n, n > 1" << std::endl;
std::cout << std::endl;
}
@@ -227,7 +234,7 @@ namespace FFLAS {
std::cout << "Writing report data to cout (intermingled with brief report)" << std::endl << std::endl;
std::cout.flush ();
}
- else if (argv[i][1] == 'h' || argv[i][1] == '?') {
+ else if (argv[i][1] == 'h' || argv[i][1] == '?' || argv[i][1] == '-') {
printHelpMessage (argv[0], args, printDefaults);
exit (1);
}
@@ -237,7 +244,7 @@ namespace FFLAS {
{
if (argc == i+1 || (argv[i+1][0] == '-' && argv[i+1][1] != '\0')) {
// if at last argument, or next argument is a switch, set to true
- *(bool *) current->data = true;
+ *((bool *) current->data) = true;
break;
}
*(bool *) current->data =
@@ -257,11 +264,22 @@ namespace FFLAS {
}
break;
- case TYPE_INTEGER:
+ case TYPE_LONGLONG:
{
- long int tmp = atoi(argv[i+1]);
- *(long int *) current->data = tmp;
+ *(long long *) current->data = atoi (argv[i+1]);
+ ++i;
}
+ break;
+
+ case TYPE_INTEGER:
+ {
+#ifdef _GIVARO_CONFIG_H
+ type_integer tmp(argv[i+1]);
+#else
+ type_integer tmp = atol(argv[i+1]);
+#endif
+ *(type_integer *) current->data = tmp;
+ }
++i;
break;
@@ -300,37 +318,45 @@ namespace FFLAS {
}
}
}
-}
-
-std::ostream& writeCommandString (std::ostream& os, Argument *args, char* programName)
-{
- os << programName;
- for (int i = 0; args[i].c != '\0'; ++i) {
- os << " -" << args[i].c;
- switch (args[i].type) {
- case TYPE_NONE:
- if (! (*(bool *)args[i].data)) os << " N";
- break;
- case TYPE_INT:
- os << ' ' << *(int *) args[i].data;
- break;
- case TYPE_INTEGER:
- os << ' ' << *(long int *) args[i].data;
- break;
- case TYPE_DOUBLE:
- os << ' ' << *(double *) args[i].data;
- break;
- case TYPE_INTLIST:
- os << ' ' << *(std::list<int> *) args[i].data;
- break;
- case TYPE_STR:
- os << ' ' << *(std::string *) args[i].data;
- break;
+ /** writes the values of all arguments, preceded by the programName */
+ std::ostream& writeCommandString (std::ostream& os, Argument *args, char* programName = nullptr)
+ {
+ if (programName != nullptr)
+ os << programName;
+
+ for (int i = 0; args[i].c != '\0'; ++i) {
+ os << " -" << args[i].c;
+ switch (args[i].type) {
+ case TYPE_NONE:
+ if ((*(bool *)args[i].data)) os << " Y";
+ else os << " N";
+ break;
+ case TYPE_INT:
+ os << ' ' << *(int *) args[i].data;
+ break;
+ case TYPE_LONGLONG:
+ os << ' ' << *(long long *) args[i].data;
+ break;
+ case TYPE_INTEGER:
+ os << ' ' << *(Givaro::Integer *) args[i].data;
+ break;
+ case TYPE_DOUBLE:
+ os << ' ' << *(double *) args[i].data;
+ break;
+ case TYPE_INTLIST:
+ os << ' ' << *(std::list<int> *) args[i].data;
+ break;
+ case TYPE_STR:
+ os << " \"" << *(std::string *) args[i].data << "\"";
+ break;
+ }
}
+
+ return os;
}
- return os << std::endl;
}
+#undef type_integer
#endif // __FFLASFFPACK_args_parser_H
diff --git a/fflas-ffpack/utils/bit_manipulation.h b/fflas-ffpack/utils/bit_manipulation.h
new file mode 100644
index 0000000..dcb7c24
--- /dev/null
+++ b/fflas-ffpack/utils/bit_manipulation.h
@@ -0,0 +1,173 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2014 FFLAS-FFPACK group
+ *
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ * Part of this code is taken from http://libdivide.com/
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+#ifndef __FFLASFFPACK_utils_bit_manipulation_H
+#define __FFLASFFPACK_utils_bit_manipulation_H
+
+#ifndef __has_builtin
+#define __has_builtin(x) 0 // Compatibility with non-clang compilers.
+#endif
+
+#include <givaro/udl.h>
+
+// count leading zeros
+inline int32_t clz(uint64_t val) {
+#if __GNUC__ || __has_builtin(__builtin_clzll)
+ return __builtin_clzll(val);
+#else
+ if (! val) return 64 ;
+ int32_t result = 0;
+ while (! (val & (1_ui64 << 63))) {
+ val <<= 1;
+ result++;
+ }
+ return result;
+#endif
+}
+
+inline int32_t clz(uint32_t val) {
+#if __GNUC__ || __has_builtin(__builtin_clzll)
+ return __builtin_clz(val);
+#else
+ if (! val) return 32 ;
+ int32_t result = 0;
+ while (! (val & (1 << 31))) {
+ val <<= 1;
+ result++;
+ }
+ return result;
+#endif
+}
+
+// count trailing zeros
+inline int32_t ctz(uint32_t val) {
+#if __GNUC__ || __has_builtin(__builtin_ctz)
+ return __builtin_ctz(val);
+#else
+ if (!val) return 32;
+ int32_t result = 0;
+ val = (val ^ (val - 1)) >> 1; // Set v's trailing 0s to 1s and zero rest
+ while (val) {
+ val >>= 1;
+ result++;
+ }
+ return result;
+#endif
+}
+
+// count trailing zeros
+inline int32_t ctz(uint64_t val) {
+#if __GNUC__ || __has_builtin(__builtin_ctzll)
+ return __builtin_ctzll(val);
+#else
+ if (!val) return 64;
+ uint32_t lo = val & 0xFFFFFFFF;
+ if (lo != 0) return ctz(lo);
+ return 32 + ctz(val >> 32);
+#endif
+}
+
+
+
+#ifdef __x86_64__
+// division 128bits by 64 bits
+// int128_t(u1,u0) = u1*2^64+u0, div v, rem r
+// return quo
+static uint64_t divide_128(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r)
+{
+ // u0 -> rax
+ // u1 -> rdx
+ // divq
+ uint64_t result;
+ __asm__("divq %[v]"
+ : "=a"(result), "=d"(*r)
+ : [v] "r"(v), "a"(u0), "d"(u1)
+ );
+ return result;
+}
+#endif
+
+static uint64_t getpoweroftwoden_128(uint32_t d, uint64_t q, uint64_t *r) {
+#ifdef __x86_64__
+ return divide_128(1_ui64 << (d - 1), 0, q, r);
+#else
+ lldiv_t ta;
+ ta = lldiv(1ULL<<63,q);
+ lldiv_t br;
+ br = lldiv(ta.rem<<d,q);
+ *r = br.rem;
+ return (ta.quot<<d)+br.quot;
+#endif
+}
+
+
+
+static inline uint32_t mullhi_u32(uint32_t x, uint32_t y) {
+ uint64_t xl = x, yl = y;
+ uint64_t rl = xl * yl;
+ return (uint32_t)(rl >> 32);
+}
+
+static inline int64_t mulhi_64(int64_t x, int64_t y) {
+#ifdef __x86_64__
+ int128_t xl = x, yl = y;
+ int128_t rl = xl * yl;
+ return (int64_t)(rl >> 64);
+#else
+ const uint32_t mask = 0xFFFFFFFF;
+ const uint32_t x0 = (uint32_t)(x & mask), y0 = (uint32_t)(y & mask);
+ const int32_t x1 = (int32_t)(x >> 32), y1 = (int32_t)(y >> 32);
+ const uint32_t x0y0_hi = mullhi_u32(x0, y0);
+ const int64_t t = x1*(int64_t)y0 + x0y0_hi;
+ const int64_t w1 = x0*(int64_t)y1 + (t & mask);
+ return x1*(int64_t)y1 + (t >> 32) + (w1 >> 32);
+#endif
+}
+
+static inline int64_t mulhi_fast_64(int64_t x, int64_t y) {
+#if 0 // todo check this type
+ int128_t xl = x, yl = y;
+ int128_t rl = xl * yl;
+ return (int64_t)(rl >> 64);
+#else
+ const uint32_t mask = 0xFFFFFFFF;
+ const uint32_t x0 = (uint32_t)(x & mask), y0 = (uint32_t)(y & mask);
+ const int32_t x1 = (int32_t)(x >> 32), y1 = (int32_t)(y >> 32);
+ // const uint32_t x0y0_hi = libdivide__mullhi_u32(x0, y0);
+ const int64_t t = x1*(int64_t)y0 ; // + x0y0_hi;
+ const int64_t w1 = x0*(int64_t)y1 ; // + (t & mask);
+ return x1*(int64_t)y1 + (t >> 32) + (w1 >> 32);
+#endif
+}
+
+
+
+#endif // __FFLASFFPACK_utils_bit_manipulation_H
diff --git a/fflas-ffpack/fflas-ffpack.h b/fflas-ffpack/utils/cast.h
similarity index 61%
copy from fflas-ffpack/fflas-ffpack.h
copy to fflas-ffpack/utils/cast.h
index ceeb9c0..7da53f1 100644
--- a/fflas-ffpack/fflas-ffpack.h
+++ b/fflas-ffpack/utils/cast.h
@@ -1,7 +1,9 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2011 FFLAS-FFPACK
- * Written by <brice.boyer at imag.fr>
+/*
+ * This file is part of FFLAS-FFPACK
+ * Copyright (C) 2011 Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ * ------------------------------------
+ *
*
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
@@ -20,20 +22,21 @@
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* ========LICENCE========
- *
+ *.
*/
-/*! @file fflas-ffpack/fflas-ffpack.h
- * @ingroup fflas-ffpack
- * @brief Includes FFLAS and FFPACK
- */
+#ifndef __FFLASFFPACK_const_H
+#define __FFLASFFPACK_const_H
+
+namespace FFPACK {
+ template<class T, class CT = const T>
+ T fflas_const_cast (CT x)
+ {
+ return const_cast<T>(x);
+ }
-#ifndef __FFLASFFPACK_fflas_ffpack_H
-#define __FFLASFFPACK_fflas_ffpack_H
+}
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
-#include "fflas/fflas.h"
-#include "ffpack/ffpack.h"
+#endif
-#endif // __FFLASFFPACK_fflas_ffpack_H
diff --git a/fflas-ffpack/utils/debug.h b/fflas-ffpack/utils/debug.h
index aacea0e..9863775 100644
--- a/fflas-ffpack/utils/debug.h
+++ b/fflas-ffpack/utils/debug.h
@@ -32,10 +32,13 @@
#ifndef __FFLASFFPACK_util_debug_H
#define __FFLASFFPACK_util_debug_H
+#include <fflas-ffpack/fflas-ffpack-config.h>
+#include <iostream>
#include <sstream>
+#include <cmath>
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
+#include "fflas-ffpack/fflas-ffpack-config.h"
#ifdef __FFLASFFPACK_HAVE_STDINT_H
@@ -43,52 +46,38 @@
#define __STDC_LIMIT_MACROS
#endif
#include <stdint.h>
+#include <limits>
+// If somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)
#ifndef INT64_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
#define INT64_MAX std::numeric_limits<int64_t>::max()
#endif
#ifndef UINT64_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
#define UINT64_MAX std::numeric_limits<uint64_t>::max()
#endif
#ifndef INT32_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
#define INT32_MAX std::numeric_limits<int32_t>::max()
#endif
#ifndef UINT32_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
#define UINT32_MAX std::numeric_limits<uint32_t>::max()
#endif
#ifndef INT16_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
#define INT16_MAX std::numeric_limits<int16_t>::max()
#endif
#ifndef UINT16_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
#define UINT16_MAX std::numeric_limits<uint16_t>::max()
#endif
#ifndef INT8_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
#define INT8_MAX std::numeric_limits<int8_t>::max()
#endif
#ifndef UINT8_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
#define UINT8_MAX std::numeric_limits<uint8_t>::max()
#endif
@@ -96,41 +85,51 @@
#error "you need intXX_t types"
#endif
-#ifndef DEBUG
-#define FFLASFFPACK_check(check) ((void) 0)
-#else
+#ifndef NDEBUG
+#include <stdexcept>
#define FFLASFFPACK_check(check) \
if (!(check)) {\
-throw FFPACK::Failure (__func__, __FILE__, __LINE__, #check); /*BB : should work on non gnu compilers too */ \
+FFPACK::failure()(__func__, __FILE__, __LINE__, #check); \
+throw std::runtime_error(#check); \
+}
+#define FFLASFFPACK_abort(msg) \
+{\
+FFPACK::failure()(__func__, __FILE__, __LINE__, msg); \
+throw std::runtime_error(msg); \
}
+#else
+#define FFLASFFPACK_check(check) ((void) 0)
+#define FFLASFFPACK_abort(mst) ((void) 0)
#endif
namespace FFPACK {
-
/*! A precondtion failed.
* @ingroup util
* The \c throw mechanism is usually used here as in
\code
if (!check)
- throw(Failure(__func__,__LINE__,"this check just failed");
+ failure()(__func__,__LINE__,"this check just failed");
\endcode
* The parameters of the constructor help debugging.
*/
class Failure {
protected:
- static std::ostream *_errorStream;
+ std::ostream *_errorStream;
public:
+
+ Failure() {}
+
/*! @internal
* A precondtion failed.
* @param function usually \c __func__, the function that threw the error
* @param line usually \c __LINE__, the line where it happened
* @param check a string telling what failed.
*/
- Failure (const char *function, int line, const char *check)
+ void operator() (const char *function, int line, const char *check)
{
if (_errorStream == (std::ostream *) 0)
_errorStream = &std::cerr;
@@ -149,7 +148,7 @@ namespace FFPACK {
* @param line usually \c __LINE__, the line where it happened
* @param check a string telling what failed.
*/
- Failure (const char* function, const char *file, int line, const char *check)
+ void operator() (const char* function, const char *file, int line, const char *check)
{
if (_errorStream == (std::ostream *) 0)
_errorStream = &std::cerr;
@@ -159,7 +158,7 @@ namespace FFPACK {
(*_errorStream) << "Precondition not met:" << check << std::endl;
}
- static void setErrorStream (std::ostream &stream);
+ void setErrorStream (std::ostream &stream);
/*! @internal overload the virtual print of LinboxError.
* @param o output stream
@@ -173,48 +172,25 @@ namespace FFPACK {
}
};
-#if 0
- /*! @internal A function is "not implemented yet(tm)".
- * where, why ?
- */
- class NotImplementedYet {
- protected:
- static std::ostream *_errorStream;
+
+ inline Failure& failure() {
+ static Failure failure_internal;
+ return failure_internal;
+ }
- public:
- /*! @internal
- * A precondtion failed.
- * The parameter help debugging. This is not much different from the previous
- * except we can digg faster in the file where the exception was triggered.
- * @param function usually \c __func__, the function that threw the error
- * @param file usually \c __FILE__, the file where this function is
- * @param line usually \c __LINE__, the line where it happened
- * @param why by default, lazy people don't provide an explanation.
- */
- NotImplementedYet() {}
-
- NotImplementedYet(const char * function,
- const char* file,
- int line,
- const char * why='\0')
- {
- if (_errorStream == (std::ostream *) 0)
- _errorStream = &std::cerr;
+ template<class T>
+ inline bool isOdd (const T & a) {
+ return (a%2);
+ }
- (*_errorStream) << std::endl << std::endl;
- (*_errorStream) << "ERROR (at " << function << " in " << file << ':' << line << "): " << std::endl;
- (*_errorStream) << " This function is not implemented yet" ;
- if (why)
- (*_errorStream) << " (" << why << ")" <<std::endl;
- else
- (*_errorStream) << "." << std::endl;
+ inline bool isOdd(const float &a) {
+ return (bool)(int)fmodf(a,2.f);
+ }
- }
- };
-
-#endif
+ inline bool isOdd(const double &a) {
+ return (bool)(int)fmod(a,2.);
+ }
- std::ostream *Failure::_errorStream;
} // FFPACK
#endif // __FFLASFFPACK_util_debug_H
diff --git a/fflas-ffpack/utils/fflas_intrinsic.h b/fflas-ffpack/utils/fflas_intrinsic.h
new file mode 100644
index 0000000..3841969
--- /dev/null
+++ b/fflas-ffpack/utils/fflas_intrinsic.h
@@ -0,0 +1,49 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2016 the FFLAS-FFPACK group
+ *
+ * Written by <clement.pernet at imag.fr>
+ *
+ * Includes the proper intrinsic definitions, according to the architecture and system.
+ * Code proposed by Marat Dukhan http://stackoverflow.com/questions/11228855/header-files-for-simd-intrinsics
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#if defined(_MSC_VER)
+ /* Microsoft C/C++-compatible compiler */
+#include <intrin.h>
+#elif (defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)) && (defined(__x86_64__) || defined(__i386__))
+ /* GCC-compatible compiler, targeting x86/x86-64 */
+#include <x86intrin.h>
+#elif (defined(__GNUC__) || defined(__clang__)) && defined(__ARM_NEON__)
+ /* GCC-compatible compiler, targeting ARM with NEON */
+#include <arm_neon.h>
+#elif (defined(__GNUC__) || defined(__clang__)) && defined(__IWMMXT__)
+ /* GCC-compatible compiler, targeting ARM with WMMX */
+#include <mmintrin.h>
+#elif (defined(__GNUC__) || defined(__xlC__) || defined(__clang__)) && (defined(__VEC__) || defined(__ALTIVEC__))
+ /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
+#include <altivec.h>
+#elif (defined(__GNUC__) || defined(__clang__)) && defined(__SPE__)
+ /* GCC-compatible compiler, targeting PowerPC with SPE */
+#include <spe.h>
+#endif
diff --git a/fflas-ffpack/utils/fflas_memory.h b/fflas-ffpack/utils/fflas_memory.h
new file mode 100644
index 0000000..a2acc12
--- /dev/null
+++ b/fflas-ffpack/utils/fflas_memory.h
@@ -0,0 +1,383 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* fflas/fflas_memory.h
+ * Copyright (C) 2014 fflas-ffpack group
+ *
+ * Written by Clement Pernet <Clement.Pernet at imag.fr>
+ *
+ * The cache size detection has been copied from the Eigen library,
+ * a lightweight C++ template library for linear algebra, licensed under
+ * the Mozilla
+ * Public License v. 2.0. If a copy of the MPL was not distributed
+ * with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ * Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud at inria.fr>
+ * Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1 at gmail.com>
+ * Copyright (C) 2009 Kenneth Riddile <kfriddile at yahoo.com>
+ * Copyright (C) 2010 Hauke Heibel <hauke.heibel at gmail.com>
+ * Copyright (C) 2010 Thomas Capricelli <orzel at freehackers.org>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_memory_H
+#define __FFLASFFPACK_memory_H
+
+#include "fflas-ffpack/utils/align-allocator.h"
+#include <givaro/givinteger.h>
+
+namespace FFLAS{
+
+ template<class Element>
+ inline bool alignable() {
+ return true ;
+ }
+
+ // BB : segfault in Givaro::Integer::logcpy otherwise
+ template<>
+ inline bool alignable<Givaro::Integer*>() {
+ return false;
+ }
+
+ template<class Field>
+ inline typename Field::Element_ptr fflas_new (const Field& F, const size_t m, const size_t n, const Alignment align = Alignment::DEFAULT)
+ {
+ if (alignable<typename Field::Element_ptr>() ) {
+ return malloc_align<typename Field::Element>(m*n, align);
+ }
+ else {
+ return new typename Field::Element[m*n];
+ }
+ }
+
+ template<class Element >
+ inline Element* fflas_new (const size_t m, const Alignment align = Alignment::DEFAULT)
+ {
+ if (alignable<Element*>() ) {
+ return malloc_align<Element>(m, align);
+ }
+ else {
+ return new Element[m];
+ }
+
+ }
+
+ template<class Element_ptr>
+ inline void fflas_delete(Element_ptr A)
+ {
+ if (alignable<Element_ptr>() )
+ free(A);
+ else
+ delete[] A;
+ }
+
+ template<class Ptr, class ...Args>
+ inline void fflas_delete(Ptr p, Args ... args){
+ fflas_delete(p);
+ fflas_delete(std::forward<Args>(args)...);
+ }
+
+#ifdef __FFLASFFPACK_USE_SIMD
+ inline void prefetch(const int64_t* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+#else
+ inline void prefetch(const int64_t*) {}
+#endif
+
+
+#define __CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
+
+ inline void getCacheSize(int& l1, int& l2, int& l3)
+ {
+ int abcd[4];
+ l1 = l2 = l3 = 0;
+ int cache_id = 0;
+ int cache_type = 0;
+ do {
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ __CPUID(abcd,0x4,cache_id);
+ cache_type = (abcd[0] & 0x0F) >> 0;
+ if(cache_type==1||cache_type==3) // data or unified cache
+ {
+ int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5]
+ int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
+ int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
+ int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0]
+ int sets = (abcd[2]); // C[31:0]
+ int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
+ switch(cache_level)
+ {
+ case 1: l1 = cache_size; break;
+ case 2: l2 = cache_size; break;
+ case 3: l3 = cache_size; break;
+ default: break;
+ }
+ }
+ cache_id++;
+ } while(cache_type>0 && cache_id<16);
+ }
+
+ inline void getTLBSize(int& tlb)
+ {
+ int abcd[4]={};
+ int sTLB=0;
+ int lTLB;
+ __CPUID(abcd,0x2,0);
+ unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
+ for(int i=0; i<14; ++i)
+ switch(bytes[i]){
+ case 0x03: sTLB=64; break;
+ case 0x04: lTLB=8; break;
+ case 0x05: lTLB=32; break;
+ case 0x56: lTLB=16; break;
+ case 0x57: sTLB=16; break;
+ case 0x59: sTLB=16; break;
+ case 0x5A: lTLB=32; break;
+ case 0x5B: sTLB=lTLB=64; break;
+ case 0x5C: sTLB=lTLB=128; break;
+ case 0x5D: sTLB=lTLB=256; break;
+ case 0xB3: sTLB=128; break;
+ case 0xB4: sTLB=256; break;
+ case 0xBA: sTLB=64; break;
+ case 0xC0: sTLB=lTLB=8; break;
+ case 0xCA: sTLB=512; break;
+ default: break;
+ }
+ //cout<<"small TLB: "<<sTLB<<endl;
+ //cout<<"large TLB: "<<lTLB<<endl;
+ tlb=sTLB*4096;
+ }
+
+//---------- Cache sizes ----------
+
+#if !defined(EIGEN_NO_CPUID)
+# if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
+# if defined(__PIC__) && defined(__i386__)
+ // Case for x86 with PIC
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+# else
+ // Case for x86_64 or x86 w/o PIC
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
+# endif
+# elif defined(_MSC_VER)
+# if (_MSC_VER > 1500) && ( defined(_M_IX86) || defined(_M_X64) )
+# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
+# endif
+# endif
+#endif
+
+
+#ifdef EIGEN_CPUID
+
+inline bool cpuid_is_vendor(int abcd[4], const char* vendor)
+{
+ return abcd[1]==(reinterpret_cast<const int*>(vendor))[0] && abcd[3]==(reinterpret_cast<const int*>(vendor))[1] && abcd[2]==(reinterpret_cast<const int*>(vendor))[2];
+}
+
+inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
+{
+ int abcd[4];
+ l1 = l2 = l3 = 0;
+ int cache_id = 0;
+ int cache_type = 0;
+ do {
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ EIGEN_CPUID(abcd,0x4,cache_id);
+ cache_type = (abcd[0] & 0x0F) >> 0;
+ if(cache_type==1||cache_type==3) // data or unified cache
+ {
+ int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5]
+ int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
+ int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
+ int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0]
+ int sets = (abcd[2]); // C[31:0]
+
+ int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
+
+ switch(cache_level)
+ {
+ case 1: l1 = cache_size; break;
+ case 2: l2 = cache_size; break;
+ case 3: l3 = cache_size; break;
+ default: break;
+ }
+ }
+ cache_id++;
+ } while(cache_type>0 && cache_id<16);
+}
+
+inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
+{
+ int abcd[4];
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ l1 = l2 = l3 = 0;
+ EIGEN_CPUID(abcd,0x00000002,0);
+ unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
+ bool check_for_p2_core2 = false;
+ for(int i=0; i<14; ++i)
+ {
+ switch(bytes[i])
+ {
+ case 0x0A: l1 = 8; break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines
+ case 0x0C: l1 = 16; break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines
+ case 0x0E: l1 = 24; break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines
+ case 0x10: l1 = 16; break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
+ case 0x15: l1 = 16; break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
+ case 0x2C: l1 = 32; break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines
+ case 0x30: l1 = 32; break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines
+ case 0x60: l1 = 16; break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
+ case 0x66: l1 = 8; break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
+ case 0x67: l1 = 16; break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
+ case 0x68: l1 = 32; break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
+ case 0x1A: l2 = 96; break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
+ case 0x22: l3 = 512; break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
+ case 0x23: l3 = 1024; break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x25: l3 = 2048; break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x29: l3 = 4096; break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x39: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
+ case 0x3A: l2 = 192; break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
+ case 0x3B: l2 = 128; break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
+ case 0x3C: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
+ case 0x3D: l2 = 384; break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
+ case 0x3E: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
+ case 0x40: l2 = 0; break; // no integrated L2 cache (P6 core) or L3 cache (P4 core)
+ case 0x41: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
+ case 0x42: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
+ case 0x43: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
+ case 0x44: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
+ case 0x45: l2 = 2048; break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
+ case 0x46: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
+ case 0x47: l3 = 8192; break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
+ case 0x48: l2 = 3072; break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
+ case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;// code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
+ case 0x4A: l3 = 6144; break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
+ case 0x4B: l3 = 8192; break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
+ case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
+ case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
+ case 0x4E: l2 = 6144; break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
+ case 0x78: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
+ case 0x79: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x7A: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x7B: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x7C: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
+ case 0x7D: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
+ case 0x7E: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
+ case 0x7F: l2 = 512; break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
+ case 0x80: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
+ case 0x81: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
+ case 0x82: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
+ case 0x83: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
+ case 0x84: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
+ case 0x85: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
+ case 0x86: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
+ case 0x87: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
+ case 0x88: l3 = 2048; break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
+ case 0x89: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
+ case 0x8A: l3 = 8192; break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
+ case 0x8D: l3 = 3072; break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
+
+ default: break;
+ }
+ }
+ if(check_for_p2_core2 && l2 == l3)
+ l3 = 0;
+ l1 *= 1024;
+ l2 *= 1024;
+ l3 *= 1024;
+}
+
+inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
+{
+ if(max_std_funcs>=4)
+ queryCacheSizes_intel_direct(l1,l2,l3);
+ else
+ queryCacheSizes_intel_codes(l1,l2,l3);
+}
+
+inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
+{
+ int abcd[4];
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ EIGEN_CPUID(abcd,0x80000005,0);
+ l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ EIGEN_CPUID(abcd,0x80000006,0);
+ l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
+ l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
+}
+#endif
+
+/** \internal
+ * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively */
+inline void queryCacheSizes(int& l1, int& l2, int& l3)
+{
+ #ifdef EIGEN_CPUID
+ int abcd[4];
+
+ // identify the CPU vendor
+ EIGEN_CPUID(abcd,0x0,0);
+ int max_std_funcs = abcd[1];
+ if(cpuid_is_vendor(abcd,"GenuineIntel"))
+ queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
+ else if(cpuid_is_vendor(abcd,"AuthenticAMD") || cpuid_is_vendor(abcd,"AMDisbetter!"))
+ queryCacheSizes_amd(l1,l2,l3);
+ else
+ // by default let's use Intel's API
+ queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
+
+ // here is the list of other vendors:
+// ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
+// ||cpuid_is_vendor(abcd,"CyrixInstead")
+// ||cpuid_is_vendor(abcd,"CentaurHauls")
+// ||cpuid_is_vendor(abcd,"GenuineTMx86")
+// ||cpuid_is_vendor(abcd,"TransmetaCPU")
+// ||cpuid_is_vendor(abcd,"RiseRiseRise")
+// ||cpuid_is_vendor(abcd,"Geode by NSC")
+// ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
+// ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
+// ||cpuid_is_vendor(abcd,"NexGenDriven")
+ #else
+ l1 = l2 = l3 = -1;
+ #endif
+}
+
+/** \internal
+ * \returns the size in Bytes of the L1 data cache */
+inline int queryL1CacheSize()
+{
+ int l1(-1), l2, l3;
+ queryCacheSizes(l1,l2,l3);
+ return l1;
+}
+
+/** \internal
+ * \returns the size in Bytes of the L2 or L3 cache if this later is present */
+inline int queryTopLevelCacheSize()
+{
+ int l1, l2(-1), l3(-1);
+ queryCacheSizes(l1,l2,l3);
+ return (std::max)(l2,l3);
+}
+
+} // namespace FFLAS
+#endif // __FFLASFFPACK_memory_H
diff --git a/fflas-ffpack/utils/fflas_randommatrix.h b/fflas-ffpack/utils/fflas_randommatrix.h
new file mode 100644
index 0000000..cb618a0
--- /dev/null
+++ b/fflas-ffpack/utils/fflas_randommatrix.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+/*! @file utils/fflas_randommatrix.h
+ * @ingroup tests
+ * @brief Utilities to create matrices with prescribed shapes, properties,...
+ * To be used in benchmarks/tests
+ */
+
+#ifndef __FFLASFFPACK_randommatrix_H
+#define __FFLASFFPACK_randommatrix_H
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include "fflas-ffpack/utils/debug.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+#include <givaro/givinteger.h>
+#include <givaro/givintprime.h>
+#include <givaro/givranditer.h>
+#include <chrono>
+#include <random>
+
+namespace FFPACK {
+
+ /*! @brief Random Matrix.
+ * Creates a \c m x \c n matrix with random entries.
+ * @param F field
+ * @param A pointer to the matrix (preallocated to at least \c m x \c lda field elements)
+ * @param m number of rows in \p A
+ * @param n number of cols in \p A
+ * @param lda leading dimension of \p A
+ * @return pointer to \c A.
+ */
+ template<class Field>
+ typename Field::Element * RandomMatrix(const Field & F,
+ typename Field::Element * A,
+ size_t m, size_t n, size_t lda, size_t b=0)
+ {
+ typedef typename Field::RandIter Randiter ;
+ Randiter R(F, b);
+ for (size_t i=0 ; i<m ; ++i)
+ for (size_t j= 0; j<n ;++j)
+ R.random( A[i*lda+j] );
+ return A;
+
+ }
+
+ /*! Random integer in range.
+ * @param a min bound
+ * @param b max bound
+ * @return a random integer in [a,b[ */
+ size_t RandInt(size_t a, size_t b)
+ {
+ size_t x = a ;
+ x += (size_t)rand()%(b-a);
+ FFLASFFPACK_check(x<b && x>=a);
+ return x ;
+ }
+
+ /*! @brief Random Matrix with prescribed rank.
+ * Creates an \c m x \c n matrix with random entries and rank \c r.
+ * @param F field
+ * @param A pointer to the matrix (preallocated to at least \c m x \c lda field elements)
+ * @param r rank of the matrix to build
+ * @param m number of rows in \p A
+ * @param n number of cols in \p A
+ * @param lda leading dimension of \p A
+ * @return pointer to \c A.
+ */
+ template<class Field>
+ typename Field::Element_ptr RandomMatrixWithRank (const Field & F,
+ typename Field::Element_ptr A, size_t lda,
+ size_t r, size_t m, size_t n)
+ {
+ FFLASFFPACK_check(r <= std::min(m,n));
+ FFLASFFPACK_check(n <= lda);
+ typedef typename Field::RandIter Randiter ;
+ typedef typename Field::Element_ptr Element_ptr;
+ Randiter R(F);
+ Givaro::GeneralRingNonZeroRandIter<Field,Randiter> nzR(R);
+
+ size_t * P = FFLAS::fflas_new<size_t>(n);
+ size_t * Q = FFLAS::fflas_new<size_t>(m);
+ for (size_t i = 0 ; i < m ; ++i ) Q[i] = 0;
+ for (size_t i = 0 ; i < n ; ++i ) P[i] = 0;
+
+ Element_ptr U = FFLAS::fflas_new(F,m,n);
+ Element_ptr L = FFLAS::fflas_new(F,m,m);
+
+
+ /* Create L, lower invertible */
+ for (size_t i=0 ; i<m ; ++i){
+ for (size_t j= 0; j<i ;++j) R.random( L[i*m+j] );
+ nzR.random( L[i*m+i] );
+ for (size_t j= i+1; j<m ;++j) F.init(L[i*m+j],F.zero);
+ }
+
+ /* Create U, upper or rank r */
+ for (size_t i=0 ; i<r ; ++i){
+ for (size_t j= 0 ; j<i ;++j) F.init(U[i*n+j],0U);
+ nzR.random( U[i*n+i] );
+ for (size_t j= i+1; j<n ;++j) R.random( U[i*n+j] );
+ }
+ for (size_t i=r ; i<m ; ++i)
+ for (size_t j= 0 ; j<n ;++j)
+ F.init(U[i*n+j],F.zero);
+
+ /* Create a random P,Q */
+ for (size_t i = 0 ; i < n ; ++i)
+ P[i] = i + RandInt(0U,n-i);
+ for (size_t i = 0 ; i < m ; ++i)
+ Q[i] = i + RandInt(0U,m-i);
+
+ /* compute product */
+
+ FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+ m,0,(int)n, U, n, P);
+ FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
+ m,0,(int)m, L, m, Q);
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
+ m, n, m, F.one, L, m, U, n, F.zero, A, lda);
+ //! @todo compute LU with ftrtr
+
+ FFLAS::fflas_delete(P);
+ FFLAS::fflas_delete(L);
+ FFLAS::fflas_delete(U);
+ FFLAS::fflas_delete(Q);
+
+ return A;
+
+ }
+
+ void RandomRankProfile (size_t N, size_t R, size_t* rkp){
+ size_t curr = 0;
+ std::vector<bool> rows(N,false);
+ while (curr<R){
+ size_t i;
+ while (rows [i = rand() % N]);
+ rows[i] = true;
+ rkp [curr] = i;
+ curr++;
+ }
+ }
+
+
+ template<class Field>
+ void RandomMatrixWithRankandRPM (const Field& F, typename Field::Element_ptr A, size_t lda,
+ size_t R, size_t M, size_t N,
+ const size_t * RRP, const size_t * CRP){
+
+ typedef typename Field::RandIter Randiter ;
+ Randiter RI(F);
+ Givaro::GeneralRingNonZeroRandIter<Field,Randiter> nzR(RI);
+ typename Field::Element_ptr L= FFLAS::fflas_new(F,M,N);
+
+ FFLAS::pfzero(F, M, N, L, N);
+ FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads> H;
+
+ SYNCH_GROUP ( FOR1D(k, R, H,
+ {
+ size_t i = RRP[k];
+ size_t j = CRP[k];
+ nzR.random (L [i*N+j]);
+ for (size_t l=i+1; l < M; ++l)
+ RI.random (L [l*N+j]);
+ }));
+
+ typename Field::Element_ptr U= FFLAS::fflas_new(F,N,N);
+ FFLAS::pfzero(F, N, N, U, N);
+ SYNCH_GROUP ( FOR1D(i, N, H,
+ {
+ nzR.random (U [i*N+i]);
+ for (size_t j=i+1; j < N; ++j)
+ RI.random (U [i*N+j]);
+ }));
+
+ typename Field::Element alpha, beta;
+ F.init(alpha,1.0);
+ F.init(beta,0.0);
+ // auto sp=SPLITTER(); //CP: broken with Modular<Integer>. Need to reorganize the helper behaviour with ParSeq and ModeTraits
+ auto sp=NOSPLIT();
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M,N,N, alpha, L, N, U, N, beta, A, lda, sp);
+ FFLAS::fflas_delete(L);
+ FFLAS::fflas_delete(U);
+
+ }
+
+ /*! @brief Random Matrix with prescribed rank, with random rank profile matrix
+ * Creates an \c m x \c n matrix with random entries, rank \c r and with a rank profile matrix
+ * chosen uniformly at random.
+ * @param F field
+ * @param A pointer to the matrix (preallocated to at least \c m x \c lda field elements)
+ * @param r rank of the matrix to build
+ * @param m number of rows in \p A
+ * @param n number of cols in \p A
+ * @param lda leading dimension of \p A
+ * @return pointer to \c A.
+ */
+ template<class Field>
+ void RandomMatrixWithRankandRandomRPM (const Field& F, typename Field::Element_ptr A, size_t lda,
+ size_t R, size_t M, size_t N){
+ // generate the r pivots in the rank profile matrix E
+ size_t pivot_r[R];
+ size_t pivot_c[R];
+
+ RandomRankProfile (M, R, pivot_r);
+ RandomRankProfile (N, R, pivot_c);
+ RandomMatrixWithRankandRPM (F, A, lda, R, M, N, pivot_r, pivot_c);
+ }
+
+ /*! @brief Random Matrix with prescribed det.
+ * @bug duplicate with linbox
+ * Creates a \c m x \c n matrix with random entries and rank \c r.
+ * @param F field
+ * @param A pointer to the matrix (preallocated to at least \c m x \c lda field elements)
+ * @param r rank of the matrix to build
+ * @param m number of rows in \p A
+ * @param n number of cols in \p A
+ * @param lda leading dimension of \p A
+ * @return pointer to \c A.
+ */
+ template<class Field>
+ typename Field::Element * RandomMatrixWithDet(const Field & F,
+ typename Field::Element * A,
+ typename Field::Element d,
+ size_t n, size_t lda)
+ {
+ FFLASFFPACK_check(n <= lda);
+ typedef typename Field::RandIter Randiter ;
+ typedef typename Field::Element Element ;
+ Randiter R(F);
+ Givaro::GeneralRingNonZeroRandIter<Field,Randiter> nzR(R);
+
+ size_t * P = FFLAS::fflas_new<size_t>(n);
+ size_t * Q = FFLAS::fflas_new<size_t>(n);
+ for (size_t i = 0 ; i < n ; ++i ) Q[i] = 0;
+ for (size_t i = 0 ; i < n ; ++i ) P[i] = 0;
+
+ Element * U = FFLAS::fflas_new<Element>(n*lda);
+ Element * L = FFLAS::fflas_new<Element>(n*n);
+
+ /* Create a random P,Q */
+
+ for (size_t i = 0 ; i < n ; ++i)
+ P[i] = i + RandInt(0U,n-i);
+ for (size_t i = 0 ; i < n ; ++i)
+ Q[i] = i + RandInt(0U,n-i);
+
+ /* det of P,Q */
+ int d1 =1 ;
+ for (size_t i = 0 ; i < n ; ++i)
+ if (P[i] != i)
+ d1 = -d1;
+ for (size_t i = 0 ; i < n ; ++i)
+ if (Q[i] != i)
+ d1 = -d1;
+
+
+
+ /* Create L, lower det d */
+ for (size_t i=0 ; i<n ; ++i)
+ for (size_t j= 0; j<i ;++j)
+ R.random( L[i*n+j] );
+
+ Element dd = F.one;
+ for (size_t i=0 ; i<n-1 ; ++i) {
+ nzR.random( L[i*n+i] );
+ F.mulin(dd,L[i*n+i]);
+ }
+
+ F.div(dd,d,dd);
+ if (d1<0) F.negin(dd);
+ L[n*n-1] = dd ;
+
+ for (size_t i=0 ; i<n ; ++i)
+ for (size_t j= i+1; j<n ;++j)
+ F.init(L[i*n+j],0U);
+
+
+ /* Create U, upper or rank r */
+ for (size_t i=0 ; i<n ; ++i) {
+ for (size_t j= 0; j<i ;++j)
+ U[i*lda+j] = F.zero;
+ U[i*lda+i] = F.one;
+ for (size_t j= i+1; j<n ;++j)
+ R.random( U[i*lda+j] );
+ }
+
+ /* compute product */
+
+ FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+ n,0,(int)n, U, lda, P);
+ FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
+ n,0,(int)n, L, n, Q);
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
+ n,n,n, 1.0, L,n, U,lda, 0.0, A,lda);
+ //! @todo compute LU with ftrtr
+
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( Q);
+
+ return A;
+
+ }
+
+} // FFPACK
+#endif
diff --git a/fflas-ffpack/utils/flimits.h b/fflas-ffpack/utils/flimits.h
new file mode 100644
index 0000000..66f6cd4
--- /dev/null
+++ b/fflas-ffpack/utils/flimits.h
@@ -0,0 +1,193 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by <bastien.vialla at lirmm.fr>
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#ifndef __FFLASFFPACK_limits_H
+#define __FFLASFFPACK_limits_H
+
+//#include <cstddef>
+#include <climits>
+#include <limits>
+#include <type_traits>
+
+#include <givaro/givinteger.h>
+
+template <class T> struct limits;
+// {
+// constexpr inline static T max() noexcept {return 0;}
+// constexpr inline static T min() noexcept {return 0;}
+// };
+
+template <> struct limits<unsigned char> {
+ typedef unsigned char T ;
+ constexpr inline static unsigned char max() noexcept {return UCHAR_MAX;}
+ constexpr inline static unsigned char min() noexcept {return 0;}
+ constexpr inline static int32_t digits() noexcept {return std:: numeric_limits<T>::digits ;}
+};
+
+template <> struct limits<signed char> {
+ typedef signed char T ;
+ constexpr inline static signed char max() noexcept {return SCHAR_MAX;}
+ constexpr inline static signed char min() noexcept {return SCHAR_MIN;}
+ constexpr inline static int32_t digits() noexcept {return std:: numeric_limits<T>::digits ;}
+};
+
+template <> struct limits<char> {
+ typedef char T ;
+ constexpr inline static char max() noexcept {return CHAR_MAX;}
+ constexpr inline static char min() noexcept {return CHAR_MIN;}
+ constexpr inline static int32_t digits() noexcept {return std:: numeric_limits<T>::digits ;}
+};
+
+template <> struct limits<unsigned short int> {
+ typedef unsigned short int T ;
+ constexpr inline static unsigned short int max() noexcept {return USHRT_MAX;}
+ constexpr inline static unsigned short int min() noexcept {return 0;}
+ constexpr inline static int32_t digits() noexcept {return std:: numeric_limits<T>::digits ;}
+};
+
+template <> struct limits<short int> {
+ typedef short int T ;
+ constexpr inline static short int max() noexcept {return SHRT_MAX;}
+ constexpr inline static short int min() noexcept {return SHRT_MIN;}
+ constexpr inline static int32_t digits() noexcept {return std:: numeric_limits<T>::digits ;}
+};
+
+template <> struct limits<unsigned int> {
+ typedef unsigned int T ;
+ constexpr inline static unsigned int max() noexcept {return UINT_MAX;}
+ constexpr inline static unsigned int min() noexcept {return 0;}
+ constexpr inline static int32_t digits() noexcept {return std:: numeric_limits<T>::digits ;}
+};
+
+template <> struct limits<int> {
+ typedef int T ;
+ constexpr inline static int max() noexcept {return INT_MAX;}
+ constexpr inline static int min() noexcept {return INT_MIN;}
+ constexpr inline static int32_t digits() noexcept {return std:: numeric_limits<T>::digits ;}
+};
+
+template <> struct limits<unsigned long> {
+ typedef unsigned long T ;
+ constexpr inline static unsigned long max() noexcept {return ULONG_MAX;}
+ constexpr inline static unsigned long min() noexcept {return 0;}
+ constexpr inline static int32_t digits() noexcept {return std:: numeric_limits<T>::digits ;}
+};
+
+template <> struct limits<long> {
+ typedef long T ;
+ constexpr inline static long max() noexcept {return LONG_MAX;}
+ constexpr inline static long min() noexcept {return LONG_MIN;}
+ constexpr inline static int32_t digits() noexcept {return std:: numeric_limits<T>::digits ;}
+};
+
+template <> struct limits<unsigned long long> {
+ typedef unsigned long long T ;
+ constexpr inline static unsigned long long max() noexcept {
+ return ULLONG_MAX;
+ }
+ constexpr inline static unsigned long long min() noexcept {return 0;}
+ constexpr inline static int32_t digits() noexcept {return std:: numeric_limits<T>::digits ;}
+};
+
+template <> struct limits<long long> {
+ typedef long long T ;
+ constexpr inline static long long max() noexcept {return LLONG_MAX;}
+ constexpr inline static long long min() noexcept {return LLONG_MIN;}
+ constexpr inline static int32_t digits() noexcept {return std:: numeric_limits<T>::digits ;}
+};
+
+template <> struct limits<float> {
+ typedef float T ;
+ constexpr inline static int32_t max() noexcept {return (int32_t(1) << FLT_MANT_DIG) - 1;}
+ constexpr inline static int32_t min() noexcept {return -((int32_t(1) << FLT_MANT_DIG) - 1);}
+ constexpr inline static int32_t digits() noexcept {return std:: numeric_limits<T>::digits ;}
+};
+
+template <> struct limits<double> {
+ typedef double T;
+ constexpr inline static int64_t max() noexcept {return (int64_t(1) << DBL_MANT_DIG) - 1;}
+ constexpr inline static int64_t min() noexcept {return -((int64_t(1) << DBL_MANT_DIG) - 1);}
+ constexpr inline static int32_t digits() noexcept {return std:: numeric_limits<T>::digits ;}
+};
+
+template <> struct limits<Givaro::Integer> {
+ typedef Givaro::Integer T;
+ constexpr inline static int max() noexcept {return -1;}
+ constexpr inline static int min() noexcept {return 0;}
+};
+
+template <size_t K> struct limits<RecInt::ruint<K> > {
+ typedef RecInt::ruint<K> T;
+ constexpr inline static RecInt::ruint<K> max() noexcept {return RecInt::ruint<K>(-1);}
+ constexpr inline static RecInt::ruint<K> min() noexcept {return 0;}
+};
+
+template <size_t K> struct limits<RecInt::rint<K> > {
+ typedef RecInt::ruint<K> T;
+ constexpr inline static RecInt::rint<K> max() noexcept {return RecInt::rint<K>(RecInt::ruint<K>(-1) >> 1u);}
+ constexpr inline static RecInt::rint<K> min() noexcept {return max() + 1;}
+};
+// template <size_t K> struct limits<RecInt::rint<K> > {
+// constexpr inline static RecInt::rint<K> max() noexcept {return RecInt::rint<K>(RecInt::ruint<K>(-1))/2;}
+
+// constexpr inline static RecInt::rint<K> min() noexcept {return -RecInt::rint<K>(RecInt::ruint<K>(-1))/2;}
+// };
+// template <size_t K,size_t MG> struct limits<RecInt::rmint<K,MG> > {
+// constexpr inline static RecInt::ruint<K> max() noexcept {return RecInt::rmint<K,MG>(-1);}
+
+// constexpr inline static RecInt::ruint<K> min() noexcept {return 0;}
+// };
+
+/*
+ * in_range, determine if an element e of type E fit in a type T
+ */
+
+template<class T, class E>
+typename std::enable_if<std::is_signed<T>::value == std::is_signed<E>::value, bool>::type
+in_range(E e)
+{
+ return (e >= limits<T>::min() && e <= limits<T>::max());
+}
+
+template<class T, class E>
+typename std::enable_if<(std::is_signed<T>::value) && !(std::is_signed<E>::value), bool>::type
+ in_range(E e)
+{
+ return (e <= static_cast<E>(limits<T>::max()));
+}
+
+template<class T, class E>
+typename std::enable_if<!(std::is_signed<T>::value) && (std::is_signed<E>::value), bool>::type
+ in_range(E e)
+{
+ return ((e >= 0) && (static_cast<T>(e) <= limits<T>::max()));
+}
+
+
+
+#endif /* _FFLASFFPACK_limits_H */
diff --git a/fflas-ffpack/utils/print-utils.h b/fflas-ffpack/utils/print-utils.h
index 5ecfbc1..03aeb4e 100644
--- a/fflas-ffpack/utils/print-utils.h
+++ b/fflas-ffpack/utils/print-utils.h
@@ -1,7 +1,8 @@
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
/* tests/print-utils.h
- * Copyright (C) 2011, Brice Boyer <bboyer at imag.fr>
+ * Copyright (C) 2011, Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ * Bastien Vialla <bastien.vialla at lirmm.fr>
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
*
@@ -24,10 +25,12 @@
#ifndef __FFLASFFPACK_print_utils_H
#define __FFLASFFPACK_print_utils_H
+#include <fflas-ffpack/fflas-ffpack-config.h>
#include <vector>
// #include <pair>
#include <list>
#include <set>
+#include <iterator>
namespace std
{
@@ -37,16 +40,17 @@ namespace std
* @param v vector
* @warning <<(ostream&,T&) exists !
*/
- template<class T>
- std::ostream & operator<<(std::ostream&o, const std::vector<T> & v)
+ template<class T, class Alloc>
+ std::ostream & operator<<(std::ostream&o, const std::vector<T, Alloc> & v)
{
o << '[' ;
- if (v.size()) {
- size_t i = 0 ;
- for (; i < v.size()-1 ; ++i)
- o << v[i] << ',' ;
- o << v[i] ;
- }
+ std::copy(v.begin(), v.end(), std::ostream_iterator<T>(o, " "));
+ // if (v.size()) {
+ // size_t i = 0 ;
+ // for (; i < v.size()-1 ; ++i)
+ // o << v[i] << ',' ;
+ // o << v[i] ;
+ // }
return o << ']' ;
}
@@ -69,20 +73,22 @@ namespace std
* @param C a pair
* @warning <<(ostream&,T&) exists !
*/
- template<class T>
- std::ostream& operator<< (std::ostream& o, const std::list<T> & L)
+ template<class T, class Alloc>
+ std::ostream& operator<< (std::ostream& o, const std::list<T, Alloc> & L)
{
- typename std::list<T>::const_iterator it = L.begin() ;
o << '{' ;
- if (it != L.end() )
- while(true) {
- o << *it ;
- if (++it != L.end())
- o << ", " ;
- else
- break;
- }
+ std::copy(L.begin(), L.end(), std::ostream_iterator<T>(o, " "));
return o << '}' ;
+
+ // typename std::list<T>::const_iterator it = L.begin() ;
+ // if (it != L.end() )
+ // while(true) {
+ // o << *it ;
+ // if (++it != L.end())
+ // o << ", " ;
+ // else
+ // break;
+ // }
}
@@ -91,20 +97,23 @@ namespace std
* @param C a pair
* @warning <<(ostream&,T&) exists !
*/
- template<class T>
- std::ostream& operator<< (std::ostream& o, const std::set<T> & L)
+ template<class T, class Alloc>
+ std::ostream& operator<< (std::ostream& o, const std::set<T, Alloc> & S)
{
- typename std::set<T>::const_iterator it = L.begin() ;
o << '|' ;
- if (it != L.end() )
- while(true) {
- o << *it ;
- if (++it != L.end())
- o << ", " ;
- else
- break;
- }
+ std::copy(S.begin(), S.end(), std::ostream_iterator<T>(o, " "));
return o << '|' ;
+ // typename std::set<T>::const_iterator it = L.begin() ;
+ // o << '|' ;
+ // if (it != L.end() )
+ // while(true) {
+ // o << *it ;
+ // if (++it != L.end())
+ // o << ", " ;
+ // else
+ // break;
+ // }
+ // return o << '|' ;
}
@@ -132,7 +141,9 @@ namespace std
return o << std::endl;
}
+
#endif
+
}
#endif // __FFLASFFPACK_print_utils_H
diff --git a/fflas-ffpack/utils/timer.C b/fflas-ffpack/utils/timer.C
deleted file mode 100644
index fa93ff9..0000000
--- a/fflas-ffpack/utils/timer.C
+++ /dev/null
@@ -1,218 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/* tests/timer.C
- * Copyright (C) 1994-1997 Givaro Team
- *
- * Written by T. Gautier
- * Imported from LinBox by Clément Pernet.
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *
- * This file implements the C++ interface to commentators (for
- * providing runtime commentary to the user)
- */
-#ifndef __FFLASFFPACK_timer_C
-#define __FFLASFFPACK_timer_C
-// Description:
-// - various timer objects
-// - to be rewritten to be more efficient
-
-#include <cmath>
-
-extern "C" {
-# include <sys/time.h>
-# include <sys/resource.h>
-// int getrusage (int, struct rusage*) ;
-}
-
-#include <iostream>
-
-#include "timer.h"
-
-// Return a value to initialize random generator
-long BaseTimer::seed()
-{
- struct timeval tp;
- gettimeofday(&tp, 0) ;
- return(tp.tv_usec);
-}
-
-// Output the value of the timer :
-std::ostream& BaseTimer::print( std::ostream& o ) const
-{ return o << _t ; }
-
-// Some arithmetic operator :
-BaseTimer& BaseTimer::operator = (const BaseTimer & T)
-{
- _t = T._t ;
- return *this ;
-}
-
-// Computes and returns interval of time
-// beteween *this and T
-const BaseTimer BaseTimer::operator - (const BaseTimer & T) const
-{
- BaseTimer Tmp ;
- Tmp._t = _t - T._t ;
- return Tmp ;
-}
-
-const BaseTimer BaseTimer::operator - ()
-{
- BaseTimer Tmp ;
- Tmp._t = -_t ;
- return Tmp ;
-}
-
-const BaseTimer BaseTimer::operator + (const BaseTimer & T) const
-{
- BaseTimer Tmp ;
- Tmp._t = _t + T._t ;
- return Tmp ;
-}
-
-// Start timer
-void RealTimer::start()
-{
- struct timeval tmp2 ;
- gettimeofday (&tmp2, 0) ;
-
- // real time
- _t = (double) tmp2.tv_sec +
- ((double) tmp2.tv_usec)/ (double)BaseTimer::MSPSEC ;
-}
-
-
-// Stop timer
-void RealTimer::stop()
-{
- struct timeval tmp2 ;
- gettimeofday (&tmp2, 0) ;
-
- // real time
- _t = (double) tmp2.tv_sec +
- ((double) tmp2.tv_usec)/ (double)BaseTimer::MSPSEC - _t ;
-}
-
-// Start timer
-void UserTimer::start()
-{
- struct rusage tmp1 ; // to getrusage (sys+user times)
- getrusage (RUSAGE_SELF, &tmp1) ;
- // user time
- _t = (double) tmp1.ru_utime.tv_sec +
- ((double) tmp1.ru_utime.tv_usec)/ (double)MSPSEC ;
-}
-
-
-// Stop timer
-void UserTimer::stop()
-{
- struct rusage tmp1 ; // to getrusage (sys+user times)
- getrusage (RUSAGE_SELF, &tmp1) ;
- // user time
- _t = (double) tmp1.ru_utime.tv_sec +
- ((double) tmp1.ru_utime.tv_usec)/ (double)MSPSEC - _t ;
-}
-
-
-// Start timer
-void SysTimer::start()
-{
- struct rusage tmp1 ; // to getrusage (sys+user times)
- getrusage (RUSAGE_SELF, &tmp1) ;
- // user time
- _t = (double) tmp1.ru_stime.tv_sec +
- ((double) tmp1.ru_stime.tv_usec)/ (double)MSPSEC ;
-}
-
-
-// Stop timer
-void SysTimer::stop()
-{
- struct rusage tmp1 ; // to getrusage (sys+user times)
- getrusage (RUSAGE_SELF, &tmp1) ;
- // user time
- _t = (double) tmp1.ru_stime.tv_sec +
- ((double) tmp1.ru_stime.tv_usec)/ (double)MSPSEC - _t ;
-}
-
-
-
-// Clear timer :
-void Timer::clear()
-{ rt.clear() ; ut.clear(); st.clear() ; }
-
-// Start timer
-void Timer::start()
-{ rt.start() ; ut.start(); st.start() ; }
-
-// Stop timer
-void Timer::stop()
-{ rt.stop() ; ut.stop(); st.stop() ; }
-
-
-std::ostream& Timer::print( std::ostream& o ) const
-{
- o << "user time: " << usertime() << '\n' ;
- o << "sys. time: " << systime() << '\n' ;
- return o << "real time: " << realtime() << std::endl ;
-}
-
-// Some arithmetic operator :
-Timer& Timer::operator = (const Timer & T)
-{
- ut = T.ut ;
- st = T.st ;
- rt = T.rt ;
- return *this ;
-}
-
-// Comput._tes and returns interval of time
-// beteween *this and T
-const Timer Timer::operator - (const Timer & T) const
-{
- Timer Tmp ;
- Tmp.ut = ut - T.ut ;
- Tmp.st = st - T.st ;
- Tmp.rt = rt - T.rt ;
- return Tmp ;
-}
-
-const Timer Timer::operator - ()
-{
- Timer Tmp ;
- Tmp.ut = -ut ;
- Tmp.st = -st ;
- Tmp.rt = -rt ;
- return Tmp ;
-}
-
-const Timer Timer::operator + (const Timer & T) const
-{
- Timer Tmp ;
- Tmp.ut = ut + T.ut ;
- Tmp.st = st + T.st ;
- Tmp.rt = rt + T.rt ;
- return Tmp ;
-}
-
-
-#endif
diff --git a/fflas-ffpack/utils/timer.h b/fflas-ffpack/utils/timer.h
index 3eaa045..91ec7e2 100644
--- a/fflas-ffpack/utils/timer.h
+++ b/fflas-ffpack/utils/timer.h
@@ -41,154 +41,32 @@
* providing runtime commentary to the user)
*/
-#ifndef __TIMER_H
-#define __TIMER_H
+#ifndef __FFLASFFPACK_timer_H
+#define __FFLASFFPACK_timer_H
-#include <iostream>
-class BaseTimer {
- public:
- enum {
- MSPSEC = 1000000 // microsecond per second
- };
+#include <time.h>
- // -- Clear timer :
- inline void clear() { _t = 0; }
-
- // -- total amount of second spent
- inline double time() const { return _t; }
-
- // -- Return a value to initialize random generator
- static long seed();
-
- // -- basic methods:
- std::ostream& print( std::ostream& ) const;
-
- // -- Some arithmetic operators to compute cumulative time :
- BaseTimer& operator = (const BaseTimer & T) ;
- const BaseTimer operator - (const BaseTimer & T) const;
- const BaseTimer operator - () ;
- const BaseTimer operator + (const BaseTimer & T) const;
- BaseTimer& operator += (const BaseTimer & T) { return *this = *this + T; };
- BaseTimer& operator -= (const BaseTimer & T) { return *this = *this - T; };
-
- public:
- double _t; // time
-};
-
-inline std::ostream &operator << (std::ostream &o, const BaseTimer &BT)
- { return BT.print(o); }
-
-class RealTimer : public BaseTimer {
- public:
- inline RealTimer (const BaseTimer &BT) : BaseTimer (BT) {};
- inline RealTimer () {};
- void start ();
- void stop ();
-};
-
-
-class UserTimer : public BaseTimer {
- public:
- inline UserTimer (const BaseTimer &BT) : BaseTimer (BT) {};
- inline UserTimer () {};
- void start ();
- void stop ();
-};
-
-
-class SysTimer : public BaseTimer {
- public:
- inline SysTimer (const BaseTimer &BT): BaseTimer (BT) {};
- inline SysTimer () {};
- void start ();
- void stop ();
-};
-
-
-class Timer {
-public :
-
- // Clear timer :
- void clear();
-
- // Start timer
- void start();
-
- // Stop timer
- void stop();
-
- // total amount of second spent in user mode
- double usertime() const { return ut.time(); }
-
- // total amount of second spent in system mode
- double systime () const { return st.time(); }
-
- // real total amount of second spent.
- double realtime () const { return rt.time(); }
-
- // retourne une petite graine
- // long seed() const { return RealTimer::seed(); }
-
- // Some arithmetic operators to compute cumulative time :
- Timer& operator = (const Timer & T) ;
- const Timer operator - (const Timer & T) const;
- const Timer operator - () ;
- const Timer operator + (const Timer & T) const;
- /* const */Timer& operator += (const Timer & T) { return *this = *this + T; };
- /* const */Timer& operator -= (const Timer & T) { return *this = *this - T; };
-
- // -- methods :
- std::ostream &print (std::ostream &) const;
-
-
-
- RealTimer rt;
- UserTimer ut;
- SysTimer st;
-};
-
-// inline std::ostream &operator << (std::ostream &o, const Timer &T)
-// { return T.print (o); }
+#ifdef __FFLASFFPACK_USE_OPENMP
+# ifndef __GIVARO_USE_OPENMP
+# define __GIVARO_USE_OPENMP 1
+# endif
+#endif
-inline std::ostream &operator << (std::ostream &o, const Timer &T)
-{
- double ut = T.usertime();
- if (ut < 0.0000000001) ut = 0;
- return o << T.realtime() << "s (" << ut << " cpu) ";
-}
+#include <givaro/givtimer.h>
+#ifdef __GIVARO_USE_OPENMP
+#include <givaro/givomptimer.h>
+#endif
-#include <omp.h>
-struct OMPTimer {
- double _c;
- void start() { _c = omp_get_wtime(); }
- void stop() { _c = omp_get_wtime() - _c; }
- void clear() { _c = 0.0; }
- double realtime() const { return _c; }
- double usertime() const { return _c; }
- OMPTimer& operator =(const OMPTimer& t) { _c = t._c; return *this; }
- OMPTimer& operator+=(const OMPTimer& t) { _c += t._c; return *this; }
- OMPTimer& operator-=(const OMPTimer& t) { _c -= t._c; return *this; }
- OMPTimer operator +(const OMPTimer& t) const
- {
- OMPTimer r; r._c = _c + t._c; return r;
- }
- OMPTimer operator -(const OMPTimer& t) const
- {
- OMPTimer r; r._c = _c - t._c; return r;
- }
- OMPTimer operator -() { OMPTimer r; r._c = - _c; return r; }
-};
-//#endif
-//
-inline std::ostream &operator << (std::ostream &o, const OMPTimer &T)
-{
- return o << T.usertime() << "s" ;
+namespace FFLAS {
+ typedef Givaro::Timer Timer ;
+ typedef Givaro::BaseTimer BaseTimer ;
+ typedef Givaro::UserTimer UserTimer ;
+ typedef Givaro::SysTimer SysTimer ;
+#ifdef __GIVARO_USE_OPENMP
+ typedef Givaro::OMPTimer OMPTimer ;
+#endif
}
-
-
-#include "timer.C"
-
-#endif
+#endif // __FFLASFFPACK_timer_H
diff --git a/incremente-versions b/incremente-versions
new file mode 100755
index 0000000..e4abc3d
--- /dev/null
+++ b/incremente-versions
@@ -0,0 +1,135 @@
+#!/bin/csh -f
+# Copyright (c) 2011 FFLAS-FFPACK
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+# adapted from LinBox configuration
+#
+# ========LICENCE========
+# This file is part of the library FFLAS-FFPACK.
+#
+# FFLAS-FFPACK is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+# ========LICENCE========
+#/
+
+
+
+set conf = configure.ac
+set ver = Makefile.am
+
+#verbatim second argument of AC_INIT
+set verb = `grep ^AC_INIT $conf | cut -d',' -f2`
+#removes spaces and brackets
+set vern = `echo "$verb" | sed 's/ //g;s/\[//;s/\]//'`
+echo "Current version is $vern."
+
+
+echo -n "Increment library version ? (y/n)"
+set answ = $<
+if ("$answ" == "y") then
+ set line = `fgrep -n ^AC_INIT $conf | cut -d':' -f1` #gets the line
+ set macro = `echo "$vern" | cut -d'.' -f1` #a version number is macro.minor.micro
+ set minor = `echo "$vern" | cut -d'.' -f2`
+ set micro = `echo "$vern" | cut -d'.' -f3`
+ set tmpfile = `mktemp` #tempfile
+ set sedfile = `mktemp` #temp sed file
+ set pmicro = `echo $micro`
+ @ pmicro ++
+ set pminor = `echo $minor`
+ @ pminor ++
+ set pmacro = `echo $macro`
+ @ pmacro ++
+ echo "Increment micro revision number ($vern -> $macro.$minor.$pmicro) ? press '0' "
+ echo "Increment minor revision number ($vern -> $macro.$pminor.0) ? press '1' "
+ echo -n "Increment macro revision number ($vern -> $pmacro.0.0) ? press '2' "
+ set increm = $<
+ switch ($increm)
+ case 0:
+ set newv = "[$macro.$minor.$pmicro]"
+ breaksw
+ case 1:
+ set newv = "[$macro.$pminor.0]"
+ breaksw
+ case 2:
+ set newv = "[$pmacro.0.0]"
+ breaksw
+ default:
+ set newv = "$verb"
+ echo "'$increm' read. Not incrementing anything."
+ breaksw
+ endsw
+
+ #replacing [ ] and . with escaped version for sed would understand them as 'operators'
+ echo "$line s/$verb/$newv/" | sed 's/\./\\\./g;s/\[/\\\[/g;s/\]/\\\]/g' > $sedfile
+ sed -f $sedfile $conf > $tmpfile
+ #clean up
+ \rm -f $sedfile
+ #diff for changes
+ diff -u0 $conf $tmpfile
+ #if something was changed, confirm incrementation :
+ if ("$newv" != "$verb") then
+ echo -n "Confirmation of incrementation ? (yes/no)"
+ set answ = $<
+ set backupconf = $conf.back$$
+ if ("$answ" == "yes") then
+ \cp -p $conf $backupconf
+ echo "Back-up of $conf made in $backupconf. Now overwriting $conf."
+ \mv -f $tmpfile $conf
+ else
+ echo "'$answ' read. Not incrementing anything."
+ \rm -f $tmpfile
+ exit 0 ;
+ endif
+ #now change Makefile accordingly
+ echo -n "Incrementing Makefile revision accordingly"
+ set tmpfile = `mktemp` #tempfile
+ set sedfile = `mktemp` #tempfile
+ switch ($increm)
+ case 0:
+ echo -n "s/VERSION.*/VERSION=$macro.$minor.$pmicro/" >> $sedfile
+ breaksw
+ case 1:
+ echo "s/VERSION.*/VERSION=$macro.$pminor.0/" > $sedfile
+ breaksw
+ case 2:
+ echo "s/VERSION.*/VERSION=$pmacro.0.0/" > $sedfile
+ breaksw
+ default:
+ echo "Something abnormal happened"
+ exit 1
+ breaksw
+ endsw
+ sed -f $sedfile $ver > $tmpfile
+ \rm -f $sedfile
+ diff -u0 $ver $tmpfile
+ echo -n "Confirmation of incrementation ? (yes/no) "
+ set answ = $<
+ if ("$answ" == "yes") then
+ \mv -f $tmpfile $ver
+ echo " your old $conf is destroyed..."
+ \rm -f $backupconf
+ else
+ echo "'$answ' read. Not incrementing anything."
+ echo " your old $conf is restored..."
+ \rm -f $tmpfile
+ \mv -f $backupconf $conf
+ exit 0
+ endif
+
+ endif
+else
+ echo "'$answ' read. Not doing anything."
+endif
+
+exit 0
+
diff --git a/fflas-ffpack/Makefile.am b/macros/CodeChunk/Makefile.am
similarity index 73%
copy from fflas-ffpack/Makefile.am
copy to macros/CodeChunk/Makefile.am
index b18ba82..a2faf5c 100644
--- a/fflas-ffpack/Makefile.am
+++ b/macros/CodeChunk/Makefile.am
@@ -1,5 +1,5 @@
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2013 FFLAS-FFPACK
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# adapted from LinBox configuration
#
# ========LICENCE========
@@ -22,12 +22,13 @@
#/
-SUBDIRS=fflas ffpack field
+EXTRA_DIST= \
+ clapack.C \
+ lapack.C \
+ cblas.C \
+ givaro.C \
+ cuda.C \
+ sse.C \
+ avx.C \
+ gmp.C
-EXTRA_DIST=fflas-ffpack.doxy utils
-
-pkginclude_HEADERS = config-blas.h \
- fflas-ffpack.h \
- fflas-ffpack-config.h \
- fflas-ffpack-configuration.h \
- fflas-ffpack-optimise.h
diff --git a/macros/CodeChunk/avx.C b/macros/CodeChunk/avx.C
new file mode 100644
index 0000000..2c8bb49
--- /dev/null
+++ b/macros/CodeChunk/avx.C
@@ -0,0 +1,11 @@
+#include <immintrin.h>
+int main() {
+ __m256d P ;
+ double p = 0;
+ P = _mm256_set1_pd(p);
+ P = _mm256_add_pd(P,P);
+#ifdef __try_avx2
+ P = _mm256_fnmadd_pd(P,P,P);
+#endif
+ return 0;
+}
diff --git a/fflas-ffpack/fflas-ffpack.h b/macros/CodeChunk/cblas.C
similarity index 61%
copy from fflas-ffpack/fflas-ffpack.h
copy to macros/CodeChunk/cblas.C
index ceeb9c0..d83652f 100644
--- a/fflas-ffpack/fflas-ffpack.h
+++ b/macros/CodeChunk/cblas.C
@@ -1,7 +1,8 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2011 FFLAS-FFPACK
- * Written by <brice.boyer at imag.fr>
+/*
+ * Copyright (C) 2013 FFLAS-FFPACK group.
+ *
+ * Extirpé form a m4 macro by Brice Boyer (briceboyer) <boyer.brice at gmail.com>.
+ *
*
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
@@ -23,17 +24,16 @@
*
*/
-/*! @file fflas-ffpack/fflas-ffpack.h
- * @ingroup fflas-ffpack
- * @brief Includes FFLAS and FFPACK
- */
-
-
-#ifndef __FFLASFFPACK_fflas_ffpack_H
-#define __FFLASFFPACK_fflas_ffpack_H
-
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
-#include "fflas/fflas.h"
-#include "ffpack/ffpack.h"
-
-#endif // __FFLASFFPACK_fflas_ffpack_H
+#define __FFLASFFPACK_CONFIGURATION
+#include "fflas-ffpack/config-blas.h"
+int main ()
+{
+ double a[4] = {1.,2.,3.,4.};
+ double b[4] = {4.,3.,2.,1.};
+ double c[4];
+ cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
+ if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
+ return -1;
+ else
+ return 0;
+}
diff --git a/fflas-ffpack/fflas-ffpack.h b/macros/CodeChunk/clapack.C
similarity index 61%
copy from fflas-ffpack/fflas-ffpack.h
copy to macros/CodeChunk/clapack.C
index ceeb9c0..ddb6b4b 100644
--- a/fflas-ffpack/fflas-ffpack.h
+++ b/macros/CodeChunk/clapack.C
@@ -1,7 +1,8 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2011 FFLAS-FFPACK
- * Written by <brice.boyer at imag.fr>
+/*
+ * Copyright (C) 2013 FFLAS-FFPACK group.
+ *
+ * Extirpé form a m4 macro by Brice Boyer (briceboyer) <boyer.brice at gmail.com>.
+ *
*
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
@@ -23,17 +24,16 @@
*
*/
-/*! @file fflas-ffpack/fflas-ffpack.h
- * @ingroup fflas-ffpack
- * @brief Includes FFLAS and FFPACK
- */
-
-
-#ifndef __FFLASFFPACK_fflas_ffpack_H
-#define __FFLASFFPACK_fflas_ffpack_H
-
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
-#include "fflas/fflas.h"
-#include "ffpack/ffpack.h"
-
-#endif // __FFLASFFPACK_fflas_ffpack_H
+#define __FFLASFFPACK_CONFIGURATION
+#define __FFLASFFPACK_HAVE_LAPACK 1
+#define __FFLASFFPACK_HAVE_CLAPACK 1
+#include "fflas-ffpack/config-blas.h"
+int main () {
+ double a[4] = {1.,2.,3.,4.};
+ CBLAS_INT ipiv[2];
+ clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
+ if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
+ return -1;
+ else
+ return 0;
+}
diff --git a/macros/CodeChunk/cuda.C b/macros/CodeChunk/cuda.C
new file mode 100644
index 0000000..7d190f6
--- /dev/null
+++ b/macros/CodeChunk/cuda.C
@@ -0,0 +1,9 @@
+#include <stdio.h>
+#include <cuda_runtime.h>
+#include <cusparse.h>
+
+int main() {
+ cusparseHandle_t handle = 0;
+ cusparseCreate( &handle );
+ return 0 ;
+}
diff --git a/fflas-ffpack/fflas-ffpack.h b/macros/CodeChunk/givaro.C
similarity index 61%
copy from fflas-ffpack/fflas-ffpack.h
copy to macros/CodeChunk/givaro.C
index ceeb9c0..1b02716 100644
--- a/fflas-ffpack/fflas-ffpack.h
+++ b/macros/CodeChunk/givaro.C
@@ -1,7 +1,8 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2011 FFLAS-FFPACK
- * Written by <brice.boyer at imag.fr>
+/*
+ * Copyright (C) 2013 FFLAS-FFPACK group.
+ *
+ * Extirpé form a m4 macro by Brice Boyer (briceboyer) <boyer.brice at gmail.com>.
+ *
*
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
@@ -23,17 +24,11 @@
*
*/
-/*! @file fflas-ffpack/fflas-ffpack.h
- * @ingroup fflas-ffpack
- * @brief Includes FFLAS and FFPACK
- */
-
-
-#ifndef __FFLASFFPACK_fflas_ffpack_H
-#define __FFLASFFPACK_fflas_ffpack_H
-
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
-#include "fflas/fflas.h"
-#include "ffpack/ffpack.h"
+#include <givaro/givconfig.h>
+int main () {
+ if (GIVARO_VERSION < $version_min || GIVARO_VERSION >= $version_max || GIVARO_VERSION>0x030000)
+ return -1;
+ else
+ return 0; /* old version of Givaro are defined as hexa 0x03yyzz*/
+}
-#endif // __FFLASFFPACK_fflas_ffpack_H
diff --git a/macros/CodeChunk/gmp.C b/macros/CodeChunk/gmp.C
new file mode 100644
index 0000000..4db77b3
--- /dev/null
+++ b/macros/CodeChunk/gmp.C
@@ -0,0 +1,6 @@
+#include <gmpxx.h>
+ int main () {
+ if (__GNU_MP_VERSION < 4) return -1;
+ mpz_class a(2),b(3),c(5); if ( a+b == c ) return 0; else return -1; }
+
+
diff --git a/fflas-ffpack/fflas-ffpack.h b/macros/CodeChunk/lapack.C
similarity index 61%
copy from fflas-ffpack/fflas-ffpack.h
copy to macros/CodeChunk/lapack.C
index ceeb9c0..e0509ce 100644
--- a/fflas-ffpack/fflas-ffpack.h
+++ b/macros/CodeChunk/lapack.C
@@ -1,7 +1,8 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2011 FFLAS-FFPACK
- * Written by <brice.boyer at imag.fr>
+/*
+ * Copyright (C) 2013 FFLAS-FFPACK group.
+ *
+ * Extirpé form a m4 macro by Brice Boyer (briceboyer) <boyer.brice at gmail.com>.
+ *
*
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
@@ -23,17 +24,16 @@
*
*/
-/*! @file fflas-ffpack/fflas-ffpack.h
- * @ingroup fflas-ffpack
- * @brief Includes FFLAS and FFPACK
- */
-
-
-#ifndef __FFLASFFPACK_fflas_ffpack_H
-#define __FFLASFFPACK_fflas_ffpack_H
-
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
-#include "fflas/fflas.h"
-#include "ffpack/ffpack.h"
-
-#endif // __FFLASFFPACK_fflas_ffpack_H
+#define __FFLASFFPACK_CONFIGURATION
+#define __FFLASFFPACK_HAVE_LAPACK 1
+// #define __FFLASFFPACK_HAVE_CLAPACK 1
+#include "fflas-ffpack/config-blas.h"
+int main () {
+ double a[4] = {1.,2.,3.,4.};
+ CBLAS_INT ipiv[2];
+ clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
+ if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
+ return -1;
+ else
+ return 0;
+}
diff --git a/macros/CodeChunk/sse.C b/macros/CodeChunk/sse.C
new file mode 100644
index 0000000..0ace724
--- /dev/null
+++ b/macros/CodeChunk/sse.C
@@ -0,0 +1,12 @@
+#include <immintrin.h>
+
+int main() {
+ // SSE 2
+ __m128d P ;
+ double p = 0;
+ P = _mm_set1_pd(p);
+ P = _mm_add_pd(P,P);
+ // SSE 4.1
+ P = _mm_floor_pd(P);
+ return 0;
+}
diff --git a/macros/Makefile.am b/macros/Makefile.am
index 57b58d9..6ef1b6d 100644
--- a/macros/Makefile.am
+++ b/macros/Makefile.am
@@ -1,5 +1,5 @@
# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# adapted from LinBox configuration
#
# ========LICENCE========
@@ -21,19 +21,20 @@
# ========LICENCE========
#/
+SUBDIRS=CodeChunk
EXTRA_DIST= \
aclocal-include.m4 \
- blas-check.m4 \
- blasATLAS-check.m4 \
- blasGOTO-check.m4 \
- blasGSL-check.m4 \
+ fflas-ffpack-blas.m4 \
config-header.m4 \
debug.m4 \
fflas-ffpack-doc.m4 \
fflas-ffpack-misc.m4 \
fflas-ffpack-opt.m4 \
+ fflas-ffpack-precompile.m4\
givaro-check.m4 \
- gmp-check.m4 \
- lapack-check.m4
+ mkl-check.m4 \
+ avx-check.m4 \
+ omp-check.m4 \
+ cuda-check.m4
diff --git a/macros/Makefile.in b/macros/Makefile.in
deleted file mode 100644
index 5bb0ba8..0000000
--- a/macros/Makefile.in
+++ /dev/null
@@ -1,454 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# adapted from LinBox configuration
-#
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = macros
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-EXTRA_DIST = \
- aclocal-include.m4 \
- blas-check.m4 \
- blasATLAS-check.m4 \
- blasGOTO-check.m4 \
- blasGSL-check.m4 \
- config-header.m4 \
- debug.m4 \
- fflas-ffpack-doc.m4 \
- fflas-ffpack-misc.m4 \
- fflas-ffpack-opt.m4 \
- givaro-check.m4 \
- gmp-check.m4 \
- lapack-check.m4
-
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps macros/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps macros/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-tags: TAGS
-TAGS:
-
-ctags: CTAGS
-CTAGS:
-
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: install-am install-strip
-
-.PHONY: all all-am check check-am clean clean-generic clean-libtool \
- distclean distclean-generic distclean-libtool distdir dvi \
- dvi-am html html-am info info-am install install-am \
- install-data install-data-am install-dvi install-dvi-am \
- install-exec install-exec-am install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/macros/aclocal-include.m4 b/macros/aclocal-include.m4
index 84c496b..775e335 100644
--- a/macros/aclocal-include.m4
+++ b/macros/aclocal-include.m4
@@ -1,6 +1,6 @@
dnl aclocal-include.m4
dnl Copyright (c) 2011 FFLAS-FFPACK
-dnl written by BB <bboyer at imag.fr>
+dnl written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
dnl adapted from LinBox configuration
dnl
dnl ========LICENCE========
diff --git a/macros/avx-check.m4 b/macros/avx-check.m4
new file mode 100644
index 0000000..28c505a
--- /dev/null
+++ b/macros/avx-check.m4
@@ -0,0 +1,123 @@
+dnl Check for AVX
+dnl Copyright (c) 2011 FFLAS-FFPACK
+dnl Created by BB, 2014-03-25
+dnl ========LICENCE========
+dnl This file is part of the library FFLAS-FFPACK.
+dnl
+dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+dnl ========LICENCE========
+dnl
+
+dnl FF_CHECK_AVX
+dnl
+dnl turn on AVX or AVX2 extensions if available
+
+AC_DEFUN([FF_CHECK_AVX],
+[
+ AC_ARG_ENABLE(avx,
+ [ AC_HELP_STRING([--enable-avx], [ Use Intel(r) AVX ]) ],
+ [ avec_avx=$enable_avx ],
+ [ avec_avx=yes ]
+ )
+
+ AC_MSG_CHECKING(for AVX)
+
+ dnl Is check enabled?
+ AS_IF([ test "x$avec_avx" != "xno" ],
+ [
+ BACKUP_CXXFLAGS=${CXXFLAGS}
+ CODE_AVX=`cat macros/CodeChunk/avx.C`
+
+ dnl Check for AVX
+ dnl Intel compilers usually do not require option to enable avx
+ dnl Thus, we test with no option on
+ for switch_avxflags in "" "-mavx"; do
+ CXXFLAGS="${BACKUP_CXXFLAGS} -O0 ${switch_avxflags}"
+ AC_TRY_RUN([ ${CODE_AVX} ],
+ [
+ avx_found="yes"
+ AVXFLAGS=${switch_avxflags}
+ break
+ ],
+ [
+ avx_found="no"
+ ],
+ [
+ echo "cross compiling...disabling"
+ avx_found="no"
+ break
+ ])
+ done
+
+ dnl Is AVX found?
+ AS_IF([ test "x$avx_found" = "xyes" ],
+ [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(USE_AVX,1,[Define if AVX is available])
+ AC_SUBST(AVXFLAGS)
+
+ dnl Check for AVX2
+ AC_MSG_CHECKING(for AVX2)
+
+ for switch_avx2flags in "" "-mfma -mavx2"; do
+ CXXFLAGS="${BACKUP_CXXFLAGS} -O0 ${switch_avx2flags}"
+ AC_TRY_RUN(
+ [
+ #define __try_avx2
+ ${CODE_AVX}
+ ],
+ [
+ avx2_found="yes"
+ AVX2FLAGS=${switch_avx2flags}
+ break
+ ],
+ [
+ avx2_found="no"
+ ],
+ [
+ echo "cross compiling...disabling"
+ avx2_found = "no"
+ break
+ ])
+ done
+
+ dnl Is AVX2 found?
+ AS_IF([ test "x$avx2_found" = "xyes" ],
+ [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(USE_AVX2,1,[Define if AVX2 is available])
+ AVXFLAGS=${AVX2FLAGS}
+ AC_SUBST(AVXFLAGS)
+ ],
+ [
+ dnl No AVX2
+ AC_MSG_RESULT(no)
+ ]
+ )
+ ],
+ [
+ dnl No AVX
+ AC_MSG_RESULT(no)
+ ]
+ )
+
+ CXXFLAGS=${BACKUP_CXXFLAGS}
+ ],
+ [
+ dnl --enable-avx=no
+ AC_MSG_RESULT(no [disabled])
+ ]
+ )
+])
diff --git a/macros/ax_cxx_compile_stdcxx_11.m4 b/macros/ax_cxx_compile_stdcxx_11.m4
new file mode 100644
index 0000000..af37acd
--- /dev/null
+++ b/macros/ax_cxx_compile_stdcxx_11.m4
@@ -0,0 +1,133 @@
+# ============================================================================
+# http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx_11.html
+# ============================================================================
+#
+# SYNOPSIS
+#
+# AX_CXX_COMPILE_STDCXX_11([ext|noext],[mandatory|optional])
+#
+# DESCRIPTION
+#
+# Check for baseline language coverage in the compiler for the C++11
+# standard; if necessary, add switches to CXXFLAGS to enable support.
+#
+# The first argument, if specified, indicates whether you insist on an
+# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g.
+# -std=c++11). If neither is specified, you get whatever works, with
+# preference for an extended mode.
+#
+# The second argument, if specified 'mandatory' or if left unspecified,
+# indicates that baseline C++11 support is required and that the macro
+# should error out if no mode with that support is found. If specified
+# 'optional', then configuration proceeds regardless, after defining
+# HAVE_CXX11 if and only if a supporting mode is found.
+#
+# LICENSE
+#
+# Copyright (c) 2008 Benjamin Kosnik <bkoz at redhat.com>
+# Copyright (c) 2012 Zack Weinberg <zackw at panix.com>
+# Copyright (c) 2013 Roy Stogner <roystgnr at ices.utexas.edu>
+#
+# Copying and distribution of this file, with or without modification, are
+# permitted in any medium without royalty provided the copyright notice
+# and this notice are preserved. This file is offered as-is, without any
+# warranty.
+
+#serial 3
+
+m4_define([_AX_CXX_COMPILE_STDCXX_11_testbody], [
+ template <typename T>
+ struct check
+ {
+ static_assert(sizeof(int) <= sizeof(T), "not big enough");
+ };
+
+ typedef check<check<bool>> right_angle_brackets;
+
+ int a;
+ decltype(a) b;
+
+ typedef check<int> check_type;
+ check_type c;
+ check_type&& cr = static_cast<check_type&&>(c);
+
+ auto d = a;
+])
+
+AC_DEFUN([AX_CXX_COMPILE_STDCXX_11], [dnl
+ m4_if([$1], [], [],
+ [$1], [ext], [],
+ [$1], [noext], [],
+ [m4_fatal([invalid argument `$1' to AX_CXX_COMPILE_STDCXX_11])])dnl
+ m4_if([$2], [], [ax_cxx_compile_cxx11_required=true],
+ [$2], [mandatory], [ax_cxx_compile_cxx11_required=true],
+ [$2], [optional], [ax_cxx_compile_cxx11_required=false],
+ [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX_11])])dnl
+ AC_LANG_PUSH([C++])dnl
+ ac_success=no
+ AC_CACHE_CHECK(whether $CXX supports C++11 features by default,
+ ax_cv_cxx_compile_cxx11,
+ [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])],
+ [ax_cv_cxx_compile_cxx11=yes],
+ [ax_cv_cxx_compile_cxx11=no])])
+ if test x$ax_cv_cxx_compile_cxx11 = xyes; then
+ ac_success=yes
+ fi
+
+ m4_if([$1], [noext], [], [dnl
+ if test x$ac_success = xno; then
+ for switch in -std=gnu++11 -std=gnu++0x; do
+ cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch])
+ AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch,
+ $cachevar,
+ [ac_save_CXXFLAGS="$CXXFLAGS"
+ CXXFLAGS="$CXXFLAGS $switch"
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])],
+ [eval $cachevar=yes],
+ [eval $cachevar=no])
+ CXXFLAGS="$ac_save_CXXFLAGS"])
+ if eval test x\$$cachevar = xyes; then
+ CXXFLAGS="$CXXFLAGS $switch"
+ ac_success=yes
+ break
+ fi
+ done
+ fi])
+
+ m4_if([$1], [ext], [], [dnl
+ if test x$ac_success = xno; then
+ for switch in -std=c++11 -std=c++0x; do
+ cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx11_$switch])
+ AC_CACHE_CHECK(whether $CXX supports C++11 features with $switch,
+ $cachevar,
+ [ac_save_CXXFLAGS="$CXXFLAGS"
+ CXXFLAGS="$CXXFLAGS $switch"
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_11_testbody])],
+ [eval $cachevar=yes],
+ [eval $cachevar=no])
+ CXXFLAGS="$ac_save_CXXFLAGS"])
+ if eval test x\$$cachevar = xyes; then
+ CXXFLAGS="$CXXFLAGS $switch"
+ ac_success=yes
+ break
+ fi
+ done
+ fi])
+ AC_LANG_POP([C++])
+ if test x$ax_cxx_compile_cxx11_required = xtrue; then
+ if test x$ac_success = xno; then
+ AC_MSG_ERROR([*** A compiler with support for C++11 language features is required.])
+ fi
+ else
+ if test x$ac_success = xno; then
+ HAVE_CXX11=0
+ AC_MSG_NOTICE([No compiler with C++11 support was found])
+ else
+ HAVE_CXX11=1
+ AC_DEFINE(HAVE_CXX11,1,
+ [define if the compiler supports basic C++11 syntax])
+ fi
+
+ AC_SUBST(HAVE_CXX11)
+ fi
+])
diff --git a/macros/blas-check.m4 b/macros/blas-check.m4
deleted file mode 100644
index a56b46a..0000000
--- a/macros/blas-check.m4
+++ /dev/null
@@ -1,176 +0,0 @@
-dnl Check for BLAS
-dnl Copyright Pascal Giorgi 2005
-dnl Modified Brice Boyer 2011
-dnl This file is part of FFLAS-FFPACK (and comes from LinBox)
-
-dnl
-dnl ========LICENCE========
-dnl This file is part of the library FFLAS-FFPACK.
-dnl
-dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public
-dnl License as published by the Free Software Foundation; either
-dnl version 2.1 of the License, or (at your option) any later version.
-dnl
-dnl This library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public
-dnl License along with this library; if not, write to the Free Software
-dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-dnl ========LICENCE========
-dnl/
-
-
-
-dnl **********************************
-dnl * TODO *
-dnl **********************************
-dnl no support yet to MKL
-dnl AS_IF([test -r "$BLAS_VAL/include/mkl_cblas.h"],
-dnl [ BLAS_LIBS="-L${BLAS_VAL}/lib/${MKL_ARCH}/ -lmkl_lapack64 -lmkl -lvml -lguide" ])
-dnl **********************************
-
-
-
-dnl FF_CHECK_BLAS ([MINIMUM-VERSION [, ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]])
-dnl
-dnl Test for BLAS and define BLAS_LIBS
-
-AC_DEFUN([FF_CHECK_BLAS],
- [ AC_ARG_WITH(blas,
- [AC_HELP_STRING([--with-blas=<lflags>],
- [Use BLAS library. This library is mandatory for FFLAS-FFPACK
- compilation. The user has the responsability to
- provide library flags such that the compiler
- will find and use BLAS (and LAPACK). An example
- could be --with-blas=/path/to/blas or
- --with-blas="-L/path/to/blas/lib -lsomeblas".\n
- * Warning : we don't really handle .a archives alone...])
- ])
-
- BACKUP_CXXFLAGS=${CXXFLAGS}
- BACKUP_LIBS=${LIBS}
-
- AS_IF([ test -n "$with_blas"],[
-
- AC_MSG_CHECKING("for BLAS ($with_blas)")
-
- BLAS_LIBS="$with_blas"
- CBLAS_FLAG="-D__FFLASFFPACK_HAVE_CBLAS"
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
- AC_TRY_LINK(
- [#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"],
- [double a;],
- [
- AC_TRY_RUN(
- [#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- }
- ],[
- blas_found="yes"
- ],[
- blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS
- ],[
- blas_found="yes"
- blas_cross="yes"
- ])
- ],
- [
- blas_found="no"
- ])
-
-
- AS_IF([ test "x$blas_found" = "xyes" ],
- [ BLAS_VENDOR="USER"
- AC_SUBST(BLAS_VENDOR)
- AC_SUBST(BLAS_LIBS)
- AC_SUBST(CBLAS_FLAG)
- AC_SUBST(BLAS_PATH)
- AC_DEFINE(HAVE_BLAS,1,[Define if BLAS is installed])
- AC_DEFINE(HAVE_CBLAS,1,[Define if C interface to BLAS is installed])
- BLAS_FOUND=true
- AC_SUBST(BLAS_FOUND)
- dnl AC_DEFINE(BLAS_AVAILABLE,,[Define if BLAS routines are available])
- HAVE_BLAS=yes
- AS_IF([test "x$blas_cross" != "xyes"],
- [ AC_MSG_RESULT(found (cblas)) ] ,
- [AC_MSG_RESULT(unknown)
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your BLAS are good. I am assuming it is."])
- ], dnl CBLAS not found. Looking for BLAS
- [
- CBLAS_FLAG=""
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
-
- AC_TRY_LINK(
- [#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"],
- [double a;],
- [
- AC_TRY_RUN(
- [#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- }
- ],[
- blas_found="yes"
- ],[
- blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS
- ],[
- blas_found="yes"
- blas_cross="yes"
- ])
- ],
- [
- blas_found="no"
- ])
- AS_IF([test "x$blas_found" = "xyes"],
- [ BLAS_VENDOR="USER"
- AC_SUBST(BLAS_VENDOR)
- AC_SUBST(BLAS_LIBS)
- AC_SUBST(CBLAS_FLAG)
- AC_SUBST(BLAS_PATH)
- AC_DEFINE(HAVE_BLAS,1,[Define if BLAS is installed])
- dnl AC_DEFINE(HAVE_CBLAS,1,[Define if C interface to BLAS is installed])
- BLAS_FOUND=true
- AC_SUBST(BLAS_FOUND)
- dnl AC_DEFINE(BLAS_AVAILABLE,,[Define if BLAS routines are available])
- HAVE_BLAS=yes
- AC_MSG_RESULT(found (cblas)) ] ,
- [ AC_MSG_RESULT(no) ])
-
- ])
-
-
- ])
-
-
- AM_CONDITIONAL(FFLASFFPACK_HAVE_BLAS, test "x$HAVE_BLAS" = "xyes")
-
- CXXFLAGS=${BACKUP_CXXFLAGS}
- LIBS=${BACKUP_LIBS}
- dnl unset LD_LIBRARY_PATH
-
-
-])
-
-
diff --git a/macros/blasATLAS-check.m4 b/macros/blasATLAS-check.m4
deleted file mode 100644
index 5dd5cd9..0000000
--- a/macros/blasATLAS-check.m4
+++ /dev/null
@@ -1,268 +0,0 @@
-dnl Check for BLAS
-dnl Copyright Pascal Giorgi 2005
-dnl Modified Brice Boyer 2011
-
-dnl
-dnl ========LICENCE========
-dnl This file is part of the library FFLAS-FFPACK.
-dnl
-dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public
-dnl License as published by the Free Software Foundation; either
-dnl version 2.1 of the License, or (at your option) any later version.
-dnl
-dnl This library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public
-dnl License along with this library; if not, write to the Free Software
-dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-dnl ========LICENCE========
-dnl/
-
-
-
-dnl FF_CHECK_CBLAS ([MINIMUM-VERSION [, ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]])
-dnl Test for C interface to BLAS and define BLAS_LIBS
-
-AC_DEFUN([FF_CHECK_CBLAS],
- [ AC_ARG_WITH(cblas,
- [AC_HELP_STRING([--with-cblas=<lib>], [Use BLAS library. This library is mandatory for FFLAS-FFPACK
- compilation. If argument is <empty> that means
- the library is reachable with the standard search path
- (/usr or /usr/local). Otherwise you give the <path> to
- the directory which contains the library. ])
- ])
-
- BLAS_HOME_PATH="$with_cblas ${DEFAULT_CHECKING_PATH}"
-
- dnl Check for existence
-
- BACKUP_CXXFLAGS=${CXXFLAGS}
- BACKUP_LIBS=${LIBS}
-
- AC_MSG_CHECKING(for C interface to BLAS with -lcblas)
-
- dnl **************************************
- dnl Check first for C interface to BLAS
- dnl **************************************
-
-
- for BLAS_HOME in ${BLAS_HOME_PATH} ; do
- dnl echo looking in ${BLAS_HOME}
- CBLAS="yes"
- CBLAS_FLAG="-D__FFLASFFPACK_HAVE_CBLAS"
- ATLAS_LIBS="-lcblas"
- AS_IF(
- dnl obscure
- [ test -r "/System/Library/Frameworks/Accelerate.framework" ],
- [BLAS_LIBS="-Wl,-framework -Wl,Accelerate"],
- dnl lib/libcblas.* ?
- [ test -r "$BLAS_HOME/lib/libcblas.a" -o -r "$BLAS_HOME/lib/libcblas.so" ],
- [ ATLAS_NEEDED=`nm -u $BLAS_HOME/lib/libcblas.a | grep ATL`
- ATLAS_NEEDED2=`nm -Du $BLAS_HOME/lib/libcblas.so | grep ATL`
- AS_IF( [test -n "$ATLAS_NEEDED" -o -n "$ATLAS_NEEDED2"],
- [ATLAS_LIBS=" ${ATLAS_LIBS} -latlas"])
-
- BLAS_LIBS=" ${ATLAS_LIBS}"
- BLAS_PATH="${BLAS_HOME}/lib"
-
- AS_IF([ test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"],
- [BLAS_LIBS="-L${BLAS_HOME}/lib ${ATLAS_LIBS}"])
- ],
- dnl libcblas.* ?
- [ test -r "$BLAS_HOME/libcblas.a" -o -r "$BLAS_HOME/libcblas.so" ],
- [ ATLAS_NEEDED=`nm -u $BLAS_HOME/libcblas.a | grep ATL`
- ATLAS_NEEDED2=`nm -Du $BLAS_HOME/libcblas.so | grep ATL`
- AS_IF( [test -n "$ATLAS_NEEDED" -o -n "$ATLAS_NEEDED2"],
- [ATLAS_LIBS=" ${ATLAS_LIBS} -latlas"])
-
- BLAS_LIBS=" ${ATLAS_LIBS}"
- BLAS_PATH="${BLAS_HOME}"
-
- AS_IF([ test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"],
- [BLAS_LIBS="-L${BLAS_HOME} ${ATLAS_LIBS}"])
- ]
- )
-
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
- dnl echo $LIBS
-
- AC_TRY_LINK(
- [#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"],
- [double a;],
- [
- AC_TRY_RUN(
- [#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- }
- ],[
- blas_found="yes"
- break
- ],[
- blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS
- ],[
- blas_found="yes"
- blas_cross="yes"
- break
- ]
- )
- ],
- [
- blas_found="no"
- blas_checked="$checked $BLAS_HOME"
- unset BLAS_LIBS
- ]
- )
- done
-
-
-
- AS_IF([ test "x$blas_found" = "xyes" ],
- BLAS_VENDOR="ATLAS"
- AC_SUBST(BLAS_VENDOR)
-
- [ AC_SUBST(BLAS_LIBS)
- AC_SUBST(BLAS_PATH)
- AC_SUBST(CBLAS_FLAG)
- AC_DEFINE(HAVE_BLAS,1,[Define if BLAS is installed])
- AC_DEFINE(HAVE_CBLAS,1,[Define if C interface to BLAS is available])
- dnl AC_DEFINE(BLAS_AVAILABLE,,[Define if BLAS routines are available])
- HAVE_BLAS=yes
- BLAS_FOUND=true
- AC_SUBST(BLAS_FOUND)
-
- AS_IF([ test "x$blas_cross" != "xyes" ],
- [AC_MSG_RESULT(found)],
- [AC_MSG_RESULT(unknown)
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your BLAS are good. I am assuming it is."
- ])
-
- ifelse([$2], , :, [$2])
- ],
- [ test -n "$blas_problem" ],
- [ AC_MSG_RESULT(not working) ],
- [ test "x$blas_found" = "xno" ],
- [ AC_MSG_RESULT(not found)]
- )
- AS_IF([ test "x$blas_found" != "xyes" ],
- [
- AC_MSG_CHECKING(for C interface to BLAS with -lblas)
-
- dnl **************************************
- dnl Check first for C interface to BLAS
- dnl **************************************
-
-
- for BLAS_HOME in ${BLAS_HOME_PATH} ; do
- dnl echo looking in ${BLAS_HOME}
- CBLAS="yes"
- CBLAS_FLAG="-D__FFLASFFPACK_HAVE_CBLAS"
- ATLAS_LIBS="-lblas"
- AS_IF(
- dnl lib/libblas.* ?
- [ test -r "$BLAS_HOME/lib/libblas.a" -o -r "$BLAS_HOME/lib/libblas.so" ],
- [
- dnl CBLAS_SYM=`nm -Du $BLAS_HOME/lib/libblas.so | grep cblas_'
- BLAS_LIBS=" ${ATLAS_LIBS}"
- BLAS_PATH="${BLAS_HOME}/lib"
- AS_IF([ test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"],
- [BLAS_LIBS="-L${BLAS_HOME}/lib ${ATLAS_LIBS}"])
- ],
- dnl libblas.* ?
- [ test -r "$BLAS_HOME/libblas.a" -o -r "$BLAS_HOME/libblas.so" ],
- [ BLAS_LIBS=" ${ATLAS_LIBS}"
- BLAS_PATH="${BLAS_HOME}"
- AS_IF([ test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"],
- [BLAS_LIBS="-L${BLAS_HOME} ${ATLAS_LIBS}"])
- ])
-
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
- AC_TRY_LINK(
- [#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"],
- [double a;],
- [
- AC_TRY_RUN(
- [#define __FFLASFFPACK_CONFIGURATION
-#include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- }
- ],[
- blas_found="yes"
- break
- ],[
- blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS
- ],[
- blas_found="yes"
- blas_cross="yes"
- break
- ]
- )
- ],
- [
- blas_found="no"
- blas_checked="$checked $BLAS_HOME"
- unset BLAS_LIBS
- ]
- )
- done ;
-
- AS_IF([ test "x$blas_found" = "xyes" ],
- [ BLAS_VENDOR="OTHER"
- AC_SUBST(BLAS_VENDOR)
- AC_SUBST(BLAS_LIBS)
- AC_SUBST(BLAS_PATH)
- AC_SUBST(CBLAS_FLAG)
- AC_DEFINE(HAVE_BLAS,1,[Define if BLAS is installed])
- AC_DEFINE(HAVE_CBLAS,1,[Define if C interface to BLAS is available])
- dnl AC_DEFINE(BLAS_AVAILABLE,,[Define if BLAS routines are available])
- HAVE_BLAS=yes
- BLAS_FOUND=true
- AC_SUBST(BLAS_FOUND)
- AS_IF([ test "x$blas_cross" != "xyes" ],
- [AC_MSG_RESULT(found)],
- [AC_MSG_RESULT(unknown)
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your BLAS are good. I am assuming it is."
- ])
- ifelse([$2], , :, [$2])
- ],
- [ test -n "$blas_problem" ],
- [ AC_MSG_RESULT(not working) ],
- dnl echo "Sorry, your BLAS are not working. Disabling."
- [ test "x$blas_found" = "xno" ],
- [ AC_MSG_RESULT(not found)]
- )
- ])
-
-
- AM_CONDITIONAL(FFLASFFPACK_HAVE_BLAS, test "x$HAVE_BLAS" = "xyes")
-
- CXXFLAGS=${BACKUP_CXXFLAGS}
- LIBS=${BACKUP_LIBS}
- dnl unset LD_LIBRARY_PATH
-
-
-])
diff --git a/macros/blasGOTO-check.m4 b/macros/blasGOTO-check.m4
deleted file mode 100644
index 86c2472..0000000
--- a/macros/blasGOTO-check.m4
+++ /dev/null
@@ -1,145 +0,0 @@
-dnl Check for BLAS
-dnl Copyright 2011 Brice Boyer <bboyer at imag.fr>
-dnl
-dnl ========LICENCE========
-dnl This file is part of the library FFLAS-FFPACK.
-dnl
-dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public
-dnl License as published by the Free Software Foundation; either
-dnl version 2.1 of the License, or (at your option) any later version.
-dnl
-dnl This library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public
-dnl License along with this library; if not, write to the Free Software
-dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-dnl ========LICENCE========
-dnl/
-
-
-
-
-dnl FF_CHECK_BLAS ([MINIMUM-VERSION [, ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]])
-dnl
-dnl Test for BLAS and define BLAS_LIBS
-
-AC_DEFUN([FF_CHECK_GOTOBLAS],
- [
- AC_ARG_WITH(gotoblas2,
- [AC_HELP_STRING([--with-gotoblas2=<path|yes>],
- [Use GOTO2 blas library. BLAS are mandatory for FFLAS-FFPACK
- compilation. If argument is <yes> that means
- the library is reachable with the standard search path
- (/usr or /usr/local). Otherwise you give the <path> to
- the directory which contains the library. If empty, GOTO2 are not searched for. ])
- ])
- dnl echo $with_gotoblas2
- dnl echo $withval
-
- AS_IF([ test -n "$with_gotoblas2" ],
- [ BLAS_HOME_PATH="${DEFAULT_CHECKING_PATH}"
- AS_IF([ test "$with_gotoblas2" != "yes" ],
- [ BLAS_HOME_PATH="$with_gotoblas2 ${DEFAULT_CHECKING_PATH}" ])
-
- BACKUP_CXXFLAGS=${CXXFLAGS}
- BACKUP_LIBS=${LIBS}
-
- AC_MSG_CHECKING(for C interface to BLAS with -lgoto2)
-
-
- for BLAS_HOME in ${BLAS_HOME_PATH} ; do
- dnl remove last '/'
- dnl BLAS_HOME=`echo $BLAS_HOME | sed 's/\(.*\)\/$/\1/'`
- CBLAS="yes"
- CBLAS_FLAG="-D__FFLASFFPACK_HAVE_CBLAS"
-
- AS_IF([ test -r "$BLAS_HOME/lib/libgoto2.a" -o -r "$BLAS_HOME/lib/libgoto2.so" ],
- [BLAS_LIBS="-lgoto2 -pthread"
- AS_IF([ test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"],
- [BLAS_LIBS="-L${BLAS_HOME}/lib -Wl,-R,${BLAS_HOME}/lib ${BLAS_LIBS}"])
- ],
- [test -r "$BLAS_HOME/libgoto2.a" -o -r "$BLAS_HOME/libgoto2.so" ],
- [ BLAS_LIBS="-lgoto2 -pthread"
- AS_IF([ test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"],
- [BLAS_LIBS="-L${BLAS_HOME} -Wl,-R,${BLAS_HOME}/lib ${BLAS_LIBS}"])
- ])
-
- AS_CASE(["x$CCNAM"],
- ["xgcc"],[BLAS_LIBS="${BLAS_LIBS} -lgfortran"],
- ["xicc"],[BLAS_LIBS="${BLAS_LIBS} -lifcore"])
-
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
- AC_TRY_LINK(
- [#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"],
- [double a;],
- [
- AC_TRY_RUN(
- [#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
- int main ()
- { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- } ],
- [ blas_found="yes"
- BLAS_PATH=${BLAS_HOME}
- break ],
- [ blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS ],
- [ blas_found="yes"
- blas_cross="yes"
- BLAS_PATH=${BLAS_HOME}
- break ])
- ],
- [
- blas_found="no"
- blas_checked="$checked $BLAS_HOME"
- unset BLAS_LIBS ]
- )
- done
-
- AS_IF([ test "x$blas_found" = "xyes" ],[
- BLAS_VENDOR="GOTO2"
- AC_SUBST(BLAS_VENDOR)
- AC_SUBST(BLAS_LIBS)
- AC_SUBST(CBLAS_FLAG)
- AC_SUBST(BLAS_PATH)
- AC_DEFINE(HAVE_BLAS,1,[Define if BLAS is installed])
- AC_DEFINE(HAVE_CBLAS,1,[Define if C interface to BLAS is available])
- dnl AC_DEFINE(BLAS_AVAILABLE,,[Define if BLAS routines are available])
- BLAS_FOUND=true
- AC_SUBST(BLAS_FOUND)
- HAVE_BLAS=yes
- AS_IF([ test "x$blas_cross" != "xyes" ], [
- AC_MSG_RESULT(found)],
- [AC_MSG_RESULT(unknown)
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your BLAS are good. I am assuming it is."
- ])
- ifelse([$2], , :, [$2])
- ],
- [ test -n "$blas_problem" ],
- [ AC_MSG_RESULT(not working) ],
- dnl echo "Sorry, your BLAS are not working. Disabling."
- [test "x$blas_found" = "xno" ],
- [AC_MSG_RESULT(not found)]
- )
-
-
- CXXFLAGS=${BACKUP_CXXFLAGS}
- LIBS=${BACKUP_LIBS}
- ])
-
-])
-
-
diff --git a/macros/blasGSL-check.m4 b/macros/blasGSL-check.m4
deleted file mode 100644
index 4d6c494..0000000
--- a/macros/blasGSL-check.m4
+++ /dev/null
@@ -1,141 +0,0 @@
-dnl Check for BLAS
-dnl Copyright 2011 Brice Boyer <bboyer at imag.fr>
-dnl This file is part of FFLAS-FFPACK (and comes from LinBox)
-dnl
-dnl ========LICENCE========
-dnl This file is part of the library FFLAS-FFPACK.
-dnl
-dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public
-dnl License as published by the Free Software Foundation; either
-dnl version 2.1 of the License, or (at your option) any later version.
-dnl
-dnl This library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public
-dnl License along with this library; if not, write to the Free Software
-dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-dnl ========LICENCE========
-dnl/
-
-
-
-
-dnl FF_CHECK_BLAS ([MINIMUM-VERSION [, ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]])
-dnl
-dnl Test for BLAS and define BLAS_LIBS
-
-AC_DEFUN([FF_CHECK_GSL],
- [
- AC_ARG_WITH(gsl,
- [AC_HELP_STRING([--with-gsl=<path|yes>],
- [Use GSL blas library. BLAS are mandatory for FFLAS-FFPACK
- compilation. If argument is <yes> that means
- the library is reachable with the standard search path
- (/usr or /usr/local). Otherwise you give the <path> to
- the directory which contains the library. If empty, GSL is not
- searched for. ])
- ])
- dnl echo $with_gsl
- dnl echo $withval
-
- AS_IF([ test -n "$with_gsl" ],
- [ BLAS_HOME_PATH="${DEFAULT_CHECKING_PATH}"
- AS_IF([ test "$with_gsl" != "yes" ],
- [ BLAS_HOME_PATH="$with_gsl ${DEFAULT_CHECKING_PATH}" ])
-
- BACKUP_CXXFLAGS=${CXXFLAGS}
- BACKUP_LIBS=${LIBS}
-
- AC_MSG_CHECKING(for C interface to BLAS with -lgsl -lgslcblas)
-
-
- for BLAS_HOME in ${BLAS_HOME_PATH} ; do
- CBLAS="yes"
- CBLAS_FLAG="-D__FFLASFFPACK_HAVE_CBLAS"
-
- AS_IF([ test -r "$BLAS_HOME/lib/libgsl.a" -o -r "$BLAS_HOME/lib/libgsl.so" ],
- [BLAS_LIBS="-lgsl -lgslcblas -lm"
- BLAS_PATH="${BLAS_HOME}/lib"
- AS_IF([ test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"],
- [BLAS_LIBS="-L${BLAS_HOME}/lib ${BLAS_LIBS}"])
- ],
- [test -r "$BLAS_HOME/libgsl.a" -o -r "$BLAS_HOME/libgsl.so" ],
- [ BLAS_LIBS="-lgsl -lgslcblas -lm"
- BLAS_PATH="${BLAS_HOME}"
- AS_IF([ test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"],
- [BLAS_LIBS="-L${BLAS_HOME} ${BLAS_LIBS}"])
- ])
-
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
- AC_TRY_LINK(
- [#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"],
- [double a;],
- [
- AC_TRY_RUN(
- [#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
- int main ()
- { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- } ],
- [ blas_found="yes"
- break ],
- [ blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS ],
- [ blas_found="yes"
- blas_cross="yes"
- break ])
- ],
- [
- blas_found="no"
- blas_checked="$checked $BLAS_HOME"
- unset BLAS_LIBS ]
- )
- done
-
- AS_IF([ test "x$blas_found" = "xyes" ],[
- BLAS_VENDOR="GSL"
- AC_SUBST(BLAS_VENDOR)
- AC_SUBST(BLAS_LIBS)
- AC_SUBST(CBLAS_FLAG)
- AC_SUBST(BLAS_PATH)
- AC_DEFINE(HAVE_BLAS,1,[Define if BLAS is installed])
- AC_DEFINE(HAVE_CBLAS,1,[Define if C interface to BLAS is available])
- dnl AC_DEFINE(BLAS_AVAILABLE,,[Define if BLAS routines are available])
- BLAS_FOUND=true
- AC_SUBST(BLAS_FOUND)
- HAVE_BLAS=yes
- AS_IF([ test "x$blas_cross" != "xyes" ], [
- AC_MSG_RESULT(found)],
- [AC_MSG_RESULT(unknown)
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your BLAS are good. I am assuming it is."
- ])
- ifelse([$2], , :, [$2])
- ],
- [ test -n "$blas_problem" ],
- [ AC_MSG_RESULT(not working) ],
- dnl echo "Sorry, your BLAS are not working. Disabling."
- [test "x$blas_found" = "xno" ],
- [AC_MSG_RESULT(not found)]
- )
-
-
- CXXFLAGS=${BACKUP_CXXFLAGS}
- LIBS=${BACKUP_LIBS}
- ])
-
-])
-
-
diff --git a/macros/blasOTHER-check.m4 b/macros/blasOTHER-check.m4
deleted file mode 100644
index bf5d973..0000000
--- a/macros/blasOTHER-check.m4
+++ /dev/null
@@ -1,165 +0,0 @@
-dnl Check for BLAS
-dnl Copyright Pascal Giorgi 2005
-dnl Modified Brice Boyer 2011
-dnl
-dnl ========LICENCE========
-dnl This file is part of the library FFLAS-FFPACK.
-dnl
-dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public
-dnl License as published by the Free Software Foundation; either
-dnl version 2.1 of the License, or (at your option) any later version.
-dnl
-dnl This library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public
-dnl License along with this library; if not, write to the Free Software
-dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-dnl ========LICENCE========
-dnl/
-
-
-
-dnl **********************************
-dnl * TODO *
-dnl **********************************
-dnl no support yet to MKL
-dnl AS_IF([test -r "$BLAS_VAL/include/mkl_cblas.h"],
-dnl [ BLAS_LIBS="-L${BLAS_VAL}/lib/${MKL_ARCH}/ -lmkl_lapack64 -lmkl -lvml -lguide" ])
-dnl **********************************
-
-
-
-dnl FF_CHECK_BLAS ([MINIMUM-VERSION [, ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]])
-dnl
-dnl Test for BLAS and define BLAS_LIBS
-
-AC_DEFUN([FF_CHECK_OTHERBLAS],
- [ AC_ARG_WITH(otherblas,
- [AC_HELP_STRING([--with-otherblas=<lib>],
- [Use BLAS library. This library is mandatory for FFLAS-FFPACK
- compilation. If argument is <empty> that means
- the library is reachable with the standard search path
- (/usr or /usr/local). Otherwise you give the <path> to
- the directory which contains the library.
- ])
- ])
-
- BLAS_HOME_PATH="$with_otherblas ${DEFAULT_CHECKING_PATH}"
-
- dnl Check for existence
-
- BACKUP_CXXFLAGS=${CXXFLAGS}
- BACKUP_LIBS=${LIBS}
-
- dnl *****************************************************************
- dnl Check if other BLAS are available (only if C BLAS are not available)
- dnl *****************************************************************
-
- AC_MSG_CHECKING(for other BLAS)
-
- dnl check in default path
- for BLAS_HOME in ${BLAS_HOME_PATH}; do
- CBLAS="no"
- CBLAS_FLAG=""
- BLAS_LIBS=""
-
- dnl checking for libblas.*
-
- AS_IF(
- [test -r "$BLAS_HOME/lib/libblas.a" -o -r "$BLAS_HOME/lib/libblas.so" ],
- [BLAS_LIBS="-lblas"
- BLAS_PATH="${BLAS_HOME}/lib"
- AS_IF([ test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"],
- [BLAS_LIBS="-L${BLAS_HOME}/lib -lblas"])
- ],
- [test -r "$BLAS_HOME/libblas.a" -o -r "$BLAS_HOME/libblas.so" ],
- [ BLAS_LIBS="-lblas"
- BLAS_PATH="${BLAS_HOME}"
- AS_IF([ test "x$BLAS_HOME" != "x/usr" -a "x$BLAS_HOME" != "x/usr/local"],
- [BLAS_LIBS="-L${BLAS_HOME} -lblas"])
- ]
- )
-
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG}"
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
- AC_TRY_LINK(
- [#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"],
- [double a;],
- [
- AC_TRY_RUN(
- [#define __FFLASFFPACK_CONFIGURATION
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.}; double b[4]= {4.,3.,2.,1.}; double c[4];
- cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,2,2,2,1., a,2,b,2,0.,c,2);
- if ( (c[0]!=8.) && (c[1]!=5.) && (c[2]!=20.) && (c[3]!=13))
- return -1;
- else
- return 0;
- }
- ],[
- blas_found="yes"
- break
- ],[
- blas_problem="$problem $BLAS_HOME"
- unset BLAS_LIBS
- ],[
- blas_found="yes"
- blas_cross="yes"
- break
- ])
- ],
- [
- blas_found="no"
- blas_checked="$checked $BLAS_HOME"
- unset BLAS_LIBS
- ])
- done
-
-
- AS_IF([ test "x$blas_found" = "xyes" ],
- [ BLAS_VENDOR="OTHER"
- AC_SUBST(BLAS_VENDOR)
- AC_SUBST(BLAS_LIBS)
- AC_SUBST(CBLAS_FLAG)
- AC_SUBST(BLAS_PATH)
- AC_DEFINE(HAVE_BLAS,1,[Define if BLAS is installed])
- BLAS_FOUND=true
- AC_SUBST(BLAS_FOUND)
- dnl AC_DEFINE(BLAS_AVAILABLE,,[Define if BLAS routines are available])
- HAVE_BLAS=yes
- AS_IF([test "x$blas_cross" != "xyes"],
- [ AC_MSG_RESULT(found) ] ,
- [AC_MSG_RESULT(unknown)
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your BLAS are good. I am assuming it is."])
- ifelse([$2], , :, [$2])
- ],
- [test -n "$blas_problem" ],
- [ AC_MSG_RESULT(problem)
- echo "Sorry, your BLAS are not working. Disabling."
- ifelse([$3], , :, [$3])
- ],
- [ test "x$blas_found" = "xno" ],
- [ AC_MSG_RESULT(not found)
- ifelse([$3], , :, [$3])
- ])
-
-
-
-
- AM_CONDITIONAL(FFLASFFPACK_HAVE_BLAS, test "x$HAVE_BLAS" = "xyes")
-
- CXXFLAGS=${BACKUP_CXXFLAGS}
- LIBS=${BACKUP_LIBS}
- dnl unset LD_LIBRARY_PATH
-
-
-])
-
-
diff --git a/macros/cuda-check.m4 b/macros/cuda-check.m4
new file mode 100644
index 0000000..7fc7609
--- /dev/null
+++ b/macros/cuda-check.m4
@@ -0,0 +1,123 @@
+dnl Check for CUDA
+dnl Copyright(c)'1994-2009,2003,2013 by The Givaro group
+dnl This file is part of FFLAS-FFPACK
+
+dnl ========LICENCE========
+dnl This file is part of the library FFLAS-FFPACK.
+dnl
+dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+dnl ========LICENCE========
+dnl/
+
+
+dnl Modified by Pascal Giorgi, 2003-12-03
+dnl Modified by BB, 2013-5-22 and other times
+
+dnl Test for CUDA
+dnl Sets CUDA_CFLAGS and CUDA_LIBS
+dnl Defines HAVE_CUDA
+
+AC_DEFUN([FF_CHECK_CUDA], [
+
+ AC_ARG_WITH(cuda,
+ [AC_HELP_STRING([--with-cuda=<path>|yes|no],[
+ Use CUDA library.
+ If argument is no, you do not have the library installed on your machine.
+ If argument is yes or <empty> that means the library is reachable with the standard
+ search path "/usr" or "/usr/local" (set as default).
+ Otherwise you give the <path> to the directory which contain the library.
+ ])],
+ [if test "$withval" = yes ; then
+ CUDA_HOME_PATH="${DEFAULT_CHECKING_PATH}"
+ elif test "$withval" != no ; then
+ CUDA_HOME_PATH="$withval ${DEFAULT_CHECKING_PATH}"
+ fi],
+ [CUDA_HOME_PATH="${DEFAULT_CHECKING_PATH}"])
+
+ min_cuda_version=ifelse([$1], ,5.5.0,$1)
+
+ dnl Check for existence
+
+BACKUP_CXXFLAGS=${CXXFLAGS}
+BACKUP_LIBS=${LIBS}
+
+AC_MSG_CHECKING(for CUDA >= $min_cuda_version )
+
+dnl todo lib (32) and lib64.
+CUDA_PATH=
+for CUDA_HOME in ${CUDA_HOME_PATH}
+do
+ if test "x$CUDA_HOME" != "x/usr" -a "x$CUDA_HOME" != "x/usr/local"; then
+ if test -r "$CUDA_HOME/include/cuda.h" ; then
+ CUDA_CFLAGS="-I${CUDA_HOME}/include"
+ CUDA_PATH="-L${CUDA_HOME}/lib64"
+ CUDA_LIBS="-L${CUDA_HOME}/lib64 -lcusparse"
+ else
+ echo "($CUDA_HOME) seems an invalid CUDA prefix"
+ echo "Searching CUDA in PATH"
+ CUDA_CFLAGS=""
+ CUDA_LIBS="-lcusparse"
+ fi
+ else
+ CUDA_CFLAGS=""
+ CUDA_LIBS="-lcusparse"
+ fi
+
+ CXXFLAGS="${CXXFLAGS} ${CUDA_CFLAGS}"
+ LIBS="${LIBS} ${CUDA_LIBS}"
+ CODE_CUDA=`cat macros/CodeChunk/cuda.C`
+
+ AC_TRY_LINK(
+ [
+ #include <cuda.h>
+ ],
+ [ CUresult a;],
+ [
+ dnl # See if we are running CUDA 4.0 with --enable-cxx
+ AC_TRY_RUN(
+ [ ${CODE_CUDA} ],
+ [
+ AC_MSG_RESULT(found)
+ AC_DEFINE(HAVE_CUDA,1,[Define if CUDA is installed])
+
+ dnl CUDA_VERSION="" dnl I could find it but why is it here ?
+ CUDA_LIBS="${CUDA_PATH} -lcusparse"
+ dnl AC_SUBST(CUDA_VERSION)
+ AC_SUBST(CUDA_LIBS)
+ AC_SUBST(CUDA_CFLAGS)
+ break;
+ ],[
+ AC_MSG_RESULT(no : cuda is too old or not found)
+ dnl AC_SUBST(CUDA_VERSION)
+ ],[ dnl This should never happen
+ AC_MSG_RESULT(no)
+ ])
+ ],[
+ AC_MSG_RESULT(unknown)
+ echo "WARNING: You appear to be cross compiling, so there is no way to determine"
+ echo "whether your CUDA version is new enough. I am assuming it is."
+ AC_SUBST(CUDA_CFLAGS)
+ AC_SUBST(CUDA_LIBS)
+ AC_DEFINE(HAVE_CUDA,1,[Define if CUDA is installed])
+ ])
+ unset CUDA_CFLAGS
+ unset CUDA_LIBS
+done
+
+CXXFLAGS=${BACKUP_CXXFLAGS}
+LIBS=${BACKUP_LIBS}
+#unset LD_LIBRARY_PATH
+
+])
diff --git a/macros/debug.m4 b/macros/debug.m4
index ee8f8f8..b65adf7 100644
--- a/macros/debug.m4
+++ b/macros/debug.m4
@@ -1,5 +1,5 @@
dnl Copyright(c)'2011 FFLAS-FFPACK
-dnl Written by BB <bboyer at imag.fr>
+dnl Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
dnl
dnl ========LICENCE========
dnl This file is part of the library FFLAS-FFPACK.
@@ -26,11 +26,11 @@ dnl enable basic debug mode.
AC_DEFUN([AC_DEBUG],
[AC_MSG_CHECKING([whether to enable debugging options in the library])
AC_ARG_ENABLE(debug,
-[AC_HELP_STRING([--enable-debug], [enable debugging options in library])],
+[AC_HELP_STRING([--enable-debug=yes|no], [enable debugging options in library])],
USE_DEBUG=$enableval,
USE_DEBUG=no)
AC_MSG_RESULT([$USE_DEBUG])
- AM_CONDITIONAL(DEBUG, [test $USE_DEBUG = yes])
+ AM_CONDITIONAL(DEBUG, [test x$USE_DEBUG = xyes])
DBG=$USE_DEBUG
AC_SUBST(DBG)dnl
]
@@ -39,7 +39,7 @@ AC_DEFUN([AC_DEBUG],
AC_DEFUN([AC_PROFILE],
[AC_MSG_CHECKING([whether to enable profiling everything in the library])
AC_ARG_ENABLE(profile,
-[AC_HELP_STRING([--enable-profile], [enable profiling options in library])],
+[AC_HELP_STRING([--enable-profile=yes|no], [enable profiling options in library])],
USE_PROFILE=$enableval,
USE_PROFILE=no)
AC_MSG_RESULT([$USE_PROFILE])
@@ -83,7 +83,7 @@ AC_DEFUN([AC_COMPILER_NAME], [
AC_SUBST(CCNAM)
])
-dnl PATHSCALE ?
+dnl PATHSCALE > 4 ?
AS_IF([ test -z "${CCNAM}"], [
AC_TRY_RUN( [
#ifdef __PATHSCALE__
@@ -96,7 +96,50 @@ dnl PATHSCALE ?
AC_SUBST(CCNAM) ])
])
-dnl GCC ?
+dnl CLANG > 3.1 ?
+ AS_IF([ test -z "${CCNAM}"], [
+ AC_TRY_RUN( [
+ #ifdef __clang__
+ int main() { return !(__clang_major__ >=3 && __clang_minor__ >=1) ; }
+ #else
+ pas clang non plus.
+ #endif], [
+ AC_MSG_RESULT(clang31)
+ CCNAM=clang31
+ AC_SUBST(CCNAM) ])
+ ])
+
+dnl CLANG > 3 ?
+ AS_IF([ test -z "${CCNAM}"], [
+ AC_TRY_RUN( [
+ #ifdef __clang__
+ int main() { return !(__clang_major__ >=3) ; }
+ #else
+ pas clang non plus.
+ #endif], [
+ AC_MSG_RESULT(clang31)
+ CCNAM=clang
+ AC_SUBST(CCNAM) ])
+ ])
+
+
+dnl GCC >= 4.8 ?
+ AS_IF([ test -z "${CCNAM}"], [
+ AC_TRY_RUN( [
+ #ifdef __GNUC__
+ int main() { return !(__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ > 7 )) ; }
+ #else
+ pas gcc non plus ???
+ #endif], [
+ CCNOM=gcc
+ AS_IF([ test -n "${CC}" ], [CCNOM="`$CC --version 2>&1| awk 'NR<2{print $1}'`"])
+ CCNAM=gcc48
+ AC_SUBST(CCNAM)
+ AC_MSG_RESULT($CCNOM)
+ ])
+ ])
+
+dnl GCC > 4.2 ?
AS_IF([ test -z "${CCNAM}"], [
AC_TRY_RUN( [
#ifdef __GNUC__
@@ -104,9 +147,11 @@ dnl GCC ?
#else
pas gcc non plus ???
#endif], [
- AC_MSG_RESULT(gcc)
+ CCNOM=gcc
+ AS_IF([ test -n "${CC}" ], [CCNOM="`$CC --version 2>&1| awk 'NR<2{print $1}'`"])
CCNAM=gcc
AC_SUBST(CCNAM)
+ AC_MSG_RESULT($CCNOM)
])
])
diff --git a/macros/fflas-ffpack-blas.m4 b/macros/fflas-ffpack-blas.m4
new file mode 100644
index 0000000..656187a
--- /dev/null
+++ b/macros/fflas-ffpack-blas.m4
@@ -0,0 +1,181 @@
+dnl Check for BLAS
+dnl Copyright 2014 Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+dnl This file is part of FFLAS-FFPACK
+dnl
+dnl ========LICENCE========
+dnl This file is part of the library FFLAS-FFPACK.
+dnl
+dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+dnl ========LICENCE========
+dnl/
+
+dnl Tests BLAS for and define CBLAS_FLAG and CBLAS_LIBS
+dnl Defines HAVE_LAPACK, HAVE_CLAPACK, HAVE_BLAS, HAVE_CBLAS if available
+
+AC_DEFUN([FF_CHECK_BLAS_CFLAGS],
+ [ AC_ARG_WITH(blas-cflags,
+ [AC_HELP_STRING([--with-blas-cflags=<cflags>],
+ [ CFLAGS for BLAS/LAPACK (i.e. -I/path/to/toto-blas) ])
+ ])
+ CBLAS_FLAG="$with_blas_cflags -D__FFLASFFPACK_HAVE_CBLAS"
+ AC_SUBST(CBLAS_FLAG)
+ dnl echo $CBLAS_FLAG;
+ ]
+ )
+
+dnl
+AC_DEFUN([FF_CHECK_BLAS_LIBS],
+ [ AC_ARG_WITH(blas-libs,
+ [AC_HELP_STRING([--with-blas-libs=<libs>],
+ [ LIBS for BLAS/LAPACK (i.e. -L/path/to/toto-blas -ltoto-blas) ])
+ ])
+ CBLAS_LIBS="$with_blas_libs"
+ AC_SUBST(CBLAS_LIBS)
+ dnl echo $CBLAS_LIBS;
+ ]
+ )
+
+dnl
+AC_DEFUN([FF_CHECK_USER_BLAS],
+ [
+ BACKUP_CXXFLAGS=${CXXFLAGS}
+ BACKUP_LIBS=${LIBS}
+ saved_LD_RUN_PATH="$LD_RUN_PATH"
+ blas_lib_path=`echo $CBLAS_LIBS | $EGREP '\-L' | $SED -e 's/-L//;s/ .*//'`
+ LD_RUN_PATH="${LD_RUN_PATH:+$LD_RUN_PATH$PATH_SEPARATOR}$blas_lib_path"
+ export LD_RUN_PATH
+ CODE_CBLAS=`cat macros/CodeChunk/cblas.C`
+
+ AC_MSG_CHECKING(for USER BLAS)
+
+ CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG} -I. -I.. -I`pwd` -I`pwd`/fflas-ffpack ${GIVARO_CFLAGS}"
+ LIBS="${BACKUP_LIBS} ${CBLAS_LIBS}"
+
+ AC_TRY_LINK( [
+#define __FFLASFFPACK_CONFIGURATION
+#include "fflas-ffpack/config-blas.h"],
+ [double a;],
+ [
+ AC_TRY_RUN(
+ [ ${CODE_CBLAS} ],[
+ blas_found="yes"
+ ],[
+ blas_problem="$problem"
+ ],[
+ blas_found="yes"
+ blas_cross="yes"
+ ])
+ ],
+ [
+ blas_found="no"
+ ])
+
+ AS_IF([ test "x$blas_found" = "xyes" ],
+ [
+ BLAS_VENDOR="USER"
+ AC_SUBST(BLAS_VENDOR)
+ dnl AC_SUBST(CBLAS_FLAG)
+ dnl AC_SUBST(BLAS_PATH)
+ AC_DEFINE(HAVE_BLAS,1,[Define if BLAS is installed])
+ AC_DEFINE(HAVE_CBLAS,1,[Define if C interface to BLAS is installed])
+ BLAS_FOUND=true
+ AC_SUBST(BLAS_FOUND)
+ dnl AC_DEFINE(BLAS_AVAILABLE,,[Define if BLAS routines are available])
+ #echo ${CBLAS_FLAG}
+ #echo ${CBLAS_LIBS}
+ HAVE_BLAS=yes
+ AS_IF([test "x$blas_cross" != "xyes"],
+ [ AC_MSG_RESULT(found (cblas)) ] ,
+ [AC_MSG_RESULT(unknown)
+ echo "WARNING: You appear to be cross compiling, so there is no way to determine"
+ echo "whether your BLAS are good. I am assuming it is."])
+ ],
+ [
+ AC_MSG_RESULT(problem)
+ ]
+ )
+
+
+ AM_CONDITIONAL(FFLASFFPACK_HAVE_BLAS, test "x$HAVE_BLAS" = "xyes")
+
+ CXXFLAGS=${BACKUP_CXXFLAGS}
+ LIBS=${BACKUP_LIBS}
+ LD_RUN_PATH="$saved_LD_RUN_PATH"
+ export LD_RUN_PATH
+ unset saved_LD_RUN_PATH
+ dnl unset LD_LIBRARY_PATH
+
+
+ ]
+ )
+
+dnl
+AC_DEFUN([FF_CHECK_USER_LAPACK],
+ [
+ BACKUP_CXXFLAGS=${CXXFLAGS}
+ BACKUP_LIBS=${LIBS}
+
+ CODE_CLAPACK=`cat macros/CodeChunk/clapack.C`
+ CODE_LAPACK=`cat macros/CodeChunk/lapack.C`
+
+ AC_MSG_CHECKING(for USER LAPACK)
+
+ CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG} -I. -I.. -I`pwd` -I`pwd`/fflas-ffpack ${GIVARO_CFLAGS}"
+ LIBS="${BACKUP_LIBS} ${CBLAS_LIBS}"
+
+ AC_TRY_RUN(
+ [ ${CODE_CLAPACK} ],
+ [ dgetrf_found="yes" ],
+ [ dgetrf_problem="problem" ],
+ [ dgetrf_found="" ]
+ )
+
+ AS_IF([ test "${dgetrf_found}" = "yes"],
+ [
+ AC_MSG_RESULT( yes (clapack))
+ AC_DEFINE(HAVE_LAPACK,1,[Define if LAPACK is installed])
+ AC_DEFINE(HAVE_CLAPACK,1,[Define if C interface to LAPACK is available])
+ HAVE_LAPACK=yes
+ ],
+ [
+ AC_TRY_RUN(
+ [ ${CODE_LAPACK} ],
+ [ dgetrf_found="yes"],
+ [ dgetrf_problem="$problem"],
+ [ dgetrf_found="" ]
+ )
+ AS_IF([ test "x${dgetrf_found}" = "xyes"],
+ [
+ AC_SUBST(LAPACK_LIBS)
+ AC_MSG_RESULT( yes (lapack))
+ AC_DEFINE(HAVE_LAPACK,1,[Define if LAPACK is installed])
+ HAVE_LAPACK=yes
+ ], dnl clapack not found. looking for lapack
+ [
+ AC_MSG_RESULT( no )
+ ]
+ )
+ ]
+ )
+
+ dnl
+ AM_CONDITIONAL(FFLASFFPACK_HAVE_LAPACK, test "x$HAVE_LAPACK" = "xyes")
+ CXXFLAGS=${BACKUP_CXXFLAGS}
+ LIBS=${BACKUP_LIBS}
+ dnl unset LD_LIBRARY_PATH
+
+ ]
+)
+
diff --git a/macros/fflas-ffpack-doc.m4 b/macros/fflas-ffpack-doc.m4
index d2035ab..c225398 100644
--- a/macros/fflas-ffpack-doc.m4
+++ b/macros/fflas-ffpack-doc.m4
@@ -1,5 +1,5 @@
dnl Copyright(c)'2011 FFLAS-FFPACK
-dnl Written by BB <bboyer at imag.fr>
+dnl Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
dnl
dnl ========LICENCE========
dnl This file is part of the library FFLAS-FFPACK.
diff --git a/macros/fflas-ffpack-misc.m4 b/macros/fflas-ffpack-misc.m4
index 5f5e304..c04836d 100644
--- a/macros/fflas-ffpack-misc.m4
+++ b/macros/fflas-ffpack-misc.m4
@@ -3,7 +3,7 @@ dnl Copyright (c) fflas-ffpack
dnl This file comes from LinBox' linbox-misc.m4
dnl
dnl Copyright(c)'2011 FFLAS-FFPACK
-dnl Written by BB <bboyer at imag.fr>
+dnl Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
dnl
dnl ========LICENCE========
dnl This file is part of the library FFLAS-FFPACK.
diff --git a/macros/fflas-ffpack-opt.m4 b/macros/fflas-ffpack-opt.m4
index e31198b..bb54037 100644
--- a/macros/fflas-ffpack-opt.m4
+++ b/macros/fflas-ffpack-opt.m4
@@ -48,19 +48,72 @@ AC_MSG_RESULT(yes)
BACKUP_CXXFLAGS=${CXXFLAGS}
BACKUP_LIBS=${LIBS}
-echo " *** OPTIMISATIONS *** "
+echo " *** OPTIMIZATION *** "
AC_MSG_CHECKING([best threshold for Strassen-Winograd matrix multiplication])
AC_MSG_RESULT([see below])
-CXXFLAGS_ALL="${BACKUP_CXXFLAGS} -I. -I.. -I`pwd` -I`pwd`/fflas-ffpack ${BLAS_CFLAGS} ${CBLAS_FLAG}"
-LIBS="${BACKUP_LIBS} ${BLAS_LIBS} "
+CXXFLAGS_ALL="-I. -I.. -I`pwd` -I`pwd`/fflas-ffpack ${BACKUP_CXXFLAGS} ${AVXFLAGS} ${DEFAULT_CFLAGS} ${GIVARO_CFLAGS} ${CBLAS_FLAG} ${OMPFLAGS}"
+LIBS="${BACKUP_LIBS} ${CBLAS_LIBS} ${GIVARO_LIBS}"
WINO=`cat optimiser/winograd.C`
+ADDFLAGS="-DOPTIMISATION_MODE"
+saved_LD_RUN_PATH="$LD_RUN_PATH"
+LD_RUN_PATH="${LD_RUN_PATH:+$LD_RUN_PATH$PATH_SEPARATOR}$givaro_lib_path"
+export LD_RUN_PATH
+dnl for Wino threshold for double
+echo " == Wino/BLAS threshold for Givaro::Modular<double> == "
+CXXFLAGS="${CXXFLAGS_ALL} -DFLTTYPE=Givaro::Modular<double> ${ADDFLAGS}"
+AC_RUN_IFELSE([AC_LANG_SOURCE([${WINO}])],[
+ dnl remove last line
+ dnl sed -i '$d' fflas-ffpack/fflas-ffpack-optimise.h ;
+ dnl -i does not work on BSD sed
+ sed '$d' fflas-ffpack/fflas-ffpack-optimise.h > fflas-ffpack/fflas-ffpack-optimise.back.h ;
+ mv fflas-ffpack/fflas-ffpack-optimise.back.h fflas-ffpack/fflas-ffpack-optimise.h ;
+ dnl append new definition
+ cat WinoThreshold >> fflas-ffpack/fflas-ffpack-optimise.h ;
+ dnl close the file
+ echo "#endif // optimise.h" >> fflas-ffpack/fflas-ffpack-optimise.h
+ dnl echo done : `cat WinoThreshold`
+ WINOT=`cat WinoThreshold | awk 'NR==2' | awk '{print $ 3}'`
+ dnl cleaning service !
+ rm WinoThreshold ;
+ AC_MSG_RESULT(done (${WINOT}))
+ ],[
+ AC_MSG_RESULT(problem)
+ break
+ ],[
+ AC_MSG_RESULT(cross compilation)
+ break
+ ])
+dnl for WinoThreshold for float
+echo " == Wino/BLAS threshold for Givaro::Modular<float> == "
+CXXFLAGS="${CXXFLAGS_ALL} -DFLTTYPE=Givaro::Modular<float> ${ADDFLAGS}"
+AC_RUN_IFELSE([AC_LANG_SOURCE([${WINO}])],[
+ dnl remove last line
+ dnl sed -i '$ d' fflas-ffpack/fflas-ffpack-optimise.h ;
+ sed '$d' fflas-ffpack/fflas-ffpack-optimise.h > fflas-ffpack/fflas-ffpack-optimise.back.h ;
+ mv fflas-ffpack/fflas-ffpack-optimise.back.h fflas-ffpack/fflas-ffpack-optimise.h ;
+ dnl append new definition
+ cat WinoThreshold >> fflas-ffpack/fflas-ffpack-optimise.h ;
+ dnl close the file
+ echo "#endif // optimise.h" >> fflas-ffpack/fflas-ffpack-optimise.h
+ dnl echo done : `cat WinoThreshold`
+ WINOT=`cat WinoThreshold | awk 'NR==2' | awk '{print $ 3}'`
+ dnl cleaning service !
+ rm WinoThreshold ;
+ AC_MSG_RESULT(done (${WINOT}))
+ ],[
+ AC_MSG_RESULT(problem)
+ break
+ ],[
+ AC_MSG_RESULT(cross compilation)
+ break
+ ])
dnl for Wino threshold for double
-CXXFLAGS="${CXXFLAGS_ALL} -DFLTTYPE=double"
-echo " == Wino/BLAS threshold for double == "
+echo " == Wino/BLAS threshold for Givaro::ModularBalanced<double> == "
+CXXFLAGS="${CXXFLAGS_ALL} -DFLTTYPE=Givaro::ModularBalanced<double> ${ADDFLAGS}"
AC_RUN_IFELSE([AC_LANG_SOURCE([${WINO}])],[
dnl remove last line
dnl sed -i '$d' fflas-ffpack/fflas-ffpack-optimise.h ;
@@ -72,9 +125,10 @@ AC_RUN_IFELSE([AC_LANG_SOURCE([${WINO}])],[
dnl close the file
echo "#endif // optimise.h" >> fflas-ffpack/fflas-ffpack-optimise.h
dnl cleaning service !
+ WINOT=`cat WinoThreshold | awk 'NR==2' | awk '{print $ 3}'`
dnl echo done : `cat WinoThreshold`
rm WinoThreshold ;
- AC_MSG_RESULT(done)
+ AC_MSG_RESULT(done (${WINOT}))
],[
AC_MSG_RESULT(problem)
break
@@ -84,8 +138,8 @@ AC_RUN_IFELSE([AC_LANG_SOURCE([${WINO}])],[
])
dnl for WinoThreshold for float
-echo " == Wino/BLAS threshold for float == "
-CXXFLAGS="${CXXFLAGS_ALL} -DFLTTYPE=float"
+echo " == Wino/BLAS threshold for Givaro::ModularBalanced<float> == "
+CXXFLAGS="${CXXFLAGS_ALL} -DFLTTYPE=Givaro::ModularBalanced<float> ${ADDFLAGS}"
AC_RUN_IFELSE([AC_LANG_SOURCE([${WINO}])],[
dnl remove last line
dnl sed -i '$ d' fflas-ffpack/fflas-ffpack-optimise.h ;
@@ -96,9 +150,10 @@ AC_RUN_IFELSE([AC_LANG_SOURCE([${WINO}])],[
dnl close the file
echo "#endif // optimise.h" >> fflas-ffpack/fflas-ffpack-optimise.h
dnl echo done : `cat WinoThreshold`
+ WINOT=`cat WinoThreshold | awk 'NR==2' | awk '{print $ 3}'`
dnl cleaning service !
rm WinoThreshold ;
- AC_MSG_RESULT(done)
+ AC_MSG_RESULT(done (${WINOT}))
],[
AC_MSG_RESULT(problem)
break
@@ -106,9 +161,10 @@ AC_RUN_IFELSE([AC_LANG_SOURCE([${WINO}])],[
AC_MSG_RESULT(cross compilation)
break
])
-
+LD_RUN_PATH="$saved_LD_RUN_PATH"
+unset givaro_lib_path
],
-[AC_MSG_RESULT(no optimisation)]
+[AC_MSG_RESULT(no optimization)]
)
])
diff --git a/macros/aclocal-include.m4 b/macros/fflas-ffpack-precompile.m4
similarity index 55%
copy from macros/aclocal-include.m4
copy to macros/fflas-ffpack-precompile.m4
index 84c496b..51b5f44 100644
--- a/macros/aclocal-include.m4
+++ b/macros/fflas-ffpack-precompile.m4
@@ -1,8 +1,6 @@
-dnl aclocal-include.m4
-dnl Copyright (c) 2011 FFLAS-FFPACK
-dnl written by BB <bboyer at imag.fr>
-dnl adapted from LinBox configuration
-dnl
+dnl Copyright (c) 2012 FFLAS-FFPACK
+dnl Written by Clément Pernet, Brice Boyer.
+dnl This file was taken from LinBox linbox-opt.m4
dnl ========LICENCE========
dnl This file is part of the library FFLAS-FFPACK.
dnl
@@ -22,18 +20,27 @@ dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 U
dnl ========LICENCE========
dnl/
-dnl This macro adds the name macrodir to the set of directories
-dnl that `aclocal' searches for macros.
-dnl serial 1
-dnl AM_ACLOCAL_INCLUDE(macrodir)
-AC_DEFUN([AM_ACLOCAL_INCLUDE],
+
+
+
+AC_DEFUN([FF_PRECOMPILE],
[
- AM_CONDITIONAL(INSIDE_GNOME_COMMON, test x = y)
- test -n "$ACLOCAL_FLAGS" && ACLOCAL="$ACLOCAL $ACLOCAL_FLAGS"
+AC_MSG_CHECKING([whether to compile the standard specializations])
- for k in $1 ; do ACLOCAL="$ACLOCAL -I $k" ; done
+AC_ARG_ENABLE(precompilation,
+[AC_HELP_STRING([--enable-precompilation], [ Enable precompilation of the standard specializations])])
+AM_CONDITIONAL(FFLASFFPACK_PRECOMPILED, test "x$enable_precompilation" == "xyes")
+AS_IF([test "x$enable_precompilation" == "xyes"],
+ [
+ AC_MSG_RESULT(yes)
+ PRECOMPILE_FLAGS="-DFFLAS_COMPILED -DFFPACK_COMPILED"
+ PRECOMPILE_LIBS="-L${libdir} -lfflas -lffpack"
+ AC_SUBST(PRECOMPILE_FLAGS)
+ AC_SUBST(PRECOMPILE_LIBS)
+ ],
+ [AC_MSG_RESULT(no)]
+ )
])
-
diff --git a/macros/givaro-check.m4 b/macros/givaro-check.m4
index 6725383..3e688ff 100644
--- a/macros/givaro-check.m4
+++ b/macros/givaro-check.m4
@@ -1,13 +1,7 @@
dnl Check for GIVARO
-dnl Bradford Hovinen, 2001-06-13
-dnl Modified by Pascal Giorgi, 2003-12-03
-dnl Inspired by gnome-bonobo-check.m4 by Miguel de Icaza, 99-04-12
-dnl Stolen from Chris Lahey 99-2-5
-dnl stolen from Manish Singh again
-dnl stolen back from Frank Belew
-dnl stolen from Manish Singh
-dnl Shamelessly stolen from Owen Taylor
-dnl Copyright (c) 2011 FFLAS-FFPACK
+dnl Copyright (c) the Givaro group
+dnl This file is part of FFLAS-FFPACK
+
dnl ========LICENCE========
dnl This file is part of the library FFLAS-FFPACK.
dnl
@@ -29,11 +23,14 @@ dnl/
-dnl LB_CHECK_GIVARO ([MINIMUM-VERSION [, ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]])
+dnl adapted from LinBox by BB.
+
+dnl FF_CHECK_GIVARO ([MINIMUM-VERSION [, ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]])
dnl
-dnl Test for Givaro and define GIVARO_CFLAGS and GIVARO_LIBS
+dnl Tests for Givaro and define GIVARO_CFLAGS and GIVARO_LIBS
+dnl Defines HAVE_GIVARO
-AC_DEFUN([LB_CHECK_GIVARO],
+AC_DEFUN([FF_CHECK_GIVARO],
[
AC_ARG_WITH(givaro,
@@ -55,13 +52,15 @@ dnl -------------- dnl
dnl GIVARO VERSION dnl
dnl -------------- dnl
-version_min=30700
-version_max=30800
+dnl As we need Integer and Modular, should be updated on each interface changes
+version_min=40001
+version_max=40002
dnl Check for existence
BACKUP_CXXFLAGS=${CXXFLAGS}
BACKUP_LIBS=${LIBS}
+saved_LD_RUN_PATH="$LD_RUN_PATH"
AC_MSG_CHECKING(for GIVARO >= $version_min and < $version_max)
@@ -69,23 +68,21 @@ for GIVARO_HOME in ${GIVARO_HOME_PATH}
do
if test -r "$GIVARO_HOME/include/givaro/givconfig.h"; then
- if test "x$GIVARO_HOME" != "x/usr" -a "x$GIVARO_HOME" != "x/usr/local"; then
- GIVARO_CFLAGS="-I${GIVARO_HOME}/include"
- GIVARO_LIBS="-L${GIVARO_HOME}/lib -lgivaro"
- else
- GIVARO_CFLAGS=
- GIVARO_LIBS="-lgivaro"
- fi
- CXXFLAGS="${BACKUP_CXXFLAGS} ${GIVARO_CFLAGS} ${GMP_CFLAGS}"
- LIBS="${BACKUP_LIBS} ${GIVARO_LIBS} ${GMP_LIBS}"
-
+ # Givaro Libs + CFlags contain GMP info - AB 2014-12-12
+ GIVARO_LIBS=`$GIVARO_HOME/bin/givaro-config --libs`
+ GIVARO_CFLAGS=`$GIVARO_HOME/bin/givaro-config --cflags`
+ givaro_lib_path=`$GIVARO_HOME/bin/givaro-config --prefix`/lib
+ CXXFLAGS="${BACKUP_CXXFLAGS} ${GIVARO_CFLAGS}"
+ LIBS="${BACKUP_LIBS} ${GIVARO_LIBS}"
+ LD_RUN_PATH="${LD_RUN_PATH:+$LD_RUN_PATH$PATH_SEPARATOR}$givaro_lib_path"
+ export LD_RUN_PATH
AC_TRY_LINK(
[#include <givaro/givinteger.h>],
[Givaro::Integer a;],
[
AC_TRY_RUN(
[#include <givaro/givconfig.h>
- int main () { if (GIVARO_VERSION < $version_min || GIVARO_VERSION >= $version_max || GIVARO_VERSION>0x030000) return -1; else return 0; /* old version of Givaro are defined as hexa 0x03yyzz*/ }
+ int main () { if (GIVARO_VERSION >= $version_min && GIVARO_VERSION < $version_max) return 0; else return -1; /* old version of Givaro are defined as hexa 0x03yyzz*/ }
],[
givaro_found="yes"
break
@@ -101,10 +98,11 @@ if test -r "$GIVARO_HOME/include/givaro/givconfig.h"; then
])
],
[
- givaro_found="no"
+ givaro_found="yes"
givaro_checked="$checked $GIVARO_HOME"
- unset GIVARO_CFLAGS
- unset GIVARO_LIBS
+#unset GIVARO_CFLAGS
+#unset GIVARO_LIBS
+ break
])
else
@@ -115,8 +113,10 @@ done
if test "x$givaro_found" = "xyes" ; then
AC_SUBST(GIVARO_CFLAGS)
AC_SUBST(GIVARO_LIBS)
+ dnl echo $GIVARO_CFLAGS $GIVARO_LIBS
AC_DEFINE(HAVE_GIVARO,1,[Define if GIVARO is installed])
HAVE_GIVARO=yes
+
if test "x$givaro_cross" != "xyes"; then
AC_MSG_RESULT(found)
else
@@ -124,6 +124,7 @@ if test "x$givaro_found" = "xyes" ; then
echo "WARNING: You appear to be cross compiling, so there is no way to determine"
echo "whether your GIVARO version is new enough. I am assuming it is."
fi
+
ifelse([$2], , :, [$2])
elif test -n "$givaro_problem"; then
AC_MSG_RESULT(problem)
@@ -134,10 +135,13 @@ elif test "x$givaro_found" = "xno" ; then
ifelse([$3], , :, [$3])
fi
-AM_CONDITIONAL(LINBOX_HAVE_GIVARO, test "x$HAVE_GIVARO" = "xyes")
+AM_CONDITIONAL(FFLASFFPACK_HAVE_GIVARO, test "x$HAVE_GIVARO" = "xyes")
CXXFLAGS=${BACKUP_CXXFLAGS}
LIBS=${BACKUP_LIBS}
+LD_RUN_PATH="$saved_LD_RUN_PATH"
+export LD_RUN_PATH
+unset saved_LD_RUN_PATH
#unset LD_LIBRARY_PATH
])
diff --git a/macros/gmp-check.m4 b/macros/gmp-check.m4
deleted file mode 100644
index ef521c9..0000000
--- a/macros/gmp-check.m4
+++ /dev/null
@@ -1,155 +0,0 @@
-dnl Check for GMP
-dnl Copyright (c) 2011 FFLAS-FFPACK
-dnl Modified by Pascal Giorgi, 2003-12-03
-dnl ========LICENCE========
-dnl This file is part of the library FFLAS-FFPACK.
-dnl
-dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public
-dnl License as published by the Free Software Foundation; either
-dnl version 2.1 of the License, or (at your option) any later version.
-dnl
-dnl This library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public
-dnl License along with this library; if not, write to the Free Software
-dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-dnl ========LICENCE========
-dnl/
-
-
-dnl LB_CHECK_GMP ([MINIMUM-VERSION [, ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]]])
-dnl
-dnl Test for the GNU Multiprecision library and define GMP_CFLAGS and GMP_LIBS
-
-AC_DEFUN([LB_CHECK_GMP],
-[
-AC_ARG_WITH(gmp,
-[AC_HELP_STRING([--with-gmp= <path>|yes], [Use GMP library. This library is mandatory for LinBox
- compilation. If argument is yes or <empty> that means
- the library is reachable with the standard search path
- "/usr" or "/usr/local" (set as default). Otherwise you
- give the <path> to the directory which contain the
- library.
-])],
- [if test "$withval" = yes ; then
- GMP_HOME_PATH="${DEFAULT_CHECKING_PATH}"
- elif test "$withval" != no ; then
- GMP_HOME_PATH="$withval ${DEFAULT_CHECKING_PATH}"
- fi],
- [GMP_HOME_PATH="${DEFAULT_CHECKING_PATH}"])
-
-min_gmp_version=ifelse([$1], ,3.1.1,$1)
-
-dnl Check for existence
-BACKUP_CXXFLAGS=${CXXFLAGS}
-BACKUP_LIBS=${LIBS}
-
-AC_MSG_CHECKING(for GMP >= $min_gmp_version)
-
-for GMP_HOME in ${GMP_HOME_PATH}
- do
- if test -r "$GMP_HOME/include/gmp.h"; then
-
- if test "x$GMP_HOME" != "x/usr" -a "x$GMP_HOME" != "x/usr/local"; then
- GMP_CFLAGS="-I${GMP_HOME}/include"
- GMP_LIBS="-L${GMP_HOME}/lib -lgmpxx -lgmp"
- else
- GMP_CFLAGS=
- GMP_LIBS="-lgmpxx -lgmp"
- fi
-
- CXXFLAGS="${CXXFLAGS} ${GMP_CFLAGS}"
- LIBS="${LIBS} ${GMP_LIBS}"
-
- AC_TRY_LINK(
- [#include <gmp.h>],
- [mpz_t a; mpz_init (a);],
- [
- AC_TRY_RUN(
- [#include <gmp.h>
- int main () { if (__GNU_MP_VERSION < 3) return -1; else return 0; }
- ],[
- AC_MSG_RESULT(found)
- AC_SUBST(GMP_CFLAGS)
- AC_SUBST(GMP_LIBS)
- AC_DEFINE(HAVE_GMP,1,[Define if GMP is installed])
- # See if we are running GMP 4.0
- AC_MSG_CHECKING(whether GMP is 4.0 or greater)
- AC_TRY_RUN(
- [#include <gmp.h>
- int main () { if (__GNU_MP_VERSION < 4) return -1; else return 0; }
- ],[
- gmp_found="yes"
- AC_MSG_RESULT(yes)
- # See if GMP was compiled with --enable-cxx
- AC_MSG_CHECKING(whether GMP was compiled with --enable-cxx)
- AC_TRY_RUN(
- [#include <gmpxx.h>
- int main () { mpz_class a(2),b(3),c(5); if ( a+b == c ) return 0; else return -1; }
- ],[
- AC_MSG_RESULT(yes)
- GMP_VERSION=""
- AC_SUBST(GMP_VERSION)
- ],[
- gmp_found="no"
- AC_MSG_RESULT(no)
- ],[
- dnl This should never happen
- AC_MSG_RESULT(no)
- ])
- ],[
- AC_MSG_RESULT(no)
- AC_DEFINE(GMP_VERSION_3,1,[Define if GMP is version 3.xxx])
- GMP_VERSION="-DGMP_VERSION_3"
- AC_SUBST(GMP_VERSION)
- ],[
- dnl This should never happen
- AC_MSG_RESULT(no)
- ])
- ifelse([$2], , :, [$2])
- break
- ],[
- gmp_problem="$gmp_problem $GMP_HOME"
- unset GMP_CFLAGS
- unset GMP_LIBS
- ],[
- AC_MSG_RESULT(unknown)
- echo "WARNING: You appear to be cross compiling, so there is no way to determine"
- echo "whether your GMP version is new enough. I am assuming it is."
- AC_SUBST(GMP_CFLAGS)
- AC_SUBST(GMP_LIBS)
- AC_DEFINE(HAVE_GMP,1,[Define if GMP is installed])
- ifelse([$2], , :, [$2])
- break
- ])
- ],[
- gmp_found="no"
- unset GMP_CFLAGS
- unset GMP_LIBS
- ])
-
- else
- gmp_found="no"
- fi
-done
-
-if test "x$gmp_found" != "xyes"; then
- if test -n "$gmp_problem"; then
- AC_MSG_RESULT(problem)
- echo "Sorry, your GMP version is too old. Disabling."
- elif test "x$gmp_found" != "xno"; then
- AC_MSG_RESULT(not found)
- fi
- ifelse($3, , :, $3)
-fi
-
-
-CXXFLAGS=${BACKUP_CXXFLAGS}
-LIBS=${BACKUP_LIBS}
-#unset LD_LIBRARY_PATH
-
-])
diff --git a/macros/lapack-check.m4 b/macros/lapack-check.m4
deleted file mode 100644
index e094478..0000000
--- a/macros/lapack-check.m4
+++ /dev/null
@@ -1,318 +0,0 @@
-dnl Check for LAPACK
-dnl Copyright 2011 Brice Boyer <bboyer at imag.fr>
-dnl ========LICENCE========
-dnl This file is part of the library FFLAS-FFPACK.
-dnl
-dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public
-dnl License as published by the Free Software Foundation; either
-dnl version 2.1 of the License, or (at your option) any later version.
-dnl
-dnl This library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public
-dnl License along with this library; if not, write to the Free Software
-dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-dnl ========LICENCE========
-dnl/
-
-
-dnl **********************************
-dnl * TODO *
-dnl **********************************
-dnl no support yet to MKL
-dnl AS_IF([test -r "$BLAS_VAL/include/mkl_cblas.h"],
-dnl [ BLAS_LIBS="-L${BLAS_VAL}/lib/${MKL_ARCH}/ -lmkl_lapack64 -lmkl -lvml -lguide" ])
-dnl **********************************
-
-AC_DEFUN([FF_CHECK_LAPACK], [
-
- BACKUP_CXXFLAGS=${CXXFLAGS}
- BACKUP_LIBS=${LIBS}
-
-
- AC_ARG_WITH(lapack,
- [AC_HELP_STRING([--with-lapack=<blas|path>],
- [Use LAPACK functions. This library is mandatory for LinBox
- compilation. If argument is <empty> that means
- the library is reachable with the standard search path
- (/usr or /usr/local). Or, you can give the <path> to
- the directory which contains the library. If the argument
- is 'blas', then we look in the BLAS vendor library.
- We look for a C interface (clapack_), and if not present,
- look for standard functions (as dgetrf_). First one available
- in order in '$path /usr /usr/local', first chosen, even if it is
- not clapack_ (example: clapack_ in /usr but dgetrf_ in $path : dgetrf_ chosen,
- $path not even looked into).
- ])
- ])
-
-
- AC_MSG_CHECKING(for LAPACK)
-
-
- AS_IF([ test "$with_lapack" = "blas"], [
- dnl echo "vendor ${BLAS_VENDOR} in ${BLAS_PATH}"
- dnl check for lapack function in vendor lib
- AS_CASE([${BLAS_VENDOR}],
- ["ATLAS"],[
- dnl atlas provides a liblapack next to its libcblas
- LAPACK_LIBS="-llapack"
- dnl why would we need lapack_atlas when llapack is enough ?
- dnl could llapack not provide the symbols ?
- AS_IF([test -r "${BLAS_PATH}/liblapack_atlas.a" -o -r "${BLAS_PATH}/liblapack_atlas.so"],
- [LAPACK_LIBS="${LAPACK_LIBS} -llapack_atlas"])
- dnl AS_IF([ test "x$BLAS_PATH" != "x/usr/lib" -a "x$BLAS_PATH" != "x/usr/local/lib"],
- dnl [LAPACK_LIBS="-L${BLAS_PATH} ${BLAS_LIBS}"])
-
- ],
- dnl GSL provides no lapack ! why would you use GSL ?
- ["GSL"],
- [LAPACK_LIBS=""],
- dnl lapack is in libgoto2
- ["GOTO2"],
- [LAPACK_LIBS=""],
- dnl maybe lapack is in libblas ?
- ["OTHER"],
- [LAPACK_LIBS=""],
- dnl defaulting somewhere...
- [LAPACK_LIBS=""])
-
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG} "
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS} ${LAPACK_LIBS}"
-
- dnl echo ${LAPACK_LIBS}
-
- AC_TRY_RUN(
- [#define __FFLASFFPACK_CONFIGURATION
- #define __FFLASFFPACK_HAVE_LAPACK 1
- #define __FFLASFFPACK_HAVE_CLAPACK 1
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.};
- int ipiv[2];
- clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
- if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
- return -1;
- else
- return 0;
- } ],
- [ dgetrf_found="yes" ],
- [ dgetrf_problem="problem" ],
- [ dgetrf_found="" ])
-
- AS_IF( [test "${dgetrf_found}" = "yes"],
- [ AC_SUBST(LAPACK_LIBS)
- AC_MSG_RESULT( yes (clapack))
- AC_DEFINE(HAVE_LAPACK,1,[Define if LAPACK is installed])
- AC_DEFINE(HAVE_CLAPACK,1,[Define if C interface to LAPACK is available])
- ],
- [dnl not found : trying only lapack
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG} "
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS} ${LAPACK_LIBS}"
-
-
- AC_TRY_RUN(
- [#define __FFLASFFPACK_CONFIGURATION
- #define __FFLASFFPACK_HAVE_LAPACK 1
- //#define __FFLASFFPACK_HAVE_CLAPACK 1
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.};
- int ipiv[2];
- clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
- if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
- return -1;
- else
- return 0;
- } ],
- [ dgetrf_found="yes" ],
- [ dgetrf_problem="problem" ],
- [ dgetrf_found="" ])
- AS_IF([test "${dgetrf_found}" = "yes"],
- [ AC_SUBST(LAPACK_LIBS)
- AC_MSG_RESULT( yes (lapack))
- AC_DEFINE(HAVE_LAPACK,1,[Define if LAPACK is installed])
- ],
- [ AC_MSG_RESULT(no) ])
- ])
- ],[ dnl not BLAS vendor asked, so looking in DEFAULT_CHECKING_PATH
- dnl echo "path"
-
- AS_IF([test "x$BLAS_VENDOR" = "xUSER"], [ dnl this is temporary -- because the user supplies everything in --with-blas.
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG} "
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS}"
-
-
- AC_TRY_RUN(
- [#define __FFLASFFPACK_CONFIGURATION
- #define __FFLASFFPACK_HAVE_LAPACK 1
- #define __FFLASFFPACK_HAVE_CLAPACK 1
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.};
- int ipiv[2];
- clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
- if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
- return -1;
- else
- return 0;
- } ],
- [ dgetrf_found="yes"
- dnl echo "yes"
- ],
- [ dgetrf_problem="problem"
- dnl echo "no"
- ],
- [ ])
-
- AS_IF([ test "${dgetrf_found}" = "yes"],
- [ AC_SUBST(LAPACK_LIBS)
- AC_MSG_RESULT( yes (clapack))
- AC_DEFINE(HAVE_LAPACK,1,[Define if LAPACK is installed])
- AC_DEFINE(HAVE_CLAPACK,1,[Define if C interface to LAPACK is available])
- ], dnl clapack not found. looking for lapack
- [
-
- AC_TRY_RUN(
- [#define __FFLASFFPACK_CONFIGURATION
- #define __FFLASFFPACK_HAVE_LAPACK 1
- //#define __FFLASFFPACK_HAVE_CLAPACK 1
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.};
- int ipiv[2];
- clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
- if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
- return -1;
- else
- return 0;
- } ],
- [ dgetrf_found="yes"
- ],
- [ dgetrf_problem="$problem"
- ],
- [ ])
-
- AS_IF([ test "x${dgetrf_found}" = "xyes"],
- [ AC_SUBST(LAPACK_LIBS)
- AC_MSG_RESULT( yes (lapack))
- AC_DEFINE(HAVE_LAPACK,1,[Define if LAPACK is installed])
- ], dnl clapack not found. looking for lapack
- [
- AC_MSG_RESULT( no )
- ])
- ])
-
- ],[
-
- LAPACK_HOME_PATH="$with_lapack ${DEFAULT_CHECKING_PATH}"
- for LAPACK_HOME in ${LAPACK_HOME_PATH} ; do
- dnl echo "in ${LAPACK_HOME} for clapack"
- AS_IF(
- [test -r "$LAPACK_HOME/lib/liblapack.a" -o -r "$LAPACK_HOME/lib/liblapack.so" ],
- [LAPACK_LIBS="-llapack"
- LAPACK_PATH="${LAPACK_HOME}/lib"
- AS_IF([ test "x$LAPACK_HOME" != "x/usr" -a "x$LAPACK_HOME" != "x/usr/local"],
- [LAPACK_LIBS="-L${LAPACK_HOME}/lib -llapack"])
- ],
- [test -r "$LAPACK_HOME/liblapack.a" -o -r "$LAPACK_HOME/liblapack.so" ],
- [ LAPACK_LIBS="-llapack"
- LAPACK_PATH="${LAPACK_HOME}"
- AS_IF([ test "x$LAPACK_HOME" != "x/usr" -a "x$LAPACK_HOME" != "x/usr/local"],
- [LAPACK_LIBS="-L${LAPACK_HOME} -llapack"])
- ]
- )
- dnl echo "lapack libs : $LAPACK_LIBS"
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG} "
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS} ${LAPACK_LIBS}"
-
-
- AC_TRY_RUN(
- [#define __FFLASFFPACK_CONFIGURATION
- #define __FFLASFFPACK_HAVE_LAPACK 1
- #define __FFLASFFPACK_HAVE_CLAPACK 1
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.};
- int ipiv[2];
- clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
- if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
- return -1;
- else
- return 0;
- } ],
- [ dgetrf_found="yes"
- dnl echo "yes"
- break ],
- [ dgetrf_problem="problem"
- unset LAPACK_LIBS
- dnl echo "no" ],
- [ break ])
- done ;
- AS_IF([ test "${dgetrf_found}" = "yes"],
- [ AC_SUBST(LAPACK_LIBS)
- AC_MSG_RESULT( yes (clapack))
- AC_DEFINE(HAVE_LAPACK,1,[Define if LAPACK is installed])
- AC_DEFINE(HAVE_CLAPACK,1,[Define if C interface to LAPACK is available])
- ], dnl clapack not found. looking for lapack
- [
- for LAPACK_HOME in ${LAPACK_HOME_PATH} ; do
- dnl echo "in ${LAPACK_HOME}"
- AS_IF(
- [test -r "$LAPACK_HOME/lib/liblapack.a" -o -r "$LAPACK_HOME/lib/liblapack.so" ],
- [LAPACK_LIBS="-llapack"
- LAPACK_PATH="${LAPACK_HOME}/lib"
- AS_IF([ test "x$LAPACK_HOME" != "x/usr" -a "x$LAPACK_HOME" != "x/usr/local"],
- [LAPACK_LIBS="-L${LAPACK_HOME}/lib -llapack"])
- ],
- [test -r "$LAPACK_HOME/liblapack.a" -o -r "$LAPACK_HOME/liblapack.so" ],
- [ LAPACK_LIBS="-llapack"
- LAPACK_PATH="${LAPACK_HOME}"
- AS_IF([ test "x$LAPACK_HOME" != "x/usr" -a "x$LAPACK_HOME" != "x/usr/local"],
- [LAPACK_LIBS="-L${LAPACK_HOME} -llapack"])
- ]
- )
- CXXFLAGS="${BACKUP_CXXFLAGS} ${CBLAS_FLAG} "
- LIBS="${BACKUP_LIBS} ${BLAS_LIBS} ${LAPACK_LIBS}"
-
-
- AC_TRY_RUN(
- [#define __FFLASFFPACK_CONFIGURATION
- #define __FFLASFFPACK_HAVE_LAPACK 1
- //#define __FFLASFFPACK_HAVE_CLAPACK 1
- #include "fflas-ffpack/config-blas.h"
- int main () { double a[4] = {1.,2.,3.,4.};
- int ipiv[2];
- clapack_dgetrf(CblasRowMajor, 2, 2, a, 2, ipiv);
- if ( (a[0]!=2.) && (a[1]!=0.5) && (a[2]!=4.) && (a[3]!=1.))
- return -1;
- else
- return 0;
- } ],
- [ dgetrf_found="yes"
- break ],
- [ dgetrf_problem="$problem"
- unset LAPACK_LIBS
- ],
- [ break ])
- done ;
- AS_IF([ test "${dgetrf_found}" = "yes"],
- [ AC_SUBST(LAPACK_LIBS)
- AC_MSG_RESULT( yes (lapack))
- AC_DEFINE(HAVE_LAPACK,1,[Define if LAPACK is installed])
- ], dnl clapack not found. looking for lapack
- [
- AC_MSG_RESULT( no )
- ])
- ])
- ])
- ])
-
- dnl AM_CONDITIONAL(FFLASFFPACK_HAVE_LAPACK, test "x$HAVE_LAPACK" = "xyes")
-
- CXXFLAGS=${BACKUP_CXXFLAGS}
- LIBS=${BACKUP_LIBS}
- dnl unset LD_LIBRARY_PATH
-
-
- ])
-
diff --git a/macros/libtool.m4 b/macros/libtool.m4
deleted file mode 100644
index bc28ccc..0000000
--- a/macros/libtool.m4
+++ /dev/null
@@ -1,7995 +0,0 @@
-# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*-
-#
-# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-# 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# Written by Gordon Matzigkeit, 1996
-#
-# This file is free software; the Free Software Foundation gives
-# unlimited permission to copy and/or distribute it, with or without
-# modifications, as long as this notice is preserved.
-
-m4_define([_LT_COPYING], [dnl
-# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-# 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# Written by Gordon Matzigkeit, 1996
-#
-# This file is part of GNU Libtool.
-#
-# GNU Libtool is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation; either version 2 of
-# the License, or (at your option) any later version.
-#
-# As a special exception to the GNU General Public License,
-# if you distribute this file as part of a program or library that
-# is built using GNU Libtool, you may include this file under the
-# same distribution terms that you use for the rest of that program.
-#
-# GNU Libtool is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Libtool; see the file COPYING. If not, a copy
-# can be downloaded from http://www.gnu.org/licenses/gpl.html, or
-# obtained by writing to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-])
-
-# serial 57 LT_INIT
-
-
-# LT_PREREQ(VERSION)
-# ------------------
-# Complain and exit if this libtool version is less that VERSION.
-m4_defun([LT_PREREQ],
-[m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1,
- [m4_default([$3],
- [m4_fatal([Libtool version $1 or higher is required],
- 63)])],
- [$2])])
-
-
-# _LT_CHECK_BUILDDIR
-# ------------------
-# Complain if the absolute build directory name contains unusual characters
-m4_defun([_LT_CHECK_BUILDDIR],
-[case `pwd` in
- *\ * | *\ *)
- AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;;
-esac
-])
-
-
-# LT_INIT([OPTIONS])
-# ------------------
-AC_DEFUN([LT_INIT],
-[AC_PREREQ([2.58])dnl We use AC_INCLUDES_DEFAULT
-AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
-AC_BEFORE([$0], [LT_LANG])dnl
-AC_BEFORE([$0], [LT_OUTPUT])dnl
-AC_BEFORE([$0], [LTDL_INIT])dnl
-m4_require([_LT_CHECK_BUILDDIR])dnl
-
-dnl Autoconf doesn't catch unexpanded LT_ macros by default:
-m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl
-m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl
-dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4
-dnl unless we require an AC_DEFUNed macro:
-AC_REQUIRE([LTOPTIONS_VERSION])dnl
-AC_REQUIRE([LTSUGAR_VERSION])dnl
-AC_REQUIRE([LTVERSION_VERSION])dnl
-AC_REQUIRE([LTOBSOLETE_VERSION])dnl
-m4_require([_LT_PROG_LTMAIN])dnl
-
-_LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}])
-
-dnl Parse OPTIONS
-_LT_SET_OPTIONS([$0], [$1])
-
-# This can be used to rebuild libtool when needed
-LIBTOOL_DEPS="$ltmain"
-
-# Always use our own libtool.
-LIBTOOL='$(SHELL) $(top_builddir)/libtool'
-AC_SUBST(LIBTOOL)dnl
-
-_LT_SETUP
-
-# Only expand once:
-m4_define([LT_INIT])
-])# LT_INIT
-
-# Old names:
-AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT])
-AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_PROG_LIBTOOL], [])
-dnl AC_DEFUN([AM_PROG_LIBTOOL], [])
-
-
-# _LT_CC_BASENAME(CC)
-# -------------------
-# Calculate cc_basename. Skip known compiler wrappers and cross-prefix.
-m4_defun([_LT_CC_BASENAME],
-[for cc_temp in $1""; do
- case $cc_temp in
- compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;;
- distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;;
- \-*) ;;
- *) break;;
- esac
-done
-cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"`
-])
-
-
-# _LT_FILEUTILS_DEFAULTS
-# ----------------------
-# It is okay to use these file commands and assume they have been set
-# sensibly after `m4_require([_LT_FILEUTILS_DEFAULTS])'.
-m4_defun([_LT_FILEUTILS_DEFAULTS],
-[: ${CP="cp -f"}
-: ${MV="mv -f"}
-: ${RM="rm -f"}
-])# _LT_FILEUTILS_DEFAULTS
-
-
-# _LT_SETUP
-# ---------
-m4_defun([_LT_SETUP],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-AC_REQUIRE([AC_CANONICAL_BUILD])dnl
-AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl
-AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl
-
-_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl
-dnl
-_LT_DECL([], [host_alias], [0], [The host system])dnl
-_LT_DECL([], [host], [0])dnl
-_LT_DECL([], [host_os], [0])dnl
-dnl
-_LT_DECL([], [build_alias], [0], [The build system])dnl
-_LT_DECL([], [build], [0])dnl
-_LT_DECL([], [build_os], [0])dnl
-dnl
-AC_REQUIRE([AC_PROG_CC])dnl
-AC_REQUIRE([LT_PATH_LD])dnl
-AC_REQUIRE([LT_PATH_NM])dnl
-dnl
-AC_REQUIRE([AC_PROG_LN_S])dnl
-test -z "$LN_S" && LN_S="ln -s"
-_LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl
-dnl
-AC_REQUIRE([LT_CMD_MAX_LEN])dnl
-_LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl
-_LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl
-dnl
-m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_CHECK_SHELL_FEATURES])dnl
-m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl
-m4_require([_LT_CMD_RELOAD])dnl
-m4_require([_LT_CHECK_MAGIC_METHOD])dnl
-m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl
-m4_require([_LT_CMD_OLD_ARCHIVE])dnl
-m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl
-m4_require([_LT_WITH_SYSROOT])dnl
-
-_LT_CONFIG_LIBTOOL_INIT([
-# See if we are running on zsh, and set the options which allow our
-# commands through without removal of \ escapes INIT.
-if test -n "\${ZSH_VERSION+set}" ; then
- setopt NO_GLOB_SUBST
-fi
-])
-if test -n "${ZSH_VERSION+set}" ; then
- setopt NO_GLOB_SUBST
-fi
-
-_LT_CHECK_OBJDIR
-
-m4_require([_LT_TAG_COMPILER])dnl
-
-case $host_os in
-aix3*)
- # AIX sometimes has problems with the GCC collect2 program. For some
- # reason, if we set the COLLECT_NAMES environment variable, the problems
- # vanish in a puff of smoke.
- if test "X${COLLECT_NAMES+set}" != Xset; then
- COLLECT_NAMES=
- export COLLECT_NAMES
- fi
- ;;
-esac
-
-# Global variables:
-ofile=libtool
-can_build_shared=yes
-
-# All known linkers require a `.a' archive for static linking (except MSVC,
-# which needs '.lib').
-libext=a
-
-with_gnu_ld="$lt_cv_prog_gnu_ld"
-
-old_CC="$CC"
-old_CFLAGS="$CFLAGS"
-
-# Set sane defaults for various variables
-test -z "$CC" && CC=cc
-test -z "$LTCC" && LTCC=$CC
-test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS
-test -z "$LD" && LD=ld
-test -z "$ac_objext" && ac_objext=o
-
-_LT_CC_BASENAME([$compiler])
-
-# Only perform the check for file, if the check method requires it
-test -z "$MAGIC_CMD" && MAGIC_CMD=file
-case $deplibs_check_method in
-file_magic*)
- if test "$file_magic_cmd" = '$MAGIC_CMD'; then
- _LT_PATH_MAGIC
- fi
- ;;
-esac
-
-# Use C for the default configuration in the libtool script
-LT_SUPPORTED_TAG([CC])
-_LT_LANG_C_CONFIG
-_LT_LANG_DEFAULT_CONFIG
-_LT_CONFIG_COMMANDS
-])# _LT_SETUP
-
-
-# _LT_PREPARE_SED_QUOTE_VARS
-# --------------------------
-# Define a few sed substitution that help us do robust quoting.
-m4_defun([_LT_PREPARE_SED_QUOTE_VARS],
-[# Backslashify metacharacters that are still active within
-# double-quoted strings.
-sed_quote_subst='s/\([["`$\\]]\)/\\\1/g'
-
-# Same as above, but do not quote variable references.
-double_quote_subst='s/\([["`\\]]\)/\\\1/g'
-
-# Sed substitution to delay expansion of an escaped shell variable in a
-# double_quote_subst'ed string.
-delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'
-
-# Sed substitution to delay expansion of an escaped single quote.
-delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g'
-
-# Sed substitution to avoid accidental globbing in evaled expressions
-no_glob_subst='s/\*/\\\*/g'
-])
-
-# _LT_PROG_LTMAIN
-# ---------------
-# Note that this code is called both from `configure', and `config.status'
-# now that we use AC_CONFIG_COMMANDS to generate libtool. Notably,
-# `config.status' has no value for ac_aux_dir unless we are using Automake,
-# so we pass a copy along to make sure it has a sensible value anyway.
-m4_defun([_LT_PROG_LTMAIN],
-[m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl
-_LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir'])
-ltmain="$ac_aux_dir/ltmain.sh"
-])# _LT_PROG_LTMAIN
-
-
-## ------------------------------------- ##
-## Accumulate code for creating libtool. ##
-## ------------------------------------- ##
-
-# So that we can recreate a full libtool script including additional
-# tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS
-# in macros and then make a single call at the end using the `libtool'
-# label.
-
-
-# _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS])
-# ----------------------------------------
-# Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later.
-m4_define([_LT_CONFIG_LIBTOOL_INIT],
-[m4_ifval([$1],
- [m4_append([_LT_OUTPUT_LIBTOOL_INIT],
- [$1
-])])])
-
-# Initialize.
-m4_define([_LT_OUTPUT_LIBTOOL_INIT])
-
-
-# _LT_CONFIG_LIBTOOL([COMMANDS])
-# ------------------------------
-# Register COMMANDS to be passed to AC_CONFIG_COMMANDS later.
-m4_define([_LT_CONFIG_LIBTOOL],
-[m4_ifval([$1],
- [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS],
- [$1
-])])])
-
-# Initialize.
-m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS])
-
-
-# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS])
-# -----------------------------------------------------
-m4_defun([_LT_CONFIG_SAVE_COMMANDS],
-[_LT_CONFIG_LIBTOOL([$1])
-_LT_CONFIG_LIBTOOL_INIT([$2])
-])
-
-
-# _LT_FORMAT_COMMENT([COMMENT])
-# -----------------------------
-# Add leading comment marks to the start of each line, and a trailing
-# full-stop to the whole comment if one is not present already.
-m4_define([_LT_FORMAT_COMMENT],
-[m4_ifval([$1], [
-m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])],
- [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.])
-)])
-
-
-
-## ------------------------ ##
-## FIXME: Eliminate VARNAME ##
-## ------------------------ ##
-
-
-# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?])
-# -------------------------------------------------------------------
-# CONFIGNAME is the name given to the value in the libtool script.
-# VARNAME is the (base) name used in the configure script.
-# VALUE may be 0, 1 or 2 for a computed quote escaped value based on
-# VARNAME. Any other value will be used directly.
-m4_define([_LT_DECL],
-[lt_if_append_uniq([lt_decl_varnames], [$2], [, ],
- [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name],
- [m4_ifval([$1], [$1], [$2])])
- lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3])
- m4_ifval([$4],
- [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])])
- lt_dict_add_subkey([lt_decl_dict], [$2],
- [tagged?], [m4_ifval([$5], [yes], [no])])])
-])
-
-
-# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION])
-# --------------------------------------------------------
-m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])])
-
-
-# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...])
-# ------------------------------------------------
-m4_define([lt_decl_tag_varnames],
-[_lt_decl_filter([tagged?], [yes], $@)])
-
-
-# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..])
-# ---------------------------------------------------------
-m4_define([_lt_decl_filter],
-[m4_case([$#],
- [0], [m4_fatal([$0: too few arguments: $#])],
- [1], [m4_fatal([$0: too few arguments: $#: $1])],
- [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)],
- [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)],
- [lt_dict_filter([lt_decl_dict], $@)])[]dnl
-])
-
-
-# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...])
-# --------------------------------------------------
-m4_define([lt_decl_quote_varnames],
-[_lt_decl_filter([value], [1], $@)])
-
-
-# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...])
-# ---------------------------------------------------
-m4_define([lt_decl_dquote_varnames],
-[_lt_decl_filter([value], [2], $@)])
-
-
-# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...])
-# ---------------------------------------------------
-m4_define([lt_decl_varnames_tagged],
-[m4_assert([$# <= 2])dnl
-_$0(m4_quote(m4_default([$1], [[, ]])),
- m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]),
- m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))])
-m4_define([_lt_decl_varnames_tagged],
-[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])])
-
-
-# lt_decl_all_varnames([SEPARATOR], [VARNAME1...])
-# ------------------------------------------------
-m4_define([lt_decl_all_varnames],
-[_$0(m4_quote(m4_default([$1], [[, ]])),
- m4_if([$2], [],
- m4_quote(lt_decl_varnames),
- m4_quote(m4_shift($@))))[]dnl
-])
-m4_define([_lt_decl_all_varnames],
-[lt_join($@, lt_decl_varnames_tagged([$1],
- lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl
-])
-
-
-# _LT_CONFIG_STATUS_DECLARE([VARNAME])
-# ------------------------------------
-# Quote a variable value, and forward it to `config.status' so that its
-# declaration there will have the same value as in `configure'. VARNAME
-# must have a single quote delimited value for this to work.
-m4_define([_LT_CONFIG_STATUS_DECLARE],
-[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`'])
-
-
-# _LT_CONFIG_STATUS_DECLARATIONS
-# ------------------------------
-# We delimit libtool config variables with single quotes, so when
-# we write them to config.status, we have to be sure to quote all
-# embedded single quotes properly. In configure, this macro expands
-# each variable declared with _LT_DECL (and _LT_TAGDECL) into:
-#
-# <var>='`$ECHO "$<var>" | $SED "$delay_single_quote_subst"`'
-m4_defun([_LT_CONFIG_STATUS_DECLARATIONS],
-[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames),
- [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])])
-
-
-# _LT_LIBTOOL_TAGS
-# ----------------
-# Output comment and list of tags supported by the script
-m4_defun([_LT_LIBTOOL_TAGS],
-[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl
-available_tags="_LT_TAGS"dnl
-])
-
-
-# _LT_LIBTOOL_DECLARE(VARNAME, [TAG])
-# -----------------------------------
-# Extract the dictionary values for VARNAME (optionally with TAG) and
-# expand to a commented shell variable setting:
-#
-# # Some comment about what VAR is for.
-# visible_name=$lt_internal_name
-m4_define([_LT_LIBTOOL_DECLARE],
-[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1],
- [description])))[]dnl
-m4_pushdef([_libtool_name],
- m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl
-m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])),
- [0], [_libtool_name=[$]$1],
- [1], [_libtool_name=$lt_[]$1],
- [2], [_libtool_name=$lt_[]$1],
- [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl
-m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl
-])
-
-
-# _LT_LIBTOOL_CONFIG_VARS
-# -----------------------
-# Produce commented declarations of non-tagged libtool config variables
-# suitable for insertion in the LIBTOOL CONFIG section of the `libtool'
-# script. Tagged libtool config variables (even for the LIBTOOL CONFIG
-# section) are produced by _LT_LIBTOOL_TAG_VARS.
-m4_defun([_LT_LIBTOOL_CONFIG_VARS],
-[m4_foreach([_lt_var],
- m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)),
- [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])])
-
-
-# _LT_LIBTOOL_TAG_VARS(TAG)
-# -------------------------
-m4_define([_LT_LIBTOOL_TAG_VARS],
-[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames),
- [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])])
-
-
-# _LT_TAGVAR(VARNAME, [TAGNAME])
-# ------------------------------
-m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])])
-
-
-# _LT_CONFIG_COMMANDS
-# -------------------
-# Send accumulated output to $CONFIG_STATUS. Thanks to the lists of
-# variables for single and double quote escaping we saved from calls
-# to _LT_DECL, we can put quote escaped variables declarations
-# into `config.status', and then the shell code to quote escape them in
-# for loops in `config.status'. Finally, any additional code accumulated
-# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded.
-m4_defun([_LT_CONFIG_COMMANDS],
-[AC_PROVIDE_IFELSE([LT_OUTPUT],
- dnl If the libtool generation code has been placed in $CONFIG_LT,
- dnl instead of duplicating it all over again into config.status,
- dnl then we will have config.status run $CONFIG_LT later, so it
- dnl needs to know what name is stored there:
- [AC_CONFIG_COMMANDS([libtool],
- [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])],
- dnl If the libtool generation code is destined for config.status,
- dnl expand the accumulated commands and init code now:
- [AC_CONFIG_COMMANDS([libtool],
- [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])])
-])#_LT_CONFIG_COMMANDS
-
-
-# Initialize.
-m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT],
-[
-
-# The HP-UX ksh and POSIX shell print the target directory to stdout
-# if CDPATH is set.
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
-
-sed_quote_subst='$sed_quote_subst'
-double_quote_subst='$double_quote_subst'
-delay_variable_subst='$delay_variable_subst'
-_LT_CONFIG_STATUS_DECLARATIONS
-LTCC='$LTCC'
-LTCFLAGS='$LTCFLAGS'
-compiler='$compiler_DEFAULT'
-
-# A function that is used when there is no print builtin or printf.
-func_fallback_echo ()
-{
- eval 'cat <<_LTECHO_EOF
-\$[]1
-_LTECHO_EOF'
-}
-
-# Quote evaled strings.
-for var in lt_decl_all_varnames([[ \
-]], lt_decl_quote_varnames); do
- case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
- *[[\\\\\\\`\\"\\\$]]*)
- eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\""
- ;;
- *)
- eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
- ;;
- esac
-done
-
-# Double-quote double-evaled strings.
-for var in lt_decl_all_varnames([[ \
-]], lt_decl_dquote_varnames); do
- case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
- *[[\\\\\\\`\\"\\\$]]*)
- eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\""
- ;;
- *)
- eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
- ;;
- esac
-done
-
-_LT_OUTPUT_LIBTOOL_INIT
-])
-
-# _LT_GENERATED_FILE_INIT(FILE, [COMMENT])
-# ------------------------------------
-# Generate a child script FILE with all initialization necessary to
-# reuse the environment learned by the parent script, and make the
-# file executable. If COMMENT is supplied, it is inserted after the
-# `#!' sequence but before initialization text begins. After this
-# macro, additional text can be appended to FILE to form the body of
-# the child script. The macro ends with non-zero status if the
-# file could not be fully written (such as if the disk is full).
-m4_ifdef([AS_INIT_GENERATED],
-[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])],
-[m4_defun([_LT_GENERATED_FILE_INIT],
-[m4_require([AS_PREPARE])]dnl
-[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl
-[lt_write_fail=0
-cat >$1 <<_ASEOF || lt_write_fail=1
-#! $SHELL
-# Generated by $as_me.
-$2
-SHELL=\${CONFIG_SHELL-$SHELL}
-export SHELL
-_ASEOF
-cat >>$1 <<\_ASEOF || lt_write_fail=1
-AS_SHELL_SANITIZE
-_AS_PREPARE
-exec AS_MESSAGE_FD>&1
-_ASEOF
-test $lt_write_fail = 0 && chmod +x $1[]dnl
-m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT
-
-# LT_OUTPUT
-# ---------
-# This macro allows early generation of the libtool script (before
-# AC_OUTPUT is called), incase it is used in configure for compilation
-# tests.
-AC_DEFUN([LT_OUTPUT],
-[: ${CONFIG_LT=./config.lt}
-AC_MSG_NOTICE([creating $CONFIG_LT])
-_LT_GENERATED_FILE_INIT(["$CONFIG_LT"],
-[# Run this file to recreate a libtool stub with the current configuration.])
-
-cat >>"$CONFIG_LT" <<\_LTEOF
-lt_cl_silent=false
-exec AS_MESSAGE_LOG_FD>>config.log
-{
- echo
- AS_BOX([Running $as_me.])
-} >&AS_MESSAGE_LOG_FD
-
-lt_cl_help="\
-\`$as_me' creates a local libtool stub from the current configuration,
-for use in further configure time tests before the real libtool is
-generated.
-
-Usage: $[0] [[OPTIONS]]
-
- -h, --help print this help, then exit
- -V, --version print version number, then exit
- -q, --quiet do not print progress messages
- -d, --debug don't remove temporary files
-
-Report bugs to <bug-libtool at gnu.org>."
-
-lt_cl_version="\
-m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl
-m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION])
-configured by $[0], generated by m4_PACKAGE_STRING.
-
-Copyright (C) 2011 Free Software Foundation, Inc.
-This config.lt script is free software; the Free Software Foundation
-gives unlimited permision to copy, distribute and modify it."
-
-while test $[#] != 0
-do
- case $[1] in
- --version | --v* | -V )
- echo "$lt_cl_version"; exit 0 ;;
- --help | --h* | -h )
- echo "$lt_cl_help"; exit 0 ;;
- --debug | --d* | -d )
- debug=: ;;
- --quiet | --q* | --silent | --s* | -q )
- lt_cl_silent=: ;;
-
- -*) AC_MSG_ERROR([unrecognized option: $[1]
-Try \`$[0] --help' for more information.]) ;;
-
- *) AC_MSG_ERROR([unrecognized argument: $[1]
-Try \`$[0] --help' for more information.]) ;;
- esac
- shift
-done
-
-if $lt_cl_silent; then
- exec AS_MESSAGE_FD>/dev/null
-fi
-_LTEOF
-
-cat >>"$CONFIG_LT" <<_LTEOF
-_LT_OUTPUT_LIBTOOL_COMMANDS_INIT
-_LTEOF
-
-cat >>"$CONFIG_LT" <<\_LTEOF
-AC_MSG_NOTICE([creating $ofile])
-_LT_OUTPUT_LIBTOOL_COMMANDS
-AS_EXIT(0)
-_LTEOF
-chmod +x "$CONFIG_LT"
-
-# configure is writing to config.log, but config.lt does its own redirection,
-# appending to config.log, which fails on DOS, as config.log is still kept
-# open by configure. Here we exec the FD to /dev/null, effectively closing
-# config.log, so it can be properly (re)opened and appended to by config.lt.
-lt_cl_success=:
-test "$silent" = yes &&
- lt_config_lt_args="$lt_config_lt_args --quiet"
-exec AS_MESSAGE_LOG_FD>/dev/null
-$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false
-exec AS_MESSAGE_LOG_FD>>config.log
-$lt_cl_success || AS_EXIT(1)
-])# LT_OUTPUT
-
-
-# _LT_CONFIG(TAG)
-# ---------------
-# If TAG is the built-in tag, create an initial libtool script with a
-# default configuration from the untagged config vars. Otherwise add code
-# to config.status for appending the configuration named by TAG from the
-# matching tagged config vars.
-m4_defun([_LT_CONFIG],
-[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-_LT_CONFIG_SAVE_COMMANDS([
- m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl
- m4_if(_LT_TAG, [C], [
- # See if we are running on zsh, and set the options which allow our
- # commands through without removal of \ escapes.
- if test -n "${ZSH_VERSION+set}" ; then
- setopt NO_GLOB_SUBST
- fi
-
- cfgfile="${ofile}T"
- trap "$RM \"$cfgfile\"; exit 1" 1 2 15
- $RM "$cfgfile"
-
- cat <<_LT_EOF >> "$cfgfile"
-#! $SHELL
-
-# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services.
-# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION
-# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
-# NOTE: Changes made to this file will be lost: look at ltmain.sh.
-#
-_LT_COPYING
-_LT_LIBTOOL_TAGS
-
-# ### BEGIN LIBTOOL CONFIG
-_LT_LIBTOOL_CONFIG_VARS
-_LT_LIBTOOL_TAG_VARS
-# ### END LIBTOOL CONFIG
-
-_LT_EOF
-
- case $host_os in
- aix3*)
- cat <<\_LT_EOF >> "$cfgfile"
-# AIX sometimes has problems with the GCC collect2 program. For some
-# reason, if we set the COLLECT_NAMES environment variable, the problems
-# vanish in a puff of smoke.
-if test "X${COLLECT_NAMES+set}" != Xset; then
- COLLECT_NAMES=
- export COLLECT_NAMES
-fi
-_LT_EOF
- ;;
- esac
-
- _LT_PROG_LTMAIN
-
- # We use sed instead of cat because bash on DJGPP gets confused if
- # if finds mixed CR/LF and LF-only lines. Since sed operates in
- # text mode, it properly converts lines to CR/LF. This bash problem
- # is reportedly fixed, but why not run on old versions too?
- sed '$q' "$ltmain" >> "$cfgfile" \
- || (rm -f "$cfgfile"; exit 1)
-
- _LT_PROG_REPLACE_SHELLFNS
-
- mv -f "$cfgfile" "$ofile" ||
- (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile")
- chmod +x "$ofile"
-],
-[cat <<_LT_EOF >> "$ofile"
-
-dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded
-dnl in a comment (ie after a #).
-# ### BEGIN LIBTOOL TAG CONFIG: $1
-_LT_LIBTOOL_TAG_VARS(_LT_TAG)
-# ### END LIBTOOL TAG CONFIG: $1
-_LT_EOF
-])dnl /m4_if
-],
-[m4_if([$1], [], [
- PACKAGE='$PACKAGE'
- VERSION='$VERSION'
- TIMESTAMP='$TIMESTAMP'
- RM='$RM'
- ofile='$ofile'], [])
-])dnl /_LT_CONFIG_SAVE_COMMANDS
-])# _LT_CONFIG
-
-
-# LT_SUPPORTED_TAG(TAG)
-# ---------------------
-# Trace this macro to discover what tags are supported by the libtool
-# --tag option, using:
-# autoconf --trace 'LT_SUPPORTED_TAG:$1'
-AC_DEFUN([LT_SUPPORTED_TAG], [])
-
-
-# C support is built-in for now
-m4_define([_LT_LANG_C_enabled], [])
-m4_define([_LT_TAGS], [])
-
-
-# LT_LANG(LANG)
-# -------------
-# Enable libtool support for the given language if not already enabled.
-AC_DEFUN([LT_LANG],
-[AC_BEFORE([$0], [LT_OUTPUT])dnl
-m4_case([$1],
- [C], [_LT_LANG(C)],
- [C++], [_LT_LANG(CXX)],
- [Go], [_LT_LANG(GO)],
- [Java], [_LT_LANG(GCJ)],
- [Fortran 77], [_LT_LANG(F77)],
- [Fortran], [_LT_LANG(FC)],
- [Windows Resource], [_LT_LANG(RC)],
- [m4_ifdef([_LT_LANG_]$1[_CONFIG],
- [_LT_LANG($1)],
- [m4_fatal([$0: unsupported language: "$1"])])])dnl
-])# LT_LANG
-
-
-# _LT_LANG(LANGNAME)
-# ------------------
-m4_defun([_LT_LANG],
-[m4_ifdef([_LT_LANG_]$1[_enabled], [],
- [LT_SUPPORTED_TAG([$1])dnl
- m4_append([_LT_TAGS], [$1 ])dnl
- m4_define([_LT_LANG_]$1[_enabled], [])dnl
- _LT_LANG_$1_CONFIG($1)])dnl
-])# _LT_LANG
-
-
-m4_ifndef([AC_PROG_GO], [
-############################################################
-# NOTE: This macro has been submitted for inclusion into #
-# GNU Autoconf as AC_PROG_GO. When it is available in #
-# a released version of Autoconf we should remove this #
-# macro and use it instead. #
-############################################################
-m4_defun([AC_PROG_GO],
-[AC_LANG_PUSH(Go)dnl
-AC_ARG_VAR([GOC], [Go compiler command])dnl
-AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl
-_AC_ARG_VAR_LDFLAGS()dnl
-AC_CHECK_TOOL(GOC, gccgo)
-if test -z "$GOC"; then
- if test -n "$ac_tool_prefix"; then
- AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo])
- fi
-fi
-if test -z "$GOC"; then
- AC_CHECK_PROG(GOC, gccgo, gccgo, false)
-fi
-])#m4_defun
-])#m4_ifndef
-
-
-# _LT_LANG_DEFAULT_CONFIG
-# -----------------------
-m4_defun([_LT_LANG_DEFAULT_CONFIG],
-[AC_PROVIDE_IFELSE([AC_PROG_CXX],
- [LT_LANG(CXX)],
- [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])])
-
-AC_PROVIDE_IFELSE([AC_PROG_F77],
- [LT_LANG(F77)],
- [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])])
-
-AC_PROVIDE_IFELSE([AC_PROG_FC],
- [LT_LANG(FC)],
- [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])])
-
-dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal
-dnl pulling things in needlessly.
-AC_PROVIDE_IFELSE([AC_PROG_GCJ],
- [LT_LANG(GCJ)],
- [AC_PROVIDE_IFELSE([A][M_PROG_GCJ],
- [LT_LANG(GCJ)],
- [AC_PROVIDE_IFELSE([LT_PROG_GCJ],
- [LT_LANG(GCJ)],
- [m4_ifdef([AC_PROG_GCJ],
- [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])])
- m4_ifdef([A][M_PROG_GCJ],
- [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])])
- m4_ifdef([LT_PROG_GCJ],
- [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])])
-
-AC_PROVIDE_IFELSE([AC_PROG_GO],
- [LT_LANG(GO)],
- [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])])
-
-AC_PROVIDE_IFELSE([LT_PROG_RC],
- [LT_LANG(RC)],
- [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])])
-])# _LT_LANG_DEFAULT_CONFIG
-
-# Obsolete macros:
-AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)])
-AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)])
-AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)])
-AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)])
-AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_CXX], [])
-dnl AC_DEFUN([AC_LIBTOOL_F77], [])
-dnl AC_DEFUN([AC_LIBTOOL_FC], [])
-dnl AC_DEFUN([AC_LIBTOOL_GCJ], [])
-dnl AC_DEFUN([AC_LIBTOOL_RC], [])
-
-
-# _LT_TAG_COMPILER
-# ----------------
-m4_defun([_LT_TAG_COMPILER],
-[AC_REQUIRE([AC_PROG_CC])dnl
-
-_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl
-_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl
-_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl
-_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl
-
-# If no C compiler was specified, use CC.
-LTCC=${LTCC-"$CC"}
-
-# If no C compiler flags were specified, use CFLAGS.
-LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
-
-# Allow CC to be a program name with arguments.
-compiler=$CC
-])# _LT_TAG_COMPILER
-
-
-# _LT_COMPILER_BOILERPLATE
-# ------------------------
-# Check for compiler boilerplate output or warnings with
-# the simple compiler test code.
-m4_defun([_LT_COMPILER_BOILERPLATE],
-[m4_require([_LT_DECL_SED])dnl
-ac_outfile=conftest.$ac_objext
-echo "$lt_simple_compile_test_code" >conftest.$ac_ext
-eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
-_lt_compiler_boilerplate=`cat conftest.err`
-$RM conftest*
-])# _LT_COMPILER_BOILERPLATE
-
-
-# _LT_LINKER_BOILERPLATE
-# ----------------------
-# Check for linker boilerplate output or warnings with
-# the simple link test code.
-m4_defun([_LT_LINKER_BOILERPLATE],
-[m4_require([_LT_DECL_SED])dnl
-ac_outfile=conftest.$ac_objext
-echo "$lt_simple_link_test_code" >conftest.$ac_ext
-eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
-_lt_linker_boilerplate=`cat conftest.err`
-$RM -r conftest*
-])# _LT_LINKER_BOILERPLATE
-
-# _LT_REQUIRED_DARWIN_CHECKS
-# -------------------------
-m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[
- case $host_os in
- rhapsody* | darwin*)
- AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:])
- AC_CHECK_TOOL([NMEDIT], [nmedit], [:])
- AC_CHECK_TOOL([LIPO], [lipo], [:])
- AC_CHECK_TOOL([OTOOL], [otool], [:])
- AC_CHECK_TOOL([OTOOL64], [otool64], [:])
- _LT_DECL([], [DSYMUTIL], [1],
- [Tool to manipulate archived DWARF debug symbol files on Mac OS X])
- _LT_DECL([], [NMEDIT], [1],
- [Tool to change global to local symbols on Mac OS X])
- _LT_DECL([], [LIPO], [1],
- [Tool to manipulate fat objects and archives on Mac OS X])
- _LT_DECL([], [OTOOL], [1],
- [ldd/readelf like tool for Mach-O binaries on Mac OS X])
- _LT_DECL([], [OTOOL64], [1],
- [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4])
-
- AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod],
- [lt_cv_apple_cc_single_mod=no
- if test -z "${LT_MULTI_MODULE}"; then
- # By default we will add the -single_module flag. You can override
- # by either setting the environment variable LT_MULTI_MODULE
- # non-empty at configure time, or by adding -multi_module to the
- # link flags.
- rm -rf libconftest.dylib*
- echo "int foo(void){return 1;}" > conftest.c
- echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
--dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD
- $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
- -dynamiclib -Wl,-single_module conftest.c 2>conftest.err
- _lt_result=$?
- # If there is a non-empty error log, and "single_module"
- # appears in it, assume the flag caused a linker warning
- if test -s conftest.err && $GREP single_module conftest.err; then
- cat conftest.err >&AS_MESSAGE_LOG_FD
- # Otherwise, if the output was created with a 0 exit code from
- # the compiler, it worked.
- elif test -f libconftest.dylib && test $_lt_result -eq 0; then
- lt_cv_apple_cc_single_mod=yes
- else
- cat conftest.err >&AS_MESSAGE_LOG_FD
- fi
- rm -rf libconftest.dylib*
- rm -f conftest.*
- fi])
-
- AC_CACHE_CHECK([for -exported_symbols_list linker flag],
- [lt_cv_ld_exported_symbols_list],
- [lt_cv_ld_exported_symbols_list=no
- save_LDFLAGS=$LDFLAGS
- echo "_main" > conftest.sym
- LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym"
- AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])],
- [lt_cv_ld_exported_symbols_list=yes],
- [lt_cv_ld_exported_symbols_list=no])
- LDFLAGS="$save_LDFLAGS"
- ])
-
- AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load],
- [lt_cv_ld_force_load=no
- cat > conftest.c << _LT_EOF
-int forced_loaded() { return 2;}
-_LT_EOF
- echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD
- $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD
- echo "$AR cru libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD
- $AR cru libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD
- echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD
- $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD
- cat > conftest.c << _LT_EOF
-int main() { return 0;}
-_LT_EOF
- echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD
- $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
- _lt_result=$?
- if test -s conftest.err && $GREP force_load conftest.err; then
- cat conftest.err >&AS_MESSAGE_LOG_FD
- elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
- lt_cv_ld_force_load=yes
- else
- cat conftest.err >&AS_MESSAGE_LOG_FD
- fi
- rm -f conftest.err libconftest.a conftest conftest.c
- rm -rf conftest.dSYM
- ])
- case $host_os in
- rhapsody* | darwin1.[[012]])
- _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;;
- darwin1.*)
- _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
- darwin*) # darwin 5.x on
- # if running on 10.5 or later, the deployment target defaults
- # to the OS version, if on x86, and 10.4, the deployment
- # target defaults to 10.4. Don't you love it?
- case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in
- 10.0,*86*-darwin8*|10.0,*-darwin[[91]]*)
- _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
- 10.[[012]]*)
- _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
- 10.*)
- _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
- esac
- ;;
- esac
- if test "$lt_cv_apple_cc_single_mod" = "yes"; then
- _lt_dar_single_mod='$single_module'
- fi
- if test "$lt_cv_ld_exported_symbols_list" = "yes"; then
- _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym'
- else
- _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}'
- fi
- if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then
- _lt_dsymutil='~$DSYMUTIL $lib || :'
- else
- _lt_dsymutil=
- fi
- ;;
- esac
-])
-
-
-# _LT_DARWIN_LINKER_FEATURES([TAG])
-# ---------------------------------
-# Checks for linker and compiler features on darwin
-m4_defun([_LT_DARWIN_LINKER_FEATURES],
-[
- m4_require([_LT_REQUIRED_DARWIN_CHECKS])
- _LT_TAGVAR(archive_cmds_need_lc, $1)=no
- _LT_TAGVAR(hardcode_direct, $1)=no
- _LT_TAGVAR(hardcode_automatic, $1)=yes
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
- if test "$lt_cv_ld_force_load" = "yes"; then
- _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
- m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes],
- [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes])
- else
- _LT_TAGVAR(whole_archive_flag_spec, $1)=''
- fi
- _LT_TAGVAR(link_all_deplibs, $1)=yes
- _LT_TAGVAR(allow_undefined_flag, $1)="$_lt_dar_allow_undefined"
- case $cc_basename in
- ifort*) _lt_dar_can_shared=yes ;;
- *) _lt_dar_can_shared=$GCC ;;
- esac
- if test "$_lt_dar_can_shared" = "yes"; then
- output_verbose_link_cmd=func_echo_all
- _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}"
- _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}"
- _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}"
- _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}"
- m4_if([$1], [CXX],
-[ if test "$lt_cv_apple_cc_single_mod" != "yes"; then
- _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}"
- _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}"
- fi
-],[])
- else
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
-])
-
-# _LT_SYS_MODULE_PATH_AIX([TAGNAME])
-# ----------------------------------
-# Links a minimal program and checks the executable
-# for the system default hardcoded library path. In most cases,
-# this is /usr/lib:/lib, but when the MPI compilers are used
-# the location of the communication and MPI libs are included too.
-# If we don't find anything, use the default library path according
-# to the aix ld manual.
-# Store the results from the different compilers for each TAGNAME.
-# Allow to override them for all tags through lt_cv_aix_libpath.
-m4_defun([_LT_SYS_MODULE_PATH_AIX],
-[m4_require([_LT_DECL_SED])dnl
-if test "${lt_cv_aix_libpath+set}" = set; then
- aix_libpath=$lt_cv_aix_libpath
-else
- AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])],
- [AC_LINK_IFELSE([AC_LANG_PROGRAM],[
- lt_aix_libpath_sed='[
- /Import File Strings/,/^$/ {
- /^0/ {
- s/^0 *\([^ ]*\) *$/\1/
- p
- }
- }]'
- _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
- # Check for a 64-bit object if we didn't find anything.
- if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then
- _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
- fi],[])
- if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then
- _LT_TAGVAR([lt_cv_aix_libpath_], [$1])="/usr/lib:/lib"
- fi
- ])
- aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])
-fi
-])# _LT_SYS_MODULE_PATH_AIX
-
-
-# _LT_SHELL_INIT(ARG)
-# -------------------
-m4_define([_LT_SHELL_INIT],
-[m4_divert_text([M4SH-INIT], [$1
-])])# _LT_SHELL_INIT
-
-
-
-# _LT_PROG_ECHO_BACKSLASH
-# -----------------------
-# Find how we can fake an echo command that does not interpret backslash.
-# In particular, with Autoconf 2.60 or later we add some code to the start
-# of the generated configure script which will find a shell with a builtin
-# printf (which we can use as an echo command).
-m4_defun([_LT_PROG_ECHO_BACKSLASH],
-[ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
-ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO
-ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO
-
-AC_MSG_CHECKING([how to print strings])
-# Test print first, because it will be a builtin if present.
-if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \
- test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then
- ECHO='print -r --'
-elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then
- ECHO='printf %s\n'
-else
- # Use this function as a fallback that always works.
- func_fallback_echo ()
- {
- eval 'cat <<_LTECHO_EOF
-$[]1
-_LTECHO_EOF'
- }
- ECHO='func_fallback_echo'
-fi
-
-# func_echo_all arg...
-# Invoke $ECHO with all args, space-separated.
-func_echo_all ()
-{
- $ECHO "$*"
-}
-
-case "$ECHO" in
- printf*) AC_MSG_RESULT([printf]) ;;
- print*) AC_MSG_RESULT([print -r]) ;;
- *) AC_MSG_RESULT([cat]) ;;
-esac
-
-m4_ifdef([_AS_DETECT_SUGGESTED],
-[_AS_DETECT_SUGGESTED([
- test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || (
- ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
- ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO
- ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO
- PATH=/empty FPATH=/empty; export PATH FPATH
- test "X`printf %s $ECHO`" = "X$ECHO" \
- || test "X`print -r -- $ECHO`" = "X$ECHO" )])])
-
-_LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts])
-_LT_DECL([], [ECHO], [1], [An echo program that protects backslashes])
-])# _LT_PROG_ECHO_BACKSLASH
-
-
-# _LT_WITH_SYSROOT
-# ----------------
-AC_DEFUN([_LT_WITH_SYSROOT],
-[AC_MSG_CHECKING([for sysroot])
-AC_ARG_WITH([sysroot],
-[ --with-sysroot[=DIR] Search for dependent libraries within DIR
- (or the compiler's sysroot if not specified).],
-[], [with_sysroot=no])
-
-dnl lt_sysroot will always be passed unquoted. We quote it here
-dnl in case the user passed a directory name.
-lt_sysroot=
-case ${with_sysroot} in #(
- yes)
- if test "$GCC" = yes; then
- lt_sysroot=`$CC --print-sysroot 2>/dev/null`
- fi
- ;; #(
- /*)
- lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"`
- ;; #(
- no|'')
- ;; #(
- *)
- AC_MSG_RESULT([${with_sysroot}])
- AC_MSG_ERROR([The sysroot must be an absolute path.])
- ;;
-esac
-
- AC_MSG_RESULT([${lt_sysroot:-no}])
-_LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl
-[dependent libraries, and in which our libraries should be installed.])])
-
-# _LT_ENABLE_LOCK
-# ---------------
-m4_defun([_LT_ENABLE_LOCK],
-[AC_ARG_ENABLE([libtool-lock],
- [AS_HELP_STRING([--disable-libtool-lock],
- [avoid locking (might break parallel builds)])])
-test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes
-
-# Some flags need to be propagated to the compiler or linker for good
-# libtool support.
-case $host in
-ia64-*-hpux*)
- # Find out which ABI we are using.
- echo 'int i;' > conftest.$ac_ext
- if AC_TRY_EVAL(ac_compile); then
- case `/usr/bin/file conftest.$ac_objext` in
- *ELF-32*)
- HPUX_IA64_MODE="32"
- ;;
- *ELF-64*)
- HPUX_IA64_MODE="64"
- ;;
- esac
- fi
- rm -rf conftest*
- ;;
-*-*-irix6*)
- # Find out which ABI we are using.
- echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext
- if AC_TRY_EVAL(ac_compile); then
- if test "$lt_cv_prog_gnu_ld" = yes; then
- case `/usr/bin/file conftest.$ac_objext` in
- *32-bit*)
- LD="${LD-ld} -melf32bsmip"
- ;;
- *N32*)
- LD="${LD-ld} -melf32bmipn32"
- ;;
- *64-bit*)
- LD="${LD-ld} -melf64bmip"
- ;;
- esac
- else
- case `/usr/bin/file conftest.$ac_objext` in
- *32-bit*)
- LD="${LD-ld} -32"
- ;;
- *N32*)
- LD="${LD-ld} -n32"
- ;;
- *64-bit*)
- LD="${LD-ld} -64"
- ;;
- esac
- fi
- fi
- rm -rf conftest*
- ;;
-
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
-s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
- # Find out which ABI we are using.
- echo 'int i;' > conftest.$ac_ext
- if AC_TRY_EVAL(ac_compile); then
- case `/usr/bin/file conftest.o` in
- *32-bit*)
- case $host in
- x86_64-*kfreebsd*-gnu)
- LD="${LD-ld} -m elf_i386_fbsd"
- ;;
- x86_64-*linux*)
- LD="${LD-ld} -m elf_i386"
- ;;
- ppc64-*linux*|powerpc64-*linux*)
- LD="${LD-ld} -m elf32ppclinux"
- ;;
- s390x-*linux*)
- LD="${LD-ld} -m elf_s390"
- ;;
- sparc64-*linux*)
- LD="${LD-ld} -m elf32_sparc"
- ;;
- esac
- ;;
- *64-bit*)
- case $host in
- x86_64-*kfreebsd*-gnu)
- LD="${LD-ld} -m elf_x86_64_fbsd"
- ;;
- x86_64-*linux*)
- LD="${LD-ld} -m elf_x86_64"
- ;;
- ppc*-*linux*|powerpc*-*linux*)
- LD="${LD-ld} -m elf64ppc"
- ;;
- s390*-*linux*|s390*-*tpf*)
- LD="${LD-ld} -m elf64_s390"
- ;;
- sparc*-*linux*)
- LD="${LD-ld} -m elf64_sparc"
- ;;
- esac
- ;;
- esac
- fi
- rm -rf conftest*
- ;;
-
-*-*-sco3.2v5*)
- # On SCO OpenServer 5, we need -belf to get full-featured binaries.
- SAVE_CFLAGS="$CFLAGS"
- CFLAGS="$CFLAGS -belf"
- AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf,
- [AC_LANG_PUSH(C)
- AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no])
- AC_LANG_POP])
- if test x"$lt_cv_cc_needs_belf" != x"yes"; then
- # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
- CFLAGS="$SAVE_CFLAGS"
- fi
- ;;
-*-*solaris*)
- # Find out which ABI we are using.
- echo 'int i;' > conftest.$ac_ext
- if AC_TRY_EVAL(ac_compile); then
- case `/usr/bin/file conftest.o` in
- *64-bit*)
- case $lt_cv_prog_gnu_ld in
- yes*)
- case $host in
- i?86-*-solaris*)
- LD="${LD-ld} -m elf_x86_64"
- ;;
- sparc*-*-solaris*)
- LD="${LD-ld} -m elf64_sparc"
- ;;
- esac
- # GNU ld 2.21 introduced _sol2 emulations. Use them if available.
- if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
- LD="${LD-ld}_sol2"
- fi
- ;;
- *)
- if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
- LD="${LD-ld} -64"
- fi
- ;;
- esac
- ;;
- esac
- fi
- rm -rf conftest*
- ;;
-esac
-
-need_locks="$enable_libtool_lock"
-])# _LT_ENABLE_LOCK
-
-
-# _LT_PROG_AR
-# -----------
-m4_defun([_LT_PROG_AR],
-[AC_CHECK_TOOLS(AR, [ar], false)
-: ${AR=ar}
-: ${AR_FLAGS=cru}
-_LT_DECL([], [AR], [1], [The archiver])
-_LT_DECL([], [AR_FLAGS], [1], [Flags to create an archive])
-
-AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file],
- [lt_cv_ar_at_file=no
- AC_COMPILE_IFELSE([AC_LANG_PROGRAM],
- [echo conftest.$ac_objext > conftest.lst
- lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD'
- AC_TRY_EVAL([lt_ar_try])
- if test "$ac_status" -eq 0; then
- # Ensure the archiver fails upon bogus file names.
- rm -f conftest.$ac_objext libconftest.a
- AC_TRY_EVAL([lt_ar_try])
- if test "$ac_status" -ne 0; then
- lt_cv_ar_at_file=@
- fi
- fi
- rm -f conftest.* libconftest.a
- ])
- ])
-
-if test "x$lt_cv_ar_at_file" = xno; then
- archiver_list_spec=
-else
- archiver_list_spec=$lt_cv_ar_at_file
-fi
-_LT_DECL([], [archiver_list_spec], [1],
- [How to feed a file listing to the archiver])
-])# _LT_PROG_AR
-
-
-# _LT_CMD_OLD_ARCHIVE
-# -------------------
-m4_defun([_LT_CMD_OLD_ARCHIVE],
-[_LT_PROG_AR
-
-AC_CHECK_TOOL(STRIP, strip, :)
-test -z "$STRIP" && STRIP=:
-_LT_DECL([], [STRIP], [1], [A symbol stripping program])
-
-AC_CHECK_TOOL(RANLIB, ranlib, :)
-test -z "$RANLIB" && RANLIB=:
-_LT_DECL([], [RANLIB], [1],
- [Commands used to install an old-style archive])
-
-# Determine commands to create old-style static archives.
-old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs'
-old_postinstall_cmds='chmod 644 $oldlib'
-old_postuninstall_cmds=
-
-if test -n "$RANLIB"; then
- case $host_os in
- openbsd*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
- ;;
- *)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
- ;;
- esac
- old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
-fi
-
-case $host_os in
- darwin*)
- lock_old_archive_extraction=yes ;;
- *)
- lock_old_archive_extraction=no ;;
-esac
-_LT_DECL([], [old_postinstall_cmds], [2])
-_LT_DECL([], [old_postuninstall_cmds], [2])
-_LT_TAGDECL([], [old_archive_cmds], [2],
- [Commands used to build an old-style archive])
-_LT_DECL([], [lock_old_archive_extraction], [0],
- [Whether to use a lock for old archive extraction])
-])# _LT_CMD_OLD_ARCHIVE
-
-
-# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
-# [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE])
-# ----------------------------------------------------------------
-# Check whether the given compiler option works
-AC_DEFUN([_LT_COMPILER_OPTION],
-[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_DECL_SED])dnl
-AC_CACHE_CHECK([$1], [$2],
- [$2=no
- m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4])
- echo "$lt_simple_compile_test_code" > conftest.$ac_ext
- lt_compiler_flag="$3"
- # Insert the option either (1) after the last *FLAGS variable, or
- # (2) before a word containing "conftest.", or (3) at the end.
- # Note that $ac_compile itself does not contain backslashes and begins
- # with a dollar sign (not a hyphen), so the echo should work correctly.
- # The option is referenced via a variable to avoid confusing sed.
- lt_compile=`echo "$ac_compile" | $SED \
- -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
- -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
- -e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
- (eval "$lt_compile" 2>conftest.err)
- ac_status=$?
- cat conftest.err >&AS_MESSAGE_LOG_FD
- echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
- if (exit $ac_status) && test -s "$ac_outfile"; then
- # The compiler can only warn and ignore the option if not recognized
- # So say no if there are warnings other than the usual output.
- $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
- $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
- if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
- $2=yes
- fi
- fi
- $RM conftest*
-])
-
-if test x"[$]$2" = xyes; then
- m4_if([$5], , :, [$5])
-else
- m4_if([$6], , :, [$6])
-fi
-])# _LT_COMPILER_OPTION
-
-# Old name:
-AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], [])
-
-
-# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
-# [ACTION-SUCCESS], [ACTION-FAILURE])
-# ----------------------------------------------------
-# Check whether the given linker option works
-AC_DEFUN([_LT_LINKER_OPTION],
-[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_DECL_SED])dnl
-AC_CACHE_CHECK([$1], [$2],
- [$2=no
- save_LDFLAGS="$LDFLAGS"
- LDFLAGS="$LDFLAGS $3"
- echo "$lt_simple_link_test_code" > conftest.$ac_ext
- if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
- # The linker can only warn and ignore the option if not recognized
- # So say no if there are warnings
- if test -s conftest.err; then
- # Append any errors to the config.log.
- cat conftest.err 1>&AS_MESSAGE_LOG_FD
- $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
- $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
- if diff conftest.exp conftest.er2 >/dev/null; then
- $2=yes
- fi
- else
- $2=yes
- fi
- fi
- $RM -r conftest*
- LDFLAGS="$save_LDFLAGS"
-])
-
-if test x"[$]$2" = xyes; then
- m4_if([$4], , :, [$4])
-else
- m4_if([$5], , :, [$5])
-fi
-])# _LT_LINKER_OPTION
-
-# Old name:
-AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], [])
-
-
-# LT_CMD_MAX_LEN
-#---------------
-AC_DEFUN([LT_CMD_MAX_LEN],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-# find the maximum length of command line arguments
-AC_MSG_CHECKING([the maximum length of command line arguments])
-AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
- i=0
- teststring="ABCD"
-
- case $build_os in
- msdosdjgpp*)
- # On DJGPP, this test can blow up pretty badly due to problems in libc
- # (any single argument exceeding 2000 bytes causes a buffer overrun
- # during glob expansion). Even if it were fixed, the result of this
- # check would be larger than it should be.
- lt_cv_sys_max_cmd_len=12288; # 12K is about right
- ;;
-
- gnu*)
- # Under GNU Hurd, this test is not required because there is
- # no limit to the length of command line arguments.
- # Libtool will interpret -1 as no limit whatsoever
- lt_cv_sys_max_cmd_len=-1;
- ;;
-
- cygwin* | mingw* | cegcc*)
- # On Win9x/ME, this test blows up -- it succeeds, but takes
- # about 5 minutes as the teststring grows exponentially.
- # Worse, since 9x/ME are not pre-emptively multitasking,
- # you end up with a "frozen" computer, even though with patience
- # the test eventually succeeds (with a max line length of 256k).
- # Instead, let's just punt: use the minimum linelength reported by
- # all of the supported platforms: 8192 (on NT/2K/XP).
- lt_cv_sys_max_cmd_len=8192;
- ;;
-
- mint*)
- # On MiNT this can take a long time and run out of memory.
- lt_cv_sys_max_cmd_len=8192;
- ;;
-
- amigaos*)
- # On AmigaOS with pdksh, this test takes hours, literally.
- # So we just punt and use a minimum line length of 8192.
- lt_cv_sys_max_cmd_len=8192;
- ;;
-
- netbsd* | freebsd* | openbsd* | darwin* | dragonfly*)
- # This has been around since 386BSD, at least. Likely further.
- if test -x /sbin/sysctl; then
- lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax`
- elif test -x /usr/sbin/sysctl; then
- lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax`
- else
- lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs
- fi
- # And add a safety zone
- lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
- lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
- ;;
-
- interix*)
- # We know the value 262144 and hardcode it with a safety zone (like BSD)
- lt_cv_sys_max_cmd_len=196608
- ;;
-
- os2*)
- # The test takes a long time on OS/2.
- lt_cv_sys_max_cmd_len=8192
- ;;
-
- osf*)
- # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
- # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
- # nice to cause kernel panics so lets avoid the loop below.
- # First set a reasonable default.
- lt_cv_sys_max_cmd_len=16384
- #
- if test -x /sbin/sysconfig; then
- case `/sbin/sysconfig -q proc exec_disable_arg_limit` in
- *1*) lt_cv_sys_max_cmd_len=-1 ;;
- esac
- fi
- ;;
- sco3.2v5*)
- lt_cv_sys_max_cmd_len=102400
- ;;
- sysv5* | sco5v6* | sysv4.2uw2*)
- kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null`
- if test -n "$kargmax"; then
- lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[ ]]//'`
- else
- lt_cv_sys_max_cmd_len=32768
- fi
- ;;
- *)
- lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
- if test -n "$lt_cv_sys_max_cmd_len"; then
- lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
- lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
- else
- # Make teststring a little bigger before we do anything with it.
- # a 1K string should be a reasonable start.
- for i in 1 2 3 4 5 6 7 8 ; do
- teststring=$teststring$teststring
- done
- SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}}
- # If test is not a shell built-in, we'll probably end up computing a
- # maximum length that is only half of the actual maximum length, but
- # we can't tell.
- while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
- = "X$teststring$teststring"; } >/dev/null 2>&1 &&
- test $i != 17 # 1/2 MB should be enough
- do
- i=`expr $i + 1`
- teststring=$teststring$teststring
- done
- # Only check the string length outside the loop.
- lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1`
- teststring=
- # Add a significant safety factor because C++ compilers can tack on
- # massive amounts of additional arguments before passing them to the
- # linker. It appears as though 1/2 is a usable value.
- lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2`
- fi
- ;;
- esac
-])
-if test -n $lt_cv_sys_max_cmd_len ; then
- AC_MSG_RESULT($lt_cv_sys_max_cmd_len)
-else
- AC_MSG_RESULT(none)
-fi
-max_cmd_len=$lt_cv_sys_max_cmd_len
-_LT_DECL([], [max_cmd_len], [0],
- [What is the maximum length of a command?])
-])# LT_CMD_MAX_LEN
-
-# Old name:
-AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], [])
-
-
-# _LT_HEADER_DLFCN
-# ----------------
-m4_defun([_LT_HEADER_DLFCN],
-[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl
-])# _LT_HEADER_DLFCN
-
-
-# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE,
-# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING)
-# ----------------------------------------------------------------
-m4_defun([_LT_TRY_DLOPEN_SELF],
-[m4_require([_LT_HEADER_DLFCN])dnl
-if test "$cross_compiling" = yes; then :
- [$4]
-else
- lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
- lt_status=$lt_dlunknown
- cat > conftest.$ac_ext <<_LT_EOF
-[#line $LINENO "configure"
-#include "confdefs.h"
-
-#if HAVE_DLFCN_H
-#include <dlfcn.h>
-#endif
-
-#include <stdio.h>
-
-#ifdef RTLD_GLOBAL
-# define LT_DLGLOBAL RTLD_GLOBAL
-#else
-# ifdef DL_GLOBAL
-# define LT_DLGLOBAL DL_GLOBAL
-# else
-# define LT_DLGLOBAL 0
-# endif
-#endif
-
-/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
- find out it does not work in some platform. */
-#ifndef LT_DLLAZY_OR_NOW
-# ifdef RTLD_LAZY
-# define LT_DLLAZY_OR_NOW RTLD_LAZY
-# else
-# ifdef DL_LAZY
-# define LT_DLLAZY_OR_NOW DL_LAZY
-# else
-# ifdef RTLD_NOW
-# define LT_DLLAZY_OR_NOW RTLD_NOW
-# else
-# ifdef DL_NOW
-# define LT_DLLAZY_OR_NOW DL_NOW
-# else
-# define LT_DLLAZY_OR_NOW 0
-# endif
-# endif
-# endif
-# endif
-#endif
-
-/* When -fvisbility=hidden is used, assume the code has been annotated
- correspondingly for the symbols needed. */
-#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3))
-int fnord () __attribute__((visibility("default")));
-#endif
-
-int fnord () { return 42; }
-int main ()
-{
- void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
- int status = $lt_dlunknown;
-
- if (self)
- {
- if (dlsym (self,"fnord")) status = $lt_dlno_uscore;
- else
- {
- if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore;
- else puts (dlerror ());
- }
- /* dlclose (self); */
- }
- else
- puts (dlerror ());
-
- return status;
-}]
-_LT_EOF
- if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then
- (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null
- lt_status=$?
- case x$lt_status in
- x$lt_dlno_uscore) $1 ;;
- x$lt_dlneed_uscore) $2 ;;
- x$lt_dlunknown|x*) $3 ;;
- esac
- else :
- # compilation failed
- $3
- fi
-fi
-rm -fr conftest*
-])# _LT_TRY_DLOPEN_SELF
-
-
-# LT_SYS_DLOPEN_SELF
-# ------------------
-AC_DEFUN([LT_SYS_DLOPEN_SELF],
-[m4_require([_LT_HEADER_DLFCN])dnl
-if test "x$enable_dlopen" != xyes; then
- enable_dlopen=unknown
- enable_dlopen_self=unknown
- enable_dlopen_self_static=unknown
-else
- lt_cv_dlopen=no
- lt_cv_dlopen_libs=
-
- case $host_os in
- beos*)
- lt_cv_dlopen="load_add_on"
- lt_cv_dlopen_libs=
- lt_cv_dlopen_self=yes
- ;;
-
- mingw* | pw32* | cegcc*)
- lt_cv_dlopen="LoadLibrary"
- lt_cv_dlopen_libs=
- ;;
-
- cygwin*)
- lt_cv_dlopen="dlopen"
- lt_cv_dlopen_libs=
- ;;
-
- darwin*)
- # if libdl is installed we need to link against it
- AC_CHECK_LIB([dl], [dlopen],
- [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[
- lt_cv_dlopen="dyld"
- lt_cv_dlopen_libs=
- lt_cv_dlopen_self=yes
- ])
- ;;
-
- *)
- AC_CHECK_FUNC([shl_load],
- [lt_cv_dlopen="shl_load"],
- [AC_CHECK_LIB([dld], [shl_load],
- [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"],
- [AC_CHECK_FUNC([dlopen],
- [lt_cv_dlopen="dlopen"],
- [AC_CHECK_LIB([dl], [dlopen],
- [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],
- [AC_CHECK_LIB([svld], [dlopen],
- [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"],
- [AC_CHECK_LIB([dld], [dld_link],
- [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"])
- ])
- ])
- ])
- ])
- ])
- ;;
- esac
-
- if test "x$lt_cv_dlopen" != xno; then
- enable_dlopen=yes
- else
- enable_dlopen=no
- fi
-
- case $lt_cv_dlopen in
- dlopen)
- save_CPPFLAGS="$CPPFLAGS"
- test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
-
- save_LDFLAGS="$LDFLAGS"
- wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"
-
- save_LIBS="$LIBS"
- LIBS="$lt_cv_dlopen_libs $LIBS"
-
- AC_CACHE_CHECK([whether a program can dlopen itself],
- lt_cv_dlopen_self, [dnl
- _LT_TRY_DLOPEN_SELF(
- lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes,
- lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross)
- ])
-
- if test "x$lt_cv_dlopen_self" = xyes; then
- wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\"
- AC_CACHE_CHECK([whether a statically linked program can dlopen itself],
- lt_cv_dlopen_self_static, [dnl
- _LT_TRY_DLOPEN_SELF(
- lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes,
- lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross)
- ])
- fi
-
- CPPFLAGS="$save_CPPFLAGS"
- LDFLAGS="$save_LDFLAGS"
- LIBS="$save_LIBS"
- ;;
- esac
-
- case $lt_cv_dlopen_self in
- yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
- *) enable_dlopen_self=unknown ;;
- esac
-
- case $lt_cv_dlopen_self_static in
- yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
- *) enable_dlopen_self_static=unknown ;;
- esac
-fi
-_LT_DECL([dlopen_support], [enable_dlopen], [0],
- [Whether dlopen is supported])
-_LT_DECL([dlopen_self], [enable_dlopen_self], [0],
- [Whether dlopen of programs is supported])
-_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0],
- [Whether dlopen of statically linked programs is supported])
-])# LT_SYS_DLOPEN_SELF
-
-# Old name:
-AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], [])
-
-
-# _LT_COMPILER_C_O([TAGNAME])
-# ---------------------------
-# Check to see if options -c and -o are simultaneously supported by compiler.
-# This macro does not hard code the compiler like AC_PROG_CC_C_O.
-m4_defun([_LT_COMPILER_C_O],
-[m4_require([_LT_DECL_SED])dnl
-m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_TAG_COMPILER])dnl
-AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext],
- [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)],
- [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no
- $RM -r conftest 2>/dev/null
- mkdir conftest
- cd conftest
- mkdir out
- echo "$lt_simple_compile_test_code" > conftest.$ac_ext
-
- lt_compiler_flag="-o out/conftest2.$ac_objext"
- # Insert the option either (1) after the last *FLAGS variable, or
- # (2) before a word containing "conftest.", or (3) at the end.
- # Note that $ac_compile itself does not contain backslashes and begins
- # with a dollar sign (not a hyphen), so the echo should work correctly.
- lt_compile=`echo "$ac_compile" | $SED \
- -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
- -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
- -e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
- (eval "$lt_compile" 2>out/conftest.err)
- ac_status=$?
- cat out/conftest.err >&AS_MESSAGE_LOG_FD
- echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
- if (exit $ac_status) && test -s out/conftest2.$ac_objext
- then
- # The compiler can only warn and ignore the option if not recognized
- # So say no if there are warnings
- $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
- $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
- if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
- _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes
- fi
- fi
- chmod u+w . 2>&AS_MESSAGE_LOG_FD
- $RM conftest*
- # SGI C++ compiler will create directory out/ii_files/ for
- # template instantiation
- test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
- $RM out/* && rmdir out
- cd ..
- $RM -r conftest
- $RM conftest*
-])
-_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1],
- [Does compiler simultaneously support -c and -o options?])
-])# _LT_COMPILER_C_O
-
-
-# _LT_COMPILER_FILE_LOCKS([TAGNAME])
-# ----------------------------------
-# Check to see if we can do hard links to lock some files if needed
-m4_defun([_LT_COMPILER_FILE_LOCKS],
-[m4_require([_LT_ENABLE_LOCK])dnl
-m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-_LT_COMPILER_C_O([$1])
-
-hard_links="nottested"
-if test "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" = no && test "$need_locks" != no; then
- # do not overwrite the value of need_locks provided by the user
- AC_MSG_CHECKING([if we can lock with hard links])
- hard_links=yes
- $RM conftest*
- ln conftest.a conftest.b 2>/dev/null && hard_links=no
- touch conftest.a
- ln conftest.a conftest.b 2>&5 || hard_links=no
- ln conftest.a conftest.b 2>/dev/null && hard_links=no
- AC_MSG_RESULT([$hard_links])
- if test "$hard_links" = no; then
- AC_MSG_WARN([`$CC' does not support `-c -o', so `make -j' may be unsafe])
- need_locks=warn
- fi
-else
- need_locks=no
-fi
-_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?])
-])# _LT_COMPILER_FILE_LOCKS
-
-
-# _LT_CHECK_OBJDIR
-# ----------------
-m4_defun([_LT_CHECK_OBJDIR],
-[AC_CACHE_CHECK([for objdir], [lt_cv_objdir],
-[rm -f .libs 2>/dev/null
-mkdir .libs 2>/dev/null
-if test -d .libs; then
- lt_cv_objdir=.libs
-else
- # MS-DOS does not allow filenames that begin with a dot.
- lt_cv_objdir=_libs
-fi
-rmdir .libs 2>/dev/null])
-objdir=$lt_cv_objdir
-_LT_DECL([], [objdir], [0],
- [The name of the directory that contains temporary libtool files])dnl
-m4_pattern_allow([LT_OBJDIR])dnl
-AC_DEFINE_UNQUOTED(LT_OBJDIR, "$lt_cv_objdir/",
- [Define to the sub-directory in which libtool stores uninstalled libraries.])
-])# _LT_CHECK_OBJDIR
-
-
-# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME])
-# --------------------------------------
-# Check hardcoding attributes.
-m4_defun([_LT_LINKER_HARDCODE_LIBPATH],
-[AC_MSG_CHECKING([how to hardcode library paths into programs])
-_LT_TAGVAR(hardcode_action, $1)=
-if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" ||
- test -n "$_LT_TAGVAR(runpath_var, $1)" ||
- test "X$_LT_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then
-
- # We can hardcode non-existent directories.
- if test "$_LT_TAGVAR(hardcode_direct, $1)" != no &&
- # If the only mechanism to avoid hardcoding is shlibpath_var, we
- # have to relink, otherwise we might link with an installed library
- # when we should be linking with a yet-to-be-installed one
- ## test "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" != no &&
- test "$_LT_TAGVAR(hardcode_minus_L, $1)" != no; then
- # Linking always hardcodes the temporary library directory.
- _LT_TAGVAR(hardcode_action, $1)=relink
- else
- # We can link without hardcoding, and we can hardcode nonexisting dirs.
- _LT_TAGVAR(hardcode_action, $1)=immediate
- fi
-else
- # We cannot hardcode anything, or else we can only hardcode existing
- # directories.
- _LT_TAGVAR(hardcode_action, $1)=unsupported
-fi
-AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)])
-
-if test "$_LT_TAGVAR(hardcode_action, $1)" = relink ||
- test "$_LT_TAGVAR(inherit_rpath, $1)" = yes; then
- # Fast installation is not supported
- enable_fast_install=no
-elif test "$shlibpath_overrides_runpath" = yes ||
- test "$enable_shared" = no; then
- # Fast installation is not necessary
- enable_fast_install=needless
-fi
-_LT_TAGDECL([], [hardcode_action], [0],
- [How to hardcode a shared library path into an executable])
-])# _LT_LINKER_HARDCODE_LIBPATH
-
-
-# _LT_CMD_STRIPLIB
-# ----------------
-m4_defun([_LT_CMD_STRIPLIB],
-[m4_require([_LT_DECL_EGREP])
-striplib=
-old_striplib=
-AC_MSG_CHECKING([whether stripping libraries is possible])
-if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then
- test -z "$old_striplib" && old_striplib="$STRIP --strip-debug"
- test -z "$striplib" && striplib="$STRIP --strip-unneeded"
- AC_MSG_RESULT([yes])
-else
-# FIXME - insert some real tests, host_os isn't really good enough
- case $host_os in
- darwin*)
- if test -n "$STRIP" ; then
- striplib="$STRIP -x"
- old_striplib="$STRIP -S"
- AC_MSG_RESULT([yes])
- else
- AC_MSG_RESULT([no])
- fi
- ;;
- *)
- AC_MSG_RESULT([no])
- ;;
- esac
-fi
-_LT_DECL([], [old_striplib], [1], [Commands to strip libraries])
-_LT_DECL([], [striplib], [1])
-])# _LT_CMD_STRIPLIB
-
-
-# _LT_SYS_DYNAMIC_LINKER([TAG])
-# -----------------------------
-# PORTME Fill in your ld.so characteristics
-m4_defun([_LT_SYS_DYNAMIC_LINKER],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-m4_require([_LT_DECL_EGREP])dnl
-m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_DECL_OBJDUMP])dnl
-m4_require([_LT_DECL_SED])dnl
-m4_require([_LT_CHECK_SHELL_FEATURES])dnl
-AC_MSG_CHECKING([dynamic linker characteristics])
-m4_if([$1],
- [], [
-if test "$GCC" = yes; then
- case $host_os in
- darwin*) lt_awk_arg="/^libraries:/,/LR/" ;;
- *) lt_awk_arg="/^libraries:/" ;;
- esac
- case $host_os in
- mingw* | cegcc*) lt_sed_strip_eq="s,=\([[A-Za-z]]:\),\1,g" ;;
- *) lt_sed_strip_eq="s,=/,/,g" ;;
- esac
- lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq`
- case $lt_search_path_spec in
- *\;*)
- # if the path contains ";" then we assume it to be the separator
- # otherwise default to the standard path separator (i.e. ":") - it is
- # assumed that no part of a normal pathname contains ";" but that should
- # okay in the real world where ";" in dirpaths is itself problematic.
- lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'`
- ;;
- *)
- lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"`
- ;;
- esac
- # Ok, now we have the path, separated by spaces, we can step through it
- # and add multilib dir if necessary.
- lt_tmp_lt_search_path_spec=
- lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null`
- for lt_sys_path in $lt_search_path_spec; do
- if test -d "$lt_sys_path/$lt_multi_os_dir"; then
- lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir"
- else
- test -d "$lt_sys_path" && \
- lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path"
- fi
- done
- lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk '
-BEGIN {RS=" "; FS="/|\n";} {
- lt_foo="";
- lt_count=0;
- for (lt_i = NF; lt_i > 0; lt_i--) {
- if ($lt_i != "" && $lt_i != ".") {
- if ($lt_i == "..") {
- lt_count++;
- } else {
- if (lt_count == 0) {
- lt_foo="/" $lt_i lt_foo;
- } else {
- lt_count--;
- }
- }
- }
- }
- if (lt_foo != "") { lt_freq[[lt_foo]]++; }
- if (lt_freq[[lt_foo]] == 1) { print lt_foo; }
-}'`
- # AWK program above erroneously prepends '/' to C:/dos/paths
- # for these hosts.
- case $host_os in
- mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\
- $SED 's,/\([[A-Za-z]]:\),\1,g'` ;;
- esac
- sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP`
-else
- sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
-fi])
-library_names_spec=
-libname_spec='lib$name'
-soname_spec=
-shrext_cmds=".so"
-postinstall_cmds=
-postuninstall_cmds=
-finish_cmds=
-finish_eval=
-shlibpath_var=
-shlibpath_overrides_runpath=unknown
-version_type=none
-dynamic_linker="$host_os ld.so"
-sys_lib_dlsearch_path_spec="/lib /usr/lib"
-need_lib_prefix=unknown
-hardcode_into_libs=no
-
-# when you set need_version to no, make sure it does not cause -set_version
-# flags to be left without arguments
-need_version=unknown
-
-case $host_os in
-aix3*)
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
- shlibpath_var=LIBPATH
-
- # AIX 3 has no versioning support, so we append a major version to the name.
- soname_spec='${libname}${release}${shared_ext}$major'
- ;;
-
-aix[[4-9]]*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- hardcode_into_libs=yes
- if test "$host_cpu" = ia64; then
- # AIX 5 supports IA64
- library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
- shlibpath_var=LD_LIBRARY_PATH
- else
- # With GCC up to 2.95.x, collect2 would create an import file
- # for dependence libraries. The import file would start with
- # the line `#! .'. This would cause the generated library to
- # depend on `.', always an invalid library. This was fixed in
- # development snapshots of GCC prior to 3.0.
- case $host_os in
- aix4 | aix4.[[01]] | aix4.[[01]].*)
- if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
- echo ' yes '
- echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then
- :
- else
- can_build_shared=no
- fi
- ;;
- esac
- # AIX (on Power*) has no versioning support, so currently we can not hardcode correct
- # soname into executable. Probably we can add versioning support to
- # collect2, so additional links can be useful in future.
- if test "$aix_use_runtimelinking" = yes; then
- # If using run time linking (on AIX 4.2 or later) use lib<name>.so
- # instead of lib<name>.a to let people know that these are not
- # typical AIX shared libraries.
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- else
- # We preserve .a as extension for shared libraries through AIX4.2
- # and later when we are not doing run time linking.
- library_names_spec='${libname}${release}.a $libname.a'
- soname_spec='${libname}${release}${shared_ext}$major'
- fi
- shlibpath_var=LIBPATH
- fi
- ;;
-
-amigaos*)
- case $host_cpu in
- powerpc)
- # Since July 2007 AmigaOS4 officially supports .so libraries.
- # When compiling the executable, add -use-dynld -Lsobjs: to the compileline.
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- ;;
- m68k)
- library_names_spec='$libname.ixlibrary $libname.a'
- # Create ${libname}_ixlibrary.a entries in /sys/libs.
- finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
- ;;
- esac
- ;;
-
-beos*)
- library_names_spec='${libname}${shared_ext}'
- dynamic_linker="$host_os ld.so"
- shlibpath_var=LIBRARY_PATH
- ;;
-
-bsdi[[45]]*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
- shlibpath_var=LD_LIBRARY_PATH
- sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
- sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
- # the default ld.so.conf also contains /usr/contrib/lib and
- # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
- # libtool to hard-code these into programs
- ;;
-
-cygwin* | mingw* | pw32* | cegcc*)
- version_type=windows
- shrext_cmds=".dll"
- need_version=no
- need_lib_prefix=no
-
- case $GCC,$cc_basename in
- yes,*)
- # gcc
- library_names_spec='$libname.dll.a'
- # DLL is installed to $(libdir)/../bin by postinstall_cmds
- postinstall_cmds='base_file=`basename \${file}`~
- dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
- dldir=$destdir/`dirname \$dlpath`~
- test -d \$dldir || mkdir -p \$dldir~
- $install_prog $dir/$dlname \$dldir/$dlname~
- chmod a+x \$dldir/$dlname~
- if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then
- eval '\''$striplib \$dldir/$dlname'\'' || exit \$?;
- fi'
- postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
- dlpath=$dir/\$dldll~
- $RM \$dlpath'
- shlibpath_overrides_runpath=yes
-
- case $host_os in
- cygwin*)
- # Cygwin DLLs use 'cyg' prefix rather than 'lib'
- soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
-m4_if([$1], [],[
- sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"])
- ;;
- mingw* | cegcc*)
- # MinGW DLLs use traditional 'lib' prefix
- soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
- ;;
- pw32*)
- # pw32 DLLs use 'pw' prefix rather than 'lib'
- library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
- ;;
- esac
- dynamic_linker='Win32 ld.exe'
- ;;
-
- *,cl*)
- # Native MSVC
- libname_spec='$name'
- soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
- library_names_spec='${libname}.dll.lib'
-
- case $build_os in
- mingw*)
- sys_lib_search_path_spec=
- lt_save_ifs=$IFS
- IFS=';'
- for lt_path in $LIB
- do
- IFS=$lt_save_ifs
- # Let DOS variable expansion print the short 8.3 style file name.
- lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"`
- sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path"
- done
- IFS=$lt_save_ifs
- # Convert to MSYS style.
- sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'`
- ;;
- cygwin*)
- # Convert to unix form, then to dos form, then back to unix form
- # but this time dos style (no spaces!) so that the unix form looks
- # like /cygdrive/c/PROGRA~1:/cygdr...
- sys_lib_search_path_spec=`cygpath --path --unix "$LIB"`
- sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null`
- sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
- ;;
- *)
- sys_lib_search_path_spec="$LIB"
- if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then
- # It is most probably a Windows format PATH.
- sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
- else
- sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
- fi
- # FIXME: find the short name or the path components, as spaces are
- # common. (e.g. "Program Files" -> "PROGRA~1")
- ;;
- esac
-
- # DLL is installed to $(libdir)/../bin by postinstall_cmds
- postinstall_cmds='base_file=`basename \${file}`~
- dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~
- dldir=$destdir/`dirname \$dlpath`~
- test -d \$dldir || mkdir -p \$dldir~
- $install_prog $dir/$dlname \$dldir/$dlname'
- postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
- dlpath=$dir/\$dldll~
- $RM \$dlpath'
- shlibpath_overrides_runpath=yes
- dynamic_linker='Win32 link.exe'
- ;;
-
- *)
- # Assume MSVC wrapper
- library_names_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext} $libname.lib'
- dynamic_linker='Win32 ld.exe'
- ;;
- esac
- # FIXME: first we should search . and the directory the executable is in
- shlibpath_var=PATH
- ;;
-
-darwin* | rhapsody*)
- dynamic_linker="$host_os dyld"
- version_type=darwin
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext'
- soname_spec='${libname}${release}${major}$shared_ext'
- shlibpath_overrides_runpath=yes
- shlibpath_var=DYLD_LIBRARY_PATH
- shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
-m4_if([$1], [],[
- sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"])
- sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
- ;;
-
-dgux*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- ;;
-
-freebsd* | dragonfly*)
- # DragonFly does not have aout. When/if they implement a new
- # versioning mechanism, adjust this.
- if test -x /usr/bin/objformat; then
- objformat=`/usr/bin/objformat`
- else
- case $host_os in
- freebsd[[23]].*) objformat=aout ;;
- *) objformat=elf ;;
- esac
- fi
- # Handle Gentoo/FreeBSD as it was Linux
- case $host_vendor in
- gentoo)
- version_type=linux ;;
- *)
- version_type=freebsd-$objformat ;;
- esac
-
- case $version_type in
- freebsd-elf*)
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
- need_version=no
- need_lib_prefix=no
- ;;
- freebsd-*)
- library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
- need_version=yes
- ;;
- linux)
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- need_lib_prefix=no
- need_version=no
- ;;
- esac
- shlibpath_var=LD_LIBRARY_PATH
- case $host_os in
- freebsd2.*)
- shlibpath_overrides_runpath=yes
- ;;
- freebsd3.[[01]]* | freebsdelf3.[[01]]*)
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- ;;
- freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \
- freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1)
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
- *) # from 4.6 on, and DragonFly
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- ;;
- esac
- ;;
-
-gnu*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
-haiku*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- dynamic_linker="$host_os runtime_loader"
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib'
- hardcode_into_libs=yes
- ;;
-
-hpux9* | hpux10* | hpux11*)
- # Give a soname corresponding to the major version so that dld.sl refuses to
- # link against other versions.
- version_type=sunos
- need_lib_prefix=no
- need_version=no
- case $host_cpu in
- ia64*)
- shrext_cmds='.so'
- hardcode_into_libs=yes
- dynamic_linker="$host_os dld.so"
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- if test "X$HPUX_IA64_MODE" = X32; then
- sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
- else
- sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
- fi
- sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
- ;;
- hppa*64*)
- shrext_cmds='.sl'
- hardcode_into_libs=yes
- dynamic_linker="$host_os dld.sl"
- shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
- shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
- sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
- ;;
- *)
- shrext_cmds='.sl'
- dynamic_linker="$host_os dld.sl"
- shlibpath_var=SHLIB_PATH
- shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- ;;
- esac
- # HP-UX runs *really* slowly unless shared libraries are mode 555, ...
- postinstall_cmds='chmod 555 $lib'
- # or fails outright, so override atomically:
- install_override_mode=555
- ;;
-
-interix[[3-9]]*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
-irix5* | irix6* | nonstopux*)
- case $host_os in
- nonstopux*) version_type=nonstopux ;;
- *)
- if test "$lt_cv_prog_gnu_ld" = yes; then
- version_type=linux # correct to gnu/linux during the next big refactor
- else
- version_type=irix
- fi ;;
- esac
- need_lib_prefix=no
- need_version=no
- soname_spec='${libname}${release}${shared_ext}$major'
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
- case $host_os in
- irix5* | nonstopux*)
- libsuff= shlibsuff=
- ;;
- *)
- case $LD in # libtool.m4 will add one of these switches to LD
- *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
- libsuff= shlibsuff= libmagic=32-bit;;
- *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
- libsuff=32 shlibsuff=N32 libmagic=N32;;
- *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
- libsuff=64 shlibsuff=64 libmagic=64-bit;;
- *) libsuff= shlibsuff= libmagic=never-match;;
- esac
- ;;
- esac
- shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
- shlibpath_overrides_runpath=no
- sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
- sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
- hardcode_into_libs=yes
- ;;
-
-# No shared lib support for Linux oldld, aout, or coff.
-linux*oldld* | linux*aout* | linux*coff*)
- dynamic_linker=no
- ;;
-
-# This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
-
- # Some binutils ld are patched to set DT_RUNPATH
- AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath],
- [lt_cv_shlibpath_overrides_runpath=no
- save_LDFLAGS=$LDFLAGS
- save_libdir=$libdir
- eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \
- LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\""
- AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])],
- [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null],
- [lt_cv_shlibpath_overrides_runpath=yes])])
- LDFLAGS=$save_LDFLAGS
- libdir=$save_libdir
- ])
- shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath
-
- # This implies no fast_install, which is unacceptable.
- # Some rework will be needed to allow for fast_install
- # before this can be enabled.
- hardcode_into_libs=yes
-
- # Append ld.so.conf contents to the search path
- if test -f /etc/ld.so.conf; then
- lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '`
- sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
- fi
-
- # We used to test for /lib/ld.so.1 and disable shared libraries on
- # powerpc, because MkLinux only supported shared libraries with the
- # GNU dynamic linker. Since this was broken with cross compilers,
- # most powerpc-linux boxes support dynamic linking these days and
- # people can always --disable-shared, the test was removed, and we
- # assume the GNU/Linux dynamic linker is in use.
- dynamic_linker='GNU/Linux ld.so'
- ;;
-
-netbsd*)
- version_type=sunos
- need_lib_prefix=no
- need_version=no
- if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
- finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
- dynamic_linker='NetBSD (a.out) ld.so'
- else
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- dynamic_linker='NetBSD ld.elf_so'
- fi
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- ;;
-
-newsos6)
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- ;;
-
-*nto* | *qnx*)
- version_type=qnx
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- dynamic_linker='ldqnx.so'
- ;;
-
-openbsd*)
- version_type=sunos
- sys_lib_dlsearch_path_spec="/usr/lib"
- need_lib_prefix=no
- # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
- case $host_os in
- openbsd3.3 | openbsd3.3.*) need_version=yes ;;
- *) need_version=no ;;
- esac
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
- finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
- shlibpath_var=LD_LIBRARY_PATH
- if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
- case $host_os in
- openbsd2.[[89]] | openbsd2.[[89]].*)
- shlibpath_overrides_runpath=no
- ;;
- *)
- shlibpath_overrides_runpath=yes
- ;;
- esac
- else
- shlibpath_overrides_runpath=yes
- fi
- ;;
-
-os2*)
- libname_spec='$name'
- shrext_cmds=".dll"
- need_lib_prefix=no
- library_names_spec='$libname${shared_ext} $libname.a'
- dynamic_linker='OS/2 ld.exe'
- shlibpath_var=LIBPATH
- ;;
-
-osf3* | osf4* | osf5*)
- version_type=osf
- need_lib_prefix=no
- need_version=no
- soname_spec='${libname}${release}${shared_ext}$major'
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- shlibpath_var=LD_LIBRARY_PATH
- sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
- sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
- ;;
-
-rdos*)
- dynamic_linker=no
- ;;
-
-solaris*)
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- # ldd complains unless libraries are executable
- postinstall_cmds='chmod +x $lib'
- ;;
-
-sunos4*)
- version_type=sunos
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
- finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- if test "$with_gnu_ld" = yes; then
- need_lib_prefix=no
- fi
- need_version=yes
- ;;
-
-sysv4 | sysv4.3*)
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- case $host_vendor in
- sni)
- shlibpath_overrides_runpath=no
- need_lib_prefix=no
- runpath_var=LD_RUN_PATH
- ;;
- siemens)
- need_lib_prefix=no
- ;;
- motorola)
- need_lib_prefix=no
- need_version=no
- shlibpath_overrides_runpath=no
- sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
- ;;
- esac
- ;;
-
-sysv4*MP*)
- if test -d /usr/nec ;then
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
- soname_spec='$libname${shared_ext}.$major'
- shlibpath_var=LD_LIBRARY_PATH
- fi
- ;;
-
-sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
- version_type=freebsd-elf
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=yes
- hardcode_into_libs=yes
- if test "$with_gnu_ld" = yes; then
- sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
- else
- sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
- case $host_os in
- sco3.2v5*)
- sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
- ;;
- esac
- fi
- sys_lib_dlsearch_path_spec='/usr/lib'
- ;;
-
-tpf*)
- # TPF is a cross-target only. Preferred cross-host = GNU/Linux.
- version_type=linux # correct to gnu/linux during the next big refactor
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
-uts4*)
- version_type=linux # correct to gnu/linux during the next big refactor
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- ;;
-
-*)
- dynamic_linker=no
- ;;
-esac
-AC_MSG_RESULT([$dynamic_linker])
-test "$dynamic_linker" = no && can_build_shared=no
-
-variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
-if test "$GCC" = yes; then
- variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
-fi
-
-if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then
- sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec"
-fi
-if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then
- sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec"
-fi
-
-_LT_DECL([], [variables_saved_for_relink], [1],
- [Variables whose values should be saved in libtool wrapper scripts and
- restored at link time])
-_LT_DECL([], [need_lib_prefix], [0],
- [Do we need the "lib" prefix for modules?])
-_LT_DECL([], [need_version], [0], [Do we need a version for libraries?])
-_LT_DECL([], [version_type], [0], [Library versioning type])
-_LT_DECL([], [runpath_var], [0], [Shared library runtime path variable])
-_LT_DECL([], [shlibpath_var], [0],[Shared library path variable])
-_LT_DECL([], [shlibpath_overrides_runpath], [0],
- [Is shlibpath searched before the hard-coded library search path?])
-_LT_DECL([], [libname_spec], [1], [Format of library name prefix])
-_LT_DECL([], [library_names_spec], [1],
- [[List of archive names. First name is the real one, the rest are links.
- The last name is the one that the linker finds with -lNAME]])
-_LT_DECL([], [soname_spec], [1],
- [[The coded name of the library, if different from the real name]])
-_LT_DECL([], [install_override_mode], [1],
- [Permission mode override for installation of shared libraries])
-_LT_DECL([], [postinstall_cmds], [2],
- [Command to use after installation of a shared archive])
-_LT_DECL([], [postuninstall_cmds], [2],
- [Command to use after uninstallation of a shared archive])
-_LT_DECL([], [finish_cmds], [2],
- [Commands used to finish a libtool library installation in a directory])
-_LT_DECL([], [finish_eval], [1],
- [[As "finish_cmds", except a single script fragment to be evaled but
- not shown]])
-_LT_DECL([], [hardcode_into_libs], [0],
- [Whether we should hardcode library paths into libraries])
-_LT_DECL([], [sys_lib_search_path_spec], [2],
- [Compile-time system search path for libraries])
-_LT_DECL([], [sys_lib_dlsearch_path_spec], [2],
- [Run-time system search path for libraries])
-])# _LT_SYS_DYNAMIC_LINKER
-
-
-# _LT_PATH_TOOL_PREFIX(TOOL)
-# --------------------------
-# find a file program which can recognize shared library
-AC_DEFUN([_LT_PATH_TOOL_PREFIX],
-[m4_require([_LT_DECL_EGREP])dnl
-AC_MSG_CHECKING([for $1])
-AC_CACHE_VAL(lt_cv_path_MAGIC_CMD,
-[case $MAGIC_CMD in
-[[\\/*] | ?:[\\/]*])
- lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
- ;;
-*)
- lt_save_MAGIC_CMD="$MAGIC_CMD"
- lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
-dnl $ac_dummy forces splitting on constant user-supplied paths.
-dnl POSIX.2 word splitting is done only on the output of word expansions,
-dnl not every word. This closes a longstanding sh security hole.
- ac_dummy="m4_if([$2], , $PATH, [$2])"
- for ac_dir in $ac_dummy; do
- IFS="$lt_save_ifs"
- test -z "$ac_dir" && ac_dir=.
- if test -f $ac_dir/$1; then
- lt_cv_path_MAGIC_CMD="$ac_dir/$1"
- if test -n "$file_magic_test_file"; then
- case $deplibs_check_method in
- "file_magic "*)
- file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
- MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
- if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
- $EGREP "$file_magic_regex" > /dev/null; then
- :
- else
- cat <<_LT_EOF 1>&2
-
-*** Warning: the command libtool uses to detect shared libraries,
-*** $file_magic_cmd, produces output that libtool cannot recognize.
-*** The result is that libtool may fail to recognize shared libraries
-*** as such. This will affect the creation of libtool libraries that
-*** depend on shared libraries, but programs linked with such libtool
-*** libraries will work regardless of this problem. Nevertheless, you
-*** may want to report the problem to your system manager and/or to
-*** bug-libtool at gnu.org
-
-_LT_EOF
- fi ;;
- esac
- fi
- break
- fi
- done
- IFS="$lt_save_ifs"
- MAGIC_CMD="$lt_save_MAGIC_CMD"
- ;;
-esac])
-MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
-if test -n "$MAGIC_CMD"; then
- AC_MSG_RESULT($MAGIC_CMD)
-else
- AC_MSG_RESULT(no)
-fi
-_LT_DECL([], [MAGIC_CMD], [0],
- [Used to examine libraries when file_magic_cmd begins with "file"])dnl
-])# _LT_PATH_TOOL_PREFIX
-
-# Old name:
-AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], [])
-
-
-# _LT_PATH_MAGIC
-# --------------
-# find a file program which can recognize a shared library
-m4_defun([_LT_PATH_MAGIC],
-[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH)
-if test -z "$lt_cv_path_MAGIC_CMD"; then
- if test -n "$ac_tool_prefix"; then
- _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH)
- else
- MAGIC_CMD=:
- fi
-fi
-])# _LT_PATH_MAGIC
-
-
-# LT_PATH_LD
-# ----------
-# find the pathname to the GNU or non-GNU linker
-AC_DEFUN([LT_PATH_LD],
-[AC_REQUIRE([AC_PROG_CC])dnl
-AC_REQUIRE([AC_CANONICAL_HOST])dnl
-AC_REQUIRE([AC_CANONICAL_BUILD])dnl
-m4_require([_LT_DECL_SED])dnl
-m4_require([_LT_DECL_EGREP])dnl
-m4_require([_LT_PROG_ECHO_BACKSLASH])dnl
-
-AC_ARG_WITH([gnu-ld],
- [AS_HELP_STRING([--with-gnu-ld],
- [assume the C compiler uses GNU ld @<:@default=no@:>@])],
- [test "$withval" = no || with_gnu_ld=yes],
- [with_gnu_ld=no])dnl
-
-ac_prog=ld
-if test "$GCC" = yes; then
- # Check if gcc -print-prog-name=ld gives a path.
- AC_MSG_CHECKING([for ld used by $CC])
- case $host in
- *-*-mingw*)
- # gcc leaves a trailing carriage return which upsets mingw
- ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
- *)
- ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
- esac
- case $ac_prog in
- # Accept absolute paths.
- [[\\/]]* | ?:[[\\/]]*)
- re_direlt='/[[^/]][[^/]]*/\.\./'
- # Canonicalize the pathname of ld
- ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'`
- while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do
- ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"`
- done
- test -z "$LD" && LD="$ac_prog"
- ;;
- "")
- # If it fails, then pretend we aren't using GCC.
- ac_prog=ld
- ;;
- *)
- # If it is relative, then search for the first ld in PATH.
- with_gnu_ld=unknown
- ;;
- esac
-elif test "$with_gnu_ld" = yes; then
- AC_MSG_CHECKING([for GNU ld])
-else
- AC_MSG_CHECKING([for non-GNU ld])
-fi
-AC_CACHE_VAL(lt_cv_path_LD,
-[if test -z "$LD"; then
- lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
- for ac_dir in $PATH; do
- IFS="$lt_save_ifs"
- test -z "$ac_dir" && ac_dir=.
- if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
- lt_cv_path_LD="$ac_dir/$ac_prog"
- # Check to see if the program is GNU ld. I'd rather use --version,
- # but apparently some variants of GNU ld only accept -v.
- # Break only if it was the GNU/non-GNU ld that we prefer.
- case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
- *GNU* | *'with BFD'*)
- test "$with_gnu_ld" != no && break
- ;;
- *)
- test "$with_gnu_ld" != yes && break
- ;;
- esac
- fi
- done
- IFS="$lt_save_ifs"
-else
- lt_cv_path_LD="$LD" # Let the user override the test with a path.
-fi])
-LD="$lt_cv_path_LD"
-if test -n "$LD"; then
- AC_MSG_RESULT($LD)
-else
- AC_MSG_RESULT(no)
-fi
-test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH])
-_LT_PATH_LD_GNU
-AC_SUBST([LD])
-
-_LT_TAGDECL([], [LD], [1], [The linker used to build libraries])
-])# LT_PATH_LD
-
-# Old names:
-AU_ALIAS([AM_PROG_LD], [LT_PATH_LD])
-AU_ALIAS([AC_PROG_LD], [LT_PATH_LD])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AM_PROG_LD], [])
-dnl AC_DEFUN([AC_PROG_LD], [])
-
-
-# _LT_PATH_LD_GNU
-#- --------------
-m4_defun([_LT_PATH_LD_GNU],
-[AC_CACHE_CHECK([if the linker ($LD) is GNU ld], lt_cv_prog_gnu_ld,
-[# I'd rather use --version here, but apparently some GNU lds only accept -v.
-case `$LD -v 2>&1 </dev/null` in
-*GNU* | *'with BFD'*)
- lt_cv_prog_gnu_ld=yes
- ;;
-*)
- lt_cv_prog_gnu_ld=no
- ;;
-esac])
-with_gnu_ld=$lt_cv_prog_gnu_ld
-])# _LT_PATH_LD_GNU
-
-
-# _LT_CMD_RELOAD
-# --------------
-# find reload flag for linker
-# -- PORTME Some linkers may need a different reload flag.
-m4_defun([_LT_CMD_RELOAD],
-[AC_CACHE_CHECK([for $LD option to reload object files],
- lt_cv_ld_reload_flag,
- [lt_cv_ld_reload_flag='-r'])
-reload_flag=$lt_cv_ld_reload_flag
-case $reload_flag in
-"" | " "*) ;;
-*) reload_flag=" $reload_flag" ;;
-esac
-reload_cmds='$LD$reload_flag -o $output$reload_objs'
-case $host_os in
- cygwin* | mingw* | pw32* | cegcc*)
- if test "$GCC" != yes; then
- reload_cmds=false
- fi
- ;;
- darwin*)
- if test "$GCC" = yes; then
- reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs'
- else
- reload_cmds='$LD$reload_flag -o $output$reload_objs'
- fi
- ;;
-esac
-_LT_TAGDECL([], [reload_flag], [1], [How to create reloadable object files])dnl
-_LT_TAGDECL([], [reload_cmds], [2])dnl
-])# _LT_CMD_RELOAD
-
-
-# _LT_CHECK_MAGIC_METHOD
-# ----------------------
-# how to check for library dependencies
-# -- PORTME fill in with the dynamic library characteristics
-m4_defun([_LT_CHECK_MAGIC_METHOD],
-[m4_require([_LT_DECL_EGREP])
-m4_require([_LT_DECL_OBJDUMP])
-AC_CACHE_CHECK([how to recognize dependent libraries],
-lt_cv_deplibs_check_method,
-[lt_cv_file_magic_cmd='$MAGIC_CMD'
-lt_cv_file_magic_test_file=
-lt_cv_deplibs_check_method='unknown'
-# Need to set the preceding variable on all platforms that support
-# interlibrary dependencies.
-# 'none' -- dependencies not supported.
-# `unknown' -- same as none, but documents that we really don't know.
-# 'pass_all' -- all dependencies passed with no checks.
-# 'test_compile' -- check by making test program.
-# 'file_magic [[regex]]' -- check by looking for files in library path
-# which responds to the $file_magic_cmd with a given extended regex.
-# If you have `file' or equivalent on your system and you're not sure
-# whether `pass_all' will *always* work, you probably want this one.
-
-case $host_os in
-aix[[4-9]]*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-beos*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-bsdi[[45]]*)
- lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)'
- lt_cv_file_magic_cmd='/usr/bin/file -L'
- lt_cv_file_magic_test_file=/shlib/libc.so
- ;;
-
-cygwin*)
- # func_win32_libid is a shell function defined in ltmain.sh
- lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
- lt_cv_file_magic_cmd='func_win32_libid'
- ;;
-
-mingw* | pw32*)
- # Base MSYS/MinGW do not provide the 'file' command needed by
- # func_win32_libid shell function, so use a weaker test based on 'objdump',
- # unless we find 'file', for example because we are cross-compiling.
- # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin.
- if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then
- lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
- lt_cv_file_magic_cmd='func_win32_libid'
- else
- # Keep this pattern in sync with the one in func_win32_libid.
- lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)'
- lt_cv_file_magic_cmd='$OBJDUMP -f'
- fi
- ;;
-
-cegcc*)
- # use the weaker test based on 'objdump'. See mingw*.
- lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?'
- lt_cv_file_magic_cmd='$OBJDUMP -f'
- ;;
-
-darwin* | rhapsody*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-freebsd* | dragonfly*)
- if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
- case $host_cpu in
- i*86 )
- # Not sure whether the presence of OpenBSD here was a mistake.
- # Let's accept both of them until this is cleared up.
- lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library'
- lt_cv_file_magic_cmd=/usr/bin/file
- lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
- ;;
- esac
- else
- lt_cv_deplibs_check_method=pass_all
- fi
- ;;
-
-gnu*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-haiku*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-hpux10.20* | hpux11*)
- lt_cv_file_magic_cmd=/usr/bin/file
- case $host_cpu in
- ia64*)
- lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64'
- lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so
- ;;
- hppa*64*)
- [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]']
- lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl
- ;;
- *)
- lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library'
- lt_cv_file_magic_test_file=/usr/lib/libc.sl
- ;;
- esac
- ;;
-
-interix[[3-9]]*)
- # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here
- lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$'
- ;;
-
-irix5* | irix6* | nonstopux*)
- case $LD in
- *-32|*"-32 ") libmagic=32-bit;;
- *-n32|*"-n32 ") libmagic=N32;;
- *-64|*"-64 ") libmagic=64-bit;;
- *) libmagic=never-match;;
- esac
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-# This must be glibc/ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-netbsd*)
- if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
- lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$'
- else
- lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$'
- fi
- ;;
-
-newos6*)
- lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)'
- lt_cv_file_magic_cmd=/usr/bin/file
- lt_cv_file_magic_test_file=/usr/lib/libnls.so
- ;;
-
-*nto* | *qnx*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-openbsd*)
- if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
- lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$'
- else
- lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$'
- fi
- ;;
-
-osf3* | osf4* | osf5*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-rdos*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-solaris*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
-sysv4 | sysv4.3*)
- case $host_vendor in
- motorola)
- lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]'
- lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
- ;;
- ncr)
- lt_cv_deplibs_check_method=pass_all
- ;;
- sequent)
- lt_cv_file_magic_cmd='/bin/file'
- lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )'
- ;;
- sni)
- lt_cv_file_magic_cmd='/bin/file'
- lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib"
- lt_cv_file_magic_test_file=/lib/libc.so
- ;;
- siemens)
- lt_cv_deplibs_check_method=pass_all
- ;;
- pc)
- lt_cv_deplibs_check_method=pass_all
- ;;
- esac
- ;;
-
-tpf*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-esac
-])
-
-file_magic_glob=
-want_nocaseglob=no
-if test "$build" = "$host"; then
- case $host_os in
- mingw* | pw32*)
- if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then
- want_nocaseglob=yes
- else
- file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"`
- fi
- ;;
- esac
-fi
-
-file_magic_cmd=$lt_cv_file_magic_cmd
-deplibs_check_method=$lt_cv_deplibs_check_method
-test -z "$deplibs_check_method" && deplibs_check_method=unknown
-
-_LT_DECL([], [deplibs_check_method], [1],
- [Method to check whether dependent libraries are shared objects])
-_LT_DECL([], [file_magic_cmd], [1],
- [Command to use when deplibs_check_method = "file_magic"])
-_LT_DECL([], [file_magic_glob], [1],
- [How to find potential files when deplibs_check_method = "file_magic"])
-_LT_DECL([], [want_nocaseglob], [1],
- [Find potential files using nocaseglob when deplibs_check_method = "file_magic"])
-])# _LT_CHECK_MAGIC_METHOD
-
-
-# LT_PATH_NM
-# ----------
-# find the pathname to a BSD- or MS-compatible name lister
-AC_DEFUN([LT_PATH_NM],
-[AC_REQUIRE([AC_PROG_CC])dnl
-AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM,
-[if test -n "$NM"; then
- # Let the user override the test.
- lt_cv_path_NM="$NM"
-else
- lt_nm_to_check="${ac_tool_prefix}nm"
- if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
- lt_nm_to_check="$lt_nm_to_check nm"
- fi
- for lt_tmp_nm in $lt_nm_to_check; do
- lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
- for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
- IFS="$lt_save_ifs"
- test -z "$ac_dir" && ac_dir=.
- tmp_nm="$ac_dir/$lt_tmp_nm"
- if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
- # Check to see if the nm accepts a BSD-compat flag.
- # Adding the `sed 1q' prevents false positives on HP-UX, which says:
- # nm: unknown option "B" ignored
- # Tru64's nm complains that /dev/null is an invalid object file
- case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
- */dev/null* | *'Invalid file or object type'*)
- lt_cv_path_NM="$tmp_nm -B"
- break
- ;;
- *)
- case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
- */dev/null*)
- lt_cv_path_NM="$tmp_nm -p"
- break
- ;;
- *)
- lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
- continue # so that we can try to find one that supports BSD flags
- ;;
- esac
- ;;
- esac
- fi
- done
- IFS="$lt_save_ifs"
- done
- : ${lt_cv_path_NM=no}
-fi])
-if test "$lt_cv_path_NM" != "no"; then
- NM="$lt_cv_path_NM"
-else
- # Didn't find any BSD compatible name lister, look for dumpbin.
- if test -n "$DUMPBIN"; then :
- # Let the user override the test.
- else
- AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :)
- case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in
- *COFF*)
- DUMPBIN="$DUMPBIN -symbols"
- ;;
- *)
- DUMPBIN=:
- ;;
- esac
- fi
- AC_SUBST([DUMPBIN])
- if test "$DUMPBIN" != ":"; then
- NM="$DUMPBIN"
- fi
-fi
-test -z "$NM" && NM=nm
-AC_SUBST([NM])
-_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl
-
-AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface],
- [lt_cv_nm_interface="BSD nm"
- echo "int some_variable = 0;" > conftest.$ac_ext
- (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD)
- (eval "$ac_compile" 2>conftest.err)
- cat conftest.err >&AS_MESSAGE_LOG_FD
- (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD)
- (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
- cat conftest.err >&AS_MESSAGE_LOG_FD
- (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD)
- cat conftest.out >&AS_MESSAGE_LOG_FD
- if $GREP 'External.*some_variable' conftest.out > /dev/null; then
- lt_cv_nm_interface="MS dumpbin"
- fi
- rm -f conftest*])
-])# LT_PATH_NM
-
-# Old names:
-AU_ALIAS([AM_PROG_NM], [LT_PATH_NM])
-AU_ALIAS([AC_PROG_NM], [LT_PATH_NM])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AM_PROG_NM], [])
-dnl AC_DEFUN([AC_PROG_NM], [])
-
-# _LT_CHECK_SHAREDLIB_FROM_LINKLIB
-# --------------------------------
-# how to determine the name of the shared library
-# associated with a specific link library.
-# -- PORTME fill in with the dynamic library characteristics
-m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB],
-[m4_require([_LT_DECL_EGREP])
-m4_require([_LT_DECL_OBJDUMP])
-m4_require([_LT_DECL_DLLTOOL])
-AC_CACHE_CHECK([how to associate runtime and link libraries],
-lt_cv_sharedlib_from_linklib_cmd,
-[lt_cv_sharedlib_from_linklib_cmd='unknown'
-
-case $host_os in
-cygwin* | mingw* | pw32* | cegcc*)
- # two different shell functions defined in ltmain.sh
- # decide which to use based on capabilities of $DLLTOOL
- case `$DLLTOOL --help 2>&1` in
- *--identify-strict*)
- lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib
- ;;
- *)
- lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback
- ;;
- esac
- ;;
-*)
- # fallback: assume linklib IS sharedlib
- lt_cv_sharedlib_from_linklib_cmd="$ECHO"
- ;;
-esac
-])
-sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd
-test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO
-
-_LT_DECL([], [sharedlib_from_linklib_cmd], [1],
- [Command to associate shared and link libraries])
-])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB
-
-
-# _LT_PATH_MANIFEST_TOOL
-# ----------------------
-# locate the manifest tool
-m4_defun([_LT_PATH_MANIFEST_TOOL],
-[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :)
-test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt
-AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool],
- [lt_cv_path_mainfest_tool=no
- echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD
- $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out
- cat conftest.err >&AS_MESSAGE_LOG_FD
- if $GREP 'Manifest Tool' conftest.out > /dev/null; then
- lt_cv_path_mainfest_tool=yes
- fi
- rm -f conftest*])
-if test "x$lt_cv_path_mainfest_tool" != xyes; then
- MANIFEST_TOOL=:
-fi
-_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl
-])# _LT_PATH_MANIFEST_TOOL
-
-
-# LT_LIB_M
-# --------
-# check for math library
-AC_DEFUN([LT_LIB_M],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-LIBM=
-case $host in
-*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*)
- # These system don't have libm, or don't need it
- ;;
-*-ncr-sysv4.3*)
- AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw")
- AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm")
- ;;
-*)
- AC_CHECK_LIB(m, cos, LIBM="-lm")
- ;;
-esac
-AC_SUBST([LIBM])
-])# LT_LIB_M
-
-# Old name:
-AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_CHECK_LIBM], [])
-
-
-# _LT_COMPILER_NO_RTTI([TAGNAME])
-# -------------------------------
-m4_defun([_LT_COMPILER_NO_RTTI],
-[m4_require([_LT_TAG_COMPILER])dnl
-
-_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=
-
-if test "$GCC" = yes; then
- case $cc_basename in
- nvcc*)
- _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;;
- *)
- _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;;
- esac
-
- _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions],
- lt_cv_prog_compiler_rtti_exceptions,
- [-fno-rtti -fno-exceptions], [],
- [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"])
-fi
-_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1],
- [Compiler flag to turn off builtin functions])
-])# _LT_COMPILER_NO_RTTI
-
-
-# _LT_CMD_GLOBAL_SYMBOLS
-# ----------------------
-m4_defun([_LT_CMD_GLOBAL_SYMBOLS],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-AC_REQUIRE([AC_PROG_CC])dnl
-AC_REQUIRE([AC_PROG_AWK])dnl
-AC_REQUIRE([LT_PATH_NM])dnl
-AC_REQUIRE([LT_PATH_LD])dnl
-m4_require([_LT_DECL_SED])dnl
-m4_require([_LT_DECL_EGREP])dnl
-m4_require([_LT_TAG_COMPILER])dnl
-
-# Check for command to grab the raw symbol name followed by C symbol from nm.
-AC_MSG_CHECKING([command to parse $NM output from $compiler object])
-AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe],
-[
-# These are sane defaults that work on at least a few old systems.
-# [They come from Ultrix. What could be older than Ultrix?!! ;)]
-
-# Character class describing NM global symbol codes.
-symcode='[[BCDEGRST]]'
-
-# Regexp to match symbols that can be accessed directly from C.
-sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)'
-
-# Define system-specific variables.
-case $host_os in
-aix*)
- symcode='[[BCDT]]'
- ;;
-cygwin* | mingw* | pw32* | cegcc*)
- symcode='[[ABCDGISTW]]'
- ;;
-hpux*)
- if test "$host_cpu" = ia64; then
- symcode='[[ABCDEGRST]]'
- fi
- ;;
-irix* | nonstopux*)
- symcode='[[BCDEGRST]]'
- ;;
-osf*)
- symcode='[[BCDEGQRST]]'
- ;;
-solaris*)
- symcode='[[BDRT]]'
- ;;
-sco3.2v5*)
- symcode='[[DT]]'
- ;;
-sysv4.2uw2*)
- symcode='[[DT]]'
- ;;
-sysv5* | sco5v6* | unixware* | OpenUNIX*)
- symcode='[[ABDT]]'
- ;;
-sysv4)
- symcode='[[DFNSTU]]'
- ;;
-esac
-
-# If we're using GNU nm, then use its standard symbol codes.
-case `$NM -V 2>&1` in
-*GNU* | *'with BFD'*)
- symcode='[[ABCDGIRSTW]]' ;;
-esac
-
-# Transform an extracted symbol line into a proper C declaration.
-# Some systems (esp. on ia64) link data and code symbols differently,
-# so use this general approach.
-lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
-
-# Transform an extracted symbol line into symbol name and symbol address
-lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p'"
-lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \(lib[[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"lib\2\", (void *) \&\2},/p'"
-
-# Handle CRLF in mingw tool chain
-opt_cr=
-case $build_os in
-mingw*)
- opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp
- ;;
-esac
-
-# Try without a prefix underscore, then with it.
-for ac_symprfx in "" "_"; do
-
- # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol.
- symxfrm="\\1 $ac_symprfx\\2 \\2"
-
- # Write the raw and C identifiers.
- if test "$lt_cv_nm_interface" = "MS dumpbin"; then
- # Fake it for dumpbin and say T for any non-static function
- # and D for any global variable.
- # Also find C++ and __fastcall symbols from MSVC++,
- # which start with @ or ?.
- lt_cv_sys_global_symbol_pipe="$AWK ['"\
-" {last_section=section; section=\$ 3};"\
-" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
-" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
-" \$ 0!~/External *\|/{next};"\
-" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
-" {if(hide[section]) next};"\
-" {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? \"T \" : \"D \"};"\
-" {split(\$ 0, a, /\||\r/); split(a[2], s)};"\
-" s[1]~/^[@?]/{print s[1], s[1]; next};"\
-" s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\
-" ' prfx=^$ac_symprfx]"
- else
- lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"
- fi
- lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'"
-
- # Check to see that the pipe works correctly.
- pipe_works=no
-
- rm -f conftest*
- cat > conftest.$ac_ext <<_LT_EOF
-#ifdef __cplusplus
-extern "C" {
-#endif
-char nm_test_var;
-void nm_test_func(void);
-void nm_test_func(void){}
-#ifdef __cplusplus
-}
-#endif
-int main(){nm_test_var='a';nm_test_func();return(0);}
-_LT_EOF
-
- if AC_TRY_EVAL(ac_compile); then
- # Now try to grab the symbols.
- nlist=conftest.nm
- if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then
- # Try sorting and uniquifying the output.
- if sort "$nlist" | uniq > "$nlist"T; then
- mv -f "$nlist"T "$nlist"
- else
- rm -f "$nlist"T
- fi
-
- # Make sure that we snagged all the symbols we need.
- if $GREP ' nm_test_var$' "$nlist" >/dev/null; then
- if $GREP ' nm_test_func$' "$nlist" >/dev/null; then
- cat <<_LT_EOF > conftest.$ac_ext
-/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */
-#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE)
-/* DATA imports from DLLs on WIN32 con't be const, because runtime
- relocations are performed -- see ld's documentation on pseudo-relocs. */
-# define LT@&t at _DLSYM_CONST
-#elif defined(__osf__)
-/* This system does not cope well with relocations in const data. */
-# define LT@&t at _DLSYM_CONST
-#else
-# define LT@&t at _DLSYM_CONST const
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-_LT_EOF
- # Now generate the symbol file.
- eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext'
-
- cat <<_LT_EOF >> conftest.$ac_ext
-
-/* The mapping between symbol names and symbols. */
-LT@&t at _DLSYM_CONST struct {
- const char *name;
- void *address;
-}
-lt__PROGRAM__LTX_preloaded_symbols[[]] =
-{
- { "@PROGRAM@", (void *) 0 },
-_LT_EOF
- $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext
- cat <<\_LT_EOF >> conftest.$ac_ext
- {0, (void *) 0}
-};
-
-/* This works around a problem in FreeBSD linker */
-#ifdef FREEBSD_WORKAROUND
-static const void *lt_preloaded_setup() {
- return lt__PROGRAM__LTX_preloaded_symbols;
-}
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-_LT_EOF
- # Now try linking the two files.
- mv conftest.$ac_objext conftstm.$ac_objext
- lt_globsym_save_LIBS=$LIBS
- lt_globsym_save_CFLAGS=$CFLAGS
- LIBS="conftstm.$ac_objext"
- CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)"
- if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then
- pipe_works=yes
- fi
- LIBS=$lt_globsym_save_LIBS
- CFLAGS=$lt_globsym_save_CFLAGS
- else
- echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD
- fi
- else
- echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD
- fi
- else
- echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD
- fi
- else
- echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD
- cat conftest.$ac_ext >&5
- fi
- rm -rf conftest* conftst*
-
- # Do not use the global_symbol_pipe unless it works.
- if test "$pipe_works" = yes; then
- break
- else
- lt_cv_sys_global_symbol_pipe=
- fi
-done
-])
-if test -z "$lt_cv_sys_global_symbol_pipe"; then
- lt_cv_sys_global_symbol_to_cdecl=
-fi
-if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then
- AC_MSG_RESULT(failed)
-else
- AC_MSG_RESULT(ok)
-fi
-
-# Response file support.
-if test "$lt_cv_nm_interface" = "MS dumpbin"; then
- nm_file_list_spec='@'
-elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then
- nm_file_list_spec='@'
-fi
-
-_LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1],
- [Take the output of nm and produce a listing of raw symbols and C names])
-_LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1],
- [Transform the output of nm in a proper C declaration])
-_LT_DECL([global_symbol_to_c_name_address],
- [lt_cv_sys_global_symbol_to_c_name_address], [1],
- [Transform the output of nm in a C name address pair])
-_LT_DECL([global_symbol_to_c_name_address_lib_prefix],
- [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1],
- [Transform the output of nm in a C name address pair when lib prefix is needed])
-_LT_DECL([], [nm_file_list_spec], [1],
- [Specify filename containing input files for $NM])
-]) # _LT_CMD_GLOBAL_SYMBOLS
-
-
-# _LT_COMPILER_PIC([TAGNAME])
-# ---------------------------
-m4_defun([_LT_COMPILER_PIC],
-[m4_require([_LT_TAG_COMPILER])dnl
-_LT_TAGVAR(lt_prog_compiler_wl, $1)=
-_LT_TAGVAR(lt_prog_compiler_pic, $1)=
-_LT_TAGVAR(lt_prog_compiler_static, $1)=
-
-m4_if([$1], [CXX], [
- # C++ specific cases for pic, static, wl, etc.
- if test "$GXX" = yes; then
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
-
- case $host_os in
- aix*)
- # All AIX code is PIC.
- if test "$host_cpu" = ia64; then
- # AIX 5 now supports IA64 processor
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- fi
- ;;
-
- amigaos*)
- case $host_cpu in
- powerpc)
- # see comment about AmigaOS4 .so support
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
- ;;
- m68k)
- # FIXME: we need at least 68020 code to build shared libraries, but
- # adding the `-m68020' flag to GCC prevents building anything better,
- # like `-m68040'.
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4'
- ;;
- esac
- ;;
-
- beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
- # PIC is the default for these OSes.
- ;;
- mingw* | cygwin* | os2* | pw32* | cegcc*)
- # This hack is so that the source file can tell whether it is being
- # built for inclusion in a dll (and should export symbols for example).
- # Although the cygwin gcc ignores -fPIC, still need this for old-style
- # (--disable-auto-import) libraries
- m4_if([$1], [GCJ], [],
- [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
- ;;
- darwin* | rhapsody*)
- # PIC is the default on this platform
- # Common symbols not allowed in MH_DYLIB files
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common'
- ;;
- *djgpp*)
- # DJGPP does not support shared libraries at all
- _LT_TAGVAR(lt_prog_compiler_pic, $1)=
- ;;
- haiku*)
- # PIC is the default for Haiku.
- # The "-static" flag exists, but is broken.
- _LT_TAGVAR(lt_prog_compiler_static, $1)=
- ;;
- interix[[3-9]]*)
- # Interix 3.x gcc -fpic/-fPIC options generate broken code.
- # Instead, we relocate shared libraries at runtime.
- ;;
- sysv4*MP*)
- if test -d /usr/nec; then
- _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic
- fi
- ;;
- hpux*)
- # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
- # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag
- # sets the default TLS model and affects inlining.
- case $host_cpu in
- hppa*64*)
- ;;
- *)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
- ;;
- esac
- ;;
- *qnx* | *nto*)
- # QNX uses GNU C++, but need to define -shared option too, otherwise
- # it will coredump.
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
- ;;
- *)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
- ;;
- esac
- else
- case $host_os in
- aix[[4-9]]*)
- # All AIX code is PIC.
- if test "$host_cpu" = ia64; then
- # AIX 5 now supports IA64 processor
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- else
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp'
- fi
- ;;
- chorus*)
- case $cc_basename in
- cxch68*)
- # Green Hills C++ Compiler
- # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a"
- ;;
- esac
- ;;
- mingw* | cygwin* | os2* | pw32* | cegcc*)
- # This hack is so that the source file can tell whether it is being
- # built for inclusion in a dll (and should export symbols for example).
- m4_if([$1], [GCJ], [],
- [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
- ;;
- dgux*)
- case $cc_basename in
- ec++*)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- ;;
- ghcx*)
- # Green Hills C++ Compiler
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
- ;;
- *)
- ;;
- esac
- ;;
- freebsd* | dragonfly*)
- # FreeBSD uses GNU C++
- ;;
- hpux9* | hpux10* | hpux11*)
- case $cc_basename in
- CC*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
- if test "$host_cpu" != ia64; then
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
- fi
- ;;
- aCC*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
- case $host_cpu in
- hppa*64*|ia64*)
- # +Z the default
- ;;
- *)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
- ;;
- esac
- ;;
- *)
- ;;
- esac
- ;;
- interix*)
- # This is c89, which is MS Visual C++ (no shared libs)
- # Anyone wants to do a port?
- ;;
- irix5* | irix6* | nonstopux*)
- case $cc_basename in
- CC*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
- # CC pic flag -KPIC is the default.
- ;;
- *)
- ;;
- esac
- ;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
- case $cc_basename in
- KCC*)
- # KAI C++ Compiler
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
- ;;
- ecpc* )
- # old Intel C++ for x86_64 which still supported -KPIC.
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
- ;;
- icpc* )
- # Intel C++, used to be incompatible with GCC.
- # ICC 10 doesn't accept -KPIC any more.
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
- ;;
- pgCC* | pgcpp*)
- # Portland Group C++ compiler
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- ;;
- cxx*)
- # Compaq C++
- # Make sure the PIC flag is empty. It appears that all Alpha
- # Linux and Compaq Tru64 Unix objects are PIC.
- _LT_TAGVAR(lt_prog_compiler_pic, $1)=
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
- ;;
- xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*)
- # IBM XL 8.0, 9.0 on PPC and BlueGene
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink'
- ;;
- *)
- case `$CC -V 2>&1 | sed 5q` in
- *Sun\ C*)
- # Sun C++ 5.9
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
- ;;
- esac
- ;;
- esac
- ;;
- lynxos*)
- ;;
- m88k*)
- ;;
- mvs*)
- case $cc_basename in
- cxx*)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall'
- ;;
- *)
- ;;
- esac
- ;;
- netbsd*)
- ;;
- *qnx* | *nto*)
- # QNX uses GNU C++, but need to define -shared option too, otherwise
- # it will coredump.
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
- ;;
- osf3* | osf4* | osf5*)
- case $cc_basename in
- KCC*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,'
- ;;
- RCC*)
- # Rational C++ 2.4.1
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
- ;;
- cxx*)
- # Digital/Compaq C++
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- # Make sure the PIC flag is empty. It appears that all Alpha
- # Linux and Compaq Tru64 Unix objects are PIC.
- _LT_TAGVAR(lt_prog_compiler_pic, $1)=
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
- ;;
- *)
- ;;
- esac
- ;;
- psos*)
- ;;
- solaris*)
- case $cc_basename in
- CC* | sunCC*)
- # Sun C++ 4.2, 5.x and Centerline C++
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
- ;;
- gcx*)
- # Green Hills C++ Compiler
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
- ;;
- *)
- ;;
- esac
- ;;
- sunos4*)
- case $cc_basename in
- CC*)
- # Sun C++ 4.x
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- ;;
- lcc*)
- # Lucid
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
- ;;
- *)
- ;;
- esac
- ;;
- sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
- case $cc_basename in
- CC*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- ;;
- esac
- ;;
- tandem*)
- case $cc_basename in
- NCC*)
- # NonStop-UX NCC 3.20
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- ;;
- *)
- ;;
- esac
- ;;
- vxworks*)
- ;;
- *)
- _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
- ;;
- esac
- fi
-],
-[
- if test "$GCC" = yes; then
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
-
- case $host_os in
- aix*)
- # All AIX code is PIC.
- if test "$host_cpu" = ia64; then
- # AIX 5 now supports IA64 processor
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- fi
- ;;
-
- amigaos*)
- case $host_cpu in
- powerpc)
- # see comment about AmigaOS4 .so support
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
- ;;
- m68k)
- # FIXME: we need at least 68020 code to build shared libraries, but
- # adding the `-m68020' flag to GCC prevents building anything better,
- # like `-m68040'.
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4'
- ;;
- esac
- ;;
-
- beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
- # PIC is the default for these OSes.
- ;;
-
- mingw* | cygwin* | pw32* | os2* | cegcc*)
- # This hack is so that the source file can tell whether it is being
- # built for inclusion in a dll (and should export symbols for example).
- # Although the cygwin gcc ignores -fPIC, still need this for old-style
- # (--disable-auto-import) libraries
- m4_if([$1], [GCJ], [],
- [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
- ;;
-
- darwin* | rhapsody*)
- # PIC is the default on this platform
- # Common symbols not allowed in MH_DYLIB files
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common'
- ;;
-
- haiku*)
- # PIC is the default for Haiku.
- # The "-static" flag exists, but is broken.
- _LT_TAGVAR(lt_prog_compiler_static, $1)=
- ;;
-
- hpux*)
- # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
- # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag
- # sets the default TLS model and affects inlining.
- case $host_cpu in
- hppa*64*)
- # +Z the default
- ;;
- *)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
- ;;
- esac
- ;;
-
- interix[[3-9]]*)
- # Interix 3.x gcc -fpic/-fPIC options generate broken code.
- # Instead, we relocate shared libraries at runtime.
- ;;
-
- msdosdjgpp*)
- # Just because we use GCC doesn't mean we suddenly get shared libraries
- # on systems that don't support them.
- _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
- enable_shared=no
- ;;
-
- *nto* | *qnx*)
- # QNX uses GNU C++, but need to define -shared option too, otherwise
- # it will coredump.
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
- ;;
-
- sysv4*MP*)
- if test -d /usr/nec; then
- _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic
- fi
- ;;
-
- *)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
- ;;
- esac
-
- case $cc_basename in
- nvcc*) # Cuda Compiler Driver 2.2
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker '
- if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then
- _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)"
- fi
- ;;
- esac
- else
- # PORTME Check for flag to pass linker flags through the system compiler.
- case $host_os in
- aix*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- if test "$host_cpu" = ia64; then
- # AIX 5 now supports IA64 processor
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- else
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp'
- fi
- ;;
-
- mingw* | cygwin* | pw32* | os2* | cegcc*)
- # This hack is so that the source file can tell whether it is being
- # built for inclusion in a dll (and should export symbols for example).
- m4_if([$1], [GCJ], [],
- [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'])
- ;;
-
- hpux9* | hpux10* | hpux11*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
- # not for PA HP-UX.
- case $host_cpu in
- hppa*64*|ia64*)
- # +Z the default
- ;;
- *)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
- ;;
- esac
- # Is there a better lt_prog_compiler_static that works with the bundled CC?
- _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
- ;;
-
- irix5* | irix6* | nonstopux*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- # PIC (with -KPIC) is the default.
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
- ;;
-
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
- case $cc_basename in
- # old Intel for x86_64 which still supported -KPIC.
- ecc*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
- ;;
- # icc used to be incompatible with GCC.
- # ICC 10 doesn't accept -KPIC any more.
- icc* | ifort*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
- ;;
- # Lahey Fortran 8.1.
- lf95*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='--static'
- ;;
- nagfor*)
- # NAG Fortran compiler
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- ;;
- pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
- # Portland Group compilers (*not* the Pentium gcc compiler,
- # which looks to be a dead project)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- ;;
- ccc*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- # All Alpha code is PIC.
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
- ;;
- xl* | bgxl* | bgf* | mpixl*)
- # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink'
- ;;
- *)
- case `$CC -V 2>&1 | sed 5q` in
- *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*)
- # Sun Fortran 8.3 passes all unrecognized flags to the linker
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- _LT_TAGVAR(lt_prog_compiler_wl, $1)=''
- ;;
- *Sun\ F* | *Sun*Fortran*)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
- ;;
- *Sun\ C*)
- # Sun C 5.9
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- ;;
- *Intel*\ [[CF]]*Compiler*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
- ;;
- *Portland\ Group*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- ;;
- esac
- ;;
- esac
- ;;
-
- newsos6)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- ;;
-
- *nto* | *qnx*)
- # QNX uses GNU C++, but need to define -shared option too, otherwise
- # it will coredump.
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared'
- ;;
-
- osf3* | osf4* | osf5*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- # All OSF/1 code is PIC.
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
- ;;
-
- rdos*)
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
- ;;
-
- solaris*)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- case $cc_basename in
- f77* | f90* | f95* | sunf77* | sunf90* | sunf95*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';;
- *)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';;
- esac
- ;;
-
- sunos4*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- ;;
-
- sysv4 | sysv4.2uw2* | sysv4.3*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- ;;
-
- sysv4*MP*)
- if test -d /usr/nec ;then
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- fi
- ;;
-
- sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- ;;
-
- unicos*)
- _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
- _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
- ;;
-
- uts4*)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
- _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
- ;;
-
- *)
- _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
- ;;
- esac
- fi
-])
-case $host_os in
- # For platforms which do not support PIC, -DPIC is meaningless:
- *djgpp*)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)=
- ;;
- *)
- _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t at m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])"
- ;;
-esac
-
-AC_CACHE_CHECK([for $compiler option to produce PIC],
- [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)],
- [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)])
-_LT_TAGVAR(lt_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)
-
-#
-# Check to make sure the PIC flag actually works.
-#
-if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then
- _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works],
- [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)],
- [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t at m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [],
- [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in
- "" | " "*) ;;
- *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;;
- esac],
- [_LT_TAGVAR(lt_prog_compiler_pic, $1)=
- _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no])
-fi
-_LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1],
- [Additional compiler flags for building library objects])
-
-_LT_TAGDECL([wl], [lt_prog_compiler_wl], [1],
- [How to pass a linker flag through the compiler])
-#
-# Check to make sure the static flag actually works.
-#
-wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\"
-_LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works],
- _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1),
- $lt_tmp_static_flag,
- [],
- [_LT_TAGVAR(lt_prog_compiler_static, $1)=])
-_LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1],
- [Compiler flag to prevent dynamic linking])
-])# _LT_COMPILER_PIC
-
-
-# _LT_LINKER_SHLIBS([TAGNAME])
-# ----------------------------
-# See if the linker supports building shared libraries.
-m4_defun([_LT_LINKER_SHLIBS],
-[AC_REQUIRE([LT_PATH_LD])dnl
-AC_REQUIRE([LT_PATH_NM])dnl
-m4_require([_LT_PATH_MANIFEST_TOOL])dnl
-m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_DECL_EGREP])dnl
-m4_require([_LT_DECL_SED])dnl
-m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl
-m4_require([_LT_TAG_COMPILER])dnl
-AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries])
-m4_if([$1], [CXX], [
- _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
- _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*']
- case $host_os in
- aix[[4-9]]*)
- # If we're using GNU nm, then we don't want the "-C" option.
- # -C means demangle to AIX nm, but means don't demangle with GNU nm
- # Also, AIX nm treats weak defined symbols like other global defined
- # symbols, whereas GNU nm marks them as "W".
- if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
- _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
- else
- _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
- fi
- ;;
- pw32*)
- _LT_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds"
- ;;
- cygwin* | mingw* | cegcc*)
- case $cc_basename in
- cl*)
- _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
- ;;
- *)
- _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols'
- _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname']
- ;;
- esac
- ;;
- *)
- _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
- ;;
- esac
-], [
- runpath_var=
- _LT_TAGVAR(allow_undefined_flag, $1)=
- _LT_TAGVAR(always_export_symbols, $1)=no
- _LT_TAGVAR(archive_cmds, $1)=
- _LT_TAGVAR(archive_expsym_cmds, $1)=
- _LT_TAGVAR(compiler_needs_object, $1)=no
- _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
- _LT_TAGVAR(export_dynamic_flag_spec, $1)=
- _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
- _LT_TAGVAR(hardcode_automatic, $1)=no
- _LT_TAGVAR(hardcode_direct, $1)=no
- _LT_TAGVAR(hardcode_direct_absolute, $1)=no
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
- _LT_TAGVAR(hardcode_libdir_separator, $1)=
- _LT_TAGVAR(hardcode_minus_L, $1)=no
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
- _LT_TAGVAR(inherit_rpath, $1)=no
- _LT_TAGVAR(link_all_deplibs, $1)=unknown
- _LT_TAGVAR(module_cmds, $1)=
- _LT_TAGVAR(module_expsym_cmds, $1)=
- _LT_TAGVAR(old_archive_from_new_cmds, $1)=
- _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)=
- _LT_TAGVAR(thread_safe_flag_spec, $1)=
- _LT_TAGVAR(whole_archive_flag_spec, $1)=
- # include_expsyms should be a list of space-separated symbols to be *always*
- # included in the symbol list
- _LT_TAGVAR(include_expsyms, $1)=
- # exclude_expsyms can be an extended regexp of symbols to exclude
- # it will be wrapped by ` (' and `)$', so one must not match beginning or
- # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
- # as well as any symbol that contains `d'.
- _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*']
- # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out
- # platforms (ab)use it in PIC code, but their linkers get confused if
- # the symbol is explicitly referenced. Since portable code cannot
- # rely on this symbol name, it's probably fine to never include it in
- # preloaded symbol tables.
- # Exclude shared library initialization/finalization symbols.
-dnl Note also adjust exclude_expsyms for C++ above.
- extract_expsyms_cmds=
-
- case $host_os in
- cygwin* | mingw* | pw32* | cegcc*)
- # FIXME: the MSVC++ port hasn't been tested in a loooong time
- # When not using gcc, we currently assume that we are using
- # Microsoft Visual C++.
- if test "$GCC" != yes; then
- with_gnu_ld=no
- fi
- ;;
- interix*)
- # we just hope/assume this is gcc and not c89 (= MSVC++)
- with_gnu_ld=yes
- ;;
- openbsd*)
- with_gnu_ld=no
- ;;
- esac
-
- _LT_TAGVAR(ld_shlibs, $1)=yes
-
- # On some targets, GNU ld is compatible enough with the native linker
- # that we're better off using the native interface for both.
- lt_use_gnu_ld_interface=no
- if test "$with_gnu_ld" = yes; then
- case $host_os in
- aix*)
- # The AIX port of GNU ld has always aspired to compatibility
- # with the native linker. However, as the warning in the GNU ld
- # block says, versions before 2.19.5* couldn't really create working
- # shared libraries, regardless of the interface used.
- case `$LD -v 2>&1` in
- *\ \(GNU\ Binutils\)\ 2.19.5*) ;;
- *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;;
- *\ \(GNU\ Binutils\)\ [[3-9]]*) ;;
- *)
- lt_use_gnu_ld_interface=yes
- ;;
- esac
- ;;
- *)
- lt_use_gnu_ld_interface=yes
- ;;
- esac
- fi
-
- if test "$lt_use_gnu_ld_interface" = yes; then
- # If archive_cmds runs LD, not CC, wlarc should be empty
- wlarc='${wl}'
-
- # Set some defaults for GNU ld with shared library support. These
- # are reset later if shared libraries are not supported. Putting them
- # here allows them to be overridden if necessary.
- runpath_var=LD_RUN_PATH
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
- # ancient GNU ld didn't support --whole-archive et. al.
- if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then
- _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
- else
- _LT_TAGVAR(whole_archive_flag_spec, $1)=
- fi
- supports_anon_versioning=no
- case `$LD -v 2>&1` in
- *GNU\ gold*) supports_anon_versioning=yes ;;
- *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11
- *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ...
- *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ...
- *\ 2.11.*) ;; # other 2.11 versions
- *) supports_anon_versioning=yes ;;
- esac
-
- # See if GNU ld supports shared libraries.
- case $host_os in
- aix[[3-9]]*)
- # On AIX/PPC, the GNU linker is very broken
- if test "$host_cpu" != ia64; then
- _LT_TAGVAR(ld_shlibs, $1)=no
- cat <<_LT_EOF 1>&2
-
-*** Warning: the GNU linker, at least up to release 2.19, is reported
-*** to be unable to reliably create shared libraries on AIX.
-*** Therefore, libtool is disabling shared libraries support. If you
-*** really care for shared libraries, you may want to install binutils
-*** 2.20 or above, or modify your PATH so that a non-GNU linker is found.
-*** You will then need to restart the configuration process.
-
-_LT_EOF
- fi
- ;;
-
- amigaos*)
- case $host_cpu in
- powerpc)
- # see comment about AmigaOS4 .so support
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)=''
- ;;
- m68k)
- _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
- _LT_TAGVAR(hardcode_minus_L, $1)=yes
- ;;
- esac
- ;;
-
- beos*)
- if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
- _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
- # Joseph Beckenbach <jrb3 at best.com> says some releases of gcc
- # support --undefined. This deserves some investigation. FIXME
- _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- else
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
- ;;
-
- cygwin* | mingw* | pw32* | cegcc*)
- # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless,
- # as there is no search path for DLLs.
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols'
- _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
- _LT_TAGVAR(always_export_symbols, $1)=no
- _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
- _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols'
- _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname']
-
- if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
- # If the export-symbols file already is a .def file (1st line
- # is EXPORTS), use it as is; otherwise, prepend...
- _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
- cp $export_symbols $output_objdir/$soname.def;
- else
- echo EXPORTS > $output_objdir/$soname.def;
- cat $export_symbols >> $output_objdir/$soname.def;
- fi~
- $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
- else
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
- ;;
-
- haiku*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- _LT_TAGVAR(link_all_deplibs, $1)=yes
- ;;
-
- interix[[3-9]]*)
- _LT_TAGVAR(hardcode_direct, $1)=no
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
- # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
- # Instead, shared libraries are loaded at an image base (0x10000000 by
- # default) and relocated if they conflict, which is a slow very memory
- # consuming and fragmenting process. To avoid this, we pick a random,
- # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
- # time. Moving up from 0x10000000 also allows more sbrk(2) space.
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
- ;;
-
- gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu)
- tmp_diet=no
- if test "$host_os" = linux-dietlibc; then
- case $cc_basename in
- diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn)
- esac
- fi
- if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \
- && test "$tmp_diet" = no
- then
- tmp_addflag=' $pic_flag'
- tmp_sharedflag='-shared'
- case $cc_basename,$host_cpu in
- pgcc*) # Portland Group C compiler
- _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
- tmp_addflag=' $pic_flag'
- ;;
- pgf77* | pgf90* | pgf95* | pgfortran*)
- # Portland Group f77 and f90 compilers
- _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
- tmp_addflag=' $pic_flag -Mnomain' ;;
- ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64
- tmp_addflag=' -i_dynamic' ;;
- efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64
- tmp_addflag=' -i_dynamic -nofor_main' ;;
- ifc* | ifort*) # Intel Fortran compiler
- tmp_addflag=' -nofor_main' ;;
- lf95*) # Lahey Fortran 8.1
- _LT_TAGVAR(whole_archive_flag_spec, $1)=
- tmp_sharedflag='--shared' ;;
- xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below)
- tmp_sharedflag='-qmkshrobj'
- tmp_addflag= ;;
- nvcc*) # Cuda Compiler Driver 2.2
- _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
- _LT_TAGVAR(compiler_needs_object, $1)=yes
- ;;
- esac
- case `$CC -V 2>&1 | sed 5q` in
- *Sun\ C*) # Sun C 5.9
- _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
- _LT_TAGVAR(compiler_needs_object, $1)=yes
- tmp_sharedflag='-G' ;;
- *Sun\ F*) # Sun Fortran 8.3
- tmp_sharedflag='-G' ;;
- esac
- _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
-
- if test "x$supports_anon_versioning" = xyes; then
- _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
- cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
- echo "local: *; };" >> $output_objdir/$libname.ver~
- $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
- fi
-
- case $cc_basename in
- xlf* | bgf* | bgxlf* | mpixlf*)
- # IBM XL Fortran 10.1 on PPC cannot create shared libs itself
- _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
- _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
- if test "x$supports_anon_versioning" = xyes; then
- _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
- cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
- echo "local: *; };" >> $output_objdir/$libname.ver~
- $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib'
- fi
- ;;
- esac
- else
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
- ;;
-
- netbsd*)
- if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
- _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
- wlarc=
- else
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
- fi
- ;;
-
- solaris*)
- if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then
- _LT_TAGVAR(ld_shlibs, $1)=no
- cat <<_LT_EOF 1>&2
-
-*** Warning: The releases 2.8.* of the GNU linker cannot reliably
-*** create shared libraries on Solaris systems. Therefore, libtool
-*** is disabling shared libraries support. We urge you to upgrade GNU
-*** binutils to release 2.9.1 or newer. Another option is to modify
-*** your PATH or compiler configuration so that the native linker is
-*** used, and then restart.
-
-_LT_EOF
- elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
- else
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
- ;;
-
- sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*)
- case `$LD -v 2>&1` in
- *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*)
- _LT_TAGVAR(ld_shlibs, $1)=no
- cat <<_LT_EOF 1>&2
-
-*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not
-*** reliably create shared libraries on SCO systems. Therefore, libtool
-*** is disabling shared libraries support. We urge you to upgrade GNU
-*** binutils to release 2.16.91.0.3 or newer. Another option is to modify
-*** your PATH or compiler configuration so that the native linker is
-*** used, and then restart.
-
-_LT_EOF
- ;;
- *)
- # For security reasons, it is highly recommended that you always
- # use absolute paths for naming shared libraries, and exclude the
- # DT_RUNPATH tag from executables and libraries. But doing so
- # requires that you compile everything twice, which is a pain.
- if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
- else
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
- ;;
- esac
- ;;
-
- sunos4*)
- _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
- wlarc=
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- ;;
-
- *)
- if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
- else
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
- ;;
- esac
-
- if test "$_LT_TAGVAR(ld_shlibs, $1)" = no; then
- runpath_var=
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
- _LT_TAGVAR(export_dynamic_flag_spec, $1)=
- _LT_TAGVAR(whole_archive_flag_spec, $1)=
- fi
- else
- # PORTME fill in a description of your system's linker (not GNU ld)
- case $host_os in
- aix3*)
- _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
- _LT_TAGVAR(always_export_symbols, $1)=yes
- _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname'
- # Note: this linker hardcodes the directories in LIBPATH if there
- # are no directories specified by -L.
- _LT_TAGVAR(hardcode_minus_L, $1)=yes
- if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then
- # Neither direct hardcoding nor static linking is supported with a
- # broken collect2.
- _LT_TAGVAR(hardcode_direct, $1)=unsupported
- fi
- ;;
-
- aix[[4-9]]*)
- if test "$host_cpu" = ia64; then
- # On IA64, the linker does run time linking by default, so we don't
- # have to do anything special.
- aix_use_runtimelinking=no
- exp_sym_flag='-Bexport'
- no_entry_flag=""
- else
- # If we're using GNU nm, then we don't want the "-C" option.
- # -C means demangle to AIX nm, but means don't demangle with GNU nm
- # Also, AIX nm treats weak defined symbols like other global
- # defined symbols, whereas GNU nm marks them as "W".
- if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
- _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
- else
- _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
- fi
- aix_use_runtimelinking=no
-
- # Test if we are trying to use run time linking or normal
- # AIX style linking. If -brtl is somewhere in LDFLAGS, we
- # need to do runtime linking.
- case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*)
- for ld_flag in $LDFLAGS; do
- if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then
- aix_use_runtimelinking=yes
- break
- fi
- done
- ;;
- esac
-
- exp_sym_flag='-bexport'
- no_entry_flag='-bnoentry'
- fi
-
- # When large executables or shared objects are built, AIX ld can
- # have problems creating the table of contents. If linking a library
- # or program results in "error TOC overflow" add -mminimal-toc to
- # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not
- # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
-
- _LT_TAGVAR(archive_cmds, $1)=''
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
- _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
- _LT_TAGVAR(link_all_deplibs, $1)=yes
- _LT_TAGVAR(file_list_spec, $1)='${wl}-f,'
-
- if test "$GCC" = yes; then
- case $host_os in aix4.[[012]]|aix4.[[012]].*)
- # We only want to do this on AIX 4.2 and lower, the check
- # below for broken collect2 doesn't work under 4.3+
- collect2name=`${CC} -print-prog-name=collect2`
- if test -f "$collect2name" &&
- strings "$collect2name" | $GREP resolve_lib_name >/dev/null
- then
- # We have reworked collect2
- :
- else
- # We have old collect2
- _LT_TAGVAR(hardcode_direct, $1)=unsupported
- # It fails to find uninstalled libraries when the uninstalled
- # path is not listed in the libpath. Setting hardcode_minus_L
- # to unsupported forces relinking
- _LT_TAGVAR(hardcode_minus_L, $1)=yes
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=
- fi
- ;;
- esac
- shared_flag='-shared'
- if test "$aix_use_runtimelinking" = yes; then
- shared_flag="$shared_flag "'${wl}-G'
- fi
- else
- # not using gcc
- if test "$host_cpu" = ia64; then
- # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
- # chokes on -Wl,-G. The following line is correct:
- shared_flag='-G'
- else
- if test "$aix_use_runtimelinking" = yes; then
- shared_flag='${wl}-G'
- else
- shared_flag='${wl}-bM:SRE'
- fi
- fi
- fi
-
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall'
- # It seems that -bexpall does not export symbols beginning with
- # underscore (_), so it is better to generate a list of symbols to export.
- _LT_TAGVAR(always_export_symbols, $1)=yes
- if test "$aix_use_runtimelinking" = yes; then
- # Warning - without using the other runtime loading flags (-brtl),
- # -berok will link without error, but may produce a broken library.
- _LT_TAGVAR(allow_undefined_flag, $1)='-berok'
- # Determine the default libpath from the value encoded in an
- # empty executable.
- _LT_SYS_MODULE_PATH_AIX([$1])
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
- else
- if test "$host_cpu" = ia64; then
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib'
- _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs"
- _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
- else
- # Determine the default libpath from the value encoded in an
- # empty executable.
- _LT_SYS_MODULE_PATH_AIX([$1])
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
- # Warning - without using the other run time loading flags,
- # -berok will link without error, but may produce a broken library.
- _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok'
- _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok'
- if test "$with_gnu_ld" = yes; then
- # We only use this code for GNU lds that support --whole-archive.
- _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
- else
- # Exported symbols can be pulled into shared objects from archives
- _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience'
- fi
- _LT_TAGVAR(archive_cmds_need_lc, $1)=yes
- # This is similar to how AIX traditionally builds its shared libraries.
- _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
- fi
- fi
- ;;
-
- amigaos*)
- case $host_cpu in
- powerpc)
- # see comment about AmigaOS4 .so support
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)=''
- ;;
- m68k)
- _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
- _LT_TAGVAR(hardcode_minus_L, $1)=yes
- ;;
- esac
- ;;
-
- bsdi[[45]]*)
- _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic
- ;;
-
- cygwin* | mingw* | pw32* | cegcc*)
- # When not using gcc, we currently assume that we are using
- # Microsoft Visual C++.
- # hardcode_libdir_flag_spec is actually meaningless, as there is
- # no search path for DLLs.
- case $cc_basename in
- cl*)
- # Native MSVC
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
- _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
- _LT_TAGVAR(always_export_symbols, $1)=yes
- _LT_TAGVAR(file_list_spec, $1)='@'
- # Tell ltmain to make .lib files, not .a files.
- libext=lib
- # Tell ltmain to make .dll files, not .so files.
- shrext_cmds=".dll"
- # FIXME: Setting linknames here is a bad hack.
- _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
- _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
- sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
- else
- sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
- fi~
- $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
- linknames='
- # The linker will not automatically build a static lib if we build a DLL.
- # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
- _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
- _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
- _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols'
- # Don't use ranlib
- _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib'
- _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~
- lt_tool_outputfile="@TOOL_OUTPUT@"~
- case $lt_outputfile in
- *.exe|*.EXE) ;;
- *)
- lt_outputfile="$lt_outputfile.exe"
- lt_tool_outputfile="$lt_tool_outputfile.exe"
- ;;
- esac~
- if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
- $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
- $RM "$lt_outputfile.manifest";
- fi'
- ;;
- *)
- # Assume MSVC wrapper
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
- _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
- # Tell ltmain to make .lib files, not .a files.
- libext=lib
- # Tell ltmain to make .dll files, not .so files.
- shrext_cmds=".dll"
- # FIXME: Setting linknames here is a bad hack.
- _LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames='
- # The linker will automatically build a .lib file if we build a DLL.
- _LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
- # FIXME: Should let the user specify the lib program.
- _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs'
- _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
- ;;
- esac
- ;;
-
- darwin* | rhapsody*)
- _LT_DARWIN_LINKER_FEATURES($1)
- ;;
-
- dgux*)
- _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- ;;
-
- # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
- # support. Future versions do this automatically, but an explicit c++rt0.o
- # does not break anything, and helps significantly (at the cost of a little
- # extra space).
- freebsd2.2*)
- _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- ;;
-
- # Unfortunately, older versions of FreeBSD 2 do not have this feature.
- freebsd2.*)
- _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_minus_L, $1)=yes
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- ;;
-
- # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
- freebsd* | dragonfly*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- ;;
-
- hpux9*)
- if test "$GCC" = yes; then
- _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
- else
- _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
- fi
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
- _LT_TAGVAR(hardcode_direct, $1)=yes
-
- # hardcode_minus_L: Not really in the search PATH,
- # but as the default location of the library.
- _LT_TAGVAR(hardcode_minus_L, $1)=yes
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
- ;;
-
- hpux10*)
- if test "$GCC" = yes && test "$with_gnu_ld" = no; then
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
- else
- _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'
- fi
- if test "$with_gnu_ld" = no; then
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
- # hardcode_minus_L: Not really in the search PATH,
- # but as the default location of the library.
- _LT_TAGVAR(hardcode_minus_L, $1)=yes
- fi
- ;;
-
- hpux11*)
- if test "$GCC" = yes && test "$with_gnu_ld" = no; then
- case $host_cpu in
- hppa*64*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- ia64*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- *)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- esac
- else
- case $host_cpu in
- hppa*64*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- ia64*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- *)
- m4_if($1, [], [
- # Older versions of the 11.00 compiler do not understand -b yet
- # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does)
- _LT_LINKER_OPTION([if $CC understands -b],
- _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b],
- [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'],
- [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])],
- [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'])
- ;;
- esac
- fi
- if test "$with_gnu_ld" = no; then
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-
- case $host_cpu in
- hppa*64*|ia64*)
- _LT_TAGVAR(hardcode_direct, $1)=no
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- ;;
- *)
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
-
- # hardcode_minus_L: Not really in the search PATH,
- # but as the default location of the library.
- _LT_TAGVAR(hardcode_minus_L, $1)=yes
- ;;
- esac
- fi
- ;;
-
- irix5* | irix6* | nonstopux*)
- if test "$GCC" = yes; then
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
- # Try to use the -exported_symbol ld option, if it does not
- # work, assume that -exports_file does not work either and
- # implicitly export all symbols.
- # This should be the same for all languages, so no per-tag cache variable.
- AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol],
- [lt_cv_irix_exported_symbol],
- [save_LDFLAGS="$LDFLAGS"
- LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null"
- AC_LINK_IFELSE(
- [AC_LANG_SOURCE(
- [AC_LANG_CASE([C], [[int foo (void) { return 0; }]],
- [C++], [[int foo (void) { return 0; }]],
- [Fortran 77], [[
- subroutine foo
- end]],
- [Fortran], [[
- subroutine foo
- end]])])],
- [lt_cv_irix_exported_symbol=yes],
- [lt_cv_irix_exported_symbol=no])
- LDFLAGS="$save_LDFLAGS"])
- if test "$lt_cv_irix_exported_symbol" = yes; then
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib'
- fi
- else
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib'
- fi
- _LT_TAGVAR(archive_cmds_need_lc, $1)='no'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
- _LT_TAGVAR(inherit_rpath, $1)=yes
- _LT_TAGVAR(link_all_deplibs, $1)=yes
- ;;
-
- netbsd*)
- if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
- _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out
- else
- _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF
- fi
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- ;;
-
- newsos6)
- _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- ;;
-
- *nto* | *qnx*)
- ;;
-
- openbsd*)
- if test -f /usr/libexec/ld.so; then
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
- if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
- else
- case $host_os in
- openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*)
- _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
- ;;
- *)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
- ;;
- esac
- fi
- else
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
- ;;
-
- os2*)
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
- _LT_TAGVAR(hardcode_minus_L, $1)=yes
- _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
- _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def'
- _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def'
- ;;
-
- osf3*)
- if test "$GCC" = yes; then
- _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
- else
- _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
- fi
- _LT_TAGVAR(archive_cmds_need_lc, $1)='no'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
- ;;
-
- osf4* | osf5*) # as osf3* with the addition of -msym flag
- if test "$GCC" = yes; then
- _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
- else
- _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~
- $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp'
-
- # Both c and cxx compiler support -rpath directly
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
- fi
- _LT_TAGVAR(archive_cmds_need_lc, $1)='no'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
- ;;
-
- solaris*)
- _LT_TAGVAR(no_undefined_flag, $1)=' -z defs'
- if test "$GCC" = yes; then
- wlarc='${wl}'
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
- $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
- else
- case `$CC -V 2>&1` in
- *"Compilers 5.0"*)
- wlarc=''
- _LT_TAGVAR(archive_cmds, $1)='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
- $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp'
- ;;
- *)
- wlarc='${wl}'
- _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
- $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
- ;;
- esac
- fi
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- case $host_os in
- solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
- *)
- # The compiler driver will combine and reorder linker options,
- # but understands `-z linker_flag'. GCC discards it without `$wl',
- # but is careful enough not to reorder.
- # Supported since Solaris 2.6 (maybe 2.5.1?)
- if test "$GCC" = yes; then
- _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
- else
- _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract'
- fi
- ;;
- esac
- _LT_TAGVAR(link_all_deplibs, $1)=yes
- ;;
-
- sunos4*)
- if test "x$host_vendor" = xsequent; then
- # Use $CC to link under sequent, because it throws in some extra .o
- # files that make .init and .fini sections work.
- _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags'
- else
- _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
- fi
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_minus_L, $1)=yes
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- ;;
-
- sysv4)
- case $host_vendor in
- sni)
- _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true???
- ;;
- siemens)
- ## LD is ld it makes a PLAMLIB
- ## CC just makes a GrossModule.
- _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags'
- _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs'
- _LT_TAGVAR(hardcode_direct, $1)=no
- ;;
- motorola)
- _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie
- ;;
- esac
- runpath_var='LD_RUN_PATH'
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- ;;
-
- sysv4.3*)
- _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport'
- ;;
-
- sysv4*MP*)
- if test -d /usr/nec; then
- _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- runpath_var=LD_RUN_PATH
- hardcode_runpath_var=yes
- _LT_TAGVAR(ld_shlibs, $1)=yes
- fi
- ;;
-
- sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*)
- _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
- _LT_TAGVAR(archive_cmds_need_lc, $1)=no
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- runpath_var='LD_RUN_PATH'
-
- if test "$GCC" = yes; then
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- else
- _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- fi
- ;;
-
- sysv5* | sco3.2v5* | sco5v6*)
- # Note: We can NOT use -z defs as we might desire, because we do not
- # link with -lc, and that would cause any symbols used from libc to
- # always be unresolved, which means just about no library would
- # ever link correctly. If we're not using GNU ld we use -z text
- # though, which does catch some bad symbols but isn't as heavy-handed
- # as -z defs.
- _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
- _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs'
- _LT_TAGVAR(archive_cmds_need_lc, $1)=no
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
- _LT_TAGVAR(link_all_deplibs, $1)=yes
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport'
- runpath_var='LD_RUN_PATH'
-
- if test "$GCC" = yes; then
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- else
- _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- fi
- ;;
-
- uts4*)
- _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- ;;
-
- *)
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- esac
-
- if test x$host_vendor = xsni; then
- case $host in
- sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Blargedynsym'
- ;;
- esac
- fi
- fi
-])
-AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)])
-test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no
-
-_LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld
-
-_LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl
-_LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl
-_LT_DECL([], [extract_expsyms_cmds], [2],
- [The commands to extract the exported symbol list from a shared archive])
-
-#
-# Do we need to explicitly link libc?
-#
-case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in
-x|xyes)
- # Assume -lc should be added
- _LT_TAGVAR(archive_cmds_need_lc, $1)=yes
-
- if test "$enable_shared" = yes && test "$GCC" = yes; then
- case $_LT_TAGVAR(archive_cmds, $1) in
- *'~'*)
- # FIXME: we may have to deal with multi-command sequences.
- ;;
- '$CC '*)
- # Test whether the compiler implicitly links with -lc since on some
- # systems, -lgcc has to come before -lc. If gcc already passes -lc
- # to ld, don't add -lc before -lgcc.
- AC_CACHE_CHECK([whether -lc should be explicitly linked in],
- [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1),
- [$RM conftest*
- echo "$lt_simple_compile_test_code" > conftest.$ac_ext
-
- if AC_TRY_EVAL(ac_compile) 2>conftest.err; then
- soname=conftest
- lib=conftest
- libobjs=conftest.$ac_objext
- deplibs=
- wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1)
- pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1)
- compiler_flags=-v
- linker_flags=-v
- verstring=
- output_objdir=.
- libname=conftest
- lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1)
- _LT_TAGVAR(allow_undefined_flag, $1)=
- if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1)
- then
- lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no
- else
- lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes
- fi
- _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag
- else
- cat conftest.err 1>&5
- fi
- $RM conftest*
- ])
- _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)
- ;;
- esac
- fi
- ;;
-esac
-
-_LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0],
- [Whether or not to add -lc for building shared libraries])
-_LT_TAGDECL([allow_libtool_libs_with_static_runtimes],
- [enable_shared_with_static_runtimes], [0],
- [Whether or not to disallow shared libs when runtime libs are static])
-_LT_TAGDECL([], [export_dynamic_flag_spec], [1],
- [Compiler flag to allow reflexive dlopens])
-_LT_TAGDECL([], [whole_archive_flag_spec], [1],
- [Compiler flag to generate shared objects directly from archives])
-_LT_TAGDECL([], [compiler_needs_object], [1],
- [Whether the compiler copes with passing no objects directly])
-_LT_TAGDECL([], [old_archive_from_new_cmds], [2],
- [Create an old-style archive from a shared archive])
-_LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2],
- [Create a temporary old-style archive to link instead of a shared archive])
-_LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive])
-_LT_TAGDECL([], [archive_expsym_cmds], [2])
-_LT_TAGDECL([], [module_cmds], [2],
- [Commands used to build a loadable module if different from building
- a shared archive.])
-_LT_TAGDECL([], [module_expsym_cmds], [2])
-_LT_TAGDECL([], [with_gnu_ld], [1],
- [Whether we are building with GNU ld or not])
-_LT_TAGDECL([], [allow_undefined_flag], [1],
- [Flag that allows shared libraries with undefined symbols to be built])
-_LT_TAGDECL([], [no_undefined_flag], [1],
- [Flag that enforces no undefined symbols])
-_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1],
- [Flag to hardcode $libdir into a binary during linking.
- This must work even if $libdir does not exist])
-_LT_TAGDECL([], [hardcode_libdir_separator], [1],
- [Whether we need a single "-rpath" flag with a separated argument])
-_LT_TAGDECL([], [hardcode_direct], [0],
- [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes
- DIR into the resulting binary])
-_LT_TAGDECL([], [hardcode_direct_absolute], [0],
- [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes
- DIR into the resulting binary and the resulting library dependency is
- "absolute", i.e impossible to change by setting ${shlibpath_var} if the
- library is relocated])
-_LT_TAGDECL([], [hardcode_minus_L], [0],
- [Set to "yes" if using the -LDIR flag during linking hardcodes DIR
- into the resulting binary])
-_LT_TAGDECL([], [hardcode_shlibpath_var], [0],
- [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR
- into the resulting binary])
-_LT_TAGDECL([], [hardcode_automatic], [0],
- [Set to "yes" if building a shared library automatically hardcodes DIR
- into the library and all subsequent libraries and executables linked
- against it])
-_LT_TAGDECL([], [inherit_rpath], [0],
- [Set to yes if linker adds runtime paths of dependent libraries
- to runtime path list])
-_LT_TAGDECL([], [link_all_deplibs], [0],
- [Whether libtool must link a program against all its dependency libraries])
-_LT_TAGDECL([], [always_export_symbols], [0],
- [Set to "yes" if exported symbols are required])
-_LT_TAGDECL([], [export_symbols_cmds], [2],
- [The commands to list exported symbols])
-_LT_TAGDECL([], [exclude_expsyms], [1],
- [Symbols that should not be listed in the preloaded symbols])
-_LT_TAGDECL([], [include_expsyms], [1],
- [Symbols that must always be exported])
-_LT_TAGDECL([], [prelink_cmds], [2],
- [Commands necessary for linking programs (against libraries) with templates])
-_LT_TAGDECL([], [postlink_cmds], [2],
- [Commands necessary for finishing linking programs])
-_LT_TAGDECL([], [file_list_spec], [1],
- [Specify filename containing input files])
-dnl FIXME: Not yet implemented
-dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1],
-dnl [Compiler flag to generate thread safe objects])
-])# _LT_LINKER_SHLIBS
-
-
-# _LT_LANG_C_CONFIG([TAG])
-# ------------------------
-# Ensure that the configuration variables for a C compiler are suitably
-# defined. These variables are subsequently used by _LT_CONFIG to write
-# the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_C_CONFIG],
-[m4_require([_LT_DECL_EGREP])dnl
-lt_save_CC="$CC"
-AC_LANG_PUSH(C)
-
-# Source file extension for C test sources.
-ac_ext=c
-
-# Object file extension for compiled C test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# Code to be used in simple compile tests
-lt_simple_compile_test_code="int some_variable = 0;"
-
-# Code to be used in simple link tests
-lt_simple_link_test_code='int main(){return(0);}'
-
-_LT_TAG_COMPILER
-# Save the default compiler, since it gets overwritten when the other
-# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP.
-compiler_DEFAULT=$CC
-
-# save warnings/boilerplate of simple test code
-_LT_COMPILER_BOILERPLATE
-_LT_LINKER_BOILERPLATE
-
-## CAVEAT EMPTOR:
-## There is no encapsulation within the following macros, do not change
-## the running order or otherwise move them around unless you know exactly
-## what you are doing...
-if test -n "$compiler"; then
- _LT_COMPILER_NO_RTTI($1)
- _LT_COMPILER_PIC($1)
- _LT_COMPILER_C_O($1)
- _LT_COMPILER_FILE_LOCKS($1)
- _LT_LINKER_SHLIBS($1)
- _LT_SYS_DYNAMIC_LINKER($1)
- _LT_LINKER_HARDCODE_LIBPATH($1)
- LT_SYS_DLOPEN_SELF
- _LT_CMD_STRIPLIB
-
- # Report which library types will actually be built
- AC_MSG_CHECKING([if libtool supports shared libraries])
- AC_MSG_RESULT([$can_build_shared])
-
- AC_MSG_CHECKING([whether to build shared libraries])
- test "$can_build_shared" = "no" && enable_shared=no
-
- # On AIX, shared libraries and static libraries use the same namespace, and
- # are all built from PIC.
- case $host_os in
- aix3*)
- test "$enable_shared" = yes && enable_static=no
- if test -n "$RANLIB"; then
- archive_cmds="$archive_cmds~\$RANLIB \$lib"
- postinstall_cmds='$RANLIB $lib'
- fi
- ;;
-
- aix[[4-9]]*)
- if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
- test "$enable_shared" = yes && enable_static=no
- fi
- ;;
- esac
- AC_MSG_RESULT([$enable_shared])
-
- AC_MSG_CHECKING([whether to build static libraries])
- # Make sure either enable_shared or enable_static is yes.
- test "$enable_shared" = yes || enable_static=yes
- AC_MSG_RESULT([$enable_static])
-
- _LT_CONFIG($1)
-fi
-AC_LANG_POP
-CC="$lt_save_CC"
-])# _LT_LANG_C_CONFIG
-
-
-# _LT_LANG_CXX_CONFIG([TAG])
-# --------------------------
-# Ensure that the configuration variables for a C++ compiler are suitably
-# defined. These variables are subsequently used by _LT_CONFIG to write
-# the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_CXX_CONFIG],
-[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-m4_require([_LT_DECL_EGREP])dnl
-m4_require([_LT_PATH_MANIFEST_TOOL])dnl
-if test -n "$CXX" && ( test "X$CXX" != "Xno" &&
- ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) ||
- (test "X$CXX" != "Xg++"))) ; then
- AC_PROG_CXXCPP
-else
- _lt_caught_CXX_error=yes
-fi
-
-AC_LANG_PUSH(C++)
-_LT_TAGVAR(archive_cmds_need_lc, $1)=no
-_LT_TAGVAR(allow_undefined_flag, $1)=
-_LT_TAGVAR(always_export_symbols, $1)=no
-_LT_TAGVAR(archive_expsym_cmds, $1)=
-_LT_TAGVAR(compiler_needs_object, $1)=no
-_LT_TAGVAR(export_dynamic_flag_spec, $1)=
-_LT_TAGVAR(hardcode_direct, $1)=no
-_LT_TAGVAR(hardcode_direct_absolute, $1)=no
-_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_separator, $1)=
-_LT_TAGVAR(hardcode_minus_L, $1)=no
-_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
-_LT_TAGVAR(hardcode_automatic, $1)=no
-_LT_TAGVAR(inherit_rpath, $1)=no
-_LT_TAGVAR(module_cmds, $1)=
-_LT_TAGVAR(module_expsym_cmds, $1)=
-_LT_TAGVAR(link_all_deplibs, $1)=unknown
-_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
-_LT_TAGVAR(reload_flag, $1)=$reload_flag
-_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
-_LT_TAGVAR(no_undefined_flag, $1)=
-_LT_TAGVAR(whole_archive_flag_spec, $1)=
-_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
-
-# Source file extension for C++ test sources.
-ac_ext=cpp
-
-# Object file extension for compiled C++ test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# No sense in running all these tests if we already determined that
-# the CXX compiler isn't working. Some variables (like enable_shared)
-# are currently assumed to apply to all compilers on this platform,
-# and will be corrupted by setting them based on a non-working compiler.
-if test "$_lt_caught_CXX_error" != yes; then
- # Code to be used in simple compile tests
- lt_simple_compile_test_code="int some_variable = 0;"
-
- # Code to be used in simple link tests
- lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }'
-
- # ltmain only uses $CC for tagged configurations so make sure $CC is set.
- _LT_TAG_COMPILER
-
- # save warnings/boilerplate of simple test code
- _LT_COMPILER_BOILERPLATE
- _LT_LINKER_BOILERPLATE
-
- # Allow CC to be a program name with arguments.
- lt_save_CC=$CC
- lt_save_CFLAGS=$CFLAGS
- lt_save_LD=$LD
- lt_save_GCC=$GCC
- GCC=$GXX
- lt_save_with_gnu_ld=$with_gnu_ld
- lt_save_path_LD=$lt_cv_path_LD
- if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then
- lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx
- else
- $as_unset lt_cv_prog_gnu_ld
- fi
- if test -n "${lt_cv_path_LDCXX+set}"; then
- lt_cv_path_LD=$lt_cv_path_LDCXX
- else
- $as_unset lt_cv_path_LD
- fi
- test -z "${LDCXX+set}" || LD=$LDCXX
- CC=${CXX-"c++"}
- CFLAGS=$CXXFLAGS
- compiler=$CC
- _LT_TAGVAR(compiler, $1)=$CC
- _LT_CC_BASENAME([$compiler])
-
- if test -n "$compiler"; then
- # We don't want -fno-exception when compiling C++ code, so set the
- # no_builtin_flag separately
- if test "$GXX" = yes; then
- _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin'
- else
- _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=
- fi
-
- if test "$GXX" = yes; then
- # Set up default GNU C++ configuration
-
- LT_PATH_LD
-
- # Check if GNU C++ uses GNU ld as the underlying linker, since the
- # archiving commands below assume that GNU ld is being used.
- if test "$with_gnu_ld" = yes; then
- _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
-
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
-
- # If archive_cmds runs LD, not CC, wlarc should be empty
- # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to
- # investigate it a little bit more. (MM)
- wlarc='${wl}'
-
- # ancient GNU ld didn't support --whole-archive et. al.
- if eval "`$CC -print-prog-name=ld` --help 2>&1" |
- $GREP 'no-whole-archive' > /dev/null; then
- _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
- else
- _LT_TAGVAR(whole_archive_flag_spec, $1)=
- fi
- else
- with_gnu_ld=no
- wlarc=
-
- # A generic and very simple default shared library creation
- # command for GNU C++ for the case where it uses the native
- # linker, instead of GNU ld. If possible, this setting should
- # overridden to take advantage of the native linker features on
- # the platform it is being used on.
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
- fi
-
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
-
- else
- GXX=no
- with_gnu_ld=no
- wlarc=
- fi
-
- # PORTME: fill in a description of your system's C++ link characteristics
- AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries])
- _LT_TAGVAR(ld_shlibs, $1)=yes
- case $host_os in
- aix3*)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- aix[[4-9]]*)
- if test "$host_cpu" = ia64; then
- # On IA64, the linker does run time linking by default, so we don't
- # have to do anything special.
- aix_use_runtimelinking=no
- exp_sym_flag='-Bexport'
- no_entry_flag=""
- else
- aix_use_runtimelinking=no
-
- # Test if we are trying to use run time linking or normal
- # AIX style linking. If -brtl is somewhere in LDFLAGS, we
- # need to do runtime linking.
- case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*)
- for ld_flag in $LDFLAGS; do
- case $ld_flag in
- *-brtl*)
- aix_use_runtimelinking=yes
- break
- ;;
- esac
- done
- ;;
- esac
-
- exp_sym_flag='-bexport'
- no_entry_flag='-bnoentry'
- fi
-
- # When large executables or shared objects are built, AIX ld can
- # have problems creating the table of contents. If linking a library
- # or program results in "error TOC overflow" add -mminimal-toc to
- # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not
- # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
-
- _LT_TAGVAR(archive_cmds, $1)=''
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
- _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
- _LT_TAGVAR(link_all_deplibs, $1)=yes
- _LT_TAGVAR(file_list_spec, $1)='${wl}-f,'
-
- if test "$GXX" = yes; then
- case $host_os in aix4.[[012]]|aix4.[[012]].*)
- # We only want to do this on AIX 4.2 and lower, the check
- # below for broken collect2 doesn't work under 4.3+
- collect2name=`${CC} -print-prog-name=collect2`
- if test -f "$collect2name" &&
- strings "$collect2name" | $GREP resolve_lib_name >/dev/null
- then
- # We have reworked collect2
- :
- else
- # We have old collect2
- _LT_TAGVAR(hardcode_direct, $1)=unsupported
- # It fails to find uninstalled libraries when the uninstalled
- # path is not listed in the libpath. Setting hardcode_minus_L
- # to unsupported forces relinking
- _LT_TAGVAR(hardcode_minus_L, $1)=yes
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=
- fi
- esac
- shared_flag='-shared'
- if test "$aix_use_runtimelinking" = yes; then
- shared_flag="$shared_flag "'${wl}-G'
- fi
- else
- # not using gcc
- if test "$host_cpu" = ia64; then
- # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
- # chokes on -Wl,-G. The following line is correct:
- shared_flag='-G'
- else
- if test "$aix_use_runtimelinking" = yes; then
- shared_flag='${wl}-G'
- else
- shared_flag='${wl}-bM:SRE'
- fi
- fi
- fi
-
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall'
- # It seems that -bexpall does not export symbols beginning with
- # underscore (_), so it is better to generate a list of symbols to
- # export.
- _LT_TAGVAR(always_export_symbols, $1)=yes
- if test "$aix_use_runtimelinking" = yes; then
- # Warning - without using the other runtime loading flags (-brtl),
- # -berok will link without error, but may produce a broken library.
- _LT_TAGVAR(allow_undefined_flag, $1)='-berok'
- # Determine the default libpath from the value encoded in an empty
- # executable.
- _LT_SYS_MODULE_PATH_AIX([$1])
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
-
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
- else
- if test "$host_cpu" = ia64; then
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib'
- _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs"
- _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
- else
- # Determine the default libpath from the value encoded in an
- # empty executable.
- _LT_SYS_MODULE_PATH_AIX([$1])
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
- # Warning - without using the other run time loading flags,
- # -berok will link without error, but may produce a broken library.
- _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok'
- _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok'
- if test "$with_gnu_ld" = yes; then
- # We only use this code for GNU lds that support --whole-archive.
- _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
- else
- # Exported symbols can be pulled into shared objects from archives
- _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience'
- fi
- _LT_TAGVAR(archive_cmds_need_lc, $1)=yes
- # This is similar to how AIX traditionally builds its shared
- # libraries.
- _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
- fi
- fi
- ;;
-
- beos*)
- if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
- _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
- # Joseph Beckenbach <jrb3 at best.com> says some releases of gcc
- # support --undefined. This deserves some investigation. FIXME
- _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- else
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
- ;;
-
- chorus*)
- case $cc_basename in
- *)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- esac
- ;;
-
- cygwin* | mingw* | pw32* | cegcc*)
- case $GXX,$cc_basename in
- ,cl* | no,cl*)
- # Native MSVC
- # hardcode_libdir_flag_spec is actually meaningless, as there is
- # no search path for DLLs.
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
- _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
- _LT_TAGVAR(always_export_symbols, $1)=yes
- _LT_TAGVAR(file_list_spec, $1)='@'
- # Tell ltmain to make .lib files, not .a files.
- libext=lib
- # Tell ltmain to make .dll files, not .so files.
- shrext_cmds=".dll"
- # FIXME: Setting linknames here is a bad hack.
- _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames='
- _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
- $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
- else
- $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp;
- fi~
- $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
- linknames='
- # The linker will not automatically build a static lib if we build a DLL.
- # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
- _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
- # Don't use ranlib
- _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib'
- _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~
- lt_tool_outputfile="@TOOL_OUTPUT@"~
- case $lt_outputfile in
- *.exe|*.EXE) ;;
- *)
- lt_outputfile="$lt_outputfile.exe"
- lt_tool_outputfile="$lt_tool_outputfile.exe"
- ;;
- esac~
- func_to_tool_file "$lt_outputfile"~
- if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then
- $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1;
- $RM "$lt_outputfile.manifest";
- fi'
- ;;
- *)
- # g++
- # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless,
- # as there is no search path for DLLs.
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols'
- _LT_TAGVAR(allow_undefined_flag, $1)=unsupported
- _LT_TAGVAR(always_export_symbols, $1)=no
- _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
-
- if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
- # If the export-symbols file already is a .def file (1st line
- # is EXPORTS), use it as is; otherwise, prepend...
- _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
- cp $export_symbols $output_objdir/$soname.def;
- else
- echo EXPORTS > $output_objdir/$soname.def;
- cat $export_symbols >> $output_objdir/$soname.def;
- fi~
- $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
- else
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
- ;;
- esac
- ;;
- darwin* | rhapsody*)
- _LT_DARWIN_LINKER_FEATURES($1)
- ;;
-
- dgux*)
- case $cc_basename in
- ec++*)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- ghcx*)
- # Green Hills C++ Compiler
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- *)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- esac
- ;;
-
- freebsd2.*)
- # C++ shared libraries reported to be fairly broken before
- # switch to ELF
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
-
- freebsd-elf*)
- _LT_TAGVAR(archive_cmds_need_lc, $1)=no
- ;;
-
- freebsd* | dragonfly*)
- # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF
- # conventions
- _LT_TAGVAR(ld_shlibs, $1)=yes
- ;;
-
- gnu*)
- ;;
-
- haiku*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- _LT_TAGVAR(link_all_deplibs, $1)=yes
- ;;
-
- hpux9*)
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH,
- # but as the default
- # location of the library.
-
- case $cc_basename in
- CC*)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- aCC*)
- _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- #
- # There doesn't appear to be a way to prevent this compiler from
- # explicitly linking system object files so we need to strip them
- # from the output so that they don't get included in the library
- # dependencies.
- output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
- ;;
- *)
- if test "$GXX" = yes; then
- _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
- else
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
- ;;
- esac
- ;;
-
- hpux10*|hpux11*)
- if test $with_gnu_ld = no; then
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-
- case $host_cpu in
- hppa*64*|ia64*)
- ;;
- *)
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
- ;;
- esac
- fi
- case $host_cpu in
- hppa*64*|ia64*)
- _LT_TAGVAR(hardcode_direct, $1)=no
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- ;;
- *)
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
- _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH,
- # but as the default
- # location of the library.
- ;;
- esac
-
- case $cc_basename in
- CC*)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- aCC*)
- case $host_cpu in
- hppa*64*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- ;;
- ia64*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- ;;
- *)
- _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- ;;
- esac
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- #
- # There doesn't appear to be a way to prevent this compiler from
- # explicitly linking system object files so we need to strip them
- # from the output so that they don't get included in the library
- # dependencies.
- output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
- ;;
- *)
- if test "$GXX" = yes; then
- if test $with_gnu_ld = no; then
- case $host_cpu in
- hppa*64*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- ;;
- ia64*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- ;;
- *)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- ;;
- esac
- fi
- else
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
- ;;
- esac
- ;;
-
- interix[[3-9]]*)
- _LT_TAGVAR(hardcode_direct, $1)=no
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
- # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
- # Instead, shared libraries are loaded at an image base (0x10000000 by
- # default) and relocated if they conflict, which is a slow very memory
- # consuming and fragmenting process. To avoid this, we pick a random,
- # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
- # time. Moving up from 0x10000000 also allows more sbrk(2) space.
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
- ;;
- irix5* | irix6*)
- case $cc_basename in
- CC*)
- # SGI C++
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
-
- # Archives containing C++ object files must be created using
- # "CC -ar", where "CC" is the IRIX C++ compiler. This is
- # necessary to make sure instantiated templates are included
- # in the archive.
- _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs'
- ;;
- *)
- if test "$GXX" = yes; then
- if test "$with_gnu_ld" = no; then
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
- else
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib'
- fi
- fi
- _LT_TAGVAR(link_all_deplibs, $1)=yes
- ;;
- esac
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
- _LT_TAGVAR(inherit_rpath, $1)=yes
- ;;
-
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
- case $cc_basename in
- KCC*)
- # Kuck and Associates, Inc. (KAI) C++ Compiler
-
- # KCC will only create a shared library if the output file
- # ends with ".so" (or ".sl" for HP-UX), so rename the library
- # to its proper name (with version) after linking.
- _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib'
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- #
- # There doesn't appear to be a way to prevent this compiler from
- # explicitly linking system object files so we need to strip them
- # from the output so that they don't get included in the library
- # dependencies.
- output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
-
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
-
- # Archives containing C++ object files must be created using
- # "CC -Bstatic", where "CC" is the KAI C++ compiler.
- _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs'
- ;;
- icpc* | ecpc* )
- # Intel C++
- with_gnu_ld=yes
- # version 8.0 and above of icpc choke on multiply defined symbols
- # if we add $predep_objects and $postdep_objects, however 7.1 and
- # earlier do not add the objects themselves.
- case `$CC -V 2>&1` in
- *"Version 7."*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
- ;;
- *) # Version 8.0 or newer
- tmp_idyn=
- case $host_cpu in
- ia64*) tmp_idyn=' -i_dynamic';;
- esac
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
- ;;
- esac
- _LT_TAGVAR(archive_cmds_need_lc, $1)=no
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
- _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
- ;;
- pgCC* | pgcpp*)
- # Portland Group C++ compiler
- case `$CC -V` in
- *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*)
- _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~
- rm -rf $tpldir~
- $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~
- compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"'
- _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~
- rm -rf $tpldir~
- $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~
- $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~
- $RANLIB $oldlib'
- _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~
- rm -rf $tpldir~
- $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
- $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~
- rm -rf $tpldir~
- $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~
- $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
- ;;
- *) # Version 6 and above use weak symbols
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
- ;;
- esac
-
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir'
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
- _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
- ;;
- cxx*)
- # Compaq C++
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols'
-
- runpath_var=LD_RUN_PATH
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- #
- # There doesn't appear to be a way to prevent this compiler from
- # explicitly linking system object files so we need to strip them
- # from the output so that they don't get included in the library
- # dependencies.
- output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed'
- ;;
- xl* | mpixl* | bgxl*)
- # IBM XL 8.0 on PPC, with GNU ld
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
- _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
- if test "x$supports_anon_versioning" = xyes; then
- _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
- cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
- echo "local: *; };" >> $output_objdir/$libname.ver~
- $CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
- fi
- ;;
- *)
- case `$CC -V 2>&1 | sed 5q` in
- *Sun\ C*)
- # Sun C++ 5.9
- _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs'
- _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
- _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive'
- _LT_TAGVAR(compiler_needs_object, $1)=yes
-
- # Not sure whether something based on
- # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1
- # would be better.
- output_verbose_link_cmd='func_echo_all'
-
- # Archives containing C++ object files must be created using
- # "CC -xar", where "CC" is the Sun C++ compiler. This is
- # necessary to make sure instantiated templates are included
- # in the archive.
- _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs'
- ;;
- esac
- ;;
- esac
- ;;
-
- lynxos*)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
-
- m88k*)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
-
- mvs*)
- case $cc_basename in
- cxx*)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- *)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- esac
- ;;
-
- netbsd*)
- if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
- _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags'
- wlarc=
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- fi
- # Workaround some broken pre-1.5 toolchains
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"'
- ;;
-
- *nto* | *qnx*)
- _LT_TAGVAR(ld_shlibs, $1)=yes
- ;;
-
- openbsd2*)
- # C++ shared libraries are fairly broken
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
-
- openbsd*)
- if test -f /usr/libexec/ld.so; then
- _LT_TAGVAR(hardcode_direct, $1)=yes
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- _LT_TAGVAR(hardcode_direct_absolute, $1)=yes
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
- if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib'
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
- _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
- fi
- output_verbose_link_cmd=func_echo_all
- else
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
- ;;
-
- osf3* | osf4* | osf5*)
- case $cc_basename in
- KCC*)
- # Kuck and Associates, Inc. (KAI) C++ Compiler
-
- # KCC will only create a shared library if the output file
- # ends with ".so" (or ".sl" for HP-UX), so rename the library
- # to its proper name (with version) after linking.
- _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
-
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-
- # Archives containing C++ object files must be created using
- # the KAI C++ compiler.
- case $host in
- osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;;
- *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;;
- esac
- ;;
- RCC*)
- # Rational C++ 2.4.1
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- cxx*)
- case $host in
- osf3*)
- _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
- ;;
- *)
- _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~
- echo "-hidden">> $lib.exp~
- $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~
- $RM $lib.exp'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
- ;;
- esac
-
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- #
- # There doesn't appear to be a way to prevent this compiler from
- # explicitly linking system object files so we need to strip them
- # from the output so that they don't get included in the library
- # dependencies.
- output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"'
- ;;
- *)
- if test "$GXX" = yes && test "$with_gnu_ld" = no; then
- _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
- case $host in
- osf3*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
- ;;
- *)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
- ;;
- esac
-
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=:
-
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
-
- else
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- fi
- ;;
- esac
- ;;
-
- psos*)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
-
- sunos4*)
- case $cc_basename in
- CC*)
- # Sun C++ 4.x
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- lcc*)
- # Lucid
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- *)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- esac
- ;;
-
- solaris*)
- case $cc_basename in
- CC* | sunCC*)
- # Sun C++ 4.2, 5.x and Centerline C++
- _LT_TAGVAR(archive_cmds_need_lc,$1)=yes
- _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs'
- _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
- $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
-
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- case $host_os in
- solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
- *)
- # The compiler driver will combine and reorder linker options,
- # but understands `-z linker_flag'.
- # Supported since Solaris 2.6 (maybe 2.5.1?)
- _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract'
- ;;
- esac
- _LT_TAGVAR(link_all_deplibs, $1)=yes
-
- output_verbose_link_cmd='func_echo_all'
-
- # Archives containing C++ object files must be created using
- # "CC -xar", where "CC" is the Sun C++ compiler. This is
- # necessary to make sure instantiated templates are included
- # in the archive.
- _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs'
- ;;
- gcx*)
- # Green Hills C++ Compiler
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
-
- # The C++ compiler must be used to create the archive.
- _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs'
- ;;
- *)
- # GNU C++ compiler with Solaris linker
- if test "$GXX" = yes && test "$with_gnu_ld" = no; then
- _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-z ${wl}defs'
- if $CC --version | $GREP -v '^2\.7' > /dev/null; then
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
- $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
-
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
- else
- # g++ 2.7 appears to require `-G' NOT `-shared' on this
- # platform.
- _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
- _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
- $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp'
-
- # Commands to make compiler produce verbose output that lists
- # what "hidden" libraries, object files and flags are used when
- # linking a shared library.
- output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"'
- fi
-
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir'
- case $host_os in
- solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
- *)
- _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract'
- ;;
- esac
- fi
- ;;
- esac
- ;;
-
- sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*)
- _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
- _LT_TAGVAR(archive_cmds_need_lc, $1)=no
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- runpath_var='LD_RUN_PATH'
-
- case $cc_basename in
- CC*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- *)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- esac
- ;;
-
- sysv5* | sco3.2v5* | sco5v6*)
- # Note: We can NOT use -z defs as we might desire, because we do not
- # link with -lc, and that would cause any symbols used from libc to
- # always be unresolved, which means just about no library would
- # ever link correctly. If we're not using GNU ld we use -z text
- # though, which does catch some bad symbols but isn't as heavy-handed
- # as -z defs.
- _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
- _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs'
- _LT_TAGVAR(archive_cmds_need_lc, $1)=no
- _LT_TAGVAR(hardcode_shlibpath_var, $1)=no
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir'
- _LT_TAGVAR(hardcode_libdir_separator, $1)=':'
- _LT_TAGVAR(link_all_deplibs, $1)=yes
- _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport'
- runpath_var='LD_RUN_PATH'
-
- case $cc_basename in
- CC*)
- _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~
- '"$_LT_TAGVAR(old_archive_cmds, $1)"
- _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~
- '"$_LT_TAGVAR(reload_cmds, $1)"
- ;;
- *)
- _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
- ;;
- esac
- ;;
-
- tandem*)
- case $cc_basename in
- NCC*)
- # NonStop-UX NCC 3.20
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- *)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- esac
- ;;
-
- vxworks*)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
-
- *)
- # FIXME: insert proper C++ library support
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
- esac
-
- AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)])
- test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no
-
- _LT_TAGVAR(GCC, $1)="$GXX"
- _LT_TAGVAR(LD, $1)="$LD"
-
- ## CAVEAT EMPTOR:
- ## There is no encapsulation within the following macros, do not change
- ## the running order or otherwise move them around unless you know exactly
- ## what you are doing...
- _LT_SYS_HIDDEN_LIBDEPS($1)
- _LT_COMPILER_PIC($1)
- _LT_COMPILER_C_O($1)
- _LT_COMPILER_FILE_LOCKS($1)
- _LT_LINKER_SHLIBS($1)
- _LT_SYS_DYNAMIC_LINKER($1)
- _LT_LINKER_HARDCODE_LIBPATH($1)
-
- _LT_CONFIG($1)
- fi # test -n "$compiler"
-
- CC=$lt_save_CC
- CFLAGS=$lt_save_CFLAGS
- LDCXX=$LD
- LD=$lt_save_LD
- GCC=$lt_save_GCC
- with_gnu_ld=$lt_save_with_gnu_ld
- lt_cv_path_LDCXX=$lt_cv_path_LD
- lt_cv_path_LD=$lt_save_path_LD
- lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld
- lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld
-fi # test "$_lt_caught_CXX_error" != yes
-
-AC_LANG_POP
-])# _LT_LANG_CXX_CONFIG
-
-
-# _LT_FUNC_STRIPNAME_CNF
-# ----------------------
-# func_stripname_cnf prefix suffix name
-# strip PREFIX and SUFFIX off of NAME.
-# PREFIX and SUFFIX must not contain globbing or regex special
-# characters, hashes, percent signs, but SUFFIX may contain a leading
-# dot (in which case that matches only a dot).
-#
-# This function is identical to the (non-XSI) version of func_stripname,
-# except this one can be used by m4 code that may be executed by configure,
-# rather than the libtool script.
-m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl
-AC_REQUIRE([_LT_DECL_SED])
-AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])
-func_stripname_cnf ()
-{
- case ${2} in
- .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;;
- *) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;;
- esac
-} # func_stripname_cnf
-])# _LT_FUNC_STRIPNAME_CNF
-
-# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME])
-# ---------------------------------
-# Figure out "hidden" library dependencies from verbose
-# compiler output when linking a shared library.
-# Parse the compiler output and extract the necessary
-# objects, libraries and library flags.
-m4_defun([_LT_SYS_HIDDEN_LIBDEPS],
-[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
-AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl
-# Dependencies to place before and after the object being linked:
-_LT_TAGVAR(predep_objects, $1)=
-_LT_TAGVAR(postdep_objects, $1)=
-_LT_TAGVAR(predeps, $1)=
-_LT_TAGVAR(postdeps, $1)=
-_LT_TAGVAR(compiler_lib_search_path, $1)=
-
-dnl we can't use the lt_simple_compile_test_code here,
-dnl because it contains code intended for an executable,
-dnl not a library. It's possible we should let each
-dnl tag define a new lt_????_link_test_code variable,
-dnl but it's only used here...
-m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF
-int a;
-void foo (void) { a = 0; }
-_LT_EOF
-], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF
-class Foo
-{
-public:
- Foo (void) { a = 0; }
-private:
- int a;
-};
-_LT_EOF
-], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF
- subroutine foo
- implicit none
- integer*4 a
- a=0
- return
- end
-_LT_EOF
-], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF
- subroutine foo
- implicit none
- integer a
- a=0
- return
- end
-_LT_EOF
-], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF
-public class foo {
- private int a;
- public void bar (void) {
- a = 0;
- }
-};
-_LT_EOF
-], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF
-package foo
-func foo() {
-}
-_LT_EOF
-])
-
-_lt_libdeps_save_CFLAGS=$CFLAGS
-case "$CC $CFLAGS " in #(
-*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
-*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
-*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
-esac
-
-dnl Parse the compiler output and extract the necessary
-dnl objects, libraries and library flags.
-if AC_TRY_EVAL(ac_compile); then
- # Parse the compiler output and extract the necessary
- # objects, libraries and library flags.
-
- # Sentinel used to keep track of whether or not we are before
- # the conftest object file.
- pre_test_object_deps_done=no
-
- for p in `eval "$output_verbose_link_cmd"`; do
- case ${prev}${p} in
-
- -L* | -R* | -l*)
- # Some compilers place space between "-{L,R}" and the path.
- # Remove the space.
- if test $p = "-L" ||
- test $p = "-R"; then
- prev=$p
- continue
- fi
-
- # Expand the sysroot to ease extracting the directories later.
- if test -z "$prev"; then
- case $p in
- -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;;
- -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;;
- -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;;
- esac
- fi
- case $p in
- =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;;
- esac
- if test "$pre_test_object_deps_done" = no; then
- case ${prev} in
- -L | -R)
- # Internal compiler library paths should come after those
- # provided the user. The postdeps already come after the
- # user supplied libs so there is no need to process them.
- if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then
- _LT_TAGVAR(compiler_lib_search_path, $1)="${prev}${p}"
- else
- _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} ${prev}${p}"
- fi
- ;;
- # The "-l" case would never come before the object being
- # linked, so don't bother handling this case.
- esac
- else
- if test -z "$_LT_TAGVAR(postdeps, $1)"; then
- _LT_TAGVAR(postdeps, $1)="${prev}${p}"
- else
- _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} ${prev}${p}"
- fi
- fi
- prev=
- ;;
-
- *.lto.$objext) ;; # Ignore GCC LTO objects
- *.$objext)
- # This assumes that the test object file only shows up
- # once in the compiler output.
- if test "$p" = "conftest.$objext"; then
- pre_test_object_deps_done=yes
- continue
- fi
-
- if test "$pre_test_object_deps_done" = no; then
- if test -z "$_LT_TAGVAR(predep_objects, $1)"; then
- _LT_TAGVAR(predep_objects, $1)="$p"
- else
- _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p"
- fi
- else
- if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then
- _LT_TAGVAR(postdep_objects, $1)="$p"
- else
- _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p"
- fi
- fi
- ;;
-
- *) ;; # Ignore the rest.
-
- esac
- done
-
- # Clean up.
- rm -f a.out a.exe
-else
- echo "libtool.m4: error: problem compiling $1 test program"
-fi
-
-$RM -f confest.$objext
-CFLAGS=$_lt_libdeps_save_CFLAGS
-
-# PORTME: override above test on systems where it is broken
-m4_if([$1], [CXX],
-[case $host_os in
-interix[[3-9]]*)
- # Interix 3.5 installs completely hosed .la files for C++, so rather than
- # hack all around it, let's just trust "g++" to DTRT.
- _LT_TAGVAR(predep_objects,$1)=
- _LT_TAGVAR(postdep_objects,$1)=
- _LT_TAGVAR(postdeps,$1)=
- ;;
-
-linux*)
- case `$CC -V 2>&1 | sed 5q` in
- *Sun\ C*)
- # Sun C++ 5.9
-
- # The more standards-conforming stlport4 library is
- # incompatible with the Cstd library. Avoid specifying
- # it if it's in CXXFLAGS. Ignore libCrun as
- # -library=stlport4 depends on it.
- case " $CXX $CXXFLAGS " in
- *" -library=stlport4 "*)
- solaris_use_stlport4=yes
- ;;
- esac
-
- if test "$solaris_use_stlport4" != yes; then
- _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun'
- fi
- ;;
- esac
- ;;
-
-solaris*)
- case $cc_basename in
- CC* | sunCC*)
- # The more standards-conforming stlport4 library is
- # incompatible with the Cstd library. Avoid specifying
- # it if it's in CXXFLAGS. Ignore libCrun as
- # -library=stlport4 depends on it.
- case " $CXX $CXXFLAGS " in
- *" -library=stlport4 "*)
- solaris_use_stlport4=yes
- ;;
- esac
-
- # Adding this requires a known-good setup of shared libraries for
- # Sun compiler versions before 5.6, else PIC objects from an old
- # archive will be linked into the output, leading to subtle bugs.
- if test "$solaris_use_stlport4" != yes; then
- _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun'
- fi
- ;;
- esac
- ;;
-esac
-])
-
-case " $_LT_TAGVAR(postdeps, $1) " in
-*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;;
-esac
- _LT_TAGVAR(compiler_lib_search_dirs, $1)=
-if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then
- _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | ${SED} -e 's! -L! !g' -e 's!^ !!'`
-fi
-_LT_TAGDECL([], [compiler_lib_search_dirs], [1],
- [The directories searched by this compiler when creating a shared library])
-_LT_TAGDECL([], [predep_objects], [1],
- [Dependencies to place before and after the objects being linked to
- create a shared library])
-_LT_TAGDECL([], [postdep_objects], [1])
-_LT_TAGDECL([], [predeps], [1])
-_LT_TAGDECL([], [postdeps], [1])
-_LT_TAGDECL([], [compiler_lib_search_path], [1],
- [The library search path used internally by the compiler when linking
- a shared library])
-])# _LT_SYS_HIDDEN_LIBDEPS
-
-
-# _LT_LANG_F77_CONFIG([TAG])
-# --------------------------
-# Ensure that the configuration variables for a Fortran 77 compiler are
-# suitably defined. These variables are subsequently used by _LT_CONFIG
-# to write the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_F77_CONFIG],
-[AC_LANG_PUSH(Fortran 77)
-if test -z "$F77" || test "X$F77" = "Xno"; then
- _lt_disable_F77=yes
-fi
-
-_LT_TAGVAR(archive_cmds_need_lc, $1)=no
-_LT_TAGVAR(allow_undefined_flag, $1)=
-_LT_TAGVAR(always_export_symbols, $1)=no
-_LT_TAGVAR(archive_expsym_cmds, $1)=
-_LT_TAGVAR(export_dynamic_flag_spec, $1)=
-_LT_TAGVAR(hardcode_direct, $1)=no
-_LT_TAGVAR(hardcode_direct_absolute, $1)=no
-_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_separator, $1)=
-_LT_TAGVAR(hardcode_minus_L, $1)=no
-_LT_TAGVAR(hardcode_automatic, $1)=no
-_LT_TAGVAR(inherit_rpath, $1)=no
-_LT_TAGVAR(module_cmds, $1)=
-_LT_TAGVAR(module_expsym_cmds, $1)=
-_LT_TAGVAR(link_all_deplibs, $1)=unknown
-_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
-_LT_TAGVAR(reload_flag, $1)=$reload_flag
-_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
-_LT_TAGVAR(no_undefined_flag, $1)=
-_LT_TAGVAR(whole_archive_flag_spec, $1)=
-_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
-
-# Source file extension for f77 test sources.
-ac_ext=f
-
-# Object file extension for compiled f77 test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# No sense in running all these tests if we already determined that
-# the F77 compiler isn't working. Some variables (like enable_shared)
-# are currently assumed to apply to all compilers on this platform,
-# and will be corrupted by setting them based on a non-working compiler.
-if test "$_lt_disable_F77" != yes; then
- # Code to be used in simple compile tests
- lt_simple_compile_test_code="\
- subroutine t
- return
- end
-"
-
- # Code to be used in simple link tests
- lt_simple_link_test_code="\
- program t
- end
-"
-
- # ltmain only uses $CC for tagged configurations so make sure $CC is set.
- _LT_TAG_COMPILER
-
- # save warnings/boilerplate of simple test code
- _LT_COMPILER_BOILERPLATE
- _LT_LINKER_BOILERPLATE
-
- # Allow CC to be a program name with arguments.
- lt_save_CC="$CC"
- lt_save_GCC=$GCC
- lt_save_CFLAGS=$CFLAGS
- CC=${F77-"f77"}
- CFLAGS=$FFLAGS
- compiler=$CC
- _LT_TAGVAR(compiler, $1)=$CC
- _LT_CC_BASENAME([$compiler])
- GCC=$G77
- if test -n "$compiler"; then
- AC_MSG_CHECKING([if libtool supports shared libraries])
- AC_MSG_RESULT([$can_build_shared])
-
- AC_MSG_CHECKING([whether to build shared libraries])
- test "$can_build_shared" = "no" && enable_shared=no
-
- # On AIX, shared libraries and static libraries use the same namespace, and
- # are all built from PIC.
- case $host_os in
- aix3*)
- test "$enable_shared" = yes && enable_static=no
- if test -n "$RANLIB"; then
- archive_cmds="$archive_cmds~\$RANLIB \$lib"
- postinstall_cmds='$RANLIB $lib'
- fi
- ;;
- aix[[4-9]]*)
- if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
- test "$enable_shared" = yes && enable_static=no
- fi
- ;;
- esac
- AC_MSG_RESULT([$enable_shared])
-
- AC_MSG_CHECKING([whether to build static libraries])
- # Make sure either enable_shared or enable_static is yes.
- test "$enable_shared" = yes || enable_static=yes
- AC_MSG_RESULT([$enable_static])
-
- _LT_TAGVAR(GCC, $1)="$G77"
- _LT_TAGVAR(LD, $1)="$LD"
-
- ## CAVEAT EMPTOR:
- ## There is no encapsulation within the following macros, do not change
- ## the running order or otherwise move them around unless you know exactly
- ## what you are doing...
- _LT_COMPILER_PIC($1)
- _LT_COMPILER_C_O($1)
- _LT_COMPILER_FILE_LOCKS($1)
- _LT_LINKER_SHLIBS($1)
- _LT_SYS_DYNAMIC_LINKER($1)
- _LT_LINKER_HARDCODE_LIBPATH($1)
-
- _LT_CONFIG($1)
- fi # test -n "$compiler"
-
- GCC=$lt_save_GCC
- CC="$lt_save_CC"
- CFLAGS="$lt_save_CFLAGS"
-fi # test "$_lt_disable_F77" != yes
-
-AC_LANG_POP
-])# _LT_LANG_F77_CONFIG
-
-
-# _LT_LANG_FC_CONFIG([TAG])
-# -------------------------
-# Ensure that the configuration variables for a Fortran compiler are
-# suitably defined. These variables are subsequently used by _LT_CONFIG
-# to write the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_FC_CONFIG],
-[AC_LANG_PUSH(Fortran)
-
-if test -z "$FC" || test "X$FC" = "Xno"; then
- _lt_disable_FC=yes
-fi
-
-_LT_TAGVAR(archive_cmds_need_lc, $1)=no
-_LT_TAGVAR(allow_undefined_flag, $1)=
-_LT_TAGVAR(always_export_symbols, $1)=no
-_LT_TAGVAR(archive_expsym_cmds, $1)=
-_LT_TAGVAR(export_dynamic_flag_spec, $1)=
-_LT_TAGVAR(hardcode_direct, $1)=no
-_LT_TAGVAR(hardcode_direct_absolute, $1)=no
-_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_separator, $1)=
-_LT_TAGVAR(hardcode_minus_L, $1)=no
-_LT_TAGVAR(hardcode_automatic, $1)=no
-_LT_TAGVAR(inherit_rpath, $1)=no
-_LT_TAGVAR(module_cmds, $1)=
-_LT_TAGVAR(module_expsym_cmds, $1)=
-_LT_TAGVAR(link_all_deplibs, $1)=unknown
-_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
-_LT_TAGVAR(reload_flag, $1)=$reload_flag
-_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
-_LT_TAGVAR(no_undefined_flag, $1)=
-_LT_TAGVAR(whole_archive_flag_spec, $1)=
-_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no
-
-# Source file extension for fc test sources.
-ac_ext=${ac_fc_srcext-f}
-
-# Object file extension for compiled fc test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# No sense in running all these tests if we already determined that
-# the FC compiler isn't working. Some variables (like enable_shared)
-# are currently assumed to apply to all compilers on this platform,
-# and will be corrupted by setting them based on a non-working compiler.
-if test "$_lt_disable_FC" != yes; then
- # Code to be used in simple compile tests
- lt_simple_compile_test_code="\
- subroutine t
- return
- end
-"
-
- # Code to be used in simple link tests
- lt_simple_link_test_code="\
- program t
- end
-"
-
- # ltmain only uses $CC for tagged configurations so make sure $CC is set.
- _LT_TAG_COMPILER
-
- # save warnings/boilerplate of simple test code
- _LT_COMPILER_BOILERPLATE
- _LT_LINKER_BOILERPLATE
-
- # Allow CC to be a program name with arguments.
- lt_save_CC="$CC"
- lt_save_GCC=$GCC
- lt_save_CFLAGS=$CFLAGS
- CC=${FC-"f95"}
- CFLAGS=$FCFLAGS
- compiler=$CC
- GCC=$ac_cv_fc_compiler_gnu
-
- _LT_TAGVAR(compiler, $1)=$CC
- _LT_CC_BASENAME([$compiler])
-
- if test -n "$compiler"; then
- AC_MSG_CHECKING([if libtool supports shared libraries])
- AC_MSG_RESULT([$can_build_shared])
-
- AC_MSG_CHECKING([whether to build shared libraries])
- test "$can_build_shared" = "no" && enable_shared=no
-
- # On AIX, shared libraries and static libraries use the same namespace, and
- # are all built from PIC.
- case $host_os in
- aix3*)
- test "$enable_shared" = yes && enable_static=no
- if test -n "$RANLIB"; then
- archive_cmds="$archive_cmds~\$RANLIB \$lib"
- postinstall_cmds='$RANLIB $lib'
- fi
- ;;
- aix[[4-9]]*)
- if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
- test "$enable_shared" = yes && enable_static=no
- fi
- ;;
- esac
- AC_MSG_RESULT([$enable_shared])
-
- AC_MSG_CHECKING([whether to build static libraries])
- # Make sure either enable_shared or enable_static is yes.
- test "$enable_shared" = yes || enable_static=yes
- AC_MSG_RESULT([$enable_static])
-
- _LT_TAGVAR(GCC, $1)="$ac_cv_fc_compiler_gnu"
- _LT_TAGVAR(LD, $1)="$LD"
-
- ## CAVEAT EMPTOR:
- ## There is no encapsulation within the following macros, do not change
- ## the running order or otherwise move them around unless you know exactly
- ## what you are doing...
- _LT_SYS_HIDDEN_LIBDEPS($1)
- _LT_COMPILER_PIC($1)
- _LT_COMPILER_C_O($1)
- _LT_COMPILER_FILE_LOCKS($1)
- _LT_LINKER_SHLIBS($1)
- _LT_SYS_DYNAMIC_LINKER($1)
- _LT_LINKER_HARDCODE_LIBPATH($1)
-
- _LT_CONFIG($1)
- fi # test -n "$compiler"
-
- GCC=$lt_save_GCC
- CC=$lt_save_CC
- CFLAGS=$lt_save_CFLAGS
-fi # test "$_lt_disable_FC" != yes
-
-AC_LANG_POP
-])# _LT_LANG_FC_CONFIG
-
-
-# _LT_LANG_GCJ_CONFIG([TAG])
-# --------------------------
-# Ensure that the configuration variables for the GNU Java Compiler compiler
-# are suitably defined. These variables are subsequently used by _LT_CONFIG
-# to write the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_GCJ_CONFIG],
-[AC_REQUIRE([LT_PROG_GCJ])dnl
-AC_LANG_SAVE
-
-# Source file extension for Java test sources.
-ac_ext=java
-
-# Object file extension for compiled Java test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# Code to be used in simple compile tests
-lt_simple_compile_test_code="class foo {}"
-
-# Code to be used in simple link tests
-lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }'
-
-# ltmain only uses $CC for tagged configurations so make sure $CC is set.
-_LT_TAG_COMPILER
-
-# save warnings/boilerplate of simple test code
-_LT_COMPILER_BOILERPLATE
-_LT_LINKER_BOILERPLATE
-
-# Allow CC to be a program name with arguments.
-lt_save_CC=$CC
-lt_save_CFLAGS=$CFLAGS
-lt_save_GCC=$GCC
-GCC=yes
-CC=${GCJ-"gcj"}
-CFLAGS=$GCJFLAGS
-compiler=$CC
-_LT_TAGVAR(compiler, $1)=$CC
-_LT_TAGVAR(LD, $1)="$LD"
-_LT_CC_BASENAME([$compiler])
-
-# GCJ did not exist at the time GCC didn't implicitly link libc in.
-_LT_TAGVAR(archive_cmds_need_lc, $1)=no
-
-_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
-_LT_TAGVAR(reload_flag, $1)=$reload_flag
-_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
-
-## CAVEAT EMPTOR:
-## There is no encapsulation within the following macros, do not change
-## the running order or otherwise move them around unless you know exactly
-## what you are doing...
-if test -n "$compiler"; then
- _LT_COMPILER_NO_RTTI($1)
- _LT_COMPILER_PIC($1)
- _LT_COMPILER_C_O($1)
- _LT_COMPILER_FILE_LOCKS($1)
- _LT_LINKER_SHLIBS($1)
- _LT_LINKER_HARDCODE_LIBPATH($1)
-
- _LT_CONFIG($1)
-fi
-
-AC_LANG_RESTORE
-
-GCC=$lt_save_GCC
-CC=$lt_save_CC
-CFLAGS=$lt_save_CFLAGS
-])# _LT_LANG_GCJ_CONFIG
-
-
-# _LT_LANG_GO_CONFIG([TAG])
-# --------------------------
-# Ensure that the configuration variables for the GNU Go compiler
-# are suitably defined. These variables are subsequently used by _LT_CONFIG
-# to write the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_GO_CONFIG],
-[AC_REQUIRE([LT_PROG_GO])dnl
-AC_LANG_SAVE
-
-# Source file extension for Go test sources.
-ac_ext=go
-
-# Object file extension for compiled Go test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# Code to be used in simple compile tests
-lt_simple_compile_test_code="package main; func main() { }"
-
-# Code to be used in simple link tests
-lt_simple_link_test_code='package main; func main() { }'
-
-# ltmain only uses $CC for tagged configurations so make sure $CC is set.
-_LT_TAG_COMPILER
-
-# save warnings/boilerplate of simple test code
-_LT_COMPILER_BOILERPLATE
-_LT_LINKER_BOILERPLATE
-
-# Allow CC to be a program name with arguments.
-lt_save_CC=$CC
-lt_save_CFLAGS=$CFLAGS
-lt_save_GCC=$GCC
-GCC=yes
-CC=${GOC-"gccgo"}
-CFLAGS=$GOFLAGS
-compiler=$CC
-_LT_TAGVAR(compiler, $1)=$CC
-_LT_TAGVAR(LD, $1)="$LD"
-_LT_CC_BASENAME([$compiler])
-
-# Go did not exist at the time GCC didn't implicitly link libc in.
-_LT_TAGVAR(archive_cmds_need_lc, $1)=no
-
-_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
-_LT_TAGVAR(reload_flag, $1)=$reload_flag
-_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
-
-## CAVEAT EMPTOR:
-## There is no encapsulation within the following macros, do not change
-## the running order or otherwise move them around unless you know exactly
-## what you are doing...
-if test -n "$compiler"; then
- _LT_COMPILER_NO_RTTI($1)
- _LT_COMPILER_PIC($1)
- _LT_COMPILER_C_O($1)
- _LT_COMPILER_FILE_LOCKS($1)
- _LT_LINKER_SHLIBS($1)
- _LT_LINKER_HARDCODE_LIBPATH($1)
-
- _LT_CONFIG($1)
-fi
-
-AC_LANG_RESTORE
-
-GCC=$lt_save_GCC
-CC=$lt_save_CC
-CFLAGS=$lt_save_CFLAGS
-])# _LT_LANG_GO_CONFIG
-
-
-# _LT_LANG_RC_CONFIG([TAG])
-# -------------------------
-# Ensure that the configuration variables for the Windows resource compiler
-# are suitably defined. These variables are subsequently used by _LT_CONFIG
-# to write the compiler configuration to `libtool'.
-m4_defun([_LT_LANG_RC_CONFIG],
-[AC_REQUIRE([LT_PROG_RC])dnl
-AC_LANG_SAVE
-
-# Source file extension for RC test sources.
-ac_ext=rc
-
-# Object file extension for compiled RC test sources.
-objext=o
-_LT_TAGVAR(objext, $1)=$objext
-
-# Code to be used in simple compile tests
-lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }'
-
-# Code to be used in simple link tests
-lt_simple_link_test_code="$lt_simple_compile_test_code"
-
-# ltmain only uses $CC for tagged configurations so make sure $CC is set.
-_LT_TAG_COMPILER
-
-# save warnings/boilerplate of simple test code
-_LT_COMPILER_BOILERPLATE
-_LT_LINKER_BOILERPLATE
-
-# Allow CC to be a program name with arguments.
-lt_save_CC="$CC"
-lt_save_CFLAGS=$CFLAGS
-lt_save_GCC=$GCC
-GCC=
-CC=${RC-"windres"}
-CFLAGS=
-compiler=$CC
-_LT_TAGVAR(compiler, $1)=$CC
-_LT_CC_BASENAME([$compiler])
-_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes
-
-if test -n "$compiler"; then
- :
- _LT_CONFIG($1)
-fi
-
-GCC=$lt_save_GCC
-AC_LANG_RESTORE
-CC=$lt_save_CC
-CFLAGS=$lt_save_CFLAGS
-])# _LT_LANG_RC_CONFIG
-
-
-# LT_PROG_GCJ
-# -----------
-AC_DEFUN([LT_PROG_GCJ],
-[m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ],
- [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ],
- [AC_CHECK_TOOL(GCJ, gcj,)
- test "x${GCJFLAGS+set}" = xset || GCJFLAGS="-g -O2"
- AC_SUBST(GCJFLAGS)])])[]dnl
-])
-
-# Old name:
-AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([LT_AC_PROG_GCJ], [])
-
-
-# LT_PROG_GO
-# ----------
-AC_DEFUN([LT_PROG_GO],
-[AC_CHECK_TOOL(GOC, gccgo,)
-])
-
-
-# LT_PROG_RC
-# ----------
-AC_DEFUN([LT_PROG_RC],
-[AC_CHECK_TOOL(RC, windres,)
-])
-
-# Old name:
-AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([LT_AC_PROG_RC], [])
-
-
-# _LT_DECL_EGREP
-# --------------
-# If we don't have a new enough Autoconf to choose the best grep
-# available, choose the one first in the user's PATH.
-m4_defun([_LT_DECL_EGREP],
-[AC_REQUIRE([AC_PROG_EGREP])dnl
-AC_REQUIRE([AC_PROG_FGREP])dnl
-test -z "$GREP" && GREP=grep
-_LT_DECL([], [GREP], [1], [A grep program that handles long lines])
-_LT_DECL([], [EGREP], [1], [An ERE matcher])
-_LT_DECL([], [FGREP], [1], [A literal string matcher])
-dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too
-AC_SUBST([GREP])
-])
-
-
-# _LT_DECL_OBJDUMP
-# --------------
-# If we don't have a new enough Autoconf to choose the best objdump
-# available, choose the one first in the user's PATH.
-m4_defun([_LT_DECL_OBJDUMP],
-[AC_CHECK_TOOL(OBJDUMP, objdump, false)
-test -z "$OBJDUMP" && OBJDUMP=objdump
-_LT_DECL([], [OBJDUMP], [1], [An object symbol dumper])
-AC_SUBST([OBJDUMP])
-])
-
-# _LT_DECL_DLLTOOL
-# ----------------
-# Ensure DLLTOOL variable is set.
-m4_defun([_LT_DECL_DLLTOOL],
-[AC_CHECK_TOOL(DLLTOOL, dlltool, false)
-test -z "$DLLTOOL" && DLLTOOL=dlltool
-_LT_DECL([], [DLLTOOL], [1], [DLL creation program])
-AC_SUBST([DLLTOOL])
-])
-
-# _LT_DECL_SED
-# ------------
-# Check for a fully-functional sed program, that truncates
-# as few characters as possible. Prefer GNU sed if found.
-m4_defun([_LT_DECL_SED],
-[AC_PROG_SED
-test -z "$SED" && SED=sed
-Xsed="$SED -e 1s/^X//"
-_LT_DECL([], [SED], [1], [A sed program that does not truncate output])
-_LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"],
- [Sed that helps us avoid accidentally triggering echo(1) options like -n])
-])# _LT_DECL_SED
-
-m4_ifndef([AC_PROG_SED], [
-############################################################
-# NOTE: This macro has been submitted for inclusion into #
-# GNU Autoconf as AC_PROG_SED. When it is available in #
-# a released version of Autoconf we should remove this #
-# macro and use it instead. #
-############################################################
-
-m4_defun([AC_PROG_SED],
-[AC_MSG_CHECKING([for a sed that does not truncate output])
-AC_CACHE_VAL(lt_cv_path_SED,
-[# Loop through the user's path and test for sed and gsed.
-# Then use that list of sed's as ones to test for truncation.
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for lt_ac_prog in sed gsed; do
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then
- lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext"
- fi
- done
- done
-done
-IFS=$as_save_IFS
-lt_ac_max=0
-lt_ac_count=0
-# Add /usr/xpg4/bin/sed as it is typically found on Solaris
-# along with /bin/sed that truncates output.
-for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do
- test ! -f $lt_ac_sed && continue
- cat /dev/null > conftest.in
- lt_ac_count=0
- echo $ECHO_N "0123456789$ECHO_C" >conftest.in
- # Check for GNU sed and select it if it is found.
- if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then
- lt_cv_path_SED=$lt_ac_sed
- break
- fi
- while true; do
- cat conftest.in conftest.in >conftest.tmp
- mv conftest.tmp conftest.in
- cp conftest.in conftest.nl
- echo >>conftest.nl
- $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break
- cmp -s conftest.out conftest.nl || break
- # 10000 chars as input seems more than enough
- test $lt_ac_count -gt 10 && break
- lt_ac_count=`expr $lt_ac_count + 1`
- if test $lt_ac_count -gt $lt_ac_max; then
- lt_ac_max=$lt_ac_count
- lt_cv_path_SED=$lt_ac_sed
- fi
- done
-done
-])
-SED=$lt_cv_path_SED
-AC_SUBST([SED])
-AC_MSG_RESULT([$SED])
-])#AC_PROG_SED
-])#m4_ifndef
-
-# Old name:
-AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED])
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([LT_AC_PROG_SED], [])
-
-
-# _LT_CHECK_SHELL_FEATURES
-# ------------------------
-# Find out whether the shell is Bourne or XSI compatible,
-# or has some other useful features.
-m4_defun([_LT_CHECK_SHELL_FEATURES],
-[AC_MSG_CHECKING([whether the shell understands some XSI constructs])
-# Try some XSI features
-xsi_shell=no
-( _lt_dummy="a/b/c"
- test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \
- = c,a/b,b/c, \
- && eval 'test $(( 1 + 1 )) -eq 2 \
- && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \
- && xsi_shell=yes
-AC_MSG_RESULT([$xsi_shell])
-_LT_CONFIG_LIBTOOL_INIT([xsi_shell='$xsi_shell'])
-
-AC_MSG_CHECKING([whether the shell understands "+="])
-lt_shell_append=no
-( foo=bar; set foo baz; eval "$[1]+=\$[2]" && test "$foo" = barbaz ) \
- >/dev/null 2>&1 \
- && lt_shell_append=yes
-AC_MSG_RESULT([$lt_shell_append])
-_LT_CONFIG_LIBTOOL_INIT([lt_shell_append='$lt_shell_append'])
-
-if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
- lt_unset=unset
-else
- lt_unset=false
-fi
-_LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl
-
-# test EBCDIC or ASCII
-case `echo X|tr X '\101'` in
- A) # ASCII based system
- # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr
- lt_SP2NL='tr \040 \012'
- lt_NL2SP='tr \015\012 \040\040'
- ;;
- *) # EBCDIC based system
- lt_SP2NL='tr \100 \n'
- lt_NL2SP='tr \r\n \100\100'
- ;;
-esac
-_LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl
-_LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl
-])# _LT_CHECK_SHELL_FEATURES
-
-
-# _LT_PROG_FUNCTION_REPLACE (FUNCNAME, REPLACEMENT-BODY)
-# ------------------------------------------------------
-# In `$cfgfile', look for function FUNCNAME delimited by `^FUNCNAME ()$' and
-# '^} FUNCNAME ', and replace its body with REPLACEMENT-BODY.
-m4_defun([_LT_PROG_FUNCTION_REPLACE],
-[dnl {
-sed -e '/^$1 ()$/,/^} # $1 /c\
-$1 ()\
-{\
-m4_bpatsubsts([$2], [$], [\\], [^\([ ]\)], [\\\1])
-} # Extended-shell $1 implementation' "$cfgfile" > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
-test 0 -eq $? || _lt_function_replace_fail=:
-])
-
-
-# _LT_PROG_REPLACE_SHELLFNS
-# -------------------------
-# Replace existing portable implementations of several shell functions with
-# equivalent extended shell implementations where those features are available..
-m4_defun([_LT_PROG_REPLACE_SHELLFNS],
-[if test x"$xsi_shell" = xyes; then
- _LT_PROG_FUNCTION_REPLACE([func_dirname], [dnl
- case ${1} in
- */*) func_dirname_result="${1%/*}${2}" ;;
- * ) func_dirname_result="${3}" ;;
- esac])
-
- _LT_PROG_FUNCTION_REPLACE([func_basename], [dnl
- func_basename_result="${1##*/}"])
-
- _LT_PROG_FUNCTION_REPLACE([func_dirname_and_basename], [dnl
- case ${1} in
- */*) func_dirname_result="${1%/*}${2}" ;;
- * ) func_dirname_result="${3}" ;;
- esac
- func_basename_result="${1##*/}"])
-
- _LT_PROG_FUNCTION_REPLACE([func_stripname], [dnl
- # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are
- # positional parameters, so assign one to ordinary parameter first.
- func_stripname_result=${3}
- func_stripname_result=${func_stripname_result#"${1}"}
- func_stripname_result=${func_stripname_result%"${2}"}])
-
- _LT_PROG_FUNCTION_REPLACE([func_split_long_opt], [dnl
- func_split_long_opt_name=${1%%=*}
- func_split_long_opt_arg=${1#*=}])
-
- _LT_PROG_FUNCTION_REPLACE([func_split_short_opt], [dnl
- func_split_short_opt_arg=${1#??}
- func_split_short_opt_name=${1%"$func_split_short_opt_arg"}])
-
- _LT_PROG_FUNCTION_REPLACE([func_lo2o], [dnl
- case ${1} in
- *.lo) func_lo2o_result=${1%.lo}.${objext} ;;
- *) func_lo2o_result=${1} ;;
- esac])
-
- _LT_PROG_FUNCTION_REPLACE([func_xform], [ func_xform_result=${1%.*}.lo])
-
- _LT_PROG_FUNCTION_REPLACE([func_arith], [ func_arith_result=$(( $[*] ))])
-
- _LT_PROG_FUNCTION_REPLACE([func_len], [ func_len_result=${#1}])
-fi
-
-if test x"$lt_shell_append" = xyes; then
- _LT_PROG_FUNCTION_REPLACE([func_append], [ eval "${1}+=\\${2}"])
-
- _LT_PROG_FUNCTION_REPLACE([func_append_quoted], [dnl
- func_quote_for_eval "${2}"
-dnl m4 expansion turns \\\\ into \\, and then the shell eval turns that into \
- eval "${1}+=\\\\ \\$func_quote_for_eval_result"])
-
- # Save a `func_append' function call where possible by direct use of '+='
- sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
- test 0 -eq $? || _lt_function_replace_fail=:
-else
- # Save a `func_append' function call even when '+=' is not available
- sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \
- && mv -f "$cfgfile.tmp" "$cfgfile" \
- || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp")
- test 0 -eq $? || _lt_function_replace_fail=:
-fi
-
-if test x"$_lt_function_replace_fail" = x":"; then
- AC_MSG_WARN([Unable to substitute extended shell functions in $ofile])
-fi
-])
-
-# _LT_PATH_CONVERSION_FUNCTIONS
-# -----------------------------
-# Determine which file name conversion functions should be used by
-# func_to_host_file (and, implicitly, by func_to_host_path). These are needed
-# for certain cross-compile configurations and native mingw.
-m4_defun([_LT_PATH_CONVERSION_FUNCTIONS],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-AC_REQUIRE([AC_CANONICAL_BUILD])dnl
-AC_MSG_CHECKING([how to convert $build file names to $host format])
-AC_CACHE_VAL(lt_cv_to_host_file_cmd,
-[case $host in
- *-*-mingw* )
- case $build in
- *-*-mingw* ) # actually msys
- lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32
- ;;
- *-*-cygwin* )
- lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32
- ;;
- * ) # otherwise, assume *nix
- lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32
- ;;
- esac
- ;;
- *-*-cygwin* )
- case $build in
- *-*-mingw* ) # actually msys
- lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin
- ;;
- *-*-cygwin* )
- lt_cv_to_host_file_cmd=func_convert_file_noop
- ;;
- * ) # otherwise, assume *nix
- lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin
- ;;
- esac
- ;;
- * ) # unhandled hosts (and "normal" native builds)
- lt_cv_to_host_file_cmd=func_convert_file_noop
- ;;
-esac
-])
-to_host_file_cmd=$lt_cv_to_host_file_cmd
-AC_MSG_RESULT([$lt_cv_to_host_file_cmd])
-_LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd],
- [0], [convert $build file names to $host format])dnl
-
-AC_MSG_CHECKING([how to convert $build file names to toolchain format])
-AC_CACHE_VAL(lt_cv_to_tool_file_cmd,
-[#assume ordinary cross tools, or native build.
-lt_cv_to_tool_file_cmd=func_convert_file_noop
-case $host in
- *-*-mingw* )
- case $build in
- *-*-mingw* ) # actually msys
- lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32
- ;;
- esac
- ;;
-esac
-])
-to_tool_file_cmd=$lt_cv_to_tool_file_cmd
-AC_MSG_RESULT([$lt_cv_to_tool_file_cmd])
-_LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd],
- [0], [convert $build files to toolchain format])dnl
-])# _LT_PATH_CONVERSION_FUNCTIONS
diff --git a/macros/ltoptions.m4 b/macros/ltoptions.m4
deleted file mode 100644
index 5d9acd8..0000000
--- a/macros/ltoptions.m4
+++ /dev/null
@@ -1,384 +0,0 @@
-# Helper functions for option handling. -*- Autoconf -*-
-#
-# Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
-# Written by Gary V. Vaughan, 2004
-#
-# This file is free software; the Free Software Foundation gives
-# unlimited permission to copy and/or distribute it, with or without
-# modifications, as long as this notice is preserved.
-
-# serial 7 ltoptions.m4
-
-# This is to help aclocal find these macros, as it can't see m4_define.
-AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])])
-
-
-# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME)
-# ------------------------------------------
-m4_define([_LT_MANGLE_OPTION],
-[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])])
-
-
-# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME)
-# ---------------------------------------
-# Set option OPTION-NAME for macro MACRO-NAME, and if there is a
-# matching handler defined, dispatch to it. Other OPTION-NAMEs are
-# saved as a flag.
-m4_define([_LT_SET_OPTION],
-[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl
-m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]),
- _LT_MANGLE_DEFUN([$1], [$2]),
- [m4_warning([Unknown $1 option `$2'])])[]dnl
-])
-
-
-# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET])
-# ------------------------------------------------------------
-# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
-m4_define([_LT_IF_OPTION],
-[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])])
-
-
-# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET)
-# -------------------------------------------------------
-# Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME
-# are set.
-m4_define([_LT_UNLESS_OPTIONS],
-[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
- [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option),
- [m4_define([$0_found])])])[]dnl
-m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3
-])[]dnl
-])
-
-
-# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST)
-# ----------------------------------------
-# OPTION-LIST is a space-separated list of Libtool options associated
-# with MACRO-NAME. If any OPTION has a matching handler declared with
-# LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about
-# the unknown option and exit.
-m4_defun([_LT_SET_OPTIONS],
-[# Set options
-m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
- [_LT_SET_OPTION([$1], _LT_Option)])
-
-m4_if([$1],[LT_INIT],[
- dnl
- dnl Simply set some default values (i.e off) if boolean options were not
- dnl specified:
- _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no
- ])
- _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no
- ])
- dnl
- dnl If no reference was made to various pairs of opposing options, then
- dnl we run the default mode handler for the pair. For example, if neither
- dnl `shared' nor `disable-shared' was passed, we enable building of shared
- dnl archives by default:
- _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED])
- _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC])
- _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC])
- _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install],
- [_LT_ENABLE_FAST_INSTALL])
- ])
-])# _LT_SET_OPTIONS
-
-
-## --------------------------------- ##
-## Macros to handle LT_INIT options. ##
-## --------------------------------- ##
-
-# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME)
-# -----------------------------------------
-m4_define([_LT_MANGLE_DEFUN],
-[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])])
-
-
-# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE)
-# -----------------------------------------------
-m4_define([LT_OPTION_DEFINE],
-[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl
-])# LT_OPTION_DEFINE
-
-
-# dlopen
-# ------
-LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes
-])
-
-AU_DEFUN([AC_LIBTOOL_DLOPEN],
-[_LT_SET_OPTION([LT_INIT], [dlopen])
-AC_DIAGNOSE([obsolete],
-[$0: Remove this warning and the call to _LT_SET_OPTION when you
-put the `dlopen' option into LT_INIT's first parameter.])
-])
-
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], [])
-
-
-# win32-dll
-# ---------
-# Declare package support for building win32 dll's.
-LT_OPTION_DEFINE([LT_INIT], [win32-dll],
-[enable_win32_dll=yes
-
-case $host in
-*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*)
- AC_CHECK_TOOL(AS, as, false)
- AC_CHECK_TOOL(DLLTOOL, dlltool, false)
- AC_CHECK_TOOL(OBJDUMP, objdump, false)
- ;;
-esac
-
-test -z "$AS" && AS=as
-_LT_DECL([], [AS], [1], [Assembler program])dnl
-
-test -z "$DLLTOOL" && DLLTOOL=dlltool
-_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl
-
-test -z "$OBJDUMP" && OBJDUMP=objdump
-_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl
-])# win32-dll
-
-AU_DEFUN([AC_LIBTOOL_WIN32_DLL],
-[AC_REQUIRE([AC_CANONICAL_HOST])dnl
-_LT_SET_OPTION([LT_INIT], [win32-dll])
-AC_DIAGNOSE([obsolete],
-[$0: Remove this warning and the call to _LT_SET_OPTION when you
-put the `win32-dll' option into LT_INIT's first parameter.])
-])
-
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [])
-
-
-# _LT_ENABLE_SHARED([DEFAULT])
-# ----------------------------
-# implement the --enable-shared flag, and supports the `shared' and
-# `disable-shared' LT_INIT options.
-# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
-m4_define([_LT_ENABLE_SHARED],
-[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl
-AC_ARG_ENABLE([shared],
- [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],
- [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])],
- [p=${PACKAGE-default}
- case $enableval in
- yes) enable_shared=yes ;;
- no) enable_shared=no ;;
- *)
- enable_shared=no
- # Look at the argument we got. We use all the common list separators.
- lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
- for pkg in $enableval; do
- IFS="$lt_save_ifs"
- if test "X$pkg" = "X$p"; then
- enable_shared=yes
- fi
- done
- IFS="$lt_save_ifs"
- ;;
- esac],
- [enable_shared=]_LT_ENABLE_SHARED_DEFAULT)
-
- _LT_DECL([build_libtool_libs], [enable_shared], [0],
- [Whether or not to build shared libraries])
-])# _LT_ENABLE_SHARED
-
-LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])])
-LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])])
-
-# Old names:
-AC_DEFUN([AC_ENABLE_SHARED],
-[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared])
-])
-
-AC_DEFUN([AC_DISABLE_SHARED],
-[_LT_SET_OPTION([LT_INIT], [disable-shared])
-])
-
-AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)])
-AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
-
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AM_ENABLE_SHARED], [])
-dnl AC_DEFUN([AM_DISABLE_SHARED], [])
-
-
-
-# _LT_ENABLE_STATIC([DEFAULT])
-# ----------------------------
-# implement the --enable-static flag, and support the `static' and
-# `disable-static' LT_INIT options.
-# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
-m4_define([_LT_ENABLE_STATIC],
-[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl
-AC_ARG_ENABLE([static],
- [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],
- [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])],
- [p=${PACKAGE-default}
- case $enableval in
- yes) enable_static=yes ;;
- no) enable_static=no ;;
- *)
- enable_static=no
- # Look at the argument we got. We use all the common list separators.
- lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
- for pkg in $enableval; do
- IFS="$lt_save_ifs"
- if test "X$pkg" = "X$p"; then
- enable_static=yes
- fi
- done
- IFS="$lt_save_ifs"
- ;;
- esac],
- [enable_static=]_LT_ENABLE_STATIC_DEFAULT)
-
- _LT_DECL([build_old_libs], [enable_static], [0],
- [Whether or not to build static libraries])
-])# _LT_ENABLE_STATIC
-
-LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])])
-LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])])
-
-# Old names:
-AC_DEFUN([AC_ENABLE_STATIC],
-[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static])
-])
-
-AC_DEFUN([AC_DISABLE_STATIC],
-[_LT_SET_OPTION([LT_INIT], [disable-static])
-])
-
-AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)])
-AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
-
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AM_ENABLE_STATIC], [])
-dnl AC_DEFUN([AM_DISABLE_STATIC], [])
-
-
-
-# _LT_ENABLE_FAST_INSTALL([DEFAULT])
-# ----------------------------------
-# implement the --enable-fast-install flag, and support the `fast-install'
-# and `disable-fast-install' LT_INIT options.
-# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
-m4_define([_LT_ENABLE_FAST_INSTALL],
-[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl
-AC_ARG_ENABLE([fast-install],
- [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],
- [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])],
- [p=${PACKAGE-default}
- case $enableval in
- yes) enable_fast_install=yes ;;
- no) enable_fast_install=no ;;
- *)
- enable_fast_install=no
- # Look at the argument we got. We use all the common list separators.
- lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
- for pkg in $enableval; do
- IFS="$lt_save_ifs"
- if test "X$pkg" = "X$p"; then
- enable_fast_install=yes
- fi
- done
- IFS="$lt_save_ifs"
- ;;
- esac],
- [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT)
-
-_LT_DECL([fast_install], [enable_fast_install], [0],
- [Whether or not to optimize for fast installation])dnl
-])# _LT_ENABLE_FAST_INSTALL
-
-LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])])
-LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])])
-
-# Old names:
-AU_DEFUN([AC_ENABLE_FAST_INSTALL],
-[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install])
-AC_DIAGNOSE([obsolete],
-[$0: Remove this warning and the call to _LT_SET_OPTION when you put
-the `fast-install' option into LT_INIT's first parameter.])
-])
-
-AU_DEFUN([AC_DISABLE_FAST_INSTALL],
-[_LT_SET_OPTION([LT_INIT], [disable-fast-install])
-AC_DIAGNOSE([obsolete],
-[$0: Remove this warning and the call to _LT_SET_OPTION when you put
-the `disable-fast-install' option into LT_INIT's first parameter.])
-])
-
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], [])
-dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], [])
-
-
-# _LT_WITH_PIC([MODE])
-# --------------------
-# implement the --with-pic flag, and support the `pic-only' and `no-pic'
-# LT_INIT options.
-# MODE is either `yes' or `no'. If omitted, it defaults to `both'.
-m4_define([_LT_WITH_PIC],
-[AC_ARG_WITH([pic],
- [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
- [try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
- [lt_p=${PACKAGE-default}
- case $withval in
- yes|no) pic_mode=$withval ;;
- *)
- pic_mode=default
- # Look at the argument we got. We use all the common list separators.
- lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
- for lt_pkg in $withval; do
- IFS="$lt_save_ifs"
- if test "X$lt_pkg" = "X$lt_p"; then
- pic_mode=yes
- fi
- done
- IFS="$lt_save_ifs"
- ;;
- esac],
- [pic_mode=default])
-
-test -z "$pic_mode" && pic_mode=m4_default([$1], [default])
-
-_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl
-])# _LT_WITH_PIC
-
-LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])])
-LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])])
-
-# Old name:
-AU_DEFUN([AC_LIBTOOL_PICMODE],
-[_LT_SET_OPTION([LT_INIT], [pic-only])
-AC_DIAGNOSE([obsolete],
-[$0: Remove this warning and the call to _LT_SET_OPTION when you
-put the `pic-only' option into LT_INIT's first parameter.])
-])
-
-dnl aclocal-1.4 backwards compatibility:
-dnl AC_DEFUN([AC_LIBTOOL_PICMODE], [])
-
-## ----------------- ##
-## LTDL_INIT Options ##
-## ----------------- ##
-
-m4_define([_LTDL_MODE], [])
-LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive],
- [m4_define([_LTDL_MODE], [nonrecursive])])
-LT_OPTION_DEFINE([LTDL_INIT], [recursive],
- [m4_define([_LTDL_MODE], [recursive])])
-LT_OPTION_DEFINE([LTDL_INIT], [subproject],
- [m4_define([_LTDL_MODE], [subproject])])
-
-m4_define([_LTDL_TYPE], [])
-LT_OPTION_DEFINE([LTDL_INIT], [installable],
- [m4_define([_LTDL_TYPE], [installable])])
-LT_OPTION_DEFINE([LTDL_INIT], [convenience],
- [m4_define([_LTDL_TYPE], [convenience])])
diff --git a/macros/ltsugar.m4 b/macros/ltsugar.m4
deleted file mode 100644
index 9000a05..0000000
--- a/macros/ltsugar.m4
+++ /dev/null
@@ -1,123 +0,0 @@
-# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*-
-#
-# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
-# Written by Gary V. Vaughan, 2004
-#
-# This file is free software; the Free Software Foundation gives
-# unlimited permission to copy and/or distribute it, with or without
-# modifications, as long as this notice is preserved.
-
-# serial 6 ltsugar.m4
-
-# This is to help aclocal find these macros, as it can't see m4_define.
-AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])
-
-
-# lt_join(SEP, ARG1, [ARG2...])
-# -----------------------------
-# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their
-# associated separator.
-# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier
-# versions in m4sugar had bugs.
-m4_define([lt_join],
-[m4_if([$#], [1], [],
- [$#], [2], [[$2]],
- [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])
-m4_define([_lt_join],
-[m4_if([$#$2], [2], [],
- [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])
-
-
-# lt_car(LIST)
-# lt_cdr(LIST)
-# ------------
-# Manipulate m4 lists.
-# These macros are necessary as long as will still need to support
-# Autoconf-2.59 which quotes differently.
-m4_define([lt_car], [[$1]])
-m4_define([lt_cdr],
-[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
- [$#], 1, [],
- [m4_dquote(m4_shift($@))])])
-m4_define([lt_unquote], $1)
-
-
-# lt_append(MACRO-NAME, STRING, [SEPARATOR])
-# ------------------------------------------
-# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.
-# Note that neither SEPARATOR nor STRING are expanded; they are appended
-# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
-# No SEPARATOR is output if MACRO-NAME was previously undefined (different
-# than defined and empty).
-#
-# This macro is needed until we can rely on Autoconf 2.62, since earlier
-# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
-m4_define([lt_append],
-[m4_define([$1],
- m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])
-
-
-
-# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])
-# ----------------------------------------------------------
-# Produce a SEP delimited list of all paired combinations of elements of
-# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list
-# has the form PREFIXmINFIXSUFFIXn.
-# Needed until we can rely on m4_combine added in Autoconf 2.62.
-m4_define([lt_combine],
-[m4_if(m4_eval([$# > 3]), [1],
- [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl
-[[m4_foreach([_Lt_prefix], [$2],
- [m4_foreach([_Lt_suffix],
- ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,
- [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])
-
-
-# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])
-# -----------------------------------------------------------------------
-# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited
-# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.
-m4_define([lt_if_append_uniq],
-[m4_ifdef([$1],
- [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],
- [lt_append([$1], [$2], [$3])$4],
- [$5])],
- [lt_append([$1], [$2], [$3])$4])])
-
-
-# lt_dict_add(DICT, KEY, VALUE)
-# -----------------------------
-m4_define([lt_dict_add],
-[m4_define([$1($2)], [$3])])
-
-
-# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)
-# --------------------------------------------
-m4_define([lt_dict_add_subkey],
-[m4_define([$1($2:$3)], [$4])])
-
-
-# lt_dict_fetch(DICT, KEY, [SUBKEY])
-# ----------------------------------
-m4_define([lt_dict_fetch],
-[m4_ifval([$3],
- m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),
- m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])
-
-
-# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])
-# -----------------------------------------------------------------
-m4_define([lt_if_dict_fetch],
-[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],
- [$5],
- [$6])])
-
-
-# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])
-# --------------------------------------------------------------
-m4_define([lt_dict_filter],
-[m4_if([$5], [], [],
- [lt_join(m4_quote(m4_default([$4], [[, ]])),
- lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),
- [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl
-])
diff --git a/macros/ltversion.m4 b/macros/ltversion.m4
deleted file mode 100644
index 07a8602..0000000
--- a/macros/ltversion.m4
+++ /dev/null
@@ -1,23 +0,0 @@
-# ltversion.m4 -- version numbers -*- Autoconf -*-
-#
-# Copyright (C) 2004 Free Software Foundation, Inc.
-# Written by Scott James Remnant, 2004
-#
-# This file is free software; the Free Software Foundation gives
-# unlimited permission to copy and/or distribute it, with or without
-# modifications, as long as this notice is preserved.
-
-# @configure_input@
-
-# serial 3337 ltversion.m4
-# This file is part of GNU Libtool
-
-m4_define([LT_PACKAGE_VERSION], [2.4.2])
-m4_define([LT_PACKAGE_REVISION], [1.3337])
-
-AC_DEFUN([LTVERSION_VERSION],
-[macro_version='2.4.2'
-macro_revision='1.3337'
-_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
-_LT_DECL(, macro_revision, 0)
-])
diff --git a/macros/lt~obsolete.m4 b/macros/lt~obsolete.m4
deleted file mode 100644
index c573da9..0000000
--- a/macros/lt~obsolete.m4
+++ /dev/null
@@ -1,98 +0,0 @@
-# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*-
-#
-# Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
-# Written by Scott James Remnant, 2004.
-#
-# This file is free software; the Free Software Foundation gives
-# unlimited permission to copy and/or distribute it, with or without
-# modifications, as long as this notice is preserved.
-
-# serial 5 lt~obsolete.m4
-
-# These exist entirely to fool aclocal when bootstrapping libtool.
-#
-# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)
-# which have later been changed to m4_define as they aren't part of the
-# exported API, or moved to Autoconf or Automake where they belong.
-#
-# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN
-# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us
-# using a macro with the same name in our local m4/libtool.m4 it'll
-# pull the old libtool.m4 in (it doesn't see our shiny new m4_define
-# and doesn't know about Autoconf macros at all.)
-#
-# So we provide this file, which has a silly filename so it's always
-# included after everything else. This provides aclocal with the
-# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
-# because those macros already exist, or will be overwritten later.
-# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
-#
-# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
-# Yes, that means every name once taken will need to remain here until
-# we give up compatibility with versions before 1.7, at which point
-# we need to keep only those names which we still refer to.
-
-# This is to help aclocal find these macros, as it can't see m4_define.
-AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])])
-
-m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])])
-m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])])
-m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])])
-m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])])
-m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])])
-m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])])
-m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])])
-m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])])
-m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])])
-m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])])
-m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])])
-m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])])
-m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])])
-m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])])
-m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])])
-m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])])
-m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])])
-m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])])
-m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])])
-m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])])
-m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])])
-m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])])
-m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])])
-m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])])
-m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])])
-m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])])
-m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])])
-m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])])
-m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])])
-m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])])
-m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])])
-m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])])
-m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])])
-m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])])
-m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])])
-m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])])
-m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])])
-m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])])
-m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])])
-m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])])
-m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])])
-m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])])
-m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])])
-m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])])
-m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])])
-m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])])
-m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])])
-m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])])
-m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])])
-m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])])
-m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])])
-m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])])
-m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])])
-m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])])
-m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])])
-m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])])
-m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])])
-m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])])
-m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])])
-m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])])
-m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])])
diff --git a/macros/aclocal-include.m4 b/macros/mkl-check.m4
similarity index 64%
copy from macros/aclocal-include.m4
copy to macros/mkl-check.m4
index 84c496b..936fdb5 100644
--- a/macros/aclocal-include.m4
+++ b/macros/mkl-check.m4
@@ -1,7 +1,7 @@
-dnl aclocal-include.m4
-dnl Copyright (c) 2011 FFLAS-FFPACK
-dnl written by BB <bboyer at imag.fr>
-dnl adapted from LinBox configuration
+dnl Check for MKL
+dnl Brice Boyer 2014
+dnl This file is part of FFLAS-FFPACK
+
dnl
dnl ========LICENCE========
dnl This file is part of the library FFLAS-FFPACK.
@@ -22,18 +22,24 @@ dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 U
dnl ========LICENCE========
dnl/
-dnl This macro adds the name macrodir to the set of directories
-dnl that `aclocal' searches for macros.
-
-dnl serial 1
-
-dnl AM_ACLOCAL_INCLUDE(macrodir)
-AC_DEFUN([AM_ACLOCAL_INCLUDE],
-[
- AM_CONDITIONAL(INSIDE_GNOME_COMMON, test x = y)
-
- test -n "$ACLOCAL_FLAGS" && ACLOCAL="$ACLOCAL $ACLOCAL_FLAGS"
- for k in $1 ; do ACLOCAL="$ACLOCAL -I $k" ; done
-])
+AC_DEFUN([FF_CHECK_MKL],
+ [
+ AC_MSG_CHECKING(for use of MKL)
+ dnl echo $CBLAS_LIBS
+ USE_MKL="false"
+ MKL_USED=`echo $CBLAS_LIBS | grep -i MKL`
+ AS_IF( [test -n "$MKL_USED"] , [
+ AC_DEFINE(HAVE_MKL,1,[Define if we use MKL for blas/lapack])
+ USE_MKL="true"
+ AC_SUBST(USE_MKL)
+ AC_MSG_RESULT( yes )
+ ]
+ ,
+ [
+ AC_MSG_RESULT( no )
+ ]
+ )
+ ]
+ )
diff --git a/macros/omp-check.m4 b/macros/omp-check.m4
new file mode 100644
index 0000000..01c5fda
--- /dev/null
+++ b/macros/omp-check.m4
@@ -0,0 +1,73 @@
+dnl turn on OPENMP
+dnl Copyright (c) 2011 FFLAS-FFPACK
+dnl Created by BB, 2014-07-01
+dnl ========LICENCE========
+dnl This file is part of the library FFLAS-FFPACK.
+dnl
+dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+dnl ========LICENCE========
+dnl
+
+dnl FF_CHECK_OMP
+dnl
+dnl turn on OpenMP if available
+
+AC_DEFUN([FF_CHECK_OMP],
+ [ AC_ARG_ENABLE(openmp,
+ [AC_HELP_STRING([--enable-openmp],
+ [ Use OpenMP ])
+ ],
+ [ avec_omp=$enable_openmp],
+ [ avec_omp=yes ]
+ )
+ AC_MSG_CHECKING(for OpenMP)
+ AS_IF([ test "x$avec_omp" != "xno" ],
+ [
+ BACKUP_CXXFLAGS=${CXXFLAGS}
+ OMPFLAGS="-fopenmp"
+ CXXFLAGS="${BACKUP_CXXFLAGS} ${OMPFLAGS}"
+ AC_TRY_RUN([
+#include <omp.h>
+ int main() {
+ int p = omp_get_num_threads();
+ return 0;
+ }
+ ],
+ [ omp_found="yes" ],
+ [ omp_found="no" ],
+ [
+ echo "cross compiling...disabling"
+ omp_found="no"
+ ])
+ AS_IF( [ test "x$omp_found" = "xyes" ],
+ [
+ AC_DEFINE(USE_OPENMP,1,[Define if OMP is available])
+ AC_SUBST(OMPFLAGS)
+ AC_MSG_RESULT(yes)
+ HAVE_OMP=yes
+ ],
+ [
+ OMPFLAGS=
+ AC_SUBST(OMPFLAGS)
+ AC_MSG_RESULT(no)
+ ]
+ )
+ CXXFLAGS=${BACKUP_CXXFLAGS}
+ ],
+ [ AC_MSG_RESULT(no) ]
+ )
+ AM_CONDITIONAL(FFLASFFPACK_HAVE_OMP, test "x$HAVE_OMP" = "xyes")
+]
+)
diff --git a/macros/sse2-check.m4 b/macros/sse2-check.m4
new file mode 100644
index 0000000..198299e
--- /dev/null
+++ b/macros/sse2-check.m4
@@ -0,0 +1,67 @@
+dnl Check for SSE
+dnl Copyright (c) 2011 FFLAS-FFPACK
+dnl Created by BB, 2014-03-25
+dnl ========LICENCE========
+dnl This file is part of the library FFLAS-FFPACK.
+dnl
+dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+dnl ========LICENCE========
+dnl
+
+dnl FF_CHECK_SSE
+dnl
+dnl turn on SSE4.1 extensions if available
+
+AC_DEFUN([FF_CHECK_SSE],
+ [
+ AC_ARG_ENABLE(sse,
+ [AC_HELP_STRING([--enable-sse],
+ [ Use Intel(r) SSE 4.1])
+ ],
+ [ avec_sse=$enable_sse ],
+ [ avec_sse=yes ]
+ )
+ AC_MSG_CHECKING(for SSE 4.1)
+ AS_IF([ test "x$avec_sse" != "xno" ],
+ [
+ BACKUP_CXXFLAGS=${CXXFLAGS}
+ dnl SSEFLAGS="-msse2"
+ SSEFLAGS="-msse4.1"
+ CXXFLAGS="${BACKUP_CXXFLAGS} ${SSEFLAGS}"
+ CODE_SSE=`cat macros/CodeChunk/sse.C`
+ AC_TRY_RUN([
+ ${CODE_SSE}
+ ],
+ [ sse_found="yes" ],
+ [ sse_found="no" ],
+ [
+ echo "cross compiling...disabling"
+ sse_found="no"
+ ])
+ AS_IF([ test "x$sse_found" = "xyes" ],[
+ AC_DEFINE(USE_SSE,1,[Define if SSE is available])
+ AC_SUBST(SSEFLAGS)
+ AC_MSG_RESULT(yes (SSE))
+ ],
+ [
+ SSEFLAGS=""
+ AC_MSG_RESULT(no)
+ ]
+ )
+ CXXFLAGS=${BACKUP_CXXFLAGS}
+ ],
+ [ AC_MSG_RESULT(no) ]
+ )
+ ])
diff --git a/optimiser/Makefile.am b/optimiser/Makefile.am
index 39e325e..7d061b2 100644
--- a/optimiser/Makefile.am
+++ b/optimiser/Makefile.am
@@ -1,5 +1,5 @@
# Copyright (c) 2012 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
diff --git a/optimiser/Makefile.in b/optimiser/Makefile.in
deleted file mode 100644
index 49a6261..0000000
--- a/optimiser/Makefile.in
+++ /dev/null
@@ -1,439 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) 2012 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-#
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = optimiser
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-EXTRA_DIST = winograd.C
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps optimiser/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps optimiser/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-tags: TAGS
-TAGS:
-
-ctags: CTAGS
-CTAGS:
-
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: install-am install-strip
-
-.PHONY: all all-am check check-am clean clean-generic clean-libtool \
- distclean distclean-generic distclean-libtool distdir dvi \
- dvi-am html html-am info info-am install install-am \
- install-data install-data-am install-dvi install-dvi-am \
- install-exec install-exec-am install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/optimiser/winograd.C b/optimiser/winograd.C
index 0232e8f..8a4b546 100644
--- a/optimiser/winograd.C
+++ b/optimiser/winograd.C
@@ -4,7 +4,7 @@
/*
* Copyright (C) 2012 FFLAS-FFPACK group.
*
- * Extirpé form a m4 macro by BB <bboyer at imag.fr>.
+ * Extirpé form a m4 macro by Brice Boyer (briceboyer) <boyer.brice at gmail.com>.
*
*
* ========LICENCE========
@@ -29,73 +29,124 @@
//#define LinBoxSrcOnly
+#define DOUBLE_TO_FLOAT_CROSSOVER 0
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
#include <iostream>
#include <fstream>
-#include "fflas-ffpack/config-blas.h"
-#include "fflas-ffpack/fflas-ffpack-config.h"
-#include "fflas-ffpack/fflas-ffpack-optimise.h"
-#include "fflas-ffpack/field/modular-positive.h"
-#include "fflas-ffpack/fflas/fflas.h"
+#include <givaro/modular.h>
+#include <givaro/modular-balanced.h>
#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/fflas/fflas.h"
#ifndef FLTTYPE
-#define FLTTYPE double
+#define FLTTYPE Givaro::Modular<double>
+#endif
+
+template<class Field>
+bool balanced(const Field & )
+{
+ return false;
+}
+
+template <class T>
+bool balanced(const Givaro::ModularBalanced<T>&)
+{
+ return true;
+}
+
+#ifdef __GIVARO_USE_OPENMP
+typedef Givaro::OMPTimer TTimer;
+#else
+typedef Givaro::Timer TTimer;
+#endif
+
+#define MFLOPS (2.0*iter/chrono.realtime()*(double)n/100.0*(double)n/100.0*(double)n/100.0)
+#define GFLOPS (2.0*iter/chrono.realtime()*(double)n/1000.0*(double)n/1000.0*(double)n/1000.0)
+
+#ifdef __FFLASFFPACK_HAVE_CXX11
+#include <ctime>
#endif
//using namespace LinBox;
int main () {
using namespace std;
- typedef FFPACK::Modular<FLTTYPE> Field ;
+ typedef FLTTYPE Field ;
Field F(17);
typedef Field::Element Element ;
- size_t n=1000, nmax=5000, prec=512, nbest=0, count=0;
- Timer chrono;
- double basetime, time;
+ size_t n=768, nmax=5000, prec=512, nbest=0, count=0;
+ TTimer chrono;
bool bound=false;
+ Field::RandIter G(F);
- Element *A, *C;
- A = new Element[nmax*nmax];
- C = new Element[nmax*nmax];
- for (size_t i=0; i<nmax*nmax;++i){
- A[i]=2.;
- }
+ Element *A,*B,*C;
+ A = FFLAS::fflas_new<Element>(nmax*nmax);
+ B = FFLAS::fflas_new<Element>(nmax*nmax);
+ C = FFLAS::fflas_new<Element>(nmax*nmax);
+ for (size_t i=0; i<nmax*nmax;++i)
+ G.random(A[i]);
+
+ for (size_t i=0; i<nmax*nmax;++i)
+ G.random(B[i]);
+
+ for (size_t i=0; i<nmax*nmax;++i)
+ G.random(C[i]);
+
std::ofstream outlog;
outlog.open("optim.log", std::ofstream::out | std::ofstream::app);
+#ifdef __FFLASFFPACK_HAVE_CXX11
+ std::time_t result = std::time(NULL);
+ outlog << std::endl <<
+ "---------------------------------------------------------------------"
+ << std::endl << std::asctime(std::localtime(&result));
+#endif
outlog << std::endl
<< "Threshold for finite field Strassen-Winograd matrix multiplication" ;
F.write(outlog << "(using ") << ')' << std::endl;
do {
- chrono.start();
+ double basetime, time;
+ FFLAS::MMHelper<Field, FFLAS::MMHelperAlgo::Winograd> ClassicH(F,0, FFLAS::ParSeqHelper::Sequential());
+ FFLAS::MMHelper<Field, FFLAS::MMHelperAlgo::Winograd> WinogradH(F,1, FFLAS::ParSeqHelper::Sequential());
+
+ int iter=3;
+ //warm up computation
FFLAS::fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
- n, n, n, F.one, A, n, A, n, F.zero, C, n, 0);
+ n, n, n, F.mOne, A, n, B, n, F.one, C, n, ClassicH);
+ chrono.start();
+ for (int i=0;i<iter;i++)
+ FFLAS::fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, n, n, n, F.mOne, A, n, B, n, F.one, C, n, ClassicH);
chrono.stop();
std::cout << std::endl
<< "fgemm " << n << "x" << n << ": "
- << chrono.usertime() << " s, "
- << (2.0/chrono.usertime()*n/100.0*n/100.0*n/100.0) << " Mffops"
+ << chrono.realtime()/iter << " s, "
+ << GFLOPS << " Gffops"
<< std::endl;
outlog << std::endl
<< "fgemm " << n << "x" << n << ": "
- << chrono.usertime() << " s, "
- << (2.0/chrono.usertime()*n/100.0*n/100.0*n/100.0) << " Mffops"
+ << chrono.realtime()/iter << " s, "
+ << GFLOPS << " Gffops"
<< std::endl;
- basetime= chrono.usertime();
+ basetime= chrono.realtime();
+ //warm up
+ FFLAS::fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
+ n, n, n, F.mOne, A, n, B, n, F.one, C, n, WinogradH);
chrono.clear();
chrono.start();
- FFLAS::fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
- n, n, n, 1., A, n, A, n, 0., C, n, 1);
+ for (int i=0; i<iter; i++)
+ FFLAS::fgemm(F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
+ n, n, n, F.mOne, A, n, B,n, F.one, C, n, WinogradH);
chrono.stop();
std::cout << "1Wino " << n << "x" << n << ": "
- << chrono.usertime() << " s, "
- << (2.0/chrono.usertime()*n/100.0*n/100.0*n/100.0) << " Mffops"
+ << chrono.realtime()/iter << " s, "
+ << GFLOPS << " Gffops"
<< std::endl;
outlog << "1Wino " << n << "x" << n << ": "
- << chrono.usertime() << " s, "
- << (2.0/chrono.usertime()*n/100.0*n/100.0*n/100.0) << " Mffops"
+ << chrono.realtime()/iter << " s, "
+ << GFLOPS << " Gffops"
<< std::endl;
- time= chrono.usertime();
+ time= chrono.realtime();
if (basetime > time ){
count++;
@@ -117,14 +168,26 @@ int main () {
std::ofstream out("WinoThreshold");
if (nbest != 0 ) {
if (typeid(Element).name() == typeid(double).name()) {
- out << "#ifndef __FFLASFFPACK_WINOTHRESHOLD" << endl;
- out << "#define __FFLASFFPACK_WINOTHRESHOLD" << ' ' << nbest << endl;
+ if ( balanced(F) ) {
+ out << "#ifndef __FFLASFFPACK_WINOTHRESHOLD_BAL" << endl;
+ out << "#define __FFLASFFPACK_WINOTHRESHOLD_BAL" << ' ' << nbest << endl;
+ }
+ else {
+ out << "#ifndef __FFLASFFPACK_WINOTHRESHOLD" << endl;
+ out << "#define __FFLASFFPACK_WINOTHRESHOLD" << ' ' << nbest << endl;
+ }
out << "#endif" << endl << endl;
}
if (typeid(Element).name() == typeid(float).name()) {
- out << "#ifndef __FFLASFFPACK_WINOTHRESHOLD_FLT" << endl;
- out << "#define __FFLASFFPACK_WINOTHRESHOLD_FLT" << ' ' << nbest << endl;
+ if ( balanced(F) ) {
+ out << "#ifndef __FFLASFFPACK_WINOTHRESHOLD_BAL_FLT" << endl;
+ out << "#define __FFLASFFPACK_WINOTHRESHOLD_BAL_FLT" << ' ' << nbest << endl;
+ }
+ else {
+ out << "#ifndef __FFLASFFPACK_WINOTHRESHOLD_FLT" << endl;
+ out << "#define __FFLASFFPACK_WINOTHRESHOLD_FLT" << ' ' << nbest << endl;
+ }
out << "#endif" << endl << endl;
}
}
@@ -133,8 +196,8 @@ int main () {
outlog << "defined __FFLASFFPACK_WINOTHRESHOLD to " << nbest << "" << std::endl;
outlog.close();
- delete[] A;
- delete[] C;
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( C);
return 0;
}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 00f1208..151b4c7 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1,5 +1,5 @@
# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
@@ -20,29 +20,54 @@
# ========LICENCE========
#/
-SUBDIRS =
+SUBDIRS = data
check:
$(BASE_TESTS)
AM_CPPFLAGS=-I$(top_srcdir)
AM_CXXFLAGS = @TESTS_CFLAGS@
-AM_CPPFLAGS += $(OPTFLAGS) -I$(top_srcdir)/fflas-ffpack/utils/ -I$(top_srcdir)/fflas-ffpack/fflas/ -I$(top_srcdir)/fflas-ffpack/ffpack -I$(top_srcdir)/fflas-ffpack/field $(CBLAS_FLAG) $(GMP_CFLAGS) $(GIVARO_CFLAGS)
-LDADD = $(BLAS_LIBS) $(GIVARO_LIBS) $(GMP_LIBS)
-AM_LDFLAGS=-static
+AM_CPPFLAGS += $(OPTFLAGS) -I$(top_srcdir)/fflas-ffpack/ -I$(top_srcdir)/fflas-ffpack/utils/ -I$(top_srcdir)/fflas-ffpack/fflas/ -I$(top_srcdir)/fflas-ffpack/ffpack -I$(top_srcdir)/fflas-ffpack/field $(GIVARO_CFLAGS) $(CBLAS_FLAG) $(CUDA_CFLAGS) $(PARFLAGS) $(PRECOMPILE_FLAGS)
-BASIC_TESTS = \
- test-lqup \
- test-compressQ
+AM_LDFLAGS=-static #-L$(prefix)/lib -lfflas -lffpack -lfflas_c -lffpack_c
+EXTRA_DIST= test-utils.h
+
+PERFPUBLISHERFILE=tests-report.xml
+
+BASIC_TESTS = \
+ test-lu \
+ test-det \
+ test-echelon \
+ test-rankprofiles \
+ test-compressQ \
+ test-fadd \
+ test-finit \
+ test-fscal \
+ test-fgemm \
+ test-fger \
+ test-ftrsm \
+ test-multifile \
+ regression-check
+
+if FFLASFFPACK_PRECOMPILED
+LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS) $(PARFLAGS) \
+ $(top_builddir)/fflas-ffpack/interfaces/libs/libfflas.la \
+ $(top_builddir)/fflas-ffpack/interfaces/libs/libffpack.la
+INTERFACE_TESTS= test-interfaces-c
+test_interfaces_c_LDFLAGS = $(LDADD) \
+ $(top_builddir)/fflas-ffpack/interfaces/libs/libfflas_c.la \
+ $(top_builddir)/fflas-ffpack/interfaces/libs/libffpack_c.la
+else
+LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS) $(PARFLAGS)
+endif
NOT_A_TEST = \
+ test-lqup2 \
test-charpoly \
benchlqup \
test-fsquare \
- test-det \
test-redcolechelon \
benchfgemm \
test-rank \
- test-echelon \
test-krylov-elim \
test-rowechelon \
test-fgemv \
@@ -51,8 +76,6 @@ NOT_A_TEST = \
test-redrowechelon \
test-ftrtri \
test-redechelon \
- test-ftrsm \
- test-fgemm \
test-frobenius \
test-fgesv \
test-invert \
@@ -63,31 +86,34 @@ INFINITE_TEST= \
testeur_ftrsm \
testeur_lqup
-EXTRA_DIST= test-utils.h
-
-CLEANFILES = \
- $(BASIC_TESTS) $(NOT_A_TEST)
EXTRA_PROGRAMS = \
- $(BASIC_TESTS)
+ $(BASIC_TESTS) $(USE_OMP_TESTS) $(INTERFACE_TESTS)
-TESTS = \
- $(BASIC_TESTS)
+CLEANFILES = \
+ $(NOT_A_TEST) $(EXTRA_PROGRAMS) $(PERFPUBLISHERFILE)
+
+TESTS = $(EXTRA_PROGRAMS)
test_compressQ_SOURCES = test-compressQ.C
-test_lqup_SOURCES = test-lqup.C
+test_lu_SOURCES = test-lu.C
+#test_lqup2_SOURCES = test-lqup2.C
+test_det_SOURCES = test-det.C
+test_echelon_SOURCES = test-echelon.C
+test_rankprofiles_SOURCES = test-rankprofiles.C
+test_fgemm_SOURCES = test-fgemm.C
+test_fger_SOURCES = test-fger.C
+test_multifile_SOURCES = test-multifile1.C test-multifile2.C
+# test_fgemm_SOURCES = test-fgemm.C
# test_charpoly_SOURCES = test-charpoly.C
-# dense_generator_SOURCES = dense_generator.C
-# test_det_SOURCES = test-det.C
# benchfgemm_SOURCES = benchfgemm.C
# test_fsquare_SOURCES = test-fsquare.C
# test_rank_SOURCES = test-rank.C
# benchlqup_SOURCES = benchlqup.C
-# test_echelon_SOURCES = test-echelon.C
# test_ftrmm_SOURCES = test-ftrmm.C
# test_redcolechelon_SOURCES = test-redcolechelon.C
# testeur_fgemm_SOURCES = testeur_fgemm.C
-# test_ftrsm_SOURCES = test-ftrsm.C
+test_ftrsm_SOURCES = test-ftrsm.C
# test_redechelon_SOURCES = test-redechelon.C
# testeur_ftrsm_SOURCES = testeur_ftrsm.C
# test_ftrtri_SOURCES = test-ftrtri.C
@@ -95,7 +121,6 @@ test_lqup_SOURCES = test-lqup.C
# testeur_lqup_SOURCES = testeur_lqup.C
# test_fullranksubmatrix_SOURCES = test-fullranksubmatrix.C
# test_rowechelon_SOURCES = test-rowechelon.C
-# test_fgemm_SOURCES = test-fgemm.C
# test_invert_SOURCES = test-invert.C
# test_fgemv_SOURCES = test-fgemv.C
# test_krylov_elim_SOURCES = test-krylov-elim.C
@@ -103,12 +128,33 @@ test_lqup_SOURCES = test-lqup.C
# test_fgesv_SOURCES = test-fgesv.C
# test_frobenius_SOURCES = test-frobenius.C
# test_nullspace_SOURCES = test-nullspace.C
+test_fadd_SOURCES = test-fadd.C
+test_fscal_SOURCES = test-fscal.C
+test_finit_SOURCES = test-finit.C
+test_interfaces_c_SOURCES = test-interfaces-c.c
+#test_interfaces_c_CFLAGS= -std=c11 -I/$(prefix)/include $(AM_CPPFLAGS) $(AM_CXXFLAGS) $(PARFLAGS)
+#test_interfaces_c_LDFLAGS= $(LDFLAGS) $(LDADD) $(AM_LDFLAGS) -L/$(prefix)/lib/ -lfflas_c -lffpack_c -lstdc++
+# test_fspmv_SOURCES = test-fspmv.C
+
+regression_check_SOURCES = regression-check.C
-dense_generator:
+
+dense_generator: dense_generator.C
$(CXX) $(CXXFLAGS) $(AM_CXXFLAGS) dense_generator.C -o dense_generator
+# dense_generator_SOURCES = dense_generator.C
+
+# Perfpublisher script interaction - AB 2014/11/17
+perfpublisher:
+ +./perfpublisher.sh "$(PERFPUBLISHERFILE)" "$(EXTRA_PROGRAMS)" "$(CXX)"
# for compilation of new tests
FFLASFFPACK_BIN=@bindir@
+
+new_examp_comp = $(CXX) $(CXXFLAGS) $(AM_CXXFLAGS) ${INCLUDES} $(AM_CPPFLAGS) $*.C -o $@ $(LDFLAGS) $(LDADD) $(LOADLIBES)
+
%:%.C
- $(CXX) $(CXXFLAGS) $(AM_CXXFLAGS) $(OPTFLAGS) ${INCLUDES} $(AM_CPPFLAGS) $*.C -o $@ $(LDFLAGS) $(LDADD) $(LOADLIBES)
+ $(new_examp_comp)
+
+%:%.cpp
+ $(new_examp_comp)
diff --git a/tests/Makefile.in b/tests/Makefile.in
deleted file mode 100644
index dec55bb..0000000
--- a/tests/Makefile.in
+++ /dev/null
@@ -1,869 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-#
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-#/
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-EXTRA_PROGRAMS = $(am__EXEEXT_1)
-TESTS = $(am__EXEEXT_1)
-subdir = tests
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-am__EXEEXT_1 = test-lqup$(EXEEXT) test-compressQ$(EXEEXT)
-am_test_compressQ_OBJECTS = test-compressQ.$(OBJEXT)
-test_compressQ_OBJECTS = $(am_test_compressQ_OBJECTS)
-test_compressQ_LDADD = $(LDADD)
-am__DEPENDENCIES_1 =
-test_compressQ_DEPENDENCIES = $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
-am_test_lqup_OBJECTS = test-lqup.$(OBJEXT)
-test_lqup_OBJECTS = $(am_test_lqup_OBJECTS)
-test_lqup_LDADD = $(LDADD)
-test_lqup_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1)
-DEFAULT_INCLUDES = -I. at am__isrc@ -I$(top_builddir)
-depcomp =
-am__depfiles_maybe =
-CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
- $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
- $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-CXXLD = $(CXX)
-CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
- $(LDFLAGS) -o $@
-SOURCES = $(test_compressQ_SOURCES) $(test_lqup_SOURCES)
-DIST_SOURCES = $(test_compressQ_SOURCES) $(test_lqup_SOURCES)
-RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
- html-recursive info-recursive install-data-recursive \
- install-dvi-recursive install-exec-recursive \
- install-html-recursive install-info-recursive \
- install-pdf-recursive install-ps-recursive install-recursive \
- installcheck-recursive installdirs-recursive pdf-recursive \
- ps-recursive uninstall-recursive
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
- distclean-recursive maintainer-clean-recursive
-AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
- $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
- distdir
-ETAGS = etags
-CTAGS = ctags
-am__tty_colors = \
-red=; grn=; lgn=; blu=; std=
-DIST_SUBDIRS = $(SUBDIRS)
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-am__relativize = \
- dir0=`pwd`; \
- sed_first='s,^\([^/]*\)/.*$$,\1,'; \
- sed_rest='s,^[^/]*/*,,'; \
- sed_last='s,^.*/\([^/]*\)$$,\1,'; \
- sed_butlast='s,/*[^/]*$$,,'; \
- while test -n "$$dir1"; do \
- first=`echo "$$dir1" | sed -e "$$sed_first"`; \
- if test "$$first" != "."; then \
- if test "$$first" = ".."; then \
- dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
- dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
- else \
- first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
- if test "$$first2" = "$$first"; then \
- dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
- else \
- dir2="../$$dir2"; \
- fi; \
- dir0="$$dir0"/"$$first"; \
- fi; \
- fi; \
- dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
- done; \
- reldir="$$dir2"
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-SUBDIRS =
-AM_CPPFLAGS = -I$(top_srcdir) $(OPTFLAGS) \
- -I$(top_srcdir)/fflas-ffpack/utils/ \
- -I$(top_srcdir)/fflas-ffpack/fflas/ \
- -I$(top_srcdir)/fflas-ffpack/ffpack \
- -I$(top_srcdir)/fflas-ffpack/field $(CBLAS_FLAG) $(GMP_CFLAGS) \
- $(GIVARO_CFLAGS)
-AM_CXXFLAGS = @TESTS_CFLAGS@
-LDADD = $(BLAS_LIBS) $(GIVARO_LIBS) $(GMP_LIBS)
-AM_LDFLAGS = -static
-BASIC_TESTS = \
- test-lqup \
- test-compressQ
-
-NOT_A_TEST = \
- test-charpoly \
- benchlqup \
- test-fsquare \
- test-det \
- test-redcolechelon \
- benchfgemm \
- test-rank \
- test-echelon \
- test-krylov-elim \
- test-rowechelon \
- test-fgemv \
- test-colechelon \
- test-fullranksubmatrix \
- test-redrowechelon \
- test-ftrtri \
- test-redechelon \
- test-ftrsm \
- test-fgemm \
- test-frobenius \
- test-fgesv \
- test-invert \
- test-nullspace
-
-INFINITE_TEST = \
- testeur_fgemm \
- testeur_ftrsm \
- testeur_lqup
-
-EXTRA_DIST = test-utils.h
-CLEANFILES = \
- $(BASIC_TESTS) $(NOT_A_TEST)
-
-test_compressQ_SOURCES = test-compressQ.C
-test_lqup_SOURCES = test-lqup.C
-
-# for compilation of new tests
-FFLASFFPACK_BIN = @bindir@
-all: all-recursive
-
-.SUFFIXES:
-.SUFFIXES: .C .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tests/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps tests/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-test-compressQ$(EXEEXT): $(test_compressQ_OBJECTS) $(test_compressQ_DEPENDENCIES) $(EXTRA_test_compressQ_DEPENDENCIES)
- @rm -f test-compressQ$(EXEEXT)
- $(CXXLINK) $(test_compressQ_OBJECTS) $(test_compressQ_LDADD) $(LIBS)
-test-lqup$(EXEEXT): $(test_lqup_OBJECTS) $(test_lqup_DEPENDENCIES) $(EXTRA_test_lqup_DEPENDENCIES)
- @rm -f test-lqup$(EXEEXT)
- $(CXXLINK) $(test_lqup_OBJECTS) $(test_lqup_LDADD) $(LIBS)
-
-mostlyclean-compile:
- -rm -f *.$(OBJEXT)
-
-distclean-compile:
- -rm -f *.tab.c
-
-.C.o:
- $(CXXCOMPILE) -c -o $@ $<
-
-.C.obj:
- $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
-
-.C.lo:
- $(LTCXXCOMPILE) -c -o $@ $<
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-
-# This directory's subdirectories are mostly independent; you can cd
-# into them and run `make' without going through this Makefile.
-# To change the values of `make' variables: instead of editing Makefiles,
-# (1) if the variable is set in `config.status', edit `config.status'
-# (which will cause the Makefiles to be regenerated when you run `make');
-# (2) otherwise, pass the desired values on the `make' command line.
-$(RECURSIVE_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- target=`echo $@ | sed s/-recursive//`; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- dot_seen=yes; \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done; \
- if test "$$dot_seen" = "no"; then \
- $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
- fi; test -z "$$fail"
-
-$(RECURSIVE_CLEAN_TARGETS):
- @fail= failcom='exit 1'; \
- for f in x $$MAKEFLAGS; do \
- case $$f in \
- *=* | --[!k]*);; \
- *k*) failcom='fail=yes';; \
- esac; \
- done; \
- dot_seen=no; \
- case "$@" in \
- distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
- *) list='$(SUBDIRS)' ;; \
- esac; \
- rev=''; for subdir in $$list; do \
- if test "$$subdir" = "."; then :; else \
- rev="$$subdir $$rev"; \
- fi; \
- done; \
- rev="$$rev ."; \
- target=`echo $@ | sed s/-recursive//`; \
- for subdir in $$rev; do \
- echo "Making $$target in $$subdir"; \
- if test "$$subdir" = "."; then \
- local_target="$$target-am"; \
- else \
- local_target="$$target"; \
- fi; \
- ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
- || eval $$failcom; \
- done && test -z "$$fail"
-tags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
- done
-ctags-recursive:
- list='$(SUBDIRS)'; for subdir in $$list; do \
- test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
- done
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
- include_option=--etags-include; \
- empty_fix=.; \
- else \
- include_option=--include; \
- empty_fix=; \
- fi; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- test ! -f $$subdir/TAGS || \
- set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
- fi; \
- done; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-check-TESTS: $(TESTS)
- @failed=0; all=0; xfail=0; xpass=0; skip=0; \
- srcdir=$(srcdir); export srcdir; \
- list=' $(TESTS) '; \
- $(am__tty_colors); \
- if test -n "$$list"; then \
- for tst in $$list; do \
- if test -f ./$$tst; then dir=./; \
- elif test -f $$tst; then dir=; \
- else dir="$(srcdir)/"; fi; \
- if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
- all=`expr $$all + 1`; \
- case " $(XFAIL_TESTS) " in \
- *[\ \ ]$$tst[\ \ ]*) \
- xpass=`expr $$xpass + 1`; \
- failed=`expr $$failed + 1`; \
- col=$$red; res=XPASS; \
- ;; \
- *) \
- col=$$grn; res=PASS; \
- ;; \
- esac; \
- elif test $$? -ne 77; then \
- all=`expr $$all + 1`; \
- case " $(XFAIL_TESTS) " in \
- *[\ \ ]$$tst[\ \ ]*) \
- xfail=`expr $$xfail + 1`; \
- col=$$lgn; res=XFAIL; \
- ;; \
- *) \
- failed=`expr $$failed + 1`; \
- col=$$red; res=FAIL; \
- ;; \
- esac; \
- else \
- skip=`expr $$skip + 1`; \
- col=$$blu; res=SKIP; \
- fi; \
- echo "$${col}$$res$${std}: $$tst"; \
- done; \
- if test "$$all" -eq 1; then \
- tests="test"; \
- All=""; \
- else \
- tests="tests"; \
- All="All "; \
- fi; \
- if test "$$failed" -eq 0; then \
- if test "$$xfail" -eq 0; then \
- banner="$$All$$all $$tests passed"; \
- else \
- if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
- banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
- fi; \
- else \
- if test "$$xpass" -eq 0; then \
- banner="$$failed of $$all $$tests failed"; \
- else \
- if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
- banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
- fi; \
- fi; \
- dashes="$$banner"; \
- skipped=""; \
- if test "$$skip" -ne 0; then \
- if test "$$skip" -eq 1; then \
- skipped="($$skip test was not run)"; \
- else \
- skipped="($$skip tests were not run)"; \
- fi; \
- test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
- dashes="$$skipped"; \
- fi; \
- report=""; \
- if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
- report="Please report to $(PACKAGE_BUGREPORT)"; \
- test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
- dashes="$$report"; \
- fi; \
- dashes=`echo "$$dashes" | sed s/./=/g`; \
- if test "$$failed" -eq 0; then \
- col="$$grn"; \
- else \
- col="$$red"; \
- fi; \
- echo "$${col}$$dashes$${std}"; \
- echo "$${col}$$banner$${std}"; \
- test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
- test -z "$$report" || echo "$${col}$$report$${std}"; \
- echo "$${col}$$dashes$${std}"; \
- test "$$failed" -eq 0; \
- else :; fi
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
- @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
- $(am__make_dryrun) \
- || test -d "$(distdir)/$$subdir" \
- || $(MKDIR_P) "$(distdir)/$$subdir" \
- || exit 1; \
- dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
- $(am__relativize); \
- new_distdir=$$reldir; \
- dir1=$$subdir; dir2="$(top_distdir)"; \
- $(am__relativize); \
- new_top_distdir=$$reldir; \
- echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
- echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
- ($(am__cd) $$subdir && \
- $(MAKE) $(AM_MAKEFLAGS) \
- top_distdir="$$new_top_distdir" \
- distdir="$$new_distdir" \
- am__remove_distdir=: \
- am__skip_length_check=: \
- am__skip_mode_fix=: \
- distdir) \
- || exit 1; \
- fi; \
- done
-check-am: all-am
- $(MAKE) $(AM_MAKEFLAGS) check-TESTS
-check: check-recursive
-all-am: Makefile
-installdirs: installdirs-recursive
-installdirs-am:
-install: install-recursive
-install-exec: install-exec-recursive
-install-data: install-data-recursive
-uninstall: uninstall-recursive
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-recursive
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
- -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-recursive
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-recursive
- -rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
- distclean-tags
-
-dvi: dvi-recursive
-
-dvi-am:
-
-html: html-recursive
-
-html-am:
-
-info: info-recursive
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-recursive
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-recursive
-
-install-html-am:
-
-install-info: install-info-recursive
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-recursive
-
-install-pdf-am:
-
-install-ps: install-ps-recursive
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-recursive
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-recursive
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic \
- mostlyclean-libtool
-
-pdf: pdf-recursive
-
-pdf-am:
-
-ps: ps-recursive
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) check-am \
- ctags-recursive install-am install-strip tags-recursive
-
-.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
- all all-am check check-TESTS check-am clean clean-generic \
- clean-libtool ctags ctags-recursive distclean \
- distclean-compile distclean-generic distclean-libtool \
- distclean-tags distdir dvi dvi-am html html-am info info-am \
- install install-am install-data install-data-am install-dvi \
- install-dvi-am install-exec install-exec-am install-html \
- install-html-am install-info install-info-am install-man \
- install-pdf install-pdf-am install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- installdirs-am maintainer-clean maintainer-clean-generic \
- mostlyclean mostlyclean-compile mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
- uninstall uninstall-am
-
-check:
- $(BASE_TESTS)
-# test_charpoly_SOURCES = test-charpoly.C
-# dense_generator_SOURCES = dense_generator.C
-# test_det_SOURCES = test-det.C
-# benchfgemm_SOURCES = benchfgemm.C
-# test_fsquare_SOURCES = test-fsquare.C
-# test_rank_SOURCES = test-rank.C
-# benchlqup_SOURCES = benchlqup.C
-# test_echelon_SOURCES = test-echelon.C
-# test_ftrmm_SOURCES = test-ftrmm.C
-# test_redcolechelon_SOURCES = test-redcolechelon.C
-# testeur_fgemm_SOURCES = testeur_fgemm.C
-# test_ftrsm_SOURCES = test-ftrsm.C
-# test_redechelon_SOURCES = test-redechelon.C
-# testeur_ftrsm_SOURCES = testeur_ftrsm.C
-# test_ftrtri_SOURCES = test-ftrtri.C
-# test_redrowechelon_SOURCES = test-redrowechelon.C
-# testeur_lqup_SOURCES = testeur_lqup.C
-# test_fullranksubmatrix_SOURCES = test-fullranksubmatrix.C
-# test_rowechelon_SOURCES = test-rowechelon.C
-# test_fgemm_SOURCES = test-fgemm.C
-# test_invert_SOURCES = test-invert.C
-# test_fgemv_SOURCES = test-fgemv.C
-# test_krylov_elim_SOURCES = test-krylov-elim.C
-# test_colechelon_SOURCES = test-colechelon.C
-# test_fgesv_SOURCES = test-fgesv.C
-# test_frobenius_SOURCES = test-frobenius.C
-# test_nullspace_SOURCES = test-nullspace.C
-
-dense_generator:
- $(CXX) $(CXXFLAGS) $(AM_CXXFLAGS) dense_generator.C -o dense_generator
-%:%.C
- $(CXX) $(CXXFLAGS) $(AM_CXXFLAGS) $(OPTFLAGS) ${INCLUDES} $(AM_CPPFLAGS) $*.C -o $@ $(LDFLAGS) $(LDADD) $(LOADLIBES)
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/tests/Makefile.template b/tests/Makefile.template
new file mode 100644
index 0000000..535e7a7
--- /dev/null
+++ b/tests/Makefile.template
@@ -0,0 +1,48 @@
+#----------------------------------------------------------
+# Parameters to be configured by the user
+
+# root for the blas library, for ex. /home/foo/ATLAS/lib/Linux_P4SSE2
+BLASROOT =
+
+# ATLAS BLAS users : uncomment these lines:
+#CXXFLAGS+=-D__LINBOX_HAVE_CBLAS
+#LOADLIBES+=-L${BLASROOT} -lcblas -latlas
+
+# GotoBlas BLAS users : uncomment this line:
+#LOADLIBES+=-L${BLASROOT} -lgoto
+
+# Other BLAS users, uncomment this line:
+#LOADLIBES+=-L${BLASROOT} -lcblas
+
+# architecture parameter for gcc:
+#ARCH = -march=pentium3
+#ARCH = -march=pentium4
+#ARCH = -march=athlon
+#ARCH = -march=opteron
+#ARCH = -m64 -mtune=k8
+
+# Givaro/GMP root (only necessary for compiling the regression tests testeur_fgemm, testeur_lqup and testeur_ftrsm)
+#GIVARO_ROOT=
+#GMP_ROOT=
+#INCLUDES+= -I ${GIVARO_ROOT}/include -I ${GMP_ROOT}/include
+#LOADLIBES+= -L ${GIVARO_ROOT}/lib -lgivaro -L ${GMP_ROOT}/lib -lgmp -lgmpxx
+
+#----------------------------------------------------------
+
+OPTFLAGS+=-O3
+#OPTFLAGS+=-g
+OPTFLAGS+= ${ARCH}
+
+CXXFLAGS+=${OPTFLAGS}
+
+INCLUDES+=-I. -I../include
+
+
+CXX=g++ ${INCLUDES}
+
+all: test-fgemm test-invert test-det test-rank test-charpoly test-lqup test-nullspace dense_generator
+
+regression: testeur_fgemm testeur_lqup testeur_ftrsm
+
+clean:
+ rm -f test-fgemm test-fgemv test-invert test-det test-rank test-charpoly test-lqup dense_generator testeur_fgemm testeur_lqup testeur_ftrsm
diff --git a/tests/benchfgemm.C b/tests/benchfgemm.C
new file mode 100644
index 0000000..9202491
--- /dev/null
+++ b/tests/benchfgemm.C
@@ -0,0 +1,100 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+//#include "goto-def.h"
+
+/*
+ * Copyright (c) FFLAS-FFPACK
+ * Written by Clement Pernet <clement.pernet at imag.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ */
+
+#include <iostream>
+
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/field/modular-positive.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "Matio.h"
+
+using namespace std;
+using namespace FFPACK;
+
+int main(int argc, char** argv)
+{
+
+ // parameter: p, n, iteration, file1, file2
+
+ double p = atof(argv[1]);
+ int n = atoi(argv[2]);
+ size_t w = atoi (argv[3]);
+ size_t iter = atoi(argv[4]);
+
+// typedef Givaro::Modular<float> Field;
+// typedef Givaro::Modular<double> Field;
+// typedef ModularBalanced<double> Field;
+ typedef ModularBalanced<float> Field;
+ typedef Field::Element Element;
+
+ Field F((Field::Element)p);
+ Element one,zero;
+ F.init(one, 1.0);
+ F.init(zero,0.0);
+
+ FFLAS::Timer chrono;
+ double time=0.0;
+ // double time2=0.0;
+ // int singular;
+
+ Element * A, * B, * C;
+
+ for (size_t i=0;i<iter;++i){
+
+ Field::RandIter G(F);
+ A = FFLAS::fflas_new<Element>(n*n);
+ for (size_t i=0; i<(size_t)n*n; ++i)
+ G.random (*(A+i));
+
+ B = FFLAS::fflas_new<Element>(n*n);
+ for (size_t i=0; i<(size_t)n*n; ++i)
+ G.random(*(B+i));
+
+ C = FFLAS::fflas_new<Element>(n*n);
+
+ chrono.clear();
+ chrono.start();
+ FFLAS::MMHelper<Field, FFLAS::MMHelperAlgo::Winograd> WH (F,w);
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, n,n,n, one,
+ A, n, B, n, zero, C,n, WH);
+ chrono.stop();
+ time+=chrono.realtime();
+
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( C);
+ }
+
+ std::cerr<<"n: "<<n <<" p: "<<p<<" w: "<<w<<std::endl
+ <<" time: "<<time/(double)iter<<" s"<<std::endl
+ <<" speed: "<<2.0*n/1000.0*n/1000.0/time*n/1000.0*double(iter)<<" Gffops"
+ <<std::endl;
+
+ return 0;
+}
+
diff --git a/tests/benchlqup.C b/tests/benchlqup.C
new file mode 100644
index 0000000..9748ed4
--- /dev/null
+++ b/tests/benchlqup.C
@@ -0,0 +1,87 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+//
+/*
+ * Copyright (c) FFLAS-FFPACK
+ * Written by Clement Pernet <clement.pernet at imag.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ */
+#include <iostream>
+
+#include "fflas-ffpack/ffpack/ffpack.h"
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "Matio.h"
+
+using namespace std;
+using namespace FFPACK;
+
+int main(int argc, char** argv) {
+
+ // parameter: p, n, iteration, file
+
+ float p = (float)atof(argv[1]);
+ int n = atoi(argv[2]);
+ size_t iter = atoi(argv[3]);
+
+
+ typedef ModularBalanced<double> Field;
+ // typedef ModularBalanced<float> Field;
+ typedef Field::Element Element;
+
+ Field F(p);
+
+ FFLAS::Timer chrono;
+ double time=0.0;
+ // int singular;
+
+ Element *A;
+
+ for (size_t i=0;i<iter;++i){
+
+ A = FFLAS::fflas_new<Element>(n*n);
+ Field::RandIter G(F);
+ for (size_t i=0; i< (size_t)n*n; ++i)
+ G.random(*(A+i));
+
+ size_t * P = FFLAS::fflas_new<size_t>(n);
+ size_t * Q = FFLAS::fflas_new<size_t>(n);
+
+ chrono.clear();
+ chrono.start();
+ FFPACK::LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, n, n, A, n,
+ P, Q);
+ chrono.stop();
+
+ time+=chrono.realtime();
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ FFLAS::fflas_delete( A);
+
+ }
+
+ cerr<<"n: "<<n<<" p: "<<p<<std::endl
+ <<" time: "<<time/(double)iter<<std::endl
+ <<" speed: "<<2/3.0*n/1000.0*n/1000.0*n/1000.0/time*double(iter)<<" Gffops"<<std::endl;
+
+
+ return 0;
+}
+
diff --git a/benchmark/Makefile.am b/tests/data/Makefile.am
similarity index 86%
rename from benchmark/Makefile.am
rename to tests/data/Makefile.am
index 31793b2..c73158b 100644
--- a/benchmark/Makefile.am
+++ b/tests/data/Makefile.am
@@ -1,5 +1,6 @@
-# Copyright (c) FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
+# Copyright (c) 2011 FFLAS-FFPACK
+# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+#
# ========LICENCE========
# This file is part of the library FFLAS-FFPACK.
#
@@ -19,10 +20,7 @@
# ========LICENCE========
#/
-#
-# Nothing yet
+SUBDIRS =
-SUBDIRS=graph src html test-src
-#
-EXTRA_DIST=run.sh
+EXTRA_DIST= mat11.sms
diff --git a/tests/data/mat11.sms b/tests/data/mat11.sms
new file mode 100644
index 0000000..0ce2cf5
--- /dev/null
+++ b/tests/data/mat11.sms
@@ -0,0 +1,35 @@
+11 11 M
+1 3 2
+1 4 3
+1 10 1
+3 1 2
+3 3 888
+3 4 1
+3 5 -1
+3 11 6
+4 1 3
+4 3 1
+4 4 4
+4 7 12
+4 10 -13
+5 3 -1
+6 6 1
+6 8 1
+6 10 1
+7 4 12
+8 6 1
+8 8 500
+8 9 400
+8 10 300
+8 11 200
+9 8 400
+10 1 1
+10 4 -13
+10 6 1
+10 8 300
+10 10 10
+10 11 1
+11 10 1
+11 8 200
+11 3 6
+0 0 0
diff --git a/fflas-ffpack/field/field-general.h b/tests/dense_generator.C
similarity index 55%
rename from fflas-ffpack/field/field-general.h
rename to tests/dense_generator.C
index 4837388..11db2d2 100644
--- a/fflas-ffpack/field/field-general.h
+++ b/tests/dense_generator.C
@@ -1,68 +1,66 @@
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* field/field-general
- * This file is part of FFLAS-FFPACK
- * Copyright (C) 2011 Brice Boyer <bboyer at imag.fr>
+//
+/*
+ * Copyright (c) FFLAS-FFPACK
+ * Written by Clement Pernet <clement.pernet at imag.fr>
*
- * ------------------------------------
- *
- *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* ========LICENCE========
- *.
*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <iostream>
-#ifndef __FFLASFFPACK_field_general_H
-#define __FFLASFFPACK_field_general_H
-#include <ostream>
-namespace FFPACK {
+template<class T>
+T& myrand (T& r, long size)
+{
+ if (size < 0)
+ return r = T( (lrand48() % (-size-size)) + size );
+ else
+ return r = T( lrand48() % size ) ;
+};
- template<class T>
- class UnparametricField ;
+int main(int argc, char ** argv)
+{
- template<class T>
- class Modular ;
+ srand48(time(NULL));
+ long ni=10,nj=10,max=100;
+ int offset = 0;
- template<class T>
- class ModularBalanced ;
+ if (argc > ++offset)
+ ni = atoi( argv[offset] );
+ if (argc > ++offset)
+ nj = atoi( argv[offset] );
+ if (argc > ++offset)
+ max = atoi( argv[offset] );
- template<class T>
- std::ostream & operator<<( std::ostream & o, const Modular<T> & F)
- {
- return F.write(o);
- }
+ long tmp;
+ printf("%ld %ld M\n", ni, nj);
+ for (long i = 0; i < ni; ++i)
+ for (long j = 0; j < nj; ++j){
+ printf("%ld %ld %ld\n", i+1, j+1, myrand(tmp, max));
+ }
- template<class T>
- std::ostream & operator<<( std::ostream & o, const ModularBalanced<T> & F)
- {
- return F.write(o);
- }
-
- template<class T>
- std::ostream & operator<<( std::ostream & o, const UnparametricField<T> & F)
- {
- return F.write(o);
- }
+ printf("0 0 0\n");
+ return 0;
}
-
-
-#endif // __FFLASFFPACK_field_general_H
diff --git a/tests/perfpublisher.sh b/tests/perfpublisher.sh
new file mode 100755
index 0000000..2c3c452
--- /dev/null
+++ b/tests/perfpublisher.sh
@@ -0,0 +1,158 @@
+#!/bin/bash
+# Script to format tests results into a single xml file.
+# See https://wiki.jenkins-ci.org/display/JENKINS/PerfPublisher+Plugin
+# -----
+# 2014/11/17 - Written by AB <Alexis.Breust at imag.fr>
+
+XMLFILE=$1
+tests=$2
+COMPILER=$3
+
+#=================#
+# Plateform infos #
+#=================#
+
+COMPILERVERSION=$($COMPILER --version 2>&1 | head -1)
+CPUFREQ=$(lscpu | grep "MHz" | rev | cut -f1 -d' ' | rev)
+ARCH=$(uname -m)
+OSNAME=$(uname -s)
+OSVERSION=$(uname -r)
+
+if hash lsb_release 2>/dev/null
+ then DISTRIB=$(lsb_release -ds)
+ else DISTRIB='Unknown distribution'
+fi
+
+#==========#
+# Prologue #
+#==========#
+
+if [[ -f $XMLFILE ]]
+then
+ echo '----> WARNING: File '$XMLFILE' is not empty.'
+ echo '----> Results will be added to its end.'
+fi
+
+#========#
+# Header #
+#========#
+
+echo '<?xml version="1.0" encoding="UTF-8"?>' >> $XMLFILE
+echo '<report name="tests-report" categ="tests">' >> $XMLFILE
+
+#=======#
+# Start #
+#=======#
+
+echo '<start>' >> $XMLFILE
+echo '<date format="YYYYMMDD" val="'$(date +%Y%m%d)'" />' >> $XMLFILE
+echo '<time format="HHMMSS" val="'$(date +%H%M%S)'" />' >> $XMLFILE
+echo '</start>' >> $XMLFILE
+
+#=======#
+# Tests #
+#=======#
+
+for test in $tests
+do
+ if [[ ! -f $test ]]
+ then
+ #File does not exist: compile it
+ echo '[Compiling]' $test
+ COMPILESTART=$(date +%s%3N)
+ COMPILELOG=$(make $test 2>&1; echo 'Returned state: '$?)
+ COMPILEEND=$(date +%s%3N)
+ COMPILETIME=$(($COMPILEEND - $COMPILESTART))
+ COMPILECHECK=$(echo $COMPILELOG | grep -o '[^ ]*$')
+ COMPILETIMERELEVANT='true'
+ else
+ #File does exist
+ echo '[Already compiled]' $benchmark
+ COMPILELOG='(Previously compiled)'
+ COMPILETIME='0.0'
+ COMPILECHECK='0'
+ COMPILETIMERELEVANT='false'
+ fi
+
+ if [[ $COMPILECHECK -ne 0 ]]
+ then
+ #Compilation failure
+ # EXECUTED='no' - keep it to yes so that Jenkins
+ # uses it within its results
+ EXECUTED='yes'
+ PASSED='no'
+ STATE='0'
+ EXECUTIONLOG='(Not executed)'
+ EXECUTIONTIME='0.0'
+ COMPILETIMERELEVANT='false'
+ EXECUTIONTIMERELEVANT='false'
+ ERRORLOG='Does not compile.'
+ echo '-> Does not compile.'
+ else
+ #Compilation success
+ echo '[Executing]' $test
+ EXECUTED='yes'
+ EXECUTIONSTART=$(date +%s%3N)
+ EXECUTIONLOG=$(./$test 2>&1; echo 'Returned state: '$?)
+ EXECUTIONEND=$(date +%s%3N)
+ EXECUTIONTIME=$(($EXECUTIONEND - $EXECUTIONSTART))
+ EXECUTIONCHECK=$(echo $EXECUTIONLOG | grep -o '[^ ]*$')
+
+ if [[ $EXECUTIONCHECK -ne 0 ]]
+ then
+ #Execution failure
+ PASSED='no'
+ STATE='0'
+ EXECUTIONTIMERELEVANT='false'
+ ERRORLOG='Execution failure.'
+ echo '-> Execution failure.'
+ else
+ #Execution success
+ PASSED='yes'
+ STATE='100'
+ EXECUTIONTIMERELEVANT='true'
+ ERRORLOG=''
+ fi
+ fi
+
+ echo '<test name="'$test'" executed="'$EXECUTED'">' >> $XMLFILE
+ echo '<targets><target>TEST</target></targets>' >> $XMLFILE
+ echo '<platform>' >> $XMLFILE
+ echo '<os>' >> $XMLFILE
+ echo '<name><![CDATA['$OSNAME']]></name>' >> $XMLFILE
+ echo '<version><![CDATA['$OSVERSION']]></version>' >> $XMLFILE
+ echo '<distribution><![CDATA['$DISTRIB']]></distribution>' >> $XMLFILE
+ echo '</os>' >> $XMLFILE
+ echo '<processor arch="'$ARCH'">' >> $XMLFILE
+ echo '<frequency unit="MHz" cpufreq="'$CPUFREQ'" />' >> $XMLFILE
+ echo '</processor>' >> $XMLFILE
+ echo '<compiler name="'$COMPILER'" version="'$COMPILERVERSION'" />' >> $XMLFILE
+ echo '</platform>' >> $XMLFILE
+ echo '<result>' >> $XMLFILE
+
+ # Logs
+ echo '<success passed="'$PASSED'" state="'$STATE'" />' >> $XMLFILE
+ echo '<errorlog><![CDATA['$ERRORLOG']]></errorlog>' >> $XMLFILE
+ echo '<log name="Compile output"><![CDATA['"$COMPILELOG"']]></log>' >> $XMLFILE
+ echo '<log name="Execution output"><![CDATA['"$test $EXECUTIONLOG"']]></log>' >> $XMLFILE
+
+ # Times
+ echo '<compiletime unit="ms" mesure="'$COMPILETIME'" isRelevant="'$COMPILETIMERELEVANT'" />' >> $XMLFILE
+ echo '<executiontime unit="ms" mesure="'$EXECUTIONTIME'" isRelevant="'$EXECUTIONTIMERELEVANT'" />' >> $XMLFILE
+
+ echo '</result>' >> $XMLFILE
+ echo '</test>' >> $XMLFILE
+done
+
+#========#
+# Footer #
+#========#
+
+echo '</report>' >> $XMLFILE
+
+#==========#
+# Epilogue #
+#==========#
+
+echo 'Results correctly exported to' $XMLFILE
+
diff --git a/tests/regression-check.C b/tests/regression-check.C
new file mode 100644
index 0000000..cb296e6
--- /dev/null
+++ b/tests/regression-check.C
@@ -0,0 +1,88 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* tests/regression-check.C
+ * Copyright (C) 2014 the FFLAS-FFPACK group
+ *
+ * Written by all reporters of bugs (see ffpack-devel at googlegroups.com)
+ *
+ * ------------------------------------
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <givaro/modular.h>
+#include "fflas-ffpack/fflas-ffpack.h"
+
+/* #1 */
+bool check1 () ;
+
+/* #2 */
+bool check2()
+{
+ Givaro::Modular<double> F(2);
+ Givaro::Modular<double>::RandIter R(F);
+
+ size_t ok = 0 ;
+ size_t tot = 500 ;
+ for (size_t i = 0 ; i < tot ; ++i) {
+ double elt ;
+ R.random(elt);
+ if (elt == 1) ++ok ;
+ }
+ double f = (double) ok / (double) tot ;
+ if (f < 0.3 or f > 0.7) return false ;
+
+ return true ;
+
+}
+
+/* #3 */
+bool check3()
+{
+ Givaro::Modular<double> F(2);
+ double * A = NULL ;
+ double d = FFPACK::Det(F,0,0,A,0);
+ return F.areEqual(d,F.one);
+
+}
+
+/* #4 */
+bool check4()
+{
+ typedef int32_t Element;
+ Givaro::Modular<Element> F(2);
+ Element * A = NULL ;
+ Element * X = NULL ;
+ int nul;
+ FFPACK::Invert2(F,0,A,0,X,0,nul);
+ return true ;
+}
+
+
+int main() {
+ bool pass = true ;
+ pass &= check2();
+ pass &= check3();
+ pass &= check4();
+ return !pass;
+}
+
diff --git a/tests/test-bini-p.C b/tests/test-bini-p.C
new file mode 100644
index 0000000..8e644b6
--- /dev/null
+++ b/tests/test-bini-p.C
@@ -0,0 +1,2457 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2015 the FFLAS-FFPACK group
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *
+ */
+
+
+#include "fflas-ffpack/utils/timer.h"
+#include "Matio.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include "test-utils.h"
+#include "assert.h"
+#include "fflas-ffpack/utils/args-parser.h"
+#include "fflas-ffpack/utils/flimits.h"
+
+#include <givaro/udl.h>
+
+// using namespace FFPACK;
+#define NEWWINO
+// #define NOTRANDOM
+//
+#define DIVIDE_INTO(x,y) (((x) + (y) - 1)/(y))
+
+const int algos = 6 ;
+const int algos_k = 2 ;
+
+using Givaro::Modular;
+using Givaro::ModularBalanced;
+using Givaro::Timer;
+using FFLAS::FieldTraits;
+typedef std::vector<Timer> time_v ;
+typedef std::vector<int> int_v ;
+
+const int selec[] = {
+ 0
+ ,1
+ ,2
+ ,3
+ ,4
+ ,5
+};
+
+const int selec_k[] = {
+ 0
+ ,1
+};
+
+const char * descr[] = {
+ "322 low mem"
+ , "322 first 1"
+ , "322 4 tmp "
+ , "223 low mem"
+ , "232 first 1"
+ , "232 all tmp"
+ , "comp left "
+ , "comp right "
+ // , "322 sqrt "
+};
+
+const char * descr_k[] = {
+ "comp left "
+ , "comp right "
+};
+
+namespace FFLAS { /* compression */
+
+ template<class Elem, int Num>
+ struct Packer ;
+
+ template<>
+ struct Packer<double,2> {
+ uint64_t bits = (limits<double>::digits()/2) ;
+ double base = (double) (1_ui64 << bits) ;
+ uint64_t mask = (1_ui64 << bits) - 1_ui64 ;
+
+ template<class T>
+ void accu(double * p, T * w) {
+ *p *= base ;
+ *p += (double)*w ;
+ }
+ } ;
+
+
+ /* ****** */
+ /* pack */
+ /* ****** */
+
+ /* pack nb words (a,b,c) -> [a|b|c] */
+ template<class wide_T, class pack_T, int Nb>
+ void pack_word( pack_T * packed,
+ const wide_T * words, int32_t stride,
+ Packer<pack_T,Nb> & packer) ;
+
+
+ template<class wide_T>
+ void pack_word/*<wide_T,double,2>*/( double * packed,
+ const wide_T * words, int32_t stride,
+ Packer<double,2> & packer)
+ {
+ // std::cout << "pack " << *words << '+' << *(words+stride) << " * " << (uint64_t) packer.base << " = ";
+ // words += stride ;
+ *packed = (double) *words ;
+ words += stride ;
+ packer.accu(packed,words);
+ // std::cout << (uint64_t) *packed << std::endl;
+ }
+
+ /* pack nb words (a,b) -> [a|b|0] filling with zeros */
+ template<class wide_T, class pack_T, int Nb>
+ void pack_word_part( pack_T * packed, int32_t nb,
+ const wide_T * words, int32_t stride,
+ Packer<pack_T,Nb> & packer) ;
+
+ template<class wide_T>
+ void pack_word_part/* <wide_T,double,2> */( double * packed, int32_t nb,
+ const wide_T * words, int32_t stride,
+ Packer<double,2> & packer)
+ {
+ assert(nb == 1);
+ *packed = (double) *words ;
+ // words += stride ;
+ // packer.accu(packed,words);
+ *packed *= packer.base ;
+ }
+
+ /* ****** */
+ /* unpack */
+ /* ****** */
+
+ template<class wide_T, class pack_T, int Nb>
+ void unpack_word( wide_T * words, int32_t stride,
+ const pack_T * packed,
+ Packer<pack_T,Nb> & packer);
+
+ template<class wide_T>
+ void unpack_word/* <wide_T,double,2> */( wide_T * words, int32_t stride,
+ const double * packed,
+ Packer<double ,2> & packer)
+ {
+ uint64_t pck = (uint64_t) *packed ;
+ words += stride ;
+ *words = (double) (pck & packer.mask) ;
+ words -= stride ;
+ pck >>= packer.bits ;
+ *words = (double) pck /* & packer.mask */ ;
+ }
+
+
+ template<class wide_T, class pack_T, int Nb>
+ void unpack_word_part( wide_T * words, int32_t stride,
+ const pack_T * packed, int32_t nb,
+ Packer<pack_T,Nb> & packer);
+
+ template<class wide_T>
+ void unpack_word_part/* <wide_T,double,2> */( wide_T * words, int32_t stride,
+ const double * packed, int32_t nb,
+ Packer<double,2> & packer)
+ {
+ assert(nb == 1);
+ words += stride ;
+ *words = 0 ;
+ words -= stride ;
+ uint64_t pck = (uint64_t) *packed ;
+ pck >>= packer.bits ;
+ *words = (double)pck /* & packer.mask */ ;
+ }
+
+ /* ****** */
+ /* pack */
+ /* ****** */
+
+ template<class wide_T, class pack_T, int Nb, bool row_packed>
+ void pack_matrix( pack_T * packed, int32_t row_p, int32_t col_p, int32_t ldm_p,
+ const wide_T * elemts, int32_t row_e, int32_t col_e, int32_t ldm_e,
+ Packer<pack_T,Nb> & packer)
+ {
+ if (row_packed == true) {
+ for (int32_t i = 0 ; i < row_e ; i++ ) {
+ const wide_T * e_p = elemts + i * ldm_e ;
+ pack_T * p_p = packed + i * ldm_p ;
+ int32_t j = 0 ;
+ for ( ; j < col_e/Nb*Nb ; j+=Nb, e_p+=Nb, p_p++) {
+ pack_word<wide_T>(p_p,e_p,1,packer);
+
+ }
+ if (j < col_e)
+ pack_word_part<wide_T>(p_p,col_e-j,e_p,1,packer);
+ }
+ }
+ else { /* col_packed */
+ int32_t i = 0 ;
+ int32_t ii = 0 ;
+ for ( ; i < row_e/Nb*Nb ; i += Nb , ii++) {
+ const wide_T * e_p = elemts + i * ldm_e ;
+ pack_T * p_p = packed + ii * ldm_p ;
+ for (int32_t j = 0 ; j < col_e ; j++, e_p++, p_p++) {
+ pack_word<wide_T>(p_p,e_p,ldm_e,packer);
+
+ }
+ }
+ if (i < row_e)
+ pack_word_part<wide_T>(packed+i*ldm_p,row_e-i,elemts+ii*ldm_e,ldm_e,packer);
+
+ }
+ }
+
+ /* ****** */
+ /* unpack */
+ /* ****** */
+
+ template<class wide_T, class pack_T, int Nb, bool row_packed>
+ void unpack_matrix( wide_T * elemts, int32_t row_e, int32_t col_e, int32_t ldm_e,
+ const pack_T * packed, int32_t row_p, int32_t col_p, int32_t ldm_p,
+ Packer<pack_T,Nb> & packer)
+ {
+ if (row_packed == true) {
+ for (int32_t i = 0 ; i < row_e ; i++ ) {
+ wide_T * e_p = elemts + i * ldm_e ;
+ const pack_T * p_p = packed + i * ldm_p ;
+ int32_t j = 0 ;
+ for ( ; j < col_e/Nb*Nb ; j+=Nb, e_p+=Nb, p_p++) {
+ unpack_word<wide_T>(e_p,1,p_p,packer);
+
+ }
+ if (j < col_e)
+ unpack_word_part<wide_T>(e_p,1,p_p,col_e-j,packer);
+ }
+ }
+ else { /* col_packed */
+ int32_t i = 0 ;
+ int32_t ii = 0 ;
+ for ( ; i < row_e/Nb*Nb ; i += Nb , ii++) {
+ wide_T * e_p = elemts + i * ldm_e ;
+ const pack_T * p_p = packed + ii * ldm_p ;
+ for (int32_t j = 0 ; j < col_e ; j++, e_p++, p_p++) {
+ unpack_word<wide_T>(e_p,ldm_e,p_p,packer);
+
+ }
+ }
+ if (i < row_e)
+ unpack_word_part<wide_T>(elemts+i*ldm_e,ldm_e,packed+ii*ldm_p,row_e-i,packer);
+
+ }
+ }
+
+ /* compress A */
+ template<class Field, bool left_compress >
+ void
+ fgemm_compressed(const Field & F,
+ int m, int n, int k,
+ const typename Field::Element * A, int lda,
+ const typename Field::Element * B, int ldb,
+ typename Field::Element * C, int ldc
+ )
+ {
+ Givaro::ZRing<double> NoField;
+ double * A_k, * B_k, * C_k ;
+
+ typedef typename Field::Element elem_t ;
+ Packer<elem_t,2> packer ;
+
+ int m_k = m , n_k = n , lda_k = lda, ldb_k = ldb, ldc_k = ldc ;
+ if (left_compress) {
+ m_k = DIVIDE_INTO(m,2)*2 ;
+ lda_k = m_k ;
+ ldc_k = n ;
+
+ A_k = FFLAS::fflas_new<double>(m_k*k) ;
+ //!@bug don't zero all, just the "border"
+ FFLAS::fzero(NoField,m_k,k,A_k,k);
+
+ B_k = const_cast<typename Field::Element *>(B) ;
+
+ pack_matrix<elem_t,elem_t,2,false>(A_k,m_k,k,lda_k,
+ A,m,k,lda,
+ packer);
+ }
+ else {
+ n_k = DIVIDE_INTO(n,2)*2 ;
+ ldb_k = n_k ;
+ ldc_k = n_k ;
+
+ A_k = const_cast<typename Field::Element *>(A) ;
+ B_k = FFLAS::fflas_new<double>(k*n_k) ;
+ //!@bug don't zero all, just the "border"
+ FFLAS::fzero(NoField,k,n_k,B_k,n_k);
+
+ pack_matrix<elem_t,elem_t,2,true>(B_k,k,n_k,ldb_k,
+ B,k,n,ldb,
+ packer);
+ }
+
+ C_k = FFLAS::fflas_new<double>(m_k*n_k) ;
+ //!@bug don't zero all, just the "border"
+ FFLAS::fzero(NoField,m_k,n_k,C_k,n_k);
+
+ pack_matrix<elem_t,elem_t,2,!left_compress>(C_k,m_k,n_k,ldc_k,
+ C,m,n,ldc,
+ packer);
+
+#if 0
+ double * C_e = FFLAS::fflas_new<double>(m*ldc);
+ unpack_matrix<elem_t,elem_t,2,!left_compress>(C_e,m,n,ldc,
+ C_k,m_k,n_k,ldc_k,
+ packer);
+
+ int faux = 0 ;
+ for (int i = 0 ; i < m ; ++i) {
+ for (int j = 0 ; j < n ; ++j) {
+ if (! (C[i*ldc+j] == C_e[i*ldc+j]) ) {
+ ++faux ;
+ }
+ }
+ }
+ if (faux) {
+ std::cout << "bad pack/unpack ; bad/all = " << faux << '/' << m*n << " ~~ " << (double)faux/(double)(m*n) << std::endl;
+ }
+
+ if (faux && (n<20)) {
+ std::cout << "IN " << std::endl;
+ for (int i = 0 ; i < m ; ++i) {
+ for (int j = 0 ; j < n ; ++j)
+ std::cout << C[i*ldc+j] << ' ';
+ std::cout << std::endl;
+ }
+ std::cout << "OUT" << std::endl;
+ for (int i = 0 ; i < m ; ++i) {
+ for (int j = 0 ; j < n ; ++j)
+ std::cout << C_e[i*ldc+j] << ' ';
+ std::cout << std::endl;
+ }
+
+
+ }
+
+ if (faux)
+ exit(-1);
+#endif
+
+
+
+
+ Givaro::DoubleDomain G ;
+
+ fgemm(G,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,
+ m_k,n_k,k, 1, A_k,lda_k, B_k,ldb_k, 0, C_k, ldc_k);
+
+ // cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,
+ // m_k,n_k,k, 1, A_k,lda_k, B_k,ldb_k, 0, C_k, ldc_k);
+
+
+ unpack_matrix<elem_t,elem_t,2,!left_compress>(C,m,n,ldc,
+ C_k,m_k,n_k,ldc_k,
+ packer);
+
+ if (left_compress)
+ FFLAS::fflas_delete(A_k);
+ else
+ FFLAS::fflas_delete(B_k);
+ FFLAS::fflas_delete(C_k);
+ }
+
+}
+
+namespace FFLAS { /* tools */
+
+
+ template<class Field>
+ void finit_fuzzy(Field & F, size_t m, size_t n, double * C, size_t ldc)
+ {
+
+
+ if (n == ldc)
+ // FFLAS::vectorised::modp<true,true>(C,C,m*n,p,invp,0,p-1);
+ FFLAS::vectorised::modp<Field,true>(F,C,m*n,C);
+ else
+ for (size_t i = 0 ; i < m ; ++i)
+ // FFLAS::vectorised::modp<true,true>(C+i*ldc,C+i*ldc,n,p,invp,0,p-1);
+ FFLAS::vectorised::modp<Field,true>(F,C+i*ldc,n,C+i*ldc);
+ }
+
+
+ // C = a*A + B
+ void add(const size_t m, const size_t n,
+ double a,
+ const double *A, const size_t lda,
+ const double *B, const size_t ldb,
+ double *C, const size_t ldc)
+ {
+ const double *Ai = A,*Bi = B;
+ double *Ci = C;
+ for (;Ai < A+m*lda ; Ai+=lda,Bi+=ldb,Ci+=ldc)
+ for (size_t j = 0 ; j < n ; ++j)
+ Ci[j] = a * Ai[j] + Bi[j];
+ }
+
+ // C = C-(A+B)
+ void subadd(const size_t m, const size_t n,
+ const double *A, const size_t lda,
+ const double *B, const size_t ldb,
+ double *C, const size_t ldc)
+ {
+ const double *Ai = A,*Bi = B;
+ double *Ci = C;
+ for (;Ai < A+m*lda ; Ai+=lda,Bi+=ldb,Ci+=ldc)
+ for (size_t j = 0 ; j < n ; ++j) {
+ Ci[j] = Ci[j] - Ai[j] - Bi[j] ;
+ }
+
+ }
+
+ // C = -(A+B)
+ void negadd(const size_t m, const size_t n,
+ const double *A, const size_t lda,
+ const double *B, const size_t ldb,
+ double *C, const size_t ldc)
+ {
+ const double *Ai = A,*Bi = B;
+ double *Ci = C;
+ for (;Ai < A+m*lda ; Ai+=lda,Bi+=ldb,Ci+=ldc)
+ for (size_t j = 0 ; j < n ; ++j) {
+ Ci[j] = - Ai[j] - Bi[j] ;
+ }
+
+ }
+
+
+ // C = C+A-B
+ void addsub(const size_t m, const size_t n,
+ const double *A, const size_t lda,
+ const double *B, const size_t ldb,
+ double *C, const size_t ldc)
+ {
+ const double *Ai = A,*Bi = B;
+ double *Ci = C;
+ for (;Ai < A+m*lda ; Ai+=lda,Bi+=ldb,Ci+=ldc)
+ for (size_t j = 0 ; j < n ; ++j) {
+ Ci[j] = Ci[j] + Ai[j] - Bi[j] ;
+ }
+
+ }
+
+
+ // C = (C+B)/e
+ template<class Field>
+ void addscalinf(const Field & F, const size_t m, const size_t n,
+ const double *B, const size_t ldb,
+ double e,
+ double *C, const size_t ldc)
+ {
+ const double * Bi = B;
+ double * Ci = C;
+ for (;Bi < B+m*ldb ; Ci+=ldc, Bi += ldb)
+ for (size_t j = 0 ; j < n ; ++j)
+ Ci[j]= (Ci[j]+Bi[j])*e ;
+ // F.init( Ci[j], (Ci[j]+Bi[j])/e );
+
+ }
+
+ // C = (C-B)/e
+ template<class Field>
+ void subscalinf(const Field & F, const size_t m, const size_t n,
+ const double *B, const size_t ldb,
+ double e,
+ double *C, const size_t ldc)
+ {
+ const double * Bi = B;
+ double * Ci = C;
+ for (;Bi < B+m*ldb ; Ci+=ldc, Bi += ldb)
+ for (size_t j = 0 ; j < n ; ++j)
+ Ci[j]= (Ci[j]-Bi[j])*e ;
+ // F.init( Ci[j], (Ci[j]-Bi[j])/e );
+
+ }
+
+ // C = (D-B)/e
+ template<class Field>
+ void subscal(const Field & F, const size_t m, const size_t n,
+ const double *D, const size_t ldd,
+ const double *B, const size_t ldb,
+ double e,
+ double *C, const size_t ldc)
+ {
+ const double * Bi = B;
+ const double * Di = D;
+ double * Ci = C;
+ for (;Bi < B+m*ldb ; Ci+=ldc, Bi += ldb, Di += ldd)
+ for (size_t j = 0 ; j < n ; ++j)
+ Ci[j] = (Di[j]-Bi[j])*e ;
+
+ }
+
+ // C = (D+B)/e
+ template<class Field>
+ void addscal(const Field & F, const size_t m, const size_t n,
+ const double *D, const size_t ldd,
+ const double *B, const size_t ldb,
+ double e,
+ double *C, const size_t ldc)
+ {
+ const double * Bi = B;
+ const double * Di = D;
+ double * Ci = C;
+ for (;Bi < B+m*ldb ; Ci+=ldc, Bi += ldb, Di += ldd)
+ for (size_t j = 0 ; j < n ; ++j)
+ Ci[j] = (Di[j]+Bi[j])*e ;
+
+ }
+
+ // C = C + (D-B)/e
+ template<class Field>
+ void subscalacc(const Field & F, const size_t m, const size_t n,
+ const double *D, const size_t ldd,
+ const double *B, const size_t ldb,
+ double e,
+ double *C, const size_t ldc)
+ {
+ const double * Bi = B;
+ const double * Di = D;
+ double * Ci = C;
+ for (;Bi < B+m*ldb ; Ci+=ldc, Bi += ldb, Di += ldd)
+ for (size_t j = 0 ; j < n ; ++j)
+ Ci[j] += (Di[j]-Bi[j])*e ;
+
+ }
+
+#ifndef TRE
+ // #ifndef NDEBUG
+// #define TRE 1
+ // #else
+ #define TRE (size_t)(__FFLASFFPACK_WINOTHRESHOLD)
+ // #define TRE (size_t)(__FFLASFFPACK_WINOTHRESHOLD*0.9)
+ // #endif
+#endif
+ template<class Field>
+ double * gemm_fflas(const Field & F,
+ const size_t m, const size_t n, const size_t k,
+ const double *A, size_t lda,
+ const double *B, size_t ldb,
+ double * C, size_t ldc,
+ int rec = 0)
+ {
+ Givaro::DoubleDomain R;
+ FFLAS::fgemm(R,
+ FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,
+ m,n,k,
+ 1,
+ A,lda, B,ldb,
+ 0,
+ C, ldc);
+
+ // cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,
+ // m,n,k,1,A,lda,B,ldb,0,C,ldc);
+
+ return C;
+ }
+} // FFLAS
+
+namespace FFLAS { namespace Protected { namespace Rec {
+
+ // Field must be Givaro::Modular<double>
+ template<class Field>
+ double *
+ gemm_bini_322_0(const Field & F
+ , const size_t m
+ , const size_t n
+ , const size_t k
+ , const double *A , const size_t lda
+ , const double *B , const size_t ldb
+ , double *C , const size_t ldc
+ , int rec
+ , const double & epsilon
+ )
+ {
+ Givaro::ZRing<double> NoField;
+ // const double p = (double)F.characteristic();
+ size_t M = (n>m)?std::min(k,m):std::min(k,n);
+ // std::cout << rec << ',' << M << std::endl;
+ // Field G(p*p);
+
+ if ( M < TRE || rec <= 0) {
+ return gemm_fflas(F, m,n,k, A,lda, B,ldb, C, ldc);
+ }
+
+ assert(k/2*2==k); // k divisible par 2
+ assert(n/2*2==n); // n divisible par 2
+ assert(m/3*3==m); // m divisible par 3
+
+
+ size_t n2 = n/2;
+ size_t k2 = k/2;
+ size_t m3 = m/3;
+
+ // std::cout << "€ = " << epsilon << std::endl;
+
+ // sub matrices in A
+ const double * A11 = A;
+ const double * A12 = A +k2;
+ const double * A21 = A +lda*m3;
+ const double * A22 = A21 +k2;
+ const double * A31 = A21 +lda*m3;
+ const double * A32 = A31 +k2;
+
+ // sub matrices in C
+ double * C11 = C;
+ double * C12 = C +n2;
+ double * C21 = C +ldc*m3;
+ double * C22 = C21 +n2;
+ double * C31 = C21 +ldc*m3;
+ double * C32 = C31 +n2;
+
+ // sub matrices in B
+ const double * B11 = B;
+ const double * B12 = B +n2;
+ const double * B21 = B +ldb*k2;
+ const double * B22 = B21 +n2;
+
+ FFLAS::fzero(NoField,m,n,C,ldc);
+
+ /*
+ * Algo :
+ * S1 := A11 +A22;
+ * S4 := e*A12+A22;
+ * S5 := A11 +e*A12;
+ * S6 := A21 +A32;
+ * S9 := A21 +e*A31;
+ * S10 := e*A31+A32;
+ *
+ * T1 := e*B11 +B22;
+ * T2 := B21 +B22;
+ * T4 := -e*B11+B21;
+ * T5 := e*B12 +B22;
+ * T6 := B11 +e*B22;
+ * T7 := B11 +B12;
+ * T9 := B12 -e*B22;
+ * T10 := B11 +e*B21;
+ *
+ * P1 := S1 *T1;
+ * P2 := A22*T2;
+ * P3 := A11*B22;
+ * P4 := S4 *T4;
+ * P5 := S5 *T5;
+ * P6 := S6 *T6;
+ * P7 := A21*T7;
+ * P8 := A32*B11;
+ * P9 := S9 *T9;
+ * P10:= S10*T10;
+ *
+ * C11 := (P1-P2-P3+P4)/e;
+ * C12 := (P3-P5)/(-e) ;
+ * C21 := P4+P6-P10 ;
+ * C22 := P1-P5+P9;
+ * C31 := (-P8+P10)/e;
+ * C32 := (P6-P7-P8+P9)/e;
+ *
+ */
+
+ double * S1 = FFLAS::fflas_new<double>(m3*k2) ;
+ // double * C11t = FFLAS::fflas_new<double>(n2*m3) ;
+ // S1 := A11 +A22;
+ FFLAS::fadd(NoField,m3,k2,A11,lda,A22,lda,S1,k2);
+ // T1 := e*B11 +B22;
+ double * T1 = FFLAS::fflas_new<double>(n2*k2) ; // ou aire
+ add(k2,n2,epsilon,B11,ldb,B22,ldb,T1,n2);
+ // P1 := S1 *T1; (dans C22)
+ gemm_bini_322_0(F,m3,n2,k2,S1,k2,T1,n2,C22,ldc,rec-1,epsilon);
+ // S4 := e*A12+A22;
+ double * eA12 = FFLAS::fflas_new<double >(m3*k2);
+ FFLAS::fscal(NoField,m3,k2,epsilon,A12,lda,eA12,k2) ;
+ FFLAS::fadd(NoField,m3,k2,eA12,k2,A22,lda,S1,k2);
+ // T4 := -e*B11+B21;
+ add(k2,n2,-epsilon,B11,ldb,B21,ldb,T1,n2);
+ // P4 := S4 *T4; (dans C21)
+ gemm_bini_322_0(F,m3,n2,k2,S1,k2,T1,n2,C21,ldc,rec-1,epsilon);
+ // C11 = P1+P4
+ FFLAS::fadd(NoField,m3,n2,C21,ldc,C22,ldc,C11,ldc);
+ // T2 := B21 +B22;
+ FFLAS::fadd(NoField,k2,n2,B21,ldb,B22,ldb,T1,n2);
+ // P2 := A22*T2;
+ double * P1 = FFLAS::fflas_new<double>(n2*m3) ; // ou aire
+ gemm_bini_322_0(F,m3,n2,k2,A22,lda,T1,n2,P1,n2,rec-1,epsilon);
+ // P3 := A11*B22; (dans C12)
+ gemm_bini_322_0(F,m3,n2,k2,A11,lda,B22,ldb,C12,ldc,rec-1,epsilon);
+ // C11 -= (P2+P3)
+ subadd(m3,n2,P1,n2,C12,ldc,C11,ldc);
+ // S5 := A11 +e*A12;
+ FFLAS::fadd(NoField,m3,k2,eA12,k2,A11,lda,S1,k2);
+ // T5 := e*B12 +B22;
+ add(k2,n2,epsilon,B12,ldb,B22,ldb,T1,n2);
+ // P5 := S5 *T5;
+ double * P2 = FFLAS::fflas_new<double>(n2*m3) ; // ou aire
+ gemm_bini_322_0(F,m3,n2,k2,S1,k2,T1,n2,P2,n2,rec-1,epsilon);
+ // C12 -= P5
+ subscalinf(NoField,m3,n2,P2,n2,-(double)1/epsilon,C12,ldc);
+ // S6 := A21 +A32;
+ FFLAS::fadd(NoField,m3,k2,A21,lda,A32,lda,S1,k2);
+ // T6 := B11 +e*B22;
+ add(k2,n2,epsilon,B22,ldb,B11,ldb,T1,n2);
+ // P6 := S6 *T6; dans C32
+ gemm_bini_322_0(F,m3,n2,k2,S1,k2,T1,n2,C32,ldc,rec-1,epsilon);
+ // C21+= P6
+ FFLAS::faddin(NoField,m3,n2,C32,ldc,C21,ldc);
+ // T7 := B11 +B12;
+ FFLAS::fadd(NoField,k2,n2,B11,ldb,B12,ldb,T1,n2);
+ // P7 := A21*T7; !signe
+ gemm_bini_322_0(F,m3,n2,k2,A21,lda,T1,n2,P1,n2,rec-1,epsilon);
+ // P8 := A32*B11; dans C31 !signe
+ gemm_bini_322_0(F,m3,n2,k2,A32,lda,B11,ldb,C31,ldc,rec-1,epsilon);
+ // C32 -= P8+P7
+ subadd(m3,n2,P1,n2,C31,ldc,C32,ldc);
+ // S9 := A21 +e*A31;
+ double * eA31 = eA12 ;
+ FFLAS::fscal(NoField,m3,k2,epsilon,A31,lda,eA31,k2);
+ FFLAS::fadd(NoField,m3,k2,eA31,k2,A21,lda,S1,k2);
+ // T9 := B12 -e*B22;
+ add(k2,n2,-epsilon,B22,ldb,B12,ldb,T1,n2);
+ // P9 := S9 *T9;
+ gemm_bini_322_0(F,m3,n2,k2,S1,k2,T1,n2,P1,n2,rec-1,epsilon);
+ // C32= (C32+P9)/p
+ addscalinf(NoField,m3,n2,P1,n2,(double)1/epsilon,C32,ldc);
+ // C22+= P9-P5
+ addsub(m3,n2,P1,n2,P2,n2,C22,ldc);
+ FFLAS::fflas_delete( P2);
+ // S10 := e*A31+A32;
+ FFLAS::fadd(NoField,m3,k2,eA31,k2,A32,lda,S1,k2);
+ FFLAS::fflas_delete( eA12 );
+ // T10 := B11 +e*B21;
+ add(k2,n2,epsilon,B21,ldb,B11,ldb,T1,n2);
+ // P10:= S10*T10;
+ gemm_bini_322_0(F,m3,n2,k2,S1,k2,T1,n2,P1,n2,rec-1,epsilon);
+ FFLAS::fflas_delete( S1);
+ FFLAS::fflas_delete( T1);
+ // C21-= P10
+ FFLAS::fsubin(NoField,m3,n2,P1,n2,C21,ldc);
+ // C31= (C31-P10)/(-epsilon)
+ subscalinf(NoField,m3,n2,P1,n2,-(double)1/epsilon,C31,ldc);
+ FFLAS::fflas_delete( P1);
+ // C11 := (P1+P-P3+P4)/e;
+ FFLAS::fscalin(NoField,m3,n2,(double)1/epsilon,C11,ldc);
+
+ return C;
+
+ }
+
+ // Field must be Givaro::Modular<double>
+ template<class Field>
+ double *
+ gemm_bini_322_mem(const Field & F
+ , const size_t m
+ , const size_t n
+ , const size_t k
+ , const double *A , const size_t lda
+ , const double *B , const size_t ldb
+ , double *C , const size_t ldc
+ , int rec
+ , const double & epsilon
+ )
+ {
+ Givaro::ZRing<double> NoField;
+ // const double p = (double)F.characteristic();
+ size_t M = (n>m)?std::min(k,m):std::min(k,n);
+ // std::cout << rec << ',' << M << std::endl;
+ // Field G(p*p);
+
+ if ( M < TRE || rec <= 0) {
+ // std::cout << "ffw" << std::endl;
+ return gemm_fflas(F, m,n,k, A,lda, B,ldb, C, ldc);
+ // return gemm_fflas(NoField, m,n,k, A,lda, B,ldb, C, ldc);
+ }
+
+ assert(k/2*2==k); // k divisible par 2
+ assert(n/2*2==n); // n divisible par 2
+ assert(m/3*3==m); // m divisible par 3
+
+ // std::cout << "tested" << std::endl;
+
+ size_t n2 = n/2;
+ size_t k2 = k/2;
+ size_t m3 = m/3;
+
+ // std::cout << "€ = " << epsilon << std::endl;
+
+ // sub matrices in A
+ const double * A11 = A;
+ const double * A12 = A +k2;
+ const double * A21 = A +lda*m3;
+ const double * A22 = A21 +k2;
+ const double * A31 = A21 +lda*m3;
+ const double * A32 = A31 +k2;
+
+ // sub matrices in C
+ double * C11 = C;
+ double * C12 = C +n2;
+ double * C21 = C +ldc*m3;
+ double * C22 = C21 +n2;
+ double * C31 = C21 +ldc*m3;
+ double * C32 = C31 +n2;
+
+ // sub matrices in B
+ const double * B11 = B;
+ const double * B12 = B +n2;
+ const double * B21 = B +ldb*k2;
+ const double * B22 = B21 +n2;
+
+ FFLAS::fzero(F,m,n,C,ldc);
+
+ /*
+ * Algo :
+ * S1 := A11 +A22;
+ * S4 := e*A12+A22;
+ * S5 := A11 +e*A12;
+ * S6 := A21 +A32;
+ * S9 := A21 +e*A31;
+ * S3 := e*A31+A32;
+ *
+ * T1 := e*B11 +B22;
+ * T2 := B21 +B22;
+ * T4 := -e*B11+B21;
+ * T5 := e*B12 +B22;
+ * T6 := B11 +e*B22;
+ * T7 := B11 +B12;
+ * T9 := B12 -e*B22;
+ * T3 := B11 +e*B21;
+ *
+ * P1 := S1 *T1;
+ * P2 := A22*T2;
+ * P10 := A11*B22;
+ * P4 := S4 *T4;
+ * P5 := S5 *T5;
+ * P6 := S6 *T6;
+ * P7 := A21*T7;
+ * P8 := A32*B11;
+ * P9 := S9 *T9;
+ * P3:= S3*T3;
+ *
+ * C11 := (P1-P2-P10+P4)/e;
+ * C12 := (P10-P5)/(-e) ;
+ * C21 := P4+P6-P3 ;
+ * C22 := P1-P5+P9;
+ * C31 := (-P8+P3)/e;
+ * C32 := (P6-P7-P8+P9)/e;
+ *
+ */
+
+
+ // P10
+ gemm_bini_322_mem(F,m3,n2,k2,A11,lda,B22,ldb,C11,ldc,rec-1,epsilon);
+ // S5
+ double * X = FFLAS::fflas_new<double>(m3*k2);
+ add(m3,k2,epsilon,A12,lda,A11,lda,X,k2);
+ // T5
+ // double * Y = FFLAS::fflas_new<double>(std::max(k2,m3)*n2);
+ double * Y = FFLAS::fflas_new<double>(k2*n2);
+ add(k2,n2,epsilon,B12,ldb,B22,ldb,Y,n2);
+ // P5
+ gemm_bini_322_mem(F,m3,n2,k2,X,k2,Y,n2,C22,ldc,rec-1,epsilon);
+ // C12
+ subscal(NoField,m3,n2,C22,ldc,C11,ldc,(double)1/epsilon,C12,ldc);
+ // T2
+ FFLAS::fadd(NoField,k2,n2,B21,ldb,B22,ldb,Y,n2);
+ // P2
+ gemm_bini_322_mem(F,m3,n2,k2,A22,lda,Y,n2,C31,ldc,rec-1,epsilon);
+ // C11
+ FFLAS::faddin(NoField,m3,n2,C31,ldc,C11,ldc);
+ // S1
+ FFLAS::fadd(NoField,m3,k2,A11,lda,A22,lda,X,k2);
+ // T1
+ add(k2,n2,epsilon,B11,ldb,B22,ldb,Y,n2);
+ // P1
+ gemm_bini_322_mem(F,m3,n2,k2,X,k2,Y,n2,C21,ldc,rec-1,epsilon);
+ // C22
+ FFLAS::fsub(NoField,m3,n2,C21,ldc,C22,ldc,C22,ldc);
+ // C11
+ FFLAS::fsub(NoField,m3,n2,C21,ldc,C11,ldc,C11,ldc);
+ // S4
+ add(m3,k2,epsilon,A12,lda,A22,lda,X,k2);
+ // T4
+ add(k2,n2,-epsilon,B11,ldb,B21,ldb,Y,n2);
+ // P4
+ gemm_bini_322_mem(F,m3,n2,k2,X,k2,Y,n2,C21,ldc,rec-1,epsilon);
+ // C11
+ addscalinf(NoField,m3,n2,C21,ldc,(double)1/epsilon,C11,ldc);
+ // S9
+ add(m3,k2,epsilon,A31,lda,A21,lda,X,k2);
+ // T9
+ add(k2,n2,-epsilon,B22,ldb,B12,ldb,Y,n2);
+ // P9
+ gemm_bini_322_mem(F,m3,n2,k2,X,k2,Y,n2,C32,ldc,rec-1,epsilon);
+ // C22
+ FFLAS::faddin(NoField,m3,n2,C32,ldc,C22,ldc);
+ // S6
+ FFLAS::fadd(NoField,m3,k2,A21,lda,A32,lda,X,k2);
+ // T6
+ add(k2,n2,epsilon,B22,ldb,B11,ldb,Y,n2);
+ // P6
+ gemm_bini_322_mem(F,m3,n2,k2,X,k2,Y,n2,C31,ldc,rec-1,epsilon);
+ // C21
+ FFLAS::faddin(NoField,m3,n2,C31,ldc,C21,ldc);
+ // C32
+ FFLAS::faddin(NoField,m3,n2,C31,ldc,C32,ldc);
+ // T7
+ FFLAS::fadd(NoField,k2,n2,B11,ldb,B12,ldb,Y,n2);
+ // P7
+ gemm_bini_322_mem(F,m3,n2,k2,A21,lda,Y,n2,C31,ldc,rec-1,epsilon);
+ // if (epsilon > 1 && rec == 2) { FFLAS::finit(G,m3,n2,C31,ldc);}
+ // C32
+ FFLAS::fsubin(NoField,m3,n2,C31,ldc,C32,ldc);
+ // S3
+ add(m3,k2,epsilon,A31,lda,A32,lda,X,k2);
+ // T3
+ add(k2,n2,epsilon,B21,ldb,B11,ldb,Y,n2);
+ // P3
+ gemm_bini_322_mem(F,m3,n2,k2,X,k2,Y,n2,C31,ldc,rec-1,epsilon);
+ FFLAS::fflas_delete( X);
+ FFLAS::fflas_delete( Y );
+ // C21
+ FFLAS::fsubin(NoField,m3,n2,C31,ldc,C21,ldc);
+ // P8
+ Y = FFLAS::fflas_new<double>(m3*n2);
+ gemm_bini_322_mem(F,m3,n2,k2,A32,lda,B11,ldb,Y,n2,rec-1,epsilon);
+ // C31
+ subscalinf(NoField,m3,n2,Y,n2,(double)1/epsilon,C31,ldc);
+ // FFLAS::fsubin(NoField,m3,n2,Y,n2,C31,ldc);
+ // C32
+ subscalinf(NoField,m3,n2,Y,n2,(double)1/epsilon,C32,ldc);
+ // FFLAS::fsubin(NoField,m3,n2,Y,n2,C32,ldc);
+ // FFLAS::fscalin(NoField,m3,n,(double)1/epsilon,C31,ldc);
+ FFLAS::fflas_delete( Y );
+
+
+ return C;
+
+ }
+
+ // Field must be Givaro::Modular<double>
+ template<class Field>
+ double *
+ gemm_bini_223_mem(const Field & F
+ , const size_t m
+ , const size_t n
+ , const size_t k
+ , const double *A , const size_t lda
+ , const double *B , const size_t ldb
+ , double *C , const size_t ldc
+ , int rec
+ , const double & epsilon
+ )
+ {
+ Givaro::ZRing<double> NoField;
+ // const double p = (double)F.characteristic();
+ size_t M = (n>m)?std::min(k,m):std::min(k,n);
+ // std::cout << rec << ',' << M << std::endl;
+ // Field G(p*p);
+
+ if ( M < TRE || rec <= 0) {
+ // std::cout << "ffw" << std::endl;
+ return gemm_fflas(F, m,n,k, A,lda, B,ldb, C, ldc);
+ // return gemm_fflas(NoField, m,n,k, A,lda, B,ldb, C, ldc);
+ }
+
+ assert(k/2*2==k); // k divisible par 2
+ assert(n/3*3==n); // n divisible par 2
+ assert(m/2*2==m); // m divisible par 3
+
+ // std::cout << "tested" << std::endl;
+
+ size_t m2 = m/2;
+ size_t k2 = k/2;
+ size_t n3 = n/3;
+
+ // std::cout << "€ = " << epsilon << std::endl;
+
+ // sub matrices in A
+ const double * A11 = A;
+ const double * A12 = A +k2;
+ const double * A21 = A +lda*m2;
+ const double * A22 = A21 +k2;
+
+ // sub matrices in C
+ double * C11 = C;
+ double * C12 = C +n3;
+ double * C13 = C +2*n3;
+ double * C21 = C +ldc*m2;
+ double * C22 = C21 +n3;
+ double * C23 = C21 +2*n3;
+
+
+
+ // sub matrices in B
+ const double * B11 = B;
+ const double * B12 = B +n3;
+ const double * B13 = B +2*n3;
+ const double * B21 = B +ldb*k2;
+ const double * B22 = B21 +n3;
+ const double * B23 = B21 +2*n3;
+
+
+ FFLAS::fzero(F,m,n,C,ldc);
+
+ /*
+ * Algo :
+ * S1 := B11 +B22;
+ * S4 := e*B21+B22;
+ * S5 := B11 +e*B21;
+ * S6 := B12 +B23;
+ * S9 := B12 +e*B13;
+ * S3 := e*B13+B23;
+ *
+ * T1 := e*A11 +A22;
+ * T2 := A12 +A22;
+ * T4 := -e*A11+A12;
+ * T5 := e*A21 +A22;
+ * T6 := A11 +e*A22;
+ * T7 := A11 +A21;
+ * T9 := A21 -e*A22;
+ * T3 := A11 +e*A12;
+ *
+ * P1 := S1 *T1;
+ * P2 := T2 * B22;
+ * P10 := A22 * B11;
+ * P4 := S4 *T4;
+ * P5 := S5 *T5;
+ * P6 := S6 *T6;
+ * P7 := T7*B12;
+ * P8 := A11*B23;
+ * P9 := S9 *T9;
+ * P3 := S3*T3;
+ *
+ * C11 := (P1-P2-P10+P4)/e;
+ * C21 := (P10-P5)/(-e) ;
+ * C12 := P4+P6-P3 ;
+ * C22 := P1-P5+P9;
+ * C13 := (-P8+P3)/e;
+ * C23 := (P6-P7-P8+P9)/e;
+ *
+ */
+
+
+ // P10
+ gemm_bini_223_mem(F,m2,n3,k2,A22,lda,B11,ldb,C11,ldc,rec-1,epsilon);
+ // S5
+ double * Y = FFLAS::fflas_new<double>(k2*n3);
+ add(k2,n3,epsilon,B21,ldb,B11,ldb,Y,n3);
+ // T5
+ double * X = FFLAS::fflas_new<double>(m2*k2);
+ add(m2,k2,epsilon,A21,lda,A22,lda,X,k2);
+ // P5
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,Y,n3,C22,ldc,rec-1,epsilon);
+ // C12
+ subscal(NoField,m2,n3,C22,ldc,C11,ldc,(double)1/epsilon,C21,ldc);
+ // T2
+ FFLAS::fadd(NoField,m2,k2,A12,lda,A22,lda,X,k2);
+ // P2
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,B22,ldb,C13,ldc,rec-1,epsilon);
+ // C11
+ FFLAS::faddin(NoField,m2,n3,C13,ldc,C11,ldc);
+ // S1
+ FFLAS::fadd(NoField,k2,n3,B11,ldb,B22,ldb,Y,n3);
+ // T1
+ add(m2,k2,epsilon,A11,lda,A22,lda,X,k2);
+ // P1
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,Y,n3,C12,ldc,rec-1,epsilon);
+ // C22
+ FFLAS::fsub(NoField,m2,n3,C12,ldc,C22,ldc,C22,ldc);
+ // C11
+ FFLAS::fsub(NoField,m2,n3,C12,ldc,C11,ldc,C11,ldc);
+ // S4
+ add(k2,n3,epsilon,B21,ldb,B22,ldb,Y,n3);
+ // T4
+ add(m2,k2,-epsilon,A11,lda,A12,lda,X,k2);
+ // P4
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,Y,n3,C12,ldc,rec-1,epsilon);
+ // C11
+ addscalinf(NoField,m2,n3,C12,ldc,(double)1/epsilon,C11,ldc);
+ // S9
+ add(k2,n3,epsilon,B13,ldb,B12,ldb,Y,n3);
+ // T9
+ add(m2,k2,-epsilon,A22,lda,A21,lda,X,k2);
+ // P9
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,Y,n3,C23,ldc,rec-1,epsilon);
+ // C22
+ FFLAS::faddin(NoField,m2,n3,C23,ldc,C22,ldc);
+ // S6
+ FFLAS::fadd(NoField,k2,n3,B12,ldb,B23,ldb,Y,n3);
+ // T6
+ add(m2,k2,epsilon,A22,lda,A11,lda,X,k2);
+ // P6
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,Y,n3,C13,ldc,rec-1,epsilon);
+ // C21
+ FFLAS::faddin(NoField,m2,n3,C13,ldc,C12,ldc);
+ // C32
+ FFLAS::faddin(NoField,m2,n3,C13,ldc,C23,ldc);
+ // T7
+ FFLAS::fadd(NoField,m2,k2,A11,lda,A21,lda,X,k2);
+ // P7
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,B12,ldb,C13,ldc,rec-1,epsilon);
+ // if (epsilon > 1 && rec == 2) { FFLAS::finit(G,m2,n3,C31,ldc);}
+ // C32
+ FFLAS::fsubin(NoField,m2,n3,C13,ldc,C23,ldc);
+ // S3
+ add(k2,n3,epsilon,B13,ldb,B23,ldb,Y,n3);
+ // T3
+ add(m2,k2,epsilon,A12,lda,A11,lda,X,k2);
+ // P3
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,Y,n3,C13,ldc,rec-1,epsilon);
+ FFLAS::fflas_delete( Y );
+ FFLAS::fflas_delete( X );
+ // C21
+ FFLAS::fsubin(NoField,m2,n3,C13,ldc,C12,ldc);
+ // P8
+ Y = FFLAS::fflas_new<double>(m2*n3);
+ gemm_bini_223_mem(F,m2,n3,k2,A11,lda,B23,ldb,Y,n3,rec-1,epsilon);
+ // C31
+ subscalinf(NoField,m2,n3,Y,n3,(double)1/epsilon,C13,ldc);
+ // C32
+ subscalinf(NoField,m2,n3,Y,n3,(double)1/epsilon,C23,ldc);
+ FFLAS::fflas_delete( Y );
+
+
+ return C;
+
+ }
+
+ // Field must be Givaro::Modular<double>
+ template<class Field>
+ double *
+ gemm_bini_322_2(const Field & F
+ , const size_t m
+ , const size_t n
+ , const size_t k
+ , const double *A , const size_t lda
+ , const double *B , const size_t ldb
+ , double *C , const size_t ldc
+ , int rec
+ , const double & epsilon
+ )
+ {
+ Givaro::ZRing<double> NoField;
+ // const double p = (double)F.characteristic();
+ size_t M = (n>m)?std::min(k,m):std::min(k,n);
+ // std::cout << rec << ',' << M << std::endl;
+ // Field G(p*p);
+
+ if ( M < TRE || rec <= 0) {
+ // std::cout << "ffw" << std::endl;
+ return gemm_fflas(F, m,n,k, A,lda, B,ldb, C, ldc);
+ // return gemm_fflas(NoField, m,n,k, A,lda, B,ldb, C, ldc);
+ }
+
+ assert(k/2*2==k); // k divisible par 2
+ assert(n/2*2==n); // n divisible par 2
+ assert(m/3*3==m); // m divisible par 3
+
+ // std::cout << "tested" << std::endl;
+
+ size_t n2 = n/2;
+ size_t k2 = k/2;
+ size_t m3 = m/3;
+
+ // std::cout << "€ = " << epsilon << std::endl;
+
+ // sub matrices in A
+ const double * A11 = A;
+ const double * A12 = A +k2;
+ const double * A21 = A +lda*m3;
+ const double * A22 = A21 +k2;
+ const double * A31 = A21 +lda*m3;
+ const double * A32 = A31 +k2;
+
+ // sub matrices in C
+ double * C11 = C;
+ double * C12 = C +n2;
+ double * C21 = C +ldc*m3;
+ double * C22 = C21 +n2;
+ double * C31 = C21 +ldc*m3;
+ double * C32 = C31 +n2;
+
+ // sub matrices in B
+ const double * B11 = B;
+ const double * B12 = B +n2;
+ const double * B21 = B +ldb*k2;
+ const double * B22 = B21 +n2;
+
+ FFLAS::fzero(F,m,n,C,ldc);
+
+ /*
+ * Algo :
+ * S1 := A11 +A22;
+ * S4 := e*A12+A22;
+ * S5 := A11 +e*A12;
+ * S3 := e*A31+A32;
+ * S6 := A21 +A32;
+ * S9 := A21 +e*A31;
+ *
+ * T1 := e*B11 +B22;
+ * T2 := B21 +B22;
+ * T3 := B11 +e*B21;
+ * T4 := -e*B11+B21;
+ * T5 := e*B12 +B22;
+ * T6 := B11 +e*B22;
+ * T7 := B11 +B12;
+ * T9 := B12 -e*B22;
+ *
+ * P1 := S1 *T1;
+ * P2 := A22*T2;
+ * P10 := A11*B22;
+ * P4 := S4 *T4;
+ * P5 := S5 *T5;
+ * P6 := S6 *T6;
+ * P7 := A21*T7;
+ * P8 := A32*B11;
+ * P9 := S9 *T9;
+ * P3:= S3*T3;
+ *
+ * C11 := (P1-P2-P10+P4)/e;
+ * C12 := (P10-P5)/(-e) ;
+ * C21 := P4+P6-P3 ;
+ * C22 := P1-P5+P9;
+ * C31 := (-P8+P3)/e;
+ * C32 := (P6-P7-P8+P9)/e;
+ *
+ */
+
+ double * U = FFLAS::fflas_new<double>(m3*n2);
+ double * V = FFLAS::fflas_new<double>(m3*n2);
+ double * X = FFLAS::fflas_new<double>(m3*std::max(k2,n2));
+ double * Y = FFLAS::fflas_new<double>(std::max(k2,m3)*n2);
+
+ // S4
+ add(m3,k2,epsilon,A12,lda,A22,lda,X,k2);
+ // T4
+ add(k2,n2,-epsilon,B11,ldb,B21,ldb,Y,n2);
+ // P4
+ gemm_bini_322_2(F,m3,n2,k2,X,k2,Y,n2,U,n2,rec-1,epsilon);
+ // S9
+ add(m3,k2,epsilon,A31,lda,A21,lda,X,k2);
+ // T9
+ add(k2,n2,-epsilon,B22,ldb,B12,ldb,Y,n2);
+ // P9
+ gemm_bini_322_2(F,m3,n2,k2,X,k2,Y,n2,V,n2,rec-1,epsilon);
+ // S5
+ add(m3,k2,epsilon,A12,lda,A11,lda,X,k2);
+ // T5
+ add(k2,n2,epsilon,B12,ldb,B22,ldb,Y,n2);
+ // P5
+ gemm_bini_322_2(F,m3,n2,k2,X,k2,Y,n2,C12,ldc,rec-1,epsilon);
+ // S3
+ add(m3,k2,epsilon,A31,lda,A32,lda,X,k2);
+ // T3
+ add(k2,n2,epsilon,B21,ldb,B11,ldb,Y,n2);
+ // P3
+ gemm_bini_322_2(F,m3,n2,k2,X,k2,Y,n2,C31,ldc,rec-1,epsilon);
+ // C22 = P9-P5
+ FFLAS::fsub(NoField,m3,n2,V,n2,C12,ldc,C22,ldc);
+ // C21 = P4-P3
+ FFLAS::fsub(NoField,m3,n2,U,n2,C31,ldc,C21,ldc);
+ // T2
+ FFLAS::fadd(NoField,k2,n2,B21,ldb,B22,ldb,Y,n2);
+ // P2
+ gemm_bini_322_2(F,m3,n2,k2,A22,lda,Y,n2,X,n2,rec-1,epsilon);
+ // XXX approximate
+ // C11 = (P4 - P2) / e
+ subscal(NoField,m3,n2,U,n2,X,n2,1./epsilon,C11,ldc);
+ // T7
+ FFLAS::fadd(NoField,k2,n2,B11,ldb,B12,ldb,Y,n2);
+ // P7
+ gemm_bini_322_2(F,m3,n2,k2,A21,lda,Y,n2,X,n2,rec-1,epsilon);
+ // XXX approximate
+ // C32 = (P9-P7) / e
+ subscal(NoField,m3,n2,V,n2,X,n2,1./epsilon,C32,ldc);
+ // S1
+ FFLAS::fadd(NoField,m3,k2,A11,lda,A22,lda,X,k2);
+ // T1
+ add(k2,n2,epsilon,B11,ldb,B22,ldb,Y,n2);
+ // P1
+ gemm_bini_322_2(F,m3,n2,k2,X,k2,Y,n2,U,n2,rec-1,epsilon);
+ // C22 += P1
+ FFLAS::faddin(NoField,m3,n2,U,n2,C22,ldc);
+ // P10
+ gemm_bini_322_2(F,m3,n2,k2,A11,lda,B22,ldb,V,n2,rec-1,epsilon);
+ // C12 = (P5-P10)/e
+ subscalinf(NoField,m3,n2,V,n2,1./epsilon,C12,ldc);
+ // XXX approximate
+ // C11 = C11 + (P1-P10)/e
+ subscalacc(NoField,m3,n2,U,n2,V,n2,1./epsilon,C11,ldc);
+ // S6
+ FFLAS::fadd(NoField,m3,k2,A21,lda,A32,lda,X,k2);
+ // T6
+ add(k2,n2,epsilon,B22,ldb,B11,ldb,Y,n2);
+ // P6
+ gemm_bini_322_2(F,m3,n2,k2,X,k2,Y,n2,U,n2,rec-1,epsilon);
+ // C21 += P6
+ FFLAS::faddin(NoField,m3,n2,U,n2,C21,ldc);
+ // P8
+ gemm_bini_322_2(F,m3,n2,k2,A32,lda,B11,ldb,V,n2,rec-1,epsilon);
+ // C31 = (P3-P8)/2
+ subscalinf(NoField,m3,n2,V,n2,1./epsilon,C31,ldc);
+ // XXX approximate
+ // C32 = C32 + (P6-P8)/e
+ subscalacc(NoField,m3,n2,U,n2,V,n2,1./epsilon,C32,ldc);
+
+
+ FFLAS::fflas_delete( X);
+ FFLAS::fflas_delete( Y );
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( V);
+
+
+ return C;
+
+ }
+
+
+ // Field must be Givaro::Modular<double>
+ template<class Field>
+ double *
+ gemm_bini_232_2(const Field & F
+ , const size_t m
+ , const size_t n
+ , const size_t k
+ , const double *A , const size_t lda
+ , const double *B , const size_t ldb
+ , double *C , const size_t ldc
+ , int rec
+ , const double & epsilon
+ )
+ {
+ Givaro::ZRing<double> NoField;
+ // const double p = (double)F.characteristic();
+ size_t M = (n>m)?std::min(k,m):std::min(k,n);
+ // Field G(p*p);
+
+ if ( M < TRE || rec <= 0) {
+ // std::cout << "ffw" << std::endl;
+ return gemm_fflas(F, m,n,k, A,lda, B,ldb, C, ldc);
+ // return gemm_fflas(NoField, m,n,k, A,lda, B,ldb, C, ldc);
+ }
+
+ assert(k/3*3==k); // k divisible par 3
+ assert(n/2*2==n); // n divisible par 2
+ assert(m/2*2==m); // m divisible par 2
+
+ // std::cout << "tested" << std::endl;
+
+ size_t n2 = n/2;
+ size_t k3 = k/3;
+ size_t m2 = m/2;
+
+ // std::cout << "€ = " << epsilon << std::endl;
+
+ // sub matrices in B
+ const double * B11 = B;
+ const double * B12 = B +n2;
+ const double * B21 = B +ldb*k3;
+ const double * B22 = B21 +n2;
+ const double * B31 = B21 +ldb*k3;
+ const double * B32 = B31 +n2;
+
+ // sub matrices in C
+ double * C11 = C;
+ double * C12 = C +n2;
+ double * C21 = C +ldc*m2;
+ double * C22 = C21 +n2;
+
+ // sub matrices in A
+
+ const double * A11 = A;
+ const double * A12 = A +k3;
+ const double * A13 = A +2*k3;
+ const double * A21 = A +lda*m2;
+ const double * A22 = A21 +k3;
+ const double * A23 = A21 +2*k3;
+
+
+ FFLAS::fzero(F,m,n,C,ldc);
+
+ /*
+ * Algo :
+ *
+ * S1 := A11 +A22*e;
+ * S3 := -(A11+A21);
+ * S4 := A11+A12*e;
+ * S5 := A21 - A22*e;
+ * S6 := A12*e + A23;
+ * S8 := -(A13+A23):
+ * S9 := A22*e + A23;
+ * S10 := -A12*e+A13;
+ *
+ * T1 := B11 +B22;
+ * T4 := e*B12+B22;
+ * T5 := B11 +e*B12;
+ * T6 := B21 +B32;
+ * T9 := B21 + e*B31;
+ * T10 := e*B31 +B32;
+ *
+ * P1 := Bini232(S1,T1 ,e);
+ * P2 := Bini232(A11,B22 ,e);
+ * P3 := Bini232(S3,B11,e);
+ * P4 := Bini232(S4,T4 ,e);
+ * P5 := Bini232(S5,T5 ,e);
+ * P6 := Bini232(S6,T6 ,e);
+ * P7 := Bini232(A23,B21 ,e);
+ * P8 := Bini232(S8,B32,e);
+ * P9 := Bini232(S9,T9 ,e);
+ * P10:= Bini232(S10,T10,e);
+ *
+ *
+ * C11 := evalm(P1-P4+(P6-P7+P8+P10)/e);
+ * C12 := evalm((-P2+P4)/e+P10) ;
+ * C21 := evalm(P5+(-P7+P9)/e) ;
+ * C22 := evalm((P1-P2+P3+P5)/e+P6-P9);
+ *
+ */
+
+ double * U = FFLAS::fflas_new<double>(m2*n2);
+ double * V = FFLAS::fflas_new<double>(m2*n2);
+ double * X = FFLAS::fflas_new<double>(m2*k3);
+ double * Y = FFLAS::fflas_new<double>(k3*n2);
+
+ // S1
+ add(m2,k3,epsilon,A22,lda,A11,lda,X,k3);
+ // T1
+ FFLAS::fadd(NoField,k3,n2,B11,ldb,B22,ldb,Y,n2);
+ // P1 (in U)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,Y,n2,U,n2,rec-1,epsilon);
+ // S3
+ negadd(m2,k3,A11,lda,A21,lda,X,k3);
+ // P3 (in V)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,B11,ldb,V,n2,rec-1,epsilon);
+ // C22 = (P1+P3)/e
+ // FFLAS::fadd(NoField,m2,n2,U,n2,V,n2,C22,ldc); // XXX acc
+ addscal(NoField,m2,n2,U,n2,V,n2,(double)1/epsilon,C22,ldc);
+ // S6
+ add(m2,k3,epsilon,A12,lda,A23,lda,X,k3);
+ // T6
+ FFLAS::fadd(NoField,k3,n2,B21,ldb,B32,ldb,Y,n2);
+ // P6 (in V)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,Y,n2,V,n2,rec-1,epsilon);
+ // C22 += P6
+ FFLAS::faddin(NoField,m2,n2,V,n2,C22,ldc);
+ // S8
+ negadd(m2,k3,A13,lda,A23,lda,X,k3);
+ // P8 (in C11)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,B32,ldb,C11,ldc,rec-1,epsilon);
+ // C11 = (P8+P6)/e
+ addscalinf(NoField,m2,n2,V,n2,(double)1/epsilon,C11,ldc);
+ // C11 += P1
+ FFLAS::faddin(NoField,m2,n2,U,n2,C11,ldc);
+ // S4
+ add(m2,k3,epsilon,A12,lda,A11,lda,X,k3);
+ // T4
+ add(k3,n2,epsilon,B12,ldb,B22,ldb,Y,n2);
+ // P4 (in U)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,Y,n2,U,n2,rec-1,epsilon);
+ // C11 -= P4
+ FFLAS::fsubin(NoField,m2,n2,U,n2,C11,ldc);
+ // P2 (in C12)
+ gemm_bini_232_2(F,m2,n2,k3,A11,lda,B22,ldb,C12,ldc,rec-1,epsilon);
+ // S5
+ add(m2,k3,-epsilon,A22,lda,A21,lda,X,k3);
+ // T5
+ add(k3,n2,epsilon,B12,ldb,B11,ldb,Y,n2);
+ // P5 (in V)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,Y,n2,V,n2,rec-1,epsilon);
+ // C22 += (P5-P2)/e
+ subscalacc(NoField,m2,n2,V,n2,C12,ldc,(double)1/epsilon,C22,ldc);
+ // C12 = (P4-P2)/e
+ subscalinf(NoField,m2,n2,U,n2,-(double)1/epsilon,C12,ldc);
+ // S9
+ add(m2,k3,epsilon,A22,lda,A23,lda,X,k3);
+ // T9
+ add(k3,n2,epsilon,B31,ldb,B21,ldb,Y,n2);
+ // P9 (in U)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,Y,n2,U,n2,rec-1,epsilon);
+ // C22 -= P9
+ FFLAS::fsubin(NoField,m2,n2,U,n2,C22,ldc);
+ // P7 (in C21)
+ gemm_bini_232_2(F,m2,n2,k3,A23,lda,B21,ldb,C21,ldc,rec-1,epsilon);
+ // C11 = C11 - P7/e
+ add(m2,n2,-(double)1/epsilon,C21,ldc,C11,ldc,C11,ldc);
+ // C21 = (P9-P7)/e
+ subscalinf(NoField,m2,n2,U,n2,-(double)1/epsilon,C21,ldc);
+ // C21 += P5
+ FFLAS::faddin(NoField,m2,n2,V,n2,C21,ldc);
+ // S10
+ add(m2,k3,-epsilon,A12,lda,A13,lda,X,k3);
+ // T10
+ add(k3,n2,epsilon,B31,ldb,B32,ldb,Y,n2);
+ // P10 (in U)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,Y,n2,U,n2,rec-1,epsilon);
+ // C12 += P10
+ FFLAS::faddin(NoField,m2,n2,U,n2,C12,ldc);
+ // C11 += P10/e
+ add(m2,n2,(double)1/epsilon,U,n2,C11,ldc,C11,ldc);
+
+
+ FFLAS::fflas_delete( X );
+ FFLAS::fflas_delete( Y );
+ FFLAS::fflas_delete( U );
+ FFLAS::fflas_delete( V );
+
+
+ return C;
+
+ }
+
+ template<class Field>
+ double *
+ gemm_bini_232_3_acc(const Field & F
+ , const size_t m
+ , const size_t n
+ , const size_t k
+ , const double *A , const size_t lda
+ , const double *B , const size_t ldb
+ , double *C , const size_t ldc
+ , int rec
+ , const double & epsilon
+ )
+ {
+ if (rec != 0)
+ exit(-1);
+ Givaro::DoubleDomain R;
+ FFLAS::fgemm(R,
+ FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,
+ m,n,k,
+ 1,
+ A,lda, B,ldb,
+ 1,
+ C, ldc);
+
+
+ }
+
+ template<class Field>
+ double *
+ gemm_bini_232_3(const Field & F
+ , const size_t m
+ , const size_t n
+ , const size_t k
+ , const double *A , const size_t lda
+ , const double *B , const size_t ldb
+ , double *C , const size_t ldc
+ , int rec
+ , const double & epsilon
+ )
+ {
+ Givaro::ZRing<double> NoField;
+ // const double p = (double)F.characteristic();
+ size_t M = (n>m)?std::min(k,m):std::min(k,n);
+ // Field G(p*p);
+
+ if ( M < TRE || rec <= 0) {
+ // std::cout << "ffw" << std::endl;
+ return gemm_fflas(F, m,n,k, A,lda, B,ldb, C, ldc);
+ // return gemm_fflas(NoField, m,n,k, A,lda, B,ldb, C, ldc);
+ }
+
+ assert(k/3*3==k); // k divisible par 3
+ assert(n/2*2==n); // n divisible par 2
+ assert(m/2*2==m); // m divisible par 2
+
+ // std::cout << "tested" << std::endl;
+
+ size_t n2 = n/2;
+ size_t k3 = k/3;
+ size_t m2 = m/2;
+
+ // std::cout << "€ = " << epsilon << std::endl;
+
+ // sub matrices in B
+ const double * B11 = B;
+ const double * B12 = B +n2;
+ const double * B21 = B +ldb*k3;
+ const double * B22 = B21 +n2;
+ const double * B31 = B21 +ldb*k3;
+ const double * B32 = B31 +n2;
+
+ // sub matrices in C
+ double * C11 = C;
+ double * C12 = C +n2;
+ double * C21 = C +ldc*m2;
+ double * C22 = C21 +n2;
+
+ // sub matrices in A
+
+ const double * A11 = A;
+ const double * A12 = A +k3;
+ const double * A13 = A +2*k3;
+ const double * A21 = A +lda*m2;
+ const double * A22 = A21 +k3;
+ const double * A23 = A21 +2*k3;
+
+
+ FFLAS::fzero(F,m,n,C,ldc);
+
+ /*
+ * Algo :
+ *
+ * S1 := A11 +A22*e;
+ * S3 := -(A11+A21);
+ * S4 := A11+A12*e;
+ * S5 := A21 - A22*e;
+ * S6 := A12*e + A23;
+ * S8 := -(A13+A23):
+ * S9 := A22*e + A23;
+ * S10 := -A12*e+A13;
+ *
+ * T1 := B11 +B22;
+ * T4 := e*B12+B22;
+ * T5 := B11 +e*B12;
+ * T6 := B21 +B32;
+ * T9 := B21 + e*B31;
+ * T10 := e*B31 +B32;
+ *
+ * P1 := Bini232(S1,T1 ,e);
+ * P2 := Bini232(A11,B22 ,e);
+ * P3 := Bini232(S3,B11,e);
+ * P4 := Bini232(S4,T4 ,e);
+ * P5 := Bini232(S5,T5 ,e);
+ * P6 := Bini232(S6,T6 ,e);
+ * P7 := Bini232(A23,B21 ,e);
+ * P8 := Bini232(S8,B32,e);
+ * P9 := Bini232(S9,T9 ,e);
+ * P10:= Bini232(S10,T10,e);
+ *
+ *
+ * C11 := evalm(P1-P4+(P6-P7+P8+P10)/e);
+ * C12 := evalm((-P2+P4)/e+P10) ;
+ * C21 := evalm(P5+(-P7+P9)/e) ;
+ * C22 := evalm((P1-P2+P3+P5)/e+P6-P9);
+ *
+ */
+
+ // could be just one band for the scalings
+
+
+
+ double * U = FFLAS::fflas_new<double>(m2*n2);
+ double * V = FFLAS::fflas_new<double>(std::max(k3,m2)*n2);
+ double * X = FFLAS::fflas_new<double>(m2*k3);
+ double * Y = FFLAS::fflas_new<double>(k3*n2);
+
+ // S1
+ double * eA22 = FFLAS::fflas_new<double>(std::max(m2,n2)*k3);
+ FFLAS::fscal(NoField,m2,k3,epsilon,A22,lda,eA22,k3);
+ FFLAS::fadd(NoField,m2,k3,eA22,k3,A11,lda,X,k3);
+ // T1
+ FFLAS::fadd(NoField,k3,n2,B11,ldb,B22,ldb,Y,n2);
+ // P1 (in U)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,Y,n2,U,n2,rec-1,epsilon);
+ // S3
+ negadd(m2,k3,A11,lda,A21,lda,X,k3);
+ // P3 (in V)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,B11,ldb,V,n2,rec-1,epsilon);
+ // C22 = (P1+P3)/e
+ addscal(NoField,m2,n2,U,n2,V,n2,(double)1/epsilon,C22,ldc);
+ // S6
+ double * eA12 = FFLAS::fflas_new<double>(m2*k3);
+ FFLAS::fscal(NoField,m2,k3,epsilon,A12,lda,eA12,k3);
+ FFLAS::fadd(NoField,m2,k3,eA12,k3,A23,lda,X,k3);
+ // T6
+ FFLAS::fadd(NoField,k3,n2,B21,ldb,B32,ldb,Y,n2);
+ // P6 (in V)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,Y,n2,V,n2,rec-1,epsilon);
+ // C22 += P6
+ FFLAS::faddin(NoField,m2,n2,V,n2,C22,ldc);
+ // S8
+ negadd(m2,k3,A13,lda,A23,lda,X,k3);
+ // P8 (in C11)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,B32,ldb,C11,ldc,rec-1,epsilon);
+ // C11 = (P8+P6)/e
+ addscalinf(NoField,m2,n2,V,n2,(double)1/epsilon,C11,ldc);
+ // C11 += P1
+ FFLAS::faddin(NoField,m2,n2,U,n2,C11,ldc);
+ // S4
+ FFLAS::fadd(NoField,m2,k3,eA12,k3,A11,lda,X,k3);
+ // T4
+ double * eB12 = V ; // FFLAS::fflas_new<double>(n2*k3);
+ FFLAS::fscal(NoField,k3,n2,epsilon,B12,ldb,eB12,n2);
+ FFLAS::fadd(NoField,k3,n2,eB12,n2,B22,ldb,Y,n2);
+ // P4 (in U)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,Y,n2,U,n2,rec-1,epsilon);
+ // C11 -= P4
+ FFLAS::fsubin(NoField,m2,n2,U,n2,C11,ldc);
+ // P2 (in C12)
+ gemm_bini_232_2(F,m2,n2,k3,A11,lda,B22,ldb,C12,ldc,rec-1,epsilon);
+ // S5
+ FFLAS::fsub(NoField,m2,k3,A21,lda,eA22,k3,X,k3);
+ // T5
+ FFLAS::fadd(NoField,k3,n2,eB12,n2,B11,ldb,Y,n2);
+ // FFLAS::fflas_delete( eB12);
+ // P5 (in V)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,Y,n2,V,n2,rec-1,epsilon);
+ // C22 += (P5-P2)/e
+ subscalacc(NoField,m2,n2,V,n2,C12,ldc,(double)1/epsilon,C22,ldc);
+ // C12 = (P4-P2)/e
+ subscalinf(NoField,m2,n2,U,n2,-(double)1/epsilon,C12,ldc);
+ // S9
+ FFLAS::fadd(NoField,m2,k3,eA22,k3,A23,lda,X,k3);
+ double * eB31 = eA22 ;
+ FFLAS::fscal(NoField,k3,n2,epsilon,B31,ldb,eB31,n2);
+ // T9
+ FFLAS::fadd(NoField,k3,n2,eB31,n2,B21,ldb,Y,n2);
+ // P9 (in U)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,Y,n2,U,n2,rec-1,epsilon);
+ // C22 -= P9
+ FFLAS::fsubin(NoField,m2,n2,U,n2,C22,ldc);
+ // P7 (in C21)
+ gemm_bini_232_2(F,m2,n2,k3,A23,lda,B21,ldb,C21,ldc,rec-1,epsilon);
+ // C11 = C11 - P7/e
+ add(m2,n2,-(double)1/epsilon,C21,ldc,C11,ldc,C11,ldc);
+ // C21 = (P9-P7)/e
+ subscalinf(NoField,m2,n2,U,n2,-(double)1/epsilon,C21,ldc);
+ // C21 += P5
+ FFLAS::faddin(NoField,m2,n2,V,n2,C21,ldc);
+ // S10
+ FFLAS::fsub(NoField,m2,k3,A13,lda,eA12,k3,X,k3);
+ FFLAS::fflas_delete( eA12);
+ // T10
+ FFLAS::fadd(NoField,k3,n2,eB31,n2,B32,ldb,Y,n2);
+ FFLAS::fflas_delete( eA22);
+ // P10 (in U)
+ gemm_bini_232_2(F,m2,n2,k3,X,k3,Y,n2,U,n2,rec-1,epsilon);
+ // C12 += P10
+ FFLAS::faddin(NoField,m2,n2,U,n2,C12,ldc);
+ // C11 += P10/e
+ add(m2,n2,(double)1/epsilon,U,n2,C11,ldc,C11,ldc);
+
+
+ FFLAS::fflas_delete( X );
+ FFLAS::fflas_delete( Y );
+ FFLAS::fflas_delete( U );
+ FFLAS::fflas_delete( V );
+
+
+ return C;
+
+ }
+
+#if 0
+ template<class Field>
+ double *
+ gemm_bini_322_sqrt(const Field & F
+ , const size_t m
+ , const size_t n
+ , const size_t k
+ , const double *A , const size_t lda
+ , const double *B , const size_t ldb
+ , double *C , const size_t ldc
+ , int rec
+ , const double & epsilon
+ )
+ {
+ Givaro::ZRing<double> NoField;
+ // const double p = (double)F.characteristic();
+ size_t M = (n>m)?std::min(k,m):std::min(k,n);
+ // std::cout << rec << ',' << M << std::endl;
+ // Field G(p*p);
+
+ if ( M < TRE || rec <= 0) {
+ // std::cout << "ffw" << std::endl;
+ return gemm_fflas(F, m,n,k, A,lda, B,ldb, C, ldc);
+ // return gemm_fflas(NoField, m,n,k, A,lda, B,ldb, C, ldc);
+ }
+
+ assert(k/2*2==k); // k divisible par 2
+ assert(n/3*3==n); // n divisible par 2
+ assert(m/2*2==m); // m divisible par 3
+
+ // std::cout << "tested" << std::endl;
+
+ size_t m2 = m/2;
+ size_t k2 = k/2;
+ size_t n3 = n/3;
+
+ // std::cout << "€ = " << epsilon << std::endl;
+
+ // sub matrices in A
+ const double * A11 = A;
+ const double * A12 = A +k2;
+ const double * A21 = A +lda*m2;
+ const double * A22 = A21 +k2;
+
+ // sub matrices in C
+ double * C11 = C;
+ double * C12 = C +n3;
+ double * C13 = C +2*n3;
+ double * C21 = C +ldc*m2;
+ double * C22 = C21 +n3;
+ double * C23 = C21 +2*n3;
+
+
+
+ // sub matrices in B
+ const double * B11 = B;
+ const double * B12 = B +n3;
+ const double * B13 = B +2*n3;
+ const double * B21 = B +ldb*k2;
+ const double * B22 = B21 +n3;
+ const double * B23 = B21 +2*n3;
+
+
+ FFLAS::fzero(F,m,n,C,ldc);
+
+ /*
+ * Algo :
+ * S1 := B11 +B22;
+ * S4 := e*B21+B22;
+ * S5 := B11 +e*B21;
+ * S6 := B12 +B23;
+ * S9 := B12 +e*B13;
+ * S3 := e*B13+B23;
+ *
+ * T1 := e*A11 +A22;
+ * T2 := A12 +A22;
+ * T4 := -e*A11+A12;
+ * T5 := e*A21 +A22;
+ * T6 := A11 +e*A22;
+ * T7 := A11 +A21;
+ * T9 := A21 -e*A22;
+ * T3 := A11 +e*A12;
+ *
+ * P1 := S1 *T1;
+ * P2 := T2 * B22;
+ * P10 := A22 * B11;
+ * P4 := S4 *T4;
+ * P5 := S5 *T5;
+ * P6 := S6 *T6;
+ * P7 := T7*B12;
+ * P8 := A11*B23;
+ * P9 := S9 *T9;
+ * P3 := S3*T3;
+ *
+ * C11 := (P1-P2-P10+P4)/e;
+ * C21 := (P10-P5)/(-e) ;
+ * C12 := P4+P6-P3 ;
+ * C22 := P1-P5+P9;
+ * C13 := (-P8+P3)/e;
+ * C23 := (P6-P7-P8+P9)/e;
+ *
+ */
+
+
+ // P10
+ gemm_bini_223_mem(F,m2,n3,k2,A22,lda,B11,ldb,C11,ldc,rec-1,epsilon);
+ // S5
+ double * Y = FFLAS::fflas_new<double>(k2*n3);
+ add(k2,n3,epsilon,B21,ldb,B11,ldb,Y,n3);
+ // T5
+ double * X = FFLAS::fflas_new<double>(m2*k2);
+ add(m2,k2,epsilon,A21,lda,A22,lda,X,k2);
+ // P5
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,Y,n3,C22,ldc,rec-1,epsilon);
+ // C12
+ subscal(NoField,m2,n3,C22,ldc,C11,ldc,(double)1/epsilon,C21,ldc);
+ // T2
+ FFLAS::fadd(NoField,m2,k2,A12,lda,A22,lda,X,k2);
+ // P2
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,B22,ldb,C13,ldc,rec-1,epsilon);
+ // C11
+ FFLAS::faddin(NoField,m2,n3,C13,ldc,C11,ldc);
+ // S1
+ FFLAS::fadd(NoField,k2,n3,B11,ldb,B22,ldb,Y,n3);
+ // T1
+ add(m2,k2,epsilon,A11,lda,A22,lda,X,k2);
+ // P1
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,Y,n3,C12,ldc,rec-1,epsilon);
+ // C22
+ FFLAS::fsub(NoField,m2,n3,C12,ldc,C22,ldc,C22,ldc);
+ // C11
+ FFLAS::fsub(NoField,m2,n3,C12,ldc,C11,ldc,C11,ldc);
+ // S4
+ add(k2,n3,epsilon,B21,ldb,B22,ldb,Y,n3);
+ // T4
+ add(m2,k2,-epsilon,A11,lda,A12,lda,X,k2);
+ // P4
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,Y,n3,C12,ldc,rec-1,epsilon);
+ // C11
+ addscalinf(NoField,m2,n3,C12,ldc,(double)1/epsilon,C11,ldc);
+ // S9
+ add(k2,n3,epsilon,B13,ldb,B12,ldb,Y,n3);
+ // T9
+ add(m2,k2,-epsilon,A22,lda,A21,lda,X,k2);
+ // P9
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,Y,n3,C23,ldc,rec-1,epsilon);
+ // C22
+ FFLAS::faddin(NoField,m2,n3,C23,ldc,C22,ldc);
+ // S6
+ FFLAS::fadd(NoField,k2,n3,B12,ldb,B23,ldb,Y,n3);
+ // T6
+ add(m2,k2,epsilon,A22,lda,A11,lda,X,k2);
+ // P6
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,Y,n3,C13,ldc,rec-1,epsilon);
+ // C21
+ FFLAS::faddin(NoField,m2,n3,C13,ldc,C12,ldc);
+ // C32
+ FFLAS::faddin(NoField,m2,n3,C13,ldc,C23,ldc);
+ // T7
+ FFLAS::fadd(NoField,m2,k2,A11,lda,A21,lda,X,k2);
+ // P7
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,B12,ldb,C13,ldc,rec-1,epsilon);
+ // if (epsilon > 1 && rec == 2) { FFLAS::finit(G,m2,n3,C31,ldc);}
+ // C32
+ FFLAS::fsubin(NoField,m2,n3,C13,ldc,C23,ldc);
+ // S3
+ add(k2,n3,epsilon,B13,ldb,B23,ldb,Y,n3);
+ // T3
+ add(m2,k2,epsilon,A12,lda,A11,lda,X,k2);
+ // P3
+ gemm_bini_223_mem(F,m2,n3,k2,X,k2,Y,n3,C13,ldc,rec-1,epsilon);
+ FFLAS::fflas_delete( Y );
+ FFLAS::fflas_delete( X );
+ // C21
+ FFLAS::fsubin(NoField,m2,n3,C13,ldc,C12,ldc);
+ // P8
+ Y = FFLAS::fflas_new<double>(m2*n3);
+ gemm_bini_223_mem(F,m2,n3,k2,A11,lda,B23,ldb,Y,n3,rec-1,epsilon);
+ // C31
+ subscalinf(NoField,m2,n3,Y,n3,(double)1/epsilon,C13,ldc);
+ // C32
+ subscalinf(NoField,m2,n3,Y,n3,(double)1/epsilon,C23,ldc);
+ FFLAS::fflas_delete( Y );
+
+
+ return C;
+
+ }
+#endif
+
+
+} // Rec
+} // Protected
+} // FFLAS
+
+namespace FFLAS { namespace Protected {
+
+ template<class Field>
+ typename Field::Element *
+ gemm_bini_p(const Field &F
+ , const size_t m
+ , const size_t n
+ , const size_t k
+ , const typename Field::Element *A
+ , const size_t lda
+ , const typename Field::Element *B
+ , const size_t ldb
+ , typename Field::Element *C
+ , const size_t ldc
+ , int rec
+ , size_t algo
+ )
+ {
+
+ assert(k/6*6==k); // k divisible par 6
+ assert(n/6*6==n); // n divisible par 6
+ assert(m/6*6==m); // m divisible par 6
+
+ // e-formule
+ double epsilon = (double) F.characteristic() ;
+ switch(algo) {
+ case 0 :
+ Rec::gemm_bini_322_mem(F,m,n,k,A,lda,B,ldb,C,ldc,rec,epsilon);
+ FFLAS::finit_fuzzy(F,m,n,C,ldc);
+ // FFLAS::finit(F,m,n,C,ldc);
+ break;
+ case 1 :
+ Rec::gemm_bini_322_0(F,m,n,k,A,lda,B,ldb,C,ldc,rec,epsilon);
+ FFLAS::finit_fuzzy(F,m,n,C,ldc);
+ // FFLAS::finit(F,m,n,C,ldc);
+ break;
+ case 2 :
+ Rec::gemm_bini_322_2(F,m,n,k,A,lda,B,ldb,C,ldc,rec,epsilon);
+ FFLAS::finit_fuzzy(F,m,n,C,ldc);
+ break;
+ case 3 :
+ Rec::gemm_bini_223_mem(F,m,n,k,A,lda,B,ldb,C,ldc,rec,epsilon);
+ FFLAS::finit_fuzzy(F,m,n,C,ldc);
+ // FFLAS::finit(F,m,n,C,ldc);
+ break;
+ case 4 :
+ Rec::gemm_bini_232_2(F,m,n,k,A,lda,B,ldb,C,ldc,rec,epsilon);
+ FFLAS::finit_fuzzy(F,m,n,C,ldc);
+ break;
+ case 5 :
+ Rec::gemm_bini_232_3(F,m,n,k,A,lda,B,ldb,C,ldc,rec,epsilon);
+ FFLAS::finit_fuzzy(F,m,n,C,ldc);
+ break;
+#if 0
+ case 8 : {
+ double epsilon2 = sqrt((double)epsilon);
+ std::cout << epsilon2 << std::endl;
+ Rec::gemm_bini_322_sqrt(F,m,n,k,A,lda,B,ldb,C,ldc,rec,epsilon2);
+ // FFLAS::finit_fuzzy(F,m,n,C,ldc);
+ for(size_t i = 0 ; i < m ; ++i) {
+ for(size_t j = 0 ; j < n ; ++j)
+ C[i*ldc+j] = rint(fmod(C[i*ldc+j],epsilon2));
+ }
+ break;
+ }
+#endif
+ default :
+ std::cout << " not an algo :" << algo << std::endl;;
+ exit(-1);
+ }
+
+
+
+ return C;
+
+ }
+
+ template<class Field>
+ typename Field::Element *
+ gemm_bini_e(const Field &F
+ , const size_t m
+ , const size_t n
+ , const size_t k
+ , const typename Field::Element *A
+ , const size_t lda
+ , const typename Field::Element *B
+ , const size_t ldb
+ , typename Field::Element *C
+ , const size_t ldc
+ , int rec
+ , size_t algo
+ )
+ {
+
+ assert(k/2*2==k); // k divisible par 2
+ assert(n/2*2==n); // n divisible par 2
+ assert(m/3*3==m); // m divisible par 3
+
+ // e-formule
+ double epsilon = 1./(1<<27);
+ switch(algo) {
+ case 0 :
+ Rec::gemm_bini_322_mem(F,m,n,k,A,lda,B,ldb,C,ldc,rec,epsilon);
+ break;
+ case 1 :
+ Rec::gemm_bini_322_0(F,m,n,k,A,lda,B,ldb,C,ldc,rec,epsilon);
+ break;
+ case 2 :
+ Rec::gemm_bini_322_2(F,m,n,k,A,lda,B,ldb,C,ldc,rec,epsilon);
+ break;
+ case 3 :
+ Rec::gemm_bini_223_mem(F,m,n,k,A,lda,B,ldb,C,ldc,rec,epsilon);
+ break;
+ case 4 :
+ Rec::gemm_bini_232_2(F,m,n,k,A,lda,B,ldb,C,ldc,rec,epsilon);
+ break;
+ case 5 :
+ Rec::gemm_bini_232_3(F,m,n,k,A,lda,B,ldb,C,ldc,rec,epsilon);
+ break;
+ default :
+ std::cout << " not an algo :" << algo << std::endl;;
+ exit(-1);
+ }
+
+
+ // vire les e.
+ // FFLAS::finit_fuzzy(F,m,n,C,ldc);
+ FFLAS::finit_fuzzy(F,m,n,C,ldc);
+
+ return C;
+
+ }
+
+ template<class Field>
+ typename Field::Element *
+ gemm_compress(const Field &F
+ , const size_t m
+ , const size_t n
+ , const size_t k
+ , const typename Field::Element *A
+ , const size_t lda
+ , const typename Field::Element *B
+ , const size_t ldb
+ , typename Field::Element *C
+ , const size_t ldc
+ , int rec
+ , size_t algo
+ )
+ {
+
+ assert(k/6*6==k); // k divisible par 6
+ assert(n/6*6==n); // n divisible par 6
+ assert(m/6*6==m); // m divisible par 6
+
+ switch(algo) {
+ case 0 :
+ fgemm_compressed<Field,true>(F,(int)m,(int)n,(int)k,A,(int)lda,B,(int)ldb,C,(int)ldc);
+ FFLAS::freduce(F,m,n,C,ldc);
+ break;
+ case 1 :
+ fgemm_compressed<Field,false>(F,(int)m,(int)n,(int)k,A,(int)lda,B,(int)ldb,C,(int)ldc);
+ FFLAS::freduce(F,m,n,C,ldc);
+ break;
+ default :
+ std::cout << " not an algo :" << algo << std::endl;;
+ exit(-1);
+ }
+
+
+
+ return C;
+
+ }
+
+} // Protected
+} // FFLAS
+
+template<class Field>
+void check_equal(const Field & F,int m,int n,
+ typename Field::Element * D,int ldd,
+ typename Field::Element * E,int lde,
+ const char * nomalgo, const char * madescr, int & ok_p)
+{
+ int faux = 0 ;
+ for (int i = 0 ; i < m ; ++i) {
+ for (int j = 0 ; j < n ; ++j) {
+ if (!F.areEqual(D[i*ldd+j],E[i*lde+j])) {
+ ++faux ;
+ }
+ }
+ }
+ if (faux) {
+ std::cout << nomalgo << " " << madescr << " : bad/all = " << faux << '/' << m*n << " ~~ " << (double)faux/(double)(m*n) << std::endl;
+ }
+ else ok_p ++ ;
+
+
+#if 1
+ if (faux && (n<20)) {
+ std::cout << "OK" << std::endl;
+ for (int i = 0 ; i < m ; ++i) {
+ for (int j = 0 ; j < n ; ++j)
+ std::cout << D[i*ldd+j] << ' ';
+ std::cout << std::endl;
+ }
+ std::cout << "KO" << std::endl;
+ for (int i = 0 ; i < m ; ++i) {
+ for (int j = 0 ; j < n ; ++j)
+ std::cout << E[i*lde+j] << ' ';
+ std::cout << std::endl;
+ }
+
+
+ std::cout << "Diff" << std::endl;
+ for (int i = 0 ; i < m ; ++i) {
+ for (int j = 0 ; j < n ; ++j)
+ std::cout << D[i*ldd+j]-E[i*lde+j] << ' ';
+ std::cout << std::endl;
+ }
+ }
+#endif
+}
+
+
+template<class Field>
+void test_algos(const Field &F, int m, int n, int k
+ , const typename Field::Element * A, int lda
+ , const typename Field::Element * B, int ldb
+ , int r
+ , time_v & tim_k, time_v & tim_e , time_v & tim_p
+ , int_v & ok_k, int_v & ok_e, int_v & ok_p
+ , FFLAS::Timer & tim_wd, int & nb_wd
+ , bool with_e
+ , bool with_k
+ )
+{
+ FFLAS::Timer tmp ;
+ typedef typename Field::Element Element;
+
+ Element * D = FFLAS::fflas_new<Element>(m*n);
+ Element * C = FFLAS::fflas_new<Element>(m*n);
+
+ tmp.clear();tmp.start();
+ fgemm(F,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,
+ m,n,k, 1, A,k, B,n, 0, D, n);
+ tmp.stop(); tim_wd += tmp ; nb_wd ++;
+
+ /* bini_p */
+ if (with_e) {
+ for (int algo = 0 ; algo < algos ; ++algo) {
+ tmp.clear();tmp.start();
+ FFLAS::Protected::gemm_bini_e(F,m,n,k,A,k,B,n,C,n,r,selec[algo]);
+ tmp.stop(); tim_e[algo] += tmp ;
+
+ /* checking */
+ check_equal(F,m,n,D,n,C,n,"bini_e",descr[algo],ok_e[algo]) ;
+ }
+ }
+
+ /* compress */
+ if (with_k && std::is_same<typename FieldTraits<Field>::category,FFLAS::FieldCategories::ModularTag>::value && (! FieldTraits<Field>::balanced)) {
+ for (int algo = 0 ; algo < algos_k ; ++algo) {
+ tmp.clear();tmp.start();
+ FFLAS::Protected::gemm_compress(F,m,n,k,A,k,B,n,C,n,r,selec_k[algo]);
+ tmp.stop(); tim_k[algo] += tmp ;
+
+ /* checking */
+ check_equal(F,m,n,D,n,C,n,"compress",descr_k[algo],ok_k[algo]) ;
+
+
+ }
+ }
+
+ /* bini_p */
+ for (int algo = 0 ; algo < algos ; ++algo) {
+ tmp.clear();tmp.start();
+ FFLAS::Protected::gemm_bini_p(F,m,n,k,A,k,B,n,C,n,r,selec[algo]);
+ tmp.stop(); tim_p[algo] += tmp ;
+
+ /* checking */
+ check_equal(F,m,n,D,n,C,n,"bini_p",descr[algo],ok_p[algo]) ;
+
+
+ }
+
+ FFLAS::fflas_delete(C);
+ FFLAS::fflas_delete(D);
+}
+
+template<class T>
+struct changeField {
+ typedef T other ;
+};
+
+template<>
+struct changeField<Modular<double> > {
+ typedef Givaro::Modular<float> other;
+};
+
+template<>
+struct changeField<ModularBalanced<double> > {
+ typedef ModularBalanced<float> other;
+};
+
+double descrip(int algo, int_v & ok_e, time_v & tim_e, int iters, const char ** madescr, const char * nom)
+{
+ int min_e = -1 ;
+ double bini_e = -1 ;
+ for (int i = 0 ; i < algo ; ++i){
+ if (ok_e[i] == (int)iters) {
+ double bini1 = tim_e[i].usertime()/(double)ok_e[i] ;
+ if (bini_e < 0) {
+ bini_e = bini1;
+ min_e = (int) i ;
+ }
+ else if (bini1 < bini_e) {
+ min_e = (int)i ;
+ bini_e = bini1 ;
+ }
+ }
+ }
+ for (int i = 0 ; i < algo ; ++i){
+ if (ok_e[i] == (int)iters) {
+ double bini1 = tim_e[i].usertime()/(double)ok_e[i] ;
+ std::cout << nom << " ( " << madescr[i] << " ) : " ;
+ if ((int)i == min_e) std::cout << " * " ;
+ else std::cout << " ";
+ std::cout << bini1 << 's'<< std::endl;
+ }
+ }
+
+ return bini_e ;
+}
+
+
+template<class Field>
+void test(int m, int k, int n, int p, int r, bool with_e, bool with_k, int iters = 4)
+{
+
+ typedef typename Field::Element Element;
+
+ Element * A = FFLAS::fflas_new<Element>(m*k);
+ Element * B = FFLAS::fflas_new<Element>(n*k);
+
+
+ Field F(p);
+ F.write(std::cout<< " * Field " ) << std::endl;
+
+ typedef typename changeField<Field>::other Field_f ;
+ typedef typename Field_f::Element Element_f ;
+ Field_f F_f(p);
+ Element_f * A_f = FFLAS::fflas_new<Element_f>(m*k);
+ Element_f * B_f = FFLAS::fflas_new<Element_f>(n*k);
+ Element_f * C_f = FFLAS::fflas_new<Element_f>(m*n);
+
+#if defined(NOTRANDOM)
+ int i0 ;
+ int j0 ;
+ Element p2 ; F.init(p2,(int)F.mOne/2);
+ std::cout << p2 << std::endl;
+#warning "not random"
+ for (int i = 0 ; i < m ; ++i)
+ for (int j = 0 ; j < k ; ++j) {
+ i0 = i/(m/3);
+ j0 = j/(k/2);
+ if (i0 == 0 and j0 == 0) A[i*k+j] = F.mOne ;
+ else if (i0 == 0 and j0 == 1) A[i*k+j] = F.zero ;
+ else if (i0 == 1 and j0 == 0) A[i*k+j] = F.mOne ;
+ else if (i0 == 1 and j0 == 1) A[i*k+j] = F.mOne ;
+ else if (i0 == 2 and j0 == 0) A[i*k+j] = F.mOne ;
+ else if (i0 == 2 and j0 == 1) A[i*k+j] = F.mOne ;
+ else A[i*k+j] = F.mOne ;
+ }
+ for (int i = 0 ; i < k ; ++i)
+ for (int j = 0 ; j < n ; ++j) {
+ i0 = i/(k/2);
+ j0 = j/(n/2);
+ if (i0 == 0 and j0 == 0) B[i*n+j] = F.mOne ;
+ else if (i0 == 0 and j0 == 1) B[i*n+j] = F.mOne ;
+ else if (i0 == 1 and j0 == 0) B[i*n+j] = F.mOne ;
+ else if (i0 == 1 and j0 == 1) B[i*n+j] = F.zero ;
+ else B[i*n+j] = F.mOne ;
+
+ }
+#endif
+
+ time_v tim_e(algos), tim_p(algos), tim_k(algos_k);
+ FFLAS::Timer tim_wd; tim_wd.clear();
+ FFLAS::Timer tim_wf; tim_wf.clear();
+ FFLAS::Timer tmp;
+ for (int i = 0 ; i < algos ; ++i) {
+ tim_e[i].clear();
+ tim_p[i].clear();
+ }
+ for (int i = 0 ; i < algos_k ; ++i) {
+ tim_k[i].clear();
+ }
+
+ int_v ok_p(algos,0), ok_e(algos,0), ok_k(algos_k,0);
+ int nb_wd = 0 , nb_wf = 0 ;
+
+ for (int b = 0 ; b < iters ; ++b) {
+ std::cout << "iter " << b+1 << " of " << iters << std::endl;
+#if not defined(NOTRANDOM)
+ FFPACK::RandomMatrix(F,A,m,k,k);
+ FFPACK::RandomMatrix(F,B,k,n,n);
+#endif
+ FFLAS::finit(F_f,m,k,A,k,A_f,k);
+ FFLAS::finit(F_f,k,n,B,n,B_f,n);
+
+ tmp.clear();tmp.start();
+ fgemm(F_f,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,
+ m,n,k, 1, A_f,k, B_f,n, 0, C_f, n);
+ tmp.stop(); tim_wf += tmp ; nb_wf ++ ;
+
+ test_algos(F,m,n,k,A,k,B,n,r,
+ tim_k,tim_e,tim_p,
+ ok_k,ok_e,ok_p,
+ tim_wd,nb_wd,
+ with_e,with_k);
+ }
+ std::cout << std::endl << "results" << std::endl;
+
+ double bini_e = descrip(algos,ok_e,tim_e,iters,descr,"Bini_e");
+ double bini_p = descrip(algos,ok_p,tim_p,iters,descr,"Bini_p");
+ double bini_k = descrip(algos_k,ok_k,tim_k,iters,descr_k,"Bini_k");
+
+
+ double t_wd = tim_wd.usertime()/(double)(nb_wd);
+ double t_wf = tim_wf.usertime()/(double)(nb_wf);
+
+ std::cout << "Wino d : " << t_wd << 's'<< std::endl;
+ std::cout << "Wino f : " << t_wf << 's'<< std::endl;
+ double wino = std::min(t_wd,t_wf) ;
+ if (bini_e>=0)
+ std::cout << "Gain e: " << ((bini_e-wino)/wino)*100 << '%' << std::endl;
+ if (bini_p>=0)
+ std::cout << "Gain p: " << ((bini_p-wino)/wino)*100 << '%' << std::endl;
+ if (bini_k>=0)
+ std::cout << "Gain k: " << ((bini_k-wino)/wino)*100 << '%' << std::endl;
+
+
+
+
+ FFLAS::fflas_delete( A );
+ FFLAS::fflas_delete( B);
+
+ FFLAS::fflas_delete( A_f );
+ FFLAS::fflas_delete( B_f);
+ FFLAS::fflas_delete( C_f);
+}
+
+
+
+int main(int ac, char **av) {
+ static int m = 36 ;
+ static int n = 12 ;
+ static int k = 18 ;
+ static int p = 101;
+ bool eps = false ;
+ bool kom = false ;
+ int r = 1 ;
+ int seed = (int) time(NULL);
+ int iters = 4;
+
+ static Argument as[] = {
+ { 'p', "-p P", "Set the field characteristic.", TYPE_INT , &p },
+ { 'n', "-n N", "Set the number of cols in C.", TYPE_INT , &n },
+ { 'm', "-m N", "Set the number of rows in C.", TYPE_INT , &m },
+ { 'k', "-k N", "Set the number of rows in B.", TYPE_INT , &k },
+ { 'r', "-k N", "Set the recursive number Bini.", TYPE_INT , &r },
+ { 'i', "-i N", "Set the numebr of iterations.", TYPE_INT , &iters },
+ { 's', "-s N", "Set the seed .", TYPE_INT , &seed },
+ { 'e', "-e " , "epsilon .", TYPE_NONE , &eps },
+ { 'c', "-c " , "compress .", TYPE_NONE , &kom},
+ END_OF_ARGUMENTS
+ };
+ FFLAS::parseArguments(ac,av,as);
+
+ srand(seed);
+ srand48(seed);
+
+ std::cout << ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl;
+ std::cout << "size: " << m << ',' << k << ',' << n << std::endl;
+ std::cout << "mod : " << p << std::endl;
+ std::cout << "rec : " << r << std::endl;
+ std::cout << "seed: " << seed << std::endl;
+ std::cout << "thre: " << TRE << std::endl;
+ std::cout << "=====================================================" << std::endl;
+ test<Modular<double> > (m,k,n,p,r,eps,kom,iters);
+ std::cout << "=====================================================" << std::endl;
+ test<ModularBalanced<double> > (m,k,n,p,r,eps,kom,iters);
+ std::cout << "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" << std::endl;
+
+ return 0;
+}
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
diff --git a/tests/test-charpoly.C b/tests/test-charpoly.C
new file mode 100644
index 0000000..9fe71d0
--- /dev/null
+++ b/tests/test-charpoly.C
@@ -0,0 +1,159 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+//--------------------------------------------------------------------------
+// Test for charpoly
+//
+//--------------------------------------------------------------------------
+// Clement Pernet
+//-------------------------------------------------------------------------
+
+#include <iomanip>
+#include <iostream>
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/field/modular-positive.h"
+// #include "fflas-ffpack/field/modular-int.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+#include "fflas-ffpack/utils/args-parser.h"
+
+
+using namespace std;
+
+//typedef ModularBalanced<double> Field;
+typedef Givaro::ModularBalanced<double> Field;
+//typedef Givaro::Modular<double> Field;
+//typedef Givaro::Modular<float> Field;
+//typedef Givaro::Modular<int> Field;
+//typedef Givaro::Modular<double> Field;
+
+typedef vector<Field::Element> Polynomial;
+
+using namespace FFPACK;
+template <class Field, class Polynomial>
+void printPolynomial (const Field &F, const Polynomial &v)
+{
+ for (int i = v.size () - 1; i >= 0; i--) {
+ F.write (cout, v[i]);
+ if (i > 0)
+ cout << " x^" << i << " + ";
+ }
+ cout << endl;
+}
+
+template<class Field>
+bool launch_test(const Field & F, typename Field::Element * A, int n,
+ size_t p, size_t nbit, FFPACK::FFPACK_CHARPOLY_TAG CT)
+{
+ FFLAS::Timer tim,t; t.clear();tim.clear();
+ list<Polynomial> P_list;
+ for(size_t i = 0;i<nbit;++i){
+ P_list.clear();
+ t.clear();
+ t.start();
+
+ FFPACK::CharPoly (F, P_list, n, A, n, CT);
+ t.stop();
+ tim+=t;
+ /* test */
+ // apply P_list.A.V and check 0 for random V
+ }
+
+#ifdef _FF_TIMING
+ double mflops = (2+(2.0/3.0))*(n*n/1000000.0)*nbit*n/tim.usertime();
+ list<Polynomial>::iterator P_it = P_list.begin();
+ for (;P_it!=P_list.end();++P_it)
+ printPolynomial ( F, *P_it);
+
+ F.write(cerr<<"n = "<<n<<" #inv. fact = "<<P_list.size()<<" Charpoly (A) over ") << " : t= "
+ << tim.usertime()/nbit
+ << " s, Mffops = "<<mflops
+ << endl;
+
+ cout<<n<<" "<<P_list.size()<<" "<<mflops<<" "<<tim.usertime()/nbit<<endl;
+#endif
+ return true ;
+
+}
+
+int main(int argc, char** argv)
+{
+
+ cerr<<setprecision(10);
+
+ static size_t p = 13; // characteristic
+ static size_t nbit = 2; // repetitions
+ static int n = 100;
+ static std::string file = "" ; // file where
+ static int variant =0;
+
+ static Argument as[] = {
+ { 'p', "-p P", "Set the field characteristic.", TYPE_INT , &p },
+ { 'n', "-n N", "Set the size of the matrix.", TYPE_INT , &p },
+ { 'r', "-r R", "Set number of repetitions.", TYPE_INT , &nbit },
+ { 'f', "-f file", "Set input file", TYPE_STR, &file },
+ { 'a', "-a algorithm", "Set the algorithm variant", TYPE_INT, &variant },
+ END_OF_ARGUMENTS
+ };
+ FFLAS::parseArguments(argc,argv,as);
+
+ FFPACK::FFPACK_CHARPOLY_TAG CT;
+ switch (variant){
+ case 0: CT = FfpackLUK; break;
+ case 1: CT = FfpackKG; break;
+ case 2: CT = FfpackDanilevski; break;
+ case 3: CT = FfpackKGFast; break;
+ case 4: CT = FfpackKGFastG; break;
+ case 5: CT = FfpackHybrid; break;
+ case 6: CT = FfpackArithProg; break;
+ default: CT = FfpackLUK; break;
+ }
+ Field F((long unsigned int)p);
+ Field::Element * A;
+ if (!file.empty()) {
+ const char * filestring = file.c_str();
+ A = read_field<Field>(F,const_cast<char*>(filestring),&n,&n);
+ bool passed = launch_test<Field>(F,A,n,p,nbit,CT);
+ FFLAS::fflas_delete( A);
+ return !passed ;
+ }
+ else {
+ std::cerr << std::endl << "##################################"<< std::endl;
+ std::cerr << std::endl << " **** not implemented yet ! ***" << std::endl;
+ std::cerr << std::endl << "##################################"<< std::endl;
+ // create A random
+ // create A diagonal
+ // create A nilpotent
+ // create A non invertible
+ return false ;
+ }
+
+}
+
diff --git a/tests/test-colechelon.C b/tests/test-colechelon.C
new file mode 100644
index 0000000..a21f7ab
--- /dev/null
+++ b/tests/test-colechelon.C
@@ -0,0 +1,203 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+//
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for the column echelon factorisation
+//--------------------------------------------------------------------------
+// usage: test-colechelon p A n, for n computations
+// of A over Z/pZ
+//-------------------------------------------------------------------------
+
+//-------------------------------------------------------------------------
+//#define DEBUG 1
+// Debug option 0: no debug
+// 1: check A = LQUP
+//-------------------------------------------------------------------------
+using namespace std;
+
+
+//#define __LUDIVINE_CUTOFF 1
+#include <iostream>
+#include <iomanip>
+#include "Matio.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+using namespace FFPACK ;
+typedef Givaro::Modular<double> Field;
+
+int main(int argc, char** argv){
+ cerr<<setprecision(20);
+ int i,j,nbf,m,n;
+ int R=0;
+
+ if (argc!=4){
+ cerr<<"usage : test-colechelon <p> <A> <i>"<<endl
+ <<" to do i Column Echelon factorisation of A"
+ <<endl;
+ exit(-1);
+ }
+ Field F((uint64_t)atoi(argv[1]));
+ Field::Element * A;
+
+ A = read_field(F,argv[2],&m,&n);
+
+ size_t *P = FFLAS::fflas_new<size_t>(n);
+ size_t *Q = FFLAS::fflas_new<size_t>(m);
+
+ // size_t cutoff = atoi(argv[3]);
+ nbf = atoi(argv[3]);
+
+ FFLAS::Timer tim,timc;
+ timc.clear();
+
+
+ for ( i=0;i<nbf;i++){
+ if (i) {
+ FFLAS::fflas_delete( A);
+ A = read_field(F,argv[2],&m,&n);
+ }
+ for (j=0;j<n;j++)
+ P[j]=0;
+ for (j=0;j<m;j++)
+ Q[j]=0;
+ tim.clear();
+ tim.start();
+ R = (int)FFPACK::ColumnEchelonForm (F, m, n, A, n, P, Q);
+ tim.stop();
+ timc+=tim;
+ }
+ //write_field (F,cerr<<"Result = "<<endl, A, m,n,n);
+
+// cerr<<"P = [";
+// for (size_t i=0; i<n; ++i)
+// cerr<<P[i]<<" ";
+// cerr<<"]"<<endl;
+// cerr<<"Q = [";
+// for (size_t i=0; i<m; ++i)
+// cerr<<Q[i]<<" ";
+// cerr<<"]"<<endl;
+#if DEBUG
+ Field::Element * L = FFLAS::fflas_new<Field::Element>(m*n);
+ Field::Element * U = FFLAS::fflas_new<Field::Element>(n*n);
+ Field::Element * X = FFLAS::fflas_new<Field::Element>(m*n);
+
+ Field::Element zero,one;
+ F.init(zero,0.0);
+ F.init(one,1.0);
+ for (int i=0; i<R; ++i){
+ for (int j=0; j<=i; ++j)
+ F.assign ( *(U + i*n + j), zero);
+ F.init (*(U+i*(n+1)),one);
+ for (int j=i+1; j<n; ++j)
+ F.assign (*(U + i*n + j), *(A+ i*n+j));
+ }
+ for (int i=R;i<n; ++i){
+ for (int j=0; j<n; ++j)
+ F.assign(*(U+i*n+j), zero);
+ F.init(*(U+i*(n+1)),one);
+ }
+ FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, n, 0, R, U, n, P);
+
+ for ( int i=0; i<m; ++i ){
+ int j=0;
+ for (; j <= ((i<R)?i:R) ; ++j )
+ F.assign( *(L + i*m+j), *(A+i*n+j));
+ for (; j<m; ++j )
+ F.assign( *(L+i*m+j), zero);
+ }
+// cerr<<"P = ";
+// for (size_t i=0; i<n;++i)
+// cerr<<" "<<P[i];
+// cerr<<endl;
+// cerr<<"Q = ";
+// for (size_t i=0; i<m;++i)
+// cerr<<" "<<Q[i];
+// cerr<<endl;
+
+// write_field(F,cerr<<"A = "<<endl,A,m,n,n);
+// write_field(F,cerr<<"L = "<<endl,L,m,n,n);
+// write_field(F,cerr<<"U = "<<endl,U,m,n,n);
+
+ Field::Element * B = read_field(F,argv[2],&m,&n);
+
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,n, 1.0,
+ B, n, U, n, 0.0, X,n);
+ //FFLAS::fflas_delete( A);
+
+ bool fail = false;
+ for (int i=0; i<m; ++i)
+ for (int j=0; j<n; ++j)
+ if (!F.areEqual (*(L+i*n+j), *(X+i*n+j)))
+ fail=true;
+
+ // write_field(F,cerr<<"X = "<<endl,X,m,n,n);
+// write_field(F,cerr<<"L = "<<endl,L,m,n,n);
+
+ FFLAS::fflas_delete( B);
+ if (fail)
+ cerr<<"FAIL"<<endl;
+
+
+ else
+ cerr<<"PASS"<<endl;
+
+// cout<<m<<" "<<n<<" M"<<endl;
+// for (size_t i=0; i<m; ++i)
+// for (size_t j=0; j<n; ++j)
+// if (!F.isZero(*(A+i*n+j)))
+// cout<<i+1<<" "<<j+1<<" "<<(*(A+i*n+j))<<endl;
+// cout<<"0 0 0"<<endl;
+
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+#endif
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+
+ double t = timc.usertime();
+ double numops = m*m/1000.0*(n-m/3.0);
+
+ cerr<<m<<"x"<< n
+ << " : rank = " << R << " ["
+ << ((double)nbf/1000.0*(double)numops / t)
+ << " MFops "
+ << " in "
+ << t/nbf<<"s"
+ <<"]"<< endl;
+// cout<<m
+// <<" "<<((double)nbf/1000.0*(double)numops / t)
+// <<" "<<t/nbf
+// <<endl;
+
+ return 0;
+}
diff --git a/tests/test-compressQ.C b/tests/test-compressQ.C
index 9894fba..cad90c8 100644
--- a/tests/test-compressQ.C
+++ b/tests/test-compressQ.C
@@ -5,20 +5,20 @@
* Copyright (C) FFLAS-FFPACK
* Written by Clément Pernet
* This file is Free Software and part of FFLAS-FFPACK.
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -37,20 +37,22 @@
//-------------------------------------------------------------------------
//#define DEBUG 0
+#include "fflas-ffpack/fflas-ffpack-config.h"
#include <iostream>
#include <list>
#include <vector>
-#include "Matio.h"
-#include "utils/timer.h"
-#include "fflas-ffpack/field/modular-balanced.h"
-#include "fflas-ffpack/ffpack/ffpack.h"
+#include <givaro/modular-balanced.h>
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
#include "fflas-ffpack/utils/args-parser.h"
-using namespace std;
+#include "Matio.h"
-typedef FFPACK:: Modular<double> Field;
+using namespace std;
+typedef Givaro::Modular<double> Field;
+//! @bug does not belong here
template<class T>
std::ostream& printvect(std::ostream& o, vector<T>& vect){
for(size_t i=0; i < vect.size(); ++i)
@@ -72,9 +74,9 @@ int main(int argc, char** argv)
Field F(65521);
size_t N = 17;
- double * A = new double[N*N];
- double * tmp = new double[N*N];
- size_t * deg = new size_t[N];
+ double * A = FFLAS::fflas_new<double>(N*N);
+ double * tmp = FFLAS::fflas_new<double>(N*N);
+ size_t * deg = FFLAS::fflas_new<size_t>(N);
for (size_t i=0; i<(size_t)N*N; ++i)
A[i] = 0;
@@ -95,22 +97,32 @@ int main(int argc, char** argv)
for (size_t i=0; i<size_t(N); ++i)
A[11+i*N] = A[7+i*N] = A[3+i*N] = double(i % 10);
- double * B = new double[N*N] ;
- FFLAS::fcopy(F,N*N,B,1,A,1);
+ double * B = FFLAS::fflas_new<double>(N*N) ;
+ FFLAS::fassign(F,N*N,A,1,B,1);
// write_field(F, cerr, A, N, N, N);
- FFPACK::CompressRowsQK (F, N, A+9*N, N, tmp, N, deg+3, 4, 3 );
+ FFPACK::Protected::CompressRowsQK (F, N, A+9*N, N, tmp, N, deg+3, 4, 3 );
// write_field(F, cerr, A, N, N, N);
- FFPACK::DeCompressRowsQK (F, N, N-9, A+9*N, N, tmp, N, deg+3, 4, 3 );
+ FFPACK::Protected::DeCompressRowsQK (F, N, N-9, A+9*N, N, tmp, N, deg+3, 4, 3 );
// write_field(F, cerr, A, N, N, N);
+ int ok = 0 ;
for (size_t i = 0 ; i < (size_t)N * (size_t)N ; ++i)
if (A[i] != B[i])
- return 1 ;
- return 0 ;
+ {
+ ok = 1 ;
+ break ;
+ }
+
+ FFLAS::fflas_delete( A );
+ FFLAS::fflas_delete( tmp) ;
+ FFLAS::fflas_delete(deg) ;
+ FFLAS::fflas_delete( B );
+
+ return ok ;
}
diff --git a/tests/test-det.C b/tests/test-det.C
new file mode 100644
index 0000000..5ecead0
--- /dev/null
+++ b/tests/test-det.C
@@ -0,0 +1,138 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for det
+//
+//--------------------------------------------------------------------------
+// Clement Pernet
+//-------------------------------------------------------------------------
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iomanip>
+#include <iostream>
+#include <givaro/modular-balanced.h>
+
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#include "test-utils.h"
+#include "Matio.h"
+
+
+// using namespace std;
+template<class Field>
+bool test_det(Field &F, size_t n, int iter)
+{
+ typedef typename Field::Element Element;
+ //! @todo test with stride
+ Element * A = FFLAS::fflas_new<Element>(n*n);
+ // A = read_field(F,argv[2],&n,&n);
+
+ bool pass = true;
+#ifdef TIME_IT
+ FFLAS::Timer tim,t; t.clear();tim.clear();
+#endif
+ Element d=0;
+ Element dt=-4;
+ for(int i = 0;i<iter;++i){
+ F.init(dt,dt);
+ // std::cout << dt << std::endl;
+ FFPACK::RandomMatrixWithDet(F,A,dt,n,n);
+#ifdef TIME_IT
+ t.clear();
+ t.start();
+#endif
+ d = FFPACK::Det (F, n, n, A, n);
+ // std::cout << d << std::endl;
+#ifdef TIME_IT
+ t.stop();
+ tim+=t;
+#endif
+ // if (i+1<iter){
+ // FFLAS::fflas_delete( A);
+ // A = read_field(F,argv[2],&n,&n);
+ if (dt != d) {
+ pass = false;
+ break;
+ }
+ ++dt;
+ }
+
+#ifdef TIME_IT
+ double mflops = 2.0/3.0*(n*n/1000000.0)*iter*n/tim.usertime();
+ F.write (std::cerr<<"n = "<<n<<" Det (A) = ",d)
+ << " mod "<<atoi(argv[1])<<" : t= "
+ << tim.usertime()/(double)iter
+ << " s, Mffops = "<<mflops
+ << std::endl;
+
+ std::cout<<n<<" "<<mflops<<" "<<tim.usertime()/(double)iter<<std::endl;
+#endif
+ FFLAS::fflas_delete( A);
+ return pass;
+ }
+
+int main(int argc, char** argv)
+{
+
+ static int iters =10 ;
+ static uint64_t p = 65521 ;
+ static size_t n = 200 ;
+
+ static Argument as[] = {
+ { 'p', "-p P", "Set the field characteristic.", TYPE_INT , &p },
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iters },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+ // int n;
+ // int iter=atoi(argv[3]); // number of times the product is performed
+ std::cerr<<std::setprecision(10);
+#if 0 /* don't know how to do this in parseArguments ; grosse flemme */
+ if (argc != 4) {
+ std::cerr<<"Usage : test-det <p> <A> <<i>"
+ <<std::endl
+ <<" to compute the determinant of A mod p (i computations)"
+ <<std::endl;
+ exit(-1);
+ }
+#endif
+ bool pass = true ;
+ typedef Givaro::ModularBalanced<double> Field;
+ Field F(p);
+ pass &= test_det(F,n,iters);
+ // pass &= test_det(F,0,iters);
+
+ return ((pass==true)?0:1);
+}
diff --git a/tests/test-echelon.C b/tests/test-echelon.C
new file mode 100644
index 0000000..ea609de
--- /dev/null
+++ b/tests/test-echelon.C
@@ -0,0 +1,433 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+//--------------------------------------------------------------------------
+// Test for the echelon factorisation
+//--------------------------------------------------------------------------
+
+//#define __LUDIVINE_CUTOFF 1
+
+#define __FFLASFFPACK_SEQUENTIAL
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iostream>
+#include <iomanip>
+#include <givaro/modular-balanced.h>
+#include <givaro/udl.h>
+
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#include "test-utils.h"
+#include "Matio.h"
+
+using namespace std;
+using namespace FFPACK;
+using Givaro::Modular;
+using Givaro::ModularBalanced;
+
+template<class Field>
+bool
+test_colechelon(Field &F, size_t m, size_t n, size_t r, size_t iters, FFPACK::FFPACK_LU_TAG LuTag)
+{
+ typedef typename Field::Element Element ;
+ Element * A = FFLAS::fflas_new (F,m,n);
+ Element * B = FFLAS::fflas_new (F,m,n);
+ Element * L = FFLAS::fflas_new (F,m,n);
+ Element * U = FFLAS::fflas_new (F,n,n);
+ Element * X = FFLAS::fflas_new (F,m,n);
+ size_t lda = n; //!@todo check lda
+
+ size_t *P = FFLAS::fflas_new<size_t>(n);
+ size_t *Q = FFLAS::fflas_new<size_t>(m);
+ size_t R = (size_t)-1;
+
+ bool pass=true;
+
+ for (size_t l=0;l<iters;l++){
+ R = (size_t)-1;
+ RandomMatrixWithRank(F,A,lda,r,m,n);
+ FFLAS::fassign(F,m,n,A,lda,B,lda);
+ for (size_t j=0;j<n;j++) P[j]=0;
+ for (size_t j=0;j<m;j++) Q[j]=0;
+
+ R = FFPACK::ColumnEchelonForm (F, m, n, A, n, P, Q, true, LuTag);
+
+ if (R != r) {pass = false; break;}
+
+ FFPACK::getEchelonTransform (F, FFLAS::FflasLower, FFLAS::FflasUnit, m,n,R,P,Q,A,lda,U,n, LuTag);
+
+ FFPACK::getEchelonForm (F, FFLAS::FflasLower, FFLAS::FflasUnit, m,n,R,Q,A,n,L,n,false, LuTag);
+
+ // Testing if C is in col echelon form
+ size_t nextpiv = 0;
+ for (size_t j=0; j<R; ++j){
+ size_t i=0;
+ while ((i < m) && F.isZero (L[i*n+j])) i++;
+ if (i==m) // zero column in the first R columns
+ pass = false;
+ if (i < nextpiv) // not in echelon form
+ pass = false;
+ nextpiv = i+1;
+ }
+ pass &= FFLAS::fiszero (F, m, n-R, L+R, n);
+ // Testing A U = L
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,n, 1.0, B, n, U, n, 0.0, X,n);
+
+ pass &= FFLAS::fequal(F, m, n, L, n, X, n);
+
+ if (!pass) {
+ std::cerr<<"FAIL"<<std::endl;
+ break;
+ }
+ }
+
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ return pass;
+}
+
+template<class Field>
+bool
+test_rowechelon(Field &F, size_t m, size_t n, size_t r, size_t iters, FFPACK::FFPACK_LU_TAG LuTag)
+{
+ typedef typename Field::Element Element ;
+ Element * A = FFLAS::fflas_new (F,m,n);
+ Element * B = FFLAS::fflas_new (F,m,n);
+ Element * L = FFLAS::fflas_new (F,m,m);
+ Element * U = FFLAS::fflas_new (F,m,n);
+ Element * X = FFLAS::fflas_new (F,m,n);
+ size_t lda = n; //!@todo check lda
+
+ size_t *P = FFLAS::fflas_new<size_t>(m);
+ size_t *Q = FFLAS::fflas_new<size_t>(n);
+ size_t R = (size_t)-1;
+
+ bool pass=true;
+
+ for (size_t l=0;l<iters;l++){
+ R = (size_t)-1;
+ RandomMatrixWithRank(F,A,lda,r,m,n);
+ FFLAS::fassign(F,m,n,A,lda,B,lda);
+ for (size_t j=0;j<m;j++) P[j]=0;
+ for (size_t j=0;j<n;j++) Q[j]=0;
+ //std::cerr<<"=========================="<<std::endl;
+ R = FFPACK::RowEchelonForm (F, m, n, A, n, P, Q, true, LuTag);
+
+ if (R != r) {pass = false; break;}
+
+ FFPACK::getEchelonTransform (F, FFLAS::FflasUpper, FFLAS::FflasUnit, m,n,R,P,Q,A,lda,L,m, LuTag);
+
+ FFPACK::getEchelonForm (F, FFLAS::FflasUpper, FFLAS::FflasUnit, m,n,R,Q,A,n,U,n, false, LuTag);
+
+ // Testing if U is in row echelon form
+ size_t nextpiv = 0;
+ for (size_t j=0; j<R; ++j){
+ size_t i=0;
+ while ((i < n) && F.isZero (U[i+j*n])) i++;
+ if (i==n) // zero row in the first R columns
+ pass = false;
+ if (i < nextpiv) // not in echelon form
+ pass = false;
+ nextpiv = i+1;
+ }
+ pass &= FFLAS::fiszero (F, m-R, n, U+R*n, n);
+
+ // Testing A U = L
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,m, 1.0, L, m, B, n, 0.0, X,n);
+
+ pass &= FFLAS::fequal(F, m, n, U, n, X, n);
+
+ if (!pass) {
+ std::cerr<<"FAIL"<<std::endl;
+ // write_field(F,std::cerr<<"A = "<<std::endl,B,m,n,lda);
+ // write_field(F,std::cerr<<"InplaceEchelon = "<<std::endl,A,m,n,lda);
+ // std::cerr<<"P = ["; for (size_t i=0; i<m; ++i) std::cerr<<P[i]<<", ";std::cerr<<"]\n";
+ // std::cerr<<"Q = ["; for (size_t i=0; i<n; ++i) std::cerr<<Q[i]<<", ";std::cerr<<"]\n";
+
+ // write_field(F,std::cerr<<"RowEchelon = "<<std::endl,U,m,n,n);
+ // write_field(F,std::cerr<<"Transform = "<<std::endl,L,m,m,m);
+ break;
+ }
+ }
+
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ return pass;
+}
+
+template<class Field>
+bool
+test_redcolechelon(Field &F, size_t m, size_t n, size_t r, size_t iters, FFPACK::FFPACK_LU_TAG LuTag)
+{
+ typedef typename Field::Element Element ;
+ Element * A = FFLAS::fflas_new (F,m,n);
+ Element * B = FFLAS::fflas_new (F,m,n);
+ Element * L = FFLAS::fflas_new (F,m,n);
+ Element * U = FFLAS::fflas_new (F,n,n);
+ Element * X = FFLAS::fflas_new (F,m,n);
+ size_t lda = n; //!@todo check lda
+
+ size_t *P = FFLAS::fflas_new<size_t>(n);
+ size_t *Q = FFLAS::fflas_new<size_t>(m);
+ size_t R = (size_t)-1;
+
+ bool pass=true;
+
+ for (size_t l=0;l<iters;l++){
+ R = (size_t)-1;
+ RandomMatrixWithRank(F,A,lda,r,m,n);
+ FFLAS::fassign(F,m,n,A,lda,B,lda);
+ for (size_t j=0;j<n;j++) P[j]=0;
+ for (size_t j=0;j<m;j++) Q[j]=0;
+
+ R = FFPACK::ReducedColumnEchelonForm (F, m, n, A, n, P, Q, true, LuTag);
+
+ if (R != r) {pass = false; break;}
+
+ FFPACK::getReducedEchelonTransform (F, FFLAS::FflasLower, m,n,R,P,Q,A,lda,U,n, LuTag);
+
+ FFPACK::getReducedEchelonForm (F, FFLAS::FflasLower, m,n,R,Q,A,n,L,n, false, LuTag);
+
+ // Testing if C is in reduced col echelon form
+ size_t nextpiv = 0;
+ for (size_t j=0; j<R; ++j){
+ size_t i=0;
+ while ((i < m) && F.isZero (L[i*n+j])) i++;
+ if (i==m) // zero column in the first R columns
+ pass = false;
+ if (i < nextpiv) // not in echelon form
+ pass = false;
+ if (j) // is pivot row reduced
+ pass &= FFLAS::fiszero(F, j-1, L + i*n, 1);
+ pass &= F.isOne(L[j+i*n]);
+ nextpiv = i+1;
+ }
+ pass &= FFLAS::fiszero (F, m, n-R, L+R, n);
+ // Testing A U = L
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,n, 1.0, B, n, U, n, 0.0, X,n);
+
+ pass &= FFLAS::fequal(F, m, n, L, n, X, n);
+
+ if (!pass) {
+ std::cerr<<"FAIL"<<std::endl;
+ break;
+ }
+ }
+
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ return pass;
+}
+template<class Field>
+bool
+test_redrowechelon(Field &F, size_t m, size_t n, size_t r, size_t iters, FFPACK::FFPACK_LU_TAG LuTag)
+{
+ typedef typename Field::Element Element ;
+ Element * A = FFLAS::fflas_new (F,m,n);
+ Element * B = FFLAS::fflas_new (F,m,n);
+ Element * L = FFLAS::fflas_new (F,m,m);
+ Element * U = FFLAS::fflas_new (F,m,n);
+ Element * X = FFLAS::fflas_new (F,m,n);
+ size_t lda = n; //!@todo check lda
+
+ size_t *P = FFLAS::fflas_new<size_t>(m);
+ size_t *Q = FFLAS::fflas_new<size_t>(n);
+ size_t R = (size_t)-1;
+
+ bool pass=true;
+
+ for (size_t l=0;l<iters;l++){
+ R = (size_t)-1;
+ RandomMatrixWithRank(F,A,lda,r,m,n);
+ FFLAS::fassign(F,m,n,A,lda,B,lda);
+ for (size_t j=0;j<m;j++) P[j]=0;
+ for (size_t j=0;j<n;j++) Q[j]=0;
+
+ R = FFPACK::ReducedRowEchelonForm (F, m, n, A, n, P, Q, true, LuTag);
+
+
+ if (R != r) {pass = false; break;}
+
+ FFPACK::getReducedEchelonTransform (F, FFLAS::FflasUpper, m,n,R,P,Q,A,lda,L,m, LuTag);
+
+ FFPACK::getReducedEchelonForm (F, FFLAS::FflasUpper, m,n,R,Q,A,n,U,n, false, LuTag);
+
+ // Testing if U is in row echelon form
+ size_t nextpiv = 0;
+ for (size_t j=0; j<R; ++j){
+ size_t i=0;
+ while ((i < n) && F.isZero (U[i+j*n])) i++;
+ if (i==n) // zero row in the first R rows
+ pass = false;
+ if (i < nextpiv) // not in echelon form
+ pass = false;
+ if (j) // is pivot row reduced
+ pass &= FFLAS::fiszero(F, j-1, U + i, n);
+ pass &= F.isOne(U[j*n+i]);
+ nextpiv = i+1;
+ }
+ pass &= FFLAS::fiszero (F, m-R, n, U+R*n, n);
+
+ // Testing A U = L
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,m, 1.0, L, m, B, n, 0.0, X,n);
+
+ pass &= FFLAS::fequal(F, m, n, U, n, X, n);
+
+ if (!pass) {
+ std::cerr<<"FAIL"<<std::endl;
+ // write_field(F,std::cerr<<"A = "<<std::endl,B,m,n,lda);
+ // write_field(F,std::cerr<<"InplaceEchelon = "<<std::endl,A,m,n,lda);
+ // std::cerr<<"P = ["; for (size_t i=0; i<m; ++i) std::cerr<<P[i]<<", ";std::cerr<<"]\n";
+ // std::cerr<<"Q = ["; for (size_t i=0; i<n; ++i) std::cerr<<Q[i]<<", ";std::cerr<<"]\n";
+
+ // write_field(F,std::cerr<<"RowEchelon = "<<std::endl,U,m,n,n);
+ // write_field(F,std::cerr<<"Transform = "<<std::endl,L,m,m,m);
+ break;
+ }
+ }
+
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ return pass;
+}
+
+template <class Field>
+bool run_with_field (Givaro::Integer q, uint64_t b, size_t m, size_t n, size_t r, size_t iters){
+ bool ok = true ;
+
+ int nbit=(int)iters;
+
+ while (ok && nbit){
+ // choose Field
+ Field* F= chooseField<Field>(q,b);
+ if (F==nullptr)
+ return true;
+
+ std::ostringstream oss;
+ F->write(oss);
+ std::cout.fill('.');
+ std::cout<<"Checking ";
+ std::cout.width(40);
+ std::cout<<oss.str();
+ std::cout<<" .";
+
+#ifdef DEBUG
+ F->write(std::cerr) << std::endl;
+#endif
+
+ ok &= test_colechelon(*F,m,n,r,iters, FFPACK::FfpackSlabRecursive);
+ std::cout<<".";
+ ok &= test_colechelon(*F,m,n,r,iters, FFPACK::FfpackTileRecursive);
+ std::cout<<".";
+ ok &= test_redcolechelon(*F,m,n,r,iters, FFPACK::FfpackSlabRecursive);
+ std::cout<<".";
+ ok &= test_redcolechelon(*F,m,n,r,iters, FFPACK::FfpackTileRecursive);
+ std::cout<<".";
+ ok &= test_rowechelon(*F,m,n,r,iters, FFPACK::FfpackSlabRecursive);
+ std::cout<<".";
+ ok &= test_rowechelon(*F,m,n,r,iters, FFPACK::FfpackTileRecursive);
+ std::cout<<".";
+ ok &= test_redrowechelon(*F,m,n,r,iters, FFPACK::FfpackSlabRecursive);
+ std::cout<<".";
+ ok &= test_redrowechelon(*F,m,n,r,iters, FFPACK::FfpackTileRecursive);
+ std::cout<<".";
+
+ nbit--;
+ if ( !ok )
+ std::cout << "FAILED "<<std::endl;
+ else
+ std::cout << "PASSED "<<std::endl;
+ delete F;
+ }
+ return ok;
+}
+
+int main(int argc, char** argv){
+ std::cerr<<std::setprecision(20);
+
+ Givaro::Integer q = -1;
+ size_t b = 0;
+ size_t m = 80;
+ size_t n = 90;
+ size_t r = 20;
+ size_t iters = 3 ;
+ bool loop = false;
+
+ static Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic.", TYPE_INTEGER , &q },
+ { 'b', "-b B", "Set the bitsize of the random characteristic.", TYPE_INT , &b },
+ { 'n', "-n N", "Set the number of cols in the matrix.", TYPE_INT , &n },
+ { 'm', "-m N", "Set the number of rows in the matrix.", TYPE_INT , &m },
+ { 'r', "-r r", "Set the rank of the matrix." , TYPE_INT , &r },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iters },
+ { 'l', "-l Y/N", "run the test in an infinte loop.", TYPE_BOOL , &loop },
+ // { 'f', "-f file", "Set input file", TYPE_STR, &file },
+ END_OF_ARGUMENTS
+ };
+ r = std::min(r, std::min(m,n));
+ FFLAS::parseArguments(argc,argv,as);
+
+ bool ok = true;
+ do{
+ ok &= run_with_field<Modular<double> >(q,b,m,n,r,iters);
+ ok &= run_with_field<ModularBalanced<double> >(q,b,m,n,r,iters);
+ ok &= run_with_field<Modular<float> >(q,b,m,n,r,iters);
+ ok &= run_with_field<ModularBalanced<float> >(q,b,m,n,r,iters);
+ ok &= run_with_field<Modular<int32_t> >(q,b,m,n,r,iters);
+ ok &= run_with_field<ModularBalanced<int32_t> >(q,b,m,n,r,iters);
+ ok &= run_with_field<Modular<int64_t> >(q,b,m,n,r,iters);
+// ok &= run_with_field<Modular<RecInt::rint<7> > >(q,b,m,n,r,iters); // BUG: not available yet (missing division in the field
+ ok &= run_with_field<ModularBalanced<int64_t> >(q,b,m,n,r,iters);
+ ok &= run_with_field<Modular<Givaro::Integer> >(q,(b?b:128_ui64),m/8+1,n/8+1,r/8+1,iters);
+
+ } while (loop && ok);
+
+ return !ok ;
+}
diff --git a/tests/test-echelon_old.C b/tests/test-echelon_old.C
new file mode 100644
index 0000000..f6b108c
--- /dev/null
+++ b/tests/test-echelon_old.C
@@ -0,0 +1,204 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for the echelon factorisation
+//--------------------------------------------------------------------------
+// usage: test-echelon p A n, for n lsp factorization
+// of A over Z/pZ
+//-------------------------------------------------------------------------
+
+//-------------------------------------------------------------------------
+//#define DEBUG 1
+// Debug option 0: no debug
+// 1: check A = LQUP
+//-------------------------------------------------------------------------
+using namespace std;
+
+
+
+//#define __LUDIVINE_CUTOFF 1
+#include <iostream>
+#include <iomanip>
+#include "Matio.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+using namespace FFPACK;
+typedef Givaro::Modular<double> Field;
+
+int main(int argc, char** argv){
+ cerr<<setprecision(20);
+ int i,j,nbf,m,n;
+ int R=0;
+
+ if (argc!=4){
+ cerr<<"usage : test-lqup <p> <A> <i>"<<endl
+ <<" to do i Echelon factorisation of A with "
+ <<endl;
+ exit(-1);
+ }
+ Field F((uint64_t)atoi(argv[1]));
+ Field::Element * A;
+
+ A = read_field(F,argv[2],&m,&n);
+
+ size_t *P = FFLAS::fflas_new<size_t>(n);
+ size_t *Q = FFLAS::fflas_new<size_t>(m);
+
+ // size_t cutoff = atoi(argv[3]);
+ nbf = atoi(argv[3]);
+
+ FFLAS::Timer tim,timc;
+ timc.clear();
+
+
+ for ( i=0;i<nbf;i++){
+ if (i) {
+ FFLAS::fflas_delete( A);
+ A = read_field(F,argv[2],&m,&n);
+ }
+ for (j=0;j<n;j++)
+ P[j]=0;
+ for (j=0;j<m;j++)
+ Q[j]=0;
+ tim.clear();
+ tim.start();
+ R = (int)FFPACK::ColumnEchelonForm (F, m, n, A, n, P, Q);
+ tim.stop();
+ timc+=tim;
+ }
+ //write_field (F,cerr<<"Result = "<<endl, A, m,n,n);
+
+// cerr<<"P = [";
+// for (size_t i=0; i<n; ++i)
+// cerr<<P[i]<<" ";
+// cerr<<"]"<<endl;
+// cerr<<"Q = [";
+// for (size_t i=0; i<m; ++i)
+// cerr<<Q[i]<<" ";
+// cerr<<"]"<<endl;
+#if DEBUG
+ Field::Element * L = FFLAS::fflas_new<Field::Element>(m*n);
+ Field::Element * U = FFLAS::fflas_new<Field::Element>(n*n);
+ Field::Element * X = FFLAS::fflas_new<Field::Element>(m*n);
+
+ Field::Element zero,one;
+ F.init(zero,0.0);
+ F.init(one,1.0);
+ for (int i=0; i<R; ++i){
+ for (int j=0; j<=i; ++j)
+ F.assign ( *(U + i*n + j), zero);
+ F.init (*(U+i*(n+1)),one);
+ for (int j=i+1; j<n; ++j)
+ F.assign (*(U + i*n + j), *(A+ i*n+j));
+ }
+ for (int i=R;i<n; ++i){
+ for (int j=0; j<n; ++j)
+ F.assign(*(U+i*n+j), zero);
+ F.init(*(U+i*(n+1)),one);
+ }
+ FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, n, 0, R, U, n, P);
+
+ for ( int i=0; i<m; ++i ){
+ int j=0;
+ for (; j <= ((i<R)?i:R) ; ++j )
+ F.assign( *(L + i*m+j), *(A+i*n+j));
+ for (; j<m; ++j )
+ F.assign( *(L+i*m+j), zero);
+ }
+// cerr<<"P = ";
+// for (size_t i=0; i<n;++i)
+// cerr<<" "<<P[i];
+// cerr<<endl;
+// cerr<<"Q = ";
+// for (size_t i=0; i<m;++i)
+// cerr<<" "<<Q[i];
+// cerr<<endl;
+
+// write_field(F,cerr<<"A = "<<endl,A,m,n,n);
+// write_field(F,cerr<<"L = "<<endl,L,m,n,n);
+// write_field(F,cerr<<"U = "<<endl,U,m,n,n);
+
+ Field::Element * B = read_field(F,argv[2],&m,&n);
+
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,n, 1.0,
+ B, n, U, n, 0.0, X,n);
+ //FFLAS::fflas_delete( A);
+
+ bool fail = false;
+ for (int i=0; i<m; ++i)
+ for (int j=0; j<n; ++j)
+ if (!F.areEqual (*(L+i*n+j), *(X+i*n+j)))
+ fail=true;
+
+ // write_field(F,cerr<<"X = "<<endl,X,m,n,n);
+// write_field(F,cerr<<"L = "<<endl,L,m,n,n);
+
+ FFLAS::fflas_delete( B);
+ if (fail)
+ cerr<<"FAIL"<<endl;
+
+
+ else
+ cerr<<"PASS"<<endl;
+
+// cout<<m<<" "<<n<<" M"<<endl;
+// for (size_t i=0; i<m; ++i)
+// for (size_t j=0; j<n; ++j)
+// if (!F.isZero(*(A+i*n+j)))
+// cout<<i+1<<" "<<j+1<<" "<<(*(A+i*n+j))<<endl;
+// cout<<"0 0 0"<<endl;
+
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+#endif
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+
+ double t = timc.usertime();
+ double numops = m*m/1000.0*(n-m/3.0);
+
+ cerr<<m<<"x"<< n
+ << " : rank = " << R << " ["
+ << ((double)nbf/1000.0*(double)numops / t)
+ << " MFops "
+ << " in "
+ << t/nbf<<"s"
+ <<"]"<< endl;
+// cout<<m
+// <<" "<<((double)nbf/1000.0*(double)numops / t)
+// <<" "<<t/nbf
+// <<endl;
+
+ return 0;
+}
diff --git a/tests/test-fadd.C b/tests/test-fadd.C
new file mode 100644
index 0000000..ef130b5
--- /dev/null
+++ b/tests/test-fadd.C
@@ -0,0 +1,507 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2014 FFLAS-FFPACK
+ * Written by :
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+// #define SIMD_INT
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <typeinfo>
+#include <givaro/modular-balanced.h>
+
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#include "Matio.h"
+#include "test-utils.h"
+#include "assert.h"
+
+template<class Field>
+bool test_fadd(const Field & F, size_t m, size_t k, size_t n, bool timing)
+{
+ typedef typename Field::Element T ;
+
+ T * A = FFLAS::fflas_new<T>(m*n);
+ T * B = FFLAS::fflas_new<T>(m*n);
+ T * C = FFLAS::fflas_new<T>(m*n);
+ T * D = FFLAS::fflas_new<T>(m*n);
+
+ if (timing) std::cout << ">>>" << std::endl ;
+
+ size_t iter = 3 ;
+ FFLAS::Timer tim, tom, tam ;
+ tim.clear() ; tom.clear() ;
+ if (timing) F.write(std::cout << "Field ") << std::endl;
+ for (size_t b = 0 ; b < iter ; ++b) {
+ FFPACK::RandomMatrix(F,A,m,k,n);
+ FFPACK::RandomMatrix(F,B,m,k,n);
+ FFPACK::RandomMatrix(F,C,m,k,n);
+ FFLAS::fassign(F,m,k,C,n,D,n);
+
+ tam.clear();tam.start();
+ for (size_t i = 0 ; i < m ; ++i)
+ for (size_t j = 0 ; j < k ; ++j)
+ F.add(D[i*n+j],A[i*n+j],B[i*n+j]);
+ tam.stop();
+ tim += tam ;
+
+ tam.clear();tam.start();
+ FFLAS::fadd(F,m,k,A,n,B,n,C,n);
+ tam.stop();
+ tom += tam ;
+
+#if 1
+ for (size_t i =0 ; i < m ; ++i)
+ for (size_t j =0 ; j < k ; ++j)
+ if (! F.areEqual(C[i*n+j],D[i*n+j])) {
+ if (timing) std::cout << i << ',' << j << " : " << C[i*n+j] << "!= (ref)" << D[i*n+j] << std::endl;
+ return false ;
+ }
+#endif
+ }
+ if (timing) std::cout << "fadd (___): " << tim.usertime()/(double)iter << 's' << std::endl;
+ if (timing) std::cout << "fadd (AVX): " << tom.usertime()/(double)iter << 's'<< std::endl;
+
+ if (timing) std::cout << "<<<" << std::endl;
+ FFLAS::fflas_delete( A );
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( C );
+ FFLAS::fflas_delete( D );
+
+ return true;
+}
+
+template<class Field>
+bool test_faddin(const Field & F, size_t m, size_t k, size_t n, bool timing)
+{
+ typedef typename Field::Element T ;
+
+ T * A = FFLAS::fflas_new<T>(m*n);
+ T * C = FFLAS::fflas_new<T>(m*n);
+ T * D = FFLAS::fflas_new<T>(m*n);
+
+ if (timing) std::cout << ">>>" << std::endl ;
+ if (timing) F.write(std::cout << "Field ") << std::endl;
+ size_t iter = 3 ;
+ FFLAS::Timer tim, tom, tam ;
+ tim.clear() ; tom.clear() ;
+
+ for (size_t b = 0 ; b < iter ; ++b) {
+ FFPACK::RandomMatrix(F,A,m,k,n);
+ FFPACK::RandomMatrix(F,C,m,k,n);
+ FFLAS::fassign(F,m,k,C,n,D,n);
+
+ tam.clear();tam.start();
+ for (size_t i = 0 ; i < m ; ++i)
+ for (size_t j = 0 ; j < k ; ++j)
+ F.addin(D[i*n+j],A[i*n+j]);
+ tam.stop();
+ tim += tam ;
+
+ tam.clear();tam.start();
+ FFLAS::faddin(F,m,k,A,n,C,n);
+ tam.stop();
+ tom += tam ;
+
+#if 1
+ for (size_t i =0 ; i < m ; ++i)
+ for (size_t j =0 ; j < k ; ++j)
+ if (! F.areEqual(C[i*n+j],D[i*n+j])) {
+ if (timing) std::cout << i << ',' << j << " : " << C[i*n+j] << "!= (ref)" << D[i*n+j] << std::endl;
+ return false ;
+ }
+#endif
+ }
+ if (timing) std::cout << "faddin (___): " << tim.usertime()/(double)iter << 's' << std::endl;
+ if (timing) std::cout << "faddin (AVX): " << tom.usertime()/(double)iter << 's'<< std::endl;
+
+
+ if (timing) std::cout << "<<<" << std::endl;
+ FFLAS::fflas_delete( A );
+ FFLAS::fflas_delete( C );
+ FFLAS::fflas_delete( D );
+
+ return true;
+}
+
+template<class Field>
+bool test_fsub(const Field & F, size_t m, size_t k, size_t n, bool timing)
+{
+ typedef typename Field::Element T ;
+
+ T * A = FFLAS::fflas_new<T>(m*n);
+ T * B = FFLAS::fflas_new<T>(m*n);
+ T * C = FFLAS::fflas_new<T>(m*n);
+ T * D = FFLAS::fflas_new<T>(m*n);
+
+ if (timing) std::cout << ">>>" << std::endl ;
+
+ size_t iter = 3 ;
+ FFLAS::Timer tim, tom, tam ;
+ tim.clear() ; tom.clear() ;
+ if (timing) F.write(std::cout << "Field ") << std::endl;
+ for (size_t b = 0 ; b < iter ; ++b) {
+ FFPACK::RandomMatrix(F,A,m,k,n);
+ FFPACK::RandomMatrix(F,B,m,k,n);
+ FFPACK::RandomMatrix(F,C,m,k,n);
+ FFLAS::fassign(F,m,k,C,n,D,n);
+
+ tam.clear();tam.start();
+ for (size_t i = 0 ; i < m ; ++i)
+ for (size_t j = 0 ; j < k ; ++j)
+ F.sub(D[i*n+j],A[i*n+j],B[i*n+j]);
+ tam.stop();
+ tim += tam ;
+
+ tam.clear();tam.start();
+ FFLAS::fsub(F,m,k,A,n,B,n,C,n);
+ tam.stop();
+ tom += tam ;
+
+#if 1
+ for (size_t i =0 ; i < m ; ++i)
+ for (size_t j =0 ; j < k ; ++j)
+ if (! F.areEqual(C[i*n+j],D[i*n+j])) {
+ if (timing) std::cout << i << ',' << j << " : " << C[i*n+j] << "!= (ref)" << D[i*n+j] << std::endl;
+ return false ;
+ }
+#endif
+ }
+ if (timing) std::cout << "fsub (___): " << tim.usertime()/(double)iter << 's' << std::endl;
+ if (timing) std::cout << "fsub (AVX): " << tom.usertime()/(double)iter << 's'<< std::endl;
+
+ if (timing) std::cout << "<<<" << std::endl;
+ FFLAS::fflas_delete( A );
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( C );
+ FFLAS::fflas_delete( D );
+
+ return true;
+}
+
+template<class Field>
+bool test_fsubin(const Field & F, size_t m, size_t k, size_t n, bool timing)
+{
+ typedef typename Field::Element T ;
+
+ T * A = FFLAS::fflas_new<T>(m*n);
+ T * C = FFLAS::fflas_new<T>(m*n);
+ T * D = FFLAS::fflas_new<T>(m*n);
+
+ if (timing) std::cout << ">>>" << std::endl ;
+ if (timing) F.write(std::cout << "Field ") << std::endl;
+ size_t iter = 3 ;
+ FFLAS::Timer tim, tom, tam ;
+ tim.clear() ; tom.clear() ;
+
+ for (size_t b = 0 ; b < iter ; ++b) {
+ FFPACK::RandomMatrix(F,A,m,k,n);
+ FFPACK::RandomMatrix(F,C,m,k,n);
+ FFLAS::fassign(F,m,k,C,n,D,n);
+
+ tam.clear();tam.start();
+ for (size_t i = 0 ; i < m ; ++i)
+ for (size_t j = 0 ; j < k ; ++j)
+ F.subin(D[i*n+j],A[i*n+j]);
+ tam.stop();
+ tim += tam ;
+
+ tam.clear();tam.start();
+ FFLAS::fsubin(F,m,k,A,n,C,n);
+ tam.stop();
+ tom += tam ;
+
+#if 1
+ for (size_t i =0 ; i < m ; ++i)
+ for (size_t j =0 ; j < k ; ++j)
+ if (! F.areEqual(C[i*n+j],D[i*n+j])) {
+ if (timing) std::cout << i << ',' << j << " : " << C[i*n+j] << "!= (ref)" << D[i*n+j] << std::endl;
+ return false ;
+ }
+#endif
+ }
+if (timing) std::cout << "fsubin (___): " << tim.usertime()/(double)iter << 's' << std::endl;
+ if (timing) std::cout << "fsubin (AVX): " << tom.usertime()/(double)iter << 's'<< std::endl;
+
+
+ if (timing) std::cout << "<<<" << std::endl;
+ FFLAS::fflas_delete( A );
+ FFLAS::fflas_delete( C );
+ FFLAS::fflas_delete( D );
+
+ return true;
+}
+
+
+int main(int ac, char **av) {
+ static size_t m = 300 ;
+ static size_t n = 301 ;
+ static size_t k = 300 ;
+ static uint64_t p = 7;
+ int seed = (int) time(NULL);
+ static bool timing = false ;
+
+ static Argument as[] = {
+ { 'p', "-p P", "Set the field characteristic.", TYPE_INT , &p },
+ { 'n', "-n N", "Set the number of cols in C.", TYPE_INT , &n },
+ { 'm', "-m N", "Set the number of rows in C.", TYPE_INT , &m },
+ { 'k', "-k N", "Set the number of rows in B.", TYPE_INT , &k },
+ { 's', "-s N", "Set the seed .", TYPE_INT , &seed },
+ { 't', "-timing", "Output timings" , TYPE_NONE, &timing},
+ END_OF_ARGUMENTS
+ };
+
+
+ FFLAS::parseArguments(ac,av,as);
+
+ if (n < k) {
+ std::cout << "Usage : m k n ; matrix of size m x k, lda is n" << std::endl;
+ return -1 ;
+ }
+
+ srand(seed);
+ srand48(seed);
+
+ // std::cout << seed << std::endl;
+
+ bool pass = true ;
+ { /* fadd */
+ {
+ Givaro::Modular<float> F(p) ;
+ pass &= test_fadd(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<float> F(p) ;
+ pass &= test_fadd(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<double> F(p) ;
+ pass &= test_fadd(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<double> F(p) ;
+ pass &= test_fadd(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int32_t> F( (int32_t)p ) ;
+ pass &= test_fadd(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int32_t> F((int32_t)p) ;
+ pass &= test_fadd(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int64_t> F(p) ;
+ pass &= test_fadd(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int64_t> F(p) ;
+ pass &= test_fadd(F,m,k,n,timing);
+ }
+#if 1
+ {
+ Givaro::ZRing<float> F ;
+ pass &= test_fadd(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<double> F ;
+ pass &= test_fadd(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int32_t> F;
+ pass &= test_fadd(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int64_t> F ;
+ pass &= test_fadd(F,m,k,n,timing);
+ }
+#endif
+ }
+ { /* faddin */
+ {
+ Givaro::Modular<float> F(p) ;
+ pass &= test_faddin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<float> F(p) ;
+ pass &= test_faddin(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<double> F(p) ;
+ pass &= test_faddin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<double> F(p) ;
+ pass &= test_faddin(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int32_t> F((int32_t)p) ;
+ pass &= test_faddin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int32_t> F((int32_t)p) ;
+ pass &= test_faddin(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int64_t> F(p) ;
+ pass &= test_faddin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int64_t> F(p) ;
+ pass &= test_faddin(F,m,k,n,timing);
+ }
+#if 1
+ {
+ Givaro::ZRing<float> F ;
+ pass &= test_faddin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<double> F ;
+ pass &= test_faddin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int32_t> F;
+ pass &= test_faddin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int64_t> F ;
+ pass &= test_faddin(F,m,k,n,timing);
+ }
+#endif
+ }
+ { /* fsub */
+ {
+ Givaro::Modular<float> F(p) ;
+ pass &= test_fsub(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<float> F(p) ;
+ pass &= test_fsub(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<double> F(p) ;
+ pass &= test_fsub(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<double> F(p) ;
+ pass &= test_fsub(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int32_t> F((int32_t)p) ;
+ pass &= test_fsub(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int32_t> F((int32_t)p) ;
+ pass &= test_fsub(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int64_t> F(p) ;
+ pass &= test_fsub(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int64_t> F(p) ;
+ pass &= test_fsub(F,m,k,n,timing);
+ }
+#if 1
+ {
+ Givaro::ZRing<float> F ;
+ pass &= test_fsub(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<double> F ;
+ pass &= test_fsub(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int32_t> F;
+ pass &= test_fsub(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int64_t> F ;
+ pass &= test_fsub(F,m,k,n,timing);
+ }
+#endif
+ }
+ { /* fsubin */
+ {
+ Givaro::Modular<float> F(p) ;
+ pass &= test_fsubin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<float> F(p) ;
+ pass &= test_fsubin(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<double> F(p) ;
+ pass &= test_fsubin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<double> F(p) ;
+ pass &= test_fsubin(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int32_t> F((int32_t)p) ;
+ pass &= test_fsubin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int32_t> F((int32_t)p) ;
+ pass &= test_fsubin(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int64_t> F(p) ;
+ pass &= test_fsubin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int64_t> F(p) ;
+ pass &= test_fsubin(F,m,k,n,timing);
+ }
+#if 1
+ {
+ Givaro::ZRing<float> F ;
+ pass &= test_fsubin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<double> F ;
+ pass &= test_fsubin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int32_t> F;
+ pass &= test_fsubin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int64_t> F ;
+ pass &= test_fsubin(F,m,k,n,timing);
+ }
+#endif
+ }
+
+ return (pass?0:1) ;
+}
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+
diff --git a/tests/test-fgemm.C b/tests/test-fgemm.C
new file mode 100644
index 0000000..e509e5e
--- /dev/null
+++ b/tests/test-fgemm.C
@@ -0,0 +1,423 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+
+/*
+ * Copyright (C) the FFLAS-FFPACK group
+ * Written by Clément Pernet
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+// #ifndef NEWINO
+// #define NEWWINO
+// #endif
+
+// #define WINOTHRESHOLD 100
+// #define OLD_DYNAMIC_PEELING
+//#define DEBUG 1
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iomanip>
+#include <iostream>
+#include <givaro/modular.h>
+#include <givaro/udl.h>
+#include <recint/rint.h>
+
+#include <givaro/givintprime.h>
+
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/fflas/fflas.h"
+
+#include "fflas-ffpack/utils/args-parser.h"
+#include "test-utils.h"
+#include "fflas-ffpack/utils/Matio.h"
+
+
+
+using namespace std;
+using namespace FFPACK;
+using Givaro::Modular;
+using Givaro::ModularBalanced;
+
+
+// checks that D = alpha . C + beta . A ^ta * B ^tb
+template<class Field>
+bool check_MM(const Field & F,
+ const typename Field::Element_ptr Cd, // c0
+ enum FFLAS::FFLAS_TRANSPOSE & ta,
+ enum FFLAS::FFLAS_TRANSPOSE & tb,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element & alpha,
+ const typename Field::Element_ptr A,
+ const size_t lda,
+ const typename Field::Element_ptr B,
+ const size_t ldb,
+ const typename Field::Element & beta,
+ const typename Field::Element_ptr C, // res
+ const size_t ldc
+ )
+{
+ bool wrong = false;
+
+ typedef typename Field::Element Element;
+ typedef typename Field::Element_ptr Element_ptr;
+ typedef typename Field::ConstElement_ptr ConstElement_ptr;
+ Element tmp;
+ ConstElement_ptr ail,blj;
+ Element_ptr D = FFLAS::fflas_new (F,m,n);
+ FFLAS::fassign(F,m,n,Cd,n,D,n);
+
+ for (size_t i = 0; i < m; ++i)
+ for (size_t j = 0; j < n; ++j){
+ F.mulin(*(D+i*n+j),beta);
+ F.assign (tmp, F.zero);
+ for ( size_t l = 0; l < k ; ++l ){
+ if ( ta == FFLAS::FflasNoTrans )
+ ail = A+i*lda+l;
+ else
+ ail = A+l*lda+i;
+ if ( tb == FFLAS::FflasNoTrans )
+ blj = B+l*ldb+j;
+ else
+ blj = B+j*ldb+l;
+ F.axpyin (tmp, *ail, *blj);
+ }
+ F.axpyin (*(D+i*n+j), alpha, tmp);
+ if ( !F.areEqual( *(D+i*n+j), *(C+i*ldc+j) ) ) {
+ wrong = true;
+ }
+ }
+ if ( wrong ){
+ size_t ici = 20 ;
+ std::cerr<<"FAIL"<<std::endl;
+ std::cerr << "a :" << alpha<<", b : " << beta << std::endl;
+ std::cerr << "m :" << m << ", n : " << n << ", k : " << k << std::endl;
+ std::cerr << "ldA :" << lda << ", ldB : " << ldb << ", ldC : " << ldc << std::endl;
+ for (size_t i=0; i<m && ici; ++i){
+ for (size_t j =0; j<n && ici; ++j)
+ if (!F.areEqual( *(C+i*ldc+j), *(D+i*n+j) ) ) {
+ std::cerr<<"Error C["<<i<<","<<j<<"]="
+ <<(*(C+i*ldc+j))<<" D["<<i<<","<<j<<"]="
+ <<(*(D+i*n+j))<<std::endl;
+ ici--;
+ }
+ }
+ if (m<80 && n<80) {
+ for (size_t i=0; i<m ; ++i){
+ for (size_t j =0; j<n ; ++j) {
+ if ( !F.areEqual( *(C+i*ldc+j), *(D+i*n+j) ) )
+ std::cout << 'X' ;
+ else
+ std::cout << '.' ;
+ }
+ std::cout << std::endl;
+ }
+ }
+ }
+ // else std::cout<<"COOL"<<std::endl;
+
+ FFLAS::fflas_delete (D);
+
+ return !wrong ;
+
+}
+
+
+template<class Field>
+bool launch_MM(const Field & F,
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const typename Field::Element alpha,
+ const typename Field::Element beta,
+ const size_t ldc,
+ const size_t lda,
+ enum FFLAS::FFLAS_TRANSPOSE ta,
+ const size_t ldb,
+ enum FFLAS::FFLAS_TRANSPOSE tb,
+ size_t iters,
+ int nbw,
+ bool par,
+ size_t b)
+{
+ bool ok = true;
+
+ typedef typename Field::Element_ptr Element_ptr;
+ Element_ptr A ;
+ Element_ptr B ;
+ Element_ptr C = FFLAS::fflas_new (F,m,ldc);
+ FFLASFFPACK_check(ldc >= n);
+ FFLAS::fzero(F,m,n,C,ldc);
+ Element_ptr D = FFLAS::fflas_new (F, m, n);
+ for(size_t i = 0;i<iters;++i){
+ if (ta == FFLAS::FflasNoTrans) {
+ FFLASFFPACK_check(lda >= k);
+ A = FFLAS::fflas_new (F, m, lda);
+ FFLAS::fzero(F,m,lda,A,lda);
+ RandomMatrix(F,A,m,k,lda,b);
+ }
+ else {
+ FFLASFFPACK_check(lda >= m);
+ A = FFLAS::fflas_new (F, k, lda);
+ FFLAS::fzero(F,k,lda,A,lda);
+ RandomMatrix(F,A,k,m,lda,b);
+ }
+ if (tb == FFLAS::FflasNoTrans) {
+ FFLASFFPACK_check(ldb >= n);
+ B = FFLAS::fflas_new (F,k,ldb);
+ FFLAS::fzero(F,k,ldb,B,ldb);
+ RandomMatrix(F,B,k,n,ldb,b);
+ }
+ else {
+ FFLASFFPACK_check(ldb >= k);
+ B = FFLAS::fflas_new (F,n,ldb);
+ FFLAS::fzero(F,n,ldb,B,ldb);
+ RandomMatrix(F,B,n,k,ldb,b);
+ }
+ RandomMatrix(F,C,m,n,ldc,b);
+ FFLAS::fassign(F,m,n,C,ldc,D,n);
+ if (par){
+ FFLAS::MMHelper<Field,FFLAS::MMHelperAlgo::Auto, typename FFLAS::ModeTraits<Field>::value, FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive,FFLAS::StrategyParameter::ThreeDAdaptive> > WH (F, nbw);
+ PAR_BLOCK{
+ FFLAS::fgemm (F, ta, tb,m,n,k,alpha, A,lda, B,ldb, beta,C,ldc,WH);
+ }
+ }else{
+ FFLAS::MMHelper<Field,FFLAS::MMHelperAlgo::Auto> WH(F,nbw,FFLAS::ParSeqHelper::Sequential());
+ FFLAS::fgemm (F, ta, tb,m,n,k,alpha, A,lda, B,ldb, beta,C,ldc,WH);
+ }
+ ok &= check_MM(F, D, ta, tb,m,n,k,alpha, A,lda, B,ldb, beta,C,ldc);
+
+ FFLAS::fflas_delete(A);
+ FFLAS::fflas_delete(B);
+
+ if (!ok)
+ break;
+ }
+ FFLAS::fflas_delete (C);
+ FFLAS::fflas_delete (D);
+
+ return ok ;
+}
+
+
+template<class Field>
+bool launch_MM_dispatch(const Field &F,
+ const int mm,
+ const int nn,
+ const int kk,
+ const typename Field::Element alpha,
+ const typename Field::Element beta,
+ const size_t iters,
+ const int nbw,
+ const bool par,
+ const size_t b)
+{
+ bool ok = true;
+ size_t m,n,k;
+ size_t lda,ldb,ldc;
+ //!@bug test for ldX equal
+ //!@bug test for transpo
+ //!@todo does nbw actually do nbw recursive calls and then call blas (check ?) ?
+ size_t ld = 13 ;
+ {
+ FFLAS::FFLAS_TRANSPOSE ta = FFLAS::FflasNoTrans ;
+ FFLAS::FFLAS_TRANSPOSE tb = FFLAS::FflasNoTrans ;
+ if (! par) {
+ if (random()%2) ta = FFLAS::FflasTrans ;
+ if (random()%2) tb = FFLAS::FflasTrans ;
+ }
+
+ if (mm<0)
+ m = 1+(size_t)random() % -mm;
+ else m = mm;
+ if (nn<0)
+ n = 1+(size_t)random() % -nn;
+ else n = nn;
+ if (kk<0)
+ k = 1+(size_t)random() % -kk;
+ else k = kk;
+
+ int logdim = (int)floor(log(std::min(std::min(m,k),n))/log(2.));
+ int nw = std::min (logdim,nbw);
+
+ lda = std::max(k,m)+(size_t)random()%ld;
+ ldb = std::max(n,k)+(size_t)random()%ld;
+ ldc = n+(size_t)random()%ld;
+#ifdef DEBUG
+ std::cerr <<"q = "<<F.characteristic()<<" nw = "<<nw<<" m,k,n = "<<m<<", "<<k<<", "<<n<<" C := "
+ <<alpha<<".A"<<((ta==FFLAS::FflasTrans)?"^T":"")
+ <<" * B"<<((tb==FFLAS::FflasTrans)?"^T":"");
+ if (!F.isZero(beta))
+ cerr<<" + "<<beta<<" C";
+#endif
+ ok &= launch_MM<Field>(F,m,n,k,
+ alpha,beta,
+ ldc,
+ lda, ta,
+ ldb, tb,
+ iters,nw, par, b);
+#ifdef DEBUG
+ std::cerr<<(ok?" -> ok ":" -> KO")<<std::endl;
+#endif
+ }
+ return ok ;
+}
+
+template <class Field>
+bool run_with_field (Givaro::Integer q, uint64_t b, int m, int n, int k, int nbw, size_t iters, bool par ){
+ bool ok = true ;
+
+ int nbit=(int)iters;
+
+ while (ok && nbit){
+ typedef typename Field::Element Element ;
+ // choose Field
+ Field* F= chooseField<Field>(q,b);
+ if (F==nullptr)
+ return true;
+
+ std::ostringstream oss;
+ F->write(oss);
+ std::cout.fill('.');
+ std::cout<<"Checking ";
+ std::cout.width(40);
+ std::cout<<oss.str();
+ std::cout<<" ... ";
+
+ if (nbw<0)
+ nbw = (int) random() % 7;
+#ifdef DEBUG
+ F->write(std::cerr) << std::endl;
+#endif
+ typedef typename Field::RandIter Randiter ;
+ typedef typename Field::Element Element ;
+ Randiter R1(*F,b);
+ Givaro::GeneralRingNonZeroRandIter<Field,Randiter> R(R1);
+
+ //size_t k = 0 ;
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,F->one,F->zero,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,F->zero,F->zero,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,F->mOne,F->zero,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,F->one ,F->one,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,F->zero,F->one,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,F->mOne,F->one,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,F->one ,F->mOne,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,F->zero,F->mOne,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,F->mOne,F->mOne,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+
+ Element alpha,beta ;
+ R.random(alpha);
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,F->one ,alpha,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,F->zero,alpha,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,F->mOne,alpha,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,alpha,F->one ,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,alpha,F->zero,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,alpha,F->mOne,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+
+ for (size_t j = 0 ; j < 3 ; ++j) {
+ R.random(alpha);
+ R.random(beta);
+ ok &= launch_MM_dispatch<Field>(*F,m,n,k,alpha,beta,iters,nbw, par, b);
+ //std::cout << k << "/24" << std::endl; ++k;
+ }
+ //std::cout<<std::endl;
+ nbit--;
+ if ( !ok )
+ //std::cout << "\033[1;31mFAILED\033[0m "<<std::endl;
+ std::cout << "FAILED "<<std::endl;
+ else
+ //std::cout << "\033[1;32mPASSED\033[0m "<<std::endl;
+ std::cout << "PASSED "<<std::endl;
+ delete F;
+ }
+ return ok;
+}
+int main(int argc, char** argv)
+{
+ std::cout<<setprecision(17);
+ std::cerr<<setprecision(17);
+ srand((int)time(NULL));
+ srand48(time(NULL));
+
+ static size_t iters = 3 ;
+ static Givaro::Integer q = -1 ;
+ static uint64_t b = 0 ;
+ static int m = -50 ;
+ static int n = -50 ;
+ static int k = -50 ;
+ static int nbw = -1 ;
+ static bool loop = false;
+ static bool p = false;
+ static Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INTEGER , &q },
+ { 'b', "-b B", "Set the bitsize of the random characteristic.", TYPE_INT , &b },
+ { 'm', "-m M", "Set the dimension of the matrix (negative values, mean, any random value between 0 and |n|).", TYPE_INT , &m },
+ { 'n', "-n N", "Set the dimension of the matrix (negative values, mean, any random value between 0 and |n|).", TYPE_INT , &n },
+ { 'k', "-k K", "Set the dimension of the matrix (negative values, mean, any random value between 0 and |k|).", TYPE_INT , &k },
+ { 'w', "-w N", "Set the number of winograd levels (-1 for random).", TYPE_INT , &nbw },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iters },
+ { 'l', "-l Y/N", "run the test in an infinte loop.", TYPE_BOOL , &loop },
+ { 'p', "-p Y/N", "run the parallel fgemm.", TYPE_BOOL , &p },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+ bool ok = true;
+ do{
+ ok &= run_with_field<Modular<double> >(q,b,m,n,k,nbw,iters,p);
+ ok &= run_with_field<ModularBalanced<double> >(q,b,m,n,k,nbw,iters,p);
+ ok &= run_with_field<Modular<float> >(q,b,m,n,k,nbw,iters,p);
+ ok &= run_with_field<ModularBalanced<float> >(q,b,m,n,k,nbw,iters,p);
+ ok &= run_with_field<Modular<int32_t> >(q,b,m,n,k,nbw,iters,p);
+ ok &= run_with_field<ModularBalanced<int32_t> >(q,b,m,n,k,nbw,iters,p);
+ ok &= run_with_field<Modular<RecInt::rint<7> > >(q,b?b:63_ui64,m,n,k,nbw,iters, p);
+ ok &= run_with_field<Modular<RecInt::rint<8> > >(q,b?b:127_ui64,m,n,k,nbw,iters, p);
+ ok &= run_with_field<Modular<int64_t> >(q,b,m,n,k,nbw,iters, p);
+ ok &= run_with_field<ModularBalanced<int64_t> >(q,b,m,n,k,nbw,iters, p);
+ ok &= run_with_field<Modular<Givaro::Integer> >(q,(b?b:512_ui64),m,n,k,nbw,iters,p);
+ ok &= run_with_field<Givaro::ZRing<Givaro::Integer> >(0,(b?b:512_ui64),m,n,k,nbw,iters,p);
+
+ } while (loop && ok);
+
+ return !ok ;
+}
diff --git a/tests/test-fgemv.C b/tests/test-fgemv.C
new file mode 100644
index 0000000..a8fbf00
--- /dev/null
+++ b/tests/test-fgemv.C
@@ -0,0 +1,153 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for fgemv : 1 computation
+//
+//--------------------------------------------------------------------------
+// Clement Pernet
+//-------------------------------------------------------------------------
+
+#define DEBUG 1
+#define TIME 1
+
+#if not defined(STD_RECINT_SIZE)
+#define STD_RECINT_SIZE 8
+#endif
+
+#include <givaro/modular.h>
+#include "recint/recint.h"
+#include <iomanip>
+#include <iostream>
+#include "fflas-ffpack/utils/timer.h"
+#include "Matio.h"
+#include "fflas-ffpack/fflas/fflas.h"
+
+
+
+using namespace std;
+using namespace FFPACK;
+
+// typedef Givaro::Modular<double> Field;
+
+typedef RecInt::ruint<STD_RECINT_SIZE> Ints;
+typedef Givaro::Modular<Ints> Field;
+
+int main(int argc, char** argv){
+
+ int m,n,k;
+ int nbit=atoi(argv[4]); // number of times the product is performed
+ cerr<<setprecision(10);
+ Field::Element alpha,beta;
+
+
+ if (argc != 8) {
+ cerr<<"Usage : test-fgemv <p> <A> <b> <i>"
+ <<" <alpha> <beta> <c>"<<endl
+ <<" to do i computations of c <- alpha Ab + beta c"
+ <<endl;
+ exit(-1);
+ }
+ Field F(atoi(argv[1]));
+
+ F.init( alpha, double(atoi(argv[5])));
+ F.init( beta, double(atoi(argv[6])));
+
+ Field::Element * A;
+ Field::Element * b;
+
+ b = read_field(F,argv[3],&n,&k);
+ A = read_field(F,argv[2],&m,&n);
+
+ Field::Element * c;
+
+
+ FFLAS::Timer tim,t; t.clear();tim.clear();
+ for(int i = 0;i<nbit;++i){
+ c = read_field(F,argv[7],&m,&k);
+ t.clear();
+ t.start();
+ FFLAS::fgemv (F, FFLAS::FflasNoTrans,m,n,alpha, A,n, b,1,
+ beta, c, 1);
+ t.stop();
+ tim+=t;
+ }
+
+#if DEBUG
+ Field::Element *d;
+ d = read_field(F,argv[7],&m,&k);
+ for (int i=0; i<m; ++i)
+ F.mulin (d[i], beta); // d <- beta c
+ for (int i=0; i<m; ++i)
+ F.mulin (b[i], alpha); // b <- alpha b
+ for (int i=0; i<m; ++i)
+ for (int j=0; j<n; ++j)
+ F.axpyin (d[i], *(A+i*m+j), b[j]); // d <- Ad+b = alpha Ab + beta c
+ bool fail = false;
+ for (int i=0; i<m; ++i)
+ if (!F.areEqual(d[i], c[i]))
+ fail = true;
+
+ if (fail) {
+ cerr<<"FAIL"<<endl;
+ write_field(F, std::cerr<<"i:=",b,n,k,k,true)<<std::endl;
+ write_field(F, std::cerr<<"r:=",c,m,k,k,true)<<std::endl;
+ write_field(F, std::cerr<<"d:=",d,m,k,k,true)<<std::endl;
+
+ F.write(std::cerr<<"alpha:=", alpha) << ';' << std::endl;
+ write_field(F, std::cerr<<"A:=",A,m,n,n,true)<<std::endl;
+ b = read_field(F,argv[3],&n,&k);
+ write_field(F, std::cerr<<"b:=",b,n,k,k,true)<<std::endl;
+ F.write(std::cerr<<"beta:=", beta) << ';' << std::endl;
+ c = read_field(F,argv[7],&m,&k);
+ write_field(F, std::cerr<<"c:=",c,m,k,k,true)<<std::endl;
+ std::cerr<<"p:=" << F.characteristic() << ';' << std::endl;
+
+ } else
+ cerr<<"PASS"<<endl;
+
+ FFLAS::fflas_delete( d);
+#endif
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( b);
+ FFLAS::fflas_delete( c);
+#if TIME
+ double mflops = (2.0*(m*n/1000000.0)*nbit/tim.usertime());
+ cerr << m <<"x" <<n <<" : fgemv over Z/"
+ <<atoi(argv[1])<<"Z : [ "
+ <<mflops<<" MFops in "<<tim.usertime()/nbit<<"s]"
+ << endl;
+
+ cerr<<"alpha, beta = "<<alpha <<", "<<beta <<endl;
+
+ cout<<m<<" "<<n<<" "<<mflops<<" "<<tim.usertime()/nbit<<endl;
+#endif
+}
+
+
diff --git a/tests/test-fger.C b/tests/test-fger.C
new file mode 100644
index 0000000..1da79ea
--- /dev/null
+++ b/tests/test-fger.C
@@ -0,0 +1,300 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by JG Dumas
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for fger : 1 computation
+//
+//--------------------------------------------------------------------------
+// Clement Pernet
+//-------------------------------------------------------------------------
+
+#define DEBUG
+#define TIME 1
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iomanip>
+#include <iostream>
+#include <givaro/modular-int32.h>
+#include <givaro/modular-balanced.h>
+#include <givaro/givintprime.h>
+
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#include "test-utils.h"
+#include "Matio.h"
+
+using namespace std;
+using namespace FFPACK;
+using Givaro::Modular;
+using Givaro::ModularBalanced;
+
+// checks that D = alpha . x . y^T + C
+
+// WARNING
+template<class Field>
+bool check_fger(const Field & F,
+ const typename Field::Element_ptr Cd, // c0
+ const size_t m,
+ const size_t n,
+ const typename Field::Element & alpha,
+ const typename Field::Element_ptr x,
+ const size_t incx,
+ const typename Field::Element_ptr y,
+ const size_t incy,
+ const typename Field::Element_ptr C, // res
+ const size_t ldc
+ ) {
+ bool wrong = false;
+
+ typedef typename Field::Element Element;
+ typedef typename Field::Element_ptr Element_ptr;
+
+// std::cerr << "with(LinearAlgebra):" << std::endl;
+// write_field(F,std::cerr <<"X:=",x, m, 1, incx, true) << ';' << std::endl;
+// write_field(F,std::cerr <<"Y:=Transpose(",y, n, 1, incy, true) << ");" << std::endl;
+// write_field(F,std::cerr <<"A:=",Cd, m, n, ldc, true) << ';' << std::endl;
+// F.write(std::cerr << "a:=", alpha) << ';' << std::endl;
+// std::cerr << "q:=" << F.characteristic() << ';' << std::endl;
+
+ Element_ptr D = FFLAS::fflas_new (F,m,n);
+ FFLAS::fassign(F,m,n,Cd,n,D,n);
+ for(size_t i=0; i<m; ++i) {
+ Element tmp; F.init(tmp);
+ F.mul(tmp, alpha, *(x+i*incx) );
+ for(size_t j=0; j<n; j+=incy) {
+ F.axpyin(*(D+i*n+j), tmp, *(y+j) );
+ if ( !F.areEqual( *(D+i*n+j), *(C+i*ldc+j) ) ) {
+ wrong = true;
+ }
+ }
+ }
+// write_field(F,std::cerr <<"d:=",D, m, n, ldc, true) << ';' << std::endl;
+// F.write(std::cerr, alpha) << "*X.Y+A,d;";
+// F.write(std::cerr, alpha) << "*X.Y+A-d mod q;" << std::endl;
+ if ( wrong ){
+ size_t ici = 20 ;
+ std::cout<<"FAIL"<<std::endl;
+ std::cout << "a :" << alpha<<std::endl;
+ std::cout << "m :" << m << ", n : " << n << std::endl;
+ std::cout << "incx :" << incx << ", incy : " << incy << ", ldC : " << ldc << std::endl;
+ for (size_t i=0; i<m && ici; ++i){
+ for (size_t j =0; j<n && ici; ++j)
+ if (!F.areEqual( *(C+i*ldc+j), *(D+i*n+j) ) ) {
+ std::cout<<"Error C["<<i<<","<<j<<"]="
+ <<(*(C+i*ldc+j))<<" D["<<i<<","<<j<<"]="
+ <<(*(D+i*n+j))<<std::endl;
+ ici--;
+ }
+ }
+ if (m<80 && n<80) {
+ for (size_t i=0; i<m ; ++i){
+ for (size_t j =0; j<n ; ++j) {
+ if ( !F.areEqual( *(C+i*ldc+j), *(D+i*n+j) ) )
+ std::cout << 'X' ;
+ else
+ std::cout << '.' ;
+ }
+ std::cout << std::endl;
+ }
+ }
+ }
+ FFLAS::fflas_delete (D);
+
+ return !wrong ;
+}
+
+
+template<class Field>
+bool launch_fger(const Field & F,
+ const size_t m,
+ const size_t n,
+ const typename Field::Element alpha,
+ const size_t ldc,
+ const size_t inca,
+ const size_t incb,
+ size_t iters)
+{
+ bool ok = true;
+
+ typedef typename Field::Element_ptr Element_ptr;
+ Element_ptr A ;
+ FFLASFFPACK_check(inca >= 1);
+ Element_ptr B ;
+ FFLASFFPACK_check(incb >= 1);
+ Element_ptr C = FFLAS::fflas_new (F,m,ldc);
+ FFLASFFPACK_check(ldc >= n);
+ FFLAS::fzero(F,m,n,C,ldc);
+ Element_ptr D = FFLAS::fflas_new (F, m, n);
+ for(size_t i = 0;i<iters;++i){
+ A = FFLAS::fflas_new (F, m, inca);
+ RandomMatrix(F,A,m,inca,inca);
+ B = FFLAS::fflas_new (F, n, incb);
+ RandomMatrix(F,B,n,incb,incb);
+ RandomMatrix(F,C,m,n,ldc);
+ FFLAS::fassign(F,m,n,C,ldc,D,n);
+ FFLAS::fger (F,m,n,alpha, A, inca, B, incb, C,ldc);
+ ok &= check_fger(F, D, m,n,alpha, A, inca, B, incb, C,ldc);
+
+ FFLAS::fflas_delete(A);
+ FFLAS::fflas_delete(B);
+
+ if (!ok)
+ break;
+
+
+ }
+ FFLAS::fflas_delete (C);
+ FFLAS::fflas_delete (D);
+
+ return ok ;
+}
+
+
+template<class Field>
+bool launch_fger_dispatch(const Field &F,
+ const size_t nn,
+ const typename Field::Element alpha,
+ const size_t iters)
+{
+ bool ok = true;
+ size_t m,n;
+ size_t inca,incb,ldc;
+ //!@bug test for incx equal
+ //!@bug test for transpo
+ //!@todo does nbw actually do nbw recursive calls and then call blas (check ?) ?
+ // size_t ld = 13 ;
+ {
+ m = 1+(size_t)random()%nn;
+ n = 1+(size_t)random()%nn;
+
+
+// lda = m+(size_t)random()%ld;
+// ldb = 1+(size_t)random()%ld;
+
+ inca = 1;
+ incb = 1;
+
+// ldc = n+(size_t)random()%ld;
+ ldc = n;
+
+#ifdef DEBUG
+ std::cout <<"q = "<<F.characteristic()<<" m,n = "<<m<<", "<<n<<" C := "
+ <<alpha<<".x * y^T + C";
+#endif
+ ok &= launch_fger<Field>(F,m,n,
+ alpha,
+ ldc,
+ inca,
+ incb,
+ iters);
+#ifdef DEBUG
+ std::cout<<(ok?" -> ok ":" -> KO")<<std::endl;
+#endif
+ }
+ return ok ;
+}
+template <class Field>
+bool run_with_field (int64_t q, uint64_t b, size_t n, size_t iters){
+ bool ok = true ;
+ int nbit=(int)iters;
+ while (ok && nbit){
+ typedef typename Field::Element Element ;
+ typedef typename Field::RandIter Randiter ;
+ typedef typename Field::Element Element ;
+
+ Field* F= chooseField<Field>(q,b);
+
+#ifdef DEBUG
+ F->write(std::cout) << std::endl;
+#endif
+ Randiter R1(*F);
+ Givaro::GeneralRingNonZeroRandIter<Field,Randiter> R(R1);
+
+ //size_t k = 0 ;
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_fger_dispatch<Field>(*F,n,F->one,iters);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_fger_dispatch<Field>(*F,n,F->zero,iters);
+ //std::cout << k << "/24" << std::endl; ++k;
+ ok &= launch_fger_dispatch<Field>(*F,n,F->mOne,iters);
+ //std::cout << k << "/24" << std::endl; ++k;
+
+ Element alpha ;
+ R.random(alpha);
+
+ ok &= launch_fger_dispatch<Field>(*F,n,alpha,iters);
+
+ //std::cout<<std::endl;
+ nbit--;
+ delete F;
+ }
+ return ok;
+}
+
+int main(int argc, char** argv)
+{
+ std::cout<<setprecision(17);
+ std::cerr<<setprecision(17);
+ srand((int)time(NULL));
+ srand48(time(NULL));
+
+ static size_t iters = 3 ;
+ static long long q = -1 ;
+ static uint64_t b = 0 ;
+ static size_t n = 50 ;
+ static bool loop = false;
+ static Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_LONGLONG , &q },
+ { 'b', "-b B", "Set the bitsize of the random characteristic.", TYPE_INT , &b },
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iters },
+ { 'l', "-loop Y/N", "run the test in an infinte loop.", TYPE_BOOL , &loop },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+
+ bool ok = true;
+ do{
+ ok &= run_with_field<Modular<double> >(q,b,n,iters);
+ ok &= run_with_field<ModularBalanced<double> >(q,b,n,iters);
+ ok &= run_with_field<Modular<float> >(q,b,n,iters);
+ ok &= run_with_field<ModularBalanced<float> >(q,b,n,iters);
+ ok &= run_with_field<Modular<int32_t> >(q,b,n,iters);
+ ok &= run_with_field<ModularBalanced<int32_t> >(q,b,n,iters);
+ ok &= run_with_field<Modular<int64_t> >(q,b,n,iters);
+ ok &= run_with_field<ModularBalanced<int64_t> >(q,b,n,iters);
+ } while (loop && ok);
+
+ return !ok ;
+}
+
diff --git a/tests/test-fgesv.C b/tests/test-fgesv.C
new file mode 100644
index 0000000..de0743f
--- /dev/null
+++ b/tests/test-fgesv.C
@@ -0,0 +1,196 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for fgesv : 1 computation
+//
+//--------------------------------------------------------------------------
+// Clement Pernet
+//-------------------------------------------------------------------------
+
+//#define DEBUG 1
+#define TIME 1
+
+#include <iomanip>
+#include <iostream>
+using namespace std;
+
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "Matio.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+
+using namespace FFPACK;
+
+typedef Givaro::Modular<double> Field;
+
+int main(int argc, char** argv){
+
+ int n,m,mb,nb;
+ cerr<<setprecision(10);
+ Field::Element zero, one;
+
+ if (argc != 6) {
+ cerr<<"Usage : test-fgesv <p> <A> <b> <iter> <left/right>"
+ <<endl;
+ exit(-1);
+ }
+ int nbit=atoi(argv[4]); // number of times the product is performed
+ Field F(atoi(argv[1]));
+ F.init(zero,0.0);
+ F.init(one,1.0);
+ Field::Element * A, *B, *X=NULL;
+ A = read_field(F,argv[2],&m,&n);
+ B = read_field(F,argv[3],&mb,&nb);
+
+ FFLAS::FFLAS_SIDE side = (atoi(argv[5])) ? FFLAS::FflasRight : FFLAS::FflasLeft;
+
+ size_t ldx=0;
+ size_t rhs = (side == FFLAS::FflasLeft) ? nb : mb;
+ if (m != n) {
+ if (side == FFLAS::FflasLeft){
+ X = FFLAS::fflas_new<Field::Element>(n*nb);
+ ldx = nb;
+ }
+ else {
+ X = FFLAS::fflas_new<Field::Element>(mb*m);
+ ldx = m;
+ }
+ }
+
+ if ( ((side == FFLAS::FflasRight) && (n != nb))
+ || ((side == FFLAS::FflasLeft)&&(m != mb)) ) {
+ cerr<<"Error in the dimensions of the input matrices"<<endl;
+ exit(-1);
+ }
+ int info=0;
+ FFLAS::Timer t; t.clear();
+ double time=0.0;
+ //write_field(F, cerr<<"A="<<endl, A, k,k,k);
+ size_t R=0;
+ for (int i = 0;i<nbit;++i){
+ t.clear();
+ t.start();
+ if (m == n)
+ R = FFPACK::fgesv (F, side, mb, nb, A, n, B, nb, &info);
+ else
+ R = FFPACK::fgesv (F, side, m, n, rhs, A, n, X, ldx, B, nb, &info);
+ if (info > 0){
+ std::cerr<<"System is inconsistent"<<std::endl;
+ exit(-1);
+ }
+
+ t.stop();
+ time+=t.usertime();
+ if (i+1<nbit){
+ FFLAS::fflas_delete(A);
+ A = read_field(F,argv[2],&m,&n);
+ FFLAS::fflas_delete( B);
+ B = read_field(F,argv[3],&mb,&nb);
+ }
+ }
+
+#if DEBUG
+ Field::Element *B2=NULL;
+ FFLAS::fflas_delete( A);
+
+ if (info > 0){
+ std::cerr<<"System inconsistent"<<std::endl;
+ exit (-1);
+ }
+
+ A = read_field(F,argv[2],&m,&n);
+
+ B2 = FFLAS::fflas_new<Field::Element>(mb*nb);
+
+
+ if (m==n)
+ if (side == FFLAS::FflasLeft)
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m, nb, n,
+ one, A, n, B, nb, zero, B2, nb);
+ else
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, mb, n, m,
+ one, B, nb, A, n, zero, B2, nb);
+ else
+ if (side == FFLAS::FflasLeft)
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m, nb, n,
+ one, A, n, X, ldx, zero, B2, nb);
+ else
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, mb, n, m,
+ one, X, ldx, A, n, zero, B2, nb);
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( X);
+
+ B = read_field(F,argv[3],&mb,&nb);
+
+ bool wrong = false;
+ for (int i=0;i<mb;++i)
+ for (int j=0;j<nb;++j)
+ if ( !F.areEqual(*(B2+i*nb+j), *(B+i*nb+j))){
+ cerr<<"B2 ["<<i<<", "<<j<<"] = "<<(*(B2+i*nb+j))
+ <<" ; B ["<<i<<", "<<j<<"] = "<<(*(B+i*nb+j))
+ <<endl;
+ wrong = true;
+ }
+
+ if (wrong) {
+ cerr<<"FAIL"<<endl;
+ //write_field (F,cerr<<"B2="<<endl,B2,m,n,n);
+ //write_field (F,cerr<<"B="<<endl,B,m,n,n);
+ }else{
+
+ cerr<<"PASS"<<endl;
+ }
+
+
+ FFLAS::fflas_delete( B2);
+#endif
+
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( B);
+#if TIME
+ double mflops;
+ double cplx = (double)n*m*m-(double)m*m*m/3;
+ if (side == FFLAS::FflasLeft)
+ mflops = (cplx+(double)(2*R*R*n))/1000000.0*nbit/time;
+ else
+ mflops = (cplx+(double)(2*R*R*m))/1000000.0*nbit/time;
+ cerr<<"m,n,mb,nb = "<<m<<" "<<n<<" "<<mb<<" "<<nb<<". fgesv "
+ <<((side == FFLAS::FflasLeft)?" Left ":" Right ")
+ <<"over Z/"<<atoi(argv[1])<<"Z :"
+ <<endl
+ <<"t= "
+ << time/nbit
+ << " s, Mffops = "<<mflops
+ << endl;
+
+ cout<<m<<" "<<n<<" "<<mflops<<" "<<time/nbit<<endl;
+#endif
+}
diff --git a/tests/test-finit.C b/tests/test-finit.C
new file mode 100644
index 0000000..2a5fbf3
--- /dev/null
+++ b/tests/test-finit.C
@@ -0,0 +1,229 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2014 FFLAS-FFPACK
+ * Written by :
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+// #define SIMD_INT
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <typeinfo>
+#include <givaro/modular.h>
+#include <givaro/modular-balanced.h>
+
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#include "Matio.h"
+#include "test-utils.h"
+#include "assert.h"
+
+template<class Field>
+bool test_freduce (const Field & F, size_t m, size_t k, size_t n, bool timing)
+{
+ typedef typename Field::Element T ;
+ size_t repet = 3 ;
+
+ T * A = FFLAS::fflas_new<T>(m*n);
+ T * B = FFLAS::fflas_new<T>(m*n);
+
+ Givaro::ModularBalanced<T> E(101);
+
+ if (timing) std::cout << ">>>" << std::endl ;
+ if (timing) std::cout << "=== inc == 1 ===" << std::endl ;
+
+ FFLAS::Timer chrono, tim, tom ;
+ tim.clear(); tom.clear();
+ if (timing) F.write(std::cout << "Field ") << std::endl;
+ for (size_t b = 0 ; b < repet ; ++b) {
+ FFPACK::RandomMatrix(E,A,m,k,n);
+ // RandomMatrix(E,B,m,k,n);
+ FFLAS::fassign(E,m,k,A,n,B,n);
+
+ chrono.clear();chrono.start();
+ for (size_t i = 0 ; i < m ; ++i)
+ for (size_t j = 0 ; j < k ; ++j)
+ F.init(A[i*n+j],A[i*n+j]);
+ chrono.stop();
+ tim += chrono ;
+
+ chrono.clear();chrono.start();
+ FFLAS::freduce (F,m,k,B,n);
+ chrono.stop();
+ tom += chrono ;
+
+#if 1
+ for (size_t i =0 ; i < m ; ++i)
+ for (size_t j =0 ; j < k ; ++j)
+ if (! F.areEqual(B[i*n+j],A[i*n+j])) {
+ F.write(std::cout) << std::endl << i << ',' << j << " : ";
+ F.write(std::cout, B[i*n+j]) << "!= (ref)";
+ F.write(std::cout, A[i*n+j]) << std::endl;
+ return false ;
+ }
+#endif
+ }
+
+ if (timing) std::cout << " freduce (___): " << tim.usertime()/(double)repet << 's' << std::endl;
+ if (timing) std::cout << " freduce (AVX): " << tom.usertime()/(double)repet << 's'<< std::endl << std::endl;
+
+ if (timing) std::cout << "=== inc != 1 ===" << std::endl ;
+
+ tim.clear() ; tom.clear();
+ if (timing) F.write(std::cout << "Modular ") << std::endl;
+ for (size_t b = 0 ; b < repet ; ++b) {
+ FFPACK::RandomMatrix(E,A,m,n,n);
+ FFLAS::fassign(E,m,n,A,n,B,n);
+ size_t incX = 2 ;
+
+ chrono.clear();chrono.start();
+ for (size_t i = 1 ; i < m*n ; i += incX) {
+ F.init(A[i],A[i]);
+ }
+ chrono.stop();
+ tim += chrono ;
+
+ size_t cnt = (size_t)floor((double)(m*n)/(double)incX) ;
+
+ chrono.clear();chrono.start();
+ FFLAS::freduce (F,cnt,B+1,incX);
+ chrono.stop();
+ tom += chrono ;
+
+#if 1
+ for (size_t i =1 ; i < m*n ; i+=incX)
+ if (! F.areEqual(B[i],A[i])) {
+ F.write(std::cout) << std::endl << i << " : ";
+ F.write(std::cout, B[i]) << "!= (ref)";
+ F.write(std::cout, A[i]) << std::endl;
+ return false ;
+ }
+#endif
+
+ }
+
+ if (timing) std::cout << " freduce (___): " << tim.usertime()/(double)repet << 's' << std::endl;
+ if (timing) std::cout << " freduce (AVX): " << tom.usertime()/(double)repet << 's'<< std::endl << std::endl;
+
+ if (timing) std::cout << "<<<" << std::endl;
+
+ FFLAS::fflas_delete( A );
+ FFLAS::fflas_delete( B);
+
+ return true;
+}
+
+int main(int ac, char **av) {
+ static size_t m = 297 ;
+ static size_t n = 301 ;
+ static size_t k = 299 ;
+ static uint64_t p = 7;
+ int seed = (int) time(NULL);
+ static bool timing = false ;
+
+ static Argument as[] = {
+ { 'p', "-p P", "Set the field characteristic.", TYPE_INT , &p },
+ { 'n', "-n N", "Set the number of cols in C.", TYPE_INT , &n },
+ { 'm', "-m N", "Set the number of rows in C.", TYPE_INT , &m },
+ { 'k', "-k N", "Set the number of rows in B.", TYPE_INT , &k },
+ { 's', "-s N", "Set the seed .", TYPE_INT , &seed },
+ { 't', "-timing", "Output timings" , TYPE_NONE, &timing},
+ END_OF_ARGUMENTS
+ };
+
+
+ FFLAS::parseArguments(ac,av,as);
+
+ if (n < k) {
+ std::cout << "Usage : m k n ; matrix of size m x k, lda is n" << std::endl;
+ return -1 ;
+ }
+
+ srand(seed);
+ srand48(seed);
+
+ bool pass = true ;
+ { /* freduce */
+ {
+ Givaro::Modular<float> F(p) ;
+ pass &= test_freduce (F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<float> F(p) ;
+ pass &= test_freduce (F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<double> F(p) ;
+ pass &= test_freduce (F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<double> F(p) ;
+ pass &= test_freduce (F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int32_t> F((int32_t)p) ;
+ pass &= test_freduce (F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int32_t> F((int32_t)p) ;
+ pass &= test_freduce (F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int64_t> F(p) ;
+ pass &= test_freduce (F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int64_t> F(p) ;
+ pass &= test_freduce (F,m,k,n,timing);
+ }
+#if 1
+ {
+ Givaro::ZRing<float> F ;
+ pass &= test_freduce (F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<double> F ;
+ pass &= test_freduce (F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int32_t> F;
+ pass &= test_freduce (F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int64_t> F ;
+ pass &= test_freduce (F,m,k,n,timing);
+ }
+#endif
+ }
+
+ return (pass?0:1) ;
+}
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+
diff --git a/tests/test-compressQ.C b/tests/test-frobenius.C
similarity index 57%
copy from tests/test-compressQ.C
copy to tests/test-frobenius.C
index 9894fba..fb643ea 100644
--- a/tests/test-compressQ.C
+++ b/tests/test-frobenius.C
@@ -5,20 +5,20 @@
* Copyright (C) FFLAS-FFPACK
* Written by Clément Pernet
* This file is Free Software and part of FFLAS-FFPACK.
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -38,79 +38,54 @@
//#define DEBUG 0
#include <iostream>
+#include <iomanip>
#include <list>
#include <vector>
-#include "Matio.h"
-#include "utils/timer.h"
-#include "fflas-ffpack/field/modular-balanced.h"
-#include "fflas-ffpack/ffpack/ffpack.h"
-
-#include "fflas-ffpack/utils/args-parser.h"
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/timer.h"
using namespace std;
+#include "givaro/modular.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
-typedef FFPACK:: Modular<double> Field;
+using namespace FFPACK;
+typedef Givaro::Modular<double> Field;
template<class T>
std::ostream& printvect(std::ostream& o, vector<T>& vect){
- for(size_t i=0; i < vect.size(); ++i)
+ for(size_t i=0; i < vect.size()-1; ++i)
o << vect[i] << " " ;
- return o << std::endl;
-}
-
-int main(int argc, char** argv)
-{
-
-
- static Argument as[] = {
- END_OF_ARGUMENTS
- };
-
- FFLAS::parseArguments(argc,argv,as);
-
- // int m,n;
-
- Field F(65521);
- size_t N = 17;
- double * A = new double[N*N];
- double * tmp = new double[N*N];
- size_t * deg = new size_t[N];
-
- for (size_t i=0; i<(size_t)N*N; ++i)
- A[i] = 0;
- for (size_t i=0; i<3; ++i)
- A[i+i*N] = 1;
-
- for (size_t i=3; i<6; ++i)
- A[i+1+i*N] = 1;
- for (size_t i=6; i<9; ++i)
- A[i+2+i*N] = 1;
-
- A[12+9*N] = 1;
- A[13+10*N] = 1;
- A[14+12*N] = 1;
- A[15+15*N] = 1;
- A[16+16*N] = 1;
- deg[0] = 4; deg[1] = 4; deg[2] = 4;deg[3] = 2; deg[4] = 1; deg[5] =2;
- for (size_t i=0; i<size_t(N); ++i)
- A[11+i*N] = A[7+i*N] = A[3+i*N] = double(i % 10);
-
- double * B = new double[N*N] ;
- FFLAS::fcopy(F,N*N,B,1,A,1);
-
- // write_field(F, cerr, A, N, N, N);
-
- FFPACK::CompressRowsQK (F, N, A+9*N, N, tmp, N, deg+3, 4, 3 );
-
- // write_field(F, cerr, A, N, N, N);
-
- FFPACK::DeCompressRowsQK (F, N, N-9, A+9*N, N, tmp, N, deg+3, 4, 3 );
-
- // write_field(F, cerr, A, N, N, N);
-
- for (size_t i = 0 ; i < (size_t)N * (size_t)N ; ++i)
- if (A[i] != B[i])
- return 1 ;
- return 0 ;
-
+ return o << vect[vect.size()-1] << std::endl;
+ }
+
+int main(int argc, char** argv){
+
+ int m,n;
+ cout<<setprecision(20);
+
+ if (argc!=4){
+ cerr<<"usage : test-frobenius <p> <A> <c>"<<endl
+ <<" to compute the frobenius normal form of the matrix A over Z/pZ, with conditonning parameter c"
+ <<endl;
+ exit(-1);
+ }
+ Field F( atoi(argv[1]) );
+ Field::Element one;
+ F.init(one, 1U);
+ Field::Element * A = read_field<Field> (F,argv[2],&m,&n);
+ size_t c = atoi(argv[3]);
+
+ std::list<vector<Field::Element> > frobForm;
+ FFLAS::Timer tim;
+ tim.clear();
+ tim.start();
+ FFPACK::CharpolyArithProg (F, frobForm, n, A, n, c);
+ tim.stop();
+ std::list<vector<Field::Element> >::iterator it = frobForm.begin();
+ while(it != frobForm.end()){
+ printvect (cout, *(it++));
+ }
+ cerr<<c<<" "<<tim.usertime()<<" "<<4.55*n*n/1000000.0*n/tim.usertime()<<endl;
+ FFLAS::fflas_delete( A);
+ return 0;
}
diff --git a/tests/test-fscal.C b/tests/test-fscal.C
new file mode 100644
index 0000000..95debfa
--- /dev/null
+++ b/tests/test-fscal.C
@@ -0,0 +1,332 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2014 FFLAS-FFPACK
+ * Written by :
+ * Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <typeinfo>
+#include <givaro/modular-balanced.h>
+
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#include "Matio.h"
+#include "test-utils.h"
+#include "assert.h"
+
+// using namespace FFPACK;
+using FFPACK::RandomMatrix ;
+using Givaro::ModularBalanced ;
+
+template<class Field>
+bool test_fscal(const Field & F, const typename Field::Element & alpha, size_t m, size_t k, size_t n, bool timing)
+{
+ typedef typename Field::Element T ;
+
+ T * A = FFLAS::fflas_new<T>(m*n);
+ T * C = FFLAS::fflas_new<T>(m*n);
+ T * D = FFLAS::fflas_new<T>(m*n);
+
+if (timing) std::cout << ">>>" << std::endl ;
+
+ size_t iter = 3 ;
+ FFLAS::Timer tim, tom, tam ;
+ tim.clear() ; tom.clear() ;
+ if (timing) F.write(std::cout << "Field ") << std::endl;
+ for (size_t b = 0 ; b < iter ; ++b) {
+ RandomMatrix(F,A,m,k,n);
+ RandomMatrix(F,C,m,k,n);
+ FFLAS::fassign(F,m,k,C,n,D,n);
+
+ tam.clear();tam.start();
+ for (size_t i = 0 ; i < m ; ++i)
+ for (size_t j = 0 ; j < k ; ++j)
+ F.mul(D[i*n+j],A[i*n+j],alpha);
+ tam.stop();
+ tim += tam ;
+
+ tam.clear();tam.start();
+ FFLAS::fscal(F,m,k,alpha,A,n,C,n);
+ tam.stop();
+ tom += tam ;
+
+#if 1
+ for (size_t i =0 ; i < m ; ++i)
+ for (size_t j =0 ; j < k ; ++j)
+ if (! F.areEqual(C[i*n+j],D[i*n+j])) {
+ if (timing) std::cout << i << ',' << j << " : " << C[i*n+j] << "!= (ref)" << D[i*n+j] << std::endl;
+ return false ;
+ }
+#endif
+ }
+ if (timing) std::cout << "fscal(___): " << tim.usertime()/(double)iter << 's' << std::endl;
+ if (timing) std::cout << "fscal (AVX): " << tom.usertime()/(double)iter << 's'<< std::endl;
+
+ if (timing) std::cout << "<<<" << std::endl;
+ FFLAS::fflas_delete( A );
+ FFLAS::fflas_delete( C );
+ FFLAS::fflas_delete( D );
+
+ return true;
+}
+
+template<class Field>
+bool test_fscal(const Field & F, size_t m, size_t k, size_t n, bool timing)
+{
+ ModularBalanced<typename Field::Element> G(1234); // for alpha
+ bool pass = true ;
+ typename Field::Element alpha;
+ F.init(alpha,F.one);
+ pass &= test_fscal(F,alpha,m,k,n,timing);
+ F.init(alpha,F.mOne);
+ pass &= test_fscal(F,alpha,m,k,n,timing);
+ F.init(alpha,F.zero);
+ pass &= test_fscal(F,alpha,m,k,n,timing);
+ typename ModularBalanced<typename Field::Element>::RandIter RValue( G );
+ F.init(alpha,RValue.random(alpha));
+ pass &= test_fscal(F,alpha,m,k,n,timing);
+ F.init(alpha,RValue.random(alpha));
+ pass &= test_fscal(F,alpha,m,k,n,timing);
+
+ return pass ;
+}
+
+template<class Field>
+bool test_fscalin(const Field & F, const typename Field::Element & alpha, size_t m, size_t k, size_t n, bool timing)
+{
+ typedef typename Field::Element T ;
+
+ T * C = FFLAS::fflas_new<T>(m*n);
+ T * D = FFLAS::fflas_new<T>(m*n);
+
+ if (timing) std::cout << ">>>" << std::endl ;
+
+ size_t iter = 3 ;
+ FFLAS::Timer tim, tom, tam ;
+ tim.clear() ; tom.clear() ;
+ if (timing) F.write(std::cout << "Field ") << std::endl;
+ for (size_t b = 0 ; b < iter ; ++b) {
+ RandomMatrix(F,C,m,k,n);
+ FFLAS::fassign(F,m,k,C,n,D,n);
+
+ tam.clear();tam.start();
+ for (size_t i = 0 ; i < m ; ++i)
+ for (size_t j = 0 ; j < k ; ++j)
+ F.mulin(D[i*n+j],alpha);
+ tam.stop();
+ tim += tam ;
+
+ tam.clear();tam.start();
+ FFLAS::fscalin(F,m,k,alpha,C,n);
+ tam.stop();
+ tom += tam ;
+
+#if 1
+ for (size_t i =0 ; i < m ; ++i)
+ for (size_t j =0 ; j < k ; ++j)
+ if (! F.areEqual(C[i*n+j],D[i*n+j])) {
+ if (timing) std::cout << i << ',' << j << " : " << C[i*n+j] << "!= (ref)" << D[i*n+j] << std::endl;
+ return false ;
+ }
+#endif
+ }
+ if (timing) std::cout << "fscalin(___): " << tim.usertime()/(double)iter << 's' << std::endl;
+ if (timing) std::cout << "fscalin (AVX): " << tom.usertime()/(double)iter << 's'<< std::endl;
+
+ if (timing) std::cout << "<<<" << std::endl;
+ FFLAS::fflas_delete( C );
+ FFLAS::fflas_delete( D );
+
+ return true;
+}
+
+
+template<class Field>
+bool test_fscalin(const Field & F, size_t m, size_t k, size_t n, bool timing)
+{
+ ModularBalanced<typename Field::Element> G(1234); // for alpha
+ bool pass = true ;
+ typename Field::Element alpha;
+ F.init(alpha,F.one);
+ pass &= test_fscalin(F,alpha,m,k,n,timing);
+ F.init(alpha,F.mOne);
+ pass &= test_fscalin(F,alpha,m,k,n,timing);
+ F.init(alpha,F.zero);
+ pass &= test_fscalin(F,alpha,m,k,n,timing);
+ typename ModularBalanced<typename Field::Element>::RandIter RValue( G );
+ F.init(alpha,RValue.random(alpha));
+ pass &= test_fscalin(F,alpha,m,k,n,timing);
+ F.init(alpha,RValue.random(alpha));
+ pass &= test_fscalin(F,alpha,m,k,n,timing);
+
+ return pass ;
+}
+
+int main(int ac, char **av) {
+ static size_t m = 300 ;
+ static size_t n = 301 ;
+ static size_t k = 300 ;
+ static uint64_t p = 7;
+ int seed = (int) time(NULL);
+ static bool timing = false ;
+
+ static Argument as[] = {
+ { 'p', "-p P", "Set the field characteristic.", TYPE_INT , &p },
+ { 'n', "-n N", "Set the number of cols in C." , TYPE_INT , &n },
+ { 'm', "-m N", "Set the number of rows in C." , TYPE_INT , &m },
+ { 'k', "-k N", "Set the number of rows in B." , TYPE_INT , &k },
+ { 's', "-s N", "Set the seed." , TYPE_INT , &seed },
+ { 't', "-timing", "Output timings" , TYPE_NONE, &timing},
+ END_OF_ARGUMENTS
+ };
+
+
+ FFLAS::parseArguments(ac,av,as);
+
+ if (n < k) {
+ std::cout << "Usage : m k n ; matrix of size m x k, lda is n" << std::endl;
+ return -1 ;
+ }
+
+ srand(seed);
+ srand48(seed);
+
+ // std::cout << seed << std::endl;
+
+ bool pass = true ;
+ { /* fscal */
+ {
+ Givaro::Modular<float> F(p) ;
+ pass &= test_fscal(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<float> F(p) ;
+ pass &= test_fscal(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<double> F(p) ;
+ pass &= test_fscal(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<double> F(p) ;
+ pass &= test_fscal(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int32_t> F((int32_t)p) ;
+ pass &= test_fscal(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int32_t> F((int32_t)p) ;
+ pass &= test_fscal(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int64_t> F(p) ;
+ pass &= test_fscal(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int64_t> F(p) ;
+ pass &= test_fscal(F,m,k,n,timing);
+ }
+#if 1
+ {
+ Givaro::ZRing<float> F ;
+ pass &= test_fscal(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<double> F ;
+ pass &= test_fscal(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int32_t> F;
+ pass &= test_fscal(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int64_t> F ;
+ pass &= test_fscal(F,m,k,n,timing);
+ }
+#endif
+ }
+ { /* fscalin */
+ {
+ Givaro::Modular<float> F(p) ;
+ pass &= test_fscalin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<float> F(p) ;
+ pass &= test_fscalin(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<double> F(p) ;
+ pass &= test_fscalin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<double> F(p) ;
+ pass &= test_fscalin(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int32_t> F((int32_t)p) ;
+ pass &= test_fscalin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int32_t> F((int32_t)p) ;
+ pass &= test_fscalin(F,m,k,n,timing);
+ }
+ {
+ Givaro::Modular<int64_t> F(p) ;
+ pass &= test_fscalin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ModularBalanced<int64_t> F(p) ;
+ pass &= test_fscalin(F,m,k,n,timing);
+ }
+#if 1
+ {
+ Givaro::ZRing<float> F ;
+ pass &= test_fscalin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<double> F ;
+ pass &= test_fscalin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int32_t> F;
+ pass &= test_fscalin(F,m,k,n,timing);
+ }
+ {
+ Givaro::ZRing<int64_t> F ;
+ pass &= test_fscalin(F,m,k,n,timing);
+ }
+#endif
+ }
+
+ return (pass?0:1) ;
+}
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+
diff --git a/tests/test-fspmm-dlp.C b/tests/test-fspmm-dlp.C
new file mode 100644
index 0000000..05c734a
--- /dev/null
+++ b/tests/test-fspmm-dlp.C
@@ -0,0 +1,353 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* Copyright (c) FFLAS-FFPACK
+* Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+* ========LICENCE========
+* This file is part of the library FFLAS-FFPACK.
+*
+* FFLAS-FFPACK is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+* ========LICENCE========
+*/
+
+#define __DLP_CHALLENGE
+
+#include <iostream>
+#include <vector>
+#include <sstream>
+#include <cstdio>
+#include <cstdlib>
+
+#include "gmpxx.h"
+#include <givaro/zring.h>
+#include <givaro/modular.h>
+#include <givaro/modular-balanced.h>
+#include <givaro/givinteger.h>
+#include <recint/recint.h>
+#include <givaro/givintprime.h>
+
+#include "fflas-ffpack/fflas/fflas_sparse.h"
+#include "fflas-ffpack/utils/args-parser.h"
+#include "fflas-ffpack/field/rns-integer-mod.h"
+#include "fflas-ffpack/fflas/fflas_sparse/read_sparse.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/flimits.h"
+
+#ifdef __FFLASFFPACK_USE_OPENMP
+typedef FFLAS::OMPTimer TTimer;
+#else
+typedef FFLAS::Timer TTimer;
+#endif
+
+using namespace std;
+using namespace FFLAS;
+using namespace Givaro;
+
+using Data = std::vector<details_spmv::Coo<ZRing<double>>>;
+using Coo = typename Data::value_type;
+
+/*******************************************************************************************************************
+ *
+ * Utility functions: sms reader and random field
+ *
+ *******************************************************************************************************************/
+
+void readMat(string path, index_t *& row, index_t *& col, double *&val, index_t &rowdim, index_t &coldim, uint64_t & nnz){
+ std::ifstream file(path, std::ios::out);
+ std::string line, nnz_c;
+ std::getline(file, line);
+ std::istringstream(line) >> rowdim >> coldim >> nnz_c;
+ Data mat;
+ int64_t r, c, v;
+ while(std::getline(file, line)){
+ std::istringstream(line) >> r >> c >> v;
+ if(r!=0)
+ mat.emplace_back(v, r-1,c-1);
+ }
+ std::sort(mat.begin(), mat.end(),
+ [](Coo &a, Coo &b){
+ return (a.row < b.row) || ((a.row == b.row) && (a.col < b.col));
+ ;});
+ mat.shrink_to_fit();
+ nnz = mat.size();
+ val = FFLAS::fflas_new<double>(nnz, Alignment::CACHE_LINE);
+ col = FFLAS::fflas_new<index_t>(nnz, Alignment::CACHE_LINE);
+ row = FFLAS::fflas_new<index_t>(nnz, Alignment::CACHE_LINE);
+ for(size_t i = 0 ; i < nnz ; ++i){
+ val[i] = mat[i].val;
+ col[i] = mat[i].col;
+ row[i] = mat[i].row;
+ }
+}
+
+template<class T>
+size_t bitSize(T n){
+ return sizeof(T)*4-__builtin_clz(n);
+}
+
+template<typename Field>
+Givaro::Integer maxFieldElt() {return (Givaro::Integer)Field::maxCardinality();}
+template<>
+Givaro::Integer maxFieldElt<Givaro::ZRing<Givaro::Integer>>() {return (Givaro::Integer)-1;}
+
+/*** Field chooser for test according to characteristic q and bitsize b ***/
+/* if q=-1 -> field is chosen randomly with a charateristic of b bits
+ if b=0 -> bitsize is chosen randomly according to maxFieldElt
+*/
+template<typename Field>
+Field* chooseField(Givaro::Integer q, uint64_t b){
+ Givaro::Integer maxV= maxFieldElt<Field>();
+ auto seed = std::chrono::high_resolution_clock::now().time_since_epoch().count();
+ std::mt19937 mt_rand(seed);
+ if (maxV>0 && (q> maxV || b> maxV.bitsize()))
+ return nullptr;
+ if (b<=1){
+ //srand((double)std::chrono::high_resolution_clock::now());
+ auto bitrand = std::bind(std::uniform_int_distribution<uint64_t>(2,maxV.bitsize()-1),
+ mt_rand);
+ b = bitrand();
+ }
+ Givaro::IntPrimeDom IPD;
+ Givaro::Integer tmp,p;
+ if (q==-1){
+ // Choose characteristic as a random prime of b bits
+ do{
+ Givaro::Integer _p;
+ Givaro::Integer::seeding(Givaro::Integer(mt_rand()));
+ Givaro::Integer::random_exact_2exp(_p,b);
+ IPD.prevprime( tmp, _p+1 );
+ p = tmp;
+ }while( (p < 2) );
+ }
+ else p=q;
+
+ return new Field(p);
+}
+
+/*************************************************************************************************************/
+
+int main(int argc, char **argv) {
+ using Field = Modular<Integer>;
+ using FieldMat = ZRing<double>;
+ using FieldComp = FFPACK::RNSIntegerMod<FFPACK::rns_double_extended>;
+ using SparseMatrix = FFLAS::Sparse<FieldMat, FFLAS::SparseMatrix_t::CSR>;
+
+ Integer q = -1;
+ int b = 128;
+ int blockSize = 1;
+ std::string matrixFile = "";
+ int nIter = 100;
+
+ static Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INTEGER , &q },
+ { 'b', "-b B", "Set the bitsize of the random characteristic.", TYPE_INT , &b },
+ { 'k', "-k K", "Set the size of the block (1 by default).", TYPE_INT, &blockSize },
+ { 'n', "-n N", "Number of iterations (1 by default).", TYPE_INT, &nIter },
+ { 'f', "-f FILE", "Set matrix file.", TYPE_STR, &matrixFile },
+ END_OF_ARGUMENTS };
+
+ FFLAS::parseArguments(argc, argv, as);
+
+ // Construct Givaro::Integer field
+ Field *F= chooseField<Field>(q,b);
+ if (F==nullptr) exit(0);
+ Integer p;
+ F->cardinality(p);
+ cout << "Prime p: " << p << endl;
+
+ // Pointers for the matrix
+ index_t *row = nullptr, *col = nullptr;
+ typename FieldMat::Element_ptr dat;
+ index_t rowdim, coldim;
+ uint64_t nnz;
+ // Field associate to the matrix
+ FieldMat Fword;
+
+ // Read the matrix
+ readMat(matrixFile, row, col, dat, rowdim, coldim, nnz);
+
+ vector<int64_t> rows(rowdim, 0);
+ for(size_t i = 0 ; i < nnz ; ++i)
+ rows[row[i]]++;
+ for(size_t i = 0 ; i < 20 ; ++i)
+ cout << "#rows with "<<i<<" nnz: " << std::count(rows.begin(), rows.end(), i) << endl;
+
+ // Build the matrix
+ SparseMatrix A;
+ FFLAS::sparse_init(Fword, A, row, col, dat, rowdim, coldim, nnz);
+
+ FFLAS::fflas_delete(row);
+ FFLAS::fflas_delete(col);
+ FFLAS::fflas_delete(dat);
+
+ vector<double> x(coldim, 1), y(rowdim, 0);
+
+ cout.precision(20);
+
+ // Compute the bigger row
+ FFLAS::fspmv(Fword, A, x.data(), 0, y.data());
+ for(auto &x: y){
+ if(x < 0){
+ x = -x;
+ }
+ }
+ double maxSum = *(std::max_element(y.begin(), y.end()));
+ cout << "maxSum: " << maxSum << endl;
+
+ // Compute the bitsize of the RNS primes
+ size_t primeBitsize = 53 - Integer(maxSum).bitsize()-1;
+ cout << "primeBitsize: " << primeBitsize << endl;
+ // construct RNS
+ // primeBitsize = 23;
+ FFPACK::rns_double_extended RNS(Integer(maxSum)*p, primeBitsize, true, 0);
+ size_t rnsSize = RNS._size;
+ cout << "M: " << RNS._M << endl;
+ cout << "RNS basis size: " << rnsSize << endl;
+ cout << "Rns basis: ";
+ for(auto&x:RNS._basis){
+ cout << x << " ";
+ }
+ cout << endl;
+ cout << "RNS Mi: " << endl;
+ for(auto &x : RNS._Mi){
+ cout << x << " ";
+ }
+ cout << endl;
+ cout << "RNS MMi: " << endl;
+ for(auto &x : RNS._MMi){
+ cout << x << " ";
+ }
+ cout << endl;
+ // construct RNS field
+ FieldComp Frns(p,RNS);
+
+ std::vector<Integer> X(coldim*blockSize), Y(rowdim*blockSize, 0);
+
+ // Fill X with random values
+ for(auto &x: X){
+ Givaro::Integer::random_exact_2exp(x,b);
+ F->init(x, x);
+ }
+
+ size_t ld = 0;
+ Integer maxRep = Integer(maxSum)*rnsSize*p;
+ while(maxRep.bitsize() < RNS._M.bitsize()){
+ maxRep *= Integer(maxSum);
+ ld++;
+ }
+ ld -= 1;
+ cout << "Spmm by modp: " << ld << endl;
+
+ double* Xrns = fflas_new<double>(coldim*blockSize*rnsSize, Alignment::CACHE_LINE);
+ double* Yrns = fflas_new<double>(rowdim*blockSize*rnsSize, Alignment::CACHE_LINE);
+
+ // Transform X in RNS
+ RNS.init(coldim*blockSize, Xrns, X.data(), 1);
+
+ cout << endl;
+ TTimer Tspmm;
+ TTimer Tmodp;
+ TTimer Ttotal;
+ double spmmTime = 0, modpTime = 0;
+ bool bb = true;
+ Ttotal.start();
+ for(size_t kk = 1 ; kk <= nIter ; ++kk){
+ // perform Yrns = A.Xrns + beta.Yrns over ZZ
+ Tspmm.start();
+ if(bb){
+ pfspmm(Fword, A, blockSize*rnsSize, Xrns, blockSize*rnsSize, 0, Yrns, blockSize*rnsSize);
+ RNS.reduce(rowdim*blockSize, Yrns, 1, true);
+ // reduce Yrns wrt the RNS basis
+ Tspmm.stop();
+ spmmTime += Tspmm.usertime();
+
+ cout << "after spmm:" << endl;
+ for(size_t i = 0, end = (Y.size()>20)?20:Y.size() ; i < end ; ++i){
+ cout << Yrns[i] << " ";
+ }
+ cout << endl;
+ bb = !bb;
+ // if(kk%ld == 0){
+ Tmodp.start();
+ Frns.reduce_modp_rnsmajor_scal_quad(rowdim*blockSize, FFPACK::rns_double_elt_ptr(Yrns, 1));
+ Tmodp.stop();
+ modpTime += Tmodp.usertime();
+ cout << "after modp:" << endl;
+ for(size_t i = 0, end = (Y.size()>20)?20:Y.size() ; i < end ; ++i){
+ cout << Yrns[i] << " ";
+ }
+ cout << endl;
+ // }
+ }else{
+ fspmm(Fword, A, blockSize*rnsSize, Yrns, blockSize*rnsSize, 0, Xrns, blockSize*rnsSize);
+ RNS.reduce(rowdim*blockSize, Xrns, 1, true);
+ // reduce Yrns wrt the RNS basis
+ Tspmm.stop();
+ spmmTime += Tspmm.usertime();
+ bb = !bb;
+ for(size_t i = 0, end = (Y.size()>20)?20:Y.size() ; i < end ; ++i){
+ cout << Xrns[i] << " ";
+ }
+ cout << endl;
+ // if(kk%ld == 0){
+ Tmodp.start();
+ Frns.reduce_modp_rnsmajor_scal_quad(rowdim*blockSize, FFPACK::rns_double_elt_ptr(Xrns, 1));
+ Tmodp.stop();
+ modpTime += Tmodp.usertime();
+ // }
+ cout << "after modp:" << endl;
+ for(size_t i = 0, end = (Y.size()>20)?20:Y.size() ; i < end ; ++i){
+ cout << Xrns[i] << " ";
+ }
+ cout << endl;
+ }
+ }
+ // if(bb && nIter%ld != 0){
+ // Tmodp.start();
+ // Frns.reduce_modp_rnsmajor_scal_quad(rowdim*blockSize, FFPACK::rns_double_elt_ptr(Yrns, 1));
+ // Tmodp.stop();
+ // modpTime += Tmodp.usertime();
+ // }else if(!bb && nIter%ld != 0){
+ // Tmodp.start();
+ // Frns.reduce_modp_rnsmajor_scal_quad(rowdim*blockSize, FFPACK::rns_double_elt_ptr(Xrns, 1));
+ // Tmodp.stop();
+ // modpTime += Tmodp.usertime();
+ // }
+
+ // Reconstruct Y from Yrns
+ RNS.convert(rowdim*blockSize, Y.data(), Yrns);
+ Ttotal.stop();
+ for(size_t i = 0 ; i < rowdim*blockSize ; ++i){
+ if(Y[i] < 0){
+ Integer q = -Y[i] / p;
+ Y[i] = p - (-Y[i] - p*q);
+ }
+ Y[i] %= p;
+ }
+ cout << "Y res:" << endl;
+ for(size_t i = 0, end = (Y.size()>20)?20:Y.size() ; i < end ; ++i){
+ cout << Y[i] << " ";
+ }
+ cout << endl;
+ cout << nIter << " iterations in " << Ttotal << endl;
+ cout << "spmm: " << spmmTime << endl;
+ cout << "modp: " << modpTime << endl;
+
+ FFLAS::fflas_delete(Xrns);
+ FFLAS::fflas_delete(Yrns);
+
+ return 0;
+}
+
diff --git a/tests/test-fspmm-recint.C b/tests/test-fspmm-recint.C
new file mode 100644
index 0000000..9d283d0
--- /dev/null
+++ b/tests/test-fspmm-recint.C
@@ -0,0 +1,177 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* Copyright (c) FFLAS-FFPACK
+* Written by Bastien Vialla <bastien.vialla at lirmm.fr>
+* ========LICENCE========
+* This file is part of the library FFLAS-FFPACK.
+*
+* FFLAS-FFPACK is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+* ========LICENCE========
+*/
+
+#define __DLP_CHALLENGE
+
+#include <iostream>
+#include <vector>
+#include <sstream>
+#include <cstdio>
+#include <cstdlib>
+
+#include "gmpxx.h"
+#include <givaro/zring.h>
+#include <givaro/modular.h>
+#include <givaro/modular-balanced.h>
+#include <givaro/givinteger.h>
+#include <recint/recint.h>
+#include <givaro/givintprime.h>
+
+using namespace RecInt;
+
+#include "fflas-ffpack/fflas/fflas_sparse.h"
+#include "fflas-ffpack/utils/args-parser.h"
+#include "fflas-ffpack/field/rns-integer-mod.h"
+#include "fflas-ffpack/fflas/fflas_sparse/read_sparse.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/flimits.h"
+
+#ifdef __FFLASFFPACK_USE_OPENMP
+typedef FFLAS::OMPTimer TTimer;
+#else
+typedef FFLAS::Timer TTimer;
+#endif
+
+using namespace std;
+using namespace FFLAS;
+using namespace Givaro;
+
+/*******************************************************************************************************************
+ *
+ * Utility functions: sms reader and random field
+ *
+ *******************************************************************************************************************/
+
+template<class T>
+size_t bitSize(T n){
+ return sizeof(T)*4-__builtin_clz(n);
+}
+
+template<typename Field>
+Givaro::Integer maxFieldElt() {return (Givaro::Integer)Field::maxCardinality();}
+template<>
+Givaro::Integer maxFieldElt<Givaro::ZRing<Givaro::Integer>>() {return (Givaro::Integer)-1;}
+
+/*** Field chooser for test according to characteristic q and bitsize b ***/
+/* if q=-1 -> field is chosen randomly with a charateristic of b bits
+ if b=0 -> bitsize is chosen randomly according to maxFieldElt
+*/
+template<typename Field>
+Field* chooseField(Givaro::Integer q, uint64_t b){
+ Givaro::Integer maxV= maxFieldElt<Field>();
+ auto seed = std::chrono::high_resolution_clock::now().time_since_epoch().count();
+ std::mt19937 mt_rand(seed);
+ if (maxV>0 && (q> maxV || b> maxV.bitsize()))
+ return nullptr;
+ if (b<=1){
+ //srand((double)std::chrono::high_resolution_clock::now());
+ auto bitrand = std::bind(std::uniform_int_distribution<uint64_t>(2,maxV.bitsize()-1),
+ mt_rand);
+ b = bitrand();
+ }
+ Givaro::IntPrimeDom IPD;
+ Givaro::Integer tmp,p;
+ if (q==-1){
+ // Choose characteristic as a random prime of b bits
+ do{
+ Givaro::Integer _p;
+ Givaro::Integer::seeding(Givaro::Integer(mt_rand()));
+ Givaro::Integer::random_exact_2exp(_p,b);
+ IPD.prevprime( tmp, _p+1 );
+ p = tmp;
+ }while( (p < 2) );
+ }
+ else p=q;
+
+ return new Field(p);
+}
+
+/*************************************************************************************************************/
+
+int main(int argc, char **argv) {
+ using Field = Modular<Integer>;
+ using FieldMat = ZRing<double>;
+ using FieldComp = FFPACK::RNSIntegerMod<FFPACK::rns_double>;
+ using FieldElement = RecInt::rmint<7>;
+ using FieldRec = ZRing<FieldElement>;
+ using SparseMatrix = FFLAS::Sparse<FieldRec, FFLAS::SparseMatrix_t::HYB_ZO>;
+
+ Integer q = -1;
+ int b = 128;
+ int blockSize = 1;
+ std::string matrixFile = "";
+ int nIter = 100;
+
+ static Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INTEGER , &q },
+ { 'b', "-b B", "Set the bitsize of the random characteristic.", TYPE_INT , &b },
+ { 'k', "-k K", "Set the size of the block (1 by default).", TYPE_INT, &blockSize },
+ { 'n', "-n N", "Set the size of the block (1 by default).", TYPE_INT, &nIter },
+ { 'f', "-f FILE", "Set matrix file.", TYPE_STR, &matrixFile },
+ END_OF_ARGUMENTS };
+
+ FFLAS::parseArguments(argc, argv, as);
+
+ // Construct Givaro::Integer field
+ Field *F= chooseField<Field>(q,b);
+ if (F==nullptr) exit(0);
+ Integer p;
+ F->cardinality(p);
+ cout << "Prime p: " << p << endl;
+
+ RecInt::ruint<7> pRec;
+ // RecInt::mpz_to_ruint(pRec, FieldElement(p));
+ FieldElement::init_module(ruint<7>(p));
+ FieldRec Frec;
+ // Pointers for the matrix
+ index_t *row = nullptr, *col = nullptr;
+ typename FieldRec::Element_ptr dat;
+ index_t rowdim, coldim;
+ uint64_t nnz;
+
+ // Read the matrix
+ readSmsFormat(matrixFile, Frec, row, col, dat, rowdim, coldim, nnz);
+ vector<index_t> rowCoo(nnz, 0);
+ for(size_t i = 0 ; i < rowdim ; ++i){
+ for(size_t j = row[i] ; j < row[i+1] ; ++j){
+ rowCoo[j] = i;
+ }
+ }
+
+ // Build the matrix
+ SparseMatrix A;
+ FFLAS::sparse_init(Frec, A, rowCoo.data(), col, dat, rowdim, coldim, nnz);
+
+ FFLAS::fflas_delete(row);
+ FFLAS::fflas_delete(col);
+ FFLAS::fflas_delete(dat);
+ rowCoo.resize(0);
+
+ vector<FieldElement> x(coldim*blockSize, 1), y(rowdim*blockSize, 0);
+
+ pfspmm(Frec, A, blockSize, x.data(), blockSize, 0, y.data(), blockSize);
+
+ return 0;
+}
+
diff --git a/tests/test-fsquare.C b/tests/test-fsquare.C
new file mode 100644
index 0000000..5918f4f
--- /dev/null
+++ b/tests/test-fsquare.C
@@ -0,0 +1,102 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet <clement.pernet at imag.fr>
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for fsquare : 1 computation
+//
+//--------------------------------------------------------------------------
+// Clement Pernet
+//-------------------------------------------------------------------------
+
+//#define DEBUG 0
+#define TIME 1
+
+#include <iomanip>
+#include <iostream>
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "Matio.h"
+#include "fflas-ffpack/fflas/fflas.h"
+
+
+using namespace FFPACK;
+using namespace std;
+
+typedef Givaro::Modular<double> Field;
+
+int main(int argc, char** argv){
+
+ int n;
+
+ cerr<<setprecision(10);
+ if (argc != 6) {
+ cerr<<"Usage : test-fsquare <p> <A> <i>"
+ <<"<alpha> <beta>"
+ <<" to do i computations of C <- AA"
+ <<endl;
+ exit(-1);
+ }
+ Field F(atoi(argv[1]));
+
+ Field::Element * A;
+ Field::Element * C;
+ // size_t lda;
+ // size_t ldb;
+
+ A = read_field(F,argv[2],&n,&n);
+ int nbit=atoi(argv[3]); // number of times the product is performed
+
+ Field::Element alpha,beta;
+ F.init (alpha, (double)atoi(argv[4]));
+ F.init (beta, (double)atoi(argv[5]));
+
+ C = FFLAS::fflas_new<Field::Element>(n*n);
+ FFLAS::Timer tim,t; t.clear();tim.clear();
+ for(int i = 0;i<nbit;++i){
+ t.clear();
+ t.start();
+ FFLAS::fsquare (F, FFLAS::FflasNoTrans,n, alpha, A,n, beta, C, n);
+ t.stop();
+ tim+=t;
+ }
+
+#if TIME
+ double mflops = (2.0*(n*n/1000000.0)*nbit*n/tim.usertime());
+ cerr << n <<"x" <<n <<" : fsquare over Z/"
+ <<atoi(argv[1])<<"Z : [ "
+ <<mflops<<" MFops in "<<tim.usertime()/nbit<<"s]"
+ << endl;
+
+ cerr<<"alpha, beta = "<<alpha <<", "<<beta <<endl;
+
+ cout<<n<<" "<<n<<" "<<mflops<<" "<<tim.usertime()/nbit<<endl;
+#endif
+}
+
+
diff --git a/tests/test-ftrsm.C b/tests/test-ftrsm.C
new file mode 100644
index 0000000..ec4d311
--- /dev/null
+++ b/tests/test-ftrsm.C
@@ -0,0 +1,231 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+#define __FFLASFFPACK_SEQUENTIAL
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <givaro/modular-integer.h>
+
+#include <iomanip>
+#include <iostream>
+
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/utils/args-parser.h"
+#include "test-utils.h"
+#include <givaro/modular.h>
+#include <givaro/modular-balanced.h>
+
+
+using namespace std;
+using namespace FFPACK;
+using Givaro::Modular;
+using Givaro::ModularBalanced;
+
+template<typename T>
+void write_matrix(Givaro::Integer p, size_t m, size_t n, T* C, size_t ldc){
+
+ size_t www=(p.bitsize()*log(2.))/log(10.);
+ for (size_t i=0;i<m;++i){
+ cout<<"[ ";
+ cout.width(www+1);
+ cout<<std::right<<C[i*ldc];
+ for (size_t j=1;j<n;++j){
+ cout<<" ";
+ cout.width(www);
+ cout<<std::right<<C[i*ldc+j];
+ }
+ cout<<"]"<<endl;
+ }
+ cout<<endl;
+
+}
+
+
+template<typename Field>
+bool check_ftrsm (const Field &F, size_t m, size_t n, const typename Field::Element &alpha, FFLAS::FFLAS_SIDE side, FFLAS::FFLAS_UPLO uplo, FFLAS::FFLAS_TRANSPOSE trans, FFLAS::FFLAS_DIAG diag){
+
+ typedef typename Field::Element Element;
+ Element * A, *B, *B2, *C, tmp;
+ size_t k = (side==FFLAS::FflasLeft?m:n);
+ size_t lda,ldb,ldc;
+ lda=k+13;
+ ldb=n+14;
+ ldc=n+15;
+ A = FFLAS::fflas_new(F,k,lda);
+ B = FFLAS::fflas_new(F,m,ldb);
+ B2 = FFLAS::fflas_new(F,m,ldb);
+ C = FFLAS::fflas_new(F,m,ldc);
+
+ typename Field::RandIter Rand(F);
+ typename Field::NonZeroRandIter NZRand(Rand);
+
+ for (size_t i=0;i<k;++i){
+ for (size_t j=0;j<i;++j)
+ A[i*lda+j]= (uplo == FFLAS::FflasLower)? Rand.random(tmp) : F.zero;
+ A[i*lda+i]= (diag == FFLAS::FflasNonUnit)? NZRand.random(tmp) : F.one;
+ for (size_t j=i+1;j<k;++j)
+ A[i*lda+j]= (uplo == FFLAS::FflasUpper)? Rand.random(tmp) : F.zero;
+ }
+ for (size_t i=0;i<m;++i){
+ for(size_t j=0; j<n; ++j){
+ B[i*ldb+j]= Rand.random(tmp);
+ B2[i*ldb+j]=B[i*ldb+j];
+ }
+ }
+
+ string ss=string((uplo == FFLAS::FflasLower)?"Lower_":"Upper_")+string((side == FFLAS::FflasLeft)?"Left_":"Right_")+string((trans == FFLAS::FflasTrans)?"Trans_":"NoTrans_")+string((diag == FFLAS::FflasUnit)?"Unit":"NonUnit");
+
+ cout<<std::left<<"Checking FTRSM_";
+ cout.fill('.');
+ cout.width(35);
+ cout<<ss;
+
+
+ FFLAS::Timer t; t.clear();
+ double time=0.0;
+ t.clear();
+ t.start();
+ FFLAS::ftrsm (F, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
+ t.stop();
+ time+=t.usertime();
+
+ Element invalpha;
+ F.init(invalpha);
+ F.inv(invalpha, alpha);
+
+ //FFLAS::ftrmm (F, side, uplo, trans, diag, m, n, invalpha, A, k, B, n);
+
+ if (side == FFLAS::FflasLeft)
+ FFLAS::fgemm(F, trans, FFLAS::FflasNoTrans, m, n, m, invalpha, A, lda, B, ldb, F.zero, C, ldc);
+ else
+ FFLAS::fgemm(F, FFLAS::FflasNoTrans, trans, m, n, n, invalpha, B, ldb, A, lda, F.zero, C, ldc);
+
+
+ bool wrong = false;
+ for (size_t i=0;i<m;++i)
+ for (size_t j=0;j<n;++j)
+ if ( !F.areEqual(*(B2+i*ldb+j), *(C+i*ldc+j))){
+ wrong = true;
+ }
+ if ( wrong ){
+ //cout << "\033[1;31mFAILED\033[0m ("<<time<<")"<<endl;
+ cout << "FAILED ("<<time<<")"<<endl;
+ //cerr<<"FAILED ("<<time<<")"<<endl;
+
+ } else
+ //cout << "\033[1;32mPASSED\033[0m ("<<time<<")"<<endl;
+ cout << "PASSED ("<<time<<")"<<endl;
+ //cerr<<"PASSED ("<<time<<")"<<endl;
+
+ F.mulin(invalpha,alpha);
+ if (!F.isOne(invalpha)){
+ cerr<<"invalpha is wrong !!!"<<endl;;
+ }
+
+ FFLAS::fflas_delete(A);
+ FFLAS::fflas_delete(B);
+ FFLAS::fflas_delete(B2);
+ FFLAS::fflas_delete(C);
+ return !wrong;
+}
+template <class Field>
+bool run_with_field (Givaro::Integer q, size_t b, size_t m, size_t n, int s, size_t iters){
+ bool ok = true ;
+ int nbit=(int)iters;
+
+ while (ok && nbit){
+ //typedef typename Field::Element Element ;
+ // choose Field
+ Field* F= chooseField<Field>(q,b);
+ if (F==nullptr)
+ return true;
+
+ typename Field::Element alpha;
+ F->init (alpha, (typename Field::Element)s);
+ cout<<"Checking with ";F->write(cout)<<endl;
+
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasLeft,FFLAS::FflasLower,FFLAS::FflasNoTrans,FFLAS::FflasUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasLeft,FFLAS::FflasUpper,FFLAS::FflasNoTrans,FFLAS::FflasUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasLeft,FFLAS::FflasLower,FFLAS::FflasTrans,FFLAS::FflasUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasLeft,FFLAS::FflasUpper,FFLAS::FflasTrans,FFLAS::FflasUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasRight,FFLAS::FflasLower,FFLAS::FflasNoTrans,FFLAS::FflasUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasRight,FFLAS::FflasUpper,FFLAS::FflasNoTrans,FFLAS::FflasUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasRight,FFLAS::FflasLower,FFLAS::FflasTrans,FFLAS::FflasUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasRight,FFLAS::FflasUpper,FFLAS::FflasTrans,FFLAS::FflasUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasLeft,FFLAS::FflasLower,FFLAS::FflasNoTrans,FFLAS::FflasNonUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasLeft,FFLAS::FflasUpper,FFLAS::FflasNoTrans,FFLAS::FflasNonUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasLeft,FFLAS::FflasLower,FFLAS::FflasTrans,FFLAS::FflasNonUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasLeft,FFLAS::FflasUpper,FFLAS::FflasTrans,FFLAS::FflasNonUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasRight,FFLAS::FflasLower,FFLAS::FflasNoTrans,FFLAS::FflasNonUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasRight,FFLAS::FflasUpper,FFLAS::FflasNoTrans,FFLAS::FflasNonUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasRight,FFLAS::FflasLower,FFLAS::FflasTrans,FFLAS::FflasNonUnit);
+ ok = ok && check_ftrsm(*F,m,n,alpha,FFLAS::FflasRight,FFLAS::FflasUpper,FFLAS::FflasTrans,FFLAS::FflasNonUnit);
+ nbit--;
+ delete F;
+ }
+ return ok;
+}
+
+int main(int argc, char** argv)
+{
+ cerr<<setprecision(10);
+ static Givaro::Integer q=-1;
+ static size_t b=0;
+ static size_t m=128;
+ static size_t n=128;
+ static size_t s=1;
+ static size_t iters=1;
+ static bool loop=false;
+ static Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INTEGER , &q },
+ { 'b', "-b B", "Set the bitsize of the field characteristic.", TYPE_INT , &b },
+ { 'm', "-m M", "Set the row dimension of unknown matrix.", TYPE_INT , &m },
+ { 'n', "-n N", "Set the column dimension of the unknown matrix.", TYPE_INT , &n },
+ { 's', "-s S", "Set the scaling of trsm", TYPE_INT , &s },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iters },
+ { 'l', "-loop Y/N", "run the test in an infinite loop.", TYPE_BOOL , &loop },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+ bool ok = true;
+ do{
+ ok &= run_with_field<Modular<double> >(q,b,m,n,s,iters);
+ ok &= run_with_field<ModularBalanced<double> >(q,b,m,n,s,iters);
+ ok &= run_with_field<Modular<float> >(q,b,m,n,s,iters);
+ ok &= run_with_field<ModularBalanced<float> >(q,b,m,n,s,iters);
+ ok &= run_with_field<Modular<int32_t> >(q,b,m,n,s,iters);
+ ok &= run_with_field<ModularBalanced<int32_t> >(q,b,m,n,s,iters);
+ ok &= run_with_field<Modular<int64_t> >(q,b,m,n,s,iters);
+ ok &= run_with_field<ModularBalanced<int64_t> >(q,b,m,n,s,iters);
+ ok &= run_with_field<Modular<Givaro::Integer> >(q,(b?b:512),m/4+1,n/4+1,s,iters);
+ } while (loop && ok);
+
+ return !ok ;
+}
diff --git a/tests/test-ftrtri.C b/tests/test-ftrtri.C
new file mode 100644
index 0000000..340534c
--- /dev/null
+++ b/tests/test-ftrtri.C
@@ -0,0 +1,131 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet <clement.pernet at imag.fr>
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+//--------------------------------------------------------------------------
+// Test for ftrtri : 1 computation
+//
+//--------------------------------------------------------------------------
+// Clement Pernet
+//-------------------------------------------------------------------------
+
+#define DEBUG 1
+#define TIME 1
+
+#include <iomanip>
+#include <iostream>
+#include "givaro/modular-balanced.h"
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+
+#include "fflas-ffpack/utils/timer.h"
+#include "Matio.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+
+
+using namespace std;
+using namespace FFPACK;
+
+typedef Givaro::ModularBalanced<double> Field;
+
+int main(int argc, char** argv)
+{
+
+ int n;
+ int nbit=atoi(argv[3]); // number of times the product is performed
+ cerr<<setprecision(10);
+
+ if (argc != 4) {
+ cerr<<"Usage : test-ftrtri <p> <A> <<i>"
+ <<endl
+ <<" to invert a triangular matrix A mod p (i computations)"
+ <<endl;
+ exit(-1);
+ }
+ Field F(atoi(argv[1]));
+ Field::Element * A,*Ab;
+ A = read_field(F,argv[2],&n,&n);
+ Ab = FFLAS::fflas_new<Field::Element>(n*n);
+
+ for (int i=0; i<n;++i){
+ for(int j=i+1; j<n; ++j)
+ F.assign(*(Ab+i*n+j),*(A+i*n+j));
+ F.assign(*(Ab+i*(n+1)), 1.0);
+ for(int j=0; j<i; ++j)
+ F.assign(*(Ab+i*n+j),0.0);
+ }
+
+
+ Field::Element * X = FFLAS::fflas_new<Field::Element>(n*n);
+
+ FFLAS::Timer tim,t; t.clear();tim.clear();
+
+ for(int i = 0;i<nbit;++i){
+ t.clear();
+ t.start();
+// FFPACK::trinv_left (F, n, A, n, X, n);
+ FFPACK::ftrtri(F, FFLAS::FflasUpper, FFLAS::FflasUnit, n, A, n);
+ t.stop();
+ tim+=t;
+ if (i+1<nbit)
+ for (int i=0; i<n*n;++i)
+ F.assign(*(A+i),*(Ab+i));
+ }
+
+#if DEBUG
+ FFLAS::ftrmm (F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasUnit,
+ n, n, 1.0,
+ A,n, Ab, n);
+ bool wrong = false;
+
+ for (int i=0;i<n;++i)
+ for (int j=0;j<n;++j)
+ if ( ((i!=j) && !F.isZero(*(Ab+i*n+j)))
+ ||((i==j) &&!F.isOne(*(Ab+i*n+j))))
+ wrong = true;
+
+ if ( wrong ){
+ cerr<<"FAIL"<<endl;
+ write_field (F,cerr<<"Ab="<<endl,Ab,n,n,n);
+ //write_field (F,cerr<<"X="<<endl,X,n,n,n);
+ }else{
+
+ cerr<<"PASS"<<endl;
+ }
+#endif
+
+#if TIME
+ double gflops = 1.0/3.0*(n/1000.0*n/1000000.0)*nbit*n/tim.usertime();
+ cerr<<"n = "<<n<<" Inversion over Z/"<<atoi(argv[1])<<"Z : t= "
+ << tim.usertime()/nbit
+ << " s, Gfops = "<<gflops
+ << endl;
+
+ cout<<n<<" "<<gflops<<" "<<tim.usertime()/nbit<<endl;
+#endif
+}
diff --git a/tests/test-fullranksubmatrix.C b/tests/test-fullranksubmatrix.C
new file mode 100644
index 0000000..f69a232
--- /dev/null
+++ b/tests/test-fullranksubmatrix.C
@@ -0,0 +1,82 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for rank
+//
+//--------------------------------------------------------------------------
+// Clement Pernet
+//-------------------------------------------------------------------------
+
+#include <iomanip>
+#include <iostream>
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "Matio.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+
+
+using namespace std;
+using namespace FFPACK;
+
+typedef Givaro::Modular<double> Field;
+
+int main(int argc, char** argv){
+
+ int n,m;
+ cerr<<setprecision(10);
+ if (argc != 3) {
+ cerr<<"Usage : test-fullranksubmatrix <p> <A> <<i>"
+ <<endl;
+ exit(-1);
+ }
+ Field F (atoi(argv[1]));
+ Field::Element * A;
+ Field::Element * X;
+
+ A = read_field(F,argv[2],&m ,&n);
+ write_field (F, cerr<<"A = "<<endl, A, m, n, n);
+
+ FFLAS::Timer tim,t; t.clear();tim.clear();
+ size_t R;
+
+ FFPACK::ColRankProfileSubmatrix (F, m, n, A, n, X, R);
+
+ write_field (F, cerr<<"X = "<<endl, X, (int) R, (int) R, (int) R);
+
+ size_t r2 = FFPACK::Rank(F, R,R, X, R);
+ if (r2 != R)
+ std::cerr<<"Fail : Rank (X) != Rank (A)"<<std::endl;
+
+
+
+ FFLAS::fflas_delete(X);
+ FFLAS::fflas_delete(A);
+}
diff --git a/tests/test-igemm.C b/tests/test-igemm.C
new file mode 100644
index 0000000..1c11b6b
--- /dev/null
+++ b/tests/test-igemm.C
@@ -0,0 +1,328 @@
+
+//#define SIMD_INT
+
+#include "fflas-ffpack/fflas-ffpack.h"
+#include "fflas-ffpack/fflas/fflas_igemm/igemm.h"
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/args-parser.h"
+#include "fflas-ffpack/utils/timer.h"
+
+#include <givaro/udl.h>
+
+// COL_MAJOR true not supported in test. To be updated.
+#define COL_MAJOR false
+#define LEAD_GEN true
+#define DISPLAY false
+#define TRUST_FGEMM false
+
+using namespace FFLAS;
+
+int test_igemm(size_t m, size_t n, size_t k, enum FFLAS_TRANSPOSE tA, enum FFLAS_TRANSPOSE tB, int a_scal, int b_scal, bool timing)
+{
+
+
+ FFLAS::Timer tim;
+
+ srand((unsigned int)time(NULL));
+ typedef Givaro::Modular<Givaro::Integer> IField ;
+ IField Z(1_ui64<<63);
+
+ size_t ra = (tA==FflasNoTrans) ? m : k ;
+ size_t ca = (tA==FflasNoTrans) ? k : m ;
+ size_t rb = (tB==FflasNoTrans) ? k : n;
+ size_t cb = (tB==FflasNoTrans) ? n : k;
+
+ size_t lda = ca ;
+ size_t ldb = cb ; // n
+ size_t ldc = n ; // n
+
+#if COL_MAJOR
+ size_t ldA = m;//+rand() % 3 ; // m
+ size_t ldB = k;//+rand() % 3 ; // k
+ size_t ldC = m;//+rand() % 3 ; // m
+#else
+ size_t ldA = ca ; // k
+ size_t ldB = cb ; // n
+ size_t ldC = n ; // n
+#endif
+
+#if LEAD_GEN
+ lda += rand() % 5;
+ ldb += rand() % 5;
+ ldc += rand() % 5;
+ ldA += rand() % 5;
+ ldB += rand() % 5;
+ ldC += rand() % 5;
+#endif
+
+
+ int seed=0;
+ typename IField::RandIter Rand(Z,seed);
+ // typename IField::RandIter Rand(Z,seed);
+
+ IField::Element_ptr A,B,C,D;
+ C= FFLAS::fflas_new(Z,m,ldc);
+ D= FFLAS::fflas_new(Z,m,n);
+ A= FFLAS::fflas_new(Z,ra,lda);
+ B= FFLAS::fflas_new(Z,rb,ldb) ;
+
+ for (size_t i=0;i<ra;++i)
+ for (size_t j=0;j<ca;++j)
+ // Rand.random(A[i*lda+j]);
+ A[i*lda+j] = rand() % 10;
+ for (size_t i=0;i<rb;++i)
+ for (size_t j=0;j<cb;++j)
+ // Rand.random(B[i*ldb+j]);
+ B[i*ldb+j] = rand() % 10;
+ for (size_t i=0;i<m;++i)
+ for (size_t j=0;j<n;++j)
+ // Rand.random(C[i*ldc+j]);
+ D[i*n+j]=C[i*ldc+j] = 0 ; //rand() % 10;
+
+#if DISPLAY
+ write_field(Z,std::cout << "A:=", A, (int)ra, (int)ca, (int)lda,true,false) <<';' <<std::endl;
+ // write_field(Z,std::cout << "A:=", A, (int)ra, (int)ca, (int)lda,true,(tA==FflasTrans)) <<';'<<std::endl;
+ write_field(Z,std::cout << "B:=", B, (int)rb, (int)cb, (int)ldb,true,false) <<';' <<std::endl;
+ // write_field(Z,std::cout << "B:=", B, (int)rb, (int)cb, (int)ldb,true,(tB==FflasTrans)) <<';' <<std::endl;
+#endif
+
+
+
+ IField::Element alpha,beta ;
+ alpha = (IField::Element) a_scal ;
+ beta = (IField::Element) b_scal ;
+
+ tim.clear(); tim.start();
+#if TRUST_FGEMM
+ FFLAS::fgemm(Z,(FFLAS::FFLAS_TRANSPOSE)tA,(FFLAS::FFLAS_TRANSPOSE)tB,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc);
+#else
+ if (!timing) {
+ IField::Element_ptr ail,blj;
+ IField::Element tmp;
+ for (size_t i = 0; i < m; ++i)
+ for (size_t j = 0; j < n; ++j){
+ Z.mulin(*(C+i*ldc+j),beta);
+ Z.assign (tmp, Z.zero);
+ for ( size_t l = 0; l < k ; ++l ){
+ if ( tA == FflasNoTrans )
+ ail = A+i*lda+l;
+ else
+ ail = A+l*lda+i;
+ if ( tB == FflasNoTrans )
+ blj = B+l*ldb+j;
+ else
+ blj = B+j*ldb+l;
+ Z.axpyin (tmp, *ail, *blj);
+ }
+ Z.axpyin (*(C+i*ldc+j), alpha, tmp);
+ }
+ }
+#endif
+ tim.stop();
+ if (timing) std::cout << "zgemm time: " << tim << std::endl;
+
+
+#if DISPLAY
+ write_field(Z,std::cout << "C:=", C, (int)m, (int)n, (int)ldc,true,false) <<';' <<std::endl;
+ std::cout << ((tA == FflasTrans) ? "LinearAlgebra:-Transpose":"") << "(A).";
+ std::cout << ((tB == FflasTrans) ? "LinearAlgebra:-Transpose":"") << "(B);" << std::endl;;
+#endif
+
+ std::cout << "---------------------------------------------" << std::endl;
+
+
+ typedef Givaro::ZRing<int64_t> FField ;
+ FField F ;
+
+ FField::Element_ptr Ci,Ai,Bi;
+#if COL_MAJOR
+ Ci= FFLAS::fflas_new(F,ldC,n);
+ Ai= FFLAS::fflas_new(F,ldA,k);
+ Bi= FFLAS::fflas_new(F,ldB,n);
+
+ for (size_t i=0;i<m;++i)
+ for (size_t j=0;j<k;++j)
+ F.init(Ai[j*ldA+i],A[i*lda+j]);
+ for (size_t i=0;i<k;++i)
+ for (size_t j=0;j<n;++j)
+ F.init(Bi[j*ldB+i],B[i*ldb+j]);
+ for (size_t i=0;i<m;++i)
+ for (size_t j=0;j<n;++j)
+ F.init(Ci[j*ldC+i],D[i*n+j]);
+#else
+ Ci= FFLAS::fflas_new(F,m,ldC);
+ Ai= FFLAS::fflas_new(F,ra,ldA);
+ Bi= FFLAS::fflas_new(F,rb,ldB);
+
+ for (size_t i=0;i<ra;++i)
+ for (size_t j=0;j<ca;++j)
+ F.init(Ai[i*ldA+j],A[i*lda+j]);
+ for (size_t i=0;i<rb;++i)
+ for (size_t j=0;j<cb;++j)
+ F.init(Bi[i*ldB+j],B[i*ldb+j]);
+ for (size_t i=0;i<m;++i)
+ for (size_t j=0;j<n;++j)
+ F.init(Ci[i*ldC+j],D[i*n+j]);
+
+#endif
+
+#if DISPLAY
+ write_field(F,std::cout << "A:=", Ai, (int)ra, (int)ca, (int)ldA,true,COL_MAJOR) <<';'<<std::endl;
+ // write_field(F,std::cout << "A:=", Ai, (int)ra, (int)ca, (int)ldA,true,(tA==FflasTrans)) <<';'<<std::endl;
+ write_field(F,std::cout << "B:=", Bi, (int)rb, (int)cb, (int)ldB,true,COL_MAJOR) <<';' <<std::endl;
+ // write_field(F,std::cout << "B:=", Bi, (int)rb, (int)cb, (int)ldB,true,(tB==FflasTrans)) <<';' <<std::endl;
+#endif
+
+ FField::Element a,b ;
+ a= (FField::Element) a_scal;
+ b= (FField::Element) b_scal;
+#if 0
+ FFLAS::igemm_(FflasColMajor, tA, tB, m, n, k, a, Ai, ldA, Bi, ldB, b, Ci, ldC);
+#else
+ tim.clear(); tim.start();
+ FFLAS::igemm_(FflasRowMajor,tA,tB, (int)m, (int)n, (int)k, a, Ai, (int)ldA, Bi, (int)ldB, b, Ci, (int)ldC);
+ tim.stop();
+ if (timing) std::cout << "igemm time: " << tim << std::endl;
+#endif
+
+
+#if DISPLAY
+ write_field(F,std::cout << "C:=", Ci, (int)m, (int)n, (int)ldC,true,COL_MAJOR) <<';' <<std::endl;
+ std::cout << ((tA == FflasTrans) ? "LinearAlgebra:-Transpose":"") << "(A).";
+ std::cout << ((tB == FflasTrans) ? "LinearAlgebra:-Transpose":"") << "(B);" << std::endl;;
+#endif
+
+ bool pass = true ;
+ if (!timing) {
+#if DISPLAY
+ for (size_t i = 0 ; i < m ; ++i) {
+ for (size_t j = 0 ; j < n ; ++j) {
+ if (Ci[i*ldC+j] != (typename IField::Element) C[i*ldc+j]) {
+ pass = false;
+ std::cout << 'x' ;
+ }
+ else
+ std::cout << 'o' ;
+ }
+ std::cout << std::endl;
+ }
+#else
+ for (size_t i = 0 ; i < m && pass; ++i) {
+ for (size_t j = 0 ; j < n && pass ; ++j) {
+ if (Ci[i*ldC+j] != (typename IField::Element) C[i*ldc+j]) {
+ pass = false;
+ }
+ }
+ }
+#endif
+ }
+
+ if (!pass) {
+ std::cout << "*** *** " << std::endl;
+ std::cout << "*** error *** " << std::endl;
+ std::cout << "*** *** " << std::endl;
+ }
+ else {
+ std::cout << "+++ pass +++" << std::endl;
+ }
+
+ if (timing) {
+ FFLAS::Timer tom;
+ // Givaro::Modular<double> G(65537);
+ Givaro::ZRing<double> G;
+ double af, bf ;
+ G.init(af,alpha);
+ G.init(bf,beta);
+
+ double *Cf,*Af,*Bf;
+ Cf= FFLAS::fflas_new(G,m,ldC);
+ Af= FFLAS::fflas_new(G,ra,ldA);
+ Bf= FFLAS::fflas_new(G,rb,ldB);
+
+ for (size_t i=0;i<ra;++i)
+ for (size_t j=0;j<ca;++j)
+ G.init(Af[i*ldA+j],A[i*lda+j]);
+ for (size_t i=0;i<rb;++i)
+ for (size_t j=0;j<cb;++j)
+ G.init(Bf[i*ldB+j],B[i*ldb+j]);
+ for (size_t i=0;i<m;++i)
+ for (size_t j=0;j<n;++j)
+ G.init(Cf[i*ldC+j],D[i*n+j]);
+
+
+ tom.clear(); tom.start();
+ FFLAS::fgemm(G,(FFLAS::FFLAS_TRANSPOSE)tA,(FFLAS::FFLAS_TRANSPOSE)tB,m,n,k,af,Af,ldA,Bf,ldB,bf,Cf,ldC);
+ tom.stop();
+ std::cout << "fgemm time: " << tom << std::endl;
+
+ FFLAS::fflas_delete(Af);
+ FFLAS::fflas_delete(Bf);
+ FFLAS::fflas_delete(Cf);
+
+ }
+
+ FFLAS::fflas_delete(A);
+ FFLAS::fflas_delete(B);
+ FFLAS::fflas_delete(C);
+
+ FFLAS::fflas_delete(Ai);
+ FFLAS::fflas_delete(Bi);
+ FFLAS::fflas_delete(Ci);
+
+ FFLAS::fflas_delete(D);
+
+ return pass;
+}
+
+int main(int argc, char** argv)
+{
+
+ static size_t m = 9 ;
+ static size_t n = 10 ;
+ static size_t k = 11 ;
+ static bool timing = false ;
+
+ static Argument as[] = {
+ { 'm', "-m m", "m.", TYPE_INT , &m },
+ { 'n', "-n n", "n.", TYPE_INT , &n },
+ { 'k', "-k k", "k.", TYPE_INT , &k },
+ { 't', "-timing", "Output timing" , TYPE_NONE, &timing},
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+
+ for (int i = -1 ; i < 2 ; ++i) {
+ for (int j = -1 ; j < 2 ; ++j) {
+ std::cout << "===================================================" << std::endl;
+ std::cout << " C = " << i << " A B + " << j << "C" << std::endl;
+ test_igemm(m,n,k,FflasNoTrans, FflasNoTrans,i,j,timing);
+ std::cout << " C = " << i << " A tB + " << j << "C" << std::endl;
+ test_igemm(m,n,k,FflasTrans, FflasNoTrans,i,j,timing);
+ std::cout << " C = " << i << " tA B + " << j << "C" << std::endl;
+ test_igemm(m,n,k,FflasNoTrans, FflasTrans,i,j,timing);
+ std::cout << " C = " << i << " tA tB + " << j << "C" << std::endl;
+ test_igemm(m,n,k,FflasTrans, FflasTrans,i,j,timing);
+ }
+ }
+ for (size_t a = 0 ; a < 4 ; ++a) {
+ int i = rand() % 25 ;
+ int j = rand() % 25 ;
+ if (rand()%2) i = -i ;
+ if (rand()%2) j = -j ;
+ std::cout << " C = " << i << " A B + " << j << "C" << std::endl;
+ test_igemm(m,n,k,FflasNoTrans, FflasNoTrans,i,j,timing);
+ std::cout << " C = " << i << " A tB + " << j << "C" << std::endl;
+ test_igemm(m,n,k,FflasTrans, FflasNoTrans,i,j,timing);
+ std::cout << " C = " << i << " tA B + " << j << "C" << std::endl;
+ test_igemm(m,n,k,FflasNoTrans, FflasTrans,i,j,timing);
+ std::cout << " C = " << i << " tA tB + " << j << "C" << std::endl;
+ test_igemm(m,n,k,FflasTrans, FflasTrans,i,j,timing);
+
+ }
+
+
+ return 0;
+}
+
diff --git a/tests/test-interfaces-c.c b/tests/test-interfaces-c.c
new file mode 100644
index 0000000..aba09b4
--- /dev/null
+++ b/tests/test-interfaces-c.c
@@ -0,0 +1,20 @@
+#include <interfaces/libs/fflas_c.h>
+#include <interfaces/libs/ffpack_c.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main() {
+ double * A = (double*)malloc(4*sizeof(double));
+ A[0] = A[2] = 1 ;
+ A[1] = A[3] = 0 ;
+ size_t * P = (size_t*) malloc(2*sizeof(size_t));
+ size_t * Qt = (size_t*) malloc(2*sizeof(size_t));
+
+ size_t r = RowEchelonForm_modular_double(101,2,2,A,2,P,Qt,false,FfpackSlabRecursive,true);
+ freducein_2_modular_double(101,2,2,A,2,false);
+ freducein_1_modular_double(101,4,A,1,false);
+ fsquare_3_modular_double(101,FflasNoTrans,2,1,A,2,1,A,1,true);
+ return !(r==1);
+}
+
diff --git a/tests/test-invert.C b/tests/test-invert.C
new file mode 100644
index 0000000..79211ea
--- /dev/null
+++ b/tests/test-invert.C
@@ -0,0 +1,120 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for invert : 1 computation
+//
+//--------------------------------------------------------------------------
+// Clement Pernet
+//-------------------------------------------------------------------------
+
+//#define DEBUG 1
+#define TIME 1
+using namespace std;
+
+#include <iomanip>
+#include <iostream>
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "Matio.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+
+using namespace FFPACK;
+typedef ModularBalanced<float> Field;
+
+int main(int argc, char** argv){
+
+ int n;
+ int nbit=atoi(argv[3]); // number of times the product is performed
+ cerr<<setprecision(10);
+
+ if (argc != 4) {
+ cerr<<"Usage : test-invert <p> <A> <<i>"
+ <<endl
+ <<" to invert A mod p (i computations)"
+ <<endl;
+ exit(-1);
+ }
+ Field F(atof(argv[1]));
+ Field::Element * A;
+ A = read_field(F,argv[2],&n,&n);
+
+ FFLAS::Timer tim,t; t.clear();tim.clear();
+ int nullity=0;
+
+ for(int i = 0;i<nbit;++i){
+ t.clear();
+ t.start();
+ FFPACK::Invert (F, n, A, n, nullity);
+ t.stop();
+ tim+=t;
+ }
+
+#if DEBUG
+ Field::Element *Ab = read_field(F,argv[2],&n,&n);
+ Field::Element *I = FFLAS::fflas_new<Field::Element>(n*n);
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, n, n, n,
+ 1.0, Ab, n, A, n, 0.0, I, n);
+ bool wrong = false;
+
+ for (int i=0;i<n;++i)
+ for (int j=0;j<n;++j)
+ if ( ((i!=j) && !F.isZero(*(I+i*n+j)))
+ ||((i==j) &&!F.isOne(*(I+i*n+j))))
+ wrong = true;
+
+ if ( wrong ){
+ if (nullity > 0)
+ cerr<<"Matrix is singular over Z/"<<argv[1]<<"Z"<<endl;
+ else{
+ cerr<<"FAIL"<<endl;
+ write_field (F,cerr<<"A="<<endl,Ab,n,n,n);
+ write_field (F,cerr<<"A^-1="<<endl,A,n,n,n);
+ write_field (F,cerr<<"I="<<endl,I,n,n,n);
+ }
+ } else {
+ cerr<<"PASS"<<endl;
+ }
+ FFLAS::fflas_delete( I);
+ FFLAS::fflas_delete( Ab);
+
+#endif
+ FFLAS::fflas_delete( A);
+
+#if TIME
+ double mflops = 2*(n*n/1000000.0)*nbit*n/tim.usertime();
+ cerr<<"n = "<<n<<" Inversion over Z/"<<atoi(argv[1])<<"Z : t= "
+ << tim.usertime()/nbit
+ << " s, Mffops = "<<mflops
+ << endl;
+
+ cout<<n<<" "<<mflops<<" "<<tim.usertime()/nbit<<endl;
+#endif
+}
diff --git a/tests/test-compressQ.C b/tests/test-krylov-elim.C
similarity index 52%
copy from tests/test-compressQ.C
copy to tests/test-krylov-elim.C
index 9894fba..bad9eed 100644
--- a/tests/test-compressQ.C
+++ b/tests/test-krylov-elim.C
@@ -5,20 +5,20 @@
* Copyright (C) FFLAS-FFPACK
* Written by Clément Pernet
* This file is Free Software and part of FFLAS-FFPACK.
- *
+ *
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
- *
+ *
* FFLAS-FFPACK is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@@ -38,79 +38,78 @@
//#define DEBUG 0
#include <iostream>
-#include <list>
-#include <vector>
#include "Matio.h"
-#include "utils/timer.h"
+#include "fflas-ffpack/utils/timer.h"
+using namespace std;
#include "fflas-ffpack/field/modular-balanced.h"
#include "fflas-ffpack/ffpack/ffpack.h"
-#include "fflas-ffpack/utils/args-parser.h"
-using namespace std;
-
-typedef FFPACK:: Modular<double> Field;
+using namespace FFPACK;
+typedef Givaro::Modular<double> Field;
template<class T>
-std::ostream& printvect(std::ostream& o, vector<T>& vect){
- for(size_t i=0; i < vect.size(); ++i)
+std::ostream& printvect(std::ostream& o, T* vect, size_t dim)
+{
+ for(size_t i=0; i<dim; ++i)
o << vect[i] << " " ;
return o << std::endl;
}
-int main(int argc, char** argv)
-{
-
-
- static Argument as[] = {
- END_OF_ARGUMENTS
- };
-
- FFLAS::parseArguments(argc,argv,as);
-
- // int m,n;
-
- Field F(65521);
- size_t N = 17;
- double * A = new double[N*N];
- double * tmp = new double[N*N];
- size_t * deg = new size_t[N];
-
- for (size_t i=0; i<(size_t)N*N; ++i)
- A[i] = 0;
- for (size_t i=0; i<3; ++i)
- A[i+i*N] = 1;
-
- for (size_t i=3; i<6; ++i)
- A[i+1+i*N] = 1;
- for (size_t i=6; i<9; ++i)
- A[i+2+i*N] = 1;
-
- A[12+9*N] = 1;
- A[13+10*N] = 1;
- A[14+12*N] = 1;
- A[15+15*N] = 1;
- A[16+16*N] = 1;
- deg[0] = 4; deg[1] = 4; deg[2] = 4;deg[3] = 2; deg[4] = 1; deg[5] =2;
- for (size_t i=0; i<size_t(N); ++i)
- A[11+i*N] = A[7+i*N] = A[3+i*N] = double(i % 10);
-
- double * B = new double[N*N] ;
- FFLAS::fcopy(F,N*N,B,1,A,1);
-
- // write_field(F, cerr, A, N, N, N);
-
- FFPACK::CompressRowsQK (F, N, A+9*N, N, tmp, N, deg+3, 4, 3 );
-
- // write_field(F, cerr, A, N, N, N);
-
- FFPACK::DeCompressRowsQK (F, N, N-9, A+9*N, N, tmp, N, deg+3, 4, 3 );
-
- // write_field(F, cerr, A, N, N, N);
-
- for (size_t i = 0 ; i < (size_t)N * (size_t)N ; ++i)
- if (A[i] != B[i])
- return 1 ;
- return 0 ;
-
+int main(int argc, char** argv){
+
+ size_t m,n;
+
+
+ if (argc!=3){
+ cerr<<"usage : test-lqup <p> <A>"<<endl
+ <<" to compute the rank profile of the (n+m)xn matrix B formed by the n identity vectors and the mxn matrix A over Z/pZ"
+ <<endl;
+ exit(-1);
+ }
+ Field F(atoi(argv[1]));
+ Field::Element one;
+ F.init(one, 1U);
+ Field::Element * A = read_field<Field> (F,argv[2],(int*)&m,(int*)&n);
+
+ Field::Element * B = FFLAS::fflas_new<Field::Element>((m+n)*n);
+ for (size_t i=0; i<(n+m)*n;++i) *(B+i)=0;
+
+ size_t deg = (n-1)/m+1;
+ size_t curr_row = 0;
+ size_t it_idx = 0;
+ size_t bk_idx = 0;
+ for (size_t i=0; i<m; ++i){
+ for (size_t j=0; j<deg; ++j){
+ if (curr_row < n+m -1){
+ F.assign( *(B + curr_row*n + n-1 - it_idx), one);
+ curr_row++;
+ it_idx++;
+ }
+ }
+ for (size_t j=0; j<n; ++j)
+ *(B + curr_row*n + j) = *(A + bk_idx*n + j);
+ bk_idx++;
+ curr_row++;
+ }
+ write_field (F, cout<<"A = "<<endl, A,(int) m,(int) n,(int) n);
+ write_field (F, cout<<"B = "<<endl, B, (int) (m+n),(int) n,(int) n);
+
+ size_t *rp = FFLAS::fflas_new<size_t>(n);
+
+ FFPACK::SpecRankProfile(F, m, n, A, n, deg,rp);
+
+ size_t * P = FFLAS::fflas_new<size_t>(n);
+ size_t * Q = FFLAS::fflas_new<size_t>(n+m);
+ FFPACK::LUdivine(F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans,(int)m+n, n, B, n, P, Q);
+
+ printvect (cout<<"RankProfile (A) = "<<endl, rp, n)<<endl;
+
+ printvect (cout<<"RankProfile (B) = "<<endl, Q, n)<<endl;
+
+ FFLAS::fflas_delete( rp );
+ FFLAS::fflas_delete( A );
+ FFLAS::fflas_delete( B );
+
+ return 0;
}
diff --git a/tests/test-lqup.C b/tests/test-lqup.C
deleted file mode 100644
index 5e51d4b..0000000
--- a/tests/test-lqup.C
+++ /dev/null
@@ -1,1004 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/*
- * Copyright (C) FFLAS-FFPACK
- * Written by Clément Pernet
- * This file is Free Software and part of FFLAS-FFPACK.
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- */
-
-
-//--------------------------------------------------------------------------
-// Test for the lsp factorisation
-//--------------------------------------------------------------------------
-// usage: test-lsp p A n, for n lsp factorization
-// of A over Z/pZ
-//-------------------------------------------------------------------------
-
-
-
-//#define __LUDIVINE_CUTOFF 1
-#include <iostream>
-#include <iomanip>
-#include "Matio.h"
-#include "utils/timer.h"
-#include "fflas-ffpack/field/modular-balanced.h"
-#include "fflas-ffpack/field/modular-balanced.h"
-#include "fflas-ffpack/ffpack/ffpack.h"
-#include "test-utils.h"
-
-#include "fflas-ffpack/utils/args-parser.h"
-
-using namespace std;
-using namespace FFPACK;
-
-
-/*! Tests the LUdivine routine.
- * @tparam Field Field
- * @tparam Diag Unit diagonal in L ?
- * @tparam Trans ?
- * @param F field
- * @param A Matrix (preallocated)
- * @param r rank of A
- * @param m rows
- * @param n cols
- * @param lda leading dim of A
- * @return 0 iff correct, 1 otherwise
- */
-template<class Field, FFLAS::FFLAS_DIAG diag, FFLAS::FFLAS_TRANSPOSE trans>
-bool test_lu(const Field & F,
- const typename Field::Element * A,
- size_t r,
- size_t m, size_t n, size_t lda)
-{
- bool fail = false;
- typedef typename Field::Element Element ;
- Element * B = new Element[m*lda] ;
- // memcpy(B,A,m*lda*sizeof(Element)); // probably faster than ::fcopy !
- FFLAS::fcopy(F,m,n,B,lda,A,lda);
-
- size_t maxP, maxQ ;
-
- if (trans == FFLAS::FflasTrans){
- maxP = m;
- maxQ = n;
- }
- else{ // trans == FFLAS::FflasNoTrans
- maxP = n;
- maxQ = m;
- }
-
- size_t * P = new size_t[maxP] ;
- size_t * Q = new size_t[maxQ] ;
-
- size_t R = FFPACK::LUdivine (F, diag, trans, m, n, B, lda, P, Q,
- FFPACK::FfpackLQUP);
-
- if (R != r) {
- std::cout << "rank is wrong (expected " << R << " but got " << r << ")" << std::endl;
- delete[] B ;
- delete[] P ;
- delete[] Q ;
- return fail = true;
- }
-
- Element * C = new Element[m*n]; // compute C=LQUP and check C == A
- /* Build L,U */
- Element * L, *U;
- if (trans == FFLAS::FflasNoTrans){
- L = new Element[m*m];
- U = new Element[m*n];
-
- Element zero,one;
- F.init(zero,0.0);
- F.init(one,1.0);
- /* build U */
- for (size_t i=0; i<R; ++i){
- for (size_t j=0; j<i; ++j)
- F.assign ( *(U + i*n + j), zero);
- for (size_t j=i+1; j<n; ++j)
- F.assign (*(U + i*n + j), *(B+ i*lda+j));
- }
- for (size_t i=R;i<m; ++i) {
- for (size_t j=0; j<n; ++j)
- F.assign(*(U+i*n+j), zero);
- }
- /* build L */
- for ( size_t i=0; i<m; ++i ){
- size_t j=0;
- for (; j< ((i<R)?i:R) ; ++j )
- F.assign( *(L + i*m+j), *(B+i*lda+j));
- for (; j<m; ++j )
- F.assign( *(L+i*m+j), zero);
- }
-
- // write_field(F,cerr<<"L = "<<endl,L,m,m,m);
- //write_field(F,cerr<<"U = "<<endl,U,m,n,n);
- FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- m,0,(int)R, L, m, Q);
- for ( size_t i=0; i<m; ++i )
- F.assign(*(L+i*(m+1)), one);
-
- /* reconstruct the diagonal */
- //write_field(F,cerr<<"L = "<<endl,L,m,m,m);
- //write_field(F,cerr<<"U = "<<endl,U,m,n,n);
- if (diag == FFLAS::FflasNonUnit) {
- for ( size_t i=0; i<R; ++i )
- F.assign (*(U+i*(n+1)), *(B+i*(lda+1)));
- }
- else{ // diag == FFLAS::FflasUnit
- for ( size_t i=0; i<R; ++i ){
- *(L+Q[i]*(m+1)) = *(B+Q[i]*lda+i);
- F.assign (*(U+i*(n+1)),one);
- }
- }
- //write_field(F,cerr<<"L = "<<endl,L,m,m,m);
- //write_field(F,cerr<<"U = "<<endl,U,m,n,n);
-
- /* Compute LQUP */
- FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- m,0,(int) R, U, n, P);
- FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- n,0,(int)R, U, n, Q);
- FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
- m,n,m, 1.0, L,m, U,n, 0.0, C,n);
- //delete[] A;
- }
- else { /* trans == FFLAS::FflasTrans */
-
- L = new Element[m*n];
- U = new Element[n*n];
-
-
- Element zero,one;
- F.init(zero,0.0);
- F.init(one,1.0);
- /* build L */
- for (size_t i=0; i<R; ++i){
- for (size_t j=0; j<i; ++j)
- F.assign ( *(L + i + j*n), zero);
- for (size_t j=i+1; j<m; ++j)
- F.assign (*(L + i + j*n), *(B+ i+j*lda));
- }
- for (size_t i=R;i<n; ++i) {
- for (size_t j=0; j<m; ++j)
- F.assign(*(L+i+j*n), zero);
- }
- /* build U */
- for ( size_t i=0; i<n; ++i ){
- size_t j=0;
- for (; j< ((i<R)?i:R) ; ++j )
- F.assign( *(U + i+j*n), *(B+i+j*lda));
- for (; j<n; ++j )
- F.assign( *(U+i+j*n), zero);
- }
- //write_field(F,cerr<<"L = "<<endl,L,m,n,n);
- //write_field(F,cerr<<"U = "<<endl,U,n,n,n);
-
- FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- n,0,(int)R, U, n, Q);
-
- for (size_t i=0; i<n; ++i)
- F.assign (*(U+i*(n+1)),one);
-
- /* reconstruct the diagonal */
- if (diag == FFLAS::FflasNonUnit) {
- for ( size_t i=0; i<R; ++i )
- F.assign (*(L+i*(n+1)), *(B+i*(lda+1)));
- }
- else{ // diag == FFLAS::FflasUnit
- for ( size_t i=0; i<R; ++i ){
- *(U+Q[i]*(n+1)) = *(B+Q[i]+i*lda);
- F.assign (*(L+i*(n+1)),one);
- }
- }
- //write_field(F,cerr<<"L = "<<endl,L,m,n,n);
- //write_field(F,cerr<<"U = "<<endl,U,n,n,n);
-
- /* Compute LQUP */
- FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- n,0,(int)R, L, n, P);
- FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- m,0,(int)R, L, n, Q);
- FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
- m,n,n, 1.0, L,n, U,n, 0.0, C,n);
- }
- /* check equality */
- for (size_t i=0; i<m; ++i) {
- for (size_t j=0; j<n; ++j)
- if (!F.areEqual (*(A+i*lda+j), *(C+i*n+j))){
- std::cerr << " A["<<i<<","<<j<<"] = " << (*(A+i*lda+j))
- << " PLUQ["<<i<<","<<j<<"] = " << (*(C+i*n+j))
- << endl;
- fail|=true;
- }
- }
-
- delete[] P;
- delete[] L;
- delete[] U;
- delete[] Q;
- delete[] B;
- delete[] C;
- return fail;
-
-
-}
-
-/*! Tests the LUpdate routine.
- * @tparam Field Field
- * @tparam Diag Unit diagonal in L ?
- * @tparam Trans ?
- * @param F field
- * @param A Matrix (preallocated)
- * @param r rank of A
- * @param B Matrix (preallocated)
- * @param m rows in A
- * @param n cols in A (and B)
- * @param k rows in B
- * @param lda leading dim of A (and B)
- * @return 0 iff correct, 1 otherwise
- */
-template<class Field, FFLAS::FFLAS_DIAG diag, FFLAS::FFLAS_TRANSPOSE trans>
-bool test_lu_append(const Field & F,
- const typename Field::Element * A,
- const typename Field::Element * B,
- size_t m, size_t n, size_t k, size_t lda)
-{
- FFLASFFPACK_check(n<=lda);
-
- bool fail = false;
- size_t M = m + k ;
- typedef typename Field::Element Element ;
- Element * Acop = new Element[m*lda] ;
- FFLAS::fcopy(F,m,n,Acop,lda,A,lda) ;
-
- Element * Bcop = new Element[k*lda] ;
- FFLAS::fcopy(F,k,n,Bcop,lda,B,lda) ;
-
- Element * Append = new Element[M*lda];
- FFLAS::fcopy(F,m,n,Append,lda,A,lda) ;
- FFLAS::fcopy(F,k,n,Append+m*lda,lda,B,lda) ;
-
-#if 0 /* paranoid check */
- for (size_t i = 0 ; i < m ; ++i) {
- for (size_t j = 0 ; j < n ; ++j) {
- FFLASFFPACK_check(Append[i*lda+j]==A[i*lda+j]);
- }
- }
- for (size_t i = 0 ; i < k ; ++i) {
- for (size_t j = 0 ; j < n ; ++j) {
- FFLASFFPACK_check(Append[(i+m)*lda+j]==B[i*lda+j]);
- }
- }
-#endif
-
- Element * Afull = new Element[M*lda];
- FFLAS::fcopy(F,M,n,Afull,lda,Append,lda) ;
- // FFLAS::fcopy(F,m,n,Afull,lda,A,lda) ;
- // FFLAS::fcopy(F,k,n,Afull+m*lda,lda,B,lda) ;
-
-#if 0
-std::cout << "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" << std::endl;
- for (size_t i = 0 ; i < m ; ++i) {
- for (size_t j = 0 ; j < n ; ++j) {
- std::cout << Append[i*lda+j] << "(" << A[i*lda+j] << ") " ;
- } std::cout << std::endl;
- }
-std::cout << "-----------------------------------" << std::endl;
- for (size_t i = 0 ; i < k ; ++i) {
- for (size_t j = 0 ; j < n ; ++j) {
- std::cout << Append[(i+m)*lda+j] ;
- std::cout << "(" << B[i*lda+j] << ") " ;
- }std::cout << std::endl;
- }
-std::cout << "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" << std::flush << std::endl;
-#endif
-
-
-
-#if 0
- for (size_t i = 0 ; i < m ; ++i)
- for (size_t j = 0 ; j < n ; ++j)
- FFLASFFPACK_check(Acop[i*lda+j]==A[i*lda+j]);
- for (size_t i = 0 ; i < k ; ++i)
- for (size_t j = 0 ; j < n ; ++j)
- FFLASFFPACK_check(Bcop[i*lda+j]==B[i*lda+j]);
- for (size_t i = 0 ; i < M ; ++i)
- for (size_t j = 0 ; j < n ; ++j)
- if (i < m)
- FFLASFFPACK_check(Afull[i*lda+j]==A[i*lda+j]);
- else
- FFLASFFPACK_check(Afull[i*lda+j]==B[(i-m)*lda+j]);
-#endif
-
-
-
-
- size_t maxP, maxQ ;
-
- if (trans == FFLAS::FflasTrans){
- maxP = M;
- maxQ = n;
- }
- else{ // trans == FFLAS::FflasNoTrans
- maxP = n;
- maxQ = M;
- }
-
- size_t * P = new size_t[maxP] ;
- size_t * Q = new size_t[maxQ] ;
-
- size_t * PP = new size_t[maxP] ;
- size_t * QQ = new size_t[maxQ] ;
-
- /* valgrind says the following leaks. Just incroyable. */
- size_t R = FFPACK::LUdivine (F, diag, trans, M, n, Append, lda, PP, QQ,
- FFPACK::FfpackLQUP);
-
- size_t R1 = FFPACK::LUdivine (F, diag, trans, m, n, Acop, lda, P, Q,
- FFPACK::FfpackLQUP);
-
- size_t R2 = FFPACK::LUpdate (F,diag,trans,m,n,Acop,lda,R1,k,Bcop,lda,P,Q,
- FFPACK::FfpackLQUP);
-#if 0
- std::cout << "P := [ " ;
- for (size_t i = 0 ; i < maxP ; ++i)
- std::cout << P[i] << " " ;
- std::cout << ']' << std::endl;
- std::cout << "Q := [ ";
- for (size_t i = 0 ; i < maxQ ; ++i)
- std::cout << Q[i] << " " ;
- std::cout << ']' << std::endl;
- std::cout << "PP := [ ";
- for (size_t i = 0 ; i < maxP ; ++i)
- std::cout << PP[i] << " " ;
- std::cout << ']' << std::endl;
- std::cout << "QQ := [ ";
- for (size_t i = 0 ; i < maxQ ; ++i)
- std::cout << QQ[i] << " " ;
- std::cout << ']' << std::endl;
-#endif
-
- if (R2 != R) {
- std::cout << "error, bad rank " << R2 << " <> " << R << " (expected) " << std::endl;
- delete[] Bcop ;
- delete[] Acop ;
- delete[] Append ;
- delete[] PP;
- delete[] QQ;
- delete[] P ;
- delete[] Q ;
- return fail=true;
-
- }
-
- // compute C=LQUP and check C == A
- Element * C = new Element[M*lda];
- /* Build L,U */
- Element * L, *U;
- if (trans == FFLAS::FflasNoTrans){
- L = new Element[M*M];
- U = new Element[M*n];
-
- Element zero,one;
- F.init(zero,0.0);
- F.init(one,1.0);
- /* build U */
- for (size_t i=0; i<R; ++i){
- for (size_t j=0; j<i; ++j)
- F.assign ( *(U + i*n + j), zero);
- for (size_t j=i+1; j<n; ++j)
- if (i < m)
- F.assign (*(U + i*n + j), *(Acop+ i*lda+j));
- else
- F.assign (*(U + i*n + j), *(Bcop+ (i-m)*lda+j));
- }
-
- for (size_t i=R;i<M; ++i) {
- for (size_t j=0; j<n; ++j)
- F.assign(*(U+i*n+j), zero);
- }
- /* build L */
- for ( size_t i=0; i<M; ++i ){
- size_t j=0;
- for (; j< ((i<R)?i:R) ; ++j ) {
- if (i<m)
- F.assign( *(L + i*M+j), *(Acop+i*lda+j));
- else
- F.assign( *(L + i*M+j), *(Bcop+(i-m)*lda+j));
- }
- for (; j<M; ++j )
- F.assign( *(L+i*M+j), zero);
- }
-
- // write_field(F,cerr<<"L = "<<endl,L,m,m,m);
- //write_field(F,cerr<<"U = "<<endl,U,m,n,n);
- FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- M,0,(int)R, L, M, Q);
- for ( size_t i=0; i<M; ++i )
- F.assign(*(L+i*(M+1)), one);
-
- /* reconstruct the diagonal */
- //write_field(F,cerr<<"L = "<<endl,L,m,m,m);
- //write_field(F,cerr<<"U = "<<endl,U,m,n,n);
- if (diag == FFLAS::FflasNonUnit) {
- for ( size_t i=0; i<R; ++i ) {
- if (i<m)
- F.assign (*(U+i*(n+1)), *(Acop+i*(lda+1)));
- else
- F.assign (*(U+i*(n+1)), *(Bcop+(i-m)*(lda+1)));
- }
- }
- else{ // diag == FFLAS::FflasUnit
- for ( size_t i=0; i<R; ++i ){
- if (Q[i] < m)
- *(L+Q[i]*(M+1)) = *(Acop+Q[i]*lda+i);
- else
- *(L+Q[i]*(M+1)) = *(Bcop+(Q[i]-m)*lda+i);
- F.assign (*(U+i*(n+1)),one);
- }
- }
- // write_field(F,cerr<<"L = "<<endl,L,(int)M,(int)M,(int)M);
- // write_field(F,cerr<<"U = "<<endl,U,(int)M,(int)n,(int)n);
-
- /* Compute LQUP */
- FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- M,0,(int) R, U, n, P);
- FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- n,0,(int)R, U, n, Q);
- FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
- M,n,M, 1.0, L,M, U,n, 0.0, C,lda);
- //delete[] A;
- }
-#if 0 /* not working */
- else { /* trans == FFLAS::FflasTrans */
-
- L = new Element[M*n];
- U = new Element[n*n];
-
-
- Element zero,one;
- F.init(zero,0.0);
- F.init(one,1.0);
- /* build L */
- for (size_t i=0; i<R; ++i){
- for (size_t j=0; j<i; ++j)
- F.assign ( *(L + i + j*n), zero);
- for (size_t j=i+1; j<M; ++j) {
- if (i < m)
- F.assign (*(L + i + j*n), *(Acop+ i+j*lda));
- else
- F.assign (*(L + i + j*n), *(Bcop+ (i-m)+j*lda));
- }
- }
- for (size_t i=R;i<n; ++i) {
- for (size_t j=0; j<M; ++j)
- F.assign(*(L+i+j*n), zero);
- }
- /* build U */
- for ( size_t i=0; i<n; ++i ){
- size_t j=0;
- for (; j< ((i<R)?i:R) ; ++j ) {
- if (i < m)
- F.assign( *(U + i+j*n), *(Acop+i+j*lda));
- else
- F.assign( *(U + i+j*n), *(Bcop+(i-m)+j*lda));
- }
- for (; j<n; ++j )
- F.assign( *(U+i+j*n), zero);
- }
- //write_field(F,cerr<<"L = "<<endl,L,m,n,n);
- //write_field(F,cerr<<"U = "<<endl,U,n,n,n);
-
- FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- n,0,(int)R, U, n, Q);
-
- for (size_t i=0; i<n; ++i)
- F.assign (*(U+i*(n+1)),one);
-
- /* reconstruct the diagonal */
- if (diag == FFLAS::FflasNonUnit) {
- for ( size_t i=0; i<R; ++i ) {
- if (i < m)
- F.assign (*(L+i*(n+1)), *(Acop+i*(lda+1)));
- else
- F.assign (*(L+i*(n+1)), *(Bcop+(i-m)*(lda+1)));
- }
- }
- else{ // diag == FFLAS::FflasUnit
- for ( size_t i=0; i<R; ++i ){
- if (i<m)
- *(U+Q[i]*(n+1)) = *(Acop+Q[i]+i*lda);
- else
- *(U+Q[i]*(n+1)) = *(Bcop+Q[i]+(i-m)*lda);
- F.assign (*(L+i*(n+1)),one);
- }
- }
- // write_field(F,cerr<<"L = "<<endl,L,(int)M,(int)n,(int)n);
- // write_field(F,cerr<<"U = "<<endl,U,(int)n,(int)n,(int)n);
-
- /* Compute LQUP */
- FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- n,0,(int)R, L, n, P);
- FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- M,0,(int)R, L, n, Q);
- FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
- M,n,n, 1.0, L,n, U,n, 0.0, C,lda);
- }
-#endif
-#if 0 /* check CC == LL UU */
- Element * LL, *UU;
- Element * CC = new Element[M*lda];
- if (trans == FFLAS::FflasNoTrans){
- LL = new Element[M*M];
- UU = new Element[M*n];
-
- Element zero,one;
- F.init(zero,0.0);
- F.init(one,1.0);
- /* build U */
- for (size_t i=0; i<R; ++i){
- for (size_t j=0; j<i; ++j)
- F.assign ( *(UU + i*n + j), zero);
- for (size_t j=i+1; j<n; ++j)
- F.assign (*(UU + i*n + j), *(Append+ i*lda+j));
- }
-
- for (size_t i=R;i<M; ++i) {
- for (size_t j=0; j<n; ++j)
- F.assign(*(UU+i*n+j), zero);
- }
- /* build L */
- for ( size_t i=0; i<M; ++i ){
- size_t j=0;
- for (; j< ((i<R)?i:R) ; ++j ) {
- F.assign( *(LL + i*M+j), *(Append+i*lda+j));
- }
- for (; j<M; ++j )
- F.assign( *(LL+i*M+j), zero);
- }
-
- // write_field(F,cerr<<"LL = "<<endl,LL,m,m,m);
- //write_field(F,cerr<<"UU = "<<endl,UU,m,n,n);
- FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- M,0,(int)R, LL, M, Q);
- for ( size_t i=0; i<M; ++i )
- F.assign(*(LL+i*(M+1)), one);
-
- /* reconstruct the diagonal */
- //write_field(F,cerr<<"LL = "<<endl,LL,m,m,m);
- //write_field(F,cerr<<"UU = "<<endl,UU,m,n,n);
- if (diag == FFLAS::FflasNonUnit) {
- for ( size_t i=0; i<R; ++i ) {
- F.assign (*(UU+i*(n+1)), *(Append+i*(lda+1)));
- }
- }
- else{ // diag == FFLAS::FflasUnit
- for ( size_t i=0; i<R; ++i ){
- *(LL+Q[i]*(M+1)) = *(Append+Q[i]*lda+i);
- F.assign (*(UU+i*(n+1)),one);
- }
- }
- write_field(F,cerr<<"L = "<<endl,LL,(int)M,(int)M,(int)M);
- write_field(F,cerr<<"U = "<<endl,UU,(int)M,(int)n,(int)n);
-
- /* Compute LQUP */
- FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- M,0,(int) R, UU, n, P);
- FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- n,0,(int)R, UU, n, Q);
- FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
- M,n,M, 1.0, LL,M, UU,n, 0.0, CC,lda);
- //delete[] A;
- }
- else { /* trans == FFLAS::FflasTrans */
-
- LL = new Element[M*n];
- UU = new Element[n*n];
-
-
- Element zero,one;
- F.init(zero,0.0);
- F.init(one,1.0);
- /* build L */
- for (size_t i=0; i<R; ++i){
- for (size_t j=0; j<i; ++j)
- F.assign ( *(LL + i + j*n), zero);
- for (size_t j=i+1; j<M; ++j) {
- F.assign (*(LL + i + j*n), *(Append+ i+j*lda));
- }
- }
- for (size_t i=R;i<n; ++i) {
- for (size_t j=0; j<M; ++j)
- F.assign(*(LL+i+j*n), zero);
- }
- /* build UU */
- for ( size_t i=0; i<n; ++i ){
- size_t j=0;
- for (; j< ((i<R)?i:R) ; ++j ) {
- F.assign( *(UU + i+j*n), *(Append+i+j*lda));
- }
- for (; j<n; ++j )
- F.assign( *(UU+i+j*n), zero);
- }
- // write_field(F,cerr<<"LL = "<<endl,LL,m,n,n);
- // write_field(F,cerr<<"UU = "<<endl,UU,n,n,n);
-
- FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- n,0,(int)R, UU, n, Q);
-
- for (size_t i=0; i<n; ++i)
- F.assign (*(UU+i*(n+1)),one);
-
- /* reconstruct the diagonal */
- if (diag == FFLAS::FflasNonUnit) {
- for ( size_t i=0; i<R; ++i ) {
- F.assign (*(LL+i*(n+1)), *(Append+i*(lda+1)));
- }
- }
- else{ // diag == FFLAS::FflasUnit
- for ( size_t i=0; i<R; ++i ){
- *(UU+Q[i]*(n+1)) = *(Append+Q[i]+i*lda);
- F.assign (*(LL+i*(n+1)),one);
- }
- }
- write_field(F,cerr<<"LL = "<<endl,LL,(int)M,(int)n,(int)n);
- write_field(F,cerr<<"UU = "<<endl,UU,(int)n,(int)n,(int)n);
-
- /* Compute LQUP */
- FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
- n,0,(int)R, LL, n, P);
- FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- M,0,(int)R, LL, n, Q);
- FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
- M,n,n, 1.0, LL,n, UU,n, 0.0, CC,lda);
- }
- for (size_t i=0; i<M; ++i) {
- for (size_t j=0; j<n; ++j)
- if (!F.areEqual (*(Afull+i*lda+j), *(CC+i*lda+j))){
- std::cerr << " A["<<i<<","<<j<<"] = " << (*(Afull+i*lda+j))
- << " PLUQ["<<i<<","<<j<<"] = " << (*(CC+i*lda+j))
- << endl << "xxxx" << endl;
- fail|=true;
- }
- }
-
-#endif
-
- /* check equality */
- for (size_t i=0; i<M; ++i) {
- for (size_t j=0; j<n; ++j)
- if (!F.areEqual (*(Afull+i*lda+j), *(C+i*lda+j))){
- std::cerr << " A["<<i<<","<<j<<"] = " << (*(Afull+i*lda+j))
- << " PLUQ(append)["<<i<<","<<j<<"] = " << (*(C+i*lda+j))
- << endl;
- fail|=true;
- }
- }
-
- delete[] PP;
- delete[] P;
- delete[] L;
- delete[] U;
- delete[] Q;
- delete[] QQ;
- delete[] Acop;
- delete[] Bcop;
- delete[] Append;
- delete[] Afull;
- delete[] C;
-
- return fail;
-
-
-}
-
-
-
-template<class Field, FFLAS::FFLAS_DIAG diag, FFLAS::FFLAS_TRANSPOSE trans>
-bool launch_test(const Field & F,
- size_t r,
- size_t m, size_t n)
-{
- typedef typename Field::Element Element ;
- bool fail = false ;
- { /* user given and lda bigger */
- size_t lda = n+10 ;
- Element * A = new Element[m*lda];
- RandomMatrixWithRank(F,A,r,m,n,lda);
- fail |= test_lu<Field,diag,trans>(F,A,r,m,n,lda);
- if (fail) std::cout << "failed" << std::endl;
- delete[] A ;
- }
- { /* user given and lda bigger. Rank is max */
- size_t lda = n+10 ;
- size_t R = std::min(m,n);
- Element * A = new Element[m*lda];
- RandomMatrixWithRank(F,A,R,m,n,lda);
- fail |= test_lu<Field,diag,trans>(F,A,R,m,n,lda);
- if (fail) std::cout << "failed" << std::endl;
- delete[] A ;
- }
- { /* user given and lda bigger. Rank is min */
- size_t lda = n+10 ;
- size_t R = 0;
- Element * A = new Element[m*lda];
- RandomMatrixWithRank(F,A,R,m,n,lda);
- fail |= test_lu<Field,diag,trans>(F,A,R,m,n,lda);
- if (fail) std::cout << "failed" << std::endl;
- delete[] A ;
- }
- { /* square */
- size_t M = std::max(m,n);
- size_t N = M ;
- size_t R = M/2 ;
- size_t lda = N+10 ;
- Element * A = new Element[M*lda];
- RandomMatrixWithRank(F,A,R,M,N,lda);
- fail |= test_lu<Field,diag,trans>(F,A,R,M,N,lda);
- if (fail) std::cout << "failed" << std::endl;
- delete[] A ;
- }
- { /* wide */
- size_t M = std::max(m,n);
- size_t N = 2*M ;
- size_t R = 3*M/4 ;
- size_t lda = N+5 ;
- Element * A = new Element[M*lda];
- RandomMatrixWithRank(F,A,R,M,N,lda);
- fail |= test_lu<Field,diag,trans>(F,A,R,M,N,lda);
- if (fail) std::cout << "failed" << std::endl;
- delete[] A ;
- }
- { /* narrow */
- size_t M = std::max(m,n);
- size_t N = M/2 ;
- size_t R = 3*M/8 ;
- size_t lda = N+5 ;
- Element * A = new Element[M*lda];
- RandomMatrixWithRank(F,A,R,M,N,lda);
- fail |= test_lu<Field,diag,trans>(F,A,R,M,N,lda);
- if (fail) std::cout << "failed" << std::endl;
- delete[] A ;
- }
-
- return fail;
-}
-
-template<class Field, FFLAS::FFLAS_DIAG diag, FFLAS::FFLAS_TRANSPOSE trans>
-bool launch_test_append(const Field & F,
- size_t r,
- size_t m, size_t n)
-{
- typedef typename Field::Element Element ;
- bool fail = false ;
- { /* user given and lda bigger */
- size_t lda = n+10 ;
- size_t k = m/2+1 ;
- Element * A = new Element[m*lda];
- Element * B = new Element[k*lda];
- RandomMatrixWithRank(F,A,r,m,n,lda);
- RandomMatrixWithRank(F,B,k/2+1,k,n,lda);
- fail |= test_lu_append<Field,diag,trans>(F,A,B,m,n,k,lda);
- if (fail) std::cout << "failed" << std::endl;
- delete[] A ;
- delete[] B ;
- }
- { /* user given and lda bigger. Rank is max */
- size_t lda = n+10 ;
- size_t R = std::min(m,n);
- size_t k = m/2+1 ;
- Element * A = new Element[m*lda];
- Element * B = new Element[k*lda];
- RandomMatrixWithRank(F,A,R,m,n,lda);
- RandomMatrixWithRank(F,B,k/2+1,k,n,lda);
- fail |= test_lu_append<Field,diag,trans>(F,A,B,m,n,k,lda);
- if (fail) std::cout << "failed" << std::endl;
- delete[] A ;
- delete[] B ;
- }
- { /* user given and lda bigger. Appended Rank is min */
- size_t lda = n+10 ;
- size_t R = std::min(m,n);
- size_t k = m/2+1 ;
- Element * A = new Element[m*lda];
- Element * B = new Element[k*lda];
- RandomMatrixWithRank(F,A,R,m,n,lda);
- RandomMatrixWithRank(F,B,0,k,n,lda);
- fail |= test_lu_append<Field,diag,trans>(F,A,B,m,n,k,lda);
- if (fail) std::cout << "failed" << std::endl;
- delete[] A ;
- delete[] B ;
- }
- { /* user given and lda bigger. Rank is min */
- size_t lda = n+10 ;
- size_t R = 0;
- size_t k = m/2+1 ;
- Element * A = new Element[m*lda];
- Element * B = new Element[k*lda];
- RandomMatrixWithRank(F,A,R,m,n,lda);
- RandomMatrixWithRank(F,B,k/2+1,k,n,lda);
- fail |= test_lu_append<Field,diag,trans>(F,A,B,m,n,k,lda);
- if (fail) std::cout << "failed" << std::endl;
- delete[] A ;
- delete[] B ;
- }
- { /* square */
- size_t M = std::max(m,n);
- size_t N = M ;
- size_t R = M/2 ;
- size_t lda = N+10 ;
- size_t k = R ;
- Element * A = new Element[M*lda];
- Element * B = new Element[k*lda];
- RandomMatrixWithRank(F,A,R,M,N,lda);
- RandomMatrixWithRank(F,B,R/2,k,N,lda);
- fail |= test_lu_append<Field,diag,trans>(F,A,B,M,N,k,lda);
- if (fail) std::cout << "failed" << std::endl;
- delete[] A ;
- delete[] B ;
- }
- { /* wide */
- size_t M = std::max(m,n);
- size_t N = 2*M ;
- size_t R = M/2 ;
- size_t k = R ;
- size_t lda = N+10 ;
- Element * A = new Element[M*lda];
- Element * B = new Element[k*lda];
- RandomMatrixWithRank(F,A,R,M,N,lda);
- RandomMatrixWithRank(F,B,k/2,k,N,lda);
- fail |= test_lu_append<Field,diag,trans>(F,A,B,M,N,k,lda);
- if (fail) std::cout << "failed" << std::endl;
- delete[] A ;
- delete[] B ;
- }
-#if 0 /* leak here */
- { /* narrow */
- size_t M = std::max(m,n);
- size_t N = M/2 ;
- size_t R = M/3 ;
- size_t k = N ;
- size_t lda = N+10 ;
- Element * A = new Element[M*lda];
- Element * B = new Element[k*lda];
- RandomMatrixWithRank(F,A,R,M,N,lda);
- RandomMatrixWithRank(F,A,std::min(k/2,M/2),k,N,lda);
- fail |= test_lu_append<Field,diag,trans>(F,A,B,M,N,k,lda);
- if (fail) std::cout << "failed" << std::endl;
- delete[] A ;
- delete[] B ;
- }
-#endif
-
- return fail;
-}
-
-int main(int argc, char** argv)
-{
- cerr<<setprecision(20);
- int p = 101;
- size_t m = 50;
- size_t n = 50;
- size_t r = 20;
- int iter = 2 ;
- bool fail = false;
-
- static Argument as[] = {
- { 'p', "-p P", "Set the field characteristic.", TYPE_INT , &p },
- { 'n', "-n N", "Set the number of cols in the matrix.", TYPE_INT , &n },
- { 'm', "-m N", "Set the number of rows in the matrix.", TYPE_INT , &m },
- { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iter },
- // { 'f', "-f file", "Set input file", TYPE_STR, &file },
- END_OF_ARGUMENTS
- };
-
- FFLAS::parseArguments(argc,argv,as);
-
- {
- typedef ModularBalanced<double> Field;
- typedef Field::Element Element;
- Field F(p);
-
- for (int i = 0 ; i < iter ; ++i) {
- fail |= launch_test<Field,FFLAS::FflasUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- fail |= launch_test<Field,FFLAS::FflasUnit,FFLAS::FflasTrans>(F,r,m,n);
- fail |= launch_test<Field,FFLAS::FflasNonUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- fail |= launch_test<Field,FFLAS::FflasNonUnit,FFLAS::FflasTrans>(F,r,m,n);
-
-#if 1 /* may be bogus */
- fail |= launch_test_append<Field,FFLAS::FflasUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- fail |= launch_test_append<Field,FFLAS::FflasNonUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- // fail |= launch_test_append<Field,FFLAS::FflasUnit,FFLAS::FflasTrans>(F,r,m,n);
- // fail |= launch_test_append<Field,FFLAS::FflasNonUnit,FFLAS::FflasTrans>(F,r,m,n);
-#endif
- }
- }
-
- {
- typedef Modular<double> Field;
- typedef Field::Element Element;
- Field F(p);
-
- for (int i = 0 ; i < iter ; ++i) {
- fail |= launch_test<Field,FFLAS::FflasUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- fail |= launch_test<Field,FFLAS::FflasUnit,FFLAS::FflasTrans>(F,r,m,n);
- fail |= launch_test<Field,FFLAS::FflasNonUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- fail |= launch_test<Field,FFLAS::FflasNonUnit,FFLAS::FflasTrans>(F,r,m,n);
-
-#if 1 /* may be bogus */
- fail |= launch_test_append<Field,FFLAS::FflasUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- fail |= launch_test_append<Field,FFLAS::FflasNonUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- // fail |= launch_test_append<Field,FFLAS::FflasUnit,FFLAS::FflasTrans>(F,r,m,n);
- // fail |= launch_test_append<Field,FFLAS::FflasNonUnit,FFLAS::FflasTrans>(F,r,m,n);
-#endif
-
- }
- }
-
- {
- typedef ModularBalanced<float> Field;
- typedef Field::Element Element;
- Field F(p);
-
- for (int i = 0 ; i < iter ; ++i) {
- fail |= launch_test<Field,FFLAS::FflasUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- fail |= launch_test<Field,FFLAS::FflasUnit,FFLAS::FflasTrans>(F,r,m,n);
- fail |= launch_test<Field,FFLAS::FflasNonUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- fail |= launch_test<Field,FFLAS::FflasNonUnit,FFLAS::FflasTrans>(F,r,m,n);
-
-#if 1 /* may be bogus */
- fail |= launch_test_append<Field,FFLAS::FflasUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- fail |= launch_test_append<Field,FFLAS::FflasNonUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- // fail |= launch_test_append<Field,FFLAS::FflasUnit,FFLAS::FflasTrans>(F,r,m,n);
- // fail |= launch_test_append<Field,FFLAS::FflasNonUnit,FFLAS::FflasTrans>(F,r,m,n);
-#endif
-
- }
- }
-
- {
- typedef Modular<float> Field;
- typedef Field::Element Element;
- Field F(p);
-
- for (int i = 0 ; i < iter ; ++i) {
- fail |= launch_test<Field,FFLAS::FflasUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- fail |= launch_test<Field,FFLAS::FflasUnit,FFLAS::FflasTrans>(F,r,m,n);
- fail |= launch_test<Field,FFLAS::FflasNonUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- fail |= launch_test<Field,FFLAS::FflasNonUnit,FFLAS::FflasTrans>(F,r,m,n);
-
-#if 1 /* may be bogus */
- fail |= launch_test_append<Field,FFLAS::FflasUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- fail |= launch_test_append<Field,FFLAS::FflasNonUnit,FFLAS::FflasNoTrans>(F,r,m,n);
- // fail |= launch_test_append<Field,FFLAS::FflasUnit,FFLAS::FflasTrans>(F,r,m,n);
- // fail |= launch_test_append<Field,FFLAS::FflasNonUnit,FFLAS::FflasTrans>(F,r,m,n);
-#endif
-
-
- }
- }
-
- return fail ;
-
-}
diff --git a/tests/test-lu.C b/tests/test-lu.C
new file mode 100644
index 0000000..f848831
--- /dev/null
+++ b/tests/test-lu.C
@@ -0,0 +1,1062 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//-------------------------------------------------------------------------
+// Test suite for the Gaussian elimination routines: LUdivine and PLUQ
+//-------------------------------------------------------------------------
+
+#define __FFLASFFPACK_SEQUENTIAL
+#define __LUDIVINE_CUTOFF 1
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <givaro/modular-balanced.h>
+#include <iostream>
+#include <iomanip>
+
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+#include "test-utils.h"
+
+#include "fflas-ffpack/utils/args-parser.h"
+
+using namespace std;
+using namespace FFPACK;
+
+
+/*! Tests the LUdivine routine.
+ * @tparam Field Field
+ * @tparam Diag Unit diagonal in U
+ * @tparam Trans
+ * @param F field
+ * @param A Matrix (preallocated)
+ * @param r rank of A
+ * @param m rows
+ * @param n cols
+ * @param lda leading dim of A
+ * @return 0 iff correct, 1 otherwise
+ */
+template<class Field, FFLAS::FFLAS_DIAG diag, FFLAS::FFLAS_TRANSPOSE trans>
+bool test_LUdivine(const Field & F,
+ typename Field::ConstElement_ptr A, size_t lda,
+ size_t r, size_t m, size_t n)
+{
+ bool fail = false;
+ typedef typename Field::Element_ptr Element_ptr ;
+ typedef typename Field::Element Element ;
+ Element_ptr B = FFLAS::fflas_new(F,m,lda) ;
+ FFLAS::fassign(F,m,n,A,lda,B,lda);
+
+ size_t maxP, maxQ ;
+
+ if (trans == FFLAS::FflasTrans){
+ maxP = m;
+ maxQ = n;
+ }
+ else{ // trans == FFLAS::FflasNoTrans
+ maxP = n;
+ maxQ = m;
+ }
+
+ size_t * P = FFLAS::fflas_new<size_t>(maxP) ;
+ size_t * Q = FFLAS::fflas_new<size_t>(maxQ) ;
+
+ size_t R = FFPACK::LUdivine (F, diag, trans, m, n, B, lda, P, Q);
+
+ if (R != r) {
+ std::cout << "rank is wrong (expecting " << r << " but got " << R << ")" << std::endl;
+ FFLAS::fflas_delete( B );
+ FFLAS::fflas_delete( P );
+ FFLAS::fflas_delete( Q );
+ return fail = true;
+ }
+
+ Element_ptr X = FFLAS::fflas_new(F, m, n); // compute X=CUP and check X == A
+ /* Build L,U */
+ Element_ptr L, U;
+ if (trans == FFLAS::FflasNoTrans){
+ L = FFLAS::fflas_new(F, m, m);
+ U = FFLAS::fflas_new(F, m, n);
+
+ Element zero,one;
+ F.init(zero,0.0);
+ F.init(one,1.0);
+ /* build U */
+ for (size_t i=0; i<R; ++i){
+ for (size_t j=0; j<i; ++j)
+ F.assign ( *(U + i*n + j), zero);
+ for (size_t j=i+1; j<n; ++j)
+ F.assign (*(U + i*n + j), *(B+ i*lda+j));
+ }
+ for (size_t i=R;i<m; ++i) {
+ for (size_t j=0; j<n; ++j)
+ F.assign(*(U+i*n+j), zero);
+ }
+ /* build L */
+ for ( size_t i=0; i<m; ++i ){
+ size_t j=0;
+ for (; j< ((i<R)?i:R) ; ++j )
+ F.assign( *(L + i*m+j), *(B+i*lda+j));
+ for (; j<m; ++j )
+ F.assign( *(L+i*m+j), zero);
+ }
+
+ /* reconstruct the diagonal */
+ if (diag == FFLAS::FflasNonUnit) {
+ for ( size_t i=0; i<R; ++i ){
+ F.assign (*(U+i*(n+1)), *(B+i*(lda+1)));
+ F.assign (*(L+Q[i]*m+i), F.one);
+ }
+ }
+ else{ // diag == FFLAS::FflasUnit
+ for ( size_t i=0; i<R; ++i ){
+ F.assign (*(L+Q[i]*m+i), *(B+Q[i]*lda+i));
+ F.assign (*(U+i*(n+1)),one);
+ }
+ }
+
+ /* Compute CUP */
+ FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+ m,0,(int) R, U, n, P);
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
+ m,n,R, 1.0, L,m, U,n, 0.0, X,n);
+ }
+ else { /* trans == FFLAS::FflasTrans */
+
+ L = FFLAS::fflas_new(F, m, n);
+ U = FFLAS::fflas_new(F, n, n);
+
+
+ typename Field::Element zero,one;
+ F.init(zero,0.0);
+ F.init(one,1.0);
+ /* build L */
+ for (size_t i=0; i<R; ++i){
+ for (size_t j=0; j<i; ++j)
+ F.assign ( *(L + i + j*n), zero);
+ for (size_t j=i+1; j<m; ++j)
+ F.assign (*(L + i + j*n), *(B+ i+j*lda));
+ }
+ for (size_t i=R;i<n; ++i) {
+ for (size_t j=0; j<m; ++j)
+ F.assign(*(L+i+j*n), zero);
+ }
+ /* build U */
+ for ( size_t i=0; i<n; ++i ){
+ size_t j=0;
+ for (; j< ((i<R)?i:R) ; ++j )
+ F.assign( *(U + i+j*n), *(B+i+j*lda));
+ for (; j<n; ++j )
+ F.assign( *(U+i+j*n), zero);
+ }
+
+ FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+ n,0,(int)R, U, n, Q);
+
+ for (size_t i=0; i<n; ++i)
+ F.assign (*(U+i*(n+1)),one);
+
+ /* reconstruct the diagonal */
+ if (diag == FFLAS::FflasNonUnit) {
+ for ( size_t i=0; i<R; ++i )
+ F.assign (*(L+i*(n+1)), *(B+i*(lda+1)));
+ }
+ else{ // diag == FFLAS::FflasUnit
+ for ( size_t i=0; i<R; ++i ){
+ *(U+Q[i]*(n+1)) = *(B+Q[i]+i*lda);
+ F.assign (*(L+i*(n+1)),one);
+ }
+ }
+
+ /* Compute LQUP */
+ FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+ n,0,(int)R, L, n, P);
+ FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+ m,0,(int)R, L, n, Q);
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
+ m,n,n, 1.0, L,n, U,n, 0.0, X,n);
+ }
+ /* check equality */
+ for (size_t i=0; i<m; ++i) {
+ for (size_t j=0; j<n; ++j)
+ if (!F.areEqual (*(A+i*lda+j), *(X+i*n+j))){
+ std::cerr << std::endl<<" A["<<i<<","<<j<<"] = " << (*(A+i*lda+j))
+ << " LQUP["<<i<<","<<j<<"] = " << (*(X+i*n+j));
+ fail|=true;
+ }
+ }
+ // if (fail){
+ // write_field(F,cerr<<"A = "<<endl,A,m,n,lda);
+ // write_field(F,cerr<<"LU = "<<endl,B,m,n,lda);
+ // write_field(F,cerr<<"L = "<<endl,L,m,m,m);
+ // write_field(F,cerr<<"U = "<<endl,U,m,n,n);
+ // }
+
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( Q);
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( X);
+ return fail;
+
+
+}
+
+
+/*! Verifies that B = PLUQ where A stores [L\U]
+ * @tparam Field Field
+ * @tparam Diag Unit diagonal in U
+ * @param F field
+ * @param A Matrix (preallocated)
+ * @param r rank of A
+ * @param m rows
+ * @param n cols
+ * @param lda leading dim of A
+ * @return 0 iff correct, 1 otherwise
+ */
+template<class Field, FFLAS::FFLAS_DIAG diag>
+bool verifPLUQ (const Field & F, typename Field::ConstElement_ptr A, size_t lda,
+ typename Field::Element_ptr PLUQ, size_t ldpluq,
+ size_t * P, size_t * Q, size_t m, size_t n, size_t R)
+{
+
+
+ typename Field::Element_ptr X = FFLAS::fflas_new (F, m, n);
+ typename Field::Element_ptr L = FFLAS::fflas_new (F, m, R);
+ typename Field::Element_ptr U = FFLAS::fflas_new (F, R, n);
+ FFLAS::fzero(F, m, R, L, R);
+ FFLAS::fzero(F, R, n, U, n);
+
+ typename Field::Element zero,one;
+ F.init(zero,0.0);
+ F.init(one,1.0);
+ FFPACK::getTriangular(F, FFLAS::FflasUpper, diag, m,n,R, PLUQ, ldpluq, U, n, true);
+ FFPACK::getTriangular(F, FFLAS::FflasLower, (diag==FFLAS::FflasNonUnit)?FFLAS::FflasUnit:FFLAS::FflasNonUnit,
+ m,n,R, PLUQ, ldpluq, L, R, true);
+ FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, R,0,m, L, R, P);
+ FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, R,0,n, U, n, Q);
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,R, F.one, L,R, U,n, F.zero, X,n);
+
+ bool fail = false;
+ for(size_t i=0; i<m; ++i)
+ for (size_t j=0; j<n; ++j)
+ if (!F.areEqual (*(A+i*lda+j), *(X+i*n+j))){
+ std::cerr << std::endl<<" A ["<<i<<","<<j<<"] = " << (*(A+i*lda+j))
+ << " PLUQ ["<<i<<","<<j<<"] = " << (*(X+i*n+j))
+ << std::endl;
+ fail=true;
+ }
+ //write_field(F, std::cerr<<"X = "<<std::endl,X, m,n,n);
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+ return fail;
+}
+/*! Tests the LUdivine routine.
+ * @tparam Field Field
+ * @tparam Diag Unit diagonal in U
+ * @tparam Trans
+ * @param F field
+ * @param A Matrix (preallocated)
+ * @param r rank of A
+ * @param m rows
+ * @param n cols
+ * @param lda leading dim of A
+ * @return 0 iff correct, 1 otherwise
+ */
+template<class Field, FFLAS::FFLAS_DIAG diag>
+bool test_pluq (const Field & F,
+ typename Field::ConstElement_ptr A,
+ size_t r, size_t m, size_t n, size_t lda)
+{
+ bool fail = false;
+ typedef typename Field::Element_ptr Element_ptr ;
+ Element_ptr B = FFLAS::fflas_new(F,m,lda) ;
+ FFLAS::fassign(F,m,n,A,lda,B,lda);
+
+ size_t * P = FFLAS::fflas_new<size_t> (m);
+ size_t * Q = FFLAS::fflas_new<size_t> (n);
+
+ //write_field(F,std::cerr<<"\n B = \n",B,m,n,lda);
+ size_t R = FFPACK::PLUQ (F, diag, m, n, B, lda, P, Q);
+ //write_field(F,std::cerr<<"\n PLUQ = \n",B,m,n,lda);
+
+ if (R != r) {
+ std::cout << "rank is wrong (expected " << r << " but got " << R << ")" << std::endl;
+ FFLAS::fflas_delete (B);
+ FFLAS::fflas_delete (P);
+ FFLAS::fflas_delete (Q);
+ return fail = true;
+ }
+ fail |= verifPLUQ<Field,diag> (F,A, lda, B, lda, P, Q, m, n, r);
+ FFLAS::fflas_delete (B);
+ FFLAS::fflas_delete(P);
+ FFLAS::fflas_delete(Q);
+ return fail;
+}
+/*! Tests the LUpdate routine.
+ * @tparam Field Field
+ * @tparam Diag Unit diagonal in L ?
+ * @tparam Trans ?
+ * @param F field
+ * @param A Matrix (preallocated)
+ * @param r rank of A
+ * @param B Matrix (preallocated)
+ * @param m rows in A
+ * @param n cols in A (and B)
+ * @param k rows in B
+ * @param lda leading dim of A (and B)
+ * @return 0 iff correct, 1 otherwise
+ */
+// template<class Field, FFLAS::FFLAS_DIAG diag, FFLAS::FFLAS_TRANSPOSE trans>
+// bool test_lu_append(const Field & F,
+// const typename Field::Element_ptr A,
+// const typename Field::Element_ptr B,
+// size_t m, size_t n, size_t k, size_t lda)
+// {
+// FFLASFFPACK_check(n<=lda);
+
+// bool fail = false;
+// size_t M = m + k ;
+// typedef typename Field::Element Element ;
+// Element_ptr Acop = FFLAS::fflas_new(F, m, lda) ;
+// FFLAS::fassign(F,m,n,A,lda,Acop,lda) ;
+
+// Element_ptr Bcop = FFLAS::fflas_new(F, k, lda) ;
+// FFLAS::fassign(F,k,n,B,lda,Bcop,lda) ;
+
+// Element_ptr Append = FFLAS::fflas_new (F, M, lda);
+// FFLAS::fassign(F,m,n,A,lda,Append,lda) ;
+// FFLAS::fassign(F,k,n,B,lda,Append+m*lda,lda) ;
+
+// #if 0 /* paranoid check */
+// for (size_t i = 0 ; i < m ; ++i) {
+// for (size_t j = 0 ; j < n ; ++j) {
+// FFLASFFPACK_check(Append[i*lda+j]==A[i*lda+j]);
+// }
+// }
+// for (size_t i = 0 ; i < k ; ++i) {
+// for (size_t j = 0 ; j < n ; ++j) {
+// FFLASFFPACK_check(Append[(i+m)*lda+j]==B[i*lda+j]);
+// }
+// }
+// #endif
+
+// Element_ptr Afull = FFLAS::fflas_new(F, M, lda);
+// FFLAS::fassign(F,M,n,Append,lda,Afull,lda) ;
+// // FFLAS::fassign(F,m,n,A,lda,Afull,lda) ;
+// // FFLAS::fassign(F,k,n,B,lda,Afull+m*lda,lda) ;
+
+// #if 0
+// std::cout << "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" << std::endl;
+// for (size_t i = 0 ; i < m ; ++i) {
+// for (size_t j = 0 ; j < n ; ++j) {
+// std::cout << Append[i*lda+j] << "(" << A[i*lda+j] << ") " ;
+// } std::cout << std::endl;
+// }
+// std::cout << "-----------------------------------" << std::endl;
+// for (size_t i = 0 ; i < k ; ++i) {
+// for (size_t j = 0 ; j < n ; ++j) {
+// std::cout << Append[(i+m)*lda+j] ;
+// std::cout << "(" << B[i*lda+j] << ") " ;
+// }std::cout << std::endl;
+// }
+// std::cout << "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" << std::flush << std::endl;
+// #endif
+
+
+
+// #if 0
+// for (size_t i = 0 ; i < m ; ++i)
+// for (size_t j = 0 ; j < n ; ++j)
+// FFLASFFPACK_check(Acop[i*lda+j]==A[i*lda+j]);
+// for (size_t i = 0 ; i < k ; ++i)
+// for (size_t j = 0 ; j < n ; ++j)
+// FFLASFFPACK_check(Bcop[i*lda+j]==B[i*lda+j]);
+// for (size_t i = 0 ; i < M ; ++i)
+// for (size_t j = 0 ; j < n ; ++j)
+// if (i < m)
+// FFLASFFPACK_check(Afull[i*lda+j]==A[i*lda+j]);
+// else
+// FFLASFFPACK_check(Afull[i*lda+j]==B[(i-m)*lda+j]);
+// #endif
+
+
+
+
+// size_t maxP, maxQ ;
+
+// if (trans == FFLAS::FflasTrans){
+// maxP = M;
+// maxQ = n;
+// }
+// else{ // trans == FFLAS::FflasNoTrans
+// maxP = n;
+// maxQ = M;
+// }
+
+// size_t * P = FFLAS::fflas_new<size_t>(maxP) ;
+// size_t * Q = FFLAS::fflas_new<size_t>(maxQ) ;
+
+// size_t * PP = FFLAS::fflas_new<size_t>(maxP) ;
+// size_t * QQ = FFLAS::fflas_new<size_t>(maxQ) ;
+
+// /* valgrind says the following leaks. Just incroyable. */
+// size_t R = FFPACK::LUdivine (F, diag, trans, M, n, Append, lda, PP, QQ);
+
+// size_t R1 = FFPACK::LUdivine (F, diag, trans, m, n, Acop, lda, P, Q);
+
+// size_t R2 = FFPACK::LUpdate (F,diag,trans,m,n,Acop,lda,R1,k,Bcop,lda,P,Q,
+// FFPACK::FfpackLQUP);
+// #if 0
+// std::cout << "P := [ " ;
+// for (size_t i = 0 ; i < maxP ; ++i)
+// std::cout << P[i] << " " ;
+// std::cout << ']' << std::endl;
+// std::cout << "Q := [ ";
+// for (size_t i = 0 ; i < maxQ ; ++i)
+// std::cout << Q[i] << " " ;
+// std::cout << ']' << std::endl;
+// std::cout << "PP := [ ";
+// for (size_t i = 0 ; i < maxP ; ++i)
+// std::cout << PP[i] << " " ;
+// std::cout << ']' << std::endl;
+// std::cout << "QQ := [ ";
+// for (size_t i = 0 ; i < maxQ ; ++i)
+// std::cout << QQ[i] << " " ;
+// std::cout << ']' << std::endl;
+// #endif
+
+// if (R2 != R) {
+// std::cout << "error, bad rank " << R2 << " <> " << R << " (expected) " << std::endl;
+// FFLAS::fflas_delete( Bcop );
+// FFLAS::fflas_delete( Acop );
+// FFLAS::fflas_delete( Append );
+// FFLAS::fflas_delete( PP);
+// FFLAS::fflas_delete( QQ);
+// FFLAS::fflas_delete( P );
+// FFLAS::fflas_delete( Q );
+// return fail=true;
+
+// }
+
+// // compute C=LQUP and check C == A
+// Element_ptr C = FFLAS::fflas_new (F, M, lda);
+// /* Build L,U */
+// Element_ptr L, U;
+// if (trans == FFLAS::FflasNoTrans){
+// L = FFLAS::fflas_new(F, M, M);
+// U = FFLAS::fflas_new(F, M, n);
+
+// typename Field::Element zero,one;
+// F.init(zero,0.0);
+// F.init(one,1.0);
+// /* build U */
+// for (size_t i=0; i<R; ++i){
+// for (size_t j=0; j<i; ++j)
+// F.assign ( *(U + i*n + j), zero);
+// for (size_t j=i+1; j<n; ++j)
+// if (i < m)
+// F.assign (*(U + i*n + j), *(Acop+ i*lda+j));
+// else
+// F.assign (*(U + i*n + j), *(Bcop+ (i-m)*lda+j));
+// }
+
+// for (size_t i=R;i<M; ++i) {
+// for (size_t j=0; j<n; ++j)
+// F.assign(*(U+i*n+j), zero);
+// }
+// /* build L */
+// for ( size_t i=0; i<M; ++i ){
+// size_t j=0;
+// for (; j< ((i<R)?i:R) ; ++j ) {
+// if (i<m)
+// F.assign( *(L + i*M+j), *(Acop+i*lda+j));
+// else
+// F.assign( *(L + i*M+j), *(Bcop+(i-m)*lda+j));
+// }
+// for (; j<M; ++j )
+// F.assign( *(L+i*M+j), zero);
+// }
+
+// // write_field(F,cerr<<"L = "<<endl,L,m,m,m);
+// //write_field(F,cerr<<"U = "<<endl,U,m,n,n);
+// FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+// M,0,(int)R, L, M, Q);
+// for ( size_t i=0; i<M; ++i )
+// F.assign(*(L+i*(M+1)), one);
+
+// /* reconstruct the diagonal */
+// //write_field(F,cerr<<"L = "<<endl,L,m,m,m);
+// //write_field(F,cerr<<"U = "<<endl,U,m,n,n);
+// if (diag == FFLAS::FflasNonUnit) {
+// for ( size_t i=0; i<R; ++i ) {
+// if (i<m)
+// F.assign (*(U+i*(n+1)), *(Acop+i*(lda+1)));
+// else
+// F.assign (*(U+i*(n+1)), *(Bcop+(i-m)*(lda+1)));
+// }
+// }
+// else{ // diag == FFLAS::FflasUnit
+// for ( size_t i=0; i<R; ++i ){
+// if (Q[i] < m)
+// *(L+Q[i]*(M+1)) = *(Acop+Q[i]*lda+i);
+// else
+// *(L+Q[i]*(M+1)) = *(Bcop+(Q[i]-m)*lda+i);
+// F.assign (*(U+i*(n+1)),one);
+// }
+// }
+// // write_field(F,cerr<<"L = "<<endl,L,(int)M,(int)M,(int)M);
+// // write_field(F,cerr<<"U = "<<endl,U,(int)M,(int)n,(int)n);
+
+// /* Compute LQUP */
+// FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+// M,0,(int) R, U, n, P);
+// FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+// n,0,(int)R, U, n, Q);
+// FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
+// M,n,M, 1.0, L,M, U,n, 0.0, C,lda);
+// //FFLAS::fflas_delete( A);
+// }
+// #if 0 /* not working */
+// else { /* trans == FFLAS::FflasTrans */
+
+// L = FFLAS::fflas_new(F, M, n);
+// U = FFLAS::fflas_new(F, n, n);
+
+
+// typename Field::Element zero,one;
+// F.init(zero,0.0);
+// F.init(one,1.0);
+// /* build L */
+// for (size_t i=0; i<R; ++i){
+// for (size_t j=0; j<i; ++j)
+// F.assign ( *(L + i + j*n), zero);
+// for (size_t j=i+1; j<M; ++j) {
+// if (i < m)
+// F.assign (*(L + i + j*n), *(Acop+ i+j*lda));
+// else
+// F.assign (*(L + i + j*n), *(Bcop+ (i-m)+j*lda));
+// }
+// }
+// for (size_t i=R;i<n; ++i) {
+// for (size_t j=0; j<M; ++j)
+// F.assign(*(L+i+j*n), zero);
+// }
+// /* build U */
+// for ( size_t i=0; i<n; ++i ){
+// size_t j=0;
+// for (; j< ((i<R)?i:R) ; ++j ) {
+// if (i < m)
+// F.assign( *(U + i+j*n), *(Acop+i+j*lda));
+// else
+// F.assign( *(U + i+j*n), *(Bcop+(i-m)+j*lda));
+// }
+// for (; j<n; ++j )
+// F.assign( *(U+i+j*n), zero);
+// }
+// //write_field(F,cerr<<"L = "<<endl,L,m,n,n);
+// //write_field(F,cerr<<"U = "<<endl,U,n,n,n);
+
+// FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+// n,0,(int)R, U, n, Q);
+
+// for (size_t i=0; i<n; ++i)
+// F.assign (*(U+i*(n+1)),one);
+
+// /* reconstruct the diagonal */
+// if (diag == FFLAS::FflasNonUnit) {
+// for ( size_t i=0; i<R; ++i ) {
+// if (i < m)
+// F.assign (*(L+i*(n+1)), *(Acop+i*(lda+1)));
+// else
+// F.assign (*(L+i*(n+1)), *(Bcop+(i-m)*(lda+1)));
+// }
+// }
+// else{ // diag == FFLAS::FflasUnit
+// for ( size_t i=0; i<R; ++i ){
+// if (i<m)
+// *(U+Q[i]*(n+1)) = *(Acop+Q[i]+i*lda);
+// else
+// *(U+Q[i]*(n+1)) = *(Bcop+Q[i]+(i-m)*lda);
+// F.assign (*(L+i*(n+1)),one);
+// }
+// }
+// // write_field(F,cerr<<"L = "<<endl,L,(int)M,(int)n,(int)n);
+// // write_field(F,cerr<<"U = "<<endl,U,(int)n,(int)n,(int)n);
+
+// /* Compute LQUP */
+// FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+// n,0,(int)R, L, n, P);
+// FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+// M,0,(int)R, L, n, Q);
+// FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
+// M,n,n, 1.0, L,n, U,n, 0.0, C,lda);
+// }
+// #endif
+// #if 0 /* check CC == LL UU */
+// Element_ptr LL, UU;
+// Element_ptr CC = FFLAS::fflas_new (F, M, lda);
+// if (trans == FFLAS::FflasNoTrans){
+// LL = FFLAS::fflas_new (F, M, M);
+// UU = FFLAS::fflas_new (F, M, n);
+
+// Element zero,one;
+// F.init(zero,0.0);
+// F.init(one,1.0);
+// /* build U */
+// for (size_t i=0; i<R; ++i){
+// for (size_t j=0; j<i; ++j)
+// F.assign ( *(UU + i*n + j), zero);
+// for (size_t j=i+1; j<n; ++j)
+// F.assign (*(UU + i*n + j), *(Append+ i*lda+j));
+// }
+
+// for (size_t i=R;i<M; ++i) {
+// for (size_t j=0; j<n; ++j)
+// F.assign(*(UU+i*n+j), zero);
+// }
+// /* build L */
+// for ( size_t i=0; i<M; ++i ){
+// size_t j=0;
+// for (; j< ((i<R)?i:R) ; ++j ) {
+// F.assign( *(LL + i*M+j), *(Append+i*lda+j));
+// }
+// for (; j<M; ++j )
+// F.assign( *(LL+i*M+j), zero);
+// }
+
+// // write_field(F,cerr<<"LL = "<<endl,LL,m,m,m);
+// //write_field(F,cerr<<"UU = "<<endl,UU,m,n,n);
+// FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+// M,0,(int)R, LL, M, Q);
+// for ( size_t i=0; i<M; ++i )
+// F.assign(*(LL+i*(M+1)), one);
+
+// /* reconstruct the diagonal */
+// //write_field(F,cerr<<"LL = "<<endl,LL,m,m,m);
+// //write_field(F,cerr<<"UU = "<<endl,UU,m,n,n);
+// if (diag == FFLAS::FflasNonUnit) {
+// for ( size_t i=0; i<R; ++i ) {
+// F.assign (*(UU+i*(n+1)), *(Append+i*(lda+1)));
+// }
+// }
+// else{ // diag == FFLAS::FflasUnit
+// for ( size_t i=0; i<R; ++i ){
+// *(LL+Q[i]*(M+1)) = *(Append+Q[i]*lda+i);
+// F.assign (*(UU+i*(n+1)),one);
+// }
+// }
+// write_field(F,cerr<<"L = "<<endl,LL,(int)M,(int)M,(int)M);
+// write_field(F,cerr<<"U = "<<endl,UU,(int)M,(int)n,(int)n);
+
+// /* Compute LQUP */
+// FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+// M,0,(int) R, UU, n, P);
+// FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+// n,0,(int)R, UU, n, Q);
+// FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
+// M,n,M, 1.0, LL,M, UU,n, 0.0, CC,lda);
+// //FFLAS::fflas_delete( A);
+// }
+// else { /* trans == FFLAS::FflasTrans */
+
+// LL = FFLAS::fflas_new(F, M, n);
+// UU = FFLAS::fflas_new(F, n, n);
+
+
+// typename Field::Element zero,one;
+// F.init(zero,0.0);
+// F.init(one,1.0);
+// /* build L */
+// for (size_t i=0; i<R; ++i){
+// for (size_t j=0; j<i; ++j)
+// F.assign ( *(LL + i + j*n), zero);
+// for (size_t j=i+1; j<M; ++j) {
+// F.assign (*(LL + i + j*n), *(Append+ i+j*lda));
+// }
+// }
+// for (size_t i=R;i<n; ++i) {
+// for (size_t j=0; j<M; ++j)
+// F.assign(*(LL+i+j*n), zero);
+// }
+// /* build UU */
+// for ( size_t i=0; i<n; ++i ){
+// size_t j=0;
+// for (; j< ((i<R)?i:R) ; ++j ) {
+// F.assign( *(UU + i+j*n), *(Append+i+j*lda));
+// }
+// for (; j<n; ++j )
+// F.assign( *(UU+i+j*n), zero);
+// }
+// // write_field(F,cerr<<"LL = "<<endl,LL,m,n,n);
+// // write_field(F,cerr<<"UU = "<<endl,UU,n,n,n);
+
+// FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+// n,0,(int)R, UU, n, Q);
+
+// for (size_t i=0; i<n; ++i)
+// F.assign (*(UU+i*(n+1)),one);
+
+// /* reconstruct the diagonal */
+// if (diag == FFLAS::FflasNonUnit) {
+// for ( size_t i=0; i<R; ++i ) {
+// F.assign (*(LL+i*(n+1)), *(Append+i*(lda+1)));
+// }
+// }
+// else{ // diag == FFLAS::FflasUnit
+// for ( size_t i=0; i<R; ++i ){
+// *(UU+Q[i]*(n+1)) = *(Append+Q[i]+i*lda);
+// F.assign (*(LL+i*(n+1)),one);
+// }
+// }
+// write_field(F,cerr<<"LL = "<<endl,LL,(int)M,(int)n,(int)n);
+// write_field(F,cerr<<"UU = "<<endl,UU,(int)n,(int)n,(int)n);
+
+// /* Compute LQUP */
+// FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+// n,0,(int)R, LL, n, P);
+// FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+// M,0,(int)R, LL, n, Q);
+// FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
+// M,n,n, 1.0, LL,n, UU,n, 0.0, CC,lda);
+// }
+// for (size_t i=0; i<M; ++i) {
+// for (size_t j=0; j<n; ++j)
+// if (!F.areEqual (*(Afull+i*lda+j), *(CC+i*lda+j))){
+// std::cerr << " A["<<i<<","<<j<<"] = " << (*(Afull+i*lda+j))
+// << " LQUP["<<i<<","<<j<<"] = " << (*(CC+i*lda+j))
+// << endl << "xxxx" << endl;
+// fail|=true;
+// }
+// }
+
+// #endif
+
+// /* check equality */
+// for (size_t i=0; i<M; ++i) {
+// for (size_t j=0; j<n; ++j)
+// if (!F.areEqual (*(Afull+i*lda+j), *(C+i*lda+j))){
+// std::cerr << " A["<<i<<","<<j<<"] = " << (*(Afull+i*lda+j))
+// << " LQUP(append)["<<i<<","<<j<<"] = " << (*(C+i*lda+j))
+// << endl;
+// fail|=true;
+// }
+// }
+
+// FFLAS::fflas_delete( PP);
+// FFLAS::fflas_delete( P);
+// FFLAS::fflas_delete( L);
+// FFLAS::fflas_delete( U);
+// FFLAS::fflas_delete( Q);
+// FFLAS::fflas_delete( QQ);
+// FFLAS::fflas_delete( Acop);
+// FFLAS::fflas_delete( Bcop);
+// FFLAS::fflas_delete( Append);
+// FFLAS::fflas_delete( Afull);
+// FFLAS::fflas_delete( C);
+
+// return fail;
+
+
+// }
+
+
+
+template<class Field, FFLAS::FFLAS_DIAG diag, FFLAS::FFLAS_TRANSPOSE trans>
+bool launch_test(const Field & F,
+ size_t r,
+ size_t m, size_t n)
+{
+ //typedef typename Field::Element Element ;
+ typedef typename Field::Element_ptr Element_ptr ;
+ bool fail = false ;
+ { /* user given and lda bigger */
+ size_t lda = n+10 ;
+ Element_ptr A = FFLAS::fflas_new (F, m, lda);
+ RandomMatrixWithRankandRandomRPM(F,A,lda,r,m,n);
+ fail |= test_LUdivine<Field,diag,trans>(F,A,lda,r,m,n);
+ fail |= test_pluq<Field,diag>(F,A,r,m,n,lda);
+ if (fail) std::cout << "failed at big lda" << std::endl;
+ FFLAS::fflas_delete( A );
+ }
+ { /* user given and lda bigger. Rank is max */
+ size_t lda = n+10 ;
+ size_t R = std::min(m,n);
+ Element_ptr A = FFLAS::fflas_new (F, m, lda);
+ RandomMatrixWithRankandRandomRPM(F,A,lda,R,m,n);
+ fail |= test_LUdivine<Field,diag,trans>(F,A,lda,R,m,n);
+ fail |= test_pluq<Field,diag>(F,A,R,m,n,lda);
+ if (fail) std::cout << "failed at big lda max rank" << std::endl;
+ FFLAS::fflas_delete( A );
+ }
+ { /* user given and lda bigger. Rank is min */
+ size_t lda = n+10 ;
+ size_t R = 0;
+ Element_ptr A = FFLAS::fflas_new (F, m, lda);
+ RandomMatrixWithRankandRandomRPM(F,A,lda,R,m,n);
+ fail |= test_LUdivine<Field,diag,trans>(F,A,lda,R,m,n);
+ fail |= test_pluq<Field,diag>(F,A,R,m,n,lda);
+ if (fail) std::cout << "failed at big lda, rank 0" << std::endl;
+ FFLAS::fflas_delete( A );
+ }
+ { /* square */
+ size_t M = std::max(m,n);
+ size_t N = M ;
+ size_t R = M/2 ;
+ size_t lda = N+10 ;
+ Element_ptr A = FFLAS::fflas_new (F, M, lda);
+ RandomMatrixWithRankandRandomRPM(F,A,lda,R,M,N);
+ fail |= test_LUdivine<Field,diag,trans>(F,A,lda,R,M,N);
+ fail |= test_pluq<Field,diag>(F,A,R,M,N,lda);
+ if (fail) std::cout << "failed at square" << std::endl;
+ FFLAS::fflas_delete( A );
+ }
+ { /* wide */
+ size_t M = std::max(m,n);
+ size_t N = 2*M ;
+ size_t R = 3*M/4 ;
+ size_t lda = N+5 ;
+ Element_ptr A = FFLAS::fflas_new (F, M, lda);
+ RandomMatrixWithRankandRandomRPM(F,A,lda,R,M,N);
+ fail |= test_LUdivine<Field,diag,trans>(F,A,lda,R,M,N);
+ fail |= test_pluq<Field,diag>(F,A,R,M,N,lda);
+ if (fail) std::cout << "failed at wide" << std::endl;
+ FFLAS::fflas_delete( A );
+ }
+ { /* narrow */
+ size_t M = std::max(m,n);
+ size_t N = M/2 ;
+ size_t R = 3*M/8 ;
+ size_t lda = N+5 ;
+ Element_ptr A = FFLAS::fflas_new (F, M, lda);
+ RandomMatrixWithRankandRandomRPM(F,A,lda,R,M,N);
+ fail |= test_LUdivine<Field,diag,trans>(F,A,lda,R,M,N);
+ fail |= test_pluq<Field,diag>(F,A,R,M,N,lda);
+ if (fail) std::cout << "failed at narrow" << std::endl;
+ FFLAS::fflas_delete( A );
+ }
+ return !fail;
+}
+
+// template<class Field, FFLAS::FFLAS_DIAG diag, FFLAS::FFLAS_TRANSPOSE trans>
+// bool launch_test_append(const Field & F,
+// size_t r,
+// size_t m, size_t n)
+// {
+// typedef typename Field::Element Element ;
+// bool fail = false ;
+// { /* user given and lda bigger */
+// size_t lda = n+10 ;
+// size_t k = m/2+1 ;
+// Element_ptr A = FFLAS::fflas_new (F, m, lda);
+// Element_ptr B = FFLAS::fflas_new (F, k, lda);
+// RandomMatrixWithRank(F,A,lda,r,m,n);
+// RandomMatrixWithRank(F,B,lda,k/2+1,k,n);
+// fail |= test_lu_append<Field,diag,trans>(F,A,B,m,n,k,lda);
+// if (fail) std::cout << "failed" << std::endl;
+// FFLAS::fflas_delete( A );
+// FFLAS::fflas_delete( B );
+// }
+// { /* user given and lda bigger. Rank is max */
+// size_t lda = n+10 ;
+// size_t R = std::min(m,n);
+// size_t k = m/2+1 ;
+// Element_ptr A = FFLAS::fflas_new (F, m, lda);
+// Element_ptr B = FFLAS::fflas_new (F, k, lda);
+// RandomMatrixWithRank(F,A,lda,R,m,n);
+// RandomMatrixWithRank(F,B,lda,k/2+1,k,n);
+// fail |= test_lu_append<Field,diag,trans>(F,A,B,m,n,k,lda);
+// if (fail) std::cout << "failed" << std::endl;
+// FFLAS::fflas_delete( A );
+// FFLAS::fflas_delete( B );
+// }
+// { /* user given and lda bigger. Appended Rank is min */
+// size_t lda = n+10 ;
+// size_t R = std::min(m,n);
+// size_t k = m/2+1 ;
+// Element_ptr A = FFLAS::fflas_new (F, m, lda);
+// Element_ptr B = FFLAS::fflas_new (F, k, lda);
+// RandomMatrixWithRank(F,A,lda,R,m,n);
+// RandomMatrixWithRank(F,B,lda,0,k,n);
+// fail |= test_lu_append<Field,diag,trans>(F,A,B,m,n,k,lda);
+// if (fail) std::cout << "failed" << std::endl;
+// FFLAS::fflas_delete( A );
+// FFLAS::fflas_delete( B );
+// }
+// { /* user given and lda bigger. Rank is min */
+// size_t lda = n+10 ;
+// size_t R = 0;
+// size_t k = m/2+1 ;
+// Element_ptr A = FFLAS::fflas_new (F, m, lda);
+// Element_ptr B = FFLAS::fflas_new (F, k, lda);
+// RandomMatrixWithRank(F,A,lda,R,m,n);
+// RandomMatrixWithRank(F,B,lda,k/2+1,k,n);
+// fail |= test_lu_append<Field,diag,trans>(F,A,B,m,n,k,lda);
+// if (fail) std::cout << "failed" << std::endl;
+// FFLAS::fflas_delete( A );
+// FFLAS::fflas_delete( B );
+// }
+// { /* square */
+// size_t M = std::max(m,n);
+// size_t N = M ;
+// size_t R = M/2 ;
+// size_t lda = N+10 ;
+// size_t k = R ;
+// Element_ptr A = FFLAS::fflas_new (F, M, lda);
+// Element_ptr B = FFLAS::fflas_new (F, k, lda);
+// RandomMatrixWithRank(F,A,lda,R,M,N);
+// RandomMatrixWithRank(F,B,lda,R/2,k,N);
+// fail |= test_lu_append<Field,diag,trans>(F,A,B,M,N,k,lda);
+// if (fail) std::cout << "failed" << std::endl;
+// FFLAS::fflas_delete( A );
+// FFLAS::fflas_delete( B );
+// }
+// { /* wide */
+// size_t M = std::max(m,n);
+// size_t N = 2*M ;
+// size_t R = M/2 ;
+// size_t k = R ;
+// size_t lda = N+10 ;
+// Element_ptr A = FFLAS::fflas_new (F, M, lda);
+// Element_ptr B = FFLAS::fflas_new (F, k, lda);
+// RandomMatrixWithRank(F,A,lda,R,M,N);
+// RandomMatrixWithRank(F,B,lda,k/2,k,N);
+// fail |= test_lu_append<Field,diag,trans>(F,A,B,M,N,k,lda);
+// if (fail) std::cout << "failed" << std::endl;
+// FFLAS::fflas_delete( A );
+// FFLAS::fflas_delete( B );
+// }
+// //! @bug leaks :
+// #if 0 /* leak here */
+// { /* narrow */
+// size_t M = std::max(m,n);
+// size_t N = M/2 ;
+// size_t R = M/3 ;
+// size_t k = N ;
+// size_t lda = N+10 ;
+// Element_ptr A = FFLAS::fflas_new (F, M, lda);
+// Element_ptr B = FFLAS::fflas_new (F, k, lda);
+// RandomMatrixWithRank(F,A,lda,R,M,N);
+// RandomMatrixWithRank(F,A,lda,std::min(k/2,M/2),k,N);
+// fail |= test_lu_append<Field,diag,trans>(F,A,B,M,N,k,lda);
+// if (fail) std::cout << "failed" << std::endl;
+// FFLAS::fflas_delete( A );
+// FFLAS::fflas_delete( B );
+// }
+// #endif
+
+// return fail;
+// }
+
+
+template<class Field>
+bool run_with_field(Givaro::Integer q, uint64_t b, size_t m, size_t n, size_t r, size_t iters){
+ bool ok = true ;
+ int nbit=(int)iters;
+
+ while (ok && nbit){
+ // choose Field
+ Field* F= chooseField<Field>(q,b);
+ if (F==nullptr)
+ return true;
+ std::ostringstream oss;
+ F->write(oss);
+
+ std::cout.fill('.');
+ std::cout<<"Checking ";
+ std::cout.width(40);
+ std::cout<<oss.str();
+ std::cout<<" ... ";
+
+
+ ok&= launch_test<Field,FFLAS::FflasUnit,FFLAS::FflasNoTrans> (*F,r,m,n);
+ ok&= launch_test<Field,FFLAS::FflasUnit,FFLAS::FflasTrans> (*F,r,m,n);
+ ok&= launch_test<Field,FFLAS::FflasNonUnit,FFLAS::FflasNoTrans> (*F,r,m,n);
+ ok&= launch_test<Field,FFLAS::FflasNonUnit,FFLAS::FflasTrans> (*F,r,m,n);
+
+#if 0 /* may be bogus */
+ ok&= launch_test_append<Field,FFLAS::FflasUnit,FFLAS::FflasNoTrans> (*F,r,m,n);
+ ok&= launch_test_append<Field,FFLAS::FflasNonUnit,FFLAS::FflasNoTrans>(*F,r,m,n);
+ ok&= launch_test_append<Field,FFLAS::FflasUnit,FFLAS::FflasTrans> (*F,r,m,n);
+ ok&= launch_test_append<Field,FFLAS::FflasNonUnit,FFLAS::FflasTrans> (*F,r,m,n);
+#endif
+ nbit--;
+ if ( !ok )
+ //std::cout << "\033[1;31mFAILED\033[0m "<<std::endl;
+ std::cout << "FAILED "<<std::endl;
+ else
+ //std::cout << "\033[1;32mPASSED\033[0m "<<std::endl;
+ std::cout << "PASSED "<<std::endl;
+ delete F;
+ }
+ return ok;
+}
+
+int main(int argc, char** argv)
+{
+ cerr<<setprecision(20);
+ static Givaro::Integer q=-1;
+ static size_t b=0;
+ static size_t m=120;
+ static size_t n=120;
+ static size_t r=80;
+ static size_t iters=2;
+ static bool loop=false;
+ static Argument as[] = {
+ { 'q', "-q Q", "Set the field characteristic (-1 for random).", TYPE_INTEGER , &q },
+ { 'b', "-b B", "Set the bitsize of the field characteristic.", TYPE_INT , &b },
+ { 'm', "-m M", "Set the row dimension of the matrix.", TYPE_INT , &m },
+ { 'n', "-n N", "Set the column dimension of the matrix.", TYPE_INT , &n },
+ { 'r', "-r R", "Set the rank.", TYPE_INT , &r },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iters },
+ { 'l', "-loop Y/N", "run the test in an infinite loop.", TYPE_BOOL , &loop },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+ if (r > std::min (m,n))
+ r = std::min (m, n);
+
+ bool ok=true;
+ do{
+ ok&=run_with_field<Givaro::Modular<float> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::Modular<double> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::ModularBalanced<float> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::ModularBalanced<double> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::Modular<int32_t> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::ModularBalanced<int32_t> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::Modular<int64_t> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::ModularBalanced<int64_t> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::Modular<Givaro::Integer> > (q,(b?b:512),m/6,n/6,r/6,iters);
+ } while (loop && ok);
+
+ return !ok;
+}
diff --git a/fflas-ffpack/fflas-ffpack.h b/tests/test-matrix-io.h
similarity index 71%
copy from fflas-ffpack/fflas-ffpack.h
copy to tests/test-matrix-io.h
index ceeb9c0..4316a6b 100644
--- a/fflas-ffpack/fflas-ffpack.h
+++ b/tests/test-matrix-io.h
@@ -1,7 +1,12 @@
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) 2011 FFLAS-FFPACK
- * Written by <brice.boyer at imag.fr>
+
+
+/*
+ * Copyright (C) 2015 the FFLAS-FFPACK group
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
+ *
+ * This file is Free Software and part of FFLAS-FFPACK.
*
* ========LICENCE========
* This file is part of the library FFLAS-FFPACK.
@@ -23,17 +28,9 @@
*
*/
-/*! @file fflas-ffpack/fflas-ffpack.h
- * @ingroup fflas-ffpack
- * @brief Includes FFLAS and FFPACK
- */
-
-
-#ifndef __FFLASFFPACK_fflas_ffpack_H
-#define __FFLASFFPACK_fflas_ffpack_H
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include "fflas-ffpack/utils/args-parser.h"
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
-#include "fflas/fflas.h"
-#include "ffpack/ffpack.h"
-#endif // __FFLASFFPACK_fflas_ffpack_H
diff --git a/tests/test-multifile1.C b/tests/test-multifile1.C
new file mode 100644
index 0000000..edc1a27
--- /dev/null
+++ b/tests/test-multifile1.C
@@ -0,0 +1,5 @@
+#include "fflas-ffpack/fflas-ffpack.h"
+
+ // See test-multifile2.C - it is a test
+ // to confirm that the lib is *really* header-only and full inline.
+
diff --git a/tests/test-multifile2.C b/tests/test-multifile2.C
new file mode 100644
index 0000000..b6701d6
--- /dev/null
+++ b/tests/test-multifile2.C
@@ -0,0 +1,7 @@
+#include "fflas-ffpack/fflas-ffpack.h"
+
+int main(void)
+{
+ // If it compiles, it is OK.
+ return 0;
+}
diff --git a/tests/test-nullspace.C b/tests/test-nullspace.C
new file mode 100644
index 0000000..5ae81d2
--- /dev/null
+++ b/tests/test-nullspace.C
@@ -0,0 +1,130 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for nullspace
+//
+//--------------------------------------------------------------------------
+// Clement Pernet
+//-------------------------------------------------------------------------
+
+//#define DEBUG 1
+#define TIME 1
+using namespace std;
+
+#include <iomanip>
+#include <iostream>
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "Matio.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+
+using namespace FFPACK;
+typedef ModularBalanced<double> Field;
+
+int main(int argc, char** argv){
+
+ int n,m;
+ int nbit=atoi(argv[3]); // number of times the product is performed
+ cerr<<setprecision(10);
+ Field::Element zero, one;
+
+ if (argc != 4) {
+ cerr<<"Usage : test-nullspace <p> <A> <<i>"
+ <<endl
+ <<" to compute the nullspace of A mod p (i computations)"
+ <<endl;
+ exit(-1);
+ }
+ Field F(atof(argv[1]));
+ F.init(zero,0.0);
+ F.init(one,1.0);
+ Field::Element * A, *NS;
+ A = read_field(F,argv[2],&m,&n);
+
+ FFLAS::Timer tim,t; t.clear();tim.clear();
+ size_t ldn, NSdim;
+
+ for(int i = 0;i<nbit;++i){
+ t.clear();
+ t.start();
+ FFPACK::NullSpaceBasis (F, FFLAS::FflasRight, m,n,
+ A, n, NS, ldn, NSdim);
+// FFPACK::NullSpaceBasis (F, FFLAS::FflasLeft, m,n,
+// A, n, NS, ldn, NSdim);
+ t.stop();
+ tim+=t;
+ }
+
+#if DEBUG
+ Field::Element *Ab = read_field(F,argv[2],&m,&n);
+ Field::Element *C = FFLAS::fflas_new<Field::Element>(NSdim*n);
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m, NSdim, n,
+ 1.0, Ab, n, NS, ldn, 0.0, C, NSdim);
+// FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, NSdim, n, m,
+// 1.0, NS, ldn, Ab, n, 0.0, C, n);
+ bool wrong = false;
+
+ for (int i=0;i<m;++i)
+ for (size_t j=0;j<NSdim;++j)
+ if (!F.areEqual(*(C+i*NSdim+j),zero))
+ wrong = true;
+// for (int i=0;i<NSdim;++i)
+// for (int j=0;j<n;++j)
+// if (!F.areEqual(*(C+i*n+j),zero))
+// wrong = true;
+
+ if ( wrong ){
+ cerr<<"FAIL"<<endl;
+ write_field (F,cerr<<"A="<<endl,Ab,m,n,n);
+ write_field (F,cerr<<"NS="<<endl,NS, n,NSdim, NSdim);
+ write_field (F,cerr<<"C="<<endl,C,m,NSdim, NSdim);
+// write_field (F,cerr<<"NS="<<endl,NS, NSdim, m,m);
+// write_field (F,cerr<<"C="<<endl,C,NSdim,n, n);
+ } else {
+ cerr<<"PASS"<<endl;
+ }
+ FFLAS::fflas_delete( C);
+ FFLAS::fflas_delete( Ab);
+
+#endif
+ FFLAS::fflas_delete( NS);
+ FFLAS::fflas_delete( A);
+
+#if TIME
+ double mflops = 2*(n*n/1000000.0)*nbit*n/tim.usertime();
+ cerr<<"NSdim = "<<NSdim<<" Nullspace over Z/"<<atoi(argv[1])<<"Z : t= "
+ << tim.usertime()/nbit
+ << " s, Mffops = "<<mflops
+ << endl;
+
+ cout<<n<<" "<<mflops<<" "<<tim.usertime()/nbit<<endl;
+#endif
+}
diff --git a/tests/test-paladin-splitter.C b/tests/test-paladin-splitter.C
new file mode 100644
index 0000000..a3f65ec
--- /dev/null
+++ b/tests/test-paladin-splitter.C
@@ -0,0 +1,268 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+
+/*
+ * Copyright (C) the FFLAS-FFPACK group
+ * Written by Ziad Sultan <ziad.sultan at imag.fr>
+ *
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//#define DEBUG 1
+//#define __FFLASFFPACK_FORCE_SEQ
+
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include <iomanip>
+#include <iostream>
+#include <givaro/modular.h>
+#include <givaro/udl.h>
+#include <recint/rint.h>
+#include <string>
+#include <givaro/givintprime.h>
+
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/fflas/fflas.h"
+
+#include "fflas-ffpack/utils/args-parser.h"
+#include "test-utils.h"
+#include "fflas-ffpack/utils/Matio.h"
+
+typedef Givaro::ModularBalanced<double> Field;
+
+template<class CutStrat, class StratParam>
+bool tmain(int argc, char** argv, std::string printStrat)
+{
+
+ std::cerr << "tmain: " << printStrat << std::endl;
+
+
+ size_t n = 2000;
+ bool p = true;
+ size_t iters = 3;
+ int64_t q = 131071 ;
+ bool dataPar = true;
+ int proc = MAX_THREADS;
+
+ int strat = 1;
+
+ Argument as[] = {
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'i', "-i N", "Set number of repetitions.", TYPE_INT , &iters },
+ { 't', "-t N", "Set number of processors.", TYPE_INT , &proc },
+ { 's', "-s N", "Set the strategy parameter using t: 1 for (t, BLOCK, THREADS), 2 for (t, BLOCK, GRAIN), 3 for (t, BLOCK, FIXED), 4 for (t, ROW, THREADS), 5 for (t, ROW, GRAIN), 6 for (t, ROW, FIXED), 7 for (t, COLUMN, THREADS), 8 for (t, COLUMN, GRAIN), 9 for (t, COLUMN, FIXED), 10 for SINGLE strategy.", TYPE_INT , &strat },
+ { 'p', "-p Y/N", "run the parallel program using Parallel(Y)/Sequential(N).", TYPE_BOOL , &p },
+ { 'd', "-d Y/N", "run the parallel program using data parallelism(Y)/task parallelism(N).", TYPE_BOOL , &dataPar },
+ END_OF_ARGUMENTS
+ };
+ FFLAS::parseArguments(argc,argv,as);
+
+ size_t m = n; // matrices are square in this test
+
+ Field F(q);
+ Field::RandIter G(F);
+
+// Allocate matrices
+ typename Field::Element_ptr A = FFLAS::fflas_new (F, m, n);
+ typename Field::Element_ptr B = FFLAS::fflas_new (F, m, n);
+ typename Field::Element_ptr C = FFLAS::fflas_new (F, m, n);
+ typename Field::Element_ptr Acop = FFLAS::fflas_new (F, m, n);
+
+
+ auto CUTTER = SPLITTER(proc, CutStrat, StratParam);
+
+// initialize
+ if(dataPar){
+ PARFOR1D(i, m, CUTTER,
+ for (size_t j=0; j<(size_t)n; ++j)
+ G.random (*(A+i*n+j));
+ );
+
+ PARFOR1D(i, m, CUTTER,
+ for (size_t j=0; j<(size_t)n; ++j)
+ G.random (*(B+i*n+j));
+ );
+
+ PARFOR1D(i, m, CUTTER,
+ for (size_t j=0; j<(size_t)n; ++j)
+ G.random (*(C+i*n+j));
+ );
+ }
+ else{ // initialize with tasks using FORBLOCK1D
+ PAR_BLOCK{
+ SYNCH_GROUP(
+ FORBLOCK1D(itt, m*n, CUTTER,
+ TASK(MODE(WRITE(A)),
+ for(size_t i=itt.begin(); i!=itt.end(); ++i)
+ G.random (*(A+i)););
+
+ TASK(MODE(WRITE(B)),
+ for(size_t i=itt.begin(); i!=itt.end(); ++i)
+ G.random (*(B+i)););
+
+ TASK(MODE(WRITE(C)),
+ for(size_t i=itt.begin(); i!=itt.end(); ++i)
+ G.random (*(C+i)););
+ );// end of FORBLOCK1D
+ );// end of SYNCH_GROUP
+ }// end of PAR_BLOCK
+ }
+
+// copy A for verification
+ FFLAS::fassign(F,m,n,A,n,Acop,n);
+
+// time
+ FFLAS::Timer chrono;
+ double *time=new double[iters];
+
+// parallel add using PARFOR1D
+ for (size_t it=0;it<=iters;++it){
+ chrono.clear();
+ if (it) chrono.start();
+
+ if(dataPar){
+
+ PARFOR1D(i, m, CUTTER,
+ for (size_t j=0; j<(size_t)n; ++j)
+ A[i*n+j] = B[i*n+j] + C[i*n+j];
+ );
+ }
+ else{
+ PAR_BLOCK{
+ FORBLOCK1D(itt, m*n, CUTTER,
+ TASK(MODE(READ(B,C) WRITE(A)),
+ for(size_t i=itt.begin(); i!=itt.end(); ++i)
+ A[i] = B[i] + C[i];
+ );
+ );
+ }
+ }
+
+
+ if (it) {chrono.stop(); time[it-1]=chrono.realtime();}
+
+ }
+ std::sort(time, time+iters);
+ double meantime = time[iters/2];
+ delete[] time;
+
+// sequential add
+ chrono.clear();
+ chrono.start();
+ for(size_t i=0; i<m*n; ++i)
+ Acop[i]=B[i]+C[i];
+ chrono.stop();
+ double timeseq = chrono.usertime();
+
+
+// verification of the parallel result
+ bool fail = false;
+ for(size_t i=0; i<m; ++i)
+ for (size_t j=0; j<n; ++j)
+ if (!F.areEqual (*(Acop+i*n+j), *(A+i*n+j))){
+ std::cout << " Seq["<<i<<","<<j<<"] = " << (*(Acop+i*n+j))
+ << " Par["<<i<<","<<j<<"] = " << (*(A+i*n+j))
+ << std::endl;
+ fail=true;
+ }
+
+ if (fail)
+ std::cout<<"FAIL"<<std::endl;
+ else
+ std::cout<<"PASS"<<std::endl;
+
+
+ std::cout<<"m: "<<m<<" n: "<<n;
+ std::cout<<" SeqTime: "<<timeseq;
+ std::cout<<" ParTime: " << meantime;
+
+// std::cout<<" Strategy:("<<proc<<", "<<CutStrat<<", "<<StratParam<<")";
+ std::cout<<" Strategy:("<<proc<<", "<<printStrat<<")";
+
+ std::string dataflow;
+
+
+#ifdef __FFLASFFPACK_USE_DATAFLOW // OMP/KAAPI dataflow option
+ dataflow = " with dataflow synch!";
+#else
+ dataflow = " with explicit synch!";
+#endif
+
+ if(dataPar)
+ std::cout<<" Data parallelism is used!"<<std::endl;
+ else
+ std::cout<<" TASK parallelism is used"<<dataflow<<std::endl;
+
+ FFLAS::fflas_delete(A);
+ FFLAS::fflas_delete(Acop);
+ FFLAS::fflas_delete(B);
+ FFLAS::fflas_delete(C);
+
+ return fail;
+
+}
+
+
+
+int main(int argc, char** argv)
+{
+
+ size_t n = 2000;
+ bool p = true;
+ size_t iters = 3;
+ int64_t q = 131071 ;
+ bool dataPar = true;
+ int proc = MAX_THREADS;
+
+ int strat = 1;
+
+ Argument as[] = {
+ { 'n', "-n N", "Set the dimension of the matrix.", TYPE_INT , &n },
+ { 'i', "-i N", "Set number of repetitions.", TYPE_INT , &iters },
+ { 't', "-t N", "Set number of processors.", TYPE_INT , &proc },
+ { 's', "-s N", "Set the strategy parameter using t: 1 for (t, BLOCK, THREADS), 2 for (t, BLOCK, GRAIN), 3 for (t, BLOCK, FIXED), 4 for (t, ROW, THREADS), 5 for (t, ROW, GRAIN), 6 for (t, ROW, FIXED), 7 for (t, COLUMN, THREADS), 8 for (t, COLUMN, GRAIN), 9 for (t, COLUMN, FIXED), 10 for SINGLE strategy.", TYPE_INT , &strat },
+ { 'p', "-p Y/N", "run the parallel program using Parallel(Y)/Sequential(N).", TYPE_BOOL , &p },
+ { 'd', "-d Y/N", "run the parallel program using data parallelism(Y)/task parallelism(N).", TYPE_BOOL , &dataPar },
+ END_OF_ARGUMENTS
+ };
+ FFLAS::parseArguments(argc,argv,as);
+
+
+
+ bool fail = false;
+
+ switch (strat){
+ case 1: fail |= tmain<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads>(argc,argv,std::string("FFLAS::BLOCK, FFLAS::THREADS"));
+ case 2: fail |= tmain<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Grain>(argc,argv,std::string("FFLAS::BLOCK, FFLAS::GRAIN"));
+ case 3: fail |= tmain<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Fixed>(argc,argv,std::string("FFLAS::BLOCK, FFLAS::FIXED"));
+ case 4: fail |= tmain<FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads>(argc,argv,std::string("FFLAS::ROW, FFLAS::THREADS"));
+ case 5: fail |= tmain<FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Grain>(argc,argv,std::string("FFLAS::ROW, FFLAS::GRAIN"));
+ case 6: fail |= tmain<FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Fixed>(argc,argv,std::string("FFLAS::ROW, FFLAS::FIXED"));
+ case 7: fail |= tmain<FFLAS::CuttingStrategy::Column,FFLAS::StrategyParameter::Threads>(argc,argv,std::string("FFLAS::COLUMN, FFLAS::THREADS"));
+ case 8: fail |= tmain<FFLAS::CuttingStrategy::Column,FFLAS::StrategyParameter::Grain>(argc,argv,std::string("FFLAS::COLUMN, FFLAS::GRAIN"));
+ case 9: fail |= tmain<FFLAS::CuttingStrategy::Column,FFLAS::StrategyParameter::Fixed>(argc,argv,std::string("FFLAS::COLUMN, FFLAS::FIXED"));
+ case 10: fail |= tmain<FFLAS::CuttingStrategy::Single,FFLAS::StrategyParameter::Threads>(argc,argv,std::string("FFLAS::SINGLE, FFLAS::THREADS"));
+ }
+
+ return fail;
+}
diff --git a/tests/test-paladin-task.C b/tests/test-paladin-task.C
new file mode 100644
index 0000000..496449a
--- /dev/null
+++ b/tests/test-paladin-task.C
@@ -0,0 +1,160 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+
+/*
+ * Copyright (C) the FFLAS-FFPACK group
+ * Written by Ziad Sultan <ziad.sultan at imag.fr>
+ *
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#undef __FFLASFFPACK_USE_OPENMP
+#define __FFLASFFPACK_USE_TBB
+
+#include <string>
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+
+
+size_t add(const size_t x, const size_t y)
+{
+ return x+y;
+
+}
+
+size_t seq_fib(size_t n) {
+ if (n < 2)
+ return n;
+ else
+ return seq_fib(n-1) + seq_fib(n-2);
+}
+
+size_t par_fib(const size_t n, const size_t cutoff)
+{
+
+ size_t x=0, y=0, z=0;
+ // if (n < 2)
+ // return n;
+ if (n < cutoff) // The bigger the cutoff the bigger is the parallel speed-up
+ return seq_fib(n);
+ else{
+ SYNCH_GROUP(
+ TASK(MODE(READ(n) WRITE(x) CONSTREFERENCE(x)),
+ x = par_fib(n-1, cutoff);
+ );
+
+ TASK(MODE(READ(n) WRITE(y) CONSTREFERENCE(y)),
+ y = par_fib(n-2, cutoff);
+ );
+
+ CHECK_DEPENDENCIES;
+
+ TASK(MODE(READ(x,y) WRITE(z) CONSTREFERENCE(z,x,y)),
+ z=add(x,y);
+ );
+ );//end SYNCH_GROUP
+ return z;
+ }
+}
+
+
+int main(int argc, char** argv)
+{
+
+ size_t n = 20;
+ bool p = true;
+ size_t iters = 3;
+ size_t cutoff = 2;
+// int64_t q = 131071 ;
+// int proc = MAX_THREADS;
+
+ Argument as[] = {
+ { 'n', "-n N", "Set the nth number of fibonacci to compute", TYPE_INT , &n },
+ { 'c', "-c N", "Set the Cutoff at which the sequential base case is called (the bigger the cuttof is the better is the parallel speed-up)", TYPE_INT , &cutoff },
+ { 'i', "-i N", "Set number of repetitions.", TYPE_INT , &iters },
+ { 'p', "-p Y/N", "run the parallel program using Parallel(Y)/Sequential(N).", TYPE_BOOL , &p },
+ END_OF_ARGUMENTS
+ };
+ FFLAS::parseArguments(argc,argv,as);
+// { 't', "-t N", "Set number of processors.", TYPE_INT , &proc },
+
+ size_t f=0;
+
+
+// time
+ FFLAS::Timer chrono;
+ double *time=new double[iters];
+
+// parallel add using PARFOR1D
+ for (size_t it=0;it<=iters;++it){
+ chrono.clear();
+ if (it) chrono.start();
+
+ if(p){
+ PAR_BLOCK{
+ f=par_fib(n, cutoff);
+ }// end of PAR_BLOCK
+ }
+ else
+ f=seq_fib(n);
+ if (it) {chrono.stop(); time[it-1]=chrono.realtime();}
+ }
+
+ std::sort(time, time+iters);
+ double meantime = time[iters/2];
+ delete[] time;
+
+ // sequential add
+ chrono.clear();
+ chrono.start();
+ size_t l=seq_fib(n);
+ chrono.stop();
+ double timeseq = chrono.realtime();
+
+
+ // verification of the parallel result
+ if (f!=l)
+ std::cout<<"FAIL: Par_Fib("<<n<<") = "<<f<<" and Seq_Fib("<<n<<") = "<<l<<std::endl;
+ else
+ std::cout<<"PASS"<<std::endl;
+
+
+ std::cout<<" n: "<<n;
+ std::cout<<" SeqTime: "<<timeseq;
+ std::cout<<" ParTime: " << meantime;
+
+ std::string dataflow;
+
+#ifdef __FFLASFFPACK_USE_DATAFLOW // OMP/KAAPI dataflow option
+ dataflow = " with dataflow synch!";
+#else
+ dataflow = " with explicit synch!";
+#endif
+
+ std::cout<<dataflow<<std::endl;
+
+ return 0;
+
+}
diff --git a/tests/test-pfgemm-DSL.C b/tests/test-pfgemm-DSL.C
new file mode 100644
index 0000000..ea0fe90
--- /dev/null
+++ b/tests/test-pfgemm-DSL.C
@@ -0,0 +1,209 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Ziad Sultan
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// DSL test for pfgemm
+//
+//--------------------------------------------------------------------------
+// Ziad Sultan
+//-------------------------------------------------------------------------
+/*
+#ifndef DEBUG
+#define DEBUG 0
+#endif
+*/
+#define NEWWINO
+#ifndef TIME
+#define TIME 1
+#endif
+
+#define DEBUG 1
+#include <iomanip>
+#include <iostream>
+using namespace std;
+
+#define __FFLASFFPACK_USE_OPENMP
+//#define __FFLASFFPACK_USE_KAAPI
+
+//#define __FFLASFFPACK_FORCE_SEQ
+
+
+#include "fflas-ffpack/field/modular-positive.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "time.h"
+
+/*
+#ifdef __FFLASFFPACK_USE_KAAPI
+#include <kaapi++>
+#endif
+
+#ifdef __FFLASFFPACK_USE_OPENMP
+#include <omp.h>
+#endif
+*/
+
+
+using namespace FFPACK;
+
+typedef Givaro::Modular<double> Field;
+//typedef Givaro::Modular<float> Field;
+//typedef ModularBalanced<double> Field;
+//typedef ModularBalanced<float> Field;
+//typedef Givaro::Modular<int> Field;
+
+BEGIN_PARALLEL_MAIN(int argc, char** argv)
+{
+
+ if (argc != 8) {
+ cerr<<"Testing pfgemm with : test-fgemm-DSL <p> <file-matrixA> <File-matrixB> <w> <i> <alpha> <beta>"
+ <<endl;
+ exit(-1);
+ }
+ srand48( FFLAS::BaseTimer::seed());
+ int m,n, k;
+
+ Field F(atoi(argv[1]));
+
+ typename Field::Element *A = read_field(F, argv[2], &m, &k);
+ typename Field::Element *B = read_field(F, argv[3], &k, &n);
+
+
+ int nbw=atoi(argv[4]); // number of winograd levels
+ int nbit=atoi(argv[5]); // number of times the product is performed
+ cerr<<setprecision(10);
+ Field::Element alpha,beta;
+
+ F.init( alpha, Field::Element(atoi(argv[6])));
+ F.init( beta, Field::Element(atoi(argv[7])));
+
+ size_t lda=m;
+ size_t ldb=n;
+
+
+ enum FFLAS::FFLAS_TRANSPOSE ta = FFLAS::FflasNoTrans;
+ enum FFLAS::FFLAS_TRANSPOSE tb = FFLAS::FflasNoTrans;
+
+ Field::Element * C=NULL;
+ struct timespec t0,t1;
+ double delay, avrg;
+ double t_total=0;
+
+ const FFLAS::CuttingStrategy meth = FFLAS::BLOCK;
+ const FFLAS::StrategyParameter strat = FFLAS::THREADS;
+ FFLAS::MMHelper<Field, FFLAS::MMHelperAlgo::Winograd, FFLAS::FieldTraits<Field>::value,
+ FFLAS::ParSeqHelper::Parallel> pWH (F, nbw,FFLAS::ParSeqHelper::Parallel(MAX_THREADS,meth,strat));
+ for(int i = 0;i<nbit;++i){
+ C = FFLAS::fflas_new<Field::Element>(m*n);
+ clock_gettime(CLOCK_REALTIME, &t0);
+
+ PAR_INSTR{
+
+ FFLAS::fgemm(F, ta, tb,m,n,k,alpha, A,lda, B,ldb,
+ beta,C,n, pWH);
+ }
+ BARRIER;
+ clock_gettime(CLOCK_REALTIME, &t1);
+ delay = (double)(t1.tv_sec-t0.tv_sec)+(double)(t1.tv_nsec-t0.tv_nsec)/1000000000;
+
+ if (i)
+ t_total+=delay;
+
+ }
+ avrg = t_total/(nbit-1);
+
+#if TIME
+
+ double mflops = (2.0*(m*k-((!F.isZero(beta))?m:0))/1000000.0)*n/avrg;
+
+ cerr<<m<<" "<<n<<" "<<k<<" "<<nbw/*<<" "<<RBLOCKSIZE<<" "<<CBLOCKSIZE*/<<" "<<alpha<<" "<<beta<<" "
+ <<mflops<<" "<<avrg<<endl;
+#endif
+
+
+#if DEBUG
+ bool wrong = false;
+ Field::Element zero;
+ F.init(zero, 0.0);
+ Field::Element * Cd;
+ Cd = FFLAS::fflas_new<Field::Element>(m*n);
+ for (int i=0; i<m*n; ++i)
+ F.assign (*(Cd+i), zero);
+
+ Field::Element aij, bij, tmp;
+ // Field::Element beta_alpha;
+ //F.div (beta_alpha, beta, alpha);
+ for (int i = 0; i < m; ++i)
+ for (int j = 0; j < n; ++j){
+ F.mulin(*(Cd+i*n+j),beta);
+ F.assign (tmp, zero);
+ for ( int l = 0; l < k ; ++l ){
+ if ( ta == FFLAS::FflasNoTrans )
+ aij = *(A+i*lda+l);
+ else
+ aij = *(A+l*lda+i);
+ if ( tb == FFLAS::FflasNoTrans )
+ bij = *(B+l*ldb+j);
+ else
+ bij = *(B+j*ldb+l);
+ //F.mul (tmp, aij, bij);
+ //F.axpyin( *(Cd+i*n+j), alpha, tmp );
+ F.axpyin (tmp, aij, bij);
+ }
+ F.axpyin (*(Cd+i*n+j), alpha, tmp);
+ //F.mulin( *(Cd+i*n+j),alpha );
+ if ( !F.areEqual( *(Cd+i*n+j), *(C+i*n+j) ) ) {
+ wrong = true;
+ }
+ }
+ if ( wrong ){
+ cerr<<"FAIL"<<endl;
+ for (int i=0; i<m; ++i){
+ for (int j =0; j<n; ++j)
+ if (!F.areEqual( *(C+i*n+j), *(Cd+i*n+j) ) )
+ cerr<<"Erreur C["<<i<<","<<j<<"]="
+ <<(*(C+i*n+j))<<" C[d"<<i<<","<<j<<"]="
+ <<(*(Cd+i*n+j))<<endl;
+ }
+ }
+ else{
+ cerr<<"PASS"<<endl;
+ }
+ FFLAS::fflas_delete( Cd);
+#endif
+
+ FFLAS::fflas_delete( C);
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( B);
+
+
+}
+END_PARALLEL_MAIN()
diff --git a/tests/test-pluq.C b/tests/test-pluq.C
new file mode 100644
index 0000000..80af6bf
--- /dev/null
+++ b/tests/test-pluq.C
@@ -0,0 +1,272 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2015 the FFLAS-FFPACK group
+ * Written by
+ *
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *
+ */
+
+//--------------------------------------------------------------------------
+// Test for the lqup factorisation
+//--------------------------------------------------------------------------
+// usage: test-lqup p A n, for n lqup factorization
+// of A over Z/pZ
+//-------------------------------------------------------------------------
+
+//-------------------------------------------------------------------------
+//#define DEBUG 0
+#define __FFLAS__TRSM_READONLY
+// Debug option 0: no debug
+// 1: check A = LQUP
+//-------------------------------------------------------------------------
+
+
+#define __FFPACK_LUDIVINE_CUTOFF 60
+#include <iostream>
+#include <iomanip>
+#include <algorithm>
+#include "fflas-ffpack/utils/Matio.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "givaro/modular-integer.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+#include "test-utils.h"
+
+using namespace std;
+using namespace FFPACK;
+
+
+typedef Givaro::Modular<Givaro::Integer> Field;
+
+
+int main(int argc, char** argv){
+ //cerr<<setprecision(20);
+ int m,n;
+ size_t R;
+
+ if (argc!=4){
+ cerr<<"usage : test-plup <p> <A> <i>"<<endl
+ <<" to do i PLUQ factorisation of A"
+ <<endl;
+ exit(-1);
+ }
+ Field F(atof(argv[1]));
+ Field::Element * A;
+
+ A = read_field(F,argv[2],&m,&n);
+
+ size_t maxP, maxQ;
+
+ // size_t cutoff = atoi(argv[3]);
+ size_t nbf = atoi(argv[3]);
+
+ FFLAS::Timer tim,timc, timlud,timludc;
+ timc.clear();
+ timludc.clear();
+
+ enum FFLAS::FFLAS_DIAG diag = FFLAS::FflasNonUnit;
+ enum FFLAS::FFLAS_TRANSPOSE trans = FFLAS::FflasNoTrans;
+ if (trans == FFLAS::FflasNoTrans){
+ maxP = m;
+ maxQ = n;
+ } else{
+ maxP = n;
+ maxQ = m;
+ }
+ size_t *P = FFLAS::fflas_new<size_t>(maxP);
+ size_t *Q = FFLAS::fflas_new<size_t>(maxQ);
+
+ //write_field (F,cerr<<"A = "<<endl, A, m,n,n);
+ size_t * RRP, *CRP;
+ for ( size_t i=0;i<nbf;i++){
+ if (i) {
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( RRP);
+ FFLAS::fflas_delete( CRP);
+ A = read_field(F,argv[2],&m,&n);
+ }
+
+ for (size_t j=0;j<maxP;j++)
+ P[j]=0;
+ for (size_t j=0;j<maxQ;j++)
+ Q[j]=0;
+ tim.clear();
+ tim.start();
+
+ R = FFPACK::PLUQ_basecaseCrout (F, diag, m, n, A, n, P, Q);
+ tim.stop();
+ timc+=tim;
+ FFLAS::fflas_delete( A);
+ A = read_field(F,argv[2],&m,&n);
+ timlud.clear();
+ timlud.start();
+ R = FFPACK::LUdivine (F, diag, FFLAS::FflasNoTrans, m, n, A, n, P, Q);
+ timlud.stop();
+ timludc+=timlud;
+// std::cerr<<"Fini LUdivine"<<std::endl;
+ RRP = FFLAS::fflas_new<size_t>(R);
+ CRP = FFLAS::fflas_new<size_t>(R);
+// RankProfilesFromPLUQ(RRP, CRP, P, Q, m, n, R);
+ }
+ // cerr<<"Row Rank Profile = ";
+ // for (size_t i=0;i<R;++i)
+ // cerr<<RRP[i]<<" ";
+ // cerr<<endl;
+ // cerr<<"Column Rank Profile = ";
+ // for (size_t i=0;i<R;++i)
+ // cerr<<CRP[i]<<" ";
+ // cerr<<endl;
+ // std::sort(CRP,(CRP+R));
+ // std::sort(RRP,(RRP+R));
+ // cerr<<"Sorted Row Rank Profile = ";
+ // for (size_t i=0;i<R;++i)
+ // cerr<<RRP[i]<<" ";
+ // cerr<<endl;
+ // cerr<<"Sorted Column Rank Profile = ";
+ // for (size_t i=0;i<R;++i)
+ // cerr<<CRP[i]<<" ";
+ // cerr<<endl;
+
+ if (nbf){
+ FFLAS::fflas_delete( RRP);
+ FFLAS::fflas_delete( CRP);
+ }
+// write_field (F,cerr<<"Result = "<<endl, A, m,n,n);
+
+// cerr<<"P = [";
+// for (size_t i=0; i<maxP; ++i)
+// cerr<<P[i]<<" ";
+// cerr<<"]"<<endl;
+ // cerr<<"Q = [";
+ // for (size_t i=0; i<maxQ; ++i)
+ // cerr<<Q[i]<<" ";
+ // cerr<<"]"<<endl;
+#if DEBUG
+ Field::Element * X = FFLAS::fflas_new<Field::Element>(m*n);
+ Field::Element * L, *U;
+ L = FFLAS::fflas_new<Field::Element>(m*R);
+ U = FFLAS::fflas_new<Field::Element>(R*n);
+
+ Field::Element zero,one;
+ F.init(zero,0.0);
+ F.init(one,1.0);
+ for (size_t i=0; i<R; ++i){
+ for (size_t j=0; j<i; ++j)
+ F.assign ( *(U + i*n + j), zero);
+ for (int j=i; j<n; ++j)
+ F.assign (*(U + i*n + j), *(A+ i*n+j));
+ }
+ for ( size_t j=0; j<R; ++j ){
+ for (size_t i=0; i<=j; ++i )
+ F.assign( *(L+i*R+j), zero);
+ F.assign(*(L+j*R+j), one);
+ for (size_t i=j+1; i<(size_t)m; i++)
+ F.assign( *(L + i*R+j), *(A+i*n+j));
+ }
+
+ //write_field(F,cerr<<"L = "<<endl,L,m,R,R);
+ //write_field(F,cerr<<"U = "<<endl,U,R,n,n);
+ // cerr<<endl;
+ FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, R,0,m, L, R, P);
+
+ //write_field(F,cerr<<"L = "<<endl,L,m,m,m);
+ //write_field(F,cerr<<"U = "<<endl,U,m,n,n);
+// write_field(F,cerr<<"L = "<<endl,L,m,m,m);
+// write_field(F,cerr<<"U = "<<endl,U,m,n,n);
+
+ FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, R,0,n, U, n, Q);
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,R,
+ 1.0, L,R, U,n, 0.0, X,n);
+ //FFLAS::fflas_delete( A);
+
+//////
+ //write_field(F,cerr<<"L = "<<endl,L,m ,n,n);
+ //write_field(F,cerr<<"U = "<<endl,U,n,n,n);
+
+ // cerr<<"P = ";
+ // for (int i=0; i<m; ++i)
+ // cerr<<P[i]<<" ";
+ // cerr<<endl;
+ // cerr<<"Q = ";
+ // for (int i=0; i<n; ++i)
+ // cerr<<Q[i]<<" ";
+ // cerr<<endl;
+
+ Field::Element * B = read_field(F,argv[2],&m,&n);
+
+ bool fail = false;
+ for (size_t i=0; i<(size_t)m; ++i)
+ for (size_t j=0; j<(size_t)n; ++j)
+ if (!F.areEqual (*(B+i*n+j), *(X+i*n+j))){
+ std::cerr << " B["<<i<<","<<j<<"] = " << (*(B+i*n+j))
+ << " X["<<i<<","<<j<<"] = " << (*(X+i*n+j))
+ << endl;
+ fail=true;
+ }
+ // write_field(F,cerr<<"X = "<<endl,X,m,n,n);
+ // write_field(F,cerr<<"B = "<<endl,B,m,n,n);
+ FFLAS::fflas_delete( B);
+ if (fail)
+ cerr<<"FAIL"<<endl;
+
+
+ else
+ cerr<<"PASS"<<endl;
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+#endif
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+
+ double t = timc.usertime();
+ double tlud = timludc.usertime();
+ const int sm = std::min(m,n);
+ const int sn = std::max(m,n);
+
+ double numops = sm*sm/1000.0*(sn-sm/3.0);
+
+ cout<<m<<"x"<< n
+ << " Trans = "<<trans
+ << " Diag = "<<diag
+ << " : rank = " << R << " ["
+ << ((double)nbf/1000.0*(double)numops / t)
+ << " MFops "
+ << " in "
+ << t/nbf<<"s"
+ <<" LUdivine : "<<((double)nbf/1000.0*(double)numops / tlud)
+ << " MFops "
+ << " in "
+ <<tlud/nbf
+ <<"s]"<< endl;
+ cerr<<m
+ <<" "<<((double)nbf/1000.0*(double)numops / t)
+ <<" "<<t/nbf
+ <<" "<<((double)nbf/1000.0*(double)numops / tlud)
+ <<" "<<tlud/nbf
+ <<" "<<R
+ <<endl;
+
+ return 0;
+}
diff --git a/tests/test-ppluq.C b/tests/test-ppluq.C
new file mode 100644
index 0000000..f1ac57b
--- /dev/null
+++ b/tests/test-ppluq.C
@@ -0,0 +1,313 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2015 the FFLAS-FFPACK group
+ * Written by
+ *
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *
+ */
+
+
+/*
+*******************************************************
+ Parallel PLUQ quad recurisve with OpenMP
+*******************************************************
+
+g++ -D__FFLASFFPACK_HAVE_CBLAS -Wall -g -fopenmp -O3 -march=native -mavx -I/home/sultan/soft/fflas-ffpack/ -I/usr/local/soft/givaro-3.7.1/include test-ppluq.C -L/home/pernet/Logiciels/ATLAS_1TH/lib -lcblas -latlas -L/usr/local/soft/givaro-3.7.1/lib -lgivaro -lm -lrt -Wl,-rpath -Wl,/usr/local/soft/givaro-3.7.1/lib -o test-ppluq
+*/
+
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+#include <iomanip>
+//#include "omp.h"
+
+#define __FFLASFFPACK_USE_OPENMP
+
+#define __FFLAS__TRSM_READONLY
+#define __PFTRSM_FOR_PLUQ
+#include "fflas-ffpack/utils/Matio.h"
+#include <givaro/modular-balanced.h>
+//#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+#include "fflas-ffpack/fflas-ffpack.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "sys/time.h"
+
+//#define BASECASE_K 256
+
+//#include "fflas-ffpack/ffpack/parallel.h"
+
+using namespace std;
+using namespace FFLAS;
+using namespace FFPACK;
+#ifndef MODULO
+#define MODULO 1
+#endif
+
+#if(MODULO==1)
+typedef Givaro::Modular<double> Field;
+#else
+typedef Givaro::ZRing<double> Field;
+#endif
+
+#ifndef DEBUG
+#define DEBUG 1
+#endif
+
+#ifndef SEQ
+#define SEQ 1
+#endif
+
+void verification_PLUQ(const Field & F, typename Field::Element * B, typename Field::Element * A,
+ size_t * P, size_t * Q, size_t m, size_t n, size_t R)
+{
+
+ Field::Element * X = FFLAS::fflas_new<Field::Element>(m*n);
+ Field::Element * L, *U;
+ L = FFLAS::fflas_new<Field::Element>(m*R);
+ U = FFLAS::fflas_new<Field::Element>(R*n);
+ ParSeqHelper::Parallel H;
+
+ PARFOR1D (i,m*R, H,
+ F.init(L[i], 0.0);
+ );
+
+
+ PARFOR1D (i,m*R, H,
+ F.init(U[i], 0.0);
+ );
+
+
+ PARFOR1D (i,m*n, H,
+ F.init(X[i], 0.0);
+ );
+
+
+ Field::Element zero,one;
+ F.init(zero,0.0);
+ F.init(one,1.0);
+ PARFOR1D (i,R, H,
+ for (size_t j=0; j<i; ++j)
+ F.assign ( *(U + i*n + j), zero);
+ for (size_t j=i; j<n; ++j)
+ F.assign (*(U + i*n + j), *(A+ i*n+j));
+ );
+
+ PARFOR1D (j,R, H,
+ for (size_t i=0; i<=j; ++i )
+ F.assign( *(L+i*R+j), zero);
+ F.assign(*(L+j*R+j), one);
+ for (size_t i=j+1; i<m; i++)
+ F.assign( *(L + i*R+j), *(A+i*n+j));
+ );
+
+
+ FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, R,0,m, L, R, P);
+
+ FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, R,0,n, U, n, Q);
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,R,
+ 1.0, L,R, U,n, 0.0, X,n);
+ bool fail = false;
+ PARFOR1D (i,m, H,
+ for (size_t j=0; j<n; ++j)
+ if (!F.areEqual (*(B+i*n+j), *(X+i*n+j))){
+ std::stringstream errs;
+ errs << " B["<<i<<","<<j<<"] = " << (*(B+i*n+j))
+ << " X["<<i<<","<<j<<"] = " << (*(X+i*n+j))
+ << std::endl;
+ std::cerr << errs;
+ fail=true;
+ }
+ );
+
+ if (fail)
+ std::cerr<<"FAIL"<<std::endl;
+ else
+ std::cerr<<"PASS"<<std::endl;
+
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+}
+
+int main(int argc, char** argv)
+{
+
+ int p, n, m, nbf;
+
+ if (argc > 6){
+ std::cerr<<"usage : PLUQ-rec-omp <p> <m> <n> <i> <file>"<<std::endl
+// std::cerr<<"usage : PLUQ-rec-omp <m> <n> <p> <r> <i>"<<std::endl
+ <<std::endl;
+ exit(-1);
+ }
+
+ p = (argc>1 ? atoi( argv[1] ) : 1009);
+
+ m = (argc>2 ? atoi( argv[2] ) : 1024);
+ n = (argc>3 ? atoi( argv[3] ) : 1024);
+ // r = atoi( argv[4] );
+ nbf = (argc>4 ? atoi( argv[4] ) : 1);
+
+ // size_t lda = n;
+
+ // random seed
+ // ifstream f("/dev/urandom");
+ // size_t seed1, seed2, seed3,seed4;
+ // f.read(reinterpret_cast<char*>(&seed1), sizeof(seed1));
+ // f.read(reinterpret_cast<char*>(&seed2), sizeof(seed2));
+ // f.read(reinterpret_cast<char*>(&seed3), sizeof(seed3));
+ // f.read(reinterpret_cast<char*>(&seed4), sizeof(seed4));
+
+// seed1=10;seed2=12;
+// seed3=13;seed4=14;
+
+ enum FFLAS::FFLAS_DIAG diag = FFLAS::FflasNonUnit;
+ size_t R;
+
+ const Field F((double)p);
+ // Field::RandIter G(F, seed1);
+
+ Field::Element alpha, beta;
+ F.init(alpha,1.0);
+ F.init(beta,0.0);
+ // Field::Element * U = FFLAS::fflas_new<Field::Element>(n*n);
+
+ ParSeqHelper::Parallel H;
+
+ typename Field::Element* Acop;
+ if (argc > 5) {
+ Acop = read_field(F,argv[5],&m,&n);
+ } else {
+ Field::RandIter G(F);
+ Acop = FFLAS::fflas_new<Field::Element>(m*n);
+ PARFOR1D(i,(size_t)m, H,
+ for (size_t j=0; j<(size_t)n; ++j)
+ G.random (*(Acop+i*n+j));
+ );
+
+ }
+
+// FFLAS::fflas_new<Field::Element>(n*m);
+ Field::Element* A = FFLAS::fflas_new<Field::Element>(n*m);
+#if(DEBUG==1)
+ Field::Element* Adebug = FFLAS::fflas_new<Field::Element>(n*m);
+#endif
+ // std::vector<size_t> Index_P(r);
+
+ // U = construct_U(F,G, n, r, Index_P, seed4, seed3);
+ // A = construct_L(F,G, m, r, Index_P, seed2);
+ // M_randgen(F, A, U, r, m, n);
+ // size_t taille=m*n;
+ // for(size_t i=0; i<taille;++i) U[i]=A[i];
+
+ struct timespec t0, t1;// tt0, tt1;
+ double delay, avrg;//, avrgg;
+ double t_total=0;
+
+ size_t maxP, maxQ;
+ maxP = m;
+ maxQ = n;
+
+ size_t *P = FFLAS::fflas_new<size_t>(maxP);
+ size_t *Q = FFLAS::fflas_new<size_t>(maxQ);
+
+
+ PARFOR1D(i, (size_t)m, H,
+ for (size_t j=0; j<(size_t)n; ++j) {
+ *(A+i*n+j) = *(Acop+i*n+j) ;
+#if(DEBUG==1)
+ *(Adebug+i*n+j) = *(Acop+i*n+j) ;
+#endif
+ }
+ );
+
+
+
+ for ( int i=0;i<nbf+1;i++){
+ for (size_t j=0;j<maxP;j++)
+ P[j]=0;
+ for (size_t j=0;j<maxQ;j++)
+ Q[j]=0;
+
+ PARFOR1D(i, (size_t)m, H,
+ for (size_t j=0; j<(size_t)n; ++j)
+ *(A+i*n+j) = *(Acop+i*n+j) ;
+ );
+
+
+
+ clock_gettime(CLOCK_REALTIME, &t0);
+ PAR_BLOCK{
+ R = pPLUQ(F, diag, (size_t)m, (size_t)n, A, (size_t)n, P, Q, NUM_THREADS);// Parallel PLUQ
+ }
+ clock_gettime(CLOCK_REALTIME, &t1);
+ delay = (double)(t1.tv_sec-t0.tv_sec)+(double)(t1.tv_nsec-t0.tv_nsec)/1000000000;
+
+ if(i)
+ t_total +=delay;
+
+ }
+ avrg = t_total/nbf;
+ std::cerr << "MODULO: " << (MODULO?p:0) << std::endl;
+
+ PAR_BLOCK{
+ std::cerr<<"Parallel --- m: "<<m<<" , n: " << n << " , r: " <<R<<" "
+ <<avrg<<" "<<(2.0*n*n*n)/(double(3.0*(1000000000)*avrg))<<" "
+ //#ifdef __FFLASFFPACK_USE_OPENMP
+ <<NUM_THREADS<<endl;
+ //#else
+ }
+ //<<endl;
+ //#endi
+
+ // std::cout<<typeid(A).name()<<endl;
+#if(DEBUG==1)
+ cout<<"check equality A == PLUQ ?"<<endl;
+ verification_PLUQ(F,Adebug,A,P,Q,m,n,R);
+ FFLAS::fflas_delete( Adebug);
+#endif
+#if(SEQ==1)
+ struct timespec tt0, tt1;
+ double avrgg;
+ //call sequential PLUQ
+ size_t * PP = FFLAS::fflas_new<size_t>(maxP);
+ size_t * QQ = FFLAS::fflas_new<size_t>(maxQ);
+ for (size_t j=0;j<maxP;j++)
+ PP[j]=0;
+ for (size_t j=0;j<maxQ;j++)
+ QQ[j]=0;
+ clock_gettime(CLOCK_REALTIME, &tt0);
+ size_t R2 = PLUQ(F, diag, m, n, Acop, n, PP, QQ);
+ clock_gettime(CLOCK_REALTIME, &tt1);
+ FFLAS::fflas_delete( Acop);
+ avrgg = (double)(tt1.tv_sec-tt0.tv_sec)+(double)(tt1.tv_nsec-tt0.tv_nsec)/1000000000;
+ //verification
+ std::cerr<<"Sequential : "<<m<<" "<<R2<<" "
+ <<avrgg<<" "<<(2.0*n*n*n)/(double(3.0*(1000000000)*avrgg))<<endl;
+#endif
+
+ FFLAS::fflas_delete( A);
+ return 0;
+}
diff --git a/tests/test-rank.C b/tests/test-rank.C
new file mode 100644
index 0000000..5f1da0e
--- /dev/null
+++ b/tests/test-rank.C
@@ -0,0 +1,91 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for rank
+//
+//--------------------------------------------------------------------------
+// Clement Pernet
+//-------------------------------------------------------------------------
+
+#include <iomanip>
+#include <iostream>
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "Matio.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+
+
+using namespace std;
+using namespace FFPACK;
+
+typedef ModularBalanced<double> Field;
+
+int main(int argc, char** argv){
+
+ int n,m;
+ int nbit=atoi(argv[3]); // number of times the product is performed
+ cerr<<setprecision(10);
+ if (argc != 4) {
+ cerr<<"Usage : test-rank <p> <A> <<i>"
+ <<endl
+ <<" to compute the rank of A mod p (i computations)"
+ <<endl;
+ exit(-1);
+ }
+ Field F(atof(argv[1]));
+ Field::Element * A;
+ A = read_field(F,argv[2],&m ,&n);
+
+ FFLAS::Timer tim,t;
+ t.clear();
+ tim.clear();
+ size_t r=0;
+ for(int i = 0;i<nbit;++i){
+ t.clear();
+ t.start();
+ r = FFPACK::Rank (F, m, n, A, n);
+ t.stop();
+ tim+=t;
+ if (i+1<nbit){
+ FFLAS::fflas_delete( A);
+ A = read_field(F,argv[2],&m,&n);
+ }
+ }
+
+ double mflops = 2.0/3.0*(n*(double)r/1000000.0)*nbit*n/tim.usertime();
+ cerr<<"m,n = "<<m<<", "<<n<<" Rank (A) = " << r
+ << " mod "<<atoi(argv[1])<<" : t= "
+ << tim.usertime()/nbit
+ << " s, Mffops = "<<mflops
+ << endl;
+
+ cout<<m<<" "<<n<<" "<<r<<" "<<mflops<<" "<<tim.usertime()/nbit<<endl;
+}
diff --git a/tests/test-rankprofiles.C b/tests/test-rankprofiles.C
new file mode 100644
index 0000000..989a56c
--- /dev/null
+++ b/tests/test-rankprofiles.C
@@ -0,0 +1,213 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+//--------------------------------------------------------------------------
+// Test for the computations of rank profiles
+//--------------------------------------------------------------------------
+#define __FFLASFFPACK_SEQUENTIAL
+#include "fflas-ffpack/fflas-ffpack-config.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+#include "fflas-ffpack/utils/args-parser.h"
+
+#include <iostream>
+#include <iomanip>
+#include <givaro/modular.h>
+
+#include "test-utils.h"
+#include "Matio.h"
+
+using namespace FFPACK;
+
+
+template<class Field>
+bool run_with_field(Givaro::Integer q, uint64_t b, size_t m, size_t n, size_t r, size_t iters){
+ bool ok = true ;
+ int nbit=(int)iters;
+
+ while (ok && nbit){
+ // choose Field
+ Field* F= chooseField<Field>(q,b);
+ if (F==nullptr)
+ return true;
+ std::ostringstream oss;
+ F->write(oss);
+
+ std::cout.fill('.');
+ std::cout<<"Checking ";
+ std::cout.width(40);
+ std::cout<<oss.str();
+ std::cout<<" ... ";
+
+ size_t lda = n;
+ typename Field::Element_ptr A=FFLAS::fflas_new (*F, m,lda);
+ typename Field::Element_ptr B=FFLAS::fflas_new (*F, m,lda);
+ RandomMatrixWithRankandRandomRPM(*F,A,lda,r,m,n);
+ FFLAS::fassign (*F, m, n, A, lda, B, lda);
+
+ {
+ // Testing if LUdivine and PLUQ return the same result
+ size_t* RP1, * RP2;
+ FFPACK::RowRankProfile (*F, m, n, A, lda, RP1, FFPACK::FfpackSlabRecursive);
+ FFLAS::fassign (*F, m, n, B, lda, A, lda);
+ FFPACK::RowRankProfile (*F, m, n, A, lda, RP2, FFPACK::FfpackTileRecursive);
+ for (size_t i=0; i<r; i++)
+ ok &= (RP1[i] == RP2[i]);
+ FFLAS::fflas_delete(RP1);
+ FFLAS::fflas_delete(RP2);
+
+ FFLAS::fassign (*F, m, n, B, lda, A, lda);
+ FFPACK::ColumnRankProfile (*F, m, n, A, lda, RP1, FFPACK::FfpackSlabRecursive);
+ FFLAS::fassign (*F, m, n, B, lda, A, lda);
+ FFPACK::ColumnRankProfile (*F, m, n, A, lda, RP2, FFPACK::FfpackTileRecursive);
+ for (size_t i=0; i<r; i++)
+ ok &= (RP1[i] == RP2[i]);
+ FFLAS::fflas_delete(RP1);
+ FFLAS::fflas_delete(RP2);
+ }
+ {
+ // Testing if 1 PLUQ computes the rank profiles of all leading submatrices
+ size_t* RP1, * RP2;
+ size_t * P = FFLAS::fflas_new<size_t>(m);
+ size_t * Q = FFLAS::fflas_new<size_t>(n);
+ FFLAS::fassign (*F, m, n, B, lda, A, lda);
+ PLUQ(*F, FFLAS::FflasNonUnit, m, n, A, lda, P, Q);
+
+ for (size_t i=0; i<1;i++){
+ size_t mm = 1 + (rand() % m);
+ size_t nn = 1 + (rand() % n);
+ FFLAS::fassign (*F, m, n, B, lda, A, lda);
+ size_t rr = FFPACK::ColumnRankProfile (*F, mm, nn, A, lda, RP1, FFPACK::FfpackSlabRecursive);
+ FFLAS::fassign (*F, m, n, B, lda, A, lda);
+ FFPACK::RowRankProfile (*F, mm, nn, A, lda, RP2, FFPACK::FfpackSlabRecursive);
+ size_t* RRP = FFLAS::fflas_new<size_t>(r);
+ size_t* CRP = FFLAS::fflas_new<size_t>(r);
+
+ LeadingSubmatrixRankProfiles (m,n,r,mm,nn,P,Q,RRP,CRP);
+ for (size_t ii=0; ii<rr; ii++)
+ ok &= (RP1[ii] == CRP[ii]) && (RP2[ii] == RRP[ii]);
+
+ FFLAS::fflas_delete(CRP);
+ FFLAS::fflas_delete(RRP);
+ FFLAS::fflas_delete(RP1);
+ FFLAS::fflas_delete(RP2);
+
+ }
+ FFLAS::fflas_delete(P);
+ FFLAS::fflas_delete(Q);
+ }
+ {
+ // Testing PLUQ and LUDivine return a specified rank profile
+ size_t* RRP = FFLAS::fflas_new<size_t>(r);
+ size_t* CRP = FFLAS::fflas_new<size_t>(r);
+ size_t* RRPLUD, * RRPPLUQ, *CRPLUD, *CRPPLUQ;
+ RandomRankProfile (m, r, RRP);
+ RandomRankProfile (n, r, CRP);
+
+ RandomMatrixWithRankandRPM(*F,A,lda,r,m,n, RRP, CRP);
+ FFLAS::fassign (*F, m, n, A, lda, B, lda);
+ size_t cs = FFPACK::ColumnRankProfile (*F, m, n, A, lda, CRPLUD, FFPACK::FfpackSlabRecursive);
+ FFLAS::fassign (*F, m, n, B, lda, A, lda);
+ size_t ct = FFPACK::ColumnRankProfile (*F, m, n, A, lda, CRPPLUQ, FFPACK::FfpackTileRecursive);
+ FFLAS::fassign (*F, m, n, B, lda, A, lda);
+ size_t rs = FFPACK::RowRankProfile (*F, m, n, A, lda, RRPLUD, FFPACK::FfpackSlabRecursive);
+ FFLAS::fassign (*F, m, n, B, lda, A, lda);
+ size_t rt = FFPACK::RowRankProfile (*F, m, n, A, lda, RRPPLUQ, FFPACK::FfpackTileRecursive);
+ // write_perm (std::cout<<"RRP = ", RRP, r);
+ // write_perm (std::cout<<"CRP = ", CRP, r);
+ std::sort(CRP,CRP+r);
+ std::sort(RRP,RRP+r);
+ ok &= (cs==ct)&(cs==rs)&(cs==rt)&(cs==r);
+ for (size_t i=0; i<r; i++)
+ ok &= (CRPLUD[i] == CRP[i]) && (CRPPLUQ[i] == CRP[i]) &&
+ (RRPLUD[i] == RRP[i]) && (RRPPLUQ[i] == RRP[i]);
+ FFLAS::fflas_delete(CRP);
+ FFLAS::fflas_delete(RRP);
+ FFLAS::fflas_delete(CRPLUD);
+ FFLAS::fflas_delete(RRPLUD);
+ FFLAS::fflas_delete(CRPPLUQ);
+ FFLAS::fflas_delete(RRPPLUQ);
+ }
+
+
+ FFLAS::fflas_delete(A);
+ FFLAS::fflas_delete(B);
+ delete F;
+
+ nbit--;
+ if (!ok)
+ //std::cout << "\033[1;31mFAILED\033[0m "<<std::endl;
+ std::cout << "FAILED "<<std::endl;
+ else
+ //std::cout << "\033[1;32mPASSED\033[0m "<<std::endl;
+ std::cout << "PASSED "<<std::endl;
+ }
+ return ok;
+ }
+
+int main(int argc, char** argv){
+ std::cerr<<std::setprecision(20);
+
+ Givaro::Integer q = -1;
+ size_t b = 0;
+ size_t m = 150;
+ size_t n = 280;
+ size_t r = 85;
+ size_t iters = 6 ;
+ bool loop=false;
+ static Argument as[] = {
+ { 'q', "-q Q", "Set the field cardinality.", TYPE_INTEGER , &q },
+ { 'b', "-b B", "Set the bitsize of the field characteristic.", TYPE_INT , &b },
+ { 'n', "-n N", "Set the number of cols in the matrix.", TYPE_INT , &n },
+ { 'm', "-m N", "Set the number of rows in the matrix.", TYPE_INT , &m },
+ { 'r', "-r r", "Set the rank of the matrix." , TYPE_INT , &r },
+ { 'i', "-i R", "Set number of repetitions.", TYPE_INT , &iters },
+ { 'l', "-loop Y/N", "run the test in an infinite loop.", TYPE_BOOL , &loop },
+ // { 'f', "-f file", "Set input file", TYPE_STR, &file },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(argc,argv,as);
+
+ if (r > std::min (m,n))
+ r = std::min (m, n);
+
+ bool ok=true;
+ do{
+ ok&=run_with_field<Givaro::Modular<float> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::Modular<double> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::ModularBalanced<float> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::ModularBalanced<double> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::Modular<int32_t> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::ModularBalanced<int32_t> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::Modular<int64_t> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::ModularBalanced<int64_t> > (q,b,m,n,r,iters);
+ ok&=run_with_field<Givaro::Modular<Givaro::Integer> >(q,(b?b:128),m/4+1,n/4+1,r/4+1,iters);
+ } while (loop && ok);
+
+ return !ok;
+}
diff --git a/tests/test-redcolechelon.C b/tests/test-redcolechelon.C
new file mode 100644
index 0000000..cb77935
--- /dev/null
+++ b/tests/test-redcolechelon.C
@@ -0,0 +1,207 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for the reduced column echelon factorisation
+//--------------------------------------------------------------------------
+// usage: test-redcolechelon p A n, for n reduced column echelon computations
+// of A over Z/pZ
+//-------------------------------------------------------------------------
+
+//-------------------------------------------------------------------------
+//#define DEBUG 1
+// Debug option 0: no debug
+// 1: check A = LQUP
+//-------------------------------------------------------------------------
+using namespace std;
+
+
+//#define __LUDIVINE_CUTOFF 1
+#include <iostream>
+#include <iomanip>
+#include "Matio.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+using namespace FFPACK;
+typedef Givaro::Modular<double> Field;
+
+int main(int argc, char** argv){
+ //cerr<<setprecision(20);
+ int i,j,nbf,m,n;
+ int R=0;
+
+ if (argc!=4){
+ cerr<<"usage : test-redcolechelon <p> <A> <i>"<<endl
+ <<" to do i reduced column echelon computations of A"
+ <<endl;
+ exit(-1);
+ }
+ Field F((uint64_t)atoi(argv[1]));
+ Field::Element * A;
+
+ A = read_field(F,argv[2],&m,&n);
+
+ size_t *P = FFLAS::fflas_new<size_t>(n);
+ size_t *Q = FFLAS::fflas_new<size_t>(m);
+
+ // size_t cutoff = atoi(argv[3]);
+ nbf = atoi(argv[3]);
+
+ FFLAS::Timer tim,timc;
+ timc.clear();
+
+
+ for ( i=0;i<nbf;i++){
+ if (i) {
+ FFLAS::fflas_delete( A);
+ A = read_field(F,argv[2],&m,&n);
+ }
+ for (j=0;j<n;j++)
+ P[j]=0;
+ for (j=0;j<m;j++)
+ Q[j]=0;
+ tim.clear();
+ tim.start();
+ R = (int)FFPACK::ReducedColumnEchelonForm (F, m, n, A, n, P, Q);
+ tim.stop();
+ timc+=tim;
+ }
+ //write_field (F,cerr<<"Result = "<<endl, A, m,n,n);
+
+// cerr<<"P = [";
+// for (size_t i=0; i<n; ++i)
+// cerr<<P[i]<<" ";
+// cerr<<"]"<<endl;
+// cerr<<"Q = [";
+// for (size_t i=0; i<m; ++i)
+// cerr<<Q[i]<<" ";
+// cerr<<"]"<<endl;
+#if DEBUG
+ Field::Element * L = FFLAS::fflas_new<Field::Element>(m*n);
+ Field::Element * U = FFLAS::fflas_new<Field::Element>(n*n);
+ Field::Element * X = FFLAS::fflas_new<Field::Element>(m*n);
+
+ Field::Element zero,one;
+ F.init(zero,0.0);
+ F.init(one,1.0);
+
+ for (int i=0; i<R; ++i){
+ for (int j=0; j<n; ++j)
+ F.assign (*(U + i*n + j), *(A+ i*n+j));
+ }
+ for (int i=R;i<n; ++i){
+ for (int j=0; j<n; ++j)
+ F.assign(*(U+i*n+j), zero);
+ F.init(*(U+i*(n+1)),one);
+ }
+ FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, n, 0, R, U, n, P);
+
+ for ( int i=0; i<R; ++i ){
+ for (int j=0; j < n ; ++j)
+ F.assign( *(L + i*n+j),zero);
+ F.assign(*(L+i*(n+1)), one);
+ }
+ for ( int i=R; i<m; ++i ){
+ for (int j=0; j<R; ++j )
+ F.assign (*(L+i*n+j), *(A+i*n+j));
+ for (int j=R; j<n; ++j)
+ F.assign (*(L+i*n+j), zero);
+ }
+ FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, n, 0, R, L, n, Q);
+
+// cerr<<"P = ";
+// for (size_t i=0; i<n;++i)
+// cerr<<" "<<P[i];
+// cerr<<endl;
+// cerr<<"Q = ";
+// for (size_t i=0; i<m;++i)
+// cerr<<" "<<Q[i];
+// cerr<<endl;
+
+ // write_field(F,cerr<<"A = "<<endl,A,m,n,n);
+ // write_field(F,cerr<<"R = "<<endl,L,m,n,n);
+ //write_field(F,cerr<<"U = "<<endl,U,m,n,n);
+
+ Field::Element * B = read_field(F,argv[2],&m,&n);
+
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,n, 1.0,
+ B, n, U, n, 0.0, X,n);
+ //FFLAS::fflas_delete( A);
+
+ bool fail = false;
+ for (int i=0; i<m; ++i)
+ for (int j=0; j<n; ++j)
+ if (!F.areEqual (*(L+i*n+j), *(X+i*n+j)))
+ fail=true;
+
+// write_field(F,cerr<<"X = "<<endl,X,m,n,n);
+// write_field(F,cerr<<"R = "<<endl,L,m,n,n);
+
+ FFLAS::fflas_delete( B);
+ if (fail)
+ cerr<<"FAIL"<<endl;
+
+
+ else
+ cerr<<"PASS"<<endl;
+
+// cout<<m<<" "<<n<<" M"<<endl;
+// for (size_t i=0; i<m; ++i)
+// for (size_t j=0; j<n; ++j)
+// if (!F.isZero(*(A+i*n+j)))
+// cout<<i+1<<" "<<j+1<<" "<<(*(A+i*n+j))<<endl;
+// cout<<"0 0 0"<<endl;
+
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+#endif
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+
+ double t = timc.usertime();
+ double numops = 2*m*m/1000.0*n;
+
+ cerr<<m<<"x"<< n
+ << " : rank = " << R << " ["
+ << ((double)nbf/1000.0*(double)numops / t)
+ << " MFops "
+ << " in "
+ << t/nbf<<"s"
+ <<"]"<< endl;
+// cout<<m
+// <<" "<<((double)nbf/1000.0*(double)numops / t)
+// <<" "<<t/nbf
+// <<endl;
+
+ return 0;
+}
diff --git a/tests/test-redechelon.C b/tests/test-redechelon.C
new file mode 100644
index 0000000..2a86809
--- /dev/null
+++ b/tests/test-redechelon.C
@@ -0,0 +1,207 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for the reduced echelon factorisation
+//--------------------------------------------------------------------------
+// usage: test-redechelon p A n, for n reduced echelon computations
+// of A over Z/pZ
+//-------------------------------------------------------------------------
+
+//-------------------------------------------------------------------------
+//#define DEBUG 1
+// Debug option 0: no debug
+// 1: check A = LQUP
+//-------------------------------------------------------------------------
+using namespace std;
+
+
+//#define __LUDIVINE_CUTOFF 1
+#include <iostream>
+#include <iomanip>
+#include "Matio.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+using namespace FFPACK;
+typedef Givaro::Modular<double> Field;
+
+int main(int argc, char** argv){
+ //cerr<<setprecision(20);
+ int i,j,nbf,m,n;
+ int R=0;
+
+ if (argc!=4){
+ cerr<<"usage : test-redechelon <p> <A> <i>"<<endl
+ <<" to do i reduced echelon computations of A"
+ <<endl;
+ exit(-1);
+ }
+ Field F((uint64_t)atoi(argv[1]));
+ Field::Element * A;
+
+ A = read_field(F,argv[2],&m,&n);
+
+ size_t *P = FFLAS::fflas_new<size_t>(n);
+ size_t *Q = FFLAS::fflas_new<size_t>(m);
+
+ // size_t cutoff = atoi(argv[3]);
+ nbf = atoi(argv[3]);
+
+ FFLAS::Timer tim,timc;
+ timc.clear();
+
+
+ for ( i=0;i<nbf;i++){
+ if (i) {
+ FFLAS::fflas_delete( A);
+ A = read_field(F,argv[2],&m,&n);
+ }
+ for (j=0;j<n;j++)
+ P[j]=0;
+ for (j=0;j<m;j++)
+ Q[j]=0;
+ tim.clear();
+ tim.start();
+ R = (int)FFPACK::ReducedColumnEchelonForm (F, m, n, A, n, P, Q);
+ tim.stop();
+ timc+=tim;
+ }
+ //write_field (F,cerr<<"Result = "<<endl, A, m,n,n);
+
+// cerr<<"P = [";
+// for (size_t i=0; i<n; ++i)
+// cerr<<P[i]<<" ";
+// cerr<<"]"<<endl;
+// cerr<<"Q = [";
+// for (size_t i=0; i<m; ++i)
+// cerr<<Q[i]<<" ";
+// cerr<<"]"<<endl;
+#if DEBUG
+ Field::Element * L = FFLAS::fflas_new<Field::Element>(m*n);
+ Field::Element * U = FFLAS::fflas_new<Field::Element>(n*n);
+ Field::Element * X = FFLAS::fflas_new<Field::Element>(m*n);
+
+ Field::Element zero,one;
+ F.init(zero,0.0);
+ F.init(one,1.0);
+
+ for (int i=0; i<R; ++i){
+ for (int j=0; j<n; ++j)
+ F.assign (*(U + i*n + j), *(A+ i*n+j));
+ }
+ for (int i=R;i<n; ++i){
+ for (int j=0; j<n; ++j)
+ F.assign(*(U+i*n+j), zero);
+ F.init(*(U+i*(n+1)),one);
+ }
+ FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, n, 0, R, U, n, P);
+
+ for ( int i=0; i<R; ++i ){
+ for (int j=0; j < n ; ++j)
+ F.assign( *(L + i*n+j),zero);
+ F.assign(*(L+i*(n+1)), one);
+ }
+ for ( int i=R; i<m; ++i ){
+ for (int j=0; j<R; ++j )
+ F.assign (*(L+i*n+j), *(A+i*n+j));
+ for (int j=R; j<n; ++j)
+ F.assign (*(L+i*n+j), zero);
+ }
+ FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans, n, 0, R, L, n, Q);
+
+// cerr<<"P = ";
+// for (size_t i=0; i<n;++i)
+// cerr<<" "<<P[i];
+// cerr<<endl;
+// cerr<<"Q = ";
+// for (size_t i=0; i<m;++i)
+// cerr<<" "<<Q[i];
+// cerr<<endl;
+
+ // write_field(F,cerr<<"A = "<<endl,A,m,n,n);
+ // write_field(F,cerr<<"R = "<<endl,L,m,n,n);
+ //write_field(F,cerr<<"U = "<<endl,U,m,n,n);
+
+ Field::Element * B = read_field(F,argv[2],&m,&n);
+
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,n, 1.0,
+ B, n, U, n, 0.0, X,n);
+ //FFLAS::fflas_delete( A);
+
+ bool fail = false;
+ for (int i=0; i<m; ++i)
+ for (int j=0; j<n; ++j)
+ if (!F.areEqual (*(L+i*n+j), *(X+i*n+j)))
+ fail=true;
+
+// write_field(F,cerr<<"X = "<<endl,X,m,n,n);
+// write_field(F,cerr<<"R = "<<endl,L,m,n,n);
+
+ FFLAS::fflas_delete( B);
+ if (fail)
+ cerr<<"FAIL"<<endl;
+
+
+ else
+ cerr<<"PASS"<<endl;
+
+// cout<<m<<" "<<n<<" M"<<endl;
+// for (size_t i=0; i<m; ++i)
+// for (size_t j=0; j<n; ++j)
+// if (!F.isZero(*(A+i*n+j)))
+// cout<<i+1<<" "<<j+1<<" "<<(*(A+i*n+j))<<endl;
+// cout<<"0 0 0"<<endl;
+
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+#endif
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+
+ double t = timc.usertime();
+ double numops = 2*m*m/1000.0*n;
+
+ cerr<<m<<"x"<< n
+ << " : rank = " << R << " ["
+ << ((double)nbf/1000.0*(double)numops / t)
+ << " MFops "
+ << " in "
+ << t/nbf<<"s"
+ <<"]"<< endl;
+// cout<<m
+// <<" "<<((double)nbf/1000.0*(double)numops / t)
+// <<" "<<t/nbf
+// <<endl;
+
+ return 0;
+}
diff --git a/tests/test-redrowechelon.C b/tests/test-redrowechelon.C
new file mode 100644
index 0000000..acbff40
--- /dev/null
+++ b/tests/test-redrowechelon.C
@@ -0,0 +1,210 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for the reduced row echelon factorisation
+//--------------------------------------------------------------------------
+// usage: test-redrowechelon p A n, for n reduced row echelon computations
+// of A over Z/pZ
+//-------------------------------------------------------------------------
+
+//-------------------------------------------------------------------------
+//#define DEBUG 1
+// Debug option 0: no debug
+// 1: check A = LQUP
+//-------------------------------------------------------------------------
+using namespace std;
+
+
+//#define __LUDIVINE_CUTOFF 1
+#include <iostream>
+#include <iomanip>
+#include "Matio.h"
+#include "fflas-ffpack/utils/timer.h"
+//#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/field/modular-positive.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+using namespace FFPACK;
+typedef Givaro::Modular<double> Field;
+
+int main(int argc, char** argv){
+ //cerr<<setprecision(20);
+ int i,j,nbf,m,n;
+ int R=0;
+
+ if (argc!=4){
+ cerr<<"usage : test-redrowechelon <p> <A> <i>"<<endl
+ <<" to do i reduced row echelon computations of A"
+ <<endl;
+ exit(-1);
+ }
+ Field F((uint64_t)atoi(argv[1]));
+ Field::Element * A;
+
+ A = read_field(F,argv[2],&m,&n);
+
+ size_t *P = FFLAS::fflas_new<size_t>(n);
+ size_t *Q = FFLAS::fflas_new<size_t>(m);
+
+ // size_t cutoff = atoi(argv[3]);
+ nbf = atoi(argv[3]);
+
+ FFLAS::Timer tim,timc;
+ timc.clear();
+
+
+ for ( i=0;i<nbf;i++){
+ if (i) {
+ FFLAS::fflas_delete( A);
+ A = read_field(F,argv[2],&m,&n);
+ }
+ for (j=0;j<n;j++)
+ P[j]=0;
+ for (j=0;j<m;j++)
+ Q[j]=0;
+ tim.clear();
+ tim.start();
+ R = (int)FFPACK::ReducedRowEchelonForm (F, m, n, A, n, P, Q);
+ tim.stop();
+ timc+=tim;
+ }
+ //write_field (F,cerr<<"Result = "<<endl, A, m,n,n);
+
+// cerr<<"P = [";
+// for (size_t i=0; i<n; ++i)
+// cerr<<P[i]<<" ";
+// cerr<<"]"<<endl;
+// cerr<<"Q = [";
+// for (size_t i=0; i<m; ++i)
+// cerr<<Q[i]<<" ";
+// cerr<<"]"<<endl;
+#if DEBUG
+ Field::Element * L = FFLAS::fflas_new<Field::Element>(m*m);
+ Field::Element * U = FFLAS::fflas_new<Field::Element>(m*n);
+ Field::Element * X = FFLAS::fflas_new<Field::Element>(m*n);
+
+ Field::Element zero,one;
+ F.init(zero,0.0);
+ F.init(one,1.0);
+
+ for (int i=0; i<R; ++i){
+ for (int j=0; j<m; ++j)
+ F.assign (*(L + i + j*m), *(A+ i+j*n));
+ }
+ for (int i=R;i<m; ++i){
+ for (int j=0; j<m; ++j)
+ F.assign(*(L+i+j*m), zero);
+ F.init(*(L+i*(m+1)),one);
+ }
+ FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasNoTrans, m, 0, R, L, m, P);
+
+ for ( int i=0; i<R; ++i ){
+ for (int j=0; j < m ; ++j)
+ F.assign( *(U + i+j*n),zero);
+ F.assign(*(U+i*(n+1)), one);
+ }
+ for ( int i=R; i<n; ++i ){
+ for (int j=0; j<R; ++j )
+ F.assign (*(U+i+j*n), *(A+i+j*n));
+ for (int j=R; j<m; ++j)
+ F.assign (*(U+i+j*n), zero);
+ }
+ //write_field(F,cerr<<"U = "<<endl,U,m,n,n);
+ FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasNoTrans, m, 0, R, U, n, Q);
+ //write_field(F,cerr<<"U = "<<endl,U,m,n,n);
+// cerr<<"P = ";
+// for (size_t i=0; i<n;++i)
+// cerr<<" "<<P[i];
+// cerr<<endl;
+// cerr<<"Q = ";
+// for (size_t i=0; i<m;++i)
+// cerr<<" "<<Q[i];
+// cerr<<endl;
+
+
+ // write_field(F,cerr<<"R = "<<endl,L,m,n,n);
+
+
+ Field::Element * B = read_field(F,argv[2],&m,&n);
+ //write_field(F,cerr<<"A = "<<endl,B,m,n,n);
+
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,m, 1.0,
+ L, m, B, n, 0.0, X,n);
+ //FFLAS::fflas_delete( A);
+
+ bool fail = false;
+ for (int i=0; i<m; ++i)
+ for (int j=0; j<n; ++j)
+ if (!F.areEqual (*(U+i*n+j), *(X+i*n+j)))
+ fail=true;
+
+// write_field(F,cerr<<"X = "<<endl,X,m,n,n);
+// write_field(F,cerr<<"R = "<<endl,U,m,n,n);
+
+ FFLAS::fflas_delete( B);
+ if (fail)
+ cerr<<"FAIL"<<endl;
+
+
+ else
+ cerr<<"PASS"<<endl;
+
+// cout<<m<<" "<<n<<" M"<<endl;
+// for (size_t i=0; i<m; ++i)
+// for (size_t j=0; j<n; ++j)
+// if (!F.isZero(*(A+i*n+j)))
+// cout<<i+1<<" "<<j+1<<" "<<(*(A+i*n+j))<<endl;
+// cout<<"0 0 0"<<endl;
+
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+#endif
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+
+ double t = timc.usertime();
+ double numops = 2*m*m/1000.0*n;
+
+ cerr<<m<<"x"<< n
+ << " : rank = " << R << " ["
+ << ((double)nbf/1000.0*(double)numops / t)
+ << " MFops "
+ << " in "
+ << t/nbf<<"s"
+ <<"]"<< endl;
+// cout<<m
+// <<" "<<((double)nbf/1000.0*(double)numops / t)
+// <<" "<<t/nbf
+// <<endl;
+
+ return 0;
+}
diff --git a/tests/test-rowechelon.C b/tests/test-rowechelon.C
new file mode 100644
index 0000000..be8b3f0
--- /dev/null
+++ b/tests/test-rowechelon.C
@@ -0,0 +1,206 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+
+//--------------------------------------------------------------------------
+// Test for the row echelon factorisation
+//--------------------------------------------------------------------------
+// usage: test-row p A n, for n computations
+// of A over Z/pZ
+//-------------------------------------------------------------------------
+
+//-------------------------------------------------------------------------
+//#define DEBUG 1
+// Debug option 0: no debug
+// 1: check A = LQUP
+//-------------------------------------------------------------------------
+using namespace std;
+
+
+//#define __LUDIVINE_CUTOFF 1
+#include <iostream>
+#include <iomanip>
+#include "Matio.h"
+#include "fflas-ffpack/utils/timer.h"
+//#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/field/modular-positive.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+
+using namespace FFPACK;
+// typedef Givaro::Modular<int16_t> Field;
+typedef Givaro::Modular<int32_t> Field;
+// typedef Givaro::Modular<double> Field;
+
+int main(int argc, char** argv){
+ cerr<<setprecision(20);
+ int i,j,nbf,m,n;
+ int R=0;
+
+ if (argc!=4){
+ cerr<<"usage : test-row <p> <A> <i>"<<endl
+ <<" to do i Row Echelon factorisation of A"
+ <<endl;
+ exit(-1);
+ }
+ Field F((uint64_t)atoi(argv[1]));
+ Field::Element * A;
+
+ A = read_field(F,argv[2],&m,&n);
+
+ size_t *P = FFLAS::fflas_new<size_t>(m);
+ size_t *Q = FFLAS::fflas_new<size_t>(n);
+
+ // size_t cutoff = atoi(argv[3]);
+ nbf = atoi(argv[3]);
+
+ FFLAS::Timer tim,timc;
+ timc.clear();
+
+
+ for ( i=0;i<nbf;i++){
+ if (i) {
+ FFLAS::fflas_delete( A);
+ A = read_field(F,argv[2],&m,&n);
+ }
+ for (j=0;j<m;j++)
+ P[j]=0;
+ for (j=0;j<n;j++)
+ Q[j]=0;
+ tim.clear();
+ tim.start();
+ R = (int)FFPACK::RowEchelonForm (F, m, n, A, n, P, Q);
+ tim.stop();
+ timc+=tim;
+ }
+ //write_field (F,cerr<<"Result = "<<endl, A, m,n,n);
+
+// cerr<<"P = [";
+// for (size_t i=0; i<n; ++i)
+// cerr<<P[i]<<" ";
+// cerr<<"]"<<endl;
+// cerr<<"Q = [";
+// for (size_t i=0; i<m; ++i)
+// cerr<<Q[i]<<" ";
+// cerr<<"]"<<endl;
+#if DEBUG
+ Field::Element * L = FFLAS::fflas_new<Field::Element>(m*m);
+ Field::Element * U = FFLAS::fflas_new<Field::Element>(m*n);
+ Field::Element * X = FFLAS::fflas_new<Field::Element>(m*n);
+
+ Field::Element zero,one;
+ F.init(zero,0.0);
+ F.init(one,1.0);
+ for (int i=0; i<R; ++i){
+ for (int j=0; j<=i; ++j)
+ F.assign ( *(L + i + j*m), zero);
+ F.init (*(L+i*(m+1)),one);
+ for (int j=i+1; j<m; ++j)
+ F.assign (*(L + i + j*m), *(A+ i+j*n));
+ }
+ for (int i=R;i<m; ++i){
+ for (int j=0; j<m; ++j)
+ F.assign(*(L+i+j*m), zero);
+ F.init(*(L+i*(m+1)),one);
+ }
+ FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasNoTrans, m, 0, R, L, m, P);
+
+ for ( int i=0; i<n; ++i ){
+ int j=0;
+ for (; j <= ((i<R)?i:R) ; ++j )
+ F.assign( *(U + i+j*n), *(A+i+j*n));
+ for (; j<m; ++j )
+ F.assign( *(U+i+j*n), zero);
+ }
+// cerr<<"P = ";
+// for (size_t i=0; i<n;++i)
+// cerr<<" "<<P[i];
+// cerr<<endl;
+// cerr<<"Q = ";
+// for (size_t i=0; i<m;++i)
+// cerr<<" "<<Q[i];
+// cerr<<endl;
+
+// write_field(F,cerr<<"A = "<<endl,A,m,n,n);
+// write_field(F,cerr<<"L = "<<endl,L,m,n,n);
+// write_field(F,cerr<<"U = "<<endl,U,m,n,n);
+
+ Field::Element * B = read_field(F,argv[2],&m,&n);
+
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, m,n,m, 1.0,
+ L, m, B, n, 0.0, X,n);
+ //FFLAS::fflas_delete( A);
+
+ bool fail = false;
+ for (int i=0; i<m; ++i)
+ for (int j=0; j<n; ++j)
+ if (!F.areEqual (*(U+i*n+j), *(X+i*n+j)))
+ fail=true;
+
+ // write_field(F,cerr<<"X = "<<endl,X,m,n,n);
+ //write_field(F,cerr<<"U = "<<endl,U,m,n,n);
+
+ FFLAS::fflas_delete( B);
+ if (fail)
+ cerr<<"FAIL"<<endl;
+
+
+ else
+ cerr<<"PASS"<<endl;
+
+// cout<<m<<" "<<n<<" M"<<endl;
+// for (size_t i=0; i<m; ++i)
+// for (size_t j=0; j<n; ++j)
+// if (!F.isZero(*(A+i*n+j)))
+// cout<<i+1<<" "<<j+1<<" "<<(*(A+i*n+j))<<endl;
+// cout<<"0 0 0"<<endl;
+
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( X);
+#endif
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+
+ double t = timc.usertime();
+ double numops = m*m/1000.0*(n-m/3.0);
+
+ cerr<<m<<"x"<< n
+ << " : rank = " << R << " ["
+ << ((double)nbf/1000.0*(double)numops / t)
+ << " MFops "
+ << " in "
+ << t/nbf<<"s"
+ <<"]"<< endl;
+// cout<<m
+// <<" "<<((double)nbf/1000.0*(double)numops / t)
+// <<" "<<t/nbf
+// <<endl;
+
+ return 0;
+}
diff --git a/tests/test-simd.C b/tests/test-simd.C
new file mode 100644
index 0000000..66a8b19
--- /dev/null
+++ b/tests/test-simd.C
@@ -0,0 +1,370 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/*
+ * Copyright (C) 2014 FFLAS-FFPACK
+ * Written by :
+ * Bastien Vialla <bastien.vialla at lirmm.fr>
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+ #include "fflas-ffpack/fflas/fflas_simd.h"
+ #include "fflas-ffpack/utils/args-parser.h"
+ #include "fflas-ffpack/utils/align-allocator.h"
+ #include <vector>
+ #include <algorithm>
+ #include <random>
+ #include <tuple>
+ #include <type_traits>
+ #include <string>
+ #include <iterator>
+ #include <limits>
+ #include <cmath>
+ #include <iomanip>
+
+/**********************************************************************************
+ *
+ * Function Traits
+ *
+ ***********************************************************************************/
+
+template <class F> struct function_traits;
+
+// function pointer
+template <class R, class... Args>
+struct function_traits<R (*)(Args...)> : public function_traits<R(Args...)> {};
+
+template <class R, class... Args> struct function_traits<R(Args...)> {
+ using return_type = R;
+
+ static constexpr std::size_t arity = sizeof...(Args);
+
+ template <std::size_t N> struct argument {
+ static_assert(N < arity, "error: invalid parameter index.");
+ using type = typename std::tuple_element<N, std::tuple<Args...> >::type;
+ };
+};
+
+// member function pointer
+template <class C, class R, class... Args>
+struct function_traits<R (C::*)(Args...)> : public function_traits<
+ R(C&, Args...)> {};
+
+// const member function pointer
+template <class C, class R, class... Args>
+struct function_traits<R (C::*)(Args...)
+ const> : public function_traits<R(C&, Args...)> {};
+
+// member object pointer
+template <class C, class R>
+struct function_traits<R(C::*)> : public function_traits<R(C&)> {};
+
+/**************************************************************************************/
+
+template<class simd, class Element, class SimdFunc, class ScalFunc>
+inline
+typename std::enable_if<
+ (function_traits<SimdFunc>::arity == 1) &&
+ !(std::is_same<typename function_traits<SimdFunc>::return_type, void>::value)
+ , bool>::type
+test_op(SimdFunc fsimd, ScalFunc fscal, size_t seed, size_t vectorSize, Element max, std::string name){
+
+ using vect_t = typename simd::vect_t;
+
+ std::mt19937 generator(seed);
+ std::uniform_real_distribution<> dist(1, (int)max);
+
+ std::vector<Element, AlignedAllocator<Element, Alignment::AVX>> a1(vectorSize), c1(vectorSize), a2(vectorSize), c2(vectorSize);
+ std::generate(a1.begin(), a1.end(), [&](){return dist(generator);});
+ a2 = a1;
+
+ std::transform(a1.begin(), a1.end(), c1.begin(), fscal);
+
+ vect_t va2, vc2;
+ for(size_t i = 0 ; i < vectorSize ; i+=simd::vect_size){
+ va2 = simd::load(a2.data()+i);
+ vc2 = fsimd(va2);
+ simd::store(c2.data()+i, vc2);
+ }
+
+ bool res = std::equal(c1.begin(), c1.end(), c2.begin(), [](Element x1, Element x2){return (std::isnan(x1) && std::isnan(x2)) || x1 == x2;});
+ if(!res)
+ {
+ std::cout << "Error Simd" << sizeof(typename simd::scalar_t)*simd::vect_size*8 << "::" << name << std::endl;
+ std::copy(c1.begin(), c1.end(), std::ostream_iterator<Element>(std::cout, " "));
+ std::cout << std::endl;
+ std::copy(c2.begin(), c2.end(), std::ostream_iterator<Element>(std::cout, " "));
+ std::cout << std::endl;
+ }
+ return res;
+}
+
+template<class simd, class Element, class SimdFunc, class ScalFunc>
+inline
+typename std::enable_if<
+ (function_traits<SimdFunc>::arity == 0) &&
+ !(std::is_same<typename function_traits<SimdFunc>::return_type, void>::value)
+ , bool>::type
+test_op(SimdFunc && fsimd, ScalFunc && fscal, size_t seed, size_t vectorSize, Element max, std::string name){
+
+ using vect_t = typename simd::vect_t;
+
+ std::mt19937 generator(seed);
+ std::uniform_real_distribution<Element> dist(1, (int)max);
+
+ std::vector<Element, AlignedAllocator<Element, Alignment::AVX>> c1(vectorSize), c2(vectorSize);
+
+ std::transform(c1.begin(), c1.end(), c1.begin(), fscal);
+
+ vect_t vc2;
+ for(size_t i = 0 ; i < vectorSize ; i+=simd::vect_size){
+ c2 = fsimd();
+ simd::store(c2.data()+i, c2);
+ }
+
+ bool res = std::equal(c1.begin(), c1.end(), c2.begin(), [](Element x1, Element x2){return (std::isnan(x1) && std::isnan(x2)) || x1 == x2;});
+ if(!res)
+ {
+ std::cout << "Error Simd" << sizeof(typename simd::scalar_t)*simd::vect_size*8 << "::" << name << std::endl;
+ std::copy(c1.begin(), c1.end(), std::ostream_iterator<Element>(std::cout, " "));
+ std::cout << std::endl;
+ std::copy(c2.begin(), c2.end(), std::ostream_iterator<Element>(std::cout, " "));
+ std::cout << std::endl;
+ }
+ return res;
+}
+
+template<class simd, class Element, class SimdFunc, class ScalFunc>
+inline
+typename std::enable_if<
+ (function_traits<SimdFunc>::arity == 2) &&
+ !(std::is_same<typename function_traits<SimdFunc>::return_type, void>::value)
+ , bool>::type
+test_op(SimdFunc fsimd, ScalFunc fscal, size_t seed, size_t vectorSize, Element max, std::string name){
+
+ using vect_t = typename simd::vect_t;
+
+ std::mt19937 generator(seed);
+ std::uniform_real_distribution<> dist(1, (int)max);
+
+ std::vector<Element, AlignedAllocator<Element, Alignment::AVX>> a1(vectorSize), b1(vectorSize), c1(vectorSize), a2(vectorSize), b2(vectorSize), c2(vectorSize);
+ std::generate(a1.begin(), a1.end(), [&](){return dist(generator);});
+ std::generate(b1.begin(), b1.end(), [&](){return dist(generator);});
+ a2 = a1;
+ b2 = b1;
+
+ std::transform(a1.begin(), a1.end(), b1.begin(), c1.begin(), fscal);
+
+ vect_t va2, vb2, vc2;
+ for(size_t i = 0 ; i < vectorSize ; i+=simd::vect_size){
+ va2 = simd::load(a2.data()+i);
+ vb2 = simd::load(b2.data()+i);
+ vc2 = fsimd(va2, vb2);
+ simd::store(c2.data()+i, vc2);
+ }
+
+ bool res = std::equal(c1.begin(), c1.end(), c2.begin(), [](Element x1, Element x2){return (std::isnan(x1) && std::isnan(x2)) || x1 == x2;});
+ if(!res)
+ {
+ std::cout << "Error Simd" << sizeof(typename simd::scalar_t)*simd::vect_size*8 << "::" << name << std::endl;
+ std::copy(c1.begin(), c1.end(), std::ostream_iterator<Element>(std::cout, " "));
+ std::cout << std::endl;
+ std::copy(c2.begin(), c2.end(), std::ostream_iterator<Element>(std::cout, " "));
+ std::cout << std::endl;
+ }
+ return res;
+}
+
+template<class simd, class Element, class SimdFunc, class ScalFunc>
+inline
+typename std::enable_if<
+ (function_traits<SimdFunc>::arity == 3) &&
+ !(std::is_same<typename function_traits<SimdFunc>::return_type, void>::value)
+ , bool>::type
+test_op(SimdFunc fsimd, ScalFunc fscal, size_t seed, size_t vectorSize, Element max, std::string name){
+
+ using vect_t = typename simd::vect_t;
+
+ std::mt19937 generator(seed);
+ std::uniform_real_distribution<> dist(1, (int)max);
+
+ std::vector<Element, AlignedAllocator<Element, Alignment::AVX>> a1(vectorSize), b1(vectorSize), c1(vectorSize), d1(vectorSize), a2(vectorSize), b2(vectorSize), c2(vectorSize), d2(vectorSize);
+ std::generate(a1.begin(), a1.end(), [&](){return dist(generator);});
+ std::generate(b1.begin(), b1.end(), [&](){return dist(generator);});
+ std::generate(c1.begin(), c1.end(), [&](){return dist(generator);});
+ a2 = a1;
+ b2 = b1;
+ c2 = c1;
+
+ for(size_t i = 0 ; i < vectorSize ; ++i){
+ d1[i] = fscal(c1[i], a1[i], b1[i]);
+ }
+
+ vect_t va2, vb2, vc2;
+ for(size_t i = 0 ; i < vectorSize ; i+=simd::vect_size){
+ va2 = simd::load(a2.data()+i);
+ vb2 = simd::load(b2.data()+i);
+ vc2 = simd::load(c2.data()+i);
+ simd::store(d2.data()+i, fsimd(vc2, va2, vb2));
+ }
+
+ bool res = std::equal(d1.begin(), d1.end(), d2.begin(), [](Element x1, Element x2){return (std::isnan(x1) && std::isnan(x2)) || x1 == x2;});
+ if(!res)
+ {
+ std::cout << "Error Simd" << sizeof(typename simd::scalar_t)*simd::vect_size*8 << "::" << name << std::endl;
+
+ std::transform(d1.begin(), d1.end(), d2.begin(), d2.begin(), [](Element x1, Element x2){return x1-x2;});
+
+ //std::copy(d1.begin(), d1.end(), std::ostream_iterator<Element>(std::cout, " "));
+ //std::cout << std::endl;
+ std::copy(d2.begin(), d2.end(), std::ostream_iterator<Element>(std::cout, " "));
+ std::cout << std::endl;
+ }
+ return res;
+}
+
+
+template<class simd, class Element>
+bool test_float_impl(size_t seed, size_t vectorSize, Element max){
+ bool btest = true;
+
+ btest &= test_op<simd>(simd::ceil, [](Element x){return std::ceil(x);}, seed, vectorSize, max, "ceil");
+ btest &= test_op<simd>(simd::floor, [](Element x){return std::floor(x);}, seed, vectorSize, max,"floor");
+ btest &= test_op<simd>(simd::round, [](Element x){return std::round(x);}, seed, vectorSize, max, "round");
+ btest &= test_op<simd>(simd::add, [](Element x1, Element x2){return x1+x2;}, seed, vectorSize, max, "add");
+ btest &= test_op<simd>(simd::sub, [](Element x1, Element x2){return x1-x2;}, seed, vectorSize, max, "sub");
+ btest &= test_op<simd>(simd::mul, [](Element x1, Element x2){return x1*x2;}, seed, vectorSize, max, "mul");
+ btest &= test_op<simd>(simd::fmadd, [](Element x1, Element x2, Element x3){return std::fma(x3,x2,x1);}, seed, vectorSize, max, "fmadd");
+ btest &= test_op<simd>(simd::fmsub, [](Element x1, Element x2, Element x3){return std::fma(x3,x2,-x1);}, seed, vectorSize, max, "fmsub");
+ btest &= test_op<simd>(simd::fnmadd, [](Element x1, Element x2, Element x3){return std::fma(-x3,x2,x1);}, seed, vectorSize, max, "fnmadd");
+ btest &= test_op<simd>(simd::lesser, [](Element x1, Element x2){return (x1<x2)?NAN:0;}, seed, vectorSize, max, "lesser");
+ btest &= test_op<simd>(simd::lesser_eq, [](Element x1, Element x2){return (x1<=x2)?NAN:0;}, seed, vectorSize, max, "lesser_eq");
+ btest &= test_op<simd>(simd::greater, [](Element x1, Element x2){return (x1>x2)?NAN:0;}, seed, vectorSize, max, "greater");
+ btest &= test_op<simd>(simd::greater_eq, [](Element x1, Element x2){return (x1>=x2)?NAN:0;}, seed, vectorSize, max, "greater_eq");
+ btest &= test_op<simd>(simd::eq, [](Element x1, Element x2){return (x1==x2)?NAN:0;}, seed, vectorSize, max, "eq");
+
+ return btest;
+}
+
+template<class simd, class Element>
+bool test_integer_impl(size_t seed, size_t vectorSize, Element max){
+ bool btest = true;
+
+ btest &= test_op<simd>(simd::add, [](Element x1, Element x2){return x1+x2;}, seed, vectorSize, max, "add");
+ btest &= test_op<simd>(simd::sub, [](Element x1, Element x2){return x1-x2;}, seed, vectorSize, max, "sub");
+ btest &= test_op<simd>(simd::mullo, [](Element x1, Element x2){return x1*x2;}, seed, vectorSize, max, "mullo");
+ btest &= test_op<simd>(simd::fmadd, [](Element x1, Element x2, Element x3){return x1+x3*x2;}, seed, vectorSize, max, "fmadd");
+ // btest &= test_op<simd>(simd::fmsub, [](Element x1, Element x2, Element x3){return -x1+x3*x2;}, seed, vectorSize, max, "fmsub");
+ // btest &= test_op<simd>(simd::fnmadd, [](Element x1, Element x2, Element x3){return x1-x3*x2;}, seed, vectorSize, max, "fnmadd");
+ btest &= test_op<simd>(simd::lesser, [](Element x1, Element x2){return (x1<x2)?-1:0;}, seed, vectorSize, max, "lesser");
+ btest &= test_op<simd>(simd::lesser_eq, [](Element x1, Element x2){return (x1<=x2)?-1:0;}, seed, vectorSize, max, "lesser_eq");
+ btest &= test_op<simd>(simd::greater, [](Element x1, Element x2){return (x1>x2)?-1:0;}, seed, vectorSize, max, "greater");
+ btest &= test_op<simd>(simd::greater_eq, [](Element x1, Element x2){return (x1>=x2)?-1:0;}, seed, vectorSize, max, "greater_eq");
+ btest &= test_op<simd>(simd::eq, [](Element x1, Element x2){return (x1==x2)?-1:0;}, seed, vectorSize, max, "eq");
+
+ return btest;
+}
+
+template<class Element>
+bool test_float(size_t seed, size_t vectorSize, size_t max_){
+ bool sse = true, avx = true;
+ sse = test_float_impl<Simd128<Element>>(seed, vectorSize, (Element)max_);
+ if(!sse)
+ std::cout << "bug sse" << std::endl;
+ else
+ std::cout << "SSE OK" << std::endl;
+ avx = test_float_impl<Simd256<Element>>(seed, vectorSize, (Element)max_);
+ if(!avx)
+ std::cout << "bug avx" << std::endl;
+ else
+ std::cout << "AVX OK" << std::endl;
+ return sse && avx;
+}
+
+ template<class Element>
+ bool test_integer(size_t seed, size_t vectorSize, size_t max_){
+ bool sse = true, avx = true;
+ sse = test_integer_impl<Simd128<Element>>(seed, vectorSize, (Element)max_);
+ if(!sse)
+ std::cout << "bug sse" << std::endl;
+ else
+ std::cout << "SSE OK" << std::endl;
+#ifdef __AVX2__
+ avx = test_integer_impl<Simd256<Element>>(seed, vectorSize, (Element)max_);
+ if(!avx)
+ std::cout << "bug avx" << std::endl;
+ else
+ std::cout << "AVX OK" << std::endl;
+#endif
+ return sse && avx;
+ }
+
+
+ int main(int ac, char **av) {
+ int seed = (int) time(NULL);
+ int vectorSize = 32;
+ int max = 100;
+ int loop = false;
+
+ static Argument as[] = {
+ { 's', "-s N", "Set the seed .", TYPE_INT , &seed },
+ { 'l', "-l N", "Set the loop execution .", TYPE_INT , &loop },
+ END_OF_ARGUMENTS
+ };
+
+ FFLAS::parseArguments(ac,av,as);
+
+ srand(seed);
+ srand48(seed);
+
+ bool pass = true ;
+ {
+ do{
+ {
+ pass &= test_float<float>(seed, vectorSize, max);
+ }
+ {
+ pass &= test_float<double>(seed, vectorSize, max);
+ }
+ {
+ pass &= test_integer<int16_t>(seed, vectorSize, max);
+ }
+ {
+ pass &= test_integer<int32_t>(seed, vectorSize, max);
+ }
+ {
+ pass &= test_integer<int64_t>(seed, vectorSize, max);
+ }
+ // {
+ // pass &= test_integer<uint16_t>(seed, vectorSize, max);
+ // }
+ // {
+ // pass &= test_integer<uint32_t>(seed, vectorSize, max);
+ // }
+ // {
+ // pass &= test_integer<uint64_t>(seed, vectorSize, max);
+ // }
+ }while(loop);
+ }
+ std::cout << std::boolalpha << pass << std::endl;
+ return (pass?0:1) ;
+}
diff --git a/tests/test-sparse.C b/tests/test-sparse.C
new file mode 100644
index 0000000..423576e
--- /dev/null
+++ b/tests/test-sparse.C
@@ -0,0 +1,393 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+
+/* Copyright (C) 2014 FFLAS-FFPACK
+ * Written by : Bastien Vialla <bastien.vialla at lirmm.fr>
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ */
+
+#include "fflas-ffpack/fflas/fflas.h"
+#include "fflas-ffpack/fflas/fflas_sparse.h"
+#include "fflas-ffpack/utils/args-parser.h"
+#include "givaro/modular-double.h"
+#include "givaro/zring.h"
+
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <algorithm>
+#include <string>
+#include <iterator>
+#include <cstdlib>
+#include <cstdio>
+// #include <stdlib.h>
+
+#include <sstream>
+
+using namespace FFLAS;
+using namespace FFPACK;
+using namespace std;
+using namespace Givaro;
+
+template <typename T> T from_string(std::string const & s) {
+ std::stringstream ss(s);
+ T result;
+ ss >> result; // TODO handle errors
+ return result;
+}
+
+
+
+template <class PtrT> void testEq(PtrT y1, PtrT y2, uint64_t n) {
+ for (uint64_t i = 0; i < n; ++i) {
+ if (y1[i] != y2[i]) {
+ cout << "Error " << i << endl;
+ cout << y1[i] << " != " << y2[i] << endl;
+ break;
+ }
+ }
+}
+
+template <class MatT, class Field, class IndexT>
+void test_spmv(const Field &F, IndexT *row, IndexT *col,
+ typename Field::Element_ptr dat, index_t rowdim, index_t coldim,
+ uint64_t nnz, typename Field::Element_ptr x,
+ typename Field::Element_ptr y, typename Field::Element beta) {
+ MatT matrix;
+ sparse_init(F, matrix, row, col, dat, rowdim, coldim, nnz);
+ fspmv(F, matrix, x, 1, y);
+ sparse_delete(matrix);
+}
+
+template <class Field, class IndexT>
+void
+test_spmv_sell(const Field &F, IndexT *row, IndexT *col,
+ typename Field::Element_ptr dat, index_t rowdim, index_t coldim,
+ uint64_t nnz, Sparse<Field, SparseMatrix_t::SELL> &matrix,
+ typename Field::Element_ptr x, typename Field::Element_ptr y,
+ typename Field::Element beta) {
+ sparse_init(F, matrix, row, col, dat, rowdim, coldim, nnz);
+ fspmv(F, matrix, x, 1, y);
+ auto tmp = fflas_new(F, rowdim, 1);
+ for (size_t i = 0; i < rowdim; ++i) {
+ tmp[i] = y[matrix.perm[i]];
+ }
+ for (size_t i = 0; i < rowdim; ++i) {
+ y[i] = tmp[i];
+ }
+ sparse_delete(matrix);
+ fflas_delete(tmp);
+}
+
+template <class MatT, class Field, class IndexT>
+void test_spmm(const Field &F, IndexT *row, IndexT *col,
+ typename Field::Element_ptr dat, index_t rowdim, index_t coldim,
+ uint64_t nnz, int blockSize, typename Field::Element_ptr x,
+ int ldx, typename Field::Element_ptr y, int ldy,
+ typename Field::Element beta) {
+ MatT matrix;
+ sparse_init(F, matrix, row, col, dat, rowdim, coldim, nnz);
+ fspmm(F, matrix, blockSize, x, ldx, beta, y, ldy);
+ sparse_delete(matrix);
+}
+#if 0
+template <class MatT, class Field, class IndexT>
+void test_pspmm(const Field &F, IndexT *row, IndexT *col,
+ typename Field::Element_ptr dat, index_t rowdim, index_t coldim,
+ uint64_t nnz, int blockSize, typename Field::Element_ptr x,
+ int ldx, typename Field::Element_ptr y, int ldy,
+ typename Field::Element beta) {
+ MatT matrix;
+ sparse_init(F, matrix, row, col, dat, rowdim, coldim, nnz);
+ pfspmm(F, matrix, blockSize, x, ldx, beta, y, ldy);
+ sparse_delete(matrix);
+}
+
+template <class MatT, class Field, class IndexT>
+void test_pspmv(const Field &F, IndexT *row, IndexT *col,
+ typename Field::Element_ptr dat, index_t rowdim, index_t coldim,
+ uint64_t nnz, typename Field::Element_ptr x,
+ typename Field::Element_ptr y, typename Field::Element beta) {
+ MatT matrix;
+ sparse_init(F, matrix, row, col, dat, rowdim, coldim, nnz);
+ pfspmv(F, matrix, x, 1, y);
+ sparse_delete(matrix);
+}
+
+template <class Field, class IndexT>
+void
+test_pspmv_sell(const Field &F, IndexT *row, IndexT *col,
+ typename Field::Element_ptr dat, index_t rowdim, index_t coldim,
+ uint64_t nnz, Sparse<Field, SparseMatrix_t::SELL> &matrix,
+ typename Field::Element_ptr x, typename Field::Element_ptr y,
+ typename Field::Element beta) {
+ sparse_init(F, matrix, row, col, dat, rowdim, coldim, nnz);
+ pfspmv(F, matrix, x, 1, y);
+ auto tmp = fflas_new(F, rowdim, 1);
+ for (size_t i = 0; i < rowdim; ++i) {
+ tmp[i] = y[matrix.perm[i]];
+ }
+ for (size_t i = 0; i < rowdim; ++i) {
+ y[i] = tmp[i];
+ }
+ sparse_delete(matrix);
+ fflas_delete(tmp);
+}
+#endif
+
+int main(int argc, char **argv) {
+ using Field = Modular<float>;
+
+ Field F(101);
+ int nbTests = 25;
+ std::string path;
+
+ index_t *row = nullptr, *col = nullptr;
+ typename Field::Element_ptr dat;
+
+ index_t rowdim, coldim;
+ uint64_t nnz;
+
+ if(argc > 1)
+ path = argv[1];
+
+ // path = "data/mat11.sms";
+ index_t * st = nullptr ;
+ readSmsFormat(path, F, row, col, dat, rowdim, coldim, nnz);
+ row = fflas_new<index_t>(nnz);
+ for (index_t j = 0 ; j < rowdim ; ++j) {
+ for (index_t k = st[j] ; k < st[j+1] ; ++k)
+ row[k] = j ;
+ }
+
+
+ auto x = fflas_new(F, coldim, 1, Alignment::CACHE_LINE);
+ auto y = fflas_new(F, rowdim, 1, Alignment::CACHE_LINE);
+ auto y1 = fflas_new(F, rowdim, 1, Alignment::CACHE_LINE);
+
+ for (size_t i = 0; i < coldim; ++i) {
+ x[i] = 1;
+ }
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y[i] = 0;
+ y1[i] = 0;
+ }
+
+ /************************************************************************************
+ *
+ * SPMV
+ *
+ *************************************************************************************/
+ cout << "=== spmv ===" << endl;
+
+ test_spmv<Sparse<Field, SparseMatrix_t::CSR>>(F, row, col, dat, rowdim,
+ coldim, nnz, x, y, 1);
+ cout << "CSR: OK" << endl;
+
+ test_spmv<Sparse<Field, SparseMatrix_t::CSR_ZO>>(F, row, col, dat, rowdim,
+ coldim, nnz, x, y1, 1);
+
+ // for(size_t i = 0 ; i < 10 ; ++i)
+ // {
+ // cout << y[i] << " ";
+ // }
+ // cout << endl;
+
+ // for(size_t i = 0 ; i < 10 ; ++i)
+ // {
+ // cout << y1[i] << " ";
+ // }
+ // cout << endl;
+
+ cout << "CSR_ZO: " << ((std::equal(y, y + rowdim, y1)) ? "OK" : "ERROR")
+ << endl;
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y1[i] = 0;
+ }
+
+ test_spmv<Sparse<Field, SparseMatrix_t::COO>>(F, row, col, dat, rowdim,
+ coldim, nnz, x, y1, 1);
+
+ cout << "COO: " << ((std::equal(y, y + rowdim, y1)) ? "OK" : "ERROR")
+ << endl;
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y1[i] = 0;
+ }
+
+ test_spmv<Sparse<Field, SparseMatrix_t::ELL>>(F, row, col, dat, rowdim,
+ coldim, nnz, x, y1, 1);
+
+ cout << "ELL: " << ((std::equal(y, y + rowdim, y1)) ? "OK" : "ERROR")
+ << endl;
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y1[i] = 0;
+ }
+
+ test_spmv<Sparse<Field, SparseMatrix_t::ELL_simd>>(F, row, col, dat, rowdim,
+ coldim, nnz, x, y1, 1);
+
+ cout << "ELL_simd: " << ((std::equal(y, y + rowdim, y1)) ? "OK" : "ERROR")
+ << endl;
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y1[i] = 0;
+ }
+
+ test_spmv<Sparse<Field, SparseMatrix_t::CSR_HYB>>(F, row, col, dat, rowdim,
+ coldim, nnz, x, y1, 1);
+
+ cout << "CSR_HYB: " << ((std::equal(y, y + rowdim, y1)) ? "OK" : "ERROR")
+ << endl;
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y1[i] = 0;
+ }
+
+ test_spmv<Sparse<Field, SparseMatrix_t::HYB_ZO>>(F, row, col, dat, rowdim,
+ coldim, nnz, x, y1, 1);
+
+ cout << "HYB_ZO: " << ((std::equal(y, y + rowdim, y1)) ? "OK" : "ERROR")
+ << endl;
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y1[i] = 0;
+ }
+
+ Sparse<Field, SparseMatrix_t::SELL> A;
+ test_spmv_sell(F, row, col, dat, rowdim, coldim, nnz, A, x, y1, 1);
+
+ cout << "SELL: " << ((std::equal(y, y + rowdim, y1)) ? "OK" : "ERROR")
+ << endl;
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y1[i] = 0;
+ }
+
+ /************************************************************************************
+ *
+ * pSPMV
+ *
+ *************************************************************************************/
+#if 0
+ cout << "=== pspmv ===" << endl;
+
+ test_pspmv<Sparse<Field, SparseMatrix_t::CSR>>(F, row, col, dat, rowdim,
+ coldim, nnz, x, y1, 1);
+ cout << "CSR: " << ((std::equal(y, y + rowdim, y1)) ? "OK" : "ERROR")
+ << endl;
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y1[i] = 0;
+ }
+
+ test_pspmv<Sparse<Field, SparseMatrix_t::ELL>>(F, row, col, dat, rowdim,
+ coldim, nnz, x, y1, 1);
+
+ cout << "ELL: " << ((std::equal(y, y + rowdim, y1)) ? "OK" : "ERROR")
+ << endl;
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y1[i] = 0;
+ }
+
+ test_pspmv<Sparse<Field, SparseMatrix_t::ELL_simd>>(
+ F, row, col, dat, rowdim, coldim, nnz, x, y1, 1);
+
+ cout << "ELL_simd: " << ((std::equal(y, y + rowdim, y1)) ? "OK" : "ERROR")
+ << endl;
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y1[i] = 0;
+ }
+
+ test_pspmv<Sparse<Field, SparseMatrix_t::CSR_HYB>>(F, row, col, dat, rowdim,
+ coldim, nnz, x, y1, 1);
+
+ cout << "CSR_HYB: " << ((std::equal(y, y + rowdim, y1)) ? "OK" : "ERROR")
+ << endl;
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y1[i] = 0;
+ }
+
+ test_pspmv<Sparse<Field, SparseMatrix_t::HYB_ZO>>(F, row, col, dat, rowdim,
+ coldim, nnz, x, y1, 1);
+
+ cout << "HYB_ZO: " << ((std::equal(y, y + rowdim, y1)) ? "OK" : "ERROR")
+ << endl;
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y1[i] = 0;
+ }
+
+ Sparse<Field, SparseMatrix_t::SELL> A1;
+ test_pspmv_sell(F, row, col, dat, rowdim, coldim, nnz, A1, x, y1, 1);
+
+ cout << "SELL: " << ((std::equal(y, y + rowdim, y1)) ? "OK" : "ERROR")
+ << endl;
+
+ for (size_t i = 0; i < rowdim; ++i) {
+ y1[i] = 0;
+ }
+#endif
+ // // test_spmm<Sparse<Field, SparseMatrix_t::CSR>>(F, row, col, dat,
+ // rowdim,
+ // coldim, nnz, 1, x, 1, y, 1, 1);
+ // // test_pspmm<Sparse<Field, SparseMatrix_t::CSR>>(F, row, col, dat,
+ // rowdim,
+ // coldim, nnz, 1, x, 1, y, 1, 1);
+ // // test_spmm<Sparse<Field, SparseMatrix_t::COO_ZO>>(F, row, col, dat,
+ // rowdim, coldim, nnz, 1, x, 1, y, 1, 1);
+ // // test_spmm<Sparse<Field, SparseMatrix_t::CSR>>(F, row, col, dat,
+ // rowdim,
+ // coldim, nnz, 1, x, 1, y, 1, 1);
+ // // test_spmv<Sparse<Field, SparseMatrix_t::ELL_ZO>>(F, row, col, dat,
+ // rowdim, coldim, nnz, x, y1, 1);
+
+ // for(size_t i = 0 ; i < 11 ; ++i)
+ // {
+ // cout << y[i] << " ";
+ // }
+ // cout << endl;
+
+ // for(size_t i = 0 ; i < 11 ; ++i)
+ // {
+ // cout << y1[i] << " ";
+ // }
+ // cout << endl;
+
+ // auto bb = std::equal(y, y+rowdim, y1);
+
+ // cout << ((bb) ? "CORRECT" : "ERROR") << endl;
+
+ // if(!bb)
+ // testEq(y, y1, rowdim);
+
+ fflas_delete(x);
+ fflas_delete(y);
+ fflas_delete(y1);
+
+ return 0;
+}
diff --git a/tests/test-utils.h b/tests/test-utils.h
index c2168b0..9abbbf5 100644
--- a/tests/test-utils.h
+++ b/tests/test-utils.h
@@ -3,7 +3,7 @@
/*
* Copyright (C) FFLAS-FFPACK
- * Written by Brice Boyer <bboyer at imag.fr>
+ * Written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
* This file is Free Software and part of FFLAS-FFPACK.
*
* ========LICENCE========
@@ -35,35 +35,18 @@
#ifndef __FFLASFFPACK_tests_test_utils_H
#define __FFLASFFPACK_tests_test_utils_H
-#include "fflas-ffpack/field/nonzero-randiter.h"
+#include "fflas-ffpack/fflas-ffpack-config.h"
#include "fflas-ffpack/utils/debug.h"
#include "fflas-ffpack/ffpack/ffpack.h"
+#include "fflas-ffpack/utils/fflas_randommatrix.h"
+#include <givaro/givinteger.h>
+#include <givaro/givintprime.h>
+#include <givaro/givranditer.h>
+#include <chrono>
+#include <random>
namespace FFPACK {
- /*! @brief Random Matrix.
- * Creates a \c m x \c n matrix with random entries.
- * @param F field
- * @param A pointer to the matrix (preallocated to at least \c m x \c lda field elements)
- * @param m number of rows in \p A
- * @param n number of cols in \p A
- * @param lda leading dimension of \p A
- * @return pointer to \c A.
- */
- template<class Field>
- typename Field::Element * RandomMatrix(const Field & F,
- typename Field::Element * A,
- size_t m, size_t n, size_t lda)
- {
- typedef typename Field::RandIter Randiter ;
- Randiter R(F);
- for (size_t i=0 ; i<m ; ++i)
- for (size_t j= 0; j<n ;++j)
- R.random( A[i*lda+j] );
- return A;
-
- }
-
/*! Random integer in range.
* @param a min bound
* @param b max bound
@@ -76,106 +59,46 @@ namespace FFPACK {
return x ;
}
- /*! Random integer in range.
- * @param a min bound
- * @param b max bound
- * @return a random integer in [a,b[ */
- size_t RandInt(size_t a, size_t b)
- {
- size_t x = a ;
- x += (size_t)rand()%(b-a);
- FFLASFFPACK_check(x<b && x>=a);
- return x ;
- }
+ template<typename Field>
+ Givaro::Integer maxFieldElt() {return (Givaro::Integer)Field::maxCardinality();}
+ template<>
+ Givaro::Integer maxFieldElt<Givaro::ZRing<Givaro::Integer>>() {return (Givaro::Integer)-1;}
- /*! @brief Random Matrix with prescribed rank.
- * Creates a \c m x \c n matrix with random entries and rank \c r.
- * @param F field
- * @param A pointer to the matrix (preallocated to at least \c m x \c lda field elements)
- * @param r rank of the matrix to build
- * @param m number of rows in \p A
- * @param n number of cols in \p A
- * @param lda leading dimension of \p A
- * @return pointer to \c A.
+ /*** Field chooser for test according to characteristic q and bitsize b ***/
+ /* if q=-1 -> field is chosen randomly with a charateristic of b bits
+ if b=0 -> bitsize is chosen randomly according to maxFieldElt
*/
- template<class Field>
- typename Field::Element * RandomMatrixWithRank(const Field & F,
- typename Field::Element * A,
- size_t r,
- size_t m, size_t n, size_t lda)
- {
- FFLASFFPACK_check(r <= std::min(m,n));
- FFLASFFPACK_check(n <= lda);
- typedef typename Field::RandIter Randiter ;
- typedef typename Field::Element Element ;
- Randiter R(F);
- NonzeroRandIter<Field,Randiter> nzR(F,R);
-
- size_t * P = new size_t[n];
- size_t * Q = new size_t[m];
- for (size_t i = 0 ; i < m ; ++i ) Q[i] = 0;
- for (size_t i = 0 ; i < n ; ++i ) P[i] = 0;
-
- Element * U = new Element[m*lda];
- Element * L = new Element[m*m];
-
-
- /* Create L, lower invertible */
- for (size_t i=0 ; i<m ; ++i)
- for (size_t j= 0; j<i ;++j)
- R.random( L[i*m+j] );
-
- for (size_t i=0 ; i<m ; ++i)
- nzR.random( L[i*m+i] );
-
- for (size_t i=0 ; i<m ; ++i)
- for (size_t j= i+1; j<m ;++j)
- F.init(L[i*m+j],0UL);
-
-
- /* Create U, upper or rank r */
- for (size_t i=0 ; i<r ; ++i)
- for (size_t j= i+1; j<r ;++j)
- R.random( U[i*lda+j] );
- for (size_t i=0 ; i<r ; ++i)
- nzR.random( U[i*lda+i] );
- for (size_t i=0 ; i<r ; ++i)
- for (size_t j= 0 ; j<i ;++j)
- F.init(U[i*lda+j],0UL);
-
- for (size_t i=r ; i<m ; ++i)
- for (size_t j= 0 ; j<n ;++j)
- F.init(U[i*lda+j],0UL);
-
- for (size_t i=0 ; i<r ; ++i)
- for (size_t j= r ; j<n ;++j)
- R.random( U[i*lda+j] );
-
- /* Create a random P,Q */
-
- for (size_t i = 0 ; i < n ; ++i)
- P[i] = i + RandInt(0UL,n-i);
- for (size_t i = 0 ; i < m ; ++i)
- Q[i] = i + RandInt(0UL,m-i);
-
- /* compute product */
-
- FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
- m,0,(int)n, U, lda, P);
- FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans,
- m,0,(int)m, L, m, Q);
- FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans,
- m,n,m, 1.0, L,m, U,lda, 0.0, A,lda);
- //! @todo compute LU with ftrtr
-
- delete[] P;
- delete[] L;
- delete[] U;
- delete[] Q;
+ template<typename Field>
+ Field* chooseField(Givaro::Integer q, uint64_t b){
+ Givaro::Integer maxV= maxFieldElt<Field>();
+ auto seed = std::chrono::high_resolution_clock::now().time_since_epoch().count();
+ std::mt19937 mt_rand(seed);
+ if (maxV>0 && (q> maxV || b> maxV.bitsize()))
+ return nullptr;
+ if (b<=1){
+ //srand((double)std::chrono::high_resolution_clock::now());
+ auto bitrand = std::bind(std::uniform_int_distribution<uint64_t>(2,maxV.bitsize()-1),
+ mt_rand);
+ b = bitrand();
+ }
+ Givaro::IntPrimeDom IPD;
+ Givaro::Integer tmp,p;
+ if (q==-1){
+ // Choose characteristic as a random prime of b bits
+ do{
+ Givaro::Integer _p;
+ Givaro::Integer::seeding(Givaro::Integer(mt_rand()));
+ Givaro::Integer::random_exact_2exp(_p,b);
+ IPD.prevprime( tmp, _p+1 );
+ p = tmp;
+ }while( (p < 2) );
+ }
+ else p=q;
+
+ return new Field(p);
+ }
- return A;
- }
} // FFPACK
#endif
diff --git a/tests/testeur_fgemm.C b/tests/testeur_fgemm.C
new file mode 100644
index 0000000..31207c3
--- /dev/null
+++ b/tests/testeur_fgemm.C
@@ -0,0 +1,269 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+//--------------------------------------------------------------------------
+// Test for the fgemm winograd
+//
+//--------------------------------------------------------------------------
+// Clement Pernet
+//-------------------------------------------------------------------------
+
+//#define NEWWINO
+
+#include <iostream>
+#include <iomanip>
+using namespace std;
+//#include "fflas-ffpack/modular-int.h"
+//#include "fflas-ffpack/modular-positive.h"
+#include "fflas-ffpack/field/modular-positive.h"
+//#include "timer.h"
+#include "Matio.h"
+#include "fflas-ffpack/fflas/fflas.h"
+
+
+#include "givaro/givintprime.h"
+#include "givaro/modular.h"
+#include "givaro/gfq.h"
+
+using namespace FFPACK;
+using namespace Givaro;
+
+//typedef ModularBalanced<float> Field;
+//typedef ModularBalanced<double> Field;
+typedef Givaro::Modular<double> Field;
+//typedef Givaro::Modular<float> Field;
+//typedef Givaro::Modular<int> Field; //-> bug avec w>=1 (olddynamic pealing)
+//typedef Givaro::Modular<int32_t> Field;
+//typedef GFqDom<int32_t> Field;
+
+int main(int argc, char** argv){
+ FFLAS::Timer tim;
+ IntPrimeDom IPD;
+ Field::Element alpha, beta;
+ long p;
+ size_t M, K, N, Wino;
+ bool keepon = true;
+ Integer _p,tmp;
+ cerr<<setprecision(10);
+ size_t TMAXM = 100, TMAXK = 100, TMAXN = 100;
+ size_t PRIMESIZE = 23;
+ size_t WINOMAX = 8;
+
+ if (argc > 1 )
+ TMAXM = atoi(argv[1]);
+ if (argc > 2 )
+ PRIMESIZE = atoi(argv[2]);
+ if (argc > 3 )
+ WINOMAX = atoi(argv[3]);
+ if (argc > 4 )
+ TMAXK = atoi(argv[4]);
+ else
+ TMAXK = TMAXM;
+ if (argc > 5 )
+ TMAXN = atoi(argv[5]);
+ else
+ TMAXN = TMAXM;
+
+
+ enum FFLAS::FFLAS_TRANSPOSE ta, tb;
+ size_t lda,ldb;
+ Field::Element * A;
+ Field::Element * B;
+ Field::Element * C, *Cbis, *Cter;
+
+ while (keepon){
+ srandom(_p);
+ do{
+ // max = Integer::random(2);
+ _p = random();//max % (2<<30);
+ IPD.prevprime( tmp, (_p% (1<<PRIMESIZE)) );
+ p = tmp;
+
+ }while( (p <= 2) );
+
+ Field F( (size_t) p );
+ Field::RandIter RValue( F );
+ //NonzeroRandIter<Field> RnValue( F, RValue );
+
+
+ do{
+ M = (size_t) random() % TMAXM;
+ K = (size_t) random() % TMAXK;
+ N = (size_t) random() % TMAXN;
+ Wino = random() % WINOMAX;
+ } while (!( (K>>Wino > 0) && (M>>Wino > 0) && (N>>Wino > 0) ));
+
+ if (random()%2){
+ ta = FFLAS::FflasTrans;
+ lda = M;
+ }
+ else{
+ ta = FFLAS::FflasNoTrans;
+ lda = K;
+ }
+ if (random()%2){
+ tb = FFLAS::FflasTrans;
+ ldb = K;
+ }
+ else{
+ tb = FFLAS::FflasNoTrans;
+ ldb = N;
+ }
+
+ A = FFLAS::fflas_new<Field::Element>(M*K);
+ B = FFLAS::fflas_new<Field::Element>(K*N);
+ C = FFLAS::fflas_new<Field::Element>(M*N);
+ Cbis = FFLAS::fflas_new<Field::Element>(M*N);
+ Cter = FFLAS::fflas_new<Field::Element>(M*N);
+
+ for( size_t i = 0; i < M*K; ++i )
+ RValue.random( *(A+i) );
+ for( size_t i = 0; i < K*N; ++i )
+ RValue.random( *(B+i) );
+ for( size_t i = 0; i < M*N; ++i )
+ *(Cter+i) = *(Cbis+i)= RValue.random( *(C+i) );
+
+ RValue.random( alpha );
+ RValue.random( beta );
+
+ cout <<"p = "<<(size_t)p<<" M = "<<M
+ <<" K = "<<K
+ <<" N = "<<N
+ <<" Winolevel = "<<Wino<<" "
+ <<alpha
+ <<((ta==FFLAS::FflasNoTrans)?".Ax":".A^Tx")
+ <<((tb==FFLAS::FflasNoTrans)?"B + ":"B^T + ")
+ <<beta<<".C"
+ <<"....";
+
+ tim.clear();
+ tim.start();
+ FFLAS::MMHelper<Field, FFLAS::MMHelperAlgo::Winograd, FFLAS::FieldTraits<Field>::value> WH (F,Wino,FFLAS::ParSeqHelper::Sequential());
+ FFLAS::fgemm (F, ta, tb, M, N, K, alpha, A, lda, B, ldb, beta, C, N, WH);
+ tim.stop();
+
+// for (int j = 0; j < n; ++j ){
+// FFLAS::fgemv( F, FFLAS::FflasNoTrans, m, k, alpha, A, k, B+j, n, beta, Cbis+j, n);
+// for (int i=0; i<m; ++i)
+// if ( !F.areEqual( *(Cbis+i*n+j), *(C+i*n+j) ) )
+// keepon = false;
+// }
+ Field::Element aij, bij, temp;
+ //F.div(boa, beta, alpha);
+ for (size_t i = 0; i < M; ++i )
+ for ( size_t j = 0; j < N; ++j ){
+ // F.mulin(*(Cbis+i*N+j),boa);
+ F.mulin(*(Cbis+i*N+j),beta);
+ F.assign(temp,F.zero);
+ for ( size_t l = 0; l < K ; ++l ){
+ if ( ta == FFLAS::FflasNoTrans )
+ aij = *(A+i*lda+l);
+ else
+ aij = *(A+l*lda+i);
+ if ( tb == FFLAS::FflasNoTrans )
+ bij = *(B+l*ldb+j);
+ else
+ bij = *(B+j*ldb+l);
+ F.axpyin(temp,aij,bij);
+ //F.axpyin( *(Cbis+i*N+j), aij, bij );
+ }
+ F.axpyin( *(Cbis+i*N+j), alpha, temp);
+ //F.mulin( *(Cbis+i*N+j),alpha );
+ if ( !F.areEqual( *(Cbis+i*N+j), *(C+i*N+j) ) ) {
+ cerr<<"error for i,j="<<i<<" "<<j<<" "<<*(C+i*N+j)<<" "<<*(Cbis+i*N+j)<<" diff = "<< *(C+i*N+j)-*(Cbis+i*N+j) <<endl;
+ keepon = false;
+ }
+ }
+
+ if (keepon){
+ cout<<"Passed "
+ <<(2*M*N/1000.0*K/tim.usertime()/1000.0)<<"Mfops"<<endl;
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( C);
+ FFLAS::fflas_delete( Cbis);
+ FFLAS::fflas_delete( Cter);
+ }
+ else{
+ // cerr<<"C="<<endl;
+// write_field( F, cerr, C, M, N, N );
+// cerr<<"Cbis="<<endl;
+// write_field( F, cerr, Cbis, M, N, N );
+ }
+ }
+ cout<<endl;
+ cerr<<"FAILED with p = "<<(size_t)p<<" M = "<<M<<" N = "<<N<<" K = "<<K
+ <<" Winolevel = "<<Wino
+ <<" alpha = "<<(int)alpha<<" beta = "<<(int)beta<<endl;
+
+ if (M < 100 && N < 100) {
+ cerr << "error locations (X)" << endl;
+ Field F( (size_t) p );
+ for (size_t i = 0; i < M; ++i ) {
+ for ( size_t j = 0; j < N; ++j ){
+ if ( !F.areEqual( *(Cbis+i*N+j), *(C+i*N+j) ) ) {
+ cerr<<"x" ;
+ }
+ else
+ cerr<<"." ;
+ }
+ cerr << endl;
+ }
+ cerr << endl;
+
+ }
+
+ cerr<<"A:"<<endl;
+ if ( ta ==FFLAS::FflasNoTrans ){
+ cerr<<M<<" "<<K<<" M"<<endl;
+ for (size_t i=0; i<M; ++i)
+ for (size_t j=0; j<K; ++j)
+ cerr<<i+1<<" "<<j+1<<" "<<((int) *(A+i*lda+j) )<<endl;
+ }
+ else{
+ cerr<<K<<" "<<M<<" M"<<endl;
+ for (size_t i=0; i<K; ++i)
+ for (size_t j=0; j<M; ++j)
+ cerr<<i+1<<" "<<j+1<<" "<<((int) *(A+j*lda+i) )<<endl;
+
+ }
+ cerr<<"0 0 0"<<endl<<endl;
+ cerr<<"B:"<<endl;
+ if ( tb ==FFLAS::FflasNoTrans ){
+ cerr<<K<<" "<<N<<" M"<<endl;
+ for (size_t i=0; i<K; ++i)
+ for (size_t j=0; j<N; ++j)
+ cerr<<i+1<<" "<<j+1<<" "<<((int) *(B+i*ldb+j) )<<endl;
+ }
+ else{
+ cerr<<N<<" "<<K<<" M"<<endl;
+ for (size_t i=0; i<N; ++i)
+ for (size_t j=0; j<K; ++j)
+ cerr<<i+1<<" "<<j+1<<" "<<((int) *(B+i+j*ldb) )<<endl;
+ }
+ cerr<<"0 0 0"<<endl<<endl;
+ cerr<<"C:"<<endl
+ <<M<<" "<<N<<" M"<<endl;
+ for (size_t i=0; i<M; ++i)
+ for (size_t j=0; j<N; ++j)
+ cerr<<i+1<<" "<<j+1<<" "<<((int) *(Cter+i*N+j) )<<endl;
+ cerr<<"0 0 0"<<endl;
+
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( C);
+ FFLAS::fflas_delete( Cbis);
+ FFLAS::fflas_delete( Cter);
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/testeur_ftrsm.C b/tests/testeur_ftrsm.C
new file mode 100644
index 0000000..d5be1e9
--- /dev/null
+++ b/tests/testeur_ftrsm.C
@@ -0,0 +1,233 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+//--------------------------------------------------------------------------
+// Sanity check for ftrsm and ftrmm
+//
+//--------------------------------------------------------------------------
+
+/*
+ * Copyright (C) 2007 FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#include <iomanip>
+#include <iostream>
+#include "fflas-ffpack/field/modular-balanced.h"
+//#include "fflas-ffpack/field/modular-int.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "Matio.h"
+#include "fflas-ffpack/fflas/fflas.h"
+#include "givaro/givintprime.h"
+
+using namespace std;
+using namespace FFPACK;
+
+//typedef Givaro::Modular<int> Field;
+//typedef Givaro::Modular<float> Field;
+typedef ModularBalanced<double> Field;
+
+int main(int argc, char** argv){
+
+
+ FFLAS::Timer tim;
+ Givaro::IntPrimeDom IPD;
+ uint64_t p;
+ size_t M, N, K ;
+ bool keepon = true;
+ Givaro::Integer _p,tmp;
+ Field::Element zero,one;
+ cerr<<setprecision(10);
+
+ size_t TMAX = 300;
+ size_t PRIMESIZE = 23;
+ if (argc > 1 )
+ TMAX = atoi(argv[1]);
+ if (argc > 2 )
+ PRIMESIZE = atoi(argv[2]);
+
+ FFLAS::FFLAS_TRANSPOSE trans;
+ FFLAS::FFLAS_SIDE side;
+ FFLAS::FFLAS_UPLO uplo;
+ FFLAS::FFLAS_DIAG diag;
+ size_t lda, ldb;
+
+ Field::Element * A, *Abis, *B,* Bbis;
+ Field::Element alpha;
+
+ while (keepon){
+ srandom(_p);
+ do{
+ // max = Integer::random(2);
+ _p = random();//max % (2<<30);
+ IPD.prevprime( tmp, (_p% (1<<PRIMESIZE)) );
+ p = tmp;
+ }while( (p <= 2) );
+
+ Field F (p);
+ F.init (zero,0.0);
+ F.init (one,1.0);
+ Field::RandIter RValue (F);
+
+ do{
+ M = (size_t) random() % TMAX;
+ N = (size_t) random() % TMAX;
+ } while ((M == 0) || (N == 0));
+
+ ldb = N;
+
+ if (random()%2)
+ trans = FFLAS::FflasNoTrans;
+ else
+ trans = FFLAS::FflasTrans;
+
+
+ if (random()%2)
+ diag = FFLAS::FflasUnit;
+ else
+ diag = FFLAS::FflasNonUnit;
+
+ if (random()%2){
+ side = FFLAS::FflasLeft;
+ K = M;
+ lda = M;
+ } else {
+ side = FFLAS::FflasRight;
+ K = N;
+ lda = N;
+ }
+
+ if (random()%2)
+ uplo = FFLAS::FflasUpper;
+ else
+ uplo = FFLAS::FflasLower;
+
+ while (F.isZero(RValue.random (alpha)));
+
+ A = FFLAS::fflas_new<Field::Element>(K*K);
+ B = FFLAS::fflas_new<Field::Element>(M*N);
+ Abis = FFLAS::fflas_new<Field::Element>(K*K);
+ Bbis = FFLAS::fflas_new<Field::Element>(M*N);
+ for (size_t i = 0; i < M; ++i)
+ for (size_t j = 0; j < N; ++j){
+ RValue.random (*(B + i*N + j));
+ *(Bbis + i*N + j) = *(B + i*N + j);
+ }
+ for (size_t i = 0; i < K; ++i)
+ for (size_t j = 0; j < K; ++j)
+ *(Abis + i*K + j) = RValue.random (*(A + i*K + j));
+ for (size_t i = 0; i < K; ++i){
+ while (F.isZero(RValue.random (*(A + i*(K+1)))));
+ *(Abis + i*(K +1)) = *(A + i*(K+1));
+ }
+
+ cout <<"p = "<<(size_t)p
+ <<" M = "<<M
+ <<" N = "<<N
+ <<((side==FFLAS::FflasLeft)?" Left ":" Right ")
+ <<((uplo==FFLAS::FflasLower)?" Lower ":" Upper ")
+ <<((trans==FFLAS::FflasTrans)?" Trans ":" NoTrans ")
+ <<((diag==FFLAS::FflasUnit)?" Unit ":" NonUnit ")
+ <<"....";
+
+
+ tim.clear();
+ tim.start();
+ FFLAS::ftrsm (F, side, uplo, trans, diag, M, N, alpha,
+ A, lda, B, ldb);
+ tim.stop();
+
+ // Verification
+ Field::Element invalpha;
+ F.inv(invalpha, alpha);
+ FFLAS::ftrmm (F, side, uplo, trans, diag, M, N, invalpha,
+ A, K, B, N);
+ for (size_t i = 0;i < M;++i)
+ for (size_t j = 0;j < N; ++j)
+ if ( !F.areEqual (*(Bbis + i*N+ j ), *(B + i*N + j))){
+ cerr<<endl
+ <<"Bbis ["<<i<<", "<<j<<"] = "<<(*(Bbis + i*N + j))
+ <<" ; B ["<<i<<", "<<j<<"] = "<<(*(B + i*N + j));
+
+ keepon = false;
+ }
+ for (size_t i = 0;i < K; ++i)
+ for (size_t j = 0;j < K; ++j)
+ if ( !F.areEqual (*(A + i*K + j), *(Abis + i*K + j))){
+ cerr<<endl
+ <<"A ["<<i<<", "<<j<<"] = "<<(*(A + i*K + j))
+ <<" ; Abis ["<<i<<", "<<j<<"] = "<<(*(Abis + i*K + j));
+ keepon = false;
+ }
+ if (keepon) {
+ cout<<" Passed "
+ <<double(M*N)/1000000.0*double(K)/tim.usertime()<<" Mfops"<<endl;
+
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( Bbis);
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( Abis);
+ } else {
+
+ cerr<<endl;
+ write_field (F, cerr<<"A = "<<endl, Abis, (int) K,(int) K,(int) K);
+ write_field (F, cerr<<"B = "<<endl, Bbis, (int) M,(int) N,(int) N);
+ }
+ }
+
+ cout<<endl;
+ cerr<<"FAILED with p = "<<(size_t)p
+ <<" M = "<<M
+ <<" N = "<<N
+ <<" alpha = "<<alpha
+ <<((side==FFLAS::FflasLeft)?" Left ":" Right ")
+ <<((uplo==FFLAS::FflasLower)?" Lower ":" Upper ")
+ <<((trans==FFLAS::FflasTrans)?" Trans ":" NoTrans ")
+ <<((diag==FFLAS::FflasUnit)?" Unit ":" NonUnit ")
+ <<endl;
+
+ cerr<<"A:"<<endl;
+ cerr<<K<<" "<<K<<" M"<<endl;
+ for (size_t i=0; i<K; ++i)
+ for (size_t j=0; j<K; ++j)
+ if ((*(Abis + i*lda + j)))
+ cerr<<i+1<<" "<<j+1<<" "
+ <<((int) *(Abis+i*lda+j) )
+ <<endl;
+ cerr<<"0 0 0"<<endl<<endl;
+
+ cerr<<"B:"<<endl;
+ cerr<<M<<" "<<N<<" M"<<endl;
+ for (size_t i=0; i<M; ++i)
+ for (size_t j=0; j<N; ++j)
+ if ((*(Bbis + i*ldb + j)))
+ cerr<<i+1<<" "<<j+1<<" "
+ <<((int) *(Bbis+i*ldb+j) )
+ <<endl;
+ cerr<<"0 0 0"<<endl<<endl;
+
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( Abis);
+ FFLAS::fflas_delete( B);
+ FFLAS::fflas_delete( Bbis);
+}
+
diff --git a/tests/testeur_lqup.C b/tests/testeur_lqup.C
new file mode 100644
index 0000000..7d66719
--- /dev/null
+++ b/tests/testeur_lqup.C
@@ -0,0 +1,326 @@
+/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+//--------------------------------------------------------------------------
+// Test for the lqup decomposition
+//
+//--------------------------------------------------------------------------
+// Clement Pernet
+//-------------------------------------------------------------------------
+
+/*
+ * Copyright (C) FFLAS-FFPACK
+ * Written by Clément Pernet
+ * This file is Free Software and part of FFLAS-FFPACK.
+ *
+ * ========LICENCE========
+ * This file is part of the library FFLAS-FFPACK.
+ *
+ * FFLAS-FFPACK is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ *.
+ */
+
+#include <iostream>
+#include <iomanip>
+using namespace std;
+//#include "fflas-ffpack/field/modular-int.h"
+//#include "fflas-ffpack/field/modular-positive.h"
+#include "fflas-ffpack/field/modular-balanced.h"
+#include "fflas-ffpack/utils/timer.h"
+#include "Matio.h"
+#include "fflas-ffpack/ffpack/ffpack.h"
+#include "givaro/givintprime.h"
+
+
+using namespace FFPACK;
+
+//typedef Givaro::Modular<double> Field;
+typedef ModularBalanced<double> Field;
+//typedef Givaro::Modular<float> Field;
+//typedef ModularBalanced<float> Field;
+//typedef Givaro::Modular<int> Field;
+//typedef GivaroZpz<int32_t> Field;
+//typedef GivaroGfq Field;
+
+int main(int argc, char** argv){
+ FFLAS::Timer tim;
+ Givaro::IntPrimeDom IPD;
+ uint64_t p;
+ size_t M, N ;
+ bool keepon = true;
+ Givaro::Integer _p,tmp;
+ Field::Element zero,one;
+ cerr<<setprecision(10);
+ size_t TMAX = 100;
+ size_t PRIMESIZE = 23;
+
+ if (argc > 1 )
+ TMAX = atoi(argv[1]);
+ if (argc > 2 )
+ PRIMESIZE = atoi(argv[2]);
+
+ FFLAS::FFLAS_TRANSPOSE ta;
+ FFLAS::FFLAS_DIAG diag;
+ size_t lda;
+
+ Field::Element * A, *Abis, *X,* U, *L;
+ size_t *P, *Q;
+ while (keepon){
+ srandom(_p);
+ do{
+ // max = Integer::random(2);
+ _p = random();//max % (2<<30);
+ IPD.prevprime( tmp, (_p% (1<<PRIMESIZE)) );
+ p = tmp;
+
+ }while( (p <= 2) );
+
+ Field F( p);
+ F.init(zero,0.0);
+ F.init(one,1.0);
+ Field::RandIter RValue( F );
+
+ do{
+ M = (size_t) random() % TMAX;
+ N = (size_t) random() % TMAX;
+ } while ((M == 0) || (N == 0));
+ lda = N;
+ if (random()%2)
+ diag = FFLAS::FflasUnit;
+ else
+ diag = FFLAS::FflasNonUnit;
+
+
+ if (random()%2){
+ ta = FFLAS::FflasTrans;
+ L = FFLAS::fflas_new<Field::Element>(M*N);
+ U = FFLAS::fflas_new<Field::Element>(N*N);
+ P = FFLAS::fflas_new<size_t>(M);
+ Q = FFLAS::fflas_new<size_t>(N);
+ for (size_t i=0; i<M; ++i) P[i] = 0;
+ for (size_t i=0; i<N; ++i) Q[i] = 0;
+ }
+ else{
+ ta = FFLAS::FflasNoTrans;
+ L = FFLAS::fflas_new<Field::Element>(M*M);
+ U = FFLAS::fflas_new<Field::Element>(M*N);
+ P = FFLAS::fflas_new<size_t>(N);
+ Q = FFLAS::fflas_new<size_t>(M);
+ for (size_t i=0; i<N; ++i) P[i] = 0;
+ for (size_t i=0; i<M; ++i) Q[i] = 0;
+ }
+
+ size_t R=0;
+ Field::Element * G = FFLAS::fflas_new<Field::Element>(M*M);
+ Field::Element * H = FFLAS::fflas_new<Field::Element>(M*N);
+ size_t t;
+ do{
+ t = (size_t) random() % 10;
+ } while ((!t)||(t==1));
+ for (size_t i=0; i<M; ++i)
+ if (!(random() % t))
+ for (size_t j=0; j < M; ++j)
+ RValue.random (*(G+i*M+j));
+ else
+ for (size_t j=0; j < M; ++j)
+ F.assign(*(G+i*M+j), zero);
+
+
+
+ for (size_t j=0; j < N; ++j)
+ if (!(random() % t))
+ for (size_t i=0; i<M; ++i)
+ RValue.random (*(H+i*N+j));
+ else
+ for (size_t i=0; i<M; ++i)
+ F.assign(*(H+i*N+j), zero);
+
+// write_field(F,cerr<<"G = "<<endl,G,M,M,M);
+// write_field(F,cerr<<"H = "<<endl,H,M,N,N);
+ A = FFLAS::fflas_new<Field::Element>(M*N);
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M, N, M, one, G, M, H, N, zero, A, N);
+ FFLAS::fflas_delete( G);
+ FFLAS::fflas_delete( H);
+
+ Abis = FFLAS::fflas_new<Field::Element>(M*N);
+ for (size_t i=0; i<M*N; ++i)
+ *(Abis+i) = *(A+i);
+
+ X = FFLAS::fflas_new<Field::Element>(M*N);
+
+
+ cout <<"p = "<<(size_t)p<<" M = "<<M
+ <<" N = "<<N
+ <<((diag==FFLAS::FflasUnit)?" Unit ":" Non Unit ")
+ <<((ta==FFLAS::FflasNoTrans)?"LQUP ( A ) ":"LQUP ( A^T ) ")
+ <<"....";
+
+
+ tim.clear();
+ tim.start();
+ R = FFPACK::LUdivine (F, diag, ta, M, N, A, lda, P, Q);
+ tim.stop();
+
+
+ //write_field(F,cerr<<"Result = "<<endl,Abis,M,N,lda);
+
+ if (ta == FFLAS::FflasNoTrans){
+
+ for (size_t i=0; i<R; ++i){
+ for (size_t j=0; j<i; ++j)
+ F.assign ( *(U + i*N + j), zero);
+ for (size_t j=i+1; j<N; ++j)
+ F.assign (*(U + i*N + j), *(A+ i*N+j));
+ }
+ for (size_t i=R;i<M; ++i)
+ for (size_t j=0; j<N; ++j)
+ F.assign(*(U+i*N+j), zero);
+ for ( size_t i=0; i<M; ++i ){
+ size_t j=0;
+ for (; j< ((i<R)?i:R) ; ++j )
+ F.assign( *(L + i*M+j), *(A+i*N+j));
+ for (; j<M; ++j )
+ F.assign( *(L+i*M+j), zero);
+ }
+
+ //write_field(F,cerr<<"L = "<<endl,L,M,M,M);
+ //write_field(F,cerr<<"U = "<<endl,U,M,N,N);
+ FFPACK::applyP( F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+ M,0,(int) R, L, M, Q);
+ for ( size_t i=0; i<M; ++i )
+ F.assign(*(L+i*(M+1)), one);
+
+ if (diag == FFLAS::FflasNonUnit)
+ for ( size_t i=0; i<R; ++i )
+ F.assign (*(U+i*(N+1)), *(A+i*(lda+1)));
+
+ else{
+ for (size_t i=0; i<R; ++i ){
+ *(L+Q[i]*(M+1)) = *(A+Q[i]*lda+i);
+ F.assign (*(U+i*(N+1)),one);
+ }
+ }
+
+ FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+ M,0,(int) R, U, N, P);
+ FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+ N,0,(int) R, U, N, Q);
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M,N,M, 1.0, L,M, U,N, 0.0, X,N);
+ //FFLAS::fflas_delete( A);
+ } else {
+
+ for (size_t i=0; i<R; ++i){
+ for (size_t j=0; j<i; ++j)
+ F.assign ( *(L + i + j*N), zero);
+ for (size_t j=i+1; j<M; ++j)
+ F.assign (*(L + i + j*N), *(A+ i+j*N));
+ }
+
+ for (size_t i=R;i<N; ++i)
+ for (size_t j=0; j<M; ++j)
+ F.assign(*(L+i+j*N), zero);
+ for ( size_t i=0; i<N; ++i ){
+ size_t j=0;
+ for (; j< ((i<R)?i:R) ; ++j )
+ F.assign( *(U + i+j*N), *(A+i+j*N));
+ for (; j<N; ++j )
+ F.assign( *(U+i+j*N), zero);
+ }
+
+ FFPACK::applyP( F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+ N,0,(int) R, U, N, Q);
+ for (size_t i=0; i<N; ++i)
+ F.assign (*(U+i*(N+1)),one);
+ if (diag == FFLAS::FflasNonUnit)
+ for ( size_t i=0; i<R; ++i )
+ F.assign (*(L+i*(N+1)), *(A+i*(lda+1)));
+ else{
+ for ( size_t i=0; i<R; ++i ){
+ *(U+Q[i]*(N+1)) = *(A+Q[i]+i*N);
+ F.assign (*(L+i*(N+1)),one);
+ }
+ }
+ // write_field(F,cerr<<"L = "<<endl,L,M,N,N);
+// write_field(F,cerr<<"U = "<<endl,U,N,N,N);
+
+ FFPACK::applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
+ N,0,(int) R, L, N, P);
+ FFPACK::applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans,
+ M,0,(int) R, L, N, Q);
+ FFLAS::fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M,N,N, 1.0, L,N, U,N, 0.0, X,N);
+ }
+ for (size_t i=0; i<M; ++i)
+ for (size_t j=0; j<N; ++j)
+ if (!F.areEqual (*(Abis+i*N+j), *(X+i*N+j))){
+ cerr<<"error for i,j="<<i<<" "<<j<<" "<<*(Abis+i*N+j)<<" "<<*(X+i*N+j)<<endl;
+ keepon = false;
+ }
+
+ //write_field(F,cerr<<"X = "<<endl,X,m,n,n);
+ //write_field(F,cerr<<"B = "<<endl,B,m,n,n);
+
+ if (keepon){
+ cout<<"R = "<<R
+ <<" Passed "
+ <<(double(M*M)/1000.0*(double(N)-double(M)/3.0)/tim.usertime()/1000.0)<<"Mfops"<<endl;
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( Abis);
+ FFLAS::fflas_delete( X);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+ }
+ else{
+ cerr<<"Abis = "<<endl;
+ write_field( F, cerr, Abis, (int) M, (int) N, (int) N );
+ cerr<<"X = "<<endl;
+ write_field( F, cerr, X, (int) M, (int) N, (int) N );
+ }
+ }
+ cout<<endl;
+ cerr<<"FAILED with p = "<<(size_t)p<<" M = "<<M<<" N = "<<N
+ <<" trans = "<<ta<<" diag = "<<diag<<endl;
+
+ cerr<<"A:"<<endl;
+ cerr<<M<<" "<<N<<" M"<<endl;
+ for (size_t i=0; i<M; ++i)
+ for (size_t j=0; j<N; ++j)
+ if (*(Abis+i*lda+j))
+ cerr<<i+1<<" "<<j+1<<" "<<((int) *(Abis+i*lda+j) )<<endl;
+ cerr<<"0 0 0"<<endl<<endl;
+
+ FFLAS::fflas_delete( A);
+ FFLAS::fflas_delete( Abis);
+ FFLAS::fflas_delete( L);
+ FFLAS::fflas_delete( U);
+ FFLAS::fflas_delete( X);
+ FFLAS::fflas_delete( P);
+ FFLAS::fflas_delete( Q);
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/utils/Makefile.am b/utils/Makefile.am
deleted file mode 100644
index c401de8..0000000
--- a/utils/Makefile.am
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-
-
-pkgincludesubdir=$(pkgincludedir)/utils
-pkgincludesub_HEADERS= \
- args-parser.h \
- print-utils.h \
- debug.h \
- Matio.h
-
-EXTRA_DIST=timer.h timer.C
-
diff --git a/utils/Makefile.in b/utils/Makefile.in
deleted file mode 100644
index a6bb6dc..0000000
--- a/utils/Makefile.in
+++ /dev/null
@@ -1,549 +0,0 @@
-# Makefile.in generated by automake 1.11.5 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
- at SET_MAKE@
-
-# Copyright (c) 2011 FFLAS-FFPACK
-# written by BB <bboyer at imag.fr>
-# ========LICENCE========
-# This file is part of the library FFLAS-FFPACK.
-#
-# FFLAS-FFPACK is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-# ========LICENCE========
-
-VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = utils
-DIST_COMMON = $(pkgincludesub_HEADERS) $(srcdir)/Makefile.am \
- $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/macros/aclocal-include.m4 \
- $(top_srcdir)/macros/blas-check.m4 \
- $(top_srcdir)/macros/blasATLAS-check.m4 \
- $(top_srcdir)/macros/blasGOTO-check.m4 \
- $(top_srcdir)/macros/blasGSL-check.m4 \
- $(top_srcdir)/macros/blasOTHER-check.m4 \
- $(top_srcdir)/macros/config-header.m4 \
- $(top_srcdir)/macros/debug.m4 \
- $(top_srcdir)/macros/fflas-ffpack-doc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-misc.m4 \
- $(top_srcdir)/macros/fflas-ffpack-opt.m4 \
- $(top_srcdir)/macros/givaro-check.m4 \
- $(top_srcdir)/macros/gmp-check.m4 \
- $(top_srcdir)/macros/lapack-check.m4 \
- $(top_srcdir)/macros/libtool.m4 \
- $(top_srcdir)/macros/ltoptions.m4 \
- $(top_srcdir)/macros/ltsugar.m4 \
- $(top_srcdir)/macros/ltversion.m4 \
- $(top_srcdir)/macros/lt~obsolete.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-depcomp =
-am__depfiles_maybe =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-am__vpath_adj = case $$p in \
- $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
- *) f=$$p;; \
- esac;
-am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
-am__install_max = 40
-am__nobase_strip_setup = \
- srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
-am__nobase_strip = \
- for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
-am__nobase_list = $(am__nobase_strip_setup); \
- for p in $$list; do echo "$$p $$p"; done | \
- sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
- $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
- if (++n[$$2] == $(am__install_max)) \
- { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
- END { for (dir in files) print dir, files[dir] }'
-am__base_list = \
- sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
- sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-am__uninstall_files_from_dir = { \
- test -z "$$files" \
- || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
- || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
- $(am__cd) "$$dir" && rm -f $$files; }; \
- }
-am__installdirs = "$(DESTDIR)$(pkgincludesubdir)"
-HEADERS = $(pkgincludesub_HEADERS)
-ETAGS = etags
-CTAGS = ctags
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-BLAS_FOUND = @BLAS_FOUND@
-BLAS_LIBS = @BLAS_LIBS@
-BLAS_PATH = @BLAS_PATH@
-BLAS_VENDOR = @BLAS_VENDOR@
-CBLAS_FLAG = @CBLAS_FLAG@
-CC = @CC@
-CCNAM = @CCNAM@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DBG = @DBG@
-DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEFAULT_CFLAGS = @DEFAULT_CFLAGS@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FFLASFFPACK_DOC_PATH = @FFLASFFPACK_DOC_PATH@
-FGREP = @FGREP@
-GIVARO_CFLAGS = @GIVARO_CFLAGS@
-GIVARO_LIBS = @GIVARO_LIBS@
-GMP_CFLAGS = @GMP_CFLAGS@
-GMP_LIBS = @GMP_LIBS@
-GMP_VERSION = @GMP_VERSION@
-GREP = @GREP@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LAPACK_LIBS = @LAPACK_LIBS@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PROF = @PROF@
-RANLIB = @RANLIB@
-RM = @RM@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-TESTS_CFLAGS = @TESTS_CFLAGS@
-VERSION = @VERSION@
-WARN = @WARN@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-pkgincludesubdir = $(pkgincludedir)/utils
-pkgincludesub_HEADERS = \
- args-parser.h \
- print-utils.h \
- debug.h \
- Matio.h
-
-EXTRA_DIST = timer.h timer.C
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps utils/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps utils/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-install-pkgincludesubHEADERS: $(pkgincludesub_HEADERS)
- @$(NORMAL_INSTALL)
- @list='$(pkgincludesub_HEADERS)'; test -n "$(pkgincludesubdir)" || list=; \
- if test -n "$$list"; then \
- echo " $(MKDIR_P) '$(DESTDIR)$(pkgincludesubdir)'"; \
- $(MKDIR_P) "$(DESTDIR)$(pkgincludesubdir)" || exit 1; \
- fi; \
- for p in $$list; do \
- if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
- echo "$$d$$p"; \
- done | $(am__base_list) | \
- while read files; do \
- echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(pkgincludesubdir)'"; \
- $(INSTALL_HEADER) $$files "$(DESTDIR)$(pkgincludesubdir)" || exit $$?; \
- done
-
-uninstall-pkgincludesubHEADERS:
- @$(NORMAL_UNINSTALL)
- @list='$(pkgincludesub_HEADERS)'; test -n "$(pkgincludesubdir)" || list=; \
- files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
- dir='$(DESTDIR)$(pkgincludesubdir)'; $(am__uninstall_files_from_dir)
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile $(HEADERS)
-installdirs:
- for dir in "$(DESTDIR)$(pkgincludesubdir)"; do \
- test -z "$$dir" || $(MKDIR_P) "$$dir"; \
- done
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am: install-pkgincludesubHEADERS
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am: uninstall-pkgincludesubHEADERS
-
-.MAKE: install-am install-strip
-
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
- clean-libtool ctags distclean distclean-generic \
- distclean-libtool distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-pdf install-pdf-am \
- install-pkgincludesubHEADERS install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- tags uninstall uninstall-am uninstall-pkgincludesubHEADERS
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/utils/Matio.h b/utils/Matio.h
deleted file mode 100644
index ffd7a48..0000000
--- a/utils/Matio.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* Copyright (C) LinBox,FFLAS-FFPACK
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- */
-
-#ifndef __FFLASFFPACK_matio_H
-#define __FFLASFFPACK_matio_H
-
-#include <cstring>
-#include <stdio.h>
-#include <stdlib.h>
-#include "fflas-ffpack/fflas/fflas.h"
-
-// Reading and writing matrices over double
-
-#if 0
-// Reading a matrice from a (eventually zipped) file
-double * read_dbl(char * mat_file,int* tni,int* tnj)
-{
- char *UT, *File_Name;
- int is_gzipped = 0;
- size_t s = strlen(mat_file);
- double* X;
- if ((mat_file[--s] == 'z') &&
- (mat_file[--s] == 'g') &&
- (mat_file[--s] == '.')) {
- is_gzipped = 1;
- File_Name = "/tmp/bbXXXXXX_";
- mkstemp(File_Name);
- UT = new char[s+34+strlen(File_Name)];
- sprintf(UT,"gunzip -c %s > %s", mat_file, File_Name);
- system(UT);
- sprintf(UT,"\\rm %s", File_Name);
- } else
- File_Name = mat_file;
-
- FILE* FileDes = fopen(File_Name, "r");
- if (FileDes != NULL) {
- char * tmp = new char[200];// usigned long tni, tnj;
- fscanf(FileDes,"%d %d %s\n",tni, tnj, &tmp) ;
- int n=*tni;
- int p=*tnj;
- X = new double[n*p];
- for (int i=0;i<n*p;++i)
- X[i] = (double) 0;
- long i,j; long val;
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- while(i && j) {
- X[p*(i-1)+j-1] = (double) val;
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- }
- }
-
- fclose(FileDes);
- if (is_gzipped) system(UT);
- return X;
-}
-
-// Displays a matrix
-std::ostream& write_dbl(std::ostream& c,
- double* E,
- int n, int m, int id)
-{
-
- for (int i = 0; i<n;++i){
- for (int j=0; j<m;++j)
- c << *(E+j+id*i) << " ";
- c << std::endl;
- }
- return c << std::endl;
-}
-#endif
-// Reading and writing matrices over field
-
-// Reading a matrice from a (eventually zipped) file
-template<class Field>
-typename Field::Element * read_field(const Field& F,char * mat_file,int* tni,int* tnj)
-{
- char *UT, *File_Name;
- int is_gzipped = 0;
- size_t s = strlen(mat_file);
- typename Field::Element zero;
- F.init(zero,0UL);
- typename Field::Element * X = NULL;
- if ((mat_file[--s] == 'z') &&
- (mat_file[--s] == 'g') &&
- (mat_file[--s] == '.')) {
- is_gzipped = 1;
- char tmp_nam[] = "/tmp/bbXXXXXX_";
- File_Name = tmp_nam;
- mkstemp(File_Name);
- UT = new char[s+34+strlen(File_Name)];
- sprintf(UT,"gunzip -c %s > %s", mat_file, File_Name);
- system(UT);
- sprintf(UT,"\\rm %s", File_Name);
- } else
- File_Name = mat_file;
- FILE* FileDes = fopen(File_Name, "r");
- if (FileDes != NULL) {
- char tmp [200];// usigned long tni, tnj;
- fscanf(FileDes,"%d %d %s\n",tni, tnj, tmp) ;
- int n=*tni;
- int p=*tnj;
- X = new typename Field::Element[n*p];
- for (int i=0;i<n*p;++i)
- X[i] = zero;
- long i,j; long val;
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- while(i && j) {
- F.init(X[p*(i-1)+j-1],val);
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- }
- }
-
- fclose(FileDes);
- if (is_gzipped) system(UT);
- return X;
-}
-
-template<class Field>
-void read_field4(const Field& F,char * mat_file,int* tni,int* tnj,
- typename Field::Element *& NW,typename Field::Element *& NE,
- typename Field::Element *& SW,typename Field::Element *& SE)
-{
- char *UT, *File_Name;
- int is_gzipped = 0;
- size_t s = strlen(mat_file);
- typename Field::Element zero;
- F.init(zero,0);
- typename Field::Element * X;
- if ((mat_file[--s] == 'z') &&
- (mat_file[--s] == 'g') &&
- (mat_file[--s] == '.')) {
- is_gzipped = 1;
- // XXX on fait pas ça !
- File_Name = "/tmp/bbXXXXXX_";
- mkstemp(File_Name);
- UT = new char[s+34+strlen(File_Name)];
- sprintf(UT,"gunzip -c %s > %s", mat_file, File_Name);
- system(UT);
- sprintf(UT,"\\rm %s", File_Name);
- } else
- File_Name = mat_file;
- FILE* FileDes = fopen(File_Name, "r");
- if (FileDes != NULL) {
- char * tmp = new char[200];// usigned long tni, tnj;
- fscanf(FileDes,"%d %d %s\n",tni, tnj, &tmp) ;
- int n=*tni;
- int p=*tnj;
- int no2= n>>1;
- int po2 = p>>1;
- NW = new typename Field::Element[no2*po2];
- NE = new typename Field::Element[no2*(p-po2)];
- SW = new typename Field::Element[(n-no2)*po2];
- SE = new typename Field::Element[(n-no2)*(p-po2)];
-
- for (int i=0;i<no2*po2;++i)
- NW[i] = zero;
- for (int i=0;i<no2*(p-po2);++i)
- NE[i] = zero;
- for (int i=0;i<(n-no2)*po2;++i)
- SW[i] = zero;
- for (int i=0;i<(n-no2)*(p-po2);++i)
- SE[i] = zero;
- long i,j; long val;
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- while(i && j) {
- if (i<=no2){
- if (j<=po2){
- F.init(NW[po2*(i-1)+j-1],val);
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- }
- else{
- F.init(NE[po2*(i-1)+j-1-po2],val);
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- }
- }
- else{
- if (j<=po2){
- F.init(SW[(p-po2)*(i-1-no2)+j-1],val);
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- }
- else{
- F.init(SE[(p-po2)*(i-1-no2)+j-1-po2],val);
- fscanf(FileDes,"%ld %ld %ld\n",&i, &j, &val) ;
- }
- }
- }
- // *A1 = NW;
- //*A2 = NE;
- //*A3 = SW;
- //*A4 = SE;
-
- }
-
- fclose(FileDes);
- if (is_gzipped) system(UT);
-}
-
-// Displays a matrix
-template<class Field>
-std::ostream& write_field(const Field& F,std::ostream& c,
- const typename Field::Element* E,
- int n, int m, int id, bool mapleFormat = false)
-{
-
- double tmp;
- if (mapleFormat) c << "Matrix(" << n <<',' << m << ", [" ;
- for (int i = 0; i<n;++i){
- if (mapleFormat) c << '[';
- for (int j=0; j<m;++j){
- F.convert(tmp,*(E+j+id*i));
- c << tmp;
- if (mapleFormat && j<m-1) c << ',';
- c << ' ';
- }
- if (mapleFormat) c << ']';
- if (mapleFormat && i<n-1) c << ',';
- if (!mapleFormat) c << std::endl;
- }
- if (mapleFormat) c << "])";
- return c ;
-}
-
-// Displays a triangular matrix
-//! @todo let the user choose to convert to a non destructive format (not double but long or Integer...)
-template<class Field>
-std::ostream& write_field(const Field& F,std::ostream& c,
- const FFLAS::FFLAS_UPLO uplo, const FFLAS::FFLAS_DIAG unit,
- const typename Field::Element* E,
- int n, int m, int id, bool mapleFormat = false)
-{
-
- double tmp;
- if (mapleFormat) c << "Matrix(" << n <<',' << m << ",[";
- for (int i = 0; i<n;++i){
- if (mapleFormat) c << '[';
- // under diag
- for (int j=0; j<i ;++j){
- if (uplo == FFLAS::FflasLower)
- F.convert(tmp,*(E+j+id*i));
- else tmp = 0 ;
- c << tmp;
- if (mapleFormat && j<m-1) c << ',';
- c << ' ';
- }
- // on diag
- if (unit == FFLAS::FflasNonUnit)
- F.convert(tmp,*(E+i+id*i));
- else
- tmp = 1.;
- c << tmp;
- if (mapleFormat && i<m-1) c << ',';
- c << ' ';
- // over diag
- for (int j=i+1; j<m;++j){
- if (uplo == FFLAS::FflasUpper)
- F.convert(tmp,*(E+j+id*i));
- else
- tmp = 0 ;
- c << tmp;
- if (mapleFormat && j<m-1) c << ',';
- c << ' ';
- }
- if (mapleFormat) c << ']';
- if (mapleFormat && i<n-1) c << ',';
- if (!mapleFormat) c << std::endl;
- }
- if (mapleFormat) c << "])";
- return c ;
-}
-
-#endif //__FFLASFFPACK_matio_H
diff --git a/utils/args-parser.h b/utils/args-parser.h
deleted file mode 100644
index 059fa4f..0000000
--- a/utils/args-parser.h
+++ /dev/null
@@ -1,336 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* utils/args-parser.C
- * Copyright (C) 2001, 2002 Bradford Hovinen
- *
- * Written by Bradford Hovinen <hovinen at cis.udel.edu>
- * Modified by Dmitriy Morozov <linbox at foxcub.org>. May 27, 2002.
- * Modified 2011 Brice Boyer (more types,...)
- *
- * Added parametrization to the VectorCategory tags to make them fit the
- * Rootbeer meeting design of VectorCategories being parametrized by
- * VectorTraits.
- *
- * ------------------------------------
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- */
-
-#ifndef __FFLASFFPACK_args_parser_H
-#define __FFLASFFPACK_args_parser_H
-
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <string>
-#include <cstring>
-#include <list>
-#include <stdlib.h>
-#include "fflas-ffpack/utils/print-utils.h"
-
-enum ArgumentType {
- TYPE_NONE, TYPE_INT, TYPE_INTEGER, TYPE_DOUBLE, TYPE_INTLIST, TYPE_STR
-};
-#define TYPE_BOOL TYPE_NONE
-
-#define END_OF_ARGUMENTS \
-{ '\0', "\0", "\0", TYPE_NONE, NULL }
-
-struct Argument
-{
- char c;
- const char *example ;
- const char *helpString ;
- ArgumentType type ;
- void *data ;
-};
-// example may be passed as null and will be generated intelligently
-// eg "-b {YN+-}" for bools, "-v v" for all else
-
-namespace FFLAS {
- void parseArguments (int argc, char **argv, Argument *args, bool printDefaults = true);
-}
-
-
-/** writes the values of all arguments, preceded by the programName */
-std::ostream& writeCommandString (std::ostream& os, Argument *args, char* programName);
-
-void printHelpMessage (const char *program, Argument *args, bool printDefaults = false)
-{
- int i, l;
-
- // Skip past libtool prefix in program name
- if (!strncmp (program, "lt-", strlen ("lt-")))
- program += strlen ("lt-");
-
- std::cout << "Usage: " << program << " [options] [<report file>]" << std::endl;
- std::cout << std::endl;
- std::cout << "Where [options] are the following:" << std::endl;
-
- bool messageboolean(false),messageprimality(false);
-
- for (i = 0; args[i].c != '\0'; ++i) {
- if (args[i].example != 0) {
- std::cout << " " << args[i].example;
- l = 10 - (int)strlen (args[i].example);
- do std::cout << ' '; while (--l > 0);
- }
- else if (args[i].type == TYPE_NONE) {
- std::cout << " -" << args[i].c << " {YN+-} ";
- messageboolean = true;
- }
- else
- std::cout << " -" << args[i].c << ' ' << args[i].c << " ";
-
- std::cout << args[i].helpString;
- if (strncmp(args[i].helpString,"Operate over the \"field\"",24) == 0)
- messageprimality = true;
- if (printDefaults) {
- l = 54 - (int)strlen (args[i].helpString);
- do std::cout << ' '; while (--l > 0);
- std::cout << " (default ";
- switch (args[i].type) {
- case TYPE_NONE:
- std::cout << ((*(bool *)args[i].data)?"ON":"OFF");
- break;
- case TYPE_INT:
- std::cout << *(int *) args[i].data;
- break;
- case TYPE_INTEGER:
- std::cout << *(long int *) args[i].data;
- break;
- case TYPE_DOUBLE:
- std::cout << *(double *) args[i].data;
- break;
- case TYPE_INTLIST:
- std::cout << *(std::list<int> *) args[i].data ;
- break;
- case TYPE_STR:
- std::cout << *(std::string *) args[i].data ;
- break;
- }
- std::cout << ")";
- }
- std::cout << std::endl;
- }
-
- std::cout << " -h or -? Display this message" << std::endl;
- if (messageboolean)
- std::cout << "For boolean switches, the argument may be omitted, meaning the switch should be ON" << std::endl;
- std::cout << std::endl;
- std::cout << "If <report file> is '-' the report is written to std output. If <report file> is" << std::endl;
- std::cout << "not given, then no detailed reporting is done. This is suitable if you wish only" << std::endl;
- std::cout << "to determine whether the tests succeeded." << std::endl;
- std::cout << std::endl;
- if (messageprimality)
- std::cout << "[1] N.B. This program does not verify the primality of Q, and does not use a" << std::endl
- << " field extension in the event that Q=p^n, n > 1" << std::endl;
- std::cout << std::endl;
-}
-
-/* Find an argument in the argument list for a character */
-
-Argument *findArgument (Argument *args, char c)
-{
- int i;
-
- for (i = 0; args[i].c != '\0' && args[i].c != c; ++i) ;
-
- if (args[i].c != '\0')
- return &(args[i]);
- else
- return (Argument *) 0;
-}
-
-/* Parse command line arguments */
-
-/*! @internal
- * @brief transforms a string list of ints to a list of int
- * string "12,13,15" is turned into list of ints {12,13,15}
- * @param outlist list once converted
- * @param instring list to be converted
- * @return status message.
- */
-int getListArgs(std::list<int> & outlist, std::string & instring)
-{
- int start = 0 ;
- int count = 0 ;
- size_t i = 0 ;
- for( ; i < instring.size() ; ++i) {
- if (isdigit(instring[i])) {
- ++count;
- continue ;
- }
- if (ispunct(instring[i])) {
- if (!count) {
- std::cout << std::endl << "ill formed list " << instring << std::endl;
- for (size_t sp = 0 ; sp < 16+i ; ++sp)
- std::cout << '-' ;
- std::cout << '^' << std::endl;
- return(1);
- }
- int j = atoi(instring.substr((size_t)start,(size_t)count).c_str());
- outlist.push_front(j);
- count = 0 ;
- start = int(i+1) ;
- }
- else {
- std::cout << std::endl << "ill formed list " << instring << std::endl;
- for (size_t sp = 0 ; sp < 16+i ; ++sp)
- std::cout << '-' ;
- std::cout << '^' << std::endl;
- return(1);
- }
-
- }
- std::cout << std::endl;
- if (!count) {
- std::cout << std::endl << "ill formed list " << instring << std::endl;
- for (size_t sp = 0 ; sp < 15+i ; ++sp)
- std::cout << '-' ;
- std::cout << '^' << std::endl;
- return(1);
- }
-
- int j = atoi(instring.substr((size_t)start,(size_t)count).c_str());
- outlist.push_front(j);
-
- return 0 ;
-}
-
-
-namespace FFLAS {
- void parseArguments (int argc, char **argv, Argument *args, bool printDefaults)
- {
- int i;
- Argument *current;
-
- for (i = 1; i < argc; ++i) {
- // std::cout << "i=" << i << std::endl;
- if (argv[i][0] == '-') {
- if (argv[i][1] == 0) {
- std::cout << "Writing report data to cout (intermingled with brief report)" << std::endl << std::endl;
- std::cout.flush ();
- }
- else if (argv[i][1] == 'h' || argv[i][1] == '?') {
- printHelpMessage (argv[0], args, printDefaults);
- exit (1);
- }
- else if ((current = findArgument (args, argv[i][1])) != (Argument *) 0) {
- switch (current->type) {
- case TYPE_NONE:
- {
- if (argc == i+1 || (argv[i+1][0] == '-' && argv[i+1][1] != '\0')) {
- // if at last argument, or next argument is a switch, set to true
- *(bool *) current->data = true;
- break;
- }
- *(bool *) current->data =
- (argv[i+1][0] == '+'
- || argv[i+1][0] == 'Y'
- || argv[i+1][0] == 'y'
- || argv[i+1][0] == 'T'
- || argv[i+1][0] == 't') ;
- ++i;
- }
- break;
-
- case TYPE_INT:
- {
- *(int *) current->data = atoi (argv[i+1]);
- ++i;
- }
- break;
-
- case TYPE_INTEGER:
- {
- long int tmp = atoi(argv[i+1]);
- *(long int *) current->data = tmp;
- }
- ++i;
- break;
-
- case TYPE_DOUBLE:
- {
- *(double *) current->data = atof (argv[i+1]);
- ++i;
- }
- break;
-
- case TYPE_INTLIST:
- {
- std::string lst = argv[i+1] ;
- std::list<int> LST ;
- getListArgs(LST,lst);
- *(std::list<int> *) current->data = LST ;
- ++i;
- }
- break;
-
- case TYPE_STR:
- {
- *(std::string *) current->data = argv[i+1] ;
- ++i;
- }
- break;
-
- }
- } else {
- std::cerr << "ERROR: Bad argument " << argv[i] << std::endl;
- break;
- }
- } else {
- std::cout << "Writing report data to " << argv[i] << std::endl << std::endl;
- std::cout.flush ();
- }
- }
- }
-}
-
-
-std::ostream& writeCommandString (std::ostream& os, Argument *args, char* programName)
-{
- os << programName;
- for (int i = 0; args[i].c != '\0'; ++i) {
- os << " -" << args[i].c;
- switch (args[i].type) {
- case TYPE_NONE:
- if (! (*(bool *)args[i].data)) os << " N";
- break;
- case TYPE_INT:
- os << ' ' << *(int *) args[i].data;
- break;
- case TYPE_INTEGER:
- os << ' ' << *(long int *) args[i].data;
- break;
- case TYPE_DOUBLE:
- os << ' ' << *(double *) args[i].data;
- break;
- case TYPE_INTLIST:
- os << ' ' << *(std::list<int> *) args[i].data;
- break;
- case TYPE_STR:
- os << ' ' << *(std::string *) args[i].data;
- break;
- }
- }
- return os << std::endl;
-}
-
-
-#endif // __FFLASFFPACK_args_parser_H
diff --git a/utils/debug.h b/utils/debug.h
deleted file mode 100644
index aacea0e..0000000
--- a/utils/debug.h
+++ /dev/null
@@ -1,220 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* utils/debug.h
- *
- * Copyright (C) 2011 Fflas-ffpack
- * Modified by BB, from LinBox
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *
- */
-
-/*! @file utils/debug.h
- * @ingroup util
- * Various utilities for debugging.
- * @todo we should put vector printing elsewhere.
- */
-
-#ifndef __FFLASFFPACK_util_debug_H
-#define __FFLASFFPACK_util_debug_H
-
-#include <sstream>
-
-#include "fflas-ffpack/fflas-ffpack-configuration.h"
-
-
-#ifdef __FFLASFFPACK_HAVE_STDINT_H
-#ifndef __STDC_LIMIT_MACROS
-#define __STDC_LIMIT_MACROS
-#endif
-#include <stdint.h>
-
-#ifndef INT64_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
-#define INT64_MAX std::numeric_limits<int64_t>::max()
-#endif
-
-#ifndef UINT64_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
-#define UINT64_MAX std::numeric_limits<uint64_t>::max()
-#endif
-
-#ifndef INT32_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
-#define INT32_MAX std::numeric_limits<int32_t>::max()
-#endif
-
-#ifndef UINT32_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
-#define UINT32_MAX std::numeric_limits<uint32_t>::max()
-#endif
-
-#ifndef INT16_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
-#define INT16_MAX std::numeric_limits<int16_t>::max()
-#endif
-
-#ifndef UINT16_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
-#define UINT16_MAX std::numeric_limits<uint16_t>::max()
-#endif
-
-#ifndef INT8_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
-#define INT8_MAX std::numeric_limits<int8_t>::max()
-#endif
-
-#ifndef UINT8_MAX
-#warning "somebody nasty previously included <stdint.h> without __STDC_LIMIT_MACROS :)"
-#include <limits>
-#define UINT8_MAX std::numeric_limits<uint8_t>::max()
-#endif
-
-#else
-#error "you need intXX_t types"
-#endif
-
-#ifndef DEBUG
-#define FFLASFFPACK_check(check) ((void) 0)
-#else
-#define FFLASFFPACK_check(check) \
-if (!(check)) {\
-throw FFPACK::Failure (__func__, __FILE__, __LINE__, #check); /*BB : should work on non gnu compilers too */ \
-}
-#endif
-
-
-
-namespace FFPACK {
-
-
- /*! A precondtion failed.
- * @ingroup util
- * The \c throw mechanism is usually used here as in
- \code
- if (!check)
- throw(Failure(__func__,__LINE__,"this check just failed");
- \endcode
- * The parameters of the constructor help debugging.
- */
- class Failure {
- protected:
- static std::ostream *_errorStream;
-
- public:
- /*! @internal
- * A precondtion failed.
- * @param function usually \c __func__, the function that threw the error
- * @param line usually \c __LINE__, the line where it happened
- * @param check a string telling what failed.
- */
- Failure (const char *function, int line, const char *check)
- {
- if (_errorStream == (std::ostream *) 0)
- _errorStream = &std::cerr;
-
- (*_errorStream) << std::endl << std::endl;
- (*_errorStream) << "ERROR (" << function << ":" << line << "): ";
- (*_errorStream) << "Precondition not met:" << check << std::endl;
- }
-
- /*! @internal
- * A precondtion failed.
- * The parameter help debugging. This is not much different from the previous
- * except we can digg faster in the file where the exception was triggered.
- * @param function usually \c __func__, the function that threw the error
- * @param file usually \c __FILE__, the file where this function is
- * @param line usually \c __LINE__, the line where it happened
- * @param check a string telling what failed.
- */
- Failure (const char* function, const char *file, int line, const char *check)
- {
- if (_errorStream == (std::ostream *) 0)
- _errorStream = &std::cerr;
-
- (*_errorStream) << std::endl << std::endl;
- (*_errorStream) << "ERROR (at " << function << " in " << file << ':' << line << "): " << std::endl;
- (*_errorStream) << "Precondition not met:" << check << std::endl;
- }
-
- static void setErrorStream (std::ostream &stream);
-
- /*! @internal overload the virtual print of LinboxError.
- * @param o output stream
- */
- std::ostream &print (std::ostream &o) const
- {
- if (std::ostringstream * str = dynamic_cast<std::ostringstream*>(_errorStream))
- return o << str->str() ;
- else
- throw "FFLAS-FFPACK ERROR: Failure exception is not initialized correctly";
- }
- };
-
-#if 0
- /*! @internal A function is "not implemented yet(tm)".
- * where, why ?
- */
- class NotImplementedYet {
- protected:
- static std::ostream *_errorStream;
-
- public:
- /*! @internal
- * A precondtion failed.
- * The parameter help debugging. This is not much different from the previous
- * except we can digg faster in the file where the exception was triggered.
- * @param function usually \c __func__, the function that threw the error
- * @param file usually \c __FILE__, the file where this function is
- * @param line usually \c __LINE__, the line where it happened
- * @param why by default, lazy people don't provide an explanation.
- */
- NotImplementedYet() {}
-
- NotImplementedYet(const char * function,
- const char* file,
- int line,
- const char * why='\0')
- {
- if (_errorStream == (std::ostream *) 0)
- _errorStream = &std::cerr;
-
- (*_errorStream) << std::endl << std::endl;
- (*_errorStream) << "ERROR (at " << function << " in " << file << ':' << line << "): " << std::endl;
- (*_errorStream) << " This function is not implemented yet" ;
- if (why)
- (*_errorStream) << " (" << why << ")" <<std::endl;
- else
- (*_errorStream) << "." << std::endl;
-
- }
- };
-
-#endif
-
- std::ostream *Failure::_errorStream;
-} // FFPACK
-
-#endif // __FFLASFFPACK_util_debug_H
diff --git a/utils/print-utils.h b/utils/print-utils.h
deleted file mode 100644
index 5ecfbc1..0000000
--- a/utils/print-utils.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* tests/print-utils.h
- * Copyright (C) 2011, Brice Boyer <bboyer at imag.fr>
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- */
-
-#ifndef __FFLASFFPACK_print_utils_H
-#define __FFLASFFPACK_print_utils_H
-
-#include <vector>
-// #include <pair>
-#include <list>
-#include <set>
-
-namespace std
-{
-
- /*! Prints a vector on output.
- * @param o output stream
- * @param v vector
- * @warning <<(ostream&,T&) exists !
- */
- template<class T>
- std::ostream & operator<<(std::ostream&o, const std::vector<T> & v)
- {
- o << '[' ;
- if (v.size()) {
- size_t i = 0 ;
- for (; i < v.size()-1 ; ++i)
- o << v[i] << ',' ;
- o << v[i] ;
- }
- return o << ']' ;
- }
-
-
- /*! Prints a pair.
- * @param o output stream
- * @param C a pair
- * @warning <<(ostream&,T&) exists !
- */
- template<class S, class T>
- std::ostream& operator<<(std::ostream& o, const std::pair<S, T> & C)
- {
- o << '(' << C.first << ", " << C.second << ')';
- return o ;
- }
-
-
- /*! Prints a list.
- * @param o output stream
- * @param C a pair
- * @warning <<(ostream&,T&) exists !
- */
- template<class T>
- std::ostream& operator<< (std::ostream& o, const std::list<T> & L)
- {
- typename std::list<T>::const_iterator it = L.begin() ;
- o << '{' ;
- if (it != L.end() )
- while(true) {
- o << *it ;
- if (++it != L.end())
- o << ", " ;
- else
- break;
- }
- return o << '}' ;
- }
-
-
- /*! Prints a set.
- * @param o output stream
- * @param C a pair
- * @warning <<(ostream&,T&) exists !
- */
- template<class T>
- std::ostream& operator<< (std::ostream& o, const std::set<T> & L)
- {
- typename std::set<T>::const_iterator it = L.begin() ;
- o << '|' ;
- if (it != L.end() )
- while(true) {
- o << *it ;
- if (++it != L.end())
- o << ", " ;
- else
- break;
- }
- return o << '|' ;
- }
-
-
-#if 0
- std::ostream &operator << (std::ostream &out, const std::vector<bool> &S)
- {
- std::vector<bool>::const_iterator i;
-
- for (i = S.begin (); i != S.end (); ++i) {
- out << ((*i) ? "1" : "0");
- if (i != S.end () - 1)
- out << ", ";
- }
-
- return out;
- }
-
- template<class T, template <class T> class Container>
- std::ostream& operator<< (std::ostream& o, const Container<T>& C)
- {
- for(typename Container<T>::const_iterator refs = C.begin();
- refs != C.end() ;
- ++refs )
- o << (*refs) << " " ;
- return o << std::endl;
- }
-
-#endif
-}
-
-#endif // __FFLASFFPACK_print_utils_H
diff --git a/utils/timer.C b/utils/timer.C
deleted file mode 100644
index fa93ff9..0000000
--- a/utils/timer.C
+++ /dev/null
@@ -1,218 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/* tests/timer.C
- * Copyright (C) 1994-1997 Givaro Team
- *
- * Written by T. Gautier
- * Imported from LinBox by Clément Pernet.
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *
- * This file implements the C++ interface to commentators (for
- * providing runtime commentary to the user)
- */
-#ifndef __FFLASFFPACK_timer_C
-#define __FFLASFFPACK_timer_C
-// Description:
-// - various timer objects
-// - to be rewritten to be more efficient
-
-#include <cmath>
-
-extern "C" {
-# include <sys/time.h>
-# include <sys/resource.h>
-// int getrusage (int, struct rusage*) ;
-}
-
-#include <iostream>
-
-#include "timer.h"
-
-// Return a value to initialize random generator
-long BaseTimer::seed()
-{
- struct timeval tp;
- gettimeofday(&tp, 0) ;
- return(tp.tv_usec);
-}
-
-// Output the value of the timer :
-std::ostream& BaseTimer::print( std::ostream& o ) const
-{ return o << _t ; }
-
-// Some arithmetic operator :
-BaseTimer& BaseTimer::operator = (const BaseTimer & T)
-{
- _t = T._t ;
- return *this ;
-}
-
-// Computes and returns interval of time
-// beteween *this and T
-const BaseTimer BaseTimer::operator - (const BaseTimer & T) const
-{
- BaseTimer Tmp ;
- Tmp._t = _t - T._t ;
- return Tmp ;
-}
-
-const BaseTimer BaseTimer::operator - ()
-{
- BaseTimer Tmp ;
- Tmp._t = -_t ;
- return Tmp ;
-}
-
-const BaseTimer BaseTimer::operator + (const BaseTimer & T) const
-{
- BaseTimer Tmp ;
- Tmp._t = _t + T._t ;
- return Tmp ;
-}
-
-// Start timer
-void RealTimer::start()
-{
- struct timeval tmp2 ;
- gettimeofday (&tmp2, 0) ;
-
- // real time
- _t = (double) tmp2.tv_sec +
- ((double) tmp2.tv_usec)/ (double)BaseTimer::MSPSEC ;
-}
-
-
-// Stop timer
-void RealTimer::stop()
-{
- struct timeval tmp2 ;
- gettimeofday (&tmp2, 0) ;
-
- // real time
- _t = (double) tmp2.tv_sec +
- ((double) tmp2.tv_usec)/ (double)BaseTimer::MSPSEC - _t ;
-}
-
-// Start timer
-void UserTimer::start()
-{
- struct rusage tmp1 ; // to getrusage (sys+user times)
- getrusage (RUSAGE_SELF, &tmp1) ;
- // user time
- _t = (double) tmp1.ru_utime.tv_sec +
- ((double) tmp1.ru_utime.tv_usec)/ (double)MSPSEC ;
-}
-
-
-// Stop timer
-void UserTimer::stop()
-{
- struct rusage tmp1 ; // to getrusage (sys+user times)
- getrusage (RUSAGE_SELF, &tmp1) ;
- // user time
- _t = (double) tmp1.ru_utime.tv_sec +
- ((double) tmp1.ru_utime.tv_usec)/ (double)MSPSEC - _t ;
-}
-
-
-// Start timer
-void SysTimer::start()
-{
- struct rusage tmp1 ; // to getrusage (sys+user times)
- getrusage (RUSAGE_SELF, &tmp1) ;
- // user time
- _t = (double) tmp1.ru_stime.tv_sec +
- ((double) tmp1.ru_stime.tv_usec)/ (double)MSPSEC ;
-}
-
-
-// Stop timer
-void SysTimer::stop()
-{
- struct rusage tmp1 ; // to getrusage (sys+user times)
- getrusage (RUSAGE_SELF, &tmp1) ;
- // user time
- _t = (double) tmp1.ru_stime.tv_sec +
- ((double) tmp1.ru_stime.tv_usec)/ (double)MSPSEC - _t ;
-}
-
-
-
-// Clear timer :
-void Timer::clear()
-{ rt.clear() ; ut.clear(); st.clear() ; }
-
-// Start timer
-void Timer::start()
-{ rt.start() ; ut.start(); st.start() ; }
-
-// Stop timer
-void Timer::stop()
-{ rt.stop() ; ut.stop(); st.stop() ; }
-
-
-std::ostream& Timer::print( std::ostream& o ) const
-{
- o << "user time: " << usertime() << '\n' ;
- o << "sys. time: " << systime() << '\n' ;
- return o << "real time: " << realtime() << std::endl ;
-}
-
-// Some arithmetic operator :
-Timer& Timer::operator = (const Timer & T)
-{
- ut = T.ut ;
- st = T.st ;
- rt = T.rt ;
- return *this ;
-}
-
-// Comput._tes and returns interval of time
-// beteween *this and T
-const Timer Timer::operator - (const Timer & T) const
-{
- Timer Tmp ;
- Tmp.ut = ut - T.ut ;
- Tmp.st = st - T.st ;
- Tmp.rt = rt - T.rt ;
- return Tmp ;
-}
-
-const Timer Timer::operator - ()
-{
- Timer Tmp ;
- Tmp.ut = -ut ;
- Tmp.st = -st ;
- Tmp.rt = -rt ;
- return Tmp ;
-}
-
-const Timer Timer::operator + (const Timer & T) const
-{
- Timer Tmp ;
- Tmp.ut = ut + T.ut ;
- Tmp.st = st + T.st ;
- Tmp.rt = rt + T.rt ;
- return Tmp ;
-}
-
-
-#endif
diff --git a/utils/timer.h b/utils/timer.h
deleted file mode 100644
index 3eaa045..0000000
--- a/utils/timer.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
-/* test/timer.h
- * Copyright (C) 1994-1997 Givaro Team
- *
- * Written by T. Gautier
- *
- * ------------------------------------
- * Modified by Bradford Hovinen <hovinen at cis.udel.edu>
- *
- * Added _start_t member to BaseTimer, so that stop () does not clobber the
- * class' memory of its start time. This allows it to be called repeatedly to
- * get elapsed times.
- * ------------------------------------
- * Modified by Clement Pernet
- * integrated into FFLAS_FFPACK
- *
- * ------------------------------------
- *
- * ========LICENCE========
- * This file is part of the library FFLAS-FFPACK.
- *
- * FFLAS-FFPACK is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- *
- * This file implements the C++ interface to commentators (for
- * providing runtime commentary to the user)
- */
-
-#ifndef __TIMER_H
-#define __TIMER_H
-
-#include <iostream>
-
-class BaseTimer {
- public:
- enum {
- MSPSEC = 1000000 // microsecond per second
- };
-
- // -- Clear timer :
- inline void clear() { _t = 0; }
-
- // -- total amount of second spent
- inline double time() const { return _t; }
-
- // -- Return a value to initialize random generator
- static long seed();
-
- // -- basic methods:
- std::ostream& print( std::ostream& ) const;
-
- // -- Some arithmetic operators to compute cumulative time :
- BaseTimer& operator = (const BaseTimer & T) ;
- const BaseTimer operator - (const BaseTimer & T) const;
- const BaseTimer operator - () ;
- const BaseTimer operator + (const BaseTimer & T) const;
- BaseTimer& operator += (const BaseTimer & T) { return *this = *this + T; };
- BaseTimer& operator -= (const BaseTimer & T) { return *this = *this - T; };
-
- public:
- double _t; // time
-};
-
-inline std::ostream &operator << (std::ostream &o, const BaseTimer &BT)
- { return BT.print(o); }
-
-class RealTimer : public BaseTimer {
- public:
- inline RealTimer (const BaseTimer &BT) : BaseTimer (BT) {};
- inline RealTimer () {};
- void start ();
- void stop ();
-};
-
-
-class UserTimer : public BaseTimer {
- public:
- inline UserTimer (const BaseTimer &BT) : BaseTimer (BT) {};
- inline UserTimer () {};
- void start ();
- void stop ();
-};
-
-
-class SysTimer : public BaseTimer {
- public:
- inline SysTimer (const BaseTimer &BT): BaseTimer (BT) {};
- inline SysTimer () {};
- void start ();
- void stop ();
-};
-
-
-class Timer {
-public :
-
- // Clear timer :
- void clear();
-
- // Start timer
- void start();
-
- // Stop timer
- void stop();
-
- // total amount of second spent in user mode
- double usertime() const { return ut.time(); }
-
- // total amount of second spent in system mode
- double systime () const { return st.time(); }
-
- // real total amount of second spent.
- double realtime () const { return rt.time(); }
-
- // retourne une petite graine
- // long seed() const { return RealTimer::seed(); }
-
- // Some arithmetic operators to compute cumulative time :
- Timer& operator = (const Timer & T) ;
- const Timer operator - (const Timer & T) const;
- const Timer operator - () ;
- const Timer operator + (const Timer & T) const;
- /* const */Timer& operator += (const Timer & T) { return *this = *this + T; };
- /* const */Timer& operator -= (const Timer & T) { return *this = *this - T; };
-
- // -- methods :
- std::ostream &print (std::ostream &) const;
-
-
-
- RealTimer rt;
- UserTimer ut;
- SysTimer st;
-};
-
-// inline std::ostream &operator << (std::ostream &o, const Timer &T)
-// { return T.print (o); }
-
-inline std::ostream &operator << (std::ostream &o, const Timer &T)
-{
- double ut = T.usertime();
- if (ut < 0.0000000001) ut = 0;
- return o << T.realtime() << "s (" << ut << " cpu) ";
-}
-
-
-#include <omp.h>
-struct OMPTimer {
- double _c;
- void start() { _c = omp_get_wtime(); }
- void stop() { _c = omp_get_wtime() - _c; }
- void clear() { _c = 0.0; }
- double realtime() const { return _c; }
- double usertime() const { return _c; }
- OMPTimer& operator =(const OMPTimer& t) { _c = t._c; return *this; }
- OMPTimer& operator+=(const OMPTimer& t) { _c += t._c; return *this; }
- OMPTimer& operator-=(const OMPTimer& t) { _c -= t._c; return *this; }
- OMPTimer operator +(const OMPTimer& t) const
- {
- OMPTimer r; r._c = _c + t._c; return r;
- }
- OMPTimer operator -(const OMPTimer& t) const
- {
- OMPTimer r; r._c = _c - t._c; return r;
- }
- OMPTimer operator -() { OMPTimer r; r._c = - _c; return r; }
-};
-//#endif
-//
-inline std::ostream &operator << (std::ostream &o, const OMPTimer &T)
-{
- return o << T.usertime() << "s" ;
-}
-
-
-
-#include "timer.C"
-
-#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/fflas-ffpack.git
More information about the debian-science-commits
mailing list