[linbox] 01/02: Imported Upstream version 1.4.2
Doug Torrance
dtorrance-guest at moszumanska.debian.org
Wed Aug 10 03:51:15 UTC 2016
This is an automated email from the git hooks/post-receive script.
dtorrance-guest pushed a commit to branch master
in repository linbox.
commit db70f946ee9677a548fb5d7a1ad472b4411e63fa
Author: Doug Torrance <dtorrance at piedmont.edu>
Date: Tue Aug 9 23:51:01 2016 -0400
Imported Upstream version 1.4.2
---
.gitignore | 2 +-
AUTHORS | 2 +
ChangeLog | 4 +
Makefile.am | 2 +-
README | 34 -
README.md | 46 +
auto-install.sh | 34 +-
benchmarks/benchmark-order-basis.C | 87 +-
benchmarks/perfpublisher.sh | 22 +-
configure.ac | 35 +-
examples/Makefile.am | 16 +-
examples/bench-fft.C | 81 +-
examples/bench-matpoly-mult.C | 29 +-
examples/bench-new-fft.C | 333 ++++++
examples/smith.C | 2 +-
examples/smithvalence.h | 4 +-
examples/solve.C | 207 +---
examples/test.sh | 8 +-
interfaces/sage/Makefile.am | 2 +-
linbox.pc.in | 6 +-
linbox/Makefile.am | 2 +-
linbox/algorithms/Makefile.am | 4 +-
linbox/algorithms/block-wiedemann.h | 7 +-
.../algorithms/classic-rational-reconstruction.h | 2 +-
linbox/algorithms/coppersmith.h | 6 +-
linbox/algorithms/matpoly-mult.h | 16 +-
linbox/algorithms/polynomial-matrix/Makefile.am | 1 +
.../matpoly-mult-fft-multiprecision.inl | 191 ++--
...tiprecision.inl => matpoly-mult-fft-recint.inl} | 613 +++++-----
.../matpoly-mult-fft-wordsize-fast.inl | 16 +-
.../matpoly-mult-fft-wordsize-three-primes.inl | 69 +-
.../matpoly-mult-fft-wordsize.inl | 19 +-
.../polynomial-matrix/matpoly-mult-fft.h | 115 +-
linbox/algorithms/polynomial-matrix/order-basis.h | 229 +++-
.../polynomial-matrix/polynomial-fft-algorithms.h | 401 +++++++
.../polynomial-matrix/polynomial-fft-butterflies.h | 492 ++++++++
.../polynomial-matrix/polynomial-fft-init.h | 299 +++++
.../polynomial-fft-transform-simd.inl | 804 +++++++-------
.../polynomial-matrix/polynomial-fft-transform.h | 211 +++-
.../polynomial-matrix/polynomial-fft-transform.inl | 28 +-
.../polynomial-matrix/polynomial-matrix-domain.h | 10 +-
.../polynomial-matrix/simd-additional-functions.h | 474 ++++++++
linbox/algorithms/polynomial-matrix/simd.h | 7 +-
linbox/algorithms/rational-reconstruction.h | 4 +-
linbox/algorithms/rational-solver.inl | 2 +-
.../algorithms/smith-form-sparseelim-poweroftwo.h | 2 +-
linbox/algorithms/vector-fraction.h | 4 +-
linbox/blackbox/apply.h | 6 +-
linbox/linbox-config.h | 11 +-
linbox/matrix/polynomial-matrix.h | 45 +-
linbox/matrix/sparsematrix/sparse-csr-matrix.h | 6 +-
linbox/randiter/givaro-poly.h | 2 +-
linbox/randiter/mersenne-twister.h | 6 +-
linbox/randiter/random-fftprime.h | 237 ++--
linbox/ring/modular/Makefile.am | 1 -
linbox/ring/modular/modular-int32.h | 271 ++---
linbox/ring/modular/modular-int64.h | 288 +++--
linbox/ring/modular/modular-unsigned.h | 1172 +++++++++++++++-----
linbox/ring/modular/modular-unsigned.inl | 828 --------------
linbox/ring/ntl/ntl-gf2e.h | 2 +-
linbox/ring/ntl/ntl-lzz_pex.h | 2 +-
linbox/ring/ntl/ntl-zz_p.h | 4 +-
linbox/ring/ntl/ntl-zz_pe.h | 6 +-
linbox/ring/pir-modular-int32.h | 2 +
linbox/ring/pir-ntl-zz_p.h | 2 +-
linbox/solutions/smith-form.h | 103 +-
linbox/util/Makefile.am | 2 +-
linbox/vector/blas-vector.h | 6 +-
macros/fflas-ffpack-check.m4 | 4 +-
tests/.gitignore | 2 +-
tests/jenkins-maker.sh | 87 ++
tests/perfpublisher.sh | 26 +-
tests/test-charpoly.C | 6 +-
tests/test-field.h | 8 +-
tests/test-order-basis.C | 38 +-
tests/test-smith-form-adaptive.C | 203 +---
tests/test-smith-form-binary.C | 211 +---
tests/test-smith-form-iliopoulos.C | 2 +
tests/test-smith-form.C | 214 +---
tests/test-smith-form.h | 166 +++
80 files changed, 5482 insertions(+), 3471 deletions(-)
diff --git a/.gitignore b/.gitignore
index 673faa8..ed889a3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -79,4 +79,4 @@ macros/ltsugar.m4
macros/ltversion.m4
macros/lt~obsolete.m4
stamp-h1
-linbox.pc
+linbox.pc
\ No newline at end of file
diff --git a/AUTHORS b/AUTHORS
index d259730..495d6b3 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -6,10 +6,12 @@ Mark Giesbrecht <mwg at csd.uwo.ca>
Pascal Giorgi <Pascal.Giorgi at lirmm.fr>
Bradford Hovinen <hovinen at cis.udel.edu>
Erich Kaltofen <kaltofen at math.ncsu.edu>
+Romain Lebreton
Clement Pernet <Clement.Pernet at imag.fr>
Daniel Roche <roche at cis.udel.edu>
B. David Saunders <saunders at cis.udel.edu>
Arne Storjohann <storjoha at inf.ethz.ch>
William Turner <turnerw at wabash.edu>
+Bastien Vialla
Gilles Villard <Gilles.Villard at ens-lyon.fr>
Zhendong Wan <wan at cis.udel.edu>
diff --git a/ChangeLog b/ChangeLog
index f6d60cc..c93de72 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2016-07-30 cpernet v1.4.2
+ * cleanup and new features on polynomial matrices
+ * many bug fixes ensuring support of gcc-4.8, 5.3, 6.1 clang-3.4 and
+ icpc on i386, x86_64, ubuntu osx, fedora and ppcle
2016-02-24 cpernet v1.4.1
* update the build system (add pkgconfig file, and a more consistent way
of dealing with dependencies)
diff --git a/Makefile.am b/Makefile.am
index 4d4e4d6..b2d457f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -106,7 +106,7 @@ git:
git commit -a; git pull; git push
-VERSION=1.4.1
+VERSION=1.4.2
EXTRA_DIST=auto-install.sh
#incremente-versions
diff --git a/README b/README
deleted file mode 100644
index b2b4705..0000000
--- a/README
+++ /dev/null
@@ -1,34 +0,0 @@
- ****** The Linbox Library ******
-
-PURPOSE:
-
-The Linbox library provides functionality for exact linear algebra.
-See doc/mainpage.doxy for more info.
-
-INSTALLATION:
-
-See doc/install-dev.html for installation from the git lastest version.
-See doc/install-dist.html for installation from a release tarball.
-See INSTALL for generic installation information.
-
-
-AVAILABILITY: from linalg.org and from github.com/linbox-team
-
-
-REQUIREMENTS: GMP, ATLAS (or other cblas, lapack), Givaro, fflas-ffpack
-OPTIONAL Dependencies: NTL, IML, FLINT, M4RI, M4RIE
-See doc/install*html for details.
-
-This library requires the GNU C++ compiler (gcc-4.3 or newer) or any
-compiler supporting advanced template features.
-
-
-==========================================================
-The linbox website is http://linalg.org
-
-Corrections, suggestions and comments to :
-linbox-use at googlegroups.com
-
-Last update : 2015 July
-
-
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2213a6c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,46 @@
+# The Linbox Library
+
+[Build status](https://ci.inria.fr/linbox/job/LinBox/)
+
+## Purpose
+
+The Linbox library provides functionality for exact linear algebra.
+See doc/mainpage.doxy for more info.
+
+## Installation
+
+See doc/install-dev.html for installation from the latest git version.
+See doc/install-dist.html for installation from a release tarball.
+See INSTALL for generic installation information.
+
+
+## Availability
+
+From github.com/linbox-team
+
+
+## Requirements
+
+- GMP
+- any BLAS (Fortran or C): e.g. ATLAS, OpenBLAS, ...
+- Givaro
+- fflas-ffpack
+
+## Optional Dependencies
+- NTL
+- IML
+- FLINT
+- M4RI
+- M4RIE
+
+See doc/install*html for details.
+
+This library requires the GNU C++ compiler (gcc-4.3 or newer) or any
+compiler supporting advanced template features.
+
+## Contact and discussions
+
+Corrections, suggestions and comments to linbox-use at googlegroups.com
+
+
+
diff --git a/auto-install.sh b/auto-install.sh
index 695c825..7241bb7 100755
--- a/auto-install.sh
+++ b/auto-install.sh
@@ -35,7 +35,7 @@ OPTIM="--enable-optimization"
OPTIM_VAR=""
CHECK_VAR=""
#options
-PREFIX_LOC="/usr/local"
+PREFIX_LOC="/tmp"
PREFIX_VAR=""
PREFIX="--prefix=$PREFIX_LOC"
BLAS=""
@@ -86,7 +86,7 @@ help() {
echo " * usage :"
echo
echo " --stable=[yes,no] : install latest stable versions or latest git versions."
- echo " Default : yes, even if switch ommitted. No argument means yes"
+ echo " Default : no, even if switch ommitted. No argument means no"
echo " --prefix=MY/PATH : install all libraries under MY/PATH."
echo " Default : /tmp/"
@@ -97,7 +97,7 @@ help() {
echo
echo " --with-gmp=GMP/PATH : tell where gmp is."
echo " Default : /usr, /usr/local. No argument is Default"
- echo " --with-blas=BLAS/PATH : same as GMP for BLAS. (will check anyway)"
+ echo " --with-blas-libs=BLAS/PATH : same as GMP for BLAS. (will check anyway)"
echo " --with-ntl=NTL/PATH : same as GMP for NTL. (default)"
echo " --with-iml=IML/PATH : same as GMP for IML. (default)"
echo " --extra-flags=\"\" : give extra compiler flags."
@@ -254,8 +254,8 @@ for i in "$@" ; do
GMP="$i"
GMP_VAR="true"
;;
- "--with-blas")
- if [ "x$BLAS_VAR" = "xtrue" ] ; then echo "GMP path already set ?" ; help ; exit -1; fi
+ "--with-blas-libs")
+ if [ "x$BLAS_VAR" = "xtrue" ] ; then echo "BLAS path already set ?" ; help ; exit -1; fi
BLAS=$QUI=\"$QUOI\"
BLAS_VAR="true"
;;
@@ -341,6 +341,10 @@ esac
done
MAKEPROG="make ${MAKEOPT}"
+export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$PREFIX_LOC/lib/pkgconfig
+echo "PKG_CONFIG_PATH=$PKG_CONFIG_PATH"
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PREFIX_LOC/lib
+echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
######################
# create build dir #
@@ -505,9 +509,6 @@ ${MAKEPROG} install | tee -a ../../auto-install.log|| die
#return in build
cd ..
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${PREFIX_LOC}/lib
-
-
cool| tee -a ../auto-install.log
##########################
@@ -537,14 +538,14 @@ if [ "$STABLE_VAR" = "true" ]; then
chmod +x configure.fflas.exe
./configure.fflas.exe| tee -a ../../auto-install.log
rm -rf configure.fflas.exe
- #./configure "$PREFIX" "$DEBUG" "$OPTIM" "$BLAS" "$GIVARO" "$WARNINGS" || die
+ #./configure "$PREFIX" "$DEBUG" "$OPTIM" "$BLAS" "$WARNINGS" || die
else
echo "./autogen.sh $PREFIX $DEBUG $OPTIM $BLAS $WARNINGS"| tee -a ../../auto-install.log
echo "./autogen.sh $PREFIX $DEBUG $OPTIM $BLAS $WARNINGS" > configure.fflas.exe
chmod +x configure.fflas.exe
./configure.fflas.exe| tee -a ../../auto-install.log
rm -rf configure.fflas.exe
- #./autogen.sh "$PREFIX" "$DEBUG" "$OPTIM" "$BLAS" "$GIVARO" "$WARNINGS" || die
+ #./autogen.sh "$PREFIX" "$DEBUG" "$OPTIM" "$BLAS" "$WARNINGS" || die
fi
echo -e "${BEG}building Fflas-Ffpack..."| tee -a ../../auto-install.log
@@ -592,16 +593,13 @@ echo -e " * to ensure you don't get undefined symbols !"| tee -a ./auto-install.
echo ""| tee -a ./auto-install.log
-GIVARO="--with-givaro=$PREFIX_LOC"
-FFLAFLAS="--with-fflas-ffpack=$PREFIX_LOC"
-
if [ -x autogen.sh ] ; then
- echo "./autogen.sh $PREFIX $DEBUG $OPTIM $GMP $BLAS $NTL $GIVARO $FFLAFLAS $WARNINGS $IML $SAGE $DRIV"| tee -a ./auto-install.log
- ./autogen.sh "$PREFIX" "$DEBUG" "$OPTIM" "$GMP" "$BLAS" "$NTL" "$GIVARO" "$FFLAFLAS" "$WARNINGS" "$IML" "$SAGE" "$DRIV" | tee -a ./auto-install.log|| die
+ echo "./autogen.sh $PREFIX $DEBUG $OPTIM $GMP $BLAS $NTL $WARNINGS $IML $SAGE $DRIV"| tee -a ./auto-install.log
+ ./autogen.sh "$PREFIX" "$DEBUG" "$OPTIM" "$GMP" "$BLAS" "$NTL" "$WARNINGS" "$IML" "$SAGE" "$DRIV" | tee -a ./auto-install.log|| die
else
- echo "./configure $PREFIX $DEBUG $OPTIM $GMP $BLAS $NTL $GIVARO $FFLAFLAS $WARNINGS $IML $SAGE $DRIV"| tee -a ./auto-install.log
- # ./configure $PREFIX $DEBUG $OPTIM $GMP $BLAS $NTL $GIVARO $FFLAFLAS $WARNINGS $IML $SAGE $DRIV || die
- ./configure "$PREFIX" "$DEBUG" "$OPTIM" "$GMP" "$BLAS" "$NTL" "$GIVARO" "$FFLAFLAS" "$WARNINGS" "$IML" "$SAGE" "$DRIV" | tee -a ./auto-install.log|| die
+ echo "./configure $PREFIX $DEBUG $OPTIM $GMP $BLAS $NTL $WARNINGS $IML $SAGE $DRIV"| tee -a ./auto-install.log
+ # ./configure $PREFIX $DEBUG $OPTIM $GMP $BLAS $NTL $WARNINGS $IML $SAGE $DRIV || die
+ ./configure "$PREFIX" "$DEBUG" "$OPTIM" "$GMP" "$BLAS" "$NTL" "$WARNINGS" "$IML" "$SAGE" "$DRIV" | tee -a ./auto-install.log|| die
fi
echo -e "${BEG}building LinBox..."| tee -a ./auto-install.log
diff --git a/benchmarks/benchmark-order-basis.C b/benchmarks/benchmark-order-basis.C
index 5b68e77..e0ffa94 100755
--- a/benchmarks/benchmark-order-basis.C
+++ b/benchmarks/benchmark-order-basis.C
@@ -1,4 +1,6 @@
/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+//#define __FFLASFFPACK_SEQUENTIAL
+
#include <iostream>
#include <iomanip>
size_t getPeakRSS( );
@@ -6,7 +8,7 @@ size_t getCurrentRSS( );
//#define MEMINFO std::right<<std::setw(20)<<" ----> Max Mem: "<<getPeakRSS()/1000000.<<"Mo"
#define MB(x) ((x)/(double)(1<<20))
//#define MB(x) ((x)>>20)
-#define MEMINFO std::right<<std::setw(20)<<" ----> Mem: "<<MB(getCurrentRSS())<<" Mo (Max: "<<MB(getPeakRSS())<<" Mo)"
+#define MEMINFO std::right<<" ----> Mem: "<<MB(getCurrentRSS())<<" Mo (Max: "<<MB(getPeakRSS())<<" Mo)"
#include "linbox/matrix/polynomial-matrix.h"
#include "linbox/randiter/random-fftprime.h"
#include "linbox/randiter/random-prime.h"
@@ -262,42 +264,33 @@ void bench_sigma(const Field& F, RandIter& Gen, size_t m, size_t n, size_t d, s
//typedef typename Field::Element Element;
//typedef PolynomialMatrix<PMType::matfirst,PMStorage::plain,Field> MatrixP;
typedef PolynomialMatrix<PMType::polfirst,PMStorage::plain,Field> MatrixP;
-
+ std::cout<<"Order Basis computation over ";F.write(cout)<<endl;
integer p;
F.characteristic(p);
- size_t memp=length(p)+(p.bitsize()>=26?8:0);
- size_t data_in=3*m*n*d*memp;
- size_t data_out=2*m*m*(d+1)*memp;
- size_t data_comp= 2*m*m*d*(length(uint64_t(m*d)*p*p)+(p.bitsize()>26?8:0));
+ size_t memp=length(p)+(p.bitsize()>=64?16:0);
+ //size_t data_in=3*m*n*d*memp;
+ //size_t data_out=2*m*m*(d+1)*memp;
+ //size_t data_comp= 2*m*m*d*(length(uint64_t(m*d)*p*p)+(p.bitsize()>26?8:0));
std::cout<<"**************************"<<std::endl;
std::cout<<"mem(p) : "<<memp<<std::endl;
- std::cout<<"mem(p) : "<<sizeof(p)<<std::endl;
- std::cout<<"Projected Memory : "<< MB(data_in+data_out+data_comp)<<"Mo"<<std::endl;
+ //std::cout<<"Projected Memory : "<< MB(data_in+data_out+data_comp)<<"Mo"<<std::endl;
std::cout<<"Available memory : "<<MB(getMemorySize())<<std::endl;
std::cout<<"**************************"<<std::endl;
std::cout<<"**************************"<<std::endl<<std::endl<<std::endl;
std::cout<<"[begin ] : "<<MEMINFO<<std::endl;
- MatrixP Serie(F, m, n, d);
- std::cout<<"[initial sequence] : "<<MB(m*n*d*memp)<<"Mo"<<MEMINFO<<std::endl;
- std::cout<<"--> " <<MB(Serie.realmeminfo())<<std::endl;
- std::cout<<"--> " <<MB(Serie.meminfo())<<" "<<std::endl;
+ MatrixP *Serie = new MatrixP(F, m, n, d);
// set the Serie at random
for (size_t k=0;k<d;++k)
for (size_t i=0;i<m;++i)
for (size_t j=0;j<n;++j)
- Gen.random(Serie.ref(i,j,k));
- std::cout<<"[initial sequence] : "<<MB(m*n*d*memp)<<"Mo"<<MEMINFO<<std::endl;
-
- MatrixP Sigma2(F, m, m, d+1);
- std::cout<<"[output sigma ] : "<<MB(m*m*(d+1)*memp)<<"Mo"<<MEMINFO<<std::endl;
- std::cout<<"--> " <<MB(Sigma2.meminfo())<<std::endl;
- std::cout<<"--> " <<MB(Sigma2.realmeminfo())<<std::endl;
-
+ Gen.random(Serie->ref(i,j,k));
+ std::cout<<"[initial sequence] : "<<MB(Serie->realmeminfo())<<"Mo"<<MEMINFO<<std::endl;
+
// define the shift
vector<size_t> shift(m,0);
-
+
OrderBasis<Field> SB(F);
Timer chrono;
#ifdef BENCH_MBASIS
@@ -305,18 +298,36 @@ void bench_sigma(const Field& F, RandIter& Gen, size_t m, size_t n, size_t d, s
MatrixP Sigma1(F, m, m, d+1);
vector<size_t> shift2(m,0);
chrono.start();
- SB.M_Basis(Sigma1, Serie, d, shift2);
+ SB.M_Basis(Sigma1, *Serie, d, shift2);
chrono.stop();
std::cout << "M-Basis : " <<chrono.usertime()<<" s"<<std::endl;
}
#endif
+
+
+#ifndef LOW_MEMORY_PMBASIS
+ MatrixP Sigma2(F, m, m, d+1);
+ std::cout<<"[output sigma ] : "<<MB(Sigma2.realmeminfo())<<"Mo"<<MEMINFO<<std::endl;
chrono.clear();
chrono.start();
- SB.PM_Basis(Sigma2, Serie, d, shift);
+ SB.PM_Basis(Sigma2, *Serie, d, shift);
chrono.stop();
std::cout << "PM-Basis : " <<chrono.usertime()<<" s"<<std::endl;
chrono.clear();
+ delete Serie;
+#else
+ MatrixP* sigma_ptr;
+ chrono.clear();
+ chrono.start();
+ SB.PM_Basis_low(sigma_ptr, Serie, d, shift);
+ // Serie is deleted within PM_Basis_low
+ chrono.stop();
+ std::cout << "PM-Basis : " <<chrono.usertime()<<" s"<<std::endl;
+ chrono.clear();
+ delete sigma_ptr;
+#endif
+
// MatrixP Sigma3(F, m, m, d+1);
//vector<size_t> shift3(m,0);
// chrono.start();
@@ -328,18 +339,6 @@ void bench_sigma(const Field& F, RandIter& Gen, size_t m, size_t n, size_t d, s
}
int main(int argc, char** argv){
-
- // std::cout<<"Real memory usage: "<<MEMINFO<<std::endl;
- // const size_t N=32<<20;
- // double * T= new double[N];
- // std::cout<<"allocating :"<<((N*sizeof(double))>>20)<<"Mo"<<std::endl;
- // T[0]=1;
- // for (size_t i=1;i<N;i++)
- // T[i]=T[i-1];
- // std::cout<<"Real memory usage: "<<MEMINFO<<std::endl;
- // delete[] T;
- // std::cout<<"Real memory usage: "<<MEMINFO<<std::endl;
-
static size_t m = 64; // matrix dimension
static size_t n = 32; // matrix dimension
@@ -361,12 +360,18 @@ int main(int argc, char** argv){
parseArguments (argc, argv, args);
typedef Givaro::Modular<double> SmallField;
- typedef Givaro::Modular<Givaro::Integer> LargeField;
+ //typedef Givaro::Modular<Givaro::Integer> LargeField;
+ typedef Givaro::Modular<RecInt::ruint128,RecInt::ruint256> LargeField;
+
+ size_t logd=integer((uint64_t)d).bitsize();
- size_t logd=integer((uint64_t)d).bitsize();
std::cout<<"### matrix series is of size "<<m<<" x "<<n<<" of degree "<<d<<std::endl;
if (b < 26){
+#ifdef FFT_PROFILER
+ FFT_PROF_LEVEL=1;
+#endif
+
if (logd>b-4){
std::cout<<"degree is to large for field bitsize: "<<b<<std::endl;
exit(0);
@@ -379,11 +384,15 @@ int main(int argc, char** argv){
bench_sigma(F,G,m,n,d,target);
}
else {
+#ifdef FFT_PROFILER
+ FFT_PROF_LEVEL=2;
+#endif
+
RandomPrimeIterator Rd(b,seed);
integer p = Rd.randomPrime();
std::cout<<"# starting sigma basis computation over Fp[x] with p="<<p<<endl;;
- LargeField F(p);
- typename LargeField::RandIter G(F,0,seed);
+ LargeField F(p);
+ typename LargeField::RandIter G(F,b,seed);
bench_sigma(F,G,m,n,d,target);
diff --git a/benchmarks/perfpublisher.sh b/benchmarks/perfpublisher.sh
index 8be3168..9be8431 100755
--- a/benchmarks/perfpublisher.sh
+++ b/benchmarks/perfpublisher.sh
@@ -8,12 +8,24 @@ XMLFILE=$1
benchmarks=$2
COMPILER=$3
+# choose gdate on OS X
+if command -v "gdate" >/dev/null; then
+ DATE=gdate
+else
+ DATE=date
+fi
#=================#
# Plateform infos #
#=================#
COMPILERVERSION=$($COMPILER --version 2>&1 | head -1)
-CPUFREQ=$(lscpu | grep "MHz" | rev | cut -f1 -d' ' | rev)
+
+if command -v "lscpu" >/dev/null; then
+ CPUFREQ=$(lscpu | grep "MHz" | rev | cut -f1 -d' ' | rev)
+else
+ CPUFREQ=$((`sysctl -n hw.cpufrequency`/1000000))
+fi
+
ARCH=$(uname -m)
OSNAME=$(uname -s)
OSVERSION=$(uname -r)
@@ -45,8 +57,8 @@ echo '<report name="benchmarks-report" categ="benchmarks">' >> $XMLFILE
#=======#
echo '<start>' >> $XMLFILE
-echo '<date format="YYYYMMDD" val="'$(date +%Y%m%d)'" />' >> $XMLFILE
-echo '<time format="HHMMSS" val="'$(date +%H%M%S)'" />' >> $XMLFILE
+echo '<date format="YYYYMMDD" val="'$($DATE +%Y%m%d)'" />' >> $XMLFILE
+echo '<time format="HHMMSS" val="'$($DATE +%H%M%S)'" />' >> $XMLFILE
echo '</start>' >> $XMLFILE
#============#
@@ -59,9 +71,9 @@ do
then
#File does not exist: compile it
echo '[Compiling]' $benchmark
- COMPILESTART=$(date +%s%3N)
+ COMPILESTART=$($DATE +%s%3N)
COMPILELOG=$(make $benchmark 2>&1; echo 'Returned state: '$?)
- COMPILEEND=$(date +%s%3N)
+ COMPILEEND=$($DATE +%s%3N)
COMPILETIME=$(($COMPILEEND - $COMPILESTART))
COMPILECHECK=$(echo $COMPILELOG | grep -o '[^ ]*$')
COMPILETIMERELEVANT='true'
diff --git a/configure.ac b/configure.ac
index 12c8981..ec1764c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -21,12 +21,12 @@
AC_PREREQ([2.61])
-AC_INIT([LinBox], [1.4.1],[linbox-use at googlegroups.com],[linbox],
+AC_INIT([LinBox], [1.4.2],[linbox-use at googlegroups.com],[linbox],
[http://www.linalg.org/])
AC_CONFIG_MACRO_DIR([macros])
AC_CONFIG_AUX_DIR([build-aux])
-AM_INIT_AUTOMAKE([1.8 gnu no-dependencies -Wall -Wno-portability])
+AM_INIT_AUTOMAKE([1.8 gnu no-dependencies -Wall -Wno-portability foreign])
AC_CONFIG_HEADERS([config.h])
AX_PREFIX_CONFIG_H(linbox/config.h, __LINBOX)
AC_PATH_PROG(RM, rm, $FALSE)
@@ -79,7 +79,7 @@ AC_SUBST([DEFAULT_CFLAGS])
AC_SUBST([DEBUG_CFLAGS])
AC_SUBST([TESTS_CFLAGS])
-TESTS_CFLAGS="-O0"
+TESTS_CFLAGS="-O2"
DEBUG_CFLAGS="-g"
DEFAULT_CFLAGS=""
WARN_CFLAGS="-Wall"
@@ -207,19 +207,20 @@ LB_DRIVER
echo "-----------------------------------------------"
# Now getting GMP and Givaro from FFLAS-FFPACK - AB 2014-12-10
-
-LB_CHECK_FFLAS_FFPACK(,,[
-echo ''
-echo '*******************************************************************************'
-echo ' ERROR: Fflas-Ffpack not found!'
-echo
-echo ' Fflas-Ffpack routines are required for this library to compile. Please'
-echo ' make sure they are installed and specify its location with the option'
-echo ' --with-fflas-ffpack=<lib> when running configure.'
-echo ' Also make sure your compiler supports cxx-11...'
-echo '*******************************************************************************'
-exit 1
-])
+PKG_CHECK_MODULES([FFLAS_FFPACK], [fflas-ffpack])
+
+dnl LB_CHECK_FFLAS_FFPACK(,,[
+dnl echo ''
+dnl echo '*******************************************************************************'
+dnl echo ' ERROR: Fflas-Ffpack not found!'
+dnl echo
+dnl echo ' Fflas-Ffpack routines are required for this library to compile. Please'
+dnl echo ' make sure they are installed and specify its location with the option'
+dnl echo ' --with-fflas-ffpack=<lib> when running configure.'
+dnl echo ' Also make sure your compiler supports cxx-11...'
+dnl echo '*******************************************************************************'
+dnl exit 1
+dnl ])
LB_CHECK_LAPACK
@@ -256,7 +257,7 @@ if test ! -d ./benchmarks/data ; then
fi
DEPS_CFLAGS="${FFLAS_FFPACK_CFLAGS} ${NTL_CFLAGS} ${MPFR_CFLAGS} ${FPLLL_CFLAGS} ${IML_CFLAGS} ${FLINT_CFLAGS}"
-DEPS_LIBS="${FFLAS_FFPACK_LIBS} ${NTL_LIBS} ${MPFR_LIBS} ${FPLLL_LIBS} ${IML_LIBS} ${FLINT_LIBS} ${OCL_LIBS}"
+DEPS_LIBS=" ${NTL_LIBS} ${MPFR_LIBS} ${FPLLL_LIBS} ${IML_LIBS} ${FLINT_LIBS} ${OCL_LIBS} ${FFLAS_FFPACK_LIBS} ${XML_LIBS}"
CXXFLAGS="${CXXFLAGS} ${STDFLAG}"
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 11b459d..dc206a2 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -42,7 +42,7 @@ LDADD += $(top_builddir)/linbox/liblinbox.la
# SUBDIRS=fields solver data blackbox
-EXAMPLES=rank det minpoly valence solve dot-product echelon sparseelimdet sparseelimrank checksolve doubledet smithvalence charpoly polysmith benchfft benchmatpolymult
+EXAMPLES=rank det minpoly valence solve dot-product echelon sparseelimdet sparseelimrank checksolve doubledet smithvalence charpoly polysmith #bench-fft bench-matpoly-mult
# EXAMPLES+=nulp yabla
GIVARONTL_EXAMPLES=smith graph-charpoly
if LINBOX_HAVE_NTL
@@ -77,18 +77,22 @@ smithvalence_SOURCES = smithvalence.C
sparseelimdet_SOURCES = sparseelimdet.C
sparseelimrank_SOURCES = sparseelimrank.C
polysmith_SOURCES = poly-smith.C
-benchfft_SOURCES = bench-fft.C
-benchmatpolymult_SOURCES = bench-matpoly-mult.C
+#bench_fft_SOURCES = bench-fft.C
+#bench_matpoly_mult_SOURCES = bench-matpoly-mult.C
LINBOX=@prefix@
LINBOX_BIN=@bindir@
# for compilation of new examples
-new_examp_comp = $(CXX) -I at includedir@ $(CXXFLAGS) $(AM_CPPFLAGS) $(OPTFLAGS) ${INCLUDES} $< -o $@ -L at libdir@ -llinbox $(AM_LDFLAGS) $(LDADD) $(LIBS)
+#new_examp_comp = $(CXX) -I at includedir@ $(CXXFLAGS) $(AM_CPPFLAGS) $(OPTFLAGS) ${INCLUDES} $< -o $@ -L at libdir@ -llinbox $(AM_LDFLAGS) $(LDADD) $(LIBS)
%:%.C
- $(new_examp_comp)
+# $(LTCXXCOMPILE) -c -o $@.$(OBJEXT) $<
+ $(AM_V_CXX)$(CXXCOMPILE) -c -o $@.$(OBJEXT) $<
+ $(AM_V_CXXLD)$(CXXLINK) $@.$(OBJEXT) $(LDADD)
%:%.cpp
- $(new_examp_comp)
+# $(LTCXXCOMPILE) -c -o $@.$(OBJEXT) $<
+ $(AM_V_CXX)$(CXXCOMPILE) -c -o $@.$(OBJEXT) $<
+ $(AM_V_CXXLD)$(CXXLINK) $@.$(OBJEXT) $(LDADD)
diff --git a/examples/bench-fft.C b/examples/bench-fft.C
index 9c8583f..b06d6c7 100755
--- a/examples/bench-fft.C
+++ b/examples/bench-fft.C
@@ -62,7 +62,7 @@ struct congruent{
bool operator()(T a, T b) const { return ((uint64_t)a%(uint64_t)p) == ((uint64_t)b%(uint64_t)p);}
};
template<typename Funct, typename FFT, typename Vect>
-void DFT_sanity_check(FFT& FFTDom, Funct f, const Vect& x, const Vect& y, string msg){
+bool DFT_sanity_check(FFT& FFTDom, Funct f, const Vect& x, const Vect& y, string msg){
typedef typename FFT::Element Element ;
Vect z(x);
auto Functor = bind(f, &FFTDom, &z[0]);
@@ -72,19 +72,21 @@ void DFT_sanity_check(FFT& FFTDom, Funct f, const Vect& x, const Vect& y, string
cout<<" Checking ... "<<msg
<< (equal(y.begin(),y.end(),z.begin(),congruent<Element>(FFTDom._p))?" done":" error")<<endl;
- // if (!(equal(y.begin(),y.end(),z.begin(),congruent<Element>(FFTDom._p)))){
- // std::ostream_iterator<Element> out_it (std::cout,", ");
- // std::copy ( z.begin(), z.end(), out_it );
- // std::cout<<std::endl;
- // std::copy ( y.begin(), y.end(), out_it );
- // std::cout<<std::endl;
- // }
-
+ if (!(equal(y.begin(),y.end(),z.begin(),congruent<Element>(FFTDom._p)))){
+ std::ostream_iterator<Element> out_it (std::cout,", ");
+ std::copy ( z.begin(), z.end(), out_it );
+ std::cout<<std::endl;
+ std::copy ( y.begin(), y.end(), out_it );
+ std::cout<<std::endl;
+ return false;
+ }
+ return true;
}
template<typename Field>
-void check_DIF(const Field& fld, size_t kmax, long seed) {
+bool check_DIF(const Field& fld, size_t kmax, long seed) {
typedef typename Field::Element Element;
+ bool passed = true;
for (size_t lpts = 1; lpts < kmax ; lpts++){
size_t pts = 1 << lpts;
cout<<"********************************************************"<<endl;
@@ -106,39 +108,40 @@ void check_DIF(const Field& fld, size_t kmax, long seed) {
// compute the correct result
MulDom.FFT_DIF_Harvey_mod2p_iterative(y.data());
// check 2x2
- DFT_sanity_check(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative2x2,x,y, "DIF_Harvey_mod2p_iterative2x2");
+ passed &= DFT_sanity_check(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative2x2,x,y, "DIF_Harvey_mod2p_iterative2x2");
// check 3x3
- DFT_sanity_check(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative3x3,x,y, "DIF_Harvey_mod2p_iterative3x3");
+ passed &= DFT_sanity_check(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative3x3,x,y, "DIF_Harvey_mod2p_iterative3x3");
// check 4x1 SSE
- //DFT_sanity_check(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative4x1_SSE,x,y, "DIF_Harvey_mod2p_iterative4x1_SSE");
+ //passed &= DFT_sanity_check(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative4x1_SSE,x,y, "DIF_Harvey_mod2p_iterative4x1_SSE");
// check 4x2 SSE
- //DFT_sanity_check(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative4x2_SSE,x,y, "DIF_Harvey_mod2p_iterative4x2_SSE");
-#ifdef __AVX2__
+ //passed &= DFT_sanity_check(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative4x2_SSE,x,y, "DIF_Harvey_mod2p_iterative4x2_SSE");
+#ifdef __LINBOX_HAVE_AVX_INSTRUCTIONS2
// check 8x1 AVX
- //DFT_sanity_check(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative8x1_AVX,x,y, "DIF_Harvey_mod2p_iterative8x1_AVX");
+ //passed &= DFT_sanity_check(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative8x1_AVX,x,y, "DIF_Harvey_mod2p_iterative8x1_AVX");
#endif
// check Harvey SSE
- DFT_sanity_check(MulDom,&FFT_t::template FFT_DIF<Element>,x,y, "DIF_Harvey_SSE");
- cout<<"---------------------------------------------------------------"<<endl;
+ passed &= DFT_sanity_check(MulDom,&FFT_t::template FFT_DIF<Element>,x,y, "DIF_Harvey_SSE");
+// cout<<"---------------------------------------------------------------"<<endl;
+
/* CHECK DIT */
// compute the correct result
y=x;
MulDom.FFT_DIT_Harvey_mod4p_iterative2x2(y.data());
// check 2x2
- DFT_sanity_check(MulDom,&FFT_t::FFT_DIT_Harvey_mod4p_iterative2x2,x,y, "DIT_Harvey_mod4p_iterative2x2");
+ passed &= DFT_sanity_check(MulDom,&FFT_t::FFT_DIT_Harvey_mod4p_iterative2x2,x,y, "DIT_Harvey_mod4p_iterative2x2");
// check 3x3
- DFT_sanity_check(MulDom,&FFT_t::FFT_DIT_Harvey_mod4p_iterative3x3,x,y, "DIT_Harvey_mod4p_iterative3x3");
+ passed &= DFT_sanity_check(MulDom,&FFT_t::FFT_DIT_Harvey_mod4p_iterative3x3,x,y, "DIT_Harvey_mod4p_iterative3x3");
// check 4x1 SSE
- //DFT_sanity_check(MulDom,&FFT_t::FFT_DIT_Harvey_mod4p_iterative4x1_SSE,x,y, "DIT_Harvey_mod4p_iterative4x1_SSE");
-#ifdef __AVX2__
+ //passed &= DFT_sanity_check(MulDom,&FFT_t::FFT_DIT_Harvey_mod4p_iterative4x1_SSE,x,y, "DIT_Harvey_mod4p_iterative4x1_SSE");
+#ifdef __LINBOX_HAVE_AVX_INSTRUCTIONS2
// check 8x1 AVX
- //DFT_sanity_check(MulDom,&FFT_t::FFT_DIT_Harvey_mod4p_iterative8x1_AVX,x,y, "DIT_Harvey_mod4p_iterative8x1_AVX");
+ //passed &= DFT_sanity_check(MulDom,&FFT_t::FFT_DIT_Harvey_mod4p_iterative8x1_AVX,x,y, "DIT_Harvey_mod4p_iterative8x1_AVX");
#endif
// check Harvey SSE
- DFT_sanity_check(MulDom,&FFT_t::template FFT_DIT<Element>,x,y, "DIT_Harvey_SSE");
-
- cout<<endl;
+ passed &= DFT_sanity_check(MulDom,&FFT_t::template FFT_DIT<Element>,x,y, "DIT_Harvey_SSE");
+// cout<<endl;
}
+ return passed;
}
/**************************************
@@ -179,9 +182,9 @@ void bench_DIF(const Field& fld, size_t kmax, long seed) {
typedef typename Field::Element Element;
for (size_t lpts = 5; lpts < kmax ; lpts++){
size_t pts = 1 << lpts;
- cout<<"********************************************************"<<endl;
- cout<<"*** Testing polynomials of size 2^" << lpts <<endl;
- cout<<"********************************************************"<<endl;
+ cout<<"*********************************************************"<<endl;
+ cout<<"*** Benching polynomials of size 2^" << lpts <<endl;
+ cout<<"*********************************************************"<<endl;
vector<Element> x(pts);
// Generate random inputs
@@ -190,6 +193,8 @@ void bench_DIF(const Field& fld, size_t kmax, long seed) {
FFT_transform<Field> MulDom(fld,lpts);
typedef FFT_transform<Field> FFT_t;
+ // check 1x1
+ DFT_performance(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative,lpts, x, "DIF_Harvey_mod2p_iterative");
// check 2x2
DFT_performance(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative2x2,lpts, x, "DIF_Harvey_mod2p_iterative2x2");
// check 3x3
@@ -198,20 +203,23 @@ void bench_DIF(const Field& fld, size_t kmax, long seed) {
//DFT_performance(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative4x1_SSE,lpts, x, "DIF_Harvey_mod2p_iterative4x1_SSE");
// check 4x2 SSE
//DFT_performance(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative4x2_SSE,lpts, x, "DIF_Harvey_mod2p_iterative4x2_SSE");
-#ifdef __AVX2__
+#ifdef __LINBOX_HAVE_AVX_INSTRUCTIONS2
// check 8x1 AVX
//DFT_performance(MulDom,&FFT_t::FFT_DIF_Harvey_mod2p_iterative8x1_AVX,lpts, x, "DIF_Harvey_mod2p_iterative8x1_AVX");
#endif
// check Harvey SSE
DFT_performance(MulDom,&FFT_t::template FFT_DIF<Element>,lpts, x, "DIF_Harvey_SSE");
cout<<"---------------------------------------------------------------"<<endl;
- // check 2x2
+
+ // check 1x1
+ DFT_performance(MulDom,&FFT_t::FFT_DIT_Harvey_mod4p_iterative,lpts, x, "DIT_Harvey_mod4p_iterative");
+ // check 2x2
DFT_performance(MulDom,&FFT_t::FFT_DIT_Harvey_mod4p_iterative2x2,lpts, x, "DIT_Harvey_mod4p_iterative2x2");
// check 3x3
DFT_performance(MulDom,&FFT_t::FFT_DIT_Harvey_mod4p_iterative3x3,lpts, x, "DIT_Harvey_mod4p_iterative3x3");
// check 4x1 SSE
//DFT_performance(MulDom,&FFT_t::FFT_DIT_Harvey_mod4p_iterative4x1_SSE,lpts, x, "DIT_Harvey_mod4p_iterative4x1_SSE");
-#ifdef __AVX2__
+#ifdef __LINBOX_HAVE_AVX_INSTRUCTIONS2
// check 8x1 AVX
//DFT_performance(MulDom,&FFT_t::FFT_DIT_Harvey_mod4p_iterative8x1_AVX,lpts, x, "DIT_Harvey_mod4p_iterative8x1_AVX");
#endif
@@ -237,10 +245,11 @@ int main(int argc, char** argv){
cout<<"prime : "<<p<<endl;
cout<<endl;
- //Givaro::Modular<uint32_t> F(p);
- Givaro::Modular<double> F(p);
- check_DIF(F,k,seed);
- bench_DIF(F,k,seed);
+ // No need to test on Modular<double> since the implementation will convert to uint32
+ // and use the uint32 implementation
+ Givaro::Modular<uint32_t,uint64_t> Fi(p);
+ cout << "Test : " << ((check_DIF(Fi,k,seed))?"OK":"KO!!!!") << endl;
+ bench_DIF(Fi,k,seed);
return 0;
diff --git a/examples/bench-matpoly-mult.C b/examples/bench-matpoly-mult.C
index 6779c8d..eefe235 100755
--- a/examples/bench-matpoly-mult.C
+++ b/examples/bench-matpoly-mult.C
@@ -91,9 +91,9 @@ using namespace LinBox;
template <typename Rand, typename Vect>
void randomVect (Rand& r, Vect& v) {
- size_t s = v.size();
+ size_t s = v.size();
for (size_t i = 0; i < s; ++i)
- r.random(v[i]);
+ r.random(v[i]);
}
template <typename Rand, typename Mat>
@@ -149,8 +149,12 @@ template<typename Field, typename RandIter>
void check_matpol_mul(const Field& fld, RandIter& Gen, size_t n, size_t d) {
typedef PolynomialMatrix<PMType::polfirst,PMStorage::plain,Field> MatrixP;
typedef PolynomialMatrix<PMType::matfirst,PMStorage::plain,Field> PMatrix;
- PMatrix A(fld,n,n,d),B(fld,n,n,d),C(fld,n,n,2*d-1);
- MatrixP AA(fld,n,n,d),BB(fld,n,n,d),CC(fld,n,n,2*d-1);
+
+ // product m*n n*m
+ size_t m=n;
+
+ PMatrix A(fld,m,n,d),B(fld,n,m,d),C(fld,m,m,2*d-1);
+ MatrixP AA(fld,m,n,d),BB(fld,n,m,d),CC(fld,m,m,2*d-1);
// Generate random matrix of polynomial
for (size_t i=0;i<d;i++){
randomMat(Gen,A[i]);
@@ -164,10 +168,10 @@ void check_matpol_mul(const Field& fld, RandIter& Gen, size_t n, size_t d) {
Naive NMD(fld);
Kara PMKD(fld);
FFT PMFFT(fld);
-
+
// compute the correct result
- for (size_t r=0;r<n;r++)
- for (size_t c=0;c<n;c++)
+ for (size_t r=0;r<m;r++)
+ for (size_t c=0;c<m;c++)
for (size_t k=0;k<n;k++)
for (size_t i=0;i<A.size();i++)
for (size_t j=0;j<B.size();j++)
@@ -182,6 +186,7 @@ void check_matpol_mul(const Field& fld, RandIter& Gen, size_t n, size_t d) {
AA.copy(A);
BB.copy(B);
CC.copy(C);
+
// check fft
MATPOLMUL_sanity_check(PMFFT,CC,AA,BB, "FFT Multiplication");
@@ -298,7 +303,6 @@ template<typename Field, typename RandIter>
void profile_matpol_mulfft(const Field& fld, RandIter& Gen, size_t n, size_t d) {
typedef PolynomialMatrix<PMType::polfirst,PMStorage::plain,Field> MatrixP;
MatrixP A(fld,n,n,d),B(fld,n,n,d),C(fld,n,n,2*d-1);
-
// Generate random matrix of polynomial
for (size_t i=0;i<n*n;i++){
randomVect(Gen,A(i));
@@ -415,8 +419,9 @@ void profile_matpol_mulkara(const Field& fld, RandIter& Gen, size_t n, size_t d
template<typename Field>
void runTest(const Field& F, size_t n, long b, long d, long seed, std::string test){
- //typename Field::RandIter G(F,b,seed);
- typename Field::RandIter G(F,seed);
+
+ typename Field::RandIter G(F,b,seed);
+ //typename Field::RandIter G(F,seed);
if (test == "check"|| test == "all")
check_matpol_mul(F,G,n,d);
if (test == "bench" || test == "all")
@@ -468,8 +473,8 @@ int main(int argc, char** argv){
#endif
RandomPrimeIter Rd(b,seed);
integer p= Rd.random();
- Givaro::Modular<integer> F(p);
- //Givaro::Modular<RecInt::ruint128,RecInt::ruint512> F(p);
+ Givaro::Modular<integer> F(p);
+ //Givaro::Modular<RecInt::ruint128,RecInt::ruint256> F(p);
cout<<"Computation over Fp[x] with p= "<<p<<" (Generic prime)"<<endl;
cout<<"++++++++++++++++++++++++++++++++++++"<<endl;
runTest (F,n,b,d,seed,test);
diff --git a/examples/bench-new-fft.C b/examples/bench-new-fft.C
new file mode 100755
index 0000000..af40ae8
--- /dev/null
+++ b/examples/bench-new-fft.C
@@ -0,0 +1,333 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2013 Pascal Giorgi
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library LinBox.
+ *
+ * LinBox is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ */
+
+#include <linbox/linbox-config.h>
+
+#include <functional>
+#include <iostream>
+#include <vector>
+
+#include <givaro/modular.h>
+#include <givaro/givranditer.h>
+
+using namespace std;
+
+#include "linbox/algorithms/polynomial-matrix/polynomial-fft-butterflies.h"
+#include "linbox/algorithms/polynomial-matrix/polynomial-fft-algorithms.h"
+
+#include "linbox/algorithms/polynomial-matrix/polynomial-fft-transform.h"
+#include "linbox/randiter/random-fftprime.h"
+#include "linbox/ring/modular.h"
+#include "fflas-ffpack/utils/align-allocator.h"
+
+using namespace LinBox;
+
+
+
+/**
+ * Overwrites every entry of v with a value produced by r.
+ *
+ * r.random() is called exactly once per entry, in increasing index order.
+ * The size of v is read once, before the first call.
+ */
+template <typename Rand, typename Vect>
+void randomVect (Rand& r, Vect& v) {
+	const size_t len = v.size();
+	size_t idx = 0;
+	while (idx < len) {
+		r.random(v[idx]);
+		++idx;
+	}
+}
+
+
+/**********************************
+ ****** DFT CHECKING FUNCTION *****
+ *********************************/
+/**
+ * Binary predicate: true when a and b are congruent modulo p.
+ *
+ * All three values are converted to uint64_t before the reduction, so the
+ * comparison is made on their 64-bit unsigned images.
+ */
+template<typename T>
+struct congruent{
+	T p;
+	congruent(T _p): p(_p){}
+	bool operator()(T a, T b) const {
+		const uint64_t modulus = (uint64_t)p;
+		const uint64_t lhs = (uint64_t)a % modulus;
+		const uint64_t rhs = (uint64_t)b % modulus;
+		return lhs == rhs;
+	}
+};
+/**
+ * Applies the member function f of FFTDom, in place, to a copy of x and
+ * compares the outcome with the reference vector y.
+ *
+ * Entries are compared modulo FFTDom._p (see congruent), so two results that
+ * differ by a multiple of the modulus are accepted as equal.
+ * A one-line report built from msg is printed on cout.
+ *
+ * Returns true when every entry matches, false otherwise.
+ */
+template<typename Funct, typename FFT, typename Vect>
+bool DFT_sanity_check(FFT& FFTDom, Funct f, const Vect& x, const Vect& y, string msg){
+	typedef typename FFT::Element Element ;
+	// the transform is in place: run it on a private copy so that x is preserved
+	Vect result(x);
+	auto transform = bind(f, &FFTDom, &result[0]);
+	transform();
+
+	// the comparison is pure, so it is evaluated once and reused below
+	const bool same = equal(y.begin(),y.end(),result.begin(),congruent<Element>(FFTDom._p));
+
+	msg+=" ";
+	msg.resize(45,'.');
+	cout<<" Checking ... "<<msg
+	    << (same?" done":" error")<<endl;
+
+	// To debug a failure, dump result and y here (e.g. with an ostream_iterator).
+	return same;
+}
+
+/**
+ * Cross-checks the SIMD variants of FFT_algorithms against the NoSimd one.
+ *
+ * For every lpts in [1, kmax) a random vector of 2^lpts field elements is
+ * drawn; its DIF and its DIT are computed with the NoSimd variant and used as
+ * the reference for the Simd128 / Simd256 variants (when fflas-ffpack reports
+ * SSE4.1 / AVX2 support and the SIMD vector holds 4 or 8 elements).
+ *
+ * fld  : field over which the transforms are performed
+ * kmax : exclusive upper bound on the log2 of the tested sizes
+ * seed : currently unused, the random iterator is built from fld alone
+ *
+ * Returns true when all the comparisons succeeded.
+ */
+template<typename Field>
+bool check_DIF(const Field& fld, size_t kmax, long seed) {
+	typedef typename Field::Element Element;
+	typedef std::vector<Element,AlignedAllocator<Element, Alignment::DEFAULT>> AlignedVect;
+	bool passed = true;
+	for (size_t lpts = 1; lpts < kmax ; ++lpts){
+		const size_t pts = 1 << lpts;
+		cout<<"********************************************************"<<endl;
+		cout<<"*** Testing polynomials of size 2^" << lpts <<endl;
+		cout<<"********************************************************"<<endl;
+
+		// Generate random inputs: x keeps the input, y receives the reference output
+		typename Field::RandIter Gen(fld);//,fld.characteristic(),seed);
+		AlignedVect y(pts);
+		randomVect(Gen,y);
+		AlignedVect x(y);
+
+		FFT_init<Field> fft_init (fld, lpts);
+		FFT_algorithms<Field,NoSimd<Element> > fft_algo_nosimd (fft_init);
+
+		/* CHECK DIF */
+		// reference result, computed without SIMD
+		fft_algo_nosimd.DIF(y.data());
+
+#if defined(__FFLASFFPACK_HAVE_SSE4_1_INSTRUCTIONS)
+		// check FFT_algorithms::DIF with 128-bit vectors
+		if (Simd128<Element>::vect_size == 4 || Simd128<Element>::vect_size == 8){
+			using FFT_a128 = FFT_algorithms<Field,Simd128<Element> >;
+			FFT_a128 fft_algo_simd128 (fft_init);
+			if (!DFT_sanity_check(fft_algo_simd128,&FFT_a128::DIF,x,y, "FFT_algorithms<Field,Simd128>::DIF"))
+				passed = false;
+		}
+#endif
+
+#if defined(__FFLASFFPACK_HAVE_AVX2_INSTRUCTIONS)
+		// check FFT_algorithms::DIF with 256-bit vectors
+		if (Simd256<Element>::vect_size == 4 || Simd256<Element>::vect_size == 8){
+			using FFT_a256 = FFT_algorithms<Field,Simd256<Element> >;
+			FFT_a256 fft_algo_simd256 (fft_init);
+			if (!DFT_sanity_check(fft_algo_simd256,&FFT_a256::DIF,x,y, "FFT_algorithms<Field,Simd256>::DIF"))
+				passed = false;
+		}
+#endif
+		cout<<"---------------------------------------------------------------"<<endl;
+
+		/* CHECK DIT */
+		// reference result, computed without SIMD from the same input
+		y=x;
+		fft_algo_nosimd.DIT(y.data());
+
+#if defined(__FFLASFFPACK_HAVE_SSE4_1_INSTRUCTIONS)
+		// check FFT_algorithms::DIT with 128-bit vectors
+		if (Simd128<Element>::vect_size == 4 || Simd128<Element>::vect_size == 8){
+			using FFT_a128 = FFT_algorithms<Field,Simd128<Element> >;
+			FFT_a128 fft_algo_simd128 (fft_init);
+			if (!DFT_sanity_check(fft_algo_simd128,&FFT_a128::DIT,x,y, "FFT_algorithms<Field,Simd128>::DIT"))
+				passed = false;
+		}
+#endif
+
+#if defined(__FFLASFFPACK_HAVE_AVX2_INSTRUCTIONS)
+		// check FFT_algorithms::DIT with 256-bit vectors
+		if (Simd256<Element>::vect_size == 4 || Simd256<Element>::vect_size == 8){
+			using FFT_a256 = FFT_algorithms<Field,Simd256<Element> >;
+			FFT_a256 fft_algo_simd256 (fft_init);
+			if (!DFT_sanity_check(fft_algo_simd256,&FFT_a256::DIT,x,y, "FFT_algorithms<Field,Simd256>::DIT"))
+				passed = false;
+		}
+#endif
+
+		cout<<endl;
+	}
+	return passed;
+}
+
+/**************************************
+ ****** DFT PERFORMANCE FUNCTION ******
+ **************************************/
+/**
+ * Times the member function f of FFTDom applied in place to a copy of x.
+ *
+ * The transform is called in batches of 4 until at least one second of
+ * wall-clock time has elapsed; the reported time is the user time divided by
+ * the number of calls.  One line (time per call and Miops) is printed on cout.
+ *
+ * lpts : log2 of the transform size, used only for the operation count
+ * msg  : label of the report line, padded with dots to 45 characters
+ */
+template<typename Funct, typename FFT, typename Vect>
+void DFT_performance(FFT& FFTDom, Funct f, size_t lpts, const Vect& x, string msg){
+	Vect work(x);
+	auto transform = bind(f, &FFTDom, &work[0]);
+
+	const size_t batch = 4;
+	size_t calls = 0;
+	Timer chrono;
+	chrono.start();
+	while (chrono.realElapsedTime() < 1){
+		for (size_t done = 0; done != batch; ++done)
+			transform();
+		calls += batch;
+	}
+	const double secPerCall = chrono.userElapsedTime()/calls;
+
+	// NOTE(review): the original comment announced "3/2 n log n", yet the count
+	// used is 17 * lpts * 2^(lpts-1) -- confirm which one is intended.
+	const double Miops = 17 * (lpts<<(lpts-1)) /(1e6 * secPerCall);
+
+	msg+=" ";
+	msg.resize(45,'.');
+	cout << "Timings ... " << msg <<" : ";
+	cout.precision(2);
+	cout.width(10);
+	cout<<scientific<<secPerCall << " s, ";
+	cout.precision(2);
+	cout.width(10);
+	cout<<fixed<<Miops << " Miops\n";
+}
+
+
+
+/**
+ * Times the DIF and DIT transforms of FFT_algorithms on random inputs.
+ *
+ * For every lpts in [5, kmax) a random vector of 2^lpts field elements is
+ * drawn and handed to DFT_performance for the NoSimd variant and, when
+ * fflas-ffpack reports SSE4.1 / AVX2 support and the SIMD vector holds 4 or 8
+ * elements, for the Simd128 / Simd256 variants.
+ *
+ * fld  : field over which the transforms are performed
+ * kmax : exclusive upper bound on the log2 of the benchmarked sizes
+ * seed : seed handed to the field's random iterator
+ */
+template<typename Field>
+void bench_DIF(const Field& fld, size_t kmax, long seed) {
+	typedef typename Field::Element Element;
+	for (size_t lpts = 5; lpts < kmax ; lpts++){
+		// 64-bit shift whatever the width of unsigned long on the platform
+		const uint64_t pts = uint64_t(1) << lpts;
+		cout<<"*********************************************************"<<endl;
+		cout<<"*** Benching polynomials of size 2^" << lpts <<endl;
+		cout<<"*********************************************************"<<endl;
+		// Same aligned storage as in check_DIF: the SIMD variants presumably rely
+		// on aligned accesses, which a plain std::vector does not guarantee.
+		// DFT_performance copies x into a vector of the same type.
+		std::vector<Element,AlignedAllocator<Element, Alignment::DEFAULT>> x(pts);
+
+		// Generate random inputs
+		typename Field::RandIter Gen(fld,seed);
+		randomVect(Gen,x);
+
+		FFT_init<Field> fft_init (fld, lpts);
+
+		FFT_algorithms<Field,NoSimd<typename Field::Element> > fft_algo_nosimd (fft_init);
+		using FFT_a = FFT_algorithms<Field,NoSimd<typename Field::Element> >;
+		DFT_performance(fft_algo_nosimd,&FFT_a::DIF, lpts, x, "FFT_algorithms<Field,NoSimd>::DIF");
+
+#if defined(__FFLASFFPACK_HAVE_SSE4_1_INSTRUCTIONS)
+		if (Simd128<typename Field::Element>::vect_size == 4 || Simd128<typename Field::Element>::vect_size == 8){
+			FFT_algorithms<Field,Simd128<typename Field::Element> > fft_algo_simd128 (fft_init);
+			using FFT_a128 = FFT_algorithms<Field,Simd128<typename Field::Element> >;
+			DFT_performance(fft_algo_simd128,&FFT_a128::DIF, lpts, x, "FFT_algorithms<Field,Simd128>::DIF");
+		}
+#endif
+
+#if defined(__FFLASFFPACK_HAVE_AVX2_INSTRUCTIONS)
+		if (Simd256<typename Field::Element>::vect_size == 4 || Simd256<typename Field::Element>::vect_size == 8){
+			FFT_algorithms<Field,Simd256<typename Field::Element> > fft_algo_simd256 (fft_init);
+			using FFT_a256 = FFT_algorithms<Field,Simd256<typename Field::Element> >;
+			DFT_performance(fft_algo_simd256,&FFT_a256::DIF, lpts, x, "FFT_algorithms<Field,Simd256>::DIF");
+		}
+#endif
+		cout<<"---------------------------------------------------------------"<<endl;
+
+		DFT_performance(fft_algo_nosimd,&FFT_a::DIT, lpts, x, "FFT_algorithms<Field,NoSimd>::DIT");
+
+#if defined(__FFLASFFPACK_HAVE_SSE4_1_INSTRUCTIONS)
+		if (Simd128<typename Field::Element>::vect_size == 4 || Simd128<typename Field::Element>::vect_size == 8){
+			FFT_algorithms<Field,Simd128<typename Field::Element> > fft_algo_simd128 (fft_init);
+			using FFT_a128 = FFT_algorithms<Field,Simd128<typename Field::Element> >;
+			DFT_performance(fft_algo_simd128,&FFT_a128::DIT, lpts, x, "FFT_algorithms<Field,Simd128>::DIT");
+		}
+#endif
+
+#if defined(__FFLASFFPACK_HAVE_AVX2_INSTRUCTIONS)
+		if (Simd256<typename Field::Element>::vect_size == 4 || Simd256<typename Field::Element>::vect_size == 8){
+			FFT_algorithms<Field,Simd256<typename Field::Element> > fft_algo_simd256 (fft_init);
+			using FFT_a256 = FFT_algorithms<Field,Simd256<typename Field::Element> >;
+			DFT_performance(fft_algo_simd256,&FFT_a256::DIT, lpts, x, "FFT_algorithms<Field,Simd256>::DIT");
+		}
+#endif
+
+		cout<<endl;
+	}
+}
+
+
+/**
+ * Entry point: checks the DIF/DIT transforms of FFT_algorithms over several
+ * Givaro::Modular fields, each built on a random FFT prime.
+ *
+ * argv[2], when present, is parsed with atoi and used as the random seed;
+ * otherwise the seed is taken from time(NULL).  argv[1] is currently ignored.
+ * The outcome of each check is printed on cout; the return value is always 0.
+ */
+int main(int argc, char** argv){
+	// if (argc < 2 || argc >3){
+	// 	cerr<<"usage : prime_bitsize , (seed)"<<endl;
+	// 	exit(0);
+	// }
+	uint64_t bits = 0; //atoi(argv[1]);
+	long seed=((argc>2)?atoi(argv[2]):time(NULL));
+	size_t l2n = 12;
+	size_t k = l2n;
+	RandomFFTPrime Rd;
+	uint32_t p; // only used for the primes bounded by 2^22, 2^28 and 2^12 below
+
+	//Modular<double,double>
+	bits = 22;
+	Rd = RandomFFTPrime (1<<bits,seed);
+	p = (double)Rd.randomPrime(l2n);
+
+	cout<<"prime : "<<p<<endl;
+	cout<<endl;
+
+	Givaro::Modular<double,double> Fd(p);
+//	cout << "Test Modular<double,double>: " << ((check_DIF(Fd,k,seed))?"OK":"KO!!!!") << endl;
+
+#ifdef __FFLASFFPACK_HAVE_INT128
+	//Modular<uint64_t,uint128_t>
+	bits = 59;
+	// 64-bit shift: 1ul<<59 is undefined where unsigned long is 32 bits wide
+	Rd = RandomFFTPrime (uint64_t(1)<<bits,seed);
+	// A prime bounded by 2^59 may not fit in the 32-bit p: storing it there
+	// would silently truncate the modulus, so it gets its own 64-bit variable.
+	const uint64_t p64 = (uint64_t)Rd.randomPrime(l2n);
+
+	cout<<"prime : "<<p64<<endl;
+	cout<<endl;
+
+	Givaro::Modular<uint64_t,uint128_t> Fi64(p64);
+	cout << "Test Modular<uint64_t,uint128_t> : " << ((check_DIF(Fi64,k,seed))?"OK":"KO!!!!") << endl;
+#endif
+
+	//Modular<uint32_t,uint64_t>
+	bits = 28;
+	Rd = RandomFFTPrime (1<<bits,seed);
+	p = (uint32_t)Rd.randomPrime(l2n);
+
+	cout<<"prime : "<<p<<endl;
+	cout<<endl;
+
+	Givaro::Modular<uint32_t,uint64_t> Fi32(p);
+	cout << "Test Modular<uint32_t,uint64_t>: " << ((check_DIF(Fi32,k,seed))?"OK":"KO!!!!") << endl;
+
+//	bench_DIF(Fi32,k,seed);
+
+
+	//Modular<uint16_t,uint32_t>
+	bits = 12;
+	// smaller transforms here: the prime is bounded by 2^12
+	k = l2n = 8;
+	Rd = RandomFFTPrime (1<<bits,seed);
+	p = (uint16_t)Rd.randomPrime(l2n);
+
+	cout<<"prime : "<<p<<endl;
+	cout<<endl;
+
+	Givaro::Modular<uint16_t,uint32_t> Fi16(p);
+	cout << "Test Modular<uint16_t,uint32_t> : " << ((check_DIF(Fi16,k,seed))?"OK":"KO!!!!") << endl;
+
+
+	// Bench FFT
+
+	// cout << "Test : " << ((check_DIF(Fi16,k,seed))?"OK":"KO!!!!") << endl;
+	// cout << "Test : " << ((check_DIF(Fd,k,seed))?"OK":"KO!!!!") << endl;
+	// bench_DIF(Fi,k,seed);
+	// bench_DIF(Fd,k,seed);
+
+
+	return 0;
+}
+
+
diff --git a/examples/smith.C b/examples/smith.C
index f359bb4..c4344c6 100644
--- a/examples/smith.C
+++ b/examples/smith.C
@@ -64,7 +64,7 @@ using namespace std;
#include <linbox/util/timer.h>
#include <linbox/ring/local2_32.h>
-#include <linbox/ring/PIR-modular-int32.h>
+#include <linbox/ring/pir-modular-int32.h>
#include <linbox/algorithms/smith-form-local.h>
#include <linbox/algorithms/smith-form-iliopoulos.h>
#include <linbox/algorithms/smith-form-adaptive.h>
diff --git a/examples/smithvalence.h b/examples/smithvalence.h
index 91cdc0d..eb606ef 100644
--- a/examples/smithvalence.h
+++ b/examples/smithvalence.h
@@ -116,7 +116,7 @@ std::vector<size_t>& PRank(std::vector<size_t>& ranks, size_t& effective_exponen
if (p <= maxmod) {
typedef Givaro::Modular<int64_t> Ring;
int64_t lp(p);
- Givaro::Integer q = pow(p,e); int64_t lq(q);
+ Givaro::Integer q = pow(p,uint64_t(e)); int64_t lq(q);
if (q >Givaro::Integer(lq)) {
std::cerr << "Power rank might need extra large composite (" << p << '^' << e << ")." << std::endl;
q = p;
@@ -183,7 +183,7 @@ std::vector<size_t>& PRankPowerOfTwo(std::vector<size_t>& ranks, size_t& effecti
std::vector<size_t>& PRankInteger(std::vector<size_t>& ranks, char * filename,Givaro::Integer p, size_t e, size_t intr)
{
typedef Givaro::Modular<Givaro::Integer> Ring;
- Givaro::Integer q = pow(p,e);
+ Givaro::Integer q = pow(p,uint64_t(e));
Ring F(q);
std::ifstream input(filename);
LinBox::MatrixStream<Ring> ms( F, input );
diff --git a/examples/solve.C b/examples/solve.C
index b7d0dd8..dcff6b5 100644
--- a/examples/solve.C
+++ b/examples/solve.C
@@ -33,11 +33,9 @@
#include <iostream>
#include <givaro/modular.h>
-#include <givaro/zring.h>
#include <linbox/matrix/sparse-matrix.h>
-#include <linbox/solutions/solve.h>
-#include <linbox/util/matrix-stream.h>
-#include <linbox/solutions/methods.h>
+#include <linbox/algorithms/gauss.h>
+#include <linbox/util/timer.h>
using namespace LinBox;
using namespace std;
@@ -45,110 +43,45 @@ using namespace std;
int main (int argc, char **argv)
{
- commentator().setMaxDetailLevel (-1);
- commentator().setMaxDepth (-1);
- commentator().setReportStream (std::cerr);
-
- if (argc < 2 || argc > 4) {
- cerr << "Usage: solve <matrix-file-in-supported-format> [<dense-vector-file>] [<p>]" << endl;
+ if (argc != 4) {
+ cerr << "Usage: solve <matrix-file-in-supported-format> <output-file> <p>" << endl;
return 0;
}
srand48( BaseTimer::seed() );
std::ifstream input (argv[1]);
if (!input) { cerr << "Error opening matrix file " << argv[1] << endl; return -1; }
- std::ifstream invect;
- bool createB = false;
- int ModComp = 0;
- if (argc == 2) {
- createB = true;
- ModComp = 0;
- }
+ std::ofstream nsb;
+ nsb.open (argv[2], std::ofstream::out);
+ if (!nsb) { cerr << "Error opening nullspace output file " << argv[2] << endl; return -1; }
- if (argc == 3) {
- invect.open (argv[2], std::ifstream::in);
- if (!invect) {
- createB = true;
- ModComp = 2;
- }
- else {
- createB = false;
- ModComp = 0;
- }
- }
-
- if (argc == 4) {
- ModComp = 3;
- invect.open (argv[2], std::ifstream::in);
- if (!invect) {
- createB = true;
- }
- else
- createB = false;
- }
-
- if (ModComp) {
- cout<<"Computation is done over Z/("<<atoi(argv[ModComp])<<")"<<endl;
- typedef Givaro::Modular<double> Field;
- double q = atof(argv[ModComp]);
+ cout<<"Computation is done over Z/("<<atoi(argv[3])<<")"<<endl;
+ typedef Givaro::Modular<int64_t> Field;
+ double q = atof(argv[3]);
typedef DenseVector<Field> DenseVector ;
Field F(q);
MatrixStream< Field > ms ( F, input );
SparseMatrix<Field> A (ms); // A.write(std::cout);
cout << "A is " << A.rowdim() << " by " << A.coldim() << endl;
if (A.rowdim() <= 20 && A.coldim() <= 20) A.write(std::cerr << "A:=",Tag::FileFormat::Maple) << ';' << std::endl;
- DenseVector X(F, A.coldim()),B(F, A.rowdim());
- if (createB) {
- cerr << "Creating a random {-1,1} vector U, B is AU (to have a consistent system)" << endl;
- DenseVector U(F, A.coldim() );
- for(DenseVector::iterator it=U.begin();
- it != U.end(); ++it)
- if (drand48() <0.5)
- F.assign(*it,F.mOne);
- else
- F.assign(*it,F.one);
- A.apply(B,U);
- }
- else {
- for(DenseVector::iterator it=B.begin();
- it != B.end(); ++it)
- F.read(invect,*it);
- }
-
- // A.write(std::cout << "A: ") << std::endl;
-
- std::cout << "B is " << B << std::endl;
-
+ DenseMatrix<Field> N(F, A.rowdim(), 15);
Timer chrono;
// Sparse Elimination
- std::cout << "Sparse Elimination" << std::endl;
chrono.clear();
chrono.start();
- solve (X, A, B, Method::SparseElimination());
- chrono.stop();
-
- std::cout << "(Sparse Gauss) Solution is [";
- for(DenseVector::const_iterator it=X.begin();it != X.end(); ++it)
- F.write(cout, *it) << " ";
- std::cout << "]" << std::endl;
- std::cout << "CPU time (seconds): " << chrono.usertime() << std::endl<<std::endl;;
+ GaussDomain<Field> GD ( A.field() );
+ GD.nullspacebasisin(N, A);
- // BlasElimination
- std::cout << "BlasElimination" << std::endl;
- chrono.start();
- solve (X, A, B, Method::BlasElimination());
chrono.stop();
- std::cout << "(BlasElimination) Solution is [";
- for(DenseVector::const_iterator it=X.begin();it != X.end(); ++it)
- F.write(cout, *it) << " ";
- std::cout << "]" << std::endl;
- std::cout << "CPU time (seconds): " << chrono.usertime() << std::endl<< std::endl;
+ N.write(nsb) << std::endl;
+ std::cout << "CPU time (seconds): " << chrono.usertime() << std::endl<<std::endl;;
+#if 0
// Wiedemann
std::cout << "Blackbox" << std::endl;
chrono.clear();
@@ -161,6 +94,7 @@ int main (int argc, char **argv)
F.write(cout, *it) << " ";
std::cout << "]" << std::endl;
std::cout << "CPU time (seconds): " << chrono.usertime() << std::endl<<std::endl;;
+#endif
#if 0
// Lanczos
std::cout << "Lanczos" << std::endl;
@@ -192,113 +126,6 @@ int main (int argc, char **argv)
std::cout << "CPU time (seconds): " << chrono.usertime() << std::endl<< std::endl;
#endif
- }
- else {
- cout<<"Computation is done over Q"<<endl;
- Givaro::ZRing<Integer> ZZ;
- typedef DenseVector<Givaro::ZRing<Integer> > DenseVector ;
- MatrixStream< Givaro::ZRing<Integer> > ms( ZZ, input );
- SparseMatrix<Givaro::ZRing<Integer> > A (ms);
- Givaro::ZRing<Integer>::Element d;
- std::cout << "A is " << A.rowdim() << " by " << A.coldim() << std::endl;
- if (A.rowdim() <= 20 && A.coldim() <= 20) A.write(std::cerr << "A:=",Tag::FileFormat::Maple) << ';' << std::endl;
- DenseVector X(ZZ, A.coldim()),B(ZZ, A.rowdim());
-
- if (createB) {
- cerr << "Creating a random {-1,1} vector U, B is AU" << endl;
- DenseVector U(ZZ, A.coldim() );
- for(DenseVector::iterator it=U.begin();
- it != U.end(); ++it)
- if (drand48() <0.5)
- *it = -1;
- else
- *it = 1;
- A.apply(B,U);
- }
- else {
- for(DenseVector::iterator it=B.begin();
- it != B.end(); ++it)
- invect >> *it;
- }
-
-
- std::cout << "B is " << B << std::endl;
-
-
- Timer chrono;
-
- // BlasElimination
- std::cout << "BlasElimination" << std::endl;
- chrono.start();
- solve (X, d, A, B, Method::BlasElimination());
- chrono.stop();
-
- std::cout << "(BlasElimination) Solution is [";
- for(DenseVector::const_iterator it=X.begin();it != X.end(); ++it)
- ZZ.write(cout, *it) << " ";
- std::cout << "] / ";
- ZZ.write(std::cout, d)<< std::endl;
- std::cout << "CPU time (seconds): " << chrono.usertime() << std::endl;
-
- // Sparse Elimination
- std::cout << "Sparse Elimination" << std::endl;
- chrono.start();
- solve (X, d, A, B, Method::SparseElimination());
- chrono.stop();
-
- std::cout << "(SparseElimination) Solution is [";
- for(DenseVector::const_iterator it=X.begin();it != X.end(); ++it)
- ZZ.write(cout, *it) << " ";
- std::cout << "] / ";
- ZZ.write(std::cout, d)<< std::endl;
- std::cout << "CPU time (seconds): " << chrono.usertime() << std::endl;
-
- // Wiedemann
- std::cout << "Wiedemann" << std::endl;
- chrono.start();
- solve (X, d, A, B, Method::Wiedemann());
- chrono.stop();
-
- std::cout << "(Wiedemann) Solution is [";
- for(DenseVector::const_iterator it=X.begin();it != X.end(); ++it)
- ZZ.write(cout, *it) << " ";
- std::cout << "] / ";
- ZZ.write(std::cout, d) << std::endl;
- std::cout << "CPU time (seconds): " << chrono.usertime() << std::endl;
-
-
-
-#if 0
- // Lanczos
- std::cout << "Lanczos" << std::endl;
- chrono.start();
- solve (X, d, A, B, Method::Lanczos());
- chrono.stop();
-
- std::cout << "(Lanczos) Solution is [";
- for(DenseVector::const_iterator it=X.begin();it != X.end(); ++it)
- ZZ.write(cout, *it) << " ";
- std::cout << "] / ";
- ZZ.write(std::cout, d) << std::endl;
- std::cout << "CPU time (seconds): " << chrono.usertime() << std::endl;
-
-
- // Block Lanczos
- std::cout << "Block Lanczos" << std::endl;
- chrono.clear();
- chrono.start();
- solve (X, d, A, B, Method::BlockLanczos());
- chrono.stop();
-
- std::cout << "(Block Lanczos) Solution is [";
- for(DenseVector::const_iterator it=X.begin();it != X.end(); ++it)
- ZZ.write(cout, *it) << " ";
- std::cout << "] / ";
- ZZ.write(std::cout, d) << std::endl;
- std::cout << "CPU time (seconds): " << chrono.usertime() << std::endl;
-#endif
- }
-
return 0;
}
diff --git a/examples/test.sh b/examples/test.sh
index 5ec2367..18fc03d 100755
--- a/examples/test.sh
+++ b/examples/test.sh
@@ -2,6 +2,10 @@
# written by Brice Boyer (briceboyer) <boyer.brice at gmail.com>
# part of LinBox, see COPYING
+SED="sed"
+case "`uname`" in
+ Darwin*) SED="gsed" ;;
+esac
set -o nounset # Treat unset variables as an error
@@ -19,12 +23,12 @@ pass="true"
echo -n "check rank ... "
rank_cmd="Rank\sis\s"
./rank data/test.matrix 7 > linbox-tmp.data
-result=`cat linbox-tmp.data | grep ${rank_cmd} | sed 's/'"$rank_cmd"'\([0-9]*\).*/\1/'`
+result=`cat linbox-tmp.data | grep ${rank_cmd} | $SED 's/'"$rank_cmd"'\([0-9]*\).*/\1/'`
[ "$result" -eq "9" ] && success || { fail ; pass="false" ; }
echo -n "check rank ... "
./rank data/test.matrix > linbox-tmp.data
-result=`cat linbox-tmp.data | grep ${rank_cmd} | sed 's/'"$rank_cmd"'\([0-9]*\).*/\1/'`
+result=`cat linbox-tmp.data | grep ${rank_cmd} | $SED 's/'"$rank_cmd"'\([0-9]*\).*/\1/'`
[ "$result" -eq "10" ] && success || { fail ; pass="false" ; }
diff --git a/interfaces/sage/Makefile.am b/interfaces/sage/Makefile.am
index a3c6f47..c8a7bf4 100644
--- a/interfaces/sage/Makefile.am
+++ b/interfaces/sage/Makefile.am
@@ -23,7 +23,7 @@ if LINBOX_HAVE_SAGE
#AM_CPPFLAGS=-I$(top_srcdir) -I. -I../../linbox
-AM_CPPFLAGS = -DDISABLE_COMMENTATOR -I$(top_srcdir)/linbox $(DEPS_CFLAGS)
+AM_CPPFLAGS = -DDISABLE_COMMENTATOR -I$(top_srcdir)/linbox $(DEPS_CFLAGS) $(DEFAULT_CFLAGS)
LDADD = $(DEPS_LIBS) $(LDFLAGS)
#AM_CXXFLAGS = @DEFAULT_CFLAGS@ -DDISABLE_COMMENTATOR $(NTL_CFLAGS) $(OPTFLAGS) $(PARFLAGS)
diff --git a/linbox.pc.in b/linbox.pc.in
index 6a95e29..b37f9c2 100644
--- a/linbox.pc.in
+++ b/linbox.pc.in
@@ -1,14 +1,14 @@
/------------------ linbox.pc ------------------------
prefix=@prefix@
-exec_prefix=@prefix@/bin
+exec_prefix=@prefix@
libdir=@prefix@/lib
includedir=@prefix@/include
Name: linbox
Description: Exact Linear Algebra library
-URL: http://linbox-team.github.io/linbox/
+URL: http://github.com/linbox-team/linbox
Version: @VERSION@
-Requires: fflas-ffpack >= 2.2.0
+Requires: fflas-ffpack >= 2.2.2
Libs: -L${libdir} -llinbox @LINBOXSAGE_LIBS@ @NTL_LIBS@ @MPFR_LIBS@ @FPLLL_LIBS@ @IML_LIBS@ @FLINT_LIBS@ @OCL_LIBS@
Cflags: @DEFAULT_CFLAGS@ -DDISABLE_COMMENTATOR -I${includedir}/linbox @NTL_CFLAGS@ @MPFR_CFLAGS@ @FPLLL_CFLAGS@ @IML_CFLAGS@ @FLINT_CFLAGS@
\-------------------------------------------------------
diff --git a/linbox/Makefile.am b/linbox/Makefile.am
index b8c3d59..7232401 100644
--- a/linbox/Makefile.am
+++ b/linbox/Makefile.am
@@ -19,7 +19,7 @@
# ========LICENCE========
#/
-AM_CPPFLAGS= -I$(top_srcdir)/linbox $(DEPS_CFLAGS)
+AM_CPPFLAGS= -I$(top_srcdir)/linbox $(DEPS_CFLAGS) $(DEFAULT_CFLAGS)
LDADD = $(DEPS_LIBS) $(LDFLAGS)
#AM_CPPFLAGS = -I at includedir@ -I$(top_srcdir)/linbox -I$(top_srcdir) $(DEFCPPFLAGS) $(OPTFLAGS) $(FFLAS_FFPACK_CFLAGS) $(NTL_CFLAGS) $(MPFR_CFLAGS) $(FPLLL_CFLAGS) $(IML_CFLAGS) $(FLINT_CFLAGS) $(PARFLAGS)
diff --git a/linbox/algorithms/Makefile.am b/linbox/algorithms/Makefile.am
index 6a7b999..e7b343e 100644
--- a/linbox/algorithms/Makefile.am
+++ b/linbox/algorithms/Makefile.am
@@ -30,7 +30,7 @@ libalgorithms_la_SOURCES= diophantine-solver.C
# AM_CPPFLAGS= $(CBLAS_FLAG) $(GMP_CFLAGS) $(NTL_CFLAGS)
-AM_CPPFLAGS = -I$(top_srcdir)/linbox $(DEPS_CFLAGS)
+AM_CPPFLAGS= -I$(top_srcdir)/linbox $(DEPS_CFLAGS) $(DEFAULT_CFLAGS)
LDADD = $(DEPS_LIBS) $(LDFLAGS)
#AM_CXXFLAGS = @DEFAULT_CFLAGS@ -DDISABLE_COMMENTATOR $(NTL_CFLAGS) $(ATLAS_CFLAGS) $(FPLLL_CFLAGS) $(OCL_CFLAGS) $(OMPFLAGS)
@@ -148,6 +148,8 @@ pkgincludesub_HEADERS = \
short-vector.h \
rns.h \
rns.inl \
+ invariant-factors.h \
+ smith-form-iliopoulos2.h \
$(USE_OCL_HDRS)
# iml.h \
diff --git a/linbox/algorithms/block-wiedemann.h b/linbox/algorithms/block-wiedemann.h
index e19a073..384bcf6 100644
--- a/linbox/algorithms/block-wiedemann.h
+++ b/linbox/algorithms/block-wiedemann.h
@@ -79,12 +79,13 @@ namespace LinBox
m = A.rowdim();
n = A.coldim();
- size_t p,q;
+ uint32_t p,q;
+ // CP : converting to GMP int to get the bitsize is unsane ! Should be replaced by a tablelookup
integer tmp;
- tmp = m;
+ tmp = uint32_t(m);
p = tmp.bitsize()-1;
//p=sqrt(tmp);
- tmp = n;
+ tmp = uint32_t(n);
q = tmp.bitsize()-1;
//q=sqrt(tmp);
//std::cout<<"row block: "<<p<<std::endl;
diff --git a/linbox/algorithms/classic-rational-reconstruction.h b/linbox/algorithms/classic-rational-reconstruction.h
index 1de07be..0ed9271 100644
--- a/linbox/algorithms/classic-rational-reconstruction.h
+++ b/linbox/algorithms/classic-rational-reconstruction.h
@@ -260,7 +260,7 @@ namespace LinBox
b=1;
//Element s0,s; s0=1,s=0;//test time gcdex;
- Element T = m.bitsize();
+ Element T = (uint32_t) m.bitsize();
int c = 5; //should be changed here to enhance probability of correctness
while((a>0) && (r0.bitsize() > T.bitsize() + (unsigned long)c))
diff --git a/linbox/algorithms/coppersmith.h b/linbox/algorithms/coppersmith.h
index 53acf26..11664c5 100644
--- a/linbox/algorithms/coppersmith.h
+++ b/linbox/algorithms/coppersmith.h
@@ -76,7 +76,7 @@ namespace LinBox
//Set up the projection matrices and their dimensions
size_t d = B.coldim();
size_t r,c;
- integer tmp = d;
+ integer tmp = uint64_t(d);
//Set the blocking size, Using Pascal Giorgi's convention
if(blocking==0){
@@ -256,7 +256,7 @@ namespace LinBox
//Set up the projection matrices and their dimensions
size_t d = B.coldim();
size_t r,c;
- integer tmp = d;
+ integer tmp = uint64_t(d);
//Set the blocking size, Using Pascal Giorgi's convention
if(blocking==0){
@@ -394,7 +394,7 @@ namespace LinBox
//Set up the projection matrices and their dimensions
size_t d = B.coldim();
size_t r,c;
- integer tmp = d;
+ integer tmp = uint64_t(d);
//Use given blocking size, if not given use Pascal Giorgi's convention
if(blocking==0){
diff --git a/linbox/algorithms/matpoly-mult.h b/linbox/algorithms/matpoly-mult.h
index 93ce6d4..e77ae1d 100644
--- a/linbox/algorithms/matpoly-mult.h
+++ b/linbox/algorithms/matpoly-mult.h
@@ -485,7 +485,7 @@ namespace LinBox
_fftsize=0;
//check if field is based on fft prime
- size_t p = _p;
+ uint64_t p = _p;
if (p&1){
p-=1;
do { p=p>>1; _fftsize++;} while(!(p&0x0001));
@@ -519,13 +519,13 @@ namespace LinBox
while ( k ) {k>>=1; ++ln;}
// taking primes greater than current prime
- size_t bit = std::max((53-ln)>>1, _p.bitsize());
+ uint64_t bit = std::max((53-ln)>>1, _p.bitsize());
// get number of necessary primes
- integer ibound = uint64_t(n) * _p * _p * std::max(b.size(), c.size());
+ integer ibound = uint64_t(n) * _p * _p * uint64_t(std::max(b.size(), c.size()));
integer primesprod;
size_t nbrprimes=1;
- RandomFFTPrime fftprime((size_t)bit, FFT_PRIME_SEED);
+ RandomFFTPrime fftprime(bit, FFT_PRIME_SEED);
std::vector<integer> lprimes(10); lprimes.resize(nbrprimes);
lprimes[0] = fftprime.generatePrime();
primesprod = lprimes[0];
@@ -635,10 +635,10 @@ namespace LinBox
}
// taking primes greater than current prime
- size_t bit = std::max((53-ln)>>1, _p.bitsize());
+ uint64_t bit = std::max((53-ln)>>1, _p.bitsize());
// get number of necessary primes
- integer ibound = uint64_t(n) * _p * _p * std::max(b.size(), c.size());
+ integer ibound = uint64_t(n) * _p * _p * uint64_t(std::max(b.size(), c.size()));
integer primesprod;
size_t nbrprimes=1;
RandomFFTPrime fftprime(bit, FFT_PRIME_SEED);
@@ -1044,7 +1044,7 @@ namespace LinBox
size_t deg = b.size()+c.size()-1;
size_t lpts = 0;
- size_t pts =1; while (pts < deg) { pts= pts<<1; ++lpts; }
+ uint64_t pts =1; while (pts < deg) { pts= pts<<1; ++lpts; }
#ifdef FFT_TIMING
std::cout<<"FFT: points "<<pts<<"\n";
@@ -1266,7 +1266,7 @@ namespace LinBox
size_t deg = c.size()+1;
size_t lpts = 0;
- size_t pts =1; while (pts < deg) { pts= pts<<1; ++lpts; }
+ uint64_t pts =1; while (pts < deg) { pts= pts<<1; ++lpts; }
if (_p%pts != 1) {
std::cout<<"Error the prime is not a FFTPrime or it has too small power of 2\n";
diff --git a/linbox/algorithms/polynomial-matrix/Makefile.am b/linbox/algorithms/polynomial-matrix/Makefile.am
index a58ec59..dfdc3d5 100644
--- a/linbox/algorithms/polynomial-matrix/Makefile.am
+++ b/linbox/algorithms/polynomial-matrix/Makefile.am
@@ -31,6 +31,7 @@ pkgincludesub_HEADERS = \
matpoly-mult-fft-wordsize-fast.inl \
matpoly-mult-fft-wordsize-three-primes.inl \
matpoly-mult-fft-multiprecision.inl \
+ matpoly-mult-fft-recint.inl \
polynomial-fft-transform-simd.inl \
polynomial-fft-transform.h \
polynomial-fft-transform.inl \
diff --git a/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-multiprecision.inl b/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-multiprecision.inl
index 743d14b..549c7f7 100644
--- a/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-multiprecision.inl
+++ b/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-multiprecision.inl
@@ -56,7 +56,7 @@ namespace LinBox{
integer _maxnorm;
template<typename PMatrix1>
- size_t logmax(const PMatrix1 A) const {
+ size_t logmax(const PMatrix1& A) const {
size_t mm=A.get(0,0,0).bitsize();
for(size_t k=0;k<A.size();k++)
for (size_t i=0;i<A.rowdim()*A.coldim();i++){
@@ -66,31 +66,7 @@ namespace LinBox{
return mm;
}
- public:
- void getFFTPrime(uint64_t prime_max, size_t lpts, integer bound, std::vector<integer> &bas){
-
- RandomFFTPrime RdFFT(prime_max);
- size_t nbp=0;
- if (!RdFFT.generatePrimes(lpts,bound,bas)){
- integer MM=1;
- for(std::vector<integer>::size_type i=0;i<bas.size();i++)
- MM*=bas[i];
- RandomPrimeIter Rd(integer(prime_max).bitsize());
- integer tmp;
- do {
- do {Rd.random(tmp);}
- while (MM%tmp==0);
- bas.push_back(tmp);
- nbp++;
- MM*=tmp;
- } while (MM<bound);
- }
-#ifdef VERBOSE_FFT
- std::cout<<"MatPoly Multiprecision FFT : using "<<bas.size()-nbp<<" FFT primes and "<<nbp<<" normal primes "<<std::endl;
-#endif
- }
-
-
+ public:
inline const IntField & field() const { return *_field; }
@@ -99,7 +75,7 @@ namespace LinBox{
_field(&F), _maxnorm(maxnorm) {}
template<typename PMatrix1, typename PMatrix2, typename PMatrix3>
- void mul (PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b) {
+ void mul (PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b, size_t max_rowdeg=0) {
//compute a bound on the entry of the input matrix a and b
FFT_PROFILE_START(2);
integer maxA,maxB;
@@ -108,10 +84,11 @@ namespace LinBox{
maxA=1;maxA<<=uint64_t(logmax(a));
maxB=1;maxB<<=uint64_t(logmax(b));
}
- integer bound=2*maxA*maxB*uint64_t(a.coldim())*uint64_t(std::min(a.size(),b.size()));
+ integer bound=maxA*maxB*uint64_t(a.coldim())*uint64_t(std::min(a.size(),b.size()));
+ if (_maxnorm==0) bound*=2; //seems to compute over Z, need to double to handle possible negative value
FFT_PROFILING(2,"max norm computation");
- mul_crtla(c,a,b,maxA,maxB,bound);
+ mul_crtla(c,a,b,maxA,maxB,bound,max_rowdeg);
}
template<typename PMatrix1, typename PMatrix2, typename PMatrix3>
@@ -125,7 +102,12 @@ namespace LinBox{
maxA=1;maxA<<=uint64_t(logmax(a));
maxB=1;maxB<<=uint64_t(logmax(b));
}
- integer bound=2*maxA*maxB*integer((uint64_t)a.coldim())*integer((uint64_t)std::min(a.size(),b.size()));;
+ //std::cout<<"MIDP RNS bound: "<<maxA<<" "<<maxB<<" "<<a.coldim()<<" "<<a.size()<<" "<<b.size()<<std::endl;
+
+ //integer bound=2*maxA*maxB*integer((uint64_t)a.coldim())*integer((uint64_t)std::min(a.size(),b.size()));;
+ integer bound=maxA*maxB*integer((uint64_t)a.coldim());
+ if (_maxnorm==0) bound*=2; //seems to compute over Z, need to double to handle possible negative value
+
if (smallLeft)
bound*= (uint64_t)a.size();
else
@@ -160,7 +142,7 @@ namespace LinBox{
// WARNING: Polynomial Matrix should stored as matrix of polynomial with integer coefficient
template< typename PMatrix1,typename PMatrix2, typename PMatrix3>
void mul_crtla(PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b,
- const integer& maxA, const integer& maxB, const integer& bound) {
+ const integer& maxA, const integer& maxB, const integer& bound, size_t max_rowdeg=0) {
FFT_PROFILE_START(2);
linbox_check(a.coldim() == b.rowdim());
@@ -168,6 +150,7 @@ namespace LinBox{
size_t k = a.coldim();
size_t n = b.coldim();
size_t s= a.size()+b.size()-1;
+ if (max_rowdeg!=0) s = max_rowdeg+1;
c.resize(s);
size_t lpts=0;
size_t pts = 1; while (pts < s) { pts= pts<<1; ++lpts; }
@@ -178,9 +161,9 @@ namespace LinBox{
//size_t prime_bitsize= (53-lk)>>1;
// compute max prime value for FFLAS
- uint64_t prime_max= std::sqrt( (1ULL<<53) / k)+1;
+ uint64_t prime_max= std::min(uint64_t(std::sqrt( (1ULL<<53) / k)+1), uint64_t(Givaro::Modular<double>::maxCardinality()));
std::vector<integer> bas;
- getFFTPrime(prime_max,lpts,bound,bas);
+ getFFTPrime(prime_max,lpts,bound,bas,k,s);
// RandomFFTPrime RdFFT(prime_bitsize);
// if (!RdFFT.generatePrimes(lpts,bound,bas)){
// std::cout<<"COULD NOT FIND ENOUGH FFT PRIME in MatPoly FFTMUL taking normal primes..."<<std::endl;
@@ -194,6 +177,7 @@ namespace LinBox{
#ifdef FFT_PROFILER
//double tMul=0.,tCopy=0;;
if (FFT_PROF_LEVEL<3){
+ std::cout << "*** MatPoly FFT - MUL ***"<<std::endl;
std::cout << "number of FFT primes :" << num_primes << std::endl;
std::cout << "max prime : "<<prime_max<<" ("<<integer(prime_max).bitsize()<<")"<<std::endl;
std::cout << "bitsize of the output: "<<bound.bitsize()
@@ -204,33 +188,31 @@ namespace LinBox{
FFT_PROFILING(2,"init of CRT approach");
// reduce t_a and t_b modulo each FFT primes
size_t n_ta=m*k*a.size(), n_tb=k*n*b.size();
- //size_t n_ta=m*k*pts, n_tb=k*n*pts;
- //std::cout<<"----------------------------------------------"<<std::endl;
- //std::cout<<"MUL FFT RNS: "<<MEMINFO<<std::endl;
- std::cout<<"MUL FFT RNS: need "<<MB((m*n*pts+n_ta+n_tb)*num_primes*8 + 2*(m*k+k*n)*pts*8)<<"Mo"<<std::endl;
-
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB((n_ta+n_tb)*num_primes*8)<<"Mo"<<std::endl;
+ ADD_MEM(8*(n_ta+n_tb)*num_primes);
double* t_a_mod= new double[n_ta*num_primes];
double* t_b_mod= new double[n_tb*num_primes];
RNS.init(1, n_ta, t_a_mod, n_ta, a.getPointer(), n_ta, maxA);
RNS.init(1, n_tb, t_b_mod, n_tb, b.getPointer(), n_tb, maxB);
+ ADD_MEM(n_ta* (maxA.bitsize()/16 + (maxA.bitsize()%16?1:0)) *8); // needed by RNS init
+ DEL_MEM(n_ta* (maxA.bitsize()/16 + (maxA.bitsize()%16?1:0)) *8);
+ ADD_MEM(n_tb* (maxB.bitsize()/16 + (maxB.bitsize()%16?1:0)) *8); // needed by RNS init
+ DEL_MEM(n_tb* (maxB.bitsize()/16 + (maxB.bitsize()%16?1:0)) *8);
+
FFT_PROFILING(2,"reduction mod pi of input matrices");
std::vector<MatrixP_F*> c_i (num_primes);
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB((m*n*pts)*num_primes*8)<<"Mo"<<std::endl;
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB((2*(m*k+k*n)*pts)*8)<<"Mo"<<std::endl;
-
- // FFT_PROFILE_START(2);
- // auto sp=SPLITTER();
- // PARFOR1D(l,num_primes,sp,
+
for (size_t l=0;l<num_primes;l++)
{
//FFT_PROFILE_START;
ModField f(RNS._basis[l]);
MatrixP_F a_i (f, m, k, pts);
MatrixP_F b_i (f, k, n, pts);
-
+ //a_i.changeField(f);
+ //b_i.changeField(f);
+
c_i[l] = new MatrixP_F(f, m, n, pts);
+
// copy reduced data
for (size_t i=0;i<m*k;i++)
for (size_t j=0;j<a.size();j++)
@@ -238,14 +220,24 @@ namespace LinBox{
for (size_t i=0;i<k*n;i++)
for (size_t j=0;j<b.size();j++)
b_i.ref(i,j)=t_b_mod[l*n_tb+j+i*b.size()];
+ //std::cout<<"a"<<l<<":="<<a_i<<";\n";
+ //std::cout<<"b"<<l<<":="<<b_i<<";\n";
+
//FFT_PROFILE_GET(tCopy);
//PolynomialMatrixFFTPrimeMulDomain<ModField> fftdomain (f);
- PolynomialMatrixThreePrimesFFTMulDomain<ModField> fftdomain (f);
- fftdomain.mul_fft(lpts, *c_i[l], a_i, b_i);
+ PolynomialMatrixThreePrimesFFTMulDomain<ModField> fftdomain (f);
+ integer bound=integer(RNS._basis[l]-1)*integer(RNS._basis[l]-1)
+ *integer((uint64_t) k)*integer((uint64_t)std::min(a.size(),b.size()));
+
+ fftdomain.mul_fft(lpts, *c_i[l], a_i, b_i, bound);
+ //std::cout<<"c"<<l<<":="<<*c_i[l]<<";\n";
+ //std::cout<<"p"<<l<<":="<<uint64_t(RNS._basis[l])<<";\n";
//FFT_PROFILE_GET(tMul);
}
+ //std::cout<<"MUL FFT RNS: output polmat -> allocating "<<MB(num_primes*c_i[0]->realmeminfo())<<"Mo"<<std::endl;
//)
FFT_PROFILING(2,"FFTprime mult+copying");
+ DEL_MEM(8*(n_ta+n_tb)*num_primes);
delete[] t_a_mod;
delete[] t_b_mod;
//FFT_PROFILE(2,"copying linear reduced matrix",tCopy);
@@ -259,7 +251,9 @@ namespace LinBox{
// construct contiguous storage for c_i
size_t n_tc=m*n*s;
//std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB(n_tc*num_primes*8)<<"Mo"<<std::endl;
+ ADD_MEM(8*n_tc*num_primes);
double *t_c_mod = new double[n_tc*num_primes];
+ //std::cout<<"MUL FFT RNS: output RNS -> allocating "<<MB((n_tc)*num_primes*8)<<"Mo"<<std::endl;
for (size_t l=0;l<num_primes;l++){
for (size_t i=0;i<m*n;i++)
for (size_t j=0;j<s;j++)
@@ -270,8 +264,12 @@ namespace LinBox{
// reconstruct the result in C
RNS.convert(1,n_tc,0,c.getWritePointer(),n_tc, t_c_mod, n_tc);
+ ADD_MEM(n_tc*RNS._ldm*8);
+ DEL_MEM(n_tc*RNS._ldm*8);
+
//std::cout<<"MUL FFT RNS: "<<MEMINFO<<std::endl;
//std::cout<<"----------------------------------------------"<<std::endl;
+ DEL_MEM(8*n_tc*num_primes);
delete[] t_c_mod;
}
@@ -296,9 +294,9 @@ namespace LinBox{
size_t pts = 1; while (pts < s) { pts= pts<<1; ++lpts; }
// compute max prime value for FFLAS
- uint64_t prime_max= std::sqrt( (1ULL<<53) / k)+1;
+ uint64_t prime_max= std::min(uint64_t(std::sqrt( (1ULL<<53) / k)+1), uint64_t(Givaro::Modular<double>::maxCardinality()));
std::vector<integer> bas;
- getFFTPrime(prime_max,lpts,bound,bas);
+ getFFTPrime(prime_max,lpts,bound,bas,k,s);
std::vector<double> basis(bas.size());
std::copy(bas.begin(),bas.end(),basis.begin());
@@ -326,14 +324,10 @@ namespace LinBox{
// loop for memory saving
size_t CRT_NBPRIME=4;
+ ADD_MEM(8*(n_ta+n_tb)*CRT_NBPRIME);
double* t_a_mod= new double[n_ta*CRT_NBPRIME];
double* t_b_mod= new double[n_tb*CRT_NBPRIME];
- std::cout<<"MUL FFT RNS: input/output data: "<< MB((n_ta*(maxA.bitsize()+128) +n_tb*(maxB.bitsize()+128) +m*k*s*(bound.bitsize()+128))/8)<<"Mo"<<std::endl;
- std::cout<<"MUL FFT RNS: initial need "<<MB((m*n*pts+n_ta+n_tb)*num_primes*8 + 2*(m*k+k*n)*pts*8)<<"Mo"<<std::endl;
- std::cout<<"MUL FFT RNS: RNS in: "<<MB( (n_ta+n_tb)*CRT_NBPRIME*8)<<"Mo"<<std::endl;
- std::cout<<"MUL FFT RNS: RNC com: "<<MB(2*(m*k+k*n)*pts*8)<<"Mo"<<std::endl;
- std::cout<<"MUL FFT RNS: RNS out: "<<MB((m*n*pts)*num_primes*8 )<<"Mo"<<std::endl;
-
+
for(size_t loop=0;loop<num_primes;loop+=CRT_NBPRIME){
// create chunk of RNS
@@ -343,14 +337,10 @@ namespace LinBox{
FFPACK::rns_double smallRNS(smallBasis);
smallRNS.precompute_cst(RNS._ldm);
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB((n_ta+n_tb)*num_primes*8)<<"Mo"<<std::endl;
smallRNS.init(1, n_ta, t_a_mod, n_ta, a.getPointer(), n_ta, maxA);
smallRNS.init(1, n_tb, t_b_mod, n_tb, b.getPointer(), n_tb, maxB);
FFT_PROFILING(2,"reduction mod pi of input matrices");
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB((m*n*pts)*num_primes*8)<<"Mo"<<std::endl;
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB((2*(m*k+k*n)*pts)*8)<<"Mo"<<std::endl;
-
for (size_t l=0;l<rns_chunk;l++)
{
//FFT_PROFILE_START;
@@ -368,8 +358,11 @@ namespace LinBox{
b_i.ref(i,j)=t_b_mod[l*n_tb+j+i*b.size()];
//FFT_PROFILE_GET(tCopy);
//PolynomialMatrixFFTPrimeMulDomain<ModField> fftdomain (f);
- PolynomialMatrixThreePrimesFFTMulDomain<ModField> fftdomain (f);
- fftdomain.mul_fft(lpts, *c_i[loop+l], a_i, b_i);
+ PolynomialMatrixThreePrimesFFTMulDomain<ModField> fftdomain (f);
+ integer bound=integer(smallRNS._basis[l]-1)*integer(smallRNS._basis[l]-1)
+ *integer(k)*integer((uint64_t)std::min(a.size(),b.size()));
+
+ fftdomain.mul_fft(lpts, *c_i[loop+l], a_i, b_i, bound);
//FFT_PROFILE_GET(tMul);
}
FFT_PROFILING(2,"FFTprime mult+copying");
@@ -377,9 +370,9 @@ namespace LinBox{
//FFT_PROFILE(2,"FFTprime multiplication",tMul);
} // end of loop for memory saving
- delete[] t_a_mod;
- delete[] t_b_mod;
-
+ DEL_MEM(8*(n_ta+n_tb)*CRT_NBPRIME);
+ delete[] t_a_mod;
+ delete[] t_b_mod;
if (num_primes < 2) {
FFT_PROFILE_START(2);
@@ -389,6 +382,7 @@ namespace LinBox{
// construct contiguous storage for c_i
size_t n_tc=m*n*s;
//std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB(n_tc*num_primes*8)<<"Mo"<<std::endl;
+ ADD_MEM(8*(n_tc)*num_primes);
double *t_c_mod = new double[n_tc*num_primes];
for (size_t l=0;l<num_primes;l++){
for (size_t i=0;i<m*n;i++)
@@ -400,10 +394,13 @@ namespace LinBox{
// reconstruct the result in C
RNS.convert(1,n_tc,0,c.getWritePointer(),n_tc, t_c_mod, n_tc);
+ ADD_MEM(n_tc*RNS._ldm*8);
+ DEL_MEM(n_tc*RNS._ldm*8);
+
//std::cout<<"MUL FFT RNS: "<<MEMINFO<<std::endl;
//std::cout<<"----------------------------------------------"<<std::endl;
+ DEL_MEM(8*n_tc*num_primes);
delete[] t_c_mod;
-
}
FFT_PROFILING(2,"k prime reconstruction");
// std::cout<<"CC:="<<c<<std::endl;
@@ -412,6 +409,7 @@ namespace LinBox{
+
// template< typename PMatrix1,typename PMatrix2, typename PMatrix3>
// void midproduct_crtla(PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b,
// const integer& maxA, const integer& maxB, const integer& bound,
@@ -464,12 +462,7 @@ namespace LinBox{
// compute max prime value for FFLAS
uint64_t prime_max= std::sqrt( (1ULL<<53) / k)+1;
std::vector<integer> bas;
- getFFTPrime(prime_max,lpts,bound,bas);
- //RandomFFTPrime RdFFT(prime_bitsize);
- // if (!RdFFT.generatePrimes(bound,bas)){
- // std::cout<<"COULD NOT FIND ENOUGH FFT PRIME in MatPoly FFTMUL exiting..."<<std::endl;
- // throw LinboxError("LinBox ERROR: not enough FFT Prime\n");
- // }
+ getFFTPrime(prime_max,lpts,bound,bas,k,deg);
std::vector<double> basis(bas.size());
std::copy(bas.begin(),bas.end(),basis.begin());
@@ -478,7 +471,8 @@ namespace LinBox{
#ifdef FFT_PROFILER
double tMul=0.,tCopy=0;;
if (FFT_PROF_LEVEL<3){
- std::cout << "number of FFT primes :" << num_primes << std::endl;
+ std::cout << "*** MatPoly FFT - MIDP ***"<<std::endl;
+ std::cout << "number of FFT primes :" << num_primes << std::endl;
std::cout << "max prime : "<<prime_max<<" ("<<integer(prime_max).bitsize()<<")"<<std::endl;
std::cout << "bitsize of the output: "<<bound.bitsize()
<<"( "<< RNS._M.bitsize()<<" )"<<std::endl;
@@ -488,20 +482,19 @@ namespace LinBox{
FFT_PROFILING(2,"init of CRT approach");
// reduce t_a and t_b modulo each FFT primes
size_t n_ta=m*k*a.size(), n_tb=k*n*b.size();
+ ADD_MEM(8*(n_ta+n_tb)*num_primes);
double* t_a_mod= new double[n_ta*num_primes];
double* t_b_mod= new double[n_tb*num_primes];
+ //std::cout<<"MIDP FFT RNS: input RNS -> allocating "<<MB((n_ta+n_tb)*num_primes*8)<<"Mo"<<std::endl;
+
RNS.init(1, n_ta, t_a_mod, n_ta, a.getPointer(), n_ta, maxA);
RNS.init(1, n_tb, t_b_mod, n_tb, b.getPointer(), n_tb, maxB);
- FFT_PROFILING(2,"reduction mod pi of input matrices");
-
- //std::cout<<"----------------------------------------------"<<std::endl;
- //std::cout<<"MIDP FFT RNS: "<<MEMINFO<<std::endl;
- //std::cout<<"MIDP FFT RNS: need "<<MB((m*n*pts+n_ta+n_tb)*num_primes*8 + 2*(m*k+k*n)*pts*8)<<"Mo"<<std::endl;
+ ADD_MEM(n_ta* (maxA.bitsize()/16 + (maxA.bitsize()%16?1:0)) *8); // needed by RNS init
+ DEL_MEM(n_ta* (maxA.bitsize()/16 + (maxA.bitsize()%16?1:0)) *8);
+ ADD_MEM(n_tb* (maxB.bitsize()/16 + (maxB.bitsize()%16?1:0)) *8); // needed by RNS init
+ DEL_MEM(n_tb* (maxB.bitsize()/16 + (maxB.bitsize()%16?1:0)) *8);
-
- //std::cout<<"MIDP FFT RNS: RNS -> allocating "<<MB((n_ta+n_tb)*num_primes*8)<<"Mo"<<std::endl;
- //std::cout<<"MIDP FFT RNS: RNS -> allocating "<<MB((m*n)*pts*num_primes*8)<<"Mo"<<std::endl;
- //std::cout<<"MIDP FFT RNS: "<<MEMINFO<<std::endl;
+ FFT_PROFILING(2,"reduction mod pi of input matrices");
std::vector<MatrixP_F*> c_i (num_primes);
@@ -532,8 +525,11 @@ namespace LinBox{
FFT_PROFILE_GET(2,tMul);
}
+
+ DEL_MEM(8*(n_ta+n_tb)*num_primes);
delete[] t_a_mod;
delete[] t_b_mod;
+
FFT_PROFILE(2,"copying linear reduced matrix",tCopy);
FFT_PROFILE(2,"FFTprime multiplication",tMul);
@@ -545,7 +541,9 @@ namespace LinBox{
// construct contiguous storage for c_i
double *t_c_mod;
size_t n_tc=m*n*c.size();
+ ADD_MEM(8*(n_tc)*num_primes);
t_c_mod = new double[n_tc*num_primes];
+ //std::cout<<"MIDP FFT RNS: output RNS -> allocating "<<MB((n_tc)*num_primes*8)<<"Mo"<<std::endl;
for (size_t l=0;l<num_primes;l++){
for (size_t i=0;i<m*n;i++)
for (size_t j=0;j<c.size();j++)
@@ -556,12 +554,12 @@ namespace LinBox{
// reconstruct the result in C
RNS.convert(1,n_tc,0,c.getWritePointer(),n_tc, t_c_mod, n_tc);
- //std::cout<<"MIDP FFT RNS: "<<MEMINFO<<std::endl;
- delete[] t_c_mod;
-
- //std::cout<<"MUL FFT RNS: "<<MEMINFO<<std::endl;
- //std::cout<<"----------------------------------------------"<<std::endl;
+ ADD_MEM(n_tc*RNS._ldm*8); // needed by RNS
+ DEL_MEM(n_tc*RNS._ldm*8);
+ DEL_MEM(8*n_tc*num_primes);
+ delete[] t_c_mod;
+
FFT_PROFILING(2,"k prime reconstruction");
}
}
@@ -594,7 +592,7 @@ namespace LinBox{
}
template<typename Matrix1, typename Matrix2, typename Matrix3>
- void mul (Matrix1 &c, const Matrix2 &a, const Matrix3 &b) {
+ void mul (Matrix1 &c, const Matrix2 &a, const Matrix3 &b, size_t max_rowdeg=0) {
FFT_PROFILE_START(2);
MatrixP_F a2(field(),a.rowdim(),a.coldim(),a.size());
MatrixP_F b2(field(),b.rowdim(),b.coldim(),b.size());
@@ -602,7 +600,7 @@ namespace LinBox{
a2.copy(a,0,a.size()-1);
b2.copy(b,0,b.size()-1);
FFT_PROFILING(2,"converting rep of input");
- mul(c2,a2,b2);
+ mul(c2,a2,b2, max_rowdeg);
FFT_PROFILE_START(2);
c.copy(c2,0,c.size()-1);
FFT_PROFILING(2,"converting rep of output");
@@ -610,14 +608,18 @@ namespace LinBox{
}
// Matrix with polynomials
- void mul (MatrixP_F &c, const MatrixP_F &a, const MatrixP_F &b) {
-
+ void mul (MatrixP_F &c, const MatrixP_F &a, const MatrixP_F &b, size_t max_rowdeg=0) {
+
FFT_PROFILE_START(2);
IntField Z;
PolynomialMatrixFFTMulDomain<IntField> Zmul(Z,_p);
integer bound=2*_p*_p*integer((uint64_t)a.coldim())*integer((uint64_t)std::min(a.size(),b.size()));
- //Zmul.mul_crtla(c,a,b,_p,_p,bound);
+#ifdef TRY1
Zmul.mul_crtla2(c,a,b,_p,_p,bound);
+#else
+ Zmul.mul_crtla(c,a,b,_p,_p,bound, max_rowdeg);
+#endif
+
// reduce the result mod p
FFT_PROFILE_START(2);
@@ -658,11 +660,6 @@ namespace LinBox{
}
};
-
-
-
-
-
} // end of namespace LinBox
#endif // __LINBOX_matpoly_mult_ftt_multiprecision_INL
diff --git a/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-multiprecision.inl b/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-recint.inl
similarity index 52%
copy from linbox/algorithms/polynomial-matrix/matpoly-mult-fft-multiprecision.inl
copy to linbox/algorithms/polynomial-matrix/matpoly-mult-fft-recint.inl
index 743d14b..447cc65 100644
--- a/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-multiprecision.inl
+++ b/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-recint.inl
@@ -1,11 +1,9 @@
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
/*
- * Copyright (C) 2013 Pascal Giorgi
- * Romain Lebreton
+ * Copyright (C) 2015 Pascal Giorgi
*
* Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
- * Romain Lebreton <lebreton at lirmm.fr>
*
* ========LICENCE========
* This file is part of the library LinBox.
@@ -25,10 +23,11 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* ========LICENCE========
*/
-#ifndef __LINBOX_matpoly_mult_ftt_multiprecision_INL
-#define __LINBOX_matpoly_mult_ftt_multiprecision_INL
+#ifndef __LINBOX_matpoly_mult_ftt_recint_INL
+#define __LINBOX_matpoly_mult_ftt_recint_INL
#include <givaro/zring.h>
+#include <recint/rint.h>
#include "linbox/ring/modular.h"
#include "linbox/randiter/random-fftprime.h"
#include "linbox/randiter/random-prime.h"
@@ -37,17 +36,24 @@
#ifndef MEMINFO
#define MEMINFO ""
#endif
+
+#ifdef LOW_MEMORY_PMBASIS
+#define MEMFACTOR 4
+#define CRT_SIZE 3
+#endif
+
namespace LinBox{
/***************************************************
**** Polynomial Matrix Multiplication over Z[x] ***
***************************************************/
- template<>
- class PolynomialMatrixFFTMulDomain<Givaro::ZRing<integer> > {
+ template<size_t K >
+ class PolynomialMatrixFFTMulDomain<Givaro::ZRing<RecInt::ruint<K> > > {
public:
- typedef Givaro::ZRing<integer> IntField;
+ typedef Givaro::ZRing<RecInt::ruint<K> > IntField;
+ typedef RecInt::ruint<K> Element;
//typedef Givaro::Modular<uint32_t> ModField;
- typedef Givaro::Modular<double> ModField;
+ typedef Givaro::Modular<double> ModField;
typedef PolynomialMatrix<PMType::polfirst,PMStorage::plain,ModField> MatrixP_F; // Polynomial matrix stored as a matrix of polynomials
typedef PolynomialMatrix<PMType::polfirst,PMStorage::plain,IntField> MatrixP_I; // Polynomial matrix stored as a matrix of polynomials
@@ -56,39 +62,11 @@ namespace LinBox{
integer _maxnorm;
template<typename PMatrix1>
- size_t logmax(const PMatrix1 A) const {
- size_t mm=A.get(0,0,0).bitsize();
- for(size_t k=0;k<A.size();k++)
- for (size_t i=0;i<A.rowdim()*A.coldim();i++){
- size_t tmp=A.get(i,k).bitsize();
- mm=std::max(mm,tmp);
- }
- return mm;
+ size_t logmax(const PMatrix1& A) const {
+ return size_t(1)<<K;
}
public:
- void getFFTPrime(uint64_t prime_max, size_t lpts, integer bound, std::vector<integer> &bas){
-
- RandomFFTPrime RdFFT(prime_max);
- size_t nbp=0;
- if (!RdFFT.generatePrimes(lpts,bound,bas)){
- integer MM=1;
- for(std::vector<integer>::size_type i=0;i<bas.size();i++)
- MM*=bas[i];
- RandomPrimeIter Rd(integer(prime_max).bitsize());
- integer tmp;
- do {
- do {Rd.random(tmp);}
- while (MM%tmp==0);
- bas.push_back(tmp);
- nbp++;
- MM*=tmp;
- } while (MM<bound);
- }
-#ifdef VERBOSE_FFT
- std::cout<<"MatPoly Multiprecision FFT : using "<<bas.size()-nbp<<" FFT primes and "<<nbp<<" normal primes "<<std::endl;
-#endif
- }
@@ -99,7 +77,7 @@ namespace LinBox{
_field(&F), _maxnorm(maxnorm) {}
template<typename PMatrix1, typename PMatrix2, typename PMatrix3>
- void mul (PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b) {
+ void mul (PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b, size_t max_rowdeg=0) {
//compute a bound on the entry of the input matrix a and b
FFT_PROFILE_START(2);
integer maxA,maxB;
@@ -108,11 +86,13 @@ namespace LinBox{
maxA=1;maxA<<=uint64_t(logmax(a));
maxB=1;maxB<<=uint64_t(logmax(b));
}
- integer bound=2*maxA*maxB*uint64_t(a.coldim())*uint64_t(std::min(a.size(),b.size()));
+ integer bound=maxA*maxB*uint64_t(a.coldim())*uint64_t(std::min(a.size(),b.size()));
+ if (_maxnorm==0) bound*=2; //seems to compute over Z, need to double to handle possible negative value
FFT_PROFILING(2,"max norm computation");
- mul_crtla(c,a,b,maxA,maxB,bound);
+ mul_crtla(c,a,b,maxA,maxB,bound, max_rowdeg);
}
+
template<typename PMatrix1, typename PMatrix2, typename PMatrix3>
void midproduct (PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b,
@@ -125,7 +105,8 @@ namespace LinBox{
maxA=1;maxA<<=uint64_t(logmax(a));
maxB=1;maxB<<=uint64_t(logmax(b));
}
- integer bound=2*maxA*maxB*integer((uint64_t)a.coldim())*integer((uint64_t)std::min(a.size(),b.size()));;
+ integer bound=maxA*maxB*integer((uint64_t)a.coldim());
+ if (_maxnorm==0) bound*=2; //seems to compute over Z, need to double to handle possible negative value
if (smallLeft)
bound*= (uint64_t)a.size();
else
@@ -137,56 +118,33 @@ namespace LinBox{
}
- // template< typename PMatrix1,typename PMatrix2, typename PMatrix3>
- // void mul_crtla(PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b,
- // const integer& maxA, const integer& maxB, const integer& bound) {
- // // (convert to MatrixP representation)
- // FFT_PROFILE_START;
- // MatrixP_I a2(field(),a.rowdim(),a.coldim(),a.size());
- // MatrixP_I b2(field(),b.rowdim(),b.coldim(),b.size());
- // a2.copy(a,0,a.size()-1);
- // b2.copy(b,0,b.size()-1);
- // MatrixP_I c2(field(),c.rowdim(),c.coldim(),c.size());
- // FFT_PROFILING(2,"converting rep of input matrices");
- // mul_crtla(c2,a2,b2,maxA,maxB,bound);
- // c.copy(c2,0,c.size()-1);
- // FFT_PROFILING(2,"converting rep of output matrices");
- // }
-
-
- // void mul_crtla(MatrixP_I &c, const MatrixP_I &a, const MatrixP_I &b,
- // const integer& maxA, const integer& maxB, const integer& bound){
-
- // WARNING: Polynomial Matrix should stored as matrix of polynomial with integer coefficient
+
+ // WARNING: the polynomial matrix should be stored as a matrix of polynomials with integer coefficients
+ // max_rowdeg -> when nonzero, the output size is taken as max_rowdeg+1 (use it when the size is known in advance and is less than a.size()+b.size()-1)
template< typename PMatrix1,typename PMatrix2, typename PMatrix3>
void mul_crtla(PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b,
- const integer& maxA, const integer& maxB, const integer& bound) {
-
+ const integer& maxA, const integer& maxB, const integer& bound, size_t max_rowdeg=0) {
+ //std::cout<<"MUL CRT LA STARTING: "<<STR_MEMINFO<<std::endl;
FFT_PROFILE_START(2);
linbox_check(a.coldim() == b.rowdim());
size_t m = a.rowdim();
size_t k = a.coldim();
size_t n = b.coldim();
- size_t s= a.size()+b.size()-1;
+ size_t s= a.size()+b.size()-1; // MUST BE CHANGED TO the 0-rowdeg of (a.b)
+ if (max_rowdeg!=0) s = max_rowdeg+1;
c.resize(s);
size_t lpts=0;
size_t pts = 1; while (pts < s) { pts= pts<<1; ++lpts; }
- // compute bit size of feasible prime for FFLAS
- // size_t _k=k,lk=0;
- //while ( _k ) {_k>>=1; ++lk;}
- //size_t prime_bitsize= (53-lk)>>1;
-
+ //std::cout<<"MULCRT_LA: "<<c.size()<<" -> "<<a.size()<<"x"<<b.size()<<" (nb pts=2^"<<lpts<<")\n";
+
// compute max prime value for FFLAS
- uint64_t prime_max= std::sqrt( (1ULL<<53) / k)+1;
+ //uint64_t prime_max=std::sqrt( (1ULL<<53) /k)+1;
+ uint64_t prime_max=maxFFTPrimeValue(k,pts); // CAREFUL: only for Modular<double>
+
std::vector<integer> bas;
- getFFTPrime(prime_max,lpts,bound,bas);
- // RandomFFTPrime RdFFT(prime_bitsize);
- // if (!RdFFT.generatePrimes(lpts,bound,bas)){
- // std::cout<<"COULD NOT FIND ENOUGH FFT PRIME in MatPoly FFTMUL taking normal primes..."<<std::endl;
- // exit(1);
- // }
-
+ getFFTPrime(prime_max,lpts,bound,bas,k,s);
+
std::vector<double> basis(bas.size());
std::copy(bas.begin(),bas.end(),basis.begin());
FFPACK::rns_double RNS(basis);
@@ -203,158 +161,61 @@ namespace LinBox{
#endif
FFT_PROFILING(2,"init of CRT approach");
// reduce t_a and t_b modulo each FFT primes
- size_t n_ta=m*k*a.size(), n_tb=k*n*b.size();
- //size_t n_ta=m*k*pts, n_tb=k*n*pts;
- //std::cout<<"----------------------------------------------"<<std::endl;
- //std::cout<<"MUL FFT RNS: "<<MEMINFO<<std::endl;
- std::cout<<"MUL FFT RNS: need "<<MB((m*n*pts+n_ta+n_tb)*num_primes*8 + 2*(m*k+k*n)*pts*8)<<"Mo"<<std::endl;
-
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB((n_ta+n_tb)*num_primes*8)<<"Mo"<<std::endl;
+ size_t n_ta=m*k*a.size(), n_tb=k*n*b.size();
+ std::vector<MatrixP_F*> c_i (num_primes);
+
+#ifndef LOW_MEMORY_PMBASIS
+ ADD_MEM(8*(n_ta+n_tb)*num_primes);
double* t_a_mod= new double[n_ta*num_primes];
double* t_b_mod= new double[n_tb*num_primes];
RNS.init(1, n_ta, t_a_mod, n_ta, a.getPointer(), n_ta, maxA);
RNS.init(1, n_tb, t_b_mod, n_tb, b.getPointer(), n_tb, maxB);
FFT_PROFILING(2,"reduction mod pi of input matrices");
-
- std::vector<MatrixP_F*> c_i (num_primes);
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB((m*n*pts)*num_primes*8)<<"Mo"<<std::endl;
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB((2*(m*k+k*n)*pts)*8)<<"Mo"<<std::endl;
-
- // FFT_PROFILE_START(2);
- // auto sp=SPLITTER();
- // PARFOR1D(l,num_primes,sp,
+
+ FFT_PROFILE_START(2);
for (size_t l=0;l<num_primes;l++)
- {
- //FFT_PROFILE_START;
- ModField f(RNS._basis[l]);
- MatrixP_F a_i (f, m, k, pts);
- MatrixP_F b_i (f, k, n, pts);
-
- c_i[l] = new MatrixP_F(f, m, n, pts);
- // copy reduced data
- for (size_t i=0;i<m*k;i++)
- for (size_t j=0;j<a.size();j++)
- a_i.ref(i,j)=t_a_mod[l*n_ta+j+i*a.size()];
- for (size_t i=0;i<k*n;i++)
- for (size_t j=0;j<b.size();j++)
- b_i.ref(i,j)=t_b_mod[l*n_tb+j+i*b.size()];
- //FFT_PROFILE_GET(tCopy);
- //PolynomialMatrixFFTPrimeMulDomain<ModField> fftdomain (f);
- PolynomialMatrixThreePrimesFFTMulDomain<ModField> fftdomain (f);
- fftdomain.mul_fft(lpts, *c_i[l], a_i, b_i);
- //FFT_PROFILE_GET(tMul);
- }
- //)
+ {
+ //FFT_PROFILE_START;
+ ModField f(RNS._basis[l]);
+ MatrixP_F a_i (f, m, k, pts);
+ MatrixP_F b_i (f, k, n, pts);
+
+ c_i[l] = new MatrixP_F(f, m, n, pts);
+ // copy reduced data
+ for (size_t i=0;i<m*k;i++)
+ for (size_t j=0;j<a.size();j++)
+ a_i.ref(i,j)=t_a_mod[l*n_ta+j+i*a.size()];
+ for (size_t i=0;i<k*n;i++)
+ for (size_t j=0;j<b.size();j++)
+ b_i.ref(i,j)=t_b_mod[l*n_tb+j+i*b.size()];
+ PolynomialMatrixThreePrimesFFTMulDomain<ModField> fftdomain (f);
+ integer bound=integer(RNS._basis[l]-1)*integer(RNS._basis[l]-1)
+ *integer((uint64_t) k)*integer((uint64_t)std::min(a.size(),b.size()));
+
+ fftdomain.mul_fft(lpts, *c_i[l], a_i, b_i, bound);
+ }
FFT_PROFILING(2,"FFTprime mult+copying");
+ DEL_MEM(8*(n_ta+n_tb)*num_primes);
delete[] t_a_mod;
delete[] t_b_mod;
- //FFT_PROFILE(2,"copying linear reduced matrix",tCopy);
- //FFT_PROFILE(2,"FFTprime multiplication",tMul);
-
- if (num_primes < 2) {
- FFT_PROFILE_START(2);
- c.copy(*c_i[0],0,s-1);
- } else {
- FFT_PROFILE_START(2);
- // construct contiguous storage for c_i
- size_t n_tc=m*n*s;
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB(n_tc*num_primes*8)<<"Mo"<<std::endl;
- double *t_c_mod = new double[n_tc*num_primes];
- for (size_t l=0;l<num_primes;l++){
- for (size_t i=0;i<m*n;i++)
- for (size_t j=0;j<s;j++)
- t_c_mod[l*n_tc + (j+i*s)]= c_i[l]->get(i,j);
- delete c_i[l];
- }
- FFT_PROFILING(2,"linearization of results mod pi");
-
- // reconstruct the result in C
- RNS.convert(1,n_tc,0,c.getWritePointer(),n_tc, t_c_mod, n_tc);
- //std::cout<<"MUL FFT RNS: "<<MEMINFO<<std::endl;
- //std::cout<<"----------------------------------------------"<<std::endl;
- delete[] t_c_mod;
-
- }
- FFT_PROFILING(2,"k prime reconstruction");
- // std::cout<<"CC:="<<c<<std::endl;
- // std::cout<<"<-----------------: "<<std::endl;;
- }
-
- // WARNING: Polynomial Matrix should stored as matrix of polynomial with integer coefficient
- template< typename PMatrix1,typename PMatrix2, typename PMatrix3>
- void mul_crtla2(PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b,
- const integer& maxA, const integer& maxB, const integer& bound) {
-
- FFT_PROFILE_START(2);
- linbox_check(a.coldim() == b.rowdim());
- size_t m = a.rowdim();
- size_t k = a.coldim();
- size_t n = b.coldim();
- size_t s= a.size()+b.size()-1;
- c.resize(s);
- size_t lpts=0;
- size_t pts = 1; while (pts < s) { pts= pts<<1; ++lpts; }
-
- // compute max prime value for FFLAS
- uint64_t prime_max= std::sqrt( (1ULL<<53) / k)+1;
- std::vector<integer> bas;
- getFFTPrime(prime_max,lpts,bound,bas);
-
- std::vector<double> basis(bas.size());
- std::copy(bas.begin(),bas.end(),basis.begin());
- FFPACK::rns_double RNS(basis);
- size_t num_primes = RNS._size;
-
-
-#ifdef FFT_PROFILER
- //double tMul=0.,tCopy=0;;
- if (FFT_PROF_LEVEL<3){
- std::cout << "number of FFT primes :" << num_primes << std::endl;
- std::cout << "max prime : "<<prime_max<<" ("<<integer(prime_max).bitsize()<<")"<<std::endl;
- std::cout << "bitsize of the output: "<<bound.bitsize()
- <<"( "<< RNS._M.bitsize()<<" )"<<std::endl;
- std::cout <<" +++++++++++++++++++++++++++++++"<<std::endl;
- }
-#endif
-
-
- FFT_PROFILING(2,"init of CRT approach");
- // reduce t_a and t_b modulo each FFT primes
- size_t n_ta=m*k*a.size(), n_tb=k*n*b.size();
- std::vector<MatrixP_F*> c_i (num_primes);
-
-
- // loop for memory saving
- size_t CRT_NBPRIME=4;
+#else
+ size_t CRT_NBPRIME=CRT_SIZE;
+ ADD_MEM(8*(n_ta+n_tb)*CRT_NBPRIME);
double* t_a_mod= new double[n_ta*CRT_NBPRIME];
double* t_b_mod= new double[n_tb*CRT_NBPRIME];
- std::cout<<"MUL FFT RNS: input/output data: "<< MB((n_ta*(maxA.bitsize()+128) +n_tb*(maxB.bitsize()+128) +m*k*s*(bound.bitsize()+128))/8)<<"Mo"<<std::endl;
- std::cout<<"MUL FFT RNS: initial need "<<MB((m*n*pts+n_ta+n_tb)*num_primes*8 + 2*(m*k+k*n)*pts*8)<<"Mo"<<std::endl;
- std::cout<<"MUL FFT RNS: RNS in: "<<MB( (n_ta+n_tb)*CRT_NBPRIME*8)<<"Mo"<<std::endl;
- std::cout<<"MUL FFT RNS: RNC com: "<<MB(2*(m*k+k*n)*pts*8)<<"Mo"<<std::endl;
- std::cout<<"MUL FFT RNS: RNS out: "<<MB((m*n*pts)*num_primes*8 )<<"Mo"<<std::endl;
- for(size_t loop=0;loop<num_primes;loop+=CRT_NBPRIME){
-
+ for(size_t loop=0;loop<num_primes;loop+=CRT_NBPRIME){
// create chunk of RNS
size_t rns_chunk=std::min(CRT_NBPRIME,num_primes-loop); // nbr of primes in the current smallRNS basis
std::vector<double> smallBasis(rns_chunk);
std::copy(basis.begin()+loop,basis.begin()+loop+rns_chunk,smallBasis.begin());
FFPACK::rns_double smallRNS(smallBasis);
- smallRNS.precompute_cst(RNS._ldm);
-
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB((n_ta+n_tb)*num_primes*8)<<"Mo"<<std::endl;
+ smallRNS.precompute_cst(RNS._ldm);
smallRNS.init(1, n_ta, t_a_mod, n_ta, a.getPointer(), n_ta, maxA);
smallRNS.init(1, n_tb, t_b_mod, n_tb, b.getPointer(), n_tb, maxB);
FFT_PROFILING(2,"reduction mod pi of input matrices");
-
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB((m*n*pts)*num_primes*8)<<"Mo"<<std::endl;
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB((2*(m*k+k*n)*pts)*8)<<"Mo"<<std::endl;
-
for (size_t l=0;l<rns_chunk;l++)
- {
- //FFT_PROFILE_START;
- //std::cout<<"prime: "<<(long)smallRNS._basis[l]<<std::endl;
+ {
ModField f(smallRNS._basis[l]);
MatrixP_F a_i (f, m, k, pts);
MatrixP_F b_i (f, k, n, pts);
@@ -366,30 +227,32 @@ namespace LinBox{
for (size_t i=0;i<k*n;i++)
for (size_t j=0;j<b.size();j++)
b_i.ref(i,j)=t_b_mod[l*n_tb+j+i*b.size()];
- //FFT_PROFILE_GET(tCopy);
- //PolynomialMatrixFFTPrimeMulDomain<ModField> fftdomain (f);
- PolynomialMatrixThreePrimesFFTMulDomain<ModField> fftdomain (f);
- fftdomain.mul_fft(lpts, *c_i[loop+l], a_i, b_i);
- //FFT_PROFILE_GET(tMul);
+
+ PolynomialMatrixThreePrimesFFTMulDomain<ModField> fftdomain (f);
+ integer bound=integer(smallRNS._basis[l]-1)*integer(smallRNS._basis[l]-1)
+ *integer((int64_t)k)*integer((uint64_t)std::min(a.size(),b.size()));
+
+ fftdomain.mul_fft(lpts, *c_i[loop+l], a_i, b_i, bound);
}
FFT_PROFILING(2,"FFTprime mult+copying");
- //FFT_PROFILE(2,"copying linear reduced matrix",tCopy);
- //FFT_PROFILE(2,"FFTprime multiplication",tMul);
-
} // end of loop for memory saving
- delete[] t_a_mod;
- delete[] t_b_mod;
-
+ DEL_MEM(8*(n_ta+n_tb)*CRT_NBPRIME);
+ delete[] t_a_mod;
+ delete[] t_b_mod;
+#endif
- if (num_primes < 2) {
+ if (false && num_primes < 2) {
FFT_PROFILE_START(2);
- c.copy(*c_i[0],0,s-1);
+ //c.copy(*c_i[0],0,s-1);
} else {
FFT_PROFILE_START(2);
+
+#ifndef LOW_MEMORY_PMBASIS
// construct contiguous storage for c_i
size_t n_tc=m*n*s;
- //std::cout<<"MUL FFT RNS: RNS -> allocating "<<MB(n_tc*num_primes*8)<<"Mo"<<std::endl;
+ ADD_MEM(8*n_tc*num_primes);
double *t_c_mod = new double[n_tc*num_primes];
+ //std::cout<<"RNS OUT ALLOC done: "<<STR_MEMINFO<<std::endl;
for (size_t l=0;l<num_primes;l++){
for (size_t i=0;i<m*n;i++)
for (size_t j=0;j<s;j++)
@@ -399,32 +262,57 @@ namespace LinBox{
FFT_PROFILING(2,"linearization of results mod pi");
// reconstruct the result in C
- RNS.convert(1,n_tc,0,c.getWritePointer(),n_tc, t_c_mod, n_tc);
- //std::cout<<"MUL FFT RNS: "<<MEMINFO<<std::endl;
- //std::cout<<"----------------------------------------------"<<std::endl;
+ RNS.convert(1,n_tc,0,c.getWritePointer(),n_tc, t_c_mod, n_tc, _maxnorm);
+ //std::cout<<"RNS OUT COMP done: "<<STR_MEMINFO<<std::endl;
+ DEL_MEM(8*n_tc*num_primes);
delete[] t_c_mod;
-
+#else
+ size_t s_small= s/MEMFACTOR + 1;
+ size_t s_last = s- s_small*(MEMFACTOR-1);
+ size_t n_tc_small= m*n*s_small;
+ size_t n_tc_last = m*n*s_last;
+ {
+ ADD_MEM(8*n_tc_small*num_primes);
+ //std::cout<<"RNS OUT ALLOC done: "<<STR_MEMINFO<<std::endl;
+ double *t_c_mod = new double[n_tc_small*num_primes];
+ for (size_t memiter=0;memiter<MEMFACTOR-1;memiter++){
+ for (size_t l=0;l<num_primes;l++){
+ for (size_t i=0;i<m*n;i++)
+ for (size_t j=0;j<s_small;j++)
+ t_c_mod[l*n_tc_small + (j+i*s_small)]= c_i[l]->get(i,memiter*s_small+j);
+ }
+ // reconstruct the result in C
+ RNS.convert(m*n,s_small,0,c.getWritePointer()+memiter*s_small,s, t_c_mod, n_tc_small, _maxnorm);
+ //std::cout<<"RNS OUT COMP done: "<<STR_MEMINFO<<std::endl;
+ }
+ DEL_MEM(8*n_tc_small*num_primes);
+ delete[] t_c_mod;
+ }
+ {
+ ADD_MEM(8*n_tc_last*num_primes);
+ double *t_c_mod = new double[n_tc_last*num_primes];
+ // perform the last step
+ for (size_t l=0;l<num_primes;l++){
+ for (size_t i=0;i<m*n;i++)
+ for (size_t j=0;j<s_last;j++)
+ t_c_mod[l*n_tc_last + (j+i*s_last)]= c_i[l]->get(i,(MEMFACTOR-1)*s_small+j);
+ delete c_i[l];
+ }
+ // reconstruct the result in C
+ RNS.convert(m*n,s_last,0,c.getWritePointer()+(MEMFACTOR-1)*s_small,s, t_c_mod, n_tc_last, _maxnorm);
+ DEL_MEM(8*n_tc_last*num_primes);
+ delete[] t_c_mod;
+ }
+
+#endif
}
+
+ // std::cout<<"c"<<":="<<c<<";\n";
FFT_PROFILING(2,"k prime reconstruction");
// std::cout<<"CC:="<<c<<std::endl;
// std::cout<<"<-----------------: "<<std::endl;;
}
-
-
-
- // template< typename PMatrix1,typename PMatrix2, typename PMatrix3>
- // void midproduct_crtla(PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b,
- // const integer& maxA, const integer& maxB, const integer& bound,
- // bool smallLeft=true, size_t n0=0, size_t n1=0) {
- // // (convert to MatrixP representation)
- // MatrixP_I a2(field(),a.rowdim(),a.coldim(),a.size());
- // MatrixP_I b2(field(),b.rowdim(),b.coldim(),b.size());
- // a2.copy(a,0,a.size()-1);
- // b2.copy(b,0,b.size()-1);
- // MatrixP_I c2(field(),c.rowdim(),c.coldim(),c.size());
- // midproduct_crtla(c2,a2,b2,maxA,maxB,bound,smallLeft,n0,n1);
- // c.copy(c2,0,c2.size()-1);
- // }
+
// WARNING: Polynomial Matrix should stored as matrix of polynomial with integer coefficient
template< typename PMatrix1,typename PMatrix2, typename PMatrix3>
@@ -444,10 +332,10 @@ namespace LinBox{
linbox_check(c.size()>=deg-hdeg);
if (smallLeft){
- linbox_check(b.size()<hdeg+deg);
+ linbox_check(b.size()<hdeg+deg);
}
else
- linbox_check(a.size()<hdeg+deg);
+ linbox_check(a.size()<hdeg+deg);
//linbox_check(2*c.size()-1 == b.size());
//size_t deg= b.size()+1;
@@ -455,22 +343,11 @@ namespace LinBox{
size_t lpts=0;
size_t pts = 1; while (pts < deg) { pts= pts<<1; ++lpts; }
-
- // compute bit size of feasible prime for FFLAS
- // size_t _k=k,lk=0;
- //while ( _k ) {_k>>=1; ++lk;}
- //size_t prime_bitsize= (53-lk)>>1;
-
// compute max prime value for FFLAS
uint64_t prime_max= std::sqrt( (1ULL<<53) / k)+1;
std::vector<integer> bas;
- getFFTPrime(prime_max,lpts,bound,bas);
- //RandomFFTPrime RdFFT(prime_bitsize);
- // if (!RdFFT.generatePrimes(bound,bas)){
- // std::cout<<"COULD NOT FIND ENOUGH FFT PRIME in MatPoly FFTMUL exiting..."<<std::endl;
- // throw LinboxError("LinBox ERROR: not enough FFT Prime\n");
- // }
-
+ getFFTPrime(prime_max,lpts,bound,bas,k,deg);
+
std::vector<double> basis(bas.size());
std::copy(bas.begin(),bas.end(),basis.begin());
FFPACK::rns_double RNS(basis);
@@ -486,24 +363,21 @@ namespace LinBox{
}
#endif
FFT_PROFILING(2,"init of CRT approach");
- // reduce t_a and t_b modulo each FFT primes
+
size_t n_ta=m*k*a.size(), n_tb=k*n*b.size();
+ std::vector<MatrixP_F*> c_i (num_primes);
+
+#ifndef LOW_MEMORY_PMBASIS
+ // reduce t_a and t_b modulo each FFT primes
+ ADD_MEM(8*(n_ta+n_tb)*num_primes);
double* t_a_mod= new double[n_ta*num_primes];
double* t_b_mod= new double[n_tb*num_primes];
+
RNS.init(1, n_ta, t_a_mod, n_ta, a.getPointer(), n_ta, maxA);
RNS.init(1, n_tb, t_b_mod, n_tb, b.getPointer(), n_tb, maxB);
FFT_PROFILING(2,"reduction mod pi of input matrices");
-
- //std::cout<<"----------------------------------------------"<<std::endl;
- //std::cout<<"MIDP FFT RNS: "<<MEMINFO<<std::endl;
- //std::cout<<"MIDP FFT RNS: need "<<MB((m*n*pts+n_ta+n_tb)*num_primes*8 + 2*(m*k+k*n)*pts*8)<<"Mo"<<std::endl;
-
- //std::cout<<"MIDP FFT RNS: RNS -> allocating "<<MB((n_ta+n_tb)*num_primes*8)<<"Mo"<<std::endl;
- //std::cout<<"MIDP FFT RNS: RNS -> allocating "<<MB((m*n)*pts*num_primes*8)<<"Mo"<<std::endl;
- //std::cout<<"MIDP FFT RNS: "<<MEMINFO<<std::endl;
- std::vector<MatrixP_F*> c_i (num_primes);
for (size_t l=0;l<num_primes;l++){
FFT_PROFILE_START(2);
@@ -526,14 +400,76 @@ namespace LinBox{
else
b_i.ref(i,hdeg-1-j)=t_b_mod[l*n_tb+j+i*b.size()];
FFT_PROFILE_GET(2,tCopy);
- //PolynomialMatrixFFTPrimeMulDomain<ModField> fftdomain (f);
+
PolynomialMatrixThreePrimesFFTMulDomain<ModField> fftdomain (f);
fftdomain.midproduct_fft(lpts, *(c_i[l]), a_i, b_i, smallLeft);
-
+
FFT_PROFILE_GET(2,tMul);
- }
+ }
+ DEL_MEM(8*(n_ta+n_tb)*num_primes);
delete[] t_a_mod;
delete[] t_b_mod;
+#else
+ // loop for memory saving
+ size_t CRT_NBPRIME=CRT_SIZE;
+ ADD_MEM(8*(n_ta+n_tb)*CRT_NBPRIME);
+ double* t_a_mod= new double[n_ta*CRT_NBPRIME];
+ double* t_b_mod= new double[n_tb*CRT_NBPRIME];
+
+ for(size_t loop=0;loop<num_primes;loop+=CRT_NBPRIME){
+ // create chunk of RNS
+ size_t rns_chunk=std::min(CRT_NBPRIME,num_primes-loop); // nbr of primes in the current smallRNS basis
+ std::vector<double> smallBasis(rns_chunk);
+ std::copy(basis.begin()+loop,basis.begin()+loop+rns_chunk,smallBasis.begin());
+ FFPACK::rns_double smallRNS(smallBasis);
+ smallRNS.precompute_cst(RNS._ldm);
+ smallRNS.init(1, n_ta, t_a_mod, n_ta, a.getPointer(), n_ta, maxA);
+ smallRNS.init(1, n_tb, t_b_mod, n_tb, b.getPointer(), n_tb, maxB);
+ FFT_PROFILING(2,"reduction mod pi of input matrices");
+
+ for (size_t l=0;l<rns_chunk;l++)
+ {
+ //FFT_PROFILE_START;
+ //std::cout<<"prime: "<<(long)smallRNS._basis[l]<<std::endl;
+ ModField f(smallRNS._basis[l]);
+ MatrixP_F a_i (f, m, k, pts);
+ MatrixP_F b_i (f, k, n, pts);
+ c_i[loop+l] = new MatrixP_F(f, m, n, pts);
+
+ // copy reduced data
+ for (size_t i=0;i<m*k;i++)
+ for (size_t j=0;j<a.size();j++)
+ if (smallLeft)
+ a_i.ref(i,hdeg-1-j)=t_a_mod[l*n_ta+j+i*a.size()];
+ else
+ a_i.ref(i,j)=t_a_mod[l*n_ta+j+i*a.size()];
+ for (size_t i=0;i<k*n;i++)
+ for (size_t j=0;j<b.size();j++)
+ if (smallLeft)
+ b_i.ref(i,j)=t_b_mod[l*n_tb+j+i*b.size()];
+ else
+ b_i.ref(i,hdeg-1-j)=t_b_mod[l*n_tb+j+i*b.size()];
+ FFT_PROFILE_GET(2,tCopy);
+
+ PolynomialMatrixThreePrimesFFTMulDomain<ModField> fftdomain (f);
+ fftdomain.midproduct_fft(lpts, *(c_i[loop+l]), a_i, b_i, smallLeft);
+ FFT_PROFILE_GET(2,tMul);
+
+ }
+ FFT_PROFILING(2,"FFTprime mult+copying");
+ //FFT_PROFILE(2,"copying linear reduced matrix",tCopy);
+ //FFT_PROFILE(2,"FFTprime multiplication",tMul);
+
+ } // end of loop for memory saving
+ DEL_MEM(8*(n_ta+n_tb)*CRT_NBPRIME);
+ delete[] t_a_mod;
+ delete[] t_b_mod;
+
+#endif
+
+
+
+
FFT_PROFILE(2,"copying linear reduced matrix",tCopy);
FFT_PROFILE(2,"FFTprime multiplication",tMul);
@@ -542,41 +478,78 @@ namespace LinBox{
c.copy(*(c_i[0]),0,c.size()-1);
} else {
FFT_PROFILE_START(2);
+
+ size_t s=c.size();
+#ifndef LOW_MEMORY_PMBASIS
// construct contiguous storage for c_i
- double *t_c_mod;
- size_t n_tc=m*n*c.size();
- t_c_mod = new double[n_tc*num_primes];
+ size_t n_tc=m*n*s;
+ ADD_MEM(8*n_tc*num_primes);
+ double *t_c_mod = new double[n_tc*num_primes];
for (size_t l=0;l<num_primes;l++){
for (size_t i=0;i<m*n;i++)
- for (size_t j=0;j<c.size();j++)
- t_c_mod[l*n_tc + (j+i*c.size())]= c_i[l]->get(i,j);
+ for (size_t j=0;j<s;j++)
+ t_c_mod[l*n_tc + (j+i*s)]= c_i[l]->get(i,j);
delete c_i[l];
}
FFT_PROFILING(2,"linearization of results mod pi");
// reconstruct the result in C
- RNS.convert(1,n_tc,0,c.getWritePointer(),n_tc, t_c_mod, n_tc);
- //std::cout<<"MIDP FFT RNS: "<<MEMINFO<<std::endl;
+ RNS.convert(1,n_tc,0,c.getWritePointer(),n_tc, t_c_mod, n_tc, _maxnorm);
+ DEL_MEM(8*n_tc*num_primes);
delete[] t_c_mod;
-
- //std::cout<<"MUL FFT RNS: "<<MEMINFO<<std::endl;
- //std::cout<<"----------------------------------------------"<<std::endl;
-
- FFT_PROFILING(2,"k prime reconstruction");
+#else
+ size_t s_small= s/MEMFACTOR + 1;
+ size_t s_last = s- s_small*(MEMFACTOR-1);
+ size_t n_tc_small= m*n*s_small;
+ size_t n_tc_last = m*n*s_last;
+ {
+ ADD_MEM(8*n_tc_small*num_primes);
+ double *t_c_mod = new double[n_tc_small*num_primes];
+ for (size_t memiter=0;memiter<MEMFACTOR-1;memiter++){
+ for (size_t l=0;l<num_primes;l++){
+ for (size_t i=0;i<m*n;i++)
+ for (size_t j=0;j<s_small;j++)
+ t_c_mod[l*n_tc_small + (j+i*s_small)]= c_i[l]->get(i,memiter*s_small+j);
+ }
+ // reconstruct the result in C
+ RNS.convert(m*n,s_small,0,c.getWritePointer()+memiter*s_small,s, t_c_mod, n_tc_small, _maxnorm);
+ }
+ DEL_MEM(8*n_tc_small*num_primes);
+ delete[] t_c_mod;
+ }
+ {
+ ADD_MEM(8*n_tc_last*num_primes);
+ double *t_c_mod = new double[n_tc_last*num_primes];
+ // perform the last step
+ for (size_t l=0;l<num_primes;l++){
+ for (size_t i=0;i<m*n;i++)
+ for (size_t j=0;j<s_last;j++)
+ t_c_mod[l*n_tc_last + (j+i*s_last)]= c_i[l]->get(i,(MEMFACTOR-1)*s_small+j);
+ delete c_i[l];
+ }
+ // reconstruct the result in C
+ RNS.convert(m*n,s_last,0,c.getWritePointer()+(MEMFACTOR-1)*s_small,s, t_c_mod, n_tc_last, _maxnorm);
+ DEL_MEM(8*n_tc_last*num_primes);
+ delete[] t_c_mod;
+ }
+
+#endif
}
}
+
};
/***************************************************************************
**** Polynomial Matrix Multiplication over Fp[x], with p multiprecision ***
***************************************************************************/
- template <>
- class PolynomialMatrixFFTMulDomain<Givaro::Modular<integer> > {
+ template <size_t K, size_t L>
+ class PolynomialMatrixFFTMulDomain<Givaro::Modular<RecInt::ruint<K>,RecInt::ruint<L> > > {
public:
- typedef Givaro::Modular<integer> Field;
+ typedef Givaro::Modular<RecInt::ruint<K>,RecInt::ruint<L> > Field;
typedef typename Field::Element Element;
- typedef Givaro::ZRing<integer> IntField;
+ typedef Givaro::ZRing<RecInt::ruint<L>> IntField;
+
// Polynomial matrix stored as a polynomial of matrix
typedef PolynomialMatrix<PMType::polfirst,PMStorage::plain,Field> MatrixP_F;
// Polynomial matrix stored as a polynomial of matrix
@@ -584,17 +557,17 @@ namespace LinBox{
private:
const Field *_field; // Read only
- integer _p;
-
+ RecInt::ruint<K> _p;
+
public:
inline const Field & field() const { return *_field; }
-
+
PolynomialMatrixFFTMulDomain(const Field &F) : _field(&F) {
- field().cardinality(_p);
+ _p=field().cardinality();
}
template<typename Matrix1, typename Matrix2, typename Matrix3>
- void mul (Matrix1 &c, const Matrix2 &a, const Matrix3 &b) {
+ void mul (Matrix1 &c, const Matrix2 &a, const Matrix3 &b, size_t max_rowdeg=0) {
FFT_PROFILE_START(2);
MatrixP_F a2(field(),a.rowdim(),a.coldim(),a.size());
MatrixP_F b2(field(),b.rowdim(),b.coldim(),b.size());
@@ -602,7 +575,7 @@ namespace LinBox{
a2.copy(a,0,a.size()-1);
b2.copy(b,0,b.size()-1);
FFT_PROFILING(2,"converting rep of input");
- mul(c2,a2,b2);
+ mul(c2,a2,b2, max_rowdeg);
FFT_PROFILE_START(2);
c.copy(c2,0,c.size()-1);
FFT_PROFILING(2,"converting rep of output");
@@ -610,23 +583,21 @@ namespace LinBox{
}
// Matrix with polynomials
- void mul (MatrixP_F &c, const MatrixP_F &a, const MatrixP_F &b) {
-
+ void mul (MatrixP_F &c, const MatrixP_F &a, const MatrixP_F &b, size_t max_rowdeg=0) {
FFT_PROFILE_START(2);
- IntField Z;
- PolynomialMatrixFFTMulDomain<IntField> Zmul(Z,_p);
- integer bound=2*_p*_p*integer((uint64_t)a.coldim())*integer((uint64_t)std::min(a.size(),b.size()));
- //Zmul.mul_crtla(c,a,b,_p,_p,bound);
- Zmul.mul_crtla2(c,a,b,_p,_p,bound);
+ IntField Z;
+ Givaro::Integer pp(_p);
+ //std::cerr<<"FFT RECINT MUL 1: "<<c.size()<<" -> "<<a.size()<<"x"<<b.size()<<" "<<STR_MEMINFO<<MEMINFO<<std::endl;
+ PolynomialMatrixFFTMulDomain<IntField> Zmul(Z,pp);
+ integer bound=pp*pp*integer((uint64_t)a.coldim())*integer((uint64_t)std::min(a.size(),b.size()));
+ Zmul.mul_crtla(c,a,b,_p,_p,bound, max_rowdeg);
+ //std::cerr<<"FFT RECINT MUL 2: "<<c.size()<<" -- "<<STR_MEMINFO<<MEMINFO<<std::endl;
- // reduce the result mod p
- FFT_PROFILE_START(2);
- for (size_t i=0;i<c.rowdim()*c.coldim();i++)
- for (size_t j=0;j<c.size();j++)
- c.ref(i,j)%=_p;
FFT_PROFILING(2,"reduction mod p of output");
}
+
+
template<typename Matrix1, typename Matrix2, typename Matrix3>
void midproduct (Matrix1 &c, const Matrix2 &a, const Matrix3 &b,
bool smallLeft=true, size_t n0=0, size_t n1=0) {
@@ -638,26 +609,30 @@ namespace LinBox{
MatrixP_F c2(field(),c.rowdim(),c.coldim(),c.size());
midproduct(c2,a2,b2,smallLeft,n0,n1);
c.copy(c2,0,c.size()-1);
- }
+ }
void midproduct (MatrixP_F &c, const MatrixP_F &a, const MatrixP_F &b,
bool smallLeft=true, size_t n0=0, size_t n1=0) {
+ FFT_PROFILE_START(2);
IntField Z;
- PolynomialMatrixFFTMulDomain<IntField> Zmul(Z,_p);
- //const MatrixP_I* a2 = reinterpret_cast<const MatrixP_I*>(&a);
- //const MatrixP_I* b2 = reinterpret_cast<const MatrixP_I*>(&b);
- //MatrixP_I* c2 = reinterpret_cast<MatrixP_I*>(&c);
- //Zmul.midproduct(*c2,*a2,*b2,smallLeft,n0,n1);
+ Givaro::Integer pp(_p);
+ PolynomialMatrixFFTMulDomain<IntField> Zmul(Z,pp);
+ //MatrixP_I c2(Zmul,c.rowdim(),c.coldim(),c.size());
+ //Zmul.midproduct(c2,a,b,smallLeft,n0,n1);
Zmul.midproduct(c,a,b,smallLeft,n0,n1);
+
// reduce the result mod p
- FFT_PROFILE_START(2);
- for (size_t i=0;i<c.rowdim()*c.coldim();i++)
- for (size_t j=0;j<c.size();j++)
- c.ref(i,j)%=_p;
+ // FFT_PROFILE_START(2);
+ // for (size_t i=0;i<c.rowdim()*c.coldim();i++)
+ // for (size_t j=0;j<c.size();j++)
+ // c.ref(i,j)=integer(c2.ref(i,j))%pp;
FFT_PROFILING(2,"reduction mod p of output");
}
};
+
+
+
diff --git a/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize-fast.inl b/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize-fast.inl
index 50e260d..0136bf4 100644
--- a/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize-fast.inl
+++ b/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize-fast.inl
@@ -60,11 +60,11 @@ namespace LinBox {
: _field(&F), _p(field().cardinality()), _BMD(F){}
template<typename Matrix1, typename Matrix2, typename Matrix3>
- void mul (Matrix1 &c, const Matrix2 &a, const Matrix3 &b) {
+ void mul (Matrix1 &c, const Matrix2 &a, const Matrix3 &b, size_t max_rowdeg=0) {
linbox_check(a.coldim()==b.rowdim());
- size_t deg = a.size()+b.size()-1;
+ size_t deg = (max_rowdeg?max_rowdeg:a.size()+b.size()-2); //size_t deg = a.size()+b.size()-1;
size_t lpts = 0;
- size_t pts = 1; while (pts < deg) { pts= pts<<1; ++lpts; }
+ size_t pts = 1; while (pts <= deg) { pts= pts<<1; ++lpts; }
// padd the input a and b to 2^lpts (convert to MatrixP representation)
MatrixP a2(field(),a.rowdim(),a.coldim(),pts);
MatrixP b2(field(),b.rowdim(),b.coldim(),pts);
@@ -72,14 +72,14 @@ namespace LinBox {
b2.copy(b,0,b.size()-1);
MatrixP c2(field(),c.rowdim(),c.coldim(),pts);
mul_fft (lpts,c2, a2, b2);
- c.copy(c2,0,deg-1);
+ c.copy(c2,0,deg);
}
- void mul (MatrixP &c, const MatrixP &a, const MatrixP &b) {
+ void mul (MatrixP &c, const MatrixP &a, const MatrixP &b, size_t max_rowdeg=0) {
linbox_check(a.coldim()==b.rowdim());
- size_t deg = a.size()+b.size()-1;
+ size_t deg = (max_rowdeg?max_rowdeg:a.size()+b.size()-2); //size_t deg = a.size()+b.size()-1;
size_t lpts = 0;
- size_t pts = 1; while (pts < deg) { pts= pts<<1; ++lpts; }
+ size_t pts = 1; while (pts <= deg) { pts= pts<<1; ++lpts; }
// padd the input a and b to 2^lpts
MatrixP a2(field(),a.rowdim(),a.coldim(),pts);
@@ -89,7 +89,7 @@ namespace LinBox {
// resize c to 2^lpts
c.resize(pts);
mul_fft (lpts,c, a2, b2);
- c.resize(deg);
+ c.resize(deg+1);
}
// a,b and c must have size: 2^lpts
diff --git a/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize-three-primes.inl b/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize-three-primes.inl
index e68d0dc..daed2de 100644
--- a/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize-three-primes.inl
+++ b/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize-three-primes.inl
@@ -38,7 +38,7 @@ namespace LinBox {
/***********************************************************************************
**** Polynomial Matrix Multiplication over Zp[x] with p (FFLAS prime) ***
- ***********************************************************************************/
+ *********************************x**************************************************/
template<class Field>
class PolynomialMatrixThreePrimesFFTMulDomain {
public:
@@ -65,39 +65,48 @@ namespace LinBox {
}
template<typename Matrix1, typename Matrix2, typename Matrix3>
- void mul (Matrix1 &c, const Matrix2 &a, const Matrix3 &b) {
+ void mul (Matrix1 &c, const Matrix2 &a, const Matrix3 &b, size_t max_rowdeg=0) {
linbox_check(a.coldim()==b.rowdim());
- size_t deg = a.size()+b.size()-1;
+ // deg is the max rowdegree of the product
+ size_t deg = (max_rowdeg?max_rowdeg:a.size()+b.size()-2); //size_t deg = a.size()+b.size()-1;
+ c.resize(deg+1);
size_t lpts = 0;
- size_t pts = 1; while (pts < deg) { pts= pts<<1; ++lpts; }
+ size_t pts = 1; while (pts <= deg) { pts= pts<<1; ++lpts; }
// padd the input a and b to 2^lpts (convert to MatrixP representation)
MatrixP a2(field(),a.rowdim(),a.coldim(),pts);
MatrixP b2(field(),b.rowdim(),b.coldim(),pts);
- a2.copy(a,0,a.size()-1);
- b2.copy(b,0,b.size()-1);
+ a2.copy(a,0,a.degree());
+ b2.copy(b,0,b.degree());
MatrixP c2(field(),c.rowdim(),c.coldim(),pts);
- mul_fft (lpts,c2, a2, b2);
- c.copy(c2,0,deg-1);
+ integer bound=integer(_p-1)*integer(_p-1)
+ *integer((uint64_t)a.coldim())*integer((uint64_t)std::min(a.size(),b.size()));
+ mul_fft (lpts,c2, a2, b2, bound);
+ c.copy(c2,0,deg);
}
- void mul (MatrixP &c, const MatrixP &a, const MatrixP &b) {
+ void mul (MatrixP &c, const MatrixP &a, const MatrixP &b, size_t max_rowdeg=0) {
linbox_check(a.coldim()==b.rowdim());
- size_t deg = a.size()+b.size()-1;
+ // deg is the max rowdegree of the product
+ size_t deg = (max_rowdeg?max_rowdeg:a.size()+b.size()-2); //size_t deg = a.size()+b.size()-1;
size_t lpts = 0;
- size_t pts = 1; while (pts < deg) { pts= pts<<1; ++lpts; }
+ size_t pts = 1; while (pts <= deg) { pts= pts<<1; ++lpts; }
// padd the input a and b to 2^lpts
MatrixP a2(field(),a.rowdim(),a.coldim(),pts);
MatrixP b2(field(),b.rowdim(),b.coldim(),pts);
- a2.copy(a,0,a.size()-1);
- b2.copy(b,0,b.size()-1);
+ a2.copy(a,0,a.degree());
+ b2.copy(b,0,b.degree());
// resize c to 2^lpts
c.resize(pts);
- mul_fft (lpts,c, a2, b2);
- c.resize(deg);
- }
+ integer bound=integer(_p-1)*integer(_p-1)
+ *integer((uint64_t)a.coldim())*integer((uint64_t)std::min(a.size(),b.size()));
- void mul_fft (size_t lpts, MatrixP &c, MatrixP &a, MatrixP &b) {
- size_t pts=c.size();
+ mul_fft (lpts,c, a2, b2, bound);
+ c.resize(deg+1);
+ }
+
+ // a,b and c must have size: 2^lpts
+ void mul_fft (size_t lpts, MatrixP &c, MatrixP &a, MatrixP &b, const integer& bound) {
+ size_t pts=c.size();
if ((_p-1) % pts == 0){
PolynomialMatrixFFTPrimeMulDomain<ModField> fftprime_domain (field());
fftprime_domain.mul_fft(lpts,c,a,b);
@@ -110,15 +119,7 @@ namespace LinBox {
size_t k = a.coldim();
size_t n = b.coldim();
-
- integer bound=integer((uint64_t)_p)*integer((uint64_t)_p)*integer((uint64_t)k)*integer((uint64_t)pts);
- // compute bit size of feasible prime for FFLAS
- // size_t _k=k,lk=0;
- // while ( _k ) {_k>>=1; ++lk;}
- // size_t prime_bitsize= (53-lk)>>1;
-
- // compute max prime value for FFLAS
- uint64_t prime_max= std::sqrt( (1ULL<<53) / k)+1;
+ uint64_t prime_max=maxFFTPrimeValue(k,pts); // CAREFUL: only for Modular<double>;
RandomFFTPrime RdFFT(prime_max);
std::vector<integer> bas;
if (!RdFFT.generatePrimes(lpts,bound,bas)){
@@ -202,8 +203,10 @@ namespace LinBox {
for (size_t j=0;j<b2.rowdim()*b2.coldim();j++)
for (size_t i=0;i<hdeg/2;i++)
std::swap(b2.ref(j,i),b2.ref(j,hdeg-1-i));
-
- midproduct_fft (lpts,c2, a2, b2, smallLeft);
+ integer bound=integer(_p-1)*integer(_p-1)
+ *integer((uint64_t)a.coldim())*integer((uint64_t)std::min(a.size(),b.size()));
+
+ midproduct_fft (lpts,c2, a2, b2, bound, smallLeft);
c.copy(c2,0,c.size()-1);
}
@@ -211,7 +214,7 @@ namespace LinBox {
// a,b and c must have size: 2^lpts
// -> a must have been already reversed according to the midproduct algorithm
void midproduct_fft (size_t lpts, MatrixP &c, MatrixP &a, MatrixP &b,
- bool smallLeft=true) {
+ const integer& bound, bool smallLeft=true) {
size_t pts=c.size();
if ((_p-1) % pts == 0){
PolynomialMatrixFFTPrimeMulDomain<ModField> fftprime_domain (field());
@@ -222,15 +225,15 @@ namespace LinBox {
size_t k = a.coldim();
size_t n = b.coldim();
- integer bound=integer(_p)*integer(_p)*integer((uint64_t)k)*integer((uint64_t)pts);
-
// compute bit size of feasible prime for FFLAS
// size_t _k=k,lk=0;
// while ( _k ) {_k>>=1; ++lk;}
// size_t prime_bitsize= (53-lk)>>1;
// compute max prime value for FFLAS
- uint64_t prime_max= std::sqrt( (1ULL<<53) / k)+1;
+ //uint64_t prime_max= std::min(uint64_t(std::sqrt( (1ULL<<53) / k)+1), uint64_t(Givaro::Modular<double>::maxCardinality()))
+ uint64_t prime_max=maxFFTPrimeValue(k,pts); // CAREFUL: only for Modular<double>;
+
RandomFFTPrime RdFFT(prime_max);
std::vector<integer> bas;
diff --git a/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize.inl b/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize.inl
index c09f31e..f0f4338 100644
--- a/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize.inl
+++ b/linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize.inl
@@ -55,16 +55,19 @@ namespace LinBox {
PolynomialMatrixFFTMulDomain (const Field& F) : _field(&F), _p(F.cardinality()) {}
template<typename Matrix1, typename Matrix2, typename Matrix3>
- void mul (Matrix1 &c, const Matrix2 &a, const Matrix3 &b) {
- uint64_t pts= 1<<(integer((uint64_t)a.size()+b.size()-1).bitsize());
+ void mul (Matrix1 &c, const Matrix2 &a, const Matrix3 &b, size_t max_rowdeg=0) {
+ size_t deg = (max_rowdeg?max_rowdeg:a.size()+b.size()-2); //size_t deg = a.size()+b.size()-1;
+ c.resize(deg+1);
+ size_t lpts = 0;
+ size_t pts = 1; while (pts <= deg) { pts= pts<<1; ++lpts; }
if ( _p< 536870912ULL && ((_p-1) % pts)==0){
PolynomialMatrixFFTPrimeMulDomain<Field> MulDom(field());
- MulDom.mul(c,a,b);
+ MulDom.mul(c,a,b, max_rowdeg);
}
else {
if (_p< 536870912ULL){
PolynomialMatrixThreePrimesFFTMulDomain<Field> MulDom(field());
- MulDom.mul(c,a,b);
+ MulDom.mul(c,a,b, max_rowdeg);
}
else {
// use computation with Givaro::Modular<integer>
@@ -75,11 +78,11 @@ namespace LinBox {
MatrixP_L a2(Fp,a.rowdim(),a.coldim(),a.size());
MatrixP_L b2(Fp,b.rowdim(),b.coldim(),b.size());
MatrixP_L c2(Fp,c.rowdim(),c.coldim(),c.size());
- a2.copy(a,0,a.size()-1);
- b2.copy(b,0,b.size()-1);
+ a2.copy(a,0,a.degree());
+ b2.copy(b,0,b.degree());
FFT_PROFILING(2,"converting rep of polynomial matrix input");
- MulDom.mul(c2,a2,b2);
- c.copy(c2,0,c.size()-1);
+ MulDom.mul(c2,a2,b2, max_rowdeg);
+ c.copy(c2,0,c.degree());
FFT_PROFILING(2,"converting rep of polynomial matrix output");
}
}
diff --git a/linbox/algorithms/polynomial-matrix/matpoly-mult-fft.h b/linbox/algorithms/polynomial-matrix/matpoly-mult-fft.h
index 8e7a06d..772ff88 100755
--- a/linbox/algorithms/polynomial-matrix/matpoly-mult-fft.h
+++ b/linbox/algorithms/polynomial-matrix/matpoly-mult-fft.h
@@ -52,8 +52,8 @@ Givaro::Timer mychrono[3];
mychrono[lvl].stop();std::cout<<"FFT("<<lvl<<"):"; \
std::cout.width(FFT_PROF_MSG_SIZE);std::cout<<std::left<<msg<<" : "; \
std::cout.precision(6);std::cout<<mychrono[lvl]<<std::endl; \
- mychrono[lvl].clear();mychrono[lvl].start(); \
-}
+ mychrono[lvl].clear();mychrono[lvl].start(); \
+ }
#ifdef HAVE_OPENMP
#define FFT_PROFILE_GET(lvl,x) \
@@ -63,11 +63,11 @@ Givaro::Timer mychrono[3];
mychrono[lvl].stop();(x)+=mychrono[lvl].usertime();mychrono[lvl].clear();mychrono[lvl].start();
#endif
#define FFT_PROFILE(lvl,msg,x) \
-if ((lvl)>=FFT_PROF_LEVEL) { \
- std::cout<<"FFT: "; \
- std::cout.width(FFT_PROF_MSG_SIZE);std::cout<<std::left<<msg<<" : "; \
- std::cout.precision(6);std::cout<<x<<" s"<<std::endl; \
-}
+ if ((lvl)>=FFT_PROF_LEVEL) { \
+ std::cout<<"FFT: "; \
+ std::cout.width(FFT_PROF_MSG_SIZE);std::cout<<std::left<<msg<<" : "; \
+ std::cout.precision(6);std::cout<<x<<" s"<<std::endl; \
+ }
#else
#define FFT_PROFILE_START(lvl)
#define FFT_PROFILING(lvl,msg)
@@ -82,33 +82,94 @@ if ((lvl)>=FFT_PROF_LEVEL) { \
namespace LinBox
{
- // generic handler for multiplication using FFT
- template <class Field>
- class PolynomialMatrixFFTMulDomain {
- public:
- inline const Field & field() const;
+// generic handler for multiplication using FFT
+ template <class Field>
+ class PolynomialMatrixFFTMulDomain {
+ public:
+ inline const Field & field() const;
- PolynomialMatrixFFTMulDomain (const Field& F);
+ PolynomialMatrixFFTMulDomain (const Field& F);
- template<typename Matrix1, typename Matrix2, typename Matrix3>
- void mul (Matrix1 &c, const Matrix2 &a, const Matrix3 &b);
+ template<typename Matrix1, typename Matrix2, typename Matrix3>
+ void mul (Matrix1 &c, const Matrix2 &a, const Matrix3 &b);
- template<typename Matrix1, typename Matrix2, typename Matrix3>
- void midproduct (Matrix1 &c, const Matrix2 &a, const Matrix3 &b, bool smallLeft=true, size_t n0=0,size_t n1=0);
- };
+ template<typename Matrix1, typename Matrix2, typename Matrix3>
+ void midproduct (Matrix1 &c, const Matrix2 &a, const Matrix3 &b, bool smallLeft=true, size_t n0=0,size_t n1=0);
+ };
- //class PolynomialMatrixFFTPrimeMulDomain ; // Mul in Zp[x] with p <2^32, (fflas, fourier)
+ //class PolynomialMatrixFFTPrimeMulDomain ; // Mul in Zp[x] with p <2^32, (fflas, fourier)
- // template <class T>
- // class PolynomialMatrixFFTMulDomain<Givaro::Modular<T> > ; // Mul in Zp[x] with p^2 storable in type T
-
- // template<>
- // class PolynomialMatrixFFTMulDomain<Givaro::ZRing<integer> >; // Mul in Z[x]
-
- // template <>
- // class PolynomialMatrixFFTMulDomain<Givaro::Modular<integer> > ; // Mul in Zp[x] with p multiprecision
+ // template <class T>
+ // class PolynomialMatrixFFTMulDomain<Givaro::Modular<T> > ; // Mul in Zp[x] with p^2 storable in type T
+
+ // template<>
+ // class PolynomialMatrixFFTMulDomain<Givaro::ZRing<integer> >; // Mul in Z[x]
+
+ // template <>
+ // class PolynomialMatrixFFTMulDomain<Givaro::Modular<integer> > ; // Mul in Zp[x] with p multiprecision
+
+ // get the maximum prime for fft with modular<double> (matrix dim =k, nbr point = pts)
+ uint64_t maxFFTPrimeValue(uint64_t k, uint64_t pts) {
+ uint64_t prime_max=std::sqrt( (1ULL<<53) /k)+1;
+ size_t c=1;
+ const int fct=24;
+ while (c<k && prime_max < (1UL<<26) && prime_max< pts*fct){
+ prime_max=std::sqrt( (1ULL<<53) /(k/c))+1;
+ c<<=1;
+ }
+
+ //std::cout<<"maxFFTPrime: pts -> "<<pts<<std::endl;
+ //std::cout<<"maxFFTPrime: replacing "<<k<<" -> "<<k/c<<std::endl;
+
+ if (c>=k){
+ std::cout<<"MatPoly FFT (maxPrimeValue): impossible to find enough FFT Prime\n";
+ std::terminate();
+ }
+
+ return std::min(prime_max, uint64_t(Givaro::Modular<double>::maxCardinality()));
+ }
+
+ void getFFTPrime(uint64_t prime_max, size_t lpts, integer bound, std::vector<integer> &bas, size_t k, size_t d){
+
+ RandomFFTPrime RdFFT(prime_max);
+ size_t nbp=0;
+
+ if (!RdFFT.generatePrimes(lpts,bound,bas)){ // not enough FFT prime found
+ integer MM=1;
+ for(std::vector<integer>::size_type i=0;i<bas.size();i++){
+ MM*=bas[i];
+ //std::cout<<bas[i]<<std::endl;
+ }
+
+ // compute max bitsize for prime allowing three prime fft
+ integer prime_max_tp=MM/uint64_t(d*k);
+ while (k>1 && prime_max_tp<100) {k/=2;prime_max_tp*=2;}
+ if (k<=1) {std::cout<<"getFFTPrime error: impossible to have enough primes satisfying constraints: FFLAS prime (<2^26) and FFT (2^"<<lpts<<")\n";}
+
+ RandomPrimeIter Rd(std::min(prime_max_tp.bitsize()/2,integer(prime_max).bitsize())-1);
+#ifdef VERBOSE_FFT
+ std::cout<<"MM="<<MM<<std::endl;
+ std::cout<<"normal primemax: "<<prime_max_tp<<" "<<prime_max<<std::endl;
+ std::cout<<"normal prime bitmax: "<<std::min(prime_max_tp.bitsize()/2,integer(prime_max).bitsize()-1)<<std::endl;
+#endif
+ integer tmp;
+ do {
+ do {Rd.random(tmp);}
+ while (MM%tmp==0 || tmp>prime_max);
+ bas.push_back(tmp);
+ nbp++;
+ MM*=tmp;
+ } while (MM<bound);
+ }
+#ifdef VERBOSE_FFT
+ std::cout<<"MatPoly Multiprecision FFT : using "<<bas.size()-nbp<<" FFT primes and "<<nbp<<" normal primes "<<std::endl;
+#endif
+ for(auto i: bas)
+ if (i>prime_max) std::cout<<"ERROR\n";
+ }
+
} // end of namespace LinBox
#include "linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize-fast.inl"
diff --git a/linbox/algorithms/polynomial-matrix/order-basis.h b/linbox/algorithms/polynomial-matrix/order-basis.h
index 4affeb2..55f2744 100755
--- a/linbox/algorithms/polynomial-matrix/order-basis.h
+++ b/linbox/algorithms/polynomial-matrix/order-basis.h
@@ -22,8 +22,17 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* ========LICENCE========
*/
+
+
#include "linbox/matrix/dense-matrix.h"
#include "linbox/matrix/polynomial-matrix.h"
+
+
+#ifdef TRACK_MEMORY_MATPOL
+#define MEMINFO2 STR_MEMINFO<<MEMINFO
+#else
+#define MEMINFO2 ""
+#endif
#include "linbox/algorithms/polynomial-matrix/polynomial-matrix-domain.h"
#include <vector>
#include <algorithm>
@@ -32,14 +41,16 @@
#define MBASIS_THRESHOLD_LOG 5
#define MBASIS_THRESHOLD (1<<MBASIS_THRESHOLD_LOG)
+
namespace LinBox {
#ifdef __CHECK_ORDERBASIS
#define __CHECK_MBASIS
#define __CHECK_PMBASIS
#endif
-
-#if (__CHECK_MBASIS) or (__CHECK_PMBASIS)
+
+
+#if defined (__CHECK_MBASIS) or defined (__CHECK_PMBASIS)
#include <string>
template<typename Field, typename Mat>
std::string check_orderbasis(const Field& F, const Mat& sigma, const Mat& serie, size_t ord){
@@ -51,15 +62,15 @@ namespace LinBox {
std::string msg(".....");
bool nul_sigma=true;
while(i<ord && MD.isZero(T[i])){
- if (!MD.isZero(sigma[i])) nul_sigma=false;
+ if (i<sigma.size() && !MD.isZero(sigma[i])) nul_sigma=false;
i++;
}
if (i<ord){
std::cout<<"error at degree="<<i<<std::endl;
T[i].write(std::cout, Tag::FileFormat::Plain);
std::cout<<"***"<<std::endl;
- std::cout<<serie<<std::endl;
- std::cout<<sigma<<std::endl;
+ //std::cout<<serie<<std::endl;
+ //std::cout<<sigma<<std::endl;
exit(1);
}
@@ -99,7 +110,7 @@ namespace LinBox {
void reset() {_count=0;_val=0;}
};
- template<class Field, class ET=EarlyTerm<-1> >
+ template<class Field, class ET=EarlyTerm<(size_t) -1> >
class OrderBasis {
public:
typedef PolynomialMatrix<PMType::polfirst,PMStorage::plain,Field> MatrixP;
@@ -110,7 +121,7 @@ namespace LinBox {
BlasMatrixDomain<Field> _BMD;
ET _EarlyStop;
public:
-#if 1 or (PROFILE_PMBASIS) or (__CHECK_MBASIS)or (__CHECK_PMBASIS)
+#if defined(PROFILE_PMBASIS) or defined(__CHECK_MBASIS) or defined(__CHECK_PMBASIS)
size_t _idx=0;
size_t _target=0;
double _eta=0.;
@@ -138,7 +149,6 @@ namespace LinBox {
// serie must have exactly order elements (i.e. its degree = order-1)
// sigma can have at most order+1 elements (i.e. its degree = order)
- // BEWARE: serie can be modified
template<typename PMatrix1, typename PMatrix2>
size_t PM_Basis(PMatrix1 &sigma,
const PMatrix2 &serie,
@@ -146,15 +156,15 @@ namespace LinBox {
std::vector<size_t> &shift)
{
-#if 1 or (PROFILE_PMBASIS)
- //std::cout<<"Start PM-Basis : "<<order<<" ("<<_idx<<"/"<<_target<<")] : "<<std::endl;//MEMINFO<<std::endl;
+#ifdef PROFILE_PMBASIS
+ //std::cout<<"Start PM-Basis : "<<order<<" ("<<_idx<<"/"<<_target<<")] : "<<std::endl;//MEMINFO2<<std::endl;
if (_target==0) _target=order;
if (!_started) {_started=true; _start = std::chrono::system_clock::now();}
std::chrono::time_point<std::chrono::system_clock> _chrono_start=std::chrono::system_clock::now();
#endif
if (order <= MBASIS_THRESHOLD) {
-#if 1 or (PROFILE_PMBASIS) or (__CHECK_PMBASIS)
+#if defined (PROFILE_PMBASIS) or defined(__CHECK_PMBASIS)
_idx+=order;
#endif
return M_Basis(sigma, serie, order, shift);
@@ -174,52 +184,53 @@ namespace LinBox {
integer p;
// first recursive call
- PMatrix1 sigma1(field(),m,n,ord1+1);
-
+ PMatrix1 sigma1(field(),m,n,ord1+1);
+
#ifdef MEM_PMBASIS
- std::cerr<<"[PM-Basis ("<<order<<") "<<_idx<<"/"<<_target<<"] [Sigma1] -> "<<MB(m*n*(ord1+1)*length(field().characteristic(p)))<<"Mo"<<MEMINFO<<std::endl;
+ std::cerr<<"[PM-Basis ("<<order<<") "<<_idx<<"/"<<_target<<"] [Sigma1] -> "<<MB(sigma1.realmeminfo())<<"Mo"<<MEMINFO2<<std::endl;
#endif
- //typename PMatrix2::const_view serie1=serie.at(0,ord1-1);
- PMatrix2 *serie1=new PMatrix2(field(),n,k,ord1);
+ PMatrix2 *serie1 = new PMatrix2(field(),n,k,ord1);
#ifdef MEM_PMBASIS
- std::cerr<<"[PM-Basis ("<<order<<") "<<_idx<<"/"<<_target<<"] [Serie1] -> "<<MB(n*k*ord1*length(field().characteristic(p)))<<"Mo"<<MEMINFO<<std::endl;
+ std::cerr<<"[PM-Basis ("<<order<<") "<<_idx<<"/"<<_target<<"] [Serie1] -> "<<MB(serie1->realmeminfo())<<"Mo"<<MEMINFO2<<std::endl;
#endif
serie1->copy(serie,0,ord1-1);
d1 = PM_Basis(sigma1, *serie1, ord1, shift);
+ //DEL_MEM(serie1->realmeminfo())
delete serie1;
if (_EarlyStop.terminated()){
sigma=sigma1;
return d1;
}
-
+
// compute the serie update
// TODO: for Block Wiedemann, this step can use only the first column of sigma
PMatrix2 *serie2=new PMatrix2(field(),n,k,ord2);//serie2 size=ord1+1 -> midproduct)
+ //ADD_MEM(serie2->realmeminfo());
#ifdef MEM_PMBASIS
- std::cerr<<"[PM-Basis ("<<order<<") "<<_idx<<"/"<<_target<<"] [Serie2] -> "<<MB(n*k*ord2*length(field().characteristic(p)))<<"Mo"<<MEMINFO<<std::endl;
+ std::cerr<<"[PM-Basis ("<<order<<") "<<_idx<<"/"<<_target<<"] [Serie2] -> "<<MB(serie2->realmeminfo())<<"Mo"<<MEMINFO2<<std::endl;
#endif
_PMD.midproductgen(*serie2, sigma1, serie, true, ord1+1,ord1+ord2);
+
#ifdef PROFILE_PMBASIS
//chrono.stop();
- //std::cout<<" -> serie update "<<sigma1.size()<<"x"<<order<<" --> "<<chrono.usertime()<<std::endl;//MEMINFO<<std::endl;
+ //std::cout<<" -> serie update "<<sigma1.size()<<"x"<<order<<" --> "<<chrono.usertime()<<std::endl;//MEMINFO2<<std::endl;
//chrono.clear();chrono.start();
#endif
// second recursive call
-
+
PMatrix1 sigma2(field(),m,n,ord2+1);
#ifdef MEM_PMBASIS
- std::cerr<<"[PM-Basis("<<order<<") "<<_idx<<"/"<<_target<<"] [Sigma2] -> "<<MB(m*n*(ord1+1)*length(field().characteristic(p)))<<"Mo"<<MEMINFO<<std::endl;
+ std::cerr<<"[PM-Basis("<<order<<") "<<_idx<<"/"<<_target<<"] [Sigma2] -> "<<MB(sigma2.realmeminfo())<<"Mo"<<MEMINFO2<<std::endl;
#endif
d2 = PM_Basis(sigma2, *serie2, ord2, shift);
delete serie2;
// compute the result
_PMD.mul(sigma, sigma2, sigma1);
- //sigma.resize(d1+d2+1);
- sigma.setsize(d1+d2+1);
+ sigma.resize(d1+d2+1);
#ifdef PROFILE_PMBASIS
//chrono.stop();
- //std::cout<<" -> basis product "<<sigma1.size()<<"x"<<sigma2.size()<<" = "<<d1+d2+1<<" -->"<<chrono.usertime()<<MEMINFO<<std::endl;
+ //std::cout<<" -> basis product "<<sigma1.size()<<"x"<<sigma2.size()<<" = "<<d1+d2+1<<" -->"<<chrono.usertime()<<MEMINFO2<<std::endl;
#endif
#ifdef __CHECK_PMBASIS
@@ -237,7 +248,7 @@ namespace LinBox {
_eta=(_eta!=0.0?std::min(_eta,tcomp*magicnumber):tcomp*magicnumber);
std::cerr<<"[PM-Basis : "<<order<<" ("<<_idx<<"/"<<_target<<")] : "<<chrono.usertime()
- << " (ETA: "<< telap<<"s / "<<_eta<<"s)"<<MEMINFO<<std::endl;
+ << " (ETA: "<< telap<<"s / "<<_eta<<"s)"<<MEMINFO2<<std::endl;
chrono.clear();chrono.start();
#endif
@@ -261,7 +272,9 @@ namespace LinBox {
return d;
}
-
+
+
+
// serie must have exactly order elements (i.e. its degree = order-1)
template<typename PMatrix1, typename PMatrix2>
size_t M_Basis(PMatrix1 &sigma,
@@ -628,8 +641,170 @@ namespace LinBox {
// cout<<"Early termination at order "<<sss<<" ("<<order<<")"<<endl;
}
+#ifdef LOW_MEMORY_PMBASIS
+ // serie must have exactly order elements (i.e. its degree = order-1)
+ // sigma can have at most order+1 elements (i.e. its degree = order)
+ // NOTE: sigma_ptr is allocated by this call (the caller must delete it) and serie_ptr is deleted by this call
+ template<typename PMatrix1, typename PMatrix2>
+ size_t PM_Basis_low(PMatrix1* &sigma_ptr,
+ const PMatrix2 *serie_ptr,
+ size_t order,
+ std::vector<size_t> &shift)
+ {
+
+#ifdef PROFILE_PMBASIS
+ //std::cout<<"Start PM-Basis : "<<order<<" ("<<_idx<<"/"<<_target<<")] : "<<std::endl;//MEMINFO2<<std::endl;
+ if (_target==0) _target=order;
+ if (!_started) {_started=true; _start = std::chrono::system_clock::now();}
+ std::chrono::time_point<std::chrono::system_clock> _chrono_start=std::chrono::system_clock::now();
+#endif
+
+ if (order <= MBASIS_THRESHOLD) {
+#if defined (PROFILE_PMBASIS) or defined(__CHECK_PMBASIS)
+ _idx+=order;
+#endif
+ sigma_ptr = new PMatrix1(field(),serie_ptr->rowdim(),serie_ptr->rowdim(),order+1);
+ size_t res= M_Basis(*sigma_ptr, *serie_ptr, order, shift);
+ delete serie_ptr;
+ return res;
+ }
+ else {
+#ifdef PROFILE_PMBASIS
+ Timer chrono;
+ chrono.start();
+#endif
+ size_t ord1,ord2,d1,d2;
+ ord1 = order>>1;
+ ord2 = order-ord1; // ord1+ord2=order
+ size_t m,n,k;
+ m=serie_ptr->rowdim();
+ n=serie_ptr->rowdim();
+ k=serie_ptr->coldim();
+ integer p;
+
+ // first recursive call
+ PMatrix1 *sigma1_ptr, *sigma2_ptr;
+ PMatrix2 *serie1_ptr, *serie2_ptr;
+
+ // Allocate serie1
+ serie1_ptr= new PMatrix2(field(),n,k,ord1);
+#ifdef MEM_PMBASIS
+ std::cerr<<"[PM-Basis ("<<order<<") "<<_idx<<"/"<<_target<<"] [ALLOC Serie1] -> "<<MB(serie1_ptr->realmeminfo())<<"Mo"<<MEMINFO2<<std::endl;
+#endif
+ serie1_ptr->copy(*serie_ptr,0,ord1-1);
+ d1 = PM_Basis_low(sigma1_ptr, serie1_ptr, ord1, shift);
+ // no more needed
+ // delete serie1_ptr;
+
+#ifdef MEM_PMBASIS
+ std::cerr<<"[PM-Basis ("<<order<<") "<<_idx<<"/"<<_target<<"] [DEL Serie1] -> "<<MEMINFO2<<std::endl;
+#endif
+
+
+ if (_EarlyStop.terminated()){
+ sigma_ptr=sigma1_ptr;
+ delete serie_ptr;
+ return d1;
+ }
+
+ // Allocate serie2
+ serie2_ptr=new PMatrix2(field(),n,k,ord2);//serie2 size=ord1+1 -> midproduct)
+#ifdef MEM_PMBASIS
+ std::cerr<<"[PM-Basis ("<<order<<") "<<_idx<<"/"<<_target<<"] [ALLOC Serie2] -> "<<MB(serie2_ptr->realmeminfo())<<"Mo"<<MEMINFO2<<std::endl;
+#endif
+
+ _PMD.midproductgen(*serie2_ptr, *sigma1_ptr, *serie_ptr, true, ord1+1,ord1+ord2);
+#ifndef __CHECK_PMBASIS
+ delete serie_ptr; // the initial serie is no more needed (except with checking pmbasis)
+#endif
+ // second recursive call
+ d2 = PM_Basis_low(sigma2_ptr, serie2_ptr, ord2, shift);
+ // no more needed
+ // delete serie2_ptr;
+#ifdef MEM_PMBASIS
+ std::cerr<<"[PM-Basis ("<<order<<") "<<_idx<<"/"<<_target<<"] [DEL Serie2] -> "<<MEMINFO2<<std::endl;
+#endif
+ // compute the result
+ sigma_ptr = new PMatrix1(field(),m,n,d1+d2+1);
+ //sigma_ptr = new PMatrix1(field(),m,n,order+1);
+#ifdef MEM_PMBASIS
+ std::cerr<<"[PM-Basis ("<<order<<") "<<_idx<<"/"<<_target<<"] [ALLOC Sigma] -> "<<MB(sigma_ptr->realmeminfo())<<"Mo"<<MEMINFO2<<std::endl;
+#endif
+ _PMD.mul(*sigma_ptr, *sigma2_ptr, *sigma1_ptr, d1+d2);
+ //sigma_ptr->resize(d1+d2+1);
+ delete sigma1_ptr;
+ delete sigma2_ptr;
+#ifdef MEM_PMBASIS
+ std::cerr<<"[PM-Basis ("<<order<<") "<<_idx<<"/"<<_target<<"] [DEL Sigma 1/2] -> "<<MEMINFO2<<std::endl;
+#endif
+
+
+#ifdef PROFILE_PMBASIS
+ //chrono.stop();
+ //std::cout<<" -> basis product "<<sigma1.size()<<"x"<<sigma2.size()<<" = "<<d1+d2+1<<" -->"<<chrono.usertime()<<MEMINFO2<<std::endl;
+#endif
+
+#ifdef __CHECK_PMBASIS
+ std::cout<<"PMBASIS: order "<<_idx<<check_orderbasis(field(),*sigma_ptr,*serie_ptr,order)<<std::endl;
+ delete serie_ptr;
+#endif
+#ifdef PROFILE_PMBASIS
+ chrono.stop();
+ _end = std::chrono::system_clock::now();
+ std::chrono::duration<double> elapsed_beginning = _end-_start;
+ std::chrono::duration<double> elapsed_comp = _end-_chrono_start;
+
+ double magicnumber=double(_target)/double(order)*log(double(_target)/double(order))/log(2.);
+ double tcomp = elapsed_comp.count();
+ double telap = elapsed_beginning.count();
+
+ _eta=(_eta!=0.0?std::min(_eta,tcomp*magicnumber):tcomp*magicnumber);
+ std::cerr<<"[PM-Basis : "<<order<<" ("<<_idx<<"/"<<_target<<")] : "<<chrono.usertime()
+ << " (ETA: "<< telap<<"s / "<<_eta<<"s)"<<MEMINFO2<<std::endl;
+ chrono.clear();chrono.start();
+#endif
+
+
+ return d1+d2;
+ }
+ }
+#endif // LOW_MEMORY_PMBASIS
+
+
};
+
+ typedef Givaro::Modular<RecInt::ruint128,RecInt::ruint256> MYRECINT; // field type handled by the M_Basis specialization below
+ template<>
+ inline size_t OrderBasis<MYRECINT,EarlyTerm<(size_t) -1> >::M_Basis(PolynomialMatrix<PMType::polfirst,PMStorage::plain, MYRECINT> &sigma,
+ const PolynomialMatrix<PMType::polfirst,PMStorage::plain, MYRECINT> &serie,
+ size_t order,
+ std::vector<size_t> &shift)
+ {
+ // M_Basis for the RecInt field: the computation is redone over Givaro::Modular<Integer> (same cardinality, matfirst storage).
+ // 'inline' is needed because a full specialization defined in a header is an ordinary function w.r.t. the one-definition rule.
+ Givaro::Integer p; field().cardinality(p);
+ typedef Givaro::Modular<Givaro::Integer> NewField;
+ NewField F(p);
+ OrderBasis<NewField > SB(F);
+ typedef PolynomialMatrix<PMType::matfirst,PMStorage::plain, NewField> NewMatrix;
+
+ NewMatrix sigma1(F,sigma.rowdim(),sigma.coldim(),order+1);
+ NewMatrix serie1(F,serie.rowdim(),serie.coldim(),order);
+ serie1.copy(serie,0,order-1); // convert the input series into the Integer-based field (presumably indices 0..order-1)
+
+ //std::cout<<"Serie: "<<serie<<std::endl;
+ //std::cout<<"Serie1: "<<serie1<<std::endl;
+
+ size_t d= SB.M_Basis(sigma1,serie1,order,shift); // generic M_Basis on the converted data; shift is updated through the reference
+ sigma.copy(sigma1,0,d); // bring the result back into the RecInt representation (presumably indices 0..d)
+
+ //std::cout<<"Sigma1: "<<sigma1<<std::endl;
+ //std::cout<<"Sigma: "<<sigma<<std::endl;
+ return d;
+ }
+
+
} // end of namespace LinBox
// Local Variables:
diff --git a/linbox/algorithms/polynomial-matrix/polynomial-fft-algorithms.h b/linbox/algorithms/polynomial-matrix/polynomial-fft-algorithms.h
new file mode 100644
index 0000000..65ca574
--- /dev/null
+++ b/linbox/algorithms/polynomial-matrix/polynomial-fft-algorithms.h
@@ -0,0 +1,401 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2016 Romain Lebreton, Pascal Giorgi
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ * Romain Lebreton <romain.lebreton at lirmm.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library LinBox.
+ *
+ * LinBox is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ */
+
+
+#ifndef __LINBOX_polynomial_fft_algorithms_H
+#define __LINBOX_polynomial_fft_algorithms_H
+
+#include <iostream>
+#include "linbox/linbox-config.h"
+#include "fflas-ffpack/fflas/fflas_simd.h"
+#include "linbox/algorithms/polynomial-matrix/simd-additional-functions.h"
+#include "linbox/algorithms/polynomial-matrix/polynomial-fft-init.h"
+#include "linbox/algorithms/polynomial-matrix/polynomial-fft-butterflies.h"
+
+namespace LinBox {
+
+ template<typename Field, typename simd = Simd<typename Field::Element>, uint8_t vect_size = simd::vect_size>
+ class FFT_algorithms : public FFT_butterflies<Field, simd, vect_size> {
+ public:
+ using Element = typename Field::Element;
+ FFT_algorithms(const FFT_init<Field>& f_i) : FFT_butterflies<Field, simd, vect_size>(f_i) {}
+ void DIF_mod2p (Element *fft);
+ void DIT_mod4p (Element *fft);
+ void DIF (Element *fft);
+ void DIT (Element *fft);
+ }; // FFT_algorithms
+
+ template<typename Field>
+ class FFT_algorithms<Field, NoSimd<typename Field::Element>, 1> : public FFT_butterflies<Field, NoSimd<typename Field::Element>, 1> {
+ public:
+ using Element = typename Field::Element;
+
+ FFT_algorithms(const FFT_init<Field>& f_i) : FFT_butterflies<Field, NoSimd<typename Field::Element>, 1>(f_i) {}
+
+ void DIF_mod2p (Element *fft) {
+ for (size_t w = this->n >> 1, f = 1; w != 0; f <<= 1, w >>= 1){
+ // w : width of butterflies
+ // f : # families of butterflies
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j++)
+ this->Butterfly_DIF_mod2p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], (this->pow_w)[j*f], (this->pow_wp)[j*f]);
+ }
+ }
+
+ void DIT_mod4p (Element *fft) {
+ for (size_t w = 1, f = this->n >> 1; f >= 1; w <<= 1, f >>= 1)
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j++)
+ this->Butterfly_DIT_mod4p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], (this->pow_w)[j*f], (this->pow_wp)[j*f]);
+ }
+
+ void DIF (Element *fft) {
+ DIF_mod2p(fft);
+ //DIF_mod2p_iterative(fft);
+ for (uint64_t i = 0; i < this->n; i++) {
+ // if (fft[i] >= (_pl << 1)) fft[i] -= (_pl << 1);
+ if (fft[i] >= this->_pl) fft[i] -= this->_pl;
+ }
+ }
+
+ void DIT (Element *fft) {
+ DIT_mod4p(fft);
+ //DIF_mod2p_iterative(fft);
+ for (uint64_t i = 0; i < this->n; i++) {
+ if (fft[i] >= (this->_pl << 1)) fft[i] -= (this->_pl << 1);
+ if (fft[i] >= this->_pl) fft[i] -= this->_pl;
+ }
+ }
+
+ }; // FFT_algorithms<Field, NoSimd<typename Field::Element>, 1>
+
+ template<typename Field, typename simd>
+ class FFT_algorithms<Field, simd, 4> : public FFT_butterflies<Field, simd, 4> {
+ public:
+ using Element = typename Field::Element;
+ using Compute_t = typename Field::Compute_t;
+ using Residu_t = typename Field::Residu_t;
+ using vect_t = typename simd::vect_t;
+
+ FFT_algorithms(const FFT_init<Field>& f_i) : FFT_butterflies<Field, simd, 4>(f_i) {
+ linbox_check(simd::vect_size == 4);
+ }
+
+ void DIF_mod2p (Element *fft) {
+ const uint64_t& n = this->n;
+ const Residu_t& _pl = this->_pl;
+ const Residu_t& _dpl = this->_dpl;
+
+ vect_t P,P2;
+ P = simd::set1(_pl);
+ P2 = simd::set1(_dpl);
+ Element * tab_w = &(this->pow_w) [0];
+ Element * tab_wp= &(this->pow_wp)[0];
+ size_t w, f;
+ for (w = n >> 1, f = 1; w >= 4; tab_w+=w, tab_wp+=w, w >>= 1, f <<= 1){
+ // w : width of butterflies
+ // f : # families of butterflies
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j+=4)
+
+#define A0 &fft[0] + (i << 1) *w+ j
+#define A4 &fft[0] + ((i << 1)+1)*w+ j
+ this->Butterfly_DIF_mod2p(A0,A4, tab_w+j,tab_wp+j,P,P2);
+#undef A0
+#undef A4
+ //std::cout<<fft<<std::endl;
+ }
+ // Last two steps
+ if (n >= 8) {
+ vect_t W,Wp;
+ W = simd::set1 (tab_w [1]);
+ Wp= simd::set1 (tab_wp[1]);
+
+ for (size_t i = 0; i < f; i+=2)
+#define A0 &fft[0] + (i << 2)
+#define A4 &fft[0] + ((i << 2)+4)
+ this->Butterfly_DIF_mod2p_laststeps(A0,A4,W,Wp,P,P2);
+ //std::cout<<fft<<std::endl;
+#undef A0
+#undef A4
+ } else {
+ FFT_algorithms<Field, NoSimd<Element>, 1> fft_algo_1 (FFT_init<Field> (this->field(),this->ln,this->getRoot()));
+
+ for (; w >= 1; tab_w+=w, tab_wp+=w, w >>= 1, f <<= 1)
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j++)
+ fft_algo_1.Butterfly_DIF_mod2p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
+ }
+ }
+
+ void DIT_mod4p (Element *fft) {
+ const uint64_t& n = this->n;
+ const Residu_t& _pl = this->_pl;
+ const Residu_t& _dpl = this->_dpl;
+
+ vect_t P,P2;
+ P = simd::set1(_pl);
+ P2 = simd::set1(_dpl);
+ // First two steps
+ if (n >= 8) {
+ vect_t W,Wp;
+ W = simd::set1 ((this->pow_w) [n-3]);
+ Wp= simd::set1 ((this->pow_wp)[n-3]);
+
+ for (size_t i = 0; i < n; i+=8)
+ this->Butterfly_DIT_mod4p_firststeps(&fft[i],&fft[i+4],W,Wp,P,P2);
+
+ Element * tab_w = &(this->pow_w) [n-8];
+ Element * tab_wp= &(this->pow_wp)[n-8];
+ for (size_t w = 4, f = n >> 3; f >= 1; w <<= 1, f >>= 1, tab_w-=w, tab_wp-=w){
+ // w : width of butterflies
+ // f : # families of butterflies
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j+=4)
+#define A0 &fft[0] + (i << 1) *w+ j
+#define A4 &fft[0] + ((i << 1)+1)*w+ j
+ this->Butterfly_DIT_mod4p(A0,A4, tab_w+j,tab_wp+j,P,P2);
+
+#undef A0
+#undef A4
+
+ }
+ } else {
+ FFT_algorithms<Field, NoSimd<Element>, 1> fft_algo_1 (FFT_init<Field> (this->field(),this->ln,this->getRoot()));
+
+ Element * tab_w = &(this->pow_w) [n-2];
+ Element * tab_wp= &(this->pow_wp)[n-2];
+ for (size_t w = 1, f = n >> 1; f >= 1; w <<= 1, f >>= 1, tab_w-=w, tab_wp-=w)
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j++)
+ fft_algo_1.Butterfly_DIT_mod4p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
+ }
+ }
+
+ void DIF (Element *fft) { // runs DIF_mod2p in place, then brings every entry below _pl
+ DIF_mod2p(fft);
+ // NOTE(review): reduce<Element,simd> is assumed to process one SIMD vector per call, i.e. 4 entries here (vect_size == 4).
+ if (this->n >= 4) {
+ vect_t P;
+ P = simd::set1(this->_pl);
+ for (uint64_t i = 0; i < this->n; i += 4) // stride = vector width, so that no entry is skipped
+ reduce<Element,simd>(&fft[i],P);
+ return;
+ } else {
+ for (uint64_t i = 0; i < this->n; i++)
+ if (fft[i] >= this->_pl) fft[i] -= this->_pl;
+ }
+ }
+
+ void DIT (Element *fft) { // runs DIT_mod4p in place, then reduces every entry w.r.t. _dpl and then _pl
+ DIT_mod4p(fft);
+ // NOTE(review): reduce<Element,simd> is assumed to process one SIMD vector per call, i.e. 4 entries here (vect_size == 4).
+ if (this->n >= 4) {
+ vect_t P,P2;
+ P = simd::set1(this->_pl);
+ P2 = simd::set1(this->_dpl);
+ for (uint64_t i = 0; i < this->n; i += 4){ // stride = vector width, so that no entry is skipped
+ reduce<Element,simd>(&fft[i],P2);
+ reduce<Element,simd>(&fft[i],P);
+ }
+ return;
+
+ } else {
+ for (uint64_t i = 0; i < this->n; i++) {
+ if (fft[i] >= (this->_pl << 1)) fft[i] -= (this->_pl << 1);
+ if (fft[i] >= this->_pl) fft[i] -= this->_pl;
+ }
+ }
+ }
+
+ }; // FFT_algorithms<Field, simd, 4>
+
+ template<typename Field, typename simd>
+ class FFT_algorithms<Field, simd, 8> : public FFT_butterflies<Field, simd, 8> {
+ public:
+ using Element = typename Field::Element;
+ using vect_t = typename simd::vect_t;
+
+ FFT_algorithms(const FFT_init<Field>& f_i) : FFT_butterflies<Field, simd, 8>(f_i) {
+ linbox_check(simd::vect_size == 8);
+ }
+
+ void DIF_mod2p (Element *fft) {
+ vect_t P,P2;
+ P = simd::set1(this->_pl);
+ P2 = simd::set1(this->_dpl);
+
+ Element * tab_w = &(this->pow_w) [0];
+ Element * tab_wp= &(this->pow_wp)[0];
+ size_t w, f;
+ for (w = this->n >> 1, f = 1; w >= 8; tab_w+=w, tab_wp+=w, w >>= 1, f <<= 1){
+ // w : width of butterflies
+ // f : # families of butterflies
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j+=8)
+#define A0 &fft[0] + (i << 1) *w+ j
+#define A4 &fft[0] + ((i << 1)+1)*w+ j
+ this->Butterfly_DIF_mod2p(A0,A4, tab_w+j,tab_wp+j,P,P2);
+
+#undef A0
+#undef A4
+ //std::cout<<fft<<std::endl;
+ }
+ // Last three steps
+ if (this->n >= 16) {
+ vect_t alpha,alphap,beta,betap;
+ Element tmp[8];
+ tmp[0]=tmp[4]=tab_w[0];
+ tmp[1]=tmp[5]=tab_w[1];
+ tmp[2]=tmp[6]=tab_w[2];
+ tmp[3]=tmp[7]=tab_w[3];
+ alpha = MemoryOp<Element,simd>::load(tmp);
+ tmp[0]=tmp[4]=tab_wp[0];
+ tmp[1]=tmp[5]=tab_wp[1];
+ tmp[2]=tmp[6]=tab_wp[2];
+ tmp[3]=tmp[7]=tab_wp[3];
+ alphap = MemoryOp<Element,simd>::load(tmp);
+ beta = simd::set1(tab_w [5]);
+ betap = simd::set1(tab_wp [5]);
+
+ for (size_t i = 0; i < f; i+=2)
+#define A0 &fft[0] + (i << 3)
+#define A4 &fft[0] + (i << 3)+8
+ this->Butterfly_DIF_mod2p_laststeps(A0,A4,alpha,alphap,beta,betap,P,P2);
+#undef A0
+#undef A4
+ //std::cout<<fft<<std::endl;
+ } else {
+ // TODO : improve ?
+ //FFT_algorithms<Field, NoSimd<Element>, 1> fft_algo_1 ((FFT_init<Field>) *this);
+ FFT_algorithms<Field, NoSimd<Element>, 1> fft_algo_1 (FFT_init<Field> (this->field(),this->ln,this->getRoot()));
+
+ for (; w >= 1; tab_w+=w, tab_wp+=w, w >>= 1, f <<= 1)
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j++)
+ fft_algo_1.Butterfly_DIF_mod2p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
+ }
+ }
+
+ void DIT_mod4p (Element *fft) {
+ const auto &pow_w = this->pow_w;
+ const auto &pow_wp = this->pow_wp;
+ const uint64_t &n = this->n;
+
+ vect_t P,P2;
+ P = simd::set1(this->_pl);
+ P2 = simd::set1(this->_dpl);
+
+ // first three steps
+ if (n >= 16) {
+ vect_t alpha,alphap,beta,betap;
+ alpha = simd::set1((pow_w)[n-3]);
+ alphap = simd::set1((pow_wp)[n-3]);
+ Element tmp[8];
+ tmp[0]=tmp[4]=(pow_w)[n-8];
+ tmp[1]=tmp[5]=(pow_w)[n-7];
+ tmp[2]=tmp[6]=(pow_w)[n-6];
+ tmp[3]=tmp[7]=(pow_w)[n-5];
+ beta = MemoryOp<Element,simd>::load(tmp);
+ tmp[0]=tmp[4]=(pow_wp)[n-8];
+ tmp[1]=tmp[5]=(pow_wp)[n-7];
+ tmp[2]=tmp[6]=(pow_wp)[n-6];
+ tmp[3]=tmp[7]=(pow_wp)[n-5];
+ betap = MemoryOp<Element,simd>::load(tmp);
+ for (uint64_t i = 0; i < n; i+=16) {
+ this->Butterfly_DIT_mod4p_firststeps(&fft[i],&fft[i+8],alpha,alphap,beta,betap,P,P2);
+ }
+ const Element * tab_w = &(pow_w) [n-16];
+ const Element * tab_wp= &(pow_wp)[n-16];
+ for (size_t w = 8, f = n >> 4; f >= 1; w <<= 1, f >>= 1, tab_w-=w, tab_wp-=w){
+ // w : width of butterflies
+ // f : # families of butterflies
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j+=8) {
+#define A0 &fft[0] + (i << 1) *w+ j
+#define A4 &fft[0] + ((i << 1)+1)*w+ j
+ this->Butterfly_DIT_mod4p(A0,A4, tab_w+j,tab_wp+j,P,P2);
+#undef A0
+#undef A4
+ }
+ }
+ } else {
+
+ FFT_algorithms<Field, NoSimd<Element>, 1> fft_algo_1 (FFT_init<Field> (this->field(),this->ln,this->getRoot()));
+
+ const Element * tab_w = &(pow_w) [n-2];
+ const Element * tab_wp= &(pow_wp)[n-2];
+ for (size_t w = 1, f = n >> 1; f >= 1; w <<= 1, f >>= 1, tab_w-=w, tab_wp-=w)
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j++)
+ fft_algo_1.Butterfly_DIT_mod4p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
+ }
+ }
+
+ void DIF (Element *fft) {
+ DIF_mod2p(fft);
+
+ if (this->n >= 8) {
+ vect_t P;
+ P = simd::set1(this->_pl);
+ for (uint64_t i = 0; i < this->n; i += 8){
+ reduce<Element,simd>(&fft[i],P);
+ }
+ return;
+
+ } else {
+ for (uint64_t i = 0; i < this->n; i++)
+ if (fft[i] >= this->_pl) fft[i] -= this->_pl;
+ }
+ }
+
+ void DIT (Element *fft) {
+ DIT_mod4p(fft);
+
+ if (this->n >= 8) {
+ vect_t P,P2;
+ P = simd::set1(this->_pl);
+ P2 = simd::set1(this->_dpl);
+ for (uint64_t i = 0; i < this->n; i += 8){
+ reduce<Element,simd>(&fft[i],P2);
+ reduce<Element,simd>(&fft[i],P);
+ }
+ return;
+
+ } else {
+ for (uint64_t i = 0; i < this->n; i++) {
+ if (fft[i] >= (this->_pl << 1)) fft[i] -= (this->_pl << 1);
+ if (fft[i] >= this->_pl) fft[i] -= this->_pl;
+ }
+ }
+ }
+
+ }; // FFT_algorithms<Field, simd, 8>
+
+}
+
+#endif // __LINBOX_polynomial_fft_algorithms_H
diff --git a/linbox/algorithms/polynomial-matrix/polynomial-fft-butterflies.h b/linbox/algorithms/polynomial-matrix/polynomial-fft-butterflies.h
new file mode 100644
index 0000000..8afc717
--- /dev/null
+++ b/linbox/algorithms/polynomial-matrix/polynomial-fft-butterflies.h
@@ -0,0 +1,492 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2016 Romain Lebreton, Pascal Giorgi
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ * Romain Lebreton <romain.lebreton at lirmm.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library LinBox.
+ *
+ * LinBox is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ */
+
+
+#ifndef __LINBOX_polynomial_fft_butterflies_H
+#define __LINBOX_polynomial_fft_butterflies_H
+
+#include <iostream>
+#include "linbox/util/debug.h"
+#include "linbox/linbox-config.h"
+#include "fflas-ffpack/fflas/fflas_simd.h"
+#include "linbox/algorithms/polynomial-matrix/polynomial-fft-init.h"
+#include "linbox/algorithms/polynomial-matrix/simd-additional-functions.h"
+
+namespace LinBox {
+
+ template<typename Field, typename simd = Simd<typename Field::Element>, uint8_t byn = simd::vect_size>
+ class FFT_butterflies : public FFT_init<Field> {
+ public:
+ FFT_butterflies(const FFT_init<Field>& f_i) : FFT_init<Field>(f_i) {
+ std::cerr<<"Not implemented !\n";
+ }
+ }; // FFT_butterflies
+
+ template<typename Field>
+ class FFT_butterflies<Field, NoSimd<typename Field::Element>, 1> : public FFT_init<Field> {
+ public:
+
+ using Element = typename Field::Element;
+
+ FFT_butterflies(const FFT_init<Field>& f_i) : FFT_init<Field>(f_i) {}
+
+ inline void Butterfly_DIT_mod4p (Element& A, Element& B, const Element& alpha, const Element& alphap) {
+ using Compute_t = typename Field::Compute_t;
+ // Harvey's algorithm
+ // 0 <= A,B < 4*p, p < 2^32 / 4
+ // alphap = floor(alpha * 2^k / p), with k = 8*sizeof(Element) (k = 32 for 32-bit elements)
+
+ // TODO : replace by substract if greater
+ if (A >= this->_dpl) A -= this->_dpl;
+
+ // TODO : replace by mul_mod_shoup
+ Element tmp = ((Element) alphap * (Compute_t)B) >> (8*sizeof(Element));
+ tmp = alpha * B - tmp * this->_pl;
+
+ // TODO : replace by add_r and sub_r
+ B = A + (this->_dpl - tmp);
+ // B &= 0XFFFFFFFF;
+ A += tmp;
+ }
+
+ inline void Butterfly_DIF_mod2p (Element& A, Element& B, const Element& alpha, const Element& alphap) {
+ //std::cout<<A<<" $$ "<<B<<"("<<alpha<<","<<alphap<<" ) -> ";
+ using Compute_t = typename Field::Compute_t;
+ // Harvey's algorithm
+ // 0 <= A,B < 2*p, p < 2^32 / 4
+ // alphap = floor(alpha * 2^k / p), with k = 8*sizeof(Element) (k = 32 for 32-bit elements)
+
+ Element tmp = A;
+
+ A += B;
+
+ if (A >= this->_dpl) A -= this->_dpl;
+
+ B = tmp + (this->_dpl - B);
+
+ tmp = ((Element) alphap * (Compute_t) B) >> (8*sizeof(Element));
+ B = alpha * B - tmp * this->_pl;
+ //B &= 0xFFFFFFFF;
+ //std::cout<<A<<" $$ "<<B<<"\n ";
+ }
+
+ }; // FFT_butterflies<Field, 1>
+
+ // WARNING: watch out for all the remaining uint64_t and SimdComp uses!
+
+ template<typename Field, typename simd>
+ class FFT_butterflies<Field, simd, 4> : public FFT_init<Field> {
+ public:
+
+ using Element = typename Field::Element;
+ using vect_t = typename simd::vect_t;
+ using SimdComp = typename SimdCompute_t<simd,Field>::Compute_t;
+
+ FFT_butterflies(const FFT_init<Field>& f_i) : FFT_init<Field>(f_i) {
+ linbox_check(simd::vect_size == 4);
+ }
+
+ // TODO include P, P2 in precomp
+ // TODO : Same functions Butterfly_DIT_mod4p Butterfly_DIF_mod2p in FFT_butterflies<Field, 8>
+ inline void Butterfly_DIT_mod4p (Element* ABCD, Element* EFGH,
+ const Element* alpha, const Element* alphap,
+ const vect_t& P, const vect_t& P2) {
+ vect_t V1,V2,V3,V4,W,Wp,T1;
+ // V1=[A B C D], V2=[E F G H]
+ V1 = MemoryOp<Element,simd>::load(ABCD);
+ V2 = MemoryOp<Element,simd>::load(EFGH);
+ W = MemoryOp<Element,simd>::load(alpha);
+ Wp = MemoryOp<Element,simd>::load(alphap);
+
+ // V3 = V1 mod 2P
+ V3 = reduce<simd>(V1, P2);
+
+ // V4 = V2 * W mod P
+ V4 = mul_mod<simd>(V2,W,P,Wp);
+
+ // V1 = V3 + V4
+ V1 = simd::add(V3,V4);
+ MemoryOp<Element,simd>::store(ABCD,V1);
+
+ // V2 = V3 - (V4 - 2P)
+ T1 = simd::sub(V4,P2);
+ V2 = simd::sub(V3,T1);
+ MemoryOp<Element,simd>::store(EFGH,V2);
+ }
+
+ inline void Butterfly_DIT_mod4p_firststeps (Element* ABCD, Element* EFGH,
+ const vect_t& W,
+ const vect_t& Wp,
+ const vect_t& P, const vect_t& P2) {
+ // First 2 steps
+ // First step
+ vect_t V1,V2,V3,V4,T1,T2,T3,T4;
+ // T1=[A B C D], T2=[E F G H]
+ T1 = MemoryOp<Element,simd>::load(ABCD);
+ T2 = MemoryOp<Element,simd>::load(EFGH);
+
+ // V1=[AECG], V2=[BFDH]
+ MemoryOp<Element,simd>::unpacklohi_twice4(V1,V2,T1,T2);
+
+ // V3 = V1 + V2
+ // Rk: No need for (. mod 2P) since entries are <P
+ V3 = simd::add(V1,V2);
+ // V4 = V1 + (P - V2)
+ // Rk: No need for (. mod 2P) since entries are <P
+ T1 = simd::sub(V2,P);
+ V4 = simd::sub(V1,T1);
+
+ MemoryOp<Element,simd>::unpacklohi4(V1,V2,V3,V4);
+
+ // Second step
+ // T1 = [D D H H]
+ T1 = MemoryOp<Element,simd>::unpackhi4(V4,V4);
+
+ T2 = mul_mod_half<simd, SimdComp>(T1, W, P, Wp);
+
+ T2 = simd::template shuffle<0xDD>(T2);
+ //T2 = simd::template shuffle<0xDD>(T2);
+
+ //At this point, T2 = [D*Wmodp H*Wmodp D*Wmodp H*Wmodp]
+
+ // At this time I have V3=[A E C G], V4=[B F ? ?], T2=[? ? D H]
+ // I need V1 = [A B E F], V2 = [C D G H]
+ // This is not refactored in MemoryOp::... because of different arguments (V3,V4) and (V3,T2)
+ V1 = MemoryOp<Element,simd>::unpacklo4(V3,V4);
+ V2 = MemoryOp<Element,simd>::unpackhi4(V3,T2);
+
+ // T1 = V1 + V2
+ T1 = simd::add(V1,V2);
+ // T2 = V1 - (V2 - 2P)
+ T3 = simd::sub(V2,P2);
+ T2 = simd::sub(V1,T3);
+
+ MemoryOp<Element,simd>::unpacklohi2(V1,V2,T1,T2);
+
+ // Store
+ MemoryOp<Element,simd>::store(ABCD,V1);
+ MemoryOp<Element,simd>::store(EFGH,V2);
+ }
+
+ inline void Butterfly_DIF_mod2p (Element* ABCD, Element* EFGH,
+ const Element* alpha, const Element* alphap,
+ const vect_t& P, const vect_t& P2) {
+ vect_t V1,V2,V3,V4,W,Wp,T;
+ // V1=[A B C D], V2=[E F G H]
+ V1 = MemoryOp<Element,simd>::load(ABCD);
+ V2 = MemoryOp<Element,simd>::load(EFGH);
+ W = MemoryOp<Element,simd>::load(alpha);
+ Wp = MemoryOp<Element,simd>::load(alphap);
+ // V3 = V1 + V2 mod
+ V3 = add_mod<simd >(V1,V2,P2);
+ MemoryOp<Element,simd>::store(ABCD,V3);
+ // V4 = (V1+(2P-V2))alpha mod 2P
+ T = simd::sub(V2,P2);
+ V4 = simd::sub(V1,T);
+
+ T = mul_mod<simd >(V4,W,P,Wp);// T is the result
+ MemoryOp<Element,simd>::store(EFGH,T);
+ }
+
+ inline void Butterfly_DIF_mod2p_laststeps(Element* ABCD, Element* EFGH,
+                                           const vect_t& W,
+                                           const vect_t& Wp,
+                                           const vect_t& P, const vect_t& P2) {
+     // Last two DIF steps done in registers; W/Wp: twiddle vector and the precomputation passed with it to mul_mod_half.
+     vect_t V1,V2,V3,V4,V5,V6;
+     V1 = MemoryOp<Element,simd>::load(ABCD); // V1=[A B C D]
+     V2 = MemoryOp<Element,simd>::load(EFGH); // V2=[E F G H]
+
+     /* 1st step */
+     // V3=[A E B F], V4=[C G D H]
+     MemoryOp<Element,simd>::unpacklohi4(V3,V4,V1,V2);
+
+     // V1 = V3 + V4 mod 2P
+     // P2 = [2p 2p 2p 2p]
+     V1 = add_mod<simd >(V3,V4,P2);
+     // V2 = V3 + (2P - V4) mod 2P; the twiddle is applied below, to the upper half (D, H) only
+     V5 = simd::sub(V4,P2);
+     V6 = simd::sub(V3,V5);
+     V2 = reduce<simd >(V6, P2);
+     // V4 = [D D H H]
+     V4 = MemoryOp<Element,simd>::unpackhi4(V2,V2);
+
+     // V3 = [* D * H]
+     V3 = mul_mod_half<simd, SimdComp>(V4, W, P, Wp);
+
+     // After the shuffle, V3 = [D*Wmodp H*Wmodp D*Wmodp H*Wmodp]
+     V3 = simd::template shuffle<0xDD>(V3); // 0xDD = [3 1 3 1]_base4
+     // Per the lane notes below, only the upper half of V3 ([? ? D H]) is used afterwards.
+
+     // At this time I have V1=[A E B F], V2=[C G ? ?], V3=[? ? D H]
+     // I need V3 = [A C E G], V4 = [B D F H]
+     // This is not refactored in MemoryOp::... because of different arguments (V1,V3) and (V1,V2)
+     V4 = MemoryOp<Element,simd>::unpackhi4(V1,V3);
+     V3 = MemoryOp<Element,simd>::unpacklo4(V1,V2);
+
+     /* 2nd step */
+     // V1 = V3 + V4 mod 2P
+     V1 = add_mod<simd >(V3,V4,P2);
+     // V2 = V3 + (2P - V4) mod 2P
+     V5 = simd::sub(V4,P2);
+     V6 = simd::sub(V3,V5);
+     V2 = reduce<simd >(V6, P2);
+     // Result in V1 = [A C E G] and V2 = [B D F H]
+     // Transform to V3=[A B C D], V4=[E F G H]
+     MemoryOp<Element,simd>::unpacklohi4(V3,V4,V1,V2);
+     // Store
+     MemoryOp<Element,simd>::store(ABCD,V3);
+     MemoryOp<Element,simd>::store(EFGH,V4);
+ }
+
+ }; // FFT_butterflies<Field, 4>
+
+
+ template<typename Field, typename simd>
+ class FFT_butterflies<Field, simd, 8> : public FFT_init<Field> {
+ public:
+
+ using Element = typename Field::Element;
+ using vect_t = typename simd::vect_t;
+ using SimdComp = typename SimdCompute_t<simd,Field>::Compute_t;
+
+ FFT_butterflies(const FFT_init<Field>& f_i) : FFT_init<Field>(f_i) { // copy-constructs the FFT_init base from f_i
+ linbox_check(simd::vect_size == 8); // this specialization's butterflies are written for 8-lane vectors
+ }
+
+ // TODO include P, P2 in precomp
+ inline void Butterfly_DIT_mod4p (Element* ABCDEFGH, Element* IJKLMNOP,
+                                  const Element* alpha, const Element* alphap,
+                                  const vect_t& P, const vect_t& P2) {
+     // 8-lane DIT butterfly. With x, y the vectors read from the two pointers:
+     //   first output  = reduce(x, P2) + mul_mod(y, alpha)
+     //   second output = reduce(x, P2) - (mul_mod(y, alpha) - P2)
+     // Neither output is reduced again before being stored.
+     using Mem = MemoryOp<Element,simd>;
+     const vect_t top        = Mem::load(ABCDEFGH); // x = [A B C D E F G H]
+     const vect_t bottom     = Mem::load(IJKLMNOP); // y = [I J K L M N O P]
+     const vect_t twiddle    = Mem::load(alpha);
+     const vect_t twiddlePre = Mem::load(alphap);   // precomputation passed to mul_mod together with alpha
+
+     // x mod 2P
+     const vect_t topReduced = reduce<simd>(top, P2);
+     // y * alpha mod P
+     const vect_t twisted    = mul_mod<simd>(bottom, twiddle, P, twiddlePre);
+
+     // Sum output
+     Mem::store(ABCDEFGH, simd::add(topReduced, twisted));
+
+     // Difference output, formed as topReduced - (twisted - P2)
+     const vect_t twistedShifted = simd::sub(twisted, P2);
+     Mem::store(IJKLMNOP, simd::sub(topReduced, twistedShifted));
+ }
+
+ inline void Butterfly_DIT_mod4p_firststeps (Element* ABCDEFGH, Element* IJKLMNOP,
+                                             const vect_t& alpha,const vect_t& alphap,
+                                             const vect_t& beta ,const vect_t& betap,
+                                             const vect_t& P ,const vect_t& P2) {
+     // First 3 DIT steps fused in registers; (alpha, alphap) feed step 2 and (beta, betap) feed step 3.
+     vect_t V1,V2,V3,V4,V5,V6,V7;
+     // V1=[A B C D E F G H], V2=[I J K L M N O P]
+     V1 = MemoryOp<Element,simd>::load(ABCDEFGH);
+     V2 = MemoryOp<Element,simd>::load(IJKLMNOP);
+
+     /*********************************************/
+     /* 1st STEP */
+     /*********************************************/
+     // Transform to V3=[A I C K E M G O], V4=[B J D L F N H P]
+     MemoryOp<Element,simd>::unpacklohi_twice8(V6,V7,V1,V2);
+     MemoryOp<Element,simd>::unpacklohi_twice4(V3,V4,V6,V7);
+
+     // V1 = V3 + V4; V1 = [A I C K E M G O]
+     // Rk: No need for (. mod 2P) since entries are <P
+     V1 = simd::add(V3,V4);
+
+     // V2 = V3 + (P - V4); V2 = [B J D L F N H P]
+     // Rk: No need for (. mod 2P) since entries are <P
+     V6 = simd::sub(V4,P);
+     V2 = simd::sub(V3,V6);
+
+     /*********************************************/
+     /* 2nd STEP */
+     /*********************************************/
+     // V5 = [D D L L H H P P]
+     V5 = MemoryOp<Element,simd>::unpackhi_twice8(V2,V2);
+
+     // V3 = [* D * L * H * P]
+     V3 = mul_mod_half<simd,SimdComp>(V5,alpha,P,alphap);
+
+     // V7 = [D L D L H P H P]
+     V7 = MemoryOp<Element,simd>::shuffletwice8_DD(V3);
+     //V7 = simd::template shuffle_twice<0xDD>(V3); // 0xDD = 221 = [3 1 3 1]_base4
+
+     // V3= [A B I J E F M N], V4=[C D K L G H O P]
+     V3 = MemoryOp<Element,simd>::unpacklo_twice8(V1,V2);
+     V4 = MemoryOp<Element,simd>::unpackhi_twice8(V1,V7);
+
+     // V1 = V3+V4
+     V1 = simd::add(V3,V4);
+     // V2 = V3 - (V4 - 2P)
+     V7 = simd::sub(V4,P2);
+     V2 = simd::sub(V3,V7);
+
+     /*********************************************/
+     /* 3rd STEP */
+     /*********************************************/
+     // V3= [A B C D I J K L] V4= [E F G H M N O P]
+     MemoryOp<Element,simd>::unpacklohi_twice4(V6,V7,V1,V2);
+     MemoryOp<Element,simd>::unpacklohi2(V3,V4,V6,V7);
+
+     // V6= V3 mod 2P
+     V6 = reduce<simd >(V3, P2);
+
+     // V7= V4.beta mod p
+     V7 = mul_mod<simd >(V4,beta,P,betap);
+
+     // V1 = V6+V7
+     V1 = simd::add(V6,V7);
+
+     // V2 = V6 - (V7 - 2P)
+     V5 = simd::sub(V7,P2);
+     V2 = simd::sub(V6,V5);
+
+     /*********************************************/
+     // V3=[A B C D E F G H] V4=[I J K L M N O P]
+     MemoryOp<Element,simd>::unpacklohi2(V3,V4,V1,V2);
+
+     // Store
+     MemoryOp<Element,simd>::store(ABCDEFGH,V3);
+     MemoryOp<Element,simd>::store(IJKLMNOP,V4);
+ }
+
+ inline void Butterfly_DIF_mod2p (Element* ABCDEFGH, Element* IJKLMNOP,
+                                  const Element* alpha, const Element* alphap,
+                                  const vect_t& P, const vect_t& P2) {
+     // 8-lane DIF butterfly. With x, y the vectors read from the two pointers:
+     //   first output  = add_mod(x, y, P2)
+     //   second output = mul_mod(x - (y - P2), alpha)
+     using Mem = MemoryOp<Element,simd>;
+     const vect_t upper   = Mem::load(ABCDEFGH); // x = [A B C D E F G H]
+     const vect_t lower   = Mem::load(IJKLMNOP); // y = [I J K L M N O P]
+     const vect_t root    = Mem::load(alpha);
+     const vect_t rootPre = Mem::load(alphap);   // precomputation passed to mul_mod together with alpha
+
+     // Sum half: x + y through add_mod with modulus vector P2.
+     Mem::store(ABCDEFGH, add_mod<simd >(upper, lower, P2));
+
+     // Difference half, formed as x - (y - P2).
+     const vect_t shifted = simd::sub(lower, P2);
+     const vect_t diff    = simd::sub(upper, shifted);
+
+     // Twiddle multiplication; its result is stored as returned by mul_mod.
+     Mem::store(IJKLMNOP, mul_mod<simd >(diff, root, P, rootPre));
+ }
+
+ inline void Butterfly_DIF_mod2p_laststeps(Element* ABCDEFGH, Element* IJKLMNOP,
+                                           const vect_t& alpha,const vect_t& alphap,
+                                           const vect_t& beta ,const vect_t& betap,
+                                           const vect_t& P, const vect_t& P2) {
+     // Last 3 DIF steps fused in registers; (alpha, alphap) feed step 1 and (beta, betap) feed step 2.
+     vect_t V1,V2,V3,V4,V5,V6,V7;
+
+     // V1=[A B C D E F G H], V2=[I J K L M N O P]
+     V1 = MemoryOp<Element,simd>::load(ABCDEFGH);
+     V2 = MemoryOp<Element,simd>::load(IJKLMNOP);
+
+     /* 1st step */
+     // V3=[A B C D I J K L] V4=[E F G H M N O P]
+     MemoryOp<Element,simd>::unpacklohi2(V3,V4,V1,V2);
+
+     // V1 = V3 + V4 mod 2P
+     // P2 = [2p 2p 2p 2p 2p 2p 2p 2p]
+     V1 = add_mod<simd >(V3,V4,P2);
+
+     // V2 = (V3+(2P-V4))alpha mod 2P
+     V5 = simd::sub(V4,P2);
+     V6 = simd::sub(V3,V5);
+     V7 = reduce<simd >(V6, P2);
+     V2 = mul_mod<simd >(V7,alpha,P,alphap);
+
+     /* 2nd step */
+
+     // V3=[A E B F I M J N] V4=[C G D H K O L P]
+     MemoryOp<Element,simd>::unpacklohi_twice8(V3,V4,V1,V2);
+
+     // V1 = V3 + V4 mod 2P
+     // P2 = [2p 2p 2p 2p 2p 2p 2p 2p]
+     V1 = add_mod<simd >(V3,V4,P2);
+
+     // Difference first; beta is applied below, to lanes D, H, L, P only
+     // V7 = (V3+(2P-V4)) mod 2P
+     V5 = simd::sub(V4,P2);
+     V6 = simd::sub(V3,V5);
+     V7 = reduce<simd >(V6, P2);
+
+     // V4 = [D D H H L L P P ]
+     V4 = MemoryOp<Element,simd>::unpackhi_twice8(V7,V7);
+
+     // V3 = [ * D * H * L * P]
+     V3 = mul_mod_half<simd,SimdComp>(V4,beta,P,betap);
+
+     // V2=[D H D H L P L P] but only [* * D H * * L P] matters
+     V2 = MemoryOp<Element,simd>::shuffletwice8_DD(V3);
+     //V2 = simd::template shuffle_twice<0xDD>(V3); // 0xDD = 221 = [3 1 3 1]_base4
+
+     /* 3rd step */
+     // At this time I have V1=[A B E F I J M N], V7=[C G * * K O * *], V2=[* * D H * * L P]
+     // I need V3 = [A C E G I K M O], V4=[B D F H J L N P]
+     V3 = MemoryOp<Element,simd>::unpacklo_twice8(V1,V7);
+     V4 = MemoryOp<Element,simd>::unpackhi_twice8(V1,V2);
+
+     // V1 = V3 + V4 mod 2P
+     V1 = add_mod<simd >(V3,V4,P2);
+
+     // V2 = V3 + (2P - V4) mod 2P
+     V5 = simd::sub(V4,P2);
+     V6 = simd::sub(V3,V5);
+     V2 = reduce<simd >(V6, P2);
+
+     // Result in V1=[A C E G I K M O] V2=[B D F H J L N P]
+     // Transform to V3=[A B C D I J K L],V4=[E F G H M N O P]
+     MemoryOp<Element,simd>::unpacklohi_twice8(V3,V4,V1,V2);
+
+     // Transform to V1=[A B C D E F G H], V2=[I J K L M N O P]
+     MemoryOp<Element,simd>::unpacklohi2(V1,V2,V3,V4);
+
+     // Store
+     MemoryOp<Element,simd>::store(ABCDEFGH,V1);
+     MemoryOp<Element,simd>::store(IJKLMNOP,V2);
+
+
+ }
+
+
+ }; // FFT_butterflies<Field, 8>
+
+}
+
+#endif // __LINBOX_polynomial_fft_butterflies_H
diff --git a/linbox/algorithms/polynomial-matrix/polynomial-fft-init.h b/linbox/algorithms/polynomial-matrix/polynomial-fft-init.h
new file mode 100644
index 0000000..19dcd66
--- /dev/null
+++ b/linbox/algorithms/polynomial-matrix/polynomial-fft-init.h
@@ -0,0 +1,299 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2016 Romain Lebreton, Pascal Giorgi
+ *
+ * Written by Pascal Giorgi <pascal.giorgi at lirmm.fr>
+ * Romain Lebreton <romain.lebreton at lirmm.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library LinBox.
+ *
+ * LinBox is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ */
+
+
+#ifndef __LINBOX_polynomial_fft_init_H
+#define __LINBOX_polynomial_fft_init_H
+
+
+#include <iostream>
+#include "linbox/linbox-config.h"
+#include "linbox/util/debug.h"
+#include "givaro/givinteger.h"
+#include <fflas-ffpack/fflas/fflas_simd.h>
+
+#ifndef ROUND_DOWN
+#define ROUND_DOWN(x, s) ((x) & ~((s)-1))
+#endif
+
+// template<typename T>
+// std::ostream& operator<<(std::ostream& os, const std::vector<T> &x){
+// std::ostream_iterator<T> out_it (os,", ");
+// std::copy ( x.begin(), x.end(), out_it );
+// return os;
+// }
+
+#include "fflas-ffpack/utils/align-allocator.h"
+
+#ifdef __LINBOX_HAVE_SSE4_1_INSTRUCTIONS
+
+//#include "linbox/algorithms/polynomial-matrix/simd.h"
+
+#include "fflas-ffpack/fflas/fflas_simd.h"
+
+#ifdef __LINBOX_USE_AVX2
+/* 256 bits CODE HERE */
+#define __LINBOX_USE_AVX2
+// define 256 bits simd vector type
+typedef __m256i _vect256_t;
+#endif
+// define 128 bits simd vector type
+typedef __m128i _vect128_t;
+#endif
+
+namespace LinBox {
+
+ enum SimdLevel {NOSIMD,SSE41,AVX,AVX2}; // SIMD levels, as selected at compile time by SimdLevelFinder
+
+ struct SimdLevelFinder {
+     // Compile-time choice of the SIMD level from the LinBox configuration
+     // macros, tested in this order: __LINBOX_USE_AVX2, then
+     // __LINBOX_USE_AVX, then __LINBOX_USE_SIMD; NOSIMD when none of
+     // them is defined.
+#if defined(__LINBOX_USE_AVX2)
+     static const SimdLevel simdlevel = AVX2;
+#elif defined(__LINBOX_USE_AVX)
+     static const SimdLevel simdlevel = AVX;
+#elif defined(__LINBOX_USE_SIMD)
+     static const SimdLevel simdlevel = SSE41;
+#else
+     static const SimdLevel simdlevel = NOSIMD;
+#endif
+ };
+
+ // class to handle FFT transform over wordsize prime field Fp (p < 2^29)
+ // template <class Field, int SL = SimdLevelFinder::simdlevel>
+ // TODO: make this generic w.r.t. Simd if precomputations have to be stored in Simd::vect_t
+ template <class Field>
+ class FFT_init {
+ public:
+ using Element = typename Field::Element;
+ using Compute_t = typename Field::Compute_t;
+ using Residu_t = typename Field::Residu_t;
+
+ const Field *fld;
+ Residu_t _pl, _dpl;
+ uint64_t n;
+ size_t ln;
+ //Compute_t _logp;
+ //Compute_t _I;
+ //double _pinv;
+ Element _w;
+ Element _invw;
+ // Vector type of the tables handed to the Butterfly routines
+ typedef std::vector<Element,AlignedAllocator<Element, Alignment::DEFAULT> > VECT;
+ VECT pow_w;
+ VECT pow_wp; // Precomputations in shoup
+ VECT _data;
+ Element _p;
+ // pow_w = table of roots of unity. If w = primitive K-th root, then the table is:
+ // 1, w, w^2, ..., w^{K/2-1},
+ // 1, w^2, w^4, ..., w^{K/2-2},
+ // 1, w^4, w^8, ..., w^{K/2-4}
+ // ...
+ // 1, w^{K/8}, w^{K/4}, w^{3K/8},
+ // 1, w^{K/4},
+ // 1.
+
+ inline const Field & field() const { return *fld; } // the field stored (as a pointer) in fld, returned by reference
+
+ Element find_gen (Residu_t _m, uint64_t _val2p) {
+     // Returns an element of multiplicative order exactly 2^_val2p, where p - 1 = 2^_val2p * _m:
+     // draw g at random, set z = g^_m and accept z when squaring it reaches 1 only at exponent 2^_val2p.
+     srand((unsigned int) time(NULL)); // NOTE: reseeds the global C generator on every call
+     Element y, z, sq, _gen;
+     for (;;) {
+         fld->init(_gen,rand());
+         fld->init(z, 1);
+         // z = _gen^_m by square-and-multiply: O(log _m) products instead of _m of them.
+         for (Residu_t e = _m; e != 0; e >>= 1) {
+             if (e & 1) fld->mulin(z,_gen);
+             fld->mul(sq,_gen,_gen); _gen = sq;
+         }
+         // Redraw when z == 0 (draw was a multiple of p; 0 would pass the test below) or z == 1.
+         if (z == 0 || z == 1) continue;
+         _gen = z;
+         uint64_t j = 0;
+         do { // after j rounds, z = _gen^(2^j)
+             y = z;
+             fld->mul(z,y,y); // z = y * y;
+             j++;
+         } while (j != _val2p && z != 1);
+         if (j == _val2p) return _gen; // 1 was not reached before exponent 2^_val2p
+     }
+ }
+
+ template<typename T=Element>
+ typename std::enable_if<std::is_integral<T>::value>::type init_powers () {
+     // Integral elements: fills pow_w with the layered table of powers of _w and pow_wp with the matching precomp_b values.
+     size_t pos = 0;
+     // wi runs through the successive powers of _w.
+     Element wi = 1;
+
+     // Precomp Quo(2^32,p)
+     Compute_t invp; fld->precomp_p(invp);
+
+     if (ln>0){
+// using simd=Simd<uint32_t>;
+// using vect_t =typename simd::vect_t;
+
+         size_t tpts = size_t(1) << (ln - 1); // size_t shift: an int `1 << (ln - 1)` is undefined once ln - 1 >= 31
+         size_t i=0;
+// for( ;i<std::min(simd::vect_size+1, tpts);i++,pos++){
+         // Precompute pow_wp[1] for faster mult by pow_w[1]
+         for( ;i<std::min((size_t) 2, tpts);i++,pos++){
+             pow_w[pos] = wi;
+
+             // Fake conversion since precomp_b will be used as a Compute_t in mul_precomp_b
+             Compute_t temp;
+             fld->precomp_b(temp, wi); //(((Compute_t)wi*invp)>>(fld->_bitsizep));
+             pow_wp[pos] = static_cast<Element>(temp);
+
+             fld->mulin(wi, _w);
+         }
+ /*
+ vect_t wp_vect, Q_vect,BAR_vect,w_vect,pow_w_vect,pow_wp_vect, pl_vect;
+ BAR_vect= simd::set1(BAR);
+ wp_vect = simd::set1(pow_wp[simd::vect_size]);
+ w_vect = simd::set1(pow_w[simd::vect_size]);
+ pl_vect = simd::set1(_pl);
+ for (; i < ROUND_DOWN(tpts,simd::vect_size);
+ i+=simd::vect_size,pos+=simd::vect_size) {
+ pow_w_vect = simd::loadu((int32_t*)pow_w.data()+pos-simd::vect_size);
+ Q_vect=simd::mulhi(pow_w_vect,wp_vect);
+ pow_w_vect = simd::sub(simd::mullo(pow_w_vect,w_vect),simd::mullo(Q_vect,pl_vect));
+ pow_w_vect=simd::sub(pow_w_vect, simd::vandnot(simd::greater(pow_w_vect,pl_vect),pl_vect));
+ simd::storeu((int32_t*)pow_w.data()+pos,pow_w_vect);
+ pow_wp_vect= simd::mulhi(simd::sll(pow_w_vect,32-_logp),BAR_vect);
+ simd::storeu((int32_t*)pow_wp.data()+pos,pow_wp_vect);
+ }
+ */
+         // Use pow_wp[1] for speed-up mult by pow_w[1]
+         for( ;i<tpts;i++,pos++){
+             pow_w[pos] = wi;
+
+             // Fake conversion since precomp_b will be used as a Compute_t in mul_precomp_b
+             Compute_t temp;
+             fld->precomp_b(temp, wi); //(((Compute_t)wi*invp)>>(fld->_bitsizep));
+             pow_wp[pos] = static_cast<Element>(temp);
+
+             fld->mul_precomp_b(wi, wi, _w, static_cast<Compute_t>(pow_wp[1]));
+         }
+
+         // Other pow_w elements can be read from previously computed pow_w
+         for(size_t k=2;k<=tpts;k<<=1)
+             for(size_t src=0;src<tpts;src+=k,pos++){
+                 pow_w[pos] = pow_w[src];
+                 pow_wp[pos] = pow_wp[src];
+             }
+
+// std::cout << "Check precomputations : pow_w, pow_wp \n";
+// std::cout << "[";
+// for (size_t i=0; i < tpts; i++) std::cout << pow_w[i] << ", ";
+// std::cout << "]\n";
+// std::cout << "[";
+// for (size_t i=0; i < tpts; i++) std::cout << pow_wp[i] << ", ";
+// std::cout << "]\n\n";
+     }
+ }
+
+ template<typename T=Element>
+ typename std::enable_if<std::is_floating_point<T>::value>::type init_powers () {
+     // Floating-point elements: only pow_w is filled (pow_wp is left untouched).
+     // Layout: tpts = 2^(ln-1) consecutive powers of _w, then for each stride
+     // k = 2, 4, ..., tpts the sub-table pow_w[0], pow_w[k], pow_w[2k], ...
+     if (ln == 0) return;
+
+     // size_t shift: an int `1 << (ln - 1)` is undefined once ln - 1 >= 31.
+     const size_t tpts = size_t(1) << (ln - 1);
+     size_t pos = 0;
+     Element wi = 1;
+
+     for (size_t i = 0; i < tpts; i++, pos++) {
+         pow_w[pos] = wi;
+         fld->mulin(wi,_w);
+     }
+
+     // Other pow_w elements can be read from previously computed pow_w
+     for (size_t k = 2; k <= tpts; k <<= 1)
+         for (size_t src = 0; src < tpts; src += k, pos++) {
+             pow_w[pos] = pow_w[src];
+         }
+ }
+
+ FFT_init (const Field& fld2, size_t ln2, Element w = 0)
+     : fld (&fld2), n (uint64_t(1) << ln2), ln (ln2), pow_w(n - 1), pow_wp(n - 1), _data(n) {
+     // Builds the context for transforms of size n = 2^ln2 over fld2.
+     // w: root of unity to use; when left at 0, one of order 2^ln2 is searched for with find_gen.
+     // (n precedes pow_w, pow_wp and _data in declaration order, so it is set before they are sized.)
+     _pl = fld->characteristic();
+     _p = fld->characteristic();
+
+     // 8*p <= maxCardinality() is required for Harvey's butterflies.
+     // field() returns a reference, hence '.' and not '->'.
+     linbox_check(_pl <= (field().maxCardinality() >> 3));
+     _dpl = (_pl << 1);
+
+     // Write p - 1 = 2^_val2p * _m with _m odd.
+     uint64_t _val2p = 0;
+     Residu_t _m = _pl - 1;
+     while ((_m & 1) == 0) {
+         _m >>= 1;
+         _val2p++;
+     }
+
+     linbox_check(ln <= _val2p); // Otherwise there is no root of unity of order 2^ln
+
+     if (w == 0){
+         // Take an element of order 2^_val2p and raise it to 2^(_val2p-ln) to get order 2^ln.
+         Element _gen = find_gen (_m, _val2p);
+         _w = Givaro::powmod(_gen, 1UL<<(_val2p-ln), _pl);
+     }
+     else {
+         _w = w;
+     }
+
+     // compute w^(-1) mod p = w^(2^ln - 1), valid for a 2^ln-th root of unity w
+     _invw = Givaro::powmod(_w, (1UL<<ln) - 1, _pl);
+
+     // Fill the tables of powers; the timer is only read by the commented-out trace below.
+     Givaro::Timer chrono;
+     chrono.start();
+     init_powers();
+     chrono.stop();
+     //cout<<"FFT: table="<<chrono<<endl;
+ }
+
+
+ Element getRoot() const {return _w;} // the root of unity _w set by the constructor
+ Element getInvRoot() const {return _invw;} // _invw, computed by the constructor as _w^(2^ln - 1)
+
+ }; //FFT_init
+
+}
+
+#endif // __LINBOX_polynomial_fft_init_H
diff --git a/linbox/algorithms/polynomial-matrix/polynomial-fft-transform-simd.inl b/linbox/algorithms/polynomial-matrix/polynomial-fft-transform-simd.inl
index 6c5e0f8..d986e16 100644
--- a/linbox/algorithms/polynomial-matrix/polynomial-fft-transform-simd.inl
+++ b/linbox/algorithms/polynomial-matrix/polynomial-fft-transform-simd.inl
@@ -1,4 +1,5 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
/*
* Copyright (C) 2014 Pascal Giorgi, Romain Lebreton
*
@@ -28,7 +29,36 @@
#ifndef __LINBOX_polynomial_fft_transform_simd_INL
#define __LINBOX_polynomial_fft_transform_simd_INL
-#include "linbox/algorithms/polynomial-matrix/simd.h"
+#include "fflas-ffpack/fflas/fflas_simd.h"
+
+//#include "linbox/algorithms/polynomial-matrix/simd.h"
+
+#ifndef additional_modular_simd_functions
+#define additional_modular_simd_functions
+
+#define Simd_vect typename Simd::vect_t
+
+template <class Simd>
+inline Simd_vect reduce (const Simd_vect a, const Simd_vect p) {
+    // a - vandnot(p, mask) with mask = (p > a). NOTE(review): presumably subtracts p exactly in lanes where a >= p -- confirm vandnot's operand order.
+    return Simd::sub(a, Simd::vandnot(p, Simd::greater(p, a)));
+}
+
+template <class Simd>
+inline Simd_vect add_mod (const Simd_vect a, const Simd_vect b, const Simd_vect p) {
+    // Lane-wise a + b followed by a single reduce() against p.
+    return reduce<Simd>(Simd::add(a, b), p);
+}
+
+template <class Simd>
+inline Simd_vect mul_mod (const Simd_vect a, const Simd_vect b, const Simd_vect p, const Simd_vect bp) {
+    // Returns mullo(a, b) - mullo(mulhi(a, bp), p); bp is the precomputation that goes with b. No correction step is applied here.
+    const Simd_vect quotient   = Simd::mulhi(a, bp);
+    const Simd_vect lowProduct = Simd::mullo(a, b);
+    return Simd::sub(lowProduct, Simd::mullo(quotient, p));
+}
+#undef Simd_vect
+#endif
namespace LinBox {
@@ -41,11 +71,11 @@ namespace LinBox {
template <class Field>
inline void FFT_transform<Field>::reduce128_modp(uint32_t* ABCD, const _vect128_t& P) {
- _vect128_t V1,T;
+ _vect128_t V1;
// V1=[A B C D], V2=[E F G H]
- VEC128_LOAD(V1,ABCD);
- VEC128_MOD_P(V1,V1,P,T);
- VEC128_STORE(ABCD,V1);
+ V1 = Simd128<uint32_t>::load(ABCD);
+ V1 = reduce<Simd128<uint32_t> >(V1, P);
+ Simd128<uint32_t>::store(ABCD,V1);
}
/*-----------------------------------*/
@@ -55,161 +85,162 @@ namespace LinBox {
template <class Field>
inline void FFT_transform<Field>::Butterfly_DIF_mod2p_4x1_SSE(uint32_t* ABCD, uint32_t* EFGH,
- const uint32_t* alpha,
- const uint32_t* alphap,
- const _vect128_t& P, const _vect128_t& P2)
+ const uint32_t* alpha,
+ const uint32_t* alphap,
+ const _vect128_t& P, const _vect128_t& P2)
{
_vect128_t V1,V2,V3,V4,W,Wp,T;
// V1=[A B C D], V2=[E F G H]
- VEC128_LOAD(V1,ABCD);
- VEC128_LOAD(V2,EFGH);
- VEC128_LOAD(W ,alpha);
- VEC128_LOAD(Wp,alphap);
- // V3 = V1 + V2 mod
- VEC128_ADD_MOD(V3,V1,V2,P2,T);
- VEC128_STORE(ABCD,V3);
+ V1 = Simd128<uint32_t>::load(ABCD);
+ V2 = Simd128<uint32_t>::load(EFGH);
+ W = Simd128<uint32_t>::load(alpha);
+ Wp = Simd128<uint32_t>::load(alphap);
+ // V3 = V1 + V2 mod 2P
+ V3 = add_mod<Simd128<uint32_t> >(V1,V2,P2);
+ Simd128<uint32_t>::store(ABCD,V3);
// V4 = (V1+(2P-V2))alpha mod 2P
- VEC128_SUB_32(T,V2,P2);
- VEC128_SUB_32(V4,V1,T);
- VEC128_MUL_MOD(T,V4,W,P,Wp,V1,V2,V3);// V3 is the result
- VEC128_STORE(EFGH,T);
+ T = Simd128<uint32_t>::sub(V2,P2);
+ V4 = Simd128<uint32_t>::sub(V1,T);
+ T = mul_mod<Simd128<uint32_t> >(V4,W,P,Wp);// T is the result
+ Simd128<uint32_t>::store(EFGH,T);
}
-
+/*
template <class Field>
inline void FFT_transform<Field>::Butterfly_DIF_mod2p_4x1_SSE_laststep(uint32_t* ABCD, uint32_t* EFGH, const _vect128_t& P2) {
_vect128_t V1,V2,V3,V4,V5;
// V1=[A B C D], V2=[E F G H]
- VEC128_LOAD(V1,ABCD);
- VEC128_LOAD(V2,EFGH);
+ V1 = Simd128<uint32_t>::load(ABCD);
+ V2 = Simd128<uint32_t>::load(EFGH);
// V3 = [A C B D], V4 = [E G F H]
- VEC128_SHUFFLE_32(V3,V1,0xD8);
- VEC128_SHUFFLE_32(V4,V2,0xD8);
+ V3 = Simd128<uint32_t>::shuffle<0xD8>(V1);
+ V4 = Simd128<uint32_t>::shuffle<0xD8>(V2);
// V1 = [A E C G], V2 = [B F D H]
- VEC128_UNPACK_LO_32(V1,V3,V4);
- VEC128_UNPACK_HI_32(V2,V3,V4);
+ V1 = Simd128<uint32_t>::unpacklo(V3,V4);
+ V2 = Simd128<uint32_t>::unpackhi(V3,V4);
// V3 = V1 + V2 mod 2P
- VEC128_ADD_MOD(V3,V1,V2,P2,V5);
+ V3 = add_mod<Simd128<uint32_t> >(V1,V2,P2);
// V4 = V1 + (2P - V2) mod 2P
- VEC128_SUB_32(V5,V2,P2);
- VEC128_SUB_32(V2,V1,V5);
- VEC128_MOD_P(V4,V2,P2,V5);
+ V5 = Simd128<uint32_t>::sub(V2,P2);
+ V2 = Simd128<uint32_t>::sub(V1,V5);
+ V4 = reduce<Simd128<uint32_t> >(V2, P2);
// V1 = [A C E G], V2 = [B D F H]
- VEC128_SHUFFLE_32(V1,V3,0xD8);
- VEC128_SHUFFLE_32(V2,V4,0xD8);
+ V1 = Simd128<uint32_t>::shuffle<0xD8>(V3);
+ V2 = Simd128<uint32_t>::shuffle<0xD8>(V4);
// V3 = [A B C D], V4 = [E F G H]
- VEC128_UNPACK_LO_32(V3,V1,V2);
- VEC128_UNPACK_HI_32(V4,V1,V2);
+ V3 = Simd128<uint32_t>::unpacklo(V1,V2);
+ V4 = Simd128<uint32_t>::unpackhi(V1,V2);
// Store
- VEC128_STORE(ABCD,V3);
- VEC128_STORE(EFGH,V4);
+ Simd128<uint32_t>::store(ABCD,V3);
+ Simd128<uint32_t>::store(EFGH,V4);
}
+*/
template <class Field>
inline void FFT_transform<Field>::Butterfly_DIF_mod2p_4x2_SSE(uint32_t* ABCD, uint32_t* EFGH, uint32_t* IJKL, uint32_t* MNOP,
- const uint32_t* alpha, const uint32_t*beta , const uint32_t* gamma,
- const uint32_t* alphap, const uint32_t*betap , const uint32_t* gammap,
- const _vect128_t& P, const _vect128_t& P2) {
+ const uint32_t* alpha, const uint32_t*beta , const uint32_t* gamma,
+ const uint32_t* alphap, const uint32_t*betap , const uint32_t* gammap,
+ const _vect128_t& P, const _vect128_t& P2) {
_vect128_t V1,V2,V3,V4,W,Wp,T1,T2,T3,T4,T5,T6,T7,T8;
// V1=[A B C D], V2=[E F G H], V3=[I J K L], V4=[M N O P]
- VEC128_LOAD(V1,ABCD);
- VEC128_LOAD(V2,IJKL);
- VEC128_LOAD(W ,alpha);
- VEC128_LOAD(Wp,alphap);
+ V1 = Simd128<uint32_t>::load(ABCD);
+ V2 = Simd128<uint32_t>::load(IJKL);
+ W = Simd128<uint32_t>::load(alpha);
+ Wp = Simd128<uint32_t>::load(alphap);
/**************/
// T1 = V1 + V2 mod 2P
- VEC128_ADD_MOD(T1,V1,V2,P2,T8);
+ T1 = add_mod<Simd128<uint32_t> >(V1,V2,P2);
// T2 = (V1+(2P-V2))alpha mod 2P
- VEC128_SUB_32(T7,V2,P2);
- VEC128_SUB_32(T6,V1,T7);
- VEC128_MUL_MOD(T2,T6,W,P,Wp,T3,T4,T5);
+ T7 = Simd128<uint32_t>::sub(V2,P2);
+ T6 = Simd128<uint32_t>::sub(V1,T7);
+ T2 = mul_mod<Simd128<uint32_t> >(T6,W,P,Wp);
/**************/
- VEC128_LOAD(V3,EFGH);
- VEC128_LOAD(V4,MNOP);
- VEC128_LOAD(W ,beta);
- VEC128_LOAD(Wp,betap);
+ V3 = Simd128<uint32_t>::load(EFGH);
+ V4 = Simd128<uint32_t>::load(MNOP);
+ W = Simd128<uint32_t>::load(beta);
+ Wp = Simd128<uint32_t>::load(betap);
/**************/
// T3 = V3 + V4 mod 2P
- VEC128_ADD_MOD(T3,V3,V4,P2,T8);
+ T3 = add_mod<Simd128<uint32_t> >(V3,V4,P2);
// T4 = (V3+(2P-V4))beta mod 2P
- VEC128_SUB_32(T7,V4,P2);
- VEC128_SUB_32(T6,V3,T7);
- VEC128_MUL_MOD(T4,T6,W,P,Wp,V1,V2,T8);// T1 is the result
+ T7 = Simd128<uint32_t>::sub(V4,P2);
+ T6 = Simd128<uint32_t>::sub(V3,T7);
+ T4 = mul_mod<Simd128<uint32_t> >(T6,W,P,Wp);// T1 is the result
/**************/
- VEC128_LOAD(W ,gamma);
- VEC128_LOAD(Wp,gammap);
+ W = Simd128<uint32_t>::load(gamma);
+ Wp = Simd128<uint32_t>::load(gammap);
/**************/
// V1 = T1 + T3 mod 2P
- VEC128_ADD_MOD(V1,T1,T3,P2,T8);
+ V1 = add_mod<Simd128<uint32_t> >(T1,T3,P2);
// V3 = (T1+(2P-T3))gamma mod 2P
- VEC128_SUB_32(T7,T3,P2);
- VEC128_SUB_32(T6,T1,T7);
- VEC128_MUL_MOD(V3,T6,W,P,Wp,T3,T5,T8);// T1 is the result
+ T7 = Simd128<uint32_t>::sub(T3,P2);
+ T6 = Simd128<uint32_t>::sub(T1,T7);
+ V3 = mul_mod<Simd128<uint32_t> >(T6,W,P,Wp);// T1 is the result
/**************/
// V2 = T2 + T4 mod 2P
- VEC128_ADD_MOD(V2,T2,T4,P2,T8);
+ V2 = add_mod<Simd128<uint32_t> >(T2,T4,P2);
// V4 = (T2+(2P-T4))gamma mod 2P
- VEC128_SUB_32(T7,T4,P2);
- VEC128_SUB_32(T6,T2,T7);
- VEC128_MUL_MOD(V4,T6,W,P,Wp,T1,T3,T8);// T1 is the result
+ T7 = Simd128<uint32_t>::sub(T4,P2);
+ T6 = Simd128<uint32_t>::sub(T2,T7);
+ V4 = mul_mod<Simd128<uint32_t> >(T6,W,P,Wp);// T1 is the result
/**************/
- VEC128_STORE(ABCD,V1);
- VEC128_STORE(EFGH,V3);
- VEC128_STORE(IJKL,V2);
- VEC128_STORE(MNOP,V4);
+ Simd128<uint32_t>::store(ABCD,V1);
+ Simd128<uint32_t>::store(EFGH,V3);
+ Simd128<uint32_t>::store(IJKL,V2);
+ Simd128<uint32_t>::store(MNOP,V4);
}
template <class Field>
inline void FFT_transform<Field>::Butterfly_DIF_mod2p_4x2_SSE_last2step(uint32_t* ABCD, uint32_t* EFGH,
- const _vect128_t& W,
- const _vect128_t& Wp,
- const _vect128_t& P, const _vect128_t& P2) {
+ const _vect128_t& W,
+ const _vect128_t& Wp,
+ const _vect128_t& P, const _vect128_t& P2) {
_vect128_t V1,V2,V3,V4,V5,V6,V7;
// V1=[A B C D], V2=[E F G H]
- VEC128_LOAD(V1,ABCD);
- VEC128_LOAD(V2,EFGH);
+ V1 = Simd128<uint32_t>::load(ABCD);
+ V2 = Simd128<uint32_t>::load(EFGH);
// V3=[A E B F], V4=[C G D H]
- VEC128_UNPACK_LO_32(V3,V1,V2);
- VEC128_UNPACK_HI_32(V4,V1,V2);
+ V3 = Simd128<uint32_t>::unpacklo(V1,V2);
+ V4 = Simd128<uint32_t>::unpackhi(V1,V2);
// V1 = V3 + V4 mod 2P
// P2 = [2p 2p 2p 2p]
- VEC128_ADD_MOD(V1,V3,V4,P2,V5);
+ V1 = add_mod<Simd128<uint32_t> >(V3,V4,P2);
// V2 = (V3+(2P-V4))alpha mod 2P
- VEC128_SUB_32(V5,V4,P2);
- VEC128_SUB_32(V6,V3,V5);
- VEC128_MOD_P(V2,V6,P2,V2);
+ V5 = Simd128<uint32_t>::sub(V4,P2);
+ V6 = Simd128<uint32_t>::sub(V3,V5);
+ V2 = reduce<Simd128<uint32_t> >(V6, P2);
// V4 = [D D H H]
- VEC128_UNPACK_HI_32(V4,V2,V2);
+ V4 = Simd128<uint32_t>::unpackhi(V2,V2);
// V6 = V4 * Wp mod 2^64
// Wp = [Wp ? Wp ?]
- VEC128_MUL_32(V7,V4,Wp);
- VEC128_MUL_LO_32(V5,V7,P);
+ V7 = Simd128<uint64_t>::mulx(V4,Wp);
+ V5 = Simd128<uint32_t>::mullo(V7,P);
// At this point V4= [? Q_D*p ? Q_H*p]
// V5 = [D D H H] * [W W W W] mod 2^32
- VEC128_MUL_LO_32(V6,V4,W);
- VEC128_SUB_32(V4,V6,V5);
- VEC128_SHUFFLE_32(V3,V4,0xDD);
+ V6 = Simd128<uint32_t>::mullo(V4,W);
+ V4 = Simd128<uint32_t>::sub(V6,V5);
+ V3 = Simd128<uint32_t>::shuffle<0xDD>(V4);
//At this point, V2 = [D*Wmodp H*Wmodp D*Wmodp H*Wmodp]
// At this time I have V1=[A E B F], V2=[C G ? ?], V3=[? ? D H]
// I need V3 = [A C E G], V4 = [B D F H]
- VEC128_UNPACK_HI_32(V4,V1,V3);
- VEC128_UNPACK_LO_32(V3,V1,V2);
+ V4 = Simd128<uint32_t>::unpackhi(V1,V3);
+ V3 = Simd128<uint32_t>::unpacklo(V1,V2);
// V1 = V3 + V4 mod 2P
- VEC128_ADD_MOD(V1,V3,V4,P2,V5);
+ V1 = add_mod<Simd128<uint32_t> >(V3,V4,P2);
// V2 = V3 + (2P - V4) mod 2P
- VEC128_SUB_32(V5,V4,P2);
- VEC128_SUB_32(V6,V3,V5);
- VEC128_MOD_P(V2,V6,P2,V2);
+ V5 = Simd128<uint32_t>::sub(V4,P2);
+ V6 = Simd128<uint32_t>::sub(V3,V5);
+ V2 = reduce<Simd128<uint32_t> >(V6, P2);
// Result in V1 = [A C E G] and V2 = [B D F H]
// Transform to V3=[A B C D], V4=[E F G H]
- VEC128_UNPACK_LO_32(V3,V1,V2);
- VEC128_UNPACK_HI_32(V4,V1,V2);
+ V3 = Simd128<uint32_t>::unpacklo(V1,V2);
+ V4 = Simd128<uint32_t>::unpackhi(V1,V2);
// Store
- VEC128_STORE(ABCD,V3);
- VEC128_STORE(EFGH,V4);
+ Simd128<uint32_t>::store(ABCD,V3);
+ Simd128<uint32_t>::store(EFGH,V4);
}
@@ -220,81 +251,81 @@ namespace LinBox {
template <class Field>
inline void FFT_transform<Field>::Butterfly_DIT_mod4p_4x1_SSE(uint32_t* ABCD, uint32_t* EFGH,
- const uint32_t* alpha,
- const uint32_t* alphap,
- const _vect128_t& P, const _vect128_t& P2) {
+ const uint32_t* alpha,
+ const uint32_t* alphap,
+ const _vect128_t& P, const _vect128_t& P2) {
_vect128_t V1,V2,V3,V4,W,Wp,T1,T2;
// V1=[A B C D], V2=[E F G H]
- VEC128_LOAD(V1,ABCD);
- VEC128_LOAD(V2,EFGH);
- VEC128_LOAD(W ,alpha);
- VEC128_LOAD(Wp,alphap);
+ V1 = Simd128<uint32_t>::load(ABCD);
+ V2 = Simd128<uint32_t>::load(EFGH);
+ W = Simd128<uint32_t>::load(alpha);
+ Wp = Simd128<uint32_t>::load(alphap);
// V3 = V1 mod 2P
- VEC128_MOD_P (V3,V1,P2,T1);
+ V3 = reduce<Simd128<uint32_t> >(V1, P2);
// V4 = V2 * W mod P
- VEC128_MUL_MOD(V4,V2,W,P,Wp,V1,T1,T2);
+ V4 = mul_mod<Simd128<uint32_t> >(V2,W,P,Wp);
// V1 = V3 + V4
- VEC128_ADD_32(V1,V3,V4);
- VEC128_STORE(ABCD,V1);
+ V1 = Simd128<uint32_t>::add(V3,V4);
+ Simd128<uint32_t>::store(ABCD,V1);
// V2 = V3 - (V4 - 2P)
- VEC128_SUB_32(T1,V4,P2);
- VEC128_SUB_32(V2,V3,T1);
- VEC128_STORE(EFGH,V2);
+ T1 = Simd128<uint32_t>::sub(V4,P2);
+ V2 = Simd128<uint32_t>::sub(V3,T1);
+ Simd128<uint32_t>::store(EFGH,V2);
}
template <class Field>
inline void FFT_transform<Field>::Butterfly_DIT_mod4p_4x2_SSE_first2step(uint32_t* ABCD, uint32_t* EFGH,
- const _vect128_t& W,
- const _vect128_t& Wp,
- const _vect128_t& P, const _vect128_t& P2) {
+ const _vect128_t& W,
+ const _vect128_t& Wp,
+ const _vect128_t& P, const _vect128_t& P2) {
_vect128_t V1,V2,V3,V4,T1,T2,T3,T4;
// V1=[A B C D], V2=[E F G H]
- VEC128_LOAD(V1,ABCD);
- VEC128_LOAD(V2,EFGH);
+ V1 = Simd128<uint32_t>::load(ABCD);
+ V2 = Simd128<uint32_t>::load(EFGH);
// T1 = [A C B D], T2 = [E G F H]
- VEC128_SHUFFLE_32(T1,V1,0xD8);
- VEC128_SHUFFLE_32(T2,V2,0xD8);
+ T1 = Simd128<uint32_t>::shuffle<0xD8>(V1);
+ T2 = Simd128<uint32_t>::shuffle<0xD8>(V2);
// V1 = [A E C G], V2 = [B F D H]
- VEC128_UNPACK_LO_32(V1,T1,T2);
- VEC128_UNPACK_HI_32(V2,T1,T2);
+ V1 = Simd128<uint32_t>::unpacklo(T1,T2);
+ V2 = Simd128<uint32_t>::unpackhi(T1,T2);
// V3 = V1 + V2
// Rk: No need for (. mod 2P) since entries are <P
- VEC128_ADD_32(V3,V1,V2);
+ V3 = Simd128<uint32_t>::add(V1,V2);
// V4 = V1 + (P - V2)
// Rk: No need for (. mod 2P) since entries are <P
- VEC128_SUB_32(T1,V2,P);
- VEC128_SUB_32(V4,V1,T1);
+ T1 = Simd128<uint32_t>::sub(V2,P);
+ V4 = Simd128<uint32_t>::sub(V1,T1);
// T1 = [D D H H]
- VEC128_UNPACK_HI_32(T1,V4,V4);
+ T1 = Simd128<uint32_t>::unpackhi(V4,V4);
// T2 = T1 * Wp mod 2^64
// Wp = [Wp ? Wp ?]
- VEC128_MUL_32(T2,T1,Wp);
- VEC128_MUL_LO_32(T3,T2,P);
+ T2 = Simd128<uint64_t>::mulx(T1,Wp);
+ T3 = Simd128<uint32_t>::mullo(T2,P);
// At this point T3= [? Q_D*p ? Q_H*p]
// T4 = [D D H H] * [W W W W] mod 2^32
- VEC128_MUL_LO_32(T4,T1,W);
- VEC128_SUB_32(T1,T4,T3);
- VEC128_SHUFFLE_32(T2,T1,0XDD);
+ T4 = Simd128<uint32_t>::mullo(T1,W);
+ T1 = Simd128<uint32_t>::sub(T4,T3);
+ T2 = Simd128<uint32_t>::shuffle<0xDD>(T1);
//At this point, T2 = [D*Wmodp H*Wmodp D*Wmodp H*Wmodp]
// At this time I have V3=[A E C G], V4=[B F ? ?], T2=[? ? D H]
// I need V1 = [A B E F], V2 = [C D G H]
- VEC128_UNPACK_LO_32(V1,V3,V4);
- VEC128_UNPACK_HI_32(V2,V3,T2);
+ V1 = Simd128<uint32_t>::unpacklo(V3,V4);
+ V2 = Simd128<uint32_t>::unpackhi(V3,T2);
// T1 = V1 + V2
- VEC128_ADD_32(T1,V1,V2);
+ T1 = Simd128<uint32_t>::add(V1,V2);
// T2 = V1 - (V2 - 2P)
- VEC128_SUB_32(T3,V2,P2);
- VEC128_SUB_32(T2,V1,T3);
+ T3 = Simd128<uint32_t>::sub(V2,P2);
+ T2 = Simd128<uint32_t>::sub(V1,T3);
// Result in T1 = [A B E F] and T2 = [C D G H]
// Transform to V1=[A C B D], V2=[E G F H]
- VEC128_UNPACK_LO_32(V1,T1,T2);
- VEC128_UNPACK_HI_32(V2,T1,T2);
+ V1 = Simd128<uint32_t>::unpacklo(T1,T2);
+ V2 = Simd128<uint32_t>::unpackhi(T1,T2);
// Then T1=[A B C D], T2=[E F G H]
- VEC128_SHUFFLE_32(T1,V1,0xD8);
- VEC128_SHUFFLE_32(T2,V2,0xD8);
+ T1 = Simd128<uint32_t>::shuffle<0xD8>(V1);
+ T2 = Simd128<uint32_t>::shuffle<0xD8>(V2);
// Store
- VEC128_STORE(ABCD,T1);
- VEC128_STORE(EFGH,T2);
+ Simd128<uint32_t>::store(ABCD,T1);
+ Simd128<uint32_t>::store(EFGH,T2);
}
/*-----------------------------------*/
@@ -304,51 +335,51 @@ namespace LinBox {
template <class Field>
void FFT_transform<Field>::FFT_DIF_Harvey_mod2p_iterative4x1_SSE (uint32_t *fft) {
_vect128_t P,P2;
- P = _mm_set1_epi32(_pl);
- P2 = _mm_set1_epi32(_dpl);
+ P = Simd128<uint32_t>::set1(_pl);
+ P2 = Simd128<uint32_t>::set1(_dpl);
uint32_t * tab_w = &pow_w [0];
uint32_t * tab_wp= &pow_wp[0];
size_t w, f;
for (w = n >> 1, f = 1; w >= 4; tab_w+=w, tab_wp+=w, w >>= 1, f <<= 1){
- // w : witdh of butterflies
- // f : # families of butterflies
- for (size_t i = 0; i < f; i++)
- for (size_t j = 0; j < w; j+=4)
+ // w : width of butterflies
+ // f : # families of butterflies
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j+=4)
#define A0 &fft[0] + (i << 1) *w+ j
#define A4 &fft[0] + ((i << 1)+1)*w+ j
- Butterfly_DIF_mod2p_4x1_SSE(A0,A4, tab_w+j,tab_wp+j,P,P2);
+ Butterfly_DIF_mod2p_4x1_SSE(A0,A4, tab_w+j,tab_wp+j,P,P2);
#undef A0
#undef A4
- //std::cout<<fft<<std::endl;
- }
+ //std::cout<<fft<<std::endl;
+ }
// Last two steps
if (n >= 8) {
- _vect128_t W,Wp;
- W = _mm_set1_epi32 ((int)tab_w [1]);
- Wp= _mm_set1_epi32 ((int)tab_wp[1]);
+ _vect128_t W,Wp;
+ W = Simd128<uint32_t>::set1 ((int)tab_w [1]);
+ Wp= Simd128<uint32_t>::set1 ((int)tab_wp[1]);
- for (size_t i = 0; i < f; i+=2)
+ for (size_t i = 0; i < f; i+=2)
#define A0 &fft[0] + (i << 2)
#define A4 &fft[0] + ((i << 2)+4)
- Butterfly_DIF_mod2p_4x2_SSE_last2step(A0,A4,W,Wp,P,P2);
- //std::cout<<fft<<std::endl;
+ Butterfly_DIF_mod2p_4x2_SSE_last2step(A0,A4,W,Wp,P,P2);
+ //std::cout<<fft<<std::endl;
#undef A0
#undef A4
- } else {
- for (; w >= 1; tab_w+=w, tab_wp+=w, w >>= 1, f <<= 1)
- for (size_t i = 0; i < f; i++)
- for (size_t j = 0; j < w; j++)
- Butterfly_DIF_mod2p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
- }
+ } else {
+ for (; w >= 1; tab_w+=w, tab_wp+=w, w >>= 1, f <<= 1)
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j++)
+ Butterfly_DIF_mod2p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
+ }
}
template <class Field>
void FFT_transform<Field>::FFT_DIF_Harvey_mod2p_iterative4x2_SSE (uint32_t *fft) {
size_t w, f;
_vect128_t P,P2;
- P = _mm_set1_epi32(_pl);
- P2 = _mm_set1_epi32(_dpl);
+ P = Simd128<uint32_t>::set1(_pl);
+ P2 = Simd128<uint32_t>::set1(_dpl);
uint32_t * tab_w = &pow_w[0];
uint32_t * tab_wp= &pow_wp[0];
for (w = n >> 1, f = 1; w >= 8; tab_w+=w+(w>>1), tab_wp+=w+(w>>1), w >>= 2, f <<= 2)
@@ -361,87 +392,87 @@ namespace LinBox {
#define A2 &fft[0] + ((i << 1)+1)*w+ j
#define A3 &fft[0] + ((i << 1)+1)*w+(j+(w >> 1))
- Butterfly_DIF_mod2p_4x2_SSE(A0, A1, A2, A3,
- tab_w +j, tab_w +j+(w >> 1), tab_w +j+w,
- tab_wp+j, tab_wp+j+(w >> 1), tab_wp+j+w,
- P,P2);
+ Butterfly_DIF_mod2p_4x2_SSE(A0, A1, A2, A3,
+ tab_w +j, tab_w +j+(w >> 1), tab_w +j+w,
+ tab_wp+j, tab_wp+j+(w >> 1), tab_wp+j+w,
+ P,P2);
#undef A0
#undef A1
#undef A2
#undef A3
- }
+ }
// Last two steps
if (n >= 8) {
- if (w == 4) {
- for (size_t i = 0; i < f; i++)
+ if (w == 4) {
+ for (size_t i = 0; i < f; i++)
#define A0 &fft[0] + (i << 1) *w
#define A4 &fft[0] + ((i << 1)+1)*w
- Butterfly_DIF_mod2p_4x1_SSE(A0,A4, tab_w,tab_wp,P,P2);
+ Butterfly_DIF_mod2p_4x1_SSE(A0,A4, tab_w,tab_wp,P,P2);
#undef A0
#undef A4
- tab_w+=w;
- tab_wp+=w;
- w >>= 1;
- f <<= 1;
- }
+ tab_w+=w;
+ tab_wp+=w;
+ w >>= 1;
+ f <<= 1;
+ }
- _vect128_t W,Wp;
- W = _mm_set1_epi32 ((int)tab_w [1]);
- Wp= _mm_set1_epi32 ((int)tab_wp[1]);
+ _vect128_t W,Wp;
+ W = Simd128<uint32_t>::set1 ((int)tab_w [1]);
+ Wp= Simd128<uint32_t>::set1 ((int)tab_wp[1]);
- for (size_t i = 0; i < f; i+=2)
+ for (size_t i = 0; i < f; i+=2)
#define A0 &fft[0] + (i << 2)
#define A4 &fft[0] + ((i << 2)+4)
- Butterfly_DIF_mod2p_4x2_SSE_last2step(A0,A4,W,Wp,P,P2);
+ Butterfly_DIF_mod2p_4x2_SSE_last2step(A0,A4,W,Wp,P,P2);
#undef A0
#undef A4
- } else {
- for (; w >= 1; tab_w+=w, tab_wp+=w, w >>= 1, f <<= 1)
- for (size_t i = 0; i < f; i++)
- for (size_t j = 0; j < w; j++)
- Butterfly_DIF_mod2p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
- }
+ } else {
+ for (; w >= 1; tab_w+=w, tab_wp+=w, w >>= 1, f <<= 1)
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j++)
+ Butterfly_DIF_mod2p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
+ }
}
template <class Field>
void FFT_transform<Field>::FFT_DIT_Harvey_mod4p_iterative4x1_SSE (uint32_t *fft)
{
_vect128_t P,P2;
- VEC128_SET_32(P,_pl);
- VEC128_SET_32(P2,_dpl);
+ P = Simd128<uint32_t>::set1(_pl);
+ P2 = Simd128<uint32_t>::set1(_dpl);
// First two steps
if (n >= 8) {
- _vect128_t W,Wp;
- W = _mm_set1_epi32 ((int)pow_w [n-3]);
- Wp= _mm_set1_epi32 ((int)pow_wp[n-3]);
-
- for (size_t i = 0; i < n; i+=8)
- Butterfly_DIT_mod4p_4x2_SSE_first2step(&fft[i],&fft[i+4],W,Wp,P,P2);
-
- uint32_t * tab_w = &pow_w [n-8];
- uint32_t * tab_wp= &pow_wp[n-8];
- for (size_t w = 4, f = n >> 3; f >= 1; w <<= 1, f >>= 1, tab_w-=w, tab_wp-=w){
- // w : witdh of butterflies
- // f : # families of butterflies
- for (size_t i = 0; i < f; i++)
- for (size_t j = 0; j < w; j+=4)
+ _vect128_t W,Wp;
+ W = Simd128<uint32_t>::set1 ((int)pow_w [n-3]);
+ Wp= Simd128<uint32_t>::set1 ((int)pow_wp[n-3]);
+
+ for (size_t i = 0; i < n; i+=8)
+ Butterfly_DIT_mod4p_4x2_SSE_first2step(&fft[i],&fft[i+4],W,Wp,P,P2);
+
+ uint32_t * tab_w = &pow_w [n-8];
+ uint32_t * tab_wp= &pow_wp[n-8];
+ for (size_t w = 4, f = n >> 3; f >= 1; w <<= 1, f >>= 1, tab_w-=w, tab_wp-=w){
+ // w : width of butterflies
+ // f : # families of butterflies
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j+=4)
#define A0 &fft[0] + (i << 1) *w+ j
#define A4 &fft[0] + ((i << 1)+1)*w+ j
- Butterfly_DIT_mod4p_4x1_SSE(A0,A4, tab_w+j,tab_wp+j,P,P2);
+ Butterfly_DIT_mod4p_4x1_SSE(A0,A4, tab_w+j,tab_wp+j,P,P2);
#undef A0
#undef A4
+ }
+ } else {
+ uint32_t * tab_w = &pow_w [n-2];
+ uint32_t * tab_wp= &pow_wp[n-2];
+ for (size_t w = 1, f = n >> 1; f >= 1; w <<= 1, f >>= 1, tab_w-=w, tab_wp-=w)
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j++)
+ Butterfly_DIT_mod4p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
}
- } else {
- uint32_t * tab_w = &pow_w [n-2];
- uint32_t * tab_wp= &pow_wp[n-2];
- for (size_t w = 1, f = n >> 1; f >= 1; w <<= 1, f >>= 1, tab_w-=w, tab_wp-=w)
- for (size_t i = 0; i < f; i++)
- for (size_t j = 0; j < w; j++)
- Butterfly_DIT_mod4p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
- }
}
@@ -452,14 +483,14 @@ namespace LinBox {
******************************************************************************************************************/
-#ifdef __LINBOX_HAVE_AVX2
+#ifdef __LINBOX_HAVE_AVX_INSTRUCTIONS2
template <class Field>
inline void FFT_transform<Field>::reduce256_modp(uint32_t* ABCD, const _vect256_t& P) {
- _vect256_t V1,T;
- VEC256_LOADU(V1,ABCD);
- VEC256_MOD_P(V1,V1,P,T);
- VEC256_STOREU(ABCD,V1);
+ _vect256_t V1;
+ V1 = Simd256<uint32_t>::loadu(ABCD);
+ V1 = reduce<Simd256<uint32_t> >(V1, P);
+ Simd256<uint32_t>::storeu(ABCD,V1);
}
@@ -469,111 +500,113 @@ namespace LinBox {
template <class Field>
inline void FFT_transform<Field>::Butterfly_DIF_mod2p_8x1_AVX(uint32_t* ABCDEFGH, uint32_t* IJKLMNOP,
- const uint32_t* alpha,
- const uint32_t* alphap,
- const _vect256_t& P, const _vect256_t& P2) {
+ const uint32_t* alpha,
+ const uint32_t* alphap,
+ const _vect256_t& P, const _vect256_t& P2) {
_vect256_t V1,V2,V3,V4,W,Wp,T;
// V1=[A B C D E F G H], V2=[I J K L M N O P]
- VEC256_LOADU(V1,ABCDEFGH);
- VEC256_LOADU(V2,IJKLMNOP);
- VEC256_LOADU(W ,alpha);
- VEC256_LOADU(Wp,alphap);
+ V1 = Simd256<uint32_t>::loadu(ABCDEFGH);
+ V2 = Simd256<uint32_t>::loadu(IJKLMNOP);
+ W = Simd256<uint32_t>::loadu(alpha);
+ Wp = Simd256<uint32_t>::loadu(alphap);
// V3 = V1 + V2 mod
- VEC256_ADD_MOD(V3,V1,V2,P2,T);
- VEC256_STOREU(ABCDEFGH,V3);
+
+ V3 = add_mod<Simd256<uint32_t> >(V1,V2,P2);
+
+ Simd256<uint32_t>::storeu(ABCDEFGH,V3);
// V4 = (V1+(2P-V2))alpha mod 2P
- VEC256_SUB_32(T,V2,P2);
- VEC256_SUB_32(V4,V1,T);
- VEC256_MUL_MOD(T,V4,W,P,Wp,V1,V2,V3);// V3 is the result
- VEC256_STOREU(IJKLMNOP,T);
+ T = Simd256<uint32_t>::sub(V2,P2);
+ V4 = Simd256<uint32_t>::sub(V1,T);
+ T = mul_mod<Simd256<uint32_t> >(V4,W,P,Wp);// T is the result
+ Simd256<uint32_t>::storeu(IJKLMNOP,T);
}
template <class Field>
inline void FFT_transform<Field>::Butterfly_DIF_mod2p_8x3_AVX_last3step(uint32_t* ABCDEFGH, uint32_t* IJKLMNOP,
- const _vect256_t& alpha,const _vect256_t& alphap,
- const _vect256_t& beta ,const _vect256_t& betap,
- const _vect256_t& P ,const _vect256_t& P2) {
+ const _vect256_t& alpha,const _vect256_t& alphap,
+ const _vect256_t& beta ,const _vect256_t& betap,
+ const _vect256_t& P ,const _vect256_t& P2) {
_vect256_t V1,V2,V3,V4,V5,V6,V7,Q;
// V1=[A B C D E F G H], V2=[I J K L M N O P]
- VEC256_LOADU(V1,ABCDEFGH);
- VEC256_LOADU(V2,IJKLMNOP);
+ V1 = Simd256<uint32_t>::loadu(ABCDEFGH);
+ V2 = Simd256<uint32_t>::loadu(IJKLMNOP);
/* 1st step */
// V3=[A B C D I J K L] V4=[E F G H M N O P]
- VEC256_UNPACK_LO_128(V3,V1,V2);
- VEC256_UNPACK_HI_128(V4,V1,V2);
+ V3 = Simd256<uint64_t>::unpacklo128(V1,V2);
+ V4 = Simd256<uint64_t>::unpackhi128(V1,V2);
// V1 = V3 + V4 mod 2P
// P2 = [2p 2p 2p 2p]
- VEC256_ADD_MOD(V1,V3,V4,P2,V5);
+ V1 = add_mod<Simd256<uint32_t> >(V3,V4,P2);
// V2 = (V3+(2P-V4))alpha mod 2P
- VEC256_SUB_32(V5,V4,P2);
- VEC256_SUB_32(V6,V3,V5);
- VEC256_MOD_P(V7,V6,P2,V2);
- VEC256_MUL_MOD(V2,V7,alpha,P,alphap,V3,V4,V5);
+ V5 = Simd256<uint32_t>::sub(V4,P2);
+ V6 = Simd256<uint32_t>::sub(V3,V5);
+ V7 = reduce<Simd256<uint32_t> >(V6, P2);
+ V2 = mul_mod<Simd256<uint32_t> >(V7,alpha,P,alphap);
/* 2nd step */
// V3=[A E B F I M J N] V4=[C G D H K O L P]
- VEC256_UNPACK_LO_32(V3,V1,V2);
- VEC256_UNPACK_HI_32(V4,V1,V2);
+ V3 = Simd256<uint32_t>::unpacklo_twice(V1,V2);
+ V4 = Simd256<uint32_t>::unpackhi_twice(V1,V2);
// V1 = V3 + V4 mod 2P
// P2 = [2p 2p 2p 2p]
- VEC256_ADD_MOD(V1,V3,V4,P2,V5);
+ V1 = add_mod<Simd256<uint32_t> >(V3,V4,P2);
// V2 = (V3+(2P-V4))alpha mod 2P
// V7 = (V3+(2P-V4)) mod 2P
- VEC256_SUB_32(V5,V4,P2);
- VEC256_SUB_32(V6,V3,V5);
- VEC256_MOD_P(V7,V6,P2,V2);
+ V5 = Simd256<uint32_t>::sub(V4,P2);
+ V6 = Simd256<uint32_t>::sub(V3,V5);
+ V7 = reduce<Simd256<uint32_t> >(V6, P2);
// V4 = [D D H H L L P P ]
- VEC256_UNPACK_HI_32(V4,V7,V7);
+ V4 = Simd256<uint32_t>::unpackhi_twice(V7,V7);
// Q = V4 * betap mod 2^64 = [* Qd * Qh * Ql * Qp]
// with betap= [ betap * betap * betap * betap *]
- VEC256_MUL_32(Q,V4,betap);
+ Q = Simd256<uint64_t>::mulx(V4,betap);
// V5 = [* Qd.P * Qh.P * Ql.P * Qp.P]
- VEC256_MUL_LO_32(V5,Q,P);
+ V5 = Simd256<uint32_t>::mullo(Q,P);
// V6 = V4 * beta mod 2^32
- VEC256_MUL_LO_32(V6,V4,beta);
+ V6 = Simd256<uint32_t>::mullo(V4,beta);
// V3 = V6 - V5 = [* (D.beta mod p) * (H.beta mod p) * (L.beta mod p) * (P.beta mod p)]
- VEC256_SUB_32(V3,V6,V5);
+ V3 = Simd256<uint32_t>::sub(V6,V5);
// V2=[* * D H * * L P]
- VEC256_SHUFFLE_32(V2,V3,0xDD);
+ V2 = Simd256<uint32_t>::shuffle_twice<0xDD>(V3);
/* 3rd step */
// At this time I have V1=[A B E F I J M N], V7=[C G * * K O * *], V2=[* * D H * * L P]
// I need V3 = [A C E G I K M O], V4=[B D F H J L N P]
- VEC256_UNPACK_LO_32(V3,V1,V7);
- VEC256_UNPACK_HI_32(V4,V1,V2);
+ V3 = Simd256<uint32_t>::unpacklo_twice(V1,V7);
+ V4 = Simd256<uint32_t>::unpackhi_twice(V1,V2);
// V1 = V3 + V4 mod 2P
- VEC256_ADD_MOD(V1,V3,V4,P2,V5);
+ V1 = add_mod<Simd256<uint32_t> >(V3,V4,P2);
// V2 = V3 + (2P - V4) mod 2P
- VEC256_SUB_32(V5,V4,P2);
- VEC256_SUB_32(V6,V3,V5);
- VEC256_MOD_P(V2,V6,P2,V2);
+ V5 = Simd256<uint32_t>::sub(V4,P2);
+ V6 = Simd256<uint32_t>::sub(V3,V5);
+ V2 = reduce<Simd256<uint32_t> >(V6, P2);
// Result in V1=[A C E G I K M O] V2=[B D F H J L N P]
// Transform to V3=[A B C D I J K L],V4=[E F G H M N O P]
- VEC256_UNPACK_LO_32(V3,V1,V2);
- VEC256_UNPACK_HI_32(V4,V1,V2);
+ V3 = Simd256<uint32_t>::unpacklo_twice(V1,V2);
+ V4 = Simd256<uint32_t>::unpackhi_twice(V1,V2);
// Transform to V1=[A B C D E F G H], V2=[I J K L M N O P]
- VEC256_UNPACK_LO_128(V1,V3,V4);
- VEC256_UNPACK_HI_128(V2,V3,V4);
+ V1 = Simd256<uint64_t>::unpacklo128(V3,V4);
+ V2 = Simd256<uint64_t>::unpackhi128(V3,V4);
// Store
- VEC256_STOREU(ABCDEFGH,V1);
- VEC256_STOREU(IJKLMNOP,V2);
+ Simd256<uint32_t>::storeu(ABCDEFGH,V1);
+ Simd256<uint32_t>::storeu(IJKLMNOP,V2);
}
@@ -581,55 +614,56 @@ namespace LinBox {
template <class Field>
void FFT_transform<Field>::FFT_DIF_Harvey_mod2p_iterative8x1_AVX (uint32_t *fft) {
_vect256_t P,P2;
- VEC256_SET_32(P,_pl);
- VEC256_SET_32(P2,_dpl);
+ P = Simd256<uint32_t>::set1(_pl);
+ P2 = Simd256<uint32_t>::set1(_dpl);
uint32_t * tab_w = &pow_w [0];
uint32_t * tab_wp= &pow_wp[0];
size_t w, f;
for (w = n >> 1, f = 1; w >= 8; tab_w+=w, tab_wp+=w, w >>= 1, f <<= 1){
- // w : witdh of butterflies
- // f : # families of butterflies
- for (size_t i = 0; i < f; i++)
- for (size_t j = 0; j < w; j+=8)
+ // w : width of butterflies
+ // f : # families of butterflies
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j+=8)
#define A0 &fft[0] + (i << 1) *w+ j
#define A4 &fft[0] + ((i << 1)+1)*w+ j
- Butterfly_DIF_mod2p_8x1_AVX(A0,A4, tab_w+j,tab_wp+j,P,P2);
+ Butterfly_DIF_mod2p_8x1_AVX(A0,A4, tab_w+j,tab_wp+j,P,P2);
#undef A0
#undef A4
- //std::cout<<fft<<std::endl;
- }
+ //std::cout<<fft<<std::endl;
+ }
// Last three steps
if (n >= 16) {
- _vect256_t alpha,alphap,beta,betap;
- uint32_t tmp[8];
- tmp[0]=tmp[4]=tab_w[0];
- tmp[1]=tmp[5]=tab_w[1];
- tmp[2]=tmp[6]=tab_w[2];
- tmp[3]=tmp[7]=tab_w[3];
- VEC256_LOADU(alpha,tmp);
- tmp[0]=tmp[4]=tab_wp[0];
- tmp[1]=tmp[5]=tab_wp[1];
- tmp[2]=tmp[6]=tab_wp[2];
- tmp[3]=tmp[7]=tab_wp[3];
- VEC256_LOADU(alphap,tmp);
- VEC256_SET_32(beta,tab_w [5]);
- VEC256_SET_32(betap,tab_wp [5]);
-
- for (size_t i = 0; i < f; i+=2)
+ _vect256_t alpha,alphap,beta,betap;
+ uint32_t tmp[8];
+ tmp[0]=tmp[4]=tab_w[0];
+ tmp[1]=tmp[5]=tab_w[1];
+ tmp[2]=tmp[6]=tab_w[2];
+ tmp[3]=tmp[7]=tab_w[3];
+ alpha = Simd256<uint32_t>::loadu(tmp);
+ tmp[0]=tmp[4]=tab_wp[0];
+ tmp[1]=tmp[5]=tab_wp[1];
+ tmp[2]=tmp[6]=tab_wp[2];
+ tmp[3]=tmp[7]=tab_wp[3];
+ alphap = Simd256<uint32_t>::loadu(tmp);
+ beta = Simd256<uint32_t>::set1(tab_w [5]);
+ betap = Simd256<uint32_t>::set1(tab_wp [5]);
+
+ for (size_t i = 0; i < f; i+=2)
#define A0 &fft[0] + (i << 3)
#define A4 &fft[0] + (i << 3)+8
- Butterfly_DIF_mod2p_8x3_AVX_last3step(A0,A4,alpha,alphap,beta,betap,P,P2);
+ Butterfly_DIF_mod2p_8x3_AVX_last3step(A0,A4,alpha,alphap,beta,betap,P,P2);
#undef A0
#undef A4
- //std::cout<<fft<<std::endl;
- } else {
- for (; w >= 1; tab_w+=w, tab_wp+=w, w >>= 1, f <<= 1)
- for (size_t i = 0; i < f; i++)
- for (size_t j = 0; j < w; j++)
- Butterfly_DIF_mod2p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
- }
+ //std::cout<<fft<<std::endl;
+ } else {
+ for (; w >= 1; tab_w+=w, tab_wp+=w, w >>= 1, f <<= 1)
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j++)
+ Butterfly_DIF_mod2p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
+ }
+
}
@@ -639,123 +673,123 @@ namespace LinBox {
template <class Field>
inline void FFT_transform<Field>::Butterfly_DIT_mod4p_8x1_AVX(uint32_t* ABCDEFGH, uint32_t* IJKLMNOP,
- const uint32_t* alpha,
- const uint32_t* alphap,
- const _vect256_t& P, const _vect256_t& P2) {
- _vect256_t V1,V2,V3,V4,W,Wp,T1,T2;
+ const uint32_t* alpha,
+ const uint32_t* alphap,
+ const _vect256_t& P, const _vect256_t& P2) {
+ _vect256_t V1,V2,V3,V4,W,Wp,T1;
// V1=[A B C D E F G H], V2=[I J K L M N O P]
- VEC256_LOADU(V1,ABCDEFGH);
- VEC256_LOADU(V2,IJKLMNOP);
- VEC256_LOADU(W ,alpha);
- VEC256_LOADU(Wp,alphap);
+ V1 = Simd256<uint32_t>::loadu(ABCDEFGH);
+ V2 = Simd256<uint32_t>::loadu(IJKLMNOP);
+ W = Simd256<uint32_t>::loadu(alpha);
+ Wp = Simd256<uint32_t>::loadu(alphap);
// V3 = V1 mod 2P
- VEC256_MOD_P (V3,V1,P2,T1);
+ V3 = reduce<Simd256<uint32_t> >(V1, P2);
// V4 = V2 * W mod P
- VEC256_MUL_MOD(V4,V2,W,P,Wp,V1,T1,T2);
+ V4 = mul_mod<Simd256<uint32_t> >(V2,W,P,Wp);
// V1 = V3 + V4
- VEC256_ADD_32(V1,V3,V4);
- VEC256_STOREU(ABCDEFGH,V1);
+ V1 = Simd256<uint32_t>::add(V3,V4);
+ Simd256<uint32_t>::storeu(ABCDEFGH,V1);
// V2 = V3 - (V4 - 2P)
- VEC256_SUB_32(T1,V4,P2);
- VEC256_SUB_32(V2,V3,T1);
- VEC256_STOREU(IJKLMNOP,V2);
+ T1 = Simd256<uint32_t>::sub(V4,P2);
+ V2 = Simd256<uint32_t>::sub(V3,T1);
+ Simd256<uint32_t>::storeu(IJKLMNOP,V2);
}
template <class Field>
inline void FFT_transform<Field>::Butterfly_DIT_mod4p_8x3_AVX_first3step(uint32_t* ABCDEFGH, uint32_t* IJKLMNOP,
- const _vect256_t& alpha,const _vect256_t& alphap,
- const _vect256_t& beta ,const _vect256_t& betap,
- const _vect256_t& P, const _vect256_t& P2) {
+ const _vect256_t& alpha,const _vect256_t& alphap,
+ const _vect256_t& beta ,const _vect256_t& betap,
+ const _vect256_t& P, const _vect256_t& P2) {
_vect256_t V1,V2,V3,V4,V5,V6,V7,Q;
// V1=[A B C D E F G H], V2=[I J K L M N O P]
- VEC256_LOADU(V1,ABCDEFGH);
- VEC256_LOADU(V2,IJKLMNOP);
+ V1 = Simd256<uint32_t>::loadu(ABCDEFGH);
+ V2 = Simd256<uint32_t>::loadu(IJKLMNOP);
/*********************************************/
/* 1st STEP */
/*********************************************/
// Transform to V3=[A I C K E M G O], V4=[B J D L F N H P]
- VEC256_UNPACK_LO_32(V6,V1,V2); // V6=[A I B J E M F N]
- VEC256_UNPACK_HI_32(V7,V1,V2); // V7=[C K D L G O H P]
- VEC256_UNPACK_LO_64(V3,V6,V7); // V3=[A I C K E M G O]
- VEC256_UNPACK_HI_64(V4,V6,V7); // V4=[B J D L F N H P]
+ V6 = Simd256<uint32_t>::unpacklo_twice(V1,V2); // V6=[A I B J E M F N]
+ V7 = Simd256<uint32_t>::unpackhi_twice(V1,V2); // V7=[C K D L G O H P]
+ V3 = Simd256<uint64_t>::unpacklo_twice(V6,V7); // V3=[A I C K E M G O]
+ V4 = Simd256<uint64_t>::unpackhi_twice(V6,V7); // V4=[B J D L F N H P]
// V1 = V3 + V4; V1 = [A I C K E M G O]
// Rk: No need for (. mod 2P) since entries are <P
- VEC256_ADD_32(V1,V3,V4);
+ V1 = Simd256<uint32_t>::add(V3,V4);
// V2 = V3 + (P - V4); V2 = [B J D L F N H P]
// Rk: No need for (. mod 2P) since entries are <P
- VEC256_SUB_32(V6,V4,P);
- VEC256_SUB_32(V2,V3,V6);
+ V6 = Simd256<uint32_t>::sub(V4,P);
+ V2 = Simd256<uint32_t>::sub(V3,V6);
/*********************************************/
/* 2nd STEP */
/*********************************************/
// V5 = [D D L L H H P P]
- VEC256_UNPACK_HI_32(V5,V2,V2);
+ V5 = Simd256<uint32_t>::unpackhi_twice(V2,V2);
// Q = V5 * alphap mod 2^64 = [* Qd * Qh * Ql * Qp]
// with alphap= [ alphap * alphap * alphap * alphap *]
- VEC256_MUL_32(Q,V5,alphap);
+ Q = Simd256<uint64_t>::mulx(V5,alphap);
// V6 = [* Qd.P * Qh.P * Ql.P * Qp.P]
- VEC256_MUL_LO_32(V6,Q,P);
+ V6 = Simd256<uint32_t>::mullo(Q,P);
// V7 = V5 * alpha mod 2^32
- VEC256_MUL_LO_32(V7,V5,alpha);
+ V7 = Simd256<uint32_t>::mullo(V5,alpha);
// V3 = V7 - V6 = [* (D.alpha mod p) * (L.alpha mod p) * (H.alpha mod p) * (P.alpha mod p)]
- VEC256_SUB_32(V3,V7,V6);
+ V3 = Simd256<uint32_t>::sub(V7,V6);
// V7=[D L * * H P * *]
- VEC256_SHUFFLE_32(V7,V3,0xFD);
+ V7 = Simd256<uint32_t>::shuffle_twice<0xFD>(V3);
// V6 = [B J D L F N H P]
- VEC256_UNPACK_LO_64(V6,V2,V7);
+ V6 = Simd256<uint64_t>::unpacklo_twice(V2,V7);
// V3= [A B I J E F M N], V4=[C D K L G H O P]
- VEC256_UNPACK_LO_32(V3,V1,V6);
- VEC256_UNPACK_HI_32(V4,V1,V6);
+ V3 = Simd256<uint32_t>::unpacklo_twice(V1,V6);
+ V4 = Simd256<uint32_t>::unpackhi_twice(V1,V6);
// V1 = V3+V4
- VEC256_ADD_32(V1,V3,V4);
+ V1 = Simd256<uint32_t>::add(V3,V4);
// V2 = V3 - (V4 - 2P)
- VEC256_SUB_32(V7,V4,P2);
- VEC256_SUB_32(V2,V3,V7);
+ V7 = Simd256<uint32_t>::sub(V4,P2);
+ V2 = Simd256<uint32_t>::sub(V3,V7);
/*********************************************/
/* 3rd STEP */
/*********************************************/
// V3= [A B C D I J K L] V4= [E F G H M N O P]
- VEC256_UNPACK_LO_64(V6,V1,V2);
- VEC256_UNPACK_HI_64(V7,V1,V2);
- VEC256_UNPACK_LO_128(V3,V6,V7);
- VEC256_UNPACK_HI_128(V4,V6,V7);
+ V6 = Simd256<uint64_t>::unpacklo_twice(V1,V2);
+ V7 = Simd256<uint64_t>::unpackhi_twice(V1,V2);
+ V3 = Simd256<uint64_t>::unpacklo128(V6,V7);
+ V4 = Simd256<uint64_t>::unpackhi128(V6,V7);
// V6= V3 mod 2P
- VEC256_MOD_P(V6,V3,P2,V7);
+ V6 = reduce<Simd256<uint32_t> >(V3, P2);
// V7= V4.beta mod p
- VEC256_MUL_MOD(V7,V4,beta,P,betap,V1,V2,V5);
+ V7 = mul_mod<Simd256<uint32_t> >(V4,beta,P,betap);
// V1 = V6+V7
- VEC256_ADD_32(V1,V6,V7);
+ V1 = Simd256<uint32_t>::add(V6,V7);
// V2 = V6 - (V7 - 2P)
- VEC256_SUB_32(V5,V7,P2);
- VEC256_SUB_32(V2,V6,V5);
+ V5 = Simd256<uint32_t>::sub(V7,P2);
+ V2 = Simd256<uint32_t>::sub(V6,V5);
/*********************************************/
// V3=[A B C D E F G H] V4=[I J K L M N O P]
- VEC256_UNPACK_LO_128(V3,V1,V2);
- VEC256_UNPACK_HI_128(V4,V1,V2);
+ V3 = Simd256<uint64_t>::unpacklo128(V1,V2);
+ V4 = Simd256<uint64_t>::unpackhi128(V1,V2);
// Store
- VEC256_STOREU(ABCDEFGH,V3);
- VEC256_STOREU(IJKLMNOP,V4);
+ Simd256<uint32_t>::storeu(ABCDEFGH,V3);
+ Simd256<uint32_t>::storeu(IJKLMNOP,V4);
}
@@ -763,48 +797,48 @@ namespace LinBox {
template <class Field>
void FFT_transform<Field>::FFT_DIT_Harvey_mod4p_iterative8x1_AVX (uint32_t *fft) {
_vect256_t P,P2;
- VEC256_SET_32(P,_pl);
- VEC256_SET_32(P2,_dpl);
+ P = Simd256<uint32_t>::set1(_pl);
+ P2 = Simd256<uint32_t>::set1(_dpl);
// first three steps
if (n >= 16) {
- _vect256_t alpha,alphap,beta,betap;
- VEC256_SET_32(alpha,pow_w[n-3]);
- VEC256_SET_32(alphap,pow_wp[n-3]);
- uint32_t tmp[8];
- tmp[0]=tmp[4]=pow_w[n-8];
- tmp[1]=tmp[5]=pow_w[n-7];
- tmp[2]=tmp[6]=pow_w[n-6];
- tmp[3]=tmp[7]=pow_w[n-5];
- VEC256_LOADU(beta,tmp);
- tmp[0]=tmp[4]=pow_wp[n-8];
- tmp[1]=tmp[5]=pow_wp[n-7];
- tmp[2]=tmp[6]=pow_wp[n-6];
- tmp[3]=tmp[7]=pow_wp[n-5];
- VEC256_LOADU(betap,tmp);
- for (uint64_t i = 0; i < n; i+=16)
- Butterfly_DIT_mod4p_8x3_AVX_first3step(&fft[i],&fft[i+8],alpha,alphap,beta,betap,P,P2);
- uint32_t * tab_w = &pow_w [n-16];
- uint32_t * tab_wp= &pow_wp[n-16];
- for (size_t w = 8, f = n >> 4; f >= 1; w <<= 1, f >>= 1, tab_w-=w, tab_wp-=w){
- // w : witdh of butterflies
- // f : # families of butterflies
- for (size_t i = 0; i < f; i++)
- for (size_t j = 0; j < w; j+=8)
+ _vect256_t alpha,alphap,beta,betap;
+ alpha = Simd256<uint32_t>::set1(pow_w[n-3]);
+ alphap = Simd256<uint32_t>::set1(pow_wp[n-3]);
+ uint32_t tmp[8];
+ tmp[0]=tmp[4]=pow_w[n-8];
+ tmp[1]=tmp[5]=pow_w[n-7];
+ tmp[2]=tmp[6]=pow_w[n-6];
+ tmp[3]=tmp[7]=pow_w[n-5];
+ beta = Simd256<uint32_t>::loadu(tmp);
+ tmp[0]=tmp[4]=pow_wp[n-8];
+ tmp[1]=tmp[5]=pow_wp[n-7];
+ tmp[2]=tmp[6]=pow_wp[n-6];
+ tmp[3]=tmp[7]=pow_wp[n-5];
+ betap = Simd256<uint32_t>::loadu(tmp);
+ for (uint64_t i = 0; i < n; i+=16)
+ Butterfly_DIT_mod4p_8x3_AVX_first3step(&fft[i],&fft[i+8],alpha,alphap,beta,betap,P,P2);
+ uint32_t * tab_w = &pow_w [n-16];
+ uint32_t * tab_wp= &pow_wp[n-16];
+ for (size_t w = 8, f = n >> 4; f >= 1; w <<= 1, f >>= 1, tab_w-=w, tab_wp-=w){
+ // w : width of butterflies
+ // f : # families of butterflies
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j+=8)
#define A0 &fft[0] + (i << 1) *w+ j
#define A4 &fft[0] + ((i << 1)+1)*w+ j
- Butterfly_DIT_mod4p_8x1_AVX(A0,A4, tab_w+j,tab_wp+j,P,P2);
+ Butterfly_DIT_mod4p_8x1_AVX(A0,A4, tab_w+j,tab_wp+j,P,P2);
#undef A0
#undef A4
+ }
+ } else {
+ uint32_t * tab_w = &pow_w [n-2];
+ uint32_t * tab_wp= &pow_wp[n-2];
+ for (size_t w = 1, f = n >> 1; f >= 1; w <<= 1, f >>= 1, tab_w-=w, tab_wp-=w)
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j++)
+ Butterfly_DIT_mod4p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
}
- } else {
- uint32_t * tab_w = &pow_w [n-2];
- uint32_t * tab_wp= &pow_wp[n-2];
- for (size_t w = 1, f = n >> 1; f >= 1; w <<= 1, f >>= 1, tab_w-=w, tab_wp-=w)
- for (size_t i = 0; i < f; i++)
- for (size_t j = 0; j < w; j++)
- Butterfly_DIT_mod4p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], tab_w[j], tab_wp[j]);
- }
}
diff --git a/linbox/algorithms/polynomial-matrix/polynomial-fft-transform.h b/linbox/algorithms/polynomial-matrix/polynomial-fft-transform.h
index bd19bff..1c98fdf 100755
--- a/linbox/algorithms/polynomial-matrix/polynomial-fft-transform.h
+++ b/linbox/algorithms/polynomial-matrix/polynomial-fft-transform.h
@@ -1,4 +1,5 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
/*
* Copyright (C) 2014 Pascal Giorgi, Romain Lebreton
*
@@ -30,9 +31,13 @@
#include <iostream>
#include "linbox/linbox-config.h"
-#include "linbox/algorithms/polynomial-matrix/simd.h"
#include "linbox/util/debug.h"
#include "givaro/givinteger.h"
+#include <fflas-ffpack/fflas/fflas_simd.h>
+
+#ifndef ROUND_DOWN
+#define ROUND_DOWN(x, s) ((x) & ~((s)-1))
+#endif
// template<typename T>
// std::ostream& operator<<(std::ostream& os, const std::vector<T> &x){
@@ -43,6 +48,26 @@
#include "fflas-ffpack/utils/align-allocator.h"
+#ifdef __LINBOX_HAVE_SSE4_1_INSTRUCTIONS
+
+//#include "linbox/algorithms/polynomial-matrix/simd.h"
+
+#include "fflas-ffpack/fflas/fflas_simd.h"
+
+#ifdef __LINBOX_HAVE_AVX_INSTRUCTIONS2
+/* 256 bits CODE */
+#define __LINBOX_HAVE_AVX_INSTRUCTIONS2
+
+// define 256 bits simd vector type
+typedef __m256i _vect256_t;
+
+#endif
+
+// define 128 bits simd vector type
+typedef __m128i _vect128_t;
+
+#endif
+
namespace LinBox {
@@ -79,7 +104,7 @@ namespace LinBox {
uint64_t find_gen (uint64_t _m, uint64_t _val2p) {
// find a primitive 2^k root of unity where
- // _p - 1 = 2^k * m
+ // _p - 1 = 2^val2p * m
srand((unsigned int) time(NULL));
uint64_t y,z,j;
uint64_t _gen;
@@ -103,22 +128,21 @@ namespace LinBox {
}
FFT_transform (const Field& fld2, size_t ln2, Element w = 0)
- : fld (&fld2), n ((1U << ln2)), ln (ln2), pow_w(n - 1), pow_wp(n - 1), _data(n) {
+ : fld (&fld2), n ((1UL << ln2)), ln (ln2), pow_w(n - 1), pow_wp(n - 1), _data(n) {
_pl = fld->characteristic();
_p = fld->characteristic();
linbox_check((_pl >> 29) == 0 ); // p < 2^29, i.e. 8*p < 2^32, for Harvey's butterflies
_dpl = (_pl << 1);
//_pinv = 1 / (double) _pl;
-
+ Givaro::Timer chrono;
+
if (w == 0){ // find a pseudo 2^lpts-th primitive root of unity
+
+ chrono.start();
+
uint64_t _val2p = 0;
uint64_t _m = _pl;
- uint64_t _logp = 0;
- while (_m) {
- _m >>= 1;
- _logp++;
- }
_m = _pl - 1;
while ((_m & 1) == 0) {
_m >>= 1;
@@ -127,32 +151,89 @@ namespace LinBox {
//_I = (1L << (_logp << 1)) / _pl;
uint64_t _gen = find_gen (_m, _val2p);
_w = Givaro::powmod(_gen, 1<<(_val2p-ln), _pl);
+
}
else {
_w = (uint32_t)w;
}
+ chrono.clear();
+ chrono.start();
// compute w^(-1) mod p = w^(2^lpts - 1)
- _invw = Givaro::powmod(_w, (1<<ln) - 1, _pl);
+ _invw = Givaro::powmod(_w, ((uint64_t)1<<ln) - 1, _pl);
size_t pos = 0;
- uint64_t wi = 1;
- uint64_t __w = _w;
+ //uint64_t wi = 1;
+ uint32_t wi = 1;
+ uint32_t __w = _w;
+ uint64_t _logp = Givaro::Integer(_pl).bitsize() -1;
+ uint32_t BAR= (Givaro::Integer(1)<<(32+_logp))/Givaro::Integer(_pl);
+ uint32_t Q;
+ //cout<<"log Bar: "<<Integer(BAR).bitsize()<<endl;
if (ln>0){
+#ifdef MYOLD_FFTINIT
size_t tpts = 1 << (ln - 1);
while (tpts > 0) {
for (size_t i = 0; i < tpts; i++, pos++) {
pow_w[pos] = wi;
pow_wp[pos] = ((uint64_t) pow_w[pos] << 32UL) / _pl;
wi= (wi*__w)%_pl;
- //field().mulin(wi, __w);
}
wi = 1;
__w = (__w * __w) % _pl;
//field().mulin(__w, __w);
tpts >>= 1;
}
- }
+#else
+// using simd=Simd<uint32_t>;
+// using vect_t =typename simd::vect_t;
+
+ size_t tpts = 1 << (ln - 1);
+ size_t i=0;
+// for( ;i<std::min(simd::vect_size+1, tpts);i++,pos++){
+ // Precompute pow_wp[1] for faster mult by pow_w[1]
+ for( ;i<std::min((size_t) 2, tpts);i++,pos++){
+ pow_w[pos] = wi;
+ pow_wp[pos] = ((uint64_t) pow_w[pos] << 32UL) / _pl;
+ wi= ((uint64_t)wi*__w)%_pl;
+ }
+ /*
+ vect_t wp_vect, Q_vect,BAR_vect,w_vect,pow_w_vect,pow_wp_vect, pl_vect;
+ BAR_vect= simd::set1(BAR);
+ wp_vect = simd::set1(pow_wp[simd::vect_size]);
+ w_vect = simd::set1(pow_w[simd::vect_size]);
+ pl_vect = simd::set1(_pl);
+ for (; i < ROUND_DOWN(tpts,simd::vect_size);
+ i+=simd::vect_size,pos+=simd::vect_size) {
+ pow_w_vect = simd::loadu((int32_t*)pow_w.data()+pos-simd::vect_size);
+ Q_vect=simd::mulhi(pow_w_vect,wp_vect);
+ pow_w_vect = simd::sub(simd::mullo(pow_w_vect,w_vect),simd::mullo(Q_vect,pl_vect));
+ pow_w_vect=simd::sub(pow_w_vect, simd::vandnot(simd::greater(pow_w_vect,pl_vect),pl_vect));
+ simd::storeu((int32_t*)pow_w.data()+pos,pow_w_vect);
+ pow_wp_vect= simd::mulhi(simd::sll(pow_w_vect,32-_logp),BAR_vect);
+ simd::storeu((int32_t*)pow_wp.data()+pos,pow_wp_vect);
+ }
+ */
+ // Use pow_wp[1] for speed-up mult by pow_w[1]
+ for( ;i<tpts;i++,pos++){
+ pow_w[pos] = wi;
+ pow_wp[pos]= (((uint64_t)wi*BAR)>>_logp);
+ Q= ((uint64_t)wi*pow_wp[1])>>32;
+ wi= (uint32_t)(wi*__w - Q*_pl);
+ wi-=(wi>=_pl?_pl:0);
+ }
+
+ // Other pow_w elements can be read from previously computed pow_w
+ for(size_t k=2;k<=tpts;k<<=1)
+ for(size_t i=0;i<tpts;i+=k,pos++){
+ pow_w[pos] = pow_w[i];
+ pow_wp[pos] = pow_wp[i];
+ }
+#endif
+
+ }
+ chrono.stop();
+ //cout<<"FFT: table="<<chrono<<endl;
}
@@ -160,13 +241,13 @@ namespace LinBox {
Element getInvRoot() const {return _invw;}
- void FFT_DIF_Harvey (uint32_t *fft) {
-#ifdef __LINBOX_USE_SIMD
-#ifdef __AVX2__
+ void FFT_DIF_Harvey (uint32_t *fft) {
+#ifdef __LINBOX_HAVE_SSE4_1_INSTRUCTIONS
+#ifdef __LINBOX_HAVE_AVX_INSTRUCTIONS2
FFT_DIF_Harvey_mod2p_iterative8x1_AVX(fft);
if (n>=8){
_vect256_t P;
- VEC256_SET_32(P,_pl);
+ P = Simd256<uint32_t>::set1(_pl);
for (uint64_t i = 0; i < n; i += 8)
reduce256_modp(fft+i,P);
return;
@@ -176,7 +257,7 @@ namespace LinBox {
#endif
if (n >=4) {
_vect128_t P;
- VEC128_SET_32(P,_pl);
+ P = Simd128<uint32_t>::set1(_pl);
for (uint64_t i = 0; i < n; i += 4)
reduce128_modp(fft+i,P);
} else {
@@ -187,20 +268,20 @@ namespace LinBox {
// FALLBACK WHEN NO SIMD VERSION
FFT_DIF_Harvey_mod2p_iterative2x2(fft);
for (uint64_t i = 0; i < n; i++) {
- if (fft[i] >= (_pl << 1)) fft[i] -= (_pl << 1);
+// if (fft[i] >= (_pl << 1)) fft[i] -= (_pl << 1);
if (fft[i] >= _pl) fft[i] -= _pl;
- }
+ }
#endif
}
void FFT_DIT_Harvey (uint32_t *fft) {
-#ifdef __LINBOX_USE_SIMD
-#ifdef __AVX2__
+#ifdef __LINBOX_HAVE_SSE4_1_INSTRUCTIONS
+#ifdef __LINBOX_HAVE_AVX_INSTRUCTIONS2
FFT_DIT_Harvey_mod4p_iterative8x1_AVX(fft);
if (n>=8){
_vect256_t P,P2;
- VEC256_SET_32(P, _pl);
- VEC256_SET_32(P2,_dpl);
+ P = Simd256<uint32_t>::set1( _pl);
+ P2 = Simd256<uint32_t>::set1(_dpl);
for (uint64_t i = 0; i < n; i += 8){
reduce256_modp(&fft[i],P2);
reduce256_modp(&fft[i],P);
@@ -212,8 +293,8 @@ namespace LinBox {
#endif
if (n >=4) {
_vect128_t P,P2;
- VEC128_SET_32(P,_pl);
- VEC128_SET_32(P2,_dpl);
+ P = Simd128<uint32_t>::set1(_pl);
+ P2 = Simd128<uint32_t>::set1(_dpl);
for (uint64_t i = 0; i < n; i += 4){
reduce128_modp(&fft[i],P2);
reduce128_modp(&fft[i],P);
@@ -230,7 +311,7 @@ namespace LinBox {
for (uint64_t i = 0; i < n; i++) {
if (fft[i] >= (_pl << 1)) fft[i] -= (_pl << 1);
if (fft[i] >= _pl) fft[i] -= _pl;
- }
+ }
#endif
}
@@ -246,27 +327,35 @@ namespace LinBox {
FFT_DIT_Harvey(fft);
}
- // FFT with conversion from Element to uint32_t
+ // FFT with conversion from Element to uint32_t
template <typename T=Element>
typename std::enable_if<!std::is_same<T,uint32_t>::value>::type
FFT_DIF (T *fft) {
- for(uint64_t i=0;i<n;i++)
- _data[i]=fft[i];
- FFT_DIF_Harvey(&_data[0]);
- for(uint64_t i=0;i<n;i++)
- fft[i]=_data[i];
-
+ // for(uint64_t i=0;i<n;i++)
+ // _data[i]=fft[i];
+ // FFT_DIF_Harvey(&_data[0]);
+ // for(uint64_t i=0;i<n;i++)
+ // fft[i]=_data[i];
+ std::copy(fft,fft+n,_data.data());
+ FFT_DIF_Harvey(_data.data());
+ std::copy(_data.begin(),_data.end(),fft);
+
}
template <typename T=Element>
typename std::enable_if<!std::is_same<T,uint32_t>::value>::type
FFT_DIT (T *fft) {
- for(uint64_t i=0;i<n;i++)
- _data[i]=fft[i];
- FFT_DIT_Harvey(&_data[0]);
- for(uint64_t i=0;i<n;i++)
- fft[i]=_data[i];
+ // for(uint64_t i=0;i<n;i++)
+ // _data[i]=fft[i];
+ // FFT_DIT_Harvey(&_data[0]);
+ // for(uint64_t i=0;i<n;i++)
+ // fft[i]=_data[i];
+ std::copy(fft,fft+n,_data.data());
+ FFT_DIT_Harvey(_data.data());
+ std::copy(_data.begin(),_data.end(),fft);
+
+
}
-
+
/*
* Different implementations for the butterfly operations
*/
@@ -279,24 +368,24 @@ namespace LinBox {
inline void Butterfly_DIF_mod2p_4x1_SSE(uint32_t* ABCD, uint32_t* EFGH,const uint32_t* alpha, const uint32_t* alphap, const __m128i& P, const __m128i& P2);
inline void Butterfly_DIF_mod2p_4x1_SSE_laststep(uint32_t* ABCD, uint32_t* EFGH, const __m128i& P2);
inline void Butterfly_DIF_mod2p_4x2_SSE(uint32_t* , uint32_t* ,uint32_t* , uint32_t* ,
- const uint32_t* ,const uint32_t* ,const uint32_t* ,
- const uint32_t* ,const uint32_t* ,const uint32_t* ,
- const __m128i& P, const __m128i& P2);
+ const uint32_t* ,const uint32_t* ,const uint32_t* ,
+ const uint32_t* ,const uint32_t* ,const uint32_t* ,
+ const __m128i& P, const __m128i& P2);
inline void Butterfly_DIF_mod2p_4x2_SSE_last2step(uint32_t* ABCD, uint32_t* EFGH, const __m128i& W,
- const __m128i& Wp, const __m128i& P, const __m128i& P2);
+ const __m128i& Wp, const __m128i& P, const __m128i& P2);
inline void Butterfly_DIT_mod4p_4x1_SSE(uint32_t* ABCD, uint32_t* EFGH, const uint32_t* alpha,
- const uint32_t* alphap,const __m128i& P, const __m128i& P2);
+ const uint32_t* alphap,const __m128i& P, const __m128i& P2);
inline void Butterfly_DIT_mod4p_4x2_SSE_first2step(uint32_t* ABCD, uint32_t* EFGH, const __m128i& W,
- const __m128i& Wp, const __m128i& P, const __m128i& P2);
-#ifdef __AVX2__
+ const __m128i& Wp, const __m128i& P, const __m128i& P2);
+#ifdef __LINBOX_HAVE_AVX_INSTRUCTIONS2
inline void reduce256_modp(uint32_t*, const __m256i&);
inline void Butterfly_DIF_mod2p_8x1_AVX(uint32_t* ABCD, uint32_t* EFGH, const uint32_t* alpha,const uint32_t* alphap,const __m256i& P, const __m256i& P2);
inline void Butterfly_DIF_mod2p_8x3_AVX_last3step(uint32_t* ABCDEFGH, uint32_t* IJKLMNOP, const __m256i& alpha,const __m256i& alphap,
- const __m256i& beta ,const __m256i& betap, const __m256i& P ,const __m256i& P2);
+ const __m256i& beta ,const __m256i& betap, const __m256i& P ,const __m256i& P2);
inline void Butterfly_DIT_mod4p_8x1_AVX(uint32_t* ABCD, uint32_t* EFGH, const uint32_t* alpha,const uint32_t* alphap,
- const __m256i& P, const __m256i& P2);
+ const __m256i& P, const __m256i& P2);
inline void Butterfly_DIT_mod4p_8x3_AVX_first3step(uint32_t* ABCDEFGH, uint32_t* IJKLMNOP, const __m256i& alpha,const __m256i& alphap,
- const __m256i& beta ,const __m256i& betap, const __m256i& P ,const __m256i& P2);
+ const __m256i& beta ,const __m256i& betap, const __m256i& P ,const __m256i& P2);
#endif
@@ -305,25 +394,27 @@ namespace LinBox {
* Different implementation of DIF/DIT with Harvey's trick
*/
- void FFT_DIF_Harvey_mod2p_iterative (Element *fft);
- void FFT_DIF_Harvey_mod2p_iterative2x2 (Element *fft);
- void FFT_DIF_Harvey_mod2p_iterative3x3 (Element *fft);
- void FFT_DIT_Harvey_mod4p_iterative2x2 (Element *fft);
- void FFT_DIT_Harvey_mod4p_iterative3x3 (Element *fft);
+ void FFT_DIF_Harvey_mod2p_iterative (uint32_t *fft);
+ void FFT_DIF_Harvey_mod2p_iterative2x2 (uint32_t *fft);
+ void FFT_DIF_Harvey_mod2p_iterative3x3 (uint32_t *fft);
+ void FFT_DIT_Harvey_mod4p_iterative (uint32_t *fft);
+ void FFT_DIT_Harvey_mod4p_iterative2x2 (uint32_t *fft);
+ void FFT_DIT_Harvey_mod4p_iterative3x3 (uint32_t *fft);
// SIMD implementations follow
void FFT_DIF_Harvey_mod2p_iterative4x1_SSE (uint32_t *fft);
void FFT_DIF_Harvey_mod2p_iterative4x2_SSE (uint32_t *fft);
void FFT_DIT_Harvey_mod4p_iterative4x1_SSE (uint32_t *fft);
-#ifdef __AVX2__
+#ifdef __LINBOX_HAVE_AVX_INSTRUCTIONS2
void FFT_DIF_Harvey_mod2p_iterative8x1_AVX (uint32_t *fft);
void FFT_DIT_Harvey_mod4p_iterative8x1_AVX (uint32_t *fft);
#endif
- };
+ }; // class FFT_transform
+
} // end of namespace LinBox
#include "linbox/algorithms/polynomial-matrix/polynomial-fft-transform.inl"
-#ifdef __LINBOX_USE_SIMD
+#ifdef __LINBOX_HAVE_SSE4_1_INSTRUCTIONS
#include "linbox/algorithms/polynomial-matrix/polynomial-fft-transform-simd.inl"
#endif
#endif // __LINBOX_FFT_H
diff --git a/linbox/algorithms/polynomial-matrix/polynomial-fft-transform.inl b/linbox/algorithms/polynomial-matrix/polynomial-fft-transform.inl
index 84cc795..1ef4584 100644
--- a/linbox/algorithms/polynomial-matrix/polynomial-fft-transform.inl
+++ b/linbox/algorithms/polynomial-matrix/polynomial-fft-transform.inl
@@ -1,4 +1,5 @@
-/* -*- mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
/*
* Copyright (C) 2014 Pascal Giorgi, Romain Lebreton
*
@@ -59,18 +60,18 @@ namespace LinBox {
template <class Field>
- void FFT_transform<Field>::FFT_DIF_Harvey_mod2p_iterative (Element *fft) {
- for (size_t w = n >> 1, f = 1, pos_w = 0; w != 0; f <<= 1, pos_w += w, w >>= 1)
+ void FFT_transform<Field>::FFT_DIF_Harvey_mod2p_iterative (uint32_t *fft) {
+ for (size_t w = n >> 1, f = 1, pos_w = 0; w != 0; f <<= 1, pos_w += w, w >>= 1){
// w : witdh of butterflies
// f : # families of butterflies
- for (size_t i = 0; i < f; i++){
+ for (size_t i = 0; i < f; i++)
for (size_t j = 0; j < w; j++)
- Butterfly_DIF_mod2p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], pow_w[j*f], pow_wp[j*f]);
- }
+ Butterfly_DIF_mod2p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], pow_w[j*f], pow_wp[j*f]);
+ }
}
template <class Field>
- void FFT_transform<Field>::FFT_DIF_Harvey_mod2p_iterative2x2 (Element *fft) {
+ void FFT_transform<Field>::FFT_DIF_Harvey_mod2p_iterative2x2 (uint32_t *fft) {
size_t w, f;
for (w = n >> 1, f = 1; w >= 2; w >>= 2, f <<= 2)
// w : witdh of butterflies
@@ -104,7 +105,7 @@ namespace LinBox {
}
template <class Field>
- void FFT_transform<Field>::FFT_DIF_Harvey_mod2p_iterative3x3 (Element *fft) {
+ void FFT_transform<Field>::FFT_DIF_Harvey_mod2p_iterative3x3 (uint32_t *fft) {
size_t w, f;
for (w = n >> 1, f = 1; w >= 4; w >>= 3, f <<= 3)
// w : witdh of butterflies
@@ -154,9 +155,16 @@ namespace LinBox {
Butterfly_DIF_mod2p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], pow_w[j*f], pow_wp[j*f]);
}
+ template <class Field>
+ void FFT_transform<Field>::FFT_DIT_Harvey_mod4p_iterative (uint32_t *fft) {
+ for (size_t w = 1, f = n >> 1; f >= 1; w <<= 1, f >>= 1)
+ for (size_t i = 0; i < f; i++)
+ for (size_t j = 0; j < w; j++)
+ Butterfly_DIT_mod4p(fft[(i << 1)*w+j], fft[((i << 1)+1)*w+j], pow_w[j*f], pow_wp[j*f]);
+ }
template <class Field>
- void FFT_transform<Field>::FFT_DIT_Harvey_mod4p_iterative2x2 (Element *fft) {
+ void FFT_transform<Field>::FFT_DIT_Harvey_mod4p_iterative2x2 (uint32_t *fft) {
size_t w, f;
for (w = 1, f = n >> 1; f >= 2; w <<= 2, f >>= 2)
// w : witdh of butterflies
@@ -187,7 +195,7 @@ namespace LinBox {
}
template <class Field>
- void FFT_transform<Field>::FFT_DIT_Harvey_mod4p_iterative3x3 (Element *fft) {
+ void FFT_transform<Field>::FFT_DIT_Harvey_mod4p_iterative3x3 (uint32_t *fft) {
size_t w, f;
for (w = 1, f = n >> 1; f >= 4; w <<= 3, f >>= 3)
// w : witdh of butterflies
diff --git a/linbox/algorithms/polynomial-matrix/polynomial-matrix-domain.h b/linbox/algorithms/polynomial-matrix/polynomial-matrix-domain.h
index 7274dd3..830a808 100755
--- a/linbox/algorithms/polynomial-matrix/polynomial-matrix-domain.h
+++ b/linbox/algorithms/polynomial-matrix/polynomial-matrix-domain.h
@@ -34,6 +34,9 @@
#include "linbox/algorithms/polynomial-matrix/matpoly-mult-kara.h"
#include "linbox/algorithms/polynomial-matrix/matpoly-mult-fft.h"
#include <algorithm>
+
+
+
namespace LinBox
{
@@ -51,12 +54,12 @@ namespace LinBox
inline const Field& field() const {return *_field;}
template< class PMatrix1,class PMatrix2,class PMatrix3>
- void mul(PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b)
+ void mul(PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b, size_t max_rowdeg=0)
{
size_t d = a.size()+b.size();
if (d > FFT_DEG_THRESHOLD){
//std::cout<<"PolMul FFT"<<std::endl;
- _fft.mul(c,a,b);
+ _fft.mul(c,a,b,max_rowdeg);
}
else
if ( d > KARA_DEG_THRESHOLD){
@@ -68,7 +71,8 @@ namespace LinBox
_naive.mul(c,a,b);
}
}
-
+
+
template< class PMatrix1,class PMatrix2,class PMatrix3>
void midproduct (PMatrix1 &c, const PMatrix2 &a, const PMatrix3 &b)
{
diff --git a/linbox/algorithms/polynomial-matrix/simd-additional-functions.h b/linbox/algorithms/polynomial-matrix/simd-additional-functions.h
new file mode 100644
index 0000000..949240a
--- /dev/null
+++ b/linbox/algorithms/polynomial-matrix/simd-additional-functions.h
@@ -0,0 +1,474 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+/*
+ * Copyright (C) 2016 Romain Lebreton
+ *
+ * Written by Romain Lebreton <romain.lebreton at lirmm.fr>
+ *
+ * ========LICENCE========
+ * This file is part of the library LinBox.
+ *
+ * LinBox is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ */
+
+#ifndef __LINBOX_simd_additional_functions_H
+#define __LINBOX_simd_additional_functions_H
+
+#include <iostream>
+#include "linbox/util/debug.h"
+#include "linbox/linbox-config.h"
+#include "fflas-ffpack/fflas/fflas_simd.h"
+
+#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
+#define INLINE __attribute__((always_inline)) inline
+#else
+#define INLINE inline
+#endif
+
+#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
+#define CONST __attribute__((const))
+#else
+#define CONST
+#endif
+
+
+namespace LinBox {
+
+
+ template <typename simd, typename Field>
+ struct SimdCompute_t {};
+
+#if defined(__FFLASFFPACK_HAVE_SSE4_1_INSTRUCTIONS)
+ template <typename Field>
+ struct SimdCompute_t<Simd128<typename Field::Element>, Field> {
+ using Compute_t = Simd128<typename Field::Compute_t>;
+ };
+#endif
+
+#if defined(__FFLASFFPACK_HAVE_AVX_INSTRUCTIONS)
+ template <typename Field>
+ struct SimdCompute_t<Simd256<typename Field::Element>, Field> {
+ using Compute_t = Simd256<typename Field::Compute_t>;
+ };
+#endif
+
+
+#define Simd_vect typename Simd::vect_t
+
+ /*
+ * Generic memory operations
+ */
+ template<class T, class Simd = Simd<T>>
+ struct MemoryOp {
+
+ // Call load/store (16-byte alignment required) if Simd128
+ static INLINE Simd_vect load (const T* const p);
+
+ // Call loadu/storeu (no alignment requirement) if Simd256
+ static INLINE void store(T *p, Simd_vect v);
+
+ static INLINE Simd_vect shuffletwice8_DD (Simd_vect& s1);
+
+ static INLINE Simd_vect unpacklo2 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpacklo4 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpacklo8 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpacklo16 (const Simd_vect& a, const Simd_vect& b);
+
+ static INLINE Simd_vect unpackhi2 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpackhi4 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpackhi8 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpackhi16 (const Simd_vect& a, const Simd_vect& b);
+
+ static INLINE Simd_vect unpacklo_twice2 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpacklo_twice4 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpacklo_twice8 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpacklo_twice16 (const Simd_vect& a, const Simd_vect& b);
+
+ static INLINE Simd_vect unpackhi_twice2 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpackhi_twice4 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpackhi_twice8 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpackhi_twice16 (const Simd_vect& a, const Simd_vect& b);
+
+ static INLINE Simd_vect unpacklohi_twice2 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpacklohi_twice4 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpacklohi_twice8 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpacklohi_twice16 (const Simd_vect& a, const Simd_vect& b);
+
+ static INLINE Simd_vect unpacklohi2 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpacklohi4 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpacklohi8 (const Simd_vect& a, const Simd_vect& b);
+ static INLINE Simd_vect unpacklohi16 (const Simd_vect& a, const Simd_vect& b);
+
+ }; // MemoryOp
+
+#undef Simd_vect
+
+#if defined(__FFLASFFPACK_HAVE_SSE4_1_INSTRUCTIONS)
+ template<class T>
+ struct MemoryOp<T, Simd128<T>> {
+ using simd = Simd128<T>;
+ using simd_vect = typename simd::vect_t;
+
+ /**************/
+ /* load/store */
+ /**************/
+ static INLINE simd_vect load (const T* const p) {return simd::load(p);}
+ static INLINE void store(T *p, simd_vect v) {return simd::store(p, v);}
+
+ /*********************/
+ /* Specific shuffles */
+ /*********************/
+ static INLINE simd_vect shuffletwice8_DD (simd_vect& s1) {
+ using simd128_16 = Simd128<uint16_t>;
+ using simd128_64 = Simd128<uint64_t>;
+ // std::cout << "Test shuffletwice8_DD :\n"; FFLAS::print<simd128_16>(std::cout,s1);
+ simd_vect s2 = simd128_64::sll(s1,16);
+ // std::cout << "\n"; FFLAS::print<simd128_16>(std::cout,s2);
+ // std::cout << "\n"; FFLAS::print<simd128_16>(std::cout,simd128_16::template blend<0x44>(s1,s2)); std::cout << "\n\n";
+ return simd128_16::template blend<0x44>(s1,s2); // 0x44 = [0 1 0 0 0 1 0 0]_base2
+ }
+
+ /********************/
+ /* unpacklo */
+ /********************/
+ static INLINE simd_vect unpacklo2 (const simd_vect& a, const simd_vect& b) {return Simd128<uint64_t>::unpacklo(a,b); }
+ static INLINE simd_vect unpacklo4 (const simd_vect& a, const simd_vect& b) {return Simd128<uint32_t>::unpacklo(a,b); }
+ static INLINE simd_vect unpacklo8 (const simd_vect& a, const simd_vect& b) {return Simd128<uint16_t>::unpacklo(a,b); }
+
+ /********************/
+ /* unpackhi */
+ /********************/
+ static INLINE simd_vect unpackhi2 (const simd_vect& a, const simd_vect& b) {return Simd128<uint64_t>::unpackhi(a,b); }
+ static INLINE simd_vect unpackhi4 (const simd_vect& a, const simd_vect& b) {return Simd128<uint32_t>::unpackhi(a,b); }
+ static INLINE simd_vect unpackhi8 (const simd_vect& a, const simd_vect& b) {return Simd128<uint16_t>::unpackhi(a,b); }
+
+ /**************/
+ /* unpacklohi */
+ /**************/
+ static INLINE void unpacklohi2 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ using simd128_64 = Simd128<uint64_t>;
+ s1 = simd128_64::unpacklo(a, b);
+ s2 = simd128_64::unpackhi(a, b);
+ }
+
+ static INLINE void unpacklohi4 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ using simd128_32 = Simd128<uint32_t>;
+ s1 = simd128_32::unpacklo(a, b);
+ s2 = simd128_32::unpackhi(a, b);
+ }
+
+ static INLINE void unpacklohi8 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ using simd128_16 = Simd128<uint16_t>;
+ s1 = simd128_16::unpacklo(a, b);
+ s2 = simd128_16::unpackhi(a, b);
+ }
+
+ /********************/
+ /* unpacklo_twice */
+ /********************/
+ static INLINE simd_vect unpacklo_twice2 (const simd_vect& a, const simd_vect& b) { return unpacklo2(a,b); }
+
+ static INLINE simd_vect unpacklo_twice4 (const simd_vect& a, const simd_vect& b) {
+ using simd128_32 = Simd128<uint32_t>;
+ simd_vect a1 = simd128_32::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd128_32::template shuffle<0xD8>(b);
+ return simd128_32::unpacklo(a1,b1);
+ }
+
+ static INLINE simd_vect unpacklo_twice8 (const simd_vect& a, const simd_vect& b) {
+ using simd128_16 = Simd128<uint16_t>;
+ using simd128_32 = Simd128<uint32_t>;
+ simd_vect a1 = simd128_32::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd128_32::template shuffle<0xD8>(b);
+ return simd128_16::unpacklo(a1,b1);
+ }
+
+ /********************/
+ /* unpackhi_twice */
+ /********************/
+ static INLINE simd_vect unpackhi_twice2 (const simd_vect& a, const simd_vect& b) { return unpackhi2(a,b); }
+
+ static INLINE simd_vect unpackhi_twice4 (const simd_vect& a, const simd_vect& b) {
+ using simd128_32 = Simd128<uint32_t>;
+ simd_vect a1 = simd128_32::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd128_32::template shuffle<0xD8>(b);
+ return simd128_32::unpackhi(a1,b1);
+ }
+
+ static INLINE simd_vect unpackhi_twice8 (const simd_vect& a, const simd_vect& b) {
+ using simd128_16 = Simd128<uint16_t>;
+ using simd128_32 = Simd128<uint32_t>;
+ simd_vect a1 = simd128_32::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd128_32::template shuffle<0xD8>(b);
+ return simd128_16::unpackhi(a1,b1);
+ }
+
+ /********************/
+ /* unpacklohi_twice */
+ /********************/
+ static INLINE void unpacklohi_twice2 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ unpacklohi2(s1, s2, a, b);
+ }
+
+ static INLINE void unpacklohi_twice4 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ using simd128_32 = Simd128<uint32_t>;
+ simd_vect a1 = simd128_32::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd128_32::template shuffle<0xD8>(b);
+ s1 = simd128_32::unpacklo(a1,b1);
+ s2 = simd128_32::unpackhi(a1,b1);
+ }
+
+ static INLINE void unpacklohi_twice8 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ using simd128_16 = Simd128<uint16_t>;
+ using simd128_32 = Simd128<uint32_t>;
+ simd_vect a1 = simd128_32::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd128_32::template shuffle<0xD8>(b);
+ s1 = simd128_16::unpacklo(a1,b1);
+ s2 = simd128_16::unpackhi(a1,b1);
+ }
+ }; // MemoryOp<T, Simd128<T>>
+#endif
+
+#if defined(__FFLASFFPACK_HAVE_AVX2_INSTRUCTIONS)
+ template<class T>
+ struct MemoryOp<T, Simd256<T>> {
+ using simd = Simd256<T>;
+ using simd_vect = typename simd::vect_t;
+
+ /**************/
+ /* load/store */
+ /**************/
+ static INLINE simd_vect load (const T* const p) {return simd::loadu(p);}
+ static INLINE void store(T *p, simd_vect v) {return simd::storeu(p, v);}
+
+ /*********************/
+ /* Specific shuffles */
+ /*********************/
+ static INLINE simd_vect shuffletwice8_DD (simd_vect& s1) {
+ using simd256_32 = Simd256<uint32_t>;
+ return simd256_32::template shuffle_twice<0xDD>(s1);
+ }
+
+ /********************/
+ /* unpacklo */
+ /********************/
+ static INLINE simd_vect unpacklo2 (const simd_vect& a, const simd_vect& b) {return simd::unpacklo128(a, b); }
+
+ static INLINE simd_vect unpacklo4 (const simd_vect& a, const simd_vect& b) {
+ using simd256_64 = Simd256<uint64_t>;
+ simd_vect a1 = simd256_64::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd256_64::template shuffle<0xD8>(b);
+ return simd256_64::unpacklo_twice(a1,b1);
+ }
+
+ static INLINE simd_vect unpacklo8 (const simd_vect& a, const simd_vect& b) {
+ using simd256_32 = Simd256<uint32_t>;
+ using simd256_64 = Simd256<uint64_t>;
+ simd_vect a1 = simd256_64::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd256_64::template shuffle<0xD8>(b);
+ return simd256_32::unpacklo_twice(a1, b1);
+ }
+
+ static INLINE simd_vect unpacklo16 (const simd_vect& a, const simd_vect& b) {
+ using simd256_16 = Simd256<uint16_t>;
+ using simd256_64 = Simd256<uint64_t>;
+ simd_vect a1 = simd256_64::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd256_64::template shuffle<0xD8>(b);
+ return simd256_16::unpacklo_twice(a1, b1);
+ }
+
+ /********************/
+ /* unpackhi */
+ /********************/
+ static INLINE simd_vect unpackhi2 (const simd_vect& a, const simd_vect& b) {return simd::unpackhi128(a, b); }
+
+ static INLINE simd_vect unpackhi4 (const simd_vect& a, const simd_vect& b) {
+ using simd256_64 = Simd256<uint64_t>;
+ simd_vect a1 = simd256_64::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd256_64::template shuffle<0xD8>(b);
+ return simd256_64::unpackhi_twice(a1,b1);
+ }
+
+ static INLINE simd_vect unpackhi8 (const simd_vect& a, const simd_vect& b) {
+ using simd256_32 = Simd256<uint32_t>;
+ using simd256_64 = Simd256<uint64_t>;
+ simd_vect a1 = simd256_64::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd256_64::template shuffle<0xD8>(b);
+ return simd256_32::unpackhi_twice(a1, b1);
+ }
+
+ static INLINE simd_vect unpackhi16 (const simd_vect& a, const simd_vect& b) {
+ using simd256_16 = Simd256<uint16_t>;
+ using simd256_64 = Simd256<uint64_t>;
+ simd_vect a1 = simd256_64::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd256_64::template shuffle<0xD8>(b);
+ return simd256_16::unpackhi_twice(a1, b1);
+ }
+
+ /**************/
+ /* unpacklohi */
+ /**************/
+ static INLINE void unpacklohi2 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ s1 = simd::unpacklo128(a, b);
+ s2 = simd::unpackhi128(a, b);
+ }
+
+ static INLINE void unpacklohi4 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ using simd256_64 = Simd256<uint64_t>;
+ simd_vect a1 = simd256_64::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd256_64::template shuffle<0xD8>(b);
+ s1 = simd256_64::unpacklo_twice(a1, b1);
+ s2 = simd256_64::unpackhi_twice(a1, b1);
+ }
+
+ static INLINE void unpacklohi8 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ using simd256_32 = Simd256<uint32_t>;
+ using simd256_64 = Simd256<uint64_t>;
+ simd_vect a1 = simd256_64::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd256_64::template shuffle<0xD8>(b);
+ s1 = simd256_32::unpacklo_twice(a1, b1);
+ s2 = simd256_32::unpackhi_twice(a1, b1);
+ }
+
+ static INLINE void unpacklohi16 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ using simd256_16 = Simd256<uint16_t>;
+ using simd256_64 = Simd256<uint64_t>;
+ simd_vect a1 = simd256_64::template shuffle<0xD8>(a); // 0xD8 = 3120 base_4
+ simd_vect b1 = simd256_64::template shuffle<0xD8>(b);
+ s1 = simd256_16::unpacklo_twice(a1, b1);
+ s2 = simd256_16::unpackhi_twice(a1, b1);
+ }
+
+ /********************/
+ /* unpacklo_twice */
+ /********************/
+ static INLINE simd_vect unpacklo_twice2 (const simd_vect& a, const simd_vect& b) { return unpacklo2(a,b); }
+
+ static INLINE simd_vect unpacklo_twice4 (const simd_vect& a, const simd_vect& b) { return Simd256<uint64_t>::unpacklo_twice(a, b); }
+
+ static INLINE simd_vect unpacklo_twice8 (const simd_vect& a, const simd_vect& b) { return Simd256<uint32_t>::unpacklo_twice(a, b); }
+
+ static INLINE simd_vect unpacklo_twice16 (const simd_vect& a, const simd_vect& b) { return Simd256<uint16_t>::unpacklo_twice(a, b); }
+
+ /********************/
+ /* unpackhi_twice */
+ /********************/
+ static INLINE simd_vect unpackhi_twice2 (const simd_vect& a, const simd_vect& b) { return unpackhi2(a,b); }
+
+ static INLINE simd_vect unpackhi_twice4 (const simd_vect& a, const simd_vect& b) { return Simd256<uint64_t>::unpackhi_twice(a, b); }
+
+ static INLINE simd_vect unpackhi_twice8 (const simd_vect& a, const simd_vect& b) { return Simd256<uint32_t>::unpackhi_twice(a, b); }
+
+ static INLINE simd_vect unpackhi_twice16 (const simd_vect& a, const simd_vect& b) { return Simd256<uint16_t>::unpackhi_twice(a, b); }
+
+ /********************/
+ /* unpacklohi_twice */
+ /********************/
+ static INLINE void unpacklohi_twice2 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ unpacklohi2(s1, s2, a, b);
+ }
+
+ static INLINE void unpacklohi_twice4 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ using simd256_64 = Simd256<uint64_t>;
+ s1 = simd256_64::unpacklo_twice(a, b);
+ s2 = simd256_64::unpackhi_twice(a, b);
+ }
+
+ static INLINE void unpacklohi_twice8 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ using simd256_32 = Simd256<uint32_t>;
+ s1 = simd256_32::unpacklo_twice(a, b);
+ s2 = simd256_32::unpackhi_twice(a, b);
+ }
+
+ static INLINE void unpacklohi_twice16 (simd_vect& s1, simd_vect& s2, const simd_vect& a, const simd_vect& b) {
+ using simd256_16 = Simd256<uint16_t>;
+ s1 = simd256_16::unpacklo_twice(a, b);
+ s2 = simd256_16::unpackhi_twice(a, b);
+ }
+
+ };// MemoryOp<T, Simd256<T>>
+#endif
+
+#define Simd_vect typename Simd::vect_t
+
+ /*
+ * Generic arithmetic operation
+ */
+ template <class Simd>
+ INLINE Simd_vect reduce (const Simd_vect& a, const Simd_vect& p) {
+ Simd_vect t = Simd::greater(p,a);
+ return Simd::sub(a, Simd::vandnot(p,t));
+ }
+
+ template <class Element, class Simd>
+ INLINE void reduce (Element* a, const Simd_vect& p) {
+ Simd_vect V1;
+ V1 = MemoryOp<Element, Simd>::load(a);
+ V1 = reduce<Simd>(V1, p);
+ MemoryOp<Element, Simd>::store(a,V1);
+ }
+
+ template <class Simd>
+ INLINE Simd_vect add_mod (const Simd_vect& a, const Simd_vect& b, const Simd_vect& p) {
+ Simd_vect c = Simd::add(a,b);
+ return reduce<Simd>(c, p);
+ }
+
+ template <class Simd>
+ INLINE Simd_vect mul_mod (const Simd_vect& a, const Simd_vect& b, const Simd_vect& p, const Simd_vect& bp) {
+ // std::cout << "Inputs of mul_mod : a, b, p, bp, q, c, t, c - t\n";
+ Simd_vect q = Simd::mulhi(a,bp);
+ Simd_vect c = Simd::mullo(a,b);
+ Simd_vect t = Simd::mullo(q,p);
+ // FFLAS::print<Simd>(std::cout, a); std::cout << "\n";
+ // FFLAS::print<Simd>(std::cout, b); std::cout << "\n";
+ // FFLAS::print<Simd>(std::cout, p); std::cout << "\n";
+ // FFLAS::print<Simd>(std::cout, bp); std::cout << "\n";
+ // FFLAS::print<Simd>(std::cout, q); std::cout << "\n";
+ // FFLAS::print<Simd>(std::cout, c); std::cout << "\n";
+ // FFLAS::print<Simd>(std::cout, t); std::cout << "\n";
+ // FFLAS::print<Simd>(std::cout, Simd::sub(c,t)); std::cout << "\n\n";
+ return Simd::sub(c,t);
+ }
+
+ /*
+ * a = [a0, a0, a2, a2, ...]
+ * b = [?, b0, ?, b2, ...] with bp its Shoup mul_mod precomputation [b0p ? b2p ?, ... ]
+ * Return [?, (a0*b0) mod p, ?, (a2*b2) mod p, ... ]
+ */
+ template <class Simd, class SimdCompute_t>
+ INLINE Simd_vect mul_mod_half (const Simd_vect& a, const Simd_vect& b, const Simd_vect& p, const Simd_vect& bp) {
+#if 1
+ return mul_mod<Simd>(a, b , p, bp);
+#else
+ // TODO : DO SOMETHING IF Modular<uint64, uint128> and no mulx exists
+
+ // T2 = a * bp mod 2^64 (for Modular<Element = uint32, Compute_t = uint64>)
+ // bp = [b0p ? b2p ?, ... ] is enough
+ Simd_vect T2 = SimdCompute_t::mulx(a,bp);
+ Simd_vect T3 = Simd::mullo(T2,p);
+ // At this point T3= [? quo(D)*p ? quo(H)*p] mod 2^32
+ // T4 = [D D H H] * [?, b0, ?, b2] mod 2^32
+ T2 = Simd::mullo(a,b);
+ return Simd::sub(T2,T3);
+#endif
+ }
+
+#undef Simd_vect
+
+}
+
+#endif // __LINBOX_simd_additional_functions_H
diff --git a/linbox/algorithms/polynomial-matrix/simd.h b/linbox/algorithms/polynomial-matrix/simd.h
index badcc2d..6bd7987 100644
--- a/linbox/algorithms/polynomial-matrix/simd.h
+++ b/linbox/algorithms/polynomial-matrix/simd.h
@@ -32,9 +32,8 @@
#include <iostream>
-#ifdef __AVX2__
+#ifdef __LINBOX_HAVE_AVX_INSTRUCTIONS2
/* 256 bits CODE HERE */
-#define __LINBOX_HAVE_AVX2
// define 256 bits simd vector type
typedef __m256i _vect256_t;
@@ -197,11 +196,11 @@ typedef __m128i _vect128_t;
#define VEC128_UNPACK_HI_32(C,A,B) \
C = _mm_unpackhi_epi32(A,B);
-// C = unpack_lo32(A,B)
+// C = unpack_lo64(A,B)
#define VEC128_UNPACK_LO_64(C,A,B) \
C = _mm_unpacklo_epi64(A,B);
-// C = unpack_hi32(A,B)
+// C = unpack_hi64(A,B)
#define VEC128_UNPACK_HI_64(C,A,B) \
C = _mm_unpackhi_epi64(A,B);
diff --git a/linbox/algorithms/rational-reconstruction.h b/linbox/algorithms/rational-reconstruction.h
index a14c2f0..7f1d808 100644
--- a/linbox/algorithms/rational-reconstruction.h
+++ b/linbox/algorithms/rational-reconstruction.h
@@ -445,7 +445,7 @@ namespace LinBox
std::vector<Integer> zz(_lcontainer.size(), modulus); // stores each truncated p-adic approximation
_r.assign(modulus, _r.one);
- size_t len = _lcontainer.length();
+ uint64_t len = _lcontainer.length();
/* should be ceil(log(2*numbound*denbound)/log(prime))
*
* should grow in rough proportion to overall
@@ -490,7 +490,7 @@ namespace LinBox
_r.convert(iD, _lcontainer.numbound());
_r.convert(iN, _lcontainer.denbound());
_r.convert(pPower, prime);
- pPower = pow(pPower, uint64_t(len)-1);
+ pPower = Givaro::pow(pPower, uint64_t(len-1));
tmp = pPower * iN;
tmp /= iD;
diff --git a/linbox/algorithms/rational-solver.inl b/linbox/algorithms/rational-solver.inl
index 7e92d61..8f8e95d 100644
--- a/linbox/algorithms/rational-solver.inl
+++ b/linbox/algorithms/rational-solver.inl
@@ -523,7 +523,7 @@ namespace LinBox
#endif
// m = n =
root(tmproot, tmp,3);
- m = n = tmproot;
+ m = n = uint32_t(tmproot);
// std::cout<<"block factor= "<<m<<"\n";;
typedef SparseMatrix<Field> FMatrix;
diff --git a/linbox/algorithms/smith-form-sparseelim-poweroftwo.h b/linbox/algorithms/smith-form-sparseelim-poweroftwo.h
index 879558a..b668d00 100644
--- a/linbox/algorithms/smith-form-sparseelim-poweroftwo.h
+++ b/linbox/algorithms/smith-form-sparseelim-poweroftwo.h
@@ -396,7 +396,7 @@ namespace LinBox
ranks.resize(0);
typedef typename BB::Row Vecteur;
- size_t EXPONENT = EXPONENTMAX;
+ uint64_t EXPONENT = EXPONENTMAX;
UInt_t TWOK(1U); TWOK <<= EXPONENT;
UInt_t TWOKMONE(TWOK); --TWOKMONE;
ENSURE( TWOK == (UInt_t(1U) << EXPONENT) );
diff --git a/linbox/algorithms/vector-fraction.h b/linbox/algorithms/vector-fraction.h
index f128a09..89eca12 100644
--- a/linbox/algorithms/vector-fraction.h
+++ b/linbox/algorithms/vector-fraction.h
@@ -220,7 +220,7 @@ namespace LinBox
// find A s.t. gcd(denBound, denom + A*other.denom) = g
// strategy: pick random values of A <= d(y_0)
- integer tmp;
+ uint64_t tmp;
_domain.convert(tmp, denBound);
typename Domain::RandIter randiter(_domain, tmp); //seed omitted
// TODO: I don't think this random iterator has high-quality low order bits, which are needed
@@ -274,7 +274,7 @@ namespace LinBox
// find A s.t. gcd(denBound, denom + A*other.denom) = g
// strategy: pick random values of A <= lcm(d(denom), d(other.denom))
- integer tmp;
+ uint64_t tmp;
_domain.mul(tmpe, denom, other.denom);
_domain.convert(tmp, tmpe);
typename Domain::RandIter randiter(_domain, tmp); //seed omitted
diff --git a/linbox/blackbox/apply.h b/linbox/blackbox/apply.h
index 42cc7ce..9993864 100644
--- a/linbox/blackbox/apply.h
+++ b/linbox/blackbox/apply.h
@@ -158,7 +158,7 @@ namespace LinBox
integer tmp;
bool use_neg=false;
- size_t maxword=0;
+ uint32_t maxword=0;
for (size_t i=0;i<n;++i){
_domain.convert(tmp,x[i]);
if (tmp <0)
@@ -424,7 +424,7 @@ namespace LinBox
maxBitSize+=1;
}
// compute the number of chunk
- if (maxValue*prime*_matM.coldim() < integer("9007199254740992")){
+ if (maxValue*prime* uint32_t(_matM.coldim()) < integer("9007199254740992")){
num_chunks=1;
use_neg=false;
}
@@ -988,7 +988,7 @@ namespace LinBox
LinBox::integer result, tmp;
if (use_neg) {
result = -ctd[i];
- result <<= (num_chunks-1)*16;
+ result <<= uint64_t((num_chunks-1)*16);
#ifdef DEBUG_CHUNK_APPLYM
cout << "rcneg: " << result << endl;
#endif
diff --git a/linbox/linbox-config.h b/linbox/linbox-config.h
index 2aadc0b..51629e4 100644
--- a/linbox/linbox-config.h
+++ b/linbox/linbox-config.h
@@ -58,12 +58,19 @@ using std::ptrdiff_t;
#endif
#endif
-#ifdef __FFLASFFPACK_USE_SIMD
-#define __LINBOX_USE_SIMD
+#ifdef __FFLASFFPACK_HAVE_SSE4_1_INSTRUCTIONS
+#define __LINBOX_HAVE_SSE4_1_INSTRUCTIONS
#else
#define __LINBOX_NO_SIMD
#endif
+#ifdef __FFLASFFPACK_HAVE_AVX_INSTRUCTIONS
+#define __LINBOX_HAVE_AVX_INSTRUCTIONS
+#endif
+
+#ifdef __FFLASFFPACK_HAVE_AVX2_INSTRUCTIONS
+#define __LINBOX_HAVE_AVX_INSTRUCTIONS2
+#endif
namespace LinBox {
diff --git a/linbox/matrix/polynomial-matrix.h b/linbox/matrix/polynomial-matrix.h
index afe31cc..69263e9 100755
--- a/linbox/matrix/polynomial-matrix.h
+++ b/linbox/matrix/polynomial-matrix.h
@@ -34,6 +34,16 @@
#include "givaro/modular.h"
#include <algorithm>
+#ifdef TRACK_MEMORY_MATPOL
+uint64_t max_memory=0, cur_memory=0;
+#define ADD_MEM(x) {cur_memory+=x; max_memory=std::max(max_memory,cur_memory);}
+#define DEL_MEM(x) {cur_memory-=x;}
+#define STR_MEMINFO std::right<<"\033[31m [ MEM: cur="<<cur_memory/1000000.<<" Mo --- max="<<max_memory/1000000.<<" Mo \033[0m]"
+#define PRINT_MEMINFO std::cerr<<"\033[31m[ MEM: cur="<<cur_memory/1000000.<<" Mo --- max="<<max_memory/1000000.<<" Mo ]\033[0m"<<std::endl;
+#else
+#define ADD_MEM(X) ;
+#define DEL_MEM(X) ;
+#endif
#define COPY_BLOCKSIZE 32
@@ -46,6 +56,9 @@ namespace LinBox{
template<size_t type, size_t storage, class Field>
class PolynomialMatrix;
+ template<typename Field> uint64_t element_storage(const Field& F) { integer p;F.characteristic(p); return length(p);}
+ template<> uint64_t element_storage(const Givaro::Modular<Givaro::Integer> &F) { integer p;F.characteristic(p); return length(p)+sizeof(Givaro::Integer);}
+
// Class for Polynomial Matrix stored as a Matrix of Polynomials
template<class _Field>
class PolynomialMatrix<PMType::polfirst,PMStorage::plain,_Field> {
@@ -73,9 +86,17 @@ namespace LinBox{
_repview[i*_col+j]= Polynomial(_rep.begin()+(i*_col+j)*_store,_size);
//integer p;
//std::cout<<"MatrixP allocating : "<<r*c*s*length(f.characteristic(p))/1000000.<<"Mo"<<std::endl;
+ //std::cout<<"(ALLOC) PolynomialMatrix<polfirst> at "<<this<<" : "<<r<<"x"<<c<<" - size= "<<s<<" ==> "<<MB(realmeminfo())<<" Mo "<<STR_MEMINFO<<std::endl;
+ ADD_MEM(realmeminfo());
}
+ PolynomialMatrix(const Self_t&) = delete;
+
~PolynomialMatrix(){
+ DEL_MEM(realmeminfo());
+ _rep.clear();
+ //std::cout<<"(FREE) PolynomialMatrix<polfirst> at "<<this<<" : "<<_row<<"x"<<_col<<" - size= "<<_store<<" ==> "<<MB(realmeminfo())<<" Mo "<<STR_MEMINFO<<std::endl;
+
//integer p;
//std::cout<<"MatrixP Desallocating : "<<_row*_col*_store*length(_fld->characteristic(p))/1000000.<<"Mo"<<std::endl;
@@ -106,6 +127,8 @@ namespace LinBox{
// resize the polynomial length of the polynomial matrix
void resize(size_t s){
+ if (s==_store) return;
+ //std::cout<<"MATPOL RESIZING : "<<_store<<" --> "<<s<<std::endl;
if (s>_store){
_rep.resize(s*_row*_col);
size_t k=s*_row*_col-1;
@@ -123,9 +146,14 @@ namespace LinBox{
for (size_t j=0;j<s;j++,k++)
_rep[k]=_rep[i*_store+j];
_rep.resize(s*_row*_col);
+ //_rep.shrink_to_fit();
}
+ integer p;_fld->characteristic(p); size_t bb=p.bitsize(); if(bb>64) bb+=128; bb/=8;
+ size_t mem=realmeminfo();
_store=s;
setsize(s);
+ ADD_MEM(realmeminfo());
+ DEL_MEM(mem);
}
void changesize(size_t s){
@@ -289,7 +317,10 @@ namespace LinBox{
Element* getWritePointer(){return &_rep[0];}
const Element* getPointer() const {return &_rep[0];}
- size_t realmeminfo()const { return _rep.capacity()*sizeof(Element)+_repview.capacity()*sizeof(Polynomial);}
+ size_t realmeminfo()const {
+ return _row*_col*(_store*element_storage(field())+sizeof(Polynomial));}
+ // return _rep.capacity()*sizeof(Element)+_repview.capacity()*sizeof(Polynomial);}
+
size_t meminfo()const { return _rep.size()*sizeof(Element);}
void changeField(const Field& F){_fld=&F;}
@@ -320,16 +351,21 @@ namespace LinBox{
PolynomialMatrix() {}
+ PolynomialMatrix(const Self_t&) = delete;
+
PolynomialMatrix(const Field& f, size_t r, size_t c, size_t s) :
_rep(s,Matrix(f)), _row(r), _col(c), _size(s), _fld(&f) {
//_row(r), _col(c), _size(s), _fld(&f) {
for(size_t i=0;i<s;i++)
_rep[i].init(f,r,c);
//integer p;
- //std::cout<<"PMatrix allocating : "<<r*c*s*length(f.characteristic(p))/1000000.<<"Mo"<<std::endl;
+ //std::cout<<"(ALLOC) matfirst at "<<this<<" : "<<r<<"x"<<c<<" - size= "<<s<<" ==> "<<MB(realmeminfo())<<" Mo"<<std::endl;
+ ADD_MEM(realmeminfo());
}
~PolynomialMatrix(){
+ DEL_MEM(realmeminfo());
+ //std::cout<<"(FREE) matfirst at "<<this<<" : "<<_row<<"x"<<_col<<" - size= "<<_size<<" ==> "<<MB(realmeminfo())<<" Mo"<<std::endl;
//integer p;
//std::cout<<"PMatrix Desallocating : "<<_row*_col*_size*length(_fld->characteristic(p))/1000000.<<"Mo"<<std::endl;
}
@@ -511,6 +547,11 @@ namespace LinBox{
return os;
}
+ size_t realmeminfo()const {
+
+ return _size*(_row*_col*element_storage(field())+sizeof(Matrix));
+ }
+
// NEED FOR YUHASZ
typedef typename std::vector<Matrix>::const_iterator const_iterator;
const_iterator begin() const {return _rep.begin();}
diff --git a/linbox/matrix/sparsematrix/sparse-csr-matrix.h b/linbox/matrix/sparsematrix/sparse-csr-matrix.h
index f60d125..71cbd50 100644
--- a/linbox/matrix/sparsematrix/sparse-csr-matrix.h
+++ b/linbox/matrix/sparsematrix/sparse-csr-matrix.h
@@ -501,7 +501,7 @@ namespace LinBox {
S._start[i+1] += S._start[i] ;
{
- size_t i = 0 ;
+ index_t i = 0 ;
svector_t done_col(S.rowdim(),0);
for (size_t nextlig = 1 ; nextlig <= rowdim() ; ++nextlig) {
// treating line before nextlig
@@ -635,7 +635,7 @@ namespace LinBox {
/// make matrix ready to use after a sequence of setEntry calls.
void finalize()
{
- if (_start[rowdim()] != _nbnz) { /* if it is so, then all before are 0 and we are fine... */
+ if (_start[rowdim()] != (index_t)_nbnz) { /* if it is so, then all before are 0 and we are fine... */
for (size_t i = 2 ; i <= rowdim() ; ++i)
_start[i] += _start[i-1];
linbox_check(_start[rowdim()] == _nbnz);
@@ -685,7 +685,7 @@ namespace LinBox {
myIterator low = std::lower_bound (beg, end, j);
ibeg = (index_t)(low-_colid.begin());
// insert
- if ( low == end || j != _colid[ibeg] ) {
+ if ( low == end || (index_t)j != _colid[ibeg] ) {
// std::cout << "# 2 insert " << i << ',' << j << ':' << e << std::endl;
for (size_t k = i+1 ; k <= _rownb ; ++k)
_start[k] += 1 ;
diff --git a/linbox/randiter/givaro-poly.h b/linbox/randiter/givaro-poly.h
index 48308c8..9ce0b0c 100644
--- a/linbox/randiter/givaro-poly.h
+++ b/linbox/randiter/givaro-poly.h
@@ -48,7 +48,7 @@ namespace LinBox
GivaroPolyRandIter(Field pd,
const integer& size = 0,
const integer& seed = 0) :
- _randIter(Givaro::GIV_randIter<SubDomain,integer>(pd.subdomain(), size, seed))
+ _randIter(Givaro::GIV_randIter<SubDomain,integer>(pd.subdomain(), uint64_t(size), seed))
{_pd = pd;}
GivaroPolyRandIter(const GivaroPolyRandIter &R)
diff --git a/linbox/randiter/mersenne-twister.h b/linbox/randiter/mersenne-twister.h
index 4687176..d3af870 100644
--- a/linbox/randiter/mersenne-twister.h
+++ b/linbox/randiter/mersenne-twister.h
@@ -93,9 +93,9 @@ namespace LinBox
}
-#if defined(LinBoxSrcOnly) or defined(LinBoxTestOnly)
-#include "linbox/randiter/mersenne-twister.C"
-#endif
+//#if defined(LinBoxSrcOnly) or defined(LinBoxTestOnly)
+//#include "linbox/randiter/mersenne-twister.C"
+//#endif
#endif // __LINBOX_mersenne_twister_H
diff --git a/linbox/randiter/random-fftprime.h b/linbox/randiter/random-fftprime.h
index bd4bb18..183ed67 100644
--- a/linbox/randiter/random-fftprime.h
+++ b/linbox/randiter/random-fftprime.h
@@ -1,3 +1,5 @@
+/* -*- mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
/* linbox/algorithms/
* Copyright (C) 2005 Pascal Giorgi
*
@@ -35,12 +37,12 @@ namespace LinBox
class RandomFFTPrime {
public:
- // define the prime type
+ // define the prime type
typedef integer Prime_Type;
-
+
uint64_t _bits;
- Prime_Type _prime_bound;
-
+ Prime_Type _prime_bound;
+
RandomFFTPrime(Prime_Type pbound=0x100000, unsigned long seed = 0) :
_bits(pbound.bitsize()), _prime_bound(pbound)
{
@@ -51,29 +53,29 @@ namespace LinBox
}
- /** @brief randomPrime(size_t b)
+ /** @brief randomPrime(size_t b)
* return a random FFT prime with a 2-valuation larger than b in its order
- * the randomness is on the FFT primes lying in the given range
- * an error is thrown if no such prime exist
+ * the randomness is on the FFT primes lying in the given range
+ * an error is thrown if no such prime exist
*/
inline Prime_Type randomPrime (size_t b) const
{
- integer tmp;
- randomPrime(tmp,b);
+ integer tmp;
+ randomPrime(tmp,b);
return tmp;
- }
+ }
- /** @brief randomPrime(Prime_Type& p, size_t b)
+ /** @brief randomPrime(Prime_Type& p, size_t b)
* return a random FFT prime with a 2-valuation larger than b in its order
- * the randomness is on the FFT primes lying in the given range
- * an error is thrown if no such prime exist
+ * the randomness is on the FFT primes lying in the given range
+ * an error is thrown if no such prime exist
*/
inline Prime_Type randomPrime (Prime_Type& t, uint64_t b) const
{
- linbox_check(b<_bits);
+ linbox_check(b<_bits);
size_t tresh;
do {
- size_t cbits= (size_t)rand() %(_bits-b);
+ size_t cbits= (size_t)rand() %(_bits-b);
tresh = 1<<(cbits);
uint64_t p = 1<<((size_t)_bits-cbits);
do {
@@ -83,8 +85,8 @@ namespace LinBox
} while (!Givaro::Protected::probab_prime(t,25) && (tresh));
}
while(tresh==0);
- linbox_check(Givaro::Protected::probab_prime(t,25))
- return t;
+ linbox_check(Givaro::Protected::probab_prime(t,25))
+ return t;
}
/** @brief generatePrime()
@@ -93,7 +95,7 @@ namespace LinBox
inline Prime_Type generatePrime() const
{
integer tmp;
- generatePrime(tmp);
+ generatePrime(tmp);
return tmp;
}
@@ -119,97 +121,112 @@ namespace LinBox
return t;
}
- // generate a vector of distinct FFT primes with largest 2-valuation
- // s.t. their product is larger than a given bound
- inline std::vector<Prime_Type> generatePrimes (const Prime_Type & bound) const {
- std::vector<Prime_Type> primes;
- Prime_Type prod=1;
- integer tmp;
- for (int64_t b = _bits - 1; b >= 0; b--)
- for (int64_t l = ((int64_t)1 << (_bits - b - 1)) + 1; l < (1L << (_bits - b)); l +=2) {
- tmp = ((int64_t)1 << b) * l + 1;
- if (Givaro::Protected::probab_prime(tmp, 25) >= 1) {
- primes.push_back(tmp);
- prod*=tmp;
- if (prod > bound)
- return primes;
- }
- }
- linbox_check(prod > bound ); // Could not find enough primes
- return primes;
- }
-
- // generate a vector of distinct FFT primes with largest 2-valuation
- // s.t. their product is larger than a given bound
- inline bool generatePrimes (const Prime_Type & bound, std::vector<Prime_Type> &primes) const {
- primes.clear();
- Prime_Type prod=1;
- integer tmp;
- for (int64_t b = (int64_t)_bits - 1; b >= 0; b--)
- for (int64_t l = (1L << ((int64_t)_bits - b - 1)) + 1; l < (1L << ((int64_t)_bits - b)); l +=2) {
- tmp = (1L << b) * l + 1;
- if (Givaro::Protected::probab_prime(tmp, 25) >= 1) {
- primes.push_back(tmp);
- prod*=tmp;
- if (prod > bound){
- return true;
- }
- }
- }
- return false; // false -> Could not find enough primes
- }
-
- size_t twoVal(integer t) const {
- integer x=t;
- size_t v=0;
- while(x%2 == 0) {v++;x/=2;}
- return v;
- }
-
- // generate a vector of distinct FFT primes with 2-valuation largest than val
- // s.t. their product is larger than a given bound
- inline bool generatePrimes ( uint64_t val, const Prime_Type & bound, std::vector<Prime_Type> &primes) const {
- primes.clear();
- Prime_Type prod=1;
- integer tmp;
- // std::cout<<"rns bound: "<<bound<<std::endl;
- // std::cout<<"2 valuation: "<<val<<std::endl;
- // std::cout<<"prime bitmax: "<<_bits<<std::endl;
- // std::cout<<"prime max: "<<_prime_bound<<std::endl;
-
- if (val > _bits) return false;
+ // generate a vector of distinct FFT primes with largest 2-valuation
+ // s.t. their product is larger than a given bound
+ inline std::vector<Prime_Type> generatePrimes (const Prime_Type & bound) const {
+ std::vector<Prime_Type> primes;
+ Prime_Type prod=1;
+ integer tmp;
+ for (int64_t b = _bits - 1; b >= 0; b--)
+ for (int64_t l = ((int64_t)1 << (_bits - b - 1)) + 1; l < (1L << (_bits - b)); l +=2) {
+ tmp = ((int64_t)1 << b) * l + 1;
+ if (Givaro::Protected::probab_prime(tmp, 25) >= 1) {
+ primes.push_back(tmp);
+ prod*=tmp;
+ if (prod > bound)
+ return primes;
+ }
+ }
+ linbox_check(prod > bound ); // Could not find enough primes
+ return primes;
+ }
+
+ // generate a vector of distinct FFT primes with largest 2-valuation
+ // s.t. their product is larger than a given bound
+ inline bool generatePrimes (const Prime_Type & bound, std::vector<Prime_Type> &primes) const {
+ primes.clear();
+ Prime_Type prod=1;
+ integer tmp;
+ for (int64_t b = (int64_t)_bits - 1; b >= 0; b--)
+ for (int64_t l = (1L << ((int64_t)_bits - b - 1)) + 1; l < (1L << ((int64_t)_bits - b)); l +=2) {
+ tmp = (1L << b) * l + 1;
+ if (Givaro::Protected::probab_prime(tmp, 25) >= 1) {
+ primes.push_back(tmp);
+ prod*=tmp;
+ if (prod > bound){
+ return true;
+ }
+ }
+ }
+ return false; // false -> Could not find enough primes
+ }
+
+ size_t twoVal(integer t) const {
+ integer x=t;
+ size_t v=0;
+ while(x%2 == 0) {v++;x/=2;}
+ return v;
+ }
+
+ // generate a vector of distinct FFT primes with 2-valuation largest than val
+ // s.t. their product is larger than a given bound
+ inline bool generatePrimes ( uint64_t val, const Prime_Type & bound, std::vector<Prime_Type> &primes) const {
+ primes.clear();
+ Prime_Type prod=1;
+ integer tmp;
+ // std::cout<<"rns bound: "<<bound<<std::endl;
+ // std::cout<<"2 valuation: "<<val<<std::endl;
+ // std::cout<<"prime bitmax: "<<_bits<<std::endl;
+ // std::cout<<"prime max: "<<_prime_bound<<std::endl;
+
+ if (val > _bits) return false;
#if 0
- for (int64_t b = (int64_t)_bits; b >= (int64_t)val; b--)
- // for (uint64_t l = (1ULL << ((int64_t)_bits - b - 1)) + 1; l < (1ULL << ((int64_t)_bits - b)); l +=2) {
- for (int64_t l = ((int64_t)1 << ((int64_t)_bits - b)) - 1; l >=1; l -=2) {
- tmp = ((int64_t)1 << b) * l + 1;
- if (Givaro::Protected::probab_prime(tmp, 25) >= 1) {
- primes.push_back(tmp);
- prod*=tmp;
- //std::cout<<tmp<<" -> "<<tmp.bitsize()<<" (order="<<twoVal(tmp-1)<<") "<<prod<<std::endl;
- if (prod > bound){
- return true;
- }
- }
- }
+ for (int64_t b = (int64_t)_bits; b >= (int64_t)val; b--)
+ // for (uint64_t l = (1ULL << ((int64_t)_bits - b - 1)) + 1; l < (1ULL << ((int64_t)_bits - b)); l +=2) {
+ for (int64_t l = ((int64_t)1 << ((int64_t)_bits - b)) - 1; l >=1; l -=2) {
+ tmp = ((int64_t)1 << b) * l + 1;
+ if (Givaro::Protected::probab_prime(tmp, 25) >= 1) {
+ primes.push_back(tmp);
+ prod*=tmp;
+ //std::cout<<tmp<<" -> "<<tmp.bitsize()<<" (order="<<twoVal(tmp-1)<<") "<<prod<<std::endl;
+ if (prod > bound){
+ return true;
+ }
+ }
+ }
#else
- for (int64_t l = (_prime_bound -1) >>val ; l >=1; l -=1) {
- tmp = ((int64_t)1 << val) * l + 1;
- if (Givaro::Protected::probab_prime(tmp, 25) >= 1) {
- primes.push_back(tmp);
- prod*=tmp;
- //std::cout<<tmp<<" -> "<<tmp.bitsize()<<" (order="<<twoVal(tmp-1)<<") "<<prod<<std::endl;
- if (prod > bound){
- return true;
- }
- }
- }
-
-
+ for (int64_t l = (_prime_bound -1) >>val ; l >=1; l -=1) {
+ tmp = ((int64_t)1 << val) * l + 1;
+ if (Givaro::Protected::probab_prime(tmp, 25) >= 1) {
+ primes.push_back(tmp);
+ prod*=tmp;
+ //std::cout<<tmp<<" -> "<<tmp.bitsize()<<" (order="<<twoVal(tmp-1)<<") "<<prod<<std::endl;
+ if (prod > bound){
+ // try to replace the last prime with a smallest one
+ for (int64_t k=1;k<l;k++){
+ tmp = ((int64_t)1 << val) * k + 1;
+ if (Givaro::Protected::probab_prime(tmp, 25) >= 1) {
+ if (prod*tmp > bound*primes.back()){
+ //std::cout<<"replacing prime "<<primes.back()<<" with "<<tmp<< " -> "<<tmp.bitsize()<<" (order="<<twoVal(tmp-1)<<") ";
+ prod/=primes.back();
+ primes.back()=tmp;
+ prod*=tmp;
+ //std::cout<<prod<<std::endl;
+ return true;
+ }
+ }
+ }
+
+ return true;
+ }
+ }
+ }
+
+
#endif
- return false; // false -> Could not find enough primes
- }
+ return false; // false -> Could not find enough primes
+ }
/** @brief setSeed (unsigned long ul)
* Set the random seed to be ul.
@@ -222,13 +239,3 @@ namespace LinBox
}
#endif //__LINBOX_random_fftprime_H
-
-
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,:0,t0,+0,=s
-// Local Variables:
-// mode: C++
-// tab-width: 8
-// indent-tabs-mode: nil
-// c-basic-offset: 8
-// End:
-
diff --git a/linbox/ring/modular/Makefile.am b/linbox/ring/modular/Makefile.am
index 7234edb..88f62fe 100644
--- a/linbox/ring/modular/Makefile.am
+++ b/linbox/ring/modular/Makefile.am
@@ -23,7 +23,6 @@ pkgincludesubdir=$(pkgincludedir)/ring/modular
BASIC_HDRS = \
modular-unsigned.h \
- modular-unsigned.inl \
modular-int32.h \
modular-int64.h \
modular-short.h \
diff --git a/linbox/ring/modular/modular-int32.h b/linbox/ring/modular/modular-int32.h
index 7c02cff..44889e3 100644
--- a/linbox/ring/modular/modular-int32.h
+++ b/linbox/ring/modular/modular-int32.h
@@ -57,8 +57,7 @@ namespace LinBox
template<class Field>
class MVProductDomain;
- template <>
- template<class Compute>
+ template<class Compute>
class FieldAXPY<Givaro::Modular<int32_t,Compute> > {
public:
@@ -133,7 +132,6 @@ namespace LinBox
};
- template <>
template <class Compute>
class DotProductDomain<Givaro::Modular<int32_t,Compute> > : public VectorDomainBase<Givaro::Modular<int32_t,Compute> > {
@@ -201,7 +199,6 @@ namespace LinBox
// Specialization of MVProductDomain for int32_t modular field
- template <>
template <class Compute>
class MVProductDomain<Givaro::Modular<int32_t,Compute> > {
public:
@@ -222,198 +219,176 @@ namespace LinBox
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::DenseVectorTag) const;
- template <class Vector1, class Matrix, class Vector2>
- Vector1 &mulColDenseSpecialized
- (const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseSequenceVectorTag) const;
- template <class Vector1, class Matrix, class Vector2>
- Vector1 &mulColDenseSpecialized
- (const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseAssociativeVectorTag) const;
- template <class Vector1, class Matrix, class Vector2>
- Vector1 &mulColDenseSpecialized
- (const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseParallelVectorTag) const;
+ VectorCategories::DenseVectorTag) const
+ {
- mutable std::vector<uint64_t> _tmp;
- };
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
- template <class Compute>
- template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<int32_t,Compute> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<int32_t,Compute> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::DenseVectorTag) const
- {
-
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j;
- typename Matrix::Column::const_iterator k;
- std::vector<uint64_t>::iterator l;
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j;
+ typename Matrix::Column::const_iterator k;
+ std::vector<uint64_t>::iterator l;
- uint64_t t;
+ uint64_t t;
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
- std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
+ std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
- for (j = v.begin (); j != v.end (); ++j, ++i)
- {
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
- {
- t = ((uint64_t) *k) * ((uint64_t) *j);
+ for (j = v.begin (); j != v.end (); ++j, ++i)
+ {
+ for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
+ {
+ t = ((uint64_t) *k) * ((uint64_t) *j);
- *l += t;
+ *l += t;
- if (*l < t)
- *l += (uint64_t) VD.faxpy ()._two_64;
- }
- }
+ if (*l < t)
+ *l += (uint64_t) VD.faxpy ()._two_64;
+ }
+ }
- typename Vector1::iterator w_j;
- typedef typename Vector1::value_type elements ;
+ typename Vector1::iterator w_j;
+ typedef typename Vector1::value_type elements ;
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = elements(*l % VD.field ().characteristic());
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = elements(*l % VD.field ().characteristic());
- return w;
- }
+ return w;
+ }
- template <class Compute>
- template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<int32_t,Compute> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<int32_t,Compute> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseSequenceVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
+ template <class Vector1, class Matrix, class Vector2>
+ Vector1 &mulColDenseSpecialized
+ (const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
+ VectorCategories::SparseSequenceVectorTag) const
+ {
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j;
- typename Matrix::Column::const_iterator k;
- std::vector<uint64_t>::iterator l;
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j;
+ typename Matrix::Column::const_iterator k;
+ std::vector<uint64_t>::iterator l;
- uint64_t t;
+ uint64_t t;
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
- std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
+ std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
- for (j = v.begin (); j != v.end (); ++j, ++i) {
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l) {
- t = ((uint64_t) k->second) * ((uint64_t) *j);
+ for (j = v.begin (); j != v.end (); ++j, ++i) {
+ for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l) {
+ t = ((uint64_t) k->second) * ((uint64_t) *j);
- _tmp[k->first] += t;
+ _tmp[k->first] += t;
- if (_tmp[k->first] < t)
- _tmp[k->first] += (uint64_t)VD.faxpy ()._two_64;
- }
- }
+ if (_tmp[k->first] < t)
+ _tmp[k->first] += (uint64_t)VD.faxpy ()._two_64;
+ }
+ }
- typename Vector1::iterator w_j;
- typedef typename Vector1::value_type val_t;
+ typename Vector1::iterator w_j;
+ typedef typename Vector1::value_type val_t;
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = (val_t)( (int32_t)(*l) % VD.field ().characteristic() );
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = (val_t)( (int32_t)(*l) % VD.field ().characteristic() );
- return w;
- }
+ return w;
+ }
- template <class Compute>
- template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<int32_t,Compute> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<int32_t,Compute> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseAssociativeVectorTag) const
- {
+ template <class Vector1, class Matrix, class Vector2>
+ Vector1 &mulColDenseSpecialized
+ (const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
+ VectorCategories::SparseAssociativeVectorTag) const
+ {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j;
- typename Matrix::Column::const_iterator k;
- std::vector<uint64_t>::iterator l;
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j;
+ typename Matrix::Column::const_iterator k;
+ std::vector<uint64_t>::iterator l;
- uint64_t t;
+ uint64_t t;
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
- std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
+ std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
- for (j = v.begin (); j != v.end (); ++j, ++i)
- {
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
- {
- t = ((uint64_t) k->second) * ((uint64_t) *j);
+ for (j = v.begin (); j != v.end (); ++j, ++i)
+ {
+ for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
+ {
+ t = ((uint64_t) k->second) * ((uint64_t) *j);
- _tmp[k->first] += t;
+ _tmp[k->first] += t;
- if (_tmp[k->first] < t)
- _tmp[k->first] += VD.faxpy ()._two_64;
- }
- }
+ if (_tmp[k->first] < t)
+ _tmp[k->first] += VD.faxpy ()._two_64;
+ }
+ }
- typename Vector1::iterator w_j;
+ typename Vector1::iterator w_j;
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = *l % VD.field ().characteristic();
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = *l % VD.field ().characteristic();
- return w;
- }
+ return w;
+ }
- template <class Compute>
- template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<int32_t,Compute> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<int32_t,Compute> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseParallelVectorTag) const
- {
+ template <class Vector1, class Matrix, class Vector2>
+ Vector1 &mulColDenseSpecialized
+ (const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
+ VectorCategories::SparseParallelVectorTag) const
+ {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j;
- typename Matrix::Column::first_type::const_iterator k_idx;
- typename Matrix::Column::second_type::const_iterator k_elt;
- std::vector<uint64_t>::iterator l;
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j;
+ typename Matrix::Column::first_type::const_iterator k_idx;
+ typename Matrix::Column::second_type::const_iterator k_elt;
+ std::vector<uint64_t>::iterator l;
- uint64_t t;
+ uint64_t t;
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
- std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
+ std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
- for (j = v.begin (); j != v.end (); ++j, ++i)
- {
- for (k_idx = i->first.begin (), k_elt = i->second.begin (), l = _tmp.begin ();
- k_idx != i->first.end ();
- ++k_idx, ++k_elt, ++l)
- {
- t = ((uint64_t) *k_elt) * ((uint64_t) *j);
+ for (j = v.begin (); j != v.end (); ++j, ++i)
+ {
+ for (k_idx = i->first.begin (), k_elt = i->second.begin (), l = _tmp.begin ();
+ k_idx != i->first.end ();
+ ++k_idx, ++k_elt, ++l)
+ {
+ t = ((uint64_t) *k_elt) * ((uint64_t) *j);
- _tmp[*k_idx] += t;
+ _tmp[*k_idx] += t;
- if (_tmp[*k_idx] < t)
- _tmp[*k_idx] += VD.faxpy()._two_64;
- }
- }
+ if (_tmp[*k_idx] < t)
+ _tmp[*k_idx] += VD.faxpy()._two_64;
+ }
+ }
- typename Vector1::iterator w_j;
+ typename Vector1::iterator w_j;
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = *l % VD.field().characteristic();
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = *l % VD.field().characteristic();
- return w;
- }
+ return w;
+ }
+ mutable std::vector<uint64_t> _tmp;
+ };
}
#endif //__LINBOX_modular_int32_H
diff --git a/linbox/ring/modular/modular-int64.h b/linbox/ring/modular/modular-int64.h
index 7a860a7..8c0c563 100644
--- a/linbox/ring/modular/modular-int64.h
+++ b/linbox/ring/modular/modular-int64.h
@@ -61,7 +61,6 @@ namespace LinBox
template<class Field>
class MVProductDomain;
- template <>
template <typename Compute_t>
class FieldAXPY<Givaro::Modular<int64_t,Compute_t> > {
public:
@@ -134,7 +133,6 @@ namespace LinBox
};
- template <>
template <typename Compute_t>
class DotProductDomain<Givaro::Modular<int64_t,Compute_t> > : public VectorDomainBase<Givaro::Modular<int64_t,Compute_t> > {
@@ -201,7 +199,6 @@ namespace LinBox
// Specialization of MVProductDomain for int64_t modular field
- template <>
template <typename Compute_t>
class MVProductDomain<Givaro::Modular<int64_t,Compute_t> > {
public:
@@ -222,210 +219,175 @@ namespace LinBox
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field> &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::DenseVectorTag) const;
- template <class Vector1, class Matrix, class Vector2>
- Vector1 &mulColDenseSpecialized
- (const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseSequenceVectorTag) const;
- template <class Vector1, class Matrix, class Vector2>
- Vector1 &mulColDenseSpecialized
- (const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseAssociativeVectorTag) const;
- template <class Vector1, class Matrix, class Vector2>
- Vector1 &mulColDenseSpecialized
- (const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseParallelVectorTag) const;
+ VectorCategories::DenseVectorTag) const
+ {
- mutable std::vector<uint64_t> _tmp;
- };
-
- template <typename Compute_t>
- template <class Vector1, class Matrix, class Vector2>
- Vector1 & MVProductDomain<Givaro::Modular<int64_t,Compute_t> >::
- mulColDenseSpecialized (const VectorDomain<Givaro::Modular<int64_t,Compute_t> > &VD,
- Vector1 &w,
- const Matrix &A,
- const Vector2 &v,
- VectorCategories::DenseVectorTag) const
- {
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j;
+ typename Matrix::Column::const_iterator k;
+ std::vector<uint64_t>::iterator l;
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j;
- typename Matrix::Column::const_iterator k;
- std::vector<uint64_t>::iterator l;
+ uint64_t t;
- uint64_t t;
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
+ std::fill (_tmp.begin (), _tmp.begin () + w.size (), 0);
- std::fill (_tmp.begin (), _tmp.begin () + w.size (), 0);
+ for (j = v.begin (); j != v.end (); ++j, ++i)
+ {
+ for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
+ {
+ t = ((uint64_t) *k) * ((uint64_t) *j);
- for (j = v.begin (); j != v.end (); ++j, ++i)
- {
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
- {
- t = ((uint64_t) *k) * ((uint64_t) *j);
+ *l += t;
- *l += t;
+ if (*l < t)
+ *l += VD.faxpy()._two_64;
+ }
+ }
- if (*l < t)
- *l += VD.faxpy()._two_64;
- }
- }
+ typename Vector1::iterator w_j;
- typename Vector1::iterator w_j;
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = *l % VD.field ().characteristic();
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = *l % VD.field ().characteristic();
+ return w;
+ }
+ template <class Vector1, class Matrix, class Vector2>
+ Vector1 &mulColDenseSpecialized
+ (const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
+ VectorCategories::SparseSequenceVectorTag) const
+ {
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
- return w;
- }
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j;
+ typename Matrix::Column::const_iterator k;
+ std::vector<uint64_t>::iterator l;
- template <typename Compute_t>
- template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<int64_t,Compute_t> >::
- mulColDenseSpecialized (const VectorDomain<Givaro::Modular<int64_t,Compute_t> > &VD,
- Vector1 &w,
- const Matrix &A,
- const Vector2 &v,
- VectorCategories::SparseSequenceVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j;
- typename Matrix::Column::const_iterator k;
- std::vector<uint64_t>::iterator l;
-
- uint64_t t;
-
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
-
- std::fill (_tmp.begin (), _tmp.begin () + w.size (), 0);
-
- for (j = v.begin (); j != v.end (); ++j, ++i)
- {
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
- {
- t = ((uint64_t) k->second) * ((uint64_t) *j);
+ uint64_t t;
- _tmp[k->first] += t;
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
- if (_tmp[k->first] < t)
- _tmp[k->first] += VD.faxpy()._two_64;
- }
- }
+ std::fill (_tmp.begin (), _tmp.begin () + w.size (), 0);
- typename Vector1::iterator w_j;
+ for (j = v.begin (); j != v.end (); ++j, ++i)
+ {
+ for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
+ {
+ t = ((uint64_t) k->second) * ((uint64_t) *j);
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = *l % VD.field ().characteristic();
+ _tmp[k->first] += t;
- return w;
- }
+ if (_tmp[k->first] < t)
+ _tmp[k->first] += VD.faxpy()._two_64;
+ }
+ }
- template <typename Compute_t>
- template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<int64_t,Compute_t> > ::
- mulColDenseSpecialized(const VectorDomain<Givaro::Modular<int64_t,Compute_t> > &VD,
- Vector1 &w,
- const Matrix &A,
- const Vector2 &v,
- VectorCategories::SparseAssociativeVectorTag) const
- {
+ typename Vector1::iterator w_j;
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = *l % VD.field ().characteristic();
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j;
- typename Matrix::Column::const_iterator k;
- std::vector<uint64_t>::iterator l;
+ return w;
+ }
- uint64_t t;
+ template <class Vector1, class Matrix, class Vector2>
+ Vector1 &mulColDenseSpecialized
+ (const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
+ VectorCategories::SparseAssociativeVectorTag) const
+ {
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
- std::fill (_tmp.begin (), _tmp.begin () + w.size (), 0);
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j;
+ typename Matrix::Column::const_iterator k;
+ std::vector<uint64_t>::iterator l;
- for (j = v.begin (); j != v.end (); ++j, ++i)
- {
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
- {
- t = ((uint64_t) k->second) * ((uint64_t) *j);
+ uint64_t t;
- _tmp[k->first] += t;
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
- if (_tmp[k->first] < t)
- _tmp[k->first] += VD.faxpy()._two_64;
- }
- }
+ std::fill (_tmp.begin (), _tmp.begin () + w.size (), 0);
- typename Vector1::iterator w_j;
+ for (j = v.begin (); j != v.end (); ++j, ++i)
+ {
+ for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
+ {
+ t = ((uint64_t) k->second) * ((uint64_t) *j);
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = *l % VD.field ().characteristic();
+ _tmp[k->first] += t;
- return w;
- }
+ if (_tmp[k->first] < t)
+ _tmp[k->first] += VD.faxpy()._two_64;
+ }
+ }
- template <typename Compute_t>
- template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<int64_t,Compute_t> > ::
- mulColDenseSpecialized (const VectorDomain<Givaro::Modular<int64_t,Compute_t> > &VD,
- Vector1 &w,
- const Matrix &A,
- const Vector2 &v,
- VectorCategories::SparseParallelVectorTag) const
- {
+ typename Vector1::iterator w_j;
+
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = *l % VD.field ().characteristic();
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
+ return w;
+ }
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j;
- typename Matrix::Column::first_type::const_iterator k_idx;
- typename Matrix::Column::second_type::const_iterator k_elt;
- std::vector<uint64_t>::iterator l;
+ template <class Vector1, class Matrix, class Vector2>
+ Vector1 &mulColDenseSpecialized
+ (const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
+ VectorCategories::SparseParallelVectorTag) const
+ {
- uint64_t t;
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j;
+ typename Matrix::Column::first_type::const_iterator k_idx;
+ typename Matrix::Column::second_type::const_iterator k_elt;
+ std::vector<uint64_t>::iterator l;
- std::fill (_tmp.begin (), _tmp.begin () + w.size (), 0);
+ uint64_t t;
- for (j = v.begin (); j != v.end (); ++j, ++i)
- {
- for (k_idx = i->first.begin (), k_elt = i->second.begin (), l = _tmp.begin ();
- k_idx != i->first.end ();
- ++k_idx, ++k_elt, ++l)
- {
- t = ((uint64_t) *k_elt) * ((uint64_t) *j);
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
- _tmp[*k_idx] += t;
+ std::fill (_tmp.begin (), _tmp.begin () + w.size (), 0);
- if (_tmp[*k_idx] < t)
- _tmp[*k_idx] += VD.faxpy()._two_64;
- }
- }
+ for (j = v.begin (); j != v.end (); ++j, ++i)
+ {
+ for (k_idx = i->first.begin (), k_elt = i->second.begin (), l = _tmp.begin ();
+ k_idx != i->first.end ();
+ ++k_idx, ++k_elt, ++l)
+ {
+ t = ((uint64_t) *k_elt) * ((uint64_t) *j);
+
+ _tmp[*k_idx] += t;
+
+ if (_tmp[*k_idx] < t)
+ _tmp[*k_idx] += VD.faxpy()._two_64;
+ }
+ }
- typename Vector1::iterator w_j;
+ typename Vector1::iterator w_j;
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = *l % VD.field ().characteristic();
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = *l % VD.field ().characteristic();
- return w;
- }
+ return w;
+ }
+ mutable std::vector<uint64_t> _tmp;
+ };
}
#undef LINBOX_MAX_INT64
diff --git a/linbox/ring/modular/modular-unsigned.h b/linbox/ring/modular/modular-unsigned.h
index ec7230a..a94f058 100755
--- a/linbox/ring/modular/modular-unsigned.h
+++ b/linbox/ring/modular/modular-unsigned.h
@@ -40,12 +40,16 @@
#ifndef __LINBOX_field_modular_unsigned_H
#define __LINBOX_field_modular_unsigned_H
-namespace LinBox { /* uint8_t */
+//Dan Roche 7-2-04
+#ifndef __LINBOX_MIN
+#define __LINBOX_MIN(a,b) ( (a) < (b) ? (a) : (b) )
+#endif
- /*! Specialization of FieldAXPY for uint8_t modular field */
+namespace LinBox { /* uint8_t */
- template <>
- template<class Compute_t>
+ /*! Specialization of FieldAXPY for uint8_t modular field */
+
+ template<class Compute_t>
class FieldAXPY<Givaro::Modular<uint8_t,Compute_t > > {
public:
@@ -54,71 +58,71 @@ namespace LinBox { /* uint8_t */
typedef Givaro::Modular<uint8_t, Compute_t> Field;
FieldAXPY (const Field &F) :
- _k (((uint64_t) -1LL) / ((F.characteristic() - 1) * (F.characteristic() - 1))),
- _field (&F),
- _y (0),
- i (_k)
- {
- }
+ _k (((uint64_t) -1LL) / ((F.characteristic() - 1) * (F.characteristic() - 1))),
+ _field (&F),
+ _y (0),
+ i (_k)
+ {
+ }
FieldAXPY (const FieldAXPY &faxpy) :
- _k (faxpy._k),
- _field (faxpy._field),
- _y (0),
- i (_k)
- {}
+ _k (faxpy._k),
+ _field (faxpy._field),
+ _y (0),
+ i (_k)
+ {}
FieldAXPY<Field> &operator = (const FieldAXPY &faxpy)
- {
- _field = faxpy._field;
- _y = faxpy._y;
- _k = faxpy._k;
- return *this;
- }
-
+ {
+ _field = faxpy._field;
+ _y = faxpy._y;
+ _k = faxpy._k;
+ return *this;
+ }
+
inline uint64_t& mulacc (const Element &a, const Element &x)
- {
- uint32_t t = (uint32_t) a * (uint32_t) x;
+ {
+ uint32_t t = (uint32_t) a * (uint32_t) x;
- if (!i--) {
- i = int(_k);
- return _y = _y % (uint32_t) field().characteristic() + t;
- }
- else
- return _y += t;
- }
+ if (!i--) {
+ i = int(_k);
+ return _y = _y % (uint32_t) field().characteristic() + t;
+ }
+ else
+ return _y += t;
+ }
inline uint64_t& accumulate (const Element &t)
- {
+ {
- if (!i--) {
- i = int(_k);
- return _y = _y % (uint32_t) field().characteristic() + t;
- }
- else
- return _y += t;
- }
+ if (!i--) {
+ i = int(_k);
+ return _y = _y % (uint32_t) field().characteristic() + t;
+ }
+ else
+ return _y += t;
+ }
inline Element &get (Element &y) const
- {
- const_cast<FieldAXPY<Field>*>(this)->_y %= (uint32_t) field().characteristic();
- if ((int32_t) _y < 0) const_cast<FieldAXPY<Field>*>(this)->_y += field().characteristic();
- y = (uint8_t) _y;
- const_cast<FieldAXPY<Field>*>(this)->i = int(_k);
- return y;
- }
+ {
+ const_cast<FieldAXPY<Field>*>(this)->_y %= (uint32_t) field().characteristic();
+ if ((int32_t) _y < 0) const_cast<FieldAXPY<Field>*>(this)->_y += field().characteristic();
+ y = (uint8_t) _y;
+ const_cast<FieldAXPY<Field>*>(this)->i = int(_k);
+ return y;
+ }
inline FieldAXPY &assign (const Element y)
- {
- _y = y;
- i = int(_k);
- return *this;
- }
+ {
+ _y = y;
+ i = int(_k);
+ return *this;
+ }
inline void reset()
- {
- _y = 0;
- }
+ {
+ _y = 0;
+ }
inline const Field & field() const { return *_field; }
@@ -131,9 +135,8 @@ namespace LinBox { /* uint8_t */
int64_t i;
};
- //! Specialization of DotProductDomain for unsigned short modular field
+ //! Specialization of DotProductDomain for unsigned short modular field
- template <>
template <class Compute_t>
class DotProductDomain<Givaro::Modular<uint8_t, Compute_t> > : public VectorDomainBase<Givaro::Modular<uint8_t, Compute_t> > {
public:
@@ -143,22 +146,84 @@ namespace LinBox { /* uint8_t */
DotProductDomain(){}
DotProductDomain (const Field &F) :
- VectorDomainBase<Field> (F)
- {}
+ VectorDomainBase<Field> (F)
+ {}
using VectorDomainBase<Field>::field;
using VectorDomainBase<Field>::faxpy;
protected:
template <class Vector1, class Vector2>
- inline Element &dotSpecializedDD (Element &res, const Vector1 &v1, const Vector2 &v2) const;
+ inline Element &dotSpecializedDD (Element &res, const Vector1 &v1, const Vector2 &v2) const
+ {
+ typename Vector1::const_iterator i = v1.begin ();
+ typename Vector2::const_iterator j = v2.begin ();
+
+ typename Vector1::const_iterator iterend = v1.begin () + (ptrdiff_t)(v1.size() % faxpy()._k);
+
+ uint64_t y = 0;
+
+ for (; i != iterend; ++i, ++j)
+ y += (uint64_t) *i * (uint64_t) *j;
+
+ y %= (uint64_t) field().characteristic();
+
+ for (; iterend != v1.end (); j += (ptrdiff_t)faxpy()._k) {
+ typename Vector1::const_iterator iter_i = iterend;
+ typename Vector2::const_iterator iter_j;
+
+ iterend += (ptrdiff_t)faxpy()._k;
+
+ for (iter_j = j; iter_i != iterend; ++iter_i, ++iter_j)
+ y += (uint64_t) *iter_i * (uint64_t) *j;
+
+ y %= (uint64_t) field().characteristic();
+ }
+
+ return res = (uint8_t) y;
+ }
+
template <class Vector1, class Vector2>
- inline Element &dotSpecializedDSP (Element &res, const Vector1 &v1, const Vector2 &v2) const;
-
+ inline Element &dotSpecializedDSP (Element &res, const Vector1 &v1, const Vector2 &v2) const
+ {
+ typename Vector1::first_type::const_iterator i_idx = v1.first.begin ();
+ typename Vector1::second_type::const_iterator i_elt = v1.second.begin ();
+
+ uint64_t y = 0;
+
+ if (v1.first.size () < faxpy()._k) {
+ for (; i_idx != v1.first.end (); ++i_idx, ++i_elt)
+ y += (uint64_t) *i_elt * (uint64_t) v2[*i_idx];
+
+ return res = uint8_t (y % (uint64_t) field().characteristic());
+ }
+ else {
+ typename Vector1::first_type::const_iterator iterend = v1.first.begin () +(ptrdiff_t)( v1.first.size() % faxpy()._k);
+
+ for (; i_idx != iterend; ++i_idx, ++i_elt)
+ y += (uint64_t) *i_elt * (uint64_t) v2[*i_idx];
+
+ y %= (uint64_t) field().characteristic();
+
+ while (iterend != v1.first.end ()) {
+ typename Vector1::first_type::const_iterator iter_i_idx = iterend;
+ typename Vector1::second_type::const_iterator iter_i_elt = i_elt;
+
+ iterend += (ptrdiff_t)faxpy()._k;
+ i_elt += (ptrdiff_t)faxpy()._k;
+
+ for (; iter_i_idx != iterend; ++iter_i_idx, ++iter_i_elt)
+ y += (uint64_t) *iter_i_elt * (uint64_t) v2[*iter_i_idx];
+
+ y %= (uint64_t) field().characteristic();
+ }
+
+ return res = (uint8_t) y;
+ }
+ }
};
- //! Specialization of MVProductDomain for uint8_t modular field
+ //! Specialization of MVProductDomain for uint8_t modular field
- template <>
template<class Compute_t>
class MVProductDomain<Givaro::Modular<uint8_t,Compute_t> > {
public:
@@ -170,27 +235,188 @@ namespace LinBox { /* uint8_t */
template <class Vector1, class Matrix, class Vector2>
inline Vector1 &mulColDense
(const VectorDomain<Field> &VD, Vector1 &w, const Matrix &A, const Vector2 &v) const
- {
- return mulColDenseSpecialized (VD, w, A, v, typename VectorTraits<typename Matrix::Column>::VectorCategory ());
- }
+ {
+ return mulColDenseSpecialized (VD, w, A, v, typename VectorTraits<typename Matrix::Column>::VectorCategory ());
+ }
private:
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field> &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::DenseVectorTag) const;
+ VectorCategories::DenseVectorTag) const
+ {
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
+
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j, j_end;
+ typename Matrix::Column::const_iterator k;
+ std::vector<uint32_t>::iterator l, l_end;
+
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
+
+ std::fill (_tmp.begin (), _tmp.begin () + (ptrdiff_t)w.size (), 0);
+
+ l_end = _tmp.begin () +(ptrdiff_t) w.size ();
+
+ do {
+ j = v.begin ();
+ j_end = j + __LINBOX_MIN (A->coldim (), VD.faxpy()._k);
+
+ for (; j != j_end; ++j, ++i)
+ for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
+ *l += *k * *j;
+
+ j_end += __LINBOX_MIN (A->coldim () - (j_end - v.begin ()), VD.faxpy()._k);
+
+ for (l =_tmp.begin (); l != l_end; ++l)
+ *l %= VD.field ().characteristic();
+
+ } while (j_end != v.end ());
+
+ typename Vector1::iterator w_j;
+
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = *l;
+
+ return w;
+ }
+
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field> &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseSequenceVectorTag) const;
+ VectorCategories::SparseSequenceVectorTag) const
+ {
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
+
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j, j_end;
+ typename Matrix::Column::const_iterator k;
+ std::vector<uint32_t>::iterator l, l_end;
+
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
+
+ std::fill (_tmp.begin (), _tmp.begin () + (ptrdiff_t)w.size (), 0);
+
+ l_end = _tmp.begin () + (ptrdiff_t)w.size ();
+
+
+ do {
+ j = v.begin ();
+ j_end = j + __LINBOX_MIN (A->coldim (), VD.faxpy()._k);
+
+ for (; j != j_end; ++j, ++i)
+ for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
+ _tmp[k->first] += k->second * *j;
+
+ j_end += __LINBOX_MIN (A->coldim () - (j_end - v.begin ()), VD.faxpy()._k);
+
+ for (l =_tmp.begin (); l != l_end; ++l)
+ *l %= VD.field ().characteristic();
+
+ } while (j_end != v.end ());
+
+ typename Vector1::iterator w_j;
+
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = *l;
+
+ return w;
+ }
+
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseAssociativeVectorTag) const;
+ VectorCategories::SparseAssociativeVectorTag) const
+ {
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
+
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j, j_end;
+ typename Matrix::Column::const_iterator k;
+ std::vector<uint32_t>::iterator l, l_end;
+
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
+
+ std::fill (_tmp.begin (), _tmp.begin () + (ptrdiff_t)w.size (), 0);
+
+ l_end = _tmp.begin () +(ptrdiff_t) w.size ();
+
+ do {
+ j = v.begin ();
+ j_end = j + __LINBOX_MIN (A->coldim (), VD.faxpy()._k);
+
+ for (; j != j_end; ++j, ++i)
+ for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
+ _tmp[k->first] += k->second * *j;
+
+ j_end += __LINBOX_MIN (A->coldim () - (j_end - v.begin ()), VD.faxpy()._k);
+
+ for (l =_tmp.begin (); l != l_end; ++l)
+ *l %= VD.field ().characteristic();
+
+ } while (j_end != v.end ());
+
+ typename Vector1::iterator w_j;
+
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = *l;
+
+ return w;
+ }
+
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseParallelVectorTag) const;
+ VectorCategories::SparseParallelVectorTag) const
+ {
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
+
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j, j_end;
+ typename Matrix::Column::first_type::const_iterator k_idx;
+ typename Matrix::Column::second_type::const_iterator k_elt;
+ std::vector<uint32_t>::iterator l, l_end;
+
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
+
+ std::fill (_tmp.begin (), _tmp.begin () + (ptrdiff_t)w.size (), 0);
+
+ l_end = _tmp.begin () + (ptrdiff_t)w.size ();
+
+ do {
+ j = v.begin ();
+ j_end = j + (ptrdiff_t)__LINBOX_MIN (uint64_t (A.coldim ()), VD.faxpy()._k);
+
+ for (; j != j_end; ++j, ++i)
+ for (k_idx = i->first.begin (), k_elt = i->second.begin (), l = _tmp.begin ();
+ k_idx != i->first.end ();
+ ++k_idx, ++k_elt, ++l)
+ _tmp[*k_idx] += *k_elt * *j;
+
+ j_end += (ptrdiff_t) __LINBOX_MIN (uint64_t (A.coldim () - (size_t)(j_end - v.begin ())), VD.faxpy()._k);
+
+ for (l =_tmp.begin (); l != l_end; ++l)
+ *l %= VD.field ().characteristic();
+
+ } while (j_end != v.end ());
+
+ typename Vector1::iterator w_j;
+ typedef typename Vector1::value_type val_t ;
+
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = (val_t) *l;
+
+ return w;
+ }
+
mutable std::vector<uint32_t> _tmp;
};
@@ -199,8 +425,7 @@ namespace LinBox { /* uint8_t */
namespace LinBox { /* uint16_t */
- /*! Specialization of FieldAXPY for uint16_t modular field */
- template <>
+ /*! Specialization of FieldAXPY for uint16_t modular field */
template<class Compute_t>
class FieldAXPY<Givaro::Modular<uint16_t,Compute_t> > {
public:
@@ -209,66 +434,66 @@ namespace LinBox { /* uint16_t */
typedef Givaro::Modular<uint16_t,Compute_t> Field;
FieldAXPY (const Field &F) :
- _k (((uint64_t) -1LL) / ((F.characteristic() - 1) * (F.characteristic() - 1))),
- _field (&F),
- _y (0),
- i (_k)
- {}
+ _k (((uint64_t) -1LL) / ((F.characteristic() - 1) * (F.characteristic() - 1))),
+ _field (&F),
+ _y (0),
+ i (_k)
+ {}
FieldAXPY (const FieldAXPY &faxpy) :
- _k (faxpy._k), _field (faxpy._field), _y (0), i (_k)
- {}
+ _k (faxpy._k), _field (faxpy._field), _y (0), i (_k)
+ {}
FieldAXPY<Field > &operator = (const FieldAXPY &faxpy)
- {
- _field = faxpy._field;
- _y = faxpy._y;
- _k = faxpy._k;
- return *this;
- }
+ {
+ _field = faxpy._field;
+ _y = faxpy._y;
+ _k = faxpy._k;
+ return *this;
+ }
inline uint64_t& mulacc (const Element &a, const Element &x)
- {
- uint64_t t = (uint64_t) ((long long) a * (long long) x);
+ {
+ uint64_t t = (uint64_t) ((long long) a * (long long) x);
- if (!i--) {
- i = (int)_k;
- return _y = _y % (uint64_t) field().characteristic() + t;
- }
- else
- return _y += t;
- }
+ if (!i--) {
+ i = (int)_k;
+ return _y = _y % (uint64_t) field().characteristic() + t;
+ }
+ else
+ return _y += t;
+ }
inline uint64_t& accumulate (const Element &t)
- {
- if (!i--) {
- i = (int)_k;
- return _y = _y % (uint64_t) field().characteristic() + t;
- }
- else
- return _y += t;
- }
+ {
+ if (!i--) {
+ i = (int)_k;
+ return _y = _y % (uint64_t) field().characteristic() + t;
+ }
+ else
+ return _y += t;
+ }
inline Element &get (Element &y) const
- {
- const_cast<FieldAXPY<Field>*>(this)->_y %= (uint64_t) field().characteristic();
- if ((int64_t) _y < 0) const_cast<FieldAXPY<Field>*>(this)->_y += field().characteristic();
- y = (uint16_t) _y;
- const_cast<FieldAXPY<Field>*>(this)->i = int(_k);
- return y;
- }
+ {
+ const_cast<FieldAXPY<Field>*>(this)->_y %= (uint64_t) field().characteristic();
+ if ((int64_t) _y < 0) const_cast<FieldAXPY<Field>*>(this)->_y += field().characteristic();
+ y = (uint16_t) _y;
+ const_cast<FieldAXPY<Field>*>(this)->i = int(_k);
+ return y;
+ }
inline FieldAXPY &assign (const Element y)
- {
- _y = y;
- i = (int)_k;
- return *this;
- }
+ {
+ _y = y;
+ i = (int)_k;
+ return *this;
+ }
inline void reset()
- {
- _y = 0;
- }
+ {
+ _y = 0;
+ }
inline const Field & field() const {return *_field;}
@@ -281,66 +506,297 @@ namespace LinBox { /* uint16_t */
int64_t i;
};
- //! Specialization of DotProductDomain for unsigned short modular field
+ //! Specialization of DotProductDomain for unsigned short modular field
- template <>
template<class Compute_t>
class DotProductDomain<Givaro::Modular<uint16_t,Compute_t> > : public VectorDomainBase<Givaro::Modular<uint16_t,Compute_t> > {
public:
typedef uint16_t Element;
- typedef Givaro::Modular<uint16_t,Compute_t> Field;
+ typedef Givaro::Modular<uint16_t,Compute_t> Field;
DotProductDomain () {}
DotProductDomain (const Field &F) :
- VectorDomainBase<Field > (F)
- {}
+ VectorDomainBase<Field > (F)
+ {}
using VectorDomainBase<Field>::field;
using VectorDomainBase<Field>::faxpy;
protected:
template <class Vector1, class Vector2>
- inline Element &dotSpecializedDD (Element &res, const Vector1 &v1, const Vector2 &v2) const;
+ inline Element &dotSpecializedDD (Element &res, const Vector1 &v1, const Vector2 &v2) const
+ {
+ typename Vector1::const_iterator i = v1.begin ();
+ typename Vector2::const_iterator j = v2.begin ();
+
+ typename Vector1::const_iterator iterend = v1.begin () + (ptrdiff_t)(v1.size() % faxpy()._k);
+
+ uint64_t y = 0;
+
+ for (; i != iterend; ++i, ++j)
+ y += (uint64_t) *i * (uint64_t) *j;
+
+ y %= (uint64_t) field().characteristic();
+
+ for (; iterend != v1.end (); j += faxpy()._k) {
+ typename Vector1::const_iterator iter_i = iterend;
+ typename Vector2::const_iterator iter_j;
+
+ iterend += faxpy()._k;
+
+ for (iter_j = j; iter_i != iterend; ++iter_i, ++iter_j)
+ y += (uint64_t) *iter_i * (uint64_t) *j;
+
+ y %= (uint64_t) field().characteristic();
+ }
+
+ return res = (uint16_t) y;
+ }
+
template <class Vector1, class Vector2>
- inline Element &dotSpecializedDSP (Element &res, const Vector1 &v1, const Vector2 &v2) const;
+ inline Element &dotSpecializedDSP (Element &res, const Vector1 &v1, const Vector2 &v2) const
+ {
+ typename Vector1::first_type::const_iterator i_idx = v1.first.begin ();
+ typename Vector1::second_type::const_iterator i_elt = v1.second.begin ();
+
+ uint64_t y = 0;
+
+ if (v1.first.size () < faxpy()._k) {
+ for (; i_idx != v1.first.end (); ++i_idx, ++i_elt)
+ y += (uint64_t) *i_elt * (uint64_t) v2[*i_idx];
+
+ return res = (uint16_t) (y % (uint64_t) field().characteristic());
+ }
+ else {
+ typename Vector1::first_type::const_iterator iterend = v1.first.begin () +(ptrdiff_t)( v1.first.size() % faxpy()._k );
+
+ for (; i_idx != iterend; ++i_idx, ++i_elt)
+ y += (uint64_t) *i_elt * (uint64_t) v2[*i_idx];
+
+ y %= (uint64_t) field().characteristic();
+
+ while (iterend != v1.first.end ()) {
+ typename Vector1::first_type::const_iterator iter_i_idx = iterend;
+ typename Vector1::second_type::const_iterator iter_i_elt = i_elt;
+
+ iterend += faxpy()._k;
+ i_elt += faxpy()._k;
+
+ for (; iter_i_idx != iterend; ++iter_i_idx, ++iter_i_elt)
+ y += (uint64_t) *iter_i_elt * (uint64_t) v2[*iter_i_idx];
+
+ y %= (uint64_t) field().characteristic();
+ }
+
+ return res = (Element) y;
+ }
+ }
};
- //! Specialization of MVProductDomain for uint16_t modular field
+ //! Specialization of MVProductDomain for uint16_t modular field
- template <>
template<class Compute_t>
class MVProductDomain<Givaro::Modular<uint16_t,Compute_t> > {
public:
typedef uint16_t Element;
- typedef Givaro::Modular<uint16_t,Compute_t> Field;
+ typedef Givaro::Modular<uint16_t,Compute_t> Field;
protected:
template <class Vector1, class Matrix, class Vector2>
inline Vector1 &mulColDense
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v) const
- {
- return mulColDenseSpecialized (VD, w, A, v, VectorTraits<typename Matrix::Column>::VectorCategory ());
- }
+ {
+ return mulColDenseSpecialized (VD, w, A, v, VectorTraits<typename Matrix::Column>::VectorCategory ());
+ }
private:
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::DenseVectorTag) const;
+ VectorCategories::DenseVectorTag) const
+ {
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
+
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j = v.begin (), j_end;
+ typename Matrix::Column::const_iterator k;
+ // Dan Roche, 7-1-04
+ // std::vector<uint32_t>::iterator l, l_end;
+ std::vector<uint64_t>::iterator l, l_end;
+
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
+
+ std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
+
+ l_end = _tmp.begin () +(ptrdiff_t) w.size ();
+
+ do {
+ j = v.begin ();
+ j_end = j + __LINBOX_MIN (A->coldim (), VD.faxpy()._k);
+
+ for (; j != j_end; ++j, ++i)
+ for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
+ *l += *k * *j;
+
+ j_end += __LINBOX_MIN (A->coldim () - (j_end - v.begin ()), VD.faxpy()._k);
+
+ for (l =_tmp.begin (); l != l_end; ++l)
+ *l %= VD.field ().characteristic();
+
+ } while (j_end != v.end ());
+
+ typename Vector1::iterator w_j;
+
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = *l;
+
+ return w;
+ }
+
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseSequenceVectorTag) const;
+ VectorCategories::SparseSequenceVectorTag) const
+ {
+ linbox_check (A.coldim () == v.size ());
+ linbox_check (A.rowdim () == w.size ());
+
+ typename Matrix::ConstColIterator i = A.colBegin ();
+ typename Vector2::const_iterator j, j_end;
+ typename Matrix::Column::const_iterator k;
+ // Dan Roche, 7-1-04
+ // std::vector<uint32_t>::iterator l, l_end;
+ std::vector<uint64_t>::iterator l, l_end;
+
+ if (_tmp.size () < w.size ())
+ _tmp.resize (w.size ());
+
+ std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
+
+ l_end = _tmp.begin () +(ptrdiff_t) w.size ();
+
+ do {
+ j = v.begin ();
+ j_end = j + __LINBOX_MIN (A->coldim (), VD.faxpy()._k);
+
+ for (; j != j_end; ++j, ++i)
+ for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
+ _tmp[k->first] += k->second * *j;
+
+ j_end += __LINBOX_MIN (A->coldim () - (j_end - v.begin ()), VD.faxpy()._k);
+
+ for (l =_tmp.begin (); l != l_end; ++l)
+ *l %= VD.field ().characteristic();
+
+ } while (j_end != v.end ());
+
+ typename Vector1::iterator w_j;
+
+ for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+ *w_j = *l;
+
+ return w;
+ }
+
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseAssociativeVectorTag) const;
+ VectorCategories::SparseAssociativeVectorTag) const
+ {
+     // Computes w = A * v column by column, where each column of A is an
+     // associative container whose entries expose the row index as ->first
+     // and the matrix entry as ->second.
+     //
+     // Products are summed unreduced into the 64-bit scratch vector _tmp and
+     // reduced modulo the characteristic after each chunk of at most
+     // VD.faxpy()._k columns.
+     // NOTE(review): presumably _k >= 1 is the number of products that can be
+     // accumulated without overflowing 64 bits -- confirm against FieldAXPY.
+     //
+     // Returns w, overwritten with the reduced result.
+     linbox_check (A.coldim () == v.size ());
+     linbox_check (A.rowdim () == w.size ());
+
+     typename Matrix::ConstColIterator i = A.colBegin ();
+     typename Vector2::const_iterator j = v.begin (), j_end;
+     typename Matrix::Column::const_iterator k;
+     // 64-bit accumulators (widened from uint32_t by Dan Roche, 7-1-04)
+     std::vector<uint64_t>::iterator l, l_end;
+
+     if (_tmp.size () < w.size ())
+         _tmp.resize (w.size ());
+
+     std::fill (_tmp.begin (), _tmp.begin () + (ptrdiff_t) w.size (), 0);
+
+     l_end = _tmp.begin () + (ptrdiff_t) w.size ();
+
+     // j is initialised once and carried across chunks so that it stays in
+     // step with the column iterator i.
+     while (j != v.end ()) {
+         const uint64_t remaining = (uint64_t) (v.end () - j);
+         const uint64_t chunk = remaining < (uint64_t) VD.faxpy()._k ? remaining : (uint64_t) VD.faxpy()._k;
+         j_end = j + (ptrdiff_t) chunk;
+
+         for (; j != j_end; ++j, ++i)
+             for (k = i->begin (); k != i->end (); ++k)
+                 // widen before multiplying so the product is formed in 64 bits
+                 _tmp[k->first] += (uint64_t) k->second * (uint64_t) *j;
+
+         for (l = _tmp.begin (); l != l_end; ++l)
+             *l %= VD.field ().characteristic();
+     }
+
+     typename Vector1::iterator w_j;
+
+     for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+         *w_j = *l;
+
+     return w;
+ }
+
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseParallelVectorTag) const;
+ VectorCategories::SparseParallelVectorTag) const
+ {
+     // Computes w = A * v column by column, where each column of A is a pair
+     // of parallel containers: column.first holds the row indices and
+     // column.second the matching entries.
+     //
+     // Products are summed unreduced into the 64-bit scratch vector _tmp and
+     // reduced modulo the characteristic after each chunk of at most
+     // VD.faxpy()._k columns.
+     // NOTE(review): presumably _k >= 1 is the number of products that can be
+     // accumulated without overflowing 64 bits -- confirm against FieldAXPY.
+     //
+     // Returns w, overwritten with the reduced result.
+     linbox_check (A.coldim () == v.size ());
+     linbox_check (A.rowdim () == w.size ());
+
+     typename Matrix::ConstColIterator i = A.colBegin ();
+     typename Vector2::const_iterator j = v.begin (), j_end;
+     typename Matrix::Column::first_type::const_iterator k_idx;
+     typename Matrix::Column::second_type::const_iterator k_elt;
+     // 64-bit accumulators (widened from uint32_t by Dan Roche, 7-1-04)
+     std::vector<uint64_t>::iterator l, l_end;
+
+     if (_tmp.size () < w.size ())
+         _tmp.resize (w.size ());
+
+     std::fill (_tmp.begin (), _tmp.begin () + (ptrdiff_t) w.size (), 0);
+
+     l_end = _tmp.begin () + (ptrdiff_t) w.size ();
+
+     // j is initialised once and carried across chunks so that it stays in
+     // step with the column iterator i.
+     while (j != v.end ()) {
+         const uint64_t remaining = (uint64_t) (v.end () - j);
+         const uint64_t chunk = remaining < (uint64_t) VD.faxpy()._k ? remaining : (uint64_t) VD.faxpy()._k;
+         j_end = j + (ptrdiff_t) chunk;
+
+         for (; j != j_end; ++j, ++i)
+             for (k_idx = i->first.begin (), k_elt = i->second.begin ();
+                  k_idx != i->first.end ();
+                  ++k_idx, ++k_elt)
+                 // widen before multiplying so the product is formed in 64 bits
+                 _tmp[*k_idx] += (uint64_t) *k_elt * (uint64_t) *j;
+
+         for (l = _tmp.begin (); l != l_end; ++l)
+             *l %= VD.field ().characteristic();
+     }
+
+     typename Vector1::iterator w_j;
+
+     for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+         *w_j = *l;
+
+     return w;
+ }
mutable std::vector<uint64_t> _tmp;
};
@@ -359,9 +815,8 @@ namespace LinBox { /* uint32_t */
class MVProductDomain;
- /*! Specialization of FieldAXPY for unsigned short modular field */
+ /*! Specialization of FieldAXPY for uint32_t modular field */
- template <>
template<class Compute_t>
class FieldAXPY<Givaro::Modular<uint32_t, Compute_t> > {
public:
@@ -370,62 +825,62 @@ namespace LinBox { /* uint32_t */
typedef Givaro::Modular<uint32_t, Compute_t> Field;
FieldAXPY (const Field &F) :
- _field (&F), _y(0)
- {
- _two_64 = (uint64_t(1) << 32) % uint64_t(F.characteristic());
- _two_64 = (_two_64 * _two_64) % uint64_t(F.characteristic());
- }
+ _field (&F), _y(0)
+ {
+ _two_64 = (uint64_t(1) << 32) % uint64_t(F.characteristic());
+ _two_64 = (_two_64 * _two_64) % uint64_t(F.characteristic());
+ }
FieldAXPY (const FieldAXPY &faxpy) :
- _two_64 (faxpy._two_64), _field (faxpy._field), _y (0)
- {}
+ _two_64 (faxpy._two_64), _field (faxpy._field), _y (0)
+ {}
FieldAXPY<Field > &operator = (const FieldAXPY &faxpy)
- {
- _field = faxpy._field;
- _y = faxpy._y;
- _two_64 = faxpy._two_64;
- return *this;
- }
+ {
+ _field = faxpy._field;
+ _y = faxpy._y;
+ _two_64 = faxpy._two_64;
+ return *this;
+ }
inline uint64_t& mulacc (const Element &a, const Element &x)
- {
- uint64_t t = (uint64_t) a * (uint64_t) x;
- _y += t;
+ {
+ uint64_t t = (uint64_t) a * (uint64_t) x;
+ _y += t;
- if (_y < t)
- return _y += _two_64;
- else
- return _y;
- }
+ if (_y < t)
+ return _y += _two_64;
+ else
+ return _y;
+ }
inline uint64_t& accumulate (const Element &t)
- {
- _y += t;
+ {
+ _y += t;
- if (_y < t)
- return _y += _two_64;
- else
- return _y;
- }
+ if (_y < t)
+ return _y += _two_64;
+ else
+ return _y;
+ }
inline uint64_t& accumulate_special (const Element &t)
- {
- return _y += t;
- }
+ {
+ return _y += t;
+ }
inline Element &get (Element &y) const
- {
- const_cast<FieldAXPY<Field>*>(this)->_y %= (uint64_t) field().characteristic();
- //if ((int64_t) _y < 0) const_cast<FieldAXPY<Field>*>(this)->_y += field().characteristic();
- return y = (uint32_t) _y;
- }
+ {
+ const_cast<FieldAXPY<Field>*>(this)->_y %= (uint64_t) field().characteristic();
+ //if ((int64_t) _y < 0) const_cast<FieldAXPY<Field>*>(this)->_y += field().characteristic();
+ return y = (uint32_t) _y;
+ }
inline FieldAXPY &assign (const Element y)
- {
- _y = y;
- return *this;
- }
+ {
+ _y = y;
+ return *this;
+ }
inline void reset() {
_y = 0;
@@ -443,9 +898,8 @@ namespace LinBox { /* uint32_t */
uint64_t _y;
};
- //! Specialization of DotProductDomain for uint32_t modular field
+ //! Specialization of DotProductDomain for uint32_t modular field
- template <>
template<class Compute_t>
class DotProductDomain<Givaro::Modular<uint32_t,Compute_t> > : public VectorDomainBase<Givaro::Modular<uint32_t,Compute_t> > {
public:
@@ -455,24 +909,61 @@ namespace LinBox { /* uint32_t */
DotProductDomain () {}
DotProductDomain (const Field &F) :
- VectorDomainBase<Field > (F)
- {}
+ VectorDomainBase<Field > (F)
+ {}
using VectorDomainBase<Field >::field;
using VectorDomainBase<Field >::faxpy;
protected:
template <class Vector1, class Vector2>
- inline Element &dotSpecializedDD (Element &res, const Vector1 &v1, const Vector2 &v2) const;
+ inline Element &dotSpecializedDD (Element &res, const Vector1 &v1, const Vector2 &v2) const
+ {
+     // Dot product of two dense vectors, walked in lock step.
+     // The sum is kept unreduced in a 64-bit accumulator; whenever an
+     // addition wraps around, faxpy()._two_64 is added back. A single
+     // reduction modulo the characteristic happens at the end.
+     // The result is stored in res, which is also returned.
+     typename Vector1::const_iterator lhs = v1.begin ();
+     typename Vector2::const_iterator rhs = v2.begin ();
+     uint64_t acc = 0;
+
+     while (lhs < v1.end ()) {
+         const uint64_t prod = (uint64_t) *lhs * (uint64_t) *rhs;
+
+         acc += prod;
+         if (acc < prod)   // unsigned wrap-around detected
+             acc += faxpy()._two_64;
+
+         ++lhs;
+         ++rhs;
+     }
+
+     acc %= (uint64_t) field().characteristic();
+     res = (uint32_t) acc;
+     return res;
+ }
template <class Vector1, class Vector2>
- inline Element &dotSpecializedDSP (Element &res, const Vector1 &v1, const Vector2 &v2) const;
+ inline Element &dotSpecializedDSP (Element &res, const Vector1 &v1, const Vector2 &v2) const
+ {
+     // Dot product of v1, given as a pair of parallel containers
+     // (v1.first = indices, v1.second = entries), with v2, which is read
+     // by index.
+     // The sum is kept unreduced in a 64-bit accumulator; whenever an
+     // addition wraps around, faxpy()._two_64 is added back. A single
+     // reduction modulo the characteristic happens at the end.
+     // The result is stored in res, which is also returned.
+     typename Vector1::first_type::const_iterator pos = v1.first.begin ();
+     typename Vector1::second_type::const_iterator val = v1.second.begin ();
+     uint64_t acc = 0;
+
+     while (pos != v1.first.end ()) {
+         const uint64_t prod = (uint64_t) *val * (uint64_t) v2[*pos];
+
+         acc += prod;
+         if (acc < prod)   // unsigned wrap-around detected
+             acc += faxpy()._two_64;
+
+         ++pos;
+         ++val;
+     }
+
+     acc %= (uint64_t) field().characteristic();
+     res = (uint32_t) acc;
+     return res;
+ }
+
};
- //! Specialization of MVProductDomain for uint32_t modular field
+ //! Specialization of MVProductDomain for uint32_t modular field
- template <>
template <class Compute_t>
class MVProductDomain<Givaro::Modular<uint32_t,Compute_t> > {
public:
@@ -484,27 +975,174 @@ namespace LinBox { /* uint32_t */
template <class Vector1, class Matrix, class Vector2>
inline Vector1 &mulColDense
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v) const
- {
- return mulColDenseSpecialized (VD, w, A, v, typename VectorTraits<typename Matrix::Column>::VectorCategory ());
- }
+ {
+ return mulColDenseSpecialized (VD, w, A, v, typename VectorTraits<typename Matrix::Column>::VectorCategory ());
+ }
private:
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::DenseVectorTag) const;
+ VectorCategories::DenseVectorTag) const
+ {
+     // Computes w = A * v for a matrix with dense columns: column c of A,
+     // scaled by the c-th entry of v, is added into the 64-bit scratch
+     // vector _tmp. Each time an accumulator wraps around,
+     // VD.faxpy()._two_64 is added back. The sums are reduced modulo the
+     // characteristic only when they are written to w.
+     // Returns w.
+     linbox_check (A.coldim () == v.size ());
+     linbox_check (A.rowdim () == w.size ());
+
+     if (_tmp.size () < w.size ())
+         _tmp.resize (w.size ());
+
+     std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
+
+     typename Matrix::ConstColIterator col = A.colBegin ();
+
+     for (typename Vector2::const_iterator x = v.begin (); x != v.end (); ++x, ++col) {
+         std::vector<uint64_t>::iterator acc = _tmp.begin ();
+
+         for (typename Matrix::Column::const_iterator a = col->begin (); a != col->end (); ++a, ++acc) {
+             const uint64_t prod = ((uint64_t) *a) * ((uint64_t) *x);
+
+             *acc += prod;
+             if (*acc < prod)   // unsigned wrap-around detected
+                 *acc += VD.faxpy()._two_64;
+         }
+     }
+
+     typedef typename Vector1::value_type element;
+     std::vector<uint64_t>::iterator sum = _tmp.begin ();
+
+     for (typename Vector1::iterator out = w.begin (); out != w.end (); ++out, ++sum)
+         *out = (element)(*sum % VD.field ().characteristic());
+
+     return w;
+ }
+
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseSequenceVectorTag) const;
+ VectorCategories::SparseSequenceVectorTag) const
+ {
+     // Computes w = A * v when each column of A is a sequence of
+     // (row index, entry) pairs. Every entry, scaled by the matching
+     // component of v, is added to the 64-bit accumulator _tmp[row index].
+     // Each time an accumulator wraps around, VD.faxpy()._two_64 is added
+     // back. The sums are reduced modulo the characteristic only when
+     // they are written to w.
+     // Returns w.
+     linbox_check (A.coldim () == v.size ());
+     linbox_check (A.rowdim () == w.size ());
+
+     if (_tmp.size () < w.size ())
+         _tmp.resize (w.size ());
+
+     std::fill (_tmp.begin (), _tmp.begin () + (ptrdiff_t) w.size (), 0);
+
+     typename Matrix::ConstColIterator col = A.colBegin ();
+
+     for (typename Vector2::const_iterator x = v.begin (); x != v.end (); ++x, ++col) {
+         for (typename Matrix::Column::const_iterator entry = col->begin (); entry != col->end (); ++entry) {
+             const uint64_t prod = ((uint64_t) entry->second) * ((uint64_t) *x);
+             uint64_t &slot = _tmp[entry->first];
+
+             slot += prod;
+             if (slot < prod)   // unsigned wrap-around detected
+                 slot += VD.faxpy()._two_64;
+         }
+     }
+
+     typedef typename Vector1::value_type val_t;
+     std::vector<uint64_t>::iterator sum = _tmp.begin ();
+
+     for (typename Vector1::iterator out = w.begin (); out != w.end (); ++out, ++sum)
+         *out = val_t(*sum % VD.field ().characteristic());
+
+     return w;
+ }
+
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseAssociativeVectorTag) const;
+ VectorCategories::SparseAssociativeVectorTag) const
+ {
+     // Computes w = A * v when each column of A is an associative container
+     // whose entries expose the row index as ->first and the matrix entry
+     // as ->second. Every entry, scaled by the matching component of v, is
+     // added to the 64-bit accumulator _tmp[row index]. Each time an
+     // accumulator wraps around, VD.faxpy()._two_64 is added back.
+     // Returns w, holding the sums reduced modulo the characteristic.
+     linbox_check (A.coldim () == v.size ());
+     linbox_check (A.rowdim () == w.size ());
+
+     typename Matrix::ConstColIterator i = A.colBegin ();
+     typename Vector2::const_iterator j;
+     typename Matrix::Column::const_iterator k;
+     std::vector<uint64_t>::iterator l;
+
+     uint64_t t;
+
+     if (_tmp.size () < w.size ())
+         _tmp.resize (w.size ());
+
+     std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
+
+     for (j = v.begin (); j != v.end (); ++j, ++i) {
+         for (k = i->begin (); k != i->end (); ++k) {
+             t = ((uint64_t) k->second) * ((uint64_t) *j);
+
+             _tmp[k->first] += t;
+
+             if (_tmp[k->first] < t)   // unsigned wrap-around detected
+                 _tmp[k->first] += VD.faxpy()._two_64;
+         }
+     }
+
+     typename Vector1::iterator w_j;
+     typedef typename Vector1::value_type val_t;
+
+     // Reduce the full 64-bit sum first and narrow afterwards; casting the
+     // accumulator to 32 bits before the modulo would drop its high half.
+     for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
+         *w_j = val_t(*l % VD.field ().characteristic());
+
+     return w;
+ }
+
template <class Vector1, class Matrix, class Vector2>
Vector1 &mulColDenseSpecialized
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseParallelVectorTag) const;
+ VectorCategories::SparseParallelVectorTag) const
+ {
+     // Computes w = A * v when each column of A is a pair of parallel
+     // containers (column.first = row indices, column.second = entries).
+     // Every entry, scaled by the matching component of v, is added to the
+     // 64-bit accumulator _tmp[row index]. Each time an accumulator wraps
+     // around, VD.faxpy()._two_64 is added back. The sums are reduced
+     // modulo the characteristic only when they are written to w.
+     // Returns w.
+     linbox_check (A.coldim () == v.size ());
+     linbox_check (A.rowdim () == w.size ());
+
+     if (_tmp.size () < w.size ())
+         _tmp.resize (w.size ());
+
+     std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
+
+     typename Matrix::ConstColIterator col = A.colBegin ();
+
+     for (typename Vector2::const_iterator x = v.begin (); x != v.end (); ++x, ++col) {
+         typename Matrix::Column::first_type::const_iterator row = col->first.begin ();
+         typename Matrix::Column::second_type::const_iterator val = col->second.begin ();
+
+         for (; row != col->first.end (); ++row, ++val) {
+             const uint64_t prod = ((uint64_t) *val) * ((uint64_t) *x);
+             uint64_t &slot = _tmp[*row];
+
+             slot += prod;
+             if (slot < prod)   // unsigned wrap-around detected
+                 slot += VD.faxpy()._two_64;
+         }
+     }
+
+     typedef typename Vector1::value_type val_t;
+     std::vector<uint64_t>::iterator sum = _tmp.begin ();
+
+     for (typename Vector1::iterator out = w.begin (); out != w.end (); ++out, ++sum)
+         *out = val_t(*sum % VD.field ().characteristic());
+
+     return w;
+ }
+
mutable std::vector<uint64_t> _tmp;
};
@@ -522,9 +1160,8 @@ namespace LinBox { /* uint64_t */
template<class Field>
class MVProductDomain;
- /*! Specialization of FieldAXPY for unsigned short modular field */
+ /*! Specialization of FieldAXPY for uint64_t modular field */
- template <>
template<typename Compute_t>
class FieldAXPY<Givaro::Modular<uint64_t,Compute_t> > {
public:
@@ -533,61 +1170,61 @@ namespace LinBox { /* uint64_t */
typedef Givaro::Modular<uint64_t,Compute_t> Field;
FieldAXPY (const Field &F) :
- _field (&F), _y(0)
- {
- _two_64 = (uint64_t(1) << 32) % uint64_t(F.characteristic());
- _two_64 = (_two_64 * _two_64) % uint64_t(F.characteristic());
- }
+ _field (&F), _y(0)
+ {
+ _two_64 = (uint64_t(1) << 32) % uint64_t(F.characteristic());
+ _two_64 = (_two_64 * _two_64) % uint64_t(F.characteristic());
+ }
FieldAXPY (const FieldAXPY &faxpy) :
- _two_64 (faxpy._two_64), _field (faxpy._field), _y (0)
- {}
+ _two_64 (faxpy._two_64), _field (faxpy._field), _y (0)
+ {}
FieldAXPY<Field > &operator = (const FieldAXPY &faxpy)
- {
- _field = faxpy._field;
- _y = faxpy._y;
- return *this;
- }
+ {
+     // Copy the field pointer, the running accumulator and the cached
+     // wrap-around correction _two_64. The constructor derives _two_64
+     // from the field's characteristic, so it has to follow _field or
+     // mulacc/accumulate would keep correcting with the constant of the
+     // previously assigned field.
+     _field = faxpy._field;
+     _y = faxpy._y;
+     _two_64 = faxpy._two_64;
+     return *this;
+ }
inline uint64_t& mulacc (const Element &a, const Element &x)
- {
- uint64_t t = (uint64_t) a * (uint64_t) x;
- _y += t;
+ {
+ uint64_t t = (uint64_t) a * (uint64_t) x;
+ _y += t;
- if (_y < t)
- return _y += _two_64;
- else
- return _y;
- }
+ if (_y < t)
+ return _y += _two_64;
+ else
+ return _y;
+ }
inline uint64_t& accumulate (const Element &t)
- {
- _y += t;
+ {
+ _y += t;
- if (_y < t)
- return _y += _two_64;
- else
- return _y;
- }
+ if (_y < t)
+ return _y += _two_64;
+ else
+ return _y;
+ }
inline uint64_t& accumulate_special (const Element &t)
- {
- return _y += t;
- }
+ {
+ return _y += t;
+ }
inline Element &get (Element &y) const
- {
- const_cast<FieldAXPY<Field>*>(this)->_y %= (uint64_t) field().characteristic();
- //if ((int64_t) _y < 0) const_cast<FieldAXPY<Field>*>(this)->_y += field().characteristic();
- return y = (uint64_t) _y;
- }
+ {
+ const_cast<FieldAXPY<Field>*>(this)->_y %= (uint64_t) field().characteristic();
+ //if ((int64_t) _y < 0) const_cast<FieldAXPY<Field>*>(this)->_y += field().characteristic();
+ return y = (uint64_t) _y;
+ }
inline FieldAXPY &assign (const Element y)
- {
- _y = y;
- return *this;
- }
+ {
+ _y = y;
+ return *this;
+ }
inline void reset() {
_y = 0;
@@ -605,12 +1242,11 @@ namespace LinBox { /* uint64_t */
uint64_t _y;
};
- //! Specialization of DotProductDomain for uint64_t modular field
+ //! Specialization of DotProductDomain for uint64_t modular field
- template <>
template <typename Compute_t>
class DotProductDomain<Givaro::Modular<uint64_t,Compute_t>> : public VectorDomainBase<Givaro::Modular<uint64_t,Compute_t> > {
- public:
+ public:
typedef uint64_t Element;
typedef Givaro::Modular<uint64_t,Compute_t> Field;
@@ -622,19 +1258,60 @@ namespace LinBox { /* uint64_t */
using VectorDomainBase<Field >::field;
using VectorDomainBase<Field >::faxpy;
- protected:
+ protected:
template <class Vector1, class Vector2>
- inline Element &dotSpecializedDD (Element &res, const Vector1 &v1, const Vector2 &v2) const;
-
+ inline Element &dotSpecializedDD (Element &res, const Vector1 &v1, const Vector2 &v2) const
+ {
+     // Dot product of two dense vectors, walked in lock step.
+     // The sum is kept unreduced in a 64-bit accumulator; whenever an
+     // addition wraps around, faxpy()._two_64 is added back. A single
+     // reduction modulo the characteristic happens at the end.
+     // NOTE(review): each product is itself formed in 64 bits, so this
+     // looks like it assumes the product of two elements fits in 64 bits
+     // -- confirm the supported modulus range.
+     // The result is stored in res, which is also returned.
+     typename Vector1::const_iterator lhs = v1.begin ();
+     typename Vector2::const_iterator rhs = v2.begin ();
+     uint64_t acc = 0;
+
+     while (lhs < v1.end ()) {
+         const uint64_t prod = ( (uint64_t) *lhs ) * ( (uint64_t) *rhs );
+
+         acc += prod;
+         if (acc < prod)   // unsigned wrap-around detected
+             acc += faxpy()._two_64;
+
+         ++lhs;
+         ++rhs;
+     }
+
+     acc %= (uint64_t) field().characteristic();
+     res = (Element) acc;
+     return res;
+ }
template <class Vector1, class Vector2>
- inline Element &dotSpecializedDSP (Element &res, const Vector1 &v1, const Vector2 &v2) const;
+ inline Element &dotSpecializedDSP (Element &res, const Vector1 &v1, const Vector2 &v2) const
+ {
+     // Dot product of v1, given as a pair of parallel containers
+     // (v1.first = indices, v1.second = entries), with v2, which is read
+     // by index.
+     // The sum is kept unreduced in a 64-bit accumulator; whenever an
+     // addition wraps around, faxpy()._two_64 is added back. A single
+     // reduction modulo the characteristic happens at the end.
+     // NOTE(review): each product is itself formed in 64 bits, so this
+     // looks like it assumes the product of two elements fits in 64 bits
+     // -- confirm the supported modulus range.
+     // The result is stored in res, which is also returned.
+     typename Vector1::first_type::const_iterator pos = v1.first.begin ();
+     typename Vector1::second_type::const_iterator val = v1.second.begin ();
+     uint64_t acc = 0;
+
+     while (pos != v1.first.end ()) {
+         const uint64_t prod = ( (uint64_t) *val ) * ( (uint64_t) v2[*pos] );
+
+         acc += prod;
+         if (acc < prod)   // unsigned wrap-around detected
+             acc += faxpy()._two_64;
+
+         ++pos;
+         ++val;
+     }
+
+     acc %= (uint64_t) field().characteristic();
+     res = (Element) acc;
+     return res;
+ }
+
};
- //! Specialization of MVProductDomain for uint64_t modular field
+ //! Specialization of MVProductDomain for uint64_t modular field
- template <>
template <typename Compute_t>
class MVProductDomain<Givaro::Modular<uint64_t,Compute_t> > {
public:
@@ -646,9 +1323,9 @@ namespace LinBox { /* uint64_t */
template <class Vector1, class Matrix, class Vector2>
inline Vector1 &mulColDense
(const VectorDomain<Field > &VD, Vector1 &w, const Matrix &A, const Vector2 &v) const
- {
- return mulColDenseSpecialized (VD, w, A, v, typename VectorTraits<typename Matrix::Column>::VectorCategory ());
- }
+ {
+ return mulColDenseSpecialized (VD, w, A, v, typename VectorTraits<typename Matrix::Column>::VectorCategory ());
+ }
private:
template <class Vector1, class Matrix, class Vector2>
@@ -673,14 +1350,13 @@ namespace LinBox { /* uint64_t */
}
-#include "linbox/ring/modular/modular-unsigned.inl"
#endif // __LINBOX_field_modular_unsigned_H
// Local Variables:
// mode: C++
-// tab-width: 8
+// tab-width: 4
// indent-tabs-mode: nil
-// c-basic-offset: 8
+// c-basic-offset: 4
// End:
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
+// vim:sts=4:sw=4:ts=4:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
diff --git a/linbox/ring/modular/modular-unsigned.inl b/linbox/ring/modular/modular-unsigned.inl
deleted file mode 100755
index d50ad34..0000000
--- a/linbox/ring/modular/modular-unsigned.inl
+++ /dev/null
@@ -1,828 +0,0 @@
-/* linbox/field/modular.inl
- * Copyright (C) 2002 Bradford Hovinen
- * Copyright (C) 2002 Ahmet Duran
- * Copyright (C) 2002 B. David Saunders
- *
- * Written by Bradford Hovinen <hovinen at cis.udel.edu>,
- * Ahmet Duran <duran at cis.udel.edu>,
- * Dave Saunders <saunders at cis.udel.edu>
- *
- * ------------------------------------
- *
- *
- * ========LICENCE========
- * This file is part of the library LinBox.
- *
- * LinBox is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- * ========LICENCE========
- *.
- */
-
-#ifndef __LINBOX_field_modular_INL
-#define __LINBOX_field_modular_INL
-
-//Dan Roche 7-2-04
-#ifndef __LINBOX_MIN
-#define __LINBOX_MIN(a,b) ( (a) < (b) ? (a) : (b) )
-#endif
-
-#include <iostream>
-
-namespace LinBox {
-
- template<typename Compute_t>
- template <class Vector1, class Vector2>
- inline uint8_t &DotProductDomain<Givaro::Modular<uint8_t,Compute_t> >::dotSpecializedDD
- (uint8_t &res, const Vector1 &v1, const Vector2 &v2) const
- {
- typename Vector1::const_iterator i = v1.begin ();
- typename Vector2::const_iterator j = v2.begin ();
-
- typename Vector1::const_iterator iterend = v1.begin () + (ptrdiff_t)(v1.size() % faxpy()._k);
-
- uint64_t y = 0;
-
- for (; i != iterend; ++i, ++j)
- y += (uint64_t) *i * (uint64_t) *j;
-
- y %= (uint64_t) field().characteristic();
-
- for (; iterend != v1.end (); j += (ptrdiff_t)faxpy()._k) {
- typename Vector1::const_iterator iter_i = iterend;
- typename Vector2::const_iterator iter_j;
-
- iterend += (ptrdiff_t)faxpy()._k;
-
- for (iter_j = j; iter_i != iterend; ++iter_i, ++iter_j)
- y += (uint64_t) *iter_i * (uint64_t) *j;
-
- y %= (uint64_t) field().characteristic();
- }
-
- return res = (uint8_t) y;
- }
-
- template<typename Compute_t>
- template <class Vector1, class Vector2>
- inline uint8_t &DotProductDomain<Givaro::Modular<uint8_t,Compute_t> >::dotSpecializedDSP
- (uint8_t &res, const Vector1 &v1, const Vector2 &v2) const
- {
- typename Vector1::first_type::const_iterator i_idx = v1.first.begin ();
- typename Vector1::second_type::const_iterator i_elt = v1.second.begin ();
-
- uint64_t y = 0;
-
- if (v1.first.size () < faxpy()._k) {
- for (; i_idx != v1.first.end (); ++i_idx, ++i_elt)
- y += (uint64_t) *i_elt * (uint64_t) v2[*i_idx];
-
- return res = uint8_t (y % (uint64_t) field().characteristic());
- }
- else {
- typename Vector1::first_type::const_iterator iterend = v1.first.begin () +(ptrdiff_t)( v1.first.size() % faxpy()._k);
-
- for (; i_idx != iterend; ++i_idx, ++i_elt)
- y += (uint64_t) *i_elt * (uint64_t) v2[*i_idx];
-
- y %= (uint64_t) field().characteristic();
-
- while (iterend != v1.first.end ()) {
- typename Vector1::first_type::const_iterator iter_i_idx = iterend;
- typename Vector1::second_type::const_iterator iter_i_elt = i_elt;
-
- iterend += (ptrdiff_t)faxpy()._k;
- i_elt += (ptrdiff_t)faxpy()._k;
-
- for (; iter_i_idx != iterend; ++iter_i_idx, ++iter_i_elt)
- y += (uint64_t) *iter_i_elt * (uint64_t) v2[*iter_i_idx];
-
- y %= (uint64_t) field().characteristic();
- }
-
- return res = (uint8_t) y;
- }
- }
-
- template<typename Compute_t>
- template <class Vector1, class Vector2>
- inline uint16_t &DotProductDomain<Givaro::Modular<uint16_t,Compute_t> >::dotSpecializedDD
- (uint16_t &res, const Vector1 &v1, const Vector2 &v2) const
- {
- typename Vector1::const_iterator i = v1.begin ();
- typename Vector2::const_iterator j = v2.begin ();
-
- typename Vector1::const_iterator iterend = v1.begin () + (ptrdiff_t)(v1.size() % faxpy()._k);
-
- uint64_t y = 0;
-
- for (; i != iterend; ++i, ++j)
- y += (uint64_t) *i * (uint64_t) *j;
-
- y %= (uint64_t) field().characteristic();
-
- for (; iterend != v1.end (); j += faxpy()._k) {
- typename Vector1::const_iterator iter_i = iterend;
- typename Vector2::const_iterator iter_j;
-
- iterend += faxpy()._k;
-
- for (iter_j = j; iter_i != iterend; ++iter_i, ++iter_j)
- y += (uint64_t) *iter_i * (uint64_t) *j;
-
- y %= (uint64_t) field().characteristic();
- }
-
- return res = (uint16_t) y;
- }
-
- template<typename Compute_t> template <class Vector1, class Vector2>
- inline uint16_t &DotProductDomain<Givaro::Modular<uint16_t,Compute_t> >::dotSpecializedDSP
- (uint16_t &res, const Vector1 &v1, const Vector2 &v2) const
- {
- typename Vector1::first_type::const_iterator i_idx = v1.first.begin ();
- typename Vector1::second_type::const_iterator i_elt = v1.second.begin ();
-
- uint64_t y = 0;
-
- if (v1.first.size () < faxpy()._k) {
- for (; i_idx != v1.first.end (); ++i_idx, ++i_elt)
- y += (uint64_t) *i_elt * (uint64_t) v2[*i_idx];
-
- return res = (uint16_t) (y % (uint64_t) field().characteristic());
- }
- else {
- typename Vector1::first_type::const_iterator iterend = v1.first.begin () +(ptrdiff_t)( v1.first.size() % faxpy()._k );
-
- for (; i_idx != iterend; ++i_idx, ++i_elt)
- y += (uint64_t) *i_elt * (uint64_t) v2[*i_idx];
-
- y %= (uint64_t) field().characteristic();
-
- while (iterend != v1.first.end ()) {
- typename Vector1::first_type::const_iterator iter_i_idx = iterend;
- typename Vector1::second_type::const_iterator iter_i_elt = i_elt;
-
- iterend += faxpy()._k;
- i_elt += faxpy()._k;
-
- for (; iter_i_idx != iterend; ++iter_i_idx, ++iter_i_elt)
- y += (uint64_t) *iter_i_elt * (uint64_t) v2[*iter_i_idx];
-
- y %= (uint64_t) field().characteristic();
- }
-
- return res = (Element) y;
- }
- }
-
- template<typename Compute_t> template <class Vector1, class Vector2>
- inline uint32_t &DotProductDomain<Givaro::Modular<uint32_t,Compute_t> >::dotSpecializedDD
- (uint32_t &res, const Vector1 &v1, const Vector2 &v2) const
- {
- typename Vector1::const_iterator i;
- typename Vector2::const_iterator j;
-
- uint64_t y = 0;
- uint64_t t;
-
- for (i = v1.begin (), j = v2.begin (); i < v1.end (); ++i, ++j) {
- t = (uint64_t) *i * (uint64_t) *j;
- y += t;
-
- if (y < t)
- y += faxpy()._two_64;
- }
-
- y %= (uint64_t) field().characteristic();
-
- return res = (uint32_t) y;
- }
-
- template<typename Compute_t> template <class Vector1, class Vector2>
- inline uint32_t &DotProductDomain<Givaro::Modular<uint32_t,Compute_t> >::dotSpecializedDSP
- (uint32_t &res, const Vector1 &v1, const Vector2 &v2) const
- {
- typename Vector1::first_type::const_iterator i_idx;
- typename Vector1::second_type::const_iterator i_elt;
-
- uint64_t y = 0;
- uint64_t t = 0;
-
- for (i_idx = v1.first.begin (), i_elt = v1.second.begin (); i_idx != v1.first.end (); ++i_idx, ++i_elt) {
- t = (uint64_t) *i_elt * (uint64_t) v2[*i_idx];
- y += t;
- if (y < t)
- y += faxpy()._two_64;
- }
-
- y %= (uint64_t) field().characteristic();
-
- return res = (uint32_t)y;
- }
-
- template<typename Compute_t> template <class Vector1, class Vector2>
- inline uint64_t &DotProductDomain<Givaro::Modular<uint64_t,Compute_t> >::dotSpecializedDD
- (uint64_t &res, const Vector1 &v1, const Vector2 &v2) const
- {
-
- typename Vector1::const_iterator i;
- typename Vector2::const_iterator j;
-
- uint64_t y = 0;
- uint64_t t;
-
- for (i = v1.begin (), j = v2.begin (); i < v1.end (); ++i, ++j)
- {
- t = ( (uint64_t) *i ) * ( (uint64_t) *j );
- y += t;
-
- if (y < t)
- y += faxpy()._two_64;
- }
-
- y %= (uint64_t) field().characteristic();
- return res = (Element)y;
-
- }
-
-
- template<typename Compute_t> template <class Vector1, class Vector2>
- inline uint64_t &DotProductDomain<Givaro::Modular<uint64_t,Compute_t> >::dotSpecializedDSP
- (uint64_t &res, const Vector1 &v1, const Vector2 &v2) const
- {
- typename Vector1::first_type::const_iterator i_idx;
- typename Vector1::second_type::const_iterator i_elt;
-
- uint64_t y = 0;
- uint64_t t;
-
- for (i_idx = v1.first.begin (), i_elt = v1.second.begin (); i_idx != v1.first.end (); ++i_idx, ++i_elt)
- {
- t = ( (uint64_t) *i_elt ) * ( (uint64_t) v2[*i_idx] );
- y += t;
-
- if (y < t)
- y += faxpy()._two_64;
- }
-
-
- y %= (uint64_t) field().characteristic();
-
- return res = (Element) y;
- }
-
-
-
-
-
- template<typename Compute_t> template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<uint8_t,Compute_t> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<uint8_t,Compute_t> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::DenseVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j, j_end;
- typename Matrix::Column::const_iterator k;
- std::vector<uint32_t>::iterator l, l_end;
-
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
-
- std::fill (_tmp.begin (), _tmp.begin () + (ptrdiff_t)w.size (), 0);
-
- l_end = _tmp.begin () +(ptrdiff_t) w.size ();
-
- do {
- j = v.begin ();
- j_end = j + __LINBOX_MIN (A->coldim (), VD.faxpy()._k);
-
- for (; j != j_end; ++j, ++i)
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
- *l += *k * *j;
-
- j_end += __LINBOX_MIN (A->coldim () - (j_end - v.begin ()), VD.faxpy()._k);
-
- for (l =_tmp.begin (); l != l_end; ++l)
- *l %= VD.field ().characteristic();
-
- } while (j_end != v.end ());
-
- typename Vector1::iterator w_j;
-
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = *l;
-
- return w;
- }
-
- template<typename Compute_t> template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<uint8_t,Compute_t> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<uint8_t,Compute_t> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseSequenceVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j, j_end;
- typename Matrix::Column::const_iterator k;
- std::vector<uint32_t>::iterator l, l_end;
-
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
-
- std::fill (_tmp.begin (), _tmp.begin () + (ptrdiff_t)w.size (), 0);
-
- l_end = _tmp.begin () + (ptrdiff_t)w.size ();
-
-
- do {
- j = v.begin ();
- j_end = j + __LINBOX_MIN (A->coldim (), VD.faxpy()._k);
-
- for (; j != j_end; ++j, ++i)
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
- _tmp[k->first] += k->second * *j;
-
- j_end += __LINBOX_MIN (A->coldim () - (j_end - v.begin ()), VD.faxpy()._k);
-
- for (l =_tmp.begin (); l != l_end; ++l)
- *l %= VD.field ().characteristic();
-
- } while (j_end != v.end ());
-
- typename Vector1::iterator w_j;
-
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = *l;
-
- return w;
- }
-
- template<typename Compute_t> template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<uint8_t,Compute_t> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<uint8_t,Compute_t> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseAssociativeVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j, j_end;
- typename Matrix::Column::const_iterator k;
- std::vector<uint32_t>::iterator l, l_end;
-
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
-
- std::fill (_tmp.begin (), _tmp.begin () + (ptrdiff_t)w.size (), 0);
-
- l_end = _tmp.begin () +(ptrdiff_t) w.size ();
-
- do {
- j = v.begin ();
- j_end = j + __LINBOX_MIN (A->coldim (), VD.faxpy()._k);
-
- for (; j != j_end; ++j, ++i)
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
- _tmp[k->first] += k->second * *j;
-
- j_end += __LINBOX_MIN (A->coldim () - (j_end - v.begin ()), VD.faxpy()._k);
-
- for (l =_tmp.begin (); l != l_end; ++l)
- *l %= VD.field ().characteristic();
-
- } while (j_end != v.end ());
-
- typename Vector1::iterator w_j;
-
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = *l;
-
- return w;
- }
-
- template<typename Compute_t> template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<uint8_t,Compute_t> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<uint8_t,Compute_t> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseParallelVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j, j_end;
- typename Matrix::Column::first_type::const_iterator k_idx;
- typename Matrix::Column::second_type::const_iterator k_elt;
- std::vector<uint32_t>::iterator l, l_end;
-
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
-
- std::fill (_tmp.begin (), _tmp.begin () + (ptrdiff_t)w.size (), 0);
-
- l_end = _tmp.begin () + (ptrdiff_t)w.size ();
-
- do {
- j = v.begin ();
- j_end = j + (ptrdiff_t)__LINBOX_MIN (uint64_t (A.coldim ()), VD.faxpy()._k);
-
- for (; j != j_end; ++j, ++i)
- for (k_idx = i->first.begin (), k_elt = i->second.begin (), l = _tmp.begin ();
- k_idx != i->first.end ();
- ++k_idx, ++k_elt, ++l)
- _tmp[*k_idx] += *k_elt * *j;
-
- j_end += (ptrdiff_t) __LINBOX_MIN (uint64_t (A.coldim () - (size_t)(j_end - v.begin ())), VD.faxpy()._k);
-
- for (l =_tmp.begin (); l != l_end; ++l)
- *l %= VD.field ().characteristic();
-
- } while (j_end != v.end ());
-
- typename Vector1::iterator w_j;
- typedef typename Vector1::value_type val_t ;
-
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = (val_t) *l;
-
- return w;
- }
-
- template<typename Compute_t> template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<uint16_t,Compute_t> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<uint16_t,Compute_t> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::DenseVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j = v.begin (), j_end;
- typename Matrix::Column::const_iterator k;
- // Dan Roche, 7-1-04
- // std::vector<uint32_t>::iterator l, l_end;
- std::vector<uint64_t>::iterator l, l_end;
-
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
-
- std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
-
- l_end = _tmp.begin () +(ptrdiff_t) w.size ();
-
- do {
- j = v.begin ();
- j_end = j + __LINBOX_MIN (A->coldim (), VD.faxpy()._k);
-
- for (; j != j_end; ++j, ++i)
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
- *l += *k * *j;
-
- j_end += __LINBOX_MIN (A->coldim () - (j_end - v.begin ()), VD.faxpy()._k);
-
- for (l =_tmp.begin (); l != l_end; ++l)
- *l %= VD.field ().characteristic();
-
- } while (j_end != v.end ());
-
- typename Vector1::iterator w_j;
-
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = *l;
-
- return w;
- }
-
- template<typename Compute_t> template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<uint16_t,Compute_t> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<uint16_t,Compute_t> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseSequenceVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j, j_end;
- typename Matrix::Column::const_iterator k;
- // Dan Roche, 7-1-04
- // std::vector<uint32_t>::iterator l, l_end;
- std::vector<uint64_t>::iterator l, l_end;
-
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
-
- std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
-
- l_end = _tmp.begin () +(ptrdiff_t) w.size ();
-
- do {
- j = v.begin ();
- j_end = j + __LINBOX_MIN (A->coldim (), VD.faxpy()._k);
-
- for (; j != j_end; ++j, ++i)
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
- _tmp[k->first] += k->second * *j;
-
- j_end += __LINBOX_MIN (A->coldim () - (j_end - v.begin ()), VD.faxpy()._k);
-
- for (l =_tmp.begin (); l != l_end; ++l)
- *l %= VD.field ().characteristic();
-
- } while (j_end != v.end ());
-
- typename Vector1::iterator w_j;
-
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = *l;
-
- return w;
- }
-
- template<typename Compute_t> template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<uint16_t,Compute_t> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<uint16_t,Compute_t> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseAssociativeVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j, j_end;
- typename Matrix::Column::const_iterator k;
- // Dan Roche, 7-1-04
- // std::vector<uint32_t>::iterator l, l_end;
- std::vector<uint64_t>::iterator l, l_end;
-
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
-
- std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
-
- l_end = _tmp.begin () +(ptrdiff_t) w.size ();
-
- do {
- j = v.begin ();
- j_end = j + __LINBOX_MIN (A->coldim (), VD.faxpy()._k);
-
- for (; j != j_end; ++j, ++i)
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l)
- _tmp[k->first] += k->second * *j;
-
- j_end += __LINBOX_MIN (A->coldim () - (j_end - v.begin ()), VD.faxpy()._k);
-
- for (l =_tmp.begin (); l != l_end; ++l)
- *l %= VD.field ().characteristic();
-
- } while (j_end != v.end ());
-
- typename Vector1::iterator w_j;
-
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = *l;
-
- return w;
- }
-
- template<typename Compute_t> template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<uint16_t,Compute_t> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<uint16_t,Compute_t> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseParallelVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j, j_end;
- typename Matrix::Column::first_type::const_iterator k_idx;
- typename Matrix::Column::second_type::const_iterator k_elt;
- // Dan Roche, 7-1-04
- // std::vector<uint32_t>::iterator l, l_end;
- std::vector<uint64_t>::iterator l, l_end;
-
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
-
- std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
-
- l_end = _tmp.begin () +(ptrdiff_t) w.size ();
-
- do {
- j = v.begin ();
- //Dan Roche, 7-2-04
- //j_end = j + __LINBOX_MIN (A->coldim (), VD.faxpy()._k);
- j_end = j + __LINBOX_MIN (A.coldim (), VD.faxpy()._k);
-
- for (; j != j_end; ++j, ++i)
- for (k_idx = i->first.begin (), k_elt = i->second.begin (), l = _tmp.begin ();
- k_idx != i->first.end ();
- ++k_idx, ++k_elt, ++l)
- _tmp[*k_idx] += *k_elt * *j;
-
- //j_end += __LINBOX_MIN (A->coldim () - (j_end - v.begin ()), VD.faxpy()._k);
- j_end += __LINBOX_MIN (A.coldim () - (j_end - v.begin ()), VD.faxpy()._k);
-
- for (l =_tmp.begin (); l != l_end; ++l)
- *l %= VD.field ().characteristic();
-
- } while (j_end != v.end ());
-
- typename Vector1::iterator w_j;
-
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = *l;
-
- return w;
- }
-
- template<typename Compute_t> template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<uint32_t,Compute_t> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<uint32_t,Compute_t> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::DenseVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j;
- typename Matrix::Column::const_iterator k;
- std::vector<uint64_t>::iterator l;
-
- uint64_t t;
-
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
-
- std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
-
- for (j = v.begin (); j != v.end (); ++j, ++i) {
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l) {
- t = ((uint64_t) *k) * ((uint64_t) *j);
-
- *l += t;
-
- if (*l < t)
- *l += VD.faxpy()._two_64;
- }
- }
-
- typename Vector1::iterator w_j;
- typedef typename Vector1::value_type element;
-
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = (element)(*l % VD.field ().characteristic());
-
- return w;
- }
-
- template<typename Compute_t> template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<uint32_t,Compute_t> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<uint32_t,Compute_t> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseSequenceVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j;
- typename Matrix::Column::const_iterator k;
- std::vector<uint64_t>::iterator l;
-
- uint64_t t;
-
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
-
- std::fill (_tmp.begin (), _tmp.begin () + (ptrdiff_t) w.size (), 0);
-
- for (j = v.begin (); j != v.end (); ++j, ++i) {
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l) {
- t = ((uint64_t) k->second) * ((uint64_t) *j);
-
- _tmp[k->first] += t;
-
- if (_tmp[k->first] < t)
- _tmp[k->first] += VD.faxpy()._two_64;
- }
- }
-
- typename Vector1::iterator w_j;
- typedef typename Vector1::value_type val_t;
-
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = val_t(*l % VD.field ().characteristic());
-
- return w;
- }
-
- template<typename Compute_t> template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<uint32_t,Compute_t> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<uint32_t,Compute_t> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseAssociativeVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j;
- typename Matrix::Column::const_iterator k;
- std::vector<uint64_t>::iterator l;
-
- uint64_t t;
-
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
-
- std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
-
- for (j = v.begin (); j != v.end (); ++j, ++i) {
- for (k = i->begin (), l = _tmp.begin (); k != i->end (); ++k, ++l) {
- t = ((uint64_t) k->second) * ((uint64_t) *j);
-
- _tmp[k->first] += t;
-
- if (_tmp[k->first] < t)
- _tmp[k->first] += VD.faxpy()._two_64;
- }
- }
-
- typename Vector1::iterator w_j;
-
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = (uint32_t) (uint32_t)*l % VD.field ().characteristic();
-
- return w;
- }
-
- template<typename Compute_t> template <class Vector1, class Matrix, class Vector2>
- Vector1 &MVProductDomain<Givaro::Modular<uint32_t,Compute_t> >::mulColDenseSpecialized
- (const VectorDomain<Givaro::Modular<uint32_t,Compute_t> > &VD, Vector1 &w, const Matrix &A, const Vector2 &v,
- VectorCategories::SparseParallelVectorTag) const
- {
- linbox_check (A.coldim () == v.size ());
- linbox_check (A.rowdim () == w.size ());
-
- typename Matrix::ConstColIterator i = A.colBegin ();
- typename Vector2::const_iterator j;
- typename Matrix::Column::first_type::const_iterator k_idx;
- typename Matrix::Column::second_type::const_iterator k_elt;
- std::vector<uint64_t>::iterator l;
-
- uint64_t t;
-
- if (_tmp.size () < w.size ())
- _tmp.resize (w.size ());
-
- std::fill (_tmp.begin (), _tmp.begin () +(ptrdiff_t) w.size (), 0);
-
- for (j = v.begin (); j != v.end (); ++j, ++i) {
- for (k_idx = i->first.begin (), k_elt = i->second.begin (), l = _tmp.begin ();
- k_idx != i->first.end ();
- ++k_idx, ++k_elt, ++l)
- {
- t = ((uint64_t) *k_elt) * ((uint64_t) *j);
-
- _tmp[*k_idx] += t;
-
- if (_tmp[*k_idx] < t)
- _tmp[*k_idx] += VD.faxpy()._two_64;
- }
- }
-
- typename Vector1::iterator w_j;
- typedef typename Vector1::value_type val_t;
-
- for (w_j = w.begin (), l = _tmp.begin (); w_j != w.end (); ++w_j, ++l)
- *w_j = val_t(*l % VD.field ().characteristic());
-
- return w;
- }
-
-}
-
-#endif // __LINBOX_field_modular_INL
-
-
-// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,:0,t0,+0,=s
-// Local Variables:
-// mode: C++
-// tab-width: 8
-// indent-tabs-mode: nil
-// c-basic-offset: 8
-// End:
-
diff --git a/linbox/ring/ntl/ntl-gf2e.h b/linbox/ring/ntl/ntl-gf2e.h
index 77b0457..67e6fd3 100644
--- a/linbox/ring/ntl/ntl-gf2e.h
+++ b/linbox/ring/ntl/ntl-gf2e.h
@@ -128,7 +128,7 @@ namespace LinBox
const Element zero,one,mOne ;
- NTL_GF2E (const integer &p, const integer &k) :
+ NTL_GF2E (const integer &p, const int32_t &k) :
NTL_GF2E_Initialiser(p,k),Father_t ()
,zero( NTL::to_GF2E(0)),one( NTL::to_GF2E(1)),mOne(-one)
{ }
diff --git a/linbox/ring/ntl/ntl-lzz_pex.h b/linbox/ring/ntl/ntl-lzz_pex.h
index a34fef5..d047f53 100644
--- a/linbox/ring/ntl/ntl-lzz_pex.h
+++ b/linbox/ring/ntl/ntl-lzz_pex.h
@@ -92,7 +92,7 @@ namespace LinBox
/** Standard LinBox field constructor. The paramters here
* (prime, exponent) are only used to initialize the coefficient field.
*/
- NTL_zz_pEX( const integer& p, size_t e = 1 ) :
+ NTL_zz_pEX( const integer& p, int32_t e = 1 ) :
// Givaro::ZRing<NTL::zz_pEX>(p, e), _CField(p,e)
NTL_zz_pEX_Initialiser(p,e),Father_t ()
, zero( NTL::to_zz_pEX(0)),one( NTL::to_zz_pEX(1)),mOne(-one)
diff --git a/linbox/ring/ntl/ntl-zz_p.h b/linbox/ring/ntl/ntl-zz_p.h
index dda2dc6..b1d3067 100644
--- a/linbox/ring/ntl/ntl-zz_p.h
+++ b/linbox/ring/ntl/ntl-zz_p.h
@@ -81,7 +81,7 @@ namespace Givaro
x = 0;
for (ptrdiff_t i = 0; i < nb; i++) {
- x += Integer( txt[i] )<<(8*i) ;
+ x += Integer( txt[i] )<< int32_t(8*i) ;
}
delete [] txt;
return x;
@@ -283,7 +283,7 @@ namespace LinBox
x = 0;
for (ptrdiff_t i = 0; i < nb; i++) {
- x += LinBox::integer( txt[i] )<<(8*i) ;
+ x += LinBox::integer( txt[i] )<<int32_t(8*i) ;
}
delete [] txt;
return x;
diff --git a/linbox/ring/ntl/ntl-zz_pe.h b/linbox/ring/ntl/ntl-zz_pe.h
index f61d9a7..a540d3e 100644
--- a/linbox/ring/ntl/ntl-zz_pe.h
+++ b/linbox/ring/ntl/ntl-zz_pe.h
@@ -139,7 +139,7 @@ namespace LinBox
const Element zero,one,mOne ;
- NTL_ZZ_pE (const integer &p, const integer &k) :
+ NTL_ZZ_pE (const integer &p, const int32_t &k) :
NTL_ZZ_pE_Initialiser(p,k),Father_t ()
,zero( NTL::to_ZZ_pE(0)),one( NTL::to_ZZ_pE(1)),mOne(-one)
@@ -370,8 +370,8 @@ namespace LinBox
public:
typedef NTL::ZZ_pE Element;
UnparametricRandIter<NTL::ZZ_pE>(const NTL_ZZ_pE & F ,
- const integer& size =0,
- const integer& seed =0
+ const int32_t& size =0,
+ const int32_t& seed =0
) :
_size(size), _seed(seed), _ring(F)
{
diff --git a/linbox/ring/pir-modular-int32.h b/linbox/ring/pir-modular-int32.h
index 4b7d4d3..002d1f2 100644
--- a/linbox/ring/pir-modular-int32.h
+++ b/linbox/ring/pir-modular-int32.h
@@ -26,6 +26,8 @@
#define __LINBOX_pir_modular_int32_H
#include <givaro/modular-int32.h>
+//#include <linbox/util/debug.h>
+#include <linbox/vector/vector-domain.h>
//#include "linbox/ring/modular.h"
#ifndef LINBOX_MAX_INT
diff --git a/linbox/ring/pir-ntl-zz_p.h b/linbox/ring/pir-ntl-zz_p.h
index c4bca76..673832e 100644
--- a/linbox/ring/pir-ntl-zz_p.h
+++ b/linbox/ring/pir-ntl-zz_p.h
@@ -643,7 +643,7 @@ namespace LinBox
class DotProductDomain;
template <>
- class DotProductDomain<PIR_ntl_ZZ_p> : private VectorDomainBase<PIR_ntl_ZZ_p> {
+ class DotProductDomain<PIR_ntl_ZZ_p> : public VectorDomainBase<PIR_ntl_ZZ_p> {
public:
typedef PIR_ntl_ZZ_p::Element Element;
diff --git a/linbox/solutions/smith-form.h b/linbox/solutions/smith-form.h
index 94d5a05..9b62d20 100644
--- a/linbox/solutions/smith-form.h
+++ b/linbox/solutions/smith-form.h
@@ -39,25 +39,32 @@
namespace LinBox
{
- //! no doc.
- template<class I1, class Lp>
- void distinct (I1 a, I1 b, Lp& c)
+
+ // EC: pair(e,c) denotes c repetitions of element e.
+#define EC(Elt) std::pair<typename Elt, size_t>
+ // EC_LIST: list of such pairs, compact form of invariant list.
+#define EC_LIST(Elt) std::list<EC(Elt) >
+ // Convert from vector of invariants (with repeats) to EC_LIST form.
+ template<class Ring>
+ EC_LIST(Ring::Element) &
+ distinct(EC_LIST(Ring::Element) & c, const BlasVector<Ring>& v)
{
- typename iterator_traits<I1>::value_type e;
+ typename Ring::Element e;
size_t count = 0;
- if (a != b) {e = *a; ++a; count = 1;}
- else return;
- while (a != b)
- { if (*a == e)
- ++count;
+ const Ring& R = v.field();
+ size_t n = v.size();
+ if (n > 0) R.assign(e, v[0]); else return c;
+ count = 1;
+ for (size_t i = 1; i < v.size(); ++i)
+ { if (R.areEqual(v[i], e))
+ ++count;
else
- { c.push_back(typename Lp::value_type(e, count));
- e = *a; count = 1;
+ { c.push_back(EC(Ring::Element)(e, count));
+ R.assign(e, v[i]); count = 1;
}
- ++a;
}
- c.push_back(typename Lp::value_type(e, count));
- return;
+ c.push_back(EC(Ring::Element)(e, count));
+ return c;
}
@@ -75,14 +82,37 @@ namespace LinBox
For now see the examples/smith.C
for ways to call other smith form algorithms.
*/
- template <class Output, class Blackbox, class MyMethod>
- Output &smithForm(Output & S,
- const Blackbox &A,
- const MyMethod &M)
+ /*
+ BB has to be dense matrix
+ PL means EC_list (list of value repcount pairs)
+ VL means diag of smith form as a BlasVector.
+ SNF function forms:
+ template<BB> smithForm(PL, BB) -> add Hybrid
+ template<BB> smithForm(VL, BB) -> add Hybrid
+ template<BB,Meth> smithForm(PL, BB, Meth) -> add IntegerTag
+ template<BB,Meth> smithForm(VL, BB, Meth) -> add IntegerTag
+ smithForm(PL, BB, IntegerTag, Hybrid) -> call adaptive
+ smithForm(VL, BB, IntegerTag, Hybrid) -> call adaptive
+ */
+
+ template <class Blackbox, class Method>
+ EC_LIST(Blackbox::Field::Element) &
+ smithForm(EC_LIST(Blackbox::Field::Element) & S,
+ const Blackbox & A,
+ const Method & M)
{
smithForm(S, A, typename FieldTraits<typename Blackbox::Field>::categoryTag(), M);
return S;
}
+ template <class Blackbox, class Method>
+ BlasVector<typename Blackbox::Field> &
+ smithForm(BlasVector<typename Blackbox::Field> & V,
+ const Blackbox & A,
+ const Method & M)
+ {
+ smithForm(V, A, typename FieldTraits<typename Blackbox::Field>::categoryTag(), M);
+ return V;
+ }
#if 0
// for specialization with respect to the DomainCategory
@@ -97,14 +127,22 @@ namespace LinBox
#endif
// The smithForm with default Method
- template<class Output, class Blackbox>
- Output &smithForm(Output& S,
+ template<class Blackbox>
+ EC_LIST(Blackbox::Field::Element) &
+ smithForm(EC_LIST(Blackbox::Field::Element) & S,
const Blackbox& A)
{
-
smithForm(S, A, Method::Hybrid());
return S;
}
+ template<class Blackbox>
+ BlasVector<typename Blackbox::Field> &
+ smithForm(BlasVector<typename Blackbox::Field> & V,
+ const Blackbox& A)
+ {
+ smithForm(V, A, Method::Hybrid());
+ return V;
+ }
#if 0
// The smithForm for ModularTag
@@ -125,7 +163,7 @@ namespace LinBox
}
else
{
- integr x; size_t c;
+ integer x; size_t c;
for(x = p, c = 0; divides(2, x); x /= 2, ++c);
if (x == 1 && c <= 32) // (a low power of 2)
@@ -157,8 +195,8 @@ namespace LinBox
std::list<std::pair<integer, size_t> > &
smithForm(std::list<std::pair<integer, size_t> >& S,
*/
- template<class Output> Output&
- smithForm(Output & S,
+ EC_LIST(Givaro::ZRing<Integer>::Element) &
+ smithForm(EC_LIST(Givaro::ZRing<Integer>::Element) & S,
const BlasMatrix<Givaro::ZRing<Integer> > &A,
const RingCategories::IntegerTag &tag,
const Method::Hybrid & M)
@@ -166,9 +204,18 @@ namespace LinBox
Givaro::ZRing<Integer> Z;
BlasVector<Givaro::ZRing<Integer> > v (Z,A.rowdim() < A.coldim() ? A.rowdim() : A.coldim());
SmithFormAdaptive::smithForm(v, A);
- distinct(v.begin(), v.end(), S);
-
- return S;
+ //distinct(v.begin(), v.end(), S);
+ return distinct(S,v);
+ }
+ BlasVector<typename Givaro::ZRing<Integer> > &
+ smithForm(BlasVector<typename Givaro::ZRing<Integer> > & V,
+ const BlasMatrix<Givaro::ZRing<Integer> > &A,
+ const RingCategories::IntegerTag &tag,
+ const Method::Hybrid & M)
+ {
+ Givaro::ZRing<Integer> Z;
+ SmithFormAdaptive::smithForm(V, A);
+ return V;
}
//#endif
@@ -185,6 +232,8 @@ namespace LinBox
}
#endif
+#undef EC
+#undef EC_LIST
} // end of LinBox namespace
#endif // __LINBOX_smith_form_H
diff --git a/linbox/util/Makefile.am b/linbox/util/Makefile.am
index 7feebe7..32786c9 100644
--- a/linbox/util/Makefile.am
+++ b/linbox/util/Makefile.am
@@ -21,7 +21,7 @@
#we now need to include givaro headers for timer ?
-AM_CPPFLAGS= -I$(top_srcdir)/linbox $(DEPS_CFLAGS)
+AM_CPPFLAGS= -I$(top_srcdir)/linbox $(DEPS_CFLAGS) $(DEFAULT_CFLAGS)
LDADD = $(DEPS_LIBS) $(LDFLAGS)
SUBDIRS=formats
diff --git a/linbox/vector/blas-vector.h b/linbox/vector/blas-vector.h
index 8dcdef5..4ee7675 100644
--- a/linbox/vector/blas-vector.h
+++ b/linbox/vector/blas-vector.h
@@ -198,7 +198,7 @@ namespace LinBox { /* BlasVector */
#if (__GNUC__ == 4 && __GNUC_MINOR__ ==4 && __GNUC_PATCHLEVEL__==5)
BlasVector (const _Field &F, const long &m, const Element e=Element()) :
Father_t(),
- _size((size_t)m),_1stride(1),_rep((size_t)_size, e),_ptr(&_rep[0]),_field(&F)
+ _size((uint32_t)m),_1stride(1),_rep(_size, e),_ptr(&_rep[0]),_field(&F)
{
// Father_t is garbage until then:
setIterators();
@@ -212,7 +212,7 @@ namespace LinBox { /* BlasVector */
#if defined(__APPLE__) || (defined(__s390__) && !defined(__s390x__))
BlasVector (const _Field &F, const unsigned long &m, const Element e=Element()) :
Father_t(),
- _size((size_t)m),_1stride(1),_rep((size_t)_size, e),_ptr(&_rep[0]),_field(&F)
+ _size((uint32_t)m),_1stride(1),_rep(_size, e),_ptr(&_rep[0]),_field(&F)
{
// Father_t is garbage until then:
setIterators();
@@ -273,7 +273,7 @@ namespace LinBox { /* BlasVector */
BlasVector (const _Field &F, const Integer & m, const Element e=Element()) :
Father_t(),
- _size((size_t)m),_1stride(1),_rep((size_t)_size, e),_ptr(&_rep[0]),_field(&F)
+ _size((uint32_t)m),_1stride(1),_rep(_size, e),_ptr(&_rep[0]),_field(&F)
{
// Father_t is garbage until then:
setIterators();
diff --git a/macros/fflas-ffpack-check.m4 b/macros/fflas-ffpack-check.m4
index ec18e85..8ce2f6f 100644
--- a/macros/fflas-ffpack-check.m4
+++ b/macros/fflas-ffpack-check.m4
@@ -58,7 +58,7 @@ dnl FFLAS-FFPACK VERSION dnl
dnl -------------------- dnl
version_min=20200
-version_max=20202
+version_max=20300
dnl Check for existence
@@ -75,7 +75,7 @@ for FFLAS_FFPACK_HOME in ${FFLAS_FFPACK_HOME_PATH}
FFLAS_FFPACK_LIBS=`$FFLAS_FFPACK_HOME/bin/fflas-ffpack-config --libs`
FFLAS_FFPACK_CFLAGS=`$FFLAS_FFPACK_HOME/bin/fflas-ffpack-config --cflags`
- CXXFLAGS="${BACKUP_CXXFLAGS} ${FFLAS_FFPACK_CFLAGS}"
+ CXXFLAGS="${BACKUP_CXXFLAGS} ${FFLAS_FFPACK_CFLAGS} -O2"
LIBS="${BACKUP_LIBS} ${FFLAS_FFPACK_LIBS}"
AC_TRY_LINK(
diff --git a/tests/.gitignore b/tests/.gitignore
index 8158140..174a91a 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -1,4 +1,4 @@
-checker
+machecker
test-bitonic-sort
test-blackbox-block-container
test-blas-domain
diff --git a/tests/jenkins-maker.sh b/tests/jenkins-maker.sh
new file mode 100755
index 0000000..ee3b3d5
--- /dev/null
+++ b/tests/jenkins-maker.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# This file is part of the LinBox library.
+# It is distributed under the terms of the LGPL licence version 2.1 or later
+# (see COPYING)
+# Created by AB - 2014/12/03
+# Modified by AC - 2016/06/20
+# Modified by CP - 2016/06/22
+
+# Some influential environment variables:
+# CXX C++ compiler command
+# CXXFLAGS C++ compiler flags
+
+# Note: This script is intended to be launched
+# by the Jenkins web interface whenever it needs
+# to compile the project.
+# It is launched from the svn:trunk root directory.
+# But should be stored in /<slave_jenkins_path>/makers/
+
+SOURCE_DIRECTORY=$( cd "$( dirname "$0" )" && pwd )
+
+#=============================#
+# Change only these variables #
+#=============================#
+CXX=`pwd | awk -F/ '{print $(NF-2)}'`
+NTL=`pwd | awk -F/ '{print $NF}'`
+JENKINS_DIR=${SOURCE_DIRECTORY%%/workspace/*}
+LOCAL_DIR="$JENKINS_DIR"/local
+# Add path to compilers (if needed)
+export PATH=$PATH:/usr/local/bin:"$LOCAL_DIR/$CXX/bin"
+echo $PATH
+# Add specific locations (if needed)
+export LD_LIBRARY_PATH="$LD_LIBRARY_PATH":/usr/local/lib:"$LOCAL_DIR/$CXX/lib":"$LOCAL_DIR/$CXX/withSSE/lib"
+echo "LD_LIBRARY_PATH = $LD_LIBRARY_PATH"
+export PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:"$LOCAL_DIR/$CXX/lib/pkgconfig":"$LOCAL_DIR/$CXX/withSSE/lib/pkgconfig"
+echo "PKG_CONFIG_PATH = $PKG_CONFIG_PATH"
+
+# Where to install linbox binaries
+# Keep default for local installation.
+PREFIX_INSTALL="$LOCAL_DIR/$CXX"
+
+# Job Linbox with Ntl option flag
+if [ "$NTL" == "withNTL" ]; then
+ LINBOX_NTLFLAG="--with-ntl=$PREFIX_INSTALL"
+fi
+
+# /!\ Warning /!\ This could be an issue if you changed
+# the local installation directory
+rm -rf "$PREFIX_INSTALL"/bin/linbox* "$PREFIX_INSTALL"/include/linbox* "$PREFIX_INSTALL"/lib/liblinbox*
+
+#================#
+# Setup Variables#
+#================#
+
+if [ "$CXX" == "icpc" ]; then
+ distribution=`uname -m`
+ if [ "$distribution" == "i686" ]; then
+ source /usr/local/bin/compilervars.sh ia32
+ else
+ source /usr/local/bin/compilervars.sh intel64
+ fi
+fi
+
+# Particular case for Fedora23: g++=g++-5.3
+#vm_name=`uname -n | cut -d"-" -f1`
+#if [[ "$vm_name" == "fedora" && "$CXX" == "g++-5.3" ]]; then
+# CXX="g++"
+#fi
+
+#==================================#
+# Automated installation and tests #
+#==================================#
+# Log the configure command, then run it; optional flags that are unset expand to nothing (not to an empty "" argument).
+echo "|=== JENKINS AUTOMATED SCRIPT ===| ./autogen.sh CXX=$CXX CXXFLAGS=$CXXFLAGS --prefix=$PREFIX_INSTALL $LINBOX_NTLFLAG $LINBOX_FFLASFFPACKFLAG"
+./autogen.sh CXX="$CXX" CXXFLAGS="$CXXFLAGS" --prefix="$PREFIX_INSTALL" ${LINBOX_NTLFLAG:+"$LINBOX_NTLFLAG"} ${LINBOX_FFLASFFPACKFLAG:+"$LINBOX_FFLASFFPACKFLAG"}
+V="$?"; if test "x$V" != "x0";then exit "$V"; fi
+
+echo "|=== JENKINS AUTOMATED SCRIPT ===| make install"
+make install
+V="$?"; if test "x$V" != "x0"; then exit "$V"; fi
+
+echo "|=== JENKINS AUTOMATED SCRIPT ===| make perfpublisher"
+make perfpublisher
+
+echo "|=== JENKINS AUTOMATED SCRIPT ===| make examples"
+make examples
+V="$?"; if test "x$V" != "x0"; then exit "$V"; fi
+
diff --git a/tests/perfpublisher.sh b/tests/perfpublisher.sh
index 2c3c452..8c751de 100755
--- a/tests/perfpublisher.sh
+++ b/tests/perfpublisher.sh
@@ -8,12 +8,24 @@ XMLFILE=$1
tests=$2
COMPILER=$3
+# choose gdate on OS X
+if command -v "gdate" >/dev/null; then
+ DATE=gdate
+else
+ DATE=date
+fi
#=================#
# Plateform infos #
#=================#
COMPILERVERSION=$($COMPILER --version 2>&1 | head -1)
-CPUFREQ=$(lscpu | grep "MHz" | rev | cut -f1 -d' ' | rev)
+
+if command -v "lscpu" >/dev/null; then
+ CPUFREQ=$(lscpu | grep "MHz" | rev | cut -f1 -d' ' | rev)
+else
+ CPUFREQ=$((`sysctl -n hw.cpufrequency`/1000000))
+fi
+
ARCH=$(uname -m)
OSNAME=$(uname -s)
OSVERSION=$(uname -r)
@@ -45,8 +57,8 @@ echo '<report name="tests-report" categ="tests">' >> $XMLFILE
#=======#
echo '<start>' >> $XMLFILE
-echo '<date format="YYYYMMDD" val="'$(date +%Y%m%d)'" />' >> $XMLFILE
-echo '<time format="HHMMSS" val="'$(date +%H%M%S)'" />' >> $XMLFILE
+echo '<date format="YYYYMMDD" val="'$($DATE +%Y%m%d)'" />' >> $XMLFILE
+echo '<time format="HHMMSS" val="'$($DATE +%H%M%S)'" />' >> $XMLFILE
echo '</start>' >> $XMLFILE
#=======#
@@ -59,9 +71,9 @@ do
then
#File does not exist: compile it
echo '[Compiling]' $test
- COMPILESTART=$(date +%s%3N)
+ COMPILESTART=$($DATE +%s%3N)
COMPILELOG=$(make $test 2>&1; echo 'Returned state: '$?)
- COMPILEEND=$(date +%s%3N)
+ COMPILEEND=$($DATE +%s%3N)
COMPILETIME=$(($COMPILEEND - $COMPILESTART))
COMPILECHECK=$(echo $COMPILELOG | grep -o '[^ ]*$')
COMPILETIMERELEVANT='true'
@@ -92,9 +104,9 @@ do
#Compilation success
echo '[Executing]' $test
EXECUTED='yes'
- EXECUTIONSTART=$(date +%s%3N)
+ EXECUTIONSTART=$($DATE +%s%3N)
EXECUTIONLOG=$(./$test 2>&1; echo 'Returned state: '$?)
- EXECUTIONEND=$(date +%s%3N)
+ EXECUTIONEND=$($DATE +%s%3N)
EXECUTIONTIME=$(($EXECUTIONEND - $EXECUTIONSTART))
EXECUTIONCHECK=$(echo $EXECUTIONLOG | grep -o '[^ ]*$')
diff --git a/tests/test-charpoly.C b/tests/test-charpoly.C
index 348b470..381da8f 100644
--- a/tests/test-charpoly.C
+++ b/tests/test-charpoly.C
@@ -194,9 +194,9 @@ static bool testSageBug(){
Givaro::ZRing<Givaro::Integer> Z;
DenseMatrix<Givaro::ZRing<Givaro::Integer> > A(Z,4,4);
- for (size_t i=0; i<4; ++i)
- for (size_t j=0; j<4; ++j)
- A.setEntry(i,j, i*4+j+1);
+ for (uint32_t i=0; i<4; ++i)
+ for (uint32_t j=0; j<4; ++j)
+ A.setEntry(i,j, Givaro::Integer(i*4+j+1));
typedef BlasVector <Givaro::ZRing<Givaro::Integer> > Polynomial;
Polynomial phi(Z);
charpoly(phi,A);
diff --git a/tests/test-field.h b/tests/test-field.h
index b5189b7..959799e 100755
--- a/tests/test-field.h
+++ b/tests/test-field.h
@@ -1321,10 +1321,10 @@ namespace field_subtests {
// C++ ints. Otherwise, I don't know how to place the numbers into
// categories in any well-defined manner.
for (i = 0; i < num_trials; ++i) {
- LinBox::integer ix, id;
+ LinBox::integer ix;
F.convert(ix, iter.random (x));
-
- LinBox::Integer ix2 = ix % num_categories;
+ int32_t id;
+ int32_t ix2 = ix % num_categories;
if (ix2<0) ix2+=num_categories;
categories1[ix2]++;
categories2[(unsigned int) (double (ix2) / double (card) * num_categories) %num_categories]++;
@@ -1337,7 +1337,7 @@ namespace field_subtests {
F.convert(id, F.sub (d, *x_queue_iter, x));
id %= num_categories;
if (id<0) id += num_categories;
- (*diff_cat_iter)[(size_t) id]++;
+ (*diff_cat_iter)[id]++;
}
x_queue.push_front (x);
diff --git a/tests/test-order-basis.C b/tests/test-order-basis.C
index 18e0d0a..ea024e1 100644
--- a/tests/test-order-basis.C
+++ b/tests/test-order-basis.C
@@ -13,11 +13,13 @@
using namespace LinBox;
using namespace std;
-ostream& report = commentator().report();
+
+//ostream& report = commentator().report();
//ostream& report = std::cout;
template<typename Field, typename Mat>
string check_sigma(const Field& F, const Mat& sigma, Mat& serie, size_t ord){
+ ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION);
Mat T(F,sigma.rowdim(),serie.coldim(),sigma.size()+serie.size()-1);
PolynomialMatrixMulDomain<Field> PMD(F);
PMD.mul(T,sigma,serie);
@@ -30,11 +32,11 @@ string check_sigma(const Field& F, const Mat& sigma, Mat& serie, size_t ord){
i++;
}
if (i<ord){
- cout<<"error at degree="<<i<<endl;
+ report<<"error at degree="<<i<<endl;
T[i].write(report, Tag::FileFormat::Plain);
- cout<<"***"<<endl;
- cout<<serie<<endl;
- cout<<sigma<<endl;
+ report<<"***"<<endl;
+ report<<serie<<endl;
+ report<<sigma<<endl;
}
@@ -47,9 +49,10 @@ string check_sigma(const Field& F, const Mat& sigma, Mat& serie, size_t ord){
template<typename MatPol>
bool operator==(const MatPol& A, const MatPol& B){
+ ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION);
MatrixDomain<typename MatPol::Field> MD(A.field());
if (A.real_degree()!=B.real_degree()|| A.rowdim()!= B.rowdim() || A.coldim()!=B.coldim()){
- cout<<A.size()<<"("<<A.rowdim()<<"x"<<A.coldim()<<") <> "
+ report<<A.size()<<"("<<A.rowdim()<<"x"<<A.coldim()<<") <> "
<<B.size()<<"("<<B.rowdim()<<"x"<<B.coldim()<<") <> "<<endl;
return false;
}
@@ -58,8 +61,8 @@ bool operator==(const MatPol& A, const MatPol& B){
i++;
if (i<=A.real_degree() && A.rowdim()<10 && A.coldim()<10){
- cout<<"first:"<<endl<<A<<endl;
- cout<<"second:"<<endl<<B<<endl;
+ report<<"first:"<<endl<<A<<endl;
+ report<<"second:"<<endl<<B<<endl;
}
return i>A.real_degree();
@@ -68,6 +71,7 @@ bool operator==(const MatPol& A, const MatPol& B){
template<typename Field, typename RandIter>
void check_sigma(const Field& F, RandIter& Gen, size_t m, size_t n, size_t d) {
+ ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION);
//typedef typename Field::Element Element;
typedef PolynomialMatrix<PMType::matfirst,PMStorage::plain,Field> MatrixP;
//typedef PolynomialMatrix<PMType::polfirst,PMStorage::plain,Field> MatrixP;
@@ -90,17 +94,17 @@ void check_sigma(const Field& F, RandIter& Gen, size_t m, size_t n, size_t d) {
SB.M_Basis(Sigma3, Serie, d, shift3);
report << "M-Basis : " <<check_sigma(F,Sigma3,Serie,d)<<endl;
- SB.PM_Basis2(Sigma1,Serie, d, shift);
+ SB.PM_Basis(Sigma1,Serie, d, shift);
report << "PM-Basis : " <<check_sigma(F,Sigma1,Serie,d)<<endl;
//SB.oPM_Basis(Sigma2, Serie, d, shift2);
//report << "PM-Basis iter : " <<check_sigma(F,Sigma2,Serie,d)<<endl;
// if (!(Sigma1==Sigma2)){
- // cout<<"---> different basis for PM-Basis and PM-Basis iter"<<endl;
- // cout<<Sigma1<<endl;
- // cout<<Sigma2<<endl;
+ // report<<"---> different basis for PM-Basis and PM-Basis iter"<<endl;
+ // report<<Sigma1<<endl;
+ // report<<Sigma2<<endl;
// }
- cout<<endl;
+ report<<endl;
}
int main(int argc, char** argv){
@@ -118,14 +122,17 @@ int main(int argc, char** argv){
{ 's', "-s s", "Set the random seed to a specific value", TYPE_INT, &seed},
END_OF_ARGUMENTS
};
-
+
parseArguments (argc, argv, args);
typedef Givaro::Modular<double> SmallField;
typedef Givaro::Modular<Givaro::Integer> LargeField;
size_t logd=integer((uint64_t)d).bitsize();
+ commentator().start ("Testing order basis computation", "testOrderBasis", 1);
+
+ ostream &report = commentator().report (Commentator::LEVEL_ALWAYS, INTERNAL_DESCRIPTION);
report<<"### matrix series is of size "<<m<<" x "<<n<<" of degree "<<d<<std::endl;
if (b < 26){
if (logd>b-2){
@@ -149,8 +156,7 @@ int main(int argc, char** argv){
check_sigma(F,G,m,n,d);
}
-
-
+ commentator().stop (MSG_STATUS (true), (const char *) 0, "testOrderBasis");
return 0;
}
diff --git a/tests/test-smith-form-adaptive.C b/tests/test-smith-form-adaptive.C
index f7f1aee..b8838a0 100644
--- a/tests/test-smith-form-adaptive.C
+++ b/tests/test-smith-form-adaptive.C
@@ -1,7 +1,7 @@
/* Copyright (C) LinBox
*
* Author: Zhendong Wan
- *
+ * mods: bds
*
* ========LICENCE========
* This file is part of the library LinBox.
@@ -40,191 +40,56 @@
#include "linbox/util/commentator.h"
#include "linbox/vector/stream.h"
#include "linbox/algorithms/smith-form-adaptive.h"
-#include "test-common.h"
-using namespace LinBox; // fragile
-
-
-template <class Ring, class SmithForm, class Vector>
-bool testRandom(const Ring& R,
- const SmithForm& SF,
- LinBox::VectorStream<Vector>& stream1)
-{
-
-
- std::ostringstream str;
-
- str << "Testing the adaptive algorithm for Smith form computation:\n";
-
- commentator().start (str.str ().c_str (), "testRandom");//, stream1.m ());
-
- bool ret = true;
-
- VectorDomain<Ring> VD (R);
-
- Vector d(R), x(R);
-
- VectorWrapper::ensureDim (d, stream1.n ());
-
- VectorWrapper::ensureDim (x, stream1.n ());
-
-
- int n = (int)d. size();
-
- while (stream1) {
-
- commentator().startIteration ((unsigned)stream1.j ());
-
- std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION);
-
- bool iter_passed = true;
-
- stream1.next (d);
-
- report << "Input vector: ";
- VD.write (report, d);
- report << endl;
-
- BlasMatrix<Ring> D(R, n, n), L(R, n, n), U(R, n, n), A(R,n,n);
-
- int i, j;
-
- for(i = 0; i < n; ++i) {
- R. assign (D[(size_t)i][(size_t)i], d[(size_t)i]);
- R. assign(L[(size_t)i][(size_t)i], R.one);
- R. assign (U[(size_t)i][(size_t)i], R.one);}
-
- for (i = 0; i < n; ++ i)
-
- for (j = 0; j < i; ++ j) {
-
- R.init(L[(size_t)i][(size_t)j], rand() % 10);
-
- R.init(U[(size_t)j][(size_t)i], rand() % 10);
- }
-
-
- BlasVector<Ring> tmp1(R,(size_t)n), tmp2(R,(size_t)n), e(R,(size_t)n);
-
- typename BlasMatrix<Ring>::ColIterator col_p;
-
- i = 0;
- for (col_p = A.colBegin();
- col_p != A.colEnd(); ++ col_p, ++ i) {
-
- R.assign(e[(size_t)i],R.one);
- U.apply(tmp1, e);
- D.apply(tmp2, tmp1);
- // LinBox::BlasSubvector<BlasVector<Ring> > col_p_v (R, *col_p);
- // L.apply(col_p_v, tmp2);
- L.apply(*col_p, tmp2);
- R.assign(e[(size_t)i],R.zero);
- }
-
-
-
- Givaro::ZRing<Integer> Z; //! why switch from Ring ????
- BlasVector<Givaro::ZRing<Integer> > xi(Z,A. rowdim());
-
- SF.smithForm (xi, A);
- typename Vector::iterator x_p;
- BlasVector<Givaro::ZRing<Integer> >::iterator xi_p;
- for (x_p = x. begin(), xi_p = xi. begin(); x_p != x. end(); ++ x_p, ++ xi_p)
- A. field (). init (*x_p, *xi_p);
-
+using namespace LinBox;
- report << "Computed Smith form: \n";
-
- VD. write (report, x);
-
- report << '\n';
-
-
- typename BlasVector<Ring>::iterator p1, p2;
- typename Ring::Element g;
-
-
- for (p1 = d.begin(); p1 != d.end(); ++ p1) {
-
- for ( p2 = p1 + 1; p2 != d.end(); ++ p2) {
-
- if (R. isUnit(*p1)) break;
-
- else if (R. isZero (*p2)) continue;
-
- else if (R. isZero (*p1)) {
- std::swap (*p1, *p2);
- }
-
- else {
- R. gcd (g, *p1, *p2);
-
- R. divin (*p2, g);
-
- R. mulin (*p2, *p1);
-
- R. assign (*p1, g);
- }
- }
- }
-
-
- report << "Expected smith form:\n";
-
- VD.write (report, d);
-
- report << '\n';
-
- if (!VD.areEqual (d, x))
-
- ret = iter_passed = false;
-
- if (!iter_passed)
-
- commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_ERROR)
- << "ERROR: Computed Smith form is incorrect" << endl;
-
-
-
- commentator().stop ("done");
-
- commentator().progress ();
-
- }
-
- //stream1.reset ();
-
- commentator().stop (MSG_STATUS (ret), (const char *) 0, "testRandom");
-
- return ret;
-
-}
+#include "test-smith-form.h"
int main(int argc, char** argv)
{
bool pass = true;
+ static size_t m = 3;
static size_t n = 35;
- static unsigned int iterations = 1;
static Argument args[] = {
+ { 'm', "-m M", "Set order of test matrices to M.", TYPE_INT, &m },
{ 'n', "-n N", "Set order of test matrices to N.", TYPE_INT, &n },
- { 'i', "-i I", "Perform each test for I iterations.", TYPE_INT, &iterations },
END_OF_ARGUMENTS
};
parseArguments (argc, argv, args);
- SmithFormAdaptive sf;
commentator().start("Smith form adaptive algorithm test suite", "EGV++");
- commentator().getMessageClass (INTERNAL_DESCRIPTION).setMaxDepth (5);
- //typedef NTL_ZZ Ring; Ring R;
- typedef Givaro::ZRing<Integer> Ring; Ring R; Ring::RandIter gen(R);
- RandomDenseStream<Ring> s1 (R, gen, n, iterations);
- pass = testRandom(R, sf, s1);
+ //!@bug should be tried on NTL_ZZ too
+ typedef Givaro::ZRing<Integer> PIR;
+ PIR R;
+
+ size_t k = std::min(m,n);
+ DenseMatrix<PIR> A(R,m,n);
+ BlasVector<PIR> d(R,k), x(R,k), bumps(R,k), lumps(R,19);
+ for (size_t i = 0; i <10; ++i) lumps[i] = i;
+ for (size_t i = 10; i <19; ++i) lumps[i] = i-19;
+
+ makeBumps(bumps, 0);
+ makeSNFExample(A,d,bumps,lumps);
+ SmithFormAdaptive::smithForm (x, A);
+ pass = pass and checkSNFExample(d,x);
+
+ makeBumps(bumps, 1);
+ makeSNFExample(A,d,bumps,lumps);
+ SmithFormAdaptive::smithForm (x, A);
+ pass = pass and checkSNFExample(d,x);
+
+ makeBumps(bumps, 2);
+ makeSNFExample(A,d,bumps,lumps);
+ SmithFormAdaptive::smithForm (x, A);
+ pass = pass and checkSNFExample(d,x);
+
+ makeBumps(bumps, 3);
+ makeSNFExample(A,d,bumps,lumps);
+ SmithFormAdaptive::smithForm (x, A);
+ pass = pass and checkSNFExample(d,x);
- typedef Givaro::ZRing<Integer> Ring2; Ring2 S;Ring2::RandIter gen2(S);
- RandomDenseStream<Ring2> s2 (S, gen2, n, iterations);
- pass = pass && testRandom(S, sf, s2);
commentator().stop(MSG_STATUS(pass));
return pass ? 0 : -1;
diff --git a/tests/test-smith-form-binary.C b/tests/test-smith-form-binary.C
index b2f75d9..89b4caf 100644
--- a/tests/test-smith-form-binary.C
+++ b/tests/test-smith-form-binary.C
@@ -1,7 +1,6 @@
/* Copyright (C) LinBox
*
- *
- * Author: Zhendong Wan
+ * Author: Zhendong Wan, mods -bds
*
* ========LICENCE========
* This file is part of the library LinBox.
@@ -25,18 +24,14 @@
/*! @file tests/test-smith-form-binary.C
* @ingroup tests
- * @brief no doc.
- * @test no doc.
+ * @brief Test the EGV divide and conquer SNF alg.
*/
-
-
#include "linbox/linbox-config.h"
#ifdef __LINBOX_HAVE_NTL
#include "linbox/ring/ntl.h"
#endif
-//#include "linbox/ring/modular.h"
#include "linbox/randiter/random-prime.h"
#include "linbox/algorithms/matrix-rank.h"
#include "linbox/algorithms/last-invariant-factor.h"
@@ -45,197 +40,75 @@
#include "linbox/blackbox/scompose.h"
#include "linbox/blackbox/random-matrix.h"
#include "linbox/algorithms/rational-solver.h"
-#include <time.h>
+//#include <time.h>
#include <givaro/modular.h>
#include "linbox/util/commentator.h"
-#include "linbox/vector/stream.h"
-#include "test-common.h"
+//#include "linbox/vector/stream.h"
+//#include "test-common.h"
using namespace LinBox;
-template <class Ring, class SmithForm, class Vector>
-bool testRandom(const Ring& R,
- const SmithForm& SF,
- LinBox::VectorStream<Vector>& stream1)
-{
-
- std::ostringstream str;
-
- str << "Testing Smith Form binary(EGV++):";
-
- commentator().start (str.str ().c_str (), "testSmithform");//, stream1.m ());
-
- bool ret = true;
-
- LinBox::VectorDomain<Ring> VD (R);
-
- int n = (int) stream1.n();
- Vector d(R,n), x(R,n);
-
- // VectorWrapper::ensureDim (d, stream1.n ());
- // VectorWrapper::ensureDim (x, stream1.n ());
-
-
-
- while (stream1) {
-
- commentator().startIteration ((unsigned)stream1.j ());
-
- std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION);
-
- bool iter_passed = true;
-
- stream1.next (d);
-
- report << "Input vector: ";
- VD.write (report, d);
- report << endl;
-
- DenseMatrix<Ring> D(R, n, n), L(R, n, n), U(R, n, n), A(R,n,n);
-
- int i, j;
-
- for(i = 0; i < n; ++i) {
- R. assign (D[(size_t)i][(size_t)i], d[(size_t)i]);
- R. assign (L[(size_t)i][(size_t)i], R.one);
- R. assign (U[(size_t)i][(size_t)i], R.one);}
-
- for (i = 0; i < n; ++ i)
-
- for (j = 0; j < i; ++ j) {
-
- R.init(L[(size_t)i][(size_t)j], (uint64_t)(rand() % 10));
-
- R.init(U[(size_t)j][(size_t)i], (uint64_t)(rand() % 10));
- }
-
-
- BlasVector<Ring> tmp1(R,(size_t)n), tmp2(R,(size_t)n), e(R,(size_t)n);
-
- typename DenseMatrix<Ring>::ColIterator col_p;
-
- i = 0;
- for (col_p = A.colBegin();
- col_p != A.colEnd(); ++ col_p, ++ i) {
-
- R.assign (e[(size_t)i],R.one);
- U.apply(tmp1, e);
- D.apply(tmp2, tmp1);
- // LinBox::BlasSubvector<BlasVector<Ring> > col_p_v (R, *col_p);
- // L.apply(col_p_v, tmp2);
- L.apply(*col_p, tmp2);
- R.assign(e[(size_t)i],R.zero);
- }
-
-
-
- SF.smithFormBinary (x, A);
-
-
- report << "Computed Smith form: \n";
-
- VD. write (report, x);
-
- report << '\n';
-
-
- typename BlasVector<Ring>::iterator p1, p2;
- typename Ring::Element g;
-
-
- for (p1 = d.begin(); p1 != d.end(); ++ p1) {
-
- for ( p2 = p1 + 1; p2 != d.end(); ++ p2) {
-
- // CP: changed isUnit to isOne || isMOne as it is only called with ZRing<Integer>.
- if (R. isOne (*p1) || R. isMOne(*p1)) break;
-
- else if (R. isZero (*p2)) continue;
-
- else if (R. isZero (*p1)) {
- std::swap (*p1, *p2);
- }
-
- else {
- R. gcd (g, *p1, *p2);
-
- R. divin (*p2, g);
-
- R. mulin (*p2, *p1);
-
- R. assign (*p1, g);
- }
- }
- }
- // normalize to positive
- for (p1 = d.begin(); p1 != d.end(); ++ p1) if (*p1 < 0) R.negin(*p1);
-
- VD.write (report << "Expected smith form:\n", d) << '\n';
-
- if (!VD.areEqual (d, x))
- ret = iter_passed = false;
-
- if (!iter_passed)
- commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_ERROR)
- << "ERROR: Computed Smith form is incorrect" << endl;
-
- commentator().stop ("done");
-
- commentator().progress ();
-
- }
-
- //stream1.reset ();
-
- commentator().stop (MSG_STATUS (ret), (const char *) 0, "testSmithform");
-
- return ret;
-
-}
+#include "test-smith-form.h"
int main(int argc, char** argv)
{
-
bool pass = true;
+ static size_t m =2;
static size_t n =5;
- static int iterations = 1;
-
static Argument args[] = {
- { 'n', "-n N", "Set order of test matrices to N.", TYPE_INT, &n },
- { 'i', "-i I", "Perform each test for I iterations.", TYPE_INT, &iterations },
+ { 'm', "-m M", "Set row dimension of test matrices to M.", TYPE_INT, &m },
+ { 'n', "-n N", "Set col dimension of test matrices to N.", TYPE_INT, &n },
END_OF_ARGUMENTS
};
parseArguments (argc, argv, args);
commentator().start("SmithFormBinary test suite", "SmithFormBinary");
- std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION);
+ commentator().report() << std::endl << "EGV++ algorithm test suite with LinBox/Givaro ZRing:\n";
{
// typedef Givaro::IntegerDom Ring;
- typedef Givaro::ZRing<Integer> Ring;
+ typedef Givaro::ZRing<Integer> PIR;
+ PIR R;
- Ring R; Ring::RandIter gen(R);
+ typedef Givaro::Modular<int64_t> Field;
+ typedef RationalSolver<PIR, Field, LinBox::RandomPrimeIterator> Solver;
+ typedef LastInvariantFactor<PIR, Solver> LIF;
+ typedef OneInvariantFactor<PIR, LIF, SCompose, RandomMatrix> OIF;
+ typedef SmithFormBinary<PIR, OIF, MatrixRank<PIR, Field > > SF;
- report << std::endl << "EGV++ algorithm test suite with LinBox/Givaro PID:\n";
+ SF sf;
+ sf. setOIFThreshold (30);
+ sf. setLIFThreshold (30);
- commentator().getMessageClass (INTERNAL_DESCRIPTION).setMaxDepth (5);
+ size_t k = std::min(m,n);
+ DenseMatrix<PIR> A(R,m,n);
+ BlasVector<PIR> d(R,k), x(R,k), bumps(R,k), lumps(R,19);
+ for (size_t i = 0; i <10; ++i) lumps[i] = i;
+ for (size_t i = 10; i <19; ++i) lumps[i] = i-19;
- RandomDenseStream<Ring> s1 (R, gen, n, (unsigned int) iterations);
+ makeBumps(bumps, 0);
+ makeSNFExample(A,d,bumps,lumps);
+ sf.smithFormBinary (x, A);
+ pass = pass and checkSNFExample(d,x);
- typedef Givaro::Modular<int32_t> Field;
- typedef RationalSolver<Ring, Field, LinBox::RandomPrimeIterator> Solver;
- typedef LastInvariantFactor<Ring, Solver> LIF;
- typedef OneInvariantFactor<Ring, LIF, SCompose, RandomMatrix> OIF;
- typedef SmithFormBinary<Ring, OIF, MatrixRank<Ring, Field > > SF;
+ makeBumps(bumps, 1);
+ makeSNFExample(A,d,bumps,lumps);
+ sf.smithFormBinary (x, A);
+ pass = pass and checkSNFExample(d,x);
- SF sf;
- sf. setOIFThreshold (30);
- sf. setLIFThreshold (30);
+ makeBumps(bumps, 2);
+ makeSNFExample(A,d,bumps,lumps);
+ sf.smithFormBinary (x, A);
+ pass = pass and checkSNFExample(d,x);
+
+ makeBumps(bumps, 3);
+ makeSNFExample(A,d,bumps,lumps);
+ sf.smithFormBinary (x, A);
+ pass = pass and checkSNFExample(d,x);
- if (!testRandom(R, sf, s1)) pass = false;
}
#if 0
diff --git a/tests/test-smith-form-iliopoulos.C b/tests/test-smith-form-iliopoulos.C
index 50314a8..263a9a6 100644
--- a/tests/test-smith-form-iliopoulos.C
+++ b/tests/test-smith-form-iliopoulos.C
@@ -53,6 +53,7 @@
using namespace LinBox;
+#if 0
template <class Ring>
bool testRead(const Ring& R, string file) {
BlasMatrix<Ring> A(R);
@@ -82,6 +83,7 @@ bool testRead(const Ring& R, string file) {
SmithFormIliopoulos::smithFormIn (B);
return BMD.areEqual(A, B);
}
+#endif
template <class Ring>
bool testRandom(const Ring& R, size_t n)
diff --git a/tests/test-smith-form.C b/tests/test-smith-form.C
index a986fe2..2ea29f9 100644
--- a/tests/test-smith-form.C
+++ b/tests/test-smith-form.C
@@ -1,7 +1,7 @@
/* Copyright (C) LinBox
*
* Author: Zhendong Wan
- *
+ * Mods: bds
*
* ========LICENCE========
* This file is part of the library LinBox.
@@ -31,194 +31,65 @@
#include <linbox/linbox-config.h>
+#include "linbox/solutions/smith-form.h"
-#include <time.h>
#include "givaro/zring.h"
-#include "givaro/givinteger.h"
#include "linbox/util/commentator.h"
-#include "linbox/vector/stream.h"
-#include "test-common.h"
+#include "linbox/matrix/dense-matrix.h"
#include "linbox/vector/blas-vector.h"
-#include "linbox/solutions/smith-form.h"
-using LinBox::parseArguments;
-using LinBox::commentator;
-using LinBox::Commentator;
-using Givaro::Integer;
-using Givaro::ZRing;
-using LinBox::BlasMatrix;
-using LinBox::BlasVector;
-
-template <class Ring, class Vector>
-bool testRandom(const Ring& R,
- LinBox::VectorStream<Vector>& stream1)
-{
-
- std::ostringstream str;
-
- str << "Testing the smithForm function in solutions directory:\n";
-
- commentator().start (str.str ().c_str (), "testRandom");//, stream1.m ());
-
- bool ret = true;
-
- LinBox::VectorDomain<Ring> VD (R);
-
- Vector d(R), x(R);
-
- LinBox::VectorWrapper::ensureDim (d, stream1.n ());
-
- LinBox::VectorWrapper::ensureDim (x, stream1.n ());
-
-
- int n = (int)d. size();
-
- while (stream1) {
-
- commentator().startIteration ((unsigned)stream1.j ());
-
- std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION);
-
- bool iter_passed = true;
-
- stream1.next (d);
-
- report << "Input vector: ";
- VD.write (report, d);
- report << endl;
-
- BlasMatrix<Ring> D(R, (size_t)n, (size_t)n), L(R, (size_t)n, (size_t)n), U(R, (size_t)n, (size_t)n), A(R,(size_t)n,(size_t)n);
-
- int i, j;
-
- for(i = 0; i < n; ++i) {
- R. assign (D[(size_t)i][(size_t)i], d[(size_t)i]);
- R. assign(L[(size_t)i][(size_t)i], R.one);
- R. assign(U[(size_t)i][(size_t)i], R.one);}
-
- for (i = 0; i < n; ++ i)
-
- for (j = 0; j < i; ++ j) {
-
- R.init(L[(size_t)i][(size_t)j], rand() % 10);
-
- R.init(U[(size_t)j][(size_t)i], rand() % 10);
- }
+using namespace LinBox;
-
- BlasVector<Ring> tmp1(R,(size_t)n), tmp2(R,(size_t)n), e(R,(size_t)n);
-
- typename BlasMatrix<Ring>::ColIterator col_p;
-
- i = 0;
- for (col_p = A.colBegin();
- col_p != A.colEnd(); ++ col_p, ++ i) {
-
- R.assign(e[(size_t)i],R.one);
- U.apply(tmp1, e);
- D.apply(tmp2, tmp1);
- // LinBox::BlasSubvector<BlasVector<Ring> > col_p_v (R, *col_p);
- // L.apply(col_p_v, tmp2);
- L.apply(*col_p, tmp2); //! @internal @bug should use Triangular apply ? We are doing this many times, factor somewhere in test-utils.h ? why not some ftrtr routine for that ?
- R.assign(e[(size_t)i],R.zero);
- }
-
- typename Vector::iterator x_p;
- Givaro::ZRing<Integer> Z;
- BlasVector<Givaro::ZRing<Integer> > xi(Z,A. rowdim());
- BlasVector<Givaro::ZRing<Integer> >::iterator xi_p;
- std::list<std::pair<Integer, size_t> > cpt;
- smithForm (cpt, A);
- std::list<std::pair<Integer, size_t> >::iterator cpt_p;
-
- xi_p = xi. begin();
- for (cpt_p = cpt.begin(); cpt_p != cpt.end(); ++ cpt_p) {
- for (size_t ii = 0; ii < cpt_p -> second; ++ ii, ++ xi_p)
- *xi_p = cpt_p -> first;
- }
-
- for (x_p = x. begin(), xi_p = xi. begin(); x_p != x. end(); ++ x_p, ++ xi_p)
- A. field (). init (*x_p, *xi_p);
-
- report << "Computed Smith form: \n";
- VD. write (report, x);
-
- report << '\n';
-
- typename BlasVector<Ring>::iterator p1, p2;
- typename Ring::Element g;
-
- for (p1 = d.begin(); p1 != d.end(); ++ p1) {
- for ( p2 = p1 + 1; p2 != d.end(); ++ p2) {
- if (R. isUnit(*p1)) break;
- else if (R. isZero (*p2)) continue;
- else if (R. isZero (*p1)) std::swap (*p1, *p2);
- else { // (*p1, *p2) <-- (g, *p1 * *p2 / g), where g = gcd(*p1, *p2)
- R. gcd (g, *p1, *p2);
- R. divin (*p2, g);
- R. mulin (*p2, *p1);
- R. assign (*p1, g);
- }
- }
- }
-
- report << "Expected smith form:\n";
- VD.write (report, d) << endl;
-
- if (!VD.areEqual (d, x))
- ret = iter_passed = false;
-
- if (!iter_passed)
- commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_ERROR)
- << "ERROR: Computed Smith form is incorrect" << endl;
-
- commentator().stop ("done");
- commentator().progress ();
- }
-
- //stream1.reset ();
-
- commentator().stop (MSG_STATUS (ret), (const char *) 0, "testRandom");
-
- return ret;
-}
+#include "test-smith-form.h"
int main(int argc, char** argv)
{
bool pass = true;
- static size_t n =3;
+ static size_t m =30;
+ static size_t n =20;
static int iterations = 2;
static Argument args[] = {
- { 'n', "-n N", "Set order of test matrices to N.", TYPE_INT, &n },
+ { 'm', "-m M", "Set row dim of test matrices to M.", TYPE_INT, &m },
+ { 'n', "-n N", "Set col dim of test matrices to N.", TYPE_INT, &n },
{ 'i', "-i I", "Perform each test for I iterations.", TYPE_INT, &iterations },
END_OF_ARGUMENTS
};
parseArguments (argc, argv, args);
//!@bug should be tried on NTZ_LL too
- typedef Givaro::ZRing<Integer> Ring;
-
- Ring R; Ring::RandIter gen(R);
-
-
- commentator().start("Smith form test suite", "Smith");
- commentator().getMessageClass (INTERNAL_DESCRIPTION).setMaxDepth (5);
-
- LinBox::RandomDenseStream<Ring> s1 (R, gen, n, (unsigned int)iterations);
- if (!testRandom(R, s1)) pass = false;
-
-#if 0
-#ifdef __LINBOX_HAVE_NTL
- typedef LinBox::NTL_ZZ Ring2;
- Ring2 R2;Ring2::RandIter gen2(R2);
-
- LinBox::RandomDenseStream<Ring2> s2 (R2, gen2, n, (unsigned int)iterations);
- if (!testRandom(R2, s2)) pass = false;
-
-#endif
-#endif
-
- commentator().stop("Smith form test suite");
+ typedef Givaro::ZRing<Integer> PIR;
+ PIR R;
+
+ commentator().start("Smith form test", "Smith");
+ // k invariants for an m x n matrix; lumps = 0..9 then -9..-1 (off diagonal scrambler entries)
+ size_t k = std::min(m,n);
+ DenseMatrix<PIR> A(R,m,n);
+ BlasVector<PIR> d(R,k), x(R,k), bumps(R,k), lumps(R,19);
+ for (size_t i = 0; i <10; ++i) lumps[i] = i;
+ for (size_t i = 10; i <19; ++i) lumps[i] = static_cast<int>(i)-19; // signed: size_t i-19 would wrap
+ // Four rounds, one per bump pattern: build A with known invariants d, compute x, compare.
+ makeBumps(bumps, 0);
+ makeSNFExample(A,d,bumps,lumps);
+ smithForm (x, A);
+ pass = pass and checkSNFExample(d,x);
+
+ makeBumps(bumps, 1);
+ makeSNFExample(A,d,bumps,lumps);
+ smithForm (x, A);
+ pass = pass and checkSNFExample(d,x);
+
+ makeBumps(bumps, 2);
+ makeSNFExample(A,d,bumps,lumps);
+ smithForm (x, A);
+ //SmithFormAdaptive::compute_local_long(x, A, 2, 64);
+ pass = pass and checkSNFExample(d,x);
+
+ makeBumps(bumps, 3);
+ makeSNFExample(A,d,bumps,lumps);
+ smithForm (x, A);
+ pass = pass and checkSNFExample(d,x);
+
+ commentator().stop("Smith form test");
return pass ? 0 : -1;
}
@@ -230,4 +101,3 @@ int main(int argc, char** argv)
// c-basic-offset: 8
// End:
// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
-
diff --git a/tests/test-smith-form.h b/tests/test-smith-form.h
new file mode 100644
index 0000000..73d7e5b
--- /dev/null
+++ b/tests/test-smith-form.h
@@ -0,0 +1,166 @@
+/* Copyright (C) LinBox
+ *
+ * Author: bds
+ *
+ * ========LICENCE========
+ * This file is part of the library LinBox.
+ *
+ * LinBox is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ * ========LICENCE========
+ */
+
+/*! @file tests/test-smith-form.h
+ * @ingroup tests
+ * @brief tools for making matrix with known SNF.
+ */
+
+#ifndef __TEST_SMITH_FORM_H
+#define __TEST_SMITH_FORM_H
+#include <linbox/linbox-config.h>
+
+#include "linbox/util/commentator.h"
+#include "linbox/matrix/dense-matrix.h"
+#include "linbox/vector/blas-vector.h"
+using std::endl;
+using namespace LinBox;
+
+// Fill b with a pattern of "bumps" picked by choice (0..3, see the case labels), then
+// negate up to three randomly chosen entries; any other choice skips the fill step.
+// Uses rand().  Safe for short vectors: empty b is returned as is, case 3 is truncated.
+template <class PIR> // This is for PIR = Z or Z_n
+BlasVector<PIR> & makeBumps(BlasVector<PIR> & b, int choice) {
+        const PIR & R = b.field();
+        typename PIR::Element two, three, nine, x;
+        R.init(two,2); R.init(three,3);
+        R.init(nine,9); R.init(x,202);
+        const size_t n = b.size();
+        if (n == 0) return b; // nothing to fill, and rand()%n below would divide by zero
+        switch (choice) {
+                case 0: // all zero
+                        for(size_t i = 0; i < n; ++i) b.setEntry(i,R.zero);
+                        break;
+                case 1: // all one
+                        for(size_t i = 0; i < n; ++i) b.setEntry(i,R.one);
+                        break;
+                case 2: // all 2's (their running products are the powers of 2)
+                        for(size_t i = 0; i < n; ++i) b.setEntry(i,two);
+                        break;
+                case 3: // random followed by 202,0. Random part is largely 1's.
+                        for(size_t i = 0; i + 2 < n; ++i) { // not i < n-2: that wraps for n < 2
+                                size_t r = rand()%20;
+                                if (r < 17) b.setEntry(i,R.one);
+                                if (r == 17) b.setEntry(i,two);
+                                if (r == 18) b.setEntry(i,three);
+                                if (r == 19) b.setEntry(i,nine);
+                        }
+                        if (n >= 2) b.setEntry(n-2,x); // the 202 only when there is room for it
+                        b.setEntry(n-1,R.zero);
+        }
+        // negate a few (x is reused as scratch from here on)
+        for (size_t k = rand()%4; k > 0; --k){
+                size_t i = rand()%n;
+                b.getEntry(x,i);
+                b.setEntry(i,R.negin(x));
+        }
+        return b;
+}
+
+
+// For any PIR, build the sequence of smith invariants d from "bumps" b as running
+// products: d[0] = b[0], d[i] = d[i-1]*b[i], so every entry divides the next one.
+// Reads b[0..d.size()-1] (b must be at least that long); an empty d is returned untouched.
+template <class PIR>
+BlasVector<PIR> & prefixProduct (BlasVector<PIR> & d, const BlasVector<PIR> & b) {
+        if (d.size() == 0) return d; // no entry 0 to seed the product with
+        const PIR& R = d.field();
+        typename PIR::Element x,y; R.init(x); R.init(y);
+        d.setEntry(0,b.getEntry(x,0));
+        for (size_t i = 1; i < d.size(); ++i) // x still holds d[i-1] here
+                d.setEntry(i,R.mulin(x, b.getEntry(y,i)));
+        return d;
+}
+
+// Build a test matrix A with a known Smith normal form (diag(d), up to sign).
+// bumps[i] plays the role of s_i/s_{i-1}, the quotient of consecutive smith invariants;
+// lumps is the pool of off diagonal entries for the triangular scramblers L and U.
+// On return A = U*D*L, where D was diag of the prefix products of bumps, and d holds
+// the absolute values of those products.  Entries are picked with rand().
+template <class PIR>
+void makeSNFExample(DenseMatrix<PIR>& A,
+                BlasVector<PIR> & d,
+                const BlasVector<PIR> & bumps,
+                const BlasVector<PIR> & lumps) {
+        const PIR & ring = A.field();
+        typename PIR::Element tmp; ring.init(tmp);
+        const size_t nLumps = lumps.size();
+        // D goes onto the diagonal of A
+        prefixProduct(d, bumps);
+        A.zero();
+        for (size_t t = 0; t < d.size(); ++t) A.setEntry(t,t,d.getEntry(tmp,t));
+
+        // L: ones on the diagonal, lumps below it (filled row by row)
+        DenseMatrix<PIR> L(ring, A.coldim(), A.coldim());
+        L.zero();
+        for (size_t r = 0; r < L.rowdim(); ++r) {
+                for (size_t c = 0; c < r; ++c)
+                        L.setEntry(r,c,lumps[rand()%nLumps]);
+                L.setEntry(r,r,ring.one);
+        }
+
+        // U: ones on the diagonal, lumps above it (filled row by row, after L)
+        DenseMatrix<PIR> U(ring, A.rowdim(), A.rowdim());
+        U.zero();
+        for (size_t r = 0; r < U.rowdim(); ++r) {
+                U.setEntry(r,r,ring.one);
+                for (size_t c = r+1; c < U.coldim(); ++c)
+                        U.setEntry(r,c,lumps[rand()%nLumps]);
+        }
+
+        // A <- UAL
+        BlasMatrixDomain<PIR> MD(ring);
+        MD.mulin_left(A,L);
+        MD.mulin_right(U,A);
+
+        // d <- |d| entrywise: now d is the vector of invariants expected for A
+        for (size_t t = 0; t < d.size(); ++t)
+                d.setEntry(t,ring.abs(tmp, d.getEntry(tmp,t)));
+}
+
+// Report the computed (x) and expected (d) invariant vectors on the commentator's
+// report stream; returns true iff VectorDomain::areEqual accepts them as equal.
+template <class PIR>
+bool checkSNFExample( const BlasVector<PIR>& d, const BlasVector<PIR>& x ){
+        VectorDomain<PIR> domain(d.field());
+        std::ostream & out = commentator().report();
+
+        out << "Computed Smith form:" << endl;
+        domain.write (out, x) << endl;
+        out << "Expected smith form:" << endl;
+        domain.write (out, d) << endl;
+
+        const bool same = domain.areEqual (d, x);
+        if (not same) // a mismatch adds one more line to the report
+                out << "ERROR: Computed not as Expected" << endl;
+        return same;
+}
+#endif // __TEST_SMITH_FORM_H
+
+// Local Variables:
+// mode: C++
+// tab-width: 8
+// indent-tabs-mode: nil
+// c-basic-offset: 8
+// End:
+// vim:sts=8:sw=8:ts=8:noet:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/linbox.git
More information about the debian-science-commits
mailing list