[libmath-prime-util-perl] 115/181: Removed old SQUFOF code. Faster is_perfect_square. Streamline trial division pre-factoring.
Partha P. Mukherjee
ppm-guest at moszumanska.debian.org
Thu May 21 18:51:12 UTC 2015
This is an automated email from the git hooks/post-receive script.
ppm-guest pushed a commit to annotated tag v0.36
in repository libmath-prime-util-perl.
commit 5be203368d0f96cf19210e3697df459ce62ace9a
Author: Dana Jacobsen <dana at acm.org>
Date: Sun Jan 5 00:19:58 2014 -0800
Removed old SQUFOF code. Faster is_perfect_square. Streamline trial division pre-factoring.
---
Changes | 3 +
XS.xs | 23 +--
examples/bench-factor-extra.pl | 2 -
examples/bench-factor-semiprime.pl | 8 +-
examples/test-factor-gnufactor.pl | 13 +-
factor.c | 375 ++++++++++---------------------------
factor.h | 1 -
lib/Math/Prime/Util.pm | 8 +-
primality.c | 24 +--
t/50-factoring.t | 3 +-
util.h | 20 +-
11 files changed, 144 insertions(+), 336 deletions(-)
diff --git a/Changes b/Changes
index 4712ef8..050084c 100644
--- a/Changes
+++ b/Changes
@@ -63,6 +63,9 @@ Revision history for Perl module Math::Prime::Util
- While Math::BigInt has the bgcd function, it is slow for native numbers,
even with the Pari or GMP back ends. The gcd here is 20-100x faster.
+ - Removed the old SQUFOF code, so the racing version is the only one. It
+ was already the only one being used.
+
0.35 2013-12-08
diff --git a/XS.xs b/XS.xs
index 2cae867..c6f7254 100644
--- a/XS.xs
+++ b/XS.xs
@@ -361,11 +361,10 @@ trial_factor(IN UV n, ...)
fermat_factor = 1
holf_factor = 2
squfof_factor = 3
- rsqufof_factor = 4
- pbrent_factor = 5
- prho_factor = 6
- pplus1_factor = 7
- pminus1_factor = 8
+ pbrent_factor = 4
+ prho_factor = 5
+ pplus1_factor = 6
+ pminus1_factor = 7
PPCODE:
if (n == 0) XSRETURN_UV(0);
while ( (n% 2) == 0 ) { n /= 2; XPUSHs(sv_2mortal(newSVuv( 2 ))); }
@@ -386,19 +385,16 @@ trial_factor(IN UV n, ...)
case 3: arg1 = (items < 2) ? 4*1024*1024 : SvUV(ST(1));
nfactors = squfof_factor (n, factors, arg1); break;
case 4: arg1 = (items < 2) ? 4*1024*1024 : SvUV(ST(1));
- nfactors = racing_squfof_factor(n, factors, arg1); break;
- case 5: arg1 = (items < 2) ? 4*1024*1024 : SvUV(ST(1));
arg2 = (items < 3) ? 1 : SvUV(ST(2));
nfactors = pbrent_factor (n, factors, arg1, arg2); break;
- case 6: arg1 = (items < 2) ? 4*1024*1024 : SvUV(ST(1));
+ case 5: arg1 = (items < 2) ? 4*1024*1024 : SvUV(ST(1));
nfactors = prho_factor (n, factors, arg1); break;
- case 7: arg1 = (items < 2) ? 200 : SvUV(ST(1));
+ case 6: arg1 = (items < 2) ? 200 : SvUV(ST(1));
nfactors = pplus1_factor (n, factors, arg1); break;
- case 8: arg1 = (items < 2) ? 1*1024*1024 : SvUV(ST(1));
- arg2 = (items < 3) ? 0 : SvUV(ST(2));
- if (arg2 == 0) arg2 = 10*arg1; /* default B2 */
+ case 7:
+ default: arg1 = (items < 2) ? 1*1024*1024 : SvUV(ST(1));
+ arg2 = (items < 3) ? 10*arg1 : SvUV(ST(2));
nfactors = pminus1_factor(n, factors, arg1, arg2); break;
- default: break;
}
EXTEND(SP, nfactors);
for (i = 0; i < nfactors; i++)
@@ -606,7 +602,6 @@ factor(IN SV* svn)
return; /* skip implicit PUTBACK */
}
-
void
divisor_sum(IN SV* svn, ...)
PREINIT:
diff --git a/examples/bench-factor-extra.pl b/examples/bench-factor-extra.pl
index 674ce19..d5726d8 100755
--- a/examples/bench-factor-extra.pl
+++ b/examples/bench-factor-extra.pl
@@ -58,7 +58,6 @@ sub test_at_digits {
$nfactored{'pminus1'} += $calc_nfacs->(Math::Prime::Util::pminus1_factor($_, $p1smooth));
$nfactored{'pplus1'} += $calc_nfacs->(Math::Prime::Util::pplus1_factor($_, $p1smooth));
$nfactored{'squfof'} += $calc_nfacs->(Math::Prime::Util::squfof_factor($_, $sqrounds));
- $nfactored{'rsqufof'} += $calc_nfacs->(Math::Prime::Util::rsqufof_factor($_, $rsqrounds));
#$nfactored{'trial'} += $calc_nfacs->(Math::Prime::Util::trial_factor($_));
#$nfactored{'fermat'} += $calc_nfacs->(Math::Prime::Util::fermat_factor($_, $rounds));
$nfactored{'holf'} += $calc_nfacs->(Math::Prime::Util::holf_factor($_, $hrounds));
@@ -79,7 +78,6 @@ sub test_at_digits {
"fermat" => sub { Math::Prime::Util::fermat_factor($_, $rounds) for @nums},
"holf" => sub { Math::Prime::Util::holf_factor($_, $hrounds) for @nums },
"squfof" => sub { Math::Prime::Util::squfof_factor($_, $sqrounds) for @nums },
- "rsqufof" => sub { Math::Prime::Util::rsqufof_factor($_) for @nums },
"trial" => sub { Math::Prime::Util::trial_factor($_) for @nums },
};
delete $lref->{'fermat'} if $digits >= 9;
diff --git a/examples/bench-factor-semiprime.pl b/examples/bench-factor-semiprime.pl
index b9973f0..53b73ed 100755
--- a/examples/bench-factor-semiprime.pl
+++ b/examples/bench-factor-semiprime.pl
@@ -4,7 +4,7 @@ use warnings;
$| = 1; # fast pipes
srand(377);
-use Math::Prime::Util qw/factor -nobigint/;
+use Math::Prime::Util qw/factor/;
use Math::Factor::XS qw/prime_factors/;
use Math::Pari qw/factorint/;
use Benchmark qw/:all/;
@@ -93,9 +93,9 @@ foreach my $sp (@semiprimes) {
print "OK\n";
my %compare = (
- 'MPU' => sub { factor($_) for @semiprimes; },
- 'MFXS' => sub { prime_factors($_) for @semiprimes; },
- 'Pari' => sub { foreach my $n (@semiprimes) { my @factors; my ($pn,$pc) = @{factorint($n)}; push @factors, (int($pn->[$_])) x $pc->[$_] for (0 .. $#{$pn}); } }
+ 'MPU' => sub { do { my @f = factor($_) } for @semiprimes; },
+ 'MFXS' => sub { do { my @f = prime_factors($_) } for @semiprimes; },
+ 'Pari' => sub { do { my ($pn,$pc) = @{factorint($_)}; my @f = map { int($pn->[$_]) x $pc->[$_] } 0 .. $#$pn; } for @semiprimes; },
);
delete $compare{'MFXS'} if $skip_mfxs;
diff --git a/examples/test-factor-gnufactor.pl b/examples/test-factor-gnufactor.pl
index 4546910..2e3b43e 100755
--- a/examples/test-factor-gnufactor.pl
+++ b/examples/test-factor-gnufactor.pl
@@ -25,15 +25,16 @@ my $num = 1000;
# large numbers. You'll probably want to turn it off here as it will be
# many thousands of times slower than MPU and Pari.
-# A performance note: MPU and Pari get their results by calling a function.
-# GNU factor gets its result by multiple shells out to /usr/bin/factor with
-# the numbers as command line arguments. This adds a lot of overhead that
-# has nothing to do with their implementation. For comparison, try turning
-# on the MPU factor.pl script, and weep at Perl's startup cost.
+# A benchmarking note: in this script, getting MPU and Pari results are done
+# by calling a function, where getting GNU factor results are done via
+# multiple shells to /usr/bin/factor with the inputs as command line
+# arguments. This adds a lot of overhead that has nothing to do with their
+# implementation. For comparison, I've included an option for getting MPU
+# factoring via calling the factor.pl script. Weep at the startup cost.
my $do_gnu = 1;
my $do_pari = 1;
-my $use_mpu_factor_script = 0;
+my $use_mpu_factor_script = 1;
my $rgen = sub {
my $range = shift;
diff --git a/factor.c b/factor.c
index 092a2c9..ed61fea 100644
--- a/factor.c
+++ b/factor.c
@@ -11,8 +11,12 @@
#include "primality.h"
#define FUNC_isqrt 1
#define FUNC_gcd_ui 1
+#define FUNC_is_perfect_square 1
#include "util.h"
+/* factor will do trial division through this prime number, must be in table */
+#define TRIAL_TO_PRIME 81
+
/*
* You need to remember to use UV for unsigned and IV for signed types that
* are large enough to hold our data.
@@ -24,6 +28,29 @@
* match the native integer type used inside our Perl, so just use those.
*/
+static const unsigned short primes_small[] =
+ {0,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,
+ 101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,
+ 193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,281,283,
+ 293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,
+ 409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,509,
+ 521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,617,619,631,
+ 641,643,647,653,659,661,673,677,683,691,701,709,719,727,733,739,743,751,
+ 757,761,769,773,787,797,809,811,821,823,827,829,839,853,857,859,863,877,
+ 881,883,887,907,911,919,929,937,941,947,953,967,971,977,983,991,997,1009,
+ 1013,1019,1021,1031,1033,1039,1049,1051,1061,1063,1069,1087,1091,1093,
+ 1097,1103,1109,1117,1123,1129,1151,1153,1163,1171,1181,1187,1193,1201,
+ 1213,1217,1223,1229,1231,1237,1249,1259,1277,1279,1283,1289,1291,1297,
+ 1301,1303,1307,1319,1321,1327,1361,1367,1373,1381,1399,1409,1423,1427,
+ 1429,1433,1439,1447,1451,1453,1459,1471,1481,1483,1487,1489,1493,1499,
+ 1511,1523,1531,1543,1549,1553,1559,1567,1571,1579,1583,1597,1601,1607,
+ 1609,1613,1619,1621,1627,1637,1657,1663,1667,1669,1693,1697,1699,1709,
+ 1721,1723,1733,1741,1747,1753,1759,1777,1783,1787,1789,1801,1811,1823,
+ 1831,1847,1861,1867,1871,1873,1877,1879,1889,1901,1907,1913,1931,1933,
+ 1949,1951,1973,1979,1987,1993,1997,1999,2003,2011};
+#define NPRIMES_SMALL (sizeof(primes_small)/sizeof(primes_small[0]))
+
+
/* The main factoring loop */
/* Puts factors in factors[] and returns the number found. */
int factor(UV n, UV *factors)
@@ -31,23 +58,41 @@ int factor(UV n, UV *factors)
int nfactors = 0; /* Number of factored in factors result */
int const verbose = _XS_get_verbose();
- UV const tlim_lower = 401; /* Trial division through this prime */
- UV const tlim = 409; /* This means we've checked through here */
+ UV f;
UV tofac_stack[MPU_MAX_FACTORS+1];
UV fac_stack[MPU_MAX_FACTORS+1];
int ntofac = 0; /* Number of items on tofac_stack */
int nfac = 0; /* Number of items on fac_stack */
- if (n < 10000000)
- return trial_factor(n, factors, 0);
-
- /* Trial division for all factors below tlim */
- nfactors = trial_factor(n, factors, tlim_lower);
- n = factors[--nfactors];
+ if (n < 4) {
+ factors[0] = n;
+ return (n == 1) ? 0 : 1;
+ }
+ while ( (n & 1) == 0 ) { factors[nfactors++] = 2; n /= 2; }
+ while ( (n % 3) == 0 ) { factors[nfactors++] = 3; n /= 3; }
+ while ( (n % 5) == 0 ) { factors[nfactors++] = 5; n /= 5; }
+ f = 7;
+
+ if (f*f <= n) {
+ UV sp = 3;
+ while (++sp < TRIAL_TO_PRIME) {
+ f = primes_small[sp];
+ if (f*f > n) break;
+ while ( (n%f) == 0 ) {
+ factors[nfactors++] = f;
+ n /= f;
+ }
+ }
+ }
+ if (n < f*f) {
+ if (n != 1)
+ factors[nfactors++] = n;
+ return nfactors;
+ }
/* loop over each remaining factor, until ntofac == 0 */
do {
- while ( (n >= (tlim*tlim)) && (!_XS_is_prime(n)) ) {
+ while ( (n >= f*f) && (!_XS_is_prime(n)) ) {
int split_success = 0;
/* Adjust the number of rounds based on the number size */
UV const br_rounds = ((n>>29) < 100000) ? 1500 : 1500;
@@ -60,8 +105,8 @@ int factor(UV n, UV *factors)
}
/* SQUFOF with these parameters gets 99.9% of everything left */
if (!split_success && n < (UV_MAX>>2)) {
- split_success = racing_squfof_factor(n,tofac_stack+ntofac, sq_rounds)-1;
- if (verbose) printf("rsqufof %d\n", split_success);
+ split_success = squfof_factor(n,tofac_stack+ntofac, sq_rounds)-1;
+ if (verbose) printf("squfof %d\n", split_success);
}
/* At this point we should only have 16+ digit semiprimes. */
/* This p-1 gets about 2/3 of what makes it through the above */
@@ -89,8 +134,7 @@ int factor(UV n, UV *factors)
n = tofac_stack[ntofac]; /* Set n to the other one */
} else {
/* Factor via trial division. Nothing should make it here. */
- UV f = tlim;
- UV m = tlim % 30;
+ UV m = f % 30;
UV limit = isqrt(n);
if (verbose) printf("doing trial on %"UVuf"\n", n);
while (f <= limit) {
@@ -132,6 +176,7 @@ int factor(UV n, UV *factors)
return nfactors;
}
+
int factor_exp(UV n, UV *factors, UV* exponents)
{
int i, j, nfactors;
@@ -159,32 +204,8 @@ int factor_exp(UV n, UV *factors, UV* exponents)
}
-
-static const unsigned short primes_small[] =
- {0,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,
- 101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,
- 193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,281,283,
- 293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,
- 409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,509,
- 521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,617,619,631,
- 641,643,647,653,659,661,673,677,683,691,701,709,719,727,733,739,743,751,
- 757,761,769,773,787,797,809,811,821,823,827,829,839,853,857,859,863,877,
- 881,883,887,907,911,919,929,937,941,947,953,967,971,977,983,991,997,1009,
- 1013,1019,1021,1031,1033,1039,1049,1051,1061,1063,1069,1087,1091,1093,
- 1097,1103,1109,1117,1123,1129,1151,1153,1163,1171,1181,1187,1193,1201,
- 1213,1217,1223,1229,1231,1237,1249,1259,1277,1279,1283,1289,1291,1297,
- 1301,1303,1307,1319,1321,1327,1361,1367,1373,1381,1399,1409,1423,1427,
- 1429,1433,1439,1447,1451,1453,1459,1471,1481,1483,1487,1489,1493,1499,
- 1511,1523,1531,1543,1549,1553,1559,1567,1571,1579,1583,1597,1601,1607,
- 1609,1613,1619,1621,1627,1637,1657,1663,1667,1669,1693,1697,1699,1709,
- 1721,1723,1733,1741,1747,1753,1759,1777,1783,1787,1789,1801,1811,1823,
- 1831,1847,1861,1867,1871,1873,1877,1879,1889,1901,1907,1913,1931,1933,
- 1949,1951,1973,1979,1987,1993,1997,1999,2003,2011};
-#define NPRIMES_SMALL (sizeof(primes_small)/sizeof(primes_small[0]))
-
int trial_factor(UV n, UV *factors, UV maxtrial)
{
- UV f, limit, newlimit;
int nfactors = 0;
if (maxtrial == 0) maxtrial = UV_MAX;
@@ -194,56 +215,38 @@ int trial_factor(UV n, UV *factors, UV maxtrial)
factors[0] = n;
return (n == 1) ? 0 : 1;
}
- /* Trial division for 2, 3, 5, 7, and see if we're done */
+ /* Trial division for 2, 3, 5 immediately */
while ( (n & 1) == 0 ) { factors[nfactors++] = 2; n /= 2; }
if (3<=maxtrial) while ( (n % 3) == 0 ) { factors[nfactors++] = 3; n /= 3; }
if (5<=maxtrial) while ( (n % 5) == 0 ) { factors[nfactors++] = 5; n /= 5; }
- if (7<=maxtrial) while ( (n % 7) == 0 ) { factors[nfactors++] = 7; n /= 7; }
- f = 11;
- if ( (n < (f*f)) || (maxtrial < f) ) {
- if (n != 1)
- factors[nfactors++] = n;
- return nfactors;
- }
-
- /* Trial division to this number at most. Reduced as we find factors. */
- limit = isqrt(n);
- if (limit > maxtrial)
- limit = maxtrial;
- /* Use the table of small primes to quickly do trial division. */
- {
- UV sp = 5;
- UV slimit = (limit < 2003) ? limit : 2003;
- f = primes_small[sp];
- while (f <= slimit) {
- if ( (n%f) == 0 ) {
- do {
- factors[nfactors++] = f;
- n /= f;
- } while ( (n%f) == 0 );
- newlimit = isqrt(n);
- if (newlimit < slimit) slimit = newlimit;
- if (newlimit < limit) limit = newlimit;
+ if (7*7 <= n) {
+ UV f, sp = 3;
+ while (++sp < NPRIMES_SMALL) {
+ f = primes_small[sp];
+ if (f*f > n || f > maxtrial) break;
+ while ( (n%f) == 0 ) {
+ factors[nfactors++] = f;
+ n /= f;
}
- f = primes_small[++sp];
}
- }
-
- /* Trial division using a mod-30 wheel for larger values */
- if (f <= limit) {
- UV m = f % 30;
- while (f <= limit) {
- if ( (n%f) == 0 ) {
- do {
- factors[nfactors++] = f;
- n /= f;
- } while ( (n%f) == 0 );
- newlimit = isqrt(n);
- if (newlimit < limit) limit = newlimit;
+ /* Trial division using a mod-30 wheel for larger values */
+ if (f*f <= n && f <= maxtrial) {
+ UV newlimit, limit = isqrt(n);
+ if (limit > maxtrial) limit = maxtrial;
+ UV m = f % 30;
+ while (f <= limit) {
+ if ( (n%f) == 0 ) {
+ do {
+ factors[nfactors++] = f;
+ n /= f;
+ } while ( (n%f) == 0 );
+ newlimit = isqrt(n);
+ if (newlimit < limit) limit = newlimit;
+ }
+ f += wheeladvance30[m];
+ m = nextwheel30[m];
}
- f += wheeladvance30[m];
- m = nextwheel30[m];
}
}
/* All done! */
@@ -253,74 +256,6 @@ int trial_factor(UV n, UV *factors, UV maxtrial)
}
-/* Return 0 if n is not a perfect square. Set sqrtn to int(sqrt(n)) if so.
- *
- * Some simple solutions:
- *
- * return ( ((n&2)!= 0) || ((n&7)==5) || ((n&11) == 8) ) ? 0 : 1;
- *
- * or:
- *
- * m = n & 31;
- * if ( m==0 || m==1 || m==4 || m==9 || m==16 || m==17 || m==25 )
- * ...test for perfect square...
- *
- * or:
- *
- * if ( ((0x0202021202030213ULL >> (n & 63)) & 1) &&
- * ((0x0402483012450293ULL >> (n % 63)) & 1) &&
- * ((0x218a019866014613ULL >> ((n % 65) & 63)) & 1) &&
- * ((0x23b >> (n % 11) & 1)) ) {
- *
- *
- * The following Bloom filter cascade works very well indeed. Read all
- * about it here: http://mersenneforum.org/showpost.php?p=110896
- */
-static int is_perfect_square(UV n, UV* sqrtn)
-{
- UV m;
- m = n & 127;
- if ((m*0x8bc40d7d) & (m*0xa1e2f5d1) & 0x14020a) return 0;
- /* 82% of non-squares rejected here */
-
-#if 0
- /* The big deal with this technique is that you do two total operations,
- * one cheap (the & 127 above), one expensive (the modulo below) on n.
- * The rest of the operations are 32-bit operations. This is a huge win
- * if n is multiprecision.
- * However, in this file we're doing native precision sqrt, so it just
- * isn't expensive enough to justify this second filter set.
- */
- lm = n % UVCONST(63*25*11*17*19*23*31);
- m = lm % 63;
- if ((m*0x3d491df7) & (m*0xc824a9f9) & 0x10f14008) return 0;
- m = lm % 25;
- if ((m*0x1929fc1b) & (m*0x4c9ea3b2) & 0x51001005) return 0;
- m = 0xd10d829a*(lm%31);
- if (m & (m+0x672a5354) & 0x21025115) return 0;
- m = lm % 23;
- if ((m*0x7bd28629) & (m*0xe7180889) & 0xf8300) return 0;
- m = lm % 19;
- if ((m*0x1b8bead3) & (m*0x4d75a124) & 0x4280082b) return 0;
- m = lm % 17;
- if ((m*0x6736f323) & (m*0x9b1d499) & 0xc0000300) return 0;
- m = lm % 11;
- if ((m*0xabf1a3a7) & (m*0x2612bf93) & 0x45854000) return 0;
- /* 99.92% of non-squares are rejected now */
-#endif
-#if 0
- /* This could save time on some platforms, but not on x86 */
- m = n % 63;
- if ((m*0x3d491df7) & (m*0xc824a9f9) & 0x10f14008) return 0;
-#endif
- m = isqrt(n);
- if (n != (m*m))
- return 0;
-
- if (sqrtn != 0) *sqrtn = m;
- return 1;
-}
-
static int _divisors_from_factors(UV v, UV npe, UV* fp, UV* fe, UV* res) {
UV p, e, i;
if (npe == 0) return 0;
@@ -401,9 +336,9 @@ UV divisor_sum(UV n, UV k)
UV product = 1;
if (k > 5 || (k > 0 && n >= sigma_overflow[k-1])) return 0;
- if (n == 0) return (k == 0) ? 2 : 1; /* divisors are [0,1] */
- if (n == 1) return 1; /* divisors are [1] */
- nfac = factor(n, factors);
+ if (n <= 1) /* n=0 divisors are [0,1] */
+ return (n == 1) ? 1 : (k == 0) ? 2 : 1; /* n=1 divisors are [1] */
+ nfac = factor(n,factors);
if (k == 0) {
for (i = 0; i < nfac; i++) {
UV e = 1, f = factors[i];
@@ -491,7 +426,8 @@ int holf_factor(UV n, UV *factors, UV rounds)
* so we won't be able to accurately detect it anyway. */
s++; /* s = ceil(sqrt(n*i)) */
m = sqrmod(s, n);
- if (is_perfect_square(m, &f)) {
+ if (is_perfect_square(m)) {
+ f = isqrt(m);
f = gcd_ui( (s>f) ? s-f : f-s, n);
/* This should always succeed, but with overflow concerns.... */
if ((f == 1) || (f == n))
@@ -799,131 +735,7 @@ int pplus1_factor(UV n, UV *factors, UV B1)
}
-/* My modification of Ben Buhrow's modification of Bob Silverman's SQUFOF code.
- */
-
-int squfof_factor(UV n, UV *factors, UV rounds)
-{
- IV qqueue[100+1];
- IV qpoint;
- IV rounds2 = (IV) (rounds/16);
- UV temp;
- IV iq,ll,l2,p,pnext,q,qlast,r,s,t,i;
- IV jter, iter;
- int reloop;
-
- MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in squfof_factor");
-
- /* TODO: What value of n leads to overflow? */
-
- qlast = 1;
- s = isqrt(n);
-
- p = s;
- temp = n - (s*s); /* temp = n - floor(sqrt(n))^2 */
- if (temp == 0) {
- factors[0] = s;
- factors[1] = s;
- return 2;
- }
-
- q = temp; /* q = excess of n over next smaller square */
- ll = 1 + 2*(IV)sqrt((double)(p+p));
- l2 = ll/2;
- qpoint = 0;
-
- /* In the loop below, we need to check if q is a square right before */
- /* the end of the loop. Is there a faster way? The current way is */
- /* EXPENSIVE! (many branches and double prec sqrt) */
-
- for (jter=0; (UV)jter < rounds; jter++) {
- iq = (s + p)/q;
- pnext = iq*q - p;
- if (q <= ll) {
- if ((q & 1) == 0) { qqueue[qpoint] = q/2; if (++qpoint>=100) jter = -1; }
- else if (q <= l2) { qqueue[qpoint] = q; if (++qpoint>=100) jter = -1; }
- if (jter < 0) {
- factors[0] = n; return 1;
- }
- }
-
- t = qlast + iq*(p - pnext);
- qlast = q;
- q = t;
- p = pnext; /* check for square; even iter */
- if (jter & 1) continue; /* jter is odd:omit square test */
- r = isqrt(q); /* r = floor(sqrt(q)) */
- if (q != r*r) continue;
- if (qpoint == 0) break;
- qqueue[qpoint] = 0;
- reloop = 0;
- for (i=0; i<qpoint-1; i+=2) { /* treat queue as list for simplicity*/
- if (r == qqueue[i]) { reloop = 1; break; }
- if (r == qqueue[i+1]) { reloop = 1; break; }
- }
- if (reloop || (r == qqueue[qpoint-1])) continue;
- break;
- } /* end of main loop */
-
- if ((UV)jter >= rounds) {
- factors[0] = n; return 1;
- }
-
- qlast = r;
- p = p + r*((s - p)/r);
- q = (n - (p*p)) / qlast; /* q = (n - p*p)/qlast (div is exact)*/
- for (iter=0; iter<rounds2; iter++) { /* unrolled second main loop */
- iq = (s + p)/q;
- pnext = iq*q - p;
- if (p == pnext) break;
- t = qlast + iq*(p - pnext);
- qlast = q;
- q = t;
- p = pnext;
- iq = (s + p)/q;
- pnext = iq*q - p;
- if (p == pnext) break;
- t = qlast + iq*(p - pnext);
- qlast = q;
- q = t;
- p = pnext;
- iq = (s + p)/q;
- pnext = iq*q - p;
- if (p == pnext) break;
- t = qlast + iq*(p - pnext);
- qlast = q;
- q = t;
- p = pnext;
- iq = (s + p)/q;
- pnext = iq*q - p;
- if (p == pnext) break;
- t = qlast + iq*(p - pnext);
- qlast = q;
- q = t;
- p = pnext;
- }
-
- if (iter >= rounds2) {
- factors[0] = n; return 1;
- }
-
- if ((q & 1) == 0) q/=2; /* q was factor or 2*factor */
-
- if ( (q == 1) || ((UV)q == n) ) {
- factors[0] = n; return 1;
- }
-
- p = n/q;
-
- /* printf(" squfof found %lu = %lu * %lu in %ld/%ld rounds\n", n, p, q, jter, iter); */
-
- factors[0] = p;
- factors[1] = q;
- MPUassert( factors[0] * factors[1] == n , "incorrect factoring");
- return 2;
-}
-
-/* Another version, based on Ben Buhrow's racing SQUFOF. */
+/* SQUFOF, based on Ben Buhrow's racing version. */
typedef struct
{
@@ -985,15 +797,16 @@ static void squfof_unit(UV n, mult_t* mult_save, UV* f)
SQUARE_SEARCH_ITERATION;
/* Even iteration. Check for square: Qn = S*S */
- if (is_perfect_square( Qn, &S ))
+ if (is_perfect_square(Qn))
break;
/* Odd iteration. */
SQUARE_SEARCH_ITERATION;
}
+ S = isqrt(Qn);
/* printf("found square %lu after %lu iterations with mult %d\n", Qn, i, mult_save->mult); */
- /* Reduce to G0 */
+ /* Reduce to G0 */
Ro = P + S*((b0 - P)/S);
t1 = Ro;
So = (n - t1*t1)/S;
@@ -1033,7 +846,7 @@ static const UV squfof_multipliers[] =
3*11, 3, 5*11, 5, 7*11, 7, 11, 1 };
#define NSQUFOF_MULT (sizeof(squfof_multipliers)/sizeof(squfof_multipliers[0]))
-int racing_squfof_factor(UV n, UV *factors, UV rounds)
+int squfof_factor(UV n, UV *factors, UV rounds)
{
const UV big2 = UV_MAX >> 2;
mult_t mult_save[NSQUFOF_MULT];
@@ -1042,7 +855,7 @@ int racing_squfof_factor(UV n, UV *factors, UV rounds)
UV rounds_done = 0;
/* Caller should have handled these trivial cases */
- MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in racing_squfof_factor");
+ MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in squfof_factor");
/* Too big */
if (n > big2) {
diff --git a/factor.h b/factor.h
index 7ee466a..7c4682c 100644
--- a/factor.h
+++ b/factor.h
@@ -18,7 +18,6 @@ extern int prho_factor(UV n, UV *factors, UV maxrounds);
extern int pminus1_factor(UV n, UV *factors, UV B1, UV B2);
extern int pplus1_factor(UV n, UV *factors, UV B);
extern int squfof_factor(UV n, UV *factors, UV rounds);
-extern int racing_squfof_factor(UV n, UV *factors, UV rounds);
extern UV* _divisor_list(UV n, UV *num_divisors);
diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm
index c8b410c..1018b31 100644
--- a/lib/Math/Prime/Util.pm
+++ b/lib/Math/Prime/Util.pm
@@ -136,7 +136,6 @@ BEGIN {
*fermat_factor = \&Math::Prime::Util::PP::fermat_factor;
*holf_factor = \&Math::Prime::Util::PP::holf_factor;
*squfof_factor = \&Math::Prime::Util::PP::squfof_factor;
- *rsqufof_factor = \&Math::Prime::Util::PP::squfof_factor;
*pbrent_factor = \&Math::Prime::Util::PP::pbrent_factor;
*prho_factor = \&Math::Prime::Util::PP::prho_factor;
*pminus1_factor = \&Math::Prime::Util::PP::pminus1_factor;
@@ -3863,10 +3862,7 @@ same advantages and disadvantages as Fermat's method.
=head2 squfof_factor
-=head2 rsqufof_factor
-
my @factors = squfof_factor($n);
- my @factors = rsqufof_factor($n); # racing multiplier version
Produces factors, not necessarily prime, of the positive number input. An
optional number of rounds can be given as a second parameter. It is possible
@@ -4799,9 +4795,7 @@ The current implementation does not use these ideas. Future versions might.
The SQUFOF implementation being used is a slight modification to the public
domain racing version written by Ben Buhrow. Enhancements with ideas from
Ben's later code as well as Jason Papadopoulos's public domain implementations
-are planned for a later version. The old SQUFOF implementation, still included
-in the code, is my modifications to Ben Buhrow's modifications to Bob
-Silverman's code.
+are planned for a later version.
The LMO implementation is based on the 2003 preprint from Christian Bau,
as well as the 2006 paper from Tomás Oliveira e Silva. I also want to
diff --git a/primality.c b/primality.c
index 33880a5..5d0fdbf 100644
--- a/primality.c
+++ b/primality.c
@@ -8,6 +8,7 @@
#include "mulmod.h"
#define FUNC_isqrt 1
#define FUNC_gcd_ui 1
+#define FUNC_is_perfect_square
#include "util.h"
/* Primality related functions, including Montgomery math */
@@ -148,19 +149,6 @@ static int monty_mr64(const uint64_t n, const UV* bases, int cnt)
-/* Helper functions */
-static int is_perfect_square(UV n, UV* sqrtn)
-{
- UV m;
- m = n & 127;
- if ((m*0x8bc40d7d) & (m*0xa1e2f5d1) & 0x14020a) return 0;
- m = isqrt(n);
- if (n != (m*m))
- return 0;
- if (sqrtn != 0)
- *sqrtn = m;
- return 1;
-}
static int jacobi_iu(IV in, UV m) {
int j = 1;
UV n = (in < 0) ? -in : in;
@@ -286,7 +274,7 @@ int _XS_BPSW(UV const n)
return 0;
if (jacobi_iu(D, n) == -1)
break;
- if (P == (3+20) && is_perfect_square(n, 0)) return 0;
+ if (P == (3+20) && is_perfect_square(n)) return 0;
P++;
if (P > 65535)
croak("lucas_extrastrong_params: P exceeded 65535");
@@ -423,7 +411,7 @@ int _XS_is_lucas_pseudoprime(UV n, int strength)
if (gcd_ui(Du, n) > 1 && gcd_ui(Du, n) != n) return 0;
if (jacobi_iu(D, n) == -1)
break;
- if (Du == 21 && is_perfect_square(n, 0)) return 0;
+ if (Du == 21 && is_perfect_square(n)) return 0;
Du += 2;
sign = -sign;
}
@@ -437,7 +425,7 @@ int _XS_is_lucas_pseudoprime(UV n, int strength)
if (gcd_ui(D, n) > 1 && gcd_ui(D, n) != n) return 0;
if (jacobi_iu(D, n) == -1)
break;
- if (P == 21 && is_perfect_square(n, 0)) return 0;
+ if (P == 21 && is_perfect_square(n)) return 0;
P++;
}
}
@@ -595,7 +583,7 @@ int _XS_is_almost_extra_strong_lucas_pseudoprime(UV n, UV increment)
return 0;
if (jacobi_iu(D, n) == -1)
break;
- if (P == (3+20*increment) && is_perfect_square(n, 0)) return 0;
+ if (P == (3+20*increment) && is_perfect_square(n)) return 0;
P += increment;
if (P > 65535)
croak("lucas_extrastrong_params: P exceeded 65535");
@@ -680,7 +668,7 @@ int _XS_is_frobenius_underwood_pseudoprime(UV n)
if (n < 7) return (n == 2 || n == 3 || n == 5);
if ((n % 2) == 0 || n == UV_MAX) return 0;
- if (is_perfect_square(n,0)) return 0;
+ if (is_perfect_square(n)) return 0;
x = 0;
t = -1;
diff --git a/t/50-factoring.t b/t/50-factoring.t
index a3af134..49cfbce 100644
--- a/t/50-factoring.t
+++ b/t/50-factoring.t
@@ -113,7 +113,7 @@ plan tests => (3 * scalar @testn)
+ 2*scalar(keys %prime_factors)
+ 4*scalar(keys %all_factors)
+ 2*scalar(keys %factor_exponents)
- + 10*9
+ + 10*8 # 10 extra factoring tests * 8 algorithms
+ 8
+ 1;
@@ -159,7 +159,6 @@ extra_factor_test("trial_factor", sub {Math::Prime::Util::trial_factor(shift)})
extra_factor_test("fermat_factor", sub {Math::Prime::Util::fermat_factor(shift)});
extra_factor_test("holf_factor", sub {Math::Prime::Util::holf_factor(shift)});
extra_factor_test("squfof_factor", sub {Math::Prime::Util::squfof_factor(shift)});
-extra_factor_test("rsqufof_factor", sub {Math::Prime::Util::rsqufof_factor(shift)});
extra_factor_test("pbrent_factor", sub {Math::Prime::Util::pbrent_factor(shift)});
extra_factor_test("prho_factor", sub {Math::Prime::Util::prho_factor(shift)});
extra_factor_test("pminus1_factor",sub {Math::Prime::Util::pminus1_factor(shift)});
diff --git a/util.h b/util.h
index 0c40e59..ff4a99d 100644
--- a/util.h
+++ b/util.h
@@ -46,7 +46,7 @@ extern UV znorder(UV a, UV n);
#define MPU_PROB_PRIME_BEST UVCONST(100000000)
#endif
-#ifdef FUNC_isqrt
+#if defined(FUNC_isqrt) || defined(FUNC_is_perfect_square)
static UV isqrt(UV n) {
UV root;
#if BITS_PER_WORD == 32
@@ -101,4 +101,22 @@ static UV lcm_ui(UV x, UV y) {
}
#endif
+#ifdef FUNC_is_perfect_square
+/* See: http://mersenneforum.org/showpost.php?p=110896 */
+static int is_perfect_square(UV n)
+{
+ UV m;
+ m = n & 127;
+ if ((m*0x8bc40d7d) & (m*0xa1e2f5d1) & 0x14020a) return 0;
+ /* If your sqrt is particularly slow, this cuts out another 80%:
+ m = n % 63; if ((m*0x3d491df7) & (m*0xc824a9f9) & 0x10f14008) return 0;
+ and this cuts out some more:
+ m = n % 25; if ((m*0x1929fc1b) & (m*0x4c9ea3b2) & 0x51001005) return 0;
+ */
+ m = (UV) ( sqrt((double) n) + 0.5 );
+ return m*m == n;
+}
+#endif
+
+
#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git
More information about the Pkg-perl-cvs-commits
mailing list