[libmath-prime-util-perl] 30/72: Speedups for divisor_sum
Partha P. Mukherjee
ppm-guest at moszumanska.debian.org
Thu May 21 18:49:38 UTC 2015
This is an automated email from the git hooks/post-receive script.
ppm-guest pushed a commit to annotated tag v0.32
in repository libmath-prime-util-perl.
commit a7f63a02366f3005abd8e20d12ed51b873e2a1cd
Author: Dana Jacobsen <dana at acm.org>
Date: Mon Sep 16 00:16:13 2013 -0700
Speedups for divisor_sum
---
Changes | 4 +-
aks.c | 1 -
factor.c | 68 +++++++++------
lib/Math/Prime/Util.pm | 209 +++++++++++++++++++++++++---------------------
lib/Math/Prime/Util/PP.pm | 10 +--
sieve.c | 2 +-
util.c | 10 +--
7 files changed, 166 insertions(+), 138 deletions(-)
diff --git a/Changes b/Changes
index d41e4f0..316fda4 100644
--- a/Changes
+++ b/Changes
@@ -20,7 +20,9 @@ Revision history for Perl module Math::Prime::Util
- divisor_sum can take an integer 'k' in the second argument to compute
sigma_k. This is much faster than using subs, especially when the
- result can be computed in XS using native precision.
+ result can be computed in XS using native precision. For integer
+ second arguments, the result will automatically be a bigint if needed.
+ It is also much faster for larger inputs.
- Incorporate Montgomery reduction for primality testing, thanks to
Wojciech Izykowski. This is a 1.3 to 1.5x speedup for is_prob_prime,
diff --git a/aks.c b/aks.c
index 119d55b..bc4a369 100644
--- a/aks.c
+++ b/aks.c
@@ -27,7 +27,6 @@
#include "ptypes.h"
#include "aks.h"
#include "util.h"
-#include "sieve.h"
#include "cache.h"
#include "mulmod.h"
diff --git a/factor.c b/factor.c
index 94e919c..4c08193 100644
--- a/factor.c
+++ b/factor.c
@@ -185,15 +185,17 @@ int trial_factor(UV n, UV *factors, UV maxtrial)
/* Use the table of small primes to quickly do trial division. */
{
UV sp = 5;
+ UV slimit = (limit < 2003) ? limit : 2003;
f = primes_small[sp];
- while (f <= limit && f <= 2003) {
+ while (f <= slimit) {
if ( (n%f) == 0 ) {
do {
factors[nfactors++] = f;
n /= f;
} while ( (n%f) == 0 );
newlimit = isqrt(n);
- if (newlimit < limit) limit = newlimit;
+ if (newlimit < slimit) slimit = newlimit;
+ if (newlimit < limit) limit = newlimit;
}
f = primes_small[++sp];
}
@@ -291,43 +293,55 @@ static int is_perfect_square(UV n, UV* sqrtn)
}
+/*
+ * The usual method, on OEIS for instance, is:
+ * (p^(k*(e+1))-1) / (p^k-1)
+ * but that overflows quicky. Instead we rearrange as:
+ * 1 + p^k + p^k^2 + ... p^k^e
+ */
UV _XS_divisor_sum(UV n, UV k)
{
UV factors[MPU_MAX_FACTORS+1];
- int nfac, i;
+ int nfac, i, j;
UV product = 1;
if (n <= 1) return n;
nfac = factor(n, factors);
- for (i = 0; i < nfac; i++) {
- UV f = factors[i];
- UV e = 1;
- while (i+1 < nfac && f == factors[i+1])
- { e++; i++; }
- if (k == 0) {
+ if (k == 0) {
+ for (i = 0; i < nfac; i++) {
+ UV e = 1, f = factors[i];
+ while (i+1 < nfac && f == factors[i+1]) { e++; i++; }
product *= (e+1);
- } else if (k == 1 && e == 1) {
- product *= f+1;
- } else if (k == 1) {
- UV fmult = f * f;
- while (e-- >= 2)
- fmult *= f;
- product *= (fmult - 1) / (f - 1);
- } else { /* Overflow is a concern */
- UV j, pk = f * f;
- for (j = 2; j < k; j++) pk *= f;
- if (e == 1) {
- product *= pk+1;
- } else {
- /* Less overflow than (pk^(e+1)-1) / (pk-1) */
- UV fmult = 1 + pk + pk*pk;
- UV pke = pk * pk;
- for (j = 2; j < e; j++) {
+ }
+ } else if (k == 1) {
+ for (i = 0; i < nfac; i++) {
+ UV e = 1, f = factors[i];
+ UV fmult = 1 + f;
+ while (i+1 < nfac && f == factors[i+1]) { e++; i++; }
+ if (e > 1) {
+ UV pke = f;
+ for (j = 1; j < e; j++) {
+ pke *= f;
+ fmult += pke;
+ }
+ }
+ product *= fmult;
+ }
+ } else {
+ for (i = 0; i < nfac; i++) {
+ UV e = 1, f = factors[i];
+ UV fmult, pk = f;
+ for (j = 1; j < k; j++) pk *= f;
+ while (i+1 < nfac && f == factors[i+1]) { e++; i++; }
+ fmult = 1 + pk;
+ if (e > 1) {
+ UV pke = pk;
+ for (j = 1; j < e; j++) {
pke *= pk;
fmult += pke;
}
- product *= fmult;
}
+ product *= fmult;
}
}
return product;
diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm
index 533ba76..0411681 100644
--- a/lib/Math/Prime/Util.pm
+++ b/lib/Math/Prime/Util.pm
@@ -869,7 +869,7 @@ sub primes {
my $l = ($_Config{'maxbits'} > 32 && $bits > 79) ? 63 : 31;
$l = $bits-2 if $bits-2 < $l;
my $arange = (1 << $l) - 1; # 2^$l-1
- my $brange = Math::BigInt->new(2)->bpow($bits-$l-2)->bsub(1);
+ my $brange = Math::BigInt->new(2)->bpow($bits-$l-2)->bdec();
my $b = 2 * $irandf->($brange) + 1;
# Precalculate some modulii so we can do trial division on native int
# 9699690 = 2*3*5*7*11*13*17*19, so later operations can be native ints
@@ -1009,7 +1009,7 @@ sub primes {
# R is a random number between $I+1 and 2*$I
my $R = $I + 1 + $irandf->( $I - 1 );
#my $n = 2 * $R * $q + 1;
- my $n = Math::BigInt->new(2)->bmul($R)->bmul($q)->badd(1);
+ my $n = Math::BigInt->new(2)->bmul($R)->bmul($q)->binc();
# We constructed a promising looking $n. Now test it.
print "." if $verbose > 2;
if ($_HAVE_GMP) {
@@ -1082,7 +1082,7 @@ sub primes {
my $qpp = random_nbit_prime($lpp);
$qp = Math::BigInt->new("$qp") unless ref($qp) eq 'Math::BigInt';
$qpp = Math::BigInt->new("$qpp") unless ref($qpp) eq 'Math::BigInt';
- my ($il, $rem) = Math::BigInt->new(2)->bpow($l-1)->bsub(1)->bdiv(2*$qpp);
+ my ($il, $rem) = Math::BigInt->new(2)->bpow($l-1)->bdec()->bdiv(2*$qpp);
$il++ if $rem > 0;
$il = $il->as_int();
my $iu = Math::BigInt->new(2)->bpow($l)->bsub(2)->bdiv(2*$qpp)->as_int();
@@ -1090,11 +1090,11 @@ sub primes {
for (my $i = $istart; $i <= $iu; $i++) { # Search for q
my $q = 2 * $i * $qpp + 1;
next unless is_prob_prime($q);
- my $pp = $qp->copy->bmodpow($q-2, $q)->bmul(2)->bmul($qp)->bsub(1);
+ my $pp = $qp->copy->bmodpow($q-2, $q)->bmul(2)->bmul($qp)->bdec();
my ($jl, $rem) = Math::BigInt->new(2)->bpow($t-1)->bsub($pp)->bdiv(2*$q*$qp);
$jl++ if $rem > 0;
$jl = $jl->as_int();
- my $ju = Math::BigInt->new(2)->bpow($t)->bsub(1)->bsub($pp)->bdiv(2*$q*$qp)->as_int();
+ my $ju = Math::BigInt->new(2)->bpow($t)->bdec()->bsub($pp)->bdiv(2*$q*$qp)->as_int();
my $jstart = $jl + $irandf->($ju - $jl);
for (my $j = $jstart; $j <= $ju; $j++) { # Search for p
my $p = $pp + 2 * $j * $q * $qp;
@@ -1352,28 +1352,21 @@ sub euler_phi {
return $n if $n <= 1;
my @factors = factor($n);
+ my $totient = $n - $n + 1;
+ my $lastf = 0;
+
if (ref($n) ne 'Math::BigInt') {
- my $totient = 1;
- my $lastf = 0;
foreach my $f (@factors) {
if ($f == $lastf) { $totient *= $f; }
else { $totient *= $f-1; $lastf = $f; }
}
- return $totient;
- }
-
- my $totient = $n->copy->bone;
- my $lastf = 0;
- foreach my $factor (@factors) {
- # This screwball line is here to solve some issues with the GMP backend,
- # which has a weird bug. Results of the multiply can turn negative (!)
- # if we don't do this. Perhaps related to RT 71548?
- # perl -le 'use Math::BigInt lib=>'GMP'; my $a = 2931542417; my $n = Math::BigInt->new("49754396241690624"); my $x = $n*$a; print $x;'
- # perl -le 'use Math::BigInt lib=>'GMP'; my $a = Math::BigInt->bone; $a *= 2931542417; $a *= 49754396241690624; print $a;'
- # TODO: more work reproducing this
- my $f = $n->copy->bzero->badd("$factor");
- if ($f == $lastf) { $totient->bmul($f); }
- else { $totient->bmul($f->copy->bsub(1)); $lastf = $f; }
+ } else {
+ my $zero = $n->copy->bzero;
+ foreach my $factor (@factors) {
+ my $f = $zero->copy->badd("$factor"); # Math::BigInt::GMP RT 71548
+ if ($f == $lastf) { $totient->bmul($f); }
+ else { $totient->bmul($f->copy->bdec()); $lastf = $f; }
+ }
}
return $totient;
}
@@ -1404,22 +1397,27 @@ sub jordan_totient {
my $zero = $n->copy->bzero;
foreach my $factor (@factors) {
my $fmult = $zero->copy->badd("$factor")->bpow($k);
- $totient->bmul($fmult->copy->bsub(1));
+ $totient->bmul($fmult->copy->bdec());
$totient->bmul($fmult) for (2 .. $factor_mult{$factor});
}
}
return $totient;
}
-# Mathematica and Pari both have functions like this.
+my @_ds_overflow = # We'll use BigInt math if the input is larger than this.
+ (~0 > 4294967295)
+ ? (0, 3000000000000000000, 3000000000, 2487240, 64260, 7026)
+ : (0, 845404560, 52560, 1548, 252, 84);
sub divisor_sum {
my($n, $k) = @_;
return (0,1)[$n] if defined $n && $n <= 1;
_validate_num($n) || _validate_positive_integer($n);
- $k = 1 unless defined $k;
+ # With no argument, call the XS routine for k=1 immediately if possible.
+ return _XS_divisor_sum($n, 1)
+ if !defined $k && $n <= $_XS_MAXVAL && $n < $_ds_overflow[1];
- if (ref($k) eq 'CODE') {
+ if (defined $k && ref($k) eq 'CODE') {
my $sum = $k->(1);
return $sum if $n == 1;
foreach my $f (all_factors($n), $n ) {
@@ -1429,34 +1427,59 @@ sub divisor_sum {
}
croak "Second argument must be a code ref or number"
- unless _validate_num($k) || _validate_positive_integer($k);
-
- if ($n <= $_XS_MAXVAL) {
- return _XS_divisor_sum($n, $k) if $k <= 1;
- # 2 overflows 64-bit at 3,000,000,000 ish
- # 3 overflows 64-bit at 2487240
- # 4 overflows 64-bit at 64260
- # If we think we can handle overflow, we could do a few more
+ unless !defined $k || _validate_num($k) || _validate_positive_integer($k);
+ $k = 1 if !defined $k;
+
+ my $will_overflow = ($k == 0) ? 0
+ : ($k > 5) ? 1
+ : $n >= $_ds_overflow[$k];
+
+ return _XS_divisor_sum($n, $k) if $n <= $_XS_MAXVAL && !$will_overflow;
+
+ if ($will_overflow) {
+ if (!defined $Math::BigInt::VERSION) {
+ eval { require Math::BigInt; Math::BigInt->import(try=>'GMP,Pari'); 1; }
+ or do { croak "Cannot load Math::BigInt"; };
+ }
}
- my $bone = (ref($_[0]) eq 'Math::BigInt') ? $_[0]->copy->bone : 1;
- my $product = $bone;
+ # The standard way is:
+ # my $pk = $f ** $k; $product *= ($pk ** ($e+1) - 1) / ($pk - 1);
+ # But we get less overflow using:
+ # my $pk = $f ** $k; $product *= $pk**E for E in 0 .. e
+ # Also separate BigInt and do fiddly bits for better performance.
+
+ my $product = 1;
my @factors = ($n <= $_XS_MAXVAL) ? _XS_factor($n) : factor($n);
- while (@factors) {
- my $f = shift @factors;
- my $e = 1;
- while (@factors && $factors[0] == $f) {
- $e++;
- shift @factors;
+
+ if (!$will_overflow) {
+ while (@factors) {
+ my ($e, $f) = (1, shift @factors);
+ while (@factors && $factors[0] == $f) { $e++; shift @factors; }
+ if ($k == 0) {
+ $product *= ($e+1);
+ } else {
+ my $pk = $f ** $k;
+ my $fmult = $pk + 1;
+ foreach my $E (2 .. $e) { $fmult += $pk**$E }
+ $product *= $fmult;
+ }
}
- if ($k == 0) { $product *= ($e+1); }
- elsif ($k == 1) {
- if ($e == 1) { $product *= $f+1; }
- else { $product *= ( ($bone*$f)**($e+1) - 1) / ($f - 1); }
- } else {
- # TODO: do the long way around to get less overflow, see factor.c
- my $pk = ($bone * $f) ** $k;
- $product *= ($e == 1) ? $pk+1 : ($pk ** ($e+1) - 1) / ($pk - 1);
+ } else {
+ $product = Math::BigInt->bone;
+ my $bik = Math::BigInt->new("$k");
+ while (@factors) {
+ my ($e, $f) = (1, shift @factors);
+ while (@factors && $factors[0] == $f) { $e++; shift @factors; }
+ my $pk = Math::BigInt->new("$f")->bpow($bik);
+ if ($e == 1) { $pk->binc(); $product->bmul($pk); }
+ elsif ($e == 2) { $pk->badd($pk*$pk)->binc(); $product->bmul($pk); }
+ else {
+ my $fmult = $pk;
+ foreach my $E (2 .. $e) { $fmult += $pk->copy->bpow($E) }
+ $fmult->binc();
+ $product *= $fmult;
+ }
}
}
return $product;
@@ -1468,11 +1491,20 @@ sub _generic_forprimes (&$;$) { ## no critic qw(ProhibitSubroutinePrototypes)
if (!defined $end) { $end = $beg; $beg = 2; }
_validate_num($beg) || _validate_positive_integer($beg);
_validate_num($end) || _validate_positive_integer($end);
- my $p = ($beg <= 2) ? 2 : next_prime($beg-1);
- while ($p <= $end) {
- local *_ = \$p;
- $sub->();
- $p = next_prime($p);
+ # It's possible we're here just because the arguments were bigints < 2^64
+ # TODO: make a function to convert native size bigints to UVs, and let the
+ # XS functions call that, so we don't do these loop-de-loops.
+ if (!ref($beg) && !ref($end) && $beg <= $_XS_MAXVAL && $end <= $_XS_MAXVAL) {
+ return forprimes( \&$sub, $beg, $end);
+ }
+ $beg = 2 if $beg < 2;
+ {
+ my $pp;
+ local *_ = \$pp;
+ for (my $p = next_prime($beg-1); $p <= $end; $p = next_prime($p)) {
+ $pp = $p;
+ $sub->();
+ }
}
}
@@ -2477,9 +2509,10 @@ Version 0.31
say "lamba(49) = ", log(exp_mangoldt(49));
# divisor sum
- $sigma = divisor_sum( $n );
- $sigma0 = divisor_sum( $n, 1 );
- $sigma2 = divisor_sum( $n, sub { $_[0]*$_[0] } );
+ $sigma = divisor_sum( $n ); # sum of divisors
+ $sigma0 = divisor_sum( $n, 0 ); # count of divisors
+ $sigmak = divisor_sum( $n, $k );
+ $sigmaf = divisor_sum( $n, sub { log($_[0]) } ); # arbitrary func
# primorial n#, primorial p(n)#, and lcm
say "The product of primes below 47 is ", primorial(47);
@@ -3450,41 +3483,28 @@ yield similar results, albeit slower and using more memory.
say "Sum of divisors of $n:", divisor_sum( $n );
-This function takes a positive integer as input and returns the sum of all
-the divisors of the input, including 1 and itself. This is known as the
-sigma function (see Hardy and Wright section 16.7, or OEIS A000203).
-
-If a second argument is given that is numeric, it is used as the sigma
-parameter. The result will then be the sum of each divisor raised to
-this power. Hence C<0> will count the divisors, C<1> will sum them,
-C<2> will sum the squares, and so on.
-
-The more general form takes a code reference as a second parameter, which
-is applied to each divisor before the summation. This allows computation
-of numerous functions such as OEIS A000005 [d(n), sigma_0(n), tau(n)]:
-
- divisor_sum( $n, sub { 1 } );
-
-OEIS A001157 [sigma_2(n)]:
-
- divisor_sum( $n, sub { $_[0]*$_[0] } )
-
-the general sigma_k (OEIS A00005, A000203, A001157, A001158, etc.):
+This function takes a positive integer as input and returns the sum of the
+k-th powers of the divisors of the input, including 1 and itself. If the
+second argument (C<k>) is omitted it is assumed to be 1. This is known as
+the sigma function (see Hardy and Wright section 16.7, or OEIS A000203).
+The API is identical to Pari/GP's C<sigma> function.
- divisor_sum( $n, sub { $_[0] ** $k } );
+The second argument can be a code reference, which is called for each divisor
+and the results are summed. This allows computation of other functions,
+but will be less efficient than using the numeric second argument.
-the 5th Jordan totient (OEIS A059378):
+An example of the 5th Jordan totient (OEIS A059378):
divisor_sum( $n, sub { my $d=shift; $d**5 * moebius($n/$d); } );
-though in the last case we have a function L</jordan_totient> to compute
-it more efficiently.
+though we have a function L</jordan_totient> which is more efficient.
This function is useful for calculating things like aliquot sums, abundant
numbers, perfect numbers, etc.
-The summation is done as a bigint if the input was a bigint object. You may
-need to ensure the result of the subroutine does not overflow a native int.
+For numeric second arguments (sigma computations), the result will be a bigint
+if necessary. For the code reference case, the user must take care to return
+bigints if overflow will be a concern.
=head2 primorial
@@ -4405,11 +4425,10 @@ With it installed, it is about 2x slower than Math::Pari.
Similar to MPU's L<moebius>. Comparisons are similar to C<eulerphi>.
-=item C<sumdiv>
+=item C<sigma>
-Similar to MPU's L<divisor_sum>. The standard sum (sigma_1) is
-very fast in MPU. Giving it a sub makes it much slower, and for numbers
-with very many factors, Pari is I<much> faster.
+Similar to MPU's L<divisor_sum>. MPU is ~10x faster for native integers
+and about 2x slower for bigints.
=item C<eint1>
@@ -4425,13 +4444,13 @@ and complex inputs).
Overall, L<Math::Pari> supports a huge variety of functionality and has a
sophisticated and mature code base behind it (noting that the default version
of Pari used is about 10 years old now).
-For native integers sometimes
-the functions can be slower, but bigints are often superior and it rarely
-has any performance surprises. Some of the unique features MPU offers include
-super fast prime counts, nth_prime, ECPP primality proofs with certificates,
-approximations and limits for both, random primes, fast Mertens calculations,
-Chebyshev theta and psi functions, and the logarithmic integral and Riemann R
-functions. All with fairly minimal installation requirements.
+For native integers often using Math::Pari will be slower, but bigints are
+often superior and it rarely has any performance surprises. Some of the
+unique features MPU offers include super fast prime counts, nth_prime,
+ECPP primality proofs with certificates, approximations and limits for both,
+random primes, fast Mertens calculations, Chebyshev theta and psi functions,
+and the logarithmic integral and Riemann R functions. All with fairly
+minimal installation requirements.
=head1 PERFORMANCE
diff --git a/lib/Math/Prime/Util/PP.pm b/lib/Math/Prime/Util/PP.pm
index 551d5a7..e4e1ba5 100644
--- a/lib/Math/Prime/Util/PP.pm
+++ b/lib/Math/Prime/Util/PP.pm
@@ -780,7 +780,7 @@ sub is_pseudoprime {
my $x = (ref($n) eq 'Math::BigInt')
? $n->copy->bzero->badd($base)->bmodpow($n-1,$n)
: _native_powmod($base, $n-1, $n);
- return ($x == 1);
+ return ($x == 1) ? 1 : 0;
}
sub miller_rabin {
@@ -800,7 +800,7 @@ sub miller_rabin {
if ( ref($n) eq 'Math::BigInt' ) {
my $s = 0;
- my $nminus1 = $n->copy->bsub(1);
+ my $nminus1 = $n->copy->bdec();
my $d = $nminus1->copy;
while ($d->is_even) {
$s++;
@@ -1143,7 +1143,7 @@ sub is_almost_extra_strong_lucas_pseudoprime {
my $ZERO = $n->copy->bzero;
my $V = $ZERO + $P; # V_{k}
my $W = $ZERO + $P*$P-2; # V_{k+1}
- my $kstr = substr($n->copy->badd(1)->as_bin, 2);
+ my $kstr = substr($n->copy->binc()->as_bin, 2);
$kstr =~ s/(0*)$//;
my $s = length($1);
my $bpos = 0;
@@ -1330,7 +1330,7 @@ sub is_aks_prime {
return 1 if $r >= $n;
# Since r is a prime, phi(r) = r-1
- my $rlimit = int( Math::BigFloat->new("$r")->bsub(1)
+ my $rlimit = int( Math::BigFloat->new("$r")->bdec()
->bsqrt->bmul($log2n)->bfloor->bstr);
$_poly_bignum = 1;
@@ -1711,7 +1711,7 @@ sub pminus1_factor {
my $one = $n->copy->bone;
my ($j, $q, $saveq) = (32, 2, 2);
my $t = $one->copy;
- my $a = $one->copy->badd(1);
+ my $a = $one->copy->binc();
my $savea = $a->copy;
my $f = 1;
my($pc_beg, $pc_end, @bprimes);
diff --git a/sieve.c b/sieve.c
index 5e94d5d..7a8de05 100644
--- a/sieve.c
+++ b/sieve.c
@@ -372,7 +372,7 @@ int next_segment_primes(void* vctx, UV* base, UV* low, UV* high)
ctx->lod += range_d;
ctx->low = *high + 2;
-
+
return 1;
}
diff --git a/util.c b/util.c
index b4436af..17d927b 100644
--- a/util.c
+++ b/util.c
@@ -152,13 +152,7 @@ int _XS_is_prime(UV n)
const unsigned char* sieve;
int isprime;
- if (n <= 10) {
- switch (n) {
- case 2: case 3: case 5: case 7: return 2; break;
- default: break;
- }
- return 0;
- }
+ if (n <= 10) return (n == 2 || n == 3 || n == 5 || n == 7) ? 2 : 0;
d = n/30;
m = n - d*30;
mtab = masktab30[m]; /* Bitmask in mod30 wheel */
@@ -806,7 +800,7 @@ signed char* _moebius_range(UV lo, UV hi)
croak("Could not get memory for %"UVuf" moebius results\n", hi-lo+1);
A = (unsigned char*) mu;
if (sqrtn*sqrtn != hi) sqrtn++; /* ceil sqrtn */
-
+
logp = 1; nextlog = 3; /* 2+1 */
START_DO_FOR_EACH_PRIME(2, sqrtn) {
UV p2 = p*p;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git
More information about the Pkg-perl-cvs-commits
mailing list