[libmath-prime-util-perl] 14/17: Performance enhancements
Partha P. Mukherjee
ppm-guest at moszumanska.debian.org
Thu May 21 18:47:14 UTC 2015
This is an automated email from the git hooks/post-receive script.
ppm-guest pushed a commit to annotated tag v0.21
in repository libmath-prime-util-perl.
commit 9285897e07f4c96fc6cae969e5f7a7acc56f4e28
Author: Dana Jacobsen <dana at acm.org>
Date: Fri Feb 22 16:50:41 2013 -0800
Performance enhancements
---
Changes | 16 +++++++---
MANIFEST | 1 +
XS.xs | 6 ++--
factor.c | 81 ++++++++++++++++++++++++++++++++++++------------
lib/Math/Prime/Util.pm | 83 +++++++++++++++++++++++---------------------------
t/19-moebius.t | 61 ++++++++++++++++++++++++++++++++-----
util.c | 2 +-
xt/moebius-mertens.pl | 35 +++++++++++++++++++++
8 files changed, 205 insertions(+), 80 deletions(-)
diff --git a/Changes b/Changes
index 714a7af..a2e6c17 100644
--- a/Changes
+++ b/Changes
@@ -4,17 +4,25 @@ Revision history for Perl extension Math::Prime::Util.
- Switch to using Bytes::Random::Secure for random primes.
- - primes.pl: Add circular and Panaitopol primes, speedup Pillai primes.
-
- Spelling fixes in documentation.
- - Fast return of Euler's totient and Möbius function over a range.
+ - primes.pl: Add circular and Panaitopol primes.
+
+ - euler_phi and moebius now will compute over a range.
- Add mertens function: 1000+ times faster than summing moebius($_).
- Add exp_mangoldt function: exponential of von Mangoldt's function.
- - divisor sum is 2x faster. Also default to sigma if no sub given.
+ - divisor_sum defaults to sigma if no sub is given (i.e. it sums).
+
+ - Performance:
+ - Speedup factoring small numbers. With -nobigint factoring from
+ 1 to 10M, it's 1.2x faster. 1.5x faster than Math::Factor::XS.
+ - Totient and Möbius over a range are much faster than separate calls.
+ - divisor_sum is 2x faster.
+ - primes.pl is much faster with Pillai primes.
+ - Reduce overhead in euler_phi -- about 2x faster for individual calls.
0.20 3 February 2012
diff --git a/MANIFEST b/MANIFEST
index 6945e23..6457a3a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -87,4 +87,5 @@ t/81-bignum.t
t/90-release-perlcritic.t
t/91-release-pod-syntax.t
t/92-release-pod-coverage.t
+xt/moebius-mertens.pl
.travis.yml
diff --git a/XS.xs b/XS.xs
index e831b0a..d79b4f7 100644
--- a/XS.xs
+++ b/XS.xs
@@ -219,7 +219,7 @@ _XS_factor(IN UV n)
PPCODE:
if (n < 4) { /* If n is 0-3, we're done. */
XPUSHs(sv_2mortal(newSVuv( n )));
- } else if (n < 2000000) { /* For small n, just trial division */
+ } else if (n < 10000000) { /* For small n, just trial division */
int i;
UV facs[32]; /* maximum number of factors is log2n */
UV nfacs = trial_factor(n, facs, 0);
@@ -228,8 +228,8 @@ _XS_factor(IN UV n)
}
} else {
int const verbose = _XS_get_verbose();
- UV const tlim_lower = 211; /* Trial division through this prime */
- UV const tlim = 223; /* This means we've checked through here */
+ UV const tlim_lower = 401; /* Trial division through this prime */
+ UV const tlim = 409; /* This means we've checked through here */
UV tofac_stack[MPU_MAX_FACTORS+1];
UV factored_stack[MPU_MAX_FACTORS+1];
int ntofac = 0;
diff --git a/factor.c b/factor.c
index 5396f78..96c7e64 100644
--- a/factor.c
+++ b/factor.c
@@ -20,50 +20,91 @@
* match the native integer type used inside our Perl, so just use those.
*/
+static const unsigned short primes_small[] =
+ {0,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,
+ 101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,
+ 193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,281,283,
+ 293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,
+ 409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,509,
+ 521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,617,619,631,
+ 641,643,647,653,659,661,673,677,683,691,701,709,719,727,733,739,743,751,
+ 757,761,769,773,787,797,809,811,821,823,827,829,839,853,857,859,863,877,
+ 881,883,887,907,911,919,929,937,941,947,953,967,971,977,983,991,997,1009,
+ 1013,1019,1021,1031,1033,1039,1049,1051,1061,1063,1069,1087,1091,1093,
+ 1097,1103,1109,1117,1123,1129,1151,1153,1163,1171,1181,1187,1193,1201,
+ 1213,1217,1223,1229,1231,1237,1249,1259,1277,1279,1283,1289,1291,1297,
+ 1301,1303,1307,1319,1321,1327,1361,1367,1373,1381,1399,1409,1423,1427,
+ 1429,1433,1439,1447,1451,1453,1459,1471,1481,1483,1487,1489,1493,1499,
+ 1511,1523,1531,1543,1549,1553,1559,1567,1571,1579,1583,1597,1601,1607,
+ 1609,1613,1619,1621,1627,1637,1657,1663,1667,1669,1693,1697,1699,1709,
+ 1721,1723,1733,1741,1747,1753,1759,1777,1783,1787,1789,1801,1811,1823,
+ 1831,1847,1861,1867,1871,1873,1877,1879,1889,1901,1907,1913,1931,1933,
+ 1949,1951,1973,1979,1987,1993,1997,1999,2003,2011};
+#define NPRIMES_SMALL (sizeof(primes_small)/sizeof(primes_small[0]))
int trial_factor(UV n, UV *factors, UV maxtrial)
{
- UV f, m, limit;
+ UV f, limit, newlimit;
int nfactors = 0;
if (maxtrial == 0) maxtrial = UV_MAX;
- if ( (n < 2) || (maxtrial < 2) ) {
+ /* Cover the cases 0/1/2/3 now */
+ if ( (n < 4) || (maxtrial < 4) ) {
factors[0] = n;
return 1;
}
+ /* Trial division for 2, 3, 5, 7, and see if we're done */
while ( (n & 1) == 0 ) { factors[nfactors++] = 2; n /= 2; }
-
- if (3 <= maxtrial) while ( (n % 3) == 0 ) { factors[nfactors++] = 3; n /= 3; }
- if (5 <= maxtrial) while ( (n % 5) == 0 ) { factors[nfactors++] = 5; n /= 5; }
- if (7 <= maxtrial) while ( (n % 7) == 0 ) { factors[nfactors++] = 7; n /= 7; }
+ if (3<=maxtrial) while ( (n % 3) == 0 ) { factors[nfactors++] = 3; n /= 3; }
+ if (5<=maxtrial) while ( (n % 5) == 0 ) { factors[nfactors++] = 5; n /= 5; }
+ if (7<=maxtrial) while ( (n % 7) == 0 ) { factors[nfactors++] = 7; n /= 7; }
f = 11;
- m = 11;
-
if ( (n < (f*f)) || (maxtrial < f) ) {
if (n != 1)
factors[nfactors++] = n;
return nfactors;
}
+ /* Trial division to this number at most. Reduced as we find factors. */
limit = (UV) (sqrt(n)+0.1);
if (limit > maxtrial)
limit = maxtrial;
- /* wheel 30 */
- while (f <= limit) {
- if ( (n%f) == 0 ) {
- UV newlimit;
- do {
- factors[nfactors++] = f;
- n /= f;
- } while ( (n%f) == 0 );
- newlimit = (UV) (sqrt(n)+0.1);
- if (newlimit < limit) limit = newlimit;
+ /* Use the table of small primes to quickly do trial division. */
+ {
+ UV sp = 5;
+ f = primes_small[sp];
+ while (f <= limit && f <= 2003) {
+ if ( (n%f) == 0 ) {
+ do {
+ factors[nfactors++] = f;
+ n /= f;
+ } while ( (n%f) == 0 );
+ newlimit = (UV) (sqrt(n)+0.1);
+ if (newlimit < limit) limit = newlimit;
+ }
+ f = primes_small[++sp];
+ }
+ }
+
+ /* Trial division using a mod-30 wheel for larger values */
+ if (f <= limit) {
+ UV m = f % 30;
+ while (f <= limit) {
+ if ( (n%f) == 0 ) {
+ do {
+ factors[nfactors++] = f;
+ n /= f;
+ } while ( (n%f) == 0 );
+ newlimit = (UV) (sqrt(n)+0.1);
+ if (newlimit < limit) limit = newlimit;
+ }
+ f += wheeladvance30[m];
+ m = nextwheel30[m];
}
- f += wheeladvance30[m];
- m = nextwheel30[m];
}
+ /* All done! */
if (n != 1)
factors[nfactors++] = n;
return nfactors;
diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm
index 3d111e2..82ca25c 100644
--- a/lib/Math/Prime/Util.pm
+++ b/lib/Math/Prime/Util.pm
@@ -51,10 +51,11 @@ sub _import_nobigint {
undef *is_prob_prime; *is_prob_prime = \&_XS_is_prob_prime;
undef *next_prime; *next_prime = \&_XS_next_prime;
undef *prev_prime; *prev_prime = \&_XS_prev_prime;
- #undef *prime_count; *prime_count = \&_XS_prime_count;
+ #undef *prime_count; *prime_count = \&_XS_prime_count;
undef *nth_prime; *nth_prime = \&_XS_nth_prime;
undef *is_strong_pseudoprime; *is_strong_pseudoprime = \&_XS_miller_rabin;
undef *miller_rabin; *miller_rabin = \&_XS_miller_rabin;
+ undef *mertens; *mertens = \&_XS_mertens;
}
BEGIN {
@@ -1062,10 +1063,6 @@ sub all_factors {
# A008683 Moebius function mu(n)
# A030059, A013929, A030229, A002321, A005117, A013929 all relate.
-
-# One can argue for the Omega function (A001221), Euler Phi (A000010), and
-# Merten's functions also.
-
sub moebius {
my($n, $nend) = @_;
_validate_positive_integer($n, 1);
@@ -1074,10 +1071,7 @@ sub moebius {
if (defined $nend) {
_validate_positive_integer($nend);
return () if $nend < $n;
- if ($nend <= $_XS_MAXVAL) {
- my $mu = _XS_moebius_range($n, $nend);
- return @$mu;
- }
+ return @{ _XS_moebius_range($n, $nend) } if $nend <= $_XS_MAXVAL;
my @mu = map { 1 } 0 .. $nend;
foreach my $j (2 .. $nend) {
next unless $mu[$j] == 1;
@@ -1106,15 +1100,16 @@ sub moebius {
return (((scalar @factors) % 2) == 0) ? 1 : -1;
}
+# A002321 Mertens' function. mertens(n) = sum(moebius(1,n))
sub mertens {
my($n) = @_;
_validate_positive_integer($n);
- return (0,1)[$n] if $n <= 1;
return _XS_mertens($n) if $n <= $_XS_MAXVAL;
# This is the most basic Deléglise and Rivat algorithm. u = n^1/2
# and no segmenting is done. Their algorithm uses u = n^1/3, breaks
# the summation into two parts, and calculates those in segments. Their
# computation time growth is half of this code.
+ return $n if $n <= 1;
my $u = int(sqrt($n));
my @mu = (0, moebius(1, $u)); # Hold values of mu for 0-u
my $musum = 0;
@@ -1137,25 +1132,20 @@ sub mertens {
}
-# Euler Phi, aka Euler Totient. A000010
-
+# A000010 Euler Phi, aka Euler Totient
sub euler_phi {
my($n, $nend) = @_;
# SAGE defines this to be 0 for all n <= 0. Others choose differently.
# I am following SAGE's decision for n <= 0.
return 0 if defined $n && $n < 0;
_validate_positive_integer($n);
- undef $nend if defined $nend && $nend == $n;
# Totient over a range. Could be improved, as this can use huge memory.
- if (defined $nend) {
+ if (defined $nend && $nend != $n) {
_validate_positive_integer($nend);
return () if $nend < $n;
# Use XS code if at all possible. Better memory use.
- if ($nend <= $_XS_MAXVAL) {
- my $totients = _XS_totient_range($n, $nend);
- return @$totients;
- }
+ return @{ _XS_totient_range($n, $nend) } if $nend <= $_XS_MAXVAL;
my @totients = map { $_ } 0 .. $nend;
foreach my $i (2 .. $nend) {
next unless $totients[$i] == $i;
@@ -1169,38 +1159,40 @@ sub euler_phi {
}
return (0,1)[$n] if $n <= 1;
- my %factor_mult;
- my @factors = grep { !$factor_mult{$_}++ }
- ($n <= $_XS_MAXVAL) ? _XS_factor($n) : factor($n);
- # Direct from Euler's product formula. Note division will be exact.
- #my $totient = $n;
- #foreach my $factor (@factors) {
- # $totient = int($totient/$factor) * ($factor-1);
- #}
+ if ($n <= $_XS_MAXVAL) {
+ my $last_f = 0;
+ my $totient = 1;
+ foreach my $f ( _XS_factor($n) ) {
+ if ($f == $last_f) { $totient *= $f; }
+ else { $totient *= $f-1; $last_f = $f; }
+ }
+ return $totient;
+ }
+
+ my %factor_mult;
+ my @factors = grep { !$factor_mult{$_}++ } factor($n);
+ my $totient = 1;
- # Alternate way doing multiplications only.
if (ref($n) ne 'Math::BigInt') {
- my $totient = 1; # $n - $n + 1 will make this a bigint if needed
foreach my $factor (@factors) {
$totient *= ($factor - 1);
$totient *= $factor for (2 .. $factor_mult{$factor});
}
- return $totient;
- }
-
- # Some real wackiness to solve issues with Math::BigInt::GMP (not seen with
- # Pari or Calc). Results of the multiply will go negative if we don't do
- # this. Standalone bug:
- # perl -E 'my $a = 2931542417; use bigint lib=>'GMP'; my $n = 49754396241690624; my $x = $n*$a; say $x;'
- # This may be related to RT 71548 of Math::BigInt::GMP.
- my $totient = $n->copy->bone;
- foreach my $factor (@factors) {
- my $f = $n->copy->bzero->badd("$factor");
- $totient->bmul($f->copy->bsub(1));
- $totient->bmul($f) for (2 .. $factor_mult{$factor});
+ } else {
+ # Some real wackiness to solve issues with Math::BigInt::GMP (not seen with
+ # Pari or Calc). Results of the multiply will go negative if we don't do
+ # this. To see if you hit the standalone bug:
+ # perl -E 'my $a = 2931542417; use bigint lib=>'GMP'; my $n = 49754396241690624; my $x = $n*$a; say $x;'
+ # This may be related to RT 71548 of Math::BigInt::GMP.
+ $totient = $n->copy->bone;
+ foreach my $factor (@factors) {
+ my $f = $n->copy->bzero->badd("$factor");
+ $totient->bmul($f->copy->bsub(1));
+ $totient->bmul($f) for (2 .. $factor_mult{$factor});
+ }
}
- $totient;
+ return $totient;
}
# Jordan's totient -- a generalization of Euler's totient.
@@ -2440,9 +2432,10 @@ is much faster, though a segmented sieve must be used for large C<n> to
control the memory taken. Benito and Varona (2008) show a simple C<n/3>
summation which is much faster and uses less memory. Better yet is a
simple C<n^1/2> version of Deléglise and Rivat (1996), which is what the
-current implementation uses. The best known implementation is Deléglise and
-Rivat's segmented C<n^1/3> algorithm. In theory using one of the advanced
-prime count algorithms can lead to a faster solution.
+current implementation uses. Deléglise and Rivat's full segmented C<n^1/3>
+algorithm is faster. Kuznetsov (2011) gives an alternate method that he
+indicates is even faster. In theory using one of the advanced prime count
+algorithms can lead to a faster solution.
=head2 euler_phi
diff --git a/t/19-moebius.t b/t/19-moebius.t
index 3da2504..1542bc3 100644
--- a/t/19-moebius.t
+++ b/t/19-moebius.t
@@ -5,6 +5,7 @@ use warnings;
use Test::More;
use Math::Prime::Util qw/moebius mertens euler_phi jordan_totient divisor_sum/;
+my $extra = defined $ENV{RELEASE_TESTING} && $ENV{RELEASE_TESTING};
my $use64 = Math::Prime::Util::prime_get_config->{'maxbits'} > 32;
my $broken64 = (18446744073709550592 == ~0);
$use64 = 0 if $broken64;
@@ -20,10 +21,55 @@ my %mertens = (
100 => 1,
1000 => 2,
10000 => -23,
-# 100000 => -48,
-# 1000000 => 212,
-# 10000000 => 1037,
+ 8 => -2,
+ 16 => -1,
+ 32 => -4,
+ 64 => -1,
+ 128 => -2,
+ 256 => -1,
+ 512 => -4,
+ 1024 => -4,
+ 2048 => 7,
+ 4096 => -19,
+ 8192 => 22,
);
+my %big_mertens = (
+ 100000 => -48,
+ 1000000 => 212,
+ 10000000 => 1037,
+);
+if ($extra && $use64) {
+ %big_mertens = ( %big_mertens,
+ 2 => 0, # A087987, mertens at primorials
+ 6 => -1,
+ 30 => -3,
+ 210 => -1,
+ 2310 => -1,
+ 30030 => 16,
+ 510510 => -25,
+ 9699690 => 278,
+ 223092870 => 3516,
+
+ 6433477 => 900, # 30^2
+ 109851909 => -4096, # A084235, 2^12
+
+ 2**14 => -32, # A084236
+ 2**15 => 26,
+ 2**16 => 14,
+ 2**17 => -20,
+ 2**18 => 24,
+ 2**19 => -125,
+ 2**20 => 257,
+ 2**21 => -362,
+ 2**22 => 228,
+ 2**23 => -10,
+
+ 10**8 => 1928,
+ 10**9 => -222,
+# 1*10**10 => -33722, # From Deleglise and Rivat
+# 2*10**10 => -48723, # Too slow with current method
+ );
+}
my %totients = (
123456 => 41088,
@@ -66,7 +112,8 @@ my %sigmak = (
plan tests => 0 + 1
+ 1 # Small Moebius
- + 3*scalar(keys %mertens) + 3
+ + 3*scalar(keys %mertens)
+ + 1*scalar(keys %big_mertens)
+ 2 # Small Phi
+ scalar(keys %totients)
+ scalar(keys %jordan_totients)
@@ -93,9 +140,9 @@ while (my($n, $mertens) = each (%mertens)) {
# Now with mertens function
is( mertens($n), $mertens, "mertens($n) == $mertens" );
}
-is( mertens( 100000), -48, "mertens(100000)" );
-is( mertens( 1000000), 212, "mertens(1000000)" );
-is( mertens(10000000), 1037, "mertens(10000000)" );
+while (my($n, $mertens) = each (%big_mertens)) {
+ is( mertens($n), $mertens, "mertens($n)" );
+}
{
my @phi = map { euler_phi($_) } (0 .. $#A000010);
diff --git a/util.c b/util.c
index 402106b..4bf6e4a 100644
--- a/util.c
+++ b/util.c
@@ -642,7 +642,6 @@ IV* _moebius_range(UV lo, UV hi)
*/
range = hi-lo+1;
sqrtn = (UV) (sqrt(hi) + 0.5);
- /* sqrtn = (UV) sqrt(hi); if (sqrtn*sqrtn < hi) sqrtn++; */ /* TODO */
New(0, mu, range, IV);
if (mu == 0)
@@ -705,6 +704,7 @@ IV _XS_mertens(UV n) {
IV* M;
IV sum;
+ if (n <= 1) return n;
u = (UV) sqrt(n);
mu = _moebius_range(0, u);
New(0, M, u+1, IV);
diff --git a/xt/moebius-mertens.pl b/xt/moebius-mertens.pl
new file mode 100755
index 0000000..b25226f
--- /dev/null
+++ b/xt/moebius-mertens.pl
@@ -0,0 +1,35 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+$| = 1; # fast pipes
+
+use Math::Prime::Util qw/moebius mertens/;
+use List::Util qw/sum/;
+use Algorithm::Diff qw/diff/;
+
+my $limit = shift || 1_000_000;
+
+print "Calculating moebius from 1 to $limit...";
+my @mu = map { moebius($_) } 1 .. $limit;
+print "...";
+unshift @mu, 0;
+print "...done\n";
+
+while (1) {
+ my $beg = 1 + int(rand($limit));
+ my $end = 1 + int(rand($limit));
+ ($beg,$end) = ($end,$beg) if $beg > $end;
+
+ # Does moebius range return the same values?
+ my @mu_range = @mu[ $beg .. $end ];
+ my @mobius = moebius($beg,$end);
+
+ my $mu_sum = sum(@mu_range);
+ my $mo_sum = sum(@mobius);
+ my $mert_sum = mertens($end) - mertens($beg-1);
+ warn "\nbeg $beg end $end sum $mu_sum range sum $mo_sum\n"
+ unless $mu_sum == $mo_sum;
+ warn "\nbeg $beg end $end sum $mu_sum mertsum $mert_sum\n"
+ unless $mu_sum == $mert_sum;
+ print ".";
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git
More information about the Pkg-perl-cvs-commits
mailing list