[libmath-prime-util-perl] 10/16: Updates from testing, make nth_prime_approx more accurate for large values
Partha P. Mukherjee
ppm-guest at moszumanska.debian.org
Thu May 21 18:44:25 UTC 2015
This is an automated email from the git hooks/post-receive script.
ppm-guest pushed a commit to annotated tag v0.06
in repository libmath-prime-util-perl.
commit 4afae2bbf4c3069fdbe7fc3dee3c39441dd4cef4
Author: Dana Jacobsen <dana at acm.org>
Date: Mon Jun 11 15:12:39 2012 -0600
Updates from testing, make nth_prime_approx more accurate for large values
---
Changes | 7 +++++--
MANIFEST | 7 ++++++-
examples/test-nthapprox.pl | 52 ++++++++++++++++++++++++++++++++++++++++++++++
lib/Math/Prime/Util.pm | 46 ++++++++++++++++++++++++++--------------
t/13-primecount.t | 8 +++++--
util.c | 10 +++++++--
6 files changed, 107 insertions(+), 23 deletions(-)
diff --git a/Changes b/Changes
index 1990528..8e6a7e6 100644
--- a/Changes
+++ b/Changes
@@ -1,7 +1,10 @@
Revision history for Perl extension Math::Prime::Util.
-0.05 8 June 2012
- - Use assembler for mulmod if 64-bit and gcc and x86
+0.05 11 June 2012
+ - Speed up mulmod: asm for GCC + x86_64, native 64-bit for 32-bit Perl
+ is uint64_t is available, and range tests for others. This speeds up
+ some of the factoring as well as Miller-Rabin, which in turn speeds up
+ is_prime. is_prime is used quite commonly, so this is good.
- nth_prime routines should now all croak on overflow in the same way.
- Segmented prime_count, things like this are reasonably efficient:
say prime_count( 10**16, 10**16 + 2**20 )
diff --git a/MANIFEST b/MANIFEST
index 40dfd94..0fca5aa 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -14,14 +14,18 @@ sieve.h
sieve.c
util.h
util.c
-examples/bench-nthprime.pl
examples/bench-factor.pl
examples/bench-factor-extra.pl
examples/bench-factor-semiprime.pl
examples/bench-is-prime.pl
examples/bench-miller-rabin.pl
+examples/bench-nthprime.pl
+examples/bench-pcapprox.pl
examples/bench-random-prime.pl
examples/test-factoring.pl
+examples/test-holf.pl
+examples/test-nthapprox.pl
+examples/test-pcapprox.pl
t/01-load.t
t/02-can.t
t/03-init.t
@@ -33,6 +37,7 @@ t/14-nthprime.t
t/15-probprime.t
t/16-randomprime.t
t/17-pseudoprime.t
+t/18-functions.t
t/30-relations.t
t/50-factoring.t
t/90-release-perlcritic.t
diff --git a/examples/test-nthapprox.pl b/examples/test-nthapprox.pl
new file mode 100755
index 0000000..523a7b4
--- /dev/null
+++ b/examples/test-nthapprox.pl
@@ -0,0 +1,52 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Math::Prime::Util ":all";
+$| = 1; # fast pipes
+my $use64 = Math::Prime::Util::_maxbits > 32;
+
+
+my %nthprimes = (
+ 1 => 2,
+ 10 => 29,
+ 100 => 541,
+ 1000 => 7919,
+ 10000 => 104729,
+ 100000 => 1299709,
+ 1000000 => 15485863,
+ 10000000 => 179424673,
+ 100000000 => 2038074743,
+ 1000000000 => 22801763489,
+ 10000000000 => 252097800623,
+ 100000000000 => 2760727302517,
+ 1000000000000 => 29996224275833,
+ 10000000000000 => 323780508946331,
+ 100000000000000 => 3475385758524527,
+ 1000000000000000 => 37124508045065437,
+ 10000000000000000 => 394906913903735329,
+ 100000000000000000 => 4185296581467695669,
+);
+
+printf(" N %12s %12s\n", "nth_approx", "percent");
+printf("----- %12s %12s\n", '-'x12, '-'x12);
+foreach my $n (sort {$a<=>$b} keys %nthprimes) {
+ my $nth = $nthprimes{$n};
+ my $ntha = nth_prime_approx($n);
+
+ printf "10^%2d %13llu %12.7f\n", length($n)-1, abs($nth-$ntha), 100*($ntha-$nth)/$nth;
+}
+
+print "\n";
+print "Lower / Upper bounds. Percentages.\n";
+print "\n";
+
+printf(" N %12s %12s\n", "lower", "upper");
+printf("----- %12s %12s\n", '-'x12,'-'x12);
+foreach my $n (sort {$a<=>$b} keys %nthprimes) {
+ my $nth = $nthprimes{$n};
+ my $nthl = nth_prime_lower($n);
+ my $nthu = nth_prime_upper($n);
+
+ printf "10^%2d %12.7f %12.7f\n",
+ length($n)-1, 100*($nth-$nthl)/$nth, 100*($nthu-$nth)/$nth;
+}
diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm
index 0ca736a..1801149 100644
--- a/lib/Math/Prime/Util.pm
+++ b/lib/Math/Prime/Util.pm
@@ -232,7 +232,8 @@ Version 0.04
# Return Pi(n) -- the number of primes E<lt>= n.
- my $number_of_primes = prime_count( 1_000_000 );
+ $primepi = prime_count( 1_000_000 );
+ $primepi = prime_count( 10**14, 10**14+1000 ); # also does ranges
# Quickly return an approximation to Pi(n)
my $approx_number_of_primes = prime_count_approx( 10**17 );
@@ -327,8 +328,9 @@ using wheel factorization, or a segmented sieve.
$n = next_prime($n);
Returns the next prime greater than the input number. 0 is returned if the
-next prime is larger than a native integer type (the last primes being
-C<4,294,967,291> in 32-bit Perl and C<18,446,744,073,709,551,557> in 64-bit).
+next prime is larger than a native integer type (the last representable
+primes being C<4,294,967,291> in 32-bit Perl and
+C<18,446,744,073,709,551,557> in 64-bit).
=head2 prev_prime
@@ -341,16 +343,27 @@ input is C<2> or lower.
=head2 prime_count
- my $number_of_primes = prime_count( 1_000 );
+ my $primepi = prime_count( 1_000 );
+ my $pirange = prime_count( 1_000, 10_000 );
-Returns the Prime Count function C<Pi(n)>. The current implementation relies
-on sieving to find the primes within the interval, so will take some time and
-memory. It uses a segmented sieve if the primary sieve is not large enough,
-so is very memory efficient. However, time growth is slightly more than
-linear with C<n>, so it can take a very long time. A hybrid table approach
-is the usual means taken to speed this up, which a later version may do. For
-very large inputs the methods of Meissel, Lehmer, or
-Lagarias-Miller-Odlyzko-Deleglise-Rivat may be appropriate.
+Returns the Prime Count function C<Pi(n)>, also called C<primepi> in some
+math packages. When given two arguments, it returns the inclusive
+count of primes between the ranges (e.g. C<(13,17)> returns 2, C<14,17>
+and C<13,16> return 1, and C<14,16> returns 0).
+
+The current implementation relies on sieving to find the primes within the
+interval, so will take some time and memory. It uses a segmented sieve so
+is very memory efficient, and also allows fast results even with large
+base values. The complexity for C<prime_count(a, b)> is approximately
+C<O(sqrt(a) + (b-a))>, where the first term is typically negligible below
+C<~ 10^11>. Memory use is proportional only to C<sqrt(a)>, with total
+memory use under 1MB for any base under C<10^14>.
+
+A later implementation may work on improving performance for values, both
+in reducing memory use (the current maximum is 140MB at 2^64) and improving
+speed. Possibilities include a hybrid table approach, using an explicit
+formula with C<li(x) or C<R(x)>, or one of the Meissel, Lehmer,
+or Lagarias-Miller-Odlyzko-Deleglise-Rivat methods.
=head2 prime_count_upper
@@ -371,8 +384,8 @@ A common place these would be used is sizing an array to hold the first C<$n>
primes. It may be desirable to use a bit more memory than is necessary, to
avoid calling C<prime_count>.
-These routines use hand-verified tight limits below a range at least C<2^32>,
-and fall back to the proven Dusart bounds of
+These routines use hand-verified tight limits below a range at least C<2^33>,
+and fall back to the Dusart bounds of
x/logx * (1 + 1/logx + 1.80/log^2x) <= Pi(x)
@@ -389,8 +402,9 @@ above that range.
Returns an approximation to the C<prime_count> function, without having to
generate any primes. The current implementation uses the Riemann R function
-which is quite accurate. A slightly faster answer, but less accurate,
-can be obtained by averaging the upper and lower bounds.
+which is quite accurate: an error of less than C<0.0005%> is typical for
+input values over C<2^32>. A slightly faster (0.1ms vs. 1ms), but much less
+accurate, answer can be obtained by averaging the upper and lower bounds.
=head2 nth_prime
diff --git a/t/13-primecount.t b/t/13-primecount.t
index 067e786..c23cb0e 100644
--- a/t/13-primecount.t
+++ b/t/13-primecount.t
@@ -8,7 +8,7 @@ use Math::Prime::Util qw/prime_count prime_count_lower prime_count_upper prime_c
my $use64 = Math::Prime::Util::_maxbits > 32;
my $extra = defined $ENV{RELEASE_TESTING} && $ENV{RELEASE_TESTING};
-plan tests => 14*3 + 8 + 6*$extra + 2*18*$use64 + 12 + 5*$use64;
+plan tests => 14*3 + 8 + 6*$extra + 3*18*$use64 + 11 + 6*$use64;
# Powers of 2: http://oeis.org/A007053/b007053.txt
# Powers of 10: http://oeis.org/A006880/b006880.txt
@@ -55,13 +55,16 @@ while (my($n, $pin) = each (%pivals32)) {
is( prime_count($n), $pin, "Pi($n) = $pin" );
}
my $approx_range = abs($pin - prime_count_approx($n));
- my $range_limit = ($n <= 1000000000) ? 1100 : 70000;
+ my $range_limit = ($n <= 100000000) ? 100 : 500;
cmp_ok( $approx_range, '<=', $range_limit, "prime_count_approx($n) within $range_limit");
}
if ($use64) {
while (my($n, $pin) = each (%pivals64)) {
cmp_ok( prime_count_upper($n), '>=', $pin, "Pi($n) <= upper estimate" );
cmp_ok( prime_count_lower($n), '<=', $pin, "Pi($n) >= lower estimate" );
+ my $approx = prime_count_approx($n);
+ my $percent_limit = 0.0005;
+ cmp_ok( abs($pin - $approx) / $pin, '<=', $percent_limit/100.0, "prime_count approx($n) within $percent_limit\% of Pi($n)");
}
}
@@ -102,6 +105,7 @@ while (my($range, $expect) = each (%intervals)) {
next if $high > ~0;
is( prime_count($low,$high), $expect, "prime_count($range) = $expect");
}
+exit(0);
sub fixnum {
my $nstr = shift;
diff --git a/util.c b/util.c
index ab65a0c..39f5413 100644
--- a/util.c
+++ b/util.c
@@ -690,14 +690,18 @@ UV nth_prime_approx(UV n)
* m=2 - (((flog2n*flog2n) - 6*flog2n + 11) / (2*flogn*flogn))
* + O((flog2n/flogn)^3)
*
- * Shown in Dusart 1999 page 12, as well as other sources
+ * Shown in Dusart 1999 page 12, as well as other sources such as:
+ * http://www.emis.de/journals/JIPAM/images/153_02_JIPAM/153_02.pdf
+ * where the main issue you run into is that you're doing polynomial
+ * interpolation, so it oscillates like crazy with many high-order terms.
+ * Hence I'm leaving it at m=2.
*/
approx = fn * ( flogn + flog2n - 1
+ ((flog2n - 2)/flogn)
- (((flog2n*flog2n) - 6*flog2n + 11) / (2*flogn*flogn))
);
- /* Apply a correction to help keep small inputs close. */
+ /* Apply a correction to help keep values close */
order = flog2n/flogn;
order = order*order*order * fn;
@@ -708,6 +712,8 @@ UV nth_prime_approx(UV n)
else if (n < 4000) approx += 4.3 * order;
else if (n < 12000) approx += 3.0 * order;
else if (n <150000) approx += 2.1 * order;
+ else if (n <200000000) approx += 0.0 * order;
+ else approx += -0.023 * order;
/* For all three analytical functions, it is possible that for a given valid
* input, we will not be able to return an output that fits in the UV type.
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git
More information about the Pkg-perl-cvs-commits
mailing list