[libmath-prime-util-perl] 10/16: Updates from testing, make nth_prime_approx more accurate for large values

Thu May 21 18:44:25 UTC 2015

This is an automated email from the git hooks/post-receive script.

ppm-guest pushed a commit to annotated tag v0.06
in repository libmath-prime-util-perl.

commit 4afae2bbf4c3069fdbe7fc3dee3c39441dd4cef4
Author: Dana Jacobsen <dana at acm.org>
Date:   Mon Jun 11 15:12:39 2012 -0600

    Updates from testing, make nth_prime_approx more accurate for large values
---
 Changes                    |  7 +++++--
 MANIFEST                   |  7 ++++++-
 examples/test-nthapprox.pl | 52 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/Math/Prime/Util.pm     | 46 ++++++++++++++++++++++++++--------------
 t/13-primecount.t          |  8 +++++--
 util.c                     | 10 +++++++--
 6 files changed, 107 insertions(+), 23 deletions(-)

diff --git a/Changes b/Changes
index 1990528..8e6a7e6 100644
--- a/Changes
+++ b/Changes
@@ -1,7 +1,10 @@
 Revision history for Perl extension Math::Prime::Util.
 
-0.05  8 June 2012
-    - Use assembler for mulmod if 64-bit and gcc and x86
+0.05  11 June 2012
+    - Speed up mulmod: asm for GCC + x86_64, native 64-bit for 32-bit Perl
+      is uint64_t is available, and range tests for others.  This speeds up
+      some of the factoring as well as Miller-Rabin, which in turn speeds up
+      is_prime.  is_prime is used quite commonly, so this is good.
     - nth_prime routines should now all croak on overflow in the same way.
     - Segmented prime_count, things like this are reasonably efficient:
             say prime_count( 10**16,  10**16 + 2**20 )
diff --git a/MANIFEST b/MANIFEST
index 40dfd94..0fca5aa 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -14,14 +14,18 @@ sieve.h
 sieve.c
 util.h
 util.c
-examples/bench-nthprime.pl
 examples/bench-factor.pl
 examples/bench-factor-extra.pl
 examples/bench-factor-semiprime.pl
 examples/bench-is-prime.pl
 examples/bench-miller-rabin.pl
+examples/bench-nthprime.pl
+examples/bench-pcapprox.pl
 examples/bench-random-prime.pl
 examples/test-factoring.pl
+examples/test-holf.pl
+examples/test-nthapprox.pl
+examples/test-pcapprox.pl
 t/01-load.t
 t/02-can.t
 t/03-init.t
@@ -33,6 +37,7 @@ t/14-nthprime.t
 t/15-probprime.t
 t/16-randomprime.t
 t/17-pseudoprime.t
+t/18-functions.t
 t/30-relations.t
 t/50-factoring.t
 t/90-release-perlcritic.t
diff --git a/examples/test-nthapprox.pl b/examples/test-nthapprox.pl
new file mode 100755
index 0000000..523a7b4
--- /dev/null
+++ b/examples/test-nthapprox.pl
@@ -0,0 +1,52 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Math::Prime::Util ":all";
+$| = 1;  # fast pipes
+my $use64 = Math::Prime::Util::_maxbits > 32;
+
+
+my %nthprimes = (
+                  1 => 2,
+                 10 => 29,
+                100 => 541,
+               1000 => 7919,
+              10000 => 104729,
+             100000 => 1299709,
+            1000000 => 15485863,
+           10000000 => 179424673,
+          100000000 => 2038074743,
+         1000000000 => 22801763489,
+        10000000000 => 252097800623,
+       100000000000 => 2760727302517,
+      1000000000000 => 29996224275833,
+     10000000000000 => 323780508946331,
+    100000000000000 => 3475385758524527,
+   1000000000000000 => 37124508045065437,
+  10000000000000000 => 394906913903735329,
+ 100000000000000000 => 4185296581467695669,
+);
+
+printf("  N    %12s  %12s\n", "nth_approx", "percent");
+printf("-----  %12s  %12s\n", '-'x12, '-'x12);
+foreach my $n (sort {$a<=>$b} keys %nthprimes) {
+  my $nth  = $nthprimes{$n};
+  my $ntha = nth_prime_approx($n);
+
+  printf "10^%2d %13llu  %12.7f\n", length($n)-1, abs($nth-$ntha), 100*($ntha-$nth)/$nth;
+}
+
+print "\n";
+print "Lower / Upper bounds.  Percentages.\n";
+print "\n";
+
+printf("  N    %12s  %12s\n", "lower", "upper");
+printf("-----  %12s  %12s\n", '-'x12,'-'x12);
+foreach my $n (sort {$a<=>$b} keys %nthprimes) {
+  my $nth  = $nthprimes{$n};
+  my $nthl  = nth_prime_lower($n);
+  my $nthu  = nth_prime_upper($n);
+
+  printf "10^%2d  %12.7f  %12.7f\n",
+         length($n)-1, 100*($nth-$nthl)/$nth, 100*($nthu-$nth)/$nth;
+}
diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm
index 0ca736a..1801149 100644
--- a/lib/Math/Prime/Util.pm
+++ b/lib/Math/Prime/Util.pm
@@ -232,7 +232,8 @@ Version 0.04
 
 
   # Return Pi(n) -- the number of primes E<lt>= n.
-  my $number_of_primes = prime_count( 1_000_000 );
+  $primepi = prime_count( 1_000_000 );
+  $primepi = prime_count( 10**14, 10**14+1000 );  # also does ranges
 
   # Quickly return an approximation to Pi(n)
   my $approx_number_of_primes = prime_count_approx( 10**17 );
@@ -327,8 +328,9 @@ using wheel factorization, or a segmented sieve.
   $n = next_prime($n);
 
 Returns the next prime greater than the input number.  0 is returned if the
-next prime is larger than a native integer type (the last primes being
-C<4,294,967,291> in 32-bit Perl and C<18,446,744,073,709,551,557> in 64-bit).
+next prime is larger than a native integer type (the last representable
+primes being C<4,294,967,291> in 32-bit Perl and
+C<18,446,744,073,709,551,557> in 64-bit).
 
 
 =head2 prev_prime
@@ -341,16 +343,27 @@ input is C<2> or lower.
 
 =head2 prime_count
 
-  my $number_of_primes = prime_count( 1_000 );
+  my $primepi = prime_count( 1_000 );
+  my $pirange = prime_count( 1_000, 10_000 );
 
-Returns the Prime Count function C<Pi(n)>.  The current implementation relies
-on sieving to find the primes within the interval, so will take some time and
-memory.  It uses a segmented sieve if the primary sieve is not large enough,
-so is very memory efficient.  However, time growth is slightly more than
-linear with C<n>, so it can take a very long time.  A hybrid table approach
-is the usual means taken to speed this up, which a later version may do.  For
-very large inputs the methods of Meissel, Lehmer, or
-Lagarias-Miller-Odlyzko-Deleglise-Rivat may be appropriate.
+Returns the Prime Count function C<Pi(n)>, also called C<primepi> in some
+math packages.  When given two arguments, it returns the inclusive
+count of primes between the ranges (e.g. C<(13,17)> returns 2, C<14,17>
+and C<13,16> return 1, and C<14,16> returns 0).
+
+The current implementation relies on sieving to find the primes within the
+interval, so will take some time and memory.  It uses a segmented sieve so
+is very memory efficient, and also allows fast results even with large
+base values.  The complexity for C<prime_count(a, b)> is approximately
+C<O(sqrt(a) + (b-a))>, where the first term is typically negligible below
+C<~ 10^11>.  Memory use is proportional only to C<sqrt(a)>, with total
+memory use under 1MB for any base under C<10^14>.
+
+A later implementation may work on improving performance for values, both
+in reducing memory use (the current maximum is 140MB at 2^64) and improving
+speed.  Possibilities include a hybrid table approach, using an explicit
+formula with C<li(x) or C<R(x)>, or one of the Meissel, Lehmer,
+or Lagarias-Miller-Odlyzko-Deleglise-Rivat methods.
 
 
 =head2 prime_count_upper
@@ -371,8 +384,8 @@ A common place these would be used is sizing an array to hold the first C<$n>
 primes.  It may be desirable to use a bit more memory than is necessary, to
 avoid calling C<prime_count>.
 
-These routines use hand-verified tight limits below a range at least C<2^32>,
-and fall back to the proven Dusart bounds of
+These routines use hand-verified tight limits below a range at least C<2^33>,
+and fall back to the Dusart bounds of
 
     x/logx * (1 + 1/logx + 1.80/log^2x) <= Pi(x)
 
@@ -389,8 +402,9 @@ above that range.
 
 Returns an approximation to the C<prime_count> function, without having to
 generate any primes.  The current implementation uses the Riemann R function
-which is quite accurate.  A slightly faster answer, but less accurate,
-can be obtained by averaging the upper and lower bounds.
+which is quite accurate: an error of less than C<0.0005%> is typical for
+input values over C<2^32>.  A slightly faster (0.1ms vs. 1ms), but much less
+accurate, answer can be obtained by averaging the upper and lower bounds.
 
 
 =head2 nth_prime
diff --git a/t/13-primecount.t b/t/13-primecount.t
index 067e786..c23cb0e 100644
--- a/t/13-primecount.t
+++ b/t/13-primecount.t
@@ -8,7 +8,7 @@ use Math::Prime::Util qw/prime_count prime_count_lower prime_count_upper prime_c
 my $use64 = Math::Prime::Util::_maxbits > 32;
 my $extra = defined $ENV{RELEASE_TESTING} && $ENV{RELEASE_TESTING};
 
-plan tests => 14*3 + 8 + 6*$extra + 2*18*$use64 + 12 + 5*$use64;
+plan tests => 14*3 + 8 + 6*$extra + 3*18*$use64 + 11 + 6*$use64;
 
 #  Powers of 2:  http://oeis.org/A007053/b007053.txt
 #  Powers of 10: http://oeis.org/A006880/b006880.txt
@@ -55,13 +55,16 @@ while (my($n, $pin) = each (%pivals32)) {
     is( prime_count($n), $pin, "Pi($n) = $pin" );
   }
   my $approx_range = abs($pin - prime_count_approx($n));
-  my $range_limit = ($n <= 1000000000) ? 1100 : 70000;
+  my $range_limit = ($n <= 100000000) ? 100 : 500;
   cmp_ok( $approx_range, '<=', $range_limit, "prime_count_approx($n) within $range_limit");
 }
 if ($use64) {
   while (my($n, $pin) = each (%pivals64)) {
     cmp_ok( prime_count_upper($n), '>=', $pin, "Pi($n) <= upper estimate" );
     cmp_ok( prime_count_lower($n), '<=', $pin, "Pi($n) >= lower estimate" );
+    my $approx = prime_count_approx($n);
+    my $percent_limit = 0.0005;
+    cmp_ok( abs($pin - $approx) / $pin, '<=', $percent_limit/100.0, "prime_count approx($n) within $percent_limit\% of Pi($n)");
   }
 }
 
@@ -102,6 +105,7 @@ while (my($range, $expect) = each (%intervals)) {
   next if $high > ~0;
   is( prime_count($low,$high), $expect, "prime_count($range) = $expect");
 }
+exit(0);
 
 sub fixnum {
   my $nstr = shift;
diff --git a/util.c b/util.c
index ab65a0c..39f5413 100644
--- a/util.c
+++ b/util.c
@@ -690,14 +690,18 @@ UV nth_prime_approx(UV n)
    *    m=2   - (((flog2n*flog2n) - 6*flog2n + 11) / (2*flogn*flogn))
    *    + O((flog2n/flogn)^3)
    *
-   * Shown in Dusart 1999 page 12, as well as other sources
+   * Shown in Dusart 1999 page 12, as well as other sources such as:
+   *   http://www.emis.de/journals/JIPAM/images/153_02_JIPAM/153_02.pdf
+   * where the main issue you run into is that you're doing polynomial
+   * interpolation, so it oscillates like crazy with many high-order terms.
+   * Hence I'm leaving it at m=2.
    */
   approx = fn * ( flogn + flog2n - 1
                   + ((flog2n - 2)/flogn)
                   - (((flog2n*flog2n) - 6*flog2n + 11) / (2*flogn*flogn))
                 );
 
-  /* Apply a correction to help keep small inputs close. */
+  /* Apply a correction to help keep values close */
   order = flog2n/flogn;
   order = order*order*order * fn;
 
@@ -708,6 +712,8 @@ UV nth_prime_approx(UV n)
   else if (n <  4000) approx += 4.3 * order;
   else if (n < 12000) approx += 3.0 * order;
   else if (n <150000) approx += 2.1 * order;
+  else if (n <200000000) approx += 0.0 * order;
+  else                approx += -0.023 * order;
 
   /* For all three analytical functions, it is possible that for a given valid
    * input, we will not be able to return an output that fits in the UV type.

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git