[libmath-prime-util-perl] 32/43: Finish nth_twin_prime_approx

Thu May 21 18:53:08 UTC 2015

This is an automated email from the git hooks/post-receive script.

ppm-guest pushed a commit to annotated tag v0.40
in repository libmath-prime-util-perl.

commit d3078911763322b75bbf7d254367b8c678f905b6
Author: Dana Jacobsen <dana at acm.org>
Date:   Fri Apr 11 15:43:21 2014 -0700

    Finish nth_twin_prime_approx
---
 TODO                        |  2 --
 lib/Math/Prime/Util.pm      | 10 +++++++++-
 lib/Math/Prime/Util/PP.pm   | 45 +++++++++++++++++++++++++++++++++++++++------
 lib/Math/Prime/Util/PPFE.pm |  5 +++++
 t/13-primecount.t           | 21 +++++++++++++++++++--
 t/14-nthprime.t             | 23 ++++++++++++++++++++++-
 t/92-release-pod-coverage.t |  3 ++-
 util.c                      | 15 ++++++++++-----
 8 files changed, 106 insertions(+), 18 deletions(-)

diff --git a/TODO b/TODO
index 15e4cde..021d8b0 100644
--- a/TODO
+++ b/TODO
@@ -68,5 +68,3 @@
 - Ensure a fast path for Math::GMP from MPU -> MPU:GMP -> GMP, and back.
 
 - znlog better implementation
-
-- Documentation, PP implementation, tests for nth_twin_prime_approx
diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm
index e6026e9..480c333 100644
--- a/lib/Math/Prime/Util.pm
+++ b/lib/Math/Prime/Util.pm
@@ -1314,7 +1314,9 @@ for the answer, using some small table acceleration.
 
 Returns an approximation to the twin prime count of C<n>.  This returns
 quickly and has a very small error for large values.  The method used is
-conjecture B of Hardy and Littlewood 1922, as stated in Sebah and Gourdon 2002.
+conjecture B of Hardy and Littlewood 1922, as stated in
+Sebah and Gourdon 2002.  For inputs under 10M, a correction factor is 
+additionally applied to reduce the mean squared error.
 
 
 =head2 nth_prime
@@ -1377,6 +1379,12 @@ correction.
 Returns the Nth twin prime.  This is done via sieving and counting, so
 is not very fast for large values.
 
+=head2 nth_twin_prime_approx
+
+Returns an approximation to the Nth twin prime.  A curve fit is used for
+small inputs (under 1200), while for larger inputs a binary search is done
+on the approximate twin prime count.
+
 
 =head2 is_pseudoprime
 
diff --git a/lib/Math/Prime/Util/PP.pm b/lib/Math/Prime/Util/PP.pm
index bc3f0bb..be6adee 100644
--- a/lib/Math/Prime/Util/PP.pm
+++ b/lib/Math/Prime/Util/PP.pm
@@ -149,12 +149,15 @@ sub _validate_positive_integer {
 }
 
 
-my @_primes_small = (
-   0,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,
-   101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,
-   193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,281,283,
-   293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,
-   409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,509);
+my @_primes_small = (0,2,3,5);
+{
+  my $sieveref = _sieve_erat_string(5000);
+  my $n = 5;
+  foreach my $s (split("0", substr($$sieveref, 3), -1)) {
+    $n += 2 + 2 * length($s);
+    push @_primes_small, $n if $n <= 5000;
+  }
+}
 my @_prime_next_small = (
    2,2,3,5,5,7,7,11,11,11,11,13,13,17,17,17,17,19,19,23,23,23,23,
    29,29,29,29,29,29,31,31,37,37,37,37,37,37,41,41,41,41,43,43,47,
@@ -459,6 +462,11 @@ sub next_prime {
   _validate_positive_integer($n);
 
   return $_prime_next_small[$n] if $n <= $#_prime_next_small;
+  if ($n < $_primes_small[-1]) {
+    my $i = _tiny_prime_count($n);  # Binary search
+    return $_primes_small[$i+1];
+  }
+
 
   $n = Math::BigInt->new(''.$_[0])
      if ref($n) ne 'Math::BigInt' && $n >= MPU_MAXPRIME;
@@ -1353,9 +1361,17 @@ sub twin_prime_count {
 
 sub twin_prime_count_approx {
   my($n) = @_;
+  return twin_prime_count(3,$n) if $n < 2000;
   $n = _upgrade_to_float($n) if ref($n);
   my $logn = log($n);
   my $li2 = ExponentialIntegral($logn) + 2.8853900817779268147198494 - ($n/$logn);
+  if    ($n <     4000) { $li2 *= 1.0005 * log(log(log($n*4000))); }
+  elsif ($n <     8000) { $li2 *= 0.9734 * log(log(log($n*4000))); }
+  elsif ($n <    32000) { $li2 *= 0.8967 * log(log(log($n*4000))); }
+  elsif ($n <   200000) { $li2 *= 0.8937 * log(log(log($n*4000))); }
+  elsif ($n <  1000000) { $li2 *= 0.8793 * log(log(log($n*4000))); }
+  elsif ($n <  4000000) { $li2 *= 0.8766 * log(log(log($n*4000))); }
+  elsif ($n < 10000000) { $li2 *= 0.8664 * log(log(log($n*4000))); }
   return int(1.32032363169373914785562422 * $li2 + 0.5);
 }
 
@@ -1369,6 +1385,23 @@ sub nth_twin_prime {
   $nth;
 }
 
+sub nth_twin_prime_approx {
+  my($n) = @_;
+  return nth_twin_prime($n) if $n < 6;
+  $n = _upgrade_to_float($n) if ref($n);
+  my $nlogn2 = $n * log($n) * log($n);
+  return int(5.023 * $nlogn2/log(log(1600*$n*$n))) if $n > 59 && $n < 1200;
+
+  my $lo = int(1.0 * $nlogn2);
+  my $hi = int(3.0 * $nlogn2 + 3);
+  while ($lo < $hi) {
+    my $mid = $lo + (($hi-$lo) >> 1);
+    if (twin_prime_count_approx($mid) < $n) { $lo = $mid+1; }
+    else                                    { $hi = $mid;   }
+  }
+  return $lo;
+}
+
 
 #############################################################################
 
diff --git a/lib/Math/Prime/Util/PPFE.pm b/lib/Math/Prime/Util/PPFE.pm
index 77d917d..ac5c509 100644
--- a/lib/Math/Prime/Util/PPFE.pm
+++ b/lib/Math/Prime/Util/PPFE.pm
@@ -126,6 +126,11 @@ sub nth_twin_prime {
   _validate_positive_integer($n);
   return Math::Prime::Util::PP::nth_twin_prime($n);
 }
+sub nth_twin_prime_approx {
+  my($n) = @_;
+  _validate_positive_integer($n);
+  return Math::Prime::Util::PP::nth_twin_prime_approx($n);
+}
 
 
 sub is_prime {
diff --git a/t/13-primecount.t b/t/13-primecount.t
index 545e8b6..4df4e04 100644
--- a/t/13-primecount.t
+++ b/t/13-primecount.t
@@ -4,7 +4,8 @@ use warnings;
 
 use Test::More;
 use Math::Prime::Util qw/prime_count twin_prime_count
-                        prime_count_lower prime_count_upper prime_count_approx/;
+                         prime_count_lower prime_count_upper
+                         prime_count_approx twin_prime_count_approx/;
 
 my $isxs  = Math::Prime::Util::prime_get_config->{'xs'};
 my $use64 = Math::Prime::Util::prime_get_config->{'maxbits'} > 32;
@@ -82,6 +83,16 @@ my %intervals = (
 );
 delete @intervals{ grep { (parse_range($_))[1] > ~0 } keys %intervals };
 
+my %tpcs = (
+              5000 =>           126,
+            500000 =>          4565,
+          50000000 =>        239101,
+        5000000000 =>      14618166,
+      500000000000 =>     986222314,
+    50000000000000 =>   71018282471,
+  5000000000000000 => 5357875276068,
+);
+
 plan tests => 0 + 1
                 + 3*scalar(keys %pivals32)
                 + scalar(keys %pivals_small)
@@ -89,7 +100,7 @@ plan tests => 0 + 1
                 + scalar(keys %intervals)
                 + 1
                 + 5 + 2*$extra # prime count specific methods
-                + 3 + (($isxs && $use64) ? 1 : 0); # twin prime counts
+                + 3 + (($isxs && $use64) ? 1+2*scalar(keys %tpcs) : 0);# twin pc
 
 ok( eval { prime_count(13); 1; }, "prime_count in void context");
 
@@ -182,4 +193,10 @@ is(twin_prime_count(10**8,10**8+34587), 137, "twin prime count 10^8 to +34587");
 is(twin_prime_count(654321), 5744, "twin prime count 654321");
 if ($isxs && $use64) {
   is(twin_prime_count(1000000000123456), 1177209242446, "twin prime count 1000000000123456");
+  while (my($n, $tpc) = each (%tpcs)) {
+    is(twin_prime_count($n), $tpc, "twin prime count $n");
+    my $errorp = 100 * abs($tpc - twin_prime_count_approx($n)) / $tpc;
+    my $estr = sprintf "%8.6f%%", $errorp;
+    cmp_ok( $errorp, '<=', 2, "twin_prime_count_approx($n) is $estr");
+  }
 }
diff --git a/t/14-nthprime.t b/t/14-nthprime.t
index c026157..03fdcab 100644
--- a/t/14-nthprime.t
+++ b/t/14-nthprime.t
@@ -4,7 +4,8 @@ use warnings;
 
 use Test::More;
 use Math::Prime::Util qw/primes nth_prime nth_twin_prime
-                         nth_prime_lower nth_prime_upper nth_prime_approx/;
+                         nth_prime_lower nth_prime_upper
+                         nth_prime_approx nth_twin_prime_approx/;
 
 my $use64 = Math::Prime::Util::prime_get_config->{'maxbits'} > 32;
 my $usexs = Math::Prime::Util::prime_get_config->{'xs'};
@@ -62,6 +63,19 @@ my %nthprimes_small = map { $_ => $nthprimes32{$_} }
 
 my @small_primes = (0, @{primes($nth_small_prime)});
 
+my %ntpcs = (
+             5 =>                   29,
+            50 =>                 1487,
+           500 =>                32411,
+          5000 =>               557519,
+         50000 =>              8264957,
+        500000 =>            115438667,
+       5000000 =>           1523975909,
+      50000000 =>          19358093939,
+     500000000 =>         239211160649,
+);
+
+
 plan tests => 0 + 2*scalar(keys %pivals32)
                 + 1
                 + 3*scalar(keys %nthprimes32)
@@ -69,6 +83,7 @@ plan tests => 0 + 2*scalar(keys %pivals32)
                 + $use64 * 3 * scalar(keys %nthprimes64)
                 + 3   # nth_prime_lower with max index
                 + 3   # nth_twin_prime
+                + scalar(keys %ntpcs)   # nth_twin_prime_approx
                 + (($extra && $use64 && $usexs) ? 1 : 0);
 
 
@@ -125,3 +140,9 @@ if ($extra && $use64 && $usexs) {
 is( nth_twin_prime(0), 0, "nth_twin_prime(0) = 0" );
 is( nth_twin_prime(17), 239, "239 = 17th twin prime" );
 is( nth_twin_prime(1234), 101207, "101207 = 1234'th twin prime" );
+
+while (my($n, $nthtpc) = each (%ntpcs)) {
+  my $errorp = 100 * abs($nthtpc - nth_twin_prime_approx($n)) / $nthtpc;
+  my $estr = sprintf "%8.6f%%", $errorp;
+  cmp_ok( $errorp, '<=', 2, "nth_twin_prime_approx($n) is $estr");
+}
diff --git a/t/92-release-pod-coverage.t b/t/92-release-pod-coverage.t
index 9b83b6e..b63ddd6 100644
--- a/t/92-release-pod-coverage.t
+++ b/t/92-release-pod-coverage.t
@@ -58,7 +58,8 @@ sub mpu_public_regex {
       prime_count
       prime_count_lower prime_count_upper prime_count_approx
       nth_prime nth_prime_lower nth_prime_upper nth_prime_approx
-      twin_prime_count twin_prime_count_approx nth_twin_prime
+      twin_prime_count twin_prime_count_approx
+      nth_twin_prime nth_twin_prime_approx
       random_prime random_ndigit_prime random_nbit_prime random_strong_prime
       random_proven_prime random_proven_prime_with_cert
       random_maurer_prime random_maurer_prime_with_cert
diff --git a/util.c b/util.c
index 34bdf29..a33bcbf 100644
--- a/util.c
+++ b/util.c
@@ -589,10 +589,13 @@ UV twin_prime_count_approx(UV n)
     long double logn = logl(ln);
     long double li2 = _XS_ExponentialIntegral(logn) + two_over_log_two-ln/logn;
     /* try to minimize MSE */
-    if      (n <    7000) li2 *= 0.8991 * logl(logl(logl(ln*4000)));
-    else if (n <  200000) li2 *= 0.8938 * logl(logl(logl(ln*4000)));
-    else if (n < 1000000) li2 *= 0.8794 * logl(logl(logl(ln*4000)));
-    else if (n < 2000000) li2 *= 0.8808 * logl(logl(logl(ln*4000)));
+    if      (n <    4000) li2 *= 1.0005 * logl(logl(logl(ln*4000)));
+    if      (n <    8000) li2 *= 0.9734 * logl(logl(logl(ln*4000)));
+    if      (n <   32000) li2 *= 0.8967 * logl(logl(logl(ln*4000)));
+    else if (n <  200000) li2 *= 0.8937 * logl(logl(logl(ln*4000)));
+    else if (n < 1000000) li2 *= 0.8793 * logl(logl(logl(ln*4000)));
+    else if (n < 4000000) li2 *= 0.8766 * logl(logl(logl(ln*4000)));
+    else if (n <10000000) li2 *= 0.8664 * logl(logl(logl(ln*4000)));
     return (UV) (two_C2 * li2 + 0.5L);
   }
 }
@@ -982,11 +985,13 @@ UV nth_twin_prime_approx(UV n)
 
   if (n < 6)
     return nth_twin_prime(n);
+  if (n > 59 && n < 1200)  /* Curve fit */
+    return (UV) (5.023 * fn*flogn*flogn / logl(logl(1600*fn*fn)));
 
   /* Binary search on the TPC estimate.
    * Good results require that the TPC estimate is both fast and accurate.
    */
-  lo = (UV) (1.0 * fn * flogn * flogn);
+  lo = (UV) (0.0 * fn * flogn * flogn);
   hi = (UV) (3.0 * fn * flogn * flogn + 3);
   if (hi <= lo) hi = UV_MAX;
   while (lo < hi) {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git