[libmath-prime-util-perl] 03/04: prime_count uses segmented sieve

Thu May 21 18:44:14 UTC 2015

This is an automated email from the git hooks/post-receive script.

ppm-guest pushed a commit to annotated tag v0.02
in repository libmath-prime-util-perl.

commit 504099fd21287fcb08538f76efb8cf154cc73342
Author: Dana Jacobsen <dana at acm.org>
Date:   Mon Jun 4 19:24:03 2012 -0600

    prime_count uses segmented sieve
---
 Changes                |  4 +++
 lib/Math/Prime/Util.pm | 16 +++++-----
 sieve.c                |  3 ++
 util.c                 | 81 +++++++++++++++++++++++++++++++++++---------------
 4 files changed, 73 insertions(+), 31 deletions(-)

diff --git a/Changes b/Changes
index ce9ee8b..3b02d1d 100644
--- a/Changes
+++ b/Changes
@@ -7,6 +7,10 @@ Revision history for Perl extension Math::Prime::Util.
     - Test for broken 64-bit Perl.
     - Fix overflow issues in segmented sieving.
     - Switch to using UVuf for croaks.  What I should have done all along.
+    - prime_count uses a segment sieve with 256k chunks (~7.9M numbers).
+      Not memory intensive any more, and faster for large inputs.  The time
+      growth is slightly over linear however, so expect to wait a long
+      time for 10^12 or more.
 
 0.01  4 June 2012
     - Initial release
diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm
index bfdd673..64747a1 100644
--- a/lib/Math/Prime/Util.pm
+++ b/lib/Math/Prime/Util.pm
@@ -209,10 +209,12 @@ input is C<2> or lower.
 
 Returns the Prime Count function C<Pi(n)>.  The current implementation relies
 on sieving to find the primes within the interval, so will take some time and
-memory.  There are slightly faster ways to handle the sieving (e.g. maintain
-a list of counts from C<2 - j> for various C<j>, then do a segmented sieve
-between C<j> and C<n>), and for very large numbers the methods of Meissel,
-Lehmer, or Lagarias-Miller-Odlyzko-Deleglise-Rivat may be appropriate.
+memory.  It uses a segmented sieve if the primary sieve is not large enough,
+so is very memory efficient.  However, time growth is slightly more than
+linear with C<n>, so it can take a very long time.  A hybrid table approach
+is the usual means taken to speed this up, which a later version may do.  For
+very large inputs the methods of Meissel, Lehmer, or
+Lagarias-Miller-Odlyzko-Deleglise-Rivat may be appropriate.
 
 
 =head2 prime_count_upper
@@ -388,9 +390,9 @@ typically much faster for inputs in the 19+ digit range.
 
 =head1 LIMITATIONS
 
-The functions C<prime_count> and C<nth_prime> have not yet transitioned to
-using a segmented sieve, so will use too much memory to be practical when
-called with very large numbers (C<10^11> and up).
+The function C<nth_prime> has not yet transitioned to using a segmented sieve,
+so will use too much memory to be practical when called with very large
+numbers (C<10^11> and up).
 
 I have not completed testing all the functions near the word size limit
 (e.g. C<2^32> for 32-bit machines).  Please report any problems you find.
diff --git a/sieve.c b/sieve.c
index 064926b..8af0f70 100644
--- a/sieve.c
+++ b/sieve.c
@@ -248,5 +248,8 @@ int sieve_segment(unsigned char* mem, UV startd, UV endd)
   }
   END_DO_FOR_EACH_SIEVE_PRIME;
 
+  if (startd == 0)
+    mem[0] |= masktab30[1];  /* 1 is composite */
+
   return 1;
 }
diff --git a/util.c b/util.c
index cd99333..e1cea2b 100644
--- a/util.c
+++ b/util.c
@@ -323,36 +323,69 @@ UV prime_count(UV n)
   if (n < NPRIME_COUNT_SMALL)
     return prime_count_small[n];
 
-  /* Get the cached sieve. */
-  if (get_prime_cache(n, &sieve) < n) {
-    croak("Couldn't generate sieve for prime_count");
-    return 0;
-  }
+  bytes = n/30;
+  s = 0;
 
-  /* Simple:
-   *          START_DO_FOR_EACH_SIEVE_PRIME(sieve, 7, n)
-   *            count++;
-   *          END_DO_FOR_EACH_SIEVE_PRIME;
-   */
+  if (n <= get_prime_cache(0, &sieve)) {
 
-  bytes = n / 30;
-  s = 0;
+    /* We have enough primes -- just count them. */
 
-  /* Start from last word position if we can.  This is a big speedup when
-   * calling prime_count many times with successively larger numbers. */
-  if (bytes >= last_bytes) {
-    s = last_bytes;
-    count = last_count;
-  }
+    /* Start from last word position if we can.  This is a big speedup when
+     * calling prime_count many times with successively larger numbers. */
+    if (bytes >= last_bytes) {
+      s = last_bytes;
+      count = last_count;
+    }
 
-  count += count_zero_bits(sieve+s, bytes-s);
+    count += count_zero_bits(sieve+s, bytes-s);
 
-  last_bytes = bytes;
-  last_count = count;
+    last_bytes = bytes;
+    last_count = count;
 
-  START_DO_FOR_EACH_SIEVE_PRIME(sieve, 30*bytes, n)
-    count++;
-  END_DO_FOR_EACH_SIEVE_PRIME;
+    START_DO_FOR_EACH_SIEVE_PRIME(sieve, 30*bytes, n)
+      count++;
+    END_DO_FOR_EACH_SIEVE_PRIME;
+
+  } else {
+
+    /* We don't have enough primes.  Repeatedly segment sieve */
+    UV const segment_size = 262144;
+    unsigned char* segment;
+
+    /* TODO: we really shouldn't need this */
+    prime_precalc( sqrt(n) + 2 );
+
+    segment = (unsigned char*) malloc( segment_size );
+    if (segment == 0) {
+      croak("Could not allocate %"UVuf" bytes for segment sieve", segment_size);
+      return 0;
+    }
+
+    for (s = 0; s <= bytes; s += segment_size) {
+      /* We want to sieve one extra byte, to handle the last fragment */
+      UV sieve_bytes = ((bytes-s) >= segment_size) ? segment_size : bytes-s+1;
+      UV count_bytes = ((bytes-s) >= segment_size) ? segment_size : bytes-s;
+
+      /* printf("sieving from %"UVuf" to %"UVuf"\n", 30*s+1, 30*(s+sieve_bytes-1)+29); */
+      if (sieve_segment(segment, s, s + sieve_bytes - 1) == 0) {
+        croak("Could not segment sieve from %"UVuf" to %"UVuf, 30*s+1, 30*(s+sieve_bytes)+29);
+        break;
+      }
+
+      if (count_bytes > 0)
+        count += count_zero_bits(segment, count_bytes);
+
+    }
+    s -= segment_size;
+
+    /*printf("counting fragment from %"UVuf" to %"UVuf"\n", 30*bytes-30*s, n-30*s); */
+    START_DO_FOR_EACH_SIEVE_PRIME(segment, 30*bytes - s*30, n - s*30)
+      count++;
+    END_DO_FOR_EACH_SIEVE_PRIME;
+
+    free(segment);
+
+  }
 
   return count;
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git