[libmath-prime-util-perl] 04/16: 7/11/13 presieve added

Partha P. Mukherjee ppm-guest at moszumanska.debian.org
Thu May 21 18:44:33 UTC 2015


This is an automated email from the git hooks/post-receive script.

ppm-guest pushed a commit to annotated tag v0.08
in repository libmath-prime-util-perl.

commit ed6a752475c670bf26fb74798a08e8d394c498a4
Author: Dana Jacobsen <dana at acm.org>
Date:   Mon Jun 18 19:25:45 2012 -0600

    7/11/13 presieve added
---
 Changes           |   2 +
 sieve.c           | 145 +++++++++++++++++++++++++++++++++++++++++++-----------
 t/13-primecount.t |   3 +-
 3 files changed, 121 insertions(+), 29 deletions(-)

diff --git a/Changes b/Changes
index 20a9897..7b3d91a 100644
--- a/Changes
+++ b/Changes
@@ -5,6 +5,8 @@ Revision history for Perl extension Math::Prime::Util.
     - Accuracy improvement and measurements for math functions.
     - Remove simple sieve -- it wasn't being used, and was just around for
       performance comparisons.
+    - Static presieve for 7, 11, and 13.  1k of ROM used for prefilling sieve
+      memory, meaning we can skip the 7, 11, and 13 loops.  ~15% speedup.
 
 0.07  17 June 2012
     - Fixed a bug in next_prime found by Lou Godio (thank you VERY much!).
diff --git a/sieve.c b/sieve.c
index 3679d1d..1ddc013 100644
--- a/sieve.c
+++ b/sieve.c
@@ -7,6 +7,110 @@
 #include "ptypes.h"
 
 
+/* 1001 bytes of presieved mod-30 bytes.  If the area to be sieved is
+ * appropriately filled with this data, then 7, 11, and 13 do not have
+ * to be sieved.  It wraps, so multiple memcpy's can be used.  Do be
+ * aware that if you start at 0, you'll have to correct the first byte.
+ */
+#define PRESIEVE_SIZE (7*11*13)
+static const unsigned char presieve13[PRESIEVE_SIZE] =
+{ 0x0e,0x20,0x10,0x81,0x49,0x24,0xc2,0x06,0x2a,0x90,0xa1,0x0c,0x14,
+  0x58,0x02,0x61,0x11,0xc3,0x28,0x0c,0x44,0x22,0xa4,0x10,0x91,0x18,
+  0x4d,0x40,0x82,0x21,0x58,0xa1,0x28,0x04,0x42,0x92,0x20,0x51,0x91,
+  0x8a,0x04,0x48,0x03,0x60,0x34,0x81,0x1c,0x06,0xc1,0x02,0xa2,0x10,
+  0x89,0x08,0x24,0x45,0x42,0x30,0x10,0xc5,0x0a,0x86,0x40,0x0a,0x30,
+  0x38,0x85,0x08,0x15,0x40,0x63,0x20,0x96,0x83,0x88,0x04,0x60,0x16,
+  0x28,0x10,0x81,0x49,0x44,0xe2,0x02,0x2c,0x12,0xa1,0x0c,0x04,0x50,
+  0x0a,0x61,0x10,0x83,0x48,0x2c,0x40,0x26,0x26,0x90,0x91,0x08,0x55,
+  0x48,0x82,0x20,0x19,0xc1,0x28,0x04,0x44,0x12,0xa0,0x51,0x81,0x9a,
+  0x0c,0x48,0x02,0x21,0x54,0xa1,0x18,0x04,0x43,0x82,0xa2,0x10,0x99,
+  0x08,0x24,0x44,0x03,0x70,0x30,0xc1,0x0c,0x86,0xc0,0x0a,0x20,0x30,
+  0x8d,0x08,0x14,0x41,0x43,0x20,0x92,0x85,0x0a,0x84,0x60,0x06,0x30,
+  0x18,0x81,0x49,0x05,0xc2,0x22,0x28,0x14,0xa3,0x8c,0x04,0x50,0x12,
+  0x69,0x10,0x83,0x09,0x4c,0x60,0x22,0x24,0x12,0x91,0x08,0x45,0x50,
+  0x8a,0x20,0x18,0x81,0x68,0x24,0x40,0x16,0x22,0xd1,0x81,0x8a,0x14,
+  0x48,0x02,0x20,0x15,0xc1,0x38,0x04,0x45,0x02,0xa2,0x10,0x89,0x18,
+  0x2c,0x44,0x02,0x31,0x50,0xe1,0x08,0x86,0x42,0x8a,0x20,0x30,0x95,
+  0x08,0x14,0x40,0x43,0x60,0xb2,0x81,0x0c,0x06,0xe0,0x06,0x20,0x10,
+  0x89,0x49,0x04,0xc3,0x42,0x28,0x10,0xa5,0x0e,0x84,0x50,0x02,0x71,
+  0x18,0x83,0x08,0x0d,0x40,0x22,0x24,0x14,0x93,0x88,0x45,0x40,0x92,
+  0x28,0x18,0x81,0x29,0x44,0x60,0x12,0x24,0x53,0x81,0x8a,0x04,0x58,
+  0x0a,0x20,0x14,0x81,0x58,0x24,0x41,0x06,0xa2,0x90,0x89,0x08,0x34,
+  0x4c,0x02,0x30,0x11,0xc1,0x28,0x86,0x44,0x0a,0xa0,0x30,0x85,0x18,
+  0x1c,0x40,0x43,0x21,0xd2,0xa1,0x08,0x04,0x62,0x86,0x20,0x10,0x91,
+  0x49,0x04,0xc2,0x03,0x68,0x30,0xa1,0x0c,0x06,0xd0,0x02,0x61,0x10,
+  0x8b,0x08,0x0c,0x41,0x62,0x24,0x10,0x95,0x0a,0xc5,0x40,0x82,0x30,
+  0x18,0x81,0x28,0x05,0x40,0x32,0x20,0x55,0x83,0x8a,0x04,0x48,0x12,
+  0x28,0x14,0x81,0x19,0x44,0x61,0x02,0xa6,0x12,0x89,0x08,0x24,0x54,
+  0x0a,0x30,0x10,0xc1,0x48,0xa6,0x40,0x0e,0x22,0xb0,0x85,0x08,0x14,
+  0x48,0x43,0x20,0x93,0xc1,0x28,0x04,0x64,0x06,0xa0,0x10,0x81,0x59,
+  0x0c,0xc2,0x02,0x29,0x50,0xa1,0x0c,0x04,0x52,0x82,0x61,0x10,0x93,
+  0x08,0x0c,0x40,0x23,0x64,0x30,0x91,0x0c,0x47,0xc0,0x82,0x20,0x18,
+  0x89,0x28,0x04,0x41,0x52,0x20,0x51,0x85,0x8a,0x84,0x48,0x02,0x30,
+  0x1c,0x81,0x18,0x05,0x41,0x22,0xa2,0x14,0x8b,0x88,0x24,0x44,0x12,
+  0x38,0x10,0xc1,0x09,0xc6,0x60,0x0a,0x24,0x32,0x85,0x08,0x14,0x50,
+  0x4b,0x20,0x92,0x81,0x48,0x24,0x60,0x06,0x22,0x90,0x81,0x49,0x14,
+  0xca,0x02,0x28,0x11,0xe1,0x2c,0x04,0x54,0x02,0xe1,0x10,0x83,0x18,
+  0x0c,0x40,0x22,0x25,0x50,0xb1,0x08,0x45,0x42,0x82,0x20,0x18,0x91,
+  0x28,0x04,0x40,0x13,0x60,0x71,0x81,0x8e,0x06,0xc8,0x02,0x20,0x14,
+  0x89,0x18,0x04,0x41,0x42,0xa2,0x10,0x8d,0x0a,0xa4,0x44,0x02,0x30,
+  0x18,0xc1,0x08,0x87,0x40,0x2a,0x20,0x34,0x87,0x88,0x14,0x40,0x53,
+  0x28,0x92,0x81,0x09,0x44,0x60,0x06,0x24,0x12,0x81,0x49,0x04,0xd2,
+  0x0a,0x28,0x10,0xa1,0x4c,0x24,0x50,0x06,0x63,0x90,0x83,0x08,0x1c,
+  0x48,0x22,0x24,0x11,0xd1,0x28,0x45,0x44,0x82,0xa0,0x18,0x81,0x38,
+  0x0c,0x40,0x12,0x21,0x51,0xa1,0x8a,0x04,0x4a,0x82,0x20,0x14,0x91,
+  0x18,0x04,0x41,0x03,0xe2,0x30,0x89,0x0c,0x26,0xc4,0x02,0x30,0x10,
+  0xc9,0x08,0x86,0x41,0x4a,0x20,0x30,0x85,0x0a,0x94,0x40,0x43,0x30,
+  0x9a,0x81,0x08,0x05,0x60,0x26,0x20,0x14,0x83,0xc9,0x04,0xc2,0x12,
+  0x28,0x10,0xa1,0x0d,0x44,0x70,0x02,0x65,0x12,0x83,0x08,0x0c,0x50,
+  0x2a,0x24,0x10,0x91,0x48,0x65,0x40,0x86,0x22,0x98,0x81,0x28,0x14,
+  0x48,0x12,0x20,0x51,0xc1,0xaa,0x04,0x4c,0x02,0xa0,0x14,0x81,0x18,
+  0x0c,0x41,0x02,0xa3,0x50,0xa9,0x08,0x24,0x46,0x82,0x30,0x10,0xd1,
+  0x08,0x86,0x40,0x0b,0x60,0x30,0x85,0x0c,0x16,0xc0,0x43,0x20,0x92,
+  0x89,0x08,0x04,0x61,0x46,0x20,0x10,0x85,0x4b,0x84,0xc2,0x02,0x38,
+  0x18,0xa1,0x0c,0x05,0x50,0x22,0x61,0x14,0x83,0x88,0x0c,0x40,0x32,
+  0x2c,0x10,0x91,0x09,0x45,0x60,0x82,0x24,0x1a,0x81,0x28,0x04,0x50,
+  0x1a,0x20,0x51,0x81,0xca,0x24,0x48,0x06,0x22,0x94,0x81,0x18,0x14,
+  0x49,0x02,0xa2,0x11,0xc9,0x28,0x24,0x44,0x02,0xb0,0x10,0xc1,0x18,
+  0x8e,0x40,0x0a,0x21,0x70,0xa5,0x08,0x14,0x42,0xc3,0x20,0x92,0x91,
+  0x08,0x04,0x60,0x07,0x60,0x30,0x81,0x4d,0x06,0xc2,0x02,0x28,0x10,
+  0xa9,0x0c,0x04,0x51,0x42,0x61,0x10,0x87,0x0a,0x8c,0x40,0x22,0x34,
+  0x18,0x91,0x08,0x45,0x40,0xa2,0x20,0x1c,0x83,0xa8,0x04,0x40,0x12,
+  0x28,0x51,0x81,0x8b,0x44,0x68,0x02,0x24,0x16,0x81,0x18,0x04,0x51,
+  0x0a,0xa2,0x10,0x89,0x48,0x24,0x44,0x06,0x32,0x90,0xc1,0x08,0x96,
+  0x48,0x0a,0x20,0x31,0xc5,0x28,0x14,0x44,0x43,0xa0,0x92,0x81,0x18,
+  0x0c,0x60,0x06,0x21,0x50,0xa1,0x49,0x04,0xc2,0x82,0x28,0x10,0xb1,
+  0x0c,0x04,0x50,0x03,0x61,0x30,0x83,0x0c,0x0e,0xc0,0x22,0x24,0x10,
+  0x99,0x08,0x45,0x41,0xc2,0x20,0x18,0x85,0x2a,0x84,0x40,0x12,0x30,
+  0x59,0x81,0x8a,0x05,0x48,0x22,0x20,0x14,0x83,0x98,0x04,0x41,0x12,
+  0xaa,0x10,0x89,0x09,0x64,0x64,0x02,0x34,0x12,0xc1,0x08,0x86,0x50,
+  0x0a,0x20,0x30,0x85,0x48,0x34,0x40,0x47,0x22,0x92,0x81,0x08,0x14,
+  0x68,0x06,0x20,0x11,0xc1,0x69,0x04,0xc6,0x02,0xa8,0x10,0xa1,0x1c,
+  0x0c,0x50,0x02,0x61,0x50,0xa3,0x08,0x0c,0x42,0xa2,0x24,0x10,0x91,
+  0x08,0x45,0x40,0x83,0x60,0x38,0x81,0x2c,0x06,0xc0,0x12,0x20,0x51,
+  0x89,0x8a,0x04,0x49,0x42,0x20,0x14,0x85,0x1a,0x84,0x41,0x02,0xb2,
+  0x18,0x89,0x08,0x25,0x44,0x22,0x30,0x14,0xc3,0x88,0x86,0x40,0x1a,
+  0x28,0x30,0x85,0x09,0x54,0x60,0x43,0x24,0x92,0x81,0x08,0x04,0x70};
+
+static void sieve_prefill(unsigned char* mem, UV startd, UV endd)
+{
+  UV nbytes = endd - startd + 1;
+  MPUassert( (mem != 0) && (endd >= startd), "sieve_prefill bad arguments");
+
+  /* TODO: Big ranges would benefit from copy doubling */
+  while (startd <= endd) {
+    UV pstartd = startd % PRESIEVE_SIZE;
+    UV sieve_bytes = PRESIEVE_SIZE - pstartd;
+    UV bytes = (nbytes > sieve_bytes) ? sieve_bytes : nbytes;
+    memcpy(mem, presieve13 + pstartd, bytes);
+    if (startd == 0)  mem[0] = 0x01; /* Correct first byte */
+    startd += bytes;
+    mem += bytes;
+    nbytes -= bytes;
+  }
+}
+
+
 
 /* Wheel 30 sieve.  Ideas from Terje Mathisen and Quesada / Van Pelt. */
 unsigned char* sieve_erat30(UV end)
@@ -16,30 +120,19 @@ unsigned char* sieve_erat30(UV end)
   UV prime;
 
   max_buf = (end/30) + ((end%30) != 0);
-  buffer_words = (max_buf + sizeof(UV) - 1) / sizeof(UV);
-  Newz(0, mem, buffer_words*sizeof(UV), unsigned char );
+  /* Round up to a word */
+  max_buf = ((max_buf + sizeof(UV) - 1) / sizeof(UV)) * sizeof(UV);
+  New(0, mem, max_buf, unsigned char );
   if (mem == 0) {
-    croak("allocation failure in sieve_erat30: could not alloc %"UVuf" bytes", (buffer_words*sizeof(UV)));
+    croak("allocation failure in sieve_erat30: could not alloc %"UVuf" bytes", max_buf);
     return 0;
   }
 
-  /* Shortcut to mark 7.  Just an optimization. */
-  if ( (7*7) <= end ) {
-    UV d = 1;
-    while ( (d+6) < max_buf) {
-      mem[d+0] = 0x20;  mem[d+1] = 0x10;  mem[d+2] = 0x81;  mem[d+3] = 0x08;
-      mem[d+4] = 0x04;  mem[d+5] = 0x40;  mem[d+6] = 0x02;  d += 7;
-    }
-    if ( d < max_buf )  mem[d++] = 0x20;
-    if ( d < max_buf )  mem[d++] = 0x10;
-    if ( d < max_buf )  mem[d++] = 0x81;
-    if ( d < max_buf )  mem[d++] = 0x08;
-    if ( d < max_buf )  mem[d++] = 0x04;
-    if ( d < max_buf )  mem[d++] = 0x40;
-    MPUassert( d >= max_buf, "sieve_erat30 incorrect 7 sieve");
-  }
+  /* Fill buffer with marked 7, 11, and 13 */
+  sieve_prefill(mem, 0, max_buf-1);
+
   limit = sqrt((double) end);  /* prime*prime can overflow */
-  for (prime = 11; prime <= limit; prime = next_prime_in_sieve(mem,prime)) {
+  for (prime = 17; prime <= limit; prime = next_prime_in_sieve(mem,prime)) {
     UV d = (prime*prime)/30;
     UV m = (prime*prime) - d*30;
     UV dinc = (2*prime)/30;
@@ -73,8 +166,6 @@ unsigned char* sieve_erat30(UV end)
     } while (d < max_buf);
   }
 
-  mem[0] |= masktab30[1];  /* 1 is composite */
-
   return mem;
 }
 
@@ -87,11 +178,12 @@ int sieve_segment(unsigned char* mem, UV startd, UV endd)
   UV pcsize;
   UV startp = 30*startd;
   UV endp = (endd >= (UV_MAX/30))  ?  UV_MAX-2  :  30*endd+29;
-  UV ranged = endd - startd + 1;
 
   MPUassert( (mem != 0) && (endd >= startd) && (endp >= startp),
              "sieve_segment bad arguments");
-  memset(mem, 0, ranged);
+  
+  /* Fill buffer with marked 7, 11, and 13 */
+  sieve_prefill(mem, startd, endd);
 
   limit = sqrt( (double) endp ) + 1;
   /* printf("segment sieve from %"UVuf" to %"UVuf" (aux sieve to %"UVuf")\n", startp, endp, limit); */
@@ -101,9 +193,9 @@ int sieve_segment(unsigned char* mem, UV startd, UV endd)
     return 0;
   }
 
-  START_DO_FOR_EACH_SIEVE_PRIME(sieve, 7, pcsize)
+  START_DO_FOR_EACH_SIEVE_PRIME(sieve, 17, pcsize)
   {
-    /* p increments from 7 to at least sqrt(endp) */
+    /* p increments from 17 to at least sqrt(endp) */
     UV p2 = p*p;   /* TODO: overflow */
     if (p2 > endp)  break;
     /* Find first multiple of p greater than p*p and larger than startp */
@@ -154,8 +246,5 @@ int sieve_segment(unsigned char* mem, UV startd, UV endd)
   }
   END_DO_FOR_EACH_SIEVE_PRIME;
 
-  if (startd == 0)
-    mem[0] |= masktab30[1];  /* 1 is composite */
-
   return 1;
 }
diff --git a/t/13-primecount.t b/t/13-primecount.t
index 659d209..c2860c3 100644
--- a/t/13-primecount.t
+++ b/t/13-primecount.t
@@ -8,7 +8,7 @@ use Math::Prime::Util qw/prime_count prime_count_lower prime_count_upper prime_c
 my $use64 = Math::Prime::Util::_maxbits > 32;
 my $extra = defined $ENV{RELEASE_TESTING} && $ENV{RELEASE_TESTING};
 
-plan tests => 1 + 14*3 + 8 + 6*$extra + 3*18*$use64 + 11 + 6*$use64;
+plan tests => 1 + 15*3 + 9 + 6*$extra + 3*18*$use64 + 11 + 6*$use64;
 
 ok( eval { prime_count(13); 1; }, "prime_count in void context");
 
@@ -25,6 +25,7 @@ my %pivals32 = (
             10000000 => 664579,
            100000000 => 5761455,
           1000000000 => 50847534,
+               60067 => 6062,
                65535 => 6542,
             16777215 => 1077871,
           2147483647 => 105097565,

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git



More information about the Pkg-perl-cvs-commits mailing list