[libmath-prime-util-perl] 112/181: Use memcpy doubling -- fewer calls needed

Thu May 21 18:51:12 UTC 2015

This is an automated email from the git hooks/post-receive script.

ppm-guest pushed a commit to annotated tag v0.36
in repository libmath-prime-util-perl.

commit af8d7a011d2cfa75f6ae12687cd3bd4b66dbd0d4
Author: Dana Jacobsen <dana at acm.org>
Date:   Sat Jan 4 00:56:11 2014 -0800

    Use memcpy doubling -- fewer calls needed
---
 sieve.c | 50 +++++++++++++++++++++++++++++---------------------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/sieve.c b/sieve.c
index d64615f..8a4afc2 100644
--- a/sieve.c
+++ b/sieve.c
@@ -200,34 +200,42 @@ static const int wheel2xmap[30] =     /* (2*p)%30 => 2,14,22,26,4,8,16,28 */
 static const UV max_sieve_prime = (BITS_PER_WORD==64) ? 4294967291U : 65521U;
 
 
-static void sieve_prefill(unsigned char* mem, UV startd, UV endd)
+static void memtile(unsigned char* src, UV from, UV to) {
+  while (from < to) {
+    UV bytes = (2*from > to) ? to-from : from;
+    memcpy(src+from, src, bytes);
+    from += bytes;
+  }
+}
+
+static UV sieve_prefill(unsigned char* mem, UV startd, UV endd)
 {
+  UV next_prime = 17;
   UV nbytes = endd - startd + 1;
   MPUassert( (mem != 0) && (endd >= startd), "sieve_prefill bad arguments");
 
-  /* Walk the memory, tiling in the presieve area using memcpy.
-   * This is pretty fast, but it might still benefit from using copy
-   * doubling (where we copy to the memory, then copy memory to memory
-   * doubling in size each time), as memcpy usually loves big chunks.
-   */
-  while (startd <= endd) {
+  if (startd != 0) {
     UV pstartd = startd % PRESIEVE_SIZE;
-    UV sieve_bytes = PRESIEVE_SIZE - pstartd;
-    UV bytes = (nbytes > sieve_bytes) ? sieve_bytes : nbytes;
-    memcpy(mem, presieve13 + pstartd, bytes);
-    if (startd == 0)  mem[0] = 0x01; /* Correct first byte */
-    startd += bytes;
-    mem += bytes;
-    nbytes -= bytes;
+    UV tailbytes = PRESIEVE_SIZE - pstartd;
+    if (tailbytes > nbytes) tailbytes = nbytes;
+    memcpy(mem, presieve13 + pstartd, tailbytes); /* Copy tail to mem */
+    mem += tailbytes;    /* Advance so mem lines up with the start of the presieve pattern */
+    nbytes -= tailbytes;
+  }
+  if (nbytes > 0) {
+    memcpy(mem, presieve13, (nbytes < PRESIEVE_SIZE) ? nbytes : PRESIEVE_SIZE);
+    memtile(mem, PRESIEVE_SIZE, nbytes);
+    if (startd == 0) mem[0] = 0x01; /* Correct first byte */
   }
+  /* Always 17 for now.  Leaves the option open to tile 17 out and sieve here, then return 19 */
+  return next_prime;
 }
 
 /* Wheel 30 sieve.  Ideas from Terje Mathisen and Quesada / Van Pelt. */
 unsigned char* sieve_erat30(UV end)
 {
   unsigned char* mem;
-  UV max_buf, limit;
-  UV prime;
+  UV max_buf, limit, prime;
 
   max_buf = (end/30) + ((end%30) != 0);
   /* Round up to a word */
@@ -235,10 +243,10 @@ unsigned char* sieve_erat30(UV end)
   New(0, mem, max_buf, unsigned char );
 
   /* Fill buffer with marked 7, 11, and 13 */
-  sieve_prefill(mem, 0, max_buf-1);
+  prime = sieve_prefill(mem, 0, max_buf-1);
 
   limit = isqrt(end);  /* prime*prime can overflow */
-  for (prime = 17; prime <= limit; prime = next_prime_in_sieve(mem,prime)) {
+  for (  ; prime <= limit; prime = next_prime_in_sieve(mem,prime)) {
     UV p2 = prime*prime;
     UV d = p2 / 30;
     UV m = p2 - d*30;
@@ -285,7 +293,7 @@ unsigned char* sieve_erat30(UV end)
 int sieve_segment(unsigned char* mem, UV startd, UV endd)
 {
   const unsigned char* sieve;
-  UV limit, slimit;
+  UV limit, slimit, start_base_prime;
   UV startp = 30*startd;
   UV endp = (endd >= (UV_MAX/30))  ?  UV_MAX-2  :  30*endd+29;
 
@@ -293,7 +301,7 @@ int sieve_segment(unsigned char* mem, UV startd, UV endd)
              "sieve_segment bad arguments");
 
   /* Fill buffer with marked 7, 11, and 13 */
-  sieve_prefill(mem, startd, endd);
+  start_base_prime = sieve_prefill(mem, startd, endd);
 
   limit = isqrt(endp);  /* floor(sqrt(n)), will include p if p*p=endp */
   /* Don't use a sieve prime such that p*p > UV_MAX */
@@ -303,7 +311,7 @@ int sieve_segment(unsigned char* mem, UV startd, UV endd)
   /* printf("segment sieve from %"UVuf" to %"UVuf" (aux sieve to %"UVuf")\n", startp, endp, slimit); */
   get_prime_cache(slimit, &sieve);
 
-  START_DO_FOR_EACH_SIEVE_PRIME(sieve, 17, slimit)
+  START_DO_FOR_EACH_SIEVE_PRIME(sieve, start_base_prime, slimit)
   {
     /* p increments from 17 to at most sqrt(endp).  Note on overflow:
      * 32-bit: limit=     65535, max p =      65521, p*p = ~0-1965854

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git