[libmath-prime-util-perl] 13/20: Start to add LMO prime_count

Thu May 21 18:47:31 UTC 2015

This is an automated email from the git hooks/post-receive script.

ppm-guest pushed a commit to annotated tag v0.23
in repository libmath-prime-util-perl.

commit 705c9247e386af8a9ec55a2ee00245182178f89e
Author: Dana Jacobsen <dana at acm.org>
Date:   Sun Mar 3 05:45:08 2013 -0800

    Start to add LMO prime_count
---
 Changes  |   5 ++
 TODO     |   6 +++
 XS.xs    |   5 +-
 aks.c    |   6 +--
 factor.c |  24 +++++-----
 lehmer.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 lehmer.h |   1 +
 sieve.c  |   5 +-
 util.c   |  36 ++++++++-------
 util.h   |  12 +++++
 10 files changed, 212 insertions(+), 47 deletions(-)

diff --git a/Changes b/Changes
index 994544b..31bd220 100644
--- a/Changes
+++ b/Changes
@@ -18,6 +18,11 @@ Revision history for Perl extension Math::Prime::Util.
 
     - Add the first and second Chebyshev functions (theta and psi).
 
+    - put isqrt(n) in util.h, use it everywhere.
+      put icbrt(n) in lehmer.h, use it there.
+
+    - Start on Lagarias-Miller-Odlyzko prime count.
+
     - Performance:
        - Divisor sum with no sub is ~10x faster.
        - Speed up PP version of exp_mangoldt, create XS version.
diff --git a/TODO b/TODO
index cb3763b..e523058 100644
--- a/TODO
+++ b/TODO
@@ -47,3 +47,9 @@
       } END_FOREACH_PRIME
   that (1) handles any low / high, and (2) segments.  Take segment_primes
   from XS.xs to do this.
+
+- Try in lehmer.c's phi function: Use two linear arrays to replace the
+  heap+array.  If we add val in one and sval in the other, they can remain
+  sorted, then reading the next combined value is easy.
+
+- Implement S2 calculation for LMO prime count.
diff --git a/XS.xs b/XS.xs
index b46185e..8892164 100644
--- a/XS.xs
+++ b/XS.xs
@@ -62,6 +62,9 @@ UV
 _XS_lehmer_pi(IN UV n)
 
 UV
+_XS_LMO_pi(IN UV n)
+
+UV
 _XS_nth_prime(IN UV n)
 
 int
@@ -158,7 +161,7 @@ segment_primes(IN UV low, IN UV high);
 
       {  /* Avoid recalculations of this */
         UV endp = (high_d >= (UV_MAX/30))  ?  UV_MAX-2  :  30*high_d+29;
-        prime_precalc( sqrt(endp) + 0.1 + 1 );
+        prime_precalc(isqrt(endp) + 1 );
       }
 
       while ( low_d <= high_d ) {
diff --git a/aks.c b/aks.c
index 0e3b123..3b7f18c 100644
--- a/aks.c
+++ b/aks.c
@@ -55,7 +55,7 @@ static int is_perfect_power(UV n) {
   if ( n < (UV) pow(10, DBL_DIG) ) {
 #endif
     /* Simple floating point method.  Fast, but need enough mantissa. */
-    b = sqrt(n)+0.5; if (b*b == n)  return 1; /* perfect square */
+    b = isqrt(n); if (b*b == n)  return 1; /* perfect square */
     for (b = 3; b < last; b = _XS_next_prime(b)) {
       UV root = pow(n, 1.0 / (double)b) + 0.5;
       if ( ((UV)(pow(root, b)+0.5)) == n)  return 1;
@@ -163,7 +163,7 @@ static UV* poly_mod_pow(UV* pn, UV power, UV r, UV mod)
 {
   UV* res;
   UV* temp;
-  int use_sqr = (mod > sqrt(UV_MAX/r)) ? 0 : 1;
+  int use_sqr = (mod > isqrt(UV_MAX/r)) ? 0 : 1;
 
   Newz(0, res, r, UV);
   New(0, temp, r, UV);
@@ -223,7 +223,7 @@ int _XS_is_aks_prime(UV n)
   if (is_perfect_power(n))
     return 0;
 
-  sqrtn = sqrt(n);
+  sqrtn = isqrt(n);
   log2n = log(n) / log(2);   /* C99 has a log2() function */
   limit = (UV) floor(log2n * log2n);
 
diff --git a/factor.c b/factor.c
index 8bc1357..c547159 100644
--- a/factor.c
+++ b/factor.c
@@ -90,7 +90,7 @@ int factor(UV n, UV *factors)
         /* Factor via trial division.  Nothing should make it here. */
         UV f = tlim;
         UV m = tlim % 30;
-        UV limit = (UV) (sqrt(n)+0.1);
+        UV limit = isqrt(n);
         if (verbose) printf("doing trial on %"UVuf"\n", n);
         while (f <= limit) {
           if ( (n%f) == 0 ) {
@@ -98,7 +98,7 @@ int factor(UV n, UV *factors)
               n /= f;
               fac_stack[nfac++] = f;
             } while ( (n%f) == 0 );
-            limit = (UV) (sqrt(n)+0.1);
+            limit = isqrt(n);
           }
           f += wheeladvance30[m];
           m =  nextwheel30[m];
@@ -178,7 +178,7 @@ int trial_factor(UV n, UV *factors, UV maxtrial)
   }
 
   /* Trial division to this number at most.  Reduced as we find factors. */
-  limit = (UV) (sqrt(n)+0.1);
+  limit = isqrt(n);
   if (limit > maxtrial)
     limit = maxtrial;
 
@@ -192,7 +192,7 @@ int trial_factor(UV n, UV *factors, UV maxtrial)
           factors[nfactors++] = f;
           n /= f;
         } while ( (n%f) == 0 );
-        newlimit = (UV) (sqrt(n)+0.1);
+        newlimit = isqrt(n);
         if (newlimit < limit)  limit = newlimit;
       }
       f = primes_small[++sp];
@@ -208,7 +208,7 @@ int trial_factor(UV n, UV *factors, UV maxtrial)
           factors[nfactors++] = f;
           n /= f;
         } while ( (n%f) == 0 );
-        newlimit = (UV) (sqrt(n)+0.1);
+        newlimit = isqrt(n);
         if (newlimit < limit)  limit = newlimit;
       }
       f += wheeladvance30[m];
@@ -280,7 +280,7 @@ static int is_perfect_square(UV n, UV* sqrtn)
   m = n % 63;
   if ((m*0x3d491df7) & (m*0xc824a9f9) & 0x10f14008) return 0;
 #endif
-  m = sqrt(n);
+  m = isqrt(n);
   if (n != (m*m))
     return 0;
 
@@ -459,7 +459,7 @@ int fermat_factor(UV n, UV *factors, UV rounds)
 
   MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in fermat_factor");
 
-  sqn = (UV) (sqrt(n)+0.1);
+  sqn = isqrt(n);
   x = 2 * sqn + 1;
   y = 1;
   r = (sqn*sqn) - n;
@@ -630,7 +630,7 @@ int pminus1_factor(UV n, UV *factors, UV B1, UV B2)
   UV savea = 2;
   UV saveq = 2;
   UV j = 1;
-  UV sqrtB1 = sqrt(B1);
+  UV sqrtB1 = isqrt(B1);
   MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in pminus1_factor");
 
   for (q = 2; q <= sqrtB1; q = _XS_next_prime(q)) {
@@ -761,7 +761,7 @@ int squfof_factor(UV n, UV *factors, UV rounds)
   /* TODO:  What value of n leads to overflow? */
 
   qlast = 1;
-  s = sqrt(n);
+  s = isqrt(n);
 
   p = s;
   temp = n - (s*s);                 /* temp = n - floor(sqrt(n))^2   */
@@ -798,7 +798,7 @@ int squfof_factor(UV n, UV *factors, UV rounds)
     q = t;
     p = pnext;                          /* check for square; even iter   */
     if (jter & 1) continue;             /* jter is odd:omit square test  */
-    r = (int)sqrt((double)q);                 /* r = floor(sqrt(q))      */
+    r = isqrt(q);                       /* r = floor(sqrt(q))      */
     if (q != r*r) continue;
     if (qpoint == 0) break;
     qqueue[qpoint] = 0;
@@ -1005,8 +1005,8 @@ int racing_squfof_factor(UV n, UV *factors, UV rounds)
     }
     mult_save[i].valid = 1;
 
-    mult_save[i].b0 = sqrt( (double)nn64 );
-    mult_save[i].imax = sqrt( (double)mult_save[i].b0 ) / 3;
+    mult_save[i].b0 = isqrt(nn64);
+    mult_save[i].imax = sqrt(mult_save[i].b0) / 3;
     if (mult_save[i].imax < 20)     mult_save[i].imax = 20;
     if (mult_save[i].imax > rounds) mult_save[i].imax = rounds;
 
diff --git a/lehmer.c b/lehmer.c
index aef6b37..48d57d1 100644
--- a/lehmer.c
+++ b/lehmer.c
@@ -54,6 +54,7 @@
  */
 
 static int const verbose = 0;
+/* #define STAGE_TIMING 1 */
 
 #ifdef STAGE_TIMING
  #include <sys/time.h>
@@ -95,6 +96,16 @@ typedef   signed long IV;
 #define croak(fmt,...)            { printf(fmt,##__VA_ARGS__); exit(1); }
 #define prime_precalc(n)          /* */
 
+static UV isqrt(UV n)
+{
+  if (sizeof(UV) == 8 && n >= 18446744065119617025UL)  return 4294967295UL;
+  if (sizeof(UV) == 4 && n >= 4294836225UL)            return 65535UL;
+  UV root = (UV) sqrt((double)n);
+  while (root*root > n)  root--;
+  while ((root+1)*(root+1) <= n)  root++;
+  return root;
+}
+
 /* There has _got_ to be a better way to get an array of small primes using
  * primesieve.  This is ridiculous. */
 static UV* sieve_array = 0;
@@ -161,6 +172,36 @@ static UV* generate_small_primes(UV n)
 
 #endif
 
+static UV icbrt(UV n)
+{
+#if 0
+  /* The integer cube root code is about 30% faster for me */
+  if (n >= UVCONST(18446724184312856125)) return UVCONST(2642245);
+  UV root = (UV) pow(n, 1.0/3.0);
+  if (root*root*root > n) {
+    root--;
+    while (root*root*root > n)  root--;
+  } else {
+    while ((root+1)*(root+1)*(root+1) <= n)  root++;
+  }
+  return root;
+#else
+  int s;
+  UV y = 0;
+  for (s = (sizeof(UV)*8)-1; s >= 0; s -= 3) {
+    UV b;
+    y += y;
+    b = 3*y*(y+1)+1;
+    if ((n >> s) >= b) {
+      n -= b << s;
+      y++;
+    }
+  }
+  return y;
+#endif
+}
+
+
 /* Given an array of primes[1..lastprime], return Pi(n) where n <= lastprime.
  * This is actually quite fast, and definitely faster than sieving.  By using
  * this we can avoid caching prime counts and also skip most calls to the
@@ -455,7 +496,7 @@ UV _XS_legendre_pi(UV n)
   if (n < SIEVE_LIMIT)
     return _XS_prime_count(2, n);
 
-  a = _XS_legendre_pi( (UV) (sqrt(n)+0.5) );
+  a = _XS_legendre_pi(isqrt(n));
 
   return phi(n, a) + a - 1;
 }
@@ -472,9 +513,9 @@ UV _XS_meissel_pi(UV n)
 
   if (verbose > 0) printf("meissel %lu stage 1: calculate a,b,c \n", n);
   TIMING_START;
-  a = _XS_meissel_pi(pow(n, 1.0/3.0)+0.5);     /* a = floor(n^1/3) */
-  b = _XS_meissel_pi(sqrt(n)+0.5);             /* b = floor(n^1/2) */
-  c = a;                                       /* c = a            */
+  a = _XS_meissel_pi(icbrt(n));        /* a = floor(n^1/3) */
+  b = _XS_meissel_pi(isqrt(n));        /* b = floor(n^1/2) */
+  c = a;                               /* c = a            */
   TIMING_END_PRINT("stage 1")
 
   if (verbose > 0) printf("meissel %lu stage 2: phi(x,a) (a=%lu b=%lu c=%lu)\n", n, a, b, c);
@@ -491,7 +532,7 @@ UV _XS_meissel_pi(UV n)
   lastpc = primes[lastprime];
   TIMING_END_PRINT("small primes")
 
-  prime_precalc( sqrt( n / primes[a+1] ) );
+  prime_precalc(isqrt(n / primes[a+1]));
 
   if (verbose > 0) printf("meissel %lu stage 4: loop %lu to %lu, pc to %lu\n", n, a+1, b, n/primes[a+1]);
   TIMING_START;
@@ -532,10 +573,10 @@ UV _XS_lehmer_pi(UV n)
 
   if (verbose > 0) printf("lehmer %lu stage 1: calculate a,b,c \n", n);
   TIMING_START;
-  z = (UV) sqrt((double)n+0.5);
-  a = _XS_lehmer_pi(sqrt((double)z)+0.5);          /* a = floor(n^1/4) */
-  b = _XS_lehmer_pi(z);                            /* b = floor(n^1/2) */
-  c = _XS_lehmer_pi(pow((double)n, 1.0/3.0)+0.5);  /* c = floor(n^1/3) */
+  z = isqrt(n);
+  a = _XS_lehmer_pi(isqrt(z));         /* a = floor(n^1/4) */
+  b = _XS_lehmer_pi(z);                /* b = floor(n^1/2) */
+  c = _XS_lehmer_pi(icbrt(n));         /* c = floor(n^1/3) */
   TIMING_END_PRINT("stage 1")
 
 
@@ -560,7 +601,7 @@ UV _XS_lehmer_pi(UV n)
 
   /* Ensure we have the base sieve for big prime_count ( n/primes[i] ). */
   /* This is about 75k for n=10^13, 421k for n=10^15, 2.4M for n=10^17 */
-  prime_precalc( sqrt( n / primes[a+1] ) );
+  prime_precalc(isqrt(n / primes[a+1]));
 
   if (verbose > 0) printf("lehmer %lu stage 4: loop %lu to %lu, pc to %lu\n", n, a+1, b, n/primes[a+1]);
   TIMING_START;
@@ -574,7 +615,7 @@ UV _XS_lehmer_pi(UV n)
     lastw = w;
     sum = sum - lastwpc;
     if (i <= c) {
-      UV bi = bs_prime_count( (UV) (sqrt(w) + 0.5), primes, lastprime );
+      UV bi = bs_prime_count( isqrt(w), primes, lastprime );
       for (j = i; j <= bi; j++) {
         sum = sum - bs_prime_count(w / primes[j], primes, lastprime) + j - 1;
       }
@@ -585,6 +626,99 @@ UV _XS_lehmer_pi(UV n)
   return sum;
 }
 
+UV _XS_LMO_pi(UV n)
+{
+  UV a, b, sum, i, lastprime, lastpc, lastw, lastwpc;
+  UV n13, n12, n23;
+  IV S1;
+  UV S2, P2;
+  const UV* primes = 0;  /* small prime cache */
+  char* mu = 0;          /* moebius to n^1/3 */
+  UV*   lpf = 0;         /* least prime factor to n^1/3 */
+  DECLARE_TIMING_VARIABLES;
+  if (n < SIEVE_LIMIT)
+    return _XS_prime_count(2, n);
+
+  if (verbose > 0) printf("LMO %lu stage 1: calculate pi(n^1/3) \n", n);
+  TIMING_START;
+  n13 = icbrt(n);
+  n12 = isqrt(n);
+  n23 = (UV) (pow(n, 2.0/3.0)+0.01);
+  a = _XS_lehmer_pi(n13);
+  b = _XS_lehmer_pi(n12);
+  TIMING_END_PRINT("stage 1")
+
+  lastprime = b*16;
+  if (verbose > 0) printf("LMO %lu stage 2: %lu small primes\n", n, lastprime);
+  TIMING_START;
+  primes = generate_small_primes(lastprime);
+  if (primes == 0) croak("Error generating primes.\n");
+  lastpc = primes[lastprime];
+  TIMING_END_PRINT("small primes")
+
+  if (verbose > 0) printf("LMO %lu stage 3: calculate mu/lpf to %lu\n", n, a);
+  TIMING_START;
+  /* We could call MPU's:
+   *    mu = _moebius_range(0, n13+1)
+   * but (1) it's a bit slower (something to be addressed), and (2) we will
+   * do the least prime factor calculation at the same time.
+   */
+  New(0, mu, n13+1, char);
+  memset(mu, 1, sizeof(char) * (n13+1));
+  New(0, lpf, n13+1, UV);
+  memset(lpf, 0, sizeof(UV) * (n13+1));
+  mu[0] = 0;
+  for (i = 1; i <= a; i++) {
+    UV primei = primes[i];
+    UV j;
+    for (j = primei; j <= n13; j += primei) {
+      mu[j] = -mu[j];
+      if (lpf[j] == 0) lpf[j] = primei;
+    }
+    UV isquared = primei * primei;
+    for (j = isquared; j <= n13; j += isquared)
+      mu[j] = 0;
+  }
+  //for (i = 0; i <= n13; i++) { printf("mu %lu %ld\n", i, (IV)mu[i]); }
+  TIMING_END_PRINT("mu")
+
+  if (verbose > 0) printf("LMO %lu stage 4: calculate S1 (%lu)\n", n, n13);
+  TIMING_START;
+  S1 = 0;
+  for (i = 1; i <= n13; i++)
+    if (mu[i] != 0)
+      S1 += mu[i] * (IV) (n/i);
+  TIMING_END_PRINT("S1")
+  if (verbose > 0) printf("LMO %lu stage 4: S1 = %ld\n", n, S1);
+
+  S2 = 0;
+  /* TODO... */
+
+  Safefree(mu);
+  Safefree(lpf);
+
+  prime_precalc(isqrt(n / primes[a+1]));
+  if (verbose > 0) printf("LMO %lu stage 5: P2 loop %lu to %lu, pc to %lu\n", n, a+1, b, n/primes[a+1]);
+  TIMING_START;
+  P2 = 0;
+  /* Reverse the i loop so w increases.  Count w in segments. */
+  lastw = 0;
+  lastwpc = 0;
+  for (i = b; i > a; i--) {
+    UV w = n / primes[i];
+    lastwpc = (w <= lastpc) ? bs_prime_count(w, primes, lastprime)
+                            : lastwpc + _XS_prime_count(lastw+1, w);
+    lastw = w;
+    P2 += lastwpc;
+  }
+  P2 -= ((b+a-2) * (b-a+1) / 2) - a + 1;
+  TIMING_END_PRINT("P2")
+  if (verbose > 0) printf("LMO %lu stage 5: P2 = %lu\n", n, P2);
+  Safefree(primes);
+  sum = P2 + S1 + S2;
+  return sum;
+}
+
 
 #ifdef PRIMESIEVE_STANDALONE
 int main(int argc, char *argv[])
@@ -607,9 +741,10 @@ int main(int argc, char *argv[])
   if      (!strcasecmp(method, "lehmer"))   { pi = _XS_lehmer_pi(n);      }
   else if (!strcasecmp(method, "meissel"))  { pi = _XS_meissel_pi(n);     }
   else if (!strcasecmp(method, "legendre")) { pi = _XS_legendre_pi(n);    }
+  else if (!strcasecmp(method, "lmo"))      { pi = _XS_LMO_pi(n);    }
   else if (!strcasecmp(method, "sieve"))    { pi = _XS_prime_count(2, n); }
   else {
-    printf("method must be one of: lehmer, meissel, legendre, or sieve\n");
+    printf("method must be one of: lehmer, meissel, legendre, lmo, or sieve\n");
     return(2);
   }
   gettimeofday(&t1, 0);
diff --git a/lehmer.h b/lehmer.h
index e071deb..94b46a4 100644
--- a/lehmer.h
+++ b/lehmer.h
@@ -6,5 +6,6 @@
 extern UV _XS_legendre_pi(UV n);
 extern UV _XS_meissel_pi(UV n);
 extern UV _XS_lehmer_pi(UV n);
+extern UV _XS_LMO_pi(UV n);
 
 #endif
diff --git a/sieve.c b/sieve.c
index bebd2df..16d22fc 100644
--- a/sieve.c
+++ b/sieve.c
@@ -6,6 +6,7 @@
 #include "sieve.h"
 #include "ptypes.h"
 #include "cache.h"
+#include "util.h"
 
 
 /* 1001 bytes of presieved mod-30 bytes.  If the area to be sieved is
@@ -136,7 +137,7 @@ unsigned char* sieve_erat30(UV end)
   /* Fill buffer with marked 7, 11, and 13 */
   sieve_prefill(mem, 0, max_buf-1);
 
-  limit = sqrt((double) end);  /* prime*prime can overflow */
+  limit = isqrt(end);  /* prime*prime can overflow */
   for (prime = 17; prime <= limit; prime = next_prime_in_sieve(mem,prime)) {
     UV d = (prime*prime)/30;
     UV m = (prime*prime) - d*30;
@@ -198,7 +199,7 @@ int sieve_segment(unsigned char* mem, UV startd, UV endd)
   /* Fill buffer with marked 7, 11, and 13 */
   sieve_prefill(mem, startd, endd);
 
-  limit = sqrt( (double) endp ) + 1;
+  limit = isqrt(endp) + 1;
   /* printf("segment sieve from %"UVuf" to %"UVuf" (aux sieve to %"UVuf")\n", startp, endp, limit); */
   pcsize = get_prime_cache(limit, &sieve);
   if (pcsize < limit) {
diff --git a/util.c b/util.c
index 159d1b7..b041fb6 100644
--- a/util.c
+++ b/util.c
@@ -89,7 +89,7 @@ static UV count_zero_bits(const unsigned char* m, UV nbytes)
 static int _is_trial_prime7(UV n)
 {
   UV limit, i;
-  limit = sqrt(n);
+  limit = isqrt(n);
   i = 7;
   while (1) {   /* trial division, skipping multiples of 2/3/5 */
     if (i > limit) break;  if ((n % i) == 0) return 0;  i += 4;
@@ -112,7 +112,7 @@ static int _is_prime7(UV n)
   if (n > MPU_PROB_PRIME_BEST)
     return _XS_is_prob_prime(n);  /* We know this works for all 64-bit n */
 
-  limit = sqrt(n);
+  limit = isqrt(n);
   i = 7;
   while (1) {   /* trial division, skipping multiples of 2/3/5 */
     if (i > limit) break;  if ((n % i) == 0) return 0;  i += 4;
@@ -438,7 +438,7 @@ UV _XS_prime_count(UV low, UV high)
     /* Expand sieve to sqrt(n) */
     UV endp = (high_d >= (UV_MAX/30))  ?  UV_MAX-2  :  30*high_d+29;
     release_prime_cache(cache_sieve);
-    segment_size = get_prime_cache( sqrt(endp) + 1 , &cache_sieve) / 30;
+    segment_size = get_prime_cache( isqrt(endp) + 1 , &cache_sieve) / 30;
   }
 
   if ( (segment_size > 0) && (low_d <= segment_size) ) {
@@ -615,7 +615,7 @@ UV _XS_nth_prime(UV n)
     }
 
     /* Make sure the segment siever won't have to keep resieving. */
-    prime_precalc(sqrt(upper_limit));
+    prime_precalc(isqrt(upper_limit));
   }
 
   if (count == target)
@@ -659,7 +659,7 @@ IV* _moebius_range(UV lo, UV hi)
   /* This implementation follows that of Deléglise & Rivat (1996), which is
    * a segmented version of Lioen & van de Lune (1994).
    */
-  sqrtn = (UV) (sqrt(hi) + 0.5);
+  sqrtn = isqrt(hi);
 
   New(0, mu, hi-lo+1, IV);
   if (mu == 0)
@@ -725,7 +725,7 @@ IV _XS_mertens(UV n) {
   IV sum;
 
   if (n <= 1)  return n;
-  u = (UV) sqrt(n);
+  u = isqrt(n);
   mu = _moebius_range(0, u);
   New(0, M, u+1, IV);
   M[0] = 0;
@@ -981,7 +981,6 @@ static const long double riemann_zeta_table[] = {
 long double ld_riemann_zeta(long double x) {
   long double const tol = 1e-17;
   int i;
-  KAHAN_INIT(sum);
 
   if (x < 0)  croak("Invalid input to RiemannZeta:  x must be >= 0");
   if (x == 1) return INFINITY;
@@ -1014,7 +1013,7 @@ long double ld_riemann_zeta(long double x) {
                                         1.000000000000000000000L    };
     long double sumn = C8p[0]+x*(C8p[1]+x*(C8p[2]+x*(C8p[3]+x*(C8p[4]+x*(C8p[5]+x*(C8p[6]+x*(C8p[7]+x*C8p[8])))))));
     long double sumd = C8q[0]+x*(C8q[1]+x*(C8q[2]+x*(C8q[3]+x*(C8q[4]+x*(C8q[5]+x*(C8q[6]+x*(C8q[7]+x*C8q[8])))))));
-    sum = (sumn - (x-1)*sumd) / ((x-1)*sumd);
+    long double sum = (sumn - (x-1)*sumd) / ((x-1)*sumd);
     return sum;
   }
 
@@ -1026,16 +1025,19 @@ long double ld_riemann_zeta(long double x) {
   }
 
 #if 0
-  /* Simple defining series, works well. */
-  for (i = 5; i <= 1000000; i++) {
-    long double term = powl(i, -x);
-    KAHAN_SUM(sum, term);
-    if (term < tol*sum) break;
+  {
+    KAHAN_INIT(sum);
+    /* Simple defining series, works well. */
+    for (i = 5; i <= 1000000; i++) {
+      long double term = powl(i, -x);
+      KAHAN_SUM(sum, term);
+      if (term < tol*sum) break;
+    }
+    KAHAN_SUM(sum, powl(4, -x) );
+    KAHAN_SUM(sum, powl(3, -x) );
+    KAHAN_SUM(sum, powl(2, -x) );
+    return sum;
   }
-  KAHAN_SUM(sum, powl(4, -x) );
-  KAHAN_SUM(sum, powl(3, -x) );
-  KAHAN_SUM(sum, powl(2, -x) );
-  return sum;
 #endif
 
   /* The 2n!/B_2k series used by the Cephes library. */
diff --git a/util.h b/util.h
index 3492469..9d9821e 100644
--- a/util.h
+++ b/util.h
@@ -32,4 +32,16 @@ extern double _XS_RiemannR(double x);
   #define MPU_PROB_PRIME_BEST  UVCONST(100000000)
 #endif
 
+static UV isqrt(UV n) {
+ #if BIT_PER_WORD == 32
+  if (n >= UVCONST(4294836225)) return UVCONST(65535);
+ #else
+  if (n >= UVCONST(18446744065119617025)) return UVCONST(4294967295);
+ #endif
+  UV root = (UV) sqrt((double)n);
+  while (root*root > n)  root--;
+  while ((root+1)*(root+1) <= n)  root++;
+  return root;
+}
+
 #endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git