[libmath-prime-util-perl] 49/50: Fix prime count issue and make standalone. Add Meissel method.
Partha P. Mukherjee
ppm-guest at moszumanska.debian.org
Thu May 21 18:45:41 UTC 2015
This is an automated email from the git hooks/post-receive script.
ppm-guest pushed a commit to annotated tag v0.13
in repository libmath-prime-util-perl.
commit b20511442938a45ac67340916f2455f6e2b66078
Author: Dana Jacobsen <dana at acm.org>
Date: Mon Nov 19 00:08:40 2012 -0800
Fix prime count issue and make standalone. Add Meissel method.
---
XS.xs | 6 +
lehmer.c | 369 ++++++++++++++++++++++++++++++++++++++----------------
lehmer.h | 1 +
t/13-primecount.t | 6 +-
4 files changed, 271 insertions(+), 111 deletions(-)
diff --git a/XS.xs b/XS.xs
index d61f012..5c69ba7 100644
--- a/XS.xs
+++ b/XS.xs
@@ -42,6 +42,12 @@ _XS_prime_count(IN UV low, IN UV high = 0)
RETVAL
UV
+_XS_legendre_pi(IN UV n)
+
+UV
+_XS_meissel_pi(IN UV n)
+
+UV
_XS_lehmer_pi(IN UV n)
UV
diff --git a/lehmer.c b/lehmer.c
index 9a38c95..facc3d7 100644
--- a/lehmer.c
+++ b/lehmer.c
@@ -3,10 +3,7 @@
#include <string.h>
#include <math.h>
-#include "lehmer.h"
-#include "util.h"
-#include "cache.h"
-#include "sieve.h"
+#define SIEVE_LIMIT 1000000 /* Just sieve if smaller than this */
/*****************************************************************************
*
@@ -16,13 +13,14 @@
* This is free software; you can redistribute it and/or modify it under
* the same terms as the Perl 5 programming language system itself.
*
- * This file is part of the Math::Prime::Util Perl module. It relies on two
- * features: (1) a sieve to generate very small primes which could be as
- * simple as a three-line SoE, and (2) _XS_prime_count(low, high) is expected
- * to return the prime count within the segment low to high inclusive. These
- * are used for the relatively small segments in the final stage, but making
- * it fast is important for the final performance. The primesieve package is
- * one source of excellent routines for either task.
+ * This file is part of the Math::Prime::Util Perl module, but also can be
+ * compiled as a standalone UNIX program using the primesieve package.
+ *
+ * g++ -O3 -DPRIMESIEVE_STANDALONE lehmer.c -o prime_count -lprimesieve
+ *
+ * For faster prime counting in stage 4 with multiprocessor machines:
+ *
+ * g++ -O3 -DPRIMESIEVE_STANDALONE -DPRIMESIEVE_PARALLEL lehmer.c -o prime_count -lprimesieve -lgomp
*
* The phi(x,a) calculation is unique, to the best of my knowledge. It keeps
* a heap of all x values + signed counts for the given 'a' value, and walks
@@ -35,6 +33,8 @@
* Calculating pi(10^11) is done in under 1 second on my computer. pi(10^14)
* takes under 1 minute, pi(10^16) in a half hour. Compared with Thomas
* R. Nicely's pix4 program, this one is 3-5x faster and uses 2-3x less memory.
+ * When compiled with parallel primesieve it is another 2x or more faster:
+ * pix4(10^16) takes 124 minutes, this takes 6 minutes.
*
* n phi(x,a) mem/time | stage 4 mem/time | total time
* 10^17 4953MB 871.14 | 2988MB 9911.9 | 179m 37.5s
@@ -46,13 +46,16 @@
* 10^11 0.034 | 0.138 | 0.213s
* 10^10 0.007 | 0.025 | 0.064s
*
+ * These timings are using Perl + MPU. The standalone version using primesieve
+ * speeds up stage 4 a lot for large values.
+ *
* Reference: Hans Riesel, "Prime Numbers and Computer Methods for
* Factorization", 2nd edition, 1994.
*/
static int const verbose = 0;
-#define DO_TIMING 0
-#if DO_TIMING
+
+#ifdef STAGE_TIMING
#include <sys/time.h>
#define DECLARE_TIMING_VARIABLES struct timeval t0, t1;
#define TIMING_START gettimeofday(&t0, 0);
@@ -67,6 +70,122 @@ static int const verbose = 0;
#define TIMING_END_PRINT(text)
#endif
+
+#ifdef PRIMESIEVE_STANDALONE
+
+#include <limits.h>
+#include <sys/time.h>
+#ifdef PRIMESIEVE_PARALLEL
+ #include <primesieve/soe/ParallelPrimeSieve.h>
+ ParallelPrimeSieve ps;
+#else
+ #include <primesieve/soe/PrimeSieve.h>
+ PrimeSieve ps;
+#endif
+
+/* Translations from Perl + Math::Prime::Util to C/C++ + primesieve */
+typedef unsigned long UV;
+typedef signed long IV;
+#define UV_MAX ULONG_MAX
+#define New(id, mem, size, type) mem = (type*) malloc((size)*sizeof(type))
+#define Newz(id, mem, size, type) mem = (type*) calloc(size, sizeof(type))
+#define Renew(mem, size, type) mem = (type*) realloc(mem,(size)*sizeof(type))
+#define Safefree(mem) free((void*)mem)
+#define _XS_prime_count(a, b) ps.countPrimes(a, b)
+#define croak(fmt,...) { printf(fmt,##__VA_ARGS__); exit(1); }
+#define prime_precalc(n) /* */
+
+/* There has _got_ to be a better way to get an array of small primes using
+ * primesieve. This is ridiculous. */
+static UV* sieve_array = 0;
+static UV sieve_k;
+static UV sieve_n;
+void primesieve_callback(uint64_t pk)
+ { if (sieve_k <= sieve_n) sieve_array[sieve_k++] = pk; }
+
+/* Generate an array of small primes up to and including n, where the kth
+ * prime is element p[k]. Remember to free when done. */
+static UV* generate_small_primes(UV n)
+{
+ UV* primes;
+ UV nth_prime = (n <= 10) ? 29 : n * ( log(n) + log(log(n)) ) + 1;
+ New(0, primes, n+1, UV);
+ if (primes == 0)
+ croak("Can not allocate small primes\n");
+ primes[0] = 0;
+ sieve_array = primes;
+ sieve_n = n;
+ sieve_k = 1;
+ ps.generatePrimes(2, nth_prime, primesieve_callback);
+ sieve_array = 0;
+ return primes;
+}
+
+#else
+
+#include "lehmer.h"
+#include "util.h"
+#include "cache.h"
+#include "sieve.h"
+
+/* Generate an array of small primes up to and including n, where the kth
+ * prime is element p[k]. Remember to free when done. */
+static UV* generate_small_primes(UV n)
+{
+ const unsigned char* sieve;
+ UV* primes;
+ UV i, nth_prime;
+
+ /* Dusart 1999 bound */
+ nth_prime = (n <= 10) ? 29 : n * ( log(n) + log(log(n)) ) + 1;
+
+ if (get_prime_cache(nth_prime, &sieve) < nth_prime) {
+ release_prime_cache(sieve);
+ croak("Could not generate sieve for %"UVuf, nth_prime);
+ }
+ New(0, primes, n+1, UV);
+ if (primes == 0)
+ croak("Can not allocate small primes\n");
+ primes[0] = 0; primes[1] = 2; primes[2] = 3; primes[3] = 5;
+ i = 3;
+ START_DO_FOR_EACH_SIEVE_PRIME( sieve, 7, nth_prime ) {
+ if (i >= n) break;
+ primes[++i] = p;
+ } END_DO_FOR_EACH_SIEVE_PRIME
+ release_prime_cache(sieve);
+ if (i < n)
+ croak("Did not generate enough small primes.\n");
+ if (verbose > 1) printf("generated %lu small primes, from 2 to %lu\n", i, primes[i]);
+ return primes;
+}
+
+#endif
+
+/* Given an array of primes[1..lastprime], return Pi(n) where n <= lastprime.
+ * This is actually quite fast, and definitely faster than sieving. By using
+ * this we can avoid caching prime counts and also skip most calls to the
+ * segment siever.
+ */
+static UV bs_prime_count(UV n, UV const* const primes, UV lastprime)
+{
+ UV i, j;
+ if (n < 2) return 0;
+ /* if (n > primes[lastprime]) return _XS_prime_count(2, n); */
+ if (n >= primes[lastprime]) {
+ if (n == primes[lastprime]) return lastprime;
+ croak("called bspc(%lu) with counts up to %lu\n", n, primes[lastprime]);
+ }
+ i = 1;
+ j = lastprime;
+ while (i < j) {
+ UV mid = (i+j)/2;
+ if (primes[mid] <= n) i = mid+1;
+ else j = mid;
+ }
+ return i-1;
+}
+
+
/* Use Mapes' method to calculate phi(x,a) for small a. This is really
* convenient and a little Perl script will spit this code out for whatever
* limit we select. It gets unwieldy with large a values.
@@ -270,24 +389,32 @@ static void heap_destroy(heap_t* h)
* memory consuming part.
*/
-static UV phi(UV x, UV a, UV const* const primes)
+static UV phi(UV x, UV a)
{
heap_t h1, h2;
- UV val;
+ UV val, smallv;
IV count, sum = 0;
- UV primea = primes ? primes[a+1] : _XS_nth_prime(a+1);
- if (x < primea) return (x > 0) ? 1 : 0;
- if (a == 1) return ((x+1)/2);
- if (a <= 7) return mapes(x, a);
+ const UV* primes;
- primea = primes ? primes[a] : _XS_prev_prime(primea);
+ if (a == 1) return ((x+1)/2);
+ if (a <= 7) return mapes(x, a);
- h1 = heap_create(a * 1000);
- h2 = heap_create(a * 1000);
+ primes = generate_small_primes(a+1);
+ if (primes == 0)
+ croak("Could not generate primes for phi(%lu,%lu)\n", x, a);
+ if (x < primes[a+1]) { Safefree(primes); return (x > 0) ? 1 : 0; }
+ /* This is a hack, trying to guess at a value where the array gets dense */
+ smallv = a * 1000;
+ if (smallv > x / primes[a] / 5)
+ smallv = x / primes[a] / 5;
+
+ h1 = heap_create(smallv);
+ h2 = heap_create(smallv);
heap_insert(&h1, x, 1);
while (a > 7) {
+ UV primea = primes[a];
if (heap_not_empty(h2)) croak("h2 heap isn't empty.");
while ( heap_not_empty(h1) ) {
UV sval;
@@ -303,8 +430,8 @@ static UV phi(UV x, UV a, UV const* const primes)
}
}
{ heap_t t = h1; h1 = h2; h2 = t; }
+ /* printf("a = %lu heap %lu/%lu + %lu\n", a, h1.small_N, h1.small_limit, h1.N); */
a--;
- primea = primes ? primes[a] : _XS_prev_prime(primea);
}
heap_destroy(&h2);
if (a != 7) croak("final loop is set for a=7, a = %lu\n", a);
@@ -314,110 +441,98 @@ static UV phi(UV x, UV a, UV const* const primes)
sum += count * mapes7(val);
}
heap_destroy(&h1);
+ Safefree(primes);
return (UV) sum;
}
-static UV* generate_small_primes(UV *n)
+/* Legendre's method. Interesting and a good test for phi(x,a), but Lehmer's
+ * method is much faster (Legendre: a = pi(n^.5), Lehmer: a = pi(n^.25)) */
+UV _XS_legendre_pi(UV n)
{
- const unsigned char* sieve;
- UV* primea;
- UV nth_prime;
- UV i;
-
- /* Dusart 1999 bound */
- nth_prime = (*n <= 10) ? 29 : *n * ( log(*n) + log(log(*n)) ) + 1;
+ UV a;
+ if (n < SIEVE_LIMIT)
+ return _XS_prime_count(2, n);
- if (get_prime_cache(nth_prime, &sieve) < nth_prime) {
- release_prime_cache(sieve);
- croak("Could not generate sieve for %"UVuf, nth_prime);
- }
- New(0, primea, *n+4, UV);
- if (primea == 0)
- croak("Can not allocate small primes\n");
- primea[0] = 0; primea[1] = 2; primea[2] = 3; primea[3] = 5;
- i = 3;
- START_DO_FOR_EACH_SIEVE_PRIME( sieve, 7, nth_prime ) {
- if (i >= *n) break;
- primea[++i] = p;
- } END_DO_FOR_EACH_SIEVE_PRIME
- release_prime_cache(sieve);
- if (i < *n)
- croak("Did not generate enough small primes.\n");
- if (verbose > 1) printf("generated %lu small primes, from 2 to %lu\n", i, primea[i]);
- return primea;
-}
+ a = _XS_legendre_pi(sqrt(n)+0.5);
-/* Given an array of primes[1..lastprime], return Pi(n) where n <= lastprime.
- * This is actually quite fast, and definitely faster than sieving. By using
- * this we can avoid caching prime counts and also skip most calls to the
- * segment siever.
- */
-static UV bs_prime_count(UV n, UV const* const primes, UV lastprime)
-{
- UV i, j;
- if (n < 2) return 0;
- /* if (n > primes[lastprime]) return _XS_prime_count(2, n); */
- if (n > primes[lastprime])
- croak("called bspc(%lu) with counts up to %lu\n", n, primes[lastprime]);
- i = 1;
- j = lastprime;
- while (i < j) {
- UV mid = (i+j)/2;
- if (primes[mid] <= n) i = mid+1;
- else j = mid;
- }
- return i-1;
+ return phi(n, a) + a - 1;
}
-/* Legendre's method. Interesting and a good test for phi(x,a), but Lehmer's
- * method is much faster (Legendre: a = pi(n^.5), Lehmer: a = pi(n^.25)) */
-UV _XS_legendre_pi(UV n)
+/* Meissel's method. */
+UV _XS_meissel_pi(UV n)
{
- UV a;
- if (n < 30000)
+ UV a, b, c, sum, i, lastprime, lastpc, lastw, lastwpc;
+ const UV* primes = 0; /* small prime cache */
+ DECLARE_TIMING_VARIABLES;
+ if (n < SIEVE_LIMIT)
return _XS_prime_count(2, n);
- a = _XS_legendre_pi(sqrt(n));
- prime_precalc(a);
- return phi(n, a, 0) + a - 1;
+ if (verbose > 0) printf("meissel %lu stage 1: calculate a,b,c \n", n);
+ TIMING_START;
+ a = _XS_meissel_pi(pow(n, 1.0/3.0)+0.5); /* a = floor(n^1/3) */
+ b = _XS_meissel_pi(sqrt(n)+0.5); /* b = floor(n^1/2) */
+ c = a; /* c = a */
+ TIMING_END_PRINT("stage 1")
+
+ if (verbose > 0) printf("meissel %lu stage 2: phi(x,a) (a=%lu b=%lu c=%lu)\n", n, a, b, c);
+ TIMING_START;
+ sum = phi(n, a) + ((b+a-2) * (b-a+1) / 2);
+ if (verbose > 0) printf("phi(%lu,%lu) = %lu. sum = %lu\n", n, a, sum - ((b+a-2) * (b-a+1) / 2), sum);
+ TIMING_END_PRINT("phi(x,a)")
+
+ lastprime = b*16;
+ if (verbose > 0) printf("meissel %lu stage 3: %lu small primes\n", n, lastprime);
+ TIMING_START;
+ primes = generate_small_primes(lastprime);
+ if (primes == 0) croak("Error generating primes.\n");
+ lastpc = primes[lastprime];
+ TIMING_END_PRINT("small primes")
+
+ prime_precalc( sqrt( n / primes[a+1] ) );
+
+ if (verbose > 0) printf("meissel %lu stage 4: loop %lu to %lu, pc to %lu\n", n, a+1, b, n/primes[a+1]);
+ TIMING_START;
+ /* Reverse the i loop so w increases. Count w in segments. */
+ lastw = 0;
+ lastwpc = 0;
+ for (i = b; i > a; i--) {
+ UV w = n / primes[i];
+ lastwpc = (w <= lastpc) ? bs_prime_count(w, primes, lastprime)
+ : lastwpc + _XS_prime_count(lastw+1, w);
+ lastw = w;
+ sum = sum - lastwpc;
+ }
+ TIMING_END_PRINT("stage 4")
+ Safefree(primes);
+ return sum;
}
-/* Lehmer's method. This is basically Riesel's Lehmer function (page 22),
- * with some additional code to help optimize it.
- */
+/* Lehmer's method. This is basically Riesel's Lehmer function (page 22),
+ * with some additional code to help optimize it. */
UV _XS_lehmer_pi(UV n)
{
- UV z, a, b, c, sum, i, j, lastprimea, lastpc, lastw, lastwpc;
- UV* primea = 0; /* small prime cache, first b=pi(z)=pi(sqrt(n)) primes */
+ UV z, a, b, c, sum, i, j, lastprime, lastpc, lastw, lastwpc;
+ const UV* primes = 0; /* small prime cache, first b=pi(z)=pi(sqrt(n)) */
DECLARE_TIMING_VARIABLES;
- if (n < 1000000)
+ if (n < SIEVE_LIMIT)
return _XS_prime_count(2, n);
if (verbose > 0) printf("lehmer %lu stage 1: calculate a,b,c \n", n);
TIMING_START;
z = sqrt((double)n+0.5);
- a = _XS_lehmer_pi(sqrt((double)z)+0.5);
- b = _XS_lehmer_pi(z);
- c = _XS_lehmer_pi(pow((double)n, 1.0/3.0)+0.5);
+ a = _XS_lehmer_pi(sqrt((double)z)+0.5); /* a = floor(n^1/4) */
+ b = _XS_lehmer_pi(z); /* b = floor(n^1/2) */
+ c = _XS_lehmer_pi(pow((double)n, 1.0/3.0)+0.5); /* c = floor(n^1/3) */
TIMING_END_PRINT("stage 1")
- /* We're going to sieve for small primes twice, in the interest of saving
- * memory. For phi(x,a), get just as many as are needed (a+1) because it
- * is our memory bottleneck. Then sieve for more afterwards. */
if (verbose > 0) printf("lehmer %lu stage 2: phi(x,a) (z=%lu a=%lu b=%lu c=%lu)\n", n, z, a, b, c);
TIMING_START;
- lastprimea = a+1;
- primea = generate_small_primes(&lastprimea);
- if (primea == 0 || lastprimea < a+1) croak("Error generating primes.\n");
-
- sum = phi(n, a, primea) + ( (b+a-2) * (b-a+1) / 2);
-
- Safefree(primea);
+ sum = phi(n, a) + ((b+a-2) * (b-a+1) / 2);
TIMING_END_PRINT("phi(x,a)")
@@ -425,38 +540,72 @@ UV _XS_lehmer_pi(UV n)
* fast prime counts. Using a higher value here will mean more memory but
* faster operation. A lower value saves memory at the expense of more
* segment sieving.*/
- lastprimea = b*16;
- if (verbose > 0) printf("lehmer %lu stage 3: %lu small primes\n", n, lastprimea);
+ lastprime = b*16;
+ if (verbose > 0) printf("lehmer %lu stage 3: %lu small primes\n", n, lastprime);
TIMING_START;
- primea = generate_small_primes(&lastprimea);
- if (primea == 0 || lastprimea < b) croak("Error generating primes.\n");
- lastpc = primea[lastprimea];
+ primes = generate_small_primes(lastprime);
+ if (primes == 0) croak("Error generating primes.\n");
+ lastpc = primes[lastprime];
TIMING_END_PRINT("small primes")
- /* Ensure we have the base sieve for big prime_count ( n/primea[i] ). */
+ /* Ensure we have the base sieve for big prime_count ( n/primes[i] ). */
/* This is about 75k for n=10^13, 421k for n=10^15, 2.4M for n=10^17 */
- prime_precalc( sqrt( n / primea[a+1] ) );
+ prime_precalc( sqrt( n / primes[a+1] ) );
- if (verbose > 0) printf("lehmer %lu stage 4: loop %lu to %lu, pc to %lu\n", n, a+1, b, n/primea[a+1]);
+ if (verbose > 0) printf("lehmer %lu stage 4: loop %lu to %lu, pc to %lu\n", n, a+1, b, n/primes[a+1]);
TIMING_START;
/* Reverse the i loop so w increases. Count w in segments. */
lastw = 0;
lastwpc = 0;
for (i = b; i >= a+1; i--) {
- UV w = n / primea[i];
- lastwpc = (w <= lastpc) ? bs_prime_count(w, primea, lastprimea)
+ UV w = n / primes[i];
+ lastwpc = (w <= lastpc) ? bs_prime_count(w, primes, lastprime)
: lastwpc + _XS_prime_count(lastw+1, w);
lastw = w;
sum = sum - lastwpc;
if (i <= c) {
- UV bi = bs_prime_count( (UV) (sqrt(w) + 0.5), primea, lastprimea );
+ UV bi = bs_prime_count( (UV) (sqrt(w) + 0.5), primes, lastprime );
for (j = i; j <= bi; j++) {
- sum = sum - bs_prime_count(w / primea[j], primea, lastprimea) + j - 1;
+ sum = sum - bs_prime_count(w / primes[j], primes, lastprime) + j - 1;
}
}
}
TIMING_END_PRINT("stage 4")
- Safefree(primea);
+ Safefree(primes);
return sum;
}
+
+
+#ifdef PRIMESIEVE_STANDALONE
+int main(int argc, char *argv[])
+{
+ UV n, pi;
+ double t;
+ const char* method;
+ struct timeval t0, t1;
+
+ if (argc <= 1) { printf("usage: %s <n> [<method>]\n", argv[0]); return(1); }
+ n = atol(argv[1]);
+ if (n < 2) { printf("Pi(%lu) = 0\n", n); return(0); }
+
+ if (argc > 2)
+ method = argv[2];
+ else
+ method = "lehmer";
+
+ gettimeofday(&t0, 0);
+ if (!strcasecmp(method, "lehmer")) { pi = _XS_lehmer_pi(n); }
+ else if (!strcasecmp(method, "meissel")) { pi = _XS_meissel_pi(n); }
+ else if (!strcasecmp(method, "legendre")) { pi = _XS_legendre_pi(n); }
+ else if (!strcasecmp(method, "sieve")) { pi = _XS_prime_count(2, n); }
+ else {
+ printf("method must be one of: lehmer, meissel, legendre, or sieve\n");
+ return(2);
+ }
+ gettimeofday(&t1, 0);
+ t = (t1.tv_sec-t0.tv_sec); t *= 1000000.0; t += (t1.tv_usec - t0.tv_usec);
+ printf("%8s Pi(%lu) = %lu in %10.5fs\n", method, n, pi, t / 1000000.0);
+ return(0);
+}
+#endif
diff --git a/lehmer.h b/lehmer.h
index 410feb0..e071deb 100644
--- a/lehmer.h
+++ b/lehmer.h
@@ -4,6 +4,7 @@
#include "ptypes.h"
extern UV _XS_legendre_pi(UV n);
+extern UV _XS_meissel_pi(UV n);
extern UV _XS_lehmer_pi(UV n);
#endif
diff --git a/t/13-primecount.t b/t/13-primecount.t
index 23765db..dc1c53a 100644
--- a/t/13-primecount.t
+++ b/t/13-primecount.t
@@ -81,7 +81,8 @@ plan tests => 0 + 1
+ 3*scalar(keys %pivals32)
+ scalar(keys %pivals_small)
+ $use64 * 3 * scalar(keys %pivals64)
- + scalar(keys %intervals);
+ + scalar(keys %intervals)
+ + 1;
ok( eval { prime_count(13); 1; }, "prime_count in void context");
@@ -113,6 +114,9 @@ while (my($range, $expect) = each (%intervals)) {
is( prime_count($low,$high), $expect, "prime_count($range) = $expect");
}
+# Defect found in prime binary search
+is( prime_count(130066574), 7381740, "prime_count(130066574) = 7381740");
+
sub parse_range {
my($range) = @_;
my($low,$high);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git
More information about the Pkg-perl-cvs-commits
mailing list