[libmath-prime-util-perl] 57/181: Speedups for Legendre/Meissel/Lehmer/LMOS old prime count routines
Partha P. Mukherjee
ppm-guest at moszumanska.debian.org
Thu May 21 18:51:06 UTC 2015
This is an automated email from the git hooks/post-receive script.
ppm-guest pushed a commit to annotated tag v0.36
in repository libmath-prime-util-perl.
commit 0150346823118859ffd7efc9c6be99b981edc1a6
Author: Dana Jacobsen <dana at acm.org>
Date: Sun Dec 29 03:55:32 2013 -0800
Speedups for Legendre/Meissel/Lehmer/LMOS old prime count routines
---
lehmer.c | 95 ++++++++++++++++++++++++++++++++--------------------------------
1 file changed, 47 insertions(+), 48 deletions(-)
diff --git a/lehmer.c b/lehmer.c
index ab4bec9..50d1f85 100644
--- a/lehmer.c
+++ b/lehmer.c
@@ -37,17 +37,21 @@
* His Lehmer/Meissel/Legendre seem a bit slower in serial, but
* parallelize much better.
*
- * 4.80s 1.3MB LMO
- * 27.51s* 136.0MB Lehmer Walisch primecount v0.7, 8 threads
+ * 4.74s 1.3MB LMO
+ * 24.53s* 137.9MB Lehmer Walisch primecount v0.9, 8 threads
+ * 38.74s* 150.3MB LMOS Walisch primecount v0.9, 8 threads
* 42.52s* 159.4MB Lehmer standalone, 8 threads
- * 50.82s* 136.2MB Meissel Walisch primecount v0.7, 8 threads
- * 51.55s 154.1MB LMOS standalone, 1 thread
- * 64.06s* 144.3MB Legendre Walisch primecount v0.7, 8 threads
- * 66.41s 67.0MB LMOS
- * 80.85s 286.6MB Meissel
- * 99.97s 159.6MB Lehmer
- * 114.40s 28.4MB Lehmer Walisch primecount v0.7, 1 thread
- * 171.24s 83.5MB Legendre
+ * 42.82s* 137.9MB Meissel Walisch primecount v0.9, 8 threads
+ * 51.88s 153.9MB LMOS standalone, 1 thread
+ * 52.01s* 145.5MB Legendre Walisch primecount v0.9, 8 threads
+ * 64.96s 160.3MB Lehmer standalone, 1 thread
+ * 67.16s 67.0MB LMOS
+ * 80.42s 286.6MB Meissel
+ * 99.70s 159.6MB Lehmer
+ * 107.43s 28.5MB Lehmer Walisch primecount v0.9, 1 thread
+ * 174.51s 83.5MB Legendre
+ * 185.11s 25.6MB LMOS Walisch primecount v0.9, 1 thread
+ * 191.19s 24.8MB Meissel Walisch primecount v0.9, 1 thread
* 868.96s 1668.1MB Lehmer pix4 by T.R. Nicely
*
* Reference: Hans Riesel, "Prime Numbers and Computer Methods for
@@ -212,11 +216,12 @@ static UV bs_prime_count(uint32_t n, uint32_t const* const primes, uint32_t last
{
UV i, j;
if (n <= 2) return (n == 2);
- /* if (n > primes[lastidx]) return _XS_prime_count(2, n); */
- if (n >= primes[lastidx]) {
- if (n == primes[lastidx]) return lastidx;
- croak("called bspc(%u) with counts up to %u\n", n, primes[lastidx]);
- }
+ /* If n is out of range, we could:
+ * 1. return _XS_prime_count(2, n);
+ * 2. if (n == primes[lastidx]) return lastidx else croak("bspc range");
+ * 3. if (n >= primes[lastidx]) return lastidx;
+ */
+ if (n >= primes[lastidx]) return lastidx;
j = lastidx;
if (n < 8480) {
i = 1 + (n>>4);
@@ -233,6 +238,7 @@ static UV bs_prime_count(uint32_t n, uint32_t const* const primes, uint32_t last
if (primes[mid] <= n) i = mid+1;
else j = mid;
}
+ /* if (i-1 != _XS_prime_count(2, n)) croak("wrong count for %lu: %lu vs. %lu\n", n, i-1, _XS_prime_count(2, n)); */
return i-1;
}
@@ -329,53 +335,45 @@ static void phicache_free(cache_t* cache) {
static void phi_cache_insert(uint32_t x, uint32_t a, IV sum, cache_t* cache) {
uint32_t cap = ( (x+32) >> 5) << 5;
+ /* If sum is too large for the cache, just ignore it. */
+ if (sum < SHRT_MIN || sum > SHRT_MAX) return;
if (cache->val[a] == 0) {
Newz(0, cache->val[a], cap, int16_t);
+ if (cache->val[a] == 0) croak("phi cache allocation failure");
cache->max[a] = cap;
} else if (cache->max[a] < cap) {
uint32_t i;
Renew(cache->val[a], cap, int16_t);
+ if (cache->val[a] == 0) croak("phi cache allocation failure");
for (i = cache->max[a]; i < cap; i++)
cache->val[a][i] = 0;
cache->max[a] = cap;
}
- if (sum < SHRT_MIN || sum > SHRT_MAX)
- croak("phi(%u,%u) 16-bit overflow: sum = %ld\n", x, a, sum);
- if (cache->val[a] == 0)
- croak("phi cache allocation failure");
cache->val[a][x] = sum;
}
static IV _phi3(UV x, UV a, int sign, const uint32_t* const primes, const uint32_t lastidx, cache_t* cache)
{
IV sum;
+ UV a2, iters, c;
if (a <= 1)
- return sign * ((a == 0) ? x : x-x/2); /* Allows PHICACHEX be larger */
+ return sign * ((a == 0) ? x : x-x/2);
else if (PHI_CACHE_POPULATED(x, a))
return sign * cache->val[a][x];
else if (a <= PHIC)
sum = sign * tablephi(x,a);
else if (x < primes[a+1])
sum = sign;
- else if (x <= primes[lastidx] && x < primes[a]*primes[a])
+ else if (x <= primes[lastidx] && x < primes[a+1]*primes[a+1])
sum = sign * (bs_prime_count(x, primes, lastidx) - a + 1);
else {
- UV a2;
- if (a*a > x) {
- UV ix = isqrt(x);
- UV iters = bs_prime_count(ix, primes, a+1);
- sum = sign * (x - a + iters);
- for (a2 = 1; a2 <= iters; a2++)
- sum += _phi3( FAST_DIV(x, primes[a2]), a2-1, -sign, primes, lastidx, cache);
- } else {
- if (PHI_CACHE_POPULATED(x, PHIC))
- sum = sign * cache->val[PHIC][x];
- else
- sum = sign * tablephi(x, PHIC);
- for (a2 = PHIC+1; a2 <= a; a2++)
- sum += _phi3( FAST_DIV(x,primes[a2]), a2-1, -sign, primes, lastidx, cache);
- }
+ iters = (a*a > x) ? iters = bs_prime_count( isqrt(x), primes, a) : a;
+ c = (iters > PHIC) ? PHIC : iters;
+ a2 = PHI_CACHE_POPULATED(x, c) ? cache->val[c][x] : tablephi(x, c);
+ sum = sign * (iters - a + a2);
+ for (a2 = c+1; a2 <= iters; a2++)
+ sum += _phi3(FAST_DIV(x,primes[a2]), a2-1, -sign, primes, lastidx, cache);
}
if (a < PHICACHEA && x < PHICACHEX)
phi_cache_insert(x, a, sign * sum, cache);
@@ -456,10 +454,10 @@ static void vcarray_merge(vcarray_t* a, vcarray_t* b)
/* Merge anything in B that appears in A. */
for (ai = 0, bi = 0, bj = 0; bi < bn; bi++) {
- /* Skip forward in A until empty or aa[ai].v <= ba[bi].v */
UV bval = ba[bi].v;
- while (ai < an && aa[ai].v > bval)
- ai++;
+ /* Skip forward in A until empty or aa[ai].v <= ba[bi].v */
+ while (ai+8 < an && aa[ai+8].v > bval) ai += 8;
+ while (ai < an && aa[ai ].v > bval) ai++;
/* if A empty then copy the remaining elements */
if (ai >= an) {
if (bi == bj)
@@ -495,16 +493,17 @@ static void vcarray_merge(vcarray_t* a, vcarray_t* b)
/* merge A and B. Very simple using reverse merge. */
ai = an-1;
bi = bn-1;
- for (k = kn-1; k >= 0; k--) {
- if (ai < 0) { /* A is exhausted, just filling in B */
- if (bi < 0) croak("ran out of data during merge");
- aa[k] = ba[bi--];
- } else if (bi < 0) { /* We've caught up with A */
- break;
- } else if (aa[ai].v < ba[bi].v) {
- aa[k] = aa[ai--];
+ for (k = kn-1; k >= 0 && bi >= 0; k--) {
+ UV bval = ba[bi].v;
+ long startai = ai;
+ while (ai >= 15 && aa[ai-15].v < bval) ai -= 16;
+ while (ai >= 3 && aa[ai- 3].v < bval) ai -= 4;
+ while (ai >= 0 && aa[ai ].v < bval) ai--;
+ if (startai > ai) {
+ k = k - (startai - ai) + 1;
+ memmove(aa+k, aa+ai+1, (startai-ai) * sizeof(vc_t));
} else {
- if (aa[ai].v == ba[bi].v) croak("deduplication error");
+ if (ai >= 0 && aa[ai].v == bval) croak("deduplication error");
aa[k] = ba[bi--];
}
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git
More information about the Pkg-perl-cvs-commits
mailing list