[libmath-prime-util-perl] 02/13: Some more speedups
Partha P. Mukherjee
ppm-guest at moszumanska.debian.org
Thu May 21 18:47:41 UTC 2015
This is an automated email from the git hooks/post-receive script.
ppm-guest pushed a commit to annotated tag v0.24
in repository libmath-prime-util-perl.
commit 972b3da8f9800d063126e81500f5e0782a510081
Author: Dana Jacobsen <dana at acm.org>
Date: Tue Mar 5 18:51:24 2013 -0800
Some more speedups
---
Changes | 4 +++
lehmer.c | 100 +++++++++++++++++++++++++++++++++------------------------------
2 files changed, 56 insertions(+), 48 deletions(-)
diff --git a/Changes b/Changes
index 8b20caf..564a113 100644
--- a/Changes
+++ b/Changes
@@ -4,6 +4,10 @@ Revision history for Perl extension Math::Prime::Util.
- euler_phi on a range wasn't working right with some ranges.
+ - Fix compilation with old pre-C99 strict compilers (decl after statement).
+
+ - Some more prime count improvements to speed and space.
+
0.23 5 March 2013
- Replace XS Zeta for x > 5 with series from Cephes. It is 1 eps more
diff --git a/lehmer.c b/lehmer.c
index cc666d1..e1c7fd4 100644
--- a/lehmer.c
+++ b/lehmer.c
@@ -9,7 +9,7 @@
* necessary, we can spend less time sieving. This has a direct impact on
* the memory used in stage 4. About 10-12 seems to balance with the amount
* taken by the phi algorithm. */
-#define SIEVE_MULT 12
+#define SIEVE_MULT 10
/*****************************************************************************
*
@@ -107,9 +107,10 @@ typedef signed long IV;
static UV isqrt(UV n)
{
+ UV root;
if (sizeof(UV) == 8 && n >= 18446744065119617025UL) return 4294967295UL;
if (sizeof(UV) == 4 && n >= 4294836225UL) return 65535UL;
- UV root = (UV) sqrt((double)n);
+ root = (UV) sqrt((double)n);
while (root*root > n) root--;
while ((root+1)*(root+1) <= n) root++;
return root;
@@ -183,40 +184,36 @@ static UV* generate_small_primes(UV n)
static UV icbrt(UV n)
{
-#if 0
- /* The integer cube root code is about 30% faster for me */
+ UV root = 0;
+ /* int s = BITS_PER_WORD - (BITS_PER_WORD % 3); */
#if BITS_PER_WORD == 32
+ int s = 30;
if (n >= UVCONST(4291015625)) return UVCONST(1625);
#else
+ int s = 63;
if (n >= UVCONST(18446724184312856125)) return UVCONST(2642245);
#endif
- UV root = (UV) pow(n, 1.0/3.0);
+#if 0
+ /* The integer cube root code is about 30% faster for me */
+ root = (UV) pow(n, 1.0/3.0);
if (root*root*root > n) {
root--;
while (root*root*root > n) root--;
} else {
while ((root+1)*(root+1)*(root+1) <= n) root++;
}
- return root;
-#else
- int s;
- UV y = 0;
- /* Alternately: s = (sizeof(UV)*8)-(sizeof(UV)*8)%3 */
-#if BITS_PER_WORD == 32
- for (s = 30; s >= 0; s -= 3) {
#else
- for (s = 63; s >= 0; s -= 3) {
-#endif
+ for ( ; s >= 0; s -= 3) {
UV b;
- y += y;
- b = 3*y*(y+1)+1;
+ root += root;
+ b = 3*root*(root+1)+1;
if ((n >> s) >= b) {
n -= b << s;
- y++;
+ root++;
}
}
- return y;
#endif
+ return root;
}
@@ -318,20 +315,11 @@ static void vcarray_destroy(vcarray_t* l)
}
/* Insert a value/count pair. We do this indirection because about 80% of
* the calls result in a merge with the previous entry. */
-#define vcarray_insert(arr, val, count) \
- if (arr.n > 0 && arr.a[arr.n-1].v == val) \
- arr.a[arr.n-1].c += count; \
- else \
- vcarray_insert_func(&arr, val, count);
-static void vcarray_insert_func(vcarray_t* l, UV val, IV count)
+static void vcarray_insert(vcarray_t* l, UV val, IV count)
{
UV n = l->n;
- if (n > 0 && l->a[n-1].v <= val) {
- if (l->a[n-1].v < val)
- croak("Previous value was %lu, inserting %lu out of order\n", l->a[n-1].v, val);
- l->a[n-1].c += count;
- return;
- }
+ if (n > 0 && l->a[n-1].v < val)
+ croak("Previous value was %lu, inserting %lu out of order\n", l->a[n-1].v, val);
if (n >= l->size) {
UV new_size;
if (l->size == 0) {
@@ -434,6 +422,8 @@ static UV phi(UV x, UV a)
UV sum = 0;
IV count;
const UV* primes;
+ vcarray_t a1, a2;
+ vc_t* arr;
if (a == 1) return ((x+1)/2);
if (a <= 7) return mapes(x, a);
@@ -443,35 +433,45 @@ static UV phi(UV x, UV a)
croak("Could not generate primes for phi(%lu,%lu)\n", x, a);
if (x < primes[a+1]) { Safefree(primes); return (x > 0) ? 1 : 0; }
- vcarray_t a1 = vcarray_create();
- vcarray_t a2 = vcarray_create();
- vcarray_insert(a1, x, 1);
+ a1 = vcarray_create();
+ a2 = vcarray_create();
+ vcarray_insert(&a1, x, 1);
while (a > 7) {
UV primea = primes[a];
+ UV sval_last = 0;
+ IV sval_count = 0;
+ arr = a1.a;
for (i = 0; i < a1.n; i++) {
- val = a1.a[i].v;
- count = a1.a[i].c;
- if (count == 0)
- continue;
+ count = arr[i].c;
+ if (count == 0) continue; /* Skip if count = 0 */
+ val = arr[i].v;
sval = val / primea;
- if (sval >= primea) {
- vcarray_insert(a2, sval, -count);
- } else {
- sum -= count;
+ if (sval < primea) break; /* stop inserting into a2 if small */
+ if (sval != sval_last) { /* non-merged value. Insert into a2 */
+ if (sval_last != 0)
+ vcarray_insert(&a2, sval_last, sval_count);
+ sval_last = sval;
+ sval_count = 0;
}
+ sval_count -= count; /* Accumulate count for this sval */
}
+ if (sval_last != 0) /* Insert the last sval */
+ vcarray_insert(&a2, sval_last, sval_count);
+ /* For each small sval, add up the counts */
+ for ( ; i < a1.n; i++)
+ sum -= arr[i].c;
/* Merge a1 and a2 into a1. a2 will be emptied. */
vcarray_merge(&a1, &a2);
a--;
}
vcarray_destroy(&a2);
if (a != 7) croak("final loop is set for a=7, a = %lu\n", a);
+ arr = a1.a;
for (i = 0; i < a1.n; i++) {
- val = a1.a[i].v;
- count = a1.a[i].c;
+ count = arr[i].c;
if (count != 0)
- sum += count * mapes7(val);
+ sum += count * mapes7( arr[i].v );
}
vcarray_destroy(&a1);
Safefree(primes);
@@ -578,11 +578,14 @@ UV _XS_lehmer_pi(UV n)
TIMING_END_PRINT("phi(x,a)")
- /* Sieve to get small primes. Get more than the minimum needed (b) to allow
- * fast prime counts. Using a higher value here will mean more memory but
- * faster operation. A lower value saves memory at the expense of more
- * segment sieving.*/
+ /* We get an array of the first b primes. This is used in stage 4. If we
+ * get more than necessary, we can use them to speed up some.
+ */
+#ifdef PRIMESIEVE_STANDALONE
lastprime = b*SIEVE_MULT;
+#else
+ lastprime = b; /* Use the precalc instead */
+#endif
if (verbose > 0) printf("lehmer %lu stage 3: %lu small primes\n", n, lastprime);
TIMING_START;
primes = generate_small_primes(lastprime);
@@ -594,6 +597,7 @@ UV _XS_lehmer_pi(UV n)
/* Ensure we have the base sieve for big prime_count ( n/primes[i] ). */
/* This is about 75k for n=10^13, 421k for n=10^15, 2.4M for n=10^17 */
prime_precalc(isqrt(n / primes[a+1]));
+ prime_precalc( (UV) pow(n, 3.0/5.0) ); /* Sieve more for speed */
if (verbose > 0) printf("lehmer %lu stage 4: loop %lu to %lu, pc to %lu\n", n, a+1, b, n/primes[a+1]);
TIMING_START;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git
More information about the Pkg-perl-cvs-commits
mailing list