[libmath-prime-util-perl] 01/11: More efficient ranged Moebius code
Partha P. Mukherjee
ppm-guest at moszumanska.debian.org
Thu May 21 18:48:31 UTC 2015
This is an automated email from the git hooks/post-receive script.
ppm-guest pushed a commit to annotated tag v0.28
in repository libmath-prime-util-perl.
commit 2f92c04c7360ada6f37e7ba9f9fc40bce3b24e52
Author: Dana Jacobsen <dana at acm.org>
Date: Mon May 20 16:00:06 2013 -0700
More efficient ranged Moebius code
---
util.c | 102 +++++++++++++++++++++++++++++++++--------------------------------
1 file changed, 52 insertions(+), 50 deletions(-)
diff --git a/util.c b/util.c
index f0f9e47..043c484 100644
--- a/util.c
+++ b/util.c
@@ -734,7 +734,7 @@ UV _XS_nth_prime(UV n)
return ( (segbase*30) + p );
}
-/* Return an IV array with lo-hi+1 elements. mu[k-lo] = µ(k) for k = lo .. hi.
+/* Return a char array with lo-hi+1 elements. mu[k-lo] = µ(k) for k = lo .. hi.
* It is the callers responsibility to call Safefree on the result. */
#define PGTLO(p,lo) ((p) >= lo) ? (p) : ((p)*(lo/(p)) + ((lo%(p))?(p):0))
#define P2GTLO(pinit, p, lo) \
@@ -744,16 +744,30 @@ char* _moebius_range(UV lo, UV hi)
char* mu;
UV i;
UV sqrtn = isqrt(hi);
-#if 1
- IV* A;
+#if 0
+ /* Simple char method. Needs way too many primes. */
+ New(0, mu, hi-lo+1, char);
+ if (mu == 0)
+ croak("Could not get memory for %"UVuf" moebius results\n", hi-lo+1);
+ memset(mu, 1, hi-lo+1);
+ if (lo == 0) mu[0] = 0;
+ prime_precalc( _XS_nth_prime_upper(hi) );
+ START_DO_FOR_EACH_PRIME(2, hi) {
+ UV p2 = p*p;
+ for (i = PGTLO(p2, lo); i <= hi; i += p2)
+ mu[i-lo] = 0;
+ for (i = PGTLO(p, lo); i <= hi; i += p)
+ mu[i-lo] = -mu[i-lo];
+ } END_DO_FOR_EACH_PRIME
+#endif
- /* This implementation follows that of Deléglise & Rivat (1996), which is
- * a segmented version of Lioen & van de Lune (1994). Downside is that it
+#if 0
+ /* This implementation follows Deléglise & Rivat (1996), which is a
+ * segmented version of Lioen & van de Lune (1994). Downside is that it
* uses an array of IV's, so too much memory. Pawleicz (2011) shows a small
* variation but seems to just be a rearrangement -- there is no time or
- * space difference on my machines. (TODO: but maybe for hi > 2^63?)
- */
-
+ * space difference on my machines. */
+ IV* A;
New(0, A, hi-lo+1, IV);
if (A == 0)
croak("Could not get memory for %"UVuf" moebius results\n", hi-lo+1);
@@ -778,58 +792,45 @@ char* _moebius_range(UV lo, UV hi)
}
Safefree(A);
#endif
-#if 0
- UV p;
- /* Simple char method, Needs way too many primes */
- New(0, mu, hi-lo+1, char);
- if (mu == 0)
- croak("Could not get memory for %"UVuf" moebius results\n", hi-lo+1);
- memset(mu, 1, hi-lo+1);
- if (lo == 0) mu[0] = 0;
- prime_precalc( _XS_nth_prime_upper(hi) );
- for (p = 2; p <= hi; p = _XS_next_prime(p)) {
- UV p2 = p*p;
- for (i = PGTLO(p2, lo); i <= hi; i += p2)
- mu[i-lo] = 0;
- for (i = PGTLO(p, lo); i <= hi; i += p)
- mu[i-lo] = -mu[i-lo];
- }
-#endif
-#if 0
- /* Kuznetsov's transform of Deléglise & Rivat (1996) into logs.
- * (1) I'm using the log function, which should be fixed (easy).
- * (2) it doesn't work. try 64-101 vs. 64 vs. 100.
- */
- UV p;
+
+#if 1
+ /* Kuznetsov indicates that the Deléglise & Rivat (1996) method can be
+ * modified to work on logs, which allows us to operate with no
+ * intermediate memory at all. There isn't really any time savings. */
unsigned char* A;
- New(0, A, hi-lo+1, unsigned char);
- if (A == 0)
+ unsigned char logp;
+ UV nextlog;
+
+ Newz(0, mu, hi-lo+1, char);
+ if (mu == 0)
croak("Could not get memory for %"UVuf" moebius results\n", hi-lo+1);
- memset(A, 0, hi-lo+1);
+ A = (unsigned char*) mu;
if (sqrtn*sqrtn != hi) sqrtn++; /* ceil sqrtn */
- prime_precalc(sqrtn);
- for (p = 2; p <= sqrtn; p = _XS_next_prime(p)) {
+
+ logp = 1; nextlog = 3; /* 2+1 */
+ START_DO_FOR_EACH_PRIME(2, sqrtn) {
UV p2 = p*p;
- unsigned char l = 1 | (unsigned char) ( log(p)/log(2) );
+ if (p > nextlog) {
+ logp += 2; /* logp is 1 | ceil(log(p)/log(2)) */
+ nextlog = ((nextlog-1)*4)+1;
+ }
for (i = PGTLO(p, lo); i <= hi; i += p)
- A[i-lo] += l;
+ A[i-lo] += logp;
for (i = PGTLO(p2, lo); i <= hi; i += p2)
A[i-lo] |= 0x80;
- }
+ } END_DO_FOR_EACH_PRIME
- New(0, mu, hi-lo+1, char);
- if (mu == 0)
- croak("Could not get memory for %"UVuf" moebius results\n", hi-lo+1);
+ logp = 0; nextlog = lo;
+ while (nextlog >>= 1) logp++;
+ nextlog = 2UL << logp;
for (i = lo; i <= hi; i++) {
unsigned char a = A[i-lo];
- unsigned char log2i = (unsigned char) ( log(i)/log(2) );
- //printf("i = %lu a = %lu log2i = %lu\n", i, a, log2i);
- if (a & 0x80) { mu[i-lo] = 0; }
- else if (a >= log2i) { mu[i-lo] = 1 - 2*(a&1); }
- else { mu[i-lo] = -1 + 2*(a&1); }
+ if (i >= nextlog) { logp++; nextlog *= 2; } /* logp is log(p)/log(2) */
+ if (a & 0x80) { mu[i-lo] = 0; }
+ else if (a >= logp) { mu[i-lo] = 1 - 2*(a&1); }
+ else { mu[i-lo] = -1 + 2*(a&1); }
}
if (lo == 0) mu[0] = 0;
- Safefree(A);
#endif
return mu;
}
@@ -878,8 +879,9 @@ IV _XS_mertens(UV n) {
}
return -sum;
#else
- /* Deléglise and Rivat (1996) using u = n^1/2 and unsegmented. */
- /* Very simple, but they use u = n^1/3 and segment */
+ /* See Deléglise and Rivat (1996) for a segmented n^1/3 algorithm.
+ * This implementation uses their lemma 2.1 directly, so is n^1/2.
+ */
UV u, i, m, nmk;
char* mu;
IV* M;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git
More information about the Pkg-perl-cvs-commits
mailing list