[libmath-prime-util-perl] 77/181: non-XSUB tweaks
Partha P. Mukherjee
ppm-guest at moszumanska.debian.org
Thu May 21 18:51:08 UTC 2015
This is an automated email from the git hooks/post-receive script.
ppm-guest pushed a commit to annotated tag v0.36
in repository libmath-prime-util-perl.
commit 150fa335dab67667ad542e8740512ec6813f8257
Author: bulk88 <bulk88 at hotmail.com>
Date: Tue Dec 31 05:39:06 2013 -0500
non-XSUB tweaks
aks.c: in _XS_is_aks_prime, _XS_get_verbose is usually inlined to a C static
deref; move the initialization closer to the first use so that the C
static isn't read on a couple of quick shortcut return branches
cache.c: in prime_memfree, do the Safefree outside of the lock, which allows
more multithreading; also hint to the compiler to use xchg if possible
(I couldn't get -O1 Visual C 2003 to emit xchg)
factor.c: in pp1_pow, factor out the common expression to before the
conditional; the 2nd one could be optimized with exchanges and other
tricks, but I'll leave that to compiler developers to figure out
mulmod.h has a multi-eval problem: addmod evaluates its arg "a" 3 times, so
mulmod is called 3 times with the same args
util.c: in _XS_prev_prime, add a comment not to try to merge the 2 loops
util.c: in count_segment_ranged, reduce liveness of various vars; don't fix
whitespace, to avoid massive WS changes that mess up git blames
---
aks.c | 3 ++-
cache.c | 10 +++++++---
factor.c | 5 +++--
mulmod.h | 1 +
util.c | 21 ++++++++++++++-------
5 files changed, 27 insertions(+), 13 deletions(-)
diff --git a/aks.c b/aks.c
index 6f4f9d8..6fbc9b6 100644
--- a/aks.c
+++ b/aks.c
@@ -215,7 +215,7 @@ int _XS_is_aks_prime(UV n)
{
UV sqrtn, limit, r, rlimit, a;
double log2n;
- int verbose = _XS_get_verbose();
+ int verbose;
if (n < 2)
return 0;
@@ -229,6 +229,7 @@ int _XS_is_aks_prime(UV n)
log2n = log(n) / log(2); /* C99 has a log2() function */
limit = (UV) floor(log2n * log2n);
+ verbose = _XS_get_verbose();
if (verbose) { printf("# aks limit is %lu\n", (unsigned long) limit); }
for (r = 2; r < n; r++) {
diff --git a/cache.c b/cache.c
index 0e8b101..b47618e 100644
--- a/cache.c
+++ b/cache.c
@@ -237,15 +237,19 @@ void prime_precalc(UV n)
void prime_memfree(void)
{
+ unsigned char* local_prime_segment = 0;
MPUassert(mutex_init == 1, "cache mutexes have not been initialized");
MUTEX_LOCK(&segment_mutex);
/* Don't free if another thread is using it */
- if ( (prime_segment != 0) && (prime_segment_is_available) ) {
- Safefree(prime_segment);
- prime_segment = 0;
+ if ( (prime_segment != local_prime_segment) && (prime_segment_is_available) ) {\
+ /* hint to use xchg op */
+ unsigned char* local_prime_segment2 = local_prime_segment;
+ local_prime_segment = prime_segment;
+ prime_segment = local_prime_segment2;
}
MUTEX_UNLOCK(&segment_mutex);
+ if(local_prime_segment) Safefree(local_prime_segment);
WRITE_LOCK_START;
/* Put primary cache back to initial state */
diff --git a/factor.c b/factor.c
index 7b11512..4683b38 100644
--- a/factor.c
+++ b/factor.c
@@ -739,11 +739,12 @@ static void pp1_pow(UV *cX, unsigned long exp, UV n)
bit = 1UL << (b-2);
}
while (bit) {
+ UV T = mulsubmod(X, Y, X0, n);
if ( exp & bit ) {
- X = mulsubmod(X, Y, X0, n);
+ X = T;
Y = mulsubmod(Y, Y, 2, n);
} else {
- Y = mulsubmod(X, Y, X0, n);
+ Y = T;
X = mulsubmod(X, X, 2, n);
}
bit >>= 1;
diff --git a/mulmod.h b/mulmod.h
index 2eff302..c03851d 100644
--- a/mulmod.h
+++ b/mulmod.h
@@ -118,6 +118,7 @@ static INLINE UV submod(UV a, UV b, UV n) {
/* a^2 + c mod n */
#define sqraddmod(a, c, n) addmod(sqrmod(a,n), c, n)
/* a*b + c mod n */
+/* TODO mulmod is a function, addmod is a multi eval macro == mulmod called 3x uselessly */
#define muladdmod(a, b, c, n) addmod(mulmod(a,b,n), c, n)
/* a*b - c mod n */
#define mulsubmod(a, b, c, n) submod(mulmod(a,b,n), c, n)
diff --git a/util.c b/util.c
index 4f14cae..3771f7f 100644
--- a/util.c
+++ b/util.c
@@ -293,6 +293,10 @@ UV _XS_prev_prime(UV n)
m = n - d*30;
if (n < 30*NPRIME_SIEVE30) {
+ /* don't merge this loop with the next loop prime_sieve30 is a C static,
+ which on CISC CPUs can be accessed with instruction pointer relative
+ addressing, instead of a pointer in a register deref addressing which
+ frees a register */
do {
m = prevwheel30[m];
if (m==29) { MPUassert(d>0, "d 0 in prev_prime"); d--; }
@@ -378,21 +382,19 @@ static UV count_segment_maxcount(const unsigned char* sieve, UV base, UV nbytes,
*/
static UV count_segment_ranged(const unsigned char* sieve, UV nbytes, UV lowp, UV highp)
{
+ MPUassert( sieve != 0, "count_segment_ranged incorrect args");
+ if (nbytes == 0) return 0;
+{
UV count = 0;
- UV lo_d = lowp/30;
- UV lo_m = lowp - lo_d*30;
UV hi_d = highp/30;
- UV hi_m = highp - hi_d*30;
-
- MPUassert( sieve != 0, "count_segment_ranged incorrect args");
if (hi_d >= nbytes) {
hi_d = nbytes-1;
highp = hi_d*30+29;
}
- if ( (nbytes == 0) || (highp < lowp) )
+ if (highp < lowp)
return 0;
#if 0
@@ -402,7 +404,9 @@ static UV count_segment_ranged(const unsigned char* sieve, UV nbytes, UV lowp, U
END_DO_FOR_EACH_SIEVE_PRIME;
return count;
#endif
-
+{
+ UV lo_d = lowp/30;
+ UV lo_m = lowp - lo_d*30;
/* Count first fragment */
if (lo_m > 1) {
UV upper = (highp <= (lo_d*30+29)) ? highp : (lo_d*30+29);
@@ -418,6 +422,7 @@ static UV count_segment_ranged(const unsigned char* sieve, UV nbytes, UV lowp, U
/* Count bytes in the middle */
{
+ UV hi_m = highp - hi_d*30;
UV count_bytes = hi_d - lo_d + (hi_m == 29);
if (count_bytes > 0) {
count += count_zero_bits(sieve+lo_d, count_bytes);
@@ -436,6 +441,8 @@ static UV count_segment_ranged(const unsigned char* sieve, UV nbytes, UV lowp, U
return count;
}
+}
+}
/*
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git
More information about the Pkg-perl-cvs-commits
mailing list