[libmath-prime-util-perl] 23/72: Move primality functions to new file. Use Monty routines
Partha P. Mukherjee
ppm-guest at moszumanska.debian.org
Thu May 21 18:49:37 UTC 2015
This is an automated email from the git hooks/post-receive script.
ppm-guest pushed a commit to annotated tag v0.32
in repository libmath-prime-util-perl.
commit f246f75cbf60d2086793b6a25c4f5290d879512a
Author: Dana Jacobsen <dana at acm.org>
Date: Wed Sep 11 15:27:59 2013 -0700
Move primality functions to new file. Use Monty routines
---
.gitignore | 1 +
Changes | 9 +-
MANIFEST | 2 +
Makefile.PL | 13 +-
TODO | 9 +-
XS.xs | 3 +
aks.c | 1 -
factor.c | 527 +----------------------------
factor.h | 21 --
lib/Math/Prime/Util/PrimeArray.pm | 4 +-
primality.c | 689 ++++++++++++++++++++++++++++++++++++++
primality.h | 22 ++
util.c | 2 +-
util.h | 11 +
xt/primality-small.pl | 2 +-
15 files changed, 757 insertions(+), 559 deletions(-)
diff --git a/.gitignore b/.gitignore
index 4681eb4..55be186 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@ factor.o
sieve.o
util.o
lehmer.o
+primality.o
aks.o
blib*
b[0-9][0-9][0-9][0-9][0-9][0-9].txt
diff --git a/Changes b/Changes
index 695ed5b..ef8973f 100644
--- a/Changes
+++ b/Changes
@@ -18,7 +18,14 @@ Revision history for Perl module Math::Prime::Util
Thanks to Kim Walisch for all discussions about this.
- divisor_sum can take a '1' as the second argument as an alias for
- sub {1}, but works much faster.
+ sub {1}, but works much faster. (experimental, may change)
+
+ - Incorporate Montgomery reduction for primality testing, thanks to
+ Wojciech Izykowski. This is a 1.3 to 1.5x speedup for is_prob_prime,
+ is_prime, and is_strong_pseudoprime for numbers > 2^32 on x86_64.
+ This also things like prime_iterator for values > 2^32.
+
+ - Primality functions moved to their own file primality.c.
0.31 2013-08-07
diff --git a/MANIFEST b/MANIFEST
index 1b3031f..d16dc1e 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -25,6 +25,8 @@ factor.h
factor.c
lehmer.h
lehmer.c
+primality.h
+primality.c
sieve.h
sieve.c
util.h
diff --git a/Makefile.PL b/Makefile.PL
index b0099c2..2e24249 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -7,12 +7,13 @@ WriteMakefile1(
LICENSE => 'perl',
AUTHOR => 'Dana A Jacobsen <dana at acm.org>',
- OBJECT => 'cache.o ' .
- 'factor.o ' .
- 'aks.o ' .
- 'lehmer.o ' .
- 'sieve.o ' .
- 'util.o ' .
+ OBJECT => 'cache.o ' .
+ 'factor.o ' .
+ 'primality.o '.
+ 'aks.o ' .
+ 'lehmer.o ' .
+ 'sieve.o ' .
+ 'util.o ' .
'XS.o',
LIBS => ['-lm'],
diff --git a/TODO b/TODO
index 146dc4c..97bb00f 100644
--- a/TODO
+++ b/TODO
@@ -39,6 +39,11 @@
- Figure out a way to make the internal FOR_EACH_PRIME macros use a segmented
sieve.
-- Refactor where functions exist in .c. Lots of primality tests in factor.c.
-
- Rewrite 23-primality-proofs.t for new format (keep some of the old tests?).
+
+- Use Montgomery routines in more places: Factoring and Lucas tests.
+
+- Add a group order function. We have a naieve method used in AKS (where the
+ values are so small it doesn't matter). See:
+ http://cs-haven.blogspot.com/2012/02/efficiently-computing-order-of-element.html
+ for some discussion of different methods.
diff --git a/XS.xs b/XS.xs
index 2150603..84a7ba3 100644
--- a/XS.xs
+++ b/XS.xs
@@ -14,6 +14,7 @@
#include "cache.h"
#include "sieve.h"
#include "util.h"
+#include "primality.h"
#include "factor.h"
#include "lehmer.h"
#include "aks.h"
@@ -484,6 +485,7 @@ _XS_is_prime(IN UV n)
_XS_is_extra_strong_lucas_pseudoprime = 4
_XS_is_frobenius_underwood_pseudoprime = 5
_XS_is_aks_prime = 6
+ _XS_BPSW = 7
PREINIT:
int ret;
CODE:
@@ -495,6 +497,7 @@ _XS_is_prime(IN UV n)
case 4: ret = _XS_is_lucas_pseudoprime(n, 2); break;
case 5: ret = _XS_is_frobenius_underwood_pseudoprime(n); break;
case 6: ret = _XS_is_aks_prime(n); break;
+ case 7: ret = _XS_BPSW(n); break;
default: croak("_XS_is_prime: Unknown function alias"); break;
}
RETVAL = ret;
diff --git a/aks.c b/aks.c
index ba86e69..119d55b 100644
--- a/aks.c
+++ b/aks.c
@@ -28,7 +28,6 @@
#include "aks.h"
#include "util.h"
#include "sieve.h"
-#include "factor.h"
#include "cache.h"
#include "mulmod.h"
diff --git a/factor.c b/factor.c
index b7166de..e43f335 100644
--- a/factor.c
+++ b/factor.c
@@ -3,13 +3,14 @@
#include <string.h>
#include <math.h>
-#define FUNC_gcd_ui
#include "ptypes.h"
#include "factor.h"
-#include "util.h"
#include "sieve.h"
#include "mulmod.h"
#include "cache.h"
+#include "primality.h"
+#define FUNC_gcd_ui
+#include "util.h"
/*
* You need to remember to use UV for unsigned and IV for signed types that
@@ -289,396 +290,6 @@ static int is_perfect_square(UV n, UV* sqrtn)
return 1;
}
-static int jacobi_iu(IV in, UV m) {
- int j = 1;
- UV n = (in < 0) ? -in : in;
-
- if (m <= 0 || (m%2) == 0) return 0;
- if (in < 0 && (m%4) == 3) j = -j;
- while (n != 0) {
- while ((n % 2) == 0) {
- n >>= 1;
- if ( (m % 8) == 3 || (m % 8) == 5 ) j = -j;
- }
- { UV t = n; n = m; m = t; }
- if ( (n % 4) == 3 && (m % 4) == 3 ) j = -j;
- n = n % m;
- }
- return (m == 1) ? j : 0;
-}
-
-
-/* Fermat pseudoprime */
-int _XS_is_pseudoprime(UV n, UV a)
-{
- UV x;
- UV const nm1 = n-1;
-
- if (n == 2 || n == 3) return 1;
- if (n < 5) return 0;
- if (a < 2) croak("Base %"UVuf" is invalid", a);
- if (a >= n) {
- a %= n;
- if ( a <= 1 || a == nm1 )
- return 1;
- }
-
- x = powmod(a, nm1, n); /* x = a^(n-1) mod n */
- return (x == 1);
-}
-
-
-/* Miller-Rabin probabilistic primality test
- * Returns 1 if probably prime relative to the bases, 0 if composite.
- * Bases must be between 2 and n-2
- */
-int _XS_miller_rabin(UV n, const UV *bases, int nbases)
-{
- UV const nm1 = n-1;
- UV d = n-1;
- int b, r, s = 0;
-
- MPUassert(n > 3, "MR called with n <= 3");
-
- while ( (d&1) == 0 ) {
- s++;
- d >>= 1;
- }
- for (b = 0; b < nbases; b++) {
- UV x, a = bases[b];
-
- if (a < 2)
- croak("Base %"UVuf" is invalid", a);
- if (a >= n)
- a %= n;
- if ( (a <= 1) || (a == nm1) )
- continue;
-
- /* n is a strong pseudoprime to this base if either
- * - a^d = 1 mod n
- * - a^(d2^r) = -1 mod n for some r: 0 <= r <= s-1
- */
-
- x = powmod(a, d, n);
- if ( (x == 1) || (x == nm1) ) continue;
-
- /* cover r = 1 to s-1, r=0 was just done */
- for (r = 1; r < s; r++) {
- x = sqrmod(x, n);
- if ( x == nm1 ) break;
- if ( x == 1 ) return 0;
- }
- if (r >= s)
- return 0;
- }
- return 1;
-}
-
-/* M-R with a = 2 and some checks removed. For internal use. */
-int _SPRP2(UV n)
-{
- UV const nm1 = n-1;
- UV d = n-1;
- UV x;
- int r, s = 0;
-
- MPUassert(n > 3, "S-PRP-2 called with n <= 3");
- if (!(n & 1)) return 0;
- while ( (d & 1) == 0 ) { s++; d >>= 1; }
- /* n is a strong pseudoprime to this base if either
- * - a^d = 1 mod n
- * - a^(d2^r) = -1 mod n for some r: 0 <= r <= s-1 */
- x = powmod(2, d, n);
- if (x == 1 || x == nm1) return 1;
-
- /* just did r=0, now test r = 1 to s-1 */
- for (r = 1; r < s; r++) {
- x = sqrmod(x, n);
- if (x == nm1) return 1;
- }
- return 0;
-}
-
-/* Select M-R bases from http://miller-rabin.appspot.com/, 27 May 2013 */
-#if BITS_PER_WORD == 32
-static const UV mr_bases_small_2[2] = {31, 73};
-static const UV mr_bases_small_3[3] = {2, 7, 61};
-#else
-static const UV mr_bases_large_1[1] = { UVCONST( 9345883071009581737 ) };
-static const UV mr_bases_large_2[2] = { UVCONST( 336781006125 ),
- UVCONST( 9639812373923155 ) };
-static const UV mr_bases_large_3[3] = { UVCONST( 4230279247111683200 ),
- UVCONST( 14694767155120705706 ),
- UVCONST( 16641139526367750375 ) };
-#endif
-
-int _XS_is_prob_prime(UV n)
-{
- int ret;
-
- if (n < 11) {
- if (n == 2 || n == 3 || n == 5 || n == 7) return 2;
- else return 0;
- }
- if (!(n%2) || !(n%3) || !(n%5) || !(n%7)) return 0;
- if (n < 121) /* 11*11 */ return 2;
- if (!(n%11) || !(n%13) || !(n%17) || !(n%19) ||
- !(n%23) || !(n%29) || !(n%31) || !(n%37) ||
- !(n%41) || !(n%43) || !(n%47) || !(n%53)) return 0;
- if (n < 3481) /* 59*59 */ return 2;
-
-#if BITS_PER_WORD == 32
- /* We could use one base when n < 49191, two when n < 360018361. */
- if (n < UVCONST(9080191))
- ret = _XS_miller_rabin(n, mr_bases_small_2, 2);
- else
- ret = _XS_miller_rabin(n, mr_bases_small_3, 3);
-#else
- /* AESLSP test costs about 1.5 Selfridges, vs. ~2.2 for strong Lucas.
- * So it works out to be faster to do AES-BPSW vs. 3 M-R tests. */
- if (n < UVCONST(341531))
- ret = _XS_miller_rabin(n, mr_bases_large_1, 1);
- else if (n < UVCONST(1050535501))
- ret = _XS_miller_rabin(n, mr_bases_large_2, 2);
- else
- ret = _SPRP2(n) && _XS_is_almost_extra_strong_lucas_pseudoprime(n,1);
-#endif
- return 2*ret;
-}
-
-/* Generic Lucas sequence for any appropriate P and Q */
-void lucas_seq(UV* Uret, UV* Vret, UV* Qkret, UV n, IV P, IV Q, UV k)
-{
- UV U, V, b, Dmod, Qmod, Pmod, Qk;
-
- if (k == 0) {
- *Uret = 0;
- *Vret = 2;
- *Qkret = Q;
- return;
- }
-
- Qmod = (Q < 0) ? (UV)(Q + (IV)n) : (UV)Q;
- Pmod = (P < 0) ? (UV)(P + (IV)n) : (UV)P;
- Dmod = submod( mulmod(Pmod, Pmod, n), mulmod(4, Qmod, n), n);
- MPUassert(Dmod != 0, "lucas_seq: D is 0");
- U = 1;
- V = Pmod;
- Qk = Qmod;
- { UV v = k; b = 1; while (v >>= 1) b++; }
-
- if (Q == 1) {
- while (b > 1) {
- U = mulmod(U, V, n);
- V = mulsubmod(V, V, 2, n);
- b--;
- if ( (k >> (b-1)) & UVCONST(1) ) {
- UV t2 = mulmod(U, Dmod, n);
- U = muladdmod(U, Pmod, V, n);
- if (U & 1) { U = (n>>1) + (U>>1) + 1; } else { U >>= 1; }
- V = muladdmod(V, P, t2, n);
- if (V & 1) { V = (n>>1) + (V>>1) + 1; } else { V >>= 1; }
- }
- }
- } else if (P == 1 && Q == -1) {
- /* This is about 30% faster than the generic code below. Since 50% of
- * Lucas and strong Lucas tests come here, I think it's worth doing. */
- int sign = Q;
- while (b > 1) {
- U = mulmod(U, V, n);
- if (sign == 1) V = mulsubmod(V, V, 2, n);
- else V = muladdmod(V, V, 2, n);
- sign = 1; /* Qk *= Qk */
- b--;
- if ( (k >> (b-1)) & UVCONST(1) ) {
- UV t2 = mulmod(U, Dmod, n);
- U = addmod(U, V, n);
- if (U & 1) { U = (n>>1) + (U>>1) + 1; } else { U >>= 1; }
- V = addmod(V, t2, n);
- if (V & 1) { V = (n>>1) + (V>>1) + 1; } else { V >>= 1; }
- sign = -1; /* Qk *= Q */
- }
- }
- if (sign == 1) Qk = 1;
- } else {
- while (b > 1) {
- U = mulmod(U, V, n);
- V = mulsubmod(V, V, addmod(Qk,Qk,n), n);
- Qk = sqrmod(Qk, n);
- b--;
- if ( (k >> (b-1)) & UVCONST(1) ) {
- UV t2 = mulmod(U, Dmod, n);
- U = muladdmod(U, Pmod, V, n);
- if (U & 1) { U = (n>>1) + (U>>1) + 1; } else { U >>= 1; }
- V = muladdmod(V, P, t2, n);
- if (V & 1) { V = (n>>1) + (V>>1) + 1; } else { V >>= 1; }
- Qk = mulmod(Qk, Qmod, n);
- }
- }
- }
- *Uret = U;
- *Vret = V;
- *Qkret = Qk;
-}
-
-/* Lucas tests:
- * 0: Standard
- * 1: Strong
- * 2: Extra Strong (Mo/Jones/Grantham)
- *
- * Goal:
- * (1) no false results when combined with the SPRP-2 test.
- * (2) fast enough to use SPRP-2 + this in place of 3+ M-R tests.
- *
- * For internal purposes, we typically want to use the extra strong test
- * because it is slightly faster (Q = 1 simplies things). None of them have
- * any false positives for the BPSW test.
- *
- * This runs 4-7x faster than MPU::GMP, which means ~10x faster than most GMP
- * implementations. It is about 2x slower than a single M-R test.
- */
-int _XS_is_lucas_pseudoprime(UV n, int strength)
-{
- IV P, Q, D;
- UV U, V, Qk, d, s;
-
- if (n == 2 || n == 3) return 1;
- if (n < 5 || (n%2) == 0) return 0;
- if (n == UV_MAX) return 0;
-
- if (strength < 2) {
- UV Du = 5;
- IV sign = 1;
- while (1) {
- D = Du * sign;
- if (gcd_ui(Du, n) > 1 && gcd_ui(Du, n) != n) return 0;
- if (jacobi_iu(D, n) == -1)
- break;
- if (Du == 21 && is_perfect_square(n, 0)) return 0;
- Du += 2;
- sign = -sign;
- }
- P = 1;
- Q = (1 - D) / 4;
- } else {
- P = 3;
- Q = 1;
- while (1) {
- D = P*P - 4;
- if (gcd_ui(D, n) > 1 && gcd_ui(D, n) != n) return 0;
- if (jacobi_iu(D, n) == -1)
- break;
- if (P == 21 && is_perfect_square(n, 0)) return 0;
- P++;
- }
- }
- MPUassert( D == (P*P - 4*Q) , "is_lucas_pseudoprime: incorrect DPQ");
-
- d = n+1;
- s = 0;
- if (strength > 0)
- while ( (d & 1) == 0 ) { s++; d >>= 1; }
- lucas_seq(&U, &V, &Qk, n, P, Q, d);
-
- if (strength == 0) {
- if (U == 0)
- return 1;
- } else if (strength == 1) {
- if (U == 0)
- return 1;
- /* Now check to see if V_{d*2^r} == 0 for any 0 <= r < s */
- while (s--) {
- if (V == 0)
- return 1;
- if (s) {
- V = mulsubmod(V, V, addmod(Qk,Qk,n), n);
- Qk = sqrmod(Qk, n);
- }
- }
- } else {
- if ( U == 0 && (V == 2 || V == (n-2)) )
- return 1;
- /* Now check to see if V_{d*2^r} == 0 for any 0 <= r < s-1 */
- s--;
- while (s--) {
- if (V == 0)
- return 1;
- if (s)
- V = mulsubmod(V, V, 2, n);
- }
- }
- return 0;
-}
-
-/* A generalization of Pari's shortcut to the extra-strong Lucas test.
- * I've added a gcd check at the top, which needs to be done and also results
- * in fewer pseudoprimes. Pari always does trial division to 100 first so
- * is unlikely to come up there. This only calculate V, which can be done
- * faster, but that means we have more pseudoprimes than the standard
- * extra-strong test.
- *
- * increment: 1 for Baillie OEIS, 2 for Pari.
- *
- * With increment = 1, these results will be a subset of the extra-strong
- * Lucas pseudoprimes. With increment = 2, we produce Pari's results.
- */
-int _XS_is_almost_extra_strong_lucas_pseudoprime(UV n, UV increment)
-{
- UV P, V, d, s;
-
- if (n == 2 || n == 3 || n == 5) return 1;
- if (n < 7 || (n%2) == 0) return 0;
- if (n == UV_MAX) return 0;
- if (increment < 1 || increment > 256)
- croak("Invalid lucas paramater increment: %"UVuf"\n", increment);
-
- P = 3;
- while (1) {
- UV D = P*P - 4;
- d = gcd_ui(D, n);
- if (d > 1 && d < n)
- return 0;
- if (jacobi_iu(D, n) == -1)
- break;
- if (P == (3+20*increment) && is_perfect_square(n, 0)) return 0;
- P += increment;
- if (P > 65535)
- croak("lucas_extrastrong_params: P exceeded 65535");
- }
- if (P >= n) P %= n; /* Never happens with increment < 4 */
-
- d = n+1;
- s = 0;
- while ( (d & 1) == 0 ) { s++; d >>= 1; }
-
- {
- UV W, b;
- V = P;
- W = mulsubmod(P, P, 2, n);
- { UV v = d; b = 1; while (v >>= 1) b++; }
- while (b-- > 1) {
- if ( (d >> (b-1)) & UVCONST(1) ) {
- V = mulsubmod(V, W, P, n);
- W = mulsubmod(W, W, 2, n);
- } else {
- W = mulsubmod(V, W, P, n);
- V = mulsubmod(V, V, 2, n);
- }
- }
- }
-
- if (V == 2 || V == (n-2))
- return 1;
- while (s-- > 1) {
- if (V == 0)
- return 1;
- V = mulsubmod(V, V, 2, n);
- if (V == 2)
- return 0;
- }
- return 0;
-}
-
UV _XS_divisor_sum(UV n)
{
@@ -1378,135 +989,3 @@ int racing_squfof_factor(UV n, UV *factors, UV rounds)
factors[0] = n;
return 1;
}
-
-
-/****************************************************************************/
-
-/*
- *
- * The Frobenius-Underwood test has no known counterexamples below 10^13, but
- * has not been extensively tested above that. This is the Minimal Lambda+2
- * test from section 9 of "Quadratic Composite Tests" by Paul Underwood.
- *
- * Given the script:
- * time mpu 'forprimes { Math::Prime::Util::_XS_is_frobenius_underwood_pseudoprime($_); Math::Prime::Util::_XS_is_frobenius_underwood_pseudoprime($_+2); } 500_000_000'
- * and replacing the tests appropriately, I get these times:
- *
- * 0.87 $_ (cost of empty loop)
- * 21.37 _XS_is_pseudoprime($_,2)
- * 22.42 _XS_miller_rabin($_,2)
- * 44.53 _XS_is_lucas_pseudoprime($_)
- * 43.95 _XS_is_strong_lucas_pseudoprime($_)
- * 40.09 _XS_is_extra_strong_lucas_pseudoprime($_)
- * 25.86 _XS_is_almost_extra_strong_lucas_pseudoprime($_)
- * 42.40 _XS_is_frobenius_underwood_pseudoprime($_)
- * 27.02 _XS_is_prob_prime($_)
- * 27.24 _XS_is_prime($_)
- *
- * At these sizes is_prob_prime is doing 1-2 M-R tests. The input validation
- * is adding a noticeable overhead to is_prime.
- *
- * With a set of 100k 64-bit random primes; 'do { die unless ... } for 1..50'
- *
- * 0.32 empty loop
- * 10.25 _XS_is_pseudoprime($_,2)
- * 10.06 _XS_miller_rabin($_,2)
- * 22.02 _XS_is_lucas_pseudoprime($_)
- * 21.81 _XS_is_strong_lucas_pseudoprime($_)
- * 20.99 _XS_is_extra_strong_lucas_pseudoprime($_)
- * 14.01 _XS_is_almost_extra_strong_lucas_pseudoprime($_)
- * 18.44 _XS_is_frobenius_underwood_pseudoprime($_)
- * 24.11 _XS_is_prob_prime($_)
- * 24.06 _XS_is_prime($_)
- *
- * At this point is_prob_prime has transitioned to BPSW.
- *
- * Calling a powmod a 'Selfridge' unit, then we see:
- * 1 Selfridge unit M-R test
- * 1.4 Selfridge unit "almost extra strong" Lucas
- * 2 Selfridge units Lucas or Frobenius-Underwood
- * 3 Selfridge units BPSW (standard, strong, or extra-strong)
- *
- * We try to structure the primality test like:
- * 1) simple divisibility very fast primes and ~10% of composites
- * 2) M-R with base 2 1 Selfridge primes and .00000000002% comps
- * 3) Lucas test 2 Selfridge only primes
- *
- * Hence given a random composite, about 90% of the time it costs us almost
- * nothing. After spending 1 Selfridge on the first MR test, less than 32M
- * composites remain undecided out of 18 quintillion 64-bit composites. The
- * final Lucas test has no false positives.
- * Replacing the Lucas test with the F-U test won't save any time. Replacing
- * the whole thing with the F-U test (assuming it has no false results for
- * all 64-bit values, which hasn't been verified), doesn't help either.
- * It's 2/3 the cost for primes, but much more expensive for composites. It
- * seems of interest for > 2^64 as a different test to do in addition to BPSW.
- */
-
-
-int _XS_is_frobenius_underwood_pseudoprime(UV n)
-{
- int bit;
- UV x, result, multiplier, a, b, np1, len, t1, t2, na;
- IV t;
-
- if (n < 2) return 0;
- if (n < 4) return 1;
- if ((n % 2) == 0) return 0;
- if (is_perfect_square(n,0)) return 0;
- if (n == UV_MAX) return 0;
-
- x = 0;
- t = -1;
- while ( jacobi_iu( t, n ) != -1 ) {
- x++;
- t = (IV)(x*x) - 4;
- }
- result = addmod( addmod(x, x, n), 5, n);
- multiplier = addmod(x, 2, n);
-
- a = 1;
- b = 2;
- np1 = n+1;
- { UV v = np1; len = 1; while (v >>= 1) len++; }
-
- if (x == 0) {
- for (bit = len-2; bit >= 0; bit--) {
- t2 = addmod(b, b, n);
- na = mulmod(a, t2, n);
- t1 = addmod(b, a, n);
- t2 = submod(b, a, n);
- b = mulmod(t1, t2, n);
- a = na;
- if ( (np1 >> bit) & UVCONST(1) ) {
- t1 = mulmod(a, 2, n);
- na = addmod(t1, b, n);
- t1 = addmod(b, b, n);
- b = submod(t1, a, n);
- a = na;
- }
- }
- } else {
- for (bit = len-2; bit >= 0; bit--) {
- t1 = mulmod(a, x, n);
- t2 = addmod(b, b, n);
- t1 = addmod(t1, t2, n);
- na = mulmod(a, t1, n);
- t1 = addmod(b, a, n);
- t2 = submod(b, a, n);
- b = mulmod(t1, t2, n);
- a = na;
- if ( (np1 >> bit) & UVCONST(1) ) {
- t1 = mulmod(a, multiplier, n);
- na = addmod(t1, b, n);
- t1 = addmod(b, b, n);
- b = submod(t1, a, n);
- a = na;
- }
- }
- }
- if (_XS_get_verbose()>1) printf("%"UVuf" is %s with x = %"UVuf"\n", n, (a == 0 && b == result) ? "probably prime" : "composite", x);
- if (a == 0 && b == result)
- return 1;
- return 0;
-}
diff --git a/factor.h b/factor.h
index c7dd3ae..2979b32 100644
--- a/factor.h
+++ b/factor.h
@@ -18,27 +18,6 @@ extern int pplus1_factor(UV n, UV *factors, UV B);
extern int squfof_factor(UV n, UV *factors, UV rounds);
extern int racing_squfof_factor(UV n, UV *factors, UV rounds);
-
-extern int _XS_is_pseudoprime(UV n, UV a);
-extern int _XS_miller_rabin(UV n, const UV *bases, int nbases);
-extern int _SPRP2(UV n);
-extern int _XS_is_prob_prime(UV n);
-extern void lucas_seq(UV* U, UV* V, UV* Qk, UV n, IV P, IV Q, UV k);
-extern int _XS_is_lucas_pseudoprime(UV n, int strength);
-extern int _XS_is_frobenius_underwood_pseudoprime(UV n);
-extern int _XS_is_almost_extra_strong_lucas_pseudoprime(UV n, UV increment);
-
extern UV _XS_divisor_sum(UV n);
-#ifdef FUNC_gcd_ui
-static UV gcd_ui(UV x, UV y) {
- UV t;
- if (y < x) { t = x; x = y; y = t; }
- while (y > 0) {
- t = y; y = x % y; x = t; /* y1 <- x0 % y0 ; x1 <- y0 */
- }
- return x;
-}
-#endif
-
#endif
diff --git a/lib/Math/Prime/Util/PrimeArray.pm b/lib/Math/Prime/Util/PrimeArray.pm
index baf7815..a47795a 100644
--- a/lib/Math/Prime/Util/PrimeArray.pm
+++ b/lib/Math/Prime/Util/PrimeArray.pm
@@ -199,8 +199,8 @@ Example:
If you want sequential primes with low memory, I recommend using
L<Math::Prime::Util/forprimes>. It is much faster, as the tied array
-functionality in Perl is not high performance. It does have limitations
-vs. the prime array, but many tasks find they can use it.
+functionality in Perl is not high performance. It isn't as flexible as
+the prime array, but it is a very common pattern.
If you prefer an iterator pattern, I would recommend using
L<Math::Prime::Util/prime_iterator>. It will be a bit faster than using this
diff --git a/primality.c b/primality.c
new file mode 100644
index 0000000..171317d
--- /dev/null
+++ b/primality.c
@@ -0,0 +1,689 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "ptypes.h"
+#include "primality.h"
+#include "mulmod.h"
+#define FUNC_gcd_ui
+#include "util.h"
+
+/* Primality related functions, including Montgomery math */
+
+/* TODO:
+ * - Convert F-U test to Montgomery
+ * - Convert Lucas tests to Montgomery
+ */
+
+static const UV mr_bases_const2[1] = {2};
+
+/******************************************************************************
+ Code inside USE_MONT_PRIMALITY is Montgomery math and efficient M-R from
+ Wojciech Izykowski. See: https://github.com/wizykowski/miller-rabin
+
+Copyright (c) 2013, Wojciech Izykowski
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * The name of the author may not be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************************************************************/
+#if USE_MONT_PRIMALITY
+static INLINE uint64_t mont_prod64(uint64_t a, uint64_t b, uint64_t n, uint64_t npi)
+{
+ uint64_t t_hi, t_lo, m, mn_hi, mn_lo, u;
+ int carry;
+ /* t_hi * 2^64 + t_lo = a*b */
+ asm("mulq %3" : "=a"(t_lo), "=d"(t_hi) : "a"(a), "rm"(b));
+ m = t_lo * npi;
+ /* mn_hi * 2^64 + mn_lo = m*n */
+ asm("mulq %3" : "=a"(mn_lo), "=d"(mn_hi) : "a"(m), "rm"(n));
+ carry = t_lo + mn_lo < t_lo ? 1 : 0;
+ u = t_hi + mn_hi + carry;
+ if (u < t_hi) return u-n;
+ return u >= n ? u-n : u;
+}
+#define mont_square64(a, n, npi) mont_prod64(a, a, n, npi)
+static INLINE UV mont_powmod64(uint64_t a, uint64_t k, uint64_t one, uint64_t n, uint64_t npi)
+{
+ uint64_t t = one;
+ while (k) {
+ if (k & 1) t = mont_prod64(t, a, n, npi);
+ k >>= 1;
+ if (k) a = mont_square64(a, n, npi);
+ }
+ return t;
+}
+static INLINE uint64_t modular_inverse64(const uint64_t a)
+{
+ uint64_t u,x,w,z,q;
+
+ x = 1; z = a;
+
+ q = (-z)/z + 1; /* = 2^64 / z */
+ u = - q; /* = -q * x */
+ w = - q * z; /* = b - q * z = 2^64 - q * z */
+
+ /* after first iteration all variables are 64-bit */
+
+ while (w) {
+ if (w < z) {
+ q = u; u = x; x = q; /* swap(u, x) */
+ q = w; w = z; z = q; /* swap(w, z) */
+ }
+ q = w / z;
+ u -= q * x;
+ w -= q * z;
+ }
+ return x;
+}
+static INLINE uint64_t compute_modn64(const uint64_t n)
+{
+
+ if (n <= (1ULL << 63)) {
+ uint64_t res = ((1ULL << 63) % n) << 1;
+ return res < n ? res : res-n;
+ } else
+ return -n;
+}
+#define compute_a_times_2_64_mod_n(a, n, r) mulmod(a, r, n)
+static INLINE uint64_t compute_2_65_mod_n(const uint64_t n, const uint64_t modn)
+{
+ if (n <= (1ULL << 63)) {
+ uint64_t res = modn << 1;
+ return res < n ? res : res - n;
+ } else {
+ /* n can fit 2 or 3 times in 2^65 */
+ if (n > UVCONST(12297829382473034410))
+ return -n-n; /* 2^65 mod n = 2^65 - 2*n */
+ else
+ return -n-n-n; /* 2^65 mod n = 2^65 - 3*n */
+ }
+}
+/* static INLINE int efficient_mr64(const uint64_t bases[], const int cnt, const uint64_t n) */
+static int monty_mr64(const uint64_t n, const UV* bases, int cnt)
+{
+ int i, j, t;
+ const uint64_t npi = modular_inverse64(-((int64_t)n));
+ const uint64_t r = compute_modn64(n);
+ uint64_t u = n - 1;
+ const uint64_t nr = n - r;
+
+ t = 0;
+ while (!(u&1)) { t++; u >>= 1; }
+ for (j = 0; j < cnt; j++) {
+ const uint64_t a = bases[j];
+ uint64_t A = compute_a_times_2_64_mod_n(a, n, r);
+ uint64_t d;
+ if (a < 2) croak("Base %"UVuf" is invalid", (UV)a);
+ if (!A) continue; /* PRIME in subtest */
+ d = mont_powmod64(A, u, r, n, npi); /* compute a^u mod n */
+ if (d == r || d == nr) continue; /* PRIME in subtest */
+ for (i=1; i<t; i++) {
+ d = mont_square64(d, n, npi);
+ if (d == r) return 0;
+ if (d == nr) break; /* PRIME in subtest */
+ }
+ if (i == t) return 0;
+ }
+ return 1;
+}
+#endif
+/******************************************************************************/
+
+
+
+
+/* Helper functions */
+static int is_perfect_square(UV n, UV* sqrtn)
+{
+ UV m;
+ m = n & 127;
+ if ((m*0x8bc40d7d) & (m*0xa1e2f5d1) & 0x14020a) return 0;
+ m = isqrt(n);
+ if (n != (m*m))
+ return 0;
+ if (sqrtn != 0)
+ *sqrtn = m;
+ return 1;
+}
+static int jacobi_iu(IV in, UV m) {
+ int j = 1;
+ UV n = (in < 0) ? -in : in;
+
+ if (m <= 0 || (m%2) == 0) return 0;
+ if (in < 0 && (m%4) == 3) j = -j;
+ while (n != 0) {
+ while ((n % 2) == 0) {
+ n >>= 1;
+ if ( (m % 8) == 3 || (m % 8) == 5 ) j = -j;
+ }
+ { UV t = n; n = m; m = t; }
+ if ( (n % 4) == 3 && (m % 4) == 3 ) j = -j;
+ n = n % m;
+ }
+ return (m == 1) ? j : 0;
+}
+
+
+
+
+/* Fermat pseudoprime */
+int _XS_is_pseudoprime(UV const n, UV a)
+{
+ UV x;
+
+ if (n == 2 || n == 3) return 1;
+ if (n < 5) return 0;
+ if (a < 2) croak("Base %"UVuf" is invalid", a);
+ if (a >= n) {
+ a %= n;
+ if ( a <= 1 || a == n-1 )
+ return 1;
+ }
+ x = powmod(a, n-1, n); /* x = a^(n-1) mod n */
+ return (x == 1);
+}
+
+/* Miller-Rabin probabilistic primality test
+ * Returns 1 if probably prime relative to the bases, 0 if composite.
+ * Bases must be between 2 and n-2
+ */
+int _XS_miller_rabin(UV const n, const UV *bases, int nbases)
+{
+ UV d = n-1;
+ int b, r, s = 0;
+
+ MPUassert(n > 3, "MR called with n <= 3");
+ if ((n & 1) == 0) return 0;
+
+ if (USE_MONT_PRIMALITY && n >= UVCONST(4294967295))
+ return monty_mr64((uint64_t)n, bases, nbases);
+
+ while (!(d&1)) { s++; d >>= 1; }
+
+ for (b = 0; b < nbases; b++) {
+ UV x, a = bases[b];
+ if (a < 2) croak("Base %"UVuf" is invalid", a);
+ if (a >= n) a %= n;
+ if ( (a <= 1) || (a == n-1) )
+ continue;
+ /* n is a strong pseudoprime to base a if either:
+ * a^d = 1 mod n
+ * a^(d2^r) = -1 mod n for some r: 0 <= r <= s-1
+ */
+ x = powmod(a, d, n);
+ if ( (x == 1) || (x == n-1) ) continue;
+ for (r = 1; r < s; r++) { /* r=0 was just done, test r = 1 to s-1 */
+ x = sqrmod(x, n);
+ if ( x == n-1 ) break;
+ if ( x == 1 ) return 0;
+ }
+ if (r >= s) return 0;
+ }
+ return 1;
+}
+
+int _XS_BPSW(UV const n)
+{
+ if (n == 2 || n == 3 || n == 5) return 1;
+ if (n < 7 || (n%2) == 0 || n == UV_MAX) return 0;
+
+#if !USE_MONT_PRIMALITY
+ return _XS_miller_rabin(n, mr_bases_const2, 1)
+ && _XS_is_almost_extra_strong_lucas_pseudoprime(n,1);
+#else
+ if (n < UVCONST(4294967295)) {
+ return _XS_miller_rabin(n, mr_bases_const2, 1)
+ && _XS_is_almost_extra_strong_lucas_pseudoprime(n,1);
+ } else {
+ const uint64_t npi = modular_inverse64(-((int64_t)n));
+ const uint64_t montr = compute_modn64(n);
+ const uint64_t mont2 = compute_2_65_mod_n(n, montr);
+ uint64_t u = n-1;
+ const uint64_t nr = n-montr;
+ int i, t = 0;
+ UV P, V, d, s;
+
+ /* M-R with base 2 */
+ while (!(u&1)) { t++; u >>= 1; }
+ {
+ uint64_t A = mont2;
+ if (A) {
+ uint64_t d = mont_powmod64(A, u, montr, n, npi);
+ if (d != montr && d != nr) {
+ for (i=1; i<t; i++) {
+ d = mont_square64(d, n, npi);
+ if (d == montr) return 0;
+ if (d == nr) break;
+ }
+ if (i == t)
+ return 0;
+ }
+ }
+ }
+ /* AES Lucas test */
+ P = 3;
+ while (1) {
+ UV D = P*P - 4;
+ d = gcd_ui(D, n);
+ if (d > 1 && d < n)
+ return 0;
+ if (jacobi_iu(D, n) == -1)
+ break;
+ if (P == (3+20) && is_perfect_square(n, 0)) return 0;
+ P++;
+ if (P > 65535)
+ croak("lucas_extrastrong_params: P exceeded 65535");
+ }
+
+ d = n+1;
+ s = 0;
+ while ( (d & 1) == 0 ) { s++; d >>= 1; }
+
+ {
+ const uint64_t montP = compute_a_times_2_64_mod_n(P, n, montr);
+ UV W, b;
+ W = submod( mont_prod64( montP, montP, n, npi), mont2, n);
+ V = montP;
+ { UV v = d; b = 1; while (v >>= 1) b++; }
+ while (b-- > 1) {
+ if ( (d >> (b-1)) & UVCONST(1) ) {
+ V = submod( mont_prod64(V, W, n, npi), montP, n);
+ W = submod( mont_prod64(W, W, n, npi), mont2, n);
+ } else {
+ W = submod( mont_prod64(V, W, n, npi), montP, n);
+ V = submod( mont_prod64(V, V, n, npi), mont2, n);
+ }
+ }
+ }
+
+ if (V == mont2 || V == (n-mont2))
+ return 1;
+ while (s-- > 1) {
+ if (V == 0)
+ return 1;
+ V = submod( mont_prod64(V, V, n, npi), mont2, n);
+ if (V == mont2)
+ return 0;
+ }
+ }
+ return 0;
+#endif
+}
+
+/* Generic Lucas sequence for any appropriate P and Q */
+void lucas_seq(UV* Uret, UV* Vret, UV* Qkret, UV n, IV P, IV Q, UV k)
+{
+ UV U, V, b, Dmod, Qmod, Pmod, Qk;
+
+ if (k == 0) {
+ *Uret = 0;
+ *Vret = 2;
+ *Qkret = Q;
+ return;
+ }
+
+ Qmod = (Q < 0) ? (UV)(Q + (IV)n) : (UV)Q;
+ Pmod = (P < 0) ? (UV)(P + (IV)n) : (UV)P;
+ Dmod = submod( mulmod(Pmod, Pmod, n), mulmod(4, Qmod, n), n);
+ MPUassert(Dmod != 0, "lucas_seq: D is 0");
+ U = 1;
+ V = Pmod;
+ Qk = Qmod;
+ { UV v = k; b = 1; while (v >>= 1) b++; }
+
+ if (Q == 1) {
+ while (b > 1) {
+ U = mulmod(U, V, n);
+ V = mulsubmod(V, V, 2, n);
+ b--;
+ if ( (k >> (b-1)) & UVCONST(1) ) {
+ UV t2 = mulmod(U, Dmod, n);
+ U = muladdmod(U, Pmod, V, n);
+ if (U & 1) { U = (n>>1) + (U>>1) + 1; } else { U >>= 1; }
+ V = muladdmod(V, P, t2, n);
+ if (V & 1) { V = (n>>1) + (V>>1) + 1; } else { V >>= 1; }
+ }
+ }
+ } else if (P == 1 && Q == -1) {
+ /* This is about 30% faster than the generic code below. Since 50% of
+ * Lucas and strong Lucas tests come here, I think it's worth doing. */
+ int sign = Q;
+ while (b > 1) {
+ U = mulmod(U, V, n);
+ if (sign == 1) V = mulsubmod(V, V, 2, n);
+ else V = muladdmod(V, V, 2, n);
+ sign = 1; /* Qk *= Qk */
+ b--;
+ if ( (k >> (b-1)) & UVCONST(1) ) {
+ UV t2 = mulmod(U, Dmod, n);
+ U = addmod(U, V, n);
+ if (U & 1) { U = (n>>1) + (U>>1) + 1; } else { U >>= 1; }
+ V = addmod(V, t2, n);
+ if (V & 1) { V = (n>>1) + (V>>1) + 1; } else { V >>= 1; }
+ sign = -1; /* Qk *= Q */
+ }
+ }
+ if (sign == 1) Qk = 1;
+ } else {
+ while (b > 1) {
+ U = mulmod(U, V, n);
+ V = mulsubmod(V, V, addmod(Qk,Qk,n), n);
+ Qk = sqrmod(Qk, n);
+ b--;
+ if ( (k >> (b-1)) & UVCONST(1) ) {
+ UV t2 = mulmod(U, Dmod, n);
+ U = muladdmod(U, Pmod, V, n);
+ if (U & 1) { U = (n>>1) + (U>>1) + 1; } else { U >>= 1; }
+ V = muladdmod(V, P, t2, n);
+ if (V & 1) { V = (n>>1) + (V>>1) + 1; } else { V >>= 1; }
+ Qk = mulmod(Qk, Qmod, n);
+ }
+ }
+ }
+ *Uret = U;
+ *Vret = V;
+ *Qkret = Qk;
+}
+
+/* Lucas tests:
+ * 0: Standard
+ * 1: Strong
+ * 2: Extra Strong (Mo/Jones/Grantham)
+ *
+ * None of them have any false positives for the BPSW test. Also see the
+ * "almost extra strong" test.
+ */
+int _XS_is_lucas_pseudoprime(UV n, int strength)
+{
+ IV P, Q, D;
+ UV U, V, Qk, d, s;
+
+ if (n == 2 || n == 3) return 1;
+ if (n < 5 || (n%2) == 0) return 0;
+ if (n == UV_MAX) return 0;
+
+ if (strength < 2) {
+ UV Du = 5;
+ IV sign = 1;
+ while (1) {
+ D = Du * sign;
+ if (gcd_ui(Du, n) > 1 && gcd_ui(Du, n) != n) return 0;
+ if (jacobi_iu(D, n) == -1)
+ break;
+ if (Du == 21 && is_perfect_square(n, 0)) return 0;
+ Du += 2;
+ sign = -sign;
+ }
+ P = 1;
+ Q = (1 - D) / 4;
+ } else {
+ P = 3;
+ Q = 1;
+ while (1) {
+ D = P*P - 4;
+ if (gcd_ui(D, n) > 1 && gcd_ui(D, n) != n) return 0;
+ if (jacobi_iu(D, n) == -1)
+ break;
+ if (P == 21 && is_perfect_square(n, 0)) return 0;
+ P++;
+ }
+ }
+ MPUassert( D == (P*P - 4*Q) , "is_lucas_pseudoprime: incorrect DPQ");
+
+ d = n+1;
+ s = 0;
+ if (strength > 0)
+ while ( (d & 1) == 0 ) { s++; d >>= 1; }
+ lucas_seq(&U, &V, &Qk, n, P, Q, d);
+
+ if (strength == 0) {
+ if (U == 0)
+ return 1;
+ } else if (strength == 1) {
+ if (U == 0)
+ return 1;
+ /* Now check to see if V_{d*2^r} == 0 for any 0 <= r < s */
+ while (s--) {
+ if (V == 0)
+ return 1;
+ if (s) {
+ V = mulsubmod(V, V, addmod(Qk,Qk,n), n);
+ Qk = sqrmod(Qk, n);
+ }
+ }
+ } else {
+ if ( U == 0 && (V == 2 || V == (n-2)) )
+ return 1;
+ /* Now check to see if V_{d*2^r} == 0 for any 0 <= r < s-1 */
+ s--;
+ while (s--) {
+ if (V == 0)
+ return 1;
+ if (s)
+ V = mulsubmod(V, V, 2, n);
+ }
+ }
+ return 0;
+}
+
+/* A generalization of Pari's shortcut to the extra-strong Lucas test.
+ * I've added a gcd check at the top, which needs to be done and also results
+ * in fewer pseudoprimes. Pari always does trial division to 100 first so
+ * is unlikely to come up there. This only calculate V, which can be done
+ * faster, but that means we have more pseudoprimes than the standard
+ * extra-strong test.
+ *
+ * increment: 1 for Baillie OEIS, 2 for Pari.
+ *
+ * With increment = 1, these results will be a subset of the extra-strong
+ * Lucas pseudoprimes. With increment = 2, we produce Pari's results.
+ */
+int _XS_is_almost_extra_strong_lucas_pseudoprime(UV n, UV increment)
+{
+ UV P, V, d, s;
+
+ if (n == 2 || n == 3 || n == 5) return 1;
+ if (n < 7 || (n%2) == 0) return 0;
+ if (n == UV_MAX) return 0;
+ if (increment < 1 || increment > 256)
+ croak("Invalid lucas paramater increment: %"UVuf"\n", increment);
+
+ P = 3;
+ while (1) {
+ UV D = P*P - 4;
+ d = gcd_ui(D, n);
+ if (d > 1 && d < n)
+ return 0;
+ if (jacobi_iu(D, n) == -1)
+ break;
+ if (P == (3+20*increment) && is_perfect_square(n, 0)) return 0;
+ P += increment;
+ if (P > 65535)
+ croak("lucas_extrastrong_params: P exceeded 65535");
+ }
+ if (P >= n) P %= n; /* Never happens with increment < 4 */
+
+ d = n+1;
+ s = 0;
+ while ( (d & 1) == 0 ) { s++; d >>= 1; }
+
+ {
+ UV W, b;
+ V = P;
+ W = mulsubmod(P, P, 2, n);
+ { UV v = d; b = 1; while (v >>= 1) b++; }
+ while (b-- > 1) {
+ if ( (d >> (b-1)) & UVCONST(1) ) {
+ V = mulsubmod(V, W, P, n);
+ W = mulsubmod(W, W, 2, n);
+ } else {
+ W = mulsubmod(V, W, P, n);
+ V = mulsubmod(V, V, 2, n);
+ }
+ }
+ }
+ if (V == 2 || V == (n-2))
+ return 1;
+ while (s-- > 1) {
+ if (V == 0)
+ return 1;
+ V = mulsubmod(V, V, 2, n);
+ if (V == 2)
+ return 0;
+ }
+ return 0;
+}
+
+/*
+ * The Frobenius-Underwood test has no known counterexamples below 10^13, but
+ * has not been extensively tested above that. This is the Minimal Lambda+2
+ * test from section 9 of "Quadratic Composite Tests" by Paul Underwood.
+ *
+ * It is generally slower than the AES Lucas test, but for large values will
+ * be faster than a BPSW test. Currently it is of use mainly for numbers
+ * larger than 2^64 as an additional non-correlated test.
+ *
+ * TODO: Convert to Montgomery
+ */
+int _XS_is_frobenius_underwood_pseudoprime(UV n)
+{
+ int bit;
+ UV x, result, multiplier, a, b, np1, len, t1, t2, na;
+ IV t;
+
+ if (n < 2) return 0;
+ if (n < 4) return 1;
+ if ((n % 2) == 0) return 0;
+ if (is_perfect_square(n,0)) return 0;
+ if (n == UV_MAX) return 0;
+
+ x = 0;
+ t = -1;
+ while ( jacobi_iu( t, n ) != -1 ) {
+ x++;
+ t = (IV)(x*x) - 4;
+ }
+ result = addmod( addmod(x, x, n), 5, n);
+ multiplier = addmod(x, 2, n);
+
+ a = 1;
+ b = 2;
+ np1 = n+1;
+ { UV v = np1; len = 1; while (v >>= 1) len++; }
+
+ if (x == 0) {
+ for (bit = len-2; bit >= 0; bit--) {
+ t2 = addmod(b, b, n);
+ na = mulmod(a, t2, n);
+ t1 = addmod(b, a, n);
+ t2 = submod(b, a, n);
+ b = mulmod(t1, t2, n);
+ a = na;
+ if ( (np1 >> bit) & UVCONST(1) ) {
+ t1 = mulmod(a, 2, n);
+ na = addmod(t1, b, n);
+ t1 = addmod(b, b, n);
+ b = submod(t1, a, n);
+ a = na;
+ }
+ }
+ } else {
+ for (bit = len-2; bit >= 0; bit--) {
+ t1 = mulmod(a, x, n);
+ t2 = addmod(b, b, n);
+ t1 = addmod(t1, t2, n);
+ na = mulmod(a, t1, n);
+ t1 = addmod(b, a, n);
+ t2 = submod(b, a, n);
+ b = mulmod(t1, t2, n);
+ a = na;
+ if ( (np1 >> bit) & UVCONST(1) ) {
+ t1 = mulmod(a, multiplier, n);
+ na = addmod(t1, b, n);
+ t1 = addmod(b, b, n);
+ b = submod(t1, a, n);
+ a = na;
+ }
+ }
+ }
+ if (_XS_get_verbose()>1) printf("%"UVuf" is %s with x = %"UVuf"\n", n, (a == 0 && b == result) ? "probably prime" : "composite", x);
+ if (a == 0 && b == result)
+ return 1;
+ return 0;
+}
+
+
+/******************************************************************************/
+
+
+/* Select M-R bases from http://miller-rabin.appspot.com/, 26 July 2013 */
+#if BITS_PER_WORD == 32
+static const UV mr_bases_small_2[2] = {31, 73};
+static const UV mr_bases_small_3[3] = {2, 7, 61};
+#else
+static const UV mr_bases_large_1[1] = { UVCONST( 9345883071009581737 ) };
+static const UV mr_bases_large_2[2] = { UVCONST( 336781006125 ),
+ UVCONST( 9639812373923155 ) };
+static const UV mr_bases_large_3[3] = { UVCONST( 4230279247111683200 ),
+ UVCONST( 14694767155120705706 ),
+ UVCONST( 16641139526367750375 ) };
+static const UV mr_bases_large_7[7] = { 2, 325, 9375, 28178, 450775, 9780504, 1795265022 };
+#endif
+
+int _XS_is_prob_prime(UV n)
+{
+ int ret;
+
+ if (n < 11) {
+ if (n == 2 || n == 3 || n == 5 || n == 7) return 2;
+ else return 0;
+ }
+ if (!(n%2) || !(n%3) || !(n%5) || !(n%7)) return 0;
+ if (n < 121) /* 11*11 */ return 2;
+ if (!(n%11) || !(n%13) || !(n%17) || !(n%19) ||
+ !(n%23) || !(n%29) || !(n%31) || !(n%37) ||
+ !(n%41) || !(n%43) || !(n%47) || !(n%53)) return 0;
+ if (n < 3481) /* 59*59 */ return 2;
+
+#if BITS_PER_WORD == 32
+ /* We could use one base when n < 49191, two when n < 360018361. */
+ if (n < UVCONST(9080191))
+ ret = _XS_miller_rabin(n, mr_bases_small_2, 2);
+ else
+ ret = _XS_miller_rabin(n, mr_bases_small_3, 3);
+#else
+ /* AESLSP test costs about 1.5 Selfridges, vs. ~2.2 for strong Lucas.
+ * So it works out to be faster to do AES-BPSW vs. 3 M-R tests. */
+ if (n < UVCONST(341531))
+ ret = _XS_miller_rabin(n, mr_bases_large_1, 1);
+ else if (n < UVCONST(1050535501))
+ ret = _XS_miller_rabin(n, mr_bases_large_2, 2);
+ else
+ ret = _XS_BPSW(n);
+ /*
+ ret = efficient_mr64(mr_bases_large_7, 7, n);
+ ret = _XS_miller_rabin(n, mr_bases_large_7, 7);
+ */
+#endif
+ return 2*ret;
+}
diff --git a/primality.h b/primality.h
new file mode 100644
index 0000000..ec7e0f9
--- /dev/null
+++ b/primality.h
@@ -0,0 +1,22 @@
+#ifndef MPU_PRIMALITY_H
+#define MPU_PRIMALITY_H
+
+#include "ptypes.h"
+
+#if BITS_PER_WORD == 64 && HAVE_STD_U64 && defined(__GNUC__) && defined(__x86_64__)
+#define USE_MONT_PRIMALITY 1
+#else
+#define USE_MONT_PRIMALITY 0
+#endif
+
+extern int _XS_is_pseudoprime(UV const n, UV a);
+extern int _XS_miller_rabin(UV const n, const UV *bases, int nbases);
+extern void lucas_seq(UV* U, UV* V, UV* Qk, UV n, IV P, IV Q, UV k);
+extern int _XS_is_lucas_pseudoprime(UV n, int strength);
+extern int _XS_is_frobenius_underwood_pseudoprime(UV n);
+extern int _XS_is_almost_extra_strong_lucas_pseudoprime(UV n, UV increment);
+
+extern int _XS_BPSW(UV const n);
+extern int _XS_is_prob_prime(UV n);
+
+#endif
diff --git a/util.c b/util.c
index 523ce65..b4436af 100644
--- a/util.c
+++ b/util.c
@@ -58,7 +58,7 @@
#include "ptypes.h"
#include "util.h"
#include "sieve.h"
-#include "factor.h"
+#include "primality.h"
#include "cache.h"
#include "lehmer.h"
diff --git a/util.h b/util.h
index 4194879..efc4107 100644
--- a/util.h
+++ b/util.h
@@ -47,4 +47,15 @@ static UV isqrt(UV n) {
return root;
}
+#ifdef FUNC_gcd_ui
+static UV gcd_ui(UV x, UV y) {
+ UV t;
+ if (y < x) { t = x; x = y; y = t; }
+ while (y > 0) {
+ t = y; y = x % y; x = t; /* y1 <- x0 % y0 ; x1 <- y0 */
+ }
+ return x;
+}
+#endif
+
#endif
diff --git a/xt/primality-small.pl b/xt/primality-small.pl
index 70d5900..98491f4 100755
--- a/xt/primality-small.pl
+++ b/xt/primality-small.pl
@@ -5,7 +5,7 @@ $| = 1; # fast pipes
# Make sure the is_prob_prime functionality is working for small inputs.
# Good for making sure the first few M-R bases are set up correctly.
-my $limit = 1_000_000_000;
+my $limit = shift || 1_000_000_000;
use Math::Prime::Util qw/is_prob_prime/;
# Use another code base for comparison.
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git
More information about the Pkg-perl-cvs-commits
mailing list