[libmath-prime-util-perl] 31/72: Montgomery reduction for all Lucas tests

Thu May 21 18:49:38 UTC 2015

This is an automated email from the git hooks/post-receive script.

ppm-guest pushed a commit to annotated tag v0.32
in repository libmath-prime-util-perl.

commit 8ce592abcfe4336f20ee0a3ea8a79709564b80bd
Author: Dana Jacobsen <dana at acm.org>
Date:   Tue Sep 17 01:11:00 2013 -0700

    Montgomery reduction for all Lucas tests
---
 Changes     |  3 ++
 primality.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 81 insertions(+), 21 deletions(-)

diff --git a/Changes b/Changes
index 316fda4..4c161d3 100644
--- a/Changes
+++ b/Changes
@@ -29,6 +29,9 @@ Revision history for Perl module Math::Prime::Util
       is_prime, and is_strong_pseudoprime for numbers > 2^32 on x86_64.
       This also help things like prime_iterator for values > 2^32.
 
+    - Montgomery reduction used in Lucas and Frobenius tests.  Up to 2x
+      speedup for 33 to 64-bit inputs on x86_64/gcc platforms.
+
     - Primality functions moved to their own file primality.c.
 
 0.31  2013-08-07
diff --git a/primality.c b/primality.c
index e69508a..986dd60 100644
--- a/primality.c
+++ b/primality.c
@@ -97,7 +97,7 @@ static INLINE uint64_t modular_inverse64(const uint64_t a)
 }
 static INLINE uint64_t compute_modn64(const uint64_t n)
 {
-  
+
   if (n <= (1ULL << 63)) {
     uint64_t res = ((1ULL << 63) % n) << 1;
     return res < n ? res : res-n;
@@ -367,7 +367,7 @@ void lucas_seq(UV* Uret, UV* Vret, UV* Qkret, UV n, IV P, IV Q, UV k)
         UV t2 = mulmod(U, Dmod, n);
         U = muladdmod(U, Pmod, V, n);
         if (U & 1) { U = (n>>1) + (U>>1) + 1; } else { U >>= 1; }
-        V = muladdmod(V, P, t2, n);
+        V = muladdmod(V, Pmod, t2, n);
         if (V & 1) { V = (n>>1) + (V>>1) + 1; } else { V >>= 1; }
       }
     }
@@ -401,7 +401,7 @@ void lucas_seq(UV* Uret, UV* Vret, UV* Qkret, UV n, IV P, IV Q, UV k)
         UV t2 = mulmod(U, Dmod, n);
         U = muladdmod(U, Pmod, V, n);
         if (U & 1) { U = (n>>1) + (U>>1) + 1; } else { U >>= 1; }
-        V = muladdmod(V, P, t2, n);
+        V = muladdmod(V, Pmod, t2, n);
         if (V & 1) { V = (n>>1) + (V>>1) + 1; } else { V >>= 1; }
         Qk = mulmod(Qk, Qmod, n);
       }
@@ -462,33 +462,90 @@ int _XS_is_lucas_pseudoprime(UV n, int strength)
     while ( (d & 1) == 0 ) {  s++;  d >>= 1; }
 
 #if USE_MONT_PRIMALITY
-  if (n > UVCONST(4294967295) && strength == 0 && P == 1 && Q == -1) {
+  if (n > UVCONST(4294967295)) {
     const uint64_t npi = modular_inverse64(-((int64_t)n));
     const uint64_t mont1 = compute_modn64(n);
     const uint64_t mont2 = compute_2_65_mod_n(n, mont1);
-    const uint64_t mont5 = compute_a_times_2_64_mod_n(5, n, mont1);
-    int sign = -1;
+    const uint64_t montP = (P == 1) ? mont1
+                         : (P >= 0) ? compute_a_times_2_64_mod_n(P, n, mont1)
+                         : n - compute_a_times_2_64_mod_n(-P, n, mont1);
+    const uint64_t montD = (D >= 0) ? compute_a_times_2_64_mod_n(D, n, mont1)
+                         : n - compute_a_times_2_64_mod_n(-D, n, mont1);
     UV b;
     { UV v = d; b = 1; while (v >>= 1) b++; }
 
+    /* U, V, Qk, and mont* are in Montgomery space */
     U = mont1;
-    V = mont1;
-    while (b > 1) {
-      U = mont_prod64(U, V, n, npi);
-      if (sign == 1) V = submod( mont_square64(V,n,npi), mont2, n);
-      else           V = addmod( mont_square64(V,n,npi), mont2, n);
-      sign = 1;   /* Qk *= Qk */
-      b--;
-      if ( (d >> (b-1)) & UVCONST(1) ) {
-        UV t2 = mont_prod64(U, mont5, n, npi); /* D = 5 */
-        U = addmod(U, V, n);
-        if (U & 1) { U = (n>>1) + (U>>1) + 1; } else { U >>= 1; }
-        V = addmod(V, t2, n);
-        if (V & 1) { V = (n>>1) + (V>>1) + 1; } else { V >>= 1; }
-        sign = -1;  /* Qk *= Q */
+    V = montP;
+
+    if (Q == 1 || Q == -1) {   /* Faster code for |Q|=1, also opt for P=1 */
+      int sign = Q;
+      while (b > 1) {
+        U = mont_prod64(U, V, n, npi);
+        if (sign == 1) V = submod( mont_square64(V,n,npi), mont2, n);
+        else           V = addmod( mont_square64(V,n,npi), mont2, n);
+        sign = 1;
+        b--;
+        if ( (d >> (b-1)) & UVCONST(1) ) {
+          UV t2 = mont_prod64(U, montD, n, npi);
+          if (P == 1) {
+            U = addmod(U, V, n);
+            V = addmod(V, t2, n);
+          } else {
+            U = addmod( mont_prod64(U, montP, n, npi), V, n);
+            V = addmod( mont_prod64(V, montP, n, npi), t2, n);
+          }
+          if (U & 1) { U = (n>>1) + (U>>1) + 1; } else { U >>= 1; }
+          if (V & 1) { V = (n>>1) + (V>>1) + 1; } else { V >>= 1; }
+          sign = Q;
+        }
+      }
+      Qk = (sign == 1) ? mont1 : n-mont1;
+    } else {
+      const uint64_t montQ = (Q >= 0) ? compute_a_times_2_64_mod_n(Q, n, mont1)
+                           : n - compute_a_times_2_64_mod_n(-Q, n, mont1);
+      Qk = montQ;
+      while (b > 1) {
+        U = mont_prod64(U, V, n, npi);
+        V = submod( mont_square64(V,n,npi), addmod(Qk,Qk,n), n);
+        Qk = mont_square64(Qk,n,npi);
+        b--;
+        if ( (d >> (b-1)) & UVCONST(1) ) {
+          UV t2 = mont_prod64(U, montD, n, npi);
+          U = addmod( mont_prod64(U, montP, n, npi), V, n);
+          if (U & 1) { U = (n>>1) + (U>>1) + 1; } else { U >>= 1; }
+          V = addmod( mont_prod64(V, montP, n, npi), t2, n);
+          if (V & 1) { V = (n>>1) + (V>>1) + 1; } else { V >>= 1; }
+          Qk = mont_prod64(Qk, montQ, n, npi);
+        }
       }
     }
-    return (U == 0);
+    if (strength == 0) {
+      if (U == 0)
+        return 1;
+    } else if (strength == 1) {
+      if (U == 0)
+        return 1;
+      while (s--) {
+        if (V == 0)
+          return 1;
+        if (s) {
+          V = submod( mont_square64(V,n,npi), addmod(Qk,Qk,n), n);
+          Qk = mont_square64(Qk,n,npi);
+        }
+      }
+    } else {
+      if ( U == 0 && (V == mont2 || V == (n-mont2)) )
+        return 1;
+      s--;
+      while (s--) {
+        if (V == 0)
+          return 1;
+        if (s)
+          V = submod( mont_square64(V,n,npi), mont2, n);
+      }
+    }
+    return 0;
   }
 #endif
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git