[libmath-prime-util-perl] 33/72: Revamp input processing to accept bigints

Thu May 21 18:49:38 UTC 2015

This is an automated email from the git hooks/post-receive script.

ppm-guest pushed a commit to annotated tag v0.32
in repository libmath-prime-util-perl.

commit 67f3d32f721be4648788dcaeb1fa159e735dd0bf
Author: Dana Jacobsen <dana at acm.org>
Date:   Tue Sep 17 17:21:18 2013 -0700

    Revamp input processing to accept bigints
---
 Changes                |  3 ++
 XS.xs                  | 92 ++++++++++++++++++++++++++++++--------------------
 lib/Math/Prime/Util.pm | 30 +++++++---------
 t/81-bignum.t          |  2 +-
 util.c                 | 44 +++++++++++++-----------
 5 files changed, 96 insertions(+), 75 deletions(-)

diff --git a/Changes b/Changes
index c389969..53c22e5 100644
--- a/Changes
+++ b/Changes
@@ -8,6 +8,9 @@ Revision history for Perl module Math::Prime::Util
       - carmichael_lambda
       - znorder
 
+    - Input validation accepts bigint objects and converts them to scalars
+      entirely in XS.
+
     - random_nbit_prime now uses Fouque and Tibouchi A1.  Slightly better
       uniformity and typically a bit faster.
 
diff --git a/XS.xs b/XS.xs
index 99c0e64..c1f7921 100644
--- a/XS.xs
+++ b/XS.xs
@@ -19,14 +19,25 @@
 #include "lehmer.h"
 #include "aks.h"
 
-/* Workaround perl 5.6 UVs */
+#ifdef WIN32
+  #ifdef _MSC_VER
+    #include <stdlib.h>
+    #define PSTRTOULL(str, end, base) _strtoui64 (str, end, base)
+  #else
+    #define PSTRTOULL(str, end, base) strtoul (str, end, base)
+  #endif
+#else
+  #define PSTRTOULL(str, end, base) strtoull (str, end, base)
+#endif
+
+
+/* Workaround perl 5.6 UVs and bigints in later */
 #if PERL_REVISION <= 5 && PERL_VERSION <= 6 && BITS_PER_WORD == 64
- /* This could be blown up with a wacky string, but it's just for 5.6 */
  #define set_val_from_sv(val, sv) \
-   { char*ptr = SvPV_nolen(sv); val = Strtoul(ptr, NULL, 10); }
+   val = PSTRTOULL(SvPV_nolen(sv), NULL, 10);
 #else
  #define set_val_from_sv(val, sv) \
-   val = SvUV(sv)
+   val = (!SvROK(sv)) ? SvUV(sv) : PSTRTOULL(SvPV_nolen(sv), NULL, 10);
 #endif
 
 /* multicall compatibility stuff */
@@ -52,6 +63,14 @@ static int pbrent_factor_a1(UV n, UV *factors, UV maxrounds) {
   return pbrent_factor(n, factors, maxrounds, 1);
 }
 
+#if BITS_PER_WORD == 32
+  static const unsigned int uvmax_maxlen = 10;
+  static const char uvmax_str[] = "4294967295";
+#else
+  static const unsigned int uvmax_maxlen = 20;
+  static const char uvmax_str[] = "18446744073709551615";
+#endif
+
 /* Is this a pedantically valid integer?
  * Croaks if undefined or invalid.
  * Returns 0 if it is an object or a string too large for a UV.
@@ -62,38 +81,32 @@ static int _validate_int(SV* n, int negok)
   dTHX;
   char* ptr;
   STRLEN i, len;
-  UV val;
   int isneg = 0;
 
   if (!SvOK(n))  croak("Parameter must be defined");
-  /* aside: to detect bigint: if ( SvROK(n) && sv_isa(n, "Math::BigInt") ) */
-  if (SvROK(n))  return 0;
-  /* Perhaps SvPVbyte, or other UTF8 stuff? */
-  ptr = SvPV(n, len);
-  if (len == 0)  croak("Parameter '' must be a positive integer");
-  for (i = 0; i < len; i++) {
-    if (!isDIGIT(ptr[i])) {
-      if (i == 0 && ptr[i] == '-' && negok)
-        isneg = 1;
-      else if (i == 0 && ptr[i] == '+')
-        /* Allowed */ ;
-      else
-        croak("Parameter '%s' must be a positive integer", ptr); /* TODO NULL */
-    }
+  if (SvROK(n) && !sv_isa(n, "Math::BigInt"))  return 0;
+  ptr = SvPV(n, len);  /* This will stringify bigints for us, yay */
+  if (len == 0 || ptr == 0)  croak("Parameter '' must be a positive integer");
+  if (ptr[0] == '-') {                 /* Read negative sign */
+    if (negok) { isneg = 1; ptr++; len--; }
+    else       croak("Parameter '%s' must be a positive integer", ptr);
   }
-  if (isneg) return -1;  /* It's a valid negative number */
-  set_val_from_sv(val, n);
-  if (val == UV_MAX) { /* Could be bigger than UV_MAX.  Need to find out. */
-    char vstr[40];
-    sprintf(vstr, "%"UVuf, val);
-    /* Skip possible leading zeros */
-    while (len > 0 && (*ptr == '0' || *ptr == '+'))
-      { ptr++; len--; }
-    for (i = 0; i < len; i++)
-      if (vstr[i] != ptr[i])
-        return 0;
-  }
-  return 1;
+  if (ptr[0] == '+') { ptr++; len--; } /* Allow a single plus sign */
+  while (len > 0 && *ptr == '0')       /* Strip all leading zeros */
+    { ptr++; len--; }
+  if (len > uvmax_maxlen)              /* Huge number, don't even look at it */
+    return 0;
+  for (i = 0; i < len; i++)            /* Ensure all characters are digits */
+    if (!isDIGIT(ptr[i]))
+      croak("Parameter '%s' must be a positive integer", ptr);
+  if (isneg)                           /* Negative number (ignore overflow) */
+    return -1;
+  if (len < uvmax_maxlen)              /* Valid small integer */
+    return 1;
+  for (i = 0; i < uvmax_maxlen; i++)   /* Check if in range */
+    if (ptr[i] > uvmax_str[i])
+      return 0;
+  return 1;                            /* Looks good */
 }
 
 /* Call a Perl sub to handle work for us.
@@ -513,19 +526,19 @@ _XS_is_almost_extra_strong_lucas_pseudoprime(IN UV n, IN UV increment = 1)
     RETVAL
 
 int
-is_prime(IN SV* n)
+is_prime(IN SV* svn)
   ALIAS:
     is_prob_prime = 1
   PREINIT:
     int status;
   PPCODE:
-    status = _validate_int(n, 1);
+    status = _validate_int(svn, 1);
     if (status == -1) {
       XSRETURN_UV(0);
     } else if (status == 1) {
-      UV val;
-      set_val_from_sv(val, n);
-      XSRETURN_UV(_XS_is_prime(val));
+      UV n;
+      set_val_from_sv(n, svn);
+      XSRETURN_UV(_XS_is_prime(n));
     } else {
       const char* sub = 0;
       if (_XS_get_callgmp())
@@ -693,6 +706,11 @@ _validate_num(SV* n, ...)
   CODE:
     RETVAL = 0;
     if (_validate_int(n, 0)) {
+      if (SvROK(n)) {  /* Convert small Math::BigInt object into scalar */
+        UV val;
+        set_val_from_sv(val, n);
+        sv_setuv(n, val);
+      }
       if (items > 1 && SvOK(ST(1))) {
         UV val, min, max;
         set_val_from_sv(val, n);
diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm
index ea1177b..e0ea514 100644
--- a/lib/Math/Prime/Util.pm
+++ b/lib/Math/Prime/Util.pm
@@ -1492,13 +1492,6 @@ sub _generic_forprimes (&$;$) {    ## no critic qw(ProhibitSubroutinePrototypes)
   if (!defined $end) { $end = $beg; $beg = 2; }
   _validate_num($beg) || _validate_positive_integer($beg);
   _validate_num($end) || _validate_positive_integer($end);
-  # It's possible we're here just because the arguments were bigints < 2^64
-  # TODO: make a function to convert native size bigints to UVs, and let the
-  #       XS functions call that, so we don't do these loop-de-loops.  This
-  #       also has nasty coupling with the XS implementation.
-  if (!ref($beg) && !ref($end) && $beg <= $_XS_MAXVAL && $end <= $_XS_MAXVAL && $] >= 5.007) {
-    return forprimes( \&$sub, $beg, $end);
-  }
   $beg = 2 if $beg < 2;
   {
     my $pp;
@@ -4100,11 +4093,11 @@ is 40 by default).  Accuracy without MPFR should be 35 digits.
 
 Print strong pseudoprimes to base 17 up to 10M:
 
+    # Similar to A001262's isStrongPsp function, but over 4x faster
     perl -MMath::Prime::Util=:all -E 'my $n=3; while($n <= 10000000) { print "$n " if is_strong_pseudoprime($n,$base) && !is_prime($n); $n+=2; } BEGIN {$|=1; $base=17}'
 
 or, slightly faster, use forprimes and loop over the odds between primes:
 
-   # Runs about 5x faster than Pari using A001262's isStrongPsp function
    perl -MMath::Prime::Util=:all -E '$|=1; $base=17; my $prev = 1; forprimes { $prev += 2; while ($prev < $_) { print "$prev " if is_strong_pseudoprime($prev,$base); $prev += 2; } } 3,10000000'
 
 Print some primes above 64-bit range:
@@ -4272,8 +4265,10 @@ handle using it.  There are still some functions it doesn't do well
 L<Math::Prime::XS> has C<is_prime> and C<primes> functionality.  There is no
 bigint support.  The C<is_prime> function uses well-written trial division,
 meaning it is very fast for small numbers, but terribly slow for large
-64-bit numbers.  With the latest release, MPU should be faster for all sizes.
-The prime sieve is an unoptimized non-segmented SoE which returns an
+64-bit numbers.  Because MPU does input validation and bigint conversion,
+there is about 20 microseconds of additional overhead making MPXS a little
+faster for tiny inputs, but once over 700k, MPU is faster for all values.
+MPXS's prime sieve is an unoptimized non-segmented SoE which returns an
 array.  It works well for 32-bit values, but speed and memory are problematic
 for larger values.
 
@@ -4287,8 +4282,7 @@ All this functionality is present in MPU as well, though not required.
 
 L<Bit::Vector> supports the C<primes> and C<prime_count> functionality in a
 somewhat similar way to L<Math::Prime::FastSieve>.  It is the slowest of all
-the XS sieves, and has the most memory use.  It is, however, faster than
-the pure Perl code in MPU or elsewhere.
+the XS sieves, and has the most memory use.  It is faster than pure Perl code.
 
 L<Crypt::Primes> supports C<random_maurer_prime> functionality.  MPU has
 more options for random primes (n-digit, n-bit, ranged, and strong) in
@@ -4302,7 +4296,7 @@ Having L<Math::Prime::Util::GMP> installed also helps performance for MPU.
 Crypt::Primes is hardcoded to use L<Crypt::Random>, while MPU uses
 L<Bytes::Random::Secure>, and also allows plugging in a random function.
 This is more flexible, faster, has fewer dependencies, and uses a CSPRNG
-for security.
+for security.  MPU can return a primality certificate.
 What Crypt::Primes has that MPU does not is support for returning a generator.
 
 L<Math::Factor::XS> calculates prime factors and factors, which correspond to
@@ -4674,15 +4668,15 @@ result is uniformly distributed, only about 10% of the primes in the range
 are selected for output.  This is a result of the FastPrime algorithm and
 is usually unimportant.
 
-L<Crypt::Primes/maurer> is included for comparison.  It is pretty fast for
-small sizes but gets slow as the size increases.  It does not perform any
-primality checks on the intermediate results or the final result (I highly
-recommended you run a primality test on the output).
+L<Crypt::Primes/maurer> times are included for comparison.  It is pretty
+fast for small sizes but gets slow as the size increases.  It does not
+perform any primality checks on the intermediate results or the final
+result (I highly recommended you run a primality test on the output).
 Additionally important for servers, L<Crypt::Primes/maurer> uses excessive
 system entropy and can grind to a halt if C</dev/random> is exhausted
 (it can take B<days> to return).  The times above are on a machine running
 L<HAVEGED|http://www.issihosts.com/haveged/>
-so never waits for entropy.
+so never waits for entropy.  Without this, the times would be much higher.
 
 
 =head1 AUTHORS
diff --git a/t/81-bignum.t b/t/81-bignum.t
index 77fe482..edca71c 100644
--- a/t/81-bignum.t
+++ b/t/81-bignum.t
@@ -209,7 +209,7 @@ SKIP: {
 ###############################################################################
 
 SKIP: {
-  skip "Your 64-bit Perl is broken, skipping moebius and euler_phi tests", 6 if $broken64;
+  skip "Your 64-bit Perl is broken, skipping moebius and euler_phi tests", 7 if $broken64;
   my $n;
   $n = 618970019642690137449562110;
   is( moebius($n), -1, "moebius($n)" );
diff --git a/util.c b/util.c
index 17d927b..7eaaad7 100644
--- a/util.c
+++ b/util.c
@@ -147,30 +147,36 @@ static const unsigned char prime_sieve30[] =
 /* Return of 2 if n is prime, 0 if not.  Do it fast. */
 int _XS_is_prime(UV n)
 {
-  UV d, m;
-  unsigned char mtab;
-  const unsigned char* sieve;
-  int isprime;
+  if (n < UVCONST(2000000000)) {
+    UV d, m;
+    unsigned char mtab;
+    const unsigned char* sieve;
+    int isprime = -1;
 
-  if (n <= 10)  return (n == 2 || n == 3 || n == 5 || n == 7) ? 2 : 0;
-  d = n/30;
-  m = n - d*30;
-  mtab = masktab30[m];  /* Bitmask in mod30 wheel */
+    if (n <= 10)  return (n == 2 || n == 3 || n == 5 || n == 7) ? 2 : 0;
 
-  /* Return 0 if a multiple of 2, 3, or 5 */
-  if (mtab == 0)
-    return 0;
+    d = n/30;
+    m = n - d*30;
+    mtab = masktab30[m];  /* Bitmask in mod30 wheel */
 
-  if (d < NPRIME_SIEVE30)
-    return (prime_sieve30[d] & mtab) ? 0 : 2;
+    /* Return 0 if a multiple of 2, 3, or 5 */
+    if (mtab == 0)
+      return 0;
 
-  isprime = (n <= get_prime_cache(0, &sieve))
-            ?  2*((sieve[d] & mtab) == 0)
-            :  -1;
-  release_prime_cache(sieve);
+    /* Check static tiny sieve */
+    if (d < NPRIME_SIEVE30)
+      return (prime_sieve30[d] & mtab) ? 0 : 2;
+
+    if (!(n%7) || !(n%11) || !(n%13)) return 0;
 
-  /* return (isprime >= 0)  ?  isprime  :  _is_prime7(n); */
-  return (isprime >= 0)  ?  isprime  :  _XS_is_prob_prime(n);
+    /* Check primary cache */
+    if (n <= get_prime_cache(0, &sieve))
+      isprime = 2*((sieve[d] & mtab) == 0);
+    release_prime_cache(sieve);
+    if (isprime >= 0)
+      return isprime;
+  }
+  return _XS_is_prob_prime(n);
 }
 
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git