[libmath-prime-util-perl] 03/06: random primes, and no asserts

Thu May 21 18:44:20 UTC 2015

This is an automated email from the git hooks/post-receive script.

ppm-guest pushed a commit to annotated tag v0.04
in repository libmath-prime-util-perl.

commit 0e4fe85d6b2b46549a92b7b0d007960b3d921803
Author: Dana Jacobsen <dana at acm.org>
Date:   Thu Jun 7 21:07:46 2012 -0600

    random primes, and no asserts
---
 Changes                        |  4 ++
 TODO                           |  9 ++--
 XS.xs                          |  7 ++-
 examples/bench-random-prime.pl | 20 +++++++++
 factor.c                       | 27 ++++++------
 lib/Math/Prime/Util.pm         | 99 +++++++++++++++++++++++++++++++++++++++++-
 ptypes.h                       |  2 +
 sieve.c                        |  8 ++--
 util.c                         | 14 +++---
 9 files changed, 153 insertions(+), 37 deletions(-)

diff --git a/Changes b/Changes
index 69b2f9b..64a99b7 100644
--- a/Changes
+++ b/Changes
@@ -3,6 +3,10 @@ Revision history for Perl extension Math::Prime::Util.
 0.04  7 June 2012
     - Didn't do tests on 32-bit machine before release.  Test suite caught
       problem with next_prime overflow.
+    - Try to use 64-bit modulo math even when Perl is 32-bit.  It can make
+      is_prime run up to 10x faster (which impacts next_prime, factoring, etc.)
+    - replace all assert with croak indicating an internal error.
+    - Add random_prime and random_ndigit_prime
 
 0.03  6 June 2012
     - Speed up factoring.
diff --git a/TODO b/TODO
index 9ce9eb8..840db11 100644
--- a/TODO
+++ b/TODO
@@ -17,9 +17,6 @@
 
 - Faster SQUFOF
 
-- random_prime( {bits => 32,
-                 digits => 4,
-                 between => '4 and 100',
-                 between_nth => '26 and 42', } )
-  for super huge bases, we can use approx functions to get center, generate
-  a range, and then do random selection within it.
+- prime count bounds are <=, nth_prime bounds are <.  Pick one.
+
+- test file for random_prime
diff --git a/XS.xs b/XS.xs
index 9c02513..1d23c6e 100644
--- a/XS.xs
+++ b/XS.xs
@@ -149,8 +149,8 @@ segment_primes(IN UV low, IN UV high, IN UV segment_size = 65536UL)
         UV segbase = low_d * 30;
         /* printf("  startd = %"UVuf"  endd = %"UVuf"\n", startd, endd); */
   
-        assert( seghigh_d >= low_d );
-        assert( range_d <= segment_size );
+        MPUassert( seghigh_d >= low_d, "segment_primes highd < lowd");
+        MPUassert( range_d <= segment_size, "segment_primes range > segment size");
 
         /* Sieve from startd*30+1 to endd*30+29.  */
         if (sieve_segment(sieve, low_d, seghigh_d) == 0) {
@@ -262,14 +262,13 @@ factor(IN UV n)
             /* For sufficiently large n, try more complex methods. */
             /* SQUFOF (succeeds 98-99.9%) */
             split_success = squfof_factor(n, factor_stack+nstack, 256*1024)-1;
-            assert( (split_success == 0) || (split_success == 1) );
             /* a few rounds of Pollard rho (succeeds most of the rest) */
             if (!split_success) {
               split_success = prho_factor(n, factor_stack+nstack, 400)-1;
-              assert( (split_success == 0) || (split_success == 1) );
             }
           }
           if (split_success) {
+            MPUassert( split_success == 1, "split factor returned more than 2 factors");
             nstack++;
             n = factor_stack[nstack];
           } else {
diff --git a/examples/bench-random-prime.pl b/examples/bench-random-prime.pl
new file mode 100755
index 0000000..3e5c9e9
--- /dev/null
+++ b/examples/bench-random-prime.pl
@@ -0,0 +1,20 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+
+use Math::Prime::Util qw/random_prime random_ndigit_prime/;
+use Benchmark qw/:all/;
+use List::Util qw/min max/;
+my $count = shift || -3;
+
+srand(29);
+test_at_digits($_) for (2 .. 10);
+
+sub test_at_digits {
+  my $digits = shift;
+  die "Digits must be > 0" unless $digits > 0;
+
+  cmpthese($count,{
+    "$digits digits" => sub { random_ndigit_prime($digits) for (1..10000) },
+  });
+}
diff --git a/factor.c b/factor.c
index 59e4314..76a7ba5 100644
--- a/factor.c
+++ b/factor.c
@@ -1,7 +1,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <assert.h>
 #include <limits.h>
 #define __STDC_LIMIT_MACROS
 #include <stdint.h>
@@ -169,7 +168,7 @@ int miller_rabin(UV n, const UV *bases, UV nbases)
   int s = 0;
   UV d = n-1;
 
-  assert(n > 3);
+  MPUassert(n > 3, "MR called with n <= 3");
 
   while ( (d&1) == 0 ) {
     s++;
@@ -298,7 +297,7 @@ int fermat_factor(UV n, UV *factors, UV rounds)
 {
   IV sqn, x, y, r;
 
-  assert( (n >= 3) && ((n%2) != 0) );
+  MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in fermat_factor");
 
   sqn = sqrt((double) n);
   x = 2 * sqn + 1;
@@ -317,7 +316,7 @@ int fermat_factor(UV n, UV *factors, UV rounds)
   if ( (r != 1) && (r != n) ) {
     factors[0] = r;
     factors[1] = n/r;
-    assert( factors[0] * factors[1] == n );
+    MPUassert( factors[0] * factors[1] == n , "incorrect factoring");
     return 2;
   }
   factors[0] = n;
@@ -331,7 +330,7 @@ int holf_factor(UV n, UV *factors, UV rounds)
 {
   UV i, s, m, f;
 
-  assert( (n >= 3) && ((n%2) != 0) );
+  MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in holf_factor");
 
   for (i = 1; i <= rounds; i++) {
     s = sqrt(n*i);                      /* TODO: overflow here */
@@ -352,7 +351,7 @@ int holf_factor(UV n, UV *factors, UV rounds)
         break;
       factors[0] = f;
       factors[1] = n/f;
-      assert( factors[0] * factors[1] == n );
+      MPUassert( factors[0] * factors[1] == n , "incorrect factoring");
       return 2;
     }
   }
@@ -372,7 +371,7 @@ int pbrent_factor(UV n, UV *factors, UV rounds)
   UV Xi = 2;
   UV Xm = 2;
 
-  assert( (n >= 3) && ((n%2) != 0) );
+  MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in pbrent_factor");
 
   switch (n%4) {
     case 0:  a =  1; break;
@@ -388,7 +387,7 @@ int pbrent_factor(UV n, UV *factors, UV rounds)
     if ( (f != 1) && (f != n) ) {
       factors[0] = f;
       factors[1] = n/f;
-      assert( factors[0] * factors[1] == n );
+      MPUassert( factors[0] * factors[1] == n , "incorrect factoring");
       return 2;
     }
     if ( (i & (i-1)) == 0)   /* i is a power of 2 */
@@ -410,7 +409,7 @@ int prho_factor(UV n, UV *factors, UV rounds)
   UV U = 7;
   UV V = 7;
 
-  assert( (n >= 3) && ((n%2) != 0) );
+  MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in prho_factor");
 
   switch (n%4) {
     case 0:  a =  5; break;
@@ -431,7 +430,7 @@ int prho_factor(UV n, UV *factors, UV rounds)
     } else if (f != 1) {
       factors[0] = f;
       factors[1] = n/f;
-      assert( factors[0] * factors[1] == n );
+      MPUassert( factors[0] * factors[1] == n , "incorrect factoring");
       return 2;
     }
   }
@@ -449,7 +448,7 @@ int pminus1_factor(UV n, UV *factors, UV rounds)
   UV f, i;
   UV kf = 13;
 
-  assert( (n >= 3) && ((n%2) != 0) );
+  MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in pminus1_factor");
 
   for (i = 1; i <= rounds; i++) {
     kf = powmod(kf, i, n);
@@ -458,7 +457,7 @@ int pminus1_factor(UV n, UV *factors, UV rounds)
     if ( (f != 1) && (f != n) ) {
       factors[0] = f;
       factors[1] = n/f;
-      assert( factors[0] * factors[1] == n );
+      MPUassert( factors[0] * factors[1] == n , "incorrect factoring");
       return 2;
     }
   }
@@ -484,7 +483,7 @@ int squfof_factor(UV n, UV *factors, UV rounds)
   IV jter, iter;
   int reloop;
 
-  assert( (n >= 3) && ((n%2) != 0) );
+  MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in squfof_factor");
 
   /* TODO:  What value of n leads to overflow? */
 
@@ -593,7 +592,7 @@ int squfof_factor(UV n, UV *factors, UV rounds)
 
   factors[0] = p;
   factors[1] = q;
-  assert( factors[0] * factors[1] == n );
+  MPUassert( factors[0] * factors[1] == n , "incorrect factoring");
   return 2;
 }
 
diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm
index 70e93b9..357400e 100644
--- a/lib/Math/Prime/Util.pm
+++ b/lib/Math/Prime/Util.pm
@@ -18,6 +18,7 @@ our @EXPORT_OK = qw(
                      next_prime  prev_prime
                      prime_count prime_count_lower prime_count_upper prime_count_approx
                      nth_prime nth_prime_lower nth_prime_upper nth_prime_approx
+                     random_prime random_ndigit_prime
                      factor
                    );
 our %EXPORT_TAGS = (all => [ @EXPORT_OK ]);
@@ -35,7 +36,8 @@ BEGIN {
 }
 
 
-my $_maxparam = (_maxbits == 32) ? 4294967295 : 18446744073709551615;
+my $_maxparam  = (_maxbits == 32) ? 4294967295 : 18446744073709551615;
+my $_maxdigits = (_maxbits == 32) ? 10 : 20;
 
 sub primes {
   my $optref = {};  $optref = shift if ref $_[0] eq 'HASH';
@@ -95,6 +97,63 @@ sub primes {
   return $sref;
 }
 
+sub random_prime {
+  my $low = (@_ == 2)  ?  shift  :  2;
+  my $high = shift;
+  if ( (!defined $low) || (!defined $high) ||
+       ($low =~ tr/0123456789//c) || ($high =~ tr/0123456789//c)
+     ) {
+    croak "Parameters [ $low $high ] must be positive integers";
+  }
+  croak "Parameters [ $low $high ] not in range 0-$_maxparam" unless $low <= $_maxparam && $high <= $_maxparam;
+  $low = 2 if $low < 2;
+
+  # Make sure we have a valid range.
+  $low = next_prime($low-1);
+  $high = ($high < ~0) ? prev_prime($high+1) : prev_prime($high);
+  return $low if ($low == $high) && is_prime($low);
+  return if $low >= $high;
+
+  # At this point low and high are both primes, and low < high.
+  my $range = $high - $low + 1;
+  my $prime;
+
+  if ($high < 30000) {
+    # nice deterministic solution, but gets very costly with large values.
+    my $li = ($low == 2) ? 1 : prime_count($low);
+    my $hi = prime_count($high);
+    my $irange = $hi - $li + 1;
+    my $rand = int(rand($irange));
+    $prime = nth_prime($li + $rand);
+  } else {
+    # random loop
+    if ($range <= 4294967295) {
+      do {
+        $prime = $low + int(rand($range));
+      }  while ( !($prime%2) && !($prime%3) && !is_prime($prime) );
+    } else {
+      do {
+        my $rand = ( (int(rand(4294967295)) << 32) + int(rand(4294967295)) ) % $range;
+        $prime = $low + $rand;
+      }  while ( !($prime%2) && !($prime%3) && !is_prime($prime) );
+    }
+  }
+  return $prime;
+}
+
+sub random_ndigit_prime {
+  my $digits = shift;
+  if ((!defined $digits) || ($digits > $_maxdigits) || ($digits < 1)) {
+    croak "Digits must be between 1 and $_maxdigits";
+  }
+  my $low = ($digits == 1) ? 0 : int(10 ** ($digits-1));
+  my $max = int(10 ** $digits);
+  $max = ~0 if $max > ~0;
+  return random_prime($low, $max);
+}
+
+# Perhaps a random_nbit_prime ?   Definition?
+
 # We use this object to let them control when memory is freed.
 package Math::Prime::Util::MemFree;
 use Carp qw/croak confess/;
@@ -199,6 +258,12 @@ Version 0.04
   my $mf = Math::Prime::Util::MemFree->new;
 
 
+  # Random primes
+  my $small_prime = random_prime(1000);      # random prime <= limit
+  my $rand_prime = random_prime(100, 10000); # random prime within a range
+  my $rand_prime = random_ndigit_prime(6);   # random 6-digit prime
+
+
 =head1 DESCRIPTION
 
 A set of utilities related to prime numbers.  These include multiple sieving
@@ -393,6 +458,38 @@ then always be 0 (composite) or 2 (prime).  A later implementation may switch
 to a BPSW test, depending on speed.
 
 
+=head2 random_prime
+
+  my $small_prime = random_prime(1000);      # random prime <= limit
+  my $rand_prime = random_prime(100, 10000); # random prime within a range
+
+Returns a randomly selected prime that will be greater than or equal to the
+lower limit and less than or equal to the upper limit.  If no lower limit is
+given, 2 is implied.  Returns undef if no primes exist within the range.
+
+This will return a uniform distribution of the primes in the range, meaning
+for each prime in the range, the chances are equally likely that it will be
+seen.
+
+The current algorithm does a random index selection for small numbers, which
+is deterministic.  For larger numbers, this can be very slow, so the
+obvious Monte Carlo method is used, where random numbers in the range are
+selected until one is prime.
+
+The L<rand> function is called one or more times for selection.
+
+
+=head2 random_ndigit_prime
+
+  say "My 4-digit prime number is: ", random_ndigit_prime(4);
+
+Selects a random n-digit prime, where the input is an integer number of
+digits between 1 and the maximum native type (10 for 32-bit, 20 for 64-bit).
+One of the primes within that range (e.g. 1000 - 9999 for 4-digits) will be
+uniformly selected using the L<rand> function.
+
+
+
 =head1 UTILITY FUNCTIONS
 
 =head2 prime_precalc
diff --git a/ptypes.h b/ptypes.h
index 32887e8..cd4a2e2 100644
--- a/ptypes.h
+++ b/ptypes.h
@@ -41,4 +41,6 @@
 #define NWORDS(bits)  ( ((bits)+BITS_PER_WORD-1) / BITS_PER_WORD )
 #define NBYTES(bits)  ( ((bits)+8-1) / 8 )
 
+#define MPUassert(c,text) if (!(c)) { croak("Math::Prime::Util internal error: " text); }
+
 #endif
diff --git a/sieve.c b/sieve.c
index 0598c9a..e5003d3 100644
--- a/sieve.c
+++ b/sieve.c
@@ -1,7 +1,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <assert.h>
 #include <limits.h>
 #include <math.h>
 
@@ -135,7 +134,7 @@ unsigned char* sieve_erat30(UV end)
     if ( d < max_buf )  mem[d++] = 0x08;
     if ( d < max_buf )  mem[d++] = 0x04;
     if ( d < max_buf )  mem[d++] = 0x40;
-    assert(d >= max_buf);
+    MPUassert( d >= max_buf, "sieve_erat30 incorrect 7 sieve");
   }
   limit = sqrt((double) end);  /* prime*prime can overflow */
   for (prime = 11; prime <= limit; prime = next_prime_in_sieve(mem,prime)) {
@@ -188,9 +187,8 @@ int sieve_segment(unsigned char* mem, UV startd, UV endd)
   UV endp = (endd >= (UV_MAX/30))  ?  UV_MAX-2  :  30*endd+29;
   UV ranged = endd - startd + 1;
 
-  assert(mem != 0);
-  assert(endd >= startd);
-  assert(endp >= startp);
+  MPUassert( (mem != 0) && (endd >= startd) && (endp >= startp),
+             "sieve_segment bad arguments");
   memset(mem, 0, ranged);
 
   limit = sqrt( (double) endp ) + 1;
diff --git a/util.c b/util.c
index 71607b4..9b47e74 100644
--- a/util.c
+++ b/util.c
@@ -1,7 +1,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <assert.h>
 #include <limits.h>
 #include <math.h>
 
@@ -516,8 +515,9 @@ UV nth_prime_upper(UV n)
   else
     upper = fn * ( flogn + flog2n );
 
+  /* Special case to not overflow any 32-bit */
   if (upper > (double)UV_MAX)
-    return 0;
+    return (n <= UVCONST(203280221))  ?  UVCONST(4294967292)  :  0;
 
   return (UV) ceil(upper);
 }
@@ -581,8 +581,8 @@ static UV count_segment(const unsigned char* sieve, UV nbytes, UV maxcount, UV*
   UV count = 0;
   UV byte = 0;
 
-  assert(sieve != 0);
-  assert(pos != 0);
+  MPUassert(sieve != 0, "count_segment incorrect args");
+  MPUassert(pos != 0, "count_segment incorrect args");
   *pos = 0;
   if ( (nbytes == 0) || (maxcount == 0) )
     return 0;
@@ -594,7 +594,7 @@ static UV count_segment(const unsigned char* sieve, UV nbytes, UV maxcount, UV*
     count -= byte_zeros[sieve[--byte]];
   }
 
-  assert(count < maxcount);
+  MPUassert(count < maxcount, "count_segment wrong count");
 
   if (byte == nbytes)
     return count;
@@ -604,7 +604,7 @@ static UV count_segment(const unsigned char* sieve, UV nbytes, UV maxcount, UV*
     if (++count == maxcount)  { *pos = p; return count; }
   END_DO_FOR_EACH_SIEVE_PRIME;
 
-  croak("count_segment failed");
+  MPUassert(0, "count_segment failure");
   return 0;
 }
 
@@ -667,6 +667,6 @@ UV nth_prime(UV n)
     if (count < target)
       segbase += segment_size;
   }
-  assert(count == target);
+  MPUassert(count == target, "nth_prime got incorrect count");
   return ( (segbase*30) + p );
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git