[libmath-prime-util-perl] 02/04: Updates for 32-bit behavior

Thu May 21 18:44:14 UTC 2015

This is an automated email from the git hooks/post-receive script.

ppm-guest pushed a commit to annotated tag v0.02
in repository libmath-prime-util-perl.

commit 1bfc4ead9a36a415c3f02fc93b3f1b62b94586be
Author: Dana Jacobsen <dana at acm.org>
Date:   Mon Jun 4 17:29:15 2012 -0600

    Updates for 32-bit behavior
---
 Changes                |  8 ++++++++
 README                 |  2 +-
 TODO                   |  3 +++
 XS.xs                  | 42 +++++++++++++++++++++++++-----------------
 factor.c               | 23 +++++++++++++++++------
 lib/Math/Prime/Util.pm | 34 ++++++++++++++++++++--------------
 sieve.c                | 22 ++++++++++++----------
 t/03-init.t            |  4 +++-
 t/50-factoring.t       | 15 ++++++++++++++-
 util.c                 |  6 +++---
 10 files changed, 106 insertions(+), 53 deletions(-)

diff --git a/Changes b/Changes
index 1d2347f..ce9ee8b 100644
--- a/Changes
+++ b/Changes
@@ -1,4 +1,12 @@
 Revision history for Perl extension Math::Prime::Util.
 
+0.02  4 June 2012
+    - Back off new_ok to new/isa_ok to keep Test::More requirements low.
+    - Some documentation updates.
+    - I accidently used long in SQUFOF, which breaks LLP64.
+    - Test for broken 64-bit Perl.
+    - Fix overflow issues in segmented sieving.
+    - Switch to using UVuf for croaks.  What I should have done all along.
+
 0.01  4 June 2012
     - Initial release
diff --git a/README b/README
index 6ad8514..933f139 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-Math::Prime::Util version 0.01
+Math::Prime::Util version 0.02
 
 A set of utilities related to prime numbers.  These include multiple sieving
 methods, is_prime, prime_count, nth_prime, approximations and bounds for
diff --git a/TODO b/TODO
index 4180b9f..a227713 100644
--- a/TODO
+++ b/TODO
@@ -5,6 +5,9 @@
 
 - prime_count and nth_prime with very large inputs.
 
+- segment sieve should itself use a segment for its primes.
+  Today we'd need sqrt(2^64) max = 140MB.  Segmenting would yield under 1MB.
+
 - factoring (Fermat, Pollard Rho, HOLF, etc.)
 
 - Add test to check maxbits in compiled library vs. Perl
diff --git a/XS.xs b/XS.xs
index f91b9da..b188633 100644
--- a/XS.xs
+++ b/XS.xs
@@ -86,7 +86,7 @@ sieve_primes(IN UV low, IN UV high)
   CODE:
     if (low <= high) {
       if (get_prime_cache(high, &sieve) < high) {
-        croak("Could not generate sieve for %ld", high);
+        croak("Could not generate sieve for %"UVuf, high);
       } else {
         if ((low <= 2) && (high >= 2)) { av_push(av, newSVuv( 2 )); }
         if ((low <= 3) && (high >= 3)) { av_push(av, newSVuv( 3 )); }
@@ -125,6 +125,7 @@ segment_primes(IN UV low, IN UV high, IN UV segment_size = 65536UL)
   PREINIT:
     AV* av = newAV();
     unsigned char* sieve;
+    UV low_d, high_d;
   CODE:
     if ((low <= 2) && (high >= 2)) { av_push(av, newSVuv( 2 )); }
     if ((low <= 3) && (high >= 3)) { av_push(av, newSVuv( 3 )); }
@@ -134,27 +135,34 @@ segment_primes(IN UV low, IN UV high, IN UV segment_size = 65536UL)
       /* Call the segment siever one or more times */
       sieve = (unsigned char*) malloc( segment_size );
       if (sieve == 0)
-        croak("Could not allocate %lu bytes for segment sieve", segment_size);
-      while (low <= high) {
-        UV seghigh = ((high/30 - low/30) < segment_size)
-                          ?  high
-                          :  ( (low/30 + segment_size-1)*30+29 );
-        UV startd = low/30;
-        UV endd = seghigh/30;
-        UV ranged = endd - startd + 1;
-        assert(endd >= startd);
-        assert(ranged <= segment_size);
+        croak("Could not allocate %"UVuf" bytes for segment sieve", segment_size);
+
+      /* To protect vs. overflow, work entirely with d. */
+      low_d  = low  / 30;
+      high_d = high / 30;
+      while ( low_d <= high_d ) {
+        UV seghigh_d = ((high_d - low_d) < segment_size)
+                       ? high_d
+                       : (low_d + segment_size-1);
+        UV range_d = seghigh_d - low_d + 1;
+        UV seghigh = (seghigh_d == high_d) ? high : (seghigh_d*30+29);
+        UV segbase = low_d * 30;
+        /* printf("  startd = %"UVuf"  endd = %"UVuf"\n", startd, endd); */
+  
+        assert( seghigh_d >= low_d );
+        assert( range_d <= segment_size );
 
         /* Sieve from startd*30+1 to endd*30+29.  */
-        if (sieve_segment(sieve, startd, endd) == 0) {
-          croak("Could not segment sieve from %lu to %lu", startd*30+1, endd*30+29);
+        if (sieve_segment(sieve, low_d, seghigh_d) == 0) {
+          croak("Could not segment sieve from %"UVuf" to %"UVuf, segbase+1, seghigh);
           break;
         }
 
-        START_DO_FOR_EACH_SIEVE_PRIME( sieve, low-startd*30, seghigh-30*startd )
-          av_push(av,newSVuv( startd*30 + p ));
+        START_DO_FOR_EACH_SIEVE_PRIME( sieve, low - segbase, seghigh - segbase )
+          av_push(av,newSVuv( segbase + p ));
         END_DO_FOR_EACH_SIEVE_PRIME
 
+        low_d += range_d;
         low = seghigh+2;
       }
       free(sieve);
@@ -172,7 +180,7 @@ erat_primes(IN UV low, IN UV high)
     if (low <= high) {
       sieve = sieve_erat30(high);
       if (sieve == 0) {
-        croak("Could not generate sieve for %lu", high);
+        croak("Could not generate sieve for %"UVuf, high);
       } else {
         if ((low <= 2) && (high >= 2)) { av_push(av, newSVuv( 2 )); }
         if ((low <= 3) && (high >= 3)) { av_push(av, newSVuv( 3 )); }
@@ -199,7 +207,7 @@ erat_simple_primes(IN UV low, IN UV high)
     if (low <= high) {
       sieve = sieve_erat(high);
       if (sieve == 0) {
-        croak("Could not generate sieve for %ld", high);
+        croak("Could not generate sieve for %"UVuf, high);
       } else {
         if (low <= 2) { av_push(av, newSVuv( 2 )); low = 3; }
         low  = low/2;
diff --git a/factor.c b/factor.c
index 4957642..9ed9c11 100644
--- a/factor.c
+++ b/factor.c
@@ -9,6 +9,17 @@
 #include "util.h"
 #include "sieve.h"
 
+/*
+ * You need to remember to use UV for unsigned and IV for signed types that
+ * are large enough to hold our data.
+ *   If you use int, that's 32-bit on LP64 and LLP64 machines.  You lose.
+ *   If you use long, that's 32-bit on LLP64 machines.  You lose.
+ *   If you use long long, you may be too large which isn't so bad, but some
+ *                         compilers may not understand the type at all.
+ * perl.h already figured all this out, and provided us with these types which
+ * match the native integer type used inside our Perl, so just use those.
+ */
+
 
 int trial_factor(UV n, UV *factors, UV maxtrial)
 {
@@ -278,9 +289,9 @@ int pminus1_factor(UV n, UV *factors, UV rounds)
 
 /* My modification of Ben Buhrow's modification of Bob Silverman's SQUFOF code.
  * I like Jason P's code a lot -- I should put it in. */
-static long qqueue[100];
-static long qpoint;
-static void enqu(long q, long *iter) {
+static IV qqueue[100];
+static IV qpoint;
+static void enqu(IV q, IV *iter) {
   qqueue[qpoint] = q;
   if (++qpoint >= 100) *iter = -1;
 }
@@ -289,8 +300,8 @@ int squfof_factor(UV n, UV *factors, UV rounds)
 {
   int nfactors = 0;
   UV temp;
-  long iq,ll,l2,p,pnext,q,qlast,r,s,t,i;
-  long jter, iter;
+  IV iq,ll,l2,p,pnext,q,qlast,r,s,t,i;
+  IV jter, iter;
   int reloop;
 
   if ( (n < 2) ) {
@@ -320,7 +331,7 @@ int squfof_factor(UV n, UV *factors, UV rounds)
   }
 
   q = temp;              /* q = excess of n over next smaller square */
-  ll = 1 + 2*(long)sqrt((double)(p+p));
+  ll = 1 + 2*(IV)sqrt((double)(p+p));
   l2 = ll/2;
   qpoint = 0;
 
diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm
index 8087970..bfdd673 100644
--- a/lib/Math/Prime/Util.pm
+++ b/lib/Math/Prime/Util.pm
@@ -5,7 +5,7 @@ use Carp qw/croak confess/;
 
 BEGIN {
   $Math::Prime::Util::AUTHORITY = 'cpan:DANAJ';
-  $Math::Prime::Util::VERSION = '0.01';
+  $Math::Prime::Util::VERSION = '0.02';
 }
 
 # parent is cleaner, and in the Perl 5.10.1 / 5.12.0 core, but not earlier.
@@ -34,6 +34,8 @@ BEGIN {
 }
 
 
+my $_maxparam = (_maxbits == 32) ? 4294967295 : 18446744073709551615;
+
 sub primes {
   my $optref = {};  $optref = shift if ref $_[0] eq 'HASH';
   croak "no parameters to primes" unless scalar @_ > 0;
@@ -44,11 +46,14 @@ sub primes {
 
   # Validate parameters
   if ( (!defined $low) || (!defined $high) ||
-       #($low < 0) || ($high < 0) ||
        ($low =~ tr/0123456789//c) || ($high =~ tr/0123456789//c)
      ) {
-    croak "Parameters must be positive integers";
+    croak "Parameters [ $low $high ] must be positive integers";
   }
+
+  # Verify the parameters are in range.
+  croak "Parameters [ $low $high ] not in range 0-$_maxparam" unless $low <= $_maxparam && $high <= $_maxparam;
+
   return $sref if ($low > $high) || ($high < 2);
 
   my $method = $optref->{'method'};
@@ -126,7 +131,7 @@ Math::Prime::Util - Utilities related to prime numbers, including fast generator
 
 =head1 VERSION
 
-Version 0.01
+Version 0.02
 
 
 =head1 SYNOPSIS
@@ -356,9 +361,8 @@ q are the same number of digits), then this will be very fast.
   my @factors = squfof_factor($n);
 
 Produces factors, not necessarily prime, of the positive number input.  It
-is possible the factorization will fail, in which case you will need to use
-another method.  Failure in this case means a single factor is returned that
-is not prime.
+is possible the function will be unable to find a factor, in which case a
+single factor (the input) is returned.
 
 =head2 prho_factor
 
@@ -376,10 +380,10 @@ finding a factor or very large inputs in remarkably short time, similar to
 how Fermat's method works very well for factors near C<sqrt(n)>.  They are
 also amenable to massively parallel searching.
 
-For 64-bit input, these are unlikely to be of too much use.  An optimized
-SQUFOF implementation takes under 20 milliseconds to find a factor for any
-62-bit input on modern desktop computers.  Lightweight quadratic sieves
-are typically much faster for inputs in the 19+ digit range.
+For 64-bit input, these are unlikely to be of much use.  An optimized SQUFOF
+implementation takes under 20 milliseconds to find a factor for any 62-bit
+input on modern desktop computers.  Lightweight quadratic sieves are
+typically much faster for inputs in the 19+ digit range.
 
 
 =head1 LIMITATIONS
@@ -391,9 +395,11 @@ called with very large numbers (C<10^11> and up).
 I have not completed testing all the functions near the word size limit
 (e.g. C<2^32> for 32-bit machines).  Please report any problems you find.
 
-The factoring algorithms are mildly interesting but really limited by not
-being big number aware.  Factoring 64-bit numbers is not much of a challenge
-these days.
+The extra factoring algorithms are mildly interesting but really limited by
+not being big number aware.
+
+Perl versions earlier than 5.8.0 have issues with 64-bit.  The test suite will
+try to determine if your Perl is broken.  This will show up in factoring tests.
 
 
 =head1 PERFORMANCE
diff --git a/sieve.c b/sieve.c
index b3765ef..064926b 100644
--- a/sieve.c
+++ b/sieve.c
@@ -78,12 +78,12 @@ void prime_free(void)
 UV* sieve_erat(UV end)
 {
   UV* mem;
-  size_t n, s;
-  size_t last = (end+1)/2;
+  UV n, s;
+  UV last = (end+1)/2;
 
   mem = (UV*) calloc( NWORDS(last), sizeof(UV) );
   if (mem == 0) {
-    croak("allocation failure in sieve_erat: could not alloc %lu bits", (unsigned long)last);
+    croak("allocation failure in sieve_erat: could not alloc %"UVuf" bits", last);
     return 0;
   }
 
@@ -105,14 +105,14 @@ UV* sieve_erat(UV end)
 unsigned char* sieve_erat30(UV end)
 {
   unsigned char* mem;
-  size_t max_buf, buffer_words, limit;
+  UV max_buf, buffer_words, limit;
   UV prime;
 
   max_buf = (end/30) + ((end%30) != 0);
   buffer_words = (max_buf + sizeof(UV) - 1) / sizeof(UV);
   mem = (unsigned char*) calloc( buffer_words, sizeof(UV) );
   if (mem == 0) {
-    croak("allocation failure in sieve_erat30: could not alloc %lu bytes", (unsigned long)(buffer_words*sizeof(UV)));
+    croak("allocation failure in sieve_erat30: could not alloc %"UVuf" bytes", (buffer_words*sizeof(UV)));
     return 0;
   }
 
@@ -179,15 +179,16 @@ int sieve_segment(unsigned char* mem, UV startd, UV endd)
   UV limit;
   UV pcsize;
   UV startp = 30*startd;
-  UV endp = 30*endd+29;
+  UV endp = (endd >= (UV_MAX/30))  ?  UV_MAX-2  :  30*endd+29;
   UV ranged = endd - startd + 1;
 
   assert(mem != 0);
   assert(endd >= startd);
+  assert(endp >= startp);
   memset(mem, 0, ranged);
 
   limit = sqrt( (double) endp ) + 1;
-  /* printf("segment sieve from %lu to %lu (aux sieve to %lu)\n", startp, endp, limit); */
+  /* printf("segment sieve from %"UVuf" to %"UVuf" (aux sieve to %"UVuf")\n", startp, endp, limit); */
   pcsize = get_prime_cache(limit, &sieve);
   if (pcsize < limit) {
     croak("Couldn't generate small sieve for segment sieve");
@@ -198,7 +199,7 @@ int sieve_segment(unsigned char* mem, UV startd, UV endd)
   {
     /* p increments from 7 to at least sqrt(endp) */
     UV p2 = p*p;   /* TODO: overflow */
-    if (p2 >= endp)  break;
+    if (p2 > endp)  break;
     /* Find first multiple of p greater than p*p and larger than startp */
     if (p2 < startp) {
       p2 = (startp / p) * p;
@@ -206,14 +207,15 @@ int sieve_segment(unsigned char* mem, UV startd, UV endd)
     }
     /* Bump to next multiple that isn't divisible by 2, 3, or 5 */
     while (masktab30[p2%30] == 0) { p2 += p; }
-    if (p2 < endp)  {
+    /* It is possible we've overflowed p2, so check for that */
+    if ( (p2 <= endp) && (p2 >= startp) ) {
       /* Sieve from startd to endd starting at p2, stepping p */
 #if 0
       /* Basic sieve */
       do {
         mem[(p2 - startp)/30] |= masktab30[p2%30];
         do { p2 += 2*p; } while (masktab30[p2%30] == 0);
-      } while (p2 < endp);
+      } while ( (p2 <= endp) && (p2 >= startp) );
 #else
       UV d = (p2)/30;
       UV m = (p2) - d*30;
diff --git a/t/03-init.t b/t/03-init.t
index 17690db..feb3940 100644
--- a/t/03-init.t
+++ b/t/03-init.t
@@ -26,7 +26,9 @@ is( Math::Prime::Util::_get_prime_cache_size, $init_size, "Internal space went b
 
 # Now do the object way.
 {
-  my $mf = new_ok( 'Math::Prime::Util::MemFree');
+  #my $mf = new_ok( 'Math::Prime::Util::MemFree');  # Better 0.88+ way
+  my $mf = Math::Prime::Util::MemFree->new;
+  isa_ok $mf, 'Math::Prime::Util::MemFree';
   prime_precalc($bigsize);
   cmp_ok( Math::Prime::Util::_get_prime_cache_size, '>', $init_size, "Internal space grew after large precalc" );
 }
diff --git a/t/50-factoring.t b/t/50-factoring.t
index da2bf28..d7f9de7 100644
--- a/t/50-factoring.t
+++ b/t/50-factoring.t
@@ -25,7 +25,20 @@ push @testn, @testn64 if $use64;
 push @testn, qw/9999986200004761 99999989237606677 999999866000004473/
       if $use64 && $extra;
 
-plan tests => 2 * scalar @testn;
+plan tests =>  (2 * scalar @testn) + ($use64 ? 1 : 0);
+
+if ($use64) {
+  # Simple test:  perl -e 'die if 18446744073709550592 == ~0'
+  my $broken = (18446744073709550592 == ~0);
+  if ($broken) {
+    if ($] < 5.008) {
+      diag "Perl pre-5.8.0 has broken 64-bit.  Expect failures.";
+    } else {
+      diag "Eek!  Your 64-bit Perl $] is **** BROKEN ****.  Expect failures.";
+    }
+  }
+  ok( !$broken, "64-bit isn't obviously broken" );
+}
 
 foreach my $n (@testn) {
   my @f = factor($n);
diff --git a/util.c b/util.c
index 3947ddd..cd99333 100644
--- a/util.c
+++ b/util.c
@@ -469,12 +469,12 @@ UV nth_prime(UV n)
 
   upper_limit = nth_prime_upper(n);
   if (upper_limit == 0) {
-    croak("nth_prime(%lu) would overflow", (unsigned long)n);
+    croak("nth_prime(%"UVuf") would overflow", n);
     return 0;
   }
   /* The nth prime is guaranteed to be within this range */
   if (get_prime_cache(upper_limit, &sieve) < upper_limit) {
-    croak("Couldn't generate sieve for nth(%lu) [sieve to %lu]", (unsigned long)n, (unsigned long)upper_limit);
+    croak("Couldn't generate sieve for nth(%"UVuf") [sieve to %"UVuf"]", n, upper_limit);
     return 0;
   }
 
@@ -495,6 +495,6 @@ UV nth_prime(UV n)
   START_DO_FOR_EACH_SIEVE_PRIME(sieve, start, upper_limit)
     if (++count == n)  return p;
   END_DO_FOR_EACH_SIEVE_PRIME;
-  croak("nth_prime failed for %lu, not found in range %lu - %lu", (unsigned long)n, (unsigned long) start, (unsigned long)upper_limit);
+  croak("nth_prime failed for %"UVuf", not found in range %"UVuf" - %"UVuf, n, start, upper_limit);
   return 0;
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git