[libmath-prime-util-perl] 06/11: Update primearray and add a simple iterator
Partha P. Mukherjee
ppm-guest at moszumanska.debian.org
Thu May 21 18:48:32 UTC 2015
This is an automated email from the git hooks/post-receive script.
ppm-guest pushed a commit to annotated tag v0.28
in repository libmath-prime-util-perl.
commit ed60b06a4e197d8150e5f2a764f1a50992d1afa4
Author: Dana Jacobsen <dana at acm.org>
Date: Tue May 21 17:17:36 2013 -0700
Update primearray and add a simple iterator
---
TODO | 5 ++
examples/bench-primearray.pl | 120 +++++++++++++++++++++++++++++++
lib/Math/Prime/Util.pm | 16 ++++-
lib/Math/Prime/Util/PrimeArray.pm | 146 ++++++++++++++++++++++++--------------
4 files changed, 231 insertions(+), 56 deletions(-)
diff --git a/TODO b/TODO
index f716733..b2bce1a 100644
--- a/TODO
+++ b/TODO
@@ -45,6 +45,11 @@
- Tests
- Examples
+- prime_iterator
+ - Documentation
+ - Tests
+ - Examples
+
- Make forprimes use a segment sieve
- Figure out a way to make the internal FOR_EACH_PRIME macros use a segmented
diff --git a/examples/bench-primearray.pl b/examples/bench-primearray.pl
new file mode 100755
index 0000000..bc2d402
--- /dev/null
+++ b/examples/bench-primearray.pl
@@ -0,0 +1,120 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Math::Prime::Util qw/:all/;
+use Math::Prime::Util::PrimeArray qw/make_prime_iterator/;
+use Math::NumSeq::Primes;
+use Math::Prime::TiedArray;
+use Benchmark qw/:all/;
+use List::Util qw/min max/;
+my $count = shift || -2;
+
+my ($s, $nlimit, $ilimit, $expect);
+
+if (1) {
+print "summation to 100k\n";
+$nlimit = 100000;
+$ilimit = prime_count($nlimit)-1;
+$expect = 0; forprimes { $expect += $_ } $nlimit;
+
+cmpthese($count,{
+ 'primes' => sub { $s=0; $s += $_ for @{primes($nlimit)}; die unless $s == $expect; },
+ 'forprimes' => sub { $s=0; forprimes { $s += $_ } $nlimit; die unless $s == $expect; },
+ 'iterator' => sub { $s=0; my $it = prime_iterator();
+ $s += $it->() for 0..$ilimit;
+ die unless $s == $expect; },
+ 'pa iter' => sub { $s=0; my $it = make_prime_iterator();
+ $s += $it->() for 0..$ilimit;
+ die unless $s == $expect; },
+ 'pa index' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray";
+ $s += $primes[$_] for 0..$ilimit;
+ die unless $s == $expect; },
+ 'pa loop' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray";
+ for (@primes) { last if $_ > $nlimit; $s += $_; }
+ die $s unless $s == $expect; },
+ 'pa slice' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray";
+ $s += $_ for @primes[0..$ilimit];
+ die unless $s == $expect; },
+ 'pa each' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray";
+ while(my(undef,$v) = each @primes) { last if $v > $nlimit; $s += $v; }
+ die $s unless $s == $expect; },
+ 'pa shift' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray";
+ while ((my $p = shift @primes) <= $nlimit) { $s += $p; }
+ die unless $s == $expect; },
+ 'numseq' => sub { $s=0; my $seq = Math::NumSeq::Primes->new;
+ while (1) { my($undev,$v) = $seq->next; last if $v > $nlimit; $s += $v; }
+ die $s unless $s == $expect; },
+ # This was slightly faster than slice or shift
+ 'tiedarray' => sub { $s=0; tie my @primes, "Math::Prime::TiedArray", extend_step => 1000;
+ $s += $primes[$_] for 0..$ilimit;
+ die unless $s == $expect; },
+});
+}
+
+if (0) {
+print "summation to 10M\n";
+print "Skipping Math::Prime::TiedArray as it will take too long\n";
+$nlimit = 10_000_000;
+$ilimit = prime_count($nlimit)-1;
+$expect = 0; forprimes { $expect += $_ } $nlimit;
+
+cmpthese($count,{
+ 'primes' => sub { $s=0; $s += $_ for @{primes($nlimit)}; die unless $s == $expect; },
+ 'forprimes' => sub { $s=0; forprimes { $s += $_ } $nlimit; die unless $s == $expect; },
+ 'pa index' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray";
+ $s += $primes[$_] for 0..$ilimit;
+ die unless $s == $expect; },
+ 'pa loop' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray";
+ for (@primes) { last if $_ > $nlimit; $s += $_; }
+ die $s unless $s == $expect; },
+ 'pa slice' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray";
+ $s += $_ for @primes[0..$ilimit];
+ die unless $s == $expect; },
+ 'pa each' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray";
+ while(my(undef,$v) = each @primes) { last if $v > $nlimit; $s += $v; }
+ die $s unless $s == $expect; },
+ 'pa shift' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray";
+ while ((my $p = shift @primes) <= $nlimit) { $s += $p; }
+ die unless $s == $expect; },
+ 'numseq' => sub { $s=0; my $seq = Math::NumSeq::Primes->new;
+ while (1) { my($undev,$v) = $seq->next; last if $v > $nlimit; $s += $v; }
+ die $s unless $s == $expect; },
+});
+}
+
+if (0) {
+print "Walk primes backwards from 1M\n";
+$nlimit = 1_000_000;
+$ilimit = prime_count($nlimit)-1;
+$expect = 0; forprimes { $expect += $_ } $nlimit;
+
+cmpthese($count,{
+ 'rev primes'=> sub { $s=0; $s += $_ for reverse @{primes($nlimit)}; die unless $s == $expect; },
+ 'nthprime' => sub { $s=0; $s += nth_prime($_) for reverse 1..$ilimit+1; die unless $s == $expect; },
+ 'pa index' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray";
+ $s += $primes[$_] for reverse 0..$ilimit;
+ die unless $s == $expect; },
+ 'tiedarray' => sub { $s=0; tie my @primes, "Math::Prime::TiedArray", extend_step => 1000;
+ $s += $primes[$_] for reverse 0..$ilimit;
+ die unless $s == $expect; },
+});
+}
+
+if (0) {
+print "Random walk in 1M\n";
+srand(29);
+my @rindex;
+do { push @rindex, int(rand(1000000)) } for 1..10000;
+$expect = 0; $expect += nth_prime($_+1) for @rindex;
+
+cmpthese($count,{
+ 'nthprime' => sub { $s=0; $s += nth_prime($_+1) for @rindex; },
+ 'pa index' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray";
+ $s += $primes[$_] for @rindex;
+ die unless $s == $expect; },
+ # Argh! Is it possible to write a slower sieve than the one MPTA uses?
+ #'tiedarray' => sub { $s=0; tie my @primes, "Math::Prime::TiedArray", extend_step => 10000;
+ # $s += $primes[$_] for @rindex;
+ # die unless $s == $expect; },
+});
+}
diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm
index 0dac169..1347132 100644
--- a/lib/Math/Prime/Util.pm
+++ b/lib/Math/Prime/Util.pm
@@ -21,7 +21,7 @@ our @EXPORT_OK =
is_aks_prime
miller_rabin
primes
- forprimes
+ forprimes prime_iterator
next_prime prev_prime
prime_count
prime_count_lower prime_count_upper prime_count_approx
@@ -1365,6 +1365,20 @@ sub _generic_forprimes (&$;$) {
}
}
+sub prime_iterator {
+ my($start) = @_;
+ my $p = (defined $start && $start > 0) ? $start-1 : 0;
+ if (ref($p) ne 'Math::BigInt' && $p <= $_XS_MAXVAL) {
+ return sub { $p = _XS_next_prime($p); return $p; };
+ } elsif ($_HAVE_GMP) {
+ return sub { my $next = Math::Prime::Util::GMP::next_prime($p);
+ $p = $p-$p+$next;
+ return $p; };
+ } else {
+ return sub { $p = Math::Prime::Util::PP::next_prime($p); return $p; }
+ }
+}
+
# Omega function A001221. Just an example.
sub _omega {
my($n) = @_;
diff --git a/lib/Math/Prime/Util/PrimeArray.pm b/lib/Math/Prime/Util/PrimeArray.pm
index 76d7adc..39fa38a 100644
--- a/lib/Math/Prime/Util/PrimeArray.pm
+++ b/lib/Math/Prime/Util/PrimeArray.pm
@@ -4,7 +4,7 @@ use warnings;
BEGIN {
$Math::Prime::Util::PrimeArray::AUTHORITY = 'cpan:DANAJ';
- $Math::Prime::Util::PrimeArray::VERSION = '0.14';
+ $Math::Prime::Util::PrimeArray::VERSION = '0.28';
}
# parent is cleaner, and in the Perl 5.10.1 / 5.12.0 core, but not earlier.
@@ -14,7 +14,7 @@ our @EXPORT_OK = qw( );
our %EXPORT_TAGS = (all => [ @EXPORT_OK ]);
-use Math::Prime::Util qw/nth_prime nth_prime_upper primes prime_precalc/;
+use Math::Prime::Util qw/nth_prime nth_prime_upper nth_prime_lower primes prime_precalc next_prime prev_prime/;
use Tie::Array;
use Carp qw/carp croak confess/;
@@ -59,27 +59,38 @@ sub FETCH {
if ( $index < $begidx || $index > $endidx ) {
- if ($index == $endidx+1) { $self->{ACCESS_TYPE}++; }
- elsif ($index == $begidx-1) { $self->{ACCESS_TYPE}--; }
- else { $self->{ACCESS_TYPE}=0; }
-
- if ($index == $endidx+1 && $self->{ACCESS_TYPE} > 2) { # Forward iter
-
- my $end_prime = nth_prime_upper($index + 10000);
- $self->{PRIMES} = primes( $self->{PRIMES}->[-1]+1, $end_prime );
-
- $begidx = $endidx+1;
-
- # TODO: backwards iter
-
- } else { # Looks like random access so far. Get a small range.
-
- my $start_idx = ($index < 500) ? 0 : $index - 500;
- my $start_prime = nth_prime($start_idx+1); # This is exact
- my $end_prime = nth_prime_upper($index+500); # This is a bound
- $self->{PRIMES} = primes($start_prime, $end_prime);
-
- $begidx = $start_idx;
+ if ($index == $endidx+1) { # Forward iteration
+
+ $self->{ACCESS_TYPE}++;
+ if ($self->{ACCESS_TYPE} > 2) {
+ my $end_prime = nth_prime_upper($index + 10_000);
+ $self->{PRIMES} = primes( $self->{PRIMES}->[-1]+1, $end_prime );
+ $begidx = $endidx+1;
+ } else {
+ $begidx = $index;
+ $self->{PRIMES} = [next_prime($self->{PRIMES}->[-1])];
+ }
+
+ } elsif ($index == $begidx-1) { # Backward iteration
+
+ $self->{ACCESS_TYPE}--;
+ if ($self->{ACCESS_TYPE} < -2) {
+ my $num = 10_000;
+ $begidx = ($index < $num) ? 0 : $index - $num;
+ my $start_prime = nth_prime($begidx+1);
+ my $end_prime = nth_prime_upper($index+500);
+ $self->{PRIMES} = primes($start_prime, $end_prime);
+ } else {
+ $begidx = $index;
+ $self->{PRIMES} = [prev_prime($self->{PRIMES}->[0])];
+ }
+
+ } else { # Random access
+
+ $self->{ACCESS_TYPE} = int($self->{ACCESS_TYPE} / 2);
+ # Alternately we could get a small window
+ $begidx = $index;
+ $self->{PRIMES} = [nth_prime($begidx+1)];
}
$self->{BEG_INDEX} = $begidx;
@@ -126,7 +137,7 @@ Math::Prime::Util::PrimeArray - A tied array for primes
=head1 VERSION
-Version 0.14
+Version 0.28
=head1 SYNOPSIS
@@ -134,7 +145,7 @@ Version 0.14
use Math::Prime::Util::PrimeArray;
# Create:
- my @primes; tie @primes, 'Math::Prime::Util::PrimeArray';
+ tie my @primes, 'Math::Prime::Util::PrimeArray';
# Use in a loop by index:
for my $n (1..10) {
@@ -169,11 +180,9 @@ An array that acts like the infinite set of primes. This may be more
convenient than using L<Math::Prime::Util> directly, and in some cases it can
be faster than calling C<next_prime> and C<prev_prime>.
-Internally when an index is accessed, an area surrounding the index is sieved
-if necessary, then the result returned. This means random access will be a
-little slower than using C<nth_prime> directly, but not by very much.
-Random access in a small window (1000 or so primes in either direction) will
-be very fast, as will sequential access in either direction.
+If the access pattern is ascending or decending, then a window is sieved and
+results returned from the window as needed. If the access pattern is random,
+then C<nth_prime> is used.
Shifting acts like the array is losing elements at the front, so after two
shifts, C<$primes[0] == 5>. Unshift will move the internal shift index back
@@ -189,52 +198,79 @@ Example:
say $primes[0]; # 5
unshift @primes, 2; # back up two
say $primes[0]; # 2
-
+
+If you prefer the iterator pattern, I would recommend using
+L<Math::Prime::Util/prime_iterator>. It will be faster than using this
+tied array, but of course you don't get random access. If you find yourself
+using the C<shift> operation, consider the iterator.
+
=head1 LIMITATIONS
The size of the array will always be shown as 2147483647 (IV32 max), even in
a 64-bit environment where primes through C<2^64> are available.
+There are some people that find the idea of shifting a prime array abhorrent,
+as after two shifts, "the second prime is 7?!". If this bothers you, do not
+use C<shift> on the tied array.
+
=head1 PERFORMANCE
+ MPU forprimes: forprimes { $sum += $_ } nth_prime(100_000);
+ MPU iterator: my $it = prime_iterator; $sum += $it->() for 1..100000;
+ MPU array: $sum += $_ for @{primes(nth_prime(100_000))};
+ MPUPA: tie my @primes, ...; $sum += $primes[$_] for 0..99999;
+ MNSP: my $seq = Math::NumSeq::Primes->new;
+ $sum += ($seq->next)[1] for 1..100000;
+ MPTA: tie my @primes, ...; $sum += $primes[$_] for 0..99999;
+
+Memory use is comparing the delta between just loading the module and running
+the test. Math::NumSeq v58, Math::Prime::TiedArray v0.04.
+
Summing the first 0.1M primes via walking the array:
- 40ms 3.3 MB $sum += $_ for @{primes(nth_prime(100_000))};
- 300ms 0.6 MB Math::Prime::Util::PrimeArray
- 230ms 2.2 MB Math::NumSeq::Primes
- 10300ms 36.2 MB Math::Primes::TiedArray (extend 1k)
+ 9ms 52k Math::Prime::Util forprimes
+ 150ms 0 Math::Prime::Util prime_iterator
+ 15ms 4400k Math::Prime::Util sum big array
+ 220ms 840k Math::Prime::Util::PrimeArray
+ 130ms 280k Math::NumSeq::Primes sequence iterator
+ 33000ms 65 MB Math::Prime::TiedArray (extend 1k)
Summing the first 1M primes via walking the array:
- 0.3s 35.5 MB $sum += $_ for @{primes(nth_prime(1_000_000))};
- 3.9s 1.3 MB Math::Prime::Util::PrimeArray
- 9.6s 2.2 MB Math::NumSeq::Primes
- 146.7s 444.9 MB Math::Primes::TiedArray (extend 1k)
+ 0.1s 300k Math::Prime::Util forprimes
+ 1.9s 0 Math::Prime::Util prime_iterator
+ 0.2s 40 MB Math::Prime::Util sum big array
+ 2.2s 1.1MB Math::Prime::Util::PrimeArray
+ 7.1s 1.2MB Math::NumSeq::Primes sequence iterator
+ 122.1s 785 MB Math::Prime::TiedArray (extend 1k)
Summing the first 10M primes via walking the array:
- 4s 365 MB $sum += $_ for @{primes(nth_prime(10_000_000))};
- 2m 2 MB Math::Prime::Util::PrimeArray
- 85m 2 MB Math::NumSeq::Primes
- >5000 MB Math::Primes::TiedArray (extend 1k)
-
-Using L<Math::Prime::Util> directly in a naive fashion uses lots of memory,
-but is extremely fast. Sieving segments at a time would control the memory
-use, which is one thing the C<PrimeArray> tie is trying to do for you (but
-adds more inefficiency than is ideal).
+ 0.8s 5.9MB Math::Prime::Util forprimes
+ 23.1s 0 Math::Prime::Util prime_iterator
+ 1.6s 368 MB Math::Prime::Util sum big array
+ 21.2s 1.2MB Math::Prime::Util::PrimeArray
+ 3680 s 11.1MB Math::NumSeq::Primes sequence iterator
+ >5000 MB Math::Primes::TiedArray (extend 1k)
+
+L<Math::Prime::Util> offers three obvious solutions: a big array, an iterator,
+and the C<forprimes> construct. The big array is fast but uses a B<lot> of
+memory, forcing the user to start programming segments. Using the iterator
+avoids all the memory use, but isn't as fast (this may improve in a later
+release, as this is a new feature). The C<forprimes> construct is by far
+the fastest, but it isn't quite as flexible as the iterator (most notably
+there is no way to exit early, and it doesn't lend itself to wrapping inside
+a filter).
L<Math::NumSeq::Primes> offers an iterator alternative, and works quite well
-for reasonably small numbers. It does not, however, support random access.
-There seems to be a 2MB fixed overhead, but after that the memory use is
-is well constrained. It is very fast for small values, but clearly is
-getting slower as we sum to 1 million, and takes well over an hour to count
-to 10 million.
+for reasonably small numbers. It does not support random access. It is
+very fast for small values, but is very slow with large counts.
L<Math::Primes::TiedArray> is remarkably impractical for anything other
-than very small numbers. I believe the times and memory use in the above
-tables illustrate this.
+than very small numbers. The sieve used is incredibly slow, and the memory
+use is crazy.
=head1 SEE ALSO
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git
More information about the Pkg-perl-cvs-commits
mailing list