[dh-make-perl] 01/02: when parsing in parallel, reorder the files in such a way so that the load is evenly distributed
Damyan Ivanov
dmn at moszumanska.debian.org
Tue Dec 5 21:25:41 UTC 2017
This is an automated email from the git hooks/post-receive script.
dmn pushed a commit to branch master
in repository dh-make-perl.
commit e58cfe810212e8c84f22ce01856a59b3743d3052
Author: Damyan Ivanov <dmn at debian.org>
Date: Tue Dec 5 21:13:07 2017 +0000
when parsing in parallel, reorder the files in such a way so that the load is evenly distributed
---
lib/Debian/AptContents.pm | 65 +++++++++++++++++++++++++++++++++++++----------
1 file changed, 51 insertions(+), 14 deletions(-)
diff --git a/lib/Debian/AptContents.pm b/lib/Debian/AptContents.pm
index d9fdc08..443bc6e 100644
--- a/lib/Debian/AptContents.pm
+++ b/lib/Debian/AptContents.pm
@@ -177,6 +177,46 @@ invocation.
=cut
+# distribute files so that the load (measured by file size) is distributed
+# more or less equally
+sub _distribute_files {
+ my ( $self, $files, $cpus ) = @_;
+
+ return $files unless $cpus > 1;
+
+ my @data = map( { file => $_, size => -s $_ }, @$files );
+ @data = sort { $b->{size} <=> $a->{size} } @data;
+
+ my ( @slots, @slot_load );
+ for(1..$cpus) {
+ push @slots, [];
+ push @slot_load, 0;
+ }
+
+ for my $item (@data) {
+ my $lightest_slot = 0;
+ my $lightest_slot_load = $slot_load[0];
+ for my $slot ( 1 .. $#slots ) {
+ next unless $slot_load[$slot] < $lightest_slot_load;
+
+ $lightest_slot = $slot;
+ $lightest_slot_load = $slot_load[$slot];
+ }
+
+ push @{ $slots[$lightest_slot] }, $item->{file};
+ $slot_load[$lightest_slot] += $item->{size};
+ }
+
+ if (0) {
+ for my $slot (@slots) {
+ warn "Slot package: \n";
+ warn sprintf( " %8d=%s\n", -s $_, $_ ) for @$slot;
+ }
+ }
+
+ return @slots;
+}
+
sub read_cache {
my $self = shift;
@@ -239,26 +279,22 @@ sub read_cache {
);
}
- require IO::Pipe; require IO::Select; require Sys::CPU;
+ require IO::Pipe; require IO::Select;
+ my $cpus = eval { require Sys::CPU; Sys::CPU::cpu_count() };
+ unless ($cpus) {
+ $self->warning( 1, "Sys::CPU not available");
+ $self->warning( 1, "Using single parser process");
+ $cpus = 1;
+ }
# parsing of Contents files goes to forked children. contents_files
# is sorted by size, so that no child can have only long Contents.
# the number of children is capped to the number of CPUs on the system
- my @files = sort { -s $b <=> -s $a } @{ $self->contents_files };
- my $cpus = Sys::CPU::cpu_count();
my ( @kids, %kid_by_fh );
+ for my $portion (
+ $self->_distribute_files( $self->contents_files, $cpus ) )
{
- my $kid_no = -1;
- for my $fn (@files) {
- $kid_no++;
-
- if ( @kids < $cpus ) {
- push @kids, { files => [$fn], lines => 0 };
- }
- else {
- push @{ $kids[ $kid_no % @kids ]->{files} }, $fn;
- }
- }
+ push @kids, { files => $portion, lines => 0 };
}
my $sel = IO::Select->new;
@@ -280,6 +316,7 @@ sub read_cache {
'/usr/lib/apt/apt-helper', 'cat-file',
@{ $kid->{files} }
);
+
open( my $f, "-|", @cat_cmd )
or die sprintf( "Error running '%s': %d\n",
join( ' ', @cat_cmd ), $! );
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/dh-make-perl.git
More information about the Pkg-perl-cvs-commits
mailing list