[dh-make-perl] 01/02: when parsing in parallel, reorder the files in such a way so that the load is evenly distributed

Damyan Ivanov dmn at moszumanska.debian.org
Tue Dec 5 21:25:41 UTC 2017


This is an automated email from the git hooks/post-receive script.

dmn pushed a commit to branch master
in repository dh-make-perl.

commit e58cfe810212e8c84f22ce01856a59b3743d3052
Author: Damyan Ivanov <dmn at debian.org>
Date:   Tue Dec 5 21:13:07 2017 +0000

    when parsing in parallel, reorder the files in such a way so that the load is evenly distributed
---
 lib/Debian/AptContents.pm | 65 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 51 insertions(+), 14 deletions(-)

diff --git a/lib/Debian/AptContents.pm b/lib/Debian/AptContents.pm
index d9fdc08..443bc6e 100644
--- a/lib/Debian/AptContents.pm
+++ b/lib/Debian/AptContents.pm
@@ -177,6 +177,46 @@ invocation.
 
 =cut
 
+# distribute files so that the load (measured by file size) is distributed
+# more or less equally
+sub _distribute_files {
+    my ( $self, $files, $cpus ) = @_;
+
+    return $files unless $cpus > 1;
+
+    my @data = map( { file => $_, size => -s $_ }, @$files );
+    @data = sort { $b->{size} <=> $a->{size} } @data;
+
+    my ( @slots, @slot_load );
+    for(1..$cpus) {
+        push @slots, [];
+        push @slot_load, 0;
+    }
+
+    for my $item (@data) {
+        my $lightest_slot = 0;
+        my $lightest_slot_load = $slot_load[0];
+        for my $slot ( 1 .. $#slots ) {
+            next unless $slot_load[$slot] < $lightest_slot_load;
+
+            $lightest_slot      = $slot;
+            $lightest_slot_load = $slot_load[$slot];
+        }
+
+        push @{ $slots[$lightest_slot] }, $item->{file};
+        $slot_load[$lightest_slot] += $item->{size};
+    }
+
+    if (0) {
+        for my $slot (@slots) {
+            warn "Slot package: \n";
+            warn sprintf( "  %8d=%s\n", -s $_, $_ ) for @$slot;
+        }
+    }
+
+    return @slots;
+}
+
 sub read_cache {
     my $self = shift;
 
@@ -239,26 +279,22 @@ sub read_cache {
                 );
             }
 
-            require IO::Pipe; require IO::Select; require Sys::CPU;
+            require IO::Pipe; require IO::Select;
+            my $cpus = eval { require Sys::CPU; Sys::CPU::cpu_count() };
+            unless ($cpus) {
+                $self->warning( 1, "Sys::CPU not available");
+                $self->warning( 1, "Using single parser process");
+                $cpus = 1;
+            }
 
             # parsing of Contents files goes to forked children. contents_files
             # is sorted by size, so that no child can have only long Contents.
             # the number of children is capped to the number of CPUs on the system
-            my @files = sort { -s $b <=> -s $a } @{ $self->contents_files };
-            my $cpus = Sys::CPU::cpu_count();
             my ( @kids, %kid_by_fh );
+            for my $portion (
+                $self->_distribute_files( $self->contents_files, $cpus ) )
             {
-                my $kid_no = -1;
-                for my $fn (@files) {
-                    $kid_no++;
-
-                    if ( @kids < $cpus ) {
-                        push @kids, { files => [$fn], lines => 0 };
-                    }
-                    else {
-                        push @{ $kids[ $kid_no % @kids ]->{files} }, $fn;
-                    }
-                }
+                push @kids, { files => $portion, lines => 0 };
             }
 
             my $sel = IO::Select->new;
@@ -280,6 +316,7 @@ sub read_cache {
                         '/usr/lib/apt/apt-helper', 'cat-file',
                         @{ $kid->{files} }
                     );
+
                     open( my $f, "-|", @cat_cmd )
                         or die sprintf( "Error running '%s': %d\n",
                         join( ' ', @cat_cmd ), $! );

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/dh-make-perl.git



More information about the Pkg-perl-cvs-commits mailing list