[libconfig-model-dpkg-perl] 11/11: (c) scanner: coalesce (c) years for files in same dir with same owners and license

dod at debian.org dod at debian.org
Fri Mar 20 16:25:54 UTC 2015


This is an automated email from the git hooks/post-receive script.

dod pushed a commit to branch master
in repository libconfig-model-dpkg-perl.

commit d72f08a24a6a31009282bea9b22373ce6e9e27a3
Author: Dominique Dumont <dod at debian.org>
Date:   Fri Mar 20 13:48:37 2015 +0100

    (c) scanner: coalesce (c) years for files in same dir with same owners and license
---
 lib/Dpkg/Copyright/Scanner.pm | 81 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 77 insertions(+), 4 deletions(-)

diff --git a/lib/Dpkg/Copyright/Scanner.pm b/lib/Dpkg/Copyright/Scanner.pm
index ffd59f1..d846408 100644
--- a/lib/Dpkg/Copyright/Scanner.pm
+++ b/lib/Dpkg/Copyright/Scanner.pm
@@ -130,7 +130,7 @@ sub scan_files ( %args ) {
     say "No copyright information found" unless keys %$files;
 
     # regroup %files hash: all leaves have same id -> wild card
-    my $squashed = __squash($files);
+    my $squashed = __squash($files, \@copyrights_by_id, \$id);
 
     # pack files by copyright id
     my @packed = __pack_files($files);
@@ -215,9 +215,66 @@ sub __pack_dir ($h, $pack, @path) {
     push $pack->@*, map { [ $_, $pack_by_id{$_}->@* ];  } keys %pack_by_id ;
 }
 
-# $h is a tree of hash matching the directory structure. Each leaf is a
+# find ids that can be merged together in a single directory.
+# I.e. merge entries with same license and same set of owners. In this
+# case the years are merged together.
+sub __squash_copyrights_years ($count, $copyrights_by_id, $top_id_ref) {
+
+    my %id_year_by_same_owner_license;
+    foreach my $id ( sort keys $count->%* ) {
+        my ($c,$l) = $copyrights_by_id->[$id]->@* ;
+        #say "id $id: c $c l $l";
+        my @owners ;
+        my @years ;
+        foreach my $line (split(/\n\s+/,$c)) {
+            my ($owner, @year) = __split_copyright($line);
+            push @owners, $owner;
+            push @years, join(',', at year);
+        }
+        my $k = join('|', $l, @owners);
+        $id_year_by_same_owner_license{$k} //= [];
+        push $id_year_by_same_owner_license{$k}->@*, [ $id, @years ];
+    }
+
+    my @merged_c_info;
+    # now detect where %id_year_by_same_owner_license references more
+    # than one id this means that several entries can be merge in a
+    # *new* id (to avoid cloberring data of other directories)
+    foreach my $owner_license (keys %id_year_by_same_owner_license) {
+        my @entries =  $id_year_by_same_owner_license{$owner_license}->@* ;
+        next unless @entries > 1;
+        my ($l, at owners) = split /\|/, $owner_license;
+        my @ranges_of_years ;
+        my @ids;
+      SQUASH:
+        foreach my $entry (@entries) {
+            my ($id, @years) = $entry->@* ;
+
+            for (my $i = 0; $i < @years; $i++) {
+                my $span = $ranges_of_years[$i] //= Array::IntSpan->new();
+                last SQUASH unless $span; # bail out in case of problems
+                $span->set_range_as_string($years[$i], 1);
+            }
+            push @ids, $id;
+        }
+        my @squashed_c;
+        for (my $i=0; $i < @owners ; $i++) {
+            $ranges_of_years[$i]->consolidate();
+            $squashed_c[$i] = $ranges_of_years[$i]->get_range_list.', '.$owners[$i];
+        }
+        my $new_id = $$top_id_ref++;
+        $copyrights_by_id->[$new_id] = [ join("\n  ", at squashed_c), $l ];
+        foreach my $id (@ids) {
+            $merged_c_info[$id] = $new_id; # TODO: replace the old ids with news ids in the loop below
+        }
+    }
+
+    return @merged_c_info;
+}
+
+    # $h is a tree of hash matching the directory structure. Each leaf is a
 # copyright id.
-sub __squash ($h) {
+sub __squash ($h, $copyrights_by_id, $top_id_ref) {
     my %count ;
 
     # count the number of times each (c) info is used in this directory.
@@ -227,7 +284,7 @@ sub __squash ($h) {
         if (ref($item)) {
             # squash may return a plain id, or a hash with '*' => id ,
             # or a non squashable hash
-            $h->{$name} = __squash($item);
+            $h->{$name} = __squash($item, $copyrights_by_id, $top_id_ref);
         }
         my $id = (ref($item) and defined $item->{'*'}) ? $item->{'*'} : $item ;
 
@@ -238,6 +295,22 @@ sub __squash ($h) {
         }
     }
 
+    # works only on (c) referenced by %count, does not use paths
+    my @merged_c_info
+        = __squash_copyrights_years (\%count, $copyrights_by_id, $top_id_ref) ;
+
+    foreach my $name (sort keys %$h) {
+        my $id = $h->{$name};
+        next if ref ($id);
+        if ( my $new_id =  $merged_c_info[$id] ) {
+            $h->{$name} = $new_id;
+            $count{$new_id}//=0;
+            $count{$new_id} ++;
+            $count{$id} --;
+        }
+        #say "$name: ", $copyrights_by_id->[$id][0];
+    }
+
     # find the most used (c) info in this directory
     my $max = 0;
     my $max_id;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libconfig-model-dpkg-perl.git



More information about the Pkg-perl-cvs-commits mailing list