[libconfig-model-dpkg-perl] 11/11: (c) scanner: coalesce (c) years for files in same dir with same owners and license
dod at debian.org
dod at debian.org
Fri Mar 20 16:25:54 UTC 2015
This is an automated email from the git hooks/post-receive script.
dod pushed a commit to branch master
in repository libconfig-model-dpkg-perl.
commit d72f08a24a6a31009282bea9b22373ce6e9e27a3
Author: Dominique Dumont <dod at debian.org>
Date: Fri Mar 20 13:48:37 2015 +0100
(c) scanner: coalesce (c) years for files in same dir with same owners and license
---
lib/Dpkg/Copyright/Scanner.pm | 81 ++++++++++++++++++++++++++++++++++++++++---
1 file changed, 77 insertions(+), 4 deletions(-)
diff --git a/lib/Dpkg/Copyright/Scanner.pm b/lib/Dpkg/Copyright/Scanner.pm
index ffd59f1..d846408 100644
--- a/lib/Dpkg/Copyright/Scanner.pm
+++ b/lib/Dpkg/Copyright/Scanner.pm
@@ -130,7 +130,7 @@ sub scan_files ( %args ) {
say "No copyright information found" unless keys %$files;
# regroup %files hash: all leaves have same id -> wild card
- my $squashed = __squash($files);
+ my $squashed = __squash($files, \@copyrights_by_id, \$id);
# pack files by copyright id
my @packed = __pack_files($files);
@@ -215,9 +215,66 @@ sub __pack_dir ($h, $pack, @path) {
push $pack->@*, map { [ $_, $pack_by_id{$_}->@* ]; } keys %pack_by_id ;
}
-# $h is a tree of hash matching the directory structure. Each leaf is a
+# find ids that can be merged together in a single directory.
+# I.e. merge entries with same license and same set of owners. In this
+# case the years are merged together.
+sub __squash_copyrights_years ($count, $copyrights_by_id, $top_id_ref) {
+
+ my %id_year_by_same_owner_license;
+ foreach my $id ( sort keys $count->%* ) {
+ my ($c,$l) = $copyrights_by_id->[$id]->@* ;
+ #say "id $id: c $c l $l";
+ my @owners ;
+ my @years ;
+ foreach my $line (split(/\n\s+/,$c)) {
+ my ($owner, @year) = __split_copyright($line);
+ push @owners, $owner;
+ push @years, join(',', at year);
+ }
+ my $k = join('|', $l, @owners);
+ $id_year_by_same_owner_license{$k} //= [];
+ push $id_year_by_same_owner_license{$k}->@*, [ $id, @years ];
+ }
+
+ my @merged_c_info;
+ # now detect where %id_year_by_same_owner_license references more
+ # than one id this means that several entries can be merge in a
+ # *new* id (to avoid cloberring data of other directories)
+ foreach my $owner_license (keys %id_year_by_same_owner_license) {
+ my @entries = $id_year_by_same_owner_license{$owner_license}->@* ;
+ next unless @entries > 1;
+ my ($l, at owners) = split /\|/, $owner_license;
+ my @ranges_of_years ;
+ my @ids;
+ SQUASH:
+ foreach my $entry (@entries) {
+ my ($id, @years) = $entry->@* ;
+
+ for (my $i = 0; $i < @years; $i++) {
+ my $span = $ranges_of_years[$i] //= Array::IntSpan->new();
+ last SQUASH unless $span; # bail out in case of problems
+ $span->set_range_as_string($years[$i], 1);
+ }
+ push @ids, $id;
+ }
+ my @squashed_c;
+ for (my $i=0; $i < @owners ; $i++) {
+ $ranges_of_years[$i]->consolidate();
+ $squashed_c[$i] = $ranges_of_years[$i]->get_range_list.', '.$owners[$i];
+ }
+ my $new_id = $$top_id_ref++;
+ $copyrights_by_id->[$new_id] = [ join("\n ", at squashed_c), $l ];
+ foreach my $id (@ids) {
+ $merged_c_info[$id] = $new_id; # TODO: replace the old ids with news ids in the loop below
+ }
+ }
+
+ return @merged_c_info;
+}
+
+ # $h is a tree of hash matching the directory structure. Each leaf is a
# copyright id.
-sub __squash ($h) {
+sub __squash ($h, $copyrights_by_id, $top_id_ref) {
my %count ;
# count the number of times each (c) info is used in this directory.
@@ -227,7 +284,7 @@ sub __squash ($h) {
if (ref($item)) {
# squash may return a plain id, or a hash with '*' => id ,
# or a non squashable hash
- $h->{$name} = __squash($item);
+ $h->{$name} = __squash($item, $copyrights_by_id, $top_id_ref);
}
my $id = (ref($item) and defined $item->{'*'}) ? $item->{'*'} : $item ;
@@ -238,6 +295,22 @@ sub __squash ($h) {
}
}
+ # works only on (c) referenced by %count, does not use paths
+ my @merged_c_info
+ = __squash_copyrights_years (\%count, $copyrights_by_id, $top_id_ref) ;
+
+ foreach my $name (sort keys %$h) {
+ my $id = $h->{$name};
+ next if ref ($id);
+ if ( my $new_id = $merged_c_info[$id] ) {
+ $h->{$name} = $new_id;
+ $count{$new_id}//=0;
+ $count{$new_id} ++;
+ $count{$id} --;
+ }
+ #say "$name: ", $copyrights_by_id->[$id][0];
+ }
+
# find the most used (c) info in this directory
my $max = 0;
my $max_id;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libconfig-model-dpkg-perl.git
More information about the Pkg-perl-cvs-commits
mailing list