[libconfig-model-dpkg-perl] 01/01: copyright update: better handle files without info

dod at debian.org dod at debian.org
Thu Feb 11 20:24:13 UTC 2016


This is an automated email from the git hooks/post-receive script.

dod pushed a commit to branch master
in repository libconfig-model-dpkg-perl.

commit f4be162d18817759408e4e18df427ba5a049a559
Author: Dominique Dumont <dod at debian.org>
Date:   Thu Feb 11 21:20:09 2016 +0100

    copyright update: better handle files without info
    
    id 0 is used as a special id for files that contains no info.
    
    This enables to handle correctly packages like moar where all files have
    no (c) info, but all 3rd party have information: handlign files without
    info while coaslescing restores the balance
---
 lib/Config/Model/Dpkg/Copyright.pm                 | 43 ++++++++++++++++------
 lib/Dpkg/Copyright/Scanner.pm                      | 43 ++++++++++++----------
 .../rakudo-star/debian/fill.copyright.blanks.yml   |  4 +-
 3 files changed, 58 insertions(+), 32 deletions(-)

diff --git a/lib/Config/Model/Dpkg/Copyright.pm b/lib/Config/Model/Dpkg/Copyright.pm
index bb6ac43..1fd8642 100644
--- a/lib/Config/Model/Dpkg/Copyright.pm
+++ b/lib/Config/Model/Dpkg/Copyright.pm
@@ -66,10 +66,12 @@ sub update ($self, %args) {
 
     # explode new data and merge with existing entries
     my %new_split_files;
-    my @data;
+    my @data = (''); # id 0 is reserved for entries without info
     my %data_keys;
     foreach my $path ( sort keys $files->%* ) {
-        my ($c, $l) = $copyrights_by_id->[ $files->{$path} ]->@*;
+        my $id = $files->{$path};
+        next if $id == 0 and not defined $copyrights_by_id->[$id];
+        my ($c, $l) = $copyrights_by_id->[$id]->@*;
 
         my $new_data = dclone (delete $old_split_files{$path} || {} );
         my $old_cop = $new_data->{Copyright};
@@ -81,6 +83,14 @@ sub update ($self, %args) {
 
         # when all fails
         $new_data->{Copyright} ||= 'UNKNOWN';
+        $new_data->{License}{short_name} ||= 'UNKNOWN';
+
+        # skip when no info is found in original data
+        if ($id == 0
+            and $new_data->{Copyright} =~ /no-info-found|unknown/i
+            and $new_data->{License}{short_name} =~ /unknown/i) {
+            next;
+        }
 
         # create an inventory of different file copyright and license data
         my $dumper = Data::Dumper->new([$new_data])->Sortkeys(1)->Indent(0);
@@ -88,7 +98,9 @@ sub update ($self, %args) {
         my $d_key = $data_keys{$datum_dump};
         if (not defined $d_key) {
             push @data,$new_data;
-            $d_key = $data_keys{$datum_dump} = $#data;
+            # id 0 is special and is treated diffrently. It must not be used since
+            # entries without info are skipped. Hence @data was init with ('');
+            $d_key = $data_keys{$datum_dump} = $#data ;
         }
 
         # explode path in subpaths and store id pointing to copyright data in there
@@ -96,6 +108,7 @@ sub update ($self, %args) {
     }
 
     # at this point:
+    # * $copyrights_by_id is not longer used, its data and merged data are in @data
     # * @data contains a list of copyright/license data
     # * %new_split_files contains a tree matching a directory tree where each leaf
     #   is an integer index referencing
@@ -150,26 +163,34 @@ sub update ($self, %args) {
             $p =~ s/\.$/*/;
             my $old_data = delete $preserved_path{$p};
 
+            my $using_old_data = 0;
             if ($old_data) {
-                my $overridden = 1;
                 if ($datum->{Copyright} =~ /unknown|no-info-found/i) {
                     $self->_say( "keeping copyright dir data for $p");
                     $datum->{Copyright} = $old_data->{Copyright};
-                    $overridden = 0;
+                    $using_old_data = 1;
                 }
                 if ($datum->{License}{short_name} =~ /unknown|no-info-found/i) {
                     $self->_say( "keeping license dir data for $p");
                     $datum->{License}{short_name} = $old_data->{License}{short_name};
                     $datum->{License}{full_license} = $old_data->{License}{full_license};
-                    $overridden = 0;
+                    $using_old_data = 1;
                 }
-                $self->_say( "old dir data for $p overridden") if $overridden;
+                $self->_say( "old dir data for $p overridden by new data") unless $using_old_data;
             }
 
-            # skip writing data because it duplicates information
-            # found in directory above above (as shown the path ending
-            # with '/.')
-            next if $paths[0] =~ /\.$/;
+            if ($paths[0] =~ /\.$/) {
+                if ($using_old_data) {
+                    # fix path ending with '.' that contain merged info from old copyright file
+                    $paths[0] = $p;
+                } else {
+                    # skip writing data because it duplicates information
+                    # found in directory above above (as shown the path ending
+                    # with '/.')
+                    # $self->_say( "skipping redundant path ".$paths[0] );
+                    next;
+                }
+            }
         };
 
         my $path_str = $self->normalize_path(\@paths);
diff --git a/lib/Dpkg/Copyright/Scanner.pm b/lib/Dpkg/Copyright/Scanner.pm
index b12dcc0..72f2cb9 100644
--- a/lib/Dpkg/Copyright/Scanner.pm
+++ b/lib/Dpkg/Copyright/Scanner.pm
@@ -63,7 +63,7 @@ sub print_copyright ( %args ) {
         my ($id, @paths) = $p->@*;
         my ($c,$l) = $copyrights_by_id->[$id]->@*;
 
-        next if $c eq 'no-info-found';
+        next if $id == 0;
 
         # don't print directory info covered by same info in directory above
         next if $paths[0] =~ /\.$/;
@@ -199,7 +199,7 @@ sub scan_files ( %args ) {
 
     my %copyrights ;
     my $files = {};
-    my $id = 0;
+    my $id = 1;
 
     foreach my $line (sort @lines) {
         chomp $line;
@@ -267,8 +267,9 @@ sub scan_files ( %args ) {
         _warn "Path $f has no ".join(' or ', @no_info_found)." info. You may want to add a line in debian/fill.copyright.blanks.yml\n"
             if @no_info_found;
 
-        #say "Storing '$f' : '$c' '$l'";
-        $files->{$f} = $copyrights{$c}{$l} //= $id++;
+        my $has_info = @no_info_found < 2;
+        # say "Storing '$f' : '$c' '$l' has_info: $has_info id $id";
+        $files->{$f} = $copyrights{$c}{$l} //= ($has_info ? $id++ : 0);
     }
 
     my @notused = grep { ! $fill_blank_data->{$_}{used} and $_; } sort keys %$fill_blank_data ;
@@ -389,7 +390,7 @@ sub __pack_dir ($h, $pack, @path) {
 sub __squash_copyrights_years ($copyrights_by_id) {
 
     my %id_year_by_same_owner_license;
-    for (my $id = 0; $id < $copyrights_by_id->@* ; $id++ ) {
+    for (my $id = 1; $id < $copyrights_by_id->@* ; $id++ ) {
         my ($c,$l) = $copyrights_by_id->[$id]->@* ;
         #say "id $id: c $c l $l";
         my @owners ;
@@ -487,7 +488,7 @@ sub __squash ($h) {
         foreach my $name (sort keys %$h) {
             if ($name =~ $re) {
                 my $id = delete $h->{$name};
-                #say "del global lic info $name with ".Dumper($id);
+                # say "del global lic info $name with $id";
                 # using 1 means that info from this file is easy to override
                 $count{$id} = 1;
             };
@@ -506,7 +507,8 @@ sub __squash ($h) {
         my $id = (ref($item) and defined $item->{'*'}) ? $item->{'*'} : $item ;
 
         # do not count non squashable hashes (i.e. there's no main (c) info)
-        if (not ref ($id)) {
+        # do not count ids containing no information (id 0)
+        if (not ref ($id) and $id != 0) {
             $count{$id}//=0;
             $count{$id} ++;
         }
@@ -527,20 +529,23 @@ sub __squash ($h) {
 
     # all files associated to the most used (c) info are deleted to
     # be represented by '*' entry
-    foreach my $name (sort keys %$h) {
-        my $item = $h->{$name};
-        if (ref($item) and defined $item->{'*'} and $item->{'*'} == $main_license_id) {
-            # rename item/* to item/. when covered by ./*
-            # this is a "weak" directory info which is handled specially
-            $item->{'.'} = delete $item->{'*'};
-        }
-        if (not ref ($item)) {
-            # delete file that is represented by '*' entry
-            delete $h->{$name} if $item == $main_license_id;
+    if (defined $main_license_id) {
+        foreach my $name (sort keys %$h) {
+            my $item = $h->{$name};
+            if (ref($item) and defined $item->{'*'} and $item->{'*'} == $main_license_id) {
+                # rename item/* to item/. when covered by ./*
+                # this is a "weak" directory info which is handled specially
+                $item->{'.'} = delete $item->{'*'};
+            }
+            if (not ref ($item)) {
+                # delete file that is represented by '*' entry
+                delete $h->{$name} if $item == $main_license_id;
+            }
         }
+
+        # here's the '*' file representing the most used (c) info
+        $h->{'*'} //= $main_license_id;
     }
-    # here's the '*' file representing the most used (c) info
-    $h->{'*'} //= $main_license_id if defined $main_license_id;
 
     return $h;
 }
diff --git a/t/model_tests.d/dpkg-examples/rakudo-star/debian/fill.copyright.blanks.yml b/t/model_tests.d/dpkg-examples/rakudo-star/debian/fill.copyright.blanks.yml
index 63955a1..f4b0a08 100644
--- a/t/model_tests.d/dpkg-examples/rakudo-star/debian/fill.copyright.blanks.yml
+++ b/t/model_tests.d/dpkg-examples/rakudo-star/debian/fill.copyright.blanks.yml
@@ -6,5 +6,5 @@ README:
 debian:
   license: Artistic-2.0
 .*jquery.*:
-  skip: 1
- 
\ No newline at end of file
+  skip: 0
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libconfig-model-dpkg-perl.git



More information about the Pkg-perl-cvs-commits mailing list