[libconfig-model-dpkg-perl] 03/33: Scanner: globally squash copyright that have same owners and licenses

dod at debian.org dod at debian.org
Mon Mar 30 17:41:34 UTC 2015


This is an automated email from the git hooks/post-receive script.

dod pushed a commit to branch master
in repository libconfig-model-dpkg-perl.

commit 6301aab78214a2298b161f254da84b568833bcde
Author: Dominique Dumont <dod at debian.org>
Date:   Mon Mar 23 13:35:53 2015 +0100

    Scanner: globally squash copyright that have same owners and licenses
---
 lib/Dpkg/Copyright/Scanner.pm      | 53 +++++++++++++++---------------
 t/scanner/examples/pan.out         | 23 ++++---------
 t/scanner/examples/sdl2.out        |  6 ++--
 t/scanner/squash_copyright_years.t | 67 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 104 insertions(+), 45 deletions(-)

diff --git a/lib/Dpkg/Copyright/Scanner.pm b/lib/Dpkg/Copyright/Scanner.pm
index d846408..f8975af 100644
--- a/lib/Dpkg/Copyright/Scanner.pm
+++ b/lib/Dpkg/Copyright/Scanner.pm
@@ -105,8 +105,9 @@ sub scan_files ( %args ) {
         $c =~ s/(\d+)\s*-\s*(\d+)/$1-$2/g;
         $c =~ s/\b(\d{4}),?\s+([\S^\d])/$1, $2/g;
         $c =~ s/\s+by\s+//g;
-        $c =~ s/all\s+rights?\s+reserved//gi;
+        $c =~ s/all\s+rights?\s+reserved[\s\.]*//gi;
         $c = 'no-info-found' if $c =~ /^\*No/;
+        $c =~ s/^\s+|\s+$//g;
 
         $c =  __pack_copyright($c);
 
@@ -129,8 +130,10 @@ sub scan_files ( %args ) {
 
     say "No copyright information found" unless keys %$files;
 
+    __squash_copyrights_years ($files, \@copyrights_by_id) ;
+
     # regroup %files hash: all leaves have same id -> wild card
-    my $squashed = __squash($files, \@copyrights_by_id, \$id);
+    my $squashed = __squash($files);
 
     # pack files by copyright id
     my @packed = __pack_files($files);
@@ -218,10 +221,10 @@ sub __pack_dir ($h, $pack, @path) {
 # find ids that can be merged together in a single directory.
 # I.e. merge entries with same license and same set of owners. In this
 # case the years are merged together.
-sub __squash_copyrights_years ($count, $copyrights_by_id, $top_id_ref) {
+sub __squash_copyrights_years ($files, $copyrights_by_id) {
 
     my %id_year_by_same_owner_license;
-    foreach my $id ( sort keys $count->%* ) {
+    for (my $id = 0; $id < $#$copyrights_by_id; $id++ ) {
         my ($c,$l) = $copyrights_by_id->[$id]->@* ;
         #say "id $id: c $c l $l";
         my @owners ;
@@ -251,6 +254,7 @@ sub __squash_copyrights_years ($count, $copyrights_by_id, $top_id_ref) {
             my ($id, @years) = $entry->@* ;
 
             for (my $i = 0; $i < @years; $i++) {
+                last SQUASH if $years[$i] =~ /[^\d,\s-]/;
                 my $span = $ranges_of_years[$i] //= Array::IntSpan->new();
                 last SQUASH unless $span; # bail out in case of problems
                 $span->set_range_as_string($years[$i], 1);
@@ -262,19 +266,32 @@ sub __squash_copyrights_years ($count, $copyrights_by_id, $top_id_ref) {
             $ranges_of_years[$i]->consolidate();
             $squashed_c[$i] = $ranges_of_years[$i]->get_range_list.', '.$owners[$i];
         }
-        my $new_id = $$top_id_ref++;
+
+        my $new_id = @$copyrights_by_id + @merged_c_info ;
         $copyrights_by_id->[$new_id] = [ join("\n  ", at squashed_c), $l ];
         foreach my $id (@ids) {
-            $merged_c_info[$id] = $new_id; # TODO: replace the old ids with news ids in the loop below
+            $merged_c_info[$id] = $new_id;
         }
     }
+    # replace the old ids with news ids
+    __swap_merged_ids($files, \@merged_c_info);
+}
 
-    return @merged_c_info;
+sub __swap_merged_ids ($files, $merged_c_info) {
+    foreach my $name (sort keys %$files) {
+        my $item = $files->{$name};
+        if (ref($item)) {
+            __swap_merged_ids($item,$merged_c_info);
+        }
+        elsif (my $new_id = $merged_c_info->[$item]) {
+            $files->{$name} = "$new_id"  ;
+        }
+    }
 }
 
-    # $h is a tree of hash matching the directory structure. Each leaf is a
+# $h is a tree of hash matching the directory structure. Each leaf is a
 # copyright id.
-sub __squash ($h, $copyrights_by_id, $top_id_ref) {
+sub __squash ($h) {
     my %count ;
 
     # count the number of times each (c) info is used in this directory.
@@ -284,7 +301,7 @@ sub __squash ($h, $copyrights_by_id, $top_id_ref) {
         if (ref($item)) {
             # squash may return a plain id, or a hash with '*' => id ,
             # or a non squashable hash
-            $h->{$name} = __squash($item, $copyrights_by_id, $top_id_ref);
+            $h->{$name} = __squash($item);
         }
         my $id = (ref($item) and defined $item->{'*'}) ? $item->{'*'} : $item ;
 
@@ -295,22 +312,6 @@ sub __squash ($h, $copyrights_by_id, $top_id_ref) {
         }
     }
 
-    # works only on (c) referenced by %count, does not use paths
-    my @merged_c_info
-        = __squash_copyrights_years (\%count, $copyrights_by_id, $top_id_ref) ;
-
-    foreach my $name (sort keys %$h) {
-        my $id = $h->{$name};
-        next if ref ($id);
-        if ( my $new_id =  $merged_c_info[$id] ) {
-            $h->{$name} = $new_id;
-            $count{$new_id}//=0;
-            $count{$new_id} ++;
-            $count{$id} --;
-        }
-        #say "$name: ", $copyrights_by_id->[$id][0];
-    }
-
     # find the most used (c) info in this directory
     my $max = 0;
     my $max_id;
diff --git a/t/scanner/examples/pan.out b/t/scanner/examples/pan.out
index f405e16..9921c76 100644
--- a/t/scanner/examples/pan.out
+++ b/t/scanner/examples/pan.out
@@ -1,5 +1,9 @@
 Files: *
-Copyright: 2002-2006, Charles Kerr <charles at rebelbase.com>
+Copyright: 1994-2001, Frank Pilhofer. The author may
+License: GPL-2+
+
+Files: pan/*
+Copyright: 2002-2007, Charles Kerr <charles at rebelbase.com>
 License: GPL-2
 
 Files: pan/data/cert-store.cc
@@ -8,11 +12,6 @@ Copyright: 2011, Heinrich Müller <henmull at src.gnome.org>
   2002-2006, Charles Kerr <charles at rebelbase.com>
 License: GPL-2
 
-Files: pan/data/parts.cc
- pan/data/parts.h
-Copyright: 2002-2007, Charles Kerr <charles at rebelbase.com>
-License: GPL-2
-
 Files: pan/general/e-util.cc
  pan/general/e-util.h
 Copyright: 2000, 2001, Ximian, Inc
@@ -26,12 +25,12 @@ Copyright: 2007, Calin Culianu <calin at ajvar.org>
 License: LGPL-2+
 
 Files: pan/general/map-vector.h
-Copyright: 2001, by Andrei Alexandrescu
+Copyright: 2001, Andrei Alexandrescu
   , 2001. Addison-Wesley
 License: UNKNOWN
 
 Files: pan/general/sorted-vector.h
-Copyright: 2002, Martin Holzherr (holzherr at infobrain.com). All rights reserved
+Copyright: 2002, Martin Holzherr (holzherr at infobrain.com).
 License: public-domain
 
 Files: pan/gui/e-action-combo-box.c
@@ -56,10 +55,6 @@ Copyright: 1989, 1991, Free Software Foundation, Inc.\n\
   , the software, and\n\
 License: UNKNOWN
 
-Files: pan/gui/profiles-dialog.cc
-Copyright: 2002, Charles Kerr <charles at rebelbase.com>
-License: GPL-2
-
 Files: pan/gui/xface.c
 Copyright: , messages are not removed, and no monies are exchanged
   , James Ashton - Sydney University - June 1990
@@ -106,10 +101,6 @@ Copyright: 2011, Heinrich Müller <henmull at src.gnome.org>
   2002, vjt (irssi project)
 License: GPL-2
 
-Files: uulib/*
-Copyright: 1994-2001, by Frank Pilhofer. The author may
-License: GPL-2+
-
 Files: uulib/crc32.c
 Copyright: 1995-2005, Mark Adler
 License: Zlib
diff --git a/t/scanner/examples/sdl2.out b/t/scanner/examples/sdl2.out
index 41255d6..7e744c0 100644
--- a/t/scanner/examples/sdl2.out
+++ b/t/scanner/examples/sdl2.out
@@ -21,7 +21,7 @@ Copyright: 1997-2014, Sam Lantinga <slouken at libsdl.org>
 License: Zlib
 
 Files: src/libm/*
-Copyright: 1993, by Sun Microsystems, Inc. All rights reserved
+Copyright: 1993, Sun Microsystems, Inc.
 License: UNKNOWN
 
 Files: src/libm/math_libm.h
@@ -61,7 +61,7 @@ License: Zlib
 
 Files: src/test/SDL_test_md5.c
 Copyright: 1997-2014, Sam Lantinga <slouken at libsdl.org>
-  1990, RSA Data Security, Inc. All rights reserved. **
+  1990, RSA Data Security, Inc. **
 License: Zlib
 
 Files: src/video/x11/edid-parse.c
@@ -70,7 +70,7 @@ License: Expat
 
 Files: src/video/x11/imKStoUCS.c
  src/video/x11/imKStoUCS.h
-Copyright: 1994-2003, The XFree86 Project, Inc. All Rights Reserved
+Copyright: 1994-2003, The XFree86 Project, Inc.
 License: Expat
 
 Files: test/*
diff --git a/t/scanner/squash_copyright_years.t b/t/scanner/squash_copyright_years.t
new file mode 100644
index 0000000..2bb636a
--- /dev/null
+++ b/t/scanner/squash_copyright_years.t
@@ -0,0 +1,67 @@
+# -*- cperl -*-
+use strict;
+use warnings;
+use 5.010;
+
+use Test::More;   # see done_testing()
+use Test::Differences;
+use YAML::Tiny;
+
+require_ok( 'Dpkg::Copyright::Scanner' );
+
+# __pack_copyright tests
+my @tests = (
+    [
+        'dir with squashable copyright',
+        "---
+pan:
+  data:
+    article-cache.cc: 4
+    article-cache.h: 4
+    article.cc: 6
+    article.h: 6
+    cert-store.cc: 5
+    data.cc: 4
+    data.h: 4
+",
+        "---
+pan:
+  data:
+    article-cache.cc: 4
+    article-cache.h: 4
+    article.cc: 10
+    article.h: 10
+    cert-store.cc: 10
+    data.cc: 4
+    data.h: 4
+"    ],
+);
+
+my @copyright_by_id = (
+    [ 'GPL', '2002, foo'],
+    [ 'GPL', '2003, bar1'],
+    [ 'GPL', '2003, bar2'],
+    [ 'GPL', '2003, bar3'],
+    [ 'GPL', '2003, bar4'],
+    [ 'GPL', '2003, bar5'],
+    [ 'GPL', '2003, bar5'],
+    [ 'GPL', '2003, bar7'],
+    [ 'GPL', '2003, bar8'],
+    [ 'GPL', '2003, bar9']
+);
+
+
+
+foreach my $t (@tests) {
+    my ($label,$in,$expect) = @$t;
+    my $h = Load($in);
+    Dpkg::Copyright::Scanner::__squash_copyrights_years($h, \@copyright_by_id);
+    eq_or_diff(
+        $h,
+        ref($expect) ? $expect : Load($expect),
+        "__squash_copyrights_years $label"
+    );
+}
+
+
+done_testing();

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libconfig-model-dpkg-perl.git



More information about the Pkg-perl-cvs-commits mailing list