[devscripts] 02/16: licensecheck: check file encoding and decode properly when reading file (Closes: #784821)

dod at debian.org dod at debian.org
Thu May 14 16:34:25 UTC 2015


This is an automated email from the git hooks/post-receive script.

dod pushed a commit to branch master
in repository devscripts.

commit 025ad4ea8ba92d32bd698a83149f782c17f78bf0
Author: Dominique Dumont <dod at debian.org>
Date:   Thu May 14 17:04:20 2015 +0200

    licensecheck: check file encoding and decode properly when reading file (Closes: #784821)
---
 scripts/licensecheck.pl | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/scripts/licensecheck.pl b/scripts/licensecheck.pl
index 2f16133..f379578 100755
--- a/scripts/licensecheck.pl
+++ b/scripts/licensecheck.pl
@@ -132,11 +132,22 @@ Adam D. Barratt <adam at adam-barratt.org.uk>
 
 =cut
 
+# see http://stackoverflow.com/questions/6162484/why-does-modern-perl-avoid-utf-8-by-default/6163129#6163129
+use v5.14;
+use utf8;
+
 use strict;
+use autodie;
 use warnings;
+use warnings    qw< FATAL  utf8     >;
+use Encode qw/decode/;
+
 use Getopt::Long qw(:config gnu_getopt);
 use File::Basename;
 
+
+binmode STDOUT, ':utf8';
+
 my $progname = basename($0);
 
 # From dpkg-source
@@ -284,14 +295,27 @@ while (@files) {
     my $license = '';
     my %copyrights;
 
+    # Encode::Guess does not work well, use good old file command to get file encoding
+    my $mime = `file -bi $file`;
+    my $charset ;
+    if ($mime =~ /charset=([\w-]+)/) {
+        $charset = $1;
+    }
+    else {
+        die "can't find charset of $file\n";
+    }
+
     open (my $F, '<' ,$file) or die "Unable to access $file\n";
-    while (<$F>) {
+    binmode $F, ':raw';
+
+    while ( <$F>) {
         last if ($. > $OPT{'lines'});
-        $content .= $_;
-	$copyright_match = parse_copyright($_);
-	if ($copyright_match) {
-	    $copyrights{lc("$copyright_match")} = "$copyright_match";
-	}
+        my $data = decode($charset,$_);
+        $content .= $data;
+        $copyright_match = parse_copyright($data);
+        if ($copyright_match) {
+            $copyrights{lc("$copyright_match")} = "$copyright_match";
+        }
     }
     close($F);
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/collab-maint/devscripts.git



More information about the devscripts-devel mailing list