[devscripts] 05/08: uscan: refine directory walking logic
Osamu Aoki
osamu at moszumanska.debian.org
Mon Sep 28 14:01:42 UTC 2015
This is an automated email from the git hooks/post-receive script.
osamu pushed a commit to branch multitar
in repository devscripts.
commit fbb99516a24af60f07ac15b3ef9f5aeafaddcd2e
Author: Osamu Aoki <osamu at debian.org>
Date: Mon Sep 28 00:15:38 2015 +0900
uscan: refine directory walking logic
code chunk was moved to a location after setting $download_version since this
chunk has a call to recursive_regex_dir which uses $download_version to change
its behavior for #734748.
recursive_regex_dir calls newest_dir and newest_dir is modified to cope with
Debian Bug report logs - #557768
[uscan] please support directory "version" mangling
Debian Bug report logs - #734748
devscripts: [uscan] Please use $download-version
whenever a recursive regex dir is being processed
---
scripts/uscan.pl | 264 +++++++++++++++++++++++++++++++++++++------------------
1 file changed, 177 insertions(+), 87 deletions(-)
diff --git a/scripts/uscan.pl b/scripts/uscan.pl
index 20724f5..b8c6cc7 100755
--- a/scripts/uscan.pl
+++ b/scripts/uscan.pl
@@ -345,32 +345,38 @@ Don't use PASV mode for the FTP connection.
=item B<dversionmangle=>I<rules>
-Normalize the last upstream version string found in
-F<debian/changelog>
+Normalize the last upstream version string found in F<debian/changelog>. Removal of upstream repackage mark by B<+s/dfsg\d+$//> is usually done here.
+
+=item B<dirversionmangle=>I<rules>
+
+Normalize the directory path string matching the regex in a set of parentheses
+of B<http::/>I<URL> as the sortable version string. This is used as the
+sorting index only.
=item B<pagemangle=>I<rules>
-Normalize the downloaded web page string
+Normalize the downloaded web page string. (Do not use this unless this is absolutely needed. B<s> rules should be appled with B<g> option.)
=item B<uversionmangle=>I<rules>
-Normalize the candidate upstream version strings
-extracted from hrefs in the source of the web page.
+Normalize the candidate upstream version strings extracted from hrefs in the
+source of the web page. This is used as the sorting index when selecting the
+latest upstream version.
=item B<versionmangle=>I<rules>
Syntactic shorthand for B<uversionmangle=>I<rules>B<,dversionmangle=>I<rules>
-=item B<filenamemangle=>I<rules>
-
-Normalize the downloaded tarball filename string I<< <upkg>-<uversion>.tar.gz
->>.
-
=item B<oversionmangle=>I<rules>
Generate the version string I<< <oversion> >> of the source tarball I<<
<spkg>_<oversion>.orig.tar.gz >> from I<< <uversion> >>.
+=item B<filenamemangle=>I<rules>
+
+Normalize the downloaded tarball filename string I<< <upkg>-<uversion>.tar.gz
+>>.
+
=item B<downloadurlmangle=>I<rules>
Normalize the candidate upstream tarball URL string.
@@ -2151,6 +2157,9 @@ sub process_watchline ($$$$$$)
elsif ($opt =~ /^\s*filenamemangle\s*=\s*(.+?)\s*$/) {
@{$options{'filenamemangle'}} = split /;/, $1;
}
+ elsif ($opt =~ /^\s*dirversionmangle\s*=\s*(.+?)\s*$/) {
+ @{$options{'dirversionmangle'}} = split /;/, $1;
+ }
elsif ($opt =~ /^\s*oversionmangle\s*=\s*(.+?)\s*$/) {
@{$options{'oversionmangle'}} = split /;/, $1;
}
@@ -2246,7 +2255,8 @@ sub process_watchline ($$$$$$)
# If PGP used, check required programs and generate files
print STDERR "$progname debug: \$gpgv_used=$gpgv_used, \$gpg_used=$gpg_used, \$download=$download, \$force_download=$force_download\n" if $debug;
- print STDERR "$progname debug: \$options{'pgpmode'}=$options{'pgpmode'}, \$options{'pgpsigurlmangle'}=$options{'pgpsigurlmangle'}\n" if $debug;
+ print STDERR "$progname debug: \$options{'pgpmode'}=$options{'pgpmode'}, \$options{'pgpsigurlmangle'}=$options{'pgpsigurlmangle'}\n" if $debug and defined $options{'pgpsigurlmangle'};
+ print STDERR "$progname debug: \$options{'pgpmode'}=$options{'pgpmode'}, \$options{'pgpsigurlmangle'}=undef\n" if $debug and ! defined $options{'pgpsigurlmangle'};
if (($download or $force_download) and ($gpgv_used == 1 or $gpg_used == 1)) {
if ($gpgv_used == 1 and ! $havegpgv) {
uscan_warn "$progname warning: pgpsigurlmangle option exists, please install gpgv or gpgv2.\n";
@@ -2306,23 +2316,6 @@ sub process_watchline ($$$$$$)
# Handle pypi.python.org addresses specially
$base =~ s%^https?://pypi\.python\.org/packages/source/./%https://pypi.debian.net/%;
- if ($base =~ m%^(\w+://[^/]+)%) {
- $site = $1;
- } else {
- uscan_warn "$progname warning: Can't determine protocol and site in\n $watchfile, skipping:\n $line\n";
- return 1;
- }
-
- # Find the path with the greatest version number matching the regex
- $base = recursive_regex_dir($base, \%options, $watchfile);
- if ($base eq '') { return 1; }
-
- # We're going to make the pattern
- # (?:(?:http://site.name)?/dir/path/)?base_pattern
- # It's fine even for ftp sites
- $basedir = $base;
- $basedir =~ s%^\w+://[^/]+/%/%;
- $pattern = "(?:(?:$site)?" . quotemeta($basedir) . ")?$filepattern";
}
# End parsing the watch line for all version=1/2/3/4
# all options('...') variables have been set
@@ -2393,10 +2386,31 @@ sub process_watchline ($$$$$$)
}
}
+ if ($watch_version != 1) {
+ if ($base =~ m%^(\w+://[^/]+)%) {
+ $site = $1;
+ } else {
+ uscan_warn "$progname warning: Can't determine protocol and site in\n $watchfile, skipping:\n $line\n";
+ return 1;
+ }
+
+ # Find the path with the greatest version number matching the regex
+ $base = recursive_regex_dir($base, \%options, $watchfile);
+ if ($base eq '') { return 1; }
+
+ # We're going to make the pattern
+ # (?:(?:http://site.name)?/dir/path/)?base_pattern
+ # It's fine even for ftp sites
+ $basedir = $base;
+ $basedir =~ s%^\w+://[^/]+/%/%;
+ $pattern = "(?:(?:$site)?" . quotemeta($basedir) . ")?$filepattern";
+ }
+
push @patterns, $pattern;
push @sites, $site;
push @basedirs, $basedir;
+ my $match = '';
# Start Checking $site and look for $filepattern which is newer than $lastversion
# What is the most recent file, based on the filenames?
# We first have to find the candidates, then we sort them using
@@ -2490,6 +2504,7 @@ sub process_watchline ($$$$$$)
my @hrefs;
while ($content =~ m/<\s*a\s+[^>]*href\s*=\s*([\"\'])(.*?)\1/sgi) {
my $href = $2;
+ my $mangled_version;
$href =~ s/\n//g;
foreach my $_pattern (@patterns) {
if ($href =~ m&^$_pattern$&) {
@@ -2497,11 +2512,11 @@ sub process_watchline ($$$$$$)
# watch_version 2 only recognised one group; the code
# below will break version 2 watchfiles with a construction
# such as file-([\d\.]+(-\d+)?) (bug #327258)
- push @hrefs, [$1, $href];
+ $mangled_version = $1;
} else {
# need the map { ... } here to handle cases of (...)?
# which may match but then return undef values
- my $mangled_version =
+ $mangled_version =
join(".", map { $_ if defined($_) }
$href =~ m&^$_pattern$&);
foreach my $pat (@{$options{'uversionmangle'}}) {
@@ -2515,36 +2530,40 @@ sub process_watchline ($$$$$$)
return 1;
}
}
- push @hrefs, [$mangled_version, $href];
}
+ $match = '';
+ if (defined $download_version) {
+ if ($mangled_version eq $download_version) {
+ $match = "matched with the download version";
+ }
+ }
+ push @hrefs, [$mangled_version, $href, $match];
}
}
}
if (@hrefs) {
- if ($verbose) {
- print "-- Found the following matching hrefs:\n";
- foreach my $href (@hrefs) { print " $$href[1] ($$href[0])\n"; }
+ @hrefs = Devscripts::Versort::upstream_versort(@hrefs);
+ if ($debug) {
+ print "-- Found the following matching hrefs on the web page (newest first):\n";
+ foreach my $href (@hrefs) { print " $$href[1] ($$href[0]) $$href[2]\n"; }
}
- if (defined $download_version) {
- # set $newversion, $newfile matching $download_version if it is found in the web page
- my @vhrefs = grep { $$_[0] eq $download_version } @hrefs;
- if (@vhrefs) {
- ($newversion, $newfile) = @{$vhrefs[0]};
- print STDERR "$progname debug: Found remote URL matiching the requested version.\n" if $debug;
- } else {
- uscan_warn "$progname warning: In $watchfile no matching hrefs for version $download_version"
- . " in watch line\n $line\n";
- return 1;
- }
+ }
+ if (defined $download_version) {
+ my @vhrefs = grep { $$_[2] } @hrefs;
+ if (@vhrefs) {
+ ($newversion, $newfile, undef) = @{$vhrefs[0]};
} else {
- # set the newest $newversion, $newfile if $download_version is not defined
- @hrefs = Devscripts::Versort::upstream_versort(@hrefs);
- ($newversion, $newfile) = @{$hrefs[0]};
+ uscan_warn "$progname warning: In $watchfile no matching hrefs for version $download_version"
+ . " in watch line\n $line\n";
+ return 1;
}
} else {
- uscan_warn "$progname warning: In $watchfile,\n no matching hrefs for watch line\n $line\n";
- print STDERR "$progname debug: Picked URL matiching the newest version.\n" if $debug;
- return 1;
+ if (@hrefs) {
+ ($newversion, $newfile, undef) = @{$hrefs[0]};
+ } else {
+ uscan_warn "$progname warning: In $watchfile no matching files for watch line\n $line\n";
+ return 1;
+ }
}
} elsif ($site =~ m%^ftp://%) {
# FTP site
@@ -2592,7 +2611,13 @@ sub process_watchline ($$$$$$)
return 1;
}
}
- push @files, [$mangled_version, $file];
+ $match = '';
+ if (defined $download_version) {
+ if ($mangled_version eq $download_version) {
+ $match = "matched with the download version";
+ }
+ }
+ push @files, [$mangled_version, $file, $match];
}
} else {
# they all look like:
@@ -2612,34 +2637,39 @@ sub process_watchline ($$$$$$)
return 1;
}
}
- push @files, [$mangled_version, $file];
+ $match = '';
+ if (defined $download_version) {
+ if ($mangled_version eq $download_version) {
+ $match = "matched with the download version";
+ }
+ }
+ push @files, [$mangled_version, $file, $match];
}
}
}
-
if (@files) {
+ @files = Devscripts::Versort::upstream_versort(@files);
if ($verbose) {
- print "-- Found the following matching files:\n";
- foreach my $file (@files) { print " $$file[1] ($$file[0])\n"; }
+ print "-- Found the following matching files on the web page (newest first):\n";
+ foreach my $file (@files) { print " $$file[1] ($$file[0]) $$file[2]\n"; }
}
- if (defined $download_version) {
- # set $newversion, $newfile matching $download_version if it is found in the web page
- my @vfiles = grep { $$_[0] eq $download_version } @files;
- if (@vfiles) {
- ($newversion, $newfile) = @{$vfiles[0]};
- } else {
- uscan_warn "$progname warning: In $watchfile no matching files for version $download_version"
- . " in watch line\n $line\n";
- return 1;
- }
+ }
+ if (defined $download_version) {
+ my @vfiles = grep { $$_[2] } @files;
+ if (@vfiles) {
+ ($newversion, $newfile, undef) = @{$vfiles[0]};
} else {
- # set the newest $newversion, $newfile if $download_version is not defined
- @files = Devscripts::Versort::upstream_versort(@files);
- ($newversion, $newfile) = @{$files[0]};
+ uscan_warn "$progname warning: In $watchfile no matching files for version $download_version"
+ . " in watch line\n $line\n";
+ return 1;
}
} else {
- uscan_warn "$progname warning: In $watchfile no matching files for watch line\n $line\n";
- return 1;
+ if (@files) {
+ ($newversion, $newfile, undef) = @{$files[0]};
+ } else {
+ uscan_warn "$progname warning: In $watchfile no matching files for watch line\n $line\n";
+ return 1;
+ }
}
} else {
# Neither HTTP nor FTP
@@ -3234,7 +3264,9 @@ sub newest_dir ($$$$$) {
my ($site, $dir, $pattern, $optref, $watchfile) = @_;
my $base = $site.$dir;
my ($request, $response);
+ my $newdir;
+ print STDERR "$progname debug: download version requested: $download_version\n" if $debug and defined $download_version;
if ($site =~ m%^http(s)?://%) {
if (defined($1) and !$haveSSL) {
uscan_die "$progname: you must have the liblwp-protocol-https-perl package installed\nto use https URLs\n";
@@ -3262,28 +3294,48 @@ sub newest_dir ($$$$$) {
print STDERR "$progname debug: matching pattern $dirpattern\n"
if $debug;
my @hrefs;
+ my $match ='';
while ($content =~ m/<\s*a\s+[^>]*href\s*=\s*([\"\'])(.*?)\1/gi) {
my $href = $2;
if ($href =~ m&^$dirpattern/?$&) {
my $mangled_version = join(".", map { $_ // '' } $href =~ m&^$dirpattern/?$&);
- push @hrefs, [$mangled_version, $href];
+ foreach my $pat (@{$$optref{'dirversionmangle'}}) {
+ print STDERR "$progname debug: Dirversionnmangle rule: $pat\n" if $debug;
+ if (! safe_replace(\$mangled_version, $pat)) {
+ uscan_warn "$progname: In $watchfile, potentially"
+ . " unsafe or malformed dirversionmangle"
+ . " pattern:\n '$pat'"
+ . " found.\n";
+ return 1;
+ }
+ }
+ $match = '';
+ if (defined $download_version) {
+ if ($mangled_version eq $download_version) {
+ $match = "matched with the download version";
+ }
+ }
+ push @hrefs, [$mangled_version, $href, $match];
}
}
+ my @vhrefs = grep { $$_[2] } @hrefs;
+ if (@vhrefs) {
+ $newdir = $vhrefs[0][1];
+ }
if (@hrefs) {
@hrefs = Devscripts::Versort::upstream_versort(@hrefs);
if ($debug) {
print "-- Found the following matching hrefs (newest first):\n";
- foreach my $href (@hrefs) { print " $$href[1] ($$href[0])\n"; }
+ foreach my $href (@hrefs) { print " $$href[1] ($$href[0]) $$href[2]\n"; }
}
- my $newdir = $hrefs[0][1];
- # just give the final directory component
- $newdir =~ s%/$%%;
- $newdir =~ s%^.*/%%;
- return $newdir;
+ $newdir //= $hrefs[0][1];
} else {
uscan_warn "$progname warning: In $watchfile,\n no matching hrefs for pattern\n $site$dir$pattern";
return '';
}
+ # just give the final directory component
+ $newdir =~ s%/$%%;
+ $newdir =~ s%^.*/%%;
}
elsif ($site =~ m%^ftp://%) {
# FTP site
@@ -3312,6 +3364,7 @@ sub newest_dir ($$$$$) {
# so we may have to look for <a href="filename"> type patterns
print STDERR "$progname debug: matching pattern $pattern\n" if $debug;
my (@dirs);
+ my $match ='';
# We separate out HTMLised listings from standard listings, so
# that we can target our search correctly
@@ -3320,7 +3373,23 @@ sub newest_dir ($$$$$) {
m/(?:<\s*a\s+[^>]*href\s*=\s*\")((?-i)$pattern)\"/gi) {
my $dir = $1;
my $mangled_version = join(".", $dir =~ m/^$pattern$/);
- push @dirs, [$mangled_version, $dir];
+ foreach my $pat (@{$$optref{'dirversionmangle'}}) {
+ print STDERR "$progname debug: Dirversionnmangle rule: $pat\n" if $debug;
+ if (! safe_replace(\$mangled_version, $pat)) {
+ uscan_warn "$progname: In $watchfile, potentially"
+ . " unsafe or malformed dirversionmangle"
+ . " pattern:\n '$pat'"
+ . " found.\n";
+ return 1;
+ }
+ }
+ $match = '';
+ if (defined $download_version) {
+ if ($mangled_version eq $download_version) {
+ $match = "matched with the download version";
+ }
+ }
+ push @dirs, [$mangled_version, $dir, $match];
}
} else {
# they all look like:
@@ -3329,27 +3398,48 @@ sub newest_dir ($$$$$) {
if ($ln =~ m/($pattern)(\s+->\s+\S+)?$/) {
my $dir = $1;
my $mangled_version = join(".", $dir =~ m/^$pattern$/);
- push @dirs, [$mangled_version, $dir];
+ foreach my $pat (@{$$optref{'dirversionmangle'}}) {
+ print STDERR "$progname debug: Dirversionnmangle rule: $pat\n" if $debug;
+ if (! safe_replace(\$mangled_version, $pat)) {
+ uscan_warn "$progname: In $watchfile, potentially"
+ . " unsafe or malformed dirversionmangle"
+ . " pattern:\n '$pat'"
+ . " found.\n";
+ return 1;
+ }
+ }
+ $match = '';
+ if (defined $download_version) {
+ if ($mangled_version eq $download_version) {
+ $match = "matched with the download version";
+ }
+ }
+ push @dirs, [$mangled_version, $dir, $match];
}
}
}
+ my @vdirs = grep { $$_[2] } @dirs;
+ if (@vdirs) {
+ $newdir = $vdirs[0][1];
+ }
if (@dirs) {
+ @dirs = Devscripts::Versort::upstream_versort(@dirs);
if ($debug) {
- print STDERR "-- Found the following matching dirs:\n";
- foreach my $dir (@dirs) { print STDERR " $$dir[1]\n"; }
+ print STDERR "-- Found the following matching FTP dirs (newest first):\n";
+ foreach my $dir (@dirs) { print STDERR " $$dir[1] ($$dir[0]) $$dir[2]\n"; }
}
- @dirs = Devscripts::Versort::upstream_versort(@dirs);
- my ($newversion, $newdir) = @{$dirs[0]};
- return $newdir;
+ $newdir //= $dirs[0][1];
} else {
uscan_warn "$progname warning: In $watchfile no matching dirs for pattern\n $base$pattern\n";
- return '';
+ $newdir = '';
}
}
else {
# Neither HTTP nor FTP site
- return 1;
+ uscan_warn "$progname: neither HTTP nor FTP site, impossible case for newdir().\n";
+ $newdir = '';
}
+ return $newdir;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/collab-maint/devscripts.git
More information about the devscripts-devel
mailing list