[devscripts] 01/12: uscan: new pagemangle rule
Osamu Aoki
osamu at moszumanska.debian.org
Wed Sep 23 16:43:29 UTC 2015
This is an automated email from the git hooks/post-receive script.
osamu pushed a commit to branch multitar
in repository devscripts.
commit b84cd0a49be5e336ec54c412e6ac1e5e95ab6f43
Author: Osamu Aoki <osamu at debian.org>
Date: Wed Sep 23 20:23:03 2015 +0900
uscan: new pagemangle rule
This kicks off the series of commits towards the new version=4
on the "multitar" (multi-upstream-tarballs) branch.
The version bump from 3 to 4 will happen at the end.
* generic way to mangle the whole web page.
* address needs for fullsourcemangle. Closes: #395439
* text in <a>...</a> is a special case. Closes: #705989
* s/data-realurl/href/g is a special case. Closes: #773390
* s3.amazonaws.com special case code is marked deprecated.
* add test case for an amazonaws-like page
---
scripts/uscan.pl | 23 ++++++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)
diff --git a/scripts/uscan.pl b/scripts/uscan.pl
index 89ee442..411f467 100755
--- a/scripts/uscan.pl
+++ b/scripts/uscan.pl
@@ -828,6 +828,9 @@ sub process_watchline ($$$$$$)
@{$options{'uversionmangle'}} = split /;/, $1;
@{$options{'dversionmangle'}} = split /;/, $1;
}
+ elsif ($opt =~ /^\s*pagemangle\s*=\s*(.+?)\s*$/) {
+ @{$options{'pagemangle'}} = split /;/, $1;
+ }
elsif ($opt =~ /^\s*filenamemangle\s*=\s*(.+?)\s*$/) {
@{$options{'filenamemangle'}} = split /;/, $1;
}
@@ -1027,10 +1030,24 @@ sub process_watchline ($$$$$$)
print STDERR "$progname debug: received content:\n$content\n[End of received content]\n"
if $debug;
+ # pagemangle: should not abuse this slow operation
+ foreach my $pat (@{$options{'pagemangle'}}) {
+ print STDERR "$progname debug: pagemangle rule $pat\n" if $debug;
+ if (! safe_replace(\$content, $pat)) {
+ uscan_warn "$progname: In $watchfile, potentially"
+ . " unsafe or malformed pagemangle"
+ . " pattern:\n '$pat'"
+ . " found. Skipping watchline\n"
+ . " $line\n";
+ return 1;
+ }
+ }
if ($content =~ m%^<[?]xml%i &&
- $content =~ m%xmlns="http://s3.amazonaws.com/doc/2006-03-01/"%) {
+ $content =~ m%xmlns="http://s3.amazonaws.com/doc/2006-03-01/"% &&
+ $content !~ m%<Key><a\s+href%) {
# this is an S3 bucket listing. Insert an 'a href' tag
# into the content for each 'Key', so that it looks like html (LP: #798293)
+ uscan_warn "*** Amazon special case code is deprecated***\nUse opts=pagemangle rule, instead\n";
print STDERR "$progname debug: fixing s3 listing\n" if $debug;
$content =~ s%<Key>([^<]*)</Key>%<Key><a href="$1">$1</a></Key>%g
}
@@ -1051,6 +1068,10 @@ sub process_watchline ($$$$$$)
($urlbase = $base) =~ s%/[^/]*$%/%;
}
+ print STDERR "$progname debug: pagemangled content:\n$content\n[End of pagemangled content]\n"
+ if $debug;
+
+ # search hrefs in the web page to obtain a list of uversionmangled versions and matching download URLs
print STDERR "$progname debug: matching pattern(s) @patterns\n" if $debug;
my @hrefs;
while ($content =~ m/<\s*a\s+[^>]*href\s*=\s*([\"\'])(.*?)\1/sgi) {
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/collab-maint/devscripts.git
More information about the devscripts-devel
mailing list