[devscripts] 01/12: uscan: new pagemangle rule

Osamu Aoki osamu at moszumanska.debian.org
Wed Sep 23 16:43:29 UTC 2015


This is an automated email from the git hooks/post-receive script.

osamu pushed a commit to branch multitar
in repository devscripts.

commit b84cd0a49be5e336ec54c412e6ac1e5e95ab6f43
Author: Osamu Aoki <osamu at debian.org>
Date:   Wed Sep 23 20:23:03 2015 +0900

    uscan: new pagemangle rule
    
        This kicks off the series of commits towards the new version=4
        on the "multitar" (multi-upstream-tarballs) branch.
    
        The version bump from 3 to 4 will happen at the end.
    
         * generic way to mangle the whole web page.
           * address needs for fullsourcemangle.  Closes: #395439
           * text in <a>...</a> is a special case.  Closes: #705989
           * s/data-realurl/href/g is a special case.  Closes: #773390
         * s3.amazonaws.com special case code is marked deprecated.
         * add a test case for an amazonaws-like page
---
 scripts/uscan.pl | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/scripts/uscan.pl b/scripts/uscan.pl
index 89ee442..411f467 100755
--- a/scripts/uscan.pl
+++ b/scripts/uscan.pl
@@ -828,6 +828,9 @@ sub process_watchline ($$$$$$)
 		    @{$options{'uversionmangle'}} = split /;/, $1;
 		    @{$options{'dversionmangle'}} = split /;/, $1;
 		}
+		elsif ($opt =~ /^\s*pagemangle\s*=\s*(.+?)\s*$/) {
+		    @{$options{'pagemangle'}} = split /;/, $1;
+		}
 		elsif ($opt =~ /^\s*filenamemangle\s*=\s*(.+?)\s*$/) {
 		    @{$options{'filenamemangle'}} = split /;/, $1;
 		}
@@ -1027,10 +1030,24 @@ sub process_watchline ($$$$$$)
 	print STDERR "$progname debug: received content:\n$content\n[End of received content]\n"
 	    if $debug;
 
+	# pagemangle: should not abuse this slow operation
+	foreach my $pat (@{$options{'pagemangle'}}) {
+	    print STDERR "$progname debug: pagemangle rule $pat\n" if $debug;
+	    if (! safe_replace(\$content, $pat)) {
+		uscan_warn "$progname: In $watchfile, potentially"
+		  . " unsafe or malformed pagemangle"
+		  . " pattern:\n  '$pat'"
+		  . " found. Skipping watchline\n"
+		  . "  $line\n";
+		return 1;
+	    }
+	}
 	if ($content =~ m%^<[?]xml%i &&
-	    $content =~ m%xmlns="http://s3.amazonaws.com/doc/2006-03-01/"%) {
+	    $content =~ m%xmlns="http://s3.amazonaws.com/doc/2006-03-01/"% &&
+	    $content !~ m%<Key><a\s+href%) {
 	    # this is an S3 bucket listing.  Insert an 'a href' tag
 	    # into the content for each 'Key', so that it looks like html (LP: #798293)
+	    uscan_warn "*** Amazon special case code is deprecated***\nUse opts=pagemangle rule, instead\n";
 	    print STDERR "$progname debug: fixing s3 listing\n" if $debug;
 	    $content =~ s%<Key>([^<]*)</Key>%<Key><a href="$1">$1</a></Key>%g
 	}
@@ -1051,6 +1068,10 @@ sub process_watchline ($$$$$$)
 	    ($urlbase = $base) =~ s%/[^/]*$%/%;
 	}
 
+	print STDERR "$progname debug: pagemangled content:\n$content\n[End of pagemangled content]\n"
+	    if $debug;
+
+	# search hrefs in web page to obtain a list of uversionmangled versions and matching download URLs
 	print STDERR "$progname debug: matching pattern(s) @patterns\n" if $debug;
 	my @hrefs;
 	while ($content =~ m/<\s*a\s+[^>]*href\s*=\s*([\"\'])(.*?)\1/sgi) {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/collab-maint/devscripts.git



More information about the devscripts-devel mailing list