[devscripts] 04/05: uscan: Use URI & URI::QueryParam to parse URIs

James McCoy jamessan at debian.org
Tue Jun 9 02:35:04 UTC 2015


This is an automated email from the git hooks/post-receive script.

jamessan pushed a commit to branch master
in repository devscripts.

commit 74d3eb2ed5ad108d122134ff84d00c2c52b238bd
Author: James McCoy <jamessan at debian.org>
Date:   Mon Jun 8 22:23:14 2015 -0400

    uscan: Use URI & URI::QueryParam to parse URIs
    
    The ordering of the query parameters in debbugs' URLs isn't stable any
    more (and we shouldn't have relied on that anyway).  This broke the
    manual parsing of web pages when trying to cache parts of a bug.
    
    Using URI to programmatically extract the parameters simplifies the code
    and avoids the issue of parameter ordering.
    
    Closes: #786706
    Signed-off-by: James McCoy <jamessan at debian.org>
---
 README           |   3 +-
 debian/changelog |   3 ++
 debian/control   |   4 +-
 scripts/bts.pl   | 114 +++++++++++++++++++++++++++++++------------------------
 4 files changed, 71 insertions(+), 53 deletions(-)

diff --git a/README b/README
index e024fa8..4537ed8 100644
--- a/README
+++ b/README
@@ -27,7 +27,8 @@ And now, in mostly alphabetical order, the scripts:
 - bts: A command-line tool for accessing the BTS, both to
   send mails to control at bts.debian.org and to access the web pages and
   SOAP interface of the BTS. [www-browser, libauthen-sasl-perl,
-  libnet-smtp-ssl-perl, libsoap-lite-perl, libwww-perl, bsd-mailx | mailx]
+  libnet-smtp-ssl-perl, libsoap-lite-perl, liburi-perl, libwww-perl,
+  bsd-mailx | mailx]
 
 - build-rdeps: Searches for all packages that build-depend on a given package
   [dctrl-tools]
diff --git a/debian/changelog b/debian/changelog
index 314ea96..6a85252 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -27,6 +27,9 @@ devscripts (2.15.5) UNRELEASED; urgency=medium
     (Closes: #659330)
   * bts:
     + Use https to talk to bugs.debian.org
+    + Use URI & URI::QueryParam to parse URIs rather than regexps.  This fixes
+      issues with not detecting BTS URLs as valid due to ordering of query
+      parameters.  (Closes: #786706)
   * debuild: Recognize -jauto as a valid option. Based on a patch by Reiner
     Herrmann.  (Closes: #787276)
   * uscan: Fix Github example in man page.  (Closes: #757194)
diff --git a/debian/control b/debian/control
index 2a6b0df..f0b1d07 100644
--- a/debian/control
+++ b/debian/control
@@ -96,8 +96,8 @@ Description: scripts to make the life of a Debian Package maintainer easier
     E for stderr) for every line of output
   - archpath: print tla/Bazaar package names [tla | bazaar]
   - bts: a command-line tool for manipulating the BTS [www-browser,
-    libauthen-sasl-perl, libnet-smtp-ssl-perl, libsoap-lite-perl, libwww-perl,
-    bsd-mailx | mailx]
+    libauthen-sasl-perl, libnet-smtp-ssl-perl, libsoap-lite-perl, liburi-perl,
+    libwww-perl, bsd-mailx | mailx]
   - build-rdeps: search for all packages that build-depend on a given package
     [dctrl-tools]
   - chdist: tool to easily play with several distributions [dctrl-tools]
diff --git a/scripts/bts.pl b/scripts/bts.pl
index 8a3a34a..e7d9e68 100755
--- a/scripts/bts.pl
+++ b/scripts/bts.pl
@@ -42,7 +42,7 @@ bts - developers' command line interface to the BTS
 
 =cut
 
-use 5.006_000;
+use 5.010; # for defined-or
 use strict;
 use warnings;
 use File::Basename;
@@ -59,6 +59,9 @@ use Devscripts::Debbugs;
 use Fcntl qw(O_RDWR O_RDONLY O_CREAT F_SETFD);
 use Getopt::Long;
 use Encode;
+# Need support for ; as query param separator
+use URI 1.37;
+use URI::QueryParam;
 
 use Scalar::Util qw(looks_like_number);
 use POSIX qw(locale_h strftime);
@@ -3476,25 +3479,26 @@ sub href_to_filename {
     my $href = $_[0];
     my ($msg, $filename);
 
-    if ($href =~ m%\[<a(?: class=\".*?\")? href="(?:/cgi-bin/)?bugreport\.cgi([^\?]*)\?([^\"]*);bug=(\d+)">.*?\(([^,]*), .*?\)\]%) {
+    if ($href =~ m%\[<a(?: class=\".*?\")? href="((?:/cgi-bin/)?bugreport\.cgi([^\?]*)\?[^\"]*)">.*?\(([^,]*), .*?\)\]%) {
 	# this looks like an attachment; $4 should give the MIME-type
-	my $urlfilename = $1;
-	my $ref = $2;
-	my $bug = $3;
-	my $mimetype = $4;
+	my $uri = URI->new($1);
+	my $urlfilename = $2;
+	my $bug = $uri->query_param_delete('bug');
+	my $mimetype = $3;
+
+	my $ref = $uri->query();
 	$ref =~ s/&(?:amp;)?/;/g;  # normalise all hrefs
+	$uri->query($ref);
 
-	return undef unless $ref =~ /msg=(\d+);(filename=[^;]*;)?att=(\d+)/;
-	$msg = "$1-$3";
-	$urlfilename ||= "$2" if defined $2;
-	$urlfilename ||= "";
+	$msg = $uri->query_param('msg');
+	my $att = $uri->query_param('att');
+	return undef unless $msg && $att;
+	$msg .= "-$att";
+	$urlfilename ||= $att // '';
 
 	my $fileext = '';
 	if ($urlfilename =~ m%^/%) {
 	    $filename = basename($urlfilename);
-	} elsif ($urlfilename =~ m%^filename=([^;]*?);%) {
-	    $urlfilename = $1;
-	    $filename = basename($urlfilename);
 	} else {
 	    $filename = '';
 	    if ($mimetype eq 'text/plain') { $fileext = '.txt'; }
@@ -3506,36 +3510,45 @@ sub href_to_filename {
 	    $filename = "$bug/$msg$fileext";
 	}
     }
-    elsif ($href =~ m%<a(?: class=\".*?\")? href="(?:/cgi-bin/)?bugreport\.cgi([^\?]*)\?([^"]*);?bug=(\d+)(.*?)".*?>%) {
-	my $urlfilename = $1;
-	my $ref = $2;
-	my $bug = $3;
-	$ref .= $4 if defined $4;
+    elsif ($href =~ m%<a(?: class=\".*?\")? href="((?:/cgi-bin/)?bugreport\.cgi([^\?]*)\?([^"]*))".*?>%) {
+	my $uri = URI->new($1);
+	my $urlfilename = $2;
+	my $bug = $uri->query_param_delete('bug');
+	$msg = $uri->query_param_delete('msg');
+
+	my $ref = $uri->query // '';
 	$ref =~ s/&(?:amp;)?/;/g;  # normalise all hrefs
 	$ref =~ s/;archive=(yes|no)\b//;
 	$ref =~ s/%3D/=/g;
+	$uri->query($ref);
 
-	if ($ref =~ /msg=(\d+);$/) {
-	    $msg = $1;
-	    $filename = "$bug/$1.html";
-	}
-	elsif ($ref =~ /msg=(\d+);mbox=yes;$/) {
-	    $msg = "$1-mbox";
-	    $filename = "$bug/$1.mbox";
-	}
-	elsif ($ref =~ /^mbox=yes;$/) {
-	    $msg = 'rawmbox';
-	    $filename = "$bug.raw.mbox";
+	my %params = (
+	    mboxstatus => '', mboxstat => '', mboxmaint => '', mbox => '',
+	    $uri->query_form(),
+	);
+
+	if ($msg && !%params) {
+	    $filename = File::Spec->catfile($bug, "$msg.html");
 	}
-	elsif ($ref =~ /mboxstat(us)?=yes/) {
+	elsif (($params{mboxstat} || $params{mboxstatus}) eq 'yes') {
 	    $msg = 'statusmbox';
 	    $filename = "$bug.status.mbox";
 	}
-	elsif ($ref =~ /mboxmaint=yes/) {
+	elsif ($params{mboxmaint} eq 'yes') {
 	    $msg = 'mbox';
 	    $filename = "$bug.mbox";
 	}
-	elsif ($ref eq '') {
+	elsif ($params{mbox} eq 'yes') {
+	    if ($msg) {
+		$filename = "$bug/$msg.mbox";
+		$msg .= '-mbox';
+	    }
+	    else {
+		$filename = "$bug.raw.mbox";
+		$msg = 'rawmbox';
+	    }
+	}
+	elsif (!$ref) {
 	    return undef;
 	}
 	else {
@@ -3544,29 +3557,30 @@ sub href_to_filename {
 	    return undef;
 	}
     }
-    elsif ($href =~ m%<a[^>]* href=\"(?:/cgi-bin/)?version\.cgi([^>]+><img[^>]* src=\"(?:/cgi-bin/)?version\.cgi)?\?([^\"]+)\">%i) {
-	my $refs = $2;
-	$refs = $1 if not defined $refs;
+    elsif ($href =~ m%<(?:a[^>]* href|img [^>]* src)="((?:/cgi-bin/)?version\.cgi\?[^"]+)"[^>]*>%i) {
+	my $uri = URI->new($1);
+	my %params = $uri->query_form();
+
+	if ($params{package}) {
+	    $filename .= $params{package};
+	}
+	if ($params{found}) {
+	    $filename .= ".f.$params{found}";
+	}
+	if ($params{fixed}) {
+	    $filename .= ".fx.$params{fixed}";
+	}
+	if ($params{collapse}) {
+	    $filename .= '.co';
+	}
 
-	# Remove package= and make sure the package name is at the
-	# start of the filename
-	$refs =~ s/(.*?)package=(.*?)(;.*?|)$/$2;$1$3/;
-	# Package versions
-	$refs =~ s/;found=/.f./g;
-	$refs =~ s/;fixed=/.fx./g;
 	# Replace encoded "/" and "," characters with "."
-	$refs =~ s/%2[FC]/./g;
+	$filename =~ s@(?:%2[FC]|/|,)@.@gi;
 	# Remove encoded spaces
-	$refs =~ s/\+//g;
-	# Is this a "collapsed" graph?
-	$refs =~ s/;collapse=1(.*)/$1.co/;
-	# Remove any other parameters
-	$refs =~ s/(^|;)(\w+)=\d+//g;
-	# and tidy up any remaining separators
-	$refs =~ s/;//g;
+	$filename =~ s/\+//g;
 
 	$msg = 'versions';
-	$filename = "$refs.png";
+	$filename .= '.png';
     }
     else {
 	return undef;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/collab-maint/devscripts.git



More information about the devscripts-devel mailing list