[devscripts] 04/05: uscan: Use URI & URI::QueryParam to parse URIs
James McCoy
jamessan at debian.org
Tue Jun 9 02:35:04 UTC 2015
This is an automated email from the git hooks/post-receive script.
jamessan pushed a commit to branch master
in repository devscripts.
commit 74d3eb2ed5ad108d122134ff84d00c2c52b238bd
Author: James McCoy <jamessan at debian.org>
Date: Mon Jun 8 22:23:14 2015 -0400
uscan: Use URI & URI::QueryParam to parse URIs
The ordering of the query parameters in debbug's URLs isn't stable any
more (and we shouldn't have relied on that anyway). This broke the
manual parsing of web pages when trying to cache parts of a bug.
Using URI to programmatically extract the parameters simplifies the code
and avoids the issue of parameter ordering.
Closes: #786706
Signed-off-by: James McCoy <jamessan at debian.org>
---
README | 3 +-
debian/changelog | 3 ++
debian/control | 4 +-
scripts/bts.pl | 114 +++++++++++++++++++++++++++++++------------------------
4 files changed, 71 insertions(+), 53 deletions(-)
diff --git a/README b/README
index e024fa8..4537ed8 100644
--- a/README
+++ b/README
@@ -27,7 +27,8 @@ And now, in mostly alphabetical order, the scripts:
- bts: A command-line tool for accessing the BTS, both to
send mails to control@bts.debian.org and to access the web pages and
SOAP interface of the BTS. [www-browser, libauthen-sasl-perl,
- libnet-smtp-ssl-perl, libsoap-lite-perl, libwww-perl, bsd-mailx | mailx]
+ libnet-smtp-ssl-perl, libsoap-lite-perl, liburi-perl, libwww-perl,
+ bsd-mailx | mailx]
- build-rdeps: Searches for all packages that build-depend on a given package
[dctrl-tools]
diff --git a/debian/changelog b/debian/changelog
index 314ea96..6a85252 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -27,6 +27,9 @@ devscripts (2.15.5) UNRELEASED; urgency=medium
(Closes: #659330)
* bts:
+ Use https to talk to bugs.debian.org
+ + Use URI & URI::QueryParam to parse URIs rather than regexps. This fixes
+ issues with not detecting BTS URLs as valid due to ordering of query
+ parameters. (Closes: #786706)
* debuild: Recognize -jauto as a valid option. Based on a patch by Reiner
Herrmann. (Closes: #787276)
* uscan: Fix Github example in man page. (Closes: #757194)
diff --git a/debian/control b/debian/control
index 2a6b0df..f0b1d07 100644
--- a/debian/control
+++ b/debian/control
@@ -96,8 +96,8 @@ Description: scripts to make the life of a Debian Package maintainer easier
E for stderr) for every line of output
- archpath: print tla/Bazaar package names [tla | bazaar]
- bts: a command-line tool for manipulating the BTS [www-browser,
- libauthen-sasl-perl, libnet-smtp-ssl-perl, libsoap-lite-perl, libwww-perl,
- bsd-mailx | mailx]
+ libauthen-sasl-perl, libnet-smtp-ssl-perl, libsoap-lite-perl, liburi-perl,
+ libwww-perl, bsd-mailx | mailx]
- build-rdeps: search for all packages that build-depend on a given package
[dctrl-tools]
- chdist: tool to easily play with several distributions [dctrl-tools]
diff --git a/scripts/bts.pl b/scripts/bts.pl
index 8a3a34a..e7d9e68 100755
--- a/scripts/bts.pl
+++ b/scripts/bts.pl
@@ -42,7 +42,7 @@ bts - developers' command line interface to the BTS
=cut
-use 5.006_000;
+use 5.010; # for defined-or
use strict;
use warnings;
use File::Basename;
@@ -59,6 +59,9 @@ use Devscripts::Debbugs;
use Fcntl qw(O_RDWR O_RDONLY O_CREAT F_SETFD);
use Getopt::Long;
use Encode;
+# Need support for ; as query param separator
+use URI 1.37;
+use URI::QueryParam;
use Scalar::Util qw(looks_like_number);
use POSIX qw(locale_h strftime);
@@ -3476,25 +3479,26 @@ sub href_to_filename {
my $href = $_[0];
my ($msg, $filename);
- if ($href =~ m%\[<a(?: class=\".*?\")? href="(?:/cgi-bin/)?bugreport\.cgi([^\?]*)\?([^\"]*);bug=(\d+)">.*?\(([^,]*), .*?\)\]%) {
+ if ($href =~ m%\[<a(?: class=\".*?\")? href="((?:/cgi-bin/)?bugreport\.cgi([^\?]*)\?[^\"]*)">.*?\(([^,]*), .*?\)\]%) {
# this looks like an attachment; $4 should give the MIME-type
- my $urlfilename = $1;
- my $ref = $2;
- my $bug = $3;
- my $mimetype = $4;
+ my $uri = URI->new($1);
+ my $urlfilename = $2;
+ my $bug = $uri->query_param_delete('bug');
+ my $mimetype = $3;
+
+ my $ref = $uri->query();
$ref =~ s/&(?:amp;)?/;/g; # normalise all hrefs
+ $uri->query($ref);
- return undef unless $ref =~ /msg=(\d+);(filename=[^;]*;)?att=(\d+)/;
- $msg = "$1-$3";
- $urlfilename ||= "$2" if defined $2;
- $urlfilename ||= "";
+ $msg = $uri->query_param('msg');
+ my $att = $uri->query_param('att');
+ return undef unless $msg && $att;
+ $msg .= "-$att";
+ $urlfilename ||= $att // '';
my $fileext = '';
if ($urlfilename =~ m%^/%) {
$filename = basename($urlfilename);
- } elsif ($urlfilename =~ m%^filename=([^;]*?);%) {
- $urlfilename = $1;
- $filename = basename($urlfilename);
} else {
$filename = '';
if ($mimetype eq 'text/plain') { $fileext = '.txt'; }
@@ -3506,36 +3510,45 @@ sub href_to_filename {
$filename = "$bug/$msg$fileext";
}
}
- elsif ($href =~ m%<a(?: class=\".*?\")? href="(?:/cgi-bin/)?bugreport\.cgi([^\?]*)\?([^"]*);?bug=(\d+)(.*?)".*?>%) {
- my $urlfilename = $1;
- my $ref = $2;
- my $bug = $3;
- $ref .= $4 if defined $4;
+ elsif ($href =~ m%<a(?: class=\".*?\")? href="((?:/cgi-bin/)?bugreport\.cgi([^\?]*)\?([^"]*))".*?>%) {
+ my $uri = URI->new($1);
+ my $urlfilename = $2;
+ my $bug = $uri->query_param_delete('bug');
+ $msg = $uri->query_param_delete('msg');
+
+ my $ref = $uri->query // '';
$ref =~ s/&(?:amp;)?/;/g; # normalise all hrefs
$ref =~ s/;archive=(yes|no)\b//;
$ref =~ s/%3D/=/g;
+ $uri->query($ref);
- if ($ref =~ /msg=(\d+);$/) {
- $msg = $1;
- $filename = "$bug/$1.html";
- }
- elsif ($ref =~ /msg=(\d+);mbox=yes;$/) {
- $msg = "$1-mbox";
- $filename = "$bug/$1.mbox";
- }
- elsif ($ref =~ /^mbox=yes;$/) {
- $msg = 'rawmbox';
- $filename = "$bug.raw.mbox";
+ my %params = (
+ mboxstatus => '', mboxstat => '', mboxmaint => '', mbox => '',
+ $uri->query_form(),
+ );
+
+ if ($msg && !%params) {
+ $filename = File::Spec->catfile($bug, "$msg.html");
}
- elsif ($ref =~ /mboxstat(us)?=yes/) {
+ elsif (($params{mboxstat} || $params{mboxstatus}) eq 'yes') {
$msg = 'statusmbox';
$filename = "$bug.status.mbox";
}
- elsif ($ref =~ /mboxmaint=yes/) {
+ elsif ($params{mboxmaint} eq 'yes') {
$msg = 'mbox';
$filename = "$bug.mbox";
}
- elsif ($ref eq '') {
+ elsif ($params{mbox} eq 'yes') {
+ if ($msg) {
+ $filename = "$bug/$msg.mbox";
+ $msg .= '-mbox';
+ }
+ else {
+ $filename = "$bug.raw.mbox";
+ $msg = 'rawmbox';
+ }
+ }
+ elsif (!$ref) {
return undef;
}
else {
@@ -3544,29 +3557,30 @@ sub href_to_filename {
return undef;
}
}
- elsif ($href =~ m%<a[^>]* href=\"(?:/cgi-bin/)?version\.cgi([^>]+><img[^>]* src=\"(?:/cgi-bin/)?version\.cgi)?\?([^\"]+)\">%i) {
- my $refs = $2;
- $refs = $1 if not defined $refs;
+ elsif ($href =~ m%<(?:a[^>]* href|img [^>]* src)="((?:/cgi-bin/)?version\.cgi\?[^"]+)"[^>]*>%i) {
+ my $uri = URI->new($1);
+ my %params = $uri->query_form();
+
+ if ($params{package}) {
+ $filename .= $params{package};
+ }
+ if ($params{found}) {
+ $filename .= ".f.$params{found}";
+ }
+ if ($params{fixed}) {
+ $filename .= ".fx.$params{fixed}";
+ }
+ if ($params{collapse}) {
+ $filename .= '.co';
+ }
- # Remove package= and make sure the package name is at the
- # start of the filename
- $refs =~ s/(.*?)package=(.*?)(;.*?|)$/$2;$1$3/;
- # Package versions
- $refs =~ s/;found=/.f./g;
- $refs =~ s/;fixed=/.fx./g;
# Replace encoded "/" and "," characters with "."
- $refs =~ s/%2[FC]/./g;
+ $filename =~ s@(?:%2[FC]|/|,)@.@gi;
# Remove encoded spaces
- $refs =~ s/\+//g;
- # Is this a "collapsed" graph?
- $refs =~ s/;collapse=1(.*)/$1.co/;
- # Remove any other parameters
- $refs =~ s/(^|;)(\w+)=\d+//g;
- # and tidy up any remaining separators
- $refs =~ s/;//g;
+ $filename =~ s/\+//g;
$msg = 'versions';
- $filename = "$refs.png";
+ $filename .= '.png';
}
else {
return undef;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/collab-maint/devscripts.git
More information about the devscripts-devel
mailing list