r46253 - in /trunk/libhtml-parser-perl: Changes META.yml Parser.pm debian/changelog debian/control debian/copyright lib/HTML/Entities.pm t/entities.t util.c
jawnsy-guest at users.alioth.debian.org
jawnsy-guest at users.alioth.debian.org
Fri Oct 23 14:15:16 UTC 2009
Author: jawnsy-guest
Date: Fri Oct 23 14:15:10 2009
New Revision: 46253
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=46253
Log:
* New upstream release
* Standards-Version 3.8.3 (drop perl version dependency)
* Add myself to Uploaders and Copyright
* Rewrote control description and harmonized the layout
Modified:
trunk/libhtml-parser-perl/Changes
trunk/libhtml-parser-perl/META.yml
trunk/libhtml-parser-perl/Parser.pm
trunk/libhtml-parser-perl/debian/changelog
trunk/libhtml-parser-perl/debian/control
trunk/libhtml-parser-perl/debian/copyright
trunk/libhtml-parser-perl/lib/HTML/Entities.pm
trunk/libhtml-parser-perl/t/entities.t
trunk/libhtml-parser-perl/util.c
Modified: trunk/libhtml-parser-perl/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/Changes?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/Changes (original)
+++ trunk/libhtml-parser-perl/Changes Fri Oct 23 14:15:10 2009
@@ -1,3 +1,12 @@
+_______________________________________________________________________________
+2009-10-22 Release 3.63
+
+Gisle Aas (2):
+ Take more care to prepare the char range for encode_entities [RT#50170]
+ decode_entities confused by trailing incomplete entity
+
+
+
_______________________________________________________________________________
2009-08-13 Release 3.62
Modified: trunk/libhtml-parser-perl/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/META.yml?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/META.yml (original)
+++ trunk/libhtml-parser-perl/META.yml Fri Oct 23 14:15:10 2009
@@ -1,6 +1,6 @@
--- #YAML:1.0
name: HTML-Parser
-version: 3.62
+version: 3.63
abstract: HTML parser class
author:
- Gisle Aas <gisle at activestate.com>
@@ -9,7 +9,8 @@
configure_requires:
ExtUtils::MakeMaker: 0
build_requires:
- Test::More: 0
+ ExtUtils::MakeMaker: 0
+ Test::More: 0
requires:
HTML::Tagset: 3
perl: 5.006
@@ -21,7 +22,7 @@
directory:
- t
- inc
-generated_by: ExtUtils::MakeMaker version 6.4801
+generated_by: ExtUtils::MakeMaker version 6.55_02
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: 1.4
Modified: trunk/libhtml-parser-perl/Parser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/Parser.pm?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/Parser.pm (original)
+++ trunk/libhtml-parser-perl/Parser.pm Fri Oct 23 14:15:10 2009
@@ -9,7 +9,7 @@
use strict;
use vars qw($VERSION @ISA);
-$VERSION = "3.62";
+$VERSION = "3.63";
require HTML::Entities;
Modified: trunk/libhtml-parser-perl/debian/changelog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/debian/changelog?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/debian/changelog (original)
+++ trunk/libhtml-parser-perl/debian/changelog Fri Oct 23 14:15:10 2009
@@ -1,3 +1,12 @@
+libhtml-parser-perl (3.63-1) UNRELEASED; urgency=low
+
+ * New upstream release
+ * Standards-Version 3.8.3 (drop perl version dependency)
+ * Add myself to Uploaders and Copyright
+ * Rewrote control description and harmonized the layout
+
+ -- Jonathan Yu <jawnsy at cpan.org> Fri, 23 Oct 2009 06:38:01 -0400
+
libhtml-parser-perl (3.62-1) unstable; urgency=low
[ Salvatore Bonaccorso ]
Modified: trunk/libhtml-parser-perl/debian/control
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/debian/control?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/debian/control (original)
+++ trunk/libhtml-parser-perl/debian/control Fri Oct 23 14:15:10 2009
@@ -1,28 +1,34 @@
Source: libhtml-parser-perl
+Section: perl
+Priority: optional
+Build-Depends: perl, debhelper (>= 7.0.8), quilt (>= 0.46-7), liburi-perl,
+ libhtml-tagset-perl, libtest-pod-perl
Maintainer: Debian Perl Group <pkg-perl-maintainers at lists.alioth.debian.org>
Uploaders: Krzysztof Krzyzaniak (eloy) <eloy at debian.org>,
Kenneth J. Pronovici <pronovic at debian.org>,
Damyan Ivanov <dmn at debian.org>,
Antonio Radici <antonio at dyne.org>,
- Rene Mayorga <rmayorga at debian.org>,
+ Rene Mayorga <rmayorga at debian.org>, Jonathan Yu <jawnsy at cpan.org>,
Salvatore Bonaccorso <salvatore.bonaccorso at gmail.com>
-Section: perl
-Priority: optional
-Build-Depends: debhelper (>= 7.0.8), perl (>= 5.8.1), libhtml-tagset-perl,
- libtest-pod-perl, liburi-perl, quilt (>= 0.46-7)
-Standards-Version: 3.8.2
+Standards-Version: 3.8.3
Homepage: http://search.cpan.org/dist/HTML-Parser/
Vcs-Svn: svn://svn.debian.org/pkg-perl/trunk/libhtml-parser-perl/
Vcs-Browser: http://svn.debian.org/viewsvn/pkg-perl/trunk/libhtml-parser-perl/
Package: libhtml-parser-perl
Architecture: any
-Depends: ${misc:Depends}, ${perl:Depends}, libhtml-tagset-perl, ${shlibs:Depends}, liburi-perl
+Depends: ${misc:Depends}, ${perl:Depends}, ${shlibs:Depends}, liburi-perl,
+ libhtml-tagset-perl
Suggests: libdata-dump-perl
Enhances: libwww-perl
-Replaces: libwww-perl (<<5.36-0)
-Conflicts: libwww-perl (<<5.36-0), libhtml-tree-perl (<<0.61-0)
+Replaces: libwww-perl (<< 5.36-0)
+Conflicts: libwww-perl (<< 5.36-0), libhtml-tree-perl (<< 0.61-0)
Description: collection of modules that parse HTML text documents
- HTML::Tagset is a collection of modules that parse HTML text documents.
+ HTML::Parser is a collection of modules useful for handling HTML documents.
These modules used to be part of the libwww-perl distribution, but are now
unbundled in order to facilitate a separate development track.
+ .
+ Objects of the HTML::Parser class will recognize markup and separate it from
+ content data. As different kinds of markup are recognized, the corresponding
+ event handler is invoked. The document to be parsed may also be supplied in
+ arbitrary chunks, making on-the-fly parsing of network documents possible.
Modified: trunk/libhtml-parser-perl/debian/copyright
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/debian/copyright?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/debian/copyright (original)
+++ trunk/libhtml-parser-perl/debian/copyright Fri Oct 23 14:15:10 2009
@@ -16,6 +16,7 @@
© 2005-2007 Krzysztof Krzyzaniak (eloy) <eloy at debian.org>
© 2008 Damyan Ivanov <dmn at debian.org>
© 2009 Salvatore Bonaccorso <salvatore.bonaccorso at gmail.com>
+ © 2009 Jonathan Yu <jawnsy at cpan.org>
License: Artistic | GPL-1+
License: Artistic
Modified: trunk/libhtml-parser-perl/lib/HTML/Entities.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/lib/HTML/Entities.pm?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/lib/HTML/Entities.pm (original)
+++ trunk/libhtml-parser-perl/lib/HTML/Entities.pm Fri Oct 23 14:15:10 2009
@@ -77,14 +77,21 @@
=item encode_entities( $string, $unsafe_chars )
This routine replaces unsafe characters in $string with their entity
-representation. A second argument can be given to specify which
-characters to consider unsafe (i.e., which to escape). The default set
-of characters to encode are control chars, high-bit chars, and the
-C<< < >>, C<< & >>, C<< > >>, C<< ' >> and C<< " >>
-characters. But this, for example, would encode I<just> the
-C<< < >>, C<< & >>, C<< > >>, and C<< " >> characters:
+representation. A second argument can be given to specify which characters to
+consider unsafe. The unsafe characters is specified using the regular
+expression character class syntax (what you find within brackets in regular
+expressions).
+
+The default set of characters to encode are control chars, high-bit chars, and
+the C<< < >>, C<< & >>, C<< > >>, C<< ' >> and C<< " >> characters. But this,
+for example, would encode I<just> the C<< < >>, C<< & >>, C<< > >>, and C<< "
+>> characters:
$encoded = encode_entities($input, '<>&"');
+
+and this would only encode non-plain ascii:
+
+ $encoded = encode_entities($input, '^\n\x20-\x25\x27-\x7e');
This routine is exported by default.
@@ -139,7 +146,7 @@
@EXPORT = qw(encode_entities decode_entities _decode_entities);
@EXPORT_OK = qw(%entity2char %char2entity encode_entities_numeric);
-$VERSION = "3.60";
+$VERSION = "3.63";
sub Version { $VERSION; }
require HTML::Parser; # for fast XS implemented decode_entities
@@ -457,7 +464,10 @@
if (defined $_[1] and length $_[1]) {
unless (exists $subst{$_[1]}) {
# Because we can't compile regex we fake it with a cached sub
- my $code = "sub {\$_[0] =~ s/([$_[1]])/\$char2entity{\$1} || num_entity(\$1)/ge; }";
+ my $chars = $_[1];
+ $chars =~ s,(?<!\\)([]/]),\\$1,g;
+ $chars =~ s,(?<!\\)\\\z,\\\\,;
+ my $code = "sub {\$_[0] =~ s/([$chars])/\$char2entity{\$1} || num_entity(\$1)/ge; }";
$subst{$_[1]} = eval $code;
die( $@ . " while trying to turn range: \"$_[1]\"\n "
. "into code: $code\n "
Modified: trunk/libhtml-parser-perl/t/entities.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/t/entities.t?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/t/entities.t (original)
+++ trunk/libhtml-parser-perl/t/entities.t Fri Oct 23 14:15:10 2009
@@ -1,6 +1,6 @@
use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric);
-use Test::More tests => 12;
+use Test::More tests => 17;
$a = "V&aring;re norske tegn b&oslash;r &#230res";
@@ -24,6 +24,11 @@
$a = "abcdef";
is(encode_entities($a, 'a-c'), "&#97;&#98;&#99;def");
+$a = "[24/7]\\";
+is(encode_entities($a, '/'), "[24&#47;7]\\");
+is(encode_entities($a, '\\/'), "[24&#47;7]\\");
+is(encode_entities($a, '\\'), "[24/7]&#92;");
+is(encode_entities($a, ']\\'), "[24/7&#93;&#92;");
# See how well it does against rfc1866...
$ent = $plain = "";
@@ -66,6 +71,8 @@
is(decode_entities("&apos;"), "'");
is(encode_entities("'", "'"), "&#39;");
+is(decode_entities("Attention Home&#959&#969n&#1257rs...1&#1109t T&#1110&#1084e E&#957&#1257&#1075"),
+ "Attention Home\x{3BF}\x{3C9}n\x{4E9}rs...1\x{455}t T\x{456}\x{43C}e E\x{3BD}\x{4E9}\x{433}");
__END__
# Quoted from rfc1866.txt
Modified: trunk/libhtml-parser-perl/util.c
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/util.c?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/util.c (original)
+++ trunk/libhtml-parser-perl/util.c Fri Oct 23 14:15:10 2009
@@ -94,14 +94,14 @@
ent_start = s;
repl = 0;
- if (*s == '#') {
+ if (s < end && *s == '#') {
UV num = 0;
UV prev = 0;
int ok = 0;
s++;
- if (*s == 'x' || *s == 'X') {
+ if (s < end && (*s == 'x' || *s == 'X')) {
s++;
- while (*s) {
+ while (s < end) {
char *tmp = strchr(PL_hexdigit, *s);
if (!tmp)
break;
@@ -117,7 +117,7 @@
}
}
else {
- while (isDIGIT(*s)) {
+ while (s < end && isDIGIT(*s)) {
num = num * 10 + (*s - '0');
if (prev && num < prev) {
/* overflow */
@@ -180,7 +180,7 @@
}
else {
char *ent_name = s;
- while (isALNUM(*s))
+ while (s < end && isALNUM(*s))
s++;
if (ent_name != s && entity2char) {
SV** svp;
@@ -216,7 +216,7 @@
if (repl) {
char *repl_allocated = 0;
- if (*s == ';')
+ if (s < end && *s == ';')
s++;
t--; /* '&' already copied, undo it */
More information about the Pkg-perl-cvs-commits
mailing list