r46248 - in /branches/upstream/libhtml-parser-perl/current: Changes META.yml Parser.pm lib/HTML/Entities.pm t/entities.t util.c
jawnsy-guest at users.alioth.debian.org
jawnsy-guest at users.alioth.debian.org
Fri Oct 23 14:06:48 UTC 2009
Author: jawnsy-guest
Date: Fri Oct 23 14:03:52 2009
New Revision: 46248
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=46248
Log:
[svn-upgrade] Integrating new upstream version, libhtml-parser-perl (3.63)
Modified:
branches/upstream/libhtml-parser-perl/current/Changes
branches/upstream/libhtml-parser-perl/current/META.yml
branches/upstream/libhtml-parser-perl/current/Parser.pm
branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm
branches/upstream/libhtml-parser-perl/current/t/entities.t
branches/upstream/libhtml-parser-perl/current/util.c
Modified: branches/upstream/libhtml-parser-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/Changes?rev=46248&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/Changes (original)
+++ branches/upstream/libhtml-parser-perl/current/Changes Fri Oct 23 14:03:52 2009
@@ -1,3 +1,12 @@
+_______________________________________________________________________________
+2009-10-22 Release 3.63
+
+Gisle Aas (2):
+ Take more care to prepare the char range for encode_entities [RT#50170]
+ decode_entities confused by trailing incomplete entity
+
+
+
_______________________________________________________________________________
2009-08-13 Release 3.62
Modified: branches/upstream/libhtml-parser-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/META.yml?rev=46248&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/META.yml (original)
+++ branches/upstream/libhtml-parser-perl/current/META.yml Fri Oct 23 14:03:52 2009
@@ -1,6 +1,6 @@
--- #YAML:1.0
name: HTML-Parser
-version: 3.62
+version: 3.63
abstract: HTML parser class
author:
- Gisle Aas <gisle at activestate.com>
@@ -9,7 +9,8 @@
configure_requires:
ExtUtils::MakeMaker: 0
build_requires:
- Test::More: 0
+ ExtUtils::MakeMaker: 0
+ Test::More: 0
requires:
HTML::Tagset: 3
perl: 5.006
@@ -21,7 +22,7 @@
directory:
- t
- inc
-generated_by: ExtUtils::MakeMaker version 6.4801
+generated_by: ExtUtils::MakeMaker version 6.55_02
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: 1.4
Modified: branches/upstream/libhtml-parser-perl/current/Parser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/Parser.pm?rev=46248&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/Parser.pm (original)
+++ branches/upstream/libhtml-parser-perl/current/Parser.pm Fri Oct 23 14:03:52 2009
@@ -9,7 +9,7 @@
use strict;
use vars qw($VERSION @ISA);
-$VERSION = "3.62";
+$VERSION = "3.63";
require HTML::Entities;
Modified: branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm?rev=46248&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm (original)
+++ branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm Fri Oct 23 14:03:52 2009
@@ -77,14 +77,21 @@
=item encode_entities( $string, $unsafe_chars )
This routine replaces unsafe characters in $string with their entity
-representation. A second argument can be given to specify which
-characters to consider unsafe (i.e., which to escape). The default set
-of characters to encode are control chars, high-bit chars, and the
-C<< < >>, C<< & >>, C<< > >>, C<< ' >> and C<< " >>
-characters. But this, for example, would encode I<just> the
-C<< < >>, C<< & >>, C<< > >>, and C<< " >> characters:
+representation. A second argument can be given to specify which characters to
+consider unsafe. The unsafe characters are specified using the regular
+expression character class syntax (what you find within brackets in regular
+expressions).
+
+The default set of characters to encode are control chars, high-bit chars, and
+the C<< < >>, C<< & >>, C<< > >>, C<< ' >> and C<< " >> characters. But this,
+for example, would encode I<just> the C<< < >>, C<< & >>, C<< > >>, and C<< "
+>> characters:
$encoded = encode_entities($input, '<>&"');
+
+and this would only encode non-plain ascii:
+
+ $encoded = encode_entities($input, '^\n\x20-\x25\x27-\x7e');
This routine is exported by default.
@@ -139,7 +146,7 @@
@EXPORT = qw(encode_entities decode_entities _decode_entities);
@EXPORT_OK = qw(%entity2char %char2entity encode_entities_numeric);
-$VERSION = "3.60";
+$VERSION = "3.63";
sub Version { $VERSION; }
require HTML::Parser; # for fast XS implemented decode_entities
@@ -457,7 +464,10 @@
if (defined $_[1] and length $_[1]) {
unless (exists $subst{$_[1]}) {
# Because we can't compile regex we fake it with a cached sub
- my $code = "sub {\$_[0] =~ s/([$_[1]])/\$char2entity{\$1} || num_entity(\$1)/ge; }";
+ my $chars = $_[1];
+ $chars =~ s,(?<!\\)([]/]),\\$1,g;
+ $chars =~ s,(?<!\\)\\\z,\\\\,;
+ my $code = "sub {\$_[0] =~ s/([$chars])/\$char2entity{\$1} || num_entity(\$1)/ge; }";
$subst{$_[1]} = eval $code;
die( $@ . " while trying to turn range: \"$_[1]\"\n "
. "into code: $code\n "
Modified: branches/upstream/libhtml-parser-perl/current/t/entities.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/t/entities.t?rev=46248&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/t/entities.t (original)
+++ branches/upstream/libhtml-parser-perl/current/t/entities.t Fri Oct 23 14:03:52 2009
@@ -1,6 +1,6 @@
use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric);
-use Test::More tests => 12;
+use Test::More tests => 17;
$a = "Våre norske tegn bør æres";
@@ -24,6 +24,11 @@
$a = "abcdef";
is(encode_entities($a, 'a-c'), "abcdef");
+$a = "[24/7]\\";
+is(encode_entities($a, '/'), "[24&#x2F;7]\\");
+is(encode_entities($a, '\\/'), "[24&#x2F;7]\\");
+is(encode_entities($a, '\\'), "[24/7]&#x5C;");
+is(encode_entities($a, ']\\'), "[24/7&#x5D;&#x5C;");
# See how well it does against rfc1866...
$ent = $plain = "";
@@ -66,6 +71,8 @@
is(decode_entities("&apos;"), "'");
is(encode_entities("'", "'"), "&#39;");
+is(decode_entities("Attention Home&#959&#969n&#1257rs...1&#1109t T&#1110&#1084e E&#957&#1257&#1075"),
+ "Attention Home\x{3BF}\x{3C9}n\x{4E9}rs...1\x{455}t T\x{456}\x{43C}e E\x{3BD}\x{4E9}\x{433}");
__END__
# Quoted from rfc1866.txt
Modified: branches/upstream/libhtml-parser-perl/current/util.c
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/util.c?rev=46248&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/util.c (original)
+++ branches/upstream/libhtml-parser-perl/current/util.c Fri Oct 23 14:03:52 2009
@@ -94,14 +94,14 @@
ent_start = s;
repl = 0;
- if (*s == '#') {
+ if (s < end && *s == '#') {
UV num = 0;
UV prev = 0;
int ok = 0;
s++;
- if (*s == 'x' || *s == 'X') {
+ if (s < end && (*s == 'x' || *s == 'X')) {
s++;
- while (*s) {
+ while (s < end) {
char *tmp = strchr(PL_hexdigit, *s);
if (!tmp)
break;
@@ -117,7 +117,7 @@
}
}
else {
- while (isDIGIT(*s)) {
+ while (s < end && isDIGIT(*s)) {
num = num * 10 + (*s - '0');
if (prev && num < prev) {
/* overflow */
@@ -180,7 +180,7 @@
}
else {
char *ent_name = s;
- while (isALNUM(*s))
+ while (s < end && isALNUM(*s))
s++;
if (ent_name != s && entity2char) {
SV** svp;
@@ -216,7 +216,7 @@
if (repl) {
char *repl_allocated = 0;
- if (*s == ';')
+ if (s < end && *s == ';')
s++;
t--; /* '&' already copied, undo it */
More information about the Pkg-perl-cvs-commits
mailing list