r46248 - in /branches/upstream/libhtml-parser-perl/current: Changes META.yml Parser.pm lib/HTML/Entities.pm t/entities.t util.c

jawnsy-guest at users.alioth.debian.org jawnsy-guest at users.alioth.debian.org
Fri Oct 23 14:06:48 UTC 2009


Author: jawnsy-guest
Date: Fri Oct 23 14:03:52 2009
New Revision: 46248

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=46248
Log:
[svn-upgrade] Integrating new upstream version, libhtml-parser-perl (3.63)

Modified:
    branches/upstream/libhtml-parser-perl/current/Changes
    branches/upstream/libhtml-parser-perl/current/META.yml
    branches/upstream/libhtml-parser-perl/current/Parser.pm
    branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm
    branches/upstream/libhtml-parser-perl/current/t/entities.t
    branches/upstream/libhtml-parser-perl/current/util.c

Modified: branches/upstream/libhtml-parser-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/Changes?rev=46248&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/Changes (original)
+++ branches/upstream/libhtml-parser-perl/current/Changes Fri Oct 23 14:03:52 2009
@@ -1,3 +1,12 @@
+_______________________________________________________________________________
+2009-10-22  Release 3.63
+
+Gisle Aas (2):
+      Take more care to prepare the char range for encode_entities [RT#50170]
+      decode_entities confused by trailing incomplete entity
+
+
+
 _______________________________________________________________________________
 2009-08-13  Release 3.62
 

Modified: branches/upstream/libhtml-parser-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/META.yml?rev=46248&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/META.yml (original)
+++ branches/upstream/libhtml-parser-perl/current/META.yml Fri Oct 23 14:03:52 2009
@@ -1,6 +1,6 @@
 --- #YAML:1.0
 name:               HTML-Parser
-version:            3.62
+version:            3.63
 abstract:           HTML parser class
 author:
     - Gisle Aas <gisle at activestate.com>
@@ -9,7 +9,8 @@
 configure_requires:
     ExtUtils::MakeMaker:  0
 build_requires:
-    Test::More:  0
+    ExtUtils::MakeMaker:  0
+    Test::More:           0
 requires:
     HTML::Tagset:  3
     perl:          5.006
@@ -21,7 +22,7 @@
     directory:
         - t
         - inc
-generated_by:       ExtUtils::MakeMaker version 6.4801
+generated_by:       ExtUtils::MakeMaker version 6.55_02
 meta-spec:
     url:      http://module-build.sourceforge.net/META-spec-v1.4.html
     version:  1.4

Modified: branches/upstream/libhtml-parser-perl/current/Parser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/Parser.pm?rev=46248&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/Parser.pm (original)
+++ branches/upstream/libhtml-parser-perl/current/Parser.pm Fri Oct 23 14:03:52 2009
@@ -9,7 +9,7 @@
 use strict;
 use vars qw($VERSION @ISA);
 
-$VERSION = "3.62";
+$VERSION = "3.63";
 
 require HTML::Entities;
 

Modified: branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm?rev=46248&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm (original)
+++ branches/upstream/libhtml-parser-perl/current/lib/HTML/Entities.pm Fri Oct 23 14:03:52 2009
@@ -77,14 +77,21 @@
 =item encode_entities( $string, $unsafe_chars )
 
 This routine replaces unsafe characters in $string with their entity
-representation. A second argument can be given to specify which
-characters to consider unsafe (i.e., which to escape). The default set
-of characters to encode are control chars, high-bit chars, and the
-C<< < >>, C<< & >>, C<< > >>, C<< ' >> and C<< " >>
-characters.  But this, for example, would encode I<just> the
-C<< < >>, C<< & >>, C<< > >>, and C<< " >> characters:
+representation. A second argument can be given to specify which characters to
+consider unsafe.  The unsafe characters are specified using the regular
+expression character class syntax (what you find within brackets in regular
+expressions).
+
+The default set of characters to encode are control chars, high-bit chars, and
+the C<< < >>, C<< & >>, C<< > >>, C<< ' >> and C<< " >> characters.  But this,
+for example, would encode I<just> the C<< < >>, C<< & >>, C<< > >>, and C<< "
+>> characters:
 
   $encoded = encode_entities($input, '<>&"');
+
+and this would encode only characters that are not plain ASCII:
+
+  $encoded = encode_entities($input, '^\n\x20-\x25\x27-\x7e');
 
 This routine is exported by default.
 
@@ -139,7 +146,7 @@
 @EXPORT = qw(encode_entities decode_entities _decode_entities);
 @EXPORT_OK = qw(%entity2char %char2entity encode_entities_numeric);
 
-$VERSION = "3.60";
+$VERSION = "3.63";
 sub Version { $VERSION; }
 
 require HTML::Parser;  # for fast XS implemented decode_entities
@@ -457,7 +464,10 @@
     if (defined $_[1] and length $_[1]) {
 	unless (exists $subst{$_[1]}) {
 	    # Because we can't compile regex we fake it with a cached sub
-	    my $code = "sub {\$_[0] =~ s/([$_[1]])/\$char2entity{\$1} || num_entity(\$1)/ge; }";
+	    my $chars = $_[1];
+	    $chars =~ s,(?<!\\)([]/]),\\$1,g;
+	    $chars =~ s,(?<!\\)\\\z,\\\\,;
+	    my $code = "sub {\$_[0] =~ s/([$chars])/\$char2entity{\$1} || num_entity(\$1)/ge; }";
 	    $subst{$_[1]} = eval $code;
 	    die( $@ . " while trying to turn range: \"$_[1]\"\n "
 	      . "into code: $code\n "

Modified: branches/upstream/libhtml-parser-perl/current/t/entities.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/t/entities.t?rev=46248&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/t/entities.t (original)
+++ branches/upstream/libhtml-parser-perl/current/t/entities.t Fri Oct 23 14:03:52 2009
@@ -1,6 +1,6 @@
 use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric);
 
-use Test::More tests => 12;
+use Test::More tests => 17;
 
 $a = "V&aring;re norske tegn b&oslash;r &#230res";
 
@@ -24,6 +24,11 @@
 $a = "abcdef";
 is(encode_entities($a, 'a-c'), "&#97;&#98;&#99;def");
 
+$a = "[24/7]\\";
+is(encode_entities($a, '/'), "[24&#47;7]\\");
+is(encode_entities($a, '\\/'), "[24&#47;7]\\");
+is(encode_entities($a, '\\'), "[24/7]&#92;");
+is(encode_entities($a, ']\\'), "[24/7&#93;&#92;");
 
 # See how well it does against rfc1866...
 $ent = $plain = "";
@@ -66,6 +71,8 @@
 is(decode_entities("&apos;"), "'");
 is(encode_entities("'", "'"), "&#39;");
 
+is(decode_entities("Attention Home&#959&#969n&#1257rs...1&#1109t T&#1110&#1084e E&#957&#1257&#1075"),
+  "Attention Home\x{3BF}\x{3C9}n\x{4E9}rs...1\x{455}t T\x{456}\x{43C}e E\x{3BD}\x{4E9}\x{433}");
 
 __END__
 # Quoted from rfc1866.txt

Modified: branches/upstream/libhtml-parser-perl/current/util.c
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-parser-perl/current/util.c?rev=46248&op=diff
==============================================================================
--- branches/upstream/libhtml-parser-perl/current/util.c (original)
+++ branches/upstream/libhtml-parser-perl/current/util.c Fri Oct 23 14:03:52 2009
@@ -94,14 +94,14 @@
 	ent_start = s;
 	repl = 0;
 
-	if (*s == '#') {
+	if (s < end && *s == '#') {
 	    UV num = 0;
 	    UV prev = 0;
 	    int ok = 0;
 	    s++;
-	    if (*s == 'x' || *s == 'X') {
+	    if (s < end && (*s == 'x' || *s == 'X')) {
 		s++;
-		while (*s) {
+		while (s < end) {
 		    char *tmp = strchr(PL_hexdigit, *s);
 		    if (!tmp)
 			break;
@@ -117,7 +117,7 @@
 		}
 	    }
 	    else {
-		while (isDIGIT(*s)) {
+		while (s < end && isDIGIT(*s)) {
 		    num = num * 10 + (*s - '0');
 		    if (prev && num < prev) {
 			/* overflow */
@@ -180,7 +180,7 @@
 	}
 	else {
 	    char *ent_name = s;
-	    while (isALNUM(*s))
+	    while (s < end && isALNUM(*s))
 		s++;
 	    if (ent_name != s && entity2char) {
 		SV** svp;
@@ -216,7 +216,7 @@
 
 	if (repl) {
 	    char *repl_allocated = 0;
-	    if (*s == ';')
+	    if (s < end && *s == ';')
 		s++;
 	    t--;  /* '&' already copied, undo it */
 




More information about the Pkg-perl-cvs-commits mailing list