r46253 - in /trunk/libhtml-parser-perl: Changes META.yml Parser.pm debian/changelog debian/control debian/copyright lib/HTML/Entities.pm t/entities.t util.c

jawnsy-guest at users.alioth.debian.org jawnsy-guest at users.alioth.debian.org
Fri Oct 23 14:15:16 UTC 2009


Author: jawnsy-guest
Date: Fri Oct 23 14:15:10 2009
New Revision: 46253

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=46253
Log:
* New upstream release
* Standards-Version 3.8.3 (drop perl version dependency)
* Add myself to Uploaders and Copyright
* Rewrote control description and harmonized the layout

Modified:
    trunk/libhtml-parser-perl/Changes
    trunk/libhtml-parser-perl/META.yml
    trunk/libhtml-parser-perl/Parser.pm
    trunk/libhtml-parser-perl/debian/changelog
    trunk/libhtml-parser-perl/debian/control
    trunk/libhtml-parser-perl/debian/copyright
    trunk/libhtml-parser-perl/lib/HTML/Entities.pm
    trunk/libhtml-parser-perl/t/entities.t
    trunk/libhtml-parser-perl/util.c

Modified: trunk/libhtml-parser-perl/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/Changes?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/Changes (original)
+++ trunk/libhtml-parser-perl/Changes Fri Oct 23 14:15:10 2009
@@ -1,3 +1,12 @@
+_______________________________________________________________________________
+2009-10-22  Release 3.63
+
+Gisle Aas (2):
+      Take more care to prepare the char range for encode_entities [RT#50170]
+      decode_entities confused by trailing incomplete entity
+
+
+
 _______________________________________________________________________________
 2009-08-13  Release 3.62
 

Modified: trunk/libhtml-parser-perl/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/META.yml?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/META.yml (original)
+++ trunk/libhtml-parser-perl/META.yml Fri Oct 23 14:15:10 2009
@@ -1,6 +1,6 @@
 --- #YAML:1.0
 name:               HTML-Parser
-version:            3.62
+version:            3.63
 abstract:           HTML parser class
 author:
     - Gisle Aas <gisle at activestate.com>
@@ -9,7 +9,8 @@
 configure_requires:
     ExtUtils::MakeMaker:  0
 build_requires:
-    Test::More:  0
+    ExtUtils::MakeMaker:  0
+    Test::More:           0
 requires:
     HTML::Tagset:  3
     perl:          5.006
@@ -21,7 +22,7 @@
     directory:
         - t
         - inc
-generated_by:       ExtUtils::MakeMaker version 6.4801
+generated_by:       ExtUtils::MakeMaker version 6.55_02
 meta-spec:
     url:      http://module-build.sourceforge.net/META-spec-v1.4.html
     version:  1.4

Modified: trunk/libhtml-parser-perl/Parser.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/Parser.pm?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/Parser.pm (original)
+++ trunk/libhtml-parser-perl/Parser.pm Fri Oct 23 14:15:10 2009
@@ -9,7 +9,7 @@
 use strict;
 use vars qw($VERSION @ISA);
 
-$VERSION = "3.62";
+$VERSION = "3.63";
 
 require HTML::Entities;
 

Modified: trunk/libhtml-parser-perl/debian/changelog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/debian/changelog?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/debian/changelog (original)
+++ trunk/libhtml-parser-perl/debian/changelog Fri Oct 23 14:15:10 2009
@@ -1,3 +1,12 @@
+libhtml-parser-perl (3.63-1) UNRELEASED; urgency=low
+
+  * New upstream release
+  * Standards-Version 3.8.3 (drop perl version dependency)
+  * Add myself to Uploaders and Copyright
+  * Rewrote control description and harmonized the layout
+
+ -- Jonathan Yu <jawnsy at cpan.org>  Fri, 23 Oct 2009 06:38:01 -0400
+
 libhtml-parser-perl (3.62-1) unstable; urgency=low
 
   [ Salvatore Bonaccorso ]

Modified: trunk/libhtml-parser-perl/debian/control
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/debian/control?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/debian/control (original)
+++ trunk/libhtml-parser-perl/debian/control Fri Oct 23 14:15:10 2009
@@ -1,28 +1,34 @@
 Source: libhtml-parser-perl
+Section: perl
+Priority: optional
+Build-Depends: perl, debhelper (>= 7.0.8), quilt (>= 0.46-7), liburi-perl,
+ libhtml-tagset-perl, libtest-pod-perl
 Maintainer: Debian Perl Group <pkg-perl-maintainers at lists.alioth.debian.org>
 Uploaders: Krzysztof Krzyzaniak (eloy) <eloy at debian.org>,
  Kenneth J. Pronovici <pronovic at debian.org>,
  Damyan Ivanov <dmn at debian.org>,
  Antonio Radici <antonio at dyne.org>,
- Rene Mayorga <rmayorga at debian.org>,
+ Rene Mayorga <rmayorga at debian.org>, Jonathan Yu <jawnsy at cpan.org>,
  Salvatore Bonaccorso <salvatore.bonaccorso at gmail.com>
-Section: perl
-Priority: optional
-Build-Depends: debhelper (>= 7.0.8), perl (>= 5.8.1), libhtml-tagset-perl,
- libtest-pod-perl, liburi-perl, quilt (>= 0.46-7)
-Standards-Version: 3.8.2
+Standards-Version: 3.8.3
 Homepage: http://search.cpan.org/dist/HTML-Parser/
 Vcs-Svn: svn://svn.debian.org/pkg-perl/trunk/libhtml-parser-perl/
 Vcs-Browser: http://svn.debian.org/viewsvn/pkg-perl/trunk/libhtml-parser-perl/
 
 Package: libhtml-parser-perl
 Architecture: any
-Depends: ${misc:Depends}, ${perl:Depends}, libhtml-tagset-perl, ${shlibs:Depends}, liburi-perl
+Depends: ${misc:Depends}, ${perl:Depends}, ${shlibs:Depends}, liburi-perl,
+ libhtml-tagset-perl
 Suggests: libdata-dump-perl
 Enhances: libwww-perl
-Replaces: libwww-perl (<<5.36-0)
-Conflicts: libwww-perl (<<5.36-0), libhtml-tree-perl (<<0.61-0)
+Replaces: libwww-perl (<< 5.36-0)
+Conflicts: libwww-perl (<< 5.36-0), libhtml-tree-perl (<< 0.61-0)
 Description: collection of modules that parse HTML text documents
- HTML::Tagset is a collection of modules that parse HTML text documents. 
+ HTML::Parser is a collection of modules useful for handling HTML documents.
  These modules used to be part of the libwww-perl distribution, but are now
  unbundled in order to facilitate a separate development track.
+ .
+ Objects of the HTML::Parser class will recognize markup and separate it from
+ content data. As different kinds of markup are recognized, the corresponding
+ event handler is invoked. The document to be parsed may also be supplied in
+ arbitrary chunks, making on-the-fly parsing of network documents possible.

Modified: trunk/libhtml-parser-perl/debian/copyright
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/debian/copyright?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/debian/copyright (original)
+++ trunk/libhtml-parser-perl/debian/copyright Fri Oct 23 14:15:10 2009
@@ -16,6 +16,7 @@
 	   © 2005-2007 Krzysztof Krzyzaniak (eloy) <eloy at debian.org> 
 	   © 2008 Damyan Ivanov <dmn at debian.org>
 	   © 2009 Salvatore Bonaccorso <salvatore.bonaccorso at gmail.com> 
+	   © 2009 Jonathan Yu <jawnsy at cpan.org>
 License: Artistic | GPL-1+
 
 License: Artistic

Modified: trunk/libhtml-parser-perl/lib/HTML/Entities.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/lib/HTML/Entities.pm?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/lib/HTML/Entities.pm (original)
+++ trunk/libhtml-parser-perl/lib/HTML/Entities.pm Fri Oct 23 14:15:10 2009
@@ -77,14 +77,21 @@
 =item encode_entities( $string, $unsafe_chars )
 
 This routine replaces unsafe characters in $string with their entity
-representation. A second argument can be given to specify which
-characters to consider unsafe (i.e., which to escape). The default set
-of characters to encode are control chars, high-bit chars, and the
-C<< < >>, C<< & >>, C<< > >>, C<< ' >> and C<< " >>
-characters.  But this, for example, would encode I<just> the
-C<< < >>, C<< & >>, C<< > >>, and C<< " >> characters:
+representation. A second argument can be given to specify which characters to
+consider unsafe.  The unsafe characters are specified using the regular
+expression character class syntax (what you find within brackets in regular
+expressions).
+
+The default set of characters to encode are control chars, high-bit chars, and
+the C<< < >>, C<< & >>, C<< > >>, C<< ' >> and C<< " >> characters.  But this,
+for example, would encode I<just> the C<< < >>, C<< & >>, C<< > >>, and C<< "
+>> characters:
 
   $encoded = encode_entities($input, '<>&"');
+
+and this would only encode non-plain ascii:
+
+  $encoded = encode_entities($input, '^\n\x20-\x25\x27-\x7e');
 
 This routine is exported by default.
 
@@ -139,7 +146,7 @@
 @EXPORT = qw(encode_entities decode_entities _decode_entities);
 @EXPORT_OK = qw(%entity2char %char2entity encode_entities_numeric);
 
-$VERSION = "3.60";
+$VERSION = "3.63";
 sub Version { $VERSION; }
 
 require HTML::Parser;  # for fast XS implemented decode_entities
@@ -457,7 +464,10 @@
     if (defined $_[1] and length $_[1]) {
 	unless (exists $subst{$_[1]}) {
 	    # Because we can't compile regex we fake it with a cached sub
-	    my $code = "sub {\$_[0] =~ s/([$_[1]])/\$char2entity{\$1} || num_entity(\$1)/ge; }";
+	    my $chars = $_[1];
+	    $chars =~ s,(?<!\\)([]/]),\\$1,g;
+	    $chars =~ s,(?<!\\)\\\z,\\\\,;
+	    my $code = "sub {\$_[0] =~ s/([$chars])/\$char2entity{\$1} || num_entity(\$1)/ge; }";
 	    $subst{$_[1]} = eval $code;
 	    die( $@ . " while trying to turn range: \"$_[1]\"\n "
 	      . "into code: $code\n "

Modified: trunk/libhtml-parser-perl/t/entities.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/t/entities.t?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/t/entities.t (original)
+++ trunk/libhtml-parser-perl/t/entities.t Fri Oct 23 14:15:10 2009
@@ -1,6 +1,6 @@
 use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric);
 
-use Test::More tests => 12;
+use Test::More tests => 17;
 
 $a = "V&aring;re norske tegn b&oslash;r &#230res";
 
@@ -24,6 +24,11 @@
 $a = "abcdef";
 is(encode_entities($a, 'a-c'), "&#97;&#98;&#99;def");
 
+$a = "[24/7]\\";
+is(encode_entities($a, '/'), "[24&#47;7]\\");
+is(encode_entities($a, '\\/'), "[24&#47;7]\\");
+is(encode_entities($a, '\\'), "[24/7]&#92;");
+is(encode_entities($a, ']\\'), "[24/7&#93;&#92;");
 
 # See how well it does against rfc1866...
 $ent = $plain = "";
@@ -66,6 +71,8 @@
 is(decode_entities("&apos;"), "'");
 is(encode_entities("'", "'"), "&#39;");
 
+is(decode_entities("Attention Home&#959&#969n&#1257rs...1&#1109t T&#1110&#1084e E&#957&#1257&#1075"),
+  "Attention Home\x{3BF}\x{3C9}n\x{4E9}rs...1\x{455}t T\x{456}\x{43C}e E\x{3BD}\x{4E9}\x{433}");
 
 __END__
 # Quoted from rfc1866.txt

Modified: trunk/libhtml-parser-perl/util.c
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-parser-perl/util.c?rev=46253&op=diff
==============================================================================
--- trunk/libhtml-parser-perl/util.c (original)
+++ trunk/libhtml-parser-perl/util.c Fri Oct 23 14:15:10 2009
@@ -94,14 +94,14 @@
 	ent_start = s;
 	repl = 0;
 
-	if (*s == '#') {
+	if (s < end && *s == '#') {
 	    UV num = 0;
 	    UV prev = 0;
 	    int ok = 0;
 	    s++;
-	    if (*s == 'x' || *s == 'X') {
+	    if (s < end && (*s == 'x' || *s == 'X')) {
 		s++;
-		while (*s) {
+		while (s < end) {
 		    char *tmp = strchr(PL_hexdigit, *s);
 		    if (!tmp)
 			break;
@@ -117,7 +117,7 @@
 		}
 	    }
 	    else {
-		while (isDIGIT(*s)) {
+		while (s < end && isDIGIT(*s)) {
 		    num = num * 10 + (*s - '0');
 		    if (prev && num < prev) {
 			/* overflow */
@@ -180,7 +180,7 @@
 	}
 	else {
 	    char *ent_name = s;
-	    while (isALNUM(*s))
+	    while (s < end && isALNUM(*s))
 		s++;
 	    if (ent_name != s && entity2char) {
 		SV** svp;
@@ -216,7 +216,7 @@
 
 	if (repl) {
 	    char *repl_allocated = 0;
-	    if (*s == ';')
+	    if (s < end && *s == ';')
 		s++;
 	    t--;  /* '&' already copied, undo it */
 




More information about the Pkg-perl-cvs-commits mailing list