r77515 - in /branches/upstream/libhtml-format-perl/current: Build.PL Changes META.json META.yml README lib/HTML/FormatPS.pm lib/HTML/FormatRTF.pm lib/HTML/FormatText.pm lib/HTML/Formatter.pm t/rt69426.t

ansgar at users.alioth.debian.org ansgar at users.alioth.debian.org
Sat Jul 16 10:21:02 UTC 2011


Author: ansgar
Date: Sat Jul 16 10:21:00 2011
New Revision: 77515

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=77515
Log:
[svn-upgrade] new version libhtml-format-perl (2.09)

Modified:
    branches/upstream/libhtml-format-perl/current/Build.PL
    branches/upstream/libhtml-format-perl/current/Changes
    branches/upstream/libhtml-format-perl/current/META.json
    branches/upstream/libhtml-format-perl/current/META.yml
    branches/upstream/libhtml-format-perl/current/README
    branches/upstream/libhtml-format-perl/current/lib/HTML/FormatPS.pm
    branches/upstream/libhtml-format-perl/current/lib/HTML/FormatRTF.pm
    branches/upstream/libhtml-format-perl/current/lib/HTML/FormatText.pm
    branches/upstream/libhtml-format-perl/current/lib/HTML/Formatter.pm
    branches/upstream/libhtml-format-perl/current/t/rt69426.t

Modified: branches/upstream/libhtml-format-perl/current/Build.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-format-perl/current/Build.PL?rev=77515&op=diff
==============================================================================
--- branches/upstream/libhtml-format-perl/current/Build.PL (original)
+++ branches/upstream/libhtml-format-perl/current/Build.PL Sat Jul 16 10:21:00 2011
@@ -7,7 +7,6 @@
 
 my %module_build_args = (
   'build_requires' => {
-    'Data::Dump' => '0',
     'English' => '0',
     'File::Find' => '0',
     'File::Slurp' => '0',
@@ -28,7 +27,7 @@
     'Gisle Aas <gisle at ActiveState.com>'
   ],
   'dist_name' => 'HTML-Format',
-  'dist_version' => '2.08',
+  'dist_version' => '2.09',
   'license' => 'perl',
   'module_name' => 'HTML::Format',
   'recommends' => {},

Modified: branches/upstream/libhtml-format-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-format-perl/current/Changes?rev=77515&op=diff
==============================================================================
--- branches/upstream/libhtml-format-perl/current/Changes (original)
+++ branches/upstream/libhtml-format-perl/current/Changes Sat Jul 16 10:21:00 2011
@@ -1,3 +1,8 @@
+2.09      2011-07-15 14:20:12 Europe/London
+    - Reworking of HTML quote issue in FormatPS
+    - Now remap all double quotes to " in FormatPS (which is not
+      really right, but the best I can do with latin1 output)
+
 2.08      2011-07-13 16:20:54 Europe/London
     - RT#69426 - issues with HTML quotes
     - UTF8 handling requires perl 5.8, so minimum requirement bumped

Modified: branches/upstream/libhtml-format-perl/current/META.json
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-format-perl/current/META.json?rev=77515&op=diff
==============================================================================
--- branches/upstream/libhtml-format-perl/current/META.json (original)
+++ branches/upstream/libhtml-format-perl/current/META.json Sat Jul 16 10:21:00 2011
@@ -53,7 +53,6 @@
       },
       "test" : {
          "requires" : {
-            "Data::Dump" : 0,
             "English" : 0,
             "File::Find" : 0,
             "File::Slurp" : 0,
@@ -68,19 +67,19 @@
    "provides" : {
       "HTML::FormatPS" : {
          "file" : "lib/HTML/FormatPS.pm",
-         "version" : "2.08"
+         "version" : "2.09"
       },
       "HTML::FormatRTF" : {
          "file" : "lib/HTML/FormatRTF.pm",
-         "version" : "2.08"
+         "version" : "2.09"
       },
       "HTML::FormatText" : {
          "file" : "lib/HTML/FormatText.pm",
-         "version" : "2.08"
+         "version" : "2.09"
       },
       "HTML::Formatter" : {
          "file" : "lib/HTML/Formatter.pm",
-         "version" : "2.08"
+         "version" : "2.09"
       }
    },
    "release_status" : "stable",
@@ -96,7 +95,7 @@
          "web" : "http://github.com/nigelm/html-format"
       }
    },
-   "version" : "2.08",
+   "version" : "2.09",
    "x_Dist_Zilla" : {
       "plugins" : [
          {

Modified: branches/upstream/libhtml-format-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-format-perl/current/META.yml?rev=77515&op=diff
==============================================================================
--- branches/upstream/libhtml-format-perl/current/META.yml (original)
+++ branches/upstream/libhtml-format-perl/current/META.yml Sat Jul 16 10:21:00 2011
@@ -5,7 +5,6 @@
   - 'Sean M Burke <sburke at cpan.org>'
   - 'Gisle Aas <gisle at ActiveState.com>'
 build_requires:
-  Data::Dump: 0
   English: 0
   File::Find: 0
   File::Slurp: 0
@@ -27,16 +26,16 @@
 provides:
   HTML::FormatPS:
     file: lib/HTML/FormatPS.pm
-    version: 2.08
+    version: 2.09
   HTML::FormatRTF:
     file: lib/HTML/FormatRTF.pm
-    version: 2.08
+    version: 2.09
   HTML::FormatText:
     file: lib/HTML/FormatText.pm
-    version: 2.08
+    version: 2.09
   HTML::Formatter:
     file: lib/HTML/Formatter.pm
-    version: 2.08
+    version: 2.09
 requires:
   Carp: 0
   Data::Dumper: 0
@@ -63,7 +62,7 @@
   bugtracker: http://rt.cpan.org/Public/Dist/Display.html?Name=HTML-Format
   homepage: http://search.cpan.org/dist/HTML-Format
   repository: git://github.com/nigelm/html-format.git
-version: 2.08
+version: 2.09
 x_Dist_Zilla:
   plugins:
     -

Modified: branches/upstream/libhtml-format-perl/current/README
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-format-perl/current/README?rev=77515&op=diff
==============================================================================
--- branches/upstream/libhtml-format-perl/current/README (original)
+++ branches/upstream/libhtml-format-perl/current/README Sat Jul 16 10:21:00 2011
@@ -2,7 +2,7 @@
     HTML::Formatter - Base class for HTML formatters
 
 VERSION
-    version 2.08
+    version 2.09
 
 SYNOPSIS
       use HTML::FormatSomething;

Modified: branches/upstream/libhtml-format-perl/current/lib/HTML/FormatPS.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-format-perl/current/lib/HTML/FormatPS.pm?rev=77515&op=diff
==============================================================================
--- branches/upstream/libhtml-format-perl/current/lib/HTML/FormatPS.pm (original)
+++ branches/upstream/libhtml-format-perl/current/lib/HTML/FormatPS.pm Sat Jul 16 10:21:00 2011
@@ -13,7 +13,7 @@
 
 use base 'HTML::Formatter';
 
-our $VERSION = '2.08'; # VERSION
+our $VERSION = '2.09'; # VERSION
 our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
 
 # We now use Smart::Comments in place of the old DEBUG framework.
@@ -262,11 +262,14 @@
 # ------------------------------------------------------------------------
 sub width {
     my $self = shift;
+    my $str  = shift;
 
     my $w  = 0;
     my $wx = $self->{wx};
     my $sz = $self->{pointsize};
-    for ( unpack( "C*", $_[0] ) ) {
+
+    # need to encode to same encoding as font before getting width
+    for ( unpack( "C*", $self->encode_string($str) ) ) {
 
         # if the character is outside the table, assume its m sized
         $w += ( ( $_ > $#{$wx} ) ? $wx->[ ord('m') ] : $wx->[$_] ) * $sz    # unless  $_ eq 0xAD; # optional hyphen
@@ -509,19 +512,11 @@
     $str =~ tr/\x01//d;
     return unless length $str;
 
-    # the string from the parser is normally unicode, and may contain
-    # some punctuation characters in the 'General Punctuation' block
-    # which can be expressed in latin1, but Encode module fails on them
-    # so we will manually hack these...
-    if ( utf8::is_utf8($str) ) {
-        $str =~ tr/\x{2018}\x{2019}\x{201A}/`',/;
-    }
-
     # must escape parentheses and backslash
     $str =~ s/([\(\)\\])/\\$1/g;
 
     # encode output to latin1 when pushing it out
-    $self->{line} .= "(" . $self->{encoder}->encode($str) . ")S\n";
+    $self->{line} .= "(" . $self->encode_string($str) . ")S\n";
     $self->{showstring} = "";
 }
 
@@ -682,6 +677,22 @@
 }
 
 # ------------------------------------------------------------------------
+sub encode_string {    # converts string into latin1 charset
+    my ( $self, $str ) = @_;
+
+    # the string from the parser is normally unicode, and may contain
+    # some punctuation characters in the 'General Punctuation' block
+    # which can be expressed in latin1, but Encode module fails on them
+    # so we will manually hack these...
+    # There's no usable latin1 for the double quote chars, so map them to "
+    if ( utf8::is_utf8($str) ) {
+        $str =~ tr/\x{2018}\x{2019}\x{201A}\x{201C}\x{201D}\x{201F}\x{2033}\x{2036}/`',"""""/;
+    }
+
+    return $self->{encoder}->encode($str);
+}
+
+# ------------------------------------------------------------------------
 sub out {    # Output a word
     my ( $self, $text ) = @_;
 
@@ -865,7 +876,7 @@
 
 =head1 VERSION
 
-version 2.08
+version 2.09
 
 =head1 SYNOPSIS
 
@@ -1007,18 +1018,18 @@
 
 L<HTML::Formatter>
 
+=head1 ISSUES
+
 =over
 
 =item *
 
 Output is in ISO Latin1 format. The underlying HTML parsers tend to
-now work in Unicode (perl native) code points. There is an impedence
-match between these, which may give issues with complex characters
+now work in Unicode (perl native) code points. There is an impedance
+mismatch between these, which may give issues with complex characters
 within HTML.
 
 =back
-
-=head1 ISSUES
 
 =head1 TO DO
 

Modified: branches/upstream/libhtml-format-perl/current/lib/HTML/FormatRTF.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-format-perl/current/lib/HTML/FormatRTF.pm?rev=77515&op=diff
==============================================================================
--- branches/upstream/libhtml-format-perl/current/lib/HTML/FormatRTF.pm (original)
+++ branches/upstream/libhtml-format-perl/current/lib/HTML/FormatRTF.pm Sat Jul 16 10:21:00 2011
@@ -13,7 +13,7 @@
 
 use base 'HTML::Formatter';
 
-our $VERSION = '2.08'; # VERSION
+our $VERSION = '2.09'; # VERSION
 our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
 
 # ------------------------------------------------------------------------
@@ -509,7 +509,7 @@
 
 =head1 VERSION
 
-version 2.08
+version 2.09
 
 =head1 SYNOPSIS
 

Modified: branches/upstream/libhtml-format-perl/current/lib/HTML/FormatText.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-format-perl/current/lib/HTML/FormatText.pm?rev=77515&op=diff
==============================================================================
--- branches/upstream/libhtml-format-perl/current/lib/HTML/FormatText.pm (original)
+++ branches/upstream/libhtml-format-perl/current/lib/HTML/FormatText.pm Sat Jul 16 10:21:00 2011
@@ -13,7 +13,7 @@
 
 use base 'HTML::Formatter';
 
-our $VERSION = '2.08'; # VERSION
+our $VERSION = '2.09'; # VERSION
 our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
 
 # ------------------------------------------------------------------------
@@ -221,7 +221,7 @@
 
 =head1 VERSION
 
-version 2.08
+version 2.09
 
 =head1 SYNOPSIS
 

Modified: branches/upstream/libhtml-format-perl/current/lib/HTML/Formatter.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-format-perl/current/lib/HTML/Formatter.pm?rev=77515&op=diff
==============================================================================
--- branches/upstream/libhtml-format-perl/current/lib/HTML/Formatter.pm (original)
+++ branches/upstream/libhtml-format-perl/current/lib/HTML/Formatter.pm Sat Jul 16 10:21:00 2011
@@ -14,7 +14,7 @@
 # this should be commented out in release versions....
 ##use Smart::Comments;
 
-our $VERSION = '2.08'; # VERSION
+our $VERSION = '2.09'; # VERSION
 our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
 
 #
@@ -798,7 +798,7 @@
 
 =head1 VERSION
 
-version 2.08
+version 2.09
 
 =head1 SYNOPSIS
 

Modified: branches/upstream/libhtml-format-perl/current/t/rt69426.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-format-perl/current/t/rt69426.t?rev=77515&op=diff
==============================================================================
--- branches/upstream/libhtml-format-perl/current/t/rt69426.t (original)
+++ branches/upstream/libhtml-format-perl/current/t/rt69426.t Sat Jul 16 10:21:00 2011
@@ -1,8 +1,8 @@
 use strict;
 use warnings;
 use Test::More 0.96;
-use Data::Dump qw[dump];
-use File::Slurp;
+##use Data::Dump qw[dump];    # uncomment if needed for debugging
+##use File::Slurp;            # uncomment if needed for debugging
 
 # Bug was that a right single quote character - ’
 # caused a garbage character to go into the output.  This was due to
@@ -16,42 +16,56 @@
 
 BEGIN { use_ok("HTML::FormatPS"); use_ok("HTML::TreeBuilder"); }
 
-my $obj   = new_ok("HTML::FormatPS");
-my $htree = new_ok("HTML::TreeBuilder");
+my $table = {
+    '’' => 'apostrophe/right single quote',
+    '‘' => 'left single quote',
+    '”' => 'right double quote',
+    '“' => 'left double quote',
+    '£' => 'pound symbol',
+};
 
-my $html = '<html><body>it’s an apostrophe.</body></html>';
-ok( $html, 'HTML string containing an apostrophe' );
+foreach my $quoted ( sort { $a cmp $b } keys %{$table} ) {
+    my $desc = $table->{$quoted};
+    subtest "Checking $quoted -> $desc", sub {
+        my $obj   = new_ok("HTML::FormatPS");
+        my $htree = new_ok("HTML::TreeBuilder");
+        my $html  = '<html><body>The ' . $desc . ' is a ' . $quoted . ' character</body></html>';
+        ok( $html, "HTML string containing an $desc" );
 
-ok( $htree->parse_content($html), 'Parse HTML content' );
+        ok( $htree->parse_content($html), 'Parse HTML content' );
 
-my $result = $obj->format_string($html);
-ok( $result, 'Converted HTML object' );
+        my $result = $obj->format_string($html);
+        ok( $result, 'Converted HTML object' );
 
-# count high bit characters
-my $count;
-{
-    use bytes;
-    $count = $result =~ tr/\177-\377//;
+        # count high bit characters
+        my $count;
+        {
+            use bytes;
+            if ( $quoted eq '£' ) {
+
+                # we must exclude latin1 pound - char \243
+                $count = $result =~ tr/\177-\242\244-\377//;
+            }
+            else {
+                $count = $result =~ tr/\177-\377//;
+            }
+        }
+
+        ok( ( $count == 0 ), 'No unexpected high-bit characters found' );
+
+        ## # stuff postscript out into file - uncomment if you need for debugging
+        ## my $fn = $quoted;
+        ## $fn =~ tr/a-z//cd;
+        ## $fn .= '.ps';
+        ## write_file( $fn, { binmode => ':raw' }, $result );
+
+        ## # tell details about errors - uncomment if needed
+        ## diag( dump( { orig => $html, dump => $htree->dump, result => $result } ) ) if ($count);
+
+        done_testing();
+    };
+
 }
-
-ok( ( $count == 0 ), 'No unexpected high-bit characters found' );
-
-# same test, this time using a pound symbol (which exists in latin1)
-$html = '<html><body>A £ symbol</body></html>';
-ok( $html, 'HTML string containing a British pound symbol' );
-
-ok( $htree->parse_content($html), 'Parse HTML content' );
-
-$result = $obj->format_string($html);
-ok( $result, 'Converted HTML object' );
-
-# count high bit characters - excluding latin1 pound \243
-{
-    use bytes;
-    $count = $result =~ tr/\177-\242\244-\377//;
-}
-
-ok( ( $count == 0 ), 'No unexpected high-bit characters found' );
 
 # finish up
 done_testing();




More information about the Pkg-perl-cvs-commits mailing list