[Po4a-commits] po4a/lib/Locale/Po4a Html.pm,1.8,1.9

Denis Barbier po4a-devel@lists.alioth.debian.org
Sun, 28 Nov 2004 22:38:59 +0000


Update of /cvsroot/po4a/po4a/lib/Locale/Po4a
In directory haydn:/tmp/cvs-serv13522/lib/Locale/Po4a

Modified Files:
	Html.pm 
Log Message:
  [Yves Rutschle]
  (HTML module)
  * Preserve leading and trailing spaces in tag content.
  * Fix the <img> tag.
  * Translate text only if it contains some letters.
  * Add t/22-html.t


Index: Html.pm
===================================================================
RCS file: /cvsroot/po4a/po4a/lib/Locale/Po4a/Html.pm,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- Html.pm	27 Aug 2004 10:31:53 -0000	1.8
+++ Html.pm	28 Nov 2004 22:38:56 -0000	1.9
@@ -80,11 +80,16 @@
     my ($self,$filename)=@_;
     my $stream = HTML::TokeParser->new($filename)
         || die "Couldn't read HTML file $filename : $!";
+
+    $stream->unbroken_text( [1] );
     
     my @type=();
     NEXT : while (my $token = $stream->get_token) {
         if($token->[0] eq 'T') {
-            my $text = trim($token->[1]);
+            my $text = $token->[1];
+            my ($pre_spaces) = ($text =~ /^(\s*)/);
+            my ($post_spaces) = ($text =~ /(\s*)$/);
+            $text = trim($text);
             if (notranslation($text) == 1) {
                 $self->pushline( get_tag( $token ) );
                 next NEXT;
@@ -97,14 +102,38 @@
 #  $encoded = HTML::Entities::encode($a);
 #  $decoded = HTML::Entities::decode($a);
 	    #print STDERR $token->[0];
-            $self->pushline( " ".$self->translate($text,
+            $self->pushline( $pre_spaces . $self->translate($text,
 		                                  "FIXME:0",
 		                                  (scalar @type ? $type[scalar @type-1]: "NOTYPE")
-	                                         )." " );
+	                                         ) . $post_spaces,
+                             'wrap' => 1
+                             );
             next NEXT;
 	} elsif ($token->[0] eq 'S') {
 	    push @type,$token->[1];
-            $self->pushline( get_tag( $token ) );
+            my $text =  get_tag( $token );
+            if ( $token->[1] eq 'img' ) {
+                my %attr = %{$token->[2]};
+                for my $a (qw/title alt/) {
+                    my $content = $attr{$a};
+                    if (defined $content) {
+                        $content = trim($content);
+                        my $translated = $self->translate( 
+                                              $content,
+                                              "FIXME:0",
+                                              "img_$a"
+                                              );
+                        $attr{$a} = $translated;
+                    }
+                }
+                my ($closing) = ( $text =~ /(\s*\/?>)/ );
+                # reconstruct the tag from scratch
+                delete $attr{'/'}; # Parser thinks closing / in XHTML is an attribute
+                $text = "<img";
+                $text .= " $_=\"$attr{$_}\"" foreach keys %attr;
+                $text .= $closing;
+            }
+            $self->pushline( $text );
         } elsif ($token->[0] eq 'E') {
 	    pop @type;
             $self->pushline( get_tag( $token ) );
@@ -136,11 +165,12 @@
 
 sub trim { 
     my $s=shift;
-    $s =~ s/\n//g;  # remove \n in text
-    $s =~ s/\r//g;  # remove \r in text
-    $s =~ s/\t//g;  # remove tabulations
-    $s =~ s/^\s+//; # remove leading spaces
-    $s =~ s/\s+$//; # remove trailing spaces
+    $s =~ s/\n/ /g;  # replace \n with a space
+    $s =~ s/\r/ /g;  # replace \r with a space
+    $s =~ s/\t/ /g;  # replace tabulations with a space
+    $s =~ s/\s+/ /g; # collapse each run of whitespace into a single space
+    $s =~ s/^\s*//g; # remove leading spaces
+    $s =~ s/\s*$//g; # remove trailing spaces
     return $s;
 } 
 
@@ -163,6 +193,11 @@
     # don't translate entries composed of one entity
     return 1 if ($s =~ /^&[^;]*;$/);
     
+# don't translate entries with no word characters (letters, digits or "_")
+# (happens with e.g.  <b>Hello</b>, <i>world</i> )
+#                                 ^^
+#                    ", " doesn't need translation
+    return 1 unless $s =~ /\w/;
     return 0;          
 }