[Po4a-commits] po4a/lib/Locale/Po4a Html.pm,1.8,1.9
Denis Barbier
po4a-devel@lists.alioth.debian.org
Sun, 28 Nov 2004 22:38:59 +0000
- Previous message: [Po4a-commits] po4a/t 22-html.t,NONE,1.1
- Next message: [Po4a-commits] po4a/t/data-22 attribute.html,NONE,1.1 attribute_out.html,NONE,1.1 html.html,NONE,1.1 html.po,NONE,1.1 spaces.html,NONE,1.1 spaces.po,NONE,1.1 spaces_out.html,NONE,1.1
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/po4a/po4a/lib/Locale/Po4a
In directory haydn:/tmp/cvs-serv13522/lib/Locale/Po4a
Modified Files:
Html.pm
Log Message:
[Yves Rutschle]
(HTML module)
* Preserve leading and trailing spaces in tag content.
* Fix the <img> tag.
* Translate text only if it contains some letters.
* Add t/22-html.t
Index: Html.pm
===================================================================
RCS file: /cvsroot/po4a/po4a/lib/Locale/Po4a/Html.pm,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- Html.pm 27 Aug 2004 10:31:53 -0000 1.8
+++ Html.pm 28 Nov 2004 22:38:56 -0000 1.9
@@ -80,11 +80,16 @@
my ($self,$filename)=@_;
my $stream = HTML::TokeParser->new($filename)
|| die "Couldn't read HTML file $filename : $!";
+
+ $stream->unbroken_text( [1] );
my @type=();
NEXT : while (my $token = $stream->get_token) {
if($token->[0] eq 'T') {
- my $text = trim($token->[1]);
+ my $text = $token->[1];
+ my ($pre_spaces) = ($text =~ /^(\s*)/);
+ my ($post_spaces) = ($text =~ /(\s*)$/);
+ $text = trim($text);
if (notranslation($text) == 1) {
$self->pushline( get_tag( $token ) );
next NEXT;
@@ -97,14 +102,38 @@
# $encoded = HTML::Entities::encode($a);
# $decoded = HTML::Entities::decode($a);
#print STDERR $token->[0];
- $self->pushline( " ".$self->translate($text,
+ $self->pushline( $pre_spaces . $self->translate($text,
"FIXME:0",
(scalar @type ? $type[scalar @type-1]: "NOTYPE")
- )." " );
+ ) . $post_spaces,
+ 'wrap' => 1
+ );
next NEXT;
} elsif ($token->[0] eq 'S') {
push @type,$token->[1];
- $self->pushline( get_tag( $token ) );
+ my $text = get_tag( $token );
+ if ( $token->[1] eq 'img' ) {
+ my %attr = %{$token->[2]};
+ for my $a (qw/title alt/) {
+ my $content = $attr{$a};
+ if (defined $content) {
+ $content = trim($content);
+ my $translated = $self->translate(
+ $content,
+ "FIXME:0",
+ "img_$a"
+ );
+ $attr{$a} = $translated;
+ }
+ }
+ my ($closing) = ( $text =~ /(\s*\/?>)/ );
+ # reconstruct the tag from scratch
+ delete $attr{'/'}; # Parser thinks closing / in XHTML is an attribute
+ $text = "<img";
+ $text .= " $_=\"$attr{$_}\"" foreach keys %attr;
+ $text .= $closing;
+ }
+ $self->pushline( $text );
} elsif ($token->[0] eq 'E') {
pop @type;
$self->pushline( get_tag( $token ) );
@@ -136,11 +165,12 @@
sub trim {
my $s=shift;
- $s =~ s/\n//g; # remove \n in text
- $s =~ s/\r//g; # remove \r in text
- $s =~ s/\t//g; # remove tabulations
- $s =~ s/^\s+//; # remove leading spaces
- $s =~ s/\s+$//; # remove trailing spaces
+ $s =~ s/\n/ /g; # remove \n in text
+ $s =~ s/\r/ /g; # remove \r in text
+ $s =~ s/\t/ /g; # remove tabulations
+ $s =~ s/\s+/ /g; # remove multiple spaces
+ $s =~ s/^\s*//g; # remove leading spaces
+ $s =~ s/\s*$//g; # remove trailing spaces
return $s;
}
@@ -163,6 +193,11 @@
# don't translate entries composed of one entity
return 1 if ($s =~ /^&[^;]*;$/);
+# don't translate entries with no letters
+# (happens with e.g. <b>Hello</b>, <i>world</i> )
+# ^^
+# ", " doesn't need translation
+ return 1 unless $s =~ /\w/;
return 0;
}
- Previous message: [Po4a-commits] po4a/t 22-html.t,NONE,1.1
- Next message: [Po4a-commits] po4a/t/data-22 attribute.html,NONE,1.1 attribute_out.html,NONE,1.1 html.html,NONE,1.1 html.po,NONE,1.1 spaces.html,NONE,1.1 spaces.po,NONE,1.1 spaces_out.html,NONE,1.1
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]