[Po4a-devel][CVS] po4a/lib/Locale/Po4a Xml.pm,1.3,1.4
Jordi Vilalta
po4a-devel@lists.alioth.debian.org
Sun, 25 Jul 2004 09:07:12 +0000
Update of /cvsroot/po4a/po4a/lib/Locale/Po4a
In directory haydn:/tmp/cvs-serv19936
Modified Files:
Xml.pm
Log Message:
- Integrated TODO list into the documentation
- Implemented the "caseinsensitive" option
- get_string_until now receives a hash of options, and there's a new "unquoted"
option to skip matches between quotes. It's now used to improve the end-of-tag
search (in case there's a quoted > inside an attribute, for example)
- tag_in_list adapted to work with the proposed tag options syntax (w<...>)
- Changed found_string to receive a hash with info about what it has found; it
now creates the comment itself (instead of receiving it). This way, derived
modules have all the information to create custom comments
- Implemented the w/W options in front of the tags, to override the default
wrapping (proposed by Martin)
Index: Xml.pm
===================================================================
RCS file: /cvsroot/po4a/po4a/lib/Locale/Po4a/Xml.pm,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- Xml.pm 19 Jul 2004 14:59:53 -0000 1.3
+++ Xml.pm 25 Jul 2004 09:07:10 -0000 1.4
@@ -114,9 +114,10 @@
=item wrap
Canonizes the string to translate, considering that whitespaces are not
-important, and wraps the translated document.
+important, and wraps the translated document. This option can be overridden
+by custom tag options. See the "tags" option below.
-=item caseinsensitive (TODO)
+=item caseinsensitive
It makes the tags and attributes searching to work in a case insensitive
way. If it's defined, it will treat <BooK>laNG and <BOOK>Lang as <book>lang.
@@ -139,6 +140,12 @@
form <aaa>, but you can join some (<bbb><aaa>) to say that the contents of
the tag <aaa> will only be translated when it's into a <bbb> tag.
+You can also specify tag options by putting some characters in front of
+the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap)
+to override the default behavior specified by the global "wrap" option.
+
+Example: W<chapter><title>
+
=item attributes (TODO)
Space-separated list of the tag's attributes you want to translate. You can
@@ -211,14 +218,16 @@
There you can control which strings you want to translate, and perform
transformations to them before or after the translation itself.
-It receives the extracted text, the reference on where it was, and a
-comment that tells if it's an attribute value, a tag content... It must
-return the text that will replace the original in the translated document.
-Here's a basic example of this function:
+It receives the extracted text, the reference on where it was, and a hash
+that contains extra information to control what strings to translate, how
+to translate them and to generate the comment.
+
+It must return the text that will replace the original in the translated
+document. Here's a basic example of this function:
sub found_string {
- my ($self,$text,$ref,$comment)=@_;
- $text = $self->translate($text,$ref,$comment,
+ my ($self,$text,$ref,$options)=@_;
+ $text = $self->translate($text,$ref,"type".$options->{'type'},
'wrap'=>$self->{options}{'wrap'});
return $text;
}
@@ -229,9 +238,25 @@
=cut
sub found_string {
- my ($self,$text,$ref,$comment)=@_;
- $text = $self->translate($text,$ref,$comment,
- 'wrap'=>$self->{options}{'wrap'});
+ my ($self,$text,$ref,$options)=@_;
+
+ my $comment;
+ my $wrap = $self->{options}{'wrap'};
+
+ if ($options->{'type'} eq "tag") {
+ $comment = "Contents of: ".$self->get_path;
+
+ if($options->{'tag_options'} =~ /w/) {
+ $wrap = 1;
+ }
+ if($options->{'tag_options'} =~ /W/) {
+ $wrap = 0;
+ }
+ } else {
+ die dgettext("po4a","po4a::xml: Internal error: unknown string type.")."\n";
+ }
+
+ $text = $self->translate($text,$ref,$comment,'wrap'=>$wrap);
return $text;
}
@@ -296,7 +321,7 @@
# { beginning => "?",
# end => "?",
# breaking => 1,
-# f_translate => \&tag_trans_...},
+# f_translate => \&tag_trans_procins},
{ beginning => "!DOCTYPE",
end => "]",
breaking => 1,
@@ -318,7 +343,7 @@
sub tag_extract_comment {
my ($self,$remove)=(shift,shift);
- my ($eof,@tag)=$self->get_string_until('-->',1,$remove);
+ my ($eof,@tag)=$self->get_string_until('-->',{include=>1,remove=>$remove,unquoted=>1});
return ($eof,@tag);
}
@@ -342,7 +367,12 @@
sub tag_extract_doctype {
my ($self,$remove)=(shift,shift);
- my ($eof,@tag)=$self->get_string_until(']>',1,$remove);
+ my ($eof,@tag)=$self->get_string_until(']>',{include=>1,unquoted=>1});
+ if ($eof) {
+ ($eof,@tag)=$self->get_string_until('>',{include=>1,remove=>$remove,unquoted=>1});
+ } else {
+ ($eof,@tag)=$self->get_string_until(']>',{include=>1,remove=>$remove,unquoted=>1});
+ }
return ($eof,@tag);
}
@@ -501,7 +531,7 @@
($match1,$match2) = ($tag_types[$i]->{beginning},$tag_types[$i]->{end});
if ($line =~ /^<\Q$match1\E/) {
if (!defined($tag_types[$i]->{f_extract})) {
- my ($eof,@lines) = $self->get_string_until(">",1,0);
+ my ($eof,@lines) = $self->get_string_until(">",{include=>1,unquoted=>1});
my $line2 = $self->join_lines(@lines);
#print substr($line2,length($line2)-1-length($match2),1+length($match2))."\n";
if (defined($line2) and $line2 =~ /\Q$match2\E>$/) {
@@ -544,7 +574,7 @@
if (defined($tag_types[$type]->{f_extract})) {
($eof,@tag) = &{$tag_types[$type]->{f_extract}}($self,$remove);
} else {
- ($eof,@tag) = $self->get_string_until($match2.">",1,$remove);
+ ($eof,@tag) = $self->get_string_until($match2.">",{include=>1,remove=>$remove,unquoted=>1});
}
$tag[0] =~ /^<\Q$match1\E(.*)$/s;
$tag[0] = $1;
@@ -621,9 +651,11 @@
=item tag_in_list
-This function returns a boolean value that says if the first argument (a tag
+This function returns a string value that says if the first argument (a tag
hierarchy) matches any of the tags from the second argument (a list of tags
-or tag hierarchies).
+or tag hierarchies). If it doesn't match, it returns 0. Else, it returns the
+matched tag options (the characters in front of the tag) or 1 (if that tag
+doesn't have options).
=cut
@@ -633,12 +665,25 @@
my $i = 0;
while (!$found && $i < @list) {
- my $element = $list[$i];
- if ( $tag =~ /\Q$element\E$/ ) {
-#print $tag."==".$element."\n";
- $found = 1;
+ $list[$i] =~ /(.*?)(<.*)/;
+ my $options = $1;
+ my $element = $2;
+ if ($self->{options}{'caseinsensitive'}) {
+ if ( $tag =~ /\Q$element\E$/i ) {
+ $found = 1;
+ }
+ } else {
+ if ( $tag =~ /\Q$element\E$/ ) {
+ $found = 1;
+ }
+ }
+ if ($found) {
+ if ($options) {
+ $found = $options;
+ }
+ } else {
+ $i++;
}
- $i++;
}
return $found;
}
@@ -708,15 +753,15 @@
sub treat_content {
my $self = shift;
my $blank="";
- my ($eof,@paragraph)=$self->get_string_until('<',0,1);
+ my ($eof,@paragraph)=$self->get_string_until('<',{remove=>1});
while (!$eof and !$self->breaking_tag) {
my @text;
# Append the found inline tag
- ($eof,@text)=$self->get_string_until('>',1,1);
+ ($eof,@text)=$self->get_string_until('>',{include=>1,remove=>1,unquoted=>1});
push @paragraph, @text;
- ($eof,@text)=$self->get_string_until('<',0,1);
+ ($eof,@text)=$self->get_string_until('<',{remove=>1});
if ($#text > 0) {
push @paragraph, @text;
}
@@ -767,14 +812,29 @@
if ( length($self->join_lines(@paragraph)) > 0 ) {
my $struc = $self->get_path;
- my $inlist = $self->tag_in_list($struc,@{$self->{tags}});
+ my $options = $self->tag_in_list($struc,@{$self->{tags}});
+ my $inlist;
+ if ($options eq 0) {
+ $inlist = 0;
+ $options = "";
+ } elsif ($options eq 1) {
+ $inlist = 1;
+ $options = "";
+ } else {
+ $inlist = 1;
+ }
#print $self->{options}{'tagsonly'}."==".$inlist."\n";
if ( $self->{options}{'tagsonly'} eq $inlist ) {
#print "YES\n";
- $self->pushline($self->found_string($self->join_lines(@paragraph),
- $paragraph[1],"Content of tag ".$struc));
+ $self->pushline($self->found_string(
+ $self->join_lines(@paragraph),
+ $paragraph[1], {
+ type=>"tag",
+ tag_options=>$options
+ }));
} else {
#print "NO\n";
+#TODO: should print that this tag isn't translated in verbose mode
$self->pushline($self->join_lines(@paragraph));
}
}
@@ -818,20 +878,35 @@
=item get_string_until
This function returns an array with the lines (and references) from the input
-stream until it finds the first argument. The second argument is a boolean
-that says if the returned array should contain the searched text or not. The
-third argument is another boolean that says if the returned stream should be
-removed from the input or not.
+stream until it finds the first argument. The second argument is a reference
+to an options hash. A value of 0 means disabled (the default); 1 means enabled.
+
+The valid options are:
+
+=over 4
+
+=item include
+
+This makes the returned array contain the searched text
+
+=item remove
+
+This removes the returned stream from the input
+
+=item unquoted
+
+This ensures that the searched text is outside any quotes
=cut
sub get_string_until {
- # search = the text we want to find (at the moment it can't have \n's)
- # include = include the searched text in the returned paragraph
- # remove = remove the returned text from input or leave it intact
- my ($self,$search,$include,$remove) = (shift,shift,shift,shift);
- if (!defined($include)) { $include = 0; }
- if (!defined($remove)) { $remove = 0; }
+ my ($self,$search) = (shift,shift);
+ my $options = shift;
+ my ($include,$remove,$unquoted) = (0,0,0);
+
+ if (defined($options->{include})) { $include = $options->{include}; }
+ if (defined($options->{remove})) { $remove = $options->{remove}; }
+ if (defined($options->{unquoted})) { $unquoted = $options->{unquoted}; }
my ($line,$ref) = $self->shiftline();
my (@text,$paragraph);
@@ -840,9 +915,16 @@
while (defined($line) and !$found) {
push @text, ($line,$ref);
$paragraph .= $line;
- if ( $paragraph =~ /.*\Q$search\E.*/s ) {
- $found = 1;
+ if ($unquoted) {
+ if ( $paragraph =~ /^((\".*?\")|(\'.*?\')|[^\"\'])*\Q$search\E.*/s ) {
+ $found = 1;
+ }
} else {
+ if ( $paragraph =~ /.*\Q$search\E.*/s ) {
+ $found = 1;
+ }
+ }
+ if (!$found) {
($line,$ref)=$self->shiftline();
}
}
@@ -850,15 +932,21 @@
if (!defined($line)) { $eof = 1; }
if ( $found ) {
- if(!$include) {
- $text[$#text-1] =~ /(.*?)(\Q$search\E.*)/s;
+ $line = "";
+ if($unquoted) {
+ $text[$#text-1] =~ /^(((\".*?\")|(\'.*?\')|[^\"\'])*?\Q$search\E)(.*)/s;
$text[$#text-1] = $1;
- $line = $2;
+ $line = $5;
} else {
$text[$#text-1] =~ /(.*?\Q$search\E)(.*)/s;
$text[$#text-1] = $1;
$line = $2;
}
+ if(!$include) {
+ $text[$#text-1] =~ /(.*)(\Q$search\E.*)/s;
+ $text[$#text-1] = $1;
+ $line = $2.$line;
+ }
if (defined($line) and ($line ne "")) {
$self->unshiftline ($line,$text[$#text]);
}
@@ -898,6 +986,19 @@
Well... hmm... If this works for you now, you're using a very simple
document format ;)
+=head1 TODO LIST
+
+ATTRIBUTES
+
+MODIFY TAG TYPES FROM INHERITED MODULES
+(move the tag_types structure inside the $self hash?)
+
+XML HEADER (ENCODING)
+DOCTYPE (ENTITIES)
+INCLUDED FILES
+
+breaking tag inside non-breaking tag (possible?) causes ugly comments
+
=head1 SEE ALSO
L<po4a(7)>, L<Locale::Po4a::TransTranctor(3pm)>.
@@ -916,27 +1017,3 @@
=cut
1;
-
-
-##### TODO LIST #####
-#
-#OPTIONS
-#caseinsensitive
-#attributes
-#
-#MODIFY TAG TYPES FROM INHERITED MODULES
-#(move the tag_types structure inside the $self hash?)
-#
-#DOCTYPE (ENTITIES)
-#INCLUDED FILES
-#
-#XML HEADER (ENCODING)
-#
-#breaking tag inside non-breaking tag (possible?) causes ugly comments
-
-# <abbrev>
-# W<acronym>
-# W<arg>
-# <artheader>
-# with 'w' meaning wrap (by default) and 'W' meaning don't wrap.
-# there should be the module option to select the default behavior