[Po4a-commits] po4a/lib/Locale/Po4a TeX.pm,1.5,1.6
Nicolas FRAN??OIS
po4a-devel@lists.alioth.debian.org
Sat, 08 Jan 2005 10:44:30 +0000
Update of /cvsroot/po4a/po4a/lib/Locale/Po4a
In directory haydn:/tmp/cvs-serv9174/lib/Locale/Po4a
Modified Files:
TeX.pm
Log Message:
Some cleanups and comments.
New functionnalities:
* Better handling of the spaces surrounding commands. They are now kept as close to the original as possible.
* Start using "% po4a: " line for parser personalisation.
* It is now possible to build a derivated parser.
* Handle file inclusion (based on Transtractor's read).
Index: TeX.pm
===================================================================
RCS file: /cvsroot/po4a/po4a/lib/Locale/Po4a/TeX.pm,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- TeX.pm 8 Jan 2005 10:34:00 -0000 1.5
+++ TeX.pm 8 Jan 2005 10:44:28 -0000 1.6
@@ -75,6 +75,11 @@
use Locale::Po4a::TransTractor;
use Locale::gettext qw(dgettext);
+use File::Basename qw(dirname);
+use Carp qw(croak);
+
+use Encode;
+use Encode::Guess;
# hash of known commands and environments, with parsing sub.
# See end of this file
@@ -103,6 +108,15 @@
"index"
);
+# Directory name of the main file.
+# It is the directory where included files will be searched.
+# See read_file.
+my $my_dirname;
+
+# Array of files that should not be included by read_file.
+# See read_file.
+our @exclude_include;
+
#########################
#### DEBUGGING STUFF ####
#########################
@@ -111,7 +125,8 @@
'translate' => 0, # see translation
'extract_commands' => 0, # see commands extraction
'commands' => 0, # see command subroutines
- 'environments' => 0 # see environment subroutines
+ 'environments' => 0, # see environment subroutines
+ 'translate_buffer' => 0 # see buffer translation
);
sub pre_trans {
@@ -164,6 +179,11 @@
# They are stored in the @comments array, and then displayed as a PO
# comment with the first translated string of the paragraph.
my @comments = ();
+
+# Wrapper arround Transtractor's translate, with pre- and post-processing
+# filters.
+# Comments of a paragraph are inserted as a PO comment for the first
+# translated string of this paragraph.
sub translate {
my ($self,$str,$ref,$type) = @_;
my (%options)=@_;
@@ -175,18 +195,35 @@
return $str if ($str eq "\n");
$str=pre_trans($self,$str,$ref||$self->{ref},$type);
+
+ # add comments (if any and not already added to the PO)
if (@comments) {
$options{'comment'} .= join('\n', @comments);
@comments = ();
}
+
+# FIXME: translate may append a newline, keep the trailing spaces so we can
+# recover them.
+ my $spaces = "";
+ if ($str =~ m/(\s+)$/s) {
+ $spaces = $1;
+ }
+
# Translate this
$str = $self->SUPER::translate($str,
$ref||$self->{ref},
$type || $self->{type},
%options);
+
+# FIXME: translate may append a newline, see above
+ if ($options{'wrap'}) {
+ chomp $str;
+ $str .= $spaces;
+ }
+
$str=post_trans($self,$str,$ref||$self->{ref},$type);
- print STDERR "$str\n" if ($debug{'translate'});
+ print STDERR "'$str'\n" if ($debug{'translate'});
return $str;
}
@@ -370,10 +407,10 @@
$opt = $2.$opt;
}
} else {
- # FIXME: can an argument contain an empty line?
- # If it happens, either we should change the parse
- # subroutine (so that it doesn't break entity), or
- # we have to shiftline here.
+ # FIXME: an argument can contain an empty line.
+ # We should change the parse subroutine (so that it doesn't
+ # break entity).
+ # FIXME: see ch06:267
die sprintf "un-balanced ]";
}
}
@@ -401,42 +438,68 @@
return ($command,$variant,\@opts,\@args,$buffer);
}
-# Warning: may be reentrant.
+# Recursively translate a buffer by separating leading and trailing
+# commands (those which should be translatted separately) from the
+# buffer.
sub translate_buffer {
my ($self,$buffer,@env) = (shift,shift,@_);
-#print STDERR "translate_buffer($buffer,@env)\n";
+ print STDERR "translate_buffer($buffer,@env)="
+ if ($debug{'translate_buffer'});
my ($command,$variant) = ("","");
my $opts = ();
my $args = ();
my $translated_buffer = "";
- my $end_translated_buffer = "";
+ my $orig_buffer = $buffer;
my $t = ""; # a temporary string
# translate leading commands.
do {
+ # keep the leading space to put them back after the translation of
+ # the command.
+ my $spaces = "";
+ if ($buffer =~ /^(\s+)(.*)$/s) {
+ $spaces = $1;
+ $buffer = $2;
+ }
($command, $variant, $opts, $args, $buffer) =
get_leading_command($self,$buffer);
if (length($command)) {
# call the command subroutine.
- # These command subroutine will probably call translate_buffer
- # with the content of the arguments which need a translation.
+ # These command subroutines will probably call translate_buffer
+ # with the content of each argument that need a translation.
if (defined ($commands{$command})) {
($t,@env) = &{$commands{$command}}($self,$command,$variant,
$opts,$args,\@env);
- $translated_buffer .= $t;
+ $translated_buffer .= $spaces.$t;
+ # Handle spaces after a command.
+ $spaces = "";
+ if ($buffer =~ /^(\s+)(.*)$/s) {
+ $spaces = $1;
+ $buffer = $2;
+ }
+ $translated_buffer .= $spaces;
} else {
die sprintf("unknown command: '%s'", $command)."\n"
}
+ } else {
+ $buffer = $spaces.$buffer;
}
} while (length($command));
# array of trailing commands, which will be translated later.
my @trailing_commands = ();
do {
+ my $spaces = "";
+ if ($buffer =~ /^(.*)(\s+)$/s) {
+ $buffer = $1;
+ $spaces = $2;
+ }
($command, $variant, $opts, $args, $buffer) =
get_trailing_command($self,$buffer);
if (length($command)) {
- unshift @trailing_commands, ($command, $variant, $opts, $args);
+ unshift @trailing_commands, ($command, $variant, $opts, $args, $spaces);
+ } else {
+ $buffer .= $spaces;
}
} while (length($command));
@@ -452,35 +515,145 @@
}
}
}
-
+ # Keep spaces at the end of the buffer.
+ my $spaces = "";
+ if ($buffer =~ /^(.*)(\s+)$/s) {
+ $spaces = $2;
+ $buffer = $1;
+ }
$translated_buffer .= $self->translate($buffer,$self->{ref},
@env?$env[-1]:"Plain text",
"wrap" => $wrap);
- chomp $translated_buffer if ($wrap);
+ # Restore spaces at the end of the buffer.
+ $translated_buffer .= $spaces;
}
+ # append the translation of the trailing commands
while (@trailing_commands) {
my $command = shift @trailing_commands;
my $variant = shift @trailing_commands;
my $opts = shift @trailing_commands;
my $args = shift @trailing_commands;
+ my $spaces = shift @trailing_commands;
if (defined ($commands{$command})) {
($t,@env) = &{$commands{$command}}($self,$command,$variant,
$opts,$args,\@env);
- $translated_buffer .= $t;
+ $translated_buffer .= $t.$spaces;
} else {
die sprintf("unknown command: '%s'", $command)."\n";
}
}
+ print STDERR "($translated_buffer,@env)\n"
+ if ($debug{'translate_buffer'});
return ($translated_buffer,@env);
}
################################
#### EXTERNAL CUSTOMIZATION ####
################################
-sub parse_definition_file {}
-sub parse_definition_line {}
+
+# Overload Transtractor's read
+sub read {
+ my $self=shift;
+ my $filename=shift;
+
+ # keep the directory name of the main file.
+ $my_dirname = dirname($filename);
+
+ push @{$self->{TT}{doc_in}}, read_file($self, $filename);
+}
+
+# Recursively read a file, appending included files.
+# Except from the file inclusion part, it is a cut and paste from
+# Transtractor's read.
+sub read_file {
+ my $self=shift;
+ my $filename=shift
+ or croak(dgettext("po4a","Can't read from file without having a filename")."\n");
+ my $linenum=0;
+ my @entries=();
+
+ open (my $in, $filename)
+ or croak (sprintf(dgettext("po4a","Can't read from %s: %s"),
+ $filename,$!)."\n");
+ while (defined (my $textline = <$in>)) {
+ $linenum++;
+ my $ref="$filename:$linenum";
+ while ($textline =~ /^(.*)\\include\{([^\{]*)\}(.*)$/) {
+ my ($begin,$newfilename,$end) = ($1,$2,$3);
+ my $include = 1;
+ foreach my $f (@exclude_include) {
+ if ($f eq $newfilename) {
+ $include = 0;
+ $begin .= "\\include{$newfilename}";
+ $textline = $end;
+ }
+ }
+ if ($begin !~ /^\s*$/) {
+ push @entries, ($begin,$ref);
+ }
+ if ($include) {
+ push @entries, read_file($self,
+ "$my_dirname/$newfilename.tex");
+ $textline = $end;
+ }
+ }
+ if (length($textline)) {
+ my @entry=($textline,$ref);
+ push @entries, @entry;
+
+ # Detect if this file has non-ascii characters
+ if($self->{TT}{ascii_input}) {
+
+ my $decoder = guess_encoding($textline);
+ if (!ref($decoder) or $decoder !~ /Encode::XS=/) {
+ # We have detected a non-ascii line
+ $self->{TT}{ascii_input} = 0;
+ # Save the reference for future error message
+ $self->{TT}{non_ascii_ref} ||= $ref;
+ print "cucu'$ref'$textline'\n";
+ }
+ }
+ }
+ }
+ close $in
+ or croak (sprintf(dgettext("po4a","Can't close %s after reading: %s"),
+ $filename,$!)."\n");
+
+ return @entries;
+}
+
+# Subroutine for parsing a file with po4a directive (definitions for
+# newcommands).
+sub parse_definition_file {
+ my ($self,$filename)=@_;
+
+ open (IN,"<$my_dirname/$filename")
+ || die sprintf(dgettext("po4a","Can't open %s: %s"),$filename,$!)."\n";
+ while (<IN>) {
+ if (/^%\s+po4a:/) {
+ parse_definition_line($self, $_);
+ }
+ }
+}
+# Parse a definition line ("% po4a: ")
+sub parse_definition_line {
+ my ($self,$line)=@_;
+ $line =~ s/^%\s+po4a:\s*//;
+
+ if ($line =~ /^command\s+(\w)\s+(.*)$/) {
+ my $command = $1;
+ my $line = $2;
+ if ($line =~ /^alias\s+(\w)/) {
+ if (defined ($commands{$2})) {
+ $commands{$command} = $commands{$2}
+ } else {
+ die "Cannot use an alias to the unknown command $2\n";
+ }
+ }
+ }
+}
#############################
#### MAIN PARSE FUNCTION ####
@@ -513,7 +686,7 @@
$paragraph =~ s/(?<!\\)%$//; # FIXME: even number of \ ...
if (length($paragraph)) {
($t, @env) = translate_buffer($self,$paragraph,@env);
- $self->pushline($t."\n");
+ $self->pushline($t);
$paragraph="";
}
$self->pushline($line."\n");
@@ -696,7 +869,7 @@
}
foreach (split(/ /, $command_categories{'untranslated'})) {
if (defined($commands{$_})) {
- print "coucou $_\n";
+ # FIXME: Should we allow to redefine commands
}
$commands{$_} = \&untranslated;
}
@@ -707,7 +880,7 @@
}
foreach (split(/ /, $command_categories{'translate_joined'})) {
if (defined($commands{$_})) {
- print "coucou $_\n";
+ # FIXME: Should we allow to redefine commands
}
$commands{$_} = \&translate_joined;
}