[Po4a-commits] po4a/lib/Locale/Po4a Po.pm,1.32,1.33
Nicolas FRAN??OIS
po4a-devel@lists.alioth.debian.org
Sun, 09 Jan 2005 18:13:24 +0000
Update of /cvsroot/po4a/po4a/lib/Locale/Po4a
In directory haydn:/tmp/cvs-serv7973/lib/Locale/Po4a
Modified Files:
Po.pm
Log Message:
\n or \t can be preceded by an even number of backslash
Index: Po.pm
===================================================================
RCS file: /cvsroot/po4a/po4a/lib/Locale/Po4a/Po.pm,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -d -r1.32 -r1.33
--- Po.pm 7 Jan 2005 22:50:52 -0000 1.32
+++ Po.pm 9 Jan 2005 18:13:22 -0000 1.33
@@ -972,16 +972,35 @@
print STDERR "\nunescape [$text]====" if $debug{'escape'};
$text = join("",split(/\n/,$text));
$text =~ s/\\"/"/g;
- $text =~ s/([^\\])\\n/$1\n/g;
- $text =~ s/^\\n/\n/mg;
- $text =~ s/([^\\])\\t/$1\t/g;
+ # unescape newlines
+ # NOTE on \G:
+ # The following regular expression introduce newlines.
+ # Thus, ^ doesn't match all beginnings of lines.
+ # \G is a zero-width assertion that matches the position
+ # of the previous substitution with s///g. As every
+ # substitution ends by a newline, it always matches a
+ # position just after a newline.
+ $text =~ s/( # $1:
+ (\G|[^\\]) # beginning of the line or any char
+ # different from '\'
+ (\\\\)* # followed by any even number of '\'
+ )\\n # and followed by an escaped newline
+ /$1\n/sgx; # single string, match globally, allow comments
+ # unescape tabulations
+ $text =~ s/( # $1:
+ (^|[^\\]) # beginning of the line or any char
+ # different from '\'
+ (\\\\)* # followed by any even number of '\'
+ )\\t # and followed by an escaped tabulation
+ /$1\t/mgx; # multilines string, match globally, allow comments
+ # and unescape the escape character
$text =~ s/\\\\/\\/g;
print STDERR ">$text<\n" if $debug{'escape'};
return $text;
}
-# transforme the string to its representation as it should be written in po files
+# transform the string to its representation as it should be written in po files
sub escape_text {
my $text = shift;
@@ -1004,8 +1023,14 @@
return '""' unless defined($string) && length($string);
print STDERR "\nquote [$string]====" if $debug{'quote'};
- $string =~ s/([^\\])\\n/$1!!DUMMYPOPM!!/gm;
- $string =~ s|!!DUMMYPOPM!!|\\n\n|gm;
+ # break lines on newlines, if any
+ # see unescape_text for an explanation on \G
+ $string =~ s/( # $1:
+ (\G|[^\\]) # beginning of the line or any char
+ # different from '\'
+ (\\\\)* # followed by any even number of '\'
+ \\n) # and followed by an escaped newline
+ /$1\n/sgx; # single string, match globally, allow comments
$string = wrap($string);
my @string = split(/\n/,$string);
$string = join ("\"\n\"",@string);
@@ -1025,6 +1050,8 @@
$string =~ s/^""\\n//s;
$string =~ s/^"(.*)"$/$1/s;
$string =~ s/"\n"//gm;
+ # Note: an even number of '\' could precede \\n, but I could not build a
+ # document to test this
$string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;
$string =~ s|!!DUMMYPOPM!!|\\n|gm;
print STDERR ">$string<\n" if $debug{'quote'};
@@ -1032,15 +1059,14 @@
}
# canonize the string: write it on only one line, changing consecutive whitespace to
-# only on space.
+# only one space.
# Warning, it changes the string and should only be called if the string is plain text
sub canonize {
my $text=shift;
print STDERR "\ncanonize [$text]====" if $debug{'canonize'};
$text =~ s/^ *//s;
- $text =~ s/([^\\])\n/$1 /gm;
- $text =~ s/ \n/ /gm;
- $text =~ s/([^\\])\n/$1 /gm;
+ # if ($text eq "\n"), it messed up the first string (header)
+ $text =~ s/\n/ /gm if ($text ne "\n");
$text =~ s/([.)]) +/$1 /gm;
$text =~ s/([^.)]) */$1 /gm;
$text =~ s/ *$//s;