[Po4a-devel][CVS] po4a/lib/Locale/Po4a Sgml.pm,1.31,1.32

Martin Quinson po4a-devel@lists.alioth.debian.org
Sat, 07 Aug 2004 21:09:04 +0000


Update of /cvsroot/po4a/po4a/lib/Locale/Po4a
In directory haydn:/tmp/cvs-serv30262/lib/Locale/Po4a

Modified Files:
	Sgml.pm 
Log Message:
Allow %entities; in prolog

Index: Sgml.pm
===================================================================
RCS file: /cvsroot/po4a/po4a/lib/Locale/Po4a/Sgml.pm,v
retrieving revision 1.31
retrieving revision 1.32
diff -u -d -r1.31 -r1.32
--- Sgml.pm	7 Aug 2004 04:51:33 -0000	1.31
+++ Sgml.pm	7 Aug 2004 21:09:02 -0000	1.32
@@ -67,9 +67,20 @@
 
 =item verbatim
 
+The layout within those tags should not be changed. The paragraph won't get
+wrapped, and no extra indentation space or new line will be added for
+cosmetic purposes.
+
 =item empty
 
+Tags not needing to be closed.
 
+=item ignore
+
+Tags ignored and considered as plain char data by po4a. That is to say that
+they can be part of a msgid. For example, E<lt>bE<gt> is a good candidate
+for this category since putting it in the translate section would create
+msgids that are not whole sentences, which is bad.
 
 =item force
 
@@ -164,11 +175,11 @@
 @EXPORT = qw();
 
 use Locale::Po4a::TransTractor;
-use Locale::gettext qw(gettext);
+use Locale::gettext qw(dgettext);
 
 eval qq{use SGMLS};
 if ($@) {
-  die "po4a::sgml: ".gettext("The needed module SGMLS.pm was not found and needs to be installed. It can be found on the CPAN, in package libsgmls-perl on debian, etc.")."\n";
+  die "po4a::sgml: ".dgettext("po4a","The needed module SGMLS.pm was not found and needs to be installed. It can be found on the CPAN, in package libsgmls-perl on debian, etc.")."\n";
 }
 
 use File::Temp;
@@ -200,7 +211,7 @@
     
     foreach my $opt (keys %options) {
 	if ($options{$opt}) {
-	    die sprintf("po4a::sgml: ".gettext ("Unknown option: %s"), $opt)."\n" unless exists $self->{options}{$opt};
+	    die sprintf("po4a::sgml: ".dgettext ("po4a","Unknown option: %s"), $opt)."\n" unless exists $self->{options}{$opt};
 	    $self->{options}{$opt} = $options{$opt};
 	}
     }
@@ -234,14 +245,14 @@
     # don't translate entries composed of one entity
     if ( (($string =~ /^&[^;]*;$/) || ($options{'wrap'} && $string =~ /^\s*&[^;]*;\s*$/))
 	 && !($self->{options}{'include-all'}) ){
-	warn sprintf("po4a::sgml: ".gettext("msgid skipped to help translators (contains only an entity)"), $string)."\n"
+	warn sprintf("po4a::sgml: ".dgettext("po4a","msgid skipped to help translators (contains only an entity)"), $string)."\n"
 	       unless $self->verbose() <= 0;
 	return $string;
     }
     # don't translate entries composed of tags only
     if ( $string =~ /^(((<[^>]*>)|\s)*)$/
 	 && !($self->{options}{'include-all'}) ) {
-	warn sprintf("po4a::sgml: ".gettext("msgid skipped to help translators (contains only tags)"), $string)."\n"
+	warn sprintf("po4a::sgml: ".dgettext("po4a","msgid skipped to help translators (contains only tags)"), $string)."\n"
 	       unless $self->verbose() <= 0;
 	return $string;
     }
@@ -285,15 +296,15 @@
     #   - protect optional inclusion marker (ie, "<![ %str [" and "]]>")
     #   - protect entities from expansion (ie "&release;")
     open (IN,"<$filename") 
-	|| die sprintf(gettext("Can't open %s: %s"),$filename,$!)."\n";
+	|| die sprintf(dgettext("po4a","Can't open %s: %s"),$filename,$!)."\n";
     my $origfile="";
     while (<IN>) {
 	$origfile .= $_;
     }
-    close IN || die sprintf("po4a::sgml: ".gettext("can't close %s: %s"),$filename,$!)."\n";
+    close IN || die sprintf("po4a::sgml: ".dgettext("po4a","can't close %s: %s"),$filename,$!)."\n";
     # Detect the XML pre-prolog
     if ($origfile =~ s/^(\s*<\?xml[^?]*\?>)//) {
-	warn sprintf(gettext(
+	warn sprintf(dgettext("po4a",
 		"po4a::sgml: Trying to handle a XML document as a SGML one.\n".
 		"po4a::sgml: Feel lucky if it works, help us implementing a proper XML backend if it does not."),$filename)."\n"
 	  unless $self->verbose() <= 0;
@@ -306,7 +317,7 @@
 	my $pos = 0;  # where in the document (in chars) while detecting prolog boundaries
 	
 	unless ($prolog =~ s/^(.*<!DOCTYPE).*$/$1/is) {
-	    die sprintf(gettext(
+	    die sprintf(dgettext("po4a",
 	    	"po4a::sgml: This file is not a master SGML document (no DOCTYPE).\n".
 		"po4a::sgml: It may be a file to be included by another one, in which case\n".
 		"po4a::sgml: it should not be passed to po4a directly. Text from included\n".
@@ -406,10 +417,10 @@
 
     } else {
 	if ($self->{options}{'force'}) {
-	    warn "po4a::sgml: ".gettext("DTD of this file is unknown, but proceeding as requested.")."\n";
+	    warn "po4a::sgml: ".dgettext("po4a","DTD of this file is unknown, but proceeding as requested.")."\n";
 	    $self->set_tags_kind();
 	} else {
-	    die sprintf("po4a::sgml: ".gettext(
+	    die sprintf("po4a::sgml: ".dgettext("po4a",
 	    	"DTD of this file is unknown. (supported: debiandoc, docbook).\n".
 		"The prolog follows:\n%s"),
 	  	                $filename,$prolog)."\n";
@@ -426,29 +437,89 @@
     # protect the conditional inclusions in the file
     $origfile =~ s/<!\[(\s*[^\[]+)\[/{PO4A-beg-$1}/g; # cond. incl. starts
     $origfile =~ s/\]\]>/{PO4A-end}/g;                # cond. incl. end
+    
+    # Deal with the %entities; in the prolog. God damn it, this code is gross!
+    # Try hard not to change the number of lines to not fuck up the references
+    my %prologentincl;
+    my $moretodo=1;
+    while ($moretodo) { # non trivial loop to deal with recursiv inclusion
+	$moretodo = 0;
+	# Unprotect not yet defined inclusions
+	$prolog =~ s/{PO4A-percent}/%/sg;
+	while ($prolog =~ /(.*?)<!ENTITY\s*%\s*(\S*)\s*SYSTEM\s*"([^>"]*)">(.*)$/is) {  #})"{ (Stupid editor)
+	    print STDERR "Seen the definition entity of prolog inclusion $2 (=$3)\n"
+	      if ($debug{'entities'});
+	    # Preload the content of the entity.
+	    my $key = $2;
+	    my $filename=$3;
+	    $prolog = $1.$4;
+	    (-e $filename && open IN,"<$filename")  ||
+	      die sprintf("po4a::sgml: ".dgettext("po4a","Can't open %s (content of entity %s%s;): %s"),
+		  $filename,'%',$key,$!)."\n";
+	    local $/ = undef;
+	    $prologentincl{$key} = <IN>;
+	    close IN;
+	    my @lines = split(/\n/,$prologentincl{$key});
+	    print STDERR "Content of \%$key; is $filename (".(scalar @lines)." lines long)\n"
+	      if ($debug{'entities'});
+	    # leave those damn references in peace by making sure it fits on one line
+	    $prologentincl{$key} = join (" ", @lines);
+	    print STDERR "content: ".$prologentincl{$key}."\n"
+	      if ($debug{'entities'});
+	    $moretodo = 1;
+	}
+        print STDERR "prolog=>>>>$prolog<<<<\n"
+	      if ($debug{'entities'});
+        while ($prolog =~ /^(.*?)%([^;\s]*);(.*)$/s) {
+	    my ($pre,$ent,$post) = ($1,$2,$3);
+	    # Yeah, right, the content of the entity can be defined in a not yet loaded entity
+	    # It's easy to build a weird case where all that shit collapses poorly. But why the
+	    # hell are you using those strange constructs in your document, damn it?
+	    print STDERR "Seen prolog inclusion $ent\n" if ($debug{'entities'});
+	    if (defined ($prologentincl{$ent})) {
+		$prolog = $pre.$prologentincl{$ent}.$post;
+		print STDERR "Change \%$ent; to its content in the prolog\n"
+		  if $debug{'entities'};
+		$moretodo = 1;
+	    } else {
+		# AAAARGH stupid document using %bla; and having them defined in another inclusion!
+		# Protect it for this pass, and unprotect it on next one
+		print STDERR "entitity $ent not defined yet ?!\n"
+		  if $debug{'entities'};
+		$prolog = "$pre".'{PO4A-percent}'."$ent;$post";
+	    }
+	}
+    }
+    # Unprotect undefined inclusions
+    $prolog =~ s/{PO4A-percent}/%/sg;
+    if ($prolog =~ /%([^;\s]*);/) {
+       die sprintf("po4a::sgml: ".dgettext("po4a","unrecognized prolog inclusion entitity: %%%s;")."\n",$1);
+    }
     # Protect &entities; (but the ones asking for a file inclusion)
     #   search the file inclusion entities
     my %entincl;
     my $searchprolog=$prolog;
-    while ($searchprolog =~ /<!ENTITY\s(\S*)\s*SYSTEM\s*"([^>"]*)">(.*)$/is) {#})"{
-	print STDERR "Seen the entity of inclusion $1 (=$2)\n"
-	    if ($debug{'entities'});
-	$entincl{$1}{'filename'}=$2;
-	$searchprolog = $3;
-    }
-    #   Change the entities to their content
-    foreach my $key (keys %entincl) {
-	open IN,"<".$entincl{$key}{'filename'}  ||
-	    die sprintf(gettext("Can't open %s: %s"),$entincl{$key},$!)."\n";
+    while ($searchprolog =~ /(.*?)<!ENTITY\s(\S*)\s*SYSTEM\s*"([^>"]*)">(.*)$/is) {#})"{
+	print STDERR "Seen the entity of inclusion $2 (=$3)\n"
+	  if ($debug{'entities'});
+	my $key = $2;
+	my $filename = $3;
+	$searchprolog = $1.$4;
+	$entincl{$key}{'filename'}=$filename;
+	# Preload the content of the entity
+	(-e $filename && open IN,"<$filename")  ||
+	  die sprintf("po4a::sgml: ".dgettext("po4a","Can't open %s (content of entity %s%s;): %s"),
+	      $filename,'&',$key,$!)."\n";
 	local $/ = undef;
 	$entincl{$key}{'content'} = <IN>;
 	close IN;
 	@lines= split(/\n/,$entincl{$key}{'content'});
 	$entincl{$key}{'length'} = scalar @lines;
-	print STDERR "read $entincl{$key}{'filename'} ($entincl{$key}{'length'} lines long)\n" 
-	    if ($debug{'entities'});
+	print STDERR "read $filename (content of \&$key;, $entincl{$key}{'length'} lines long)\n" 
+	  if ($debug{'entities'});
     }
-    #   Change the entities
+
+    #   Change the entities in the file
     while ($origfile =~ /^(.*?)&([^;\s]*);(.*)$/s) {
 	if (defined $entincl{$2}) {
 	    my ($begin,$key,$end)=($1,$2,$3);
@@ -490,12 +561,12 @@
 					      DIR    => "/tmp",
 					      UNLINK => 0);
     print $tmpfh $origfile;
-    close $tmpfh || die sprintf(gettext("Can't close tempfile: %s"),$!)."\n";
+    close $tmpfh || die sprintf(dgettext("po4a","Can't close tempfile: %s"),$!)."\n";
 
     my $cmd="cat $tmpfile|nsgmls -l -E 0 2>/dev/null|";
     print STDERR "CMD=$cmd\n" if ($debug{'generic'});
 
-    open (IN,$cmd) || die sprintf(gettext("Can't run nsgmls: %s"),$!)."\n";
+    open (IN,$cmd) || die sprintf(dgettext("po4a","Can't run nsgmls: %s"),$!)."\n";
 
     # The kind of tags
     my (%translate,%empty,%verbatim,%indent,%exist);
@@ -569,7 +640,7 @@
 	my $type;
 	
 	if ($event->type eq 'start_element') {
-	    die sprintf("po4a::sgml: ".gettext("%s: Unknown tag %s"),
+	    die sprintf("po4a::sgml: ".dgettext("po4a","%s: Unknown tag %s"),
 			$refs[$parse->line],$event->data->name)."\n" 
 		unless $exist{$event->data->name};
 	    
@@ -633,7 +704,7 @@
 		$buffer="";
 		push @open,$tag;
 	    } elsif ($indent{$event->data->name()}) {
-		die sprintf(gettext(
+		die sprintf(dgettext("po4a",
 		    "Closing tag for a translation container missing before %s, at %s"
 				    ),$tag,$ref)."\n"
 		    if (scalar @open);
@@ -682,7 +753,7 @@
 		    push @open,$tag;
 		}
 	    } elsif ($indent{$event->data->name()}) {
-		die sprintf(gettext(
+		die sprintf(dgettext("po4a",
            "Closing tag for a translation container missing before %s, at %s"
 				    ),$tag,$ref)."\n"
 		    if (scalar @open);
@@ -739,7 +810,7 @@
 	}
 
 	else {
-	    die sprintf(gettext("%s:%d: Unknown SGML event type: %s"),
+	    die sprintf(dgettext("po4a","%s:%d: Unknown SGML event type: %s"),
 			$refs[$parse->line],$event->type)."\n";
 	    
 	}