[Debian-tex-commits] SVN tex-common commit + diffs: r2587 - in
tex-common/trunk: debian doc
Florent Rougon
frn at alioth.debian.org
Sat Mar 17 11:30:52 CET 2007
Author: frn
Date: 2007-03-17 10:30:51 +0000 (Sat, 17 Mar 2007)
New Revision: 2587
Modified:
tex-common/trunk/debian/changelog
tex-common/trunk/doc/texify-tex-output
Log:
Changes to texify-tex-output:
* The document preamble is now easily customizable with a list of
(regexp, replacement) tuples.
* More documentation (in particular, new option --algorithm explains in detail
what the script does).
* Use '/usr/bin/python' instead of '/usr/bin/env python' (not really
necessary, though, since the script is not even called at package build
time--cf. changelog).
* Add the LastChangedRevision property, so that --version actually reports
something accurate.
Update the Debian changelog.
Modified: tex-common/trunk/debian/changelog
===================================================================
--- tex-common/trunk/debian/changelog 2007-03-16 16:51:52 UTC (rev 2586)
+++ tex-common/trunk/debian/changelog 2007-03-17 10:30:51 UTC (rev 2587)
@@ -1,7 +1,16 @@
tex-common (1.3~1) unreleased; urgency=low
* Fix typography in Debian-on-TeX, thanks to Miguel de Val Borro
- <miguel.deval at gmail.com> (closes: #413449)
+ <miguel.deval at gmail.com> (closes: #413449) [frank]
+
+ * In the source package, replace 'tex-sed' with a Python script named
+ 'texify-tex-output' to do a better job (handling all known cases so
+ far, some of which seemed rather difficult to implement in sed).
+
+ We don't need to Build-Depend on Python, because this script is only
+ used when we generate PDF output from the DebianDoc documents
+ (Debian-TeX-Policy, TeX-on-Debian), which we don't do at build time
+ in order to avoid chicken-and-egg problems. [florent]
-- Frank Küster <frank at debian.org> Wed, 14 Mar 2007 16:10:53 +0100
Modified: tex-common/trunk/doc/texify-tex-output
===================================================================
--- tex-common/trunk/doc/texify-tex-output 2007-03-16 16:51:52 UTC (rev 2586)
+++ tex-common/trunk/doc/texify-tex-output 2007-03-17 10:30:51 UTC (rev 2587)
@@ -1,6 +1,6 @@
-#! /usr/bin/env python
-
-# texify-tex-output --- Change TeX into \TeX and similar stuff
+#! /usr/bin/python
+#
+# texify-tex-output --- Enhance the LaTeX output of DebianDoc tools
# Copyright (c) 2007 Florent Rougon
#
# This program is free software; you can redistribute it and/or modify
@@ -21,46 +21,80 @@
import sys, os, re, getopt
-# List of (regexp, replacement text) tuples describing the substitutions to
-# perform.
+# Call with option --algorithm for a precise explanation of how the script
+# works (or look for the definition of 'algorithm_doc' below).
#
+#
+# ****************************************************************************
+# * Simple customization starts here *
+# ****************************************************************************
+
+# Document preamble
+# ~~~~~~~~~~~~~~~~~
+# List of (regexp, replacement text) tuples used to customize the preamble.
+#
# For the regexp syntax, see:
#
# file:///usr/share/doc/python2.5-doc/html/lib/re-syntax.html
#
# The replacement text is expanded by match_object.expand(), therefore you can
-# reference groups from the matching regexp with backreferences such as
-# \1, \2, etc. (even by group name). Similarly, escape sequences such as \n
-# are processed in the replacement text, therefore we have to use two
-# backslashes there to insert one backslash, even when using raw strings
-# (cf. file:///usr/share/doc/python2.5-doc/html/lib/match-objects.html).
-substitutions = [(r"\bTeX\b", r"\\TeX{}"),
- (r"\bpdfTeX\b", r"pdf\\TeX{}"),
- (r"\bMetafont\b", r"\\MF{}"),
- (r"\bLaTeX\b", r"\\LaTeX{}"),
- (r"\bConTeXt\b", r"Con\\TeX{}t"),
- (r"\bteTeX\b", r"te\\TeX{}"),
- (r"\bMiKTeX\b", r"MiK\\TeX{}")]
+# reference groups from the matching regexp with backreferences such as \1,
+# \2, etc. (even by group name with the \g<name> syntax). Similarly, escape
+# sequences such as \n are processed in the replacement text, therefore we
+# have to use two backslashes there to insert one backslash, even when using
+# raw strings (cf.
+# file:///usr/share/doc/python2.5-doc/html/lib/match-objects.html).
+#
+# For a substitution to happen on a given line, the regexp must match at
+# the beginning of that line. The replacement text can generate several lines
+# if needed, include the original line (using a group), etc.
+preamble_substitutions = [
+ (r"^(?P<input_line>\s*\\usepackage(\[[^][]*\])?\{fontenc\}.*)$",
+ r"\g<input_line>\n\\usepackage{mflogo}\n")]
+# How to recognize the end of the preamble
+begin_doc_re = r"^\s*\\begin\{document\}"
+
+# Document body
+# ~~~~~~~~~~~~~
+# List of (regexp, replacement text) tuples describing which substitutions are
+# to be performed on each chunk of the body text.
+#
+# The same comments as for 'preamble_substitutions' apply here (in particular,
+# a replacement text can make use of groups from the corresponding regexp),
+# except that here, we don't check if a given regexp *matches* at the beginning
+# of a chunk; instead, we *search* (using regexp.search()) for the first match
+# of that regexp in the chunk.
+body_substitutions = [(r"\bTeX\b", r"\\TeX{}"),
+ (r"\bpdfTeX\b", r"pdf\\TeX{}"),
+ (r"\bMetafont\b", r"\\MF{}"),
+ (r"\bLaTeX\b", r"\\LaTeX{}"),
+ (r"\bConTeXt\b", r"Con\\TeX{}t"),
+ (r"\bteTeX\b", r"te\\TeX{}"),
+ (r"\bMiKTeX\b", r"MiK\\TeX{}")]
+
# List of (command_name, number_of_args) tuples for LaTeX commands which
# should not be subject to the regexp substitution (neither the command name,
# nor its arguments).
#
-# If DebianDoc starts using an \envvar command for typesetting the names
-# of environment variables (that would be nice), it should be added to this
-# list.
+# If the LaTeX output from DebianDoc tools starts using an \envvar command for
+# typesetting the names of environment variables (that would be nice), it
+# should be added to this list.
skipped_commands = [("file", 1)]
# When processing the body of the document, if a line matches one of the
# regexps in 'no_subst', it will not be subject to substitution at all.
no_subst = [r"^.*\bgenerated from \\\$Id:[ \t]+.+[ \t]+\\\$"]
+# ****************************************************************************
+# * Simple customization ends here *
+# ****************************************************************************
progname = os.path.basename(sys.argv[0])
-progversion_base = "0.1"
+progversion_base = "0.2"
# Append an SVN revision part to the program version
-svn_revision_string = "$LastChangedRevision: 2510 $"
+svn_revision_string = "$LastChangedRevision$"
svn_revision_rec = re.compile(r"^\$LastChangedRevision: ([0-9]+) \$$")
svn_revision_mo = svn_revision_rec.match(svn_revision_string)
@@ -74,14 +108,107 @@
usage = """Usage: %(progname)s [option ...] input_file output_file
-Filter DebianDoc LaTeX's output to translate TeX into \TeX, etc.
+Enhance the LaTeX output of DebianDoc tools.
+The document preamble is customized; in the document body, TeX is replaced
+with \\TeX{}, LaTeX with \\LaTeX{}, etc., except where it doesn't make sense
+(as in the argument of \\file, and in the SVN Id).
+
+The calling syntax allows this script to be specified as the argument to the
+-s option of commands such as debiandoc2latexpdf.
+
Options:
+ --algorithm explain the algorithm used
--help display this message and exit
--version output version information and exit""" \
% {"progname": progname}
+algorithm_doc = """\
+The algorithm used in %(progname)s is the following. First, read the
+preamble and customize it (e.g., to add '\\usepackage{mflogo}' after
+the line loading 'fontenc'). This is done the following way:
+
+ Each line of the preamble is read separately. If it matches one of the
+ regular expressions in 'preamble_substitutions', the corresponding
+ replacement text is substituted, and no other substitution is done on that
+ line (i.e., the first regexp that matches 'wins').
+
+ Lines that don't match any regexp in 'preamble_substitutions' are output
+ verbatim.
+
+ Note: 'preamble_substitutions' is a list of tuples; the first element of
+ each tuple is a regexp, and the second element is its corresponding
+ replacement text.
+
+ The end of the preamble is detected when a line matches the regexp
+ in 'begin_doc_re'.
+
+Then, process the body of the document line by line. If a line matches at
+least one of the regular expressions in 'no_subst', it is dumped verbatim.
+Currently, this is used to avoid changing 'Debian-TeX-Policy.sgml' into
+'Debian-\\TeX{}-Policy.sgml' in the Id generated by subversion:
+
+ $Id$
+
+Other lines in the document body go through the following filter:
+
+ 1. Split the line into chunks separated by LaTeX commands listed in
+ 'skipped_commands' (currently, only \\file).
+
+ 2. Such commands and their arguments are dumped verbatim. This avoids
+ mangling file names that contain the string 'TeX', such as
+ '/etc/texmf/texmf.d/05TeXMF.cnf'.
+
+ For each of these commands, the number of arguments is supposed to be
+ fixed, as specified in %(progname)s. But it is possible to specify
+ that e.g., \\file takes one argument, and \\othercommand takes two
+ arguments.
+
+ 3. The remaining chunks of text each go through the substitution process,
+ which works as follows:
+
+ Initialisation:
+
+ index = 0 --- which means, start at the beginning of the chunk
+
+ Loop:
+
+ (a) Look for the first match of each regular expression in
+ 'body_substitutions' (the regexp is the first element of each tuple),
+ starting at 'index' in the chunk. If no regexp matches, it means
+ there is nothing left to replace in the chunk, therefore we break the
+ loop.
+
+ (b) Choose the regexp that matched earliest in the chunk, dump the text
+ from 'index' to the beginning of the regexp match, and write the
+ replacement text for the regexp (which is given in the second element
+ of the tuple in 'body_substitutions' that contains the regexp, and
+ can make use of groups that matched in the regexp---specified as \\1,
+ \\2, or even by group name).
+
+ (c) Let 'index' point right after the end of the regexp match and start a
+ new loop iteration, provided 'index' doesn't point to the end of the
+ chunk yet.
+
+ The idea behind this loop is to proceed as a human would do, instead of
+ the simpler way, which would be: successively replace all occurrences of
+ each regexp in 'body_substitutions' in the chunk. This simpler way would
+ cause problems, because a replacement text for a given regexp could be
+ later matched by another regexp, and be subject to a second (recursive)
+ replacement, which is generally not wanted and forces one to be very
+ careful about the order in which the regexps are listed.
+
+Currently, the arguments of LaTeX commands in step 2 are supposed to all fit
+on the same line as the command, and they are recognized based on brace
+matching, with escaped braces \\{ and \\} properly handled (they are not
+confused with braces which delimit arguments). Due to this single-line
+limitation, the arguments cannot contain TeX comments. Currently, this is
+sufficient for the \\file commmand calls produced by DebianDoc, which is why
+this relatively simple design was chosen.""" \
+% {"progname": progname}
+
+
class error(Exception):
pass
@@ -92,15 +219,29 @@
"Exception raised for obvious bugs (when an assertion is false)."
-def process_preamble(input_stream, output_stream, lineno):
- fontenc_rec = re.compile(r"^\s*\\usepackage(\[[^][]*\])?\{fontenc\}")
- begin_doc_rec = re.compile(r"^\s*\\begin\{document\}")
+def compile_regexps(seq):
+ res = []
+ for e in seq:
+ res.append((re.compile(e[0]), e[1]))
+
+ return res
+
+
+def process_preamble(input_stream, output_stream, preamble_substitutions,
+ begin_doc_re, lineno):
+ begin_doc_rec = re.compile(begin_doc_re)
+ subs = compile_regexps(preamble_substitutions)
+
for line in input_stream:
obuf = [line] # output buffer
- if fontenc_rec.match(line):
- obuf.append("\\usepackage{mflogo}\n")
+ for regexp, repl in subs:
+ mo = regexp.match(line)
+ if mo is not None:
+ # Replacement text
+ obuf = [mo.expand(repl)]
+ break
output_stream.write(''.join(obuf))
lineno += 1
@@ -116,7 +257,9 @@
If all elements are equal to -1, return None."""
+ # Smallest number found so far, among those that are different from -1
min_so_far = None
+ # Index of this number in 'l'
index_of_min_so_far = None
for i in range(len(l)):
@@ -129,6 +272,12 @@
def skip_cmd_and_args(obuf, line, cmd_start, command, nargs, lineno):
+ """Skip a LaTeX command and its arguments.
+
+ The command 'command' is supposed to start at position 'cmd_start' in
+ 'line', and accept 'nargs' mandatory arguments.
+
+ """
start_of_cmd_call_rec = re.compile(r"\\%s[ \t]*\{" % command)
mo = start_of_cmd_call_rec.match(line, pos=cmd_start)
@@ -275,7 +424,8 @@
def process_command_line():
try:
opts, args = getopt.getopt(sys.argv[1:], "",
- ["help",
+ ["algorithm",
+ "help",
"version"])
except getopt.GetoptError, message:
sys.stderr.write(usage + "\n")
@@ -284,7 +434,10 @@
params = {}
for option, value in opts:
- if option == "--help":
+ if option == "--algorithm":
+ print algorithm_doc
+ return ("exit", 0)
+ elif option == "--help":
print usage
return ("exit", 0)
elif option == "--version":
@@ -304,15 +457,6 @@
return ("continue", params)
-def compile_regexps(seq):
- res = []
-
- for e in seq:
- res.append((re.compile(e[0]), e[1]))
-
- return res
-
-
def main():
action, p = process_command_line()
if action == "exit":
@@ -324,8 +468,10 @@
# Number of the input line that will be read next, starting from 1
lineno = 1
- lineno = process_preamble(input_stream, output_stream, lineno)
- process_body(input_stream, output_stream, substitutions, no_subst,
+ lineno = process_preamble(input_stream, output_stream,
+ preamble_substitutions, begin_doc_re,
+ lineno)
+ process_body(input_stream, output_stream, body_substitutions, no_subst,
lineno)
sys.exit(0)
Property changes on: tex-common/trunk/doc/texify-tex-output
___________________________________________________________________
Name: svn:keywords
- Id
+ Id LastChangedRevision
More information about the Debian-tex-commits
mailing list