[sagenb] 08/179: Tools for sws2rst

Tue May 6 12:05:04 UTC 2014

This is an automated email from the git hooks/post-receive script.

felix-guest pushed a commit to branch master
in repository sagenb.

commit 94f0b0c5bc86bb0983ee53742139a31fb843945d
Author: Pablo Angulo <pablo.angulo at uam.es>
Date:   Wed Jan 19 10:58:48 2011 +0100

    Tools for sws2rst
---
 sagenb/misc/comments2rst.py  | 370 +++++++++++++++++++++++++++++++++++++++++++
 sagenb/misc/results2rst.py   | 168 ++++++++++++++++++++
 sagenb/misc/worksheet2rst.py | 196 +++++++++++++++++++++++
 3 files changed, 734 insertions(+)

diff --git a/sagenb/misc/comments2rst.py b/sagenb/misc/comments2rst.py
new file mode 100644
index 0000000..ebbb809
--- /dev/null
+++ b/sagenb/misc/comments2rst.py
@@ -0,0 +1,370 @@
+# -*- coding: utf-8 -*-
+r"""
+Convert html from text cells in the notebook into ReStructuredText
+
+This is called by sws2rst
+
+- Pablo Angulo Ardoy (2011-02-25): initial version
+"""
+#**************************************************
+# Copyright (C) 2011 Pablo Angulo
+#
+# Distributed under the terms of the GPL License
+#**************************************************
+
+
+import re
+import os
+try:
+    from BeautifulSoup import (ICantBelieveItsBeautifulSoup, Tag,
+                               CData, Comment, Declaration, ProcessingInstruction)
+except ImportError:
+    raise ImportError, """BeautifulSoup must be installed.
+
+You might download a spkg from: 
+
+http://trac.sagemath.org/sage_trac/raw-attachment/ticket/10637/beautifulsoup-3.2.0.p0.spkg
+"""
+
+def preprocess_display_latex(text):
+    r"""replace $$some display latex$$ with <display>some display latex</display>
+    before the soup is built.
+
+    Deals with the situation where <p></p> tags are mixed
+    with $$, like $$<p>display_latex$$</p>, unless the mess is huge
+
+    EXAMPLES::
+
+        sage: from sagenb.misc.comments2rst import preprocess_display_latex
+        sage: s="$$a=2$$"
+        sage: preprocess_display_latex(s)
+        '<display>a=2</display>'
+        sage: s="<p>$$a=2$$</p>"
+        sage: preprocess_display_latex(s)
+        '<p><display>a=2</display></p>'
+        sage: s="<p>$$a=2</p>$$"
+        sage: preprocess_display_latex(s)
+        '<p><display>a=2</display></p>'
+        sage: s="$$<p>a=2</p>$$"
+        sage: preprocess_display_latex(s)
+        '<display>a=2</display>'
+    """
+    ls = []
+    start_tag = True
+    partes = text.split('$$')
+    for c in partes[:-1]:
+        if start_tag:
+            ls.append(c)
+            ls.append('<display>')
+        else:
+            c0, count = prune_tags(c)
+            ls.append(c0)
+            ls.append('</display>')
+            if count == 1:
+                ls.append('<p>')
+            elif count == -1:
+                ls.append('</p>')
+            elif abs(count)>1:
+                raise Exception, 'display latex was messed up with html code'
+        start_tag = not start_tag
+    ls.append(partes[-1])
+    return ''.join(ls)
+
+def prune_tags(text):
+    count = text.count('<p>') - text.count('</p>')
+    return text.replace('<br/>','').replace('<br />','').replace('<p>','').replace('</p>',''), count
+
+escapable_chars = { '+' :r'\+',
+                    '*' :r'\*',
+                    '|' :r'\|',
+                    '-' :r'\-'}
+def escape_chars(text):
+    for c,r in escapable_chars.iteritems():
+        text = text.replace(c,r)
+    return text
+
+def replace_courier(soup):
+    """Lacking a better option, I use courier font to mark <code>
+    within tinyMCE. And I want to turn that into real code tags.
+
+    Most users won't be needing this(?)
+    """
+    for t in soup.findAll(lambda s:s.has_key('style') and 'courier' in s['style']):
+        tag = Tag(soup, 'code')
+        while t.contents:
+            tag.append(t.contents[0])
+        t.replaceWith(tag)
+
+#inline_latex is careful not to confuse escaped dollars
+inline_latex = re.compile(r'([^\\])\$(.*?)([^\\])\$')
+latex_beginning = re.compile(r'\$(.*?)([^\\])\$')
+def replace_latex(soup):
+    r"""Replaces inline latex by :math:`code` and escapes
+    some rst special chars like +, -, * and | outside of inline latex
+
+    does not escape chars inside display or pre tags
+
+    EXAMPLES::
+
+        sage: from sagenb.misc.comments2rst import replace_latex
+        sage: from BeautifulSoup import ICantBelieveItsBeautifulSoup
+        sage: s = ICantBelieveItsBeautifulSoup("<p>Some <strong>latex: $e^\pi i=-1$</strong></p>")
+        sage: replace_latex(s)
+        sage: s
+        <p>Some <strong>latex: :math:`e^\pi i=-1`</strong></p>
+        sage: s = ICantBelieveItsBeautifulSoup("<p><strong>2+2 | 1+3</strong></p>")
+        sage: replace_latex(s)
+        sage: s
+        <p><strong>2\+2 \| 1\+3</strong></p>
+    """
+    for t in soup.findAll(text=re.compile('.+')):
+        if latex_beginning.match(t):
+            t.replaceWith(inline_latex.sub('\\1:math:`\\2\\3`',
+                                           latex_beginning.sub(':math:`\\1\\2`',
+                                                               unicode(t),
+                                                               1)))        
+        elif inline_latex.search(t):
+            t.replaceWith(inline_latex.sub('\\1:math:`\\2\\3`',
+                                           unicode(t)))
+        elif not (t.fetchParents(name = 'display')
+                  or t.fetchParents(name = 'pre')):
+            t.replaceWith(escape_chars(t))
+
+class Soup2Rst(object):
+    """builds the rst text from the Soup Tree
+    """
+    tags = {'h1':'header',
+            'h2':'header',
+            'h3':'header',
+            'h4':'header',
+            'p': 'inline_no_tag',
+            '[document]': 'document',
+            'br': 'br',
+            'b':'strong',
+            'strong':'strong',
+            'em':'em',
+            'pre':'pre',
+            'code':'code',
+            'display':'display',
+            'span':'inline_no_tag',
+            'ul':'ul',
+            'ol':'ol',
+            'li':'li',
+            'a':'a',
+            'table':'table',
+#            'tr':'tr',
+            'td':'inline_no_tag',
+            'th':'inline_no_tag',
+            'tt':'inline_no_tag',
+            'div':'block_no_tag',
+            'img':'img',
+#            '':'',
+            }
+
+    headers = {'h1':u'=',
+               'h2':u'-',
+               'h3':u'~',
+               'h4':u'"',
+               }
+    
+    def __init__(self, images_dir):
+        self.images_dir = images_dir
+        self._nested_list = 0
+        self._inside_ol   = False
+        self._inside_code_tag = False
+
+    def visit(self, node):
+        if isinstance(node, (CData, Comment, Declaration, ProcessingInstruction)):
+            return ''
+        elif hasattr(node, 'name'):
+            try:
+                visitor = getattr(self, 'visit_' + self.tags[node.name])
+                return visitor(node)
+            except (KeyError, AttributeError):
+                print 'Warning: node not supported (or something else?) ' + node.name
+                return unicode(node)
+        else:
+            #Assume plain string
+            return unicode(node).replace('\n','')
+
+    def visit_document(self, node):
+        return '\n'.join(self.visit(tag) for tag in node.contents)    
+
+    def get_plain_text(self, node):
+        """Gets all text, removing all tags"""
+        if hasattr(node, 'contents'):
+            t = ' '.join(self.get_plain_text(tag) for tag in node.contents)
+        else:
+            t = unicode(node)
+        return t.replace('\n','')
+        
+    def visit_header(self, node):
+        s = ' '.join(self.visit(tag) for tag in node.contents)
+        spacer = self.headers[node.name]*len(s)
+        return s.replace( '\n', '') +  '\n' + spacer
+
+    def visit_pre(self, node):
+        return '::\n\n    '+unicode(node)[5:-6].replace('<br />','\n').replace('<br></br>','\n').replace('\n','\n    ')
+
+    def visit_ul(self, node):
+        self._nested_list += 1
+        result = '\n'.join(self.visit(tag) for tag in node.contents)
+        self._nested_list -= 1
+        return result
+
+    def visit_ol(self, node):
+        self._nested_list += 1
+        self._inside_ol = True
+        result = '\n'.join(self.visit(tag) for tag in node.contents)
+        self._nested_list -= 1
+        self._inside_ol = False
+        return result
+
+    def visit_li(self, node):
+        return (' '*self._nested_list
+                + ('#. ' if self._inside_ol else '- ')
+                +' '.join(self.visit(tag) for tag in node.contents))
+
+    def visit_display(self, node):
+        return ('\n.. MATH::\n\n    ' +
+                unicode(node)[9:-10].replace('<br></br>','\n').replace('\n','\n    ') +
+                '\n\n')
+
+    def visit_img(self, node):
+        return '.. image:: ' + os.path.join(self.images_dir, node['src'].replace(' ','_')) + '\n    :align: center\n'
+
+    def visit_table(self,node):
+        rows = []
+        for elt in node.contents:
+            if not hasattr(elt,'name'):
+                pass
+            elif elt.name == 'thead':
+                rows.extend(self.prepare_tr(row)
+                            for row in elt
+                            if hasattr(row,'name') and
+                            row.name=='tr')
+                rows.append([]) #this row represents a separator
+            elif elt.name == 'tbody':
+                rows.extend(self.prepare_tr(row)
+                            for row in elt
+                            if hasattr(row,'name') and
+                            row.name=='tr')
+            elif elt.name == 'tr':
+                rows.append(self.prepare_tr(elt))
+
+        ncols = max(len(row) for row in rows)
+        for row in rows:
+            if len(row) < ncols:
+                row.extend( ['']*(ncols - len(row)))
+        cols_sizes = [max(len(td) for td in tds_in_col)
+                      for tds_in_col in zip(*rows)]
+        result = [' '.join('='*c for c in cols_sizes)]
+        
+        for row in rows:
+            if any(td for td in row):
+                result.append(' '.join(td+' '*(l - len(td))
+                                       for l,td in zip(cols_sizes,row)))
+            else:
+                result.append(' '.join('-'*c for c in cols_sizes))
+        result.append(' '.join('='*c for c in cols_sizes))
+        return '\n'.join(result)
+
+    def prepare_tr(self, node):
+        return [self.visit(tag) for tag in node.contents if tag!='\n']
+        
+    def visit_br(self, node):
+        return '\n'
+
+    def visit_strong(self, node):
+        if node.contents:
+            content = ' '.join(self.visit(tag) for tag in node.contents).strip()
+            if '``' in content or self._inside_code_tag:
+                return content
+            else:
+                return '**' + content + '**'
+        else:
+            return ''
+
+    def visit_em(self,node):
+        if node.contents:
+            return '*' + ' '.join(self.visit(tag) for tag in node.contents).strip() + '*'
+        else:
+            return ''
+
+    def visit_code(self, node):
+        if node.contents:
+            self._inside_code_tag = True
+            content = self.get_plain_text(node).strip()
+            self._inside_code_tag = False
+            return '``' + content + '``'
+        else:
+            return ''
+
+    def visit_inline_no_tag(self, node):
+        return (' '.join(self.visit(tag)
+                         for tag in node.contents)).strip() + '\n'
+
+    def visit_block_no_tag(self, node):
+        return '\n'.join(self.visit(tag) for tag in node.contents)
+
+    def visit_a(self, node):
+        return ('`' + ' '.join(self.visit(tag) for tag in node.contents) +
+                ' <' + node['href'] + '>`_'
+                )
+
+def html2rst(text, images_dir):
+    """Converts html, tipically generated by tinyMCE, into rst
+    compatible with Sage documentation.
+
+    The main job is done by BeautifulSoup, which is much more
+    robust than conventional parsers like HTMLParser, but also
+    several details specific of this context are taken into
+    account, so this code differs from generic approaches like
+    those found on the web.
+
+    INPUT:
+
+    - ``text`` -- string -- a chunk of HTML text
+
+    - ``images_dir`` -- string -- folder where images are stored
+
+    OUTPUT:
+
+    - string -- rst text
+
+    EXAMPLES::
+
+        sage: from sagenb.misc.comments2rst import html2rst
+        sage: html2rst('<p>Some text with <em>math</em>: $e^{\pi i}=-1$</p>', '')
+        u'Some text with  *math* : :math:`e^{\\pi i}=-1`\n'
+        sage: html2rst('<p>Text with <em>incorrect</p> nesting</em>.', '')       
+        u'Text with  *incorrect*\n\n nesting\n.'
+        sage: html2rst('<pre>Preformatted: \n    a+2\n</pre><p> Not preformatted: \n    a+2\n</p>', '')
+        u'::\n\n    Preformatted: \n        a+2\n    \nNot preformatted:     a\\+2\n'
+        sage: html2rst('áñ &ntildeá','')
+        u'\xe1\xf1 \xf1\xe1'
+        sage: html2rst('<p>some text</p><p>$$</p><p>3.183098861 \cdot 10^{-1}</p><p>$$</p>','')
+        u'some text\n\n.. MATH::\n\n    3.183098861 \\cdot 10^{-1}\n'
+    """
+    
+    #replace $$some display latex$$ with
+    #<display>some display latex</display>
+    text = preprocess_display_latex(text)
+
+    #eliminate nasty  
+    text = text.replace(' ',' ')
+            
+    #ICantBelieveItsBeautifulSoup is better than BeautifulSoup
+    #for html that wasn't generated by humans (like tinyMCE)
+    soup = ICantBelieveItsBeautifulSoup(text,
+                       convertEntities=ICantBelieveItsBeautifulSoup.HTML_ENTITIES)    
+
+    #remove all comments
+    comments = soup.findAll(text=lambda text:isinstance(text, Comment))
+    for comment in comments:
+        comment.extract()
+
+    replace_courier(soup)
+    replace_latex(soup)
+    v = Soup2Rst(images_dir)
+    return v.visit(soup)
diff --git a/sagenb/misc/results2rst.py b/sagenb/misc/results2rst.py
new file mode 100644
index 0000000..23b87fd
--- /dev/null
+++ b/sagenb/misc/results2rst.py
@@ -0,0 +1,168 @@
+# -*- coding: utf-8 -*-
+r"""
+Convert output from code cells in the notebook into ReStructuredText
+
+This is called by sws2rst
+
+- Pablo Angulo Ardoy (2011-02-25): initial version
+"""
+#**************************************************
+# Copyright (C) 2011 Pablo Angulo
+#
+# Distributed under the terms of the GPL License
+#**************************************************
+
+
+import re
+IMAGES_DIR = 'images/'
+
+#We parse lines one by one but keep track of current scope
+#similarly to worksheet2rst.py
+#Results are split into different types. Some are discarded
+class States(object):
+    NORMAL = 0
+    HTML = 1
+    MATH = 2
+    TRACEBACK = 3
+
+class LineTypes(object):
+    PLAIN = 0
+    IMAGE = 1
+    LATEX = 2
+    HTML  = 3
+    TRACE = 4
+
+class ResultsParser(object):
+    """Auxiliary class for results2rst
+    """
+    def __init__(self, images_dir):
+        ##Order matters, place more restrictive regex's before more general ones
+        ##If no regex matches, line will be discarded
+        ##a self transition is needes to produce any output
+        self.transitions = {
+            States.NORMAL:[
+                #IMAGE
+                     (re.compile(r"^\<html\>\<font color='black'\>"
+                                 r"\<img src='cell\://(.*?)'\>"
+                                 r"\</font\>\</html\>"),
+                      "\n.. image:: " + images_dir + "\\1\n    :align: center\n",
+                      LineTypes.IMAGE,
+                      States.NORMAL),
+                #SELF-CONTAINED MATH
+                     (re.compile(r"^\<html\>\<div class=\"math\"\>"
+                                 r"\\newcommand\{\\Bold\}\[1\]\{\\mathbf\{\#1\}\}"
+                                 r"(.*?)\</div\>\</html\>$"),
+                      "\n.. MATH::\n\n    \\1\n",
+                      LineTypes.LATEX,
+                      States.NORMAL),
+                #SELF-CONTAINED MATH - BIS
+                     (re.compile(r"^\<html\>\<div class=\"math\"\>"
+                                 r"(.*?)\</div\>\</html\>$"),
+                      "\n.. MATH::\n\n    \\1",
+                      LineTypes.LATEX,
+                      States.NORMAL),
+                #START Traceback
+                     (re.compile(r"^(Traceback.*)"),
+                      "    Traceback (most recent call last):",
+                      LineTypes.TRACE,
+                      States.TRACEBACK),
+                #START MATH
+                     (re.compile(r"^\<html\>\<div class=\"math\"\>"
+                                 r"\\newcommand\{\\Bold\}\[1\]\{\\mathbf\{\#1\}\}(.*?)"),
+                      "\n.. MATH::\n\n    \\1",
+                      LineTypes.LATEX,
+                      States.MATH),
+                #SELF-CONTAINED HTML
+                     (re.compile(r"^\<html\>.*</html\>$"),
+                      "    <html>...</html>",
+                      LineTypes.HTML,
+                      States.NORMAL),        
+                #START HTML
+                     (re.compile(r"^\<html\>.*"),
+                      "    <html>...</html>",
+                      LineTypes.HTML,
+                      States.HTML),        
+                #CONTINUE NORMAL
+                     (re.compile("(.*)"),
+                      "    \\1",
+                      LineTypes.PLAIN,
+                      States.NORMAL),                
+                ],
+            States.MATH:[
+                 #END MATH
+                     (re.compile(r"(.*?)\</div\>\</html\>$"),
+                      "    \\1",
+                      LineTypes.LATEX,
+                      States.NORMAL),
+                 #CONTINUE MATH
+                     (re.compile("(.*)"),
+                      "    \\1",
+                      LineTypes.LATEX,
+                      States.MATH),        
+                ],
+            States.TRACEBACK:[
+                 #END Traceback
+                     (re.compile(r"^(\S.*)"),
+                      "    ...\n    \\1",
+                      LineTypes.TRACE,
+                      States.NORMAL),
+                ],
+            States.HTML:[
+                 #END HTML
+                     (re.compile(r".*</html\>$"),
+                      "",
+                      LineTypes.HTML,
+                      States.NORMAL),
+                ],
+        }
+    
+    def parse(self, text):
+        result_plain = []
+        result_show = []
+        state = States.NORMAL
+        for line in text.splitlines():
+            for regex, replacement, line_type, new_state in self.transitions[state]:
+                if regex.match(line):
+                    result = result_plain if line_type in (LineTypes.PLAIN, LineTypes.HTML)\
+                             else result_show
+                    result.append( regex.sub(replacement, line))
+                    state = new_state
+                    break
+        result_plain.extend(result_show)
+        return '\n'.join(result_plain)
+
+def results2rst(text, images_dir):
+    r"""Converts the result of evaluation of notebook cells
+    into rst compatible with Sage documentation.
+
+    Several common patterns are identified, and treated
+    accordingly. Some patterns are dropped, while others
+    are not recognized.
+
+    Currently, latex and images are recognized and converted.
+
+    INPUT:
+
+    - ``text`` -- string -- a chunk of HTML text
+
+    - ``images_dir`` -- string -- folder where images are stored
+
+    OUTPUT:
+
+    - string -- rst text
+
+    EXAMPLES::
+
+        sage: from sagenb.misc.results2rst import results2rst
+        sage: s="<html><font color='black'><img src='cell://sage0.png'></font></html>"
+        sage: results2rst(s,'')
+        '\n.. image:: sage0.png\n    :align: center\n'
+        sage: results2rst("4",'')
+        '    4'
+        sage: s=r'<html><div class="math">\newcommand{\Bold}[1]{\mathbf{#1}}\frac{3}{2}</div></html>'
+        sage: results2rst(s,'')                                       
+        '\n.. MATH::\n\n    \\frac{3}{2}\n'
+    """
+    Parser = ResultsParser(images_dir)
+    return Parser.parse(text)
+
diff --git a/sagenb/misc/worksheet2rst.py b/sagenb/misc/worksheet2rst.py
new file mode 100644
index 0000000..f12fe3e
--- /dev/null
+++ b/sagenb/misc/worksheet2rst.py
@@ -0,0 +1,196 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+r"""
+Convert worksheet.html files into ReStructuredText documents
+
+This is called by 'sage -sws2rst'. Can also be used as a commandline script 
+(if BeautifulSoup is installed):
+
+``python worksheet2rst.py worksheet.html``
+
+or
+
+``cat worksheet.html | python worksheet2rst.py``
+
+AUTHOR:
+
+- Pablo Angulo Ardoy (2011-02-25): initial version
+
+
+The content of worksheet.html is split into comments, code, and output 
+(the result of evaluating the code), as follows:
+
+comments
+{{{id=..|
+code
+///
+results
+}}}
+
+Each kind of text is dealt with separately.
+
+"""
+#**************************************************
+# Copyright (C) 2011 Pablo Angulo
+#
+# Distributed under the terms of the GPL License
+#**************************************************
+
+
+import sys
+import os
+import re
+from comments2rst import html2rst
+from results2rst import results2rst
+import codecs
+
+#We parse lines one by one but keep track of current scope
+#comments
+#{{{id=..|
+#code
+#///
+#results
+#}}}
+#RESULT_TO_BE_DROPPED corresponds to a results section whose
+#code was empty, and will be discarded, whether it's empty or not
+class States(object):
+    COMMENT = 0
+    CODE = 1
+    RESULT = 2
+    RESULT_TO_BE_DROPPED = 3
+
+# REs for splitting comments, code and results
+START_CELL_RE = re.compile('^\{\{\{id=(\d*)\|')
+END_CODE_RE   = re.compile('^\/\/\/')
+END_CELL_RE   = re.compile('^\}\}\}')
+
+#When to switch State, and which State to
+transitions = {
+    States.COMMENT:(
+        START_CELL_RE,
+        States.CODE
+        ),
+    States.CODE:(
+        END_CODE_RE,
+        States.RESULT),
+    States.RESULT:(
+        END_CELL_RE,
+        States.COMMENT),
+    States.RESULT_TO_BE_DROPPED:(
+        END_CELL_RE,
+        States.COMMENT)
+    }
+
+def code_parser(text):
+    """
+    
+    Format the code of a cell as an rst literal block of ``sage:`` / ``...`` lines.
+
+    INPUT:
+
+    - ``text`` -- string -- sage code, may or may not start with "sage:"
+
+    OUTPUT:
+
+    - string -- rst text
+
+    EXAMPLES (not used for unit test, see 
+    http://groups.google.com/group/sage-devel/browse_thread/thread/d82cb049ac102f3a)
+
+    : from sagenb.misc.worksheet2rst import code_parser
+    : s="a=2"
+    : code_parser(s)
+    '::\n\n    sage: a=2'
+    : s="def f(n):\n    return n+1\n"
+    : code_parser(s)
+    '::\n\n    sage: def f(n):\n    ...       return n+1'
+    : s="sage: def f(n):\nsage:     return n+1\n"
+    : code_parser(s)
+    '::\n\n    sage: def f(n):\n    ...       return n+1'
+    """
+    lines = ['::', '']  # literal-block marker followed by the mandatory blank line
+    for s in text.splitlines():
+        l = s[6:] if s.startswith('sage: ') else s  # drop an existing prompt
+        if not l: continue  # empty lines are skipped
+        prefix = '    ...   ' if l[0] == ' ' else '    sage: '  # a leading space marks a continuation line
+        lines.append(prefix + l)
+    return '\n'.join(lines)
+
+def worksheet2rst(s, images_dir=''):
+    """Parses a string, tipically the content of the file
+    worksheet.html inside a sws file, and converts it into
+    rst compatible with Sage documentation.
+
+    INPUT:
+
+    - ``s`` -- string -- text, tipically the content of
+                               worksheet.html
+
+    - ``images_dir`` -- string -- folder where images are stored
+
+    OUTPUT:
+
+    - string -- rst text
+
+    EXAMPLES (not used for unit test, see 
+    http://groups.google.com/group/sage-devel/browse_thread/thread/d82cb049ac102f3a)
+
+    : from sagenb.misc.worksheet2rst import worksheet2rst
+    : worksheet2rst('<p>some text</p>\n{{{id=1|\nprint 2+2\n///\n4\n}}}')      
+    u'.. -*- coding: utf-8 -*-\n\nsome text\n\n::\n\n    sage: print 2+2\n    4\n\n.. end of output\n'
+    : s = '{{{id=2|\nshow(f)\n///\n<html><div class="math">\\sqrt{x}</div></html>\n}}}\n'
+    : worksheet2rst(s)
+    u'.. -*- coding: utf-8 -*-\n\n\n::\n\n    sage: show(f)\n\n.. MATH::\n\n    \\sqrt{x}\n\n.. end of output\n'       
+    """
+    result_parser = results2rst
+    state = States.COMMENT
+    result = ['.. -*- coding: utf-8 -*-\n']
+    ls = []
+    last = 0
+    for line in s.splitlines():
+        regex, next_state= transitions[state]
+        m = regex.match(line) 
+        if m:
+            if state == States.COMMENT:
+                last_cell_id = m.group(1)
+                img_path = images_dir + os.path.sep
+                result.append(html2rst(u'\n'.join(ls), img_path))
+            elif state == States.RESULT:
+                img_path = os.path.join(images_dir, 'cell_%s_'%last_cell_id)
+                result.append(result_parser(u'\n'.join(ls),
+                                             img_path))
+                result.append('')
+                result.append('.. end of output')
+            elif state == States.CODE:
+                if ls and any(ls):
+                    result.append(code_parser(u'\n'.join(ls)))
+                else:
+                    next_state = States.RESULT_TO_BE_DROPPED
+            ls = []
+            state = next_state
+        else:
+            ls.append(line)
+    if state == States.COMMENT:
+        img_path = images_dir + os.path.sep
+        result.append(html2rst(u'\n'.join(ls), img_path))
+    elif state == States.RESULT:
+        img_path = os.path.join(images_dir, 'cell_%s_'%last_cell_id)
+        result.append(result_parser(u'\n'.join(ls),
+                                     img_path))
+        result.append('')
+        result.append('.. end of output')
+    elif state == States.CODE:
+        result.append(code_parser(u'\n'.join(ls)))
+
+    return u'\n'.join(result)
+
+if __name__=='__main__':
+    if len(sys.argv)>1:        
+        fichero = codecs.open(sys.argv[1], mode='r', encoding='utf-8')
+        text = fichero.read()
+        fichero.close()
+    else:
+        text = sys.stdin.read()
+
+    print worksheet2rst(text).encode('utf-8')
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/sagenb.git