[sagenb] 28/179: Sanitize the output and text cells of published worksheets.
felix salfelder
felix-guest at moszumanska.debian.org
Tue May 6 12:05:07 UTC 2014
This is an automated email from the git hooks/post-receive script.
felix-guest pushed a commit to branch master
in repository sagenb.
commit 77fb49a0f585f01b5bc4bc26dfca7e64499aef78
Author: Jason Grout <jason.grout at drake.edu>
Date: Thu Oct 4 16:00:50 2012 -0500
Sanitize the output and text cells of published worksheets.
---
sagenb/data/sage/html/notebook/cell.html | 8 ++--
sagenb/data/sage/html/notebook/text_cell.html | 2 +-
sagenb/notebook/cell.py | 53 ++++++++++++++++++++++-----
3 files changed, 48 insertions(+), 15 deletions(-)
diff --git a/sagenb/data/sage/html/notebook/cell.html b/sagenb/data/sage/html/notebook/cell.html
index 0027932..a2fcdbd 100644
--- a/sagenb/data/sage/html/notebook/cell.html
+++ b/sagenb/data/sage/html/notebook/cell.html
@@ -96,20 +96,20 @@ INPUT:
<div class="cell_output_{{ "print_" if do_print else '' }}{{ cell.cell_output_type() }}"
id="cell_output_{{ cell.id() }}">
{% if cell.introspect() %}
- {{ cell.output_text(0, html=true) }}
+ {{ cell.output_text(0, html=true, sanitize=publish) }}
{% else %}
- {{ cell.output_text(wrap_, html=true) }}
+ {{ cell.output_text(wrap_, html=true, sanitize=publish) }}
{% endif %}
</div>
{% if not do_print %}
<div class="cell_output_{{ 'print_' if do_print else '' }}nowrap_{{ cell.cell_output_type() }}"
id="cell_output_nowrap_{{ cell.id() }}">
- {{ cell.output_text(0, html=true) }}
+ {{ cell.output_text(0, html=true, sanitize=publish) }}
</div>
{% endif %}
<div class="cell_output_html_{{ cell.cell_output_type() }}"
id="cell_output_html_{{ cell.id() }}">
- {{ cell.output_html() }}
+ {{ cell.output_html(sanitize=publish) }}
</div>
</div>
</td>
diff --git a/sagenb/data/sage/html/notebook/text_cell.html b/sagenb/data/sage/html/notebook/text_cell.html
index dffc6bc..8e00d76 100644
--- a/sagenb/data/sage/html/notebook/text_cell.html
+++ b/sagenb/data/sage/html/notebook/text_cell.html
@@ -25,7 +25,7 @@ INPUT:
</script>
{% endif %}
<div class="text_cell" id="cell_text_{{ cell.id() }}">
- {{ cell.plain_text() }}
+ {{ cell.plain_text(sanitize=publish) }}
</div>
{% if JEDITABLE_TINYMCE and not cell.worksheet().is_published() and not cell.worksheet().docbrowser() and not do_print and not publish %}
<script type="text/javascript">
diff --git a/sagenb/notebook/cell.py b/sagenb/notebook/cell.py
index 1580580..f15afa7 100644
--- a/sagenb/notebook/cell.py
+++ b/sagenb/notebook/cell.py
@@ -47,6 +47,25 @@ re_script = re.compile(r'<script[^>]*?>.*?</script>', re.DOTALL | re.I)
# Whether to enable editing of :class:`TextCell`s with TinyMCE.
JEDITABLE_TINYMCE = True
+try:
+ from lxml.html.clean import Cleaner
+ from lxml.etree import XMLSyntaxError
+ class SageCleaner(Cleaner):
+ def allow_element(self, el):
+ # Added this one test for mathjax <script> tags
+ if el.tag=='script' and el.get('type')=='math/tex' and not el.get('src'):
+ return True
+ return super(SageCleaner, self).allow_element(el)
+ html_cleaner = SageCleaner(page_structure=False, remove_tags=('head', 'title'), style=True)
+ def clean_html(text):
+ try:
+ return html_cleaner.clean_html(text)
+ except XMLSyntaxError:
+ return ''
+except ImportError:
+ def clean_html(text):
+ # looks ugly, but gets the job done
+ return text.replace('<', '&lt;')
###########################
# Generic (abstract) cell #
@@ -563,7 +582,7 @@ class TextCell(Cell_generic):
editing = editing, publish = publish)
- def plain_text(self, prompts=False):
+ def plain_text(self, prompts=False, sanitize=False):
ur"""
Returns a plain text version of this text cell.
@@ -585,7 +604,10 @@ class TextCell(Cell_generic):
sage: C.plain_text()
u'\u011b\u0161\u010d\u0159\u017e\xfd\xe1\xed\xe9\u010f\u010e'
"""
- return self._text
+ if sanitize:
+ return clean_html(self._text)
+ else:
+ return self._text
def edit_text(self):
"""
@@ -1664,7 +1686,7 @@ class Cell(Cell_generic):
except AttributeError:
return None
- def output_html(self):
+ def output_html(self, sanitize=False):
"""
Returns this compute cell's HTML output.
@@ -1682,7 +1704,10 @@ class Cell(Cell_generic):
u'<strong>5</strong>'
"""
try:
- return self._out_html
+ if sanitize:
+ return clean_html(self._out_html)
+ else:
+ return self._out_html
except AttributeError:
self._out_html = ''
return ''
@@ -1716,7 +1741,7 @@ class Cell(Cell_generic):
urls = urls.replace(s, begin + s[7:-1] + end)
return urls
- def output_text(self, ncols=0, html=True, raw=False, allow_interact=True):
+ def output_text(self, ncols=0, html=True, raw=False, allow_interact=True, sanitize=False):
ur"""
Returns this compute cell's output text.
@@ -1733,6 +1758,9 @@ class Cell(Cell_generic):
- ``allow_interact`` - a boolean (default: True); whether to
allow :func:`sagenb.notebook.interact.interact`\ ion
+ - ``sanitize`` - a boolean (default: False); whether to sanitize
+ the html (if html is selected)
+
OUTPUT:
- a string
@@ -1757,7 +1785,7 @@ class Cell(Cell_generic):
"""
if allow_interact and hasattr(self, '_interact_output'):
# Get the input template
- z = self.output_text(ncols, html, raw, allow_interact=False)
+ z = self.output_text(ncols, html, raw, allow_interact=False, sanitize=sanitize)
if not INTERACT_TEXT in z or not INTERACT_HTML in z:
return z
if ncols:
@@ -1765,7 +1793,7 @@ class Cell(Cell_generic):
try:
# Fill in the output template
output, html = self._interact_output
- output = self.parse_html(output, ncols)
+ output = self.parse_html(output, ncols, sanitize=sanitize)
z = z.replace(INTERACT_TEXT, output)
z = z.replace(INTERACT_HTML, html)
return z
@@ -1793,7 +1821,7 @@ class Cell(Cell_generic):
return s
if html:
- s = self.parse_html(s, ncols)
+ s = self.parse_html(s, ncols, sanitize=sanitize)
if (not is_interact and not self.is_html() and len(s.strip()) > 0 and
'<div class="docstring">' not in s):
@@ -1801,7 +1829,7 @@ class Cell(Cell_generic):
return s.strip('\n')
- def parse_html(self, s, ncols):
+ def parse_html(self, s, ncols, sanitize=False):
r"""
Parses HTML for output, escaping and wrapping HTML and
removing script elements.
@@ -1812,6 +1840,8 @@ class Cell(Cell_generic):
- ``ncols`` - an integer; the number of word wrap columns
+ - ``sanitize`` - a boolean; sanitize the html
+
OUTPUT:
- a string
@@ -1829,7 +1859,10 @@ class Cell(Cell_generic):
return word_wrap(escape(x), ncols)
def format_html(x):
- return self.process_cell_urls(x)
+ t = self.process_cell_urls(x)
+ if sanitize:
+ t = clean_html(t)
+ return t
# If there is an error in the output, specially format it.
if not self.is_interactive_cell():
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/sagenb.git
More information about the debian-science-commits
mailing list