[diffoscope] 01/01: WIP: html-dir: split index pages up if they get too big
Ximin Luo
infinity0 at debian.org
Mon Jun 19 20:50:49 UTC 2017
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch WIP/humungous-diffs
in repository diffoscope.
commit e88d0938f58d9d5aac6c023a437a543a373c6945
Author: Ximin Luo <infinity0 at debian.org>
Date: Mon Jun 19 22:46:32 2017 +0200
WIP: html-dir: split index pages up if they get too big
---
diffoscope/difference.py | 6 +-
diffoscope/presenters/html/html.py | 226 +++++++++++++++++++++++++++++---
diffoscope/presenters/html/templates.py | 52 ++++----
diffoscope/presenters/utils.py | 5 +
4 files changed, 242 insertions(+), 47 deletions(-)
diff --git a/diffoscope/difference.py b/diffoscope/difference.py
index 54cb686..bff6c28 100644
--- a/diffoscope/difference.py
+++ b/diffoscope/difference.py
@@ -136,7 +136,7 @@ class Difference(object):
queue.extend(top._details)
yield from self.traverse_breadth(queue)
- def traverse_heapq(self, scorer, queue=None):
+ def traverse_heapq(self, scorer, yield_score=False, queue=None):
"""Traverse the difference tree using a priority queue, where each node
is scored according to a user-supplied function, and nodes with smaller
scores are traversed first (after they have been added to the queue).
@@ -148,10 +148,10 @@ class Difference(object):
queue = queue if queue is not None else [(scorer(self, None), self)]
if queue:
val, top = heapq.heappop(queue)
- yield top
+ yield ((top, val) if yield_score else top)
for d in top._details:
heapq.heappush(queue, (scorer(d, val), d))
- yield from self.traverse_heapq(scorer, queue)
+ yield from self.traverse_heapq(scorer, yield_score, queue)
@staticmethod
def from_feeder(feeder1, feeder2, path1, path2, source=None, comment=None, **kwargs):
diff --git a/diffoscope/presenters/html/html.py b/diffoscope/presenters/html/html.py
index bb847a7..ddadede 100644
--- a/diffoscope/presenters/html/html.py
+++ b/diffoscope/presenters/html/html.py
@@ -37,6 +37,7 @@ import re
import sys
import html
import codecs
+import contextlib
import hashlib
import logging
import contextlib
@@ -47,7 +48,7 @@ from diffoscope.diff import SideBySideDiff, DIFFON, DIFFOFF
from ..icon import FAVICON_BASE64
from ..utils import PrintLimitReached, DiffBlockLimitReached, \
- create_limited_print_func, Presenter, make_printer
+ create_limited_print_func, Presenter, make_printer, PartialString
from . import templates
@@ -106,22 +107,25 @@ def convert(s, ponct=0, tag=''):
return t.getvalue()
+def get_visual(visual, anchor, indentstr, indentnum, end_tag=True):
+ logger.debug('including image for %s', visual.source)
+ indent = tuple(indentstr * (indentnum + x) for x in range(3))
+ end_tag = indent + u"</div>" if end_tag else u""
+ return u"""{0[0]}<div class="difference">
+{0[1]}<div class="diffheader">
+{0[1]}<div class="diffcontrol">⊟</div>
+{0[1]}<div><span class="source">{1}</span>
+{0[2]}<a class="anchor" href="#{2}" name="{2}">\xb6</a>
+{0[1]}</div>
+{0[1]}</div>
+{0[1]}<div class="difference"><img src=\"data:{3},{4}\" alt=\"compared images\" /></div>
+{5}""".format(indent, html.escape(visual.source), anchor, visual.data_type, visual.content, end_tag)
+
def output_visual(print_func, visual, parents):
logger.debug('including image for %s', visual.source)
sources = parents + [visual.source]
- print_func(u'<div class="difference">')
- print_func(u'<div class="diffheader">')
- print_func(u'<div class="diffcontrol">⊟</div>')
- print_func(u'<div><span class="source">%s</span>'
- % html.escape(visual.source))
anchor = escape_anchor('/'.join(sources[1:]))
- print_func(
- u' <a class="anchor" href="#%s" name="%s">\xb6</a>' % (anchor, anchor))
- print_func(u"</div>")
- print_func(u"</div>")
- print_func(u'<div class="difference">'
- u'<img src=\"data:%s,%s\" alt=\"compared images\" /></div>' %
- (visual.data_type, visual.content))
+ print_func(get_visual(visual, anchor, "", 0, end_tag=False))
print_func(u"</div>", force=True)
def escape_anchor(val):
@@ -139,18 +143,95 @@ def escape_anchor(val):
return val
-def output_header(css_url, print_func):
+def output_anchor(path):
+ return escape_anchor('/'.join(path[1:]))
+
+def output_node_frame(difference, path, indentstr, indentnum, body):
+ indent = tuple(indentstr * (indentnum + x) for x in range(3))
+ anchor = output_anchor(path)
+ dctrl_class, dctrl = ("diffcontrol", u'⊟') if difference.has_visible_children() else ("diffcontrol-nochildren", u'⊡')
+ if difference.source1 == difference.source2:
+ header = u"""{0[1]}<div class="{1}">{2}</div>
+{0[1]}<div><span class="source">{4}</span>
+{0[2]}<a class="anchor" href="#{3}" name="{3}">\xb6</a>
+{0[1]}</div>
+""".format(indent, dctrl_class, dctrl, anchor,
+ html.escape(PartialString.escape(difference.source1)))
+ else:
+ header = u"""{0[1]}<div class="{1} diffcontrol-double">{2}</div>
+{0[1]}<div><span class="source">{4}</span> vs.</div>
+{0[1]}<div><span class="source">{5}</span>
+{0[2]}<a class="anchor" href="#{3}" name="{3}">\xb6</a>
+{0[1]}</div>
+""".format(indent, dctrl_class, dctrl, anchor,
+ html.escape(PartialString.escape(difference.source1)),
+ html.escape(PartialString.escape(difference.source2)))
+
+ return u"""{0[1]}<div class="diffheader">
+{1}{0[1]}</div>
+{2}""".format(indent, header, body)
+
+def output_node(difference, path, indentstr, indentnum, css_url, directory):
+ indent = tuple(indentstr * (indentnum + x) for x in range(3))
+ t, cont = PartialString.cont()
+
+ if difference.comments:
+ comments = u'{0[1]}<div class="comment">\n{1}{0[1]}</div>\n'.format(
+ indent, "".join(u"{0[2]}{1}<br/>\n".format(indent, html.escape(x)) for x in difference.comments))
+ else:
+ comments = u""
+
+ visuals = u""
+ for visual in difference.visuals:
+ visuals += get_visual(visual, output_anchor(path), indentstr, indentnum+1)
+
+ udiff = io.StringIO()
+ if difference.unified_diff:
+ def print_func(x, force=False):
+ udiff.write(x)
+ HTMLPresenter().output_unified_diff(print_func, css_url, directory, difference.unified_diff, difference.has_internal_linenos)
+
+ # Construct a PartialString for this node
+ # {3} gets mapped to {-1}, a continuation hole for later child nodes
+ body = u"{0}{1}{2}{3}".format(t.escape(comments), t.escape(visuals), t.escape(udiff.getvalue()), "{-1}")
+ if len(path) == 1:
+ # root node, frame it
+ t = cont(t, output_node_frame(difference, path, indentstr, indentnum, body))
+ else:
+ t = cont(t, body)
+
+ # Add holes for child nodes
+ for d in difference.details:
+ # {0} hole, for the child node's contents
+ # {-1} continuation hole, for later child nodes
+ t = cont(t, u"""{0[1]}<div class="difference">
+{1}{0[1]}</div>
+{{-1}}""".format(indent, output_node_frame(d, path + [d.source1], indentstr, indentnum+1, "{0}")), d)
+
+ return cont(t, u"")
+
+def get_header(css_url):
if css_url:
css_link = '<link href="%s" type="text/css" rel="stylesheet" />' % css_url
else:
css_link = ''
- print_func(templates.HEADER % {'title': html.escape(' '.join(sys.argv)),
- 'favicon': FAVICON_BASE64,
- 'css_link': css_link,
- })
+ return templates.HEADER % {
+ 'title': html.escape(' '.join(sys.argv)),
+ 'favicon': FAVICON_BASE64,
+ 'css_link': css_link,
+ }
+
+def output_header(css_url, print_func):
+ print_func(get_header(css_url))
+
+def get_footer(jquery_url=None):
+ footer = templates.FOOTER % {'version': VERSION}
+ if jquery_url:
+ return templates.SCRIPTS % {'jquery_url': html.escape(jquery_url)} + footer
+ return footer
def output_footer(print_func):
- print_func(templates.FOOTER % {'version': VERSION}, force=True)
+ print_func(get_footer(), force=True)
@contextlib.contextmanager
@@ -351,10 +432,10 @@ class HTMLPresenter(Presenter):
anchor = escape_anchor('/'.join(sources[1:]))
print_func(u' <a class="anchor" href="#%s" name="%s">\xb6</a>' % (anchor, anchor))
print_func(u"</div>")
+ print_func(u"</div>")
if difference.comments:
print_func(u'<div class="comment">%s</div>'
% u'<br />'.join(map(html.escape, difference.comments)))
- print_func(u"</div>")
if len(difference.visuals) > 0:
for visual in difference.visuals:
output_visual(print_func, visual, sources)
@@ -394,7 +475,7 @@ class HTMLPresenter(Presenter):
)
-class HTMLDirectoryPresenter(HTMLPresenter):
+class HTML2DirectoryPresenter(HTMLPresenter):
def output_html_directory(self, directory, difference, css_url=None, jquery_url=None):
"""
@@ -450,3 +531,106 @@ class HTMLDirectoryPresenter(HTMLPresenter):
css_url=parsed_args.css_url,
jquery_url=parsed_args.jquery_url,
)
+
+
+PLACEHOLDER = """<div class="ondemand-details">... <a href="%s.html">load details</a> ...</div>
+"""
+PLACEHOLDER_LEN = len(PLACEHOLDER)
+
+
+class HTMLDirectoryPresenter(HTML2DirectoryPresenter):
+
+ def output_difference(self, difference, jquery_url, css_url, directory):
+ partial_outputs = {} # nodes to their partial output
+ partial_ancestor = {} # child nodes to ancestor nodes
+
+ with contextlib.ExitStack() as xstack:
+ printers = {} # nodes to their printers
+ def maybe_print(node):
+ if partial_outputs[node].holes:
+ return
+ printers[node](partial_outputs[node].format())
+ del partial_outputs[node]
+ del printers[node]
+
+ def smallest_first(node, parscore):
+ depth = parscore[0] + 1 if parscore else 0
+ parents = [node] + parscore[2] if parscore else []
+ return depth, node.size_self(), parents
+
+ for node, score in difference.traverse_heapq(smallest_first, yield_score=True):
+ ancestor = partial_ancestor.pop(node, None)
+ logger.debug('html output for %s', node.source1)
+ path = score[2] + [node.source1]
+ node_output = output_node(node, path, " ", len(path)-1, css_url, directory)
+ anchor = output_anchor(path)
+
+ if ancestor:
+ logger.debug("output size: %s, %s",
+ partial_outputs[ancestor].size(PLACEHOLDER_LEN), node_output.size(PLACEHOLDER_LEN))
+ if ancestor and partial_outputs[ancestor].size(PLACEHOLDER_LEN) + node_output.size(PLACEHOLDER_LEN) < 100000: # FIXME limit
+ # under limit, add it to an existing page
+ partial_outputs[ancestor] = partial_outputs[ancestor].pformat({node: node_output})
+ stored = ancestor
+
+ else:
+ # over limit (or root), new subpage
+ if ancestor:
+ partial_outputs[ancestor] = partial_outputs[ancestor].pformat({node: PLACEHOLDER % anchor})
+ maybe_print(ancestor)
+ footer = get_footer()
+ else:
+ assert node is difference
+ footer = get_footer(jquery_url)
+ anchor = "index"
+
+ partial_outputs[node] = node_output.frame(
+ get_header(css_url) + u'<div class="difference">\n',
+ u'</div>\n' + footer)
+ printers[node] = xstack.enter_context(file_printer(directory, "%s.html" % anchor))
+ stored = node
+
+ for child in node.details:
+ partial_ancestor[child] = stored
+
+ maybe_print(stored)
+
+ if partial_outputs:
+ import pprint
+ pprint.pprint(partial_outputs, indent=4)
+ assert not partial_outputs
+
+ def output_html_directory(self, directory, difference, css_url=None, jquery_url=None):
+ """
+ Multi-file presenter. Writes to a directory, and puts large diff tables
+ into files of their own.
+
+ This uses jQuery. By default it uses /usr/share/javascript/jquery/jquery.js
+ (symlinked, so that you can still share the result over HTTP).
+ You can also pass --jquery URL to diffoscope to use a central jQuery copy.
+ """
+ if not os.path.exists(directory):
+ os.makedirs(directory)
+
+ if not os.path.isdir(directory):
+ raise ValueError("%s is not a directory" % directory)
+
+ if not jquery_url:
+ jquery_symlink = os.path.join(directory, "jquery.js")
+ if os.path.exists(jquery_symlink):
+ jquery_url = "./jquery.js"
+ else:
+ if os.path.lexists(jquery_symlink):
+ os.unlink(jquery_symlink)
+ for path in JQUERY_SYSTEM_LOCATIONS:
+ if os.path.exists(path):
+ os.symlink(path, jquery_symlink)
+ jquery_url = "./jquery.js"
+ break
+ if not jquery_url:
+ logger.warning('--jquery was not specified and jQuery was not found in any known location. Disabling on-demand inline loading.')
+ logger.debug('Locations searched: %s', ', '.join(JQUERY_SYSTEM_LOCATIONS))
+ if jquery_url == 'disable':
+ jquery_url = None
+
+ self.output_difference(difference, jquery_url, css_url, directory)
diff --git a/diffoscope/presenters/html/templates.py b/diffoscope/presenters/html/templates.py
index 713718a..26e262e 100644
--- a/diffoscope/presenters/html/templates.py
+++ b/diffoscope/presenters/html/templates.py
@@ -110,12 +110,12 @@ HEADER = """<!DOCTYPE html>
.diffoscope .diffheader:hover .anchor {
display: inline;
}
- .diffoscope table.diff tr.ondemand td {
+ .diffoscope table.diff tr.ondemand td, .diffoscope div.ondemand-details {
background: #f99;
text-align: center;
padding: 0.5em 0;
}
- .diffoscope table.diff tr.ondemand:hover td {
+ .diffoscope table.diff tr.ondemand:hover td, .diffoscope div.ondemand-details:hover {
background: #faa;
cursor: pointer;
}
@@ -140,41 +140,47 @@ HEADER = """<!DOCTYPE html>
<body class="diffoscope">
"""
-FOOTER = """
-<div class="footer">Generated by <a href="https://diffoscope.org" rel="noopener noreferrer" target="_blank">diffoscope</a> %(version)s</div>
+FOOTER = """<div class="footer">Generated by <a href="https://diffoscope.org" rel="noopener noreferrer" target="_blank">diffoscope</a> %(version)s</div>
</body>
</html>
"""
-SCRIPTS = """
-<script src="%(jquery_url)s"></script>
+SCRIPTS = """<script src="%(jquery_url)s"></script>
<script type="text/javascript">
$(function() {
- var load_cont = function() {
- var a = $(this).find("a");
+ // activate "loading" controls
+ var load_cont, load_generic = function(selector, target, getInfo, postLoad) {
+ return function() {
+ var a = $(this).find("a");
+ var filename = a.attr('href');
+ var info = getInfo ? getInfo(a) : null;
+ var button = a.parent();
+ button.text('... loading ...');
+ (target ? target(button) : button).load(filename + " " + selector, function() {
+ // https://stackoverflow.com/a/8452751/946226
+ var elems = $(this).children(':first').unwrap();
+ // set this behaviour for the next link too
+ var td = elems.parent().find(".ondemand td");
+ td.on('click', load_cont);
+ postLoad ? postLoad(td, info) : null;
+ });
+ return false;
+ };
+ };
+ load_cont = load_generic("tr", function(x) { return x.parent(); }, function(a) {
var textparts = /^(.*)\((\d+) pieces?(.*)\)$/.exec(a.text());
var numleft = Number.parseInt(textparts[2]) - 1;
var noun = numleft == 1 ? "piece" : "pieces";
- var newtext = textparts[1] + "(" + numleft + " " + noun + textparts[3] + ")";
- var filename = a.attr('href');
- var td = a.parent();
- td.text('... loading ...');
- td.parent().load(filename + " tr", function() {
- // https://stackoverflow.com/a/8452751/946226
- var elems = $(this).children(':first').unwrap();
- // set this behaviour for the next link too
- var td = elems.parent().find(".ondemand td");
- td.find("a").text(newtext);
- td.on('click', load_cont);
- });
- return false;
- };
+ return textparts[1] + "(" + numleft + " " + noun + textparts[3] + ")";
+ }, function(td, info) { td.find("a").text(info); });
$(".ondemand td").on('click', load_cont);
+ $(".ondemand-details").on('click', load_generic("div.difference > *"));
+ // activate [+]/[-] controls
var diffcontrols = $(".diffcontrol");
diffcontrols.on('click', function(evt) {
var control = $(this);
var parent = control.parent();
- var target = $.merge(parent.siblings('table.diff, div.difference'), parent.find('div.comment'));
+ var target = parent.siblings('table.diff, div.difference, div.comment');
var orig = target;
if (evt.shiftKey) {
var gparent = parent.parent();
diff --git a/diffoscope/presenters/utils.py b/diffoscope/presenters/utils.py
index c46773a..ae32819 100644
--- a/diffoscope/presenters/utils.py
+++ b/diffoscope/presenters/utils.py
@@ -249,6 +249,7 @@ class PartialString(object):
return real_mapping, new_holes
def size(self, hole_size=1):
+ # FIXME: account for escaped braces ({{ and }}) etc. when computing the size
return self.base_len + hole_size * self.num_holes
def pformat(self, mapping={}):
@@ -296,6 +297,10 @@ class PartialString(object):
return t.pformat({cont: cls(fmtstr, *(holes + (cont,)))})
return cls("{0}", cont), cont
+ def frame(self, header, footer):
+ frame = self.__class__(self.escape(header) + "{0}" + self.escape(footer), None)
+ return frame.pformat({None: self})
+
if __name__ == "__main__":
import doctest
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the Reproducible-commits mailing list