[Reproducible-commits] [debbindiff] 06/09: Replace Python difflib by vim for speed reasons
Jérémy Bobbio
lunar at moszumanska.debian.org
Tue Sep 30 15:10:11 UTC 2014
This is an automated email from the git hooks/post-receive script.
lunar pushed a commit to branch master
in repository debbindiff.
commit 2fd094ae584816c7786bf930efc3ded460ff6ac3
Author: Jérémy Bobbio <lunar at debian.org>
Date: Tue Sep 30 15:46:58 2014 +0200
Replace Python difflib by vim for speed reasons
difflib is unbearably slow for any non-trivial output. Calling out to vim is
much faster and the output is still quite nice. Crazy trick, though.
---
debbindiff/presenters/html.py | 61 ++++++++++++++++++++++++++++++++++---------
1 file changed, 48 insertions(+), 13 deletions(-)
diff --git a/debbindiff/presenters/html.py b/debbindiff/presenters/html.py
index 2766c68..24e99b2 100644
--- a/debbindiff/presenters/html.py
+++ b/debbindiff/presenters/html.py
@@ -18,11 +18,14 @@
# along with debbindiff. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function
-import difflib
import os.path
+import re
+import subprocess
import sys
+from tempfile import NamedTemporaryFile
from xml.sax.saxutils import escape
from debbindiff import logger
+from debbindiff.comparators.utils import make_temp_directory
HEADER = """
<!DOCTYPE html>
@@ -60,20 +63,31 @@ HEADER = """
table.diff {
font-size: 10pt;
}
- %(htmldiff_styles)s
+ .lnr {
+ background-color: #ccc;
+ color: #666;
+ }
+ .DiffChange {
+ background-color: #ff8888;
+ font-weight: bold;
+ }
+ .DiffText {
+ color: white;
+ background-color: #ff4444;
+ font-weight: bold;
+ }
</style>
</head>
<body>
"""
FOOTER = """
-%(htmldiff_legend)s
<div class="footer">Generated by debbindiff</div>
</body>
</html>
"""
-MAX_PAGE_SIZE = 1000 * 2 ** 10 # 1000 kB
+MAX_PAGE_SIZE = 2000 * 2 ** 10 # 2000 kB
class PrintLimitReached(Exception):
pass
@@ -88,11 +102,35 @@ def create_limited_print_func(print_func):
raise PrintLimitReached()
return limited_print_func
+# Huge thanks to Stefaan Himpe for this solution:
+# http://technogems.blogspot.com/2011/09/generate-side-by-side-diffs-in-html.html
+def create_diff(lines1, lines2):
+ with make_temp_directory() as temp_dir:
+ path1 = os.path.join(temp_dir, 'content1')
+ path2 = os.path.join(temp_dir, 'content2')
+ diff_path = os.path.join(temp_dir, 'diff.html')
+ with open(path1, 'w') as f:
+ f.writelines(lines1)
+ with open(path2, 'w') as f:
+ f.writelines(lines2)
+ subprocess.check_call(
+ ['vim', '-n', '-N', '-e', '-i', 'NONE', '-u', 'NORC', '-U', 'NORC',
+ '-d', path1, path2,
+ '-c', 'colorscheme zellner',
+ '-c', 'let g:html_number_lines=1',
+ '-c', 'let g:html_use_css=1',
+ '-c', 'TOhtml',
+ '-c', 'w! %s' % (diff_path,),
+ '-c', 'qall!',
+ ], shell=False, close_fds=True,
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+ output = open(diff_path).read()
+ output = re.search(r'(<table.*</table>)', output, flags=re.MULTILINE | re.DOTALL).group(1)
+ output = re.sub(r'<th.*</th>', '', output, flags=re.MULTILINE | re.DOTALL)
+ return output
+
def output_difference(difference, print_func):
logger.debug('html output for %s' % (difference.source1,))
- if not hasattr(output_difference, 'htmldiff'):
- output_difference.htmldiff = difflib.HtmlDiff(wrapcolumn=70)
-
print_func("<div class='difference'>")
try:
if difference.source1 == difference.source2:
@@ -103,9 +141,7 @@ def output_difference(difference, print_func):
if difference.comment:
print_func("<div class='comment'>%s</div>" % escape(difference.comment))
if difference.lines1 and difference.lines2:
- print_func(output_difference.htmldiff.make_table(
- difference.lines1, difference.lines2,
- context=True, numlines=3))
+ print_func(create_diff(difference.lines1, difference.lines2))
for detail in difference.details:
output_difference(detail, print_func)
except PrintLimitReached, e:
@@ -119,11 +155,10 @@ def output_html(differences, print_func=None):
print_func = print
print_func = create_limited_print_func(print_func)
try:
- print_func(HEADER % { 'title': escape(' '.join(sys.argv)),
- 'htmldiff_styles': difflib._styles })
+ print_func(HEADER % { 'title': escape(' '.join(sys.argv)) })
for difference in differences:
output_difference(difference, print_func)
except PrintLimitReached, e:
logger.debug('print limit reached')
print_func("<div class='error'>Max output size reached.</div>", force=True)
- print_func(FOOTER % { 'htmldiff_legend': difflib._legend }, force=True)
+ print_func(FOOTER, force=True)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git
More information about the Reproducible-commits
mailing list