[Reproducible-commits] [debbindiff] 06/09: Replace Python difflib by vim for speed reasons

Jérémy Bobbio lunar at moszumanska.debian.org
Tue Sep 30 15:10:11 UTC 2014


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository debbindiff.

commit 2fd094ae584816c7786bf930efc3ded460ff6ac3
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Tue Sep 30 15:46:58 2014 +0200

    Replace Python difflib by vim for speed reasons
    
    difflib is unbearably slow for any non-trivial output. Calling out to vim is
    much faster and the output is still quite nice. Crazy trick, though.
---
 debbindiff/presenters/html.py | 61 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 48 insertions(+), 13 deletions(-)

diff --git a/debbindiff/presenters/html.py b/debbindiff/presenters/html.py
index 2766c68..24e99b2 100644
--- a/debbindiff/presenters/html.py
+++ b/debbindiff/presenters/html.py
@@ -18,11 +18,14 @@
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
 from __future__ import print_function
-import difflib
 import os.path
+import re
+import subprocess
 import sys
+from tempfile import NamedTemporaryFile
 from xml.sax.saxutils import escape
 from debbindiff import logger
+from debbindiff.comparators.utils import make_temp_directory
 
 HEADER = """
 <!DOCTYPE html>
@@ -60,20 +63,31 @@ HEADER = """
     table.diff {
       font-size: 10pt;
     }
-    %(htmldiff_styles)s
+    .lnr {
+      background-color: #ccc;
+      color: #666;
+    }
+    .DiffChange {
+      background-color: #ff8888;
+      font-weight: bold;
+    }
+    .DiffText {
+      color: white;
+      background-color: #ff4444;
+      font-weight: bold;
+    }
   </style>
 </head>
 <body>
 """
 
 FOOTER = """
-%(htmldiff_legend)s
 <div class="footer">Generated by debbindiff</div>
 </body>
 </html>
 """
 
-MAX_PAGE_SIZE = 1000 * 2 ** 10 # 1000 kB
+MAX_PAGE_SIZE = 2000 * 2 ** 10 # 2000 kB
 
 class PrintLimitReached(Exception):
     pass
@@ -88,11 +102,35 @@ def create_limited_print_func(print_func):
             raise PrintLimitReached()
     return limited_print_func
 
+# Huge thanks to Stefaan Himpe for this solution:
+# http://technogems.blogspot.com/2011/09/generate-side-by-side-diffs-in-html.html
+def create_diff(lines1, lines2):
+    with make_temp_directory() as temp_dir:
+        path1 = os.path.join(temp_dir, 'content1')
+        path2 = os.path.join(temp_dir, 'content2')
+        diff_path = os.path.join(temp_dir, 'diff.html')
+        with open(path1, 'w') as f:
+            f.writelines(lines1)
+        with open(path2, 'w') as f:
+            f.writelines(lines2)
+        subprocess.check_call(
+            ['vim', '-n', '-N', '-e', '-i', 'NONE', '-u', 'NORC', '-U', 'NORC',
+             '-d', path1, path2,
+             '-c', 'colorscheme zellner',
+             '-c', 'let g:html_number_lines=1',
+             '-c', 'let g:html_use_css=1',
+             '-c', 'TOhtml',
+             '-c', 'w! %s' % (diff_path,),
+             '-c', 'qall!',
+            ], shell=False, close_fds=True,
+            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+        output = open(diff_path).read()
+        output = re.search(r'(<table.*</table>)', output, flags=re.MULTILINE | re.DOTALL).group(1)
+        output = re.sub(r'<th.*</th>', '', output, flags=re.MULTILINE | re.DOTALL)
+        return output
+
 def output_difference(difference, print_func):
     logger.debug('html output for %s' % (difference.source1,))
-    if not hasattr(output_difference, 'htmldiff'):
-        output_difference.htmldiff = difflib.HtmlDiff(wrapcolumn=70)
-
     print_func("<div class='difference'>")
     try:
         if difference.source1 == difference.source2:
@@ -103,9 +141,7 @@ def output_difference(difference, print_func):
         if difference.comment:
             print_func("<div class='comment'>%s</div>" % escape(difference.comment))
         if difference.lines1 and difference.lines2:
-            print_func(output_difference.htmldiff.make_table(
-                difference.lines1, difference.lines2,
-                context=True, numlines=3))
+            print_func(create_diff(difference.lines1, difference.lines2))
         for detail in difference.details:
             output_difference(detail, print_func)
     except PrintLimitReached, e:
@@ -119,11 +155,10 @@ def output_html(differences, print_func=None):
         print_func = print
     print_func = create_limited_print_func(print_func)
     try:
-        print_func(HEADER % { 'title': escape(' '.join(sys.argv)),
-                              'htmldiff_styles': difflib._styles })
+        print_func(HEADER % { 'title': escape(' '.join(sys.argv)) })
         for difference in differences:
             output_difference(difference, print_func)
     except PrintLimitReached, e:
         logger.debug('print limit reached')
         print_func("<div class='error'>Max output size reached.</div>", force=True)
-    print_func(FOOTER % { 'htmldiff_legend': difflib._legend }, force=True)
+    print_func(FOOTER, force=True)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git



More information about the Reproducible-commits mailing list