[Reproducible-commits] [debbindiff] 01/01: Speed up creation of diffs with vim.

Reiner Herrmann deki-guest at moszumanska.debian.org
Sat Jan 10 10:53:11 UTC 2015


This is an automated email from the git hooks/post-receive script.

deki-guest pushed a commit to branch master
in repository debbindiff.

commit b799e20a2e8608c8cc536eff275b404615cfd398
Author: Reiner Herrmann <reiner at reiner-h.de>
Date:   Fri Jan 9 21:15:01 2015 +0100

    Speed up creation of diffs with vim.
    
    Creating HTML diffs with vim gets very slow when the files
    become larger and there are very large blocks that differ.
    To speed this up, the individual differing blocks are
    shrunk when they are longer than a configured number of lines.
    The information about the blocks is obtained by first calling
    'diff', which is extremely fast even on large files.
---
 debbindiff/presenters/html.py | 54 +++++++++++++++++++++++++++++++++++++++++++
 debian/control                |  1 +
 2 files changed, 55 insertions(+)

diff --git a/debbindiff/presenters/html.py b/debbindiff/presenters/html.py
index f8fddf1..b9658b7 100644
--- a/debbindiff/presenters/html.py
+++ b/debbindiff/presenters/html.py
@@ -22,6 +22,7 @@ import os.path
 import re
 import subprocess
 import sys
+import tempfile
 from xml.sax.saxutils import escape
 from debbindiff import logger, VERSION
 from debbindiff.comparators.utils import make_temp_directory
@@ -96,6 +97,7 @@ FOOTER = """
 """
 
 DEFAULT_MAX_PAGE_SIZE = 2000 * 2 ** 10  # 2000 kB
+MAX_DIFF_BLOCK_LINES = 500
 
 
 class PrintLimitReached(Exception):
@@ -113,6 +115,57 @@ def create_limited_print_func(print_func, max_page_size):
     return limited_print_func
 
 
+def trim_file(path, skip_lines):
+    n = 0
+    skip = 0
+    content = open(path, "r")
+    tmp_file = tempfile.NamedTemporaryFile("w", delete=False)
+    for line in content:
+        n += 1
+        if n in skip_lines.keys():
+            skip = skip_lines[n]
+            tmp_file.write("[ %d lines removed ]\n" % skip)
+
+        if skip > 0:
+            if n not in skip_lines.keys():
+                # insert dummy line to preserve correct line numbers
+                tmp_file.write(".\n")
+            skip -= 1
+        else:
+            tmp_file.write(line)
+    content.close()
+    tmp_file.close()
+    os.rename(tmp_file.name, path)
+
+# reduce size of diff blocks by prediffing with diff (which is extremely fast)
+# and then trimming the blocks larger than the configured limit
+def diff_optimize_files(path1, path2):
+    p = subprocess.Popen(['diff', '-u0', path1, path2], shell=False,
+        close_fds=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+    (stdout, _) = p.communicate()
+    p.wait()
+    if p.returncode != 1:
+        return 'diff exited with error %d' % p.returncode
+
+    skip_lines1 = dict()
+    skip_lines2 = dict()
+    search = re.compile('^@@\s+-(\d+),(\d+)\s+\+(\d+),(\d+)\s+@@$')
+    for line in stdout.split('\n'):
+        found = search.match(line)
+        if found:
+            (start1, start2) = (int(found.group(1)), int(found.group(3)))
+            (len1, len2) = (int(found.group(2)), int(found.group(4)))
+            if len1 > MAX_DIFF_BLOCK_LINES:
+                skip_lines1[start1 + MAX_DIFF_BLOCK_LINES] = len1 - MAX_DIFF_BLOCK_LINES
+            if len2 > MAX_DIFF_BLOCK_LINES:
+                skip_lines2[start2 + MAX_DIFF_BLOCK_LINES] = len2 - MAX_DIFF_BLOCK_LINES
+
+    if len(skip_lines1) > 0:
+        trim_file(path1, skip_lines1)
+    if len(skip_lines2) > 0:
+        trim_file(path2, skip_lines2)
+
+
 # Huge thanks to Stefaan Himpe for this solution:
 # http://technogems.blogspot.com/2011/09/generate-side-by-side-diffs-in-html.html
 def create_diff(lines1, lines2):
@@ -124,6 +177,7 @@ def create_diff(lines1, lines2):
             f.writelines(map(lambda u: u.encode('utf-8'), lines1))
         with open(path2, 'w') as f:
             f.writelines(map(lambda u: u.encode('utf-8'), lines2))
+        diff_optimize_files(path1, path2)
         p = subprocess.Popen(
             ['vim', '-n', '-N', '-e', '-i', 'NONE', '-u', 'NORC', '-U', 'NORC',
              '-d', path1, path2,
diff --git a/debian/control b/debian/control
index 4eb0979..7e21315 100644
--- a/debian/control
+++ b/debian/control
@@ -19,6 +19,7 @@ Package: debbindiff
 Architecture: all
 Depends: binutils-multiarch,
          bzip2,
+         diffutils,
          file,
          fontforge-extras,
          gettext,

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git



More information about the Reproducible-commits mailing list