[Reproducible-commits] [debbindiff] 03/05: Perform content comparison when creating Difference objects

Jérémy Bobbio lunar at moszumanska.debian.org
Fri Mar 27 17:49:54 UTC 2015


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to annotated tag 12
in repository debbindiff.

commit 3d5f0d7b05eb39600178661587a60357262e5e80
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Fri Mar 27 15:22:00 2015 +0100

    Perform content comparison when creating Difference objects
    
    Instead of storing the full content twice when creating Difference objects, we
    now directly run `diff` and store the unified diff. Large blocks in the diff
    are still trimmed. This results in huge memory savings and debbindiff can now
    happily compare changes for installation-guide.
    
    As tool_required() is not only for comparators anymore, we move it to
    debbindiff, together with logger.
    
    Text output becomes really straightforward as we just have to write what
    we've previously recorded.
    
    For the HTML output, we stop using vim and instead borrow code from
    diff2html.py found at <http://git.droids-corp.org/?p=diff2html.git>.
    
    Closes: #772029
    Closes: #779476
---
 debbindiff.py                       |   2 +-
 debbindiff/__init__.py              |  55 ++++
 debbindiff/comparators/binary.py    |   3 +-
 debbindiff/comparators/bzip2.py     |   3 +-
 debbindiff/comparators/changes.py   |  12 +-
 debbindiff/comparators/cpio.py      |   7 +-
 debbindiff/comparators/deb.py       |   3 +-
 debbindiff/comparators/directory.py |  16 +-
 debbindiff/comparators/elf.py       |  13 +-
 debbindiff/comparators/fonts.py     |   6 +-
 debbindiff/comparators/gettext.py   |   6 +-
 debbindiff/comparators/gzip.py      |   6 +-
 debbindiff/comparators/haskell.py   |   5 +-
 debbindiff/comparators/pdf.py       |   9 +-
 debbindiff/comparators/png.py       |   6 +-
 debbindiff/comparators/rpm.py       |   7 +-
 debbindiff/comparators/squashfs.py  |   7 +-
 debbindiff/comparators/tar.py       |   3 +-
 debbindiff/comparators/text.py      |   8 +-
 debbindiff/comparators/utils.py     |  55 ----
 debbindiff/comparators/xz.py        |   3 +-
 debbindiff/comparators/zip.py       |   6 +-
 debbindiff/difference.py            | 140 +++++++++-
 debbindiff/presenters/html.py       | 538 +++++++++++++++++++++++++++---------
 debbindiff/presenters/text.py       |  18 +-
 debian/control                      |   5 +-
 debian/copyright                    |   4 +
 27 files changed, 662 insertions(+), 284 deletions(-)

diff --git a/debbindiff.py b/debbindiff.py
index b363629..55064ba 100755
--- a/debbindiff.py
+++ b/debbindiff.py
@@ -73,7 +73,7 @@ def make_printer(path):
 
 class ListToolsAction(argparse.Action):
     def __call__(self, parser, namespace, values, option_string=None):
-        from debbindiff.comparators.utils import tool_required, RequiredToolNotFound
+        from debbindiff import tool_required, RequiredToolNotFound
         print("External tools required:")
         print(', '.join(tool_required.all))
         print()
diff --git a/debbindiff/__init__.py b/debbindiff/__init__.py
index 65daf48..42b9ca9 100644
--- a/debbindiff/__init__.py
+++ b/debbindiff/__init__.py
@@ -18,6 +18,7 @@
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
 import logging
+from distutils.spawn import find_executable
 
 VERSION = "11"
 
@@ -28,3 +29,57 @@ ch.setLevel(logging.DEBUG)
 logger.addHandler(ch)
 formatter = logging.Formatter('%(levelname)8s %(message)s')
 ch.setFormatter(formatter)
+
+
+class RequiredToolNotFound(Exception):
+    PROVIDERS = { 'ar':         { 'debian': 'binutils-multiarch' }
+                , 'bzip2':      { 'debian': 'bzip2' }
+                , 'cpio':       { 'debian': 'cpio' }
+                , 'diff':       { 'debian': 'diffutils' }
+                , 'file':       { 'debian': 'file' }
+                , 'getfacl':    { 'debian': 'acl' }
+                , 'ghc':        { 'debian': 'ghc' }
+                , 'gpg':        { 'debian': 'gnupg' }
+                , 'gzip':       { 'debian': 'gzip' }
+                , 'ls':         { 'debian': 'coreutils' }
+                , 'lsattr':     { 'debian': 'e2fsprogs' }
+                , 'msgunfmt':   { 'debian': 'gettext' }
+                , 'objdump':    { 'debian': 'binutils-multiarch' }
+                , 'pdftk':      { 'debian': 'pdftk' }
+                , 'pdftotext':  { 'debian': 'poppler-utils' }
+                , 'readelf':    { 'debian': 'binutils-multiarch' }
+                , 'rpm2cpio':   { 'debian': 'rpm2cpio' }
+                , 'showttf':    { 'debian': 'fontforge-extras' }
+                , 'sng':        { 'debian': 'sng' }
+                , 'stat':       { 'debian': 'coreutils' }
+                , 'unsquashfs': { 'debian': 'squashfs-tools' }
+                , 'xxd':        { 'debian': 'vim-common' }
+                , 'xz':         { 'debian': 'xz-utils' }
+                , 'zipinfo':    { 'debian': 'unzip' }
+              }
+
+    def __init__(self, command):
+        self.command = command
+
+    def get_package(self):
+        providers = RequiredToolNotFound.PROVIDERS.get(self.command, None)
+        if not providers:
+            return None
+        # XXX: hardcode Debian for now
+        return providers['debian']
+
+
+# decorator that checks if the specified tool is installed
+def tool_required(command):
+    if not hasattr(tool_required, 'all'):
+        tool_required.all = set()
+    tool_required.all.add(command)
+    def wrapper(original_function):
+        if find_executable(command):
+            def tool_check(*args, **kwargs):
+                return original_function(*args, **kwargs)
+        else:
+            def tool_check(*args, **kwargs):
+                raise RequiredToolNotFound(command)
+        return tool_check
+    return wrapper
diff --git a/debbindiff/comparators/binary.py b/debbindiff/comparators/binary.py
index c6002ed..de636e8 100644
--- a/debbindiff/comparators/binary.py
+++ b/debbindiff/comparators/binary.py
@@ -30,5 +30,4 @@ def compare_binary_files(path1, path2, source=None):
     hexdump2 = get_hexdump(path2)
     if hexdump1 == hexdump2:
         return []
-    return [Difference(hexdump1.splitlines(1), hexdump2.splitlines(1),
-                       path1, path2, source)]
+    return [Difference(hexdump1, hexdump2, path1, path2, source)]
diff --git a/debbindiff/comparators/bzip2.py b/debbindiff/comparators/bzip2.py
index ab2f541..842bb7a 100644
--- a/debbindiff/comparators/bzip2.py
+++ b/debbindiff/comparators/bzip2.py
@@ -21,8 +21,9 @@ from contextlib import contextmanager
 import os.path
 import subprocess
 import debbindiff.comparators
-from debbindiff.comparators.utils import binary_fallback, make_temp_directory, tool_required
+from debbindiff.comparators.utils import binary_fallback, make_temp_directory
 from debbindiff.difference import get_source
+from debbindiff import tool_required
 
 
 @contextmanager
diff --git a/debbindiff/comparators/changes.py b/debbindiff/comparators/changes.py
index 400815c..2555f62 100644
--- a/debbindiff/comparators/changes.py
+++ b/debbindiff/comparators/changes.py
@@ -45,12 +45,10 @@ def compare_changes_files(path1, path2, source=None):
     differences = []
     for field in DOT_CHANGES_FIELDS:
         if dot_changes1[field] != dot_changes2[field]:
-            lines1 = dot_changes1[field].splitlines(1)
-            lines2 = dot_changes2[field].splitlines(1)
-            lines1[0] = "%s: %s" % (field, lines1[0])
-            lines2[0] = "%s: %s" % (field, lines2[0])
+            content1 = "%s: %s" % (field, dot_changes1[field])
+            content2 = "%s: %s" % (field, dot_changes2[field])
             differences.append(Difference(
-                lines1, lines2,
+                content1, content2,
                 dot_changes1.get_changes_file(),
                 dot_changes2.get_changes_file(),
                 source=source))
@@ -64,8 +62,8 @@ def compare_changes_files(path1, path2, source=None):
         return differences
 
     files_difference = Difference(
-        dot_changes1.get_as_string('Files').splitlines(1),
-        dot_changes2.get_as_string('Files').splitlines(1),
+        dot_changes1.get_as_string('Files'),
+        dot_changes2.get_as_string('Files'),
         dot_changes1.get_changes_file(),
         dot_changes2.get_changes_file(),
         source=source,
diff --git a/debbindiff/comparators/cpio.py b/debbindiff/comparators/cpio.py
index 3132577..86a643f 100644
--- a/debbindiff/comparators/cpio.py
+++ b/debbindiff/comparators/cpio.py
@@ -20,8 +20,8 @@
 import subprocess
 import os.path
 import debbindiff.comparators
-from debbindiff import logger
-from debbindiff.comparators.utils import binary_fallback, make_temp_directory, tool_required
+from debbindiff import logger, tool_required
+from debbindiff.comparators.utils import binary_fallback, make_temp_directory
 from debbindiff.difference import Difference
 
 
@@ -54,8 +54,7 @@ def compare_cpio_files(path1, path2, source=None):
     content2 = get_cpio_content(path2, verbose=True)
     if content1 != content2:
         differences.append(Difference(
-            content1.splitlines(1), content2.splitlines(1),
-            path1, path2, source="metadata"))
+            content1, content2, path1, path2, source="metadata"))
 
     # compare files contained in archive
     content1 = get_cpio_content(path1, verbose=False)
diff --git a/debbindiff/comparators/deb.py b/debbindiff/comparators/deb.py
index f67f18c..009ee77 100644
--- a/debbindiff/comparators/deb.py
+++ b/debbindiff/comparators/deb.py
@@ -57,8 +57,7 @@ def compare_deb_files(path1, path2, source=None):
     content2 = get_ar_content(path2)
     if content1 != content2:
         differences.append(Difference(
-            content1.splitlines(1), content2.splitlines(1),
-            path1, path2, source="metadata"))
+            content1, content2, path1, path2, source="metadata"))
     return differences
 
 
diff --git a/debbindiff/comparators/directory.py b/debbindiff/comparators/directory.py
index fd7fe54..7e2bb5b 100644
--- a/debbindiff/comparators/directory.py
+++ b/debbindiff/comparators/directory.py
@@ -20,9 +20,8 @@
 import os.path
 import re
 import subprocess
-from debbindiff import logger
+from debbindiff import logger, tool_required
 from debbindiff.difference import Difference
-from debbindiff.comparators.utils import tool_required
 import debbindiff.comparators
 
 
@@ -63,8 +62,7 @@ def compare_meta(path1, path2):
         stat2 = stat(path2)
         if stat1 != stat2:
             differences.append(Difference(
-                stat1.splitlines(1), stat2.splitlines(1),
-                path1, path2, source="stat"))
+                stat1, stat2, path1, path2, source="stat"))
     except RequiredToolNotFound:
         logger.warn("'stat' not found! Is PATH wrong?")
 
@@ -73,8 +71,7 @@ def compare_meta(path1, path2):
         lsattr2 = lsattr(path2)
         if lsattr1 != lsattr2:
             differences.append(Difference(
-                lsattr1.splitlines(1), lsattr2.splitlines(1),
-                path1, path2, source="lattr"))
+                lsattr1, lsattr2, path1, path2, source="lattr"))
     except RequiredToolNotFound:
         logger.info("Unable to find 'lsattr'.")
 
@@ -83,8 +80,7 @@ def compare_meta(path1, path2):
         acl2 = getfacl(path2)
         if acl1 != acl2:
             differences.append(Difference(
-                acl1.splitlines(1), acl2.splitlines(1),
-                path1, path2, source="getfacl"))
+                acl1, acl2, path1, path2, source="getfacl"))
     except RequiredToolNotFound:
         logger.info("Unable to find 'getfacl'.")
     return differences
@@ -112,9 +108,7 @@ def compare_directories(path1, path2, source=None):
     ls1 = sorted(ls(path1))
     ls2 = sorted(ls(path2))
     if ls1 != ls2:
-        differences.append(Difference(
-            ls1.splitlines(1), ls2.splitlines(1),
-            path1, path2, source="ls"))
+        differences.append(Difference(ls1, ls2, path1, path2, source="ls"))
     differences.extend(compare_meta(path1, path2))
     if differences:
         d = Difference(None, None, path1, path2, source=source)
diff --git a/debbindiff/comparators/elf.py b/debbindiff/comparators/elf.py
index 8bf447b..911c515 100644
--- a/debbindiff/comparators/elf.py
+++ b/debbindiff/comparators/elf.py
@@ -20,7 +20,8 @@
 import os.path
 import re
 import subprocess
-from debbindiff.comparators.utils import binary_fallback, get_ar_content, tool_required
+from debbindiff import tool_required
+from debbindiff.comparators.utils import binary_fallback, get_ar_content
 from debbindiff.difference import Difference
 
 
@@ -59,19 +60,18 @@ def _compare_elf_data(path1, path2, source=None):
     all2 = readelf_all(path2)
     if all1 != all2:
         differences.append(Difference(
-            all1.splitlines(1), all2.splitlines(1),
-            path1, path2, source='readelf --all'))
+            all1, all2, path1, path2, source='readelf --all'))
     debug_dump1 = readelf_debug_dump(path1)
     debug_dump2 = readelf_debug_dump(path2)
     if debug_dump1 != debug_dump2:
         differences.append(Difference(
-            debug_dump1.splitlines(1), debug_dump2.splitlines(1),
+            debug_dump1, debug_dump2,
             path1, path2, source='readelf --debug-dump'))
     objdump1 = objdump_disassemble(path1)
     objdump2 = objdump_disassemble(path2)
     if objdump1 != objdump2:
         differences.append(Difference(
-            objdump1.splitlines(1), objdump2.splitlines(1),
+            objdump1, objdump2,
             path1, path2, source='objdump --disassemble --full-contents'))
     return differences
 
@@ -89,7 +89,6 @@ def compare_static_lib_files(path1, path2, source=None):
     content2 = get_ar_content(path2)
     if content1 != content2:
         differences.append(Difference(
-            content1.splitlines(1), content2.splitlines(1),
-            path1, path2, source="metadata"))
+            content1, content2, path1, path2, source="metadata"))
     differences.extend(_compare_elf_data(path1, path2, source))
     return differences
diff --git a/debbindiff/comparators/fonts.py b/debbindiff/comparators/fonts.py
index 464ee86..a064b30 100644
--- a/debbindiff/comparators/fonts.py
+++ b/debbindiff/comparators/fonts.py
@@ -19,7 +19,8 @@
 
 import locale
 import subprocess
-from debbindiff.comparators.utils import binary_fallback, tool_required
+from debbindiff import tool_required
+from debbindiff.comparators.utils import binary_fallback
 from debbindiff.difference import Difference
 
 
@@ -33,6 +34,5 @@ def compare_ttf_files(path1, path2, source=None):
     ttf1 = show_ttf(path1)
     ttf2 = show_ttf(path2)
     if ttf1 != ttf2:
-        return [Difference(ttf1.splitlines(1), ttf2.splitlines(1),
-                           path1, path2, source='showttf')]
+        return [Difference(ttf1, ttf2, path1, path2, source='showttf')]
     return []
diff --git a/debbindiff/comparators/gettext.py b/debbindiff/comparators/gettext.py
index 20400ea..26a20e5 100644
--- a/debbindiff/comparators/gettext.py
+++ b/debbindiff/comparators/gettext.py
@@ -18,7 +18,8 @@
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
 import subprocess
-from debbindiff.comparators.utils import binary_fallback, tool_required
+from debbindiff import tool_required
+from debbindiff.comparators.utils import binary_fallback
 from debbindiff.difference import Difference
 
 
@@ -32,6 +33,5 @@ def compare_mo_files(path1, path2, source=None):
     mo1 = msgunfmt(path1)
     mo2 = msgunfmt(path2)
     if mo1 != mo2:
-        return [Difference(mo1.splitlines(1), mo2.splitlines(1),
-                           path1, path2, source='msgunfmt')]
+        return [Difference(mo1, mo2, path1, path2, source='msgunfmt')]
     return []
diff --git a/debbindiff/comparators/gzip.py b/debbindiff/comparators/gzip.py
index 6272864..bedfaaa 100644
--- a/debbindiff/comparators/gzip.py
+++ b/debbindiff/comparators/gzip.py
@@ -21,7 +21,8 @@ from contextlib import contextmanager
 import subprocess
 import os.path
 import debbindiff.comparators
-from debbindiff.comparators.utils import binary_fallback, make_temp_directory, tool_required
+from debbindiff import tool_required
+from debbindiff.comparators.utils import binary_fallback, make_temp_directory
 from debbindiff.difference import Difference, get_source
 
 
@@ -53,8 +54,7 @@ def compare_gzip_files(path1, path2, source=None):
     metadata2 = get_gzip_metadata(path2)
     if metadata1 != metadata2:
         differences.append(Difference(
-            metadata1.splitlines(1), metadata2.splitlines(1),
-            path1, path2, source='metadata'))
+            metadata1, metadata2, path1, path2, source='metadata'))
     # check content
     with decompress_gzip(path1) as new_path1:
         with decompress_gzip(path2) as new_path2:
diff --git a/debbindiff/comparators/haskell.py b/debbindiff/comparators/haskell.py
index c1793e5..53fdcf1 100644
--- a/debbindiff/comparators/haskell.py
+++ b/debbindiff/comparators/haskell.py
@@ -18,7 +18,8 @@
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
 import subprocess
-from debbindiff.comparators.utils import binary_fallback, tool_required
+from debbindiff import tool_required
+from debbindiff.comparators.utils import binary_fallback
 from debbindiff.difference import Difference
 
 
@@ -32,6 +33,6 @@ def compare_hi_files(path1, path2, source=None):
     iface1 = show_iface(path1)
     iface2 = show_iface(path2)
     if iface1 != iface2:
-        return [Difference(iface1.splitlines(1), iface2.splitlines(1),
+        return [Difference(iface1, iface2,
                            path1, path2, source='ghc --show-iface')]
     return []
diff --git a/debbindiff/comparators/pdf.py b/debbindiff/comparators/pdf.py
index 8add5f2..4a2aa04 100644
--- a/debbindiff/comparators/pdf.py
+++ b/debbindiff/comparators/pdf.py
@@ -18,7 +18,8 @@
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
 import subprocess
-from debbindiff.comparators.utils import binary_fallback, tool_required
+from debbindiff import tool_required
+from debbindiff.comparators.utils import binary_fallback
 from debbindiff.difference import Difference, get_source
 
 
@@ -45,14 +46,12 @@ def compare_pdf_files(path1, path2, source=None):
     text2 = pdftotext(path2)
     if text1 != text2:
         differences.append(
-            Difference(text1.splitlines(1), text2.splitlines(1),
-                       text1, text2,
+            Difference(text1, text2, path1, path2,
                        source="pdftotext %s" % src))
     uncompressed1 = uncompress(path1)
     uncompressed2 = uncompress(path2)
     if uncompressed1 != uncompressed2:
         differences.append(
-            Difference(uncompressed1.splitlines(1), uncompressed2.splitlines(1),
-                       path1, path2,
+            Difference(uncompressed1, uncompressed2, path1, path2,
                        source="pdftk %s output - uncompress" % src))
     return differences
diff --git a/debbindiff/comparators/png.py b/debbindiff/comparators/png.py
index 85e630b..7487b79 100644
--- a/debbindiff/comparators/png.py
+++ b/debbindiff/comparators/png.py
@@ -18,7 +18,8 @@
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
 import subprocess
-from debbindiff.comparators.utils import binary_fallback, tool_required
+from debbindiff import tool_required
+from debbindiff.comparators.utils import binary_fallback
 from debbindiff.difference import Difference
 
 
@@ -38,7 +39,6 @@ def compare_png_files(path1, path2, source=None):
     sng1 = sng(path1)
     sng2 = sng(path2)
     if sng1 != sng2:
-        return [Difference(sng1.splitlines(1), sng2.splitlines(1),
-                           path1, path2, source='sng')]
+        return [Difference(sng1, sng2, path1, path2, source='sng')]
     return []
 
diff --git a/debbindiff/comparators/rpm.py b/debbindiff/comparators/rpm.py
index 9391ee4..85af22b 100644
--- a/debbindiff/comparators/rpm.py
+++ b/debbindiff/comparators/rpm.py
@@ -22,8 +22,8 @@ import os.path
 import subprocess
 from contextlib import contextmanager
 import debbindiff.comparators
-from debbindiff import logger
-from debbindiff.comparators.utils import binary_fallback, make_temp_directory, tool_required
+from debbindiff import logger, tool_required
+from debbindiff.comparators.utils import binary_fallback, make_temp_directory
 from debbindiff.difference import Difference, get_source
 
 def get_rpm_header(path, ts):
@@ -82,8 +82,7 @@ def compare_rpm_files(path1, path2, source=None):
         header2 = get_rpm_header(path2, ts)
         if header1 != header2:
             differences.append(Difference(
-                header1.splitlines(1), header2.splitlines(2),
-                path1, path2, source="header"))
+                header1, header2, path1, path2, source="header"))
 
     # extract cpio archive
     with extract_rpm_payload(path1) as archive1:
diff --git a/debbindiff/comparators/squashfs.py b/debbindiff/comparators/squashfs.py
index cee7c47..e198ec6 100644
--- a/debbindiff/comparators/squashfs.py
+++ b/debbindiff/comparators/squashfs.py
@@ -20,8 +20,8 @@
 import subprocess
 import os.path
 import debbindiff.comparators
-from debbindiff import logger
-from debbindiff.comparators.utils import binary_fallback, make_temp_directory, tool_required
+from debbindiff import logger, tool_required
+from debbindiff.comparators.utils import binary_fallback, make_temp_directory
 from debbindiff.difference import Difference
 
 
@@ -58,8 +58,7 @@ def compare_squashfs_files(path1, path2, source=None):
     content2 = get_squashfs_content(path2)
     if content1 != content2:
         differences.append(Difference(
-            content1.splitlines(1), content2.splitlines(1),
-            path1, path2, source="metadata"))
+            content1, content2, path1, path2, source="metadata"))
 
     # compare files contained in archive
     content1 = get_squashfs_content(path1, verbose=False)
diff --git a/debbindiff/comparators/tar.py b/debbindiff/comparators/tar.py
index 381d6d2..48fcc94 100644
--- a/debbindiff/comparators/tar.py
+++ b/debbindiff/comparators/tar.py
@@ -70,6 +70,5 @@ def compare_tar_files(path1, path2, source=None):
             content2 = get_tar_content(tar2).decode('utf-8')
             if content1 != content2:
                 differences.append(Difference(
-                    content1.splitlines(1), content2.splitlines(1),
-                    path1, path2, source="metadata"))
+                    content1, content2, path1, path2, source="metadata"))
     return differences
diff --git a/debbindiff/comparators/text.py b/debbindiff/comparators/text.py
index 1583ce1..0eccfaf 100644
--- a/debbindiff/comparators/text.py
+++ b/debbindiff/comparators/text.py
@@ -26,11 +26,11 @@ def compare_text_files(path1, path2, encoding, source=None):
     if encoding is None:
         encoding = 'utf-8'
     try:
-        lines1 = codecs.open(path1, 'r', encoding=encoding).readlines()
-        lines2 = codecs.open(path2, 'r', encoding=encoding).readlines()
+        content1 = codecs.open(path1, 'r', encoding=encoding).read()
+        content2 = codecs.open(path2, 'r', encoding=encoding).read()
     except (LookupError, UnicodeDecodeError):
         # unknown or misdetected encoding
         return compare_binary_files(path1, path2, source)
-    if lines1 == lines2:
+    if content1 == content2:
         return []
-    return [Difference(lines1, lines2, path1, path2, source)]
+    return [Difference(content1, content2, path1, path2, source)]
diff --git a/debbindiff/comparators/utils.py b/debbindiff/comparators/utils.py
index a036d47..d2e476b 100644
--- a/debbindiff/comparators/utils.py
+++ b/debbindiff/comparators/utils.py
@@ -19,7 +19,6 @@
 
 from contextlib import contextmanager
 # The following would be shutil.which in Python 3.3
-from distutils.spawn import find_executable
 import hashlib
 import re
 import os
@@ -83,60 +82,6 @@ def binary_fallback(original_function):
     return with_fallback
 
 
-class RequiredToolNotFound(Exception):
-    PROVIDERS = { 'ar':         { 'debian': 'binutils-multiarch' }
-                , 'bzip2':      { 'debian': 'bzip2' }
-                , 'cpio':       { 'debian': 'cpio' }
-                , 'file':       { 'debian': 'file' }
-                , 'getfacl':    { 'debian': 'acl' }
-                , 'ghc':        { 'debian': 'ghc' }
-                , 'gpg':        { 'debian': 'gnupg' }
-                , 'gzip':       { 'debian': 'gzip' }
-                , 'ls':         { 'debian': 'coreutils' }
-                , 'lsattr':     { 'debian': 'e2fsprogs' }
-                , 'msgunfmt':   { 'debian': 'gettext' }
-                , 'objdump':    { 'debian': 'binutils-multiarch' }
-                , 'pdftk':      { 'debian': 'pdftk' }
-                , 'pdftotext':  { 'debian': 'poppler-utils' }
-                , 'readelf':    { 'debian': 'binutils-multiarch' }
-                , 'rpm2cpio':   { 'debian': 'rpm2cpio' }
-                , 'showttf':    { 'debian': 'fontforge-extras' }
-                , 'sng':        { 'debian': 'sng' }
-                , 'stat':       { 'debian': 'coreutils' }
-                , 'unsquashfs': { 'debian': 'squashfs-tools' }
-                , 'vim':        { 'debian': 'vim' }
-                , 'xxd':        { 'debian': 'vim-common' }
-                , 'xz':         { 'debian': 'xz-utils' }
-                , 'zipinfo':    { 'debian': 'unzip' }
-              }
-
-    def __init__(self, command):
-        self.command = command
-
-    def get_package(self):
-        providers = RequiredToolNotFound.PROVIDERS.get(self.command, None)
-        if not providers:
-            return None
-        # XXX: hardcode Debian for now
-        return providers['debian']
-
-
-# decorator that checks if the specified tool is installed
-def tool_required(command):
-    if not hasattr(tool_required, 'all'):
-        tool_required.all = set()
-    tool_required.all.add(command)
-    def wrapper(original_function):
-        if find_executable(command):
-            def tool_check(*args, **kwargs):
-                return original_function(*args, **kwargs)
-        else:
-            def tool_check(*args, **kwargs):
-                raise RequiredToolNotFound(command)
-        return tool_check
-    return wrapper
-
-
 @contextmanager
 def make_temp_directory():
     temp_dir = tempfile.mkdtemp(suffix='debbindiff')
diff --git a/debbindiff/comparators/xz.py b/debbindiff/comparators/xz.py
index e24e05d..11ccd14 100644
--- a/debbindiff/comparators/xz.py
+++ b/debbindiff/comparators/xz.py
@@ -21,7 +21,8 @@ from contextlib import contextmanager
 import os.path
 import subprocess
 import debbindiff.comparators
-from debbindiff.comparators.utils import binary_fallback, make_temp_directory, tool_required
+from debbindiff import tool_required
+from debbindiff.comparators.utils import binary_fallback, make_temp_directory
 from debbindiff.difference import get_source
 
 
diff --git a/debbindiff/comparators/zip.py b/debbindiff/comparators/zip.py
index dc08d67..1c2be6b 100644
--- a/debbindiff/comparators/zip.py
+++ b/debbindiff/comparators/zip.py
@@ -24,7 +24,8 @@ from zipfile import ZipFile
 from debbindiff import logger
 from debbindiff.difference import Difference
 import debbindiff.comparators
-from debbindiff.comparators.utils import binary_fallback, make_temp_directory, tool_required
+from debbindiff import tool_required
+from debbindiff.comparators.utils import binary_fallback, make_temp_directory
 
 
 @tool_required('zipinfo')
@@ -71,6 +72,5 @@ def compare_zip_files(path1, path2, source=None):
                 zipinfo2 = get_zipinfo(path2, verbose=True)
             if zipinfo1 != zipinfo2:
                 differences.append(Difference(
-                    zipinfo1.splitlines(1), zipinfo2.splitlines(1),
-                    path1, path2, source="metadata"))
+                    zipinfo1, zipinfo2, path1, path2, source="metadata"))
     return differences
diff --git a/debbindiff/difference.py b/debbindiff/difference.py
index 313ba1a..43536d8 100644
--- a/debbindiff/difference.py
+++ b/debbindiff/difference.py
@@ -18,15 +18,134 @@
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
 import os.path
+from functools import partial
+from tempfile import NamedTemporaryFile
+import re
+import subprocess
+from debbindiff import logger, tool_required, RequiredToolNotFound
+
+
+MAX_DIFF_BLOCK_LINES = 50
+
+
+class DiffParser(object):
+    RANGE_RE = re.compile(r'^@@\s+-(?P<start1>\d+)(,(?P<len1>\d+))?\s+\+(?P<start2>\d+)(,(?P<len2>\d+))?\s+@@$')
+
+    def __init__(self, output):
+        self._output = output
+        self._action = self.read_headers
+        self._diff = ''
+        self._remaining_hunk_lines = None
+        self._block_len = None
+        self._direction = None
+
+    def parse(self):
+        while True:
+            line = self._output.readline().decode('utf-8')
+            if line == '': # EOF
+                return self._diff
+            self._action = self._action(line)
+
+    def read_headers(self, line):
+        found = DiffParser.RANGE_RE.match(line)
+        if line.startswith('---'):
+            return self.read_headers
+        elif line.startswith('+++'):
+            return self.read_headers
+        elif not found:
+            raise ValueError('Unable to parse diff headers: %s' % repr(line))
+        self._diff += line
+        if found.group('len1'):
+            self._remaining_hunk_lines = int(found.group('len1'))
+        else:
+            self._remaining_hunk_lines = 1
+        if found.group('len2'):
+            self._remaining_hunk_lines += int(found.group('len2'))
+        else:
+            self._remaining_hunk_lines += 1
+        self._direction = None
+        return self.read_hunk
+
+    def read_hunk(self, line):
+        if line[0] == ' ':
+            self._remaining_hunk_lines -= 2
+        elif line[0] == '+':
+            self._remaining_hunk_lines -= 1
+        elif line[0] == '-':
+            self._remaining_hunk_lines -= 1
+        elif line[0] == '\\':
+            pass
+        elif self._remaining_hunk_lines == 0:
+            return self.read_headers(line)
+        else:
+            raise ValueError('Unable to parse diff hunk: %s' % repr(line))
+        self._diff += line
+        if line[0] in ('-', '+') and line[0] == self._direction:
+            self._block_len += 1
+            if self._block_len >= MAX_DIFF_BLOCK_LINES:
+                return self.skip_block
+        else:
+            self._block_len = 1
+            self._direction = line[0]
+        return self.read_hunk
+
+    def skip_block(self, line):
+        if not line.startswith(self._direction):
+            self._diff += '%s[ %d lines removed ]\n' % (self._direction, self._block_len - MAX_DIFF_BLOCK_LINES)
+            return self.read_hunk(line)
+        self._block_len += 1
+        self._remaining_hunk_lines -= 1
+        if self._remaining_hunk_lines == 0:
+            self._diff += '%s[ %d lines removed ]\n' % (self._direction, self._block_len - MAX_DIFF_BLOCK_LINES)
+            return self.read_headers
+        return self.skip_block
+
+
+@tool_required('diff')
+def diff(content1, content2):
+    with NamedTemporaryFile('w') as tmp_file1:
+        with NamedTemporaryFile('w') as tmp_file2:
+            # fill temporary files
+            tmp_file1.write(content1.encode('utf-8'))
+            tmp_file2.write(content2.encode('utf-8'))
+            # work around a unified diff limitation: if neither content ends
+            # with a newline, add one to both so it isn't shown as a change
+            if content1[-1] != '\n' and content2[-1] != '\n':
+                tmp_file1.write('\n')
+                tmp_file2.write('\n')
+            tmp_file1.flush()
+            tmp_file2.flush()
+            # run diff
+            logger.debug('running diff')
+            cmd = ['diff', '-au7', tmp_file1.name, tmp_file2.name]
+            p = subprocess.Popen(cmd, shell=False,
+                                 close_fds=True, stdin=None, stdout=subprocess.PIPE,
+                                 stderr=subprocess.STDOUT)
+            # parse output
+            logger.debug('parsing diff output')
+            diff = DiffParser(p.stdout).parse()
+            p.wait()
+            if p.returncode not in (0, 1):
+                raise subprocess.CalledProcessError(cmd, p.returncode, output=diff)
+            return diff
 
 
 class Difference(object):
-    def __init__(self, lines1, lines2, path1, path2, source=None,
+    def __init__(self, content1, content2, path1, path2, source=None,
                  comment=None):
-        if lines1 and type(lines1[0]) is not unicode:
-            raise UnicodeError('lines1 has not been decoded')
-        if lines2 and type(lines2[0]) is not unicode:
-            raise UnicodeError('lines2 has not been decoded')
+        self._comment = comment
+        if content1 and type(content1) is not unicode:
+            raise UnicodeError('content1 has not been decoded')
+        if content2 and type(content2) is not unicode:
+            raise UnicodeError('content2 has not been decoded')
+        self._unified_diff = None
+        if content1 is not None and content2 is not None:
+            try:
+                self._unified_diff = diff(content1, content2)
+            except RequiredToolNotFound:
+                self._comment = 'diff is not available!'
+                if comment:
+                    self._comment += '\n\n' + comment
         # allow to override declared file paths, useful when comparing
         # tempfiles
         if source:
@@ -38,9 +157,6 @@ class Difference(object):
         else:
             self._source1 = path1
             self._source2 = path2
-        self._lines1 = lines1
-        self._lines2 = lines2
-        self._comment = comment
         self._details = []
 
     @property
@@ -60,12 +176,8 @@ class Difference(object):
         return self._source2
 
     @property
-    def lines1(self):
-        return self._lines1
-
-    @property
-    def lines2(self):
-        return self._lines2
+    def unified_diff(self):
+        return self._unified_diff
 
     @property
     def details(self):
diff --git a/debbindiff/presenters/html.py b/debbindiff/presenters/html.py
index b687f9e..71a9483 100644
--- a/debbindiff/presenters/html.py
+++ b/debbindiff/presenters/html.py
@@ -4,6 +4,10 @@
 #
 # Copyright © 2014-2015 Jérémy Bobbio <lunar at debian.org>
 #           ©      2015 Reiner Herrmann <reiner at reiner-h.de>
+#           © 2012-2013 Olivier Matz <zer0 at droids-corp.org>
+#           ©      2012 Alan De Smet <adesmet at cs.wisc.edu>
+#           ©      2012 Sergey Satskiy <sergey.satskiy at gmail.com>
+#           ©      2012 scito <info at scito.ch>
 #
 #
 # debbindiff is free software: you can redistribute it and/or modify
@@ -18,9 +22,18 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
+#
+#
+# Most of the code is borrowed from diff2html.py available at:
+# http://git.droids-corp.org/?p=diff2html.git
+#
+# Part of the code is inspired by diff2html.rb from
+# Dave Burt <dave (at) burt.id.au> (mainly for html theme)
+#
 
 from __future__ import print_function
 import os.path
+import htmlentitydefs
 import re
 import subprocess
 import sys
@@ -29,12 +42,23 @@ from xml.sax.saxutils import escape
 from debbindiff import logger, VERSION
 from debbindiff.comparators.utils import make_temp_directory
 
+# minimum line size, we add a zero-sized breakable space every
+# LINESIZE characters
+LINESIZE = 20
+TABSIZE = 8
+
+# Characters we're willing to word wrap on
+WORDBREAK = " \t;.,/):-"
+
+DIFFON = "\x01"
+DIFFOFF = "\x02"
+
 HEADER = """
 <!DOCTYPE html>
 <html>
 <head>
   <meta charset="utf-8">
-  <meta name="generator" content="pandoc">
+  <meta name="generator" content="debbindiff">
   <title>%(title)s</title>
   <style>
     body {
@@ -57,14 +81,41 @@ HEADER = """
     }
     .difference th,
     .difference td {
-      width: 50%%;
       border: 0;
     }
-    .difference td + td {
-      border-left: solid black 1px;
+    table.diff {
+      border: 0px;
+      border-collapse:collapse;
+      font-size:0.75em;
+      font-family: Lucida Console, monospace;
+    }
+    td.line {
+      color:#8080a0
+    }
+    th {
+      background: black;
+      color: white
+    }
+    tr.diffunmodified td {
+      background: #D0D0E0
+    }
+    tr.diffhunk td {
+      background: #A0A0A0
+    }
+    tr.diffadded td {
+      background: #CCFFCC
     }
-    .difference td div {
-      overflow: auto;
+    tr.diffdeleted td {
+      background: #FFCCCC
+    }
+    tr.diffchanged td {
+      background: #FFFFA0
+    }
+    span.diffchanged2 {
+      background: #E0C880
+    }
+    span.diffponct {
+      color: #B08080
     }
     .comment {
       font-style: italic;
@@ -78,19 +129,6 @@ HEADER = """
       color: white;
       padding: 0.2em;
     }
-    .lnr {
-      background-color: #ccc;
-      color: #666;
-    }
-    .DiffChange {
-      background-color: #ff8888;
-      font-weight: bold;
-    }
-    .DiffText {
-      color: white;
-      background-color: #ff4444;
-      font-weight: bold;
-    }
     .anchor {
       margin-left: 0.5em;
       font-size: 80%%;
@@ -132,131 +170,383 @@ def create_limited_print_func(print_func, max_page_size):
     return limited_print_func
 
 
-def trim_file(path, skip_lines):
-    n = 0
-    skip = 0
-    with file(path, "r") as content:
-        tmp_file = None
-        try:
-            tmp_file = NamedTemporaryFile("w", dir=os.path.dirname(path),
-                                          delete=False)
-            for line in content:
-                n += 1
-                if n in skip_lines.keys():
-                    skip = skip_lines[n]
-                    tmp_file.write("[ %d lines removed ]\n" % skip)
-
-                if skip > 0:
-                    skip -= 1
+buf = []
+add_cpt, del_cpt = 0, 0
+line1, line2 = 0, 0
+hunk_off1, hunk_size1, hunk_off2, hunk_size2 = 0, 0, 0, 0
+
+
+def sane(x):
+    '''Return x with control characters (except tab, newline) shown as dots.'''
+    pieces = []
+    for char in x:
+        if ord(char) < 32 and char not in ('\t', '\n'):
+            pieces.append(".")
+        else:
+            pieces.append(char)
+    return "".join(pieces)
+
+
+def linediff(s, t):
+    '''
+    Original line diff algorithm of diff2html. It's character based.
+    Returns (s, t) with differing characters wrapped in DIFFON/DIFFOFF.
+    '''
+    if len(s):
+        s = unicode("".join([sane(c) for c in s]))
+    if len(t):
+        t = unicode("".join([sane(c) for c in t]))
+
+    m, n = len(s), len(t)
+    # d[i][j] = (edit distance of s[:i] and t[:j], predecessor cell)
+    d = [[(0, 0) for i in range(n+1)] for i in range(m+1)]
+    d[0][0] = (0, (0, 0))
+    for i in range(1, m+1):
+        d[i][0] = (i, (i-1, 0))
+    for j in range(1, n+1):
+        d[0][j] = (j, (0, j-1))
+
+    for i in range(1, m+1):
+        for j in range(1, n+1):
+            if s[i-1] == t[j-1]:
+                cost = 0
+            else:
+                cost = 1
+            d[i][j] = min((d[i-1][j][0] + 1, (i-1, j)),
+                          (d[i][j-1][0] + 1, (i, j-1)),
+                          (d[i-1][j-1][0] + cost, (i-1, j-1)))
+
+    # walk back from the last cell, then reverse to get the forward path
+    l = []
+    coord = (m, n)
+    while coord != (0, 0):
+        l.append(coord)
+        coord = d[coord[0]][coord[1]][1]
+    l.reverse()
+
+    l1, l2 = [], []
+
+    for coord in l:
+        cx, cy = coord
+        child_val = d[cx][cy][0]
+
+        father_coord = d[cx][cy][1]
+        fx, fy = father_coord
+        father_val = d[fx][fy][0]
+
+        step = (cx-fx, cy-fy)
+
+        if step == (0, 1):
+            l1.append("")
+            l2.append(DIFFON + t[fy] + DIFFOFF)
+        elif step == (1, 0):
+            l1.append(DIFFON + s[fx] + DIFFOFF)
+            l2.append("")
+        elif child_val-father_val == 1:
+            l1.append(DIFFON + s[fx] + DIFFOFF)
+            l2.append(DIFFON + t[fy] + DIFFOFF)
+        else:
+            l1.append(s[fx])
+            l2.append(t[fy])
+
+    # join instead of reduce(): reduce() raises TypeError on an empty list
+    return "".join(l1), "".join(l2)
+
+
+def diff_changed(old, new):
+    '''
+    Returns the differences based on characters between two strings
+    wrapped with DIFFON and DIFFOFF using `diff`.
+    '''
+    con = {'=': (lambda x: x),
+           '+': (lambda x: DIFFON + x + DIFFOFF),
+           '-': (lambda x: '')}
+    return "".join([(con[a])("".join(b)) for a, b in diff(old, new)])
+
+
+def diff_changed_ts(old, new):
+    '''
+    Returns a tuple for a two sided comparison based on characters, see `diff_changed`.
+    '''
+    return (diff_changed(new, old), diff_changed(old, new))
+
+
+def word_diff(old, new):
+    '''
+    Returns the difference between the old and new strings based on words. Punctuation is not part of the word.
+
+    Params:
+        old the old string
+        new the new string
+
+    Returns:
+        the output of `diff` on the two strings after splitting them
+        on runs of non-word characters, separators kept (a list of
+        change instructions; see the docstring of `diff`)
+    '''
+    separator_pattern = '(\W+)';
+    return diff(re.split(separator_pattern, old, flags=re.UNICODE), re.split(separator_pattern, new, flags=re.UNICODE))
+
+
+def diff_changed_words(old, new):
+    '''
+    Returns the difference between two strings based on words (see `word_diff`)
+    wrapped with DIFFON and DIFFOFF.
+
+    Returns:
+        the output of the diff expressed delimited with DIFFON and DIFFOFF.
+    '''
+    con = {'=': (lambda x: x),
+           '+': (lambda x: DIFFON + x + DIFFOFF),
+           '-': (lambda x: '')}
+    return "".join([(con[a])("".join(b)) for a, b in word_diff(old, new)])
+
+
+def diff_changed_words_ts(old, new):
+    '''
+    Returns a tuple for a two sided comparison based on words, see `diff_changed_words`.
+    '''
+    return (diff_changed_words(new, old), diff_changed_words(old, new))
+
+
+def convert(s, linesize=0, ponct=0):
+    '''Return s as HTML, mapping DIFFON/DIFFOFF to diffchanged2 spans.'''
+    i = 0
+    t = u""
+    for c in s:
+        # used by diffs
+        if c == DIFFON:
+            t += u'<span class="diffchanged2">'
+        elif c == DIFFOFF:
+            t += u"</span>"
+
+        # special html chars
+        elif htmlentitydefs.codepoint2name.has_key(ord(c)):
+            t += u"&%s;" % (htmlentitydefs.codepoint2name[ord(c)])
+            i += 1
+
+        # special highlighted chars
+        elif c == "\t" and ponct == 1:
+            # pad to the next tab stop with nbsp; n is always in 1..TABSIZE
+            n = TABSIZE - (i % TABSIZE)
+            t += u'<span class="diffponct">»</span>' + u'\xa0' * (n - 1)
+        elif c == " " and ponct == 1:
+            t += u'<span class="diffponct">·</span>'
+        elif c == "\n" and ponct == 1:
+            t += u'<br/><span class="diffponct">\</span>'
+        else:
+            t += c
+            i += 1
+
+        if linesize and (WORDBREAK.count(c) == 1):
+            t += u'\u200b'  # zero-width space: allow a line break here
+            i = 0
+        if linesize and i > linesize:
+            i = 0
+            t += u'\u200b'
+
+    return t
+
+
+def output_hunk(print_func):
+    print_func(u'<tr class="diffhunk"><td colspan="2">Offset %d, %d lines modified</td>'%(hunk_off1, hunk_size1))
+    print_func(u'<td colspan="2">Offset %d, %d lines modified</td></tr>\n'%(hunk_off2, hunk_size2))
+
+
+def output_line(print_func, s1, s2):
+    global line1
+    global line2
+
+    orig1 = s1
+    orig2 = s2
+
+    if s1 == None and s2 == None:
+        type_name = "unmodified"
+    elif s1 == "" and s2 == "":
+        type_name = "unmodified"
+    elif s1 == None or s1 == "":
+        type_name = "added"
+    elif s2 == None or s2 == "":
+        type_name = "deleted"
+    elif s1 == s2 and not s1.endswith('lines removed ]') and not s2.endswith('lines removed ]'):
+        type_name = "unmodified"
+    else:
+        type_name = "changed"
+        s1, s2 = linediff(orig1, orig2)
+
+    print_func(u'<tr class="diff%s">' % type_name)
+    try:
+        if s1 is not None:
+            print_func(u'<td class="diffline">%d </td>' % line1)
+            print_func(u'<td class="diffpresent">')
+            print_func(convert(s1, linesize=LINESIZE, ponct=1))
+            print_func(u'</td>')
+        else:
+            s1 = ""
+            print_func(u'<td colspan="2"> </td>')
+
+        if s2 is not None:
+            print_func(u'<td class="diffline">%d </td>' % line2)
+            print_func(u'<td class="diffpresent">')
+            print_func(convert(s2, linesize=LINESIZE, ponct=1))
+            print_func(u'</td>')
+        else:
+            s2 = ""
+            print_func(u'<td colspan="2"> </td>')
+    finally:
+        print_func(u"</tr>\n", force=True)
+
+    m = orig1 and re.match(r"^\[ (\d+) lines removed \]$", orig1)
+    if m:
+        line1 += int(m.group(1))
+    elif orig1 is not None:
+        line1 += 1
+    m = orig2 and re.match(r"^\[ (\d+) lines removed \]$", orig2)
+    if m:
+        line2 += int(m.group(1))
+    elif orig2 is not None:
+        line2 += 1
+
+
+def empty_buffer(print_func):
+    '''
+    Flush the buffered (old, new) line pairs through output_line and
+    reset the buffer and the added/deleted counters.
+    '''
+    global buf
+    global add_cpt
+    global del_cpt
+
+    if del_cpt != 0 and add_cpt != 0:
+        # lines were both removed and added: show them side by side,
+        # padding the shorter side with empty strings
+        olds = [pair[0] for pair in buf if pair[0] != None]
+        news = [pair[1] for pair in buf if pair[1] != None]
+        longest = max(len(olds), len(news))
+        for idx in range(longest):
+            old = olds[idx] if idx < len(olds) else ""
+            new = news[idx] if idx < len(news) else ""
+            output_line(print_func, old, new)
+    else:
+        # at most one kind of change is pending: emit the buffered
+        # pairs exactly as they were recorded
+        for pair in buf:
+            output_line(print_func, pair[0], pair[1])
+
+    add_cpt, del_cpt = 0, 0
+    buf = []
+
+
+def output_unified_diff(print_func, unified_diff):
+    global add_cpt, del_cpt
+    global line1, line2
+    global hunk_off1, hunk_size1, hunk_off2, hunk_size2
+
+    print_func(u'<table class="diff">\n')
+    try:
+        print_func(u'<colgroup><col style="width: 3em;"/><col style="99%"/>\n')
+        print_func(u'<col style="width: 3em;"/><col style="99%"/></colgroup>\n')
+
+        for l in unified_diff.splitlines():
+            m = re.match(r'^--- ([^\s]*)', l)
+            if m:
+                empty_buffer(print_func)
+                continue
+            m = re.match(r'^\+\+\+ ([^\s]*)', l)
+            if m:
+                empty_buffer(print_func)
+                continue
+
+            m = re.match(r"@@ -(\d+),?(\d*) \+(\d+),?(\d*)", l)
+            if m:
+                empty_buffer(print_func)
+                hunk_data = map(lambda x:x=="" and 1 or int(x), m.groups())
+                hunk_off1, hunk_size1, hunk_off2, hunk_size2 = hunk_data
+                line1, line2 = hunk_off1, hunk_off2
+                output_hunk(print_func)
+                continue
+
+            if re.match(r"^\\ No newline", l):
+                if hunk_size2 == 0:
+                    buf[-1] = (buf[-1][0], buf[-1][1] + '\n' + l[2:])
                 else:
-                    tmp_file.write(line)
-            os.rename(tmp_file.name, path)
-        finally:
-            if tmp_file:
-                try:
-                    os.unlink(tmp_file.name)
-                except OSError as _:
-                    pass # we've done our best
-
-
-# reduce size of diff blocks by prediffing with diff (which is extremely fast)
-# and then trimming the blocks larger than the configured limit
-def optimize_files_for_diff(path1, path2):
-    cmd = ['diff', '-au7', path1, path2]
-    p = subprocess.Popen(cmd, shell=False,
-        close_fds=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-    stdout, stderr = p.communicate()
-    p.wait()
-    if p.returncode != 1:
-        raise subprocess.CalledProcessError(cmd, p.returncode, output=stderr)
-    skip_lines1 = dict()
-    skip_lines2 = dict()
-    search = re.compile(r'^@@\s+-(?P<start1>\d+),(?P<len1>\d+)\s+\+(?P<start2>\d+),(?P<len2>\d+)\s+@@$')
-    for line in stdout.split('\n'):
-        found = search.match(line)
-        if found:
-            start1 = int(found.group('start1'))
-            len1 = int(found.group('len1'))
-            start2 = int(found.group('start2'))
-            len2 = int(found.group('len2'))
-            if len1 > MAX_DIFF_BLOCK_LINES:
-                skip_lines1[start1 + MAX_DIFF_BLOCK_LINES] = len1 - MAX_DIFF_BLOCK_LINES
-            if len2 > MAX_DIFF_BLOCK_LINES:
-                skip_lines2[start2 + MAX_DIFF_BLOCK_LINES] = len2 - MAX_DIFF_BLOCK_LINES
-    if len(skip_lines1) > 0:
-        trim_file(path1, skip_lines1)
-    if len(skip_lines2) > 0:
-        trim_file(path2, skip_lines2)
-
-
-# Huge thanks to Stefaan Himpe for this solution:
-# http://technogems.blogspot.com/2011/09/generate-side-by-side-diffs-in-html.html
-def create_diff(lines1, lines2):
-    with make_temp_directory() as temp_dir:
-        path1 = os.path.join(temp_dir, 'content1')
-        path2 = os.path.join(temp_dir, 'content2')
-        diff_path = os.path.join(temp_dir, 'diff.html')
-        with open(path1, 'w') as f:
-            f.writelines([u.encode('utf-8') for u in lines1])
-        with open(path2, 'w') as f:
-            f.writelines([u.encode('utf-8') for u in lines2])
-        optimize_files_for_diff(path1, path2)
-        p = subprocess.Popen(
-            ['vim', '-n', '-N', '-e', '-i', 'NONE', '-u', 'NORC', '-U', 'NORC',
-             '-d', path1, path2,
-             '-c', 'colorscheme zellner',
-             '-c', 'let g:html_number_lines=1',
-             '-c', 'let g:html_use_css=1',
-             '-c', 'let g:html_no_progress=1',
-             '-c', 'TOhtml',
-             '-c', 'w! %s' % (diff_path,),
-             '-c', 'qall!',
-            ], shell=False, close_fds=True,
-            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-        # Consume all output and wait until end of processing
-        _, _ = p.communicate()
-        p.wait()
-        if p.returncode != 0:
-            return 'vim exited with error %d' % p.returncode
-        output = open(diff_path).read()
-        output = re.search(r'(<table.*</table>)', output,
-                           flags=re.MULTILINE | re.DOTALL).group(1)
-        output = re.sub(r'<th.*</th>', '', output,
-                        flags=re.MULTILINE | re.DOTALL)
-        return output
+                    buf[-1] = (buf[-1][0] + '\n' + l[2:], buf[-1][1])
+                continue
+
+            if hunk_size1 <= 0 and hunk_size2 <= 0:
+                empty_buffer(print_func)
+                continue
+
+            m = re.match(r"^\+\[ (\d+) lines removed \]$", l)
+            if m:
+                add_cpt += int(m.group(1))
+                hunk_size2 -= int(m.group(1))
+                buf.append((None, l[1:]))
+                continue
+
+            if re.match(r"^\+", l):
+                add_cpt += 1
+                hunk_size2 -= 1
+                buf.append((None, l[1:]))
+                continue
+
+            m = re.match(r"^-\[ (\d+) lines removed \]$", l)
+            if m:
+                del_cpt += int(m.group(1))
+                hunk_size1 -= int(m.group(1))
+                buf.append((l[1:], None))
+                continue
+
+            if re.match(r"^-", l):
+                del_cpt += 1
+                hunk_size1 -= 1
+                buf.append((l[1:], None))
+                continue
+
+            if re.match(r"^ ", l) and hunk_size1 and hunk_size2:
+                empty_buffer(print_func)
+                hunk_size1 -= 1
+                hunk_size2 -= 1
+                buf.append((l[1:], l[1:]))
+                continue
+
+            empty_buffer(print_func)
+
+        empty_buffer(print_func)
+    finally:
+        print_func(u"</table>", force=True)
 
 
 def output_difference(difference, print_func, parents):
     logger.debug('html output for %s', difference.source1)
     sources = parents + [difference.source1]
-    print_func("<div class='difference'>")
+    print_func(u"<div class='difference'>")
     try:
-        print_func("<div class='diffheader'>")
+        print_func(u"<div class='diffheader'>")
         if difference.source1 == difference.source2:
-            print_func("<div><span class='source'>%s<span>"
+            print_func(u"<div><span class='source'>%s<span>"
                        % escape(difference.source1))
         else:
-            print_func("<div><span class='source'>%s</span> vs.</div>"
+            print_func(u"<div><span class='source'>%s</span> vs.</div>"
                        % escape(difference.source1))
-            print_func("<div><span class='source'>%s</span>"
+            print_func(u"<div><span class='source'>%s</span>"
                        % escape(difference.source2))
         anchor = '/'.join(sources[1:])
-        print_func(" <a class='anchor' href='#%s' name='%s'>¶</a>" % (anchor, anchor))
-        print_func("</div>")
+        print_func(u" <a class='anchor' href='#%s' name='%s'>¶</a>" % (anchor, anchor))
+        print_func(u"</div>")
         if difference.comment:
-            print_func("<div class='comment'>%s</div>"
+            print_func(u"<div class='comment'>%s</div>"
                        % escape(difference.comment).replace('\n', '<br />'))
-        print_func("</div>")
-        if difference.lines1 or difference.lines2:
-            print_func(create_diff(difference.lines1 or ['<empty>'],
-                                   difference.lines2 or ['<empty>']))
+        print_func(u"</div>")
+        if difference.unified_diff:
+            output_unified_diff(print_func, difference.unified_diff)
         for detail in difference.details:
             output_difference(detail, print_func, sources)
     except PrintLimitReached:
         logger.debug('print limit reached')
         raise
     finally:
-        print_func("</div>", force=True)
+        print_func(u"</div>", force=True)
 
 
 def output_header(css_url, print_func):
@@ -281,6 +571,6 @@ def output_html(differences, css_url=None, print_func=None, max_page_size=None):
             output_difference(difference, print_func, [])
     except PrintLimitReached:
         logger.debug('print limit reached')
-        print_func("<div class='error'>Max output size reached.</div>",
+        print_func(u"<div class='error'>Max output size reached.</div>",
                    force=True)
     print_func(FOOTER % {'version': VERSION}, force=True)
diff --git a/debbindiff/presenters/text.py b/debbindiff/presenters/text.py
index 3468978..447a76b 100644
--- a/debbindiff/presenters/text.py
+++ b/debbindiff/presenters/text.py
@@ -29,21 +29,9 @@ def print_difference(difference, print_func):
     if difference.comment:
         for line in difference.comment.split('\n'):
             print_func(u"│┄ %s" % line)
-    if difference.lines1 or difference.lines2:
-        if difference.lines1 and not difference.lines1[-1].endswith('\n'):
-            difference.lines1[-1] = difference.lines1[-1] + '\n'
-            difference.lines1.append('<No newline at the end>\n')
-        if difference.lines2 and not difference.lines2[-1].endswith('\n'):
-            difference.lines2[-1] = difference.lines2[-1] + '\n'
-            difference.lines2.append('<No newline at the end>\n')
-        g = difflib.unified_diff(difference.lines1, difference.lines2)
-        # First skip lines with filename
-        g.next()
-        g.next()
-        for line in g:
-            if line.startswith('--- ') or line.startswith('+++ '):
-                continue
-            print_func(u"│ %s" % line, end='')
+    if difference.unified_diff:
+        for line in difference.unified_diff.splitlines():
+            print_func(u"│ %s" % line)
 
 def print_details(difference, print_func):
     if not difference.details:
diff --git a/debian/control b/debian/control
index 076e18c..15ba569 100644
--- a/debian/control
+++ b/debian/control
@@ -19,10 +19,7 @@ Vcs-Browser: https://anonscm.debian.org/cgit/reproducible/debbindiff.git
 
 Package: debbindiff
 Architecture: all
-Depends: diffutils,
-         gnupg,
-         vim,
-         vim-common,
+Depends: gnupg,
          ${misc:Depends},
          ${python:Depends},
 Recommends: ${debbindiff:Recommends}
diff --git a/debian/copyright b/debian/copyright
index acc2abe..c49a711 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -47,6 +47,10 @@ License: GPL-3+
 Files: debbindiff/presenters/html.py
 Copyright: 2014-2015 Jérémy Bobbio <lunar at debian.org>
                 2015 Reiner Herrmann <reiner at reiner-h.de>
+           2012-2013 Olivier Matz <zer0 at droids-corp.org>
+                2012 Alan De Smet <adesmet at cs.wisc.edu>
+                2012 Sergey Satskiy <sergey.satskiy at gmail.com>
+                2012 scito <info at scito.ch>
 License: GPL-3+
 
 Files: debbindiff/presenters/text.py

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git



More information about the Reproducible-commits mailing list