[Reproducible-commits] [debbindiff] 04/19: Add the ability to feed diff from file objects

Jérémy Bobbio lunar at moszumanska.debian.org
Tue Mar 31 14:59:28 UTC 2015


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch pu/feed-diff
in repository debbindiff.

commit bc232ba9e38b93d60098a278d67671f7153e1961
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Mon Mar 30 06:19:17 2015 +0200

    Add the ability to feed diff from file objects
    
    This paves the way for huge memory savings.
    Convert xxd as an example.
---
 debbindiff/comparators/binary.py    | 14 ++++---
 debbindiff/comparators/changes.py   |  4 +-
 debbindiff/comparators/cpio.py      |  2 +-
 debbindiff/comparators/deb.py       |  2 +-
 debbindiff/comparators/directory.py |  8 ++--
 debbindiff/comparators/elf.py       |  8 ++--
 debbindiff/comparators/fonts.py     |  2 +-
 debbindiff/comparators/gettext.py   |  2 +-
 debbindiff/comparators/gzip.py      |  2 +-
 debbindiff/comparators/haskell.py   |  2 +-
 debbindiff/comparators/pdf.py       |  4 +-
 debbindiff/comparators/png.py       |  2 +-
 debbindiff/comparators/rpm.py       |  2 +-
 debbindiff/comparators/squashfs.py  |  2 +-
 debbindiff/comparators/tar.py       |  2 +-
 debbindiff/comparators/zip.py       |  2 +-
 debbindiff/difference.py            | 79 ++++++++++++++++++++++++++++++-------
 17 files changed, 97 insertions(+), 42 deletions(-)

diff --git a/debbindiff/comparators/binary.py b/debbindiff/comparators/binary.py
index 5256972..7a606fc 100644
--- a/debbindiff/comparators/binary.py
+++ b/debbindiff/comparators/binary.py
@@ -18,14 +18,18 @@
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
 from binascii import hexlify
+from contextlib import contextmanager
 import subprocess
 from debbindiff.difference import Difference
 from debbindiff import tool_required, RequiredToolNotFound
 
 
+@contextmanager
 @tool_required('xxd')
 def xxd(path):
-    return subprocess.check_output(['xxd', path], shell=False).decode('ascii')
+    p = subprocess.Popen(['xxd', path], shell=False, stdout=subprocess.PIPE)
+    yield p.stdout
+    p.wait()
 
 
 def hexdump_fallback(path):
@@ -38,14 +42,14 @@ def hexdump_fallback(path):
 
 def compare_binary_files(path1, path2, source=None):
     try:
-        hexdump1 = xxd(path1)
-        hexdump2 = xxd(path2)
-        comment = None
+        with xxd(path1) as xxd1:
+            with xxd(path2) as xxd2:
+                difference = Difference.from_file(xxd1, xxd2, path1, path2, source)
     except RequiredToolNotFound:
         hexdump1 = hexdump_fallback(path1)
         hexdump2 = hexdump_fallback(path2)
         comment = 'xxd not available in path. Falling back to Python hexlify.\n'
-    difference = Difference.from_content(hexdump1, hexdump2, path1, path2, source, comment)
+        difference = Difference.from_unicode(hexdump1, hexdump2, path1, path2, source, comment)
     if not difference:
         return []
     return [difference]
diff --git a/debbindiff/comparators/changes.py b/debbindiff/comparators/changes.py
index 321cc12..ac13345 100644
--- a/debbindiff/comparators/changes.py
+++ b/debbindiff/comparators/changes.py
@@ -47,7 +47,7 @@ def compare_changes_files(path1, path2, source=None):
         if dot_changes1[field] != dot_changes2[field]:
             content1 = "%s: %s" % (field, dot_changes1[field])
             content2 = "%s: %s" % (field, dot_changes2[field])
-            difference = Difference.from_content(
+            difference = Difference.from_unicode(
                              content1, content2,
                              dot_changes1.get_changes_file(),
                              dot_changes2.get_changes_file(),
@@ -61,7 +61,7 @@ def compare_changes_files(path1, path2, source=None):
     files2 = dot_changes2.get('Files')
     logger.debug(dot_changes1.get_as_string('Files'))
 
-    files_difference = Difference.from_content(
+    files_difference = Difference.from_unicode(
         dot_changes1.get_as_string('Files'),
         dot_changes2.get_as_string('Files'),
         dot_changes1.get_changes_file(),
diff --git a/debbindiff/comparators/cpio.py b/debbindiff/comparators/cpio.py
index 9455534..0cacd1d 100644
--- a/debbindiff/comparators/cpio.py
+++ b/debbindiff/comparators/cpio.py
@@ -52,7 +52,7 @@ def compare_cpio_files(path1, path2, source=None):
     # compare metadata
     content1 = get_cpio_content(path1, verbose=True)
     content2 = get_cpio_content(path2, verbose=True)
-    difference = Difference.from_content(
+    difference = Difference.from_unicode(
                      content1, content2, path1, path2, source="metadata")
     if difference:
         differences.append(difference)
diff --git a/debbindiff/comparators/deb.py b/debbindiff/comparators/deb.py
index ad99d99..9a9be51 100644
--- a/debbindiff/comparators/deb.py
+++ b/debbindiff/comparators/deb.py
@@ -55,7 +55,7 @@ def compare_deb_files(path1, path2, source=None):
     # look up differences in file list and file metadata
     content1 = get_ar_content(path1)
     content2 = get_ar_content(path2)
-    difference = Difference.from_content(
+    difference = Difference.from_unicode(
                      content1, content2, path1, path2, source="metadata")
     if difference:
         differences.append(difference)
diff --git a/debbindiff/comparators/directory.py b/debbindiff/comparators/directory.py
index fd0eb4e..96bc1f9 100644
--- a/debbindiff/comparators/directory.py
+++ b/debbindiff/comparators/directory.py
@@ -60,7 +60,7 @@ def compare_meta(path1, path2):
     try:
         stat1 = stat(path1)
         stat2 = stat(path2)
-        difference = Difference.from_content(
+        difference = Difference.from_unicode(
                          stat1, stat2, path1, path2, source="stat")
         if difference:
             differences.append(difference)
@@ -70,7 +70,7 @@ def compare_meta(path1, path2):
     try:
         lsattr1 = lsattr(path1)
         lsattr2 = lsattr(path2)
-        difference = Difference.from_content(
+        difference = Difference.from_unicode(
                          lsattr1, lsattr2, path1, path2, source="lattr")
         if difference:
             differences.append(difference)
@@ -80,7 +80,7 @@ def compare_meta(path1, path2):
     try:
         acl1 = getfacl(path1)
         acl2 = getfacl(path2)
-        difference = Difference.from_content(
+        difference = Difference.from_unicode(
                          acl1, acl2, path1, path2, source="getfacl")
         if difference:
             differences.append(difference)
@@ -110,7 +110,7 @@ def compare_directories(path1, path2, source=None):
         differences.extend(in_differences)
     ls1 = sorted(ls(path1))
     ls2 = sorted(ls(path2))
-    difference = Difference.from_content(ls1, ls2, path1, path2, source="ls")
+    difference = Difference.from_unicode(ls1, ls2, path1, path2, source="ls")
     if difference:
         differences.append(difference)
     differences.extend(compare_meta(path1, path2))
diff --git a/debbindiff/comparators/elf.py b/debbindiff/comparators/elf.py
index a90626f..66b2e49 100644
--- a/debbindiff/comparators/elf.py
+++ b/debbindiff/comparators/elf.py
@@ -58,20 +58,20 @@ def _compare_elf_data(path1, path2, source=None):
     differences = []
     all1 = readelf_all(path1)
     all2 = readelf_all(path2)
-    difference = Difference.from_content(
+    difference = Difference.from_unicode(
                      all1, all2, path1, path2, source='readelf --all')
     if difference:
         differences.append(difference)
     debug_dump1 = readelf_debug_dump(path1)
     debug_dump2 = readelf_debug_dump(path2)
-    difference = Difference.from_content(
+    difference = Difference.from_unicode(
                      debug_dump1, debug_dump2,
                      path1, path2, source='readelf --debug-dump')
     if difference:
         differences.append(difference)
     objdump1 = objdump_disassemble(path1)
     objdump2 = objdump_disassemble(path2)
-    difference = Difference.from_content(
+    difference = Difference.from_unicode(
                      objdump1, objdump2,
                      path1, path2, source='objdump --disassemble --full-contents')
     if difference:
@@ -90,7 +90,7 @@ def compare_static_lib_files(path1, path2, source=None):
     # look up differences in metadata
     content1 = get_ar_content(path1)
     content2 = get_ar_content(path2)
-    difference = Difference.from_content(
+    difference = Difference.from_unicode(
                      content1, content2, path1, path2, source="metadata")
     if difference:
         differences.append(difference)
diff --git a/debbindiff/comparators/fonts.py b/debbindiff/comparators/fonts.py
index d8ee439..1db81da 100644
--- a/debbindiff/comparators/fonts.py
+++ b/debbindiff/comparators/fonts.py
@@ -34,7 +34,7 @@ def show_ttf(path):
 def compare_ttf_files(path1, path2, source=None):
     ttf1 = show_ttf(path1)
     ttf2 = show_ttf(path2)
-    difference = Difference.from_content(ttf1, ttf2, path1, path2, source='showttf')
+    difference = Difference.from_unicode(ttf1, ttf2, path1, path2, source='showttf')
     if not difference:
         return []
     return [difference]
diff --git a/debbindiff/comparators/gettext.py b/debbindiff/comparators/gettext.py
index ad8b154..54c8da0 100644
--- a/debbindiff/comparators/gettext.py
+++ b/debbindiff/comparators/gettext.py
@@ -41,7 +41,7 @@ def msgunfmt(path):
 def compare_mo_files(path1, path2, source=None):
     mo1 = msgunfmt(path1)
     mo2 = msgunfmt(path2)
-    difference = Difference.from_content(mo1, mo2, path1, path2, source='msgunfmt')
+    difference = Difference.from_unicode(mo1, mo2, path1, path2, source='msgunfmt')
     if not difference:
         return []
     return [difference]
diff --git a/debbindiff/comparators/gzip.py b/debbindiff/comparators/gzip.py
index 227e898..80f0521 100644
--- a/debbindiff/comparators/gzip.py
+++ b/debbindiff/comparators/gzip.py
@@ -52,7 +52,7 @@ def compare_gzip_files(path1, path2, source=None):
     # check metadata
     metadata1 = get_gzip_metadata(path1)
     metadata2 = get_gzip_metadata(path2)
-    difference = Difference.from_content(
+    difference = Difference.from_unicode(
                      metadata1, metadata2, path1, path2, source='metadata')
     if difference:
         differences.append(difference)
diff --git a/debbindiff/comparators/haskell.py b/debbindiff/comparators/haskell.py
index 7064b4e..abb0d0e 100644
--- a/debbindiff/comparators/haskell.py
+++ b/debbindiff/comparators/haskell.py
@@ -32,7 +32,7 @@ def show_iface(path):
 def compare_hi_files(path1, path2, source=None):
     iface1 = show_iface(path1)
     iface2 = show_iface(path2)
-    difference = Difference.from_content(
+    difference = Difference.from_unicode(
                      iface1, iface2, path1, path2, source='ghc --show-iface')
     if not difference:
         return []
diff --git a/debbindiff/comparators/pdf.py b/debbindiff/comparators/pdf.py
index b347f33..32d82b4 100644
--- a/debbindiff/comparators/pdf.py
+++ b/debbindiff/comparators/pdf.py
@@ -44,13 +44,13 @@ def compare_pdf_files(path1, path2, source=None):
     src = get_source(path1, path2) or 'FILE'
     text1 = pdftotext(path1)
     text2 = pdftotext(path2)
-    difference = Difference.from_content(text1, text2, path1, path2,
+    difference = Difference.from_unicode(text1, text2, path1, path2,
                                          source="pdftotext %s" % src)
     if difference:
         differences.append(difference)
     uncompressed1 = uncompress(path1)
     uncompressed2 = uncompress(path2)
-    difference = Difference.from_content(uncompressed1, uncompressed2, path1, path2,
+    difference = Difference.from_unicode(uncompressed1, uncompressed2, path1, path2,
                                          source="pdftk %s output - uncompress" % src)
     if difference:
         differences.append(difference)
diff --git a/debbindiff/comparators/png.py b/debbindiff/comparators/png.py
index 582a137..3969b5a 100644
--- a/debbindiff/comparators/png.py
+++ b/debbindiff/comparators/png.py
@@ -38,7 +38,7 @@ def sng(path):
 def compare_png_files(path1, path2, source=None):
     sng1 = sng(path1)
     sng2 = sng(path2)
-    difference = Difference.from_content(sng1, sng2, path1, path2, source='sng')
+    difference = Difference.from_unicode(sng1, sng2, path1, path2, source='sng')
     if not difference:
         return []
     return [difference]
diff --git a/debbindiff/comparators/rpm.py b/debbindiff/comparators/rpm.py
index 8c3e075..7018b27 100644
--- a/debbindiff/comparators/rpm.py
+++ b/debbindiff/comparators/rpm.py
@@ -80,7 +80,7 @@ def compare_rpm_files(path1, path2, source=None):
         ts.setVSFlags(-1)
         header1 = get_rpm_header(path1, ts)
         header2 = get_rpm_header(path2, ts)
-        difference = Difference.from_content(
+        difference = Difference.from_unicode(
                          header1, header2, path1, path2, source="header")
         if difference:
             differences.append(difference)
diff --git a/debbindiff/comparators/squashfs.py b/debbindiff/comparators/squashfs.py
index 8f17123..f383009 100644
--- a/debbindiff/comparators/squashfs.py
+++ b/debbindiff/comparators/squashfs.py
@@ -56,7 +56,7 @@ def compare_squashfs_files(path1, path2, source=None):
     # compare metadata
     content1 = get_squashfs_content(path1)
     content2 = get_squashfs_content(path2)
-    difference = Difference.from_content(
+    difference = Difference.from_unicode(
                      content1, content2, path1, path2, source="metadata")
     if difference:
         differences.append(difference)
diff --git a/debbindiff/comparators/tar.py b/debbindiff/comparators/tar.py
index dfc1384..cb2a736 100644
--- a/debbindiff/comparators/tar.py
+++ b/debbindiff/comparators/tar.py
@@ -68,7 +68,7 @@ def compare_tar_files(path1, path2, source=None):
             # look up differences in file list and file metadata
             content1 = get_tar_content(tar1).decode('utf-8')
             content2 = get_tar_content(tar2).decode('utf-8')
-            difference = Difference.from_content(
+            difference = Difference.from_unicode(
                              content1, content2, path1, path2, source="metadata")
             if difference:
                 differences.append(difference)
diff --git a/debbindiff/comparators/zip.py b/debbindiff/comparators/zip.py
index 3170a19..c17c00a 100644
--- a/debbindiff/comparators/zip.py
+++ b/debbindiff/comparators/zip.py
@@ -70,7 +70,7 @@ def compare_zip_files(path1, path2, source=None):
                 # search harder
                 zipinfo1 = get_zipinfo(path1, verbose=True)
                 zipinfo2 = get_zipinfo(path2, verbose=True)
-            difference = Difference.from_content(
+            difference = Difference.from_unicode(
                              zipinfo1, zipinfo2, path1, path2, source="metadata")
             if difference:
                 differences.append(difference)
diff --git a/debbindiff/difference.py b/debbindiff/difference.py
index 06a5cb4..9ee4cb5 100644
--- a/debbindiff/difference.py
+++ b/debbindiff/difference.py
@@ -24,6 +24,8 @@ from functools import partial
 from tempfile import NamedTemporaryFile
 import re
 import subprocess
+import sys
+import traceback
 from threading import Thread
 from multiprocessing import Queue
 from debbindiff import logger, tool_required, RequiredToolNotFound
@@ -164,6 +166,37 @@ def run_diff(fd1, fd2, end_nl_q1, end_nl_q2):
     return parser.diff
 
 
+# inspired by https://stackoverflow.com/a/6874161
+class ExThread(Thread):
+    def __init__(self, *args, **kwargs):
+        super(ExThread, self).__init__(*args, **kwargs)
+        self.__status_queue = Queue()
+
+    def run(self, *args, **kwargs):
+        try:
+            super(ExThread, self).run(*args, **kwargs)
+        except Exception:
+            except_type, except_class, tb = sys.exc_info()
+            self.__status_queue.put((except_type, except_class, traceback.extract_tb(tb)))
+        self.__status_queue.put(None)
+
+    def wait_for_exc_info(self):
+        return self.__status_queue.get()
+
+    def join(self):
+        ex_info = self.wait_for_exc_info()
+        if ex_info is None:
+            return
+        else:
+            except_type, except_class, tb = ex_info
+            logger.debug('Exception: %s' %
+                         traceback.format_exception_only(except_type, except_class)[0].strip())
+            logger.debug('Traceback:')
+            for line in traceback.format_list(tb):
+                logger.debug(line[:-1])
+            raise except_type, except_class, None
+
+
 def feed(feeder, f, end_nl_q):
     # work-around unified diff limitation: if there's no newlines in both
     # don't make it a difference
@@ -178,15 +211,17 @@ def feed(feeder, f, end_nl_q):
 def fd_from_feeder(feeder, end_nl_q):
     pipe_r, pipe_w = os.pipe()
     outf = os.fdopen(pipe_w, 'w')
-    t = Thread(target=feed, args=(feeder, outf, end_nl_q))
+    t = ExThread(target=feed, args=(feeder, outf, end_nl_q))
     t.daemon = True
     t.start()
     yield pipe_r
-    t.join()
-    outf.close()
+    try:
+        t.join()
+    finally:
+        outf.close()
 
 
-def make_feeder_from_content(content):
+def make_feeder_from_unicode(content):
     def feeder(f):
         for offset in range(0, len(content), DIFF_CHUNK):
             f.write(content[offset:offset + DIFF_CHUNK].encode('utf-8'))
@@ -194,11 +229,19 @@ def make_feeder_from_content(content):
     return feeder
 
 
-def diff(content1, content2):
+def make_feeder_from_file(in_file, filter=lambda buf: buf.encode('utf-8')):
+    def feeder(out_file):
+        end_nl = False
+        for buf in iter(in_file.readline, b''):
+            out_file.write(filter(buf))
+            end_nl = buf[-1] == '\n'
+        return end_nl
+    return feeder
+
+
+def diff(feeder1, feeder2):
     end_nl_q1 = Queue()
     end_nl_q2 = Queue()
-    feeder1 = make_feeder_from_content(content1)
-    feeder2 = make_feeder_from_content(content2)
     with fd_from_feeder(feeder1, end_nl_q1) as fd1:
         with fd_from_feeder(feeder2, end_nl_q2) as fd2:
             return run_diff(fd1, fd2, end_nl_q1, end_nl_q2)
@@ -222,16 +265,12 @@ class Difference(object):
         self._details = []
 
     @staticmethod
-    def from_content(content1, content2, path1, path2, source=None,
-                     comment=None):
+    def from_feeder(feeder1, feeder2, path1, path2, source=None,
+                    comment=None):
         actual_comment = comment
-        if content1 and type(content1) is not unicode:
-            raise UnicodeError('content1 has not been decoded')
-        if content2 and type(content2) is not unicode:
-            raise UnicodeError('content2 has not been decoded')
         unified_diff = None
         try:
-            unified_diff = diff(content1, content2)
+            unified_diff = diff(feeder1, feeder2)
         except RequiredToolNotFound:
             actual_comment = 'diff is not available!'
             if comment:
@@ -240,6 +279,18 @@ class Difference(object):
             return None
         return Difference(unified_diff, path1, path2, source, actual_comment)
 
+    @staticmethod
+    def from_unicode(content1, content2, *args, **kwargs):
+        return Difference.from_feeder(make_feeder_from_unicode(content1),
+                                      make_feeder_from_unicode(content2),
+                                      *args, **kwargs)
+
+    @staticmethod
+    def from_file(file1, file2, *args, **kwargs):
+        return Difference.from_feeder(make_feeder_from_file(file1),
+                                      make_feeder_from_file(file2),
+                                      *args, **kwargs)
+
     @property
     def comment(self):
         return self._comment

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git



More information about the Reproducible-commits mailing list