[Reproducible-commits] [debbindiff] 04/19: Add the ability to feed diff from file objects
Jérémy Bobbio
lunar at moszumanska.debian.org
Tue Mar 31 14:59:28 UTC 2015
This is an automated email from the git hooks/post-receive script.
lunar pushed a commit to branch pu/feed-diff
in repository debbindiff.
commit bc232ba9e38b93d60098a278d67671f7153e1961
Author: Jérémy Bobbio <lunar at debian.org>
Date: Mon Mar 30 06:19:17 2015 +0200
Add the ability to feed diff from file objects
This paves the way for huge memory savings.
Convert xxd as an example.
---
debbindiff/comparators/binary.py | 14 ++++---
debbindiff/comparators/changes.py | 4 +-
debbindiff/comparators/cpio.py | 2 +-
debbindiff/comparators/deb.py | 2 +-
debbindiff/comparators/directory.py | 8 ++--
debbindiff/comparators/elf.py | 8 ++--
debbindiff/comparators/fonts.py | 2 +-
debbindiff/comparators/gettext.py | 2 +-
debbindiff/comparators/gzip.py | 2 +-
debbindiff/comparators/haskell.py | 2 +-
debbindiff/comparators/pdf.py | 4 +-
debbindiff/comparators/png.py | 2 +-
debbindiff/comparators/rpm.py | 2 +-
debbindiff/comparators/squashfs.py | 2 +-
debbindiff/comparators/tar.py | 2 +-
debbindiff/comparators/zip.py | 2 +-
debbindiff/difference.py | 79 ++++++++++++++++++++++++++++++-------
17 files changed, 97 insertions(+), 42 deletions(-)
diff --git a/debbindiff/comparators/binary.py b/debbindiff/comparators/binary.py
index 5256972..7a606fc 100644
--- a/debbindiff/comparators/binary.py
+++ b/debbindiff/comparators/binary.py
@@ -18,14 +18,18 @@
# along with debbindiff. If not, see <http://www.gnu.org/licenses/>.
from binascii import hexlify
+from contextlib import contextmanager
import subprocess
from debbindiff.difference import Difference
from debbindiff import tool_required, RequiredToolNotFound
+@contextmanager
@tool_required('xxd')
def xxd(path):
- return subprocess.check_output(['xxd', path], shell=False).decode('ascii')
+ p = subprocess.Popen(['xxd', path], shell=False, stdout=subprocess.PIPE)
+ yield p.stdout
+ p.wait()
def hexdump_fallback(path):
@@ -38,14 +42,14 @@ def hexdump_fallback(path):
def compare_binary_files(path1, path2, source=None):
try:
- hexdump1 = xxd(path1)
- hexdump2 = xxd(path2)
- comment = None
+ with xxd(path1) as xxd1:
+ with xxd(path2) as xxd2:
+ difference = Difference.from_file(xxd1, xxd2, path1, path2, source)
except RequiredToolNotFound:
hexdump1 = hexdump_fallback(path1)
hexdump2 = hexdump_fallback(path2)
comment = 'xxd not available in path. Falling back to Python hexlify.\n'
- difference = Difference.from_content(hexdump1, hexdump2, path1, path2, source, comment)
+ difference = Difference.from_unicode(hexdump1, hexdump2, path1, path2, source, comment)
if not difference:
return []
return [difference]
diff --git a/debbindiff/comparators/changes.py b/debbindiff/comparators/changes.py
index 321cc12..ac13345 100644
--- a/debbindiff/comparators/changes.py
+++ b/debbindiff/comparators/changes.py
@@ -47,7 +47,7 @@ def compare_changes_files(path1, path2, source=None):
if dot_changes1[field] != dot_changes2[field]:
content1 = "%s: %s" % (field, dot_changes1[field])
content2 = "%s: %s" % (field, dot_changes2[field])
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
content1, content2,
dot_changes1.get_changes_file(),
dot_changes2.get_changes_file(),
@@ -61,7 +61,7 @@ def compare_changes_files(path1, path2, source=None):
files2 = dot_changes2.get('Files')
logger.debug(dot_changes1.get_as_string('Files'))
- files_difference = Difference.from_content(
+ files_difference = Difference.from_unicode(
dot_changes1.get_as_string('Files'),
dot_changes2.get_as_string('Files'),
dot_changes1.get_changes_file(),
diff --git a/debbindiff/comparators/cpio.py b/debbindiff/comparators/cpio.py
index 9455534..0cacd1d 100644
--- a/debbindiff/comparators/cpio.py
+++ b/debbindiff/comparators/cpio.py
@@ -52,7 +52,7 @@ def compare_cpio_files(path1, path2, source=None):
# compare metadata
content1 = get_cpio_content(path1, verbose=True)
content2 = get_cpio_content(path2, verbose=True)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
content1, content2, path1, path2, source="metadata")
if difference:
differences.append(difference)
diff --git a/debbindiff/comparators/deb.py b/debbindiff/comparators/deb.py
index ad99d99..9a9be51 100644
--- a/debbindiff/comparators/deb.py
+++ b/debbindiff/comparators/deb.py
@@ -55,7 +55,7 @@ def compare_deb_files(path1, path2, source=None):
# look up differences in file list and file metadata
content1 = get_ar_content(path1)
content2 = get_ar_content(path2)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
content1, content2, path1, path2, source="metadata")
if difference:
differences.append(difference)
diff --git a/debbindiff/comparators/directory.py b/debbindiff/comparators/directory.py
index fd0eb4e..96bc1f9 100644
--- a/debbindiff/comparators/directory.py
+++ b/debbindiff/comparators/directory.py
@@ -60,7 +60,7 @@ def compare_meta(path1, path2):
try:
stat1 = stat(path1)
stat2 = stat(path2)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
stat1, stat2, path1, path2, source="stat")
if difference:
differences.append(difference)
@@ -70,7 +70,7 @@ def compare_meta(path1, path2):
try:
lsattr1 = lsattr(path1)
lsattr2 = lsattr(path2)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
lsattr1, lsattr2, path1, path2, source="lattr")
if difference:
differences.append(difference)
@@ -80,7 +80,7 @@ def compare_meta(path1, path2):
try:
acl1 = getfacl(path1)
acl2 = getfacl(path2)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
acl1, acl2, path1, path2, source="getfacl")
if difference:
differences.append(difference)
@@ -110,7 +110,7 @@ def compare_directories(path1, path2, source=None):
differences.extend(in_differences)
ls1 = sorted(ls(path1))
ls2 = sorted(ls(path2))
- difference = Difference.from_content(ls1, ls2, path1, path2, source="ls")
+ difference = Difference.from_unicode(ls1, ls2, path1, path2, source="ls")
if difference:
differences.append(difference)
differences.extend(compare_meta(path1, path2))
diff --git a/debbindiff/comparators/elf.py b/debbindiff/comparators/elf.py
index a90626f..66b2e49 100644
--- a/debbindiff/comparators/elf.py
+++ b/debbindiff/comparators/elf.py
@@ -58,20 +58,20 @@ def _compare_elf_data(path1, path2, source=None):
differences = []
all1 = readelf_all(path1)
all2 = readelf_all(path2)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
all1, all2, path1, path2, source='readelf --all')
if difference:
differences.append(difference)
debug_dump1 = readelf_debug_dump(path1)
debug_dump2 = readelf_debug_dump(path2)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
debug_dump1, debug_dump2,
path1, path2, source='readelf --debug-dump')
if difference:
differences.append(difference)
objdump1 = objdump_disassemble(path1)
objdump2 = objdump_disassemble(path2)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
objdump1, objdump2,
path1, path2, source='objdump --disassemble --full-contents')
if difference:
@@ -90,7 +90,7 @@ def compare_static_lib_files(path1, path2, source=None):
# look up differences in metadata
content1 = get_ar_content(path1)
content2 = get_ar_content(path2)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
content1, content2, path1, path2, source="metadata")
if difference:
differences.append(difference)
diff --git a/debbindiff/comparators/fonts.py b/debbindiff/comparators/fonts.py
index d8ee439..1db81da 100644
--- a/debbindiff/comparators/fonts.py
+++ b/debbindiff/comparators/fonts.py
@@ -34,7 +34,7 @@ def show_ttf(path):
def compare_ttf_files(path1, path2, source=None):
ttf1 = show_ttf(path1)
ttf2 = show_ttf(path2)
- difference = Difference.from_content(ttf1, ttf2, path1, path2, source='showttf')
+ difference = Difference.from_unicode(ttf1, ttf2, path1, path2, source='showttf')
if not difference:
return []
return [difference]
diff --git a/debbindiff/comparators/gettext.py b/debbindiff/comparators/gettext.py
index ad8b154..54c8da0 100644
--- a/debbindiff/comparators/gettext.py
+++ b/debbindiff/comparators/gettext.py
@@ -41,7 +41,7 @@ def msgunfmt(path):
def compare_mo_files(path1, path2, source=None):
mo1 = msgunfmt(path1)
mo2 = msgunfmt(path2)
- difference = Difference.from_content(mo1, mo2, path1, path2, source='msgunfmt')
+ difference = Difference.from_unicode(mo1, mo2, path1, path2, source='msgunfmt')
if not difference:
return []
return [difference]
diff --git a/debbindiff/comparators/gzip.py b/debbindiff/comparators/gzip.py
index 227e898..80f0521 100644
--- a/debbindiff/comparators/gzip.py
+++ b/debbindiff/comparators/gzip.py
@@ -52,7 +52,7 @@ def compare_gzip_files(path1, path2, source=None):
# check metadata
metadata1 = get_gzip_metadata(path1)
metadata2 = get_gzip_metadata(path2)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
metadata1, metadata2, path1, path2, source='metadata')
if difference:
differences.append(difference)
diff --git a/debbindiff/comparators/haskell.py b/debbindiff/comparators/haskell.py
index 7064b4e..abb0d0e 100644
--- a/debbindiff/comparators/haskell.py
+++ b/debbindiff/comparators/haskell.py
@@ -32,7 +32,7 @@ def show_iface(path):
def compare_hi_files(path1, path2, source=None):
iface1 = show_iface(path1)
iface2 = show_iface(path2)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
iface1, iface2, path1, path2, source='ghc --show-iface')
if not difference:
return []
diff --git a/debbindiff/comparators/pdf.py b/debbindiff/comparators/pdf.py
index b347f33..32d82b4 100644
--- a/debbindiff/comparators/pdf.py
+++ b/debbindiff/comparators/pdf.py
@@ -44,13 +44,13 @@ def compare_pdf_files(path1, path2, source=None):
src = get_source(path1, path2) or 'FILE'
text1 = pdftotext(path1)
text2 = pdftotext(path2)
- difference = Difference.from_content(text1, text2, path1, path2,
+ difference = Difference.from_unicode(text1, text2, path1, path2,
source="pdftotext %s" % src)
if difference:
differences.append(difference)
uncompressed1 = uncompress(path1)
uncompressed2 = uncompress(path2)
- difference = Difference.from_content(uncompressed1, uncompressed2, path1, path2,
+ difference = Difference.from_unicode(uncompressed1, uncompressed2, path1, path2,
source="pdftk %s output - uncompress" % src)
if difference:
differences.append(difference)
diff --git a/debbindiff/comparators/png.py b/debbindiff/comparators/png.py
index 582a137..3969b5a 100644
--- a/debbindiff/comparators/png.py
+++ b/debbindiff/comparators/png.py
@@ -38,7 +38,7 @@ def sng(path):
def compare_png_files(path1, path2, source=None):
sng1 = sng(path1)
sng2 = sng(path2)
- difference = Difference.from_content(sng1, sng2, path1, path2, source='sng')
+ difference = Difference.from_unicode(sng1, sng2, path1, path2, source='sng')
if not difference:
return []
return [difference]
diff --git a/debbindiff/comparators/rpm.py b/debbindiff/comparators/rpm.py
index 8c3e075..7018b27 100644
--- a/debbindiff/comparators/rpm.py
+++ b/debbindiff/comparators/rpm.py
@@ -80,7 +80,7 @@ def compare_rpm_files(path1, path2, source=None):
ts.setVSFlags(-1)
header1 = get_rpm_header(path1, ts)
header2 = get_rpm_header(path2, ts)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
header1, header2, path1, path2, source="header")
if difference:
differences.append(difference)
diff --git a/debbindiff/comparators/squashfs.py b/debbindiff/comparators/squashfs.py
index 8f17123..f383009 100644
--- a/debbindiff/comparators/squashfs.py
+++ b/debbindiff/comparators/squashfs.py
@@ -56,7 +56,7 @@ def compare_squashfs_files(path1, path2, source=None):
# compare metadata
content1 = get_squashfs_content(path1)
content2 = get_squashfs_content(path2)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
content1, content2, path1, path2, source="metadata")
if difference:
differences.append(difference)
diff --git a/debbindiff/comparators/tar.py b/debbindiff/comparators/tar.py
index dfc1384..cb2a736 100644
--- a/debbindiff/comparators/tar.py
+++ b/debbindiff/comparators/tar.py
@@ -68,7 +68,7 @@ def compare_tar_files(path1, path2, source=None):
# look up differences in file list and file metadata
content1 = get_tar_content(tar1).decode('utf-8')
content2 = get_tar_content(tar2).decode('utf-8')
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
content1, content2, path1, path2, source="metadata")
if difference:
differences.append(difference)
diff --git a/debbindiff/comparators/zip.py b/debbindiff/comparators/zip.py
index 3170a19..c17c00a 100644
--- a/debbindiff/comparators/zip.py
+++ b/debbindiff/comparators/zip.py
@@ -70,7 +70,7 @@ def compare_zip_files(path1, path2, source=None):
# search harder
zipinfo1 = get_zipinfo(path1, verbose=True)
zipinfo2 = get_zipinfo(path2, verbose=True)
- difference = Difference.from_content(
+ difference = Difference.from_unicode(
zipinfo1, zipinfo2, path1, path2, source="metadata")
if difference:
differences.append(difference)
diff --git a/debbindiff/difference.py b/debbindiff/difference.py
index 06a5cb4..9ee4cb5 100644
--- a/debbindiff/difference.py
+++ b/debbindiff/difference.py
@@ -24,6 +24,8 @@ from functools import partial
from tempfile import NamedTemporaryFile
import re
import subprocess
+import sys
+import traceback
from threading import Thread
from multiprocessing import Queue
from debbindiff import logger, tool_required, RequiredToolNotFound
@@ -164,6 +166,37 @@ def run_diff(fd1, fd2, end_nl_q1, end_nl_q2):
return parser.diff
+# inspired by https://stackoverflow.com/a/6874161
+class ExThread(Thread):
+ def __init__(self, *args, **kwargs):
+ super(ExThread, self).__init__(*args, **kwargs)
+ self.__status_queue = Queue()
+
+ def run(self, *args, **kwargs):
+ try:
+ super(ExThread, self).run(*args, **kwargs)
+ except Exception:
+ except_type, except_class, tb = sys.exc_info()
+ self.__status_queue.put((except_type, except_class, traceback.extract_tb(tb)))
+ self.__status_queue.put(None)
+
+ def wait_for_exc_info(self):
+ return self.__status_queue.get()
+
+ def join(self):
+ ex_info = self.wait_for_exc_info()
+ if ex_info is None:
+ return
+ else:
+ except_type, except_class, tb = ex_info
+ logger.debug('Exception: %s' %
+ traceback.format_exception_only(except_type, except_class)[0].strip())
+ logger.debug('Traceback:')
+ for line in traceback.format_list(tb):
+ logger.debug(line[:-1])
+ raise except_type, except_class, None
+
+
def feed(feeder, f, end_nl_q):
# work-around unified diff limitation: if there's no newlines in both
# don't make it a difference
@@ -178,15 +211,17 @@ def feed(feeder, f, end_nl_q):
def fd_from_feeder(feeder, end_nl_q):
pipe_r, pipe_w = os.pipe()
outf = os.fdopen(pipe_w, 'w')
- t = Thread(target=feed, args=(feeder, outf, end_nl_q))
+ t = ExThread(target=feed, args=(feeder, outf, end_nl_q))
t.daemon = True
t.start()
yield pipe_r
- t.join()
- outf.close()
+ try:
+ t.join()
+ finally:
+ outf.close()
-def make_feeder_from_content(content):
+def make_feeder_from_unicode(content):
def feeder(f):
for offset in range(0, len(content), DIFF_CHUNK):
f.write(content[offset:offset + DIFF_CHUNK].encode('utf-8'))
@@ -194,11 +229,19 @@ def make_feeder_from_content(content):
return feeder
-def diff(content1, content2):
+def make_feeder_from_file(in_file, filter=lambda buf: buf.encode('utf-8')):
+ def feeder(out_file):
+ end_nl = False
+ for buf in iter(in_file.readline, b''):
+ out_file.write(filter(buf))
+ end_nl = buf[-1] == '\n'
+ return end_nl
+ return feeder
+
+
+def diff(feeder1, feeder2):
end_nl_q1 = Queue()
end_nl_q2 = Queue()
- feeder1 = make_feeder_from_content(content1)
- feeder2 = make_feeder_from_content(content2)
with fd_from_feeder(feeder1, end_nl_q1) as fd1:
with fd_from_feeder(feeder2, end_nl_q2) as fd2:
return run_diff(fd1, fd2, end_nl_q1, end_nl_q2)
@@ -222,16 +265,12 @@ class Difference(object):
self._details = []
@staticmethod
- def from_content(content1, content2, path1, path2, source=None,
- comment=None):
+ def from_feeder(feeder1, feeder2, path1, path2, source=None,
+ comment=None):
actual_comment = comment
- if content1 and type(content1) is not unicode:
- raise UnicodeError('content1 has not been decoded')
- if content2 and type(content2) is not unicode:
- raise UnicodeError('content2 has not been decoded')
unified_diff = None
try:
- unified_diff = diff(content1, content2)
+ unified_diff = diff(feeder1, feeder2)
except RequiredToolNotFound:
actual_comment = 'diff is not available!'
if comment:
@@ -240,6 +279,18 @@ class Difference(object):
return None
return Difference(unified_diff, path1, path2, source, actual_comment)
+ @staticmethod
+ def from_unicode(content1, content2, *args, **kwargs):
+ return Difference.from_feeder(make_feeder_from_unicode(content1),
+ make_feeder_from_unicode(content2),
+ *args, **kwargs)
+
+ @staticmethod
+ def from_file(file1, file2, *args, **kwargs):
+ return Difference.from_feeder(make_feeder_from_file(file1),
+ make_feeder_from_file(file2),
+ *args, **kwargs)
+
@property
def comment(self):
return self._comment
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git
More information about the Reproducible-commits
mailing list