[Reproducible-commits] [debbindiff] 01/02: Add support for comparing xz files

Jérémy Bobbio lunar at moszumanska.debian.org
Mon Sep 29 13:55:06 UTC 2014


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository debbindiff.

commit 89131e23be256457307ccf09ed622555219e4a5e
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Sun Sep 28 18:56:51 2014 +0200

    Add support for comparing xz files
---
 debbindiff.py | 155 +++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 116 insertions(+), 39 deletions(-)

diff --git a/debbindiff.py b/debbindiff.py
index 3e28061..23dfe1c 100755
--- a/debbindiff.py
+++ b/debbindiff.py
@@ -25,7 +25,12 @@ import difflib
 import os.path
 import re
 import magic
+import hashlib
 import codecs
+import tempfile
+import shutil
+import subprocess
+from contextlib import contextmanager
 from debbindiff.changes import Changes
 from debbindiff.pyxxd import hexdump
 from debbindiff import logger
@@ -39,37 +44,41 @@ class Difference(object):
         else:
             self._source1 = path1
             self._source2 = path2
-        self._in_sources1 = []
-        self._in_sources2 = []
         self._lines1 = lines1
         self._lines2 = lines2
         self._comment = comment
         self._details = []
 
-    def get_diff(self):
+    @property
+    def comment(self):
+        return self._comment
+
+    @comment.setter
+    def set_comment(self, comment):
+        self._comment = comment
+
+    def get_diff(self, in_sources1=[], in_sources2=[]):
         if self._comment:
             yield '\n'
             for line in self._comment.split('\n'):
                 yield line
             yield '\n\n'
-        sources1 = self._in_sources1 + [self._source1]
-        sources2 = self._in_sources2 + [self._source2]
-        fromfile1 = " → ".join(sources1)
-        fromfile2 = " → ".join(sources2)
-        for line in difflib.unified_diff(self._lines1, self._lines2,
-                                         fromfile=fromfile1,
-                                         tofile=fromfile2,  n=0):
-            if not line.endswith('\n'):
-                line += '\n'
-            yield line
+        sources1 = in_sources1 + [self._source1]
+        sources2 = in_sources2 + [self._source2]
+        if self._lines1 is not None and self._lines2 is not None:
+            fromfile1 = " -> ".join(sources1)
+            fromfile2 = " -> ".join(sources2)
+            for line in difflib.unified_diff(self._lines1, self._lines2,
+                                             fromfile=fromfile1,
+                                             tofile=fromfile2,  n=0):
+                if not line.endswith('\n'):
+                    line += '\n'
+                yield line
         for detail in self._details:
-            for line in detail.get_diff():
+            for line in detail.get_diff(sources1, sources2):
                 yield line
 
     def add_details(self, differences):
-        for difference in differences:
-            difference._in_sources1 = self._in_sources1 + self._source1
-            difference._in_sources2 = self._in_sources2 + self._source2
         self._details.extend(differences)
 
 DOT_CHANGES_FIELDS = [
@@ -79,7 +88,7 @@ DOT_CHANGES_FIELDS = [
         "Changes"
     ]
 
-def compare_changes_files(path1, path2):
+def compare_changes_files(path1, path2, source=None):
     try:
         dot_changes1 = Changes(filename=path1)
         dot_changes1.validate(check_signature=False)
@@ -96,20 +105,24 @@ def compare_changes_files(path1, path2):
                 ["%s: %s" % (field, dot_changes1[field])],
                 ["%s: %s" % (field, dot_changes2[field])],
                 dot_changes1.get_changes_file(),
-                dot_changes2.get_changes_file()))
+                dot_changes2.get_changes_file(),
+                source=source))
 
     # This will handle differences in the list of files, checksums, priority
     # and section
     files1 = dot_changes1.get('Files')
     files2 = dot_changes2.get('Files')
     logger.debug(dot_changes1.get_as_string('Files'))
-    if files1 != files2:
-        differences.append(Difference(
-            dot_changes1.get_as_string('Files').splitlines(1),
-            dot_changes2.get_as_string('Files').splitlines(1),
-            dot_changes1.get_changes_file(),
-            dot_changes2.get_changes_file(),
-            comment="List of files does not match"))
+    if files1 == files2:
+        return differences
+
+    files_difference = Difference(
+        dot_changes1.get_as_string('Files').splitlines(1),
+        dot_changes2.get_as_string('Files').splitlines(1),
+        dot_changes1.get_changes_file(),
+        dot_changes2.get_changes_file(),
+        source=source,
+        comment="List of files does not match")
 
     files1 = dict([(d['name'], d) for d in files1])
     files2 = dict([(d['name'], d) for d in files2])
@@ -119,13 +132,13 @@ def compare_changes_files(path1, path2):
         d2 = files2[filename]
         if d1['md5sum'] != d2['md5sum']:
             logger.debug("%s mentioned in .changes have differences" % filename)
-            differences += compare_files(dot_changes1.get_path(filename),
-                                         dot_changes2.get_path(filename))
-    return differences
+            files_difference.add_details(compare_files(dot_changes1.get_path(filename),
+                                                       dot_changes2.get_path(filename),
+                                                       source=get_source(dot_changes1.get_path(filename),
+                                                                         dot_changes2.get_path(filename))))
 
-COMPARATORS = [
-        (None, r'\.changes$', compare_changes_files),
-    ]
+    differences.append(files_difference)
+    return differences
 
 def guess_mime_type(path):
     if not hasattr(guess_mime_type, 'mimedb'):
@@ -133,6 +146,65 @@ def guess_mime_type(path):
         guess_mime_type.mimedb.load()
     return guess_mime_type.mimedb.file(path)
 
+
+def are_same_binaries(path1, path2):
+    BUF_SIZE = 20 * 2 ** 10 # 20 kB
+    h1 = hashlib.md5()
+    f1 = open(path1, 'rb')
+    h2 = hashlib.md5()
+    f2 = open(path2, 'rb')
+    while True:
+        buf1 = f1.read(BUF_SIZE)
+        buf2 = f2.read(BUF_SIZE)
+        if not buf1 or not buf2:
+            return False
+        h1.update(buf1)
+        h2.update(buf2)
+        if h1.digest() != h2.digest():
+            return False
+    return True
+
+def get_source(path1, path2):
+    if os.path.basename(path1) == os.path.basename(path2):
+        return os.path.basename(path1)
+    return None
+
+@contextmanager
+def make_temp_directory():
+    temp_dir = tempfile.mkdtemp(suffix='debbindiff')
+    yield temp_dir
+    shutil.rmtree(temp_dir)
+
+@contextmanager
+def decompress_xz(path):
+    with make_temp_directory() as temp_dir:
+        if path.endswith('.xz'):
+            temp_path = os.path.join(temp_dir, os.path.basename(path[:-3]))
+        else:
+            temp_path = os.path.join(temp_dir, "%s-content" % path)
+        with open(temp_path, 'wb') as temp_file:
+            subprocess.check_call(
+                ["xz", "--decompress", "--stdout", path],
+                shell=False, stdout=temp_file, stderr=None)
+            yield temp_path
+
+def compare_xz_files(path1, path2, source=None):
+    if are_same_binaries(path1, path2):
+        return []
+
+    with decompress_xz(path1) as new_path1:
+        with decompress_xz(path2) as new_path2:
+            inside_differences = compare_files(new_path1, new_path2, source=get_source(new_path1, new_path2))
+
+    # no differences detected inside? let's at least do a binary diff
+    if len(inside_differences) == 0:
+        difference = compare_binary_files(path1, path2)[0]
+        difference.comment = "No differences found inside, yet compressed data differs"
+    else:
+        difference = Difference(None, None, path1, path2, source=get_source(path1, path2))
+        difference.add_details(inside_differences)
+    return [difference]
+
 def compare_text_files(path1, path2, encoding, source=None):
     lines1 = codecs.open(path1, 'r', encoding=encoding).readlines()
     lines2 = codecs.open(path2, 'r', encoding=encoding).readlines()
@@ -147,7 +219,12 @@ def compare_binary_files(path1, path2, source=None):
         return []
     return [Difference(hexdump1.splitlines(1), hexdump2.splitlines(1), path1, path2, source)]
 
-def compare_unknown(path1, path2):
+COMPARATORS = [
+        (None,                      r'\.changes$', compare_changes_files),
+        (r'^application/x-xz(;|$)', r'\.xz$',      compare_xz_files)
+    ]
+
+def compare_unknown(path1, path2, source=None):
     logger.debug("compare unknown path: %s and %s" % (path1, path2))
     mime_type1 = guess_mime_type(path1)
     mime_type2 = guess_mime_type(path2)
@@ -159,10 +236,10 @@ def compare_unknown(path1, path2):
             encoding = encodings1[0]
         else:
             encoding = None
-        return compare_text_files(path1, path2, encoding)
-    return compare_binary_files(path1, path2)
+        return compare_text_files(path1, path2, encoding, source)
+    return compare_binary_files(path1, path2, source)
 
-def compare_files(path1, path2):
+def compare_files(path1, path2, source=None):
     if not os.path.isfile(path1):
         logger.critical("%s is not a file" % path1)
         sys.exit(2)
@@ -174,10 +251,10 @@ def compare_files(path1, path2):
             mime_type1 = guess_mime_type(path1)
             mime_type2 = guess_mime_type(path2)
             if re.search(mime_type_regex, mime_type1) and re.search(mime_type_regex, mime_type2):
-                return comparator(path1, path2)
+                return comparator(path1, path2, source)
         if filename_regex and re.search(filename_regex, path1) and re.search(filename_regex, path2):
-            return comparator(path1, path2)
-    return compare_unknown(path1, path2)
+            return comparator(path1, path2, source)
+    return compare_unknown(path1, path2, source)
 
 def main():
     if len(sys.argv) != 3:

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git



More information about the Reproducible-commits mailing list