[Reproducible-commits] [debbindiff] 01/10: Split everything in little modules

Jérémy Bobbio lunar at moszumanska.debian.org
Mon Sep 29 18:29:23 UTC 2014


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository debbindiff.

commit dcd90da6627e09b0d93660ab3957ee67a1b1ae35
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Mon Sep 29 16:31:04 2014 +0200

    Split everything in little modules
---
 .gitignore                         |   2 +
 debbindiff.py                      | 292 +------------------------------------
 debbindiff/comparators/__init__.py |  73 ++++++++++
 debbindiff/comparators/binary.py   |  30 ++++
 debbindiff/comparators/changes.py  |  83 +++++++++++
 debbindiff/comparators/tar.py      |  76 ++++++++++
 debbindiff/comparators/text.py     |  28 ++++
 debbindiff/comparators/utils.py    |  64 ++++++++
 debbindiff/comparators/xz.py       |  47 ++++++
 debbindiff/difference.py           |  73 ++++++++++
 10 files changed, 478 insertions(+), 290 deletions(-)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c9834df
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.sw[p-z]
+*.pyc
diff --git a/debbindiff.py b/debbindiff.py
index 13b2e7a..b478475 100755
--- a/debbindiff.py
+++ b/debbindiff.py
@@ -21,301 +21,13 @@
 from __future__ import print_function
 
 import sys
-import difflib
-import os.path
-import re
-import magic
-import hashlib
-import codecs
-import tempfile
-import shutil
-import subprocess
-import tarfile
-from StringIO import StringIO
-from contextlib import contextmanager
-from debbindiff.changes import Changes
-from debbindiff.pyxxd import hexdump
-from debbindiff import logger
-
-class Difference(object):
-    def __init__(self, lines1, lines2, path1, path2, source=None, comment=None):
-        # allow to override declared file paths, useful when comparing tempfiles
-        if source:
-            self._source1 = source
-            self._source2 = source
-        else:
-            self._source1 = path1
-            self._source2 = path2
-        self._lines1 = lines1
-        self._lines2 = lines2
-        self._comment = comment
-        self._details = []
-
-    @property
-    def comment(self):
-        return self._comment
-
-    @comment.setter
-    def set_comment(self, comment):
-        self._comment = comment
-
-    def get_diff(self, in_sources1=[], in_sources2=[]):
-        if self._comment:
-            yield '\n'
-            for line in self._comment.split('\n'):
-                yield line
-            yield '\n\n'
-        sources1 = in_sources1 + [self._source1]
-        sources2 = in_sources2 + [self._source2]
-        if self._lines1 is not None and self._lines2 is not None:
-            fromfile1 = " -> ".join(sources1)
-            fromfile2 = " -> ".join(sources2)
-            for line in difflib.unified_diff(self._lines1, self._lines2,
-                                             fromfile=fromfile1,
-                                             tofile=fromfile2,  n=0):
-                if not line.endswith('\n'):
-                    line += '\n'
-                yield line
-        for detail in self._details:
-            for line in detail.get_diff(sources1, sources2):
-                yield line
-
-    def add_details(self, differences):
-        self._details.extend(differences)
-
-DOT_CHANGES_FIELDS = [
-        "Format", "Source", "Binary", "Architecture",
-        "Version", "Distribution", "Urgency",
-        "Maintainer", "Changed-By", "Description",
-        "Changes"
-    ]
-
-def compare_changes_files(path1, path2, source=None):
-    try:
-        dot_changes1 = Changes(filename=path1)
-        dot_changes1.validate(check_signature=False)
-        dot_changes2 = Changes(filename=path2)
-        dot_changes2.validate(check_signature=False)
-    except IOError, e:
-        logger.critical(e)
-        sys.exit(2)
-
-    differences = []
-    for field in DOT_CHANGES_FIELDS:
-        if dot_changes1[field] != dot_changes2[field]:
-            differences.append(Difference(
-                ["%s: %s" % (field, dot_changes1[field])],
-                ["%s: %s" % (field, dot_changes2[field])],
-                dot_changes1.get_changes_file(),
-                dot_changes2.get_changes_file(),
-                source=source))
-
-    # This will handle differences in the list of files, checksums, priority
-    # and section
-    files1 = dot_changes1.get('Files')
-    files2 = dot_changes2.get('Files')
-    logger.debug(dot_changes1.get_as_string('Files'))
-    if files1 == files2:
-        return differences
-
-    files_difference = Difference(
-        dot_changes1.get_as_string('Files').splitlines(1),
-        dot_changes2.get_as_string('Files').splitlines(1),
-        dot_changes1.get_changes_file(),
-        dot_changes2.get_changes_file(),
-        source=source,
-        comment="List of files does not match")
-
-    files1 = dict([(d['name'], d) for d in files1])
-    files2 = dict([(d['name'], d) for d in files2])
-
-    for filename in sorted(set(files1.keys()).union(files2.keys())):
-        d1 = files1[filename]
-        d2 = files2[filename]
-        if d1['md5sum'] != d2['md5sum']:
-            logger.debug("%s mentioned in .changes have differences" % filename)
-            files_difference.add_details(compare_files(dot_changes1.get_path(filename),
-                                                       dot_changes2.get_path(filename),
-                                                       source=get_source(dot_changes1.get_path(filename),
-                                                                         dot_changes2.get_path(filename))))
-
-    differences.append(files_difference)
-    return differences
-
-def guess_mime_type(path):
-    if not hasattr(guess_mime_type, 'mimedb'):
-        guess_mime_type.mimedb = magic.open(magic.MIME)
-        guess_mime_type.mimedb.load()
-    return guess_mime_type.mimedb.file(path)
-
-
-def are_same_binaries(path1, path2):
-    BUF_SIZE = 20 * 2 ** 10 # 20 kB
-    h1 = hashlib.md5()
-    f1 = open(path1, 'rb')
-    h2 = hashlib.md5()
-    f2 = open(path2, 'rb')
-    while True:
-        buf1 = f1.read(BUF_SIZE)
-        buf2 = f2.read(BUF_SIZE)
-        if not buf1 or not buf2:
-            return False
-        h1.update(buf1)
-        h2.update(buf2)
-        if h1.digest() != h2.digest():
-            return False
-    return True
-
-def get_source(path1, path2):
-    if os.path.basename(path1) == os.path.basename(path2):
-        return os.path.basename(path1)
-    return None
-
-@contextmanager
-def make_temp_directory():
-    temp_dir = tempfile.mkdtemp(suffix='debbindiff')
-    yield temp_dir
-    shutil.rmtree(temp_dir)
-
-@contextmanager
-def decompress_xz(path):
-    with make_temp_directory() as temp_dir:
-        if path.endswith('.xz'):
-            temp_path = os.path.join(temp_dir, os.path.basename(path[:-3]))
-        else:
-            temp_path = os.path.join(temp_dir, "%s-content" % path)
-        with open(temp_path, 'wb') as temp_file:
-            subprocess.check_call(
-                ["xz", "--decompress", "--stdout", path],
-                shell=False, stdout=temp_file, stderr=None)
-            yield temp_path
-
-# decorator that will create a fallback on binary diff if no differences
-# are detected
-def binary_fallback(original_function):
-    def with_fallback(path1, path2, source=None):
-        if are_same_binaries(path1, path2):
-            return []
-        inside_differences = original_function(path1, path2, source)
-        # no differences detected inside? let's at least do a binary diff
-        if len(inside_differences) == 0:
-            difference = compare_binary_files(path1, path2)[0]
-            difference.comment = "No differences found inside, yet data differs"
-        else:
-            difference = Difference(None, None, path1, path2, source=get_source(path1, path2))
-            difference.add_details(inside_differences)
-        return [difference]
-    return with_fallback
-
-@binary_fallback
-def compare_xz_files(path1, path2, source=None):
-    with decompress_xz(path1) as new_path1:
-        with decompress_xz(path2) as new_path2:
-            return compare_files(new_path1, new_path2, source=get_source(new_path1, new_path2))
-
-def get_tar_content(tar):
-    orig_stdout = sys.stdout
-    output = StringIO()
-    try:
-        sys.stdout = output
-        tar.list(verbose=True)
-        return output.getvalue()
-    finally:
-        sys.stdout = orig_stdout
-
-@binary_fallback
-def compare_tar_files(path1, path2, source=None):
-    difference = None
-    content_differences = []
-    with tarfile.open(path1, 'r') as tar1:
-        with tarfile.open(path2, 'r') as tar2:
-            # look up differences in content
-            with make_temp_directory() as temp_dir1:
-                with make_temp_directory() as temp_dir2:
-                    logger.debug('content1 %s' % (tar1.getnames(),))
-                    logger.debug('content2 %s' % (tar2.getnames(),))
-                    for name in sorted(set(tar1.getnames()).intersection(tar2.getnames())):
-                        member1 = tar1.getmember(name)
-                        member2 = tar2.getmember(name)
-                        if not member1.isfile() or not member2.isfile():
-                            continue
-                        logger.debug('extract member %s' % (name,))
-                        tar1.extract(name, temp_dir1)
-                        tar2.extract(name, temp_dir2)
-                        content_differences.extend(
-                            compare_files(os.path.join(temp_dir1, name),
-                                          os.path.join(temp_dir2, name),
-                                          source=name))
-            # look up differences in file list and file metadata
-            content1 = get_tar_content(tar1)
-            content2 = get_tar_content(tar2)
-            if content1 != content2:
-                difference = Difference(content1.splitlines(1), content2.splitlines(1), path1, path2, source)
-            elif len(content_differences) >= 0:
-                difference = Difference(None, None, path1, path2, source)
-    if difference:
-        difference.add_details(content_differences)
-        return [difference]
-    else:
-        return []
-
-def compare_text_files(path1, path2, encoding, source=None):
-    lines1 = codecs.open(path1, 'r', encoding=encoding).readlines()
-    lines2 = codecs.open(path2, 'r', encoding=encoding).readlines()
-    if lines1 == lines2:
-        return []
-    return [Difference(lines1, lines2, path1, path2, source)]
-
-def compare_binary_files(path1, path2, source=None):
-    hexdump1 = hexdump(open(path1, 'rb').read())
-    hexdump2 = hexdump(open(path2, 'rb').read())
-    if hexdump1 == hexdump2:
-        return []
-    return [Difference(hexdump1.splitlines(1), hexdump2.splitlines(1), path1, path2, source)]
-
-COMPARATORS = [
-        (None,                       r'\.changes$', compare_changes_files),
-        (r'^application/x-xz(;|$)',  r'\.xz$',      compare_xz_files),
-        (r'^application/x-tar(;|$)', r'\.tar$',     compare_tar_files),
-    ]
-
-def compare_unknown(path1, path2, source=None):
-    logger.debug("compare unknown path: %s and %s" % (path1, path2))
-    mime_type1 = guess_mime_type(path1)
-    mime_type2 = guess_mime_type(path2)
-    logger.debug("mime_type1: %s | mime_type2: %s" % (mime_type1, mime_type2))
-    if mime_type1.startswith('text/') and mime_type2.startswith('text/'):
-        encodings1 = re.findall(r'; charset=([^ ]+)', mime_type1)
-        encodings2 = re.findall(r'; charset=([^ ]+)', mime_type2)
-        if len(encodings1) > 0 and encodings1 == encodings2:
-            encoding = encodings1[0]
-        else:
-            encoding = None
-        return compare_text_files(path1, path2, encoding, source)
-    return compare_binary_files(path1, path2, source)
-
-def compare_files(path1, path2, source=None):
-    if not os.path.isfile(path1):
-        logger.critical("%s is not a file" % path1)
-        sys.exit(2)
-    if not os.path.isfile(path2):
-        logger.critical("%s is not a file" % path2)
-        sys.exit(2)
-    for mime_type_regex, filename_regex, comparator in COMPARATORS:
-        if mime_type_regex:
-            mime_type1 = guess_mime_type(path1)
-            mime_type2 = guess_mime_type(path2)
-            if re.search(mime_type_regex, mime_type1) and re.search(mime_type_regex, mime_type2):
-                return comparator(path1, path2, source)
-        if filename_regex and re.search(filename_regex, path1) and re.search(filename_regex, path2):
-            return comparator(path1, path2, source)
-    return compare_unknown(path1, path2, source)
+import debbindiff.comparators
 
 def main():
     if len(sys.argv) != 3:
         print("Usage: %s FILE1 FILE2")
         sys.exit(2)
-    differences = compare_files(sys.argv[1], sys.argv[2])
+    differences = debbindiff.comparators.compare_files(sys.argv[1], sys.argv[2])
     for difference in differences:
         for line in difference.get_diff():
             print(line, end='')
diff --git a/debbindiff/comparators/__init__.py b/debbindiff/comparators/__init__.py
new file mode 100644
index 0000000..d72e981
--- /dev/null
+++ b/debbindiff/comparators/__init__.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
+
+import magic
+import os.path
+import re
+from debbindiff import logger
+from debbindiff.difference import Difference, get_source
+from debbindiff.comparators.binary import compare_binary_files
+from debbindiff.comparators.changes import compare_changes_files
+from debbindiff.comparators.text import compare_text_files
+from debbindiff.comparators.tar import compare_tar_files
+from debbindiff.comparators.xz import compare_xz_files
+
+def guess_mime_type(path):
+    if not hasattr(guess_mime_type, 'mimedb'):
+        guess_mime_type.mimedb = magic.open(magic.MIME)
+        guess_mime_type.mimedb.load()
+    return guess_mime_type.mimedb.file(path)
+
+def compare_unknown(path1, path2, source=None):
+    logger.debug("compare unknown path: %s and %s" % (path1, path2))
+    mime_type1 = guess_mime_type(path1)
+    mime_type2 = guess_mime_type(path2)
+    logger.debug("mime_type1: %s | mime_type2: %s" % (mime_type1, mime_type2))
+    if mime_type1.startswith('text/') and mime_type2.startswith('text/'):
+        encodings1 = re.findall(r'; charset=([^ ]+)', mime_type1)
+        encodings2 = re.findall(r'; charset=([^ ]+)', mime_type2)
+        if len(encodings1) > 0 and encodings1 == encodings2:
+            encoding = encodings1[0]
+        else:
+            encoding = None
+        return compare_text_files(path1, path2, encoding, source)
+    return compare_binary_files(path1, path2, source)
+
+COMPARATORS = [
+        (None,                       r'\.changes$', compare_changes_files),
+        (r'^application/x-xz(;|$)',  r'\.xz$',      compare_xz_files),
+        (r'^application/x-tar(;|$)', r'\.tar$',     compare_tar_files),
+    ]
+
+def compare_files(path1, path2, source=None):
+    if not os.path.isfile(path1):
+        logger.critical("%s is not a file" % path1)
+        sys.exit(2)
+    if not os.path.isfile(path2):
+        logger.critical("%s is not a file" % path2)
+        sys.exit(2)
+    for mime_type_regex, filename_regex, comparator in COMPARATORS:
+        if mime_type_regex:
+            mime_type1 = guess_mime_type(path1)
+            mime_type2 = guess_mime_type(path2)
+            if re.search(mime_type_regex, mime_type1) and re.search(mime_type_regex, mime_type2):
+                return comparator(path1, path2, source)
+        if filename_regex and re.search(filename_regex, path1) and re.search(filename_regex, path2):
+            return comparator(path1, path2, source)
+    return compare_unknown(path1, path2, source)
diff --git a/debbindiff/comparators/binary.py b/debbindiff/comparators/binary.py
new file mode 100644
index 0000000..c8695aa
--- /dev/null
+++ b/debbindiff/comparators/binary.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
+
+from debbindiff.difference import Difference
+from debbindiff.pyxxd import hexdump
+
+def compare_binary_files(path1, path2, source=None):
+    hexdump1 = hexdump(open(path1, 'rb').read())
+    hexdump2 = hexdump(open(path2, 'rb').read())
+    if hexdump1 == hexdump2:
+        return []
+    return [Difference(hexdump1.splitlines(1), hexdump2.splitlines(1), path1, path2, source)]
+
+
diff --git a/debbindiff/comparators/changes.py b/debbindiff/comparators/changes.py
new file mode 100644
index 0000000..27adf3f
--- /dev/null
+++ b/debbindiff/comparators/changes.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
+
+from debbindiff import logger
+from debbindiff.changes import Changes
+import debbindiff.comparators
+from debbindiff.difference import Difference, get_source
+
+DOT_CHANGES_FIELDS = [
+        "Format", "Source", "Binary", "Architecture",
+        "Version", "Distribution", "Urgency",
+        "Maintainer", "Changed-By", "Description",
+        "Changes"
+    ]
+
+def compare_changes_files(path1, path2, source=None):
+    try:
+        dot_changes1 = Changes(filename=path1)
+        dot_changes1.validate(check_signature=False)
+        dot_changes2 = Changes(filename=path2)
+        dot_changes2.validate(check_signature=False)
+    except IOError, e:
+        logger.critical(e)
+        sys.exit(2)
+
+    differences = []
+    for field in DOT_CHANGES_FIELDS:
+        if dot_changes1[field] != dot_changes2[field]:
+            differences.append(Difference(
+                ["%s: %s" % (field, dot_changes1[field])],
+                ["%s: %s" % (field, dot_changes2[field])],
+                dot_changes1.get_changes_file(),
+                dot_changes2.get_changes_file(),
+                source=source))
+
+    # This will handle differences in the list of files, checksums, priority
+    # and section
+    files1 = dot_changes1.get('Files')
+    files2 = dot_changes2.get('Files')
+    logger.debug(dot_changes1.get_as_string('Files'))
+    if files1 == files2:
+        return differences
+
+    files_difference = Difference(
+        dot_changes1.get_as_string('Files').splitlines(1),
+        dot_changes2.get_as_string('Files').splitlines(1),
+        dot_changes1.get_changes_file(),
+        dot_changes2.get_changes_file(),
+        source=source,
+        comment="List of files does not match")
+
+    files1 = dict([(d['name'], d) for d in files1])
+    files2 = dict([(d['name'], d) for d in files2])
+
+    for filename in sorted(set(files1.keys()).union(files2.keys())):
+        d1 = files1[filename]
+        d2 = files2[filename]
+        if d1['md5sum'] != d2['md5sum']:
+            logger.debug("%s mentioned in .changes have differences" % filename)
+            files_difference.add_details(
+                debbindiff.comparators.compare_files(dot_changes1.get_path(filename),
+                                                     dot_changes2.get_path(filename),
+                                                     source=get_source(dot_changes1.get_path(filename),
+                                                                       dot_changes2.get_path(filename))))
+
+    differences.append(files_difference)
+    return differences
diff --git a/debbindiff/comparators/tar.py b/debbindiff/comparators/tar.py
new file mode 100644
index 0000000..65af2de
--- /dev/null
+++ b/debbindiff/comparators/tar.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
+
+import os.path
+from StringIO import StringIO
+import sys
+import tarfile
+from debbindiff import logger
+from debbindiff.difference import Difference
+import debbindiff.comparators
+from debbindiff.comparators.utils import binary_fallback, make_temp_directory
+
+def get_tar_content(tar):
+    orig_stdout = sys.stdout
+    output = StringIO()
+    try:
+        sys.stdout = output
+        tar.list(verbose=True)
+        return output.getvalue()
+    finally:
+        sys.stdout = orig_stdout
+
+@binary_fallback
+def compare_tar_files(path1, path2, source=None):
+    difference = None
+    content_differences = []
+    with tarfile.open(path1, 'r') as tar1:
+        with tarfile.open(path2, 'r') as tar2:
+            # look up differences in content
+            with make_temp_directory() as temp_dir1:
+                with make_temp_directory() as temp_dir2:
+                    logger.debug('content1 %s' % (tar1.getnames(),))
+                    logger.debug('content2 %s' % (tar2.getnames(),))
+                    for name in sorted(set(tar1.getnames()).intersection(tar2.getnames())):
+                        member1 = tar1.getmember(name)
+                        member2 = tar2.getmember(name)
+                        if not member1.isfile() or not member2.isfile():
+                            continue
+                        logger.debug('extract member %s' % (name,))
+                        tar1.extract(name, temp_dir1)
+                        tar2.extract(name, temp_dir2)
+                        content_differences.extend(
+                            debbindiff.comparators.compare_files(
+                                os.path.join(temp_dir1, name),
+                                os.path.join(temp_dir2, name),
+                                source=name))
+            # look up differences in file list and file metadata
+            content1 = get_tar_content(tar1)
+            content2 = get_tar_content(tar2)
+            if content1 != content2:
+                difference = Difference(content1.splitlines(1), content2.splitlines(1), path1, path2, source)
+            elif len(content_differences) >= 0:
+                difference = Difference(None, None, path1, path2, source)
+    if difference:
+        difference.add_details(content_differences)
+        return [difference]
+    else:
+        return []
+
+
diff --git a/debbindiff/comparators/text.py b/debbindiff/comparators/text.py
new file mode 100644
index 0000000..db9e295
--- /dev/null
+++ b/debbindiff/comparators/text.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
+
+import codecs
+from debbindiff.difference import Difference
+
+def compare_text_files(path1, path2, encoding, source=None):
+    lines1 = codecs.open(path1, 'r', encoding=encoding).readlines()
+    lines2 = codecs.open(path2, 'r', encoding=encoding).readlines()
+    if lines1 == lines2:
+        return []
+    return [Difference(lines1, lines2, path1, path2, source)]
diff --git a/debbindiff/comparators/utils.py b/debbindiff/comparators/utils.py
new file mode 100644
index 0000000..9f44a4d
--- /dev/null
+++ b/debbindiff/comparators/utils.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
+
+from contextlib import contextmanager
+import hashlib
+import shutil
+import tempfile
+from debbindiff.difference import Difference, get_source
+
+def are_same_binaries(path1, path2):
+    BUF_SIZE = 20 * 2 ** 10 # 20 kB
+    h1 = hashlib.md5()
+    f1 = open(path1, 'rb')
+    h2 = hashlib.md5()
+    f2 = open(path2, 'rb')
+    while True:
+        buf1 = f1.read(BUF_SIZE)
+        buf2 = f2.read(BUF_SIZE)
+        if not buf1 or not buf2:
+            return False
+        h1.update(buf1)
+        h2.update(buf2)
+        if h1.digest() != h2.digest():
+            return False
+    return True
+
+# decorator that will create a fallback on binary diff if no differences
+# are detected
+def binary_fallback(original_function):
+    def with_fallback(path1, path2, source=None):
+        if are_same_binaries(path1, path2):
+            return []
+        inside_differences = original_function(path1, path2, source)
+        # no differences detected inside? let's at least do a binary diff
+        if len(inside_differences) == 0:
+            difference = compare_binary_files(path1, path2)[0]
+            difference.comment = "No differences found inside, yet data differs"
+        else:
+            difference = Difference(None, None, path1, path2, source=get_source(path1, path2))
+            difference.add_details(inside_differences)
+        return [difference]
+    return with_fallback
+
+@contextmanager
+def make_temp_directory():
+    temp_dir = tempfile.mkdtemp(suffix='debbindiff')
+    yield temp_dir
+    shutil.rmtree(temp_dir)
diff --git a/debbindiff/comparators/xz.py b/debbindiff/comparators/xz.py
new file mode 100644
index 0000000..83c5a22
--- /dev/null
+++ b/debbindiff/comparators/xz.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
+
+from contextlib import contextmanager
+import os.path
+import subprocess
+import debbindiff.comparators
+from debbindiff.comparators.utils import binary_fallback, make_temp_directory
+from debbindiff.difference import get_source
+
+@contextmanager
+def decompress_xz(path):
+    with make_temp_directory() as temp_dir:
+        if path.endswith('.xz'):
+            temp_path = os.path.join(temp_dir, os.path.basename(path[:-3]))
+        else:
+            temp_path = os.path.join(temp_dir, "%s-content" % path)
+        with open(temp_path, 'wb') as temp_file:
+            subprocess.check_call(
+                ["xz", "--decompress", "--stdout", path],
+                shell=False, stdout=temp_file, stderr=None)
+            yield temp_path
+
+@binary_fallback
+def compare_xz_files(path1, path2, source=None):
+    with decompress_xz(path1) as new_path1:
+        with decompress_xz(path2) as new_path2:
+            return debbindiff.comparators.compare_files(
+                new_path1, new_path2,
+                source=get_source(new_path1, new_path2))
+
diff --git a/debbindiff/difference.py b/debbindiff/difference.py
new file mode 100644
index 0000000..3699eda
--- /dev/null
+++ b/debbindiff/difference.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
+
+import difflib
+import os.path
+
+class Difference(object):
+    def __init__(self, lines1, lines2, path1, path2, source=None, comment=None):
+        # allow to override declared file paths, useful when comparing tempfiles
+        if source:
+            self._source1 = source
+            self._source2 = source
+        else:
+            self._source1 = path1
+            self._source2 = path2
+        self._lines1 = lines1
+        self._lines2 = lines2
+        self._comment = comment
+        self._details = []
+
+    @property
+    def comment(self):
+        return self._comment
+
+    @comment.setter
+    def set_comment(self, comment):
+        self._comment = comment
+
+    def get_diff(self, in_sources1=[], in_sources2=[]):
+        if self._comment:
+            yield '\n'
+            for line in self._comment.split('\n'):
+                yield line
+            yield '\n\n'
+        sources1 = in_sources1 + [self._source1]
+        sources2 = in_sources2 + [self._source2]
+        if self._lines1 is not None and self._lines2 is not None:
+            fromfile1 = " -> ".join(sources1)
+            fromfile2 = " -> ".join(sources2)
+            for line in difflib.unified_diff(self._lines1, self._lines2,
+                                             fromfile=fromfile1,
+                                             tofile=fromfile2,  n=0):
+                if not line.endswith('\n'):
+                    line += '\n'
+                yield line
+        for detail in self._details:
+            for line in detail.get_diff(sources1, sources2):
+                yield line
+
+    def add_details(self, differences):
+        self._details.extend(differences)
+
+def get_source(path1, path2):
+    if os.path.basename(path1) == os.path.basename(path2):
+        return os.path.basename(path1)
+    return None
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git



More information about the Reproducible-commits mailing list