[Reproducible-commits] [debbindiff] 01/10: Split everything in little modules
Jérémy Bobbio
lunar at moszumanska.debian.org
Mon Sep 29 18:29:23 UTC 2014
This is an automated email from the git hooks/post-receive script.
lunar pushed a commit to branch master
in repository debbindiff.
commit dcd90da6627e09b0d93660ab3957ee67a1b1ae35
Author: Jérémy Bobbio <lunar at debian.org>
Date: Mon Sep 29 16:31:04 2014 +0200
Split everything in little modules
---
.gitignore | 2 +
debbindiff.py | 292 +------------------------------------
debbindiff/comparators/__init__.py | 73 ++++++++++
debbindiff/comparators/binary.py | 30 ++++
debbindiff/comparators/changes.py | 83 +++++++++++
debbindiff/comparators/tar.py | 76 ++++++++++
debbindiff/comparators/text.py | 28 ++++
debbindiff/comparators/utils.py | 64 ++++++++
debbindiff/comparators/xz.py | 47 ++++++
debbindiff/difference.py | 73 ++++++++++
10 files changed, 478 insertions(+), 290 deletions(-)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c9834df
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.sw[p-z]
+*.pyc
diff --git a/debbindiff.py b/debbindiff.py
index 13b2e7a..b478475 100755
--- a/debbindiff.py
+++ b/debbindiff.py
@@ -21,301 +21,13 @@
from __future__ import print_function
import sys
-import difflib
-import os.path
-import re
-import magic
-import hashlib
-import codecs
-import tempfile
-import shutil
-import subprocess
-import tarfile
-from StringIO import StringIO
-from contextlib import contextmanager
-from debbindiff.changes import Changes
-from debbindiff.pyxxd import hexdump
-from debbindiff import logger
-
-class Difference(object):
- def __init__(self, lines1, lines2, path1, path2, source=None, comment=None):
- # allow to override declared file paths, useful when comparing tempfiles
- if source:
- self._source1 = source
- self._source2 = source
- else:
- self._source1 = path1
- self._source2 = path2
- self._lines1 = lines1
- self._lines2 = lines2
- self._comment = comment
- self._details = []
-
- @property
- def comment(self):
- return self._comment
-
- @comment.setter
- def set_comment(self, comment):
- self._comment = comment
-
- def get_diff(self, in_sources1=[], in_sources2=[]):
- if self._comment:
- yield '\n'
- for line in self._comment.split('\n'):
- yield line
- yield '\n\n'
- sources1 = in_sources1 + [self._source1]
- sources2 = in_sources2 + [self._source2]
- if self._lines1 is not None and self._lines2 is not None:
- fromfile1 = " -> ".join(sources1)
- fromfile2 = " -> ".join(sources2)
- for line in difflib.unified_diff(self._lines1, self._lines2,
- fromfile=fromfile1,
- tofile=fromfile2, n=0):
- if not line.endswith('\n'):
- line += '\n'
- yield line
- for detail in self._details:
- for line in detail.get_diff(sources1, sources2):
- yield line
-
- def add_details(self, differences):
- self._details.extend(differences)
-
-DOT_CHANGES_FIELDS = [
- "Format", "Source", "Binary", "Architecture",
- "Version", "Distribution", "Urgency",
- "Maintainer", "Changed-By", "Description",
- "Changes"
- ]
-
-def compare_changes_files(path1, path2, source=None):
- try:
- dot_changes1 = Changes(filename=path1)
- dot_changes1.validate(check_signature=False)
- dot_changes2 = Changes(filename=path2)
- dot_changes2.validate(check_signature=False)
- except IOError, e:
- logger.critical(e)
- sys.exit(2)
-
- differences = []
- for field in DOT_CHANGES_FIELDS:
- if dot_changes1[field] != dot_changes2[field]:
- differences.append(Difference(
- ["%s: %s" % (field, dot_changes1[field])],
- ["%s: %s" % (field, dot_changes2[field])],
- dot_changes1.get_changes_file(),
- dot_changes2.get_changes_file(),
- source=source))
-
- # This will handle differences in the list of files, checksums, priority
- # and section
- files1 = dot_changes1.get('Files')
- files2 = dot_changes2.get('Files')
- logger.debug(dot_changes1.get_as_string('Files'))
- if files1 == files2:
- return differences
-
- files_difference = Difference(
- dot_changes1.get_as_string('Files').splitlines(1),
- dot_changes2.get_as_string('Files').splitlines(1),
- dot_changes1.get_changes_file(),
- dot_changes2.get_changes_file(),
- source=source,
- comment="List of files does not match")
-
- files1 = dict([(d['name'], d) for d in files1])
- files2 = dict([(d['name'], d) for d in files2])
-
- for filename in sorted(set(files1.keys()).union(files2.keys())):
- d1 = files1[filename]
- d2 = files2[filename]
- if d1['md5sum'] != d2['md5sum']:
- logger.debug("%s mentioned in .changes have differences" % filename)
- files_difference.add_details(compare_files(dot_changes1.get_path(filename),
- dot_changes2.get_path(filename),
- source=get_source(dot_changes1.get_path(filename),
- dot_changes2.get_path(filename))))
-
- differences.append(files_difference)
- return differences
-
-def guess_mime_type(path):
- if not hasattr(guess_mime_type, 'mimedb'):
- guess_mime_type.mimedb = magic.open(magic.MIME)
- guess_mime_type.mimedb.load()
- return guess_mime_type.mimedb.file(path)
-
-
-def are_same_binaries(path1, path2):
- BUF_SIZE = 20 * 2 ** 10 # 20 kB
- h1 = hashlib.md5()
- f1 = open(path1, 'rb')
- h2 = hashlib.md5()
- f2 = open(path2, 'rb')
- while True:
- buf1 = f1.read(BUF_SIZE)
- buf2 = f2.read(BUF_SIZE)
- if not buf1 or not buf2:
- return False
- h1.update(buf1)
- h2.update(buf2)
- if h1.digest() != h2.digest():
- return False
- return True
-
-def get_source(path1, path2):
- if os.path.basename(path1) == os.path.basename(path2):
- return os.path.basename(path1)
- return None
-
-@contextmanager
-def make_temp_directory():
- temp_dir = tempfile.mkdtemp(suffix='debbindiff')
- yield temp_dir
- shutil.rmtree(temp_dir)
-
-@contextmanager
-def decompress_xz(path):
- with make_temp_directory() as temp_dir:
- if path.endswith('.xz'):
- temp_path = os.path.join(temp_dir, os.path.basename(path[:-3]))
- else:
- temp_path = os.path.join(temp_dir, "%s-content" % path)
- with open(temp_path, 'wb') as temp_file:
- subprocess.check_call(
- ["xz", "--decompress", "--stdout", path],
- shell=False, stdout=temp_file, stderr=None)
- yield temp_path
-
-# decorator that will create a fallback on binary diff if no differences
-# are detected
-def binary_fallback(original_function):
- def with_fallback(path1, path2, source=None):
- if are_same_binaries(path1, path2):
- return []
- inside_differences = original_function(path1, path2, source)
- # no differences detected inside? let's at least do a binary diff
- if len(inside_differences) == 0:
- difference = compare_binary_files(path1, path2)[0]
- difference.comment = "No differences found inside, yet data differs"
- else:
- difference = Difference(None, None, path1, path2, source=get_source(path1, path2))
- difference.add_details(inside_differences)
- return [difference]
- return with_fallback
-
-@binary_fallback
-def compare_xz_files(path1, path2, source=None):
- with decompress_xz(path1) as new_path1:
- with decompress_xz(path2) as new_path2:
- return compare_files(new_path1, new_path2, source=get_source(new_path1, new_path2))
-
-def get_tar_content(tar):
- orig_stdout = sys.stdout
- output = StringIO()
- try:
- sys.stdout = output
- tar.list(verbose=True)
- return output.getvalue()
- finally:
- sys.stdout = orig_stdout
-
-@binary_fallback
-def compare_tar_files(path1, path2, source=None):
- difference = None
- content_differences = []
- with tarfile.open(path1, 'r') as tar1:
- with tarfile.open(path2, 'r') as tar2:
- # look up differences in content
- with make_temp_directory() as temp_dir1:
- with make_temp_directory() as temp_dir2:
- logger.debug('content1 %s' % (tar1.getnames(),))
- logger.debug('content2 %s' % (tar2.getnames(),))
- for name in sorted(set(tar1.getnames()).intersection(tar2.getnames())):
- member1 = tar1.getmember(name)
- member2 = tar2.getmember(name)
- if not member1.isfile() or not member2.isfile():
- continue
- logger.debug('extract member %s' % (name,))
- tar1.extract(name, temp_dir1)
- tar2.extract(name, temp_dir2)
- content_differences.extend(
- compare_files(os.path.join(temp_dir1, name),
- os.path.join(temp_dir2, name),
- source=name))
- # look up differences in file list and file metadata
- content1 = get_tar_content(tar1)
- content2 = get_tar_content(tar2)
- if content1 != content2:
- difference = Difference(content1.splitlines(1), content2.splitlines(1), path1, path2, source)
- elif len(content_differences) >= 0:
- difference = Difference(None, None, path1, path2, source)
- if difference:
- difference.add_details(content_differences)
- return [difference]
- else:
- return []
-
-def compare_text_files(path1, path2, encoding, source=None):
- lines1 = codecs.open(path1, 'r', encoding=encoding).readlines()
- lines2 = codecs.open(path2, 'r', encoding=encoding).readlines()
- if lines1 == lines2:
- return []
- return [Difference(lines1, lines2, path1, path2, source)]
-
-def compare_binary_files(path1, path2, source=None):
- hexdump1 = hexdump(open(path1, 'rb').read())
- hexdump2 = hexdump(open(path2, 'rb').read())
- if hexdump1 == hexdump2:
- return []
- return [Difference(hexdump1.splitlines(1), hexdump2.splitlines(1), path1, path2, source)]
-
-COMPARATORS = [
- (None, r'\.changes$', compare_changes_files),
- (r'^application/x-xz(;|$)', r'\.xz$', compare_xz_files),
- (r'^application/x-tar(;|$)', r'\.tar$', compare_tar_files),
- ]
-
-def compare_unknown(path1, path2, source=None):
- logger.debug("compare unknown path: %s and %s" % (path1, path2))
- mime_type1 = guess_mime_type(path1)
- mime_type2 = guess_mime_type(path2)
- logger.debug("mime_type1: %s | mime_type2: %s" % (mime_type1, mime_type2))
- if mime_type1.startswith('text/') and mime_type2.startswith('text/'):
- encodings1 = re.findall(r'; charset=([^ ]+)', mime_type1)
- encodings2 = re.findall(r'; charset=([^ ]+)', mime_type2)
- if len(encodings1) > 0 and encodings1 == encodings2:
- encoding = encodings1[0]
- else:
- encoding = None
- return compare_text_files(path1, path2, encoding, source)
- return compare_binary_files(path1, path2, source)
-
-def compare_files(path1, path2, source=None):
- if not os.path.isfile(path1):
- logger.critical("%s is not a file" % path1)
- sys.exit(2)
- if not os.path.isfile(path2):
- logger.critical("%s is not a file" % path2)
- sys.exit(2)
- for mime_type_regex, filename_regex, comparator in COMPARATORS:
- if mime_type_regex:
- mime_type1 = guess_mime_type(path1)
- mime_type2 = guess_mime_type(path2)
- if re.search(mime_type_regex, mime_type1) and re.search(mime_type_regex, mime_type2):
- return comparator(path1, path2, source)
- if filename_regex and re.search(filename_regex, path1) and re.search(filename_regex, path2):
- return comparator(path1, path2, source)
- return compare_unknown(path1, path2, source)
+import debbindiff.comparators
def main():
if len(sys.argv) != 3:
print("Usage: %s FILE1 FILE2")
sys.exit(2)
- differences = compare_files(sys.argv[1], sys.argv[2])
+ differences = debbindiff.comparators.compare_files(sys.argv[1], sys.argv[2])
for difference in differences:
for line in difference.get_diff():
print(line, end='')
diff --git a/debbindiff/comparators/__init__.py b/debbindiff/comparators/__init__.py
new file mode 100644
index 0000000..d72e981
--- /dev/null
+++ b/debbindiff/comparators/__init__.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff. If not, see <http://www.gnu.org/licenses/>.
+
+import magic
+import os.path
+import re
+from debbindiff import logger
+from debbindiff.difference import Difference, get_source
+from debbindiff.comparators.binary import compare_binary_files
+from debbindiff.comparators.changes import compare_changes_files
+from debbindiff.comparators.text import compare_text_files
+from debbindiff.comparators.tar import compare_tar_files
+from debbindiff.comparators.xz import compare_xz_files
+
+def guess_mime_type(path):
+ if not hasattr(guess_mime_type, 'mimedb'):
+ guess_mime_type.mimedb = magic.open(magic.MIME)
+ guess_mime_type.mimedb.load()
+ return guess_mime_type.mimedb.file(path)
+
+def compare_unknown(path1, path2, source=None):
+ logger.debug("compare unknown path: %s and %s" % (path1, path2))
+ mime_type1 = guess_mime_type(path1)
+ mime_type2 = guess_mime_type(path2)
+ logger.debug("mime_type1: %s | mime_type2: %s" % (mime_type1, mime_type2))
+ if mime_type1.startswith('text/') and mime_type2.startswith('text/'):
+ encodings1 = re.findall(r'; charset=([^ ]+)', mime_type1)
+ encodings2 = re.findall(r'; charset=([^ ]+)', mime_type2)
+ if len(encodings1) > 0 and encodings1 == encodings2:
+ encoding = encodings1[0]
+ else:
+ encoding = None
+ return compare_text_files(path1, path2, encoding, source)
+ return compare_binary_files(path1, path2, source)
+
+COMPARATORS = [
+ (None, r'\.changes$', compare_changes_files),
+ (r'^application/x-xz(;|$)', r'\.xz$', compare_xz_files),
+ (r'^application/x-tar(;|$)', r'\.tar$', compare_tar_files),
+ ]
+
+def compare_files(path1, path2, source=None):
+ if not os.path.isfile(path1):
+ logger.critical("%s is not a file" % path1)
+ sys.exit(2)
+ if not os.path.isfile(path2):
+ logger.critical("%s is not a file" % path2)
+ sys.exit(2)
+ for mime_type_regex, filename_regex, comparator in COMPARATORS:
+ if mime_type_regex:
+ mime_type1 = guess_mime_type(path1)
+ mime_type2 = guess_mime_type(path2)
+ if re.search(mime_type_regex, mime_type1) and re.search(mime_type_regex, mime_type2):
+ return comparator(path1, path2, source)
+ if filename_regex and re.search(filename_regex, path1) and re.search(filename_regex, path2):
+ return comparator(path1, path2, source)
+ return compare_unknown(path1, path2, source)
diff --git a/debbindiff/comparators/binary.py b/debbindiff/comparators/binary.py
new file mode 100644
index 0000000..c8695aa
--- /dev/null
+++ b/debbindiff/comparators/binary.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff. If not, see <http://www.gnu.org/licenses/>.
+
+from debbindiff.difference import Difference
+from debbindiff.pyxxd import hexdump
+
+def compare_binary_files(path1, path2, source=None):
+ hexdump1 = hexdump(open(path1, 'rb').read())
+ hexdump2 = hexdump(open(path2, 'rb').read())
+ if hexdump1 == hexdump2:
+ return []
+ return [Difference(hexdump1.splitlines(1), hexdump2.splitlines(1), path1, path2, source)]
+
+
diff --git a/debbindiff/comparators/changes.py b/debbindiff/comparators/changes.py
new file mode 100644
index 0000000..27adf3f
--- /dev/null
+++ b/debbindiff/comparators/changes.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff. If not, see <http://www.gnu.org/licenses/>.
+
+from debbindiff import logger
+from debbindiff.changes import Changes
+import debbindiff.comparators
+from debbindiff.difference import Difference, get_source
+
+DOT_CHANGES_FIELDS = [
+ "Format", "Source", "Binary", "Architecture",
+ "Version", "Distribution", "Urgency",
+ "Maintainer", "Changed-By", "Description",
+ "Changes"
+ ]
+
+def compare_changes_files(path1, path2, source=None):
+ try:
+ dot_changes1 = Changes(filename=path1)
+ dot_changes1.validate(check_signature=False)
+ dot_changes2 = Changes(filename=path2)
+ dot_changes2.validate(check_signature=False)
+ except IOError, e:
+ logger.critical(e)
+ sys.exit(2)
+
+ differences = []
+ for field in DOT_CHANGES_FIELDS:
+ if dot_changes1[field] != dot_changes2[field]:
+ differences.append(Difference(
+ ["%s: %s" % (field, dot_changes1[field])],
+ ["%s: %s" % (field, dot_changes2[field])],
+ dot_changes1.get_changes_file(),
+ dot_changes2.get_changes_file(),
+ source=source))
+
+ # This will handle differences in the list of files, checksums, priority
+ # and section
+ files1 = dot_changes1.get('Files')
+ files2 = dot_changes2.get('Files')
+ logger.debug(dot_changes1.get_as_string('Files'))
+ if files1 == files2:
+ return differences
+
+ files_difference = Difference(
+ dot_changes1.get_as_string('Files').splitlines(1),
+ dot_changes2.get_as_string('Files').splitlines(1),
+ dot_changes1.get_changes_file(),
+ dot_changes2.get_changes_file(),
+ source=source,
+ comment="List of files does not match")
+
+ files1 = dict([(d['name'], d) for d in files1])
+ files2 = dict([(d['name'], d) for d in files2])
+
+ for filename in sorted(set(files1.keys()).union(files2.keys())):
+ d1 = files1[filename]
+ d2 = files2[filename]
+ if d1['md5sum'] != d2['md5sum']:
+ logger.debug("%s mentioned in .changes have differences" % filename)
+ files_difference.add_details(
+ debbindiff.comparators.compare_files(dot_changes1.get_path(filename),
+ dot_changes2.get_path(filename),
+ source=get_source(dot_changes1.get_path(filename),
+ dot_changes2.get_path(filename))))
+
+ differences.append(files_difference)
+ return differences
diff --git a/debbindiff/comparators/tar.py b/debbindiff/comparators/tar.py
new file mode 100644
index 0000000..65af2de
--- /dev/null
+++ b/debbindiff/comparators/tar.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff. If not, see <http://www.gnu.org/licenses/>.
+
+import os.path
+from StringIO import StringIO
+import sys
+import tarfile
+from debbindiff import logger
+from debbindiff.difference import Difference
+import debbindiff.comparators
+from debbindiff.comparators.utils import binary_fallback, make_temp_directory
+
+def get_tar_content(tar):
+ orig_stdout = sys.stdout
+ output = StringIO()
+ try:
+ sys.stdout = output
+ tar.list(verbose=True)
+ return output.getvalue()
+ finally:
+ sys.stdout = orig_stdout
+
+@binary_fallback
+def compare_tar_files(path1, path2, source=None):
+ difference = None
+ content_differences = []
+ with tarfile.open(path1, 'r') as tar1:
+ with tarfile.open(path2, 'r') as tar2:
+ # look up differences in content
+ with make_temp_directory() as temp_dir1:
+ with make_temp_directory() as temp_dir2:
+ logger.debug('content1 %s' % (tar1.getnames(),))
+ logger.debug('content2 %s' % (tar2.getnames(),))
+ for name in sorted(set(tar1.getnames()).intersection(tar2.getnames())):
+ member1 = tar1.getmember(name)
+ member2 = tar2.getmember(name)
+ if not member1.isfile() or not member2.isfile():
+ continue
+ logger.debug('extract member %s' % (name,))
+ tar1.extract(name, temp_dir1)
+ tar2.extract(name, temp_dir2)
+ content_differences.extend(
+ debbindiff.comparators.compare_files(
+ os.path.join(temp_dir1, name),
+ os.path.join(temp_dir2, name),
+ source=name))
+ # look up differences in file list and file metadata
+ content1 = get_tar_content(tar1)
+ content2 = get_tar_content(tar2)
+ if content1 != content2:
+ difference = Difference(content1.splitlines(1), content2.splitlines(1), path1, path2, source)
+ elif len(content_differences) >= 0:
+ difference = Difference(None, None, path1, path2, source)
+ if difference:
+ difference.add_details(content_differences)
+ return [difference]
+ else:
+ return []
+
+
diff --git a/debbindiff/comparators/text.py b/debbindiff/comparators/text.py
new file mode 100644
index 0000000..db9e295
--- /dev/null
+++ b/debbindiff/comparators/text.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff. If not, see <http://www.gnu.org/licenses/>.
+
+import codecs
+from debbindiff.difference import Difference
+
+def compare_text_files(path1, path2, encoding, source=None):
+ lines1 = codecs.open(path1, 'r', encoding=encoding).readlines()
+ lines2 = codecs.open(path2, 'r', encoding=encoding).readlines()
+ if lines1 == lines2:
+ return []
+ return [Difference(lines1, lines2, path1, path2, source)]
diff --git a/debbindiff/comparators/utils.py b/debbindiff/comparators/utils.py
new file mode 100644
index 0000000..9f44a4d
--- /dev/null
+++ b/debbindiff/comparators/utils.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff. If not, see <http://www.gnu.org/licenses/>.
+
+from contextlib import contextmanager
+import hashlib
+import shutil
+import tempfile
+from debbindiff.difference import Difference, get_source
+
+def are_same_binaries(path1, path2):
+ BUF_SIZE = 20 * 2 ** 10 # 20 kB
+ h1 = hashlib.md5()
+ f1 = open(path1, 'rb')
+ h2 = hashlib.md5()
+ f2 = open(path2, 'rb')
+ while True:
+ buf1 = f1.read(BUF_SIZE)
+ buf2 = f2.read(BUF_SIZE)
+ if not buf1 or not buf2:
+ return False
+ h1.update(buf1)
+ h2.update(buf2)
+ if h1.digest() != h2.digest():
+ return False
+ return True
+
+# decorator that will create a fallback on binary diff if no differences
+# are detected
+def binary_fallback(original_function):
+ def with_fallback(path1, path2, source=None):
+ if are_same_binaries(path1, path2):
+ return []
+ inside_differences = original_function(path1, path2, source)
+ # no differences detected inside? let's at least do a binary diff
+ if len(inside_differences) == 0:
+ difference = compare_binary_files(path1, path2)[0]
+ difference.comment = "No differences found inside, yet data differs"
+ else:
+ difference = Difference(None, None, path1, path2, source=get_source(path1, path2))
+ difference.add_details(inside_differences)
+ return [difference]
+ return with_fallback
+
+@contextmanager
+def make_temp_directory():
+ temp_dir = tempfile.mkdtemp(suffix='debbindiff')
+ yield temp_dir
+ shutil.rmtree(temp_dir)
diff --git a/debbindiff/comparators/xz.py b/debbindiff/comparators/xz.py
new file mode 100644
index 0000000..83c5a22
--- /dev/null
+++ b/debbindiff/comparators/xz.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff. If not, see <http://www.gnu.org/licenses/>.
+
+from contextlib import contextmanager
+import os.path
+import subprocess
+import debbindiff.comparators
+from debbindiff.comparators.utils import binary_fallback, make_temp_directory
+from debbindiff.difference import get_source
+
+@contextmanager
+def decompress_xz(path):
+ with make_temp_directory() as temp_dir:
+ if path.endswith('.xz'):
+ temp_path = os.path.join(temp_dir, os.path.basename(path[:-3]))
+ else:
+ temp_path = os.path.join(temp_dir, "%s-content" % path)
+ with open(temp_path, 'wb') as temp_file:
+ subprocess.check_call(
+ ["xz", "--decompress", "--stdout", path],
+ shell=False, stdout=temp_file, stderr=None)
+ yield temp_path
+
+@binary_fallback
+def compare_xz_files(path1, path2, source=None):
+ with decompress_xz(path1) as new_path1:
+ with decompress_xz(path2) as new_path2:
+ return debbindiff.comparators.compare_files(
+ new_path1, new_path2,
+ source=get_source(new_path1, new_path2))
+
diff --git a/debbindiff/difference.py b/debbindiff/difference.py
new file mode 100644
index 0000000..3699eda
--- /dev/null
+++ b/debbindiff/difference.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+#
+# debbindiff: highlight differences between two builds of Debian packages
+#
+# Copyright © 2014 Jérémy Bobbio <lunar at debian.org>
+#
+# debdindiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# debbindiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with debbindiff. If not, see <http://www.gnu.org/licenses/>.
+
+import difflib
+import os.path
+
+class Difference(object):
+ def __init__(self, lines1, lines2, path1, path2, source=None, comment=None):
+ # allow to override declared file paths, useful when comparing tempfiles
+ if source:
+ self._source1 = source
+ self._source2 = source
+ else:
+ self._source1 = path1
+ self._source2 = path2
+ self._lines1 = lines1
+ self._lines2 = lines2
+ self._comment = comment
+ self._details = []
+
+ @property
+ def comment(self):
+ return self._comment
+
+ @comment.setter
+ def set_comment(self, comment):
+ self._comment = comment
+
+ def get_diff(self, in_sources1=[], in_sources2=[]):
+ if self._comment:
+ yield '\n'
+ for line in self._comment.split('\n'):
+ yield line
+ yield '\n\n'
+ sources1 = in_sources1 + [self._source1]
+ sources2 = in_sources2 + [self._source2]
+ if self._lines1 is not None and self._lines2 is not None:
+ fromfile1 = " -> ".join(sources1)
+ fromfile2 = " -> ".join(sources2)
+ for line in difflib.unified_diff(self._lines1, self._lines2,
+ fromfile=fromfile1,
+ tofile=fromfile2, n=0):
+ if not line.endswith('\n'):
+ line += '\n'
+ yield line
+ for detail in self._details:
+ for line in detail.get_diff(sources1, sources2):
+ yield line
+
+ def add_details(self, differences):
+ self._details.extend(differences)
+
+def get_source(path1, path2):
+ if os.path.basename(path1) == os.path.basename(path2):
+ return os.path.basename(path1)
+ return None
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git
More information about the Reproducible-commits
mailing list