[Reproducible-commits] [diffoscope] 02/03: comperators.deb: Read md5sums ahead
Joachim Breitner
nomeata at moszumanska.debian.org
Thu Dec 3 15:37:37 UTC 2015
This is an automated email from the git hooks/post-receive script.
nomeata pushed a commit to branch pu/parallel2
in repository diffoscope.
commit c08d2ea3760e92263446f857a4ece30f56905426
Author: Joachim Breitner <mail at joachim-breitner.de>
Date: Thu Dec 3 16:28:53 2015 +0100
comperators.deb: Read md5sums ahead
Due to parallelism, we cannot rely on the Md5sums being compared
before the data.tar.gz files are. Therefore, in the compare() of
DebFile, reach out for the two md5sums and compare the list of
differences. The latter works, but needs to be made more reliable.
The result is still stored in the DebFile object, and fetched from there
by DebTarContainer.
Now all tests pass again.
---
diffoscope/comparators/deb.py | 59 +++++++++++++++++++++++++++----------------
1 file changed, 37 insertions(+), 22 deletions(-)
diff --git a/diffoscope/comparators/deb.py b/diffoscope/comparators/deb.py
index 2a021e9..8c81633 100644
--- a/diffoscope/comparators/deb.py
+++ b/diffoscope/comparators/deb.py
@@ -26,6 +26,7 @@ from diffoscope.comparators.libarchive import LibarchiveContainer
from diffoscope.comparators.utils import \
Archive, ArchiveMember, get_ar_content
from diffoscope.comparators.tar import TarListing
+import diffoscope.comparators
class DebContainer(LibarchiveContainer):
@@ -50,7 +51,38 @@ class DebFile(File):
def set_files_with_same_content_in_data(self, files):
self._files_with_same_content_in_data = files
+ def get_md5file(self):
+ # TODO: Finer-grained defensive coding, e.g. bail out if there is a None
+ try:
+ # Can we rely on this path? Can there be a control.tar.something?
+ cont1 = self.as_container
+ memb1 = diffoscope.comparators.specialize(cont1.get_member('control.tar.gz'))
+ cont2 = memb1.as_container
+ memb2 = diffoscope.comparators.specialize(cont2.get_member('gzip-content'))
+ cont3 = memb2.as_container
+ memb3 = diffoscope.comparators.specialize(cont3.get_member('./md5sums'))
+
+ d = {}
+ with open(memb3.path, 'r', encoding='utf-8') as f:
+ for line in f.readlines():
+ md5sum, path = re.split(r'\s+', line.strip(), maxsplit=1)
+ d[path] = md5sum
+ return d
+ except:
+ return {}
+
def compare_details(self, other, source=None):
+ my_md5sums = self.get_md5file()
+ other_md5sums = {}
+ if isinstance(other,DebFile): # could be NonExistingFile
+ other_md5sums = other.get_md5file()
+ same = set()
+ for path in my_md5sums.keys() & other_md5sums.keys():
+ if my_md5sums[path] == other_md5sums[path]:
+ same.add('./%s' % path)
+ logger.debug("These are all the same %s" % same)
+ self.set_files_with_same_content_in_data(same)
+
differences = []
my_content = get_ar_content(self.path)
other_content = get_ar_content(other.path)
@@ -61,6 +93,10 @@ class DebFile(File):
class Md5sumsFile(File):
+ """
+ Changes in the md5sum files are really boring, as they show up somewhere else,
+ so do not print them in the diff.
+ """
@staticmethod
def recognizes(file):
return isinstance(file, ArchiveMember) and \
@@ -70,33 +106,12 @@ class Md5sumsFile(File):
file.container.source.container.source.name.startswith('control.tar.') and \
isinstance(file.container.source.container.source.container.source, DebFile)
- @staticmethod
- def parse_md5sums(path):
- d = {}
- with open(path, 'r', encoding='utf-8') as f:
- for line in f.readlines():
- md5sum, path = re.split(r'\s+', line.strip(), maxsplit=1)
- d[path] = md5sum
- return d
-
def compare(self, other, source=None):
if other.path is None:
return None
- try:
- my_md5sums = Md5sumsFile.parse_md5sums(self.path)
- other_md5sums = Md5sumsFile.parse_md5sums(other.path)
- same = set()
- for path in my_md5sums.keys() & other_md5sums.keys():
- if my_md5sums[path] == other_md5sums[path]:
- same.add('./%s' % path)
- self.container.source.container.source.container.source.set_files_with_same_content_in_data(same)
- logger.debug('Identifed %d files as identical in data archive', len(same))
+ else:
return Difference(self.path, other.path, source='md5sums',
notification="Files in package differs")
- except ValueError as e:
- difference = self.compare_bytes(other)
- difference.add_comment('Malformed md5sums file: %s' % e)
- return difference
class DebTarContainer(LibarchiveContainer):
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the Reproducible-commits
mailing list