[Reproducible-commits] [diffoscope] 02/03: comparators.deb: Read md5sums ahead

Joachim Breitner nomeata at moszumanska.debian.org
Thu Dec 3 15:37:37 UTC 2015


This is an automated email from the git hooks/post-receive script.

nomeata pushed a commit to branch pu/parallel2
in repository diffoscope.

commit c08d2ea3760e92263446f857a4ece30f56905426
Author: Joachim Breitner <mail at joachim-breitner.de>
Date:   Thu Dec 3 16:28:53 2015 +0100

    comparators.deb: Read md5sums ahead
    
    Due to parallelism, we cannot rely on the Md5sums being compared
    before the data.tar.gz files are. Therefore, in the compare() of
    DebFile, reach out for the two md5sums and compare the list of
    differences. The latter works, but needs to be made more reliable.
    
    The result is still stored in the DebFile object, and fetched from there
    by DebTarContainer.
    
    Now all tests pass again.
---
 diffoscope/comparators/deb.py | 59 +++++++++++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 22 deletions(-)

diff --git a/diffoscope/comparators/deb.py b/diffoscope/comparators/deb.py
index 2a021e9..8c81633 100644
--- a/diffoscope/comparators/deb.py
+++ b/diffoscope/comparators/deb.py
@@ -26,6 +26,7 @@ from diffoscope.comparators.libarchive import LibarchiveContainer
 from diffoscope.comparators.utils import \
     Archive, ArchiveMember, get_ar_content
 from diffoscope.comparators.tar import TarListing
+import diffoscope.comparators
 
 
 class DebContainer(LibarchiveContainer):
@@ -50,7 +51,38 @@ class DebFile(File):
     def set_files_with_same_content_in_data(self, files):
         self._files_with_same_content_in_data = files
 
+    def get_md5file(self):
+        # TODO: Finer-grained defensive coding, e.g. bail out if there is a None
+        try:
+            # Can we rely on this path? Can there be a control.tar.something?
+            cont1 = self.as_container
+            memb1 = diffoscope.comparators.specialize(cont1.get_member('control.tar.gz'))
+            cont2 = memb1.as_container
+            memb2 = diffoscope.comparators.specialize(cont2.get_member('gzip-content'))
+            cont3 = memb2.as_container
+            memb3 = diffoscope.comparators.specialize(cont3.get_member('./md5sums'))
+
+            d = {}
+            with open(memb3.path, 'r', encoding='utf-8') as f:
+                for line in f.readlines():
+                    md5sum, path = re.split(r'\s+', line.strip(), maxsplit=1)
+                    d[path] = md5sum
+            return d
+        except:
+            return {}
+
     def compare_details(self, other, source=None):
+        my_md5sums = self.get_md5file()
+        other_md5sums = {}
+        if isinstance(other,DebFile): # could be NonExistingFile
+            other_md5sums = other.get_md5file()
+        same = set()
+        for path in my_md5sums.keys() & other_md5sums.keys():
+            if my_md5sums[path] == other_md5sums[path]:
+                same.add('./%s' % path)
+        logger.debug("These are all the same %s" % same)
+        self.set_files_with_same_content_in_data(same)
+
         differences = []
         my_content = get_ar_content(self.path)
         other_content = get_ar_content(other.path)
@@ -61,6 +93,10 @@ class DebFile(File):
 
 
 class Md5sumsFile(File):
+    """
+    Changes in the md5sum files are really boring, as they show up somewhere else,
+    so do not print them in the diff.
+    """
     @staticmethod
     def recognizes(file):
         return isinstance(file, ArchiveMember) and \
@@ -70,33 +106,12 @@ class Md5sumsFile(File):
                file.container.source.container.source.name.startswith('control.tar.') and \
                isinstance(file.container.source.container.source.container.source, DebFile)
 
-    @staticmethod
-    def parse_md5sums(path):
-        d = {}
-        with open(path, 'r', encoding='utf-8') as f:
-            for line in f.readlines():
-                md5sum, path = re.split(r'\s+', line.strip(), maxsplit=1)
-                d[path] = md5sum
-        return d
-
     def compare(self, other, source=None):
         if other.path is None:
             return None
-        try:
-            my_md5sums = Md5sumsFile.parse_md5sums(self.path)
-            other_md5sums = Md5sumsFile.parse_md5sums(other.path)
-            same = set()
-            for path in my_md5sums.keys() & other_md5sums.keys():
-                if my_md5sums[path] == other_md5sums[path]:
-                    same.add('./%s' % path)
-            self.container.source.container.source.container.source.set_files_with_same_content_in_data(same)
-            logger.debug('Identifed %d files as identical in data archive', len(same))
+        else:
             return Difference(self.path, other.path, source='md5sums',
                               notification="Files in package differs")
-        except ValueError as e:
-            difference = self.compare_bytes(other)
-            difference.add_comment('Malformed md5sums file: %s' % e)
-            return difference
 
 
 class DebTarContainer(LibarchiveContainer):

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git



More information about the Reproducible-commits mailing list