[Reproducible-commits] [diffoscope] 01/03: Improve overloading of what gets compared in a container

Jérémy Bobbio lunar at moszumanska.debian.org
Thu Sep 3 14:08:26 UTC 2015


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository diffoscope.

commit e86d44491aa809ef7535e92c248727e17e2e84ed
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Thu Sep 3 12:51:36 2015 +0000

    Improve overloading of what gets compared in a container
    
    Most often, containers need to overload the set of files that gets
    compared and not the way comparisons are performed. So we introduce
    a new method get_members that returns a dictionary of names and
    members. The names will be used to match which file should actually be
    compared.
    
    We also split the compare method using a 'comparisons' generator.
    This should also allow other less conventional extensions in the future.
    
    This reduces some code duplication (especially visible in .changes)
    and makes the situation for .gzip, .bz2, and .xz more straightforward.
    
    We also take the opportunity to remove the useless 'source' argument
    in Container.compare which was unnecessarily complicating the code.
---
 diffoscope/comparators/bzip2.py    | 15 +++++----------
 diffoscope/comparators/cpio.py     |  2 +-
 diffoscope/comparators/deb.py      |  4 ++--
 diffoscope/comparators/debian.py   | 27 +++++++--------------------
 diffoscope/comparators/gzip.py     | 15 +++++----------
 diffoscope/comparators/iso9660.py  |  2 +-
 diffoscope/comparators/rpm.py      |  2 +-
 diffoscope/comparators/squashfs.py |  2 +-
 diffoscope/comparators/tar.py      |  2 +-
 diffoscope/comparators/utils.py    | 37 +++++++++++++++++++++----------------
 diffoscope/comparators/xz.py       | 15 +++++----------
 diffoscope/comparators/zip.py      |  2 +-
 12 files changed, 51 insertions(+), 74 deletions(-)

diff --git a/diffoscope/comparators/bzip2.py b/diffoscope/comparators/bzip2.py
index 9b6f383..806e329 100644
--- a/diffoscope/comparators/bzip2.py
+++ b/diffoscope/comparators/bzip2.py
@@ -22,7 +22,7 @@ import re
 import subprocess
 import diffoscope.comparators
 from diffoscope.comparators.binary import File, needs_content
-from diffoscope.comparators.utils import Archive, get_compressed_content_name
+from diffoscope.comparators.utils import Archive, get_compressed_content_name, NO_COMMENT
 from diffoscope import logger, tool_required
 
 
@@ -38,6 +38,9 @@ class Bzip2Container(Archive):
     def close_archive(self):
         self._path = None
 
+    def get_members(self):
+        return {'bzip2-content': self.get_member(self.get_member_names()[0])}
+
     def get_member_names(self):
         return [get_compressed_content_name(self.path, '.bz2')]
 
@@ -51,14 +54,6 @@ class Bzip2Container(Archive):
                 shell=False, stdout=fp, stderr=None)
         return dest_path
 
-    def compare(self, other, source=None):
-        my_file = self.get_member(self.get_member_names()[0])
-        other_file = other.get_member(other.get_member_names()[0])
-        source = None
-        if my_file.name == other_file.name:
-            source = my_file.name
-        return [diffoscope.comparators.compare_files(my_file, other_file, source)]
-
 
 class Bzip2File(File):
     RE_FILE_TYPE = re.compile(r'^bzip2 compressed data\b')
@@ -71,4 +66,4 @@ class Bzip2File(File):
     def compare_details(self, other, source=None):
         with Bzip2Container(self).open() as my_container, \
              Bzip2Container(other).open() as other_container:
-            return my_container.compare(other_container, source)
+            return my_container.compare(other_container)
diff --git a/diffoscope/comparators/cpio.py b/diffoscope/comparators/cpio.py
index 2365703..d11ba78 100644
--- a/diffoscope/comparators/cpio.py
+++ b/diffoscope/comparators/cpio.py
@@ -46,5 +46,5 @@ class CpioFile(File):
             CpioContent, self.path, other.path, source="file list"))
         with LibarchiveContainer(self).open() as my_container, \
              LibarchiveContainer(other).open() as other_container:
-            differences.extend(my_container.compare(other_container, source))
+            differences.extend(my_container.compare(other_container))
         return differences
diff --git a/diffoscope/comparators/deb.py b/diffoscope/comparators/deb.py
index 3283ad3..9cab27e 100644
--- a/diffoscope/comparators/deb.py
+++ b/diffoscope/comparators/deb.py
@@ -83,7 +83,7 @@ class DebFile(File):
                                my_content, other_content, self.path, other.path, source="metadata"))
         with DebContainer(self).open() as my_container, \
              DebContainer(other).open() as other_container:
-            differences.extend(my_container.compare(other_container, source))
+            differences.extend(my_container.compare(other_container))
         return differences
 
 
@@ -157,5 +157,5 @@ class DebDataTarFile(File):
             other_listing = get_tar_listing(other_container.archive)
             differences.append(Difference.from_unicode(
                                   my_listing, other_listing, self.name, other.name, source="metadata"))
-            differences.extend(my_container.compare(other_container, source))
+            differences.extend(my_container.compare(other_container))
         return differences
diff --git a/diffoscope/comparators/debian.py b/diffoscope/comparators/debian.py
index 4d5282f..af5b714 100644
--- a/diffoscope/comparators/debian.py
+++ b/diffoscope/comparators/debian.py
@@ -23,7 +23,7 @@ import re
 from diffoscope.changes import Changes
 import diffoscope.comparators
 from diffoscope.comparators.binary import File, needs_content
-from diffoscope.comparators.utils import Container
+from diffoscope.comparators.utils import Container, NO_COMMENT
 from diffoscope.difference import Difference
 
 
@@ -83,7 +83,12 @@ class DotChangesContainer(Container):
 
     @contextmanager
     def open(self):
+        self._version_re = DotChangesContainer.get_version_trimming_re(self)
         yield self
+        del self._version_re
+
+    def get_members(self):
+        return {self._trim_version_number(name): self.get_member(name) for name in self.get_member_names()}
 
     def get_member_names(self):
         return [d['name'] for d in self.source.changes.get('Files')]
@@ -94,24 +99,6 @@ class DotChangesContainer(Container):
     def _trim_version_number(self, name):
         return self._version_re.sub('', name)
 
-    def compare(self, other, source=None):
-        differences = []
-        my_names = set(self.get_member_names())
-        my_trim_re = DotChangesContainer.get_version_trimming_re(self)
-        my_canonical_names = dict([(my_trim_re.sub('', name), name) for name in my_names])
-        other_names = set(other.get_member_names())
-        other_trim_re = DotChangesContainer.get_version_trimming_re(other)
-        other_canonical_names = dict([(other_trim_re.sub('', name), name) for name in other_names])
-        for canonical_name in sorted(set(my_canonical_names.keys()).intersection(other_canonical_names.keys())):
-            my_file = self.get_member(my_canonical_names[canonical_name])
-            other_file = other.get_member(other_canonical_names[canonical_name])
-            source = None
-            if my_canonical_names[canonical_name] == other_canonical_names[canonical_name]:
-                source = my_canonical_names[canonical_name]
-            differences.append(
-                diffoscope.comparators.compare_files(my_file, other_file, source=source))
-        return differences
-
 
 class DotChangesFile(File):
     RE_FILE_EXTENSION = re.compile(r'\.changes$')
@@ -145,6 +132,6 @@ class DotChangesFile(File):
                                                    self.path, other.path, source='Files'))
         with DotChangesContainer(self).open() as my_container, \
              DotChangesContainer(other).open() as other_container:
-            differences.extend(my_container.compare(other_container, source))
+            differences.extend(my_container.compare(other_container))
 
         return differences
diff --git a/diffoscope/comparators/gzip.py b/diffoscope/comparators/gzip.py
index fb99c84..1f29c7c 100644
--- a/diffoscope/comparators/gzip.py
+++ b/diffoscope/comparators/gzip.py
@@ -23,7 +23,7 @@ import os.path
 import diffoscope.comparators
 from diffoscope import logger, tool_required
 from diffoscope.comparators.binary import needs_content
-from diffoscope.comparators.utils import Archive, get_compressed_content_name
+from diffoscope.comparators.utils import Archive, get_compressed_content_name, NO_COMMENT
 from diffoscope.difference import Difference
 
 
@@ -39,6 +39,9 @@ class GzipContainer(Archive):
     def close_archive(self):
         self._path = None
 
+    def get_members(self):
+        return {'gzip-content': self.get_member(self.get_member_names()[0])}
+
     def get_member_names(self):
         return [get_compressed_content_name(self.path, '.gz')]
 
@@ -52,14 +55,6 @@ class GzipContainer(Archive):
                 shell=False, stdout=fp, stderr=None)
         return dest_path
 
-    def compare(self, other, source=None):
-        my_file = self.get_member(self.get_member_names()[0])
-        other_file = other.get_member(other.get_member_names()[0])
-        source = None
-        if my_file.name == other_file.name:
-            source = my_file.name
-        return [diffoscope.comparators.compare_files(my_file, other_file, source)]
-
 
 class GzipFile(object):
     RE_FILE_TYPE = re.compile(r'^gzip compressed data\b')
@@ -75,5 +70,5 @@ class GzipFile(object):
                                self.magic_file_type, other.magic_file_type, self, other, source='metadata'))
         with GzipContainer(self).open() as my_container, \
              GzipContainer(other).open() as other_container:
-            differences.extend(my_container.compare(other_container, source))
+            differences.extend(my_container.compare(other_container))
         return differences
diff --git a/diffoscope/comparators/iso9660.py b/diffoscope/comparators/iso9660.py
index 8f2693b..d10655c 100644
--- a/diffoscope/comparators/iso9660.py
+++ b/diffoscope/comparators/iso9660.py
@@ -76,5 +76,5 @@ class Iso9660File(File):
             differences.append(Difference.from_command(ISO9660Listing, self.path, other.path, command_args=(extension,)))
         with LibarchiveContainer(self).open() as my_container, \
              LibarchiveContainer(other).open() as other_container:
-            differences.extend(my_container.compare(other_container, source))
+            differences.extend(my_container.compare(other_container))
         return differences
diff --git a/diffoscope/comparators/rpm.py b/diffoscope/comparators/rpm.py
index 2b8d23b..4627a47 100644
--- a/diffoscope/comparators/rpm.py
+++ b/diffoscope/comparators/rpm.py
@@ -92,5 +92,5 @@ class RpmFile(AbstractRpmFile):
         differences.append(compare_rpm_headers(self.path, other.path))
         with RpmContainer(self).open() as my_container, \
              RpmContainer(other).open() as other_container:
-            differences.extend(my_container.compare(other_container, source))
+            differences.extend(my_container.compare(other_container))
         return differences
diff --git a/diffoscope/comparators/squashfs.py b/diffoscope/comparators/squashfs.py
index 7b39e22..fb13b2f 100644
--- a/diffoscope/comparators/squashfs.py
+++ b/diffoscope/comparators/squashfs.py
@@ -201,5 +201,5 @@ class SquashfsFile(File):
         differences.append(Difference.from_command(SquashfsListing, self.path, other.path))
         with SquashfsContainer(self).open() as my_container, \
              SquashfsContainer(other).open() as other_container:
-            differences.extend(my_container.compare(other_container, source))
+            differences.extend(my_container.compare(other_container))
         return differences
diff --git a/diffoscope/comparators/tar.py b/diffoscope/comparators/tar.py
index 07a7e75..6cd0fc8 100644
--- a/diffoscope/comparators/tar.py
+++ b/diffoscope/comparators/tar.py
@@ -147,5 +147,5 @@ class TarFile(File):
             other_listing = get_tar_listing(other_container.archive)
             differences.append(Difference.from_unicode(
                                   my_listing, other_listing, self.name, other.name, source="metadata"))
-            differences.extend(my_container.compare(other_container, source))
+            differences.extend(my_container.compare(other_container))
         return differences
diff --git a/diffoscope/comparators/utils.py b/diffoscope/comparators/utils.py
index 6edb0cd..dd40310 100644
--- a/diffoscope/comparators/utils.py
+++ b/diffoscope/comparators/utils.py
@@ -143,6 +143,9 @@ def get_compressed_content_name(path, expected_extension):
     return name
 
 
+NO_COMMENT = None
+
+
 class Container(object):
     __metaclass__ = ABCMeta
 
@@ -157,6 +160,10 @@ class Container(object):
     def open(self):
         raise NotImplemented
 
+    def get_members(self):
+        """Returns a directory. The key is what is used to match when comparing containers."""
+        return {name: self.get_member(name) for name in self.get_member_names()}
+
     @abstractmethod
     def get_member_names(self):
         raise NotImplemented
@@ -165,25 +172,23 @@ class Container(object):
     def get_member(self, member_name):
         raise NotImplemented
 
+    def comparisons(self, other):
+        my_members = self.get_members()
+        other_members = other.get_members()
+        for name in sorted(set(my_members.iterkeys()).intersection(set(other_members.iterkeys()))):
+            yield my_members.pop(name), other_members.pop(name), NO_COMMENT
+        for my_file, other_file, score in diffoscope.comparators.perform_fuzzy_matching(my_members.values(), other_members.values()):
+            comment = 'Files similar despite different names (difference score: %d)' % score
+            yield my_file, other_file, comment
+
     def compare(self, other, source=None):
         differences = []
-        my_names = set(self.get_member_names())
-        other_names = set(other.get_member_names())
-        for name in sorted(my_names.intersection(other_names)):
-            logger.debug('compare member %s', name)
-            my_file = self.get_member(name)
-            other_file = other.get_member(name)
-            differences.append(
-                diffoscope.comparators.compare_files(
-                    my_file, other_file, source=name))
-        my_extra_files = map(self.get_member, my_names.difference(other_names))
-        other_extra_files = map(other.get_member, other_names.difference(my_names))
-        for my_file, other_file, score in diffoscope.comparators.perform_fuzzy_matching(my_extra_files, other_extra_files):
+        for my_file, other_file, comment in self.comparisons(other):
             difference = diffoscope.comparators.compare_files(my_file, other_file)
-            if difference is None:
-                difference = Difference(None, my_file.name, other_file.name)
-            difference.add_comment(
-                'Files similar despite different names (difference score: %d)' % score)
+            if comment:
+                if difference is None:
+                    difference = Difference(None, my_file.name, other_file.name)
+                difference.add_comment(comment)
             differences.append(difference)
         return differences
 
diff --git a/diffoscope/comparators/xz.py b/diffoscope/comparators/xz.py
index f14a2f7..4848e65 100644
--- a/diffoscope/comparators/xz.py
+++ b/diffoscope/comparators/xz.py
@@ -22,7 +22,7 @@ import re
 import subprocess
 import diffoscope.comparators
 from diffoscope.comparators.binary import File, needs_content
-from diffoscope.comparators.utils import Archive, get_compressed_content_name
+from diffoscope.comparators.utils import Archive, get_compressed_content_name, NO_COMMENT
 from diffoscope import logger, tool_required
 
 
@@ -38,6 +38,9 @@ class XzContainer(Archive):
     def close_archive(self):
         self._path = None
 
+    def get_members(self):
+        return {'xz-content': self.get_member(self.get_member_names()[0])}
+
     def get_member_names(self):
         return [get_compressed_content_name(self.path, '.xz')]
 
@@ -51,14 +54,6 @@ class XzContainer(Archive):
                 shell=False, stdout=fp, stderr=None)
         return dest_path
 
-    def compare(self, other, source=None):
-        my_file = self.get_member(self.get_member_names()[0])
-        other_file = other.get_member(other.get_member_names()[0])
-        source = None
-        if my_file.name == other_file.name:
-            source = my_file.name
-        return [diffoscope.comparators.compare_files(my_file, other_file, source)]
-
 
 class XzFile(File):
     RE_FILE_TYPE = re.compile(r'^XZ compressed data$')
@@ -71,4 +66,4 @@ class XzFile(File):
     def compare_details(self, other, source=None):
         with XzContainer(self).open() as my_container, \
              XzContainer(other).open() as other_container:
-            return my_container.compare(other_container, source)
+            return my_container.compare(other_container)
diff --git a/diffoscope/comparators/zip.py b/diffoscope/comparators/zip.py
index ceca4b3..644304e 100644
--- a/diffoscope/comparators/zip.py
+++ b/diffoscope/comparators/zip.py
@@ -101,5 +101,5 @@ class ZipFile(File):
         differences.append(zipinfo_difference)
         with ZipContainer(self).open() as my_container, \
              ZipContainer(other).open() as other_container:
-            differences.extend(my_container.compare(other_container, source))
+            differences.extend(my_container.compare(other_container))
         return differences

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git



More information about the Reproducible-commits mailing list