[Reproducible-commits] [debbindiff] 01/01: Work-in-progress: implement parallel processing

Jérémy Bobbio lunar at moszumanska.debian.org
Thu Jul 30 18:05:48 UTC 2015


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch pu/parellel
in repository debbindiff.

commit 021cb0c28fc6ddab06ce0189335601213a19ef38
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Thu Jul 30 18:04:40 2015 +0000

    Work-in-progress: implement parallel processing
---
 debbindiff.py                      |  7 ++++++-
 debbindiff/__init__.py             |  1 +
 debbindiff/comparators/__init__.py | 11 +++++++++++
 debbindiff/comparators/binary.py   | 15 ++++++++++++---
 debbindiff/comparators/deb.py      |  4 +++-
 debbindiff/comparators/debian.py   |  2 +-
 debbindiff/comparators/tar.py      | 10 +++++++++-
 debbindiff/comparators/utils.py    | 27 +++++++++++++++++++--------
 8 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/debbindiff.py b/debbindiff.py
index ff13f1f..c1c7db3 100755
--- a/debbindiff.py
+++ b/debbindiff.py
@@ -27,7 +27,7 @@ import codecs
 import os
 import sys
 import traceback
-from debbindiff import logger, VERSION, set_locale
+from debbindiff import logger, VERSION, set_locale, set_jobs
 import debbindiff.comparators
 from debbindiff.presenters.html import output_html
 from debbindiff.presenters.text import output_text
@@ -43,6 +43,8 @@ def create_parser():
                         help='show external tools required and exit')
     parser.add_argument('--debug', dest='debug', action='store_true',
                         default=False, help='display debug messages')
+    parser.add_argument('--jobs', metavar='JOBS', dest='jobs', type=int,
+                        help='comparisons to run simultaneously (default to CPU count)')
     parser.add_argument('--html', metavar='output', dest='html_output',
                         help='write HTML report to given file (use - for stdout)')
     parser.add_argument('--text', metavar='output', dest='text_output',
@@ -84,12 +86,15 @@ class ListToolsAction(argparse.Action):
         sys.exit(0)
 
 
+
+
 def main():
     parser = create_parser()
     parsed_args = parser.parse_args(sys.argv[1:])
     if parsed_args.debug:
         logger.setLevel(logging.DEBUG)
     set_locale()
+    set_jobs(parsed_args.jobs)
     difference = debbindiff.comparators.compare_root_paths(
         parsed_args.file1, parsed_args.file2)
     if difference:
diff --git a/debbindiff/__init__.py b/debbindiff/__init__.py
index 2f911de..f0aefe2 100644
--- a/debbindiff/__init__.py
+++ b/debbindiff/__init__.py
@@ -32,6 +32,7 @@ logger.addHandler(ch)
 formatter = logging.Formatter('%(levelname)8s %(message)s')
 ch.setFormatter(formatter)
 
+jobs = 1
 
 class RequiredToolNotFound(Exception):
     PROVIDERS = { 'ar':         { 'debian': 'binutils-multiarch' }
diff --git a/debbindiff/comparators/__init__.py b/debbindiff/comparators/__init__.py
index cd489e8..7ffb252 100644
--- a/debbindiff/comparators/__init__.py
+++ b/debbindiff/comparators/__init__.py
@@ -18,11 +18,15 @@
 # You should have received a copy of the GNU General Public License
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
+from itertools import dropwhile
 import magic
+import multiprocess
+import operator
 import os.path
 import re
 import sys
 from debbindiff import logger, tool_required
+import debbindiff
 from debbindiff.difference import Difference
 from debbindiff.comparators.binary import \
     File, FilesystemFile, compare_binary_files
@@ -76,6 +80,13 @@ def compare_files(file1, file2, source=None):
         return file1.compare(file2, source)
 
 
+def compare_many_files(comparisons):
+    if not hasattr(compare_many_files, 'pool'):
+        compare_many_files.pool = multiprocess.Pool(debbindiff.jobs)
+    logger.debug('compare_many_files %s', comparisons)
+    return compare_many_files.pool.map(compare_files, comparisons)
+
+
 # The order matters! They will be tried in turns.
 FILE_CLASSES = (
     Directory,
diff --git a/debbindiff/comparators/binary.py b/debbindiff/comparators/binary.py
index afa19a0..9fcd6c0 100644
--- a/debbindiff/comparators/binary.py
+++ b/debbindiff/comparators/binary.py
@@ -26,6 +26,7 @@ import os.path
 import re
 from stat import S_ISCHR, S_ISBLK
 import subprocess
+import threading
 import magic
 from debbindiff.difference import Difference
 from debbindiff import tool_required, RequiredToolNotFound, logger
@@ -82,17 +83,25 @@ class File(object):
         if not hasattr(self, '_mimedb'):
             self._mimedb = magic.open(magic.NONE)
             self._mimedb.load()
-        return self._mimedb.file(path)
+            self._mimedb_lock = threading.Lock()
+        self._mimedb_lock.acquire()
+        ret = self._mimedb.file(path)
+        self._mimedb_lock.release()
+        return ret
 
     @classmethod
     def guess_encoding(self, path):
         if not hasattr(self, '_mimedb_encoding'):
             self._mimedb_encoding = magic.open(magic.MAGIC_MIME_ENCODING)
             self._mimedb_encoding.load()
-        return self._mimedb_encoding.file(path)
+            self._mimedb_encoding_lock = threading.Lock()
+        self._mimedb_encoding_lock.acquire()
+        ret = self._mimedb_encoding.file(path)
+        self._mimedb_encoding_lock.release()
+        return ret
 
     def __repr__(self):
-        return '<%s %s %s>' % (self.__class__, self.name, self.path)
+        return '<%s %s %s %s>' % (self.__class__.__name__, id(self), self.name, self.path)
 
     # Path should only be used when accessing the file content (through get_content())
     @property
diff --git a/debbindiff/comparators/deb.py b/debbindiff/comparators/deb.py
index 4d975af..8bd6332 100644
--- a/debbindiff/comparators/deb.py
+++ b/debbindiff/comparators/deb.py
@@ -27,7 +27,7 @@ from debbindiff.difference import Difference
 import debbindiff.comparators
 from debbindiff.comparators.binary import File, needs_content
 from debbindiff.comparators.utils import \
-    Archive, ArchiveMember, get_ar_content
+    Archive, ArchiveMember, get_ar_content, synchronized
 
 AR_EXTRACTION_BUFFER_SIZE = 32768
 
@@ -39,9 +39,11 @@ class ArContainer(Archive):
         # ArFile don't have to be closed
         pass
 
+    @synchronized
     def get_member_names(self):
         return self.archive.getnames()
 
+    @synchronized
     def extract(self, member_name, dest_dir):
         logger.debug('ar extracting %s to %s', member_name, dest_dir)
         member = self.archive.getmember(member_name)
diff --git a/debbindiff/comparators/debian.py b/debbindiff/comparators/debian.py
index e15b47f..3f2a42a 100644
--- a/debbindiff/comparators/debian.py
+++ b/debbindiff/comparators/debian.py
@@ -39,7 +39,7 @@ DOT_CHANGES_FIELDS = [
 
 class DotChangesMember(File):
     def __init__(self, container, member_name):
-        self._container = container
+        #self._container = container
         self._name = member_name
         self._path = None
 
diff --git a/debbindiff/comparators/tar.py b/debbindiff/comparators/tar.py
index d7e0440..e1303b2 100644
--- a/debbindiff/comparators/tar.py
+++ b/debbindiff/comparators/tar.py
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
+from contextlib import contextmanager
 import os.path
 import re
 import stat
@@ -30,7 +31,7 @@ from debbindiff.comparators.binary import File, needs_content
 from debbindiff.comparators.device import Device
 from debbindiff.comparators.directory import Directory
 from debbindiff.comparators.symlink import Symlink
-from debbindiff.comparators.utils import Archive, ArchiveMember
+from debbindiff.comparators.utils import Archive, ArchiveMember, synchronized
 
 class TarMember(ArchiveMember):
     def is_directory(self):
@@ -47,6 +48,10 @@ class TarDirectory(Directory, TarMember):
     def __init__(self, archive, member_name):
         ArchiveMember.__init__(self, archive, member_name)
 
+    @contextmanager
+    def get_content(self):
+        yield
+
     def compare(self, other, source=None):
         return None
 
@@ -96,14 +101,17 @@ class TarContainer(Archive):
     def close_archive(self):
         self.archive.close()
 
+    @synchronized
     def get_member_names(self):
         return self.archive.getnames()
 
+    @synchronized
     def extract(self, member_name, dest_dir):
         logger.debug('tar extracting %s to %s', member_name, dest_dir)
         self.archive.extract(member_name, dest_dir)
         return os.path.join(dest_dir, member_name).decode('utf-8')
 
+    @synchronized
     def get_member(self, member_name):
         tarinfo = self.archive.getmember(member_name)
         if tarinfo.isdir():
diff --git a/debbindiff/comparators/utils.py b/debbindiff/comparators/utils.py
index 7734a40..a1ae959 100644
--- a/debbindiff/comparators/utils.py
+++ b/debbindiff/comparators/utils.py
@@ -19,6 +19,7 @@
 
 from abc import ABCMeta, abstractmethod
 from contextlib import contextmanager
+from functools import wraps
 # The following would be shutil.which in Python 3.3
 import hashlib
 import re
@@ -28,7 +29,7 @@ from stat import S_ISCHR, S_ISBLK
 from StringIO import StringIO
 import subprocess
 import tempfile
-from threading import Thread
+from threading import Thread, RLock
 import debbindiff.comparators
 from debbindiff.comparators.binary import \
     File, compare_binary_files
@@ -173,13 +174,9 @@ class Container(object):
         differences = []
         my_names = self.get_member_names()
         other_names = other.get_member_names()
-        for name in sorted(set(my_names).intersection(other_names)):
-            logger.debug('compare member %s', name)
-            my_file = self.get_member(name)
-            other_file = other.get_member(name)
-            differences.append(
-                debbindiff.comparators.compare_files(
-                    my_file, other_file, source=name))
+        differences.extend(debbindiff.comparators.compare_many_files(
+            [(self.get_member(name), other.get_member(name), name)
+             for name in sorted(set(my_names).intersection(other_names))]))
         return differences
 
 
@@ -261,3 +258,17 @@ class Archive(Container):
 
     def get_member(self, member_name):
         return ArchiveMember(self, member_name)
+
+
+# decorator: runs the wrapped method while holding a per-instance RLock
+def synchronized(original_method):
+    @wraps(original_method)
+    def wrapper(self, *args, **kwargs):
+        if not hasattr(self, '__rlock'):
+            self.__rlock = RLock()
+        try:
+            self.__rlock.acquire()
+            return original_method(self, *args, **kwargs)
+        finally:
+            self.__rlock.release()
+    return wrapper

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git



More information about the Reproducible-commits mailing list