[diffoscope] 06/10: parallel: comparison_pool: reworks map function
Juliana Oliveira
jwnx-guest at moszumanska.debian.org
Sun Jan 14 20:04:35 UTC 2018
This is an automated email from the git hooks/post-receive script.
jwnx-guest pushed a commit to branch jwnx_parallel_diffoscope
in repository diffoscope.
commit c28f09815b55a766418f0b335834189c54de6cec
Author: Juliana Oliveira <juliana.orod at gmail.com>
Date: Sun Jan 14 16:51:55 2018 -0200
parallel: comparison_pool: reworks map function
ComparisonPool.map function aims to simulate multiprocessing.map
function while better handling serialization and processing
asynchronously. Async processing is preferred in this case because it
unlocks file descriptors on-the-go, avoiding recursion errors.
Each process is spawned by an apply_async call and its result is added to a
callback dictionary, from which it can be retrieved in order later. If a
function fails to execute, a CommandFailedToExecute exception is raised.
Signed-off-by: Juliana Oliveira <juliana.orod at gmail.com>
---
diffoscope/parallel/comparison_pool.py | 45 +++++++++++++++++++++++++---------
1 file changed, 33 insertions(+), 12 deletions(-)
diff --git a/diffoscope/parallel/comparison_pool.py b/diffoscope/parallel/comparison_pool.py
index 43c7594..7d72a0a 100644
--- a/diffoscope/parallel/comparison_pool.py
+++ b/diffoscope/parallel/comparison_pool.py
@@ -1,36 +1,57 @@
import logging
import dill
-from pathos.multiprocessing import ProcessingPool as Pool
+from multiprocess import Pool
from diffoscope.config import Config
-from diffoscope.parallel.thread_manager import ThreadManager
+from functools import partial
+from pickle import PickleError
logger = logging.getLogger(__name__)
+class CommandFailedToExecute(Exception):
+ def __init__(self, err):
+ self.err = err
+ def __str__(self):
+ return repr(self.err)
+
+
class ComparisonPool(object):
def __init__(self):
self._pool_size = Config().jobs
- self._pool = Pool(nodes=self._pool_size)
+ self._pool = Pool(self._pool_size)
logger.debug("ComparisonPool initialized. Pool size: %d", self._pool_size)
- def map(self, fun, args=[]):
+ def map(self, fun, args=[], callback=None):
logger.debug("Invoking parallel map for function %s", fun)
- threads = ThreadManager()
+ pool = self._pool
+ jobs = []
+
+ def _callback(result, index):
+ callback[index] = result
- for arg in args:
- threads.new(fun, arg)
- threads.join()
+ for index, arg in enumerate(args):
+ logger.debug("Adding new process for %s (%d: %s)", fun, index, arg)
+ new_callback = partial(_callback, index=index)
+ jobs.append(pool.apply_async(fun, args=(arg,), callback= new_callback))
- while not threads.ready():
- time.sleep(0.5)
+ for job in jobs:
+ try:
+ job.get()
+ except PickleError as e:
+ raise CommandFailedToExecute(e)
- return threads.result()
+ if not job.successful():
+ raise CommandFailedToExecute(job._err_callback)
+ logger.debug("Closing Pools")
+ pool.close()
+ pool.join()
+ jobs = None
+ logger.debug("Ending ComparisonPool.map")
-pool = ComparisonPool()
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the Reproducible-commits
mailing list