[Reproducible-commits] [debbindiff] 06/19: Add a new abstract class to pass command output to diff
Jérémy Bobbio
lunar at moszumanska.debian.org
Tue Mar 31 14:59:28 UTC 2015
This is an automated email from the git hooks/post-receive script.
lunar pushed a commit to branch pu/feed-diff
in repository debbindiff.
commit e1a055138684507fba507f9589ce36b5198f89c9
Author: Jérémy Bobbio <lunar at debian.org>
Date: Mon Mar 30 13:16:10 2015 +0200
Add a new abstract class to pass command output to diff
Convert msgunfmt as an example.
---
debbindiff/comparators/gettext.py | 43 +++++++++++++++--------
debbindiff/comparators/utils.py | 72 +++++++++++++++++++++++++++++++++++++++
debbindiff/difference.py | 30 ++++++++++++++++
3 files changed, 131 insertions(+), 14 deletions(-)
diff --git a/debbindiff/comparators/gettext.py b/debbindiff/comparators/gettext.py
index 54c8da0..35deb38 100644
--- a/debbindiff/comparators/gettext.py
+++ b/debbindiff/comparators/gettext.py
@@ -20,28 +20,43 @@
import re
import subprocess
from debbindiff import tool_required
-from debbindiff.comparators.utils import binary_fallback
+from debbindiff.comparators.utils import binary_fallback, Command
from debbindiff.difference import Difference
from debbindiff import logger
-@tool_required('msgunfmt')
-def msgunfmt(path):
- output = subprocess.check_output(['msgunfmt', path], shell=False)
- found = re.search(r'^"Content-Type: [^;]+; charset=([^\\]+)\\n"$', output, re.MULTILINE)
- if found:
- encoding = found.group(1)
- else:
- logger.debug('unable to determine PO encoding, falling back to utf-8')
- encoding = 'utf-8'
- return output.decode(encoding)
+class Msgunfmt(Command):
+ CHARSET_RE = re.compile(r'^"Content-Type: [^;]+; charset=([^\\]+)\\n"$')
+
+ def __init__(self, *args, **kwargs):
+ super(Msgunfmt, self).__init__(*args, **kwargs)
+ self._header = ''
+ self._encoding = None
+
+ @tool_required('msgunfmt')
+ def cmdline(self):
+ return ['msgunfmt', self.path]
+
+ def filter(self, line):
+ if not self._encoding:
+ if line == '':
+ logger.debug("unable to determine PO encoding, let's hope it's utf-8")
+ return self._header
+ self._header += line
+ found = Msgunfmt.CHARSET_RE.match(line)
+ if found:
+ self._encoding = found.group(1)
+ return self._header.decode(self._encoding).encode('utf-8')
+ return ''
+ if self._encoding != 'utf-8':
+ return line.decode(self._encoding).encode('utf-8')
+ else:
+ return line
@binary_fallback
def compare_mo_files(path1, path2, source=None):
- mo1 = msgunfmt(path1)
- mo2 = msgunfmt(path2)
- difference = Difference.from_unicode(mo1, mo2, path1, path2, source='msgunfmt')
+ difference = Difference.from_command(Msgunfmt, path1, path2)
if not difference:
return []
return [difference]
diff --git a/debbindiff/comparators/utils.py b/debbindiff/comparators/utils.py
index d702710..9d34817 100644
--- a/debbindiff/comparators/utils.py
+++ b/debbindiff/comparators/utils.py
@@ -17,6 +17,7 @@
# You should have received a copy of the GNU General Public License
# along with debbindiff. If not, see <http://www.gnu.org/licenses/>.
+from abc import ABCMeta, abstractmethod
from contextlib import contextmanager
# The following would be shutil.which in Python 3.3
import hashlib
@@ -25,6 +26,7 @@ import os
import shutil
import subprocess
import tempfile
+from threading import Thread
from debbindiff.comparators.binary import compare_binary_files
from debbindiff.difference import Difference
from debbindiff import logger, RequiredToolNotFound
@@ -92,3 +94,73 @@ def make_temp_directory():
def get_ar_content(path):
return subprocess.check_output(
['ar', 'tv', path], stderr=subprocess.STDOUT, shell=False).decode('utf-8')
+
+
+class Command(object):
+ __metaclass__ = ABCMeta
+
+ def __init__(self, path):
+ self._path = path
+ self._process = subprocess.Popen(self.cmdline(),
+ shell=False, close_fds=True,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ if hasattr(self, 'feed_stdin'):
+ self._stdin_feeder = Thread(target=self.feed_stdin, args=(self._process.stdin,))
+ self._stdin_feeder.daemon = True
+ self._stdin_feeder.start()
+ else:
+ self._stdin_feeder = None
+ self._process.stdin.close()
+ self._stderr = ''
+ self._stderr_line_count = 0
+ self._stderr_reader = Thread(target=self._read_stderr)
+ self._stderr_reader.daemon = True
+ self._stderr_reader.start()
+
+ @property
+ def path(self):
+ return self._path
+
+ @abstractmethod
+ def cmdline(self):
+ raise NotImplemented
+
+ # Define only if needed
+ #def feed_stdin(self, f)
+
+ def filter(self, line):
+ # Assume command output is utf-8 by default
+ return line
+
+ def poll(self):
+ return self._process.poll()
+
+ def terminate(self):
+ return self._process.terminate()
+
+ def wait(self):
+ if self._stdin_feeder:
+ self._stdin_feeder.join()
+ self._stderr_reader.join()
+ self._process.wait()
+
+ MAX_STDERR_LINES = 50
+
+ def _read_stderr(self):
+ for line in iter(self._process.stderr.readline, b''):
+ self._stderr_line_count += 1
+ if self._stderr_line_count <= Command.MAX_STDERR_LINES:
+ self._stderr += line
+ if self._stderr_line_count > Command.MAX_STDERR_LINES:
+ self._stderr += '[ %d lines ignored ]\n' % (self._stderr_line_count - Command.MAX_STDERR_LINES)
+ self._process.stderr.close()
+
+ @property
+ def stderr_content(self):
+ return self._stderr
+
+ @property
+ def stdout(self):
+ return self._process.stdout
diff --git a/debbindiff/difference.py b/debbindiff/difference.py
index 9ee4cb5..dd1c3b0 100644
--- a/debbindiff/difference.py
+++ b/debbindiff/difference.py
@@ -238,6 +238,14 @@ def make_feeder_from_file(in_file, filter=lambda buf: buf.encode('utf-8')):
return end_nl
return feeder
+def make_feeder_from_command(command):
+ def feeder(out_file):
+ end_nl = make_feeder_from_file(command.stdout, command.filter)(out_file)
+ if command.poll() is None:
+ command.terminate()
+ command.wait()
+ return end_nl
+ return feeder
def diff(feeder1, feeder2):
end_nl_q1 = Queue()
@@ -291,6 +299,28 @@ class Difference(object):
make_feeder_from_file(file2),
*args, **kwargs)
+ @staticmethod
+ def from_command(cls, path1, path2, *args, **kwargs):
+ command1 = cls(path1)
+ command2 = cls(path2)
+ if 'source' not in kwargs:
+ kwargs['source'] = ' '.join(map(lambda x: '{}' if x == command1.path else x, command1.cmdline()))
+ difference = Difference.from_feeder(make_feeder_from_command(command1),
+ make_feeder_from_command(command2),
+ path1, path2, *args, **kwargs)
+ if not difference:
+ return None
+ if command1.stderr_content or command2.stderr_content:
+ if difference.comment:
+ difference.comment += '\n'
+ else:
+ difference.comment = ''
+ if command1.stderr_content:
+ difference.comment += 'stderr from `%s`:\n%s\n' % (' '.join(command1.cmdline()), command1.stderr_content)
+ if command2.stderr_content:
+ difference.comment += 'stderr from `%s`:\n%s\n' % (' '.join(command2.cmdline()), command2.stderr_content)
+ return difference
+
@property
def comment(self):
return self._comment
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git
More information about the Reproducible-commits
mailing list