[Reproducible-commits] [debbindiff] 06/19: Add a new abstract class to pass command output to diff

Jérémy Bobbio lunar at moszumanska.debian.org
Tue Mar 31 14:59:28 UTC 2015


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch pu/feed-diff
in repository debbindiff.

commit e1a055138684507fba507f9589ce36b5198f89c9
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Mon Mar 30 13:16:10 2015 +0200

    Add a new abstract class to pass command output to diff
    
    Convert msgunfmt as an example.
---
 debbindiff/comparators/gettext.py | 43 +++++++++++++++--------
 debbindiff/comparators/utils.py   | 72 +++++++++++++++++++++++++++++++++++++++
 debbindiff/difference.py          | 30 ++++++++++++++++
 3 files changed, 131 insertions(+), 14 deletions(-)

diff --git a/debbindiff/comparators/gettext.py b/debbindiff/comparators/gettext.py
index 54c8da0..35deb38 100644
--- a/debbindiff/comparators/gettext.py
+++ b/debbindiff/comparators/gettext.py
@@ -20,28 +20,43 @@
 import re
 import subprocess
 from debbindiff import tool_required
-from debbindiff.comparators.utils import binary_fallback
+from debbindiff.comparators.utils import binary_fallback, Command
 from debbindiff.difference import Difference
 from debbindiff import logger
 
 
-@tool_required('msgunfmt')
-def msgunfmt(path):
-    output = subprocess.check_output(['msgunfmt', path], shell=False)
-    found = re.search(r'^"Content-Type: [^;]+; charset=([^\\]+)\\n"$', output, re.MULTILINE)
-    if found:
-        encoding = found.group(1)
-    else:
-        logger.debug('unable to determine PO encoding, falling back to utf-8')
-        encoding = 'utf-8'
-    return output.decode(encoding)
+class Msgunfmt(Command):
+    CHARSET_RE = re.compile(r'^"Content-Type: [^;]+; charset=([^\\]+)\\n"$')
+
+    def __init__(self, *args, **kwargs):
+        super(Msgunfmt, self).__init__(*args, **kwargs)
+        self._header = ''
+        self._encoding = None
+
+    @tool_required('msgunfmt')
+    def cmdline(self):
+        return ['msgunfmt', self.path]
+
+    def filter(self, line):
+        if not self._encoding:
+            if line == '':
+                logger.debug("unable to determine PO encoding, let's hope it's utf-8")
+                return self._header
+            self._header += line
+            found = Msgunfmt.CHARSET_RE.match(line)
+            if found:
+                self._encoding = found.group(1)
+                return self._header.decode(self._encoding).encode('utf-8')
+            return ''
+        if self._encoding != 'utf-8':
+            return line.decode(self._encoding).encode('utf-8')
+        else:
+            return line
 
 
 @binary_fallback
 def compare_mo_files(path1, path2, source=None):
-    mo1 = msgunfmt(path1)
-    mo2 = msgunfmt(path2)
-    difference = Difference.from_unicode(mo1, mo2, path1, path2, source='msgunfmt')
+    difference = Difference.from_command(Msgunfmt, path1, path2)
     if not difference:
         return []
     return [difference]
diff --git a/debbindiff/comparators/utils.py b/debbindiff/comparators/utils.py
index d702710..9d34817 100644
--- a/debbindiff/comparators/utils.py
+++ b/debbindiff/comparators/utils.py
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
+from abc import ABCMeta, abstractmethod
 from contextlib import contextmanager
 # The following would be shutil.which in Python 3.3
 import hashlib
@@ -25,6 +26,7 @@ import os
 import shutil
 import subprocess
 import tempfile
+from threading import Thread
 from debbindiff.comparators.binary import compare_binary_files
 from debbindiff.difference import Difference
 from debbindiff import logger, RequiredToolNotFound
@@ -92,3 +94,73 @@ def make_temp_directory():
 def get_ar_content(path):
     return subprocess.check_output(
         ['ar', 'tv', path], stderr=subprocess.STDOUT, shell=False).decode('utf-8')
+
+
+class Command(object):
+    __metaclass__ = ABCMeta
+
+    def __init__(self, path):
+        self._path = path
+        self._process = subprocess.Popen(self.cmdline(),
+                                         shell=False, close_fds=True,
+                                         stdin=subprocess.PIPE,
+                                         stdout=subprocess.PIPE,
+                                         stderr=subprocess.PIPE)
+        if hasattr(self, 'feed_stdin'):
+            self._stdin_feeder = Thread(target=self.feed_stdin, args=(self._process.stdin,))
+            self._stdin_feeder.daemon = True
+            self._stdin_feeder.start()
+        else:
+            self._stdin_feeder = None
+            self._process.stdin.close()
+        self._stderr = ''
+        self._stderr_line_count = 0
+        self._stderr_reader = Thread(target=self._read_stderr)
+        self._stderr_reader.daemon = True
+        self._stderr_reader.start()
+
+    @property
+    def path(self):
+        return self._path
+
+    @abstractmethod
+    def cmdline(self):
+        raise NotImplemented
+
+    # Define only if needed
+    #def feed_stdin(self, f)
+
+    def filter(self, line):
+        # Assume command output is utf-8 by default
+        return line
+
+    def poll(self):
+        return self._process.poll()
+
+    def terminate(self):
+        return self._process.terminate()
+
+    def wait(self):
+        if self._stdin_feeder:
+            self._stdin_feeder.join()
+        self._stderr_reader.join()
+        self._process.wait()
+
+    MAX_STDERR_LINES = 50
+
+    def _read_stderr(self):
+        for line in iter(self._process.stderr.readline, b''):
+            self._stderr_line_count += 1
+            if self._stderr_line_count <= Command.MAX_STDERR_LINES:
+                self._stderr += line
+        if self._stderr_line_count > Command.MAX_STDERR_LINES:
+            self._stderr += '[ %d lines ignored ]\n' % (self._stderr_line_count - Command.MAX_STDERR_LINES)
+        self._process.stderr.close()
+
+    @property
+    def stderr_content(self):
+        return self._stderr
+
+    @property
+    def stdout(self):
+        return self._process.stdout
diff --git a/debbindiff/difference.py b/debbindiff/difference.py
index 9ee4cb5..dd1c3b0 100644
--- a/debbindiff/difference.py
+++ b/debbindiff/difference.py
@@ -238,6 +238,14 @@ def make_feeder_from_file(in_file, filter=lambda buf: buf.encode('utf-8')):
         return end_nl
     return feeder
 
+def make_feeder_from_command(command):
+    def feeder(out_file):
+        end_nl = make_feeder_from_file(command.stdout, command.filter)(out_file)
+        if command.poll() is None:
+            command.terminate()
+        command.wait()
+        return end_nl
+    return feeder
 
 def diff(feeder1, feeder2):
     end_nl_q1 = Queue()
@@ -291,6 +299,28 @@ class Difference(object):
                                       make_feeder_from_file(file2),
                                       *args, **kwargs)
 
+    @staticmethod
+    def from_command(cls, path1, path2, *args, **kwargs):
+        command1 = cls(path1)
+        command2 = cls(path2)
+        if 'source' not in kwargs:
+            kwargs['source'] = ' '.join(map(lambda x: '{}' if x == command1.path else x, command1.cmdline()))
+        difference = Difference.from_feeder(make_feeder_from_command(command1),
+                                            make_feeder_from_command(command2),
+                                            path1, path2, *args, **kwargs)
+        if not difference:
+            return None
+        if command1.stderr_content or command2.stderr_content:
+            if difference.comment:
+                difference.comment += '\n'
+            else:
+                difference.comment = ''
+            if command1.stderr_content:
+                difference.comment += 'stderr from `%s`:\n%s\n' % (' '.join(command1.cmdline()), command1.stderr_content)
+            if command2.stderr_content:
+                difference.comment += 'stderr from `%s`:\n%s\n' % (' '.join(command2.cmdline()), command2.stderr_content)
+        return difference
+
     @property
     def comment(self):
         return self._comment

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git



More information about the Reproducible-commits mailing list