[Reproducible-commits] [debbindiff] 06/06: Attempt to sort out multiple decoding and encoding issues

Jérémy Bobbio <lunar at moszumanska.debian.org>
Thu Mar 19 20:56:48 UTC 2015


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository debbindiff.

commit bdb4eeb3683ced9abd2f8cb2eb8828ac30363e2d
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Thu Mar 19 21:55:30 2015 +0100

    Attempt to sort out multiple decoding and encoding issues
---
 debbindiff/comparators/binary.py    |  2 +-
 debbindiff/comparators/cpio.py      | 11 ++++++-----
 debbindiff/comparators/directory.py |  9 +++++----
 debbindiff/comparators/elf.py       |  6 +++---
 debbindiff/comparators/fonts.py     |  3 ++-
 debbindiff/comparators/gettext.py   |  3 ++-
 debbindiff/comparators/gzip.py      |  3 ++-
 debbindiff/comparators/haskell.py   |  2 +-
 debbindiff/comparators/pdf.py       |  5 +++--
 debbindiff/comparators/png.py       |  3 ++-
 debbindiff/comparators/squashfs.py  |  5 +++--
 debbindiff/comparators/utils.py     |  8 ++++----
 debbindiff/comparators/zip.py       |  3 ++-
 debbindiff/difference.py            |  4 ++++
 debbindiff/presenters/text.py       | 16 +++++++++-------
 15 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/debbindiff/comparators/binary.py b/debbindiff/comparators/binary.py
index 918283c..c6002ed 100644
--- a/debbindiff/comparators/binary.py
+++ b/debbindiff/comparators/binary.py
@@ -22,7 +22,7 @@ import subprocess
 
 
 def get_hexdump(path):
-    return subprocess.check_output(['xxd', path], shell=False)
+    return subprocess.check_output(['xxd', path], shell=False).decode('ascii')
 
 
 def compare_binary_files(path1, path2, source=None):
diff --git a/debbindiff/comparators/cpio.py b/debbindiff/comparators/cpio.py
index 5809067..754d723 100644
--- a/debbindiff/comparators/cpio.py
+++ b/debbindiff/comparators/cpio.py
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
+import locale
 import subprocess
 import os.path
 import debbindiff.comparators
@@ -30,14 +31,14 @@ def get_cpio_content(path, verbose=False):
     cmd = ['cpio', '--quiet', '-tF', path]
     if verbose:
         cmd = ['cpio', '-tvF', path]
-    return subprocess.check_output(cmd, stderr=subprocess.PIPE, shell=False)
+    return subprocess.check_output(cmd, stderr=subprocess.PIPE, shell=False).decode(locale.getpreferredencoding())
 
 
 @tool_required('cpio')
 def extract_cpio_archive(path, destdir):
     cmd = ['cpio', '--no-absolute-filenames', '--quiet', '-idF',
-            os.path.abspath(path)]
-    logger.debug("extracting %s into %s", path, destdir)
+            os.path.abspath(path.encode('utf-8'))]
+    logger.debug("extracting %s into %s", path.encode('utf-8'), destdir)
     p = subprocess.Popen(cmd, shell=False, cwd=destdir)
     p.communicate()
     p.wait()
@@ -64,8 +65,8 @@ def compare_cpio_files(path1, path2, source=None):
         with make_temp_directory() as temp_dir2:
             extract_cpio_archive(path1, temp_dir1)
             extract_cpio_archive(path2, temp_dir2)
-            files1 = [ f for f in content1.split('\n') ]
-            files2 = [ f for f in content2.split('\n') ]
+            files1 = content1.splitlines(1)
+            files2 = content2.splitlines(1)
             for member in sorted(set(files1).intersection(set(files2))):
                 in_path1 = os.path.join(temp_dir1, member)
                 in_path2 = os.path.join(temp_dir2, member)
diff --git a/debbindiff/comparators/directory.py b/debbindiff/comparators/directory.py
index fd7fe54..976dae3 100644
--- a/debbindiff/comparators/directory.py
+++ b/debbindiff/comparators/directory.py
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
+import locale
 import os.path
 import re
 import subprocess
@@ -27,12 +28,12 @@ import debbindiff.comparators
 
 
 def ls(path):
-    return subprocess.check_output(['ls', path], shell=False).decode('utf-8')
+    return subprocess.check_output(['ls', path], shell=False).decode(locale.getpreferredencoding())
 
 
 @tool_required('stat')
 def stat(path):
-    output = subprocess.check_output(['stat', path], shell=False).decode('utf-8')
+    output = subprocess.check_output(['stat', path], shell=False).decode(locale.getpreferredencoding())
     output = re.sub(r'^\s*File:.*$', '', output, flags=re.MULTILINE)
     output = re.sub(r'Inode: [0-9]+', '', output)
     return output
@@ -41,7 +42,7 @@ def stat(path):
 @tool_required('lsattr')
 def lsattr(path):
     try:
-        output = subprocess.check_output(['lsattr', '-d', path], shell=False, stderr=subprocess.STDOUT).decode('utf-8')
+        output = subprocess.check_output(['lsattr', '-d', path], shell=False, stderr=subprocess.STDOUT).decode(locale.getpreferredencoding())
         return output.split()[0]
     except subprocess.CalledProcessError as e:
         if e.returncode == 1:
@@ -51,7 +52,7 @@ def lsattr(path):
 
 @tool_required('getfacl')
 def getfacl(path):
-    return subprocess.check_output(['getfacl', '-p', '-c', path], shell=False).decode('utf-8')
+    return subprocess.check_output(['getfacl', '-p', '-c', path], shell=False).decode(locale.getpreferredencoding())
 
 
 def compare_meta(path1, path2):
diff --git a/debbindiff/comparators/elf.py b/debbindiff/comparators/elf.py
index 8a42f99..8bf447b 100644
--- a/debbindiff/comparators/elf.py
+++ b/debbindiff/comparators/elf.py
@@ -28,7 +28,7 @@ from debbindiff.difference import Difference
 def readelf_all(path):
     output = subprocess.check_output(
         ['readelf', '--all', path],
-        shell=False)
+        shell=False).decode('ascii')
     # the full path can appear in the output, we need to remove it
     return re.sub(re.escape(path), os.path.basename(path), output)
 
@@ -37,7 +37,7 @@ def readelf_all(path):
 def readelf_debug_dump(path):
     output = subprocess.check_output(
         ['readelf', '--debug-dump', path],
-        shell=False)
+        shell=False).decode('ascii')
     # the full path can appear in the output, we need to remove it
     return re.sub(re.escape(path), os.path.basename(path), output)
 
@@ -48,7 +48,7 @@ def objdump_disassemble(path):
         ['objdump', '--disassemble', '--full-contents', path],
         shell=False)
     # the full path appears in the output, we need to remove it
-    return re.sub(re.escape(path), os.path.basename(path), output)
+    return re.sub(re.escape(path), os.path.basename(path), output).decode('ascii')
 
 
 # this one is not wrapped with binary_fallback and is used
diff --git a/debbindiff/comparators/fonts.py b/debbindiff/comparators/fonts.py
index eac2d2b..875f399 100644
--- a/debbindiff/comparators/fonts.py
+++ b/debbindiff/comparators/fonts.py
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
+import locale
 import subprocess
 from debbindiff.comparators.utils import binary_fallback, tool_required
 from debbindiff.difference import Difference
@@ -24,7 +25,7 @@ from debbindiff.difference import Difference
 
 @tool_required('showttf')
 def show_ttf(path):
-    return subprocess.check_output(['showttf', path], shell=False)
+    return subprocess.check_output(['showttf', path], shell=False).decode(locale.getpreferredencoding())
 
 
 @binary_fallback
diff --git a/debbindiff/comparators/gettext.py b/debbindiff/comparators/gettext.py
index b152555..0b477d1 100644
--- a/debbindiff/comparators/gettext.py
+++ b/debbindiff/comparators/gettext.py
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
+import locale
 import subprocess
 from debbindiff.comparators.utils import binary_fallback, tool_required
 from debbindiff.difference import Difference
@@ -24,7 +25,7 @@ from debbindiff.difference import Difference
 
 @tool_required('msgunfmt')
 def msgunfmt(path):
-    return subprocess.check_output(['msgunfmt', path], shell=False)
+    return subprocess.check_output(['msgunfmt', path], shell=False).decode(locale.getpreferredencoding())
 
 
 @binary_fallback
diff --git a/debbindiff/comparators/gzip.py b/debbindiff/comparators/gzip.py
index c0dbf4a..e59ebf4 100644
--- a/debbindiff/comparators/gzip.py
+++ b/debbindiff/comparators/gzip.py
@@ -18,6 +18,7 @@
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
 from contextlib import contextmanager
+import locale
 import subprocess
 import os.path
 import debbindiff.comparators
@@ -42,7 +43,7 @@ def decompress_gzip(path):
 
 @tool_required('file')
 def get_gzip_metadata(path):
-    return subprocess.check_output(['file', '--brief', path])
+    return subprocess.check_output(['file', '--brief', path]).decode(locale.getpreferredencoding())
 
 
 @binary_fallback
diff --git a/debbindiff/comparators/haskell.py b/debbindiff/comparators/haskell.py
index a0db51d..c1793e5 100644
--- a/debbindiff/comparators/haskell.py
+++ b/debbindiff/comparators/haskell.py
@@ -24,7 +24,7 @@ from debbindiff.difference import Difference
 
 @tool_required('ghc')
 def show_iface(path):
-    return subprocess.check_output(['ghc', '--show-iface', path], shell=False)
+    return subprocess.check_output(['ghc', '--show-iface', path], shell=False).decode('ascii')
 
 
 @binary_fallback
diff --git a/debbindiff/comparators/pdf.py b/debbindiff/comparators/pdf.py
index 5e18fb0..195fd82 100644
--- a/debbindiff/comparators/pdf.py
+++ b/debbindiff/comparators/pdf.py
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
+import locale
 import subprocess
 from debbindiff.comparators.utils import binary_fallback, tool_required
 from debbindiff.difference import Difference, get_source
@@ -27,14 +28,14 @@ def uncompress(path):
     output = subprocess.check_output(
         ['pdftk', path, 'output', '-', 'uncompress'],
         shell=False, close_fds=True)
-    return output.decode('latin-1').encode('ascii', 'backslashreplace')
+    return output.decode('latin-1')
 
 
 @tool_required('pdftotext')
 def pdftotext(path):
     return subprocess.check_output(
         ['pdftotext', path, '-'],
-        shell=False, close_fds=True).decode('utf-8')
+        shell=False, close_fds=True).decode(locale.getpreferredencoding())
 
 
 @binary_fallback
diff --git a/debbindiff/comparators/png.py b/debbindiff/comparators/png.py
index bfeb39b..214fa7b 100644
--- a/debbindiff/comparators/png.py
+++ b/debbindiff/comparators/png.py
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
+import locale
 import subprocess
 from debbindiff.comparators.utils import binary_fallback, tool_required
 from debbindiff.difference import Difference
@@ -31,7 +32,7 @@ def sng(path):
         p.wait()
         if p.returncode != 0:
             return 'sng exited with error %d\n%s' % (p.returncode, err)
-        return out
+        return out.decode(locale.getpreferredencoding())
 
 @binary_fallback
 def compare_png_files(path1, path2, source=None):
diff --git a/debbindiff/comparators/squashfs.py b/debbindiff/comparators/squashfs.py
index 7d457ea..3896125 100644
--- a/debbindiff/comparators/squashfs.py
+++ b/debbindiff/comparators/squashfs.py
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
+import locale
 import subprocess
 import os.path
 import debbindiff.comparators
@@ -32,10 +33,10 @@ def get_squashfs_content(path, verbose=True):
     if verbose:
         # first get superblock information
         cmd = ['unsquashfs', '-s', path]
-        content = subprocess.check_output(cmd, shell=False)
+        content = subprocess.check_output(cmd, shell=False).decode(locale.getpreferredencoding())
         # and then the verbose file listing
         cmd = ['unsquashfs', '-d', '', '-lls', path]
-    return content + subprocess.check_output(cmd, shell=False)
+    return content + subprocess.check_output(cmd, shell=False).decode(locale.getpreferredencoding())
 
 
 @tool_required('unsquashfs')
diff --git a/debbindiff/comparators/utils.py b/debbindiff/comparators/utils.py
index 8b2f63c..a036d47 100644
--- a/debbindiff/comparators/utils.py
+++ b/debbindiff/comparators/utils.py
@@ -128,10 +128,10 @@ def tool_required(command):
     tool_required.all.add(command)
     def wrapper(original_function):
         if find_executable(command):
-            def tool_check(*args):
-                return original_function(*args)
+            def tool_check(*args, **kwargs):
+                return original_function(*args, **kwargs)
         else:
-            def tool_check(*args):
+            def tool_check(*args, **kwargs):
                 raise RequiredToolNotFound(command)
         return tool_check
     return wrapper
@@ -146,4 +146,4 @@ def make_temp_directory():
 
 def get_ar_content(path):
     return subprocess.check_output(
-        ['ar', 'tv', path], stderr=subprocess.STDOUT, shell=False)
+        ['ar', 'tv', path], stderr=subprocess.STDOUT, shell=False).decode('utf-8')
diff --git a/debbindiff/comparators/zip.py b/debbindiff/comparators/zip.py
index d96008b..c892da0 100644
--- a/debbindiff/comparators/zip.py
+++ b/debbindiff/comparators/zip.py
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
+import locale
 import os.path
 import re
 import subprocess
@@ -33,7 +34,7 @@ def get_zipinfo(path, verbose=False):
         cmd = ['zipinfo', '-v', path]
     else:
         cmd = ['zipinfo', path]
-    output = subprocess.check_output(cmd, shell=False)
+    output = subprocess.check_output(cmd, shell=False).decode(locale.getpreferredencoding())
     # the full path appears in the output, we need to remove it
     return re.sub(re.escape(path), os.path.basename(path), output)
 
diff --git a/debbindiff/difference.py b/debbindiff/difference.py
index 8322b0e..6293a7d 100644
--- a/debbindiff/difference.py
+++ b/debbindiff/difference.py
@@ -23,6 +23,10 @@ import os.path
 class Difference(object):
     def __init__(self, lines1, lines2, path1, path2, source=None,
                  comment=None):
+        if lines1 and type(lines1[0]) is not unicode:
+            raise UnicodeError('lines1 has not been decoded')
+        if lines2 and type(lines2[0]) is not unicode:
+            raise UnicodeError('lines2 has not been decoded')
         # allow to override declared file paths, useful when comparing
         # tempfiles
         if source:
diff --git a/debbindiff/presenters/text.py b/debbindiff/presenters/text.py
index 4095735..4811490 100644
--- a/debbindiff/presenters/text.py
+++ b/debbindiff/presenters/text.py
@@ -18,14 +18,16 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
+
 import sys
 import difflib
+import locale
 
 
 def print_difference(difference, print_func):
     if difference.comment:
         for line in difference.comment.split('\n'):
-            print_func("│┄ %s" % line)
+            print_func(u"│┄ %s" % line)
     if difference.lines1 or difference.lines2:
         g = difflib.unified_diff(difference.lines1, difference.lines2)
         # First skip lines with filename
@@ -34,22 +36,22 @@ def print_difference(difference, print_func):
         for line in g:
             if line.startswith('--- ') or line.startswith('+++ '):
                 continue
-            print_func("│ %s" % line.encode('utf-8'), end='')
+            print_func("│ %s" % line.encode(locale.getpreferredencoding()), end='')
 
 def print_details(difference, print_func):
     if not difference.details:
         return
     for detail in difference.details:
         if detail.source1 == detail.source2:
-            print_func("├── %s" % detail.source1)
+            print_func(u"├── %s" % detail.source1)
         else:
-            print_func("│   --- %s" % (detail.source1))
-            print_func("├── +++ %s" % (detail.source2))
+            print_func(u"│   --- %s" % (detail.source1))
+            print_func(u"├── +++ %s" % (detail.source2))
         print_difference(detail, print_func)
         def new_print_func(*args, **kwargs):
-            print_func('│  ', *args, **kwargs)
+            print_func(u'│  ', *args, **kwargs)
         print_details(detail, new_print_func)
-    print_func('╵')
+    print_func(u'╵')
 
 def output_text(differences, print_func):
     for difference in differences:

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git



More information about the Reproducible-commits mailing list