[python-debian/master 29/36] Fix up most of debian.deb822 for string handling changes in Python 3. There are still a couple of difficult cases left.

Mon Oct 8 07:41:25 UTC 2012

---
 lib/debian/deb822.py |   32 +++++++++++++++--------
 tests/test_deb822.py |   69 ++++++++++++++++++++++++++++---------------------
 2 files changed, 60 insertions(+), 41 deletions(-)

diff --git a/lib/debian/deb822.py b/lib/debian/deb822.py
index d57ae0d..ffcce12 100644
--- a/lib/debian/deb822.py
+++ b/lib/debian/deb822.py
@@ -54,6 +54,8 @@ except ImportError:
     _mapping_mixin = DictMixin
     _mutable_mapping_mixin = DictMixin
 
+import six
+
 if sys.version >= '3':
     import io
     def _is_real_file(f):
@@ -228,7 +230,7 @@ class Deb822Dict(_mutable_mapping_mixin, object):
             else:
                 raise
 
-        if isinstance(value, str):
+        if isinstance(value, bytes):
             # Always return unicode objects instead of strings
             try:
                 value = value.decode(self.encoding)
@@ -394,7 +396,7 @@ class Deb822(Deb822Dict):
 
         wanted_field = lambda f: fields is None or f in fields
 
-        if isinstance(sequence, basestring):
+        if isinstance(sequence, six.string_types):
             sequence = sequence.splitlines()
 
         curkey = None
@@ -442,6 +444,10 @@ class Deb822(Deb822Dict):
     def __unicode__(self):
         return self.dump()
 
+    if sys.version >= '3':
+        def __bytes__(self):
+            return self.dump().encode(self.encoding)
+
     # __repr__ is handled by Deb822Dict
 
     def get_as_string(self, key):
@@ -451,7 +457,7 @@ class Deb822(Deb822Dict):
         this can be overridden in subclasses (e.g. _multivalued) that can take
         special values.
         """
-        return unicode(self[key])
+        return six.text_type(self[key])
 
     def dump(self, fd=None, encoding=None):
         """Dump the the contents in the original format
@@ -722,9 +728,9 @@ class GpgInfo(dict):
 
         n = cls()
 
-        if isinstance(out, basestring):
+        if isinstance(out, six.string_types):
             out = out.split('\n')
-        if isinstance(err, basestring):
+        if isinstance(err, six.string_types):
             err = err.split('\n')
 
         n.out = out
@@ -777,13 +783,17 @@ class GpgInfo(dict):
             raise IOError("cannot access any of the given keyrings")
 
         p = subprocess.Popen(args, stdin=subprocess.PIPE,
-                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                             universal_newlines=True)
         # XXX what to do with exit code?
 
-        if isinstance(sequence, basestring):
-            (out, err) = p.communicate(sequence)
+        if isinstance(sequence, six.string_types):
+            inp = sequence
         else:
-            (out, err) = p.communicate(cls._get_full_string(sequence))
+            inp = cls._get_full_string(sequence)
+        if sys.version >= '3':
+            inp = inp.encode('UTF-8')
+        out, err = p.communicate(inp)
 
         return cls.from_output(out, err)
 
@@ -1049,7 +1059,7 @@ class _multivalued(Deb822):
                 field_lengths = {}
             for item in array:
                 for x in order:
-                    raw_value = unicode(item[x])
+                    raw_value = six.text_type(item[x])
                     try:
                         length = field_lengths[keyl][x]
                     except KeyError:
@@ -1085,7 +1095,7 @@ class _gpg_multivalued(_multivalued):
             sequence = kwargs.get("sequence", None)
 
         if sequence is not None:
-            if isinstance(sequence, basestring):
+            if isinstance(sequence, six.string_types):
                 self.raw_text = sequence
             elif hasattr(sequence, "items"):
                 # sequence is actually a dict(-like) object, so we don't have
diff --git a/tests/test_deb822.py b/tests/test_deb822.py
index 2716eec..fecd4b2 100755
--- a/tests/test_deb822.py
+++ b/tests/test_deb822.py
@@ -27,8 +27,9 @@ import unittest
 import warnings
 try:
     from StringIO import StringIO
+    BytesIO = StringIO
 except ImportError:
-    from io import StringIO
+    from io import BytesIO, StringIO
 
 import six
 
@@ -146,7 +147,7 @@ CcYAoOLYDF5G1h3oR1iDNyeCI6hRW03S
     ]
 
 
-CHANGES_FILE = u'''\
+CHANGES_FILE = six.u('''\
 Format: 1.7
 Date: Fri, 28 Dec 2007 17:08:48 +0100
 Source: bzr-gtk
@@ -171,7 +172,7 @@ Files:
  0fd797f4138a9d4fdeb8c30597d46bc9 1003 python optional bzr-gtk_0.93.0-2.dsc
  d9523676ae75c4ced299689456f252f4 3860 python optional bzr-gtk_0.93.0-2.diff.gz
  8960459940314b21019dedd5519b47a5 168544 python optional bzr-gtk_0.93.0-2_all.deb
-'''
+''')
 
 CHECKSUM_CHANGES_FILE = '''\
 Format: 1.8
@@ -276,6 +277,14 @@ PARSED_PARAGRAPHS_WITH_COMMENTS = [
 ]
 
 
+def open_utf8(filename, mode='r'):
+    """Open a UTF-8 text file in text mode."""
+    if sys.version < '3':
+        return open(filename, mode=mode)
+    else:
+        return open(filename, mode=mode, encoding='UTF-8')
+
+
 class TestDeb822Dict(unittest.TestCase):
     def make_dict(self):
         d = deb822.Deb822Dict()
@@ -330,7 +339,7 @@ class TestDeb822Dict(unittest.TestCase):
 
     def test_unicode_key_access(self):
         d = self.make_dict()
-        self.assertEqual(1, d[u'testkey'])
+        self.assertEqual(1, d[six.u('testkey')])
 
 
 class TestDeb822(unittest.TestCase):
@@ -439,30 +448,30 @@ class TestDeb822(unittest.TestCase):
     def _test_iter_paragraphs(self, filename, cls, **kwargs):
         """Ensure iter_paragraphs consistency"""
         
-        f = open(filename)
+        f = open(filename, 'rb')
         packages_content = f.read()
         f.close()
         # XXX: The way multivalued fields parsing works, we can't guarantee
         # that trailing whitespace is reproduced.
-        packages_content = "\n".join([line.rstrip() for line in
-                                      packages_content.splitlines()] + [''])
+        packages_content = b"\n".join([line.rstrip() for line in
+                                       packages_content.splitlines()] + [b''])
 
-        s = StringIO()
+        s = BytesIO()
         l = []
-        f = open(filename)
+        f = open_utf8(filename)
         for p in cls.iter_paragraphs(f, **kwargs):
             p.dump(s)
-            s.write("\n")
+            s.write(b"\n")
             l.append(p)
         f.close()
         self.assertEqual(s.getvalue(), packages_content)
         if kwargs["shared_storage"] is False:
             # If shared_storage is False, data should be consistent across
             # iterations -- i.e. we can use "old" objects
-            s = StringIO()
+            s = BytesIO()
             for p in l:
                 p.dump(s)
-                s.write("\n")
+                s.write(b"\n")
             self.assertEqual(s.getvalue(), packages_content)
 
     def test_iter_paragraphs_apt_shared_storage_packages(self):
@@ -694,18 +703,18 @@ Description: python modules to work with Debian-related data formats
         objects = []
         objects.append(deb822.Deb822(UNPARSED_PACKAGE))
         objects.append(deb822.Deb822(CHANGES_FILE))
-        with open('test_Packages') as f:
+        with open_utf8('test_Packages') as f:
             objects.extend(deb822.Deb822.iter_paragraphs(f))
-        with open('test_Packages') as f:
+        with open_utf8('test_Packages') as f:
             objects.extend(deb822.Packages.iter_paragraphs(f))
-        with open('test_Sources') as f:
+        with open_utf8('test_Sources') as f:
             objects.extend(deb822.Deb822.iter_paragraphs(f))
         with open('test_Sources.iso8859-1') as f:
             objects.extend(deb822.Deb822.iter_paragraphs(
                 f, encoding="iso8859-1"))
         for d in objects:
             for value in d.values():
-                self.assertTrue(isinstance(value, unicode))
+                self.assertTrue(isinstance(value, six.text_type))
 
         # The same should be true for Sources and Changes except for their
         # _multivalued fields
@@ -713,15 +722,15 @@ Description: python modules to work with Debian-related data formats
         multi.append(deb822.Changes(CHANGES_FILE))
         multi.append(deb822.Changes(SIGNED_CHECKSUM_CHANGES_FILE
                                     % CHECKSUM_CHANGES_FILE))
-        with open('test_Sources') as f:
+        with open_utf8('test_Sources') as f:
             multi.extend(deb822.Sources.iter_paragraphs(f))
         for d in multi:
             for key, value in d.items():
                 if key.lower() not in d.__class__._multivalued_fields:
-                    self.assertTrue(isinstance(value, unicode))
+                    self.assertTrue(isinstance(value, six.text_type))
 
     def test_encoding_integrity(self):
-        with open('test_Sources') as f:
+        with open_utf8('test_Sources') as f:
             utf8 = list(deb822.Deb822.iter_paragraphs(f))
         with open('test_Sources.iso8859-1') as f:
             latin1 = list(deb822.Deb822.iter_paragraphs(
@@ -734,20 +743,20 @@ Description: python modules to work with Debian-related data formats
 
         # XXX: The way multiline fields parsing works, we can't guarantee
         # that trailing whitespace is reproduced.
-        with open('test_Sources') as f:
-            utf8_contents = "\n".join([line.rstrip() for line in f] + [''])
-        with open('test_Sources.iso8859-1') as f:
-            latin1_contents = "\n".join([line.rstrip() for line in f] + [''])
+        with open('test_Sources', 'rb') as f:
+            utf8_contents = b"\n".join([line.rstrip() for line in f] + [b''])
+        with open('test_Sources.iso8859-1', 'rb') as f:
+            latin1_contents = b"\n".join([line.rstrip() for line in f] + [b''])
 
-        utf8_to_latin1 = StringIO()
+        utf8_to_latin1 = BytesIO()
         for d in utf8:
             d.dump(fd=utf8_to_latin1, encoding='iso8859-1')
-            utf8_to_latin1.write("\n")
+            utf8_to_latin1.write(b"\n")
 
-        latin1_to_utf8 = StringIO()
+        latin1_to_utf8 = BytesIO()
         for d in latin1:
             d.dump(fd=latin1_to_utf8, encoding='utf-8')
-            latin1_to_utf8.write("\n")
+            latin1_to_utf8.write(b"\n")
 
         self.assertEqual(utf8_contents, latin1_to_utf8.getvalue())
         self.assertEqual(latin1_contents, utf8_to_latin1.getvalue())
@@ -774,10 +783,10 @@ Description: python modules to work with Debian-related data formats
                                                           use_apt_pkg=False)]:
             p1 = next(paragraphs)
             self.assertEqual(p1['maintainer'],
-                             u'Adeodato Simó <dato at net.com.org.es>')
+                             six.u('Adeodato Sim\xf3 <dato at net.com.org.es>'))
             p2 = next(paragraphs)
             self.assertEqual(p2['uploaders'],
-                             u'Frank Küster <frank at debian.org>')
+                             six.u('Frank K\xfcster <frank at debian.org>'))
         f2.close()
         f1.close()
 
@@ -913,7 +922,7 @@ class TestPkgRelations(unittest.TestCase):
                             src_rel)))
 
     def test_sources(self):
-        f = open('test_Sources')
+        f = open_utf8('test_Sources')
         pkgs = deb822.Sources.iter_paragraphs(f)
         pkg1 = next(pkgs)
         rel1 = {'build-conflicts': [],
-- 
1.7.2.5