[python-debian/master] Robustness tweaks

John Wright jsw at debian.org
Wed Nov 5 00:40:07 UTC 2008


- Use a special OrderedSet class for keeping track of keys in Deb822Dict
  instead of relying on the internal __dict object (since some fields
  might be provided by __parsed)
- Make _gpg_multivalued.__init__ more robust when dealing with empty
  sequences
- Unit tests to exercise Packages as well as Sources parsing for
  iter_paragraphs
---
 debian_bundle/deb822.py |   54 ++++++++++++++++++++++++++++++++++++----
 tests/test_deb822.py    |   62 +++++++++++++++++++++++++++++-----------------
 2 files changed, 87 insertions(+), 29 deletions(-)

diff --git a/debian_bundle/deb822.py b/debian_bundle/deb822.py
index 2ad9115..c744490 100644
--- a/debian_bundle/deb822.py
+++ b/debian_bundle/deb822.py
@@ -36,6 +36,43 @@ import sys
 import StringIO
 import UserDict
 
+class OrderedSet(set):
+    """A set object that preserves order when iterating over it
+
+    We use this to keep track of keys in Deb822Dict, because it's much faster
+    to look up if a key is in a set than in a list.
+    """
+
+    def __init__(self, iterable=[]):
+        set.__init__(self)
+        self.__order = []
+        for item in iterable:
+            self.add(item)
+
+    def add(self, item):
+        if item not in self:
+            # set.add will raise TypeError if something's unhashable, so we
+            # don't have to handle that ourselves
+            set.add(self, item)
+            self.__order.append(item)
+
+    def remove(self, item):
+        # set.remove will raise KeyError, so we don't need to handle that
+        # ourselves
+        set.remove(self, item)
+        self.__order.remove(item)
+
+    def __iter__(self):
+        return iter(self.__order)
+
+    ### list-like methods
+    append = add
+
+    def extend(self, iterable):
+        for item in iterable:
+            self.add(item)
+    ###
+
 class Deb822Dict(object, UserDict.DictMixin):
     # Subclassing UserDict.DictMixin because we're overriding so much dict
     # functionality that subclassing dict requires overriding many more than
@@ -58,7 +95,7 @@ class Deb822Dict(object, UserDict.DictMixin):
 
     def __init__(self, _dict=None, _parsed=None, _fields=None):
         self.__dict = {}
-        self.__keys = []
+        self.__keys = OrderedSet()
         self.__parsed = None
 
         if _dict is not None:
@@ -83,7 +120,6 @@ class Deb822Dict(object, UserDict.DictMixin):
                 self.__keys.extend([ _strI(k) for k in self.__parsed.keys() ])
             else:
                 self.__keys.extend([ _strI(f) for f in _fields if self.__parsed.has_key(f) ])
-
         
     ### BEGIN DictMixin methods
 
@@ -104,7 +140,8 @@ class Deb822Dict(object, UserDict.DictMixin):
                 raise
 
     def __contains__(self, key):
-        return key in self.__dict
+        key = _strI(key)
+        return key in self.__keys
     
     def __delitem__(self, key):
         key = _strI(key)
@@ -818,7 +855,8 @@ class _gpg_multivalued(_multivalued):
     gpg can verify the signature.  Use it just like you would use the
     _multivalued class.
 
-    This class only stores raw text if it detects a gpg signature (see
+    This class only stores raw text if it is given a raw string, or if it
+    detects a gpg signature when given a file or sequence of lines (see
     Deb822.split_gpg_and_payload for details).
     """
 
@@ -836,8 +874,12 @@ class _gpg_multivalued(_multivalued):
                 # the raw text.
                 pass
             else:
-                gpg_pre_lines, lines, gpg_post_lines = \
-                        self.split_gpg_and_payload(sequence)
+                try:
+                    gpg_pre_lines, lines, gpg_post_lines = \
+                            self.split_gpg_and_payload(sequence)
+                except EOFError:
+                    # Empty input
+                    gpg_pre_lines = lines = gpg_post_lines = []
                 if gpg_pre_lines and gpg_post_lines:
                     raw_text = StringIO.StringIO()
                     raw_text.write("\n".join(gpg_pre_lines))
diff --git a/tests/test_deb822.py b/tests/test_deb822.py
index c4ef1f6..8280338 100755
--- a/tests/test_deb822.py
+++ b/tests/test_deb822.py
@@ -385,36 +385,52 @@ class TestDeb822(unittest.TestCase):
                 self.assertWellParsed(d, PARSED_PACKAGE)
             self.assertEqual(count, 2)
 
-    def test_iter_paragraphs_shared_storage(self):
-        """Ensure consistency with the three possible iter_paragraph options"""
+    def _test_iter_paragraphs(self, file, cls, **kwargs):
+        """Ensure iter_paragraphs consistency"""
         
-        f = open("test_Packages")
+        f = open(file)
         packages_content = f.read()
         f.close()
-
-        combinations = [
-            {"use_apt_pkg": True, "shared_storage": True},
-            {"use_apt_pkg": True, "shared_storage": False},
-            {"use_apt_pkg": False, "shared_storage": False},
-        ]
-
-        for kwargs in combinations:
+        # XXX: The way multivalued fields parsing works, we can't guarantee
+        # that trailing whitespace is reproduced.
+        packages_content = "\n".join([line.rstrip() for line in
+                                      packages_content.splitlines()] + [''])
+
+        s = StringIO()
+        l = []
+        for p in cls.iter_paragraphs(open(file), **kwargs):
+            p.dump(s)
+            s.write("\n")
+            l.append(p)
+        self.assertEqual(s.getvalue(), packages_content)
+        if kwargs["shared_storage"] is False:
+            # If shared_storage is False, data should be consistent across
+            # iterations -- i.e. we can use "old" objects
             s = StringIO()
-            l = []
-            for p in deb822.Packages.iter_paragraphs(open("test_Packages"),
-                                                     **kwargs):
+            for p in l:
                 p.dump(s)
                 s.write("\n")
-                l.append(p)
             self.assertEqual(s.getvalue(), packages_content)
-            if kwargs["shared_storage"] is False:
-                # If shared_storage is False, data should be consistent across
-                # iterations -- i.e. we can use "old" objects
-                s = StringIO()
-                for p in l:
-                    p.dump(s)
-                    s.write("\n")
-                self.assertEqual(s.getvalue(), packages_content)
+
+    def test_iter_paragraphs_apt_shared_storage_packages(self):
+        self._test_iter_paragraphs("test_Packages", deb822.Packages,
+                                   use_apt_pkg=True, shared_storage=True)
+    def test_iter_paragraphs_apt_no_shared_storage_packages(self):
+        self._test_iter_paragraphs("test_Packages", deb822.Packages,
+                                   use_apt_pkg=True, shared_storage=False)
+    def test_iter_paragraphs_no_apt_no_shared_storage_packages(self):
+        self._test_iter_paragraphs("test_Packages", deb822.Packages,
+                                   use_apt_pkg=False, shared_storage=False)
+
+    def test_iter_paragraphs_apt_shared_storage_sources(self):
+        self._test_iter_paragraphs("test_Sources", deb822.Sources,
+                                   use_apt_pkg=True, shared_storage=True)
+    def test_iter_paragraphs_apt_no_shared_storage_sources(self):
+        self._test_iter_paragraphs("test_Sources", deb822.Sources,
+                                   use_apt_pkg=True, shared_storage=False)
+    def test_iter_paragraphs_no_apt_no_shared_storage_sources(self):
+        self._test_iter_paragraphs("test_Sources", deb822.Sources,
+                                   use_apt_pkg=False, shared_storage=False)
 
     def test_parser_empty_input(self):
         self.assertEqual({}, deb822.Deb822([]))
-- 
1.5.5.GIT





More information about the pkg-python-debian-commits mailing list