[python-debian/master] deb822: Don't interpret comment lines

John Wright jsw at debian.org
Sun Jul 3 08:55:18 UTC 2011


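Lines beginning with '#' are now ignored as comments when parsing, and
blocks of comment lines no longer cause empty paragraphs to be generated.
Only a '#' in the first column marks a comment; a '#' inside an indented
continuation line is still kept as part of the field value.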

Closes: #632306
---
 debian/changelog     |    6 +++-
 lib/debian/deb822.py |   30 ++++++++++++++++++++---
 tests/test_deb822.py |   65 +++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 92 insertions(+), 9 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index c62864a..f4cd69d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -4,8 +4,9 @@ python-debian (0.1.21) UNRELEASED; urgency=low
   * test_changelog.py: Close open files. (Closes: #625672)
 
   [ John Wright ]
-  * Allow ':' as the first character of a value. (Closes: #597249)
-  * Avoid dumping unparseable data. (Closes: #597120)
+  * deb822: Allow ':' as the first character of a value.
+    (Closes: #597249)
+  * deb822: Avoid dumping unparseable data. (Closes: #597120)
   * Clean up deb822.GpgInfo implementation:
     - Change several @staticmethod decorated methods to @classmethod,
       since they call the class constructor.
@@ -19,6 +20,7 @@ python-debian (0.1.21) UNRELEASED; urgency=low
       accept keyring arguments that are under the GnuPG home directory,
       regardless of the current directory. (Closes: #627063)
   * deb822.Deb822.gpg_info takes an optional keyrings argument.
+  * deb822: Don't interpret lines starting with '#'. (Closes: #632306)
 
  -- John Wright <jsw at debian.org>  Sat, 02 Jul 2011 23:20:27 -0700
 
diff --git a/lib/debian/deb822.py b/lib/debian/deb822.py
index c48dc9c..4c5b74e 100644
--- a/lib/debian/deb822.py
+++ b/lib/debian/deb822.py
@@ -59,7 +59,8 @@ class TagSectionWrapper(object, UserDict.DictMixin):
         self.__section = section
 
     def keys(self):
-        return self.__section.keys()
+        return [key for key in self.__section.keys()
+                if not key.startswith('#')]
 
     def __getitem__(self, key):
         s = self.__section.find_raw(key)
@@ -310,8 +311,11 @@ class Deb822(Deb822Dict):
         if _have_apt_pkg and use_apt_pkg and isinstance(sequence, file):
             parser = apt_pkg.TagFile(sequence)
             for section in parser:
-                yield cls(fields=fields, _parsed=TagSectionWrapper(section),
-                          encoding=encoding)
+                paragraph = cls(fields=fields,
+                                _parsed=TagSectionWrapper(section),
+                                encoding=encoding)
+                if paragraph:
+                    yield paragraph
 
         else:
             iterable = iter(sequence)
@@ -324,6 +328,22 @@ class Deb822(Deb822Dict):
 
     ###
 
+    @staticmethod
+    def _skip_useless_lines(sequence):
+        """Yields only lines that do not begin with '#'.
+
+        Also skips any blank lines at the beginning of the input.
+        """
+        at_beginning = True
+        for line in sequence:
+            if line.startswith('#'):
+                continue
+            if at_beginning:
+                if not line.rstrip('\r\n'):
+                    continue
+                at_beginning = False
+            yield line
+
     def _internal_parser(self, sequence, fields=None):
         # The key is non-whitespace, non-colon characters before any colon.
         key_part = r"^(?P<key>[^: \t\n\r\f\v]+)\s*:\s*"
@@ -338,7 +358,9 @@ class Deb822(Deb822Dict):
 
         curkey = None
         content = ""
-        for line in self.gpg_stripped_paragraph(sequence):
+
+        for line in self.gpg_stripped_paragraph(
+                self._skip_useless_lines(sequence)):
             m = single.match(line)
             if m:
                 if curkey:
diff --git a/tests/test_deb822.py b/tests/test_deb822.py
index 7ef81e1..c3806bd 100755
--- a/tests/test_deb822.py
+++ b/tests/test_deb822.py
@@ -237,6 +237,47 @@ iD8DBQFIGWQO0UIZh3p4ZWERAug/AJ93DWD9o+1VMgPDjWn/dsmPSgTWGQCeOfZi
 -----END PGP SIGNATURE-----
 '''
 
+UNPARSED_PARAGRAPHS_WITH_COMMENTS = '''\
+# Leading comments should be ignored.
+
+Source: foo
+Section: bar
+# An inline comment in the middle of a paragraph should be ignored.
+Priority: optional
+Homepage: http://www.debian.org/
+
+# Comments in the middle shouldn't result in extra blank paragraphs either.
+
+# Ditto.
+
+# A comment at the top of a paragraph should be ignored.
+Package: foo
+Architecture: any
+Description: An awesome package
+  # This should still appear in the result.
+  Blah, blah, blah. # So should this.
+# A comment at the end of a paragraph should be ignored.
+
+# Trailing comments shouldn't cause extra blank paragraphs.
+'''
+
+PARSED_PARAGRAPHS_WITH_COMMENTS = [
+    deb822.Deb822Dict([
+        ('Source', 'foo'),
+        ('Section', 'bar'),
+        ('Priority', 'optional'),
+        ('Homepage', 'http://www.debian.org/'),
+    ]),
+    deb822.Deb822Dict([
+        ('Package', 'foo'),
+        ('Architecture', 'any'),
+        ('Description', 'An awesome package\n'
+            '  # This should still appear in the result.\n'
+            '  Blah, blah, blah. # So should this.'),
+    ]),
+]
+
+
 class TestDeb822Dict(unittest.TestCase):
     def make_dict(self):
         d = deb822.Deb822Dict()
@@ -397,10 +438,10 @@ class TestDeb822(unittest.TestCase):
                 self.assertWellParsed(d, PARSED_PACKAGE)
             self.assertEqual(count, 2)
 
-    def _test_iter_paragraphs(self, file, cls, **kwargs):
+    def _test_iter_paragraphs(self, filename, cls, **kwargs):
         """Ensure iter_paragraphs consistency"""
         
-        f = open(file)
+        f = open(filename)
         packages_content = f.read()
         f.close()
         # XXX: The way multivalued fields parsing works, we can't guarantee
@@ -410,10 +451,12 @@ class TestDeb822(unittest.TestCase):
 
         s = StringIO()
         l = []
-        for p in cls.iter_paragraphs(open(file), **kwargs):
+        f = open(filename)
+        for p in cls.iter_paragraphs(f, **kwargs):
             p.dump(s)
             s.write("\n")
             l.append(p)
+        f.close()
         self.assertEqual(s.getvalue(), packages_content)
         if kwargs["shared_storage"] is False:
             # If shared_storage is False, data should be consistent across
@@ -762,6 +805,22 @@ Description: python modules to work with Debian-related data formats
         d['Files'] = [{'md5sum': 'deadbeef', 'size': '9605', 'name': 'bad\n'}]
         self.assertRaises(ValueError, d.get_as_string, 'files')
 
+    def _test_iter_paragraphs_comments(self, paragraphs):
+        self.assertEqual(len(paragraphs), len(PARSED_PARAGRAPHS_WITH_COMMENTS))
+        for i in range(len(paragraphs)):
+            self.assertWellParsed(paragraphs[i],
+                                  PARSED_PARAGRAPHS_WITH_COMMENTS[i])
+
+    def test_iter_paragraphs_comments_use_apt_pkg(self):
+        paragraphs = list(deb822.Deb822.iter_paragraphs(
+            UNPARSED_PARAGRAPHS_WITH_COMMENTS.splitlines(), use_apt_pkg=True))
+        self._test_iter_paragraphs_comments(paragraphs)
+
+    def test_iter_paragraphs_comments_native(self):
+        paragraphs = list(deb822.Deb822.iter_paragraphs(
+            UNPARSED_PARAGRAPHS_WITH_COMMENTS.splitlines(), use_apt_pkg=False))
+        self._test_iter_paragraphs_comments(paragraphs)
+
 
 class TestPkgRelations(unittest.TestCase):
 
-- 
1.7.2.5




More information about the pkg-python-debian-commits mailing list