[python-debian/master] Avoid dumping unparseable data

John Wright jsw at debian.org
Sun Jul 3 09:50:04 UTC 2011


Add an input validation method that is called by the default __setitem__,
and add some validation at output time for multivalued fields (since
their input is a mutable list which makes it unsuitable for validation
at intput time).

Closes: #597120
---
 debian/changelog     |    1 +
 lib/debian/deb822.py |   45 ++++++++++++++++++++++++++++++++++++++++++---
 tests/test_deb822.py |   27 +++++++++++++++++++++++++++
 3 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index 3fcedff..cce54f5 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -5,6 +5,7 @@ python-debian (0.1.21) UNRELEASED; urgency=low
 
   [ John Wright ]
   * Allow ':' as the first character of a value. (Closes: #597249)
+  * Avoid dumping unparseable data. (Closes: #597120)
 
  -- John Wright <jsw at debian.org>  Sat, 02 Jul 2011 23:20:27 -0700
 
diff --git a/lib/debian/deb822.py b/lib/debian/deb822.py
index 84a4a74..47c921d 100644
--- a/lib/debian/deb822.py
+++ b/lib/debian/deb822.py
@@ -408,8 +408,9 @@ class Deb822(Deb822Dict):
             value = self.get_as_string(key)
             if not value or value[0] == '\n':
                 # Avoid trailing whitespace after "Field:" if it's on its own
-                # line or the value is empty
-                # XXX Uh, really print value if value == '\n'?
+                # line or the value is empty.  We don't have to worry about the
+                # case where value == '\n', since we ensure that is not the
+                # case in __setitem__.
                 entry = '%s:%s\n' % (key, value)
             else:
                 entry = '%s: %s\n' % (key, value)
@@ -590,6 +591,31 @@ class Deb822(Deb822Dict):
 
         return self.gpg_info
 
+    def validate_input(self, key, value):
+        """Raise ValueError if value is not a valid value for key
+
+        Subclasses that do interesting things for different keys may wish to
+        override this method.
+        """
+
+        # The value cannot end in a newline (if it did, dumping the object
+        # would result in multiple stanzas)
+        if value.endswith('\n'):
+            raise ValueError("value must not end in '\\n'")
+
+        # Make sure there are no blank lines (actually, the first one is
+        # allowed to be blank, but no others), and each subsequent line starts
+        # with whitespace
+        for line in value.splitlines()[1:]:
+            if not line:
+                raise ValueError("value must not have blank lines")
+            if not line[0].isspace():
+                raise ValueError("each line must start with whitespace")
+
+    def __setitem__(self, key, value):
+        self.validate_input(key, value)
+        Deb822Dict.__setitem__(self, key, value)
+
 ###
 
 # XXX check what happens if input contains more that one signature
@@ -894,6 +920,16 @@ class _multivalued(Deb822):
             for line in filter(None, contents.splitlines()):
                 updater_method(Deb822Dict(zip(fields, line.split())))
 
+    def validate_input(self, key, value):
+        if key.lower() in self._multivalued_fields:
+            # It's difficult to write a validator for multivalued fields, and
+            # basically futile, since we allow mutable lists.  In any case,
+            # with sanity checking in get_as_string, we shouldn't ever output
+            # unparseable data.
+            pass
+        else:
+            Deb822.validate_input(self, key, value)
+
     def get_as_string(self, key):
         keyl = key.lower()
         if keyl in self._multivalued_fields:
@@ -911,13 +947,16 @@ class _multivalued(Deb822):
                 field_lengths = {}
             for item in array:
                 for x in order:
-                    raw_value = str(item[x])
+                    raw_value = unicode(item[x])
                     try:
                         length = field_lengths[keyl][x]
                     except KeyError:
                         value = raw_value
                     else:
                         value = (length - len(raw_value)) * " " + raw_value
+                    if "\n" in value:
+                        raise ValueError("'\\n' not allowed in component of "
+                                         "multivalued field %s" % key)
                     fd.write(" %s" % value)
                 fd.write("\n")
             return fd.getvalue().rstrip("\n")
diff --git a/tests/test_deb822.py b/tests/test_deb822.py
index e29a12d..b7e252e 100755
--- a/tests/test_deb822.py
+++ b/tests/test_deb822.py
@@ -734,6 +734,33 @@ Description: python modules to work with Debian-related data formats
         parsed = {'Foo': ': bar'}
         self.assertWellParsed(deb822.Deb822(data), parsed)
 
+    @staticmethod
+    def _dictset(d, key, value):
+        d[key] = value
+
+    def test_field_value_ends_in_newline(self):
+        """Field values are not allowed to end with newlines"""
+
+        d = deb822.Deb822()
+        self.assertRaises(ValueError, self._dictset, d, 'foo', 'bar\n')
+        self.assertRaises(ValueError, self._dictset, d, 'foo', 'bar\nbaz\n')
+
+    def test_field_value_contains_blank_line(self):
+        """Field values are not allowed to contain blank lines"""
+
+        d = deb822.Deb822()
+        self.assertRaises(ValueError, self._dictset, d, 'foo', 'bar\n\nbaz')
+        self.assertRaises(ValueError, self._dictset, d, 'foo', '\n\nbaz')
+
+    def test_multivalued_field_contains_newline(self):
+        """Multivalued field components are not allowed to contain newlines"""
+
+        d = deb822.Dsc()
+        # We don't check at set time, since one could easily modify the list
+        # without deb822 knowing.  We instead check at get time.
+        d['Files'] = [{'md5sum': 'deadbeef', 'size': '9605', 'name': 'bad\n'}]
+        self.assertRaises(ValueError, d.get_as_string, 'files')
+
 
 class TestPkgRelations(unittest.TestCase):
 
-- 
1.7.2.5




More information about the pkg-python-debian-commits mailing list