[python-hdf5storage] 66/84: Fixed bug where structured numpy.ndarrays could be written with null characters and, depending on the case, / in their field names (not valid characters).

Mon Feb 29 08:25:05 UTC 2016

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 0.1.10
in repository python-hdf5storage.

commit c9233ecaa5c40a73cf1100ff413562bfdf941d37
Author: Freja Nordsiek <fnordsie at gmail.com>
Date:   Sat Aug 22 13:45:23 2015 -0400

    Fixed bug where structured numpy.ndarrays could be written with null characters and, depending on the case, / in their field names (not valid characters).
---
 hdf5storage/Marshallers.py   | 26 +++++++++++++++++++++++---
 tests/make_randoms.py        | 42 ++++++++++++++++++++++--------------------
 tests/test_write_readback.py | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+), 23 deletions(-)

diff --git a/hdf5storage/Marshallers.py b/hdf5storage/Marshallers.py
index 8e713b3..70b9504 100644
--- a/hdf5storage/Marshallers.py
+++ b/hdf5storage/Marshallers.py
@@ -690,6 +690,17 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
                 or not all(data_to_store.shape) \
                 or not all([all(data_to_store[n].shape) \
                 for n in data_to_store.dtype.names])):
+            # Grab the list of fields that don't have a null character
+            # or a / in them since those can't be written.
+            field_names = [n for n in data_to_store.dtype.names
+                           if '/' not in n and '\x00' not in n]
+
+            # Throw an exception if we had to exclude any field names.
+            if len(field_names) != len(data_to_store.dtype.names):
+                raise NotImplementedError("Null characters ('\x00') " \
+                    + "and '/' in the field names of this type of " \
+                    + 'numpy.ndarray are not supported.')
+
             # If the group doesn't exist, it needs to be created. If it
             # already exists but is not a group, it needs to be deleted
             # before being created.
@@ -702,9 +713,6 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
 
             grp2 = grp[name]
 
-            # Grab the list of fields.
-            field_names = list(data_to_store.dtype.names)
-
             # Write the metadata, and set the MATLAB_class to 'struct'
             # explicitly.
             if options.matlab_compatible:
@@ -764,6 +772,18 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
                                 - set(['H5PATH'])):
                             del_attribute(grp2[field], attribute)
         else:
+            # If it has fields and it isn't a Reference type, none of
+            # them can contain a null character.
+            if data_to_store.dtype.fields is not None \
+                    and h5py.check_dtype(ref=data_to_store.dtype) \
+                    is not h5py.Reference:
+                for n in data_to_store.dtype.fields:
+                    if '\x00' in n:
+                        raise NotImplementedError( \
+                            "Null characters ('\x00') " \
+                            + 'in the field names of this type of ' \
+                            + 'numpy.ndarray are not supported.')
+
             # The data must first be written. If name is not present
             # yet, then it must be created. If it is present, but not a
             # Dataset, has the wrong dtype, or is the wrong shape; then
diff --git a/tests/make_randoms.py b/tests/make_randoms.py
index 39747b7..86e25a6 100644
--- a/tests/make_randoms.py
+++ b/tests/make_randoms.py
@@ -218,26 +218,28 @@ def random_dict():
 
 
 def random_structured_numpy_array(shape, field_shapes=None,
-                                  nonascii_fields=False):
-    # Make random field names, dtypes, and sizes. Though, if
-    # field_shapes is explicitly given, the sizes should be
-    # random. The field names must all be of type str, not unicode
-    # in Python 2. Optionally include non-ascii characters in the
-    # field names (will have to be encoded in Python 2.x). String
-    # types will not be used due to the difficulty in assigning the
-    # length.
-    if nonascii_fields:
-        name_func = random_str_some_unicode
-    else:
-        name_func = random_str_ascii
-    names = [name_func(
-             max_structured_ndarray_field_lengths)
-             for i in range(0, random.randint(
-             min_structured_ndarray_fields,
-             max_structured_ndarray_fields))]
-    if sys.hexversion < 0x03000000:
-        for i, name in enumerate(names):
-            names[i] = name.encode('UTF-8')
+                                  nonascii_fields=False,
+                                  names=None):
+    # Make random field names (if not provided with field names),
+    # dtypes, and sizes. Though, if field_shapes is explicitly given,
+    # the sizes should be random. The field names must all be of type
+    # str, not unicode in Python 2. Optionally include non-ascii
+    # characters in the field names (will have to be encoded in Python
+    # 2.x). String types will not be used due to the difficulty in
+    # assigning the length.
+    if names is None:
+        if nonascii_fields:
+            name_func = random_str_some_unicode
+        else:
+            name_func = random_str_ascii
+        names = [name_func(
+                 max_structured_ndarray_field_lengths)
+                 for i in range(0, random.randint(
+                 min_structured_ndarray_fields,
+                 max_structured_ndarray_fields))]
+        if sys.hexversion < 0x03000000:
+            for i, name in enumerate(names):
+                names[i] = name.encode('UTF-8')
     dts = [random.choice(list(set(dtypes)
            - set(('S', 'U'))))
            for i in range(len(names))]
diff --git a/tests/test_write_readback.py b/tests/test_write_readback.py
index e5bc226..ab73424 100644
--- a/tests/test_write_readback.py
+++ b/tests/test_write_readback.py
@@ -130,6 +130,23 @@ class TestPythonMatlabFormat(object):
         out = self.write_readback(data, random_name(),
                                   self.options)
         self.assert_equal(out, data)
+
+    def check_numpy_structured_array_field_special_char(self, ch):
+        # Makes a random 1d structured ndarray with the character
+        # in one field, writes it and reads it back, and then compares
+        # it.
+        field_names = [random_str_ascii(max_dict_key_length)
+                       for i in range(2)]
+        field_names[1] = field_names[1][0] + ch + field_names[1][1:]
+        if sys.hexversion < 0x03000000:
+            for i in range(len(field_names)):
+                field_names[i] = field_names[i].encode('UTF-8')
+        shape = random_numpy_shape(1, \
+            max_structured_ndarray_axis_length)
+        data = random_structured_numpy_array(shape, names=field_names)
+        out = self.write_readback(data, random_name(),
+                                  self.options)
+        self.assert_equal(out, data)
     
     def check_python_collection(self, tp):
         # Makes a random collection of the specified type, writes it and
@@ -328,6 +345,14 @@ class TestPythonMatlabFormat(object):
                                   self.options)
         self.assert_equal(out, data)
 
+    @raises(NotImplementedError)
+    def test_numpy_structured_array_field_null_character(self):
+        self.check_numpy_structured_array_field_special_char('\x00')
+
+    @raises(NotImplementedError)
+    def test_numpy_structured_array_field_forward_slash(self):
+        self.check_numpy_structured_array_field_special_char('/')
+
     def test_python_collection(self):
         for tp in (list, tuple, set, frozenset, collections.deque):
             yield self.check_python_collection, tp
@@ -393,6 +418,10 @@ class TestPythonFormat(TestPythonMatlabFormat):
                                   self.options)
         self.assert_equal(out, data)
 
+    # Won't throw an exception unlike the parent.
+    def test_numpy_structured_array_field_forward_slash(self):
+        self.check_numpy_structured_array_field_special_char('/')
+
 
 class TestNoneFormat(TestPythonMatlabFormat):
     def __init__(self):
@@ -414,6 +443,10 @@ class TestNoneFormat(TestPythonMatlabFormat):
                                   self.options)
         self.assert_equal(out, data)
 
+    # Won't throw an exception unlike the parent.
+    def test_numpy_structured_array_field_forward_slash(self):
+        self.check_numpy_structured_array_field_special_char('/')
+
     def assert_equal(self, a, b):
         assert_equal_none_format(a, b)
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-hdf5storage.git