[python-hdf5storage] 66/84: Fixed bug where structured numpy.ndarrays could be written with null characters and, depending on the case, / in their field names (not valid characters).
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Feb 29 08:25:05 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 0.1.10
in repository python-hdf5storage.
commit c9233ecaa5c40a73cf1100ff413562bfdf941d37
Author: Freja Nordsiek <fnordsie at gmail.com>
Date: Sat Aug 22 13:45:23 2015 -0400
Fixed bug where structured numpy.ndarrays could be written with null characters and, depending on the case, / in their field names (not valid characters).
---
hdf5storage/Marshallers.py | 26 +++++++++++++++++++++++---
tests/make_randoms.py | 42 ++++++++++++++++++++++--------------------
tests/test_write_readback.py | 33 +++++++++++++++++++++++++++++++++
3 files changed, 78 insertions(+), 23 deletions(-)
diff --git a/hdf5storage/Marshallers.py b/hdf5storage/Marshallers.py
index 8e713b3..70b9504 100644
--- a/hdf5storage/Marshallers.py
+++ b/hdf5storage/Marshallers.py
@@ -690,6 +690,17 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
or not all(data_to_store.shape) \
or not all([all(data_to_store[n].shape) \
for n in data_to_store.dtype.names])):
+ # Grab the list of fields that don't have a null character
+ # or a / in them since those can't be written.
+ field_names = [n for n in data_to_store.dtype.names
+ if '/' not in n and '\x00' not in n]
+
+ # Throw an exception if we had to exclude any field names.
+ if len(field_names) != len(data_to_store.dtype.names):
+ raise NotImplementedError("Null characters ('\x00') " \
+ + "and '/' in the field names of this type of " \
+ + 'numpy.ndarray are not supported.')
+
# If the group doesn't exist, it needs to be created. If it
# already exists but is not a group, it needs to be deleted
# before being created.
@@ -702,9 +713,6 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
grp2 = grp[name]
- # Grab the list of fields.
- field_names = list(data_to_store.dtype.names)
-
# Write the metadata, and set the MATLAB_class to 'struct'
# explicitly.
if options.matlab_compatible:
@@ -764,6 +772,18 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
- set(['H5PATH'])):
del_attribute(grp2[field], attribute)
else:
+ # If it has fields and it isn't a Reference type, none of
+ # them can contain a null character.
+ if data_to_store.dtype.fields is not None \
+ and h5py.check_dtype(ref=data_to_store.dtype) \
+ is not h5py.Reference:
+ for n in data_to_store.dtype.fields:
+ if '\x00' in n:
+ raise NotImplementedError( \
+ "Null characters ('\x00') " \
+ + 'in the field names of this type of ' \
+ + 'numpy.ndarray are not supported.')
+
# The data must first be written. If name is not present
# yet, then it must be created. If it is present, but not a
# Dataset, has the wrong dtype, or is the wrong shape; then
diff --git a/tests/make_randoms.py b/tests/make_randoms.py
index 39747b7..86e25a6 100644
--- a/tests/make_randoms.py
+++ b/tests/make_randoms.py
@@ -218,26 +218,28 @@ def random_dict():
def random_structured_numpy_array(shape, field_shapes=None,
- nonascii_fields=False):
- # Make random field names, dtypes, and sizes. Though, if
- # field_shapes is explicitly given, the sizes should be
- # random. The field names must all be of type str, not unicode
- # in Python 2. Optionally include non-ascii characters in the
- # field names (will have to be encoded in Python 2.x). String
- # types will not be used due to the difficulty in assigning the
- # length.
- if nonascii_fields:
- name_func = random_str_some_unicode
- else:
- name_func = random_str_ascii
- names = [name_func(
- max_structured_ndarray_field_lengths)
- for i in range(0, random.randint(
- min_structured_ndarray_fields,
- max_structured_ndarray_fields))]
- if sys.hexversion < 0x03000000:
- for i, name in enumerate(names):
- names[i] = name.encode('UTF-8')
+ nonascii_fields=False,
+ names=None):
+ # Make random field names (if not provided with field names),
+ # dtypes, and sizes. Though, if field_shapes is explicitly given,
+ # the sizes should be random. The field names must all be of type
+ # str, not unicode in Python 2. Optionally include non-ascii
+ # characters in the field names (will have to be encoded in Python
+ # 2.x). String types will not be used due to the difficulty in
+ # assigning the length.
+ if names is None:
+ if nonascii_fields:
+ name_func = random_str_some_unicode
+ else:
+ name_func = random_str_ascii
+ names = [name_func(
+ max_structured_ndarray_field_lengths)
+ for i in range(0, random.randint(
+ min_structured_ndarray_fields,
+ max_structured_ndarray_fields))]
+ if sys.hexversion < 0x03000000:
+ for i, name in enumerate(names):
+ names[i] = name.encode('UTF-8')
dts = [random.choice(list(set(dtypes)
- set(('S', 'U'))))
for i in range(len(names))]
diff --git a/tests/test_write_readback.py b/tests/test_write_readback.py
index e5bc226..ab73424 100644
--- a/tests/test_write_readback.py
+++ b/tests/test_write_readback.py
@@ -130,6 +130,23 @@ class TestPythonMatlabFormat(object):
out = self.write_readback(data, random_name(),
self.options)
self.assert_equal(out, data)
+
+ def check_numpy_structured_array_field_special_char(self, ch):
+ # Makes a random 1d structured ndarray with the character
+ # in one field, writes it and reads it back, and then compares
+ # it.
+ field_names = [random_str_ascii(max_dict_key_length)
+ for i in range(2)]
+ field_names[1] = field_names[1][0] + ch + field_names[1][1:]
+ if sys.hexversion < 0x03000000:
+ for i in range(len(field_names)):
+ field_names[i] = field_names[i].encode('UTF-8')
+ shape = random_numpy_shape(1, \
+ max_structured_ndarray_axis_length)
+ data = random_structured_numpy_array(shape, names=field_names)
+ out = self.write_readback(data, random_name(),
+ self.options)
+ self.assert_equal(out, data)
def check_python_collection(self, tp):
# Makes a random collection of the specified type, writes it and
@@ -328,6 +345,14 @@ class TestPythonMatlabFormat(object):
self.options)
self.assert_equal(out, data)
+ @raises(NotImplementedError)
+ def test_numpy_structured_array_field_null_character(self):
+ self.check_numpy_structured_array_field_special_char('\x00')
+
+ @raises(NotImplementedError)
+ def test_numpy_structured_array_field_forward_slash(self):
+ self.check_numpy_structured_array_field_special_char('/')
+
def test_python_collection(self):
for tp in (list, tuple, set, frozenset, collections.deque):
yield self.check_python_collection, tp
@@ -393,6 +418,10 @@ class TestPythonFormat(TestPythonMatlabFormat):
self.options)
self.assert_equal(out, data)
+ # Won't throw an exception unlike the parent.
+ def test_numpy_structured_array_field_forward_slash(self):
+ self.check_numpy_structured_array_field_special_char('/')
+
class TestNoneFormat(TestPythonMatlabFormat):
def __init__(self):
@@ -414,6 +443,10 @@ class TestNoneFormat(TestPythonMatlabFormat):
self.options)
self.assert_equal(out, data)
+ # Won't throw an exception unlike the parent.
+ def test_numpy_structured_array_field_forward_slash(self):
+ self.check_numpy_structured_array_field_special_char('/')
+
def assert_equal(self, a, b):
assert_equal_none_format(a, b)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-hdf5storage.git
More information about the debian-science-commits
mailing list