[python-hdf5storage] 11/84: Fixed writing of empty MATLAB structs to use a Dataset with the appropriate fields attributes set, and fixed several reading errors with the new format.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Feb 29 08:24:58 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 0.1.10
in repository python-hdf5storage.
commit 99d573c8736a49c77a20e5d4f4e40e31e230abaa
Author: Freja Nordsiek <fnordsie at gmail.com>
Date: Thu Aug 14 14:58:14 2014 -0400
Fixed writing of empty MATLAB structs to use a Dataset with the appropriate fields attributes set, and fixed several reading errors with the new format.
---
hdf5storage/Marshallers.py | 151 +++++++++++++++++++++++++++------------------
1 file changed, 92 insertions(+), 59 deletions(-)
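(Editor's note: the write-side hunks below move the 'MATLAB_fields' handling into write_metadata and encode the field names as a vlen='S1' array of single-character byte arrays. The following is a rough standalone sketch of that encoding in plain h5py, not the library's own code; the file name 'demo.h5', dataset name 'a', and field names are made up for illustration.)

```python
import h5py
import numpy as np

field_names = ['x', 'y_label']

with h5py.File('demo.h5', 'w') as f:
    # For an empty struct, the commit stores the array's shape in a
    # uint64 Dataset instead of the (nonexistent) data itself.
    dset = f.create_dataset('a', data=np.uint64([1, 1]))

    # 'MATLAB_fields' is a vlen='S1' array: one element per field, each
    # element being an array of that field name's individual ASCII bytes.
    dt = h5py.special_dtype(vlen=np.dtype('S1'))
    fs = np.empty(shape=(len(field_names),), dtype=dt)
    for i, s in enumerate(field_names):
        fs[i] = np.array([c.encode('ascii') for c in s], dtype='S1')

    dset.attrs.create('MATLAB_fields', fs, dtype=dt)
    dset.attrs['MATLAB_class'] = np.bytes_('struct')
```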
diff --git a/hdf5storage/Marshallers.py b/hdf5storage/Marshallers.py
index bdea98b..52ecb22 100644
--- a/hdf5storage/Marshallers.py
+++ b/hdf5storage/Marshallers.py
@@ -628,15 +628,12 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
data_to_store = np.uint8(data_to_store)
# If data is empty, we instead need to store the shape of the
- # array if the appropriate option is set, unless it is a
- # structured ndarray and we are storing those as structs.
+ # array if the appropriate option is set.
if options.store_shape_for_empty and (data.size == 0 \
or ((data.dtype.type == np.bytes_ \
or data.dtype.type == np.str_) \
- and data.nbytes == 0)) \
- and (data_to_store.dtype.fields is None \
- or not options.structured_numpy_ndarray_as_struct):
+ and data.nbytes == 0)):
data_to_store = np.uint64(data_to_store.shape)
# If it is a complex type, then it needs to be encoded to have
@@ -683,8 +680,6 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
# Write the metadata, and set the MATLAB_class to 'struct'
# explicitly.
- self.write_metadata(f, grp, name, data, type_string,
- options)
if options.matlab_compatible:
set_attribute_string(grp[name], 'MATLAB_class',
'struct')
@@ -697,29 +692,6 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
set(field_names)):
del grp2[field]
- # If we are making it MATLAB compatible and have h5py
- # version >= 2.3, then we can set the MATLAB_fields
- # Attribute as long as all keys are mappable to
- # ASCII. Otherwise, the attribute should be deleted. It is
- # written as a vlen='S1' array of bytes_ arrays of the
- # individual characters.
- if options.matlab_compatible \
- and distutils.version.LooseVersion( \
- h5py.__version__) \
- >= distutils.version.LooseVersion('2.3'):
- try:
- dt = h5py.special_dtype(vlen=np.dtype('S1'))
- fs = np.empty(shape=(len(field_names),), dtype=dt)
- for i, s in enumerate(field_names):
- fs[i] = np.array([c.encode('ascii') for c in s],
- dtype='S1')
- except UnicodeDecodeError:
- del_attribute(grp[name], 'MATLAB_fields')
- else:
- set_attribute(grp[name], 'MATLAB_fields', fs)
- else:
- del_attribute(grp[name], 'MATLAB_fields')
-
# Go field by field making an object array (make an empty
# object array and assign element wise) and write it inside
# the Group. If it only has a single element, write that
@@ -785,15 +757,9 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
else:
grp[name][...] = data_to_store
- # Write the metadata using the inherited function (good
- # enough). The Attributes 'Python.numpy.fields' and
- # 'MATLAB_fields', if present, need to be deleted since this
- # isn't a structured ndarray.
-
- self.write_metadata(f, grp, name, data, type_string,
- options)
- del_attribute(grp[name], 'Python.Fields')
- del_attribute(grp[name], 'MATLAB_fields')
+ # Write the metadata using the inherited function (good enough).
+ self.write_metadata(f, grp, name, data, type_string,
+ options)
def write_metadata(self, f, grp, name, data, type_string, options):
# First, call the inherited version to do most of the work.
@@ -834,17 +800,50 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
set_attribute_string(grp[name], 'Python.numpy.Container',
container)
- # If its dtype has fields, then we set the 'Python.Fields'
- # Attribute to the field names if we are storing python metadata
- # and we are storing structured ndarrays as structures.
- if options.store_python_metadata \
- and data.dtype.fields is not None \
+ # If its dtype has fields, then we set the 'Python.Fields' and
+ # 'MATLAB_fields' Attributes to the field names if we are
+ # storing python metadata or doing matlab compatibility and we
+ # are storing a structured ndarray as a structure.
+ if data.dtype.fields is not None \
and options.structured_numpy_ndarray_as_struct:
- set_attribute_string_array(grp[name],
- 'Python.Fields',
- list(data.dtype.names))
+ # Grab the list of fields.
+ field_names = list(data.dtype.names)
+
+ # Write or delete 'Python.Fields' as appropriate.
+ if options.store_python_metadata \
+ and data.dtype.fields is not None \
+ and options.structured_numpy_ndarray_as_struct:
+ set_attribute_string_array(grp[name],
+ 'Python.Fields',
+ field_names)
+ else:
+ del_attribute(grp[name], 'Python.Fields')
+
+ # If we are making it MATLAB compatible and have h5py
+ # version >= 2.3, then we can set the MATLAB_fields
+ # Attribute as long as all keys are mappable to
+ # ASCII. Otherwise, the attribute should be deleted. It is
+ # written as a vlen='S1' array of bytes_ arrays of the
+ # individual characters.
+ if options.matlab_compatible \
+ and distutils.version.LooseVersion( \
+ h5py.__version__) \
+ >= distutils.version.LooseVersion('2.3'):
+ try:
+ dt = h5py.special_dtype(vlen=np.dtype('S1'))
+ fs = np.empty(shape=(len(field_names),), dtype=dt)
+ for i, s in enumerate(field_names):
+ fs[i] = np.array([c.encode('ascii') for c in s],
+ dtype='S1')
+ except UnicodeDecodeError:
+ del_attribute(grp[name], 'MATLAB_fields')
+ else:
+ set_attribute(grp[name], 'MATLAB_fields', fs)
+ else:
+ del_attribute(grp[name], 'MATLAB_fields')
else:
del_attribute(grp[name], 'Python.Fields')
+ del_attribute(grp[name], 'MATLAB_fields')
# If data is empty, we need to set the Python.Empty and
# MATLAB_empty attributes to 1 if we are storing type info or
@@ -872,17 +871,28 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
# attribute needs to be set looking up the data type (gotten
# using np.dtype.type). If it is a string or bool type, then
# the MATLAB_int_decode attribute must be set to the number of
- # bytes each element takes up (dtype.itemsize). Otherwise,
- # the attributes must be deleted.
+ # bytes each element takes up (dtype.itemsize). If the dtype has
+ # fields and we are writing it as a structure, the class needs
+ # to be overridden to 'struct'. Otherwise, the attributes must
+ # be deleted.
tp = data.dtype.type
- if options.matlab_compatible and tp in self.__MATLAB_classes:
- set_attribute_string(grp[name], 'MATLAB_class',
- self.__MATLAB_classes[tp])
- if tp in (np.bytes_, np.str_, np.bool_):
- set_attribute(grp[name], 'MATLAB_int_decode', np.int64(
- grp[name].dtype.itemsize))
+ if options.matlab_compatible:
+ if data.dtype.fields is not None \
+ and options.structured_numpy_ndarray_as_struct:
+ set_attribute_string(grp[name], 'MATLAB_class',
+ 'struct')
+ elif tp in self.__MATLAB_classes:
+ set_attribute_string(grp[name], 'MATLAB_class',
+ self.__MATLAB_classes[tp])
+ if tp in (np.bytes_, np.str_, np.bool_):
+ set_attribute(grp[name], 'MATLAB_int_decode',
+ np.int64(grp[name].dtype.itemsize))
+ else:
+ del_attribute(grp[name], 'MATLAB_int_decode')
else:
+ del_attribute(grp[name], 'MATLAB_class')
+ del_attribute(grp[name], 'MATLAB_empty')
del_attribute(grp[name], 'MATLAB_int_decode')
else:
del_attribute(grp[name], 'MATLAB_class')
@@ -981,7 +991,7 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
obj = np.zeros((1,), dtype='object')
obj[0] = v
struct_data[k] = obj
-
+
# The dtype for the structured ndarray needs to be
# composed. This is done by going through each field (in the
# proper order, if the fields were given, or any order if
@@ -1000,7 +1010,7 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
- set(fields))
extra_fields.sort()
fields.extend(extra_fields)
-
+
dt_whole = []
for k in fields:
# In Python 2, the field names for a structured ndarray
@@ -1054,17 +1064,40 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
# enough of it, then no conversions can be done.
if type_string is not None and underlying_type is not None and \
shape is not None:
+ # If the Attributes 'Python.Fields' and/or 'MATLAB_fields'
+ # are present, the underlying type needs to be changed to
+ # the proper dtype for the structure.
+ if python_fields is not None or matlab_fields is not None:
+ if python_fields is not None:
+ fields = python_fields
+ else:
+ fields = [k.tostring().decode()
+ for k in matlab_fields]
+ struct_dtype = list()
+ for k in fields:
+ if sys.hexversion >= 0x03000000:
+ struct_dtype.append((k, 'object'))
+ else:
+ struct_dtype.append((k.encode('ascii'),
+ 'object'))
+ else:
+ struct_dtype = None
+
# If it is empty ('Python.Empty' set to 1), then the shape
# information is stored in data and we need to set data to
# the empty array of the proper type (in underlying_type)
# and the given shape. If we are going to transpose it
# later, we need to transpose it now so that it still keeps
- # the right shape.
+ # the right shape. Also, if it is a structure that we just
+ # figured out the dtype for, that needs to be used.
if python_empty == 1:
if underlying_type.startswith('bytes'):
data = np.zeros(tuple(shape), dtype='S1')
elif underlying_type.startswith('str'):
data = np.zeros(tuple(shape), dtype='U1')
+ elif struct_dtype is not None:
+ data = np.zeros(tuple(shape),
+ dtype=struct_dtype)
else:
data = np.zeros(tuple(shape),
dtype=underlying_type)
@@ -1175,7 +1208,7 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
dt_whole.append((k.tostring(), 'object'))
data = np.zeros(shape=tuple(np.uint64(data)),
dtype=dt_whole)
-
+
# The order of the dimensions must be switched from Fortran
# order which MATLAB uses to C order which Python uses.
data = data.T
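(Editor's note: on the read side, the later hunks build a structured object dtype from 'Python.Fields' or 'MATLAB_fields' and then create an empty array of the stored shape. A minimal sketch of that idea, assuming made-up names and omitting the diff's Python 2 branch; this is an illustration, not the library's exact code.)

```python
import numpy as np

def empty_struct_from_attrs(shape, python_fields=None, matlab_fields=None):
    """Rebuild an empty MATLAB struct as an object-dtype structured ndarray."""
    if python_fields is not None:
        fields = python_fields
    elif matlab_fields is not None:
        # 'MATLAB_fields' elements are arrays of single bytes; join the
        # bytes of each element back into a field-name string.
        fields = [k.tobytes().decode() for k in matlab_fields]
    else:
        raise ValueError('no field information available')

    struct_dtype = [(k, 'object') for k in fields]
    return np.zeros(tuple(shape), dtype=struct_dtype)

# e.g. an empty 0x0 struct with fields 'x' and 'y':
data = empty_struct_from_attrs((0, 0), python_fields=['x', 'y'])
```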
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-hdf5storage.git