[python-hdf5storage] 132/152: Added/changed code to handle empty structured ndarrays.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Feb 29 08:24:42 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 0.1
in repository python-hdf5storage.
commit b585cc08c0a0d1fc1e8064f9e25ac440781f0703
Author: Freja Nordsiek <fnordsie at gmail.com>
Date: Sat Feb 15 20:12:26 2014 -0500
Added/changed code to handle empty structured ndarrays.
---
README.rst | 21 ++++++++++++---------
doc/source/storage_format.rst | 7 +++++++
hdf5storage/Marshallers.py | 37 +++++++++++++++++++++++--------------
3 files changed, 42 insertions(+), 23 deletions(-)
diff --git a/README.rst b/README.rst
index 8c7ba07..6d28f47 100644
--- a/README.rst
+++ b/README.rst
@@ -69,11 +69,11 @@ will be what it is read back as) the MATLAB class it becomes if
targeting a MAT file, and the first version of this package to
support writing it so MATLAB can read it.
-============= ======= ========================== =========== ==========
+============= ======= ========================== =========== =============
Python MATLAB
--------------------------------------------------- -----------------------
+-------------------------------------------------- --------------------------
Type Version Converted to Class Version
-============= ======= ========================== =========== ==========
+============= ======= ========================== =========== =============
bool 0.1 np.bool\_ or np.uint8 logical 0.1 [1]_
None 0.1 ``np.float64([])`` ``[]`` 0.1
int 0.1 np.int64 int64 0.1
@@ -106,11 +106,11 @@ np.complex128 0.1 double 0.1
np.str\_ 0.1 np.uint32/16 char/uint32 0.1 [2]_
np.bytes\_ 0.1 np.bytes\_ or np.uint16 char 0.1 [3]_
np.object\_ 0.1 cell 0.1
-np.ndarray 0.1 [5]_ [6]_ [5]_ [6]_ 0.1 [5]_
+np.ndarray 0.1 [5]_ [6]_ [5]_ [6]_ 0.1 [5]_ [7]_
np.matrix 0.1 [5]_ [5]_ 0.1 [5]_
np.chararray 0.1 [5]_ [5]_ 0.1 [5]_
np.recarray 0.1 structured np.ndarray [5]_ [6]_ 0.1 [5]_
-============= ======= ========================== =========== ==========
+============= ======= ========================== =========== =============
.. [1] Depends on the selected options. Always ``np.uint8`` when doing
MATLAB compatibility, or if the option is explicitly set.
@@ -129,11 +129,14 @@ np.recarray 0.1 structured np.ndarray [5]_ [6]_ 0.1 [5]_
.. [4] All keys must be ``str``.
.. [5] Container types are only supported if their underlying dtype is
supported. Data conversions are done based on its dtype.
-.. [6] Structured ``np.ndarray``s (have fields in their dtypes) can be
+.. [6] Structured ``np.ndarray`` s (have fields in their dtypes) can be
written as an HDF5 COMPOUND type or as an HDF5 Group with Datasets
holding its fields (either the values directly, or as an HDF5
Reference array to the values for the different elements of the
data).
+.. [7] Structured ``np.ndarray`` s with no elements, when written like a
+ structure, will not be read back with the right dtypes for their
+ fields (will all become 'object').
This table gives the MATLAB classes that can be read from a MAT file,
the first version of this package that can read them, and the Python
@@ -143,8 +146,8 @@ type they are read as.
MATLAB Class Version Python Type
=============== ======= =================================
logical 0.1 np.bool\_
-single 0.1 np.float32 or np.complex64 [7]_
-double 0.1 np.float64 or np.complex128 [7]_
+single 0.1 np.float32 or np.complex64 [8]_
+double 0.1 np.float64 or np.complex128 [8]_
uint8 0.1 np.uint8
uint16 0.1 np.uint16
uint32 0.1 np.uint32
@@ -158,4 +161,4 @@ cell 0.1 np.object\_
canonical empty 0.1 ``np.float64([])``
=============== ======= =================================
-.. [7] Depends on whether there is a complex part or not.
+.. [8] Depends on whether there is a complex part or not.
diff --git a/doc/source/storage_format.rst b/doc/source/storage_format.rst
index 8944298..586b8ec 100644
--- a/doc/source/storage_format.rst
+++ b/doc/source/storage_format.rst
@@ -319,6 +319,13 @@ each field are written in :py:attr:`Options.group_for_references` and
an HDF5 Reference array to all of those elements is written as a Dataset
under the field name in the Groups.
+.. note::
+
+ If it has no elements and
+ :py:attr:`Options.structured_numpy_ndarray_as_struct` is set, it
+ can't be read back from the file accurately. The dtype for all the
+ fields will become 'object' instead of what they originally were.
+
Optional Data Transformations
=============================
diff --git a/hdf5storage/Marshallers.py b/hdf5storage/Marshallers.py
index 460d271..59bb813 100644
--- a/hdf5storage/Marshallers.py
+++ b/hdf5storage/Marshallers.py
@@ -619,12 +619,15 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
data_to_store = np.uint8(data_to_store)
# If data is empty, we instead need to store the shape of the
- # array if the appropriate option is set.
+ # array if the appropriate option is set, unless it is a
+ # structured ndarray and we are storing those as structs.
if options.store_shape_for_empty and (data.size == 0 \
or ((data.dtype.type == np.bytes_ \
or data.dtype.type == np.str_) \
- and data.nbytes == 0)):
+ and data.nbytes == 0)) \
+ and (data_to_store.dtype.fields is None \
+ or not options.structured_numpy_ndarray_as_struct):
data_to_store = np.uint64(data_to_store.shape)
# If it is a complex type, then it needs to be encoded to have
@@ -670,20 +673,12 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
field_names = list(data_to_store.dtype.names)
# Write the metadata, and set the MATLAB_class to 'struct'
- # explicitly. Then, we set the 'Python.Fields'
- # Attribute to the field names if we are storing python
- # metadata.
+ # explicitly.
self.write_metadata(f, grp, name, data, type_string,
options)
if options.matlab_compatible:
set_attribute_string(grp[name], 'MATLAB_class',
'struct')
- if options.store_python_metadata:
- set_attribute_string_array(grp[name],
- 'Python.Fields',
- field_names)
- else:
- del_attribute(grp[name], 'Python.Fields')
# Delete any Datasets/Groups not corresponding to a field
# name in data if that option is set.
@@ -797,6 +792,18 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
set_attribute_string(grp[name], 'Python.numpy.Container',
container)
+ # If its dtype has fields, then we set the 'Python.Fields'
+ # Attribute to the field names if we are storing python metadata
+ # and we are storing structured ndarrays as structures.
+ if options.store_python_metadata \
+ and data.dtype.fields is not None \
+ and options.structured_numpy_ndarray_as_struct:
+ set_attribute_string_array(grp[name],
+ 'Python.Fields',
+ list(data.dtype.names))
+ else:
+ del_attribute(grp[name], 'Python.Fields')
+
# If data is empty, we need to set the Python.Empty and
# MATLAB_empty attributes to 1 if we are storing type info or
# making it MATLAB compatible. Otherwise, no empty attribute is
@@ -901,7 +908,8 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
or h5py.check_dtype(ref=fld.dtype) is None \
or len(set(fld.attrs.keys()) \
& ((set(self.python_attributes) \
- | set(self.matlab_attributes)) - {'H5PATH'})) \
+ | set(self.matlab_attributes))
+ - {'H5PATH', 'MATLAB_empty', 'Python.Empty'})) \
!= 0:
is_multi_element = False
try:
@@ -944,11 +952,12 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
# If any of the elements are not Numpy types or if they
# don't all have the exact same dtype and shape, then
# this field will just be an object field.
- first = v.flatten()[0]
- if not isinstance(first, tuple(self.types)):
+ if v.size == 0 or not isinstance(v.flatten()[0], \
+ tuple(self.types)):
dt_whole.append((k, 'object'))
continue
+ first = v.flatten()[0]
dt = first.dtype
sp = first.shape
all_same = True
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-hdf5storage.git
More information about the debian-science-commits
mailing list