[python-hdf5storage] 132/152: Added/changed code to handle empty structured ndarrays.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Feb 29 08:24:42 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 0.1
in repository python-hdf5storage.

commit b585cc08c0a0d1fc1e8064f9e25ac440781f0703
Author: Freja Nordsiek <fnordsie at gmail.com>
Date:   Sat Feb 15 20:12:26 2014 -0500

    Added/changed code to handle empty structured ndarrays.
---
 README.rst                    | 21 ++++++++++++---------
 doc/source/storage_format.rst |  7 +++++++
 hdf5storage/Marshallers.py    | 37 +++++++++++++++++++++++--------------
 3 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/README.rst b/README.rst
index 8c7ba07..6d28f47 100644
--- a/README.rst
+++ b/README.rst
@@ -69,11 +69,11 @@ will be what it is read back as) the MATLAB class it becomes if
 targetting a MAT file, and the first version of this package to
 support writing it so MATlAB can read it.
 
-=============  =======  ==========================  ===========  ==========
+=============  =======  ==========================  ===========  =============
 Python                                              MATLAB
---------------------------------------------------  -----------------------
+--------------------------------------------------  --------------------------
 Type           Version  Converted to                Class        Version
-=============  =======  ==========================  ===========  ==========
+=============  =======  ==========================  ===========  =============
 bool           0.1      np.bool\_ or np.uint8       logical      0.1 [1]_
 None           0.1      ``np.float64([])``          ``[]``       0.1
 int            0.1      np.int64                    int64        0.1
@@ -106,11 +106,11 @@ np.complex128  0.1                                  double       0.1
 np.str\_       0.1      np.uint32/16                char/uint32  0.1 [2]_
 np.bytes\_     0.1      np.bytes\_ or np.uint16     char         0.1 [3]_
 np.object\_    0.1                                  cell         0.1
-np.ndarray     0.1      [5]_ [6]_                   [5]_ [6]_    0.1 [5]_
+np.ndarray     0.1      [5]_ [6]_                   [5]_ [6]_    0.1 [5]_ [7]_
 np.matrix      0.1      [5]_                        [5]_         0.1 [5]_
 np.chararray   0.1      [5]_                        [5]_         0.1 [5]_
 np.recarray    0.1      structured np.ndarray       [5]_ [6]_    0.1 [5]_
-=============  =======  ==========================  ===========  ==========
+=============  =======  ==========================  ===========  =============
 
 .. [1] Depends on the selected options. Always ``np.uint8`` when doing
        MATLAB compatiblity, or if the option is explicitly set.
@@ -129,11 +129,14 @@ np.recarray    0.1      structured np.ndarray       [5]_ [6]_    0.1 [5]_
 .. [4] All keys must be ``str``.
 .. [5] Container types are only supported if their underlying dtype is
        supported. Data conversions are done based on its dtype.
-.. [6] Structured ``np.ndarray``s (have fields in their dtypes) can be
+.. [6] Structured ``np.ndarray`` s (have fields in their dtypes) can be
        written as an HDF5 COMPOUND type or as an HDF5 Group with Datasets
        holding its fields (either the values directly, or as an HDF5
        Reference array to the values for the different elements of the
        data).
+.. [7] Structured ``np.ndarray`` s with no elements, when written like a
+       structure, will not be read back with the right dtypes for their
+       fields (the fields will all become 'object').
 
 This table gives the MATLAB classes that can be read from a MAT file,
 the first version of this package that can read them, and the Python
@@ -143,8 +146,8 @@ type they are read as.
 MATLAB Class     Version  Python Type
 ===============  =======  =================================
 logical          0.1      np.bool\_
-single           0.1      np.float32 or np.complex64 [7]_
-double           0.1      np.float64 or np.complex128 [7]_
+single           0.1      np.float32 or np.complex64 [8]_
+double           0.1      np.float64 or np.complex128 [8]_
 uint8            0.1      np.uint8
 uint16           0.1      np.uint16
 uint32           0.1      np.uint32
@@ -158,4 +161,4 @@ cell             0.1      np.object\_
 canonical empty  0.1      ``np.float64([])``
 ===============  =======  =================================
 
-.. [7] Depends on whether there is a complex part or not.
+.. [8] Depends on whether there is a complex part or not.
diff --git a/doc/source/storage_format.rst b/doc/source/storage_format.rst
index 8944298..586b8ec 100644
--- a/doc/source/storage_format.rst
+++ b/doc/source/storage_format.rst
@@ -319,6 +319,13 @@ each field are written in :py:attr:`Options.group_for_references` and
 an HDF5 Reference array to all of those elements is written as a Dataset
 under the field name in the Groups.
 
+.. note::
+
+   If a structured ``np.ndarray`` has no elements and
+   :py:attr:`Options.structured_numpy_ndarray_as_struct` is set, it
+   can't be read back from the file accurately. The dtypes of all its
+   fields will become 'object' instead of their original dtypes.
+
 
 Optional Data Transformations
 =============================
diff --git a/hdf5storage/Marshallers.py b/hdf5storage/Marshallers.py
index 460d271..59bb813 100644
--- a/hdf5storage/Marshallers.py
+++ b/hdf5storage/Marshallers.py
@@ -619,12 +619,15 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
             data_to_store = np.uint8(data_to_store)
 
         # If data is empty, we instead need to store the shape of the
-        # array if the appropriate option is set.
+        # array if the appropriate option is set, unless it is a
+        # structured ndarray and we are storing those as structs.
 
         if options.store_shape_for_empty and (data.size == 0 \
                 or ((data.dtype.type == np.bytes_ \
                 or data.dtype.type == np.str_) \
-                and data.nbytes == 0)):
+                and data.nbytes == 0)) \
+                and (data_to_store.dtype.fields is None \
+                or not options.structured_numpy_ndarray_as_struct):
             data_to_store = np.uint64(data_to_store.shape)
 
         # If it is a complex type, then it needs to be encoded to have
@@ -670,20 +673,12 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
             field_names = list(data_to_store.dtype.names)
 
             # Write the metadata, and set the MATLAB_class to 'struct'
-            # explicitly. Then, we set the 'Python.Fields'
-            # Attribute to the field names if we are storing python
-            # metadata.
+            # explicitly.
             self.write_metadata(f, grp, name, data, type_string,
                                 options)
             if options.matlab_compatible:
                 set_attribute_string(grp[name], 'MATLAB_class',
                                      'struct')
-            if options.store_python_metadata:
-                set_attribute_string_array(grp[name],
-                                           'Python.Fields',
-                                           field_names)
-            else:
-                del_attribute(grp[name], 'Python.Fields')
 
             # Delete any Datasets/Groups not corresponding to a field
             # name in data if that option is set.
@@ -797,6 +792,18 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
             set_attribute_string(grp[name], 'Python.numpy.Container',
                                  container)
 
+        # If its dtype has fields, then we set the 'Python.Fields'
+        # Attribute to the field names if we are storing python metadata
+        # and we are storing structured ndarrays as structures.
+        if options.store_python_metadata \
+                and data.dtype.fields is not None \
+                and options.structured_numpy_ndarray_as_struct:
+            set_attribute_string_array(grp[name],
+                                       'Python.Fields',
+                                        list(data.dtype.names))
+        else:
+            del_attribute(grp[name], 'Python.Fields')
+
         # If data is empty, we need to set the Python.Empty and
         # MATLAB_empty attributes to 1 if we are storing type info or
         # making it MATLAB compatible. Otherwise, no empty attribute is
@@ -901,7 +908,8 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
                         or h5py.check_dtype(ref=fld.dtype) is None \
                         or len(set(fld.attrs.keys()) \
                         & ((set(self.python_attributes) \
-                        | set(self.matlab_attributes)) - {'H5PATH'})) \
+                        | set(self.matlab_attributes))
+                        - {'H5PATH', 'MATLAB_empty', 'Python.Empty'})) \
                         != 0:
                     is_multi_element = False
                 try:
@@ -944,11 +952,12 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
                 # If any of the elements are not Numpy types or if they
                 # don't all have the exact same dtype and shape, then
                 # this field will just be an object field.
-                first = v.flatten()[0]
-                if not isinstance(first, tuple(self.types)):
+                if v.size == 0 or not isinstance(v.flatten()[0], \
+                        tuple(self.types)):
                     dt_whole.append((k, 'object'))
                     continue
 
+                first = v.flatten()[0]
                 dt = first.dtype
                 sp = first.shape
                 all_same = True

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-hdf5storage.git



More information about the debian-science-commits mailing list