[python-hdf5storage] 54/152: Added read/write support for numpy.object_ types (become HDF5 references).
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Feb 29 08:24:33 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 0.1
in repository python-hdf5storage.
commit 1083c70d09abbb937980c4513d4d8b4cf7bfb63a
Author: Freja Nordsiek <fnordsie at gmail.com>
Date: Tue Jan 28 02:36:51 2014 -0500
Added read/write support for numpy.object_ types (become HDF5 references).
---
README.rst | 2 +
doc/source/hdf5storage.Marshallers.rst | 6 +--
doc/source/hdf5storage.utilities.rst | 6 +++
hdf5storage/Marshallers.py | 77 ++++++++++++++++++++++++++++++++--
hdf5storage/__init__.py | 37 ++++++++++++++++
hdf5storage/utilities.py | 33 +++++++++++++++
6 files changed, 154 insertions(+), 7 deletions(-)
diff --git a/README.rst b/README.rst
index 2266959..02238ae 100644
--- a/README.rst
+++ b/README.rst
@@ -82,6 +82,7 @@ np.complex64 0.1 single 0.1
np.complex128 0.1 double 0.1
np.str\_ 0.1 np.uint32 uint32 0.1 [2]_
np.bytes\_ 0.1 char 0.1
+np.object\_ 0.1 cell 0.1
dict 0.1 struct 0.1 [3]_
============= ======= ================== ======= ========
@@ -108,6 +109,7 @@ int16 0.1 np.int16
int32 0.1 np.int32
int64 0.1 np.int64
struct 0.1 dict [5]_
+cell 0.1 np.object\_
============ ======= ================================
.. [4] Depends on whether there is a complex part or not.
diff --git a/doc/source/hdf5storage.Marshallers.rst b/doc/source/hdf5storage.Marshallers.rst
index 9b27d7f..b2329b1 100644
--- a/doc/source/hdf5storage.Marshallers.rst
+++ b/doc/source/hdf5storage.Marshallers.rst
@@ -52,7 +52,7 @@ NumpyScalarArrayMarshaller
np.int8, np.int16, np.int32, np.int64,
np.float16, np.float32, np.float64,
np.complex64, np.complex128,
- np.bytes_, np.str_]
+ np.bytes_, np.str_, np.object_]
.. autoinstanceattribute:: NumpyScalarArrayMarshaller.cpython_type_strings
:annotation: = ['numpy.ndarray', 'numpy.matrix',
@@ -61,12 +61,12 @@ NumpyScalarArrayMarshaller
'numpy.int16', 'numpy.int32', 'numpy.int64',
'numpy.float16', 'numpy.float32', 'numpy.float64',
'numpy.complex64', 'numpy.complex128',
- 'numpy.bytes_', 'numpy.str_']
+ 'numpy.bytes_', 'numpy.str_', 'numpy.object_']
.. autoinstanceattribute:: NumpyScalarArrayMarshaller.matlab_classes
:annotation: = ['logical', 'char', 'single', 'double', 'uint8',
'uint16', 'uint32', 'uint64', 'int8', 'int16',
- 'int32', 'int64']
+ 'int32', 'int64', 'cell']
PythonScalarMarshaller
diff --git a/doc/source/hdf5storage.utilities.rst b/doc/source/hdf5storage.utilities.rst
index 5f83b08..d129080 100644
--- a/doc/source/hdf5storage.utilities.rst
+++ b/doc/source/hdf5storage.utilities.rst
@@ -6,6 +6,12 @@ hdf5storage.utilities
.. automodule:: hdf5storage.utilities
+next_unused_name_in_group
+-------------------------
+
+.. autofunction:: next_unused_name_in_group
+
+
decode_to_str
-------------
diff --git a/hdf5storage/Marshallers.py b/hdf5storage/Marshallers.py
index 97a5c38..4b85289 100644
--- a/hdf5storage/Marshallers.py
+++ b/hdf5storage/Marshallers.py
@@ -28,6 +28,8 @@
"""
+import posixpath
+
import numpy as np
import h5py
@@ -312,7 +314,7 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
np.int8, np.int16, np.int32, np.int64,
np.float16, np.float32, np.float64,
np.complex64, np.complex128,
- np.bytes_, np.str_]
+ np.bytes_, np.str_, np.object_]
self.cpython_type_strings = ['numpy.ndarray', 'numpy.matrix',
'numpy.bool_',
'numpy.uint8', 'numpy.uint16',
@@ -323,7 +325,8 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
'numpy.float64',
'numpy.complex64',
'numpy.complex128',
- 'numpy.bytes_', 'numpy.str_']
+ 'numpy.bytes_', 'numpy.str_',
+ 'numpy.object_']
# If we are storing in MATLAB format, we will need to be able to
# set the MATLAB_class attribute. The different numpy types just
@@ -344,7 +347,8 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
np.complex64: 'single',
np.complex128: 'double',
np.bytes_: 'char',
- np.str_: 'char'}
+ np.str_: 'char',
+ np.object_: 'cell'}
# Make a dict to look up the opposite direction (given a matlab
# class, what numpy type to use.
@@ -360,7 +364,8 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
'int64': np.int64,
'single': np.float32,
'double': np.float64,
- 'char': np.str_}
+ 'char': np.str_,
+ 'cell': np.object_}
# Set matlab_classes to the supported classes (the values).
@@ -428,6 +433,46 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
data_to_store = encode_complex(data_to_store,
options.complex_names)
+ # If we are storing an object type and it isn't empty
+ # (data_to_store is still an object), then we must recursively
+ # write what each element points to and make an array of the
+ # references to them.
+
+ if data_to_store.dtype.name == 'object':
+ ref_dtype = h5py.special_dtype(ref=h5py.Reference)
+ data_refs = data_to_store.copy()
+
+ # Go through all the elements of data and write them,
+ # grabbing their references and putting them in
+ # data_refs. They will be put in group_for_references, which
+ # is also what the H5PATH needs to be set to if we are doing
+ # MATLAB compatibility (otherwise, the attribute needs to be
+ # deleted).
+
+ if options.group_for_references not in f:
+ f.create_group(options.group_for_references)
+
+ grp2 = f[options.group_for_references]
+
+ if not isinstance(grp2, h5py.Group):
+ del f[options.group_for_references]
+ grp2 = f[options.group_for_references]
+
+ for index, x in np.ndenumerate(data_to_store):
+ data_refs[index] = None
+ name_for_ref = next_unused_name_in_group(grp2, 16)
+ write_data(f, grp2, name_for_ref, x, None, options)
+ data_refs[index] = grp2[name_for_ref].ref
+ if options.MATLAB_compatible:
+ set_attribute_string(grp2[name_for_ref],
+ 'H5PATH', grp2.name)
+ else:
+ del_attribute(grp2[k], 'H5PATH')
+
+ # Now, the dtype needs to be changed to the reference type
+ # and the whole thing copied over to data_to_store.
+ data_to_store = data_refs.astype(dtype=ref_dtype)
+
# The data must first be written. If name is not present yet,
# then it must be created. If it is present, but not a Dataset,
# has the wrong dtype, or is the wrong shape; then it must be
@@ -552,6 +597,27 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
data = grp[name][...]
dt = data.dtype
+ # If it is a reference type, then we need to make an object
+ # array that is its replicate, but with the objects they are
+ # pointing to in their elements instead of just the references.
+ if h5py.check_dtype(ref=grp[name].dtype) is not None:
+ data_derefed = data.copy().astype(np.dtype('object'))
+
+ # Go through all the elements of data and read them using
+ # their references, and then putting the output in
+ # data_derefed. If they can't be read, None is put in.
+
+ for index, x in np.ndenumerate(data):
+ data_derefed[index] = None
+ try:
+ data_derefed[index] = read_data(f, f[x].parent, \
+ posixpath.basename(f[x].name), options)
+ except:
+ raise
+
+ # Now all that needs to be done is copy back to data.
+ data = data_derefed.copy()
+
# If metadata is present, that can be used to do convert to the
# desired/closest Python data types. If none is present, or not
# enough of it, then no conversions can be done.
@@ -885,6 +951,9 @@ class PythonDictMarshaller(TypeMarshaller):
# them (nothing needs to be done).
data = dict()
for k in grp[name]:
+ # We must exclude group_for_references
+ if grp[name][k].name == options.group_for_references:
+ continue
try:
data[k] = read_data(f, grp[name], k, options)
except:
diff --git a/hdf5storage/__init__.py b/hdf5storage/__init__.py
index a6b4a0e..349cf62 100644
--- a/hdf5storage/__init__.py
+++ b/hdf5storage/__init__.py
@@ -207,6 +207,7 @@ class Options(object):
reverse_dimension_order ``True``
store_shape_for_empty ``True``
complex_names ``('real', 'imag')``
+ group_for_references ``'/#refs#'``
========================= ====================
In addition to setting these options, a specially formatted block of
@@ -233,6 +234,8 @@ class Options(object):
See Attributes.
complex_names : tuple of two str, optional
See Attributes.
+ group_for_references : str, optional
+ See Attributes.
marshaller_collection : MarshallerCollection, optional
See Attributes.
@@ -247,6 +250,7 @@ class Options(object):
reverse_dimension_order : bool
store_shape_for_empty : bool
complex_names : tuple of two str
+ group_for_references : str
scalar_options : dict
``h5py.Group.create_dataset`` options for writing scalars.
array_options : dict
@@ -264,6 +268,7 @@ class Options(object):
reverse_dimension_order=False,
store_shape_for_empty=False,
complex_names=('r', 'i'),
+ group_for_references="/#refs#",
marshaller_collection=None):
# Set the defaults.
@@ -275,6 +280,7 @@ class Options(object):
self._reverse_dimension_order = False
self._store_shape_for_empty = False
self._complex_names = ('r', 'i')
+ self._group_for_references = "/#refs#"
self._MATLAB_compatible = True
# Apply all the given options using the setters, making sure to
@@ -289,6 +295,7 @@ class Options(object):
self.reverse_dimension_order = reverse_dimension_order
self.store_shape_for_empty = store_shape_for_empty
self.complex_names = complex_names
+ self.group_for_references = group_for_references
self.MATLAB_compatible = MATLAB_compatible
# Set the h5py options to use for writing scalars and arrays to
@@ -352,6 +359,7 @@ class Options(object):
reverse_dimension_order ``True``
store_shape_for_empty ``True``
complex_names ``('real', 'imag')``
+ group_for_references ``'/#refs#'``
========================= ====================
In addition to setting these options, a specially formatted
@@ -375,6 +383,7 @@ class Options(object):
self._reverse_dimension_order = True
self._store_shape_for_empty = True
self._complex_names = ('real', 'imag')
+ self._group_for_references = "/#refs#"
@property
def delete_unused_variables(self):
@@ -567,6 +576,34 @@ class Options(object):
if self._complex_names != ('real', 'imag'):
self._MATLAB_compatible = False
+ @property
+ def group_for_references(self):
+ """ Path for where to put objects pointed at by references.
+
+ str
+
+ The absolute POSIX path for the Group to place all data that is
+ pointed to by another piece of data (needed for
+ ``numpy.object_`` and similar types). This path is automatically
+ excluded from its parent group when reading back a ``dict``.
+
+ Must be ``'/#refs#'`` if doing MATLAB compatibility.
+
+ """
+ return self._group_for_references
+
+ @group_for_references.setter
+ def group_for_references(self, value):
+ # Check that it is a str and a valid absolute POSIX path, and then
+ # set it. If it is something other than "/#refs#", then we are
+ # not doing MATLAB compatible formatting.
+ if isinstance(value, str):
+ pth = posixpath.normpath(value)
+ if len(pth) > 1 and posixpath.isabs(pth):
+ self._group_for_references = value
+ if self._group_for_references != "/#refs#":
+ self._MATLAB_compatible = False
+
class MarshallerCollection(object):
""" Represents, maintains, and retreives a set of marshallers.
diff --git a/hdf5storage/utilities.py b/hdf5storage/utilities.py
index d4c7a32..5a880f6 100644
--- a/hdf5storage/utilities.py
+++ b/hdf5storage/utilities.py
@@ -28,10 +28,43 @@
"""
+import string
+import random
+
import numpy as np
import h5py
+def next_unused_name_in_group(grp, length):
+ """ Gives a name that isn't used in a Group.
+
+ Generates a name of the desired length that is not a Dataset or
+ Group in the given group. Note, if length is not large enough and
+ `grp` is full enough, there may be no available names meaning that
+ this function will hang.
+
+ Parameters
+ ----------
+ grp : h5py.Group or h5py.File
+ The HDF5 Group (or File if at '/') to generate an unused name
+ in.
+ length : int
+ Number of characters the name should be.
+
+ Returns
+ -------
+ str
+ A name that isn't already an existing Dataset or Group in
+ `grp`.
+
+ """
+ ltrs = string.ascii_letters + string.digits
+ existing_names = set(grp.keys())
+ while True:
+ name = ''.join([random.choice(ltrs) for i in range(0, length)])
+ if name not in existing_names:
+ return name
+
def decode_to_str(data):
""" Decodes data to the Python str type.
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-hdf5storage.git
More information about the debian-science-commits
mailing list