[python-hdf5storage] 03/152: Initial set of python codes to write only (still need polishing)
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Feb 29 08:24:28 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 0.1
in repository python-hdf5storage.
commit e6d9c0c00eea41d787c10bbf1a025ff83546a4a9
Author: Freja Nordsiek <fnordsie at gmail.com>
Date: Sun Dec 22 22:10:00 2013 -0500
Initial set of python codes to write only (still need polishing)
---
hdf5storage/Marshallers.py | 417 +++++++++++++++++++++++++++++++++++++++++++++
hdf5storage/__init__.py | 36 ++++
hdf5storage/core.py | 302 ++++++++++++++++++++++++++++++++
hdf5storage/lowlevel.py | 69 ++++++++
hdf5storage/utilities.py | 119 +++++++++++++
5 files changed, 943 insertions(+)
diff --git a/hdf5storage/Marshallers.py b/hdf5storage/Marshallers.py
new file mode 100644
index 0000000..cc6c0b8
--- /dev/null
+++ b/hdf5storage/Marshallers.py
@@ -0,0 +1,417 @@
+# Copyright (c) 2013, Freja Nordsiek
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+import h5py
+
+from hdf5storage.utilities import *
+from hdf5storage.lowlevel import write_data
+
+
+class TypeMarshaller(object):
+ def __init__(self):
+ self.cpython_attributes = {'CPython.Type'}
+ self.matlab_attributes = {'H5PATH'}
+ self.types = []
+ self.cpython_type_strings = []
+
+ def get_type_string(self, data, type_string):
+ if type_string is not None:
+ return type_string
+ else:
+ i = self.types.index(type(data))
+ return self.cpython_type_strings[i]
+
+ def write(self, f, grp, name, data, type_string, options):
+ raise NotImplementedError("Can't write data type: "
+ + str(type(data)))
+
+ def write_metadata(self, f, grp, name, data, type_string, options):
+ # Make sure we have a complete type_string.
+ type_string = self.get_type_string(data, type_string)
+
+ # The metadata that is written depends on the format.
+
+ if options.store_type_information:
+ set_attribute_string(grp[name], 'CPython.Type', type_string)
+
+ # If we are not storing type information or doing MATLAB
+ # compatibility, then attributes not in the cpython and/or
+ # MATLAB lists need to be removed.
+
+ attributes_used = set()
+
+ if options.store_type_information:
+ attributes_used |= self.cpython_attributes
+
+ if options.MATLAB_compatible:
+ attributes_used |= self.matlab_attributes
+
+ for attribute in (set(grp[name].attrs.keys()) - attributes_used):
+ del_attribute(grp[name], attribute)
+
+ def read(self, f, grp, name, options):
+ raise NotImplementedError("Can't read data: " + name)
+
+
+class NumpyScalarArrayMarshaller(TypeMarshaller):
+ def __init__(self):
+ TypeMarshaller.__init__(self)
+ self.cpython_attributes |= {'CPython.Shape', 'CPython.Empty',
+ 'CPython.numpy.UnderlyingType'}
+ self.matlab_attributes |= {'MATLAB_class', 'MATLAB_empty',
+ 'MATLAB_int_decode'}
+ self.types = [np.ndarray, np.matrix,
+ np.bool8,
+ np.uint8, np.uint16, np.uint32, np.uint64,
+ np.int8, np.int16, np.int32, np.int64,
+ np.float16, np.float32, np.float64, np.float128,
+ np.complex64, np.complex128, np.complex256,
+ np.string_, np.unicode]
+ self.cpython_type_strings = ['numpy.ndarray', 'numpy.matrix',
+ 'numpy.bool8',
+ 'numpy.uint8', 'numpy.uint16',
+ 'numpy.uint32', 'numpy.uint64',
+ 'numpy.int8', 'numpy.int16',
+ 'numpy.int32', 'numpy.int64',
+ 'numpy.float16', 'numpy.float32',
+ 'numpy.float64', 'numpy.float128',
+ 'numpy.complex64',
+ 'numpy.complex128',
+ 'numpy.complex256',
+ 'numpy.string_', 'numpy.unicode']
+
+ # If we are storing in MATLAB format, we will need to be able to
+ # set the MATLAB_class attribute. The different numpy types just
+ # need to be properly mapped to the right strings. Some types do
+ # not have a string since MATLAB does not support them.
+
+ self.__MATLAB_classes = {np.bool8: 'logical', np.uint8: 'uint8',
+ np.uint16: 'uint16',
+ np.uint32: 'uint32',
+ np.uint64: 'uint64', np.int8: 'int8',
+ np.int16: 'int16', np.int32: 'int32',
+ np.int64: 'int64', np.float32: 'single',
+ np.float64: 'double',
+ np.complex64: 'single',
+ np.complex128: 'double',
+ np.string_: 'char',
+ np.unicode: 'char'}
+
+
+ def write(self, f, grp, name, data, type_string, options):
+ # Need to make a set of data that will be stored. It will start
+ # out as a copy of data and then be steadily manipulated.
+
+ data_to_store = data.copy()
+
+ # Optionally convert ASCII strings to UTF-16. This is done by
+ # simply converting to uint16's. This will require making them
+ # at least 1 dimensional.
+
+ if options.convert_strings_to_utf16 and not (data.size == 0 \
+ and options.store_shape_for_empty) \
+ and data.dtype.type == np.string_:
+ data_to_store = np.uint16(np.atleast_1d( \
+ data_to_store).view(np.uint8))
+
+ # As of 2013-12-13, h5py cannot write numpy.unicode (UTF-32
+ # encoding) types. If it is just a numpy.unicode object, we can
+ # force it to UTF-16 or just write it as uint32's. If it is an
+ # array, forcing it to UTF-16 is a bad idea because characters
+ # are not always 2 bytes long in UTF-16. So, converting them to
+ # uint32 makes the most sense.
+
+ if data.dtype.type == np.unicode and not (data.size == 0 \
+ and options.store_shape_for_empty):
+ data_to_store = np.atleast_1d(data_to_store).view(np.uint32)
+
+ # Convert scalars to arrays if that option is set.
+
+ if options.convert_scalars_to_arrays:
+ data_to_store = np.atleast_2d(data_to_store)
+
+ # If data is empty, we instead need to store the shape of the
+ # array if the appropriate option is set.
+
+ if options.store_shape_for_empty and data.size == 0:
+ data_to_store = np.uint64(data.shape)
+ if options.convert_scalars_to_arrays:
+ data_to_store = np.atleast_2d(data_to_store)
+
+ # Reverse the dimension order if that option is set.
+
+ if options.reverse_dimension_order:
+ data_to_store = data_to_store.T
+
+ # The data must first be written. If name is not present yet,
+ # then it must be created. If it is present, but not a Dataset,
+ # has the wrong dtype, or is the wrong shape; then it must be
+ # deleted and then written. Otherwise, it is just overwritten in
+ # place (note, this will not change any filters or chunking
+ # settings, but will keep the file from growing needlessly).
+
+ if name not in grp:
+ grp.create_dataset(name, data=data_to_store,
+ **options.array_options)
+ elif not isinstance(grp[name], h5py.Dataset) \
+ or grp[name].dtype != data_to_store.dtype \
+ or grp[name].shape != data_to_store.shape:
+ del grp[name]
+ grp.create_dataset(name, data=data_to_store,
+ **options.array_options)
+ else:
+ grp[name][...] = data_to_store
+
+ # Write the metadata using the inherited function (good enough).
+
+ self.write_metadata(f, grp, name, data, type_string, options)
+
+
+ def write_metadata(self, f, grp, name, data, type_string, options):
+ # First, call the inherited version to do most of the work.
+
+ TypeMarshaller.write_metadata(self, f, grp, name, data,
+ type_string, options)
+
+ # Write the underlying numpy type if we are storing type
+ # information.
+
+ if options.store_type_information:
+ set_attribute_string(grp[name],
+ 'CPython.numpy.UnderlyingType',
+ data.dtype.name)
+
+ # If we are storing type information, the shape needs to be
+ # stored in CPython.Shape.
+
+ if options.store_type_information:
+ set_attribute(grp[name], 'CPython.Shape',
+ np.uint64(data.shape))
+
+ # If data is empty and we are supposed to store shape info for
+ # empty data, we need to set the CPython.Empty and MATLAB_empty
+ # attributes to 1 if we are storing type info or making it
+ # MATLAB compatible. Otherwise, no empty attribute is set and
+ # existing ones must be deleted.
+
+ if options.store_shape_for_empty and data.size == 0:
+ if options.store_type_information:
+ set_attribute(grp[name], 'CPython.Empty',
+ np.uint8(1))
+ else:
+ del_attribute(grp[name], 'CPython.Empty')
+ if options.MATLAB_compatible:
+ set_attribute(grp[name], 'MATLAB_empty',
+ np.uint8(1))
+ else:
+ del_attribute(grp[name], 'MATLAB_empty')
+ else:
+ del_attribute(grp[name], 'CPython.Empty')
+ del_attribute(grp[name], 'MATLAB_empty')
+
+ # If we are making it MATLAB compatible, the MATLAB_class
+ # attribute needs to be set looking up the data type (gotten
+ # using np.dtype.type) and if it is a string type, then the
+ # MATLAB_int_decode attribute must be set properly. Otherwise,
+ # the attributes must be deleted.
+
+ if options.MATLAB_compatible:
+ tp = data.dtype.type
+ if tp in self.__MATLAB_classes:
+ set_attribute_string(grp[name], 'MATLAB_class',
+ self.__MATLAB_classes[tp])
+ else:
+ set_attribute_string(grp[name], 'MATLAB_class', '')
+
+ if tp in (np.string_, np.unicode):
+ set_attribute(grp[name], 'MATLAB_int_decode',
+ {np.string_: 2, np.unicode: 4}[tp])
+ else:
+ del_attribute(grp[name], 'MATLAB_int_decode')
+
+
+class PythonScalarMarshaller(NumpyScalarArrayMarshaller):
+ def __init__(self):
+ NumpyScalarArrayMarshaller.__init__(self)
+ self.types = [bool, int, float, complex]
+ self.cpython_type_strings = ['bool', 'int', 'float', 'complex']
+
+ def write(self, f, grp, name, data, type_string, options):
+ # data just needs to be converted to the appropriate numpy type
+ # (pass it through np.array and then access [()] to get the
+ # scalar back as a scalar numpy type) and then pass it to the
+ # parent version of this function. The proper type_string needs
+ # to be grabbed now as the parent function will have a modified
+ # form of data to guess from if not given the right one
+ # explicitly.
+ NumpyScalarArrayMarshaller.write(self, f, grp, name,
+ np.array(data)[()],
+ self.get_type_string(data,
+ type_string), options)
+
+
+class PythonStringMarshaller(NumpyScalarArrayMarshaller):
+ def __init__(self):
+ NumpyScalarArrayMarshaller.__init__(self)
+ self.types = [str, bytes, bytearray]
+ self.cpython_type_strings = ['str', 'bytes', 'bytearray']
+
+ def write(self, f, grp, name, data, type_string, options):
+ # data just needs to be converted to a numpy string, unless it
+ # is a bytearray in which case it needs to be converted to a
+ # uint8 array.
+
+ if isinstance(data, bytearray):
+ cdata = np.uint8(data)
+ else:
+ cdata = np.string_(data)
+
+ # Now pass it to the parent version of this function to write
+ # it. The proper type_string needs to be grabbed now as the
+ # parent function will have a modified form of data to guess
+ # from if not given the right one explicitly.
+ NumpyScalarArrayMarshaller.write(self, f, grp, name, cdata,
+ self.get_type_string(data,
+ type_string), options)
+
+
+class PythonNoneMarshaller(NumpyScalarArrayMarshaller):
+ def __init__(self):
+ NumpyScalarArrayMarshaller.__init__(self)
+ self.types = [type(None)]
+ self.cpython_type_strings = ['builtins.NoneType']
+ def write(self, f, grp, name, data, type_string, options):
+ # Just going to use the parent function with an empty double
+ # (two dimensional so that MATLAB will import it as a []) as the
+ # data and the right type_string set (parent can't guess right
+ # from the modified form).
+ NumpyScalarArrayMarshaller.write(self, f, grp, name,
+ np.ndarray(shape=(0,0),
+ dtype='float64'),
+ self.get_type_string(data,
+ type_string), options)
+
+class PythonDictMarshaller(TypeMarshaller):
+ def __init__(self):
+ TypeMarshaller.__init__(self)
+ self.cpython_attributes |= {'CPython.Empty'}
+ self.matlab_attributes |= {'MATLAB_class', 'MATLAB_empty'}
+ self.types = [dict]
+ self.cpython_type_strings = ['dict']
+ self.__MATLAB_classes = ['struct']
+
+ def write(self, f, grp, name, data, type_string, options):
+ # If the group doesn't exist, it needs to be created. If it
+ # already exists but is not a group, it needs to be deleted
+ # before being created.
+
+ if name not in grp:
+ grp.create_group(name)
+ elif not isinstance(grp[name], h5py.Group):
+ del grp[name]
+ grp.create_group(name)
+
+ grp2 = grp[name]
+
+ # Write the metadata.
+ self.write_metadata(f, grp, name, data, type_string, options)
+
+ # Delete any Datasets/Groups not corresponding to a field name
+ # in data if that option is set.
+
+ if options.delete_unused_variables:
+ for field in {i for i in grp2}.difference({i for i in data}):
+ del grp2[field]
+
+ # Check for any field names that are not strings since they
+ # cannot be handled.
+
+ for fieldname in data:
+ if not isinstance(fieldname, str):
+ raise NotImplementedError('Dictionaries with non-string'
+ + ' keys are not supported: '
+ + repr(fieldname))
+
+ # Return a tuple holding the group to store in, all the elements
+ # of data, and their values to the calling function so that it
+ # can recurse over all the elements.
+
+ return ([grp2], [(n, v) for n, v in data.items()])
+
+ def write_metadata(self, f, grp, name, data, type_string, options):
+ # First, call the inherited version to do most of the work.
+
+ TypeMarshaller.write_metadata(self, f, grp, name, data,
+ type_string, options)
+
+ # If data is empty and we are supposed to store shape info for
+ # empty data, we need to set the CPython.Empty and MATLAB_empty
+ # attributes to 1 if we are storing type info or making it
+ # MATLAB compatible. Otherwise, no empty attribute is set and
+ # existing ones must be deleted.
+
+ if options.store_shape_for_empty and len(data) == 0:
+ if options.store_type_information:
+ set_attribute(grp[name], 'CPython.Empty',
+ np.uint8(1))
+ else:
+ del_attribute(grp[name], 'CPython.Empty')
+ if options.MATLAB_compatible:
+ set_attribute(grp[name], 'MATLAB_empty',
+ np.uint8(1))
+ else:
+ del_attribute(grp[name], 'MATLAB_empty')
+ else:
+ del_attribute(grp[name], 'CPython.Empty')
+ del_attribute(grp[name], 'MATLAB_empty')
+
+ # If we are making it MATLAB compatible, the MATLAB_class
+ # attribute needs to be set for the data type. Also, all the
+ # field names need to be stored in the attribute MATLAB_fields.
+ # If the type cannot be found, an error needs to be thrown. If
+ # we are not doing MATLAB compatibility, the attributes need to
+ # be deleted.
+
+ if options.MATLAB_compatible:
+ tp = type(data)
+ if tp in self.types:
+ set_attribute_string(grp[name], \
+ 'MATLAB_class', self.__MATLAB_classes[ \
+ self.types.index(tp)])
+ else:
+ raise NotImplementedError("Can't write data type: "
+ + str(tp))
+
+ # Write an array of all the fields to the attribute that
+ # lists them.
+
+ # NOTE: Can't make it do a variable length set of strings
+ # like MATLAB likes. However, not including them seems to
+ # cause no problem.
+
+ # set_attribute_string_array(grp[name], \
+ # 'MATLAB_fields', [k for k in data])
diff --git a/hdf5storage/__init__.py b/hdf5storage/__init__.py
new file mode 100644
index 0000000..5517971
--- /dev/null
+++ b/hdf5storage/__init__.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2013, Freja Nordsiek
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+This is the hdf5storage package, a Python package to read and write
+Python data types to HDF5 (Hierarchical Data Format) files beyond just
+Numpy types.
+
+Version 0.1
+"""
+
+__version__ = "0.1"
+
+from hdf5storage.core import write, MarshallerCollection
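Since the package only exports write() and MarshallerCollection at this point, basic use looks roughly like the following sketch (untested against this snapshot; the keyword arguments are those of the write() signature in core.py further down):

    import numpy as np
    import hdf5storage

    # Store an array under /a_variable in data.h5. MATLAB_compatible
    # defaults to True, which also forces options such as
    # convert_scalars_to_arrays and reverse_dimension_order on.
    hdf5storage.write(filename='data.h5', name='/a_variable',
                      data=np.arange(10.0))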
diff --git a/hdf5storage/core.py b/hdf5storage/core.py
new file mode 100644
index 0000000..9927934
--- /dev/null
+++ b/hdf5storage/core.py
@@ -0,0 +1,302 @@
+# Copyright (c) 2013, Freja Nordsiek
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+import os
+import posixpath
+import copy
+import inspect
+import datetime
+import numpy as np
+import h5py
+
+from hdf5storage.utilities import *
+
+from hdf5storage.lowlevel import write_data
+from hdf5storage import Marshallers
+
+
+class Options(object):
+ def __init__(self):
+ self.store_type_information = True
+ self.MATLAB_compatible = True
+ self.scalar_options = {}
+ self.array_options = {}
+ self.delete_unused_variables = True
+ self.convert_scalars_to_arrays = True
+ self.reverse_dimension_order = True
+ self.convert_strings_to_utf16 = True
+ self.store_shape_for_empty = True
+ self.complex_names = ('real', 'imag')
+ self.marshaller_collection = MarshallerCollection()
+
+
+class MarshallerCollection(object):
+ """ Represents, maintains, and retreives a set of marshallers.
+
+ Maintains a list of marshallers used to marshal data types to and
+ from HDF5 files. It includes the builtin marshallers from the
+ :py:mod:`hdf5storage.Marshallers` module as well as any user
+ supplied or added marshallers. While the builtin list cannot be
+ changed, user ones can be added or removed. Also has functions to
+ get the appropriate marshaller for the ``type`` or type string of a
+ Python data type.
+
+ User marshallers must provide the same interface as
+ :py:class:`hdf5storage.Marshallers.TypeMarshaller`, which is
+ probably most easily done by inheriting from it.
+
+ Parameters
+ ----------
+ marshallers : marshaller or list of marshallers, optional
+ The user marshaller/s to add to the collection.
+
+ See Also
+ --------
+ hdf5storage.Marshallers
+ hdf5storage.Marshallers.TypeMarshaller
+
+ """
+ def __init__(self, marshallers=[]):
+ # Two lists of marshallers need to be maintained: one for the
+ # builtin ones in the Marshallers module, and another for user
+ # supplied ones.
+
+ # Grab all the marshallers in the Marshallers module (they are
+ # the classes) by inspection.
+ self._builtin_marshallers = [m() for key, m in dict(
+ inspect.getmembers(Marshallers,
+ inspect.isclass)).items()]
+ self._user_marshallers = []
+
+ # A list of all the marshallers will be needed along with
+ # dictionaries to look up the marshaller to use for given
+ # types or type strings (they are the keys).
+ self._marshallers = []
+ self._out = dict()
+ self._in = dict()
+
+ # Add any user given marshallers.
+ self.add_marshaller(copy.deepcopy(marshallers))
+
+ def _update_marshallers(self):
+ # Combine both sets of marshallers.
+ self._marshallers = self._builtin_marshallers.copy()
+ self._marshallers.extend(self._user_marshallers)
+
+ # Construct the dictionary to look up the appropriate marshaller
+ # by type.
+
+ self._out = {tp: m for m in self._marshallers for tp in m.types}
+
+ # The equivalent one to read data types given type strings needs
+ # to be created from it. Basically, we have to make the key be
+ # the cpython_type_string from it.
+
+ self._in = {type_string: m for key, m in self._out.items()
+ for type_string in m.cpython_type_strings}
+
+ def add_marshaller(self, marshallers):
+ if not isinstance(marshallers, (list, tuple, set, frozenset)):
+ marshallers = [marshallers]
+ for m in marshallers:
+ if m not in self._user_marshallers:
+ self._user_marshallers.append(m)
+ self._update_marshallers()
+
+ def remove_marshaller(self, marshallers):
+ if not isinstance(marshallers, (list, tuple, set, frozenset)):
+ marshallers = [marshallers]
+ for m in marshallers:
+ if m in self._user_marshallers:
+ self._user_marshallers.remove(m)
+ self._update_marshallers()
+
+ def clear_marshallers(self):
+ """ Clears the list of user provided marshallers.
+
+ Removes all user provided marshallers, but not the builtin ones
+ from the :py:mod:`hdf5storage.Marshallers` module, from the list
+ of marshallers used.
+
+ """
+ self._user_marshallers.clear()
+ self._update_marshallers()
+
+ def get_marshaller_for_type(self, tp):
+ if tp in self._out:
+ return copy.deepcopy(self._out[tp])
+ else:
+ return None
+
+
+def write(filename='data.h5', name='/data', data=None,
+ store_type_information=True, MATLAB_compatible=True,
+ delete_unused_variables=False,
+ convert_scalars_to_arrays=False,
+ reverse_dimension_order=False,
+ convert_strings_to_utf16=False,
+ store_shape_for_empty=False,
+ complex_names=('r','i')):
+ # Pack the different options into an Options class.
+
+ options = Options()
+
+ options.store_type_information = store_type_information
+ options.MATLAB_compatible = MATLAB_compatible
+ options.scalar_options = {}
+ options.array_options = {}
+ options.delete_unused_variables = delete_unused_variables
+ options.convert_scalars_to_arrays = convert_scalars_to_arrays
+ options.reverse_dimension_order = reverse_dimension_order
+ options.convert_strings_to_utf16 = convert_strings_to_utf16
+ options.store_shape_for_empty = store_shape_for_empty
+ options.complex_names = complex_names
+
+ # Now, if we are doing MATLAB compatibility, certain options must be
+ # overridden.
+
+ if MATLAB_compatible:
+ options.delete_unused_variables = True
+ options.convert_scalars_to_arrays = True
+ options.convert_strings_to_utf16 = True
+ options.reverse_dimension_order = True
+ options.store_shape_for_empty = True
+ options.complex_names = ('real','imag')
+
+ # Reset the list of MATLAB_fields attributes to set.
+
+ _MATLAB_fields_pairs = []
+
+ # Remove double slashes and a non-root trailing slash.
+
+ name = posixpath.normpath(name)
+
+ # Extract the group name and the target name (the target will be a
+ # Dataset if data can be mapped to one, but will end up being made
+ # into a Group otherwise). As HDF5 files use POSIX path conventions,
+ # posixpath will do everything.
+ groupname = posixpath.dirname(name)
+ targetname = posixpath.basename(name)
+
+ # If groupname got turned into blank, then it is just root.
+ if groupname == '':
+ groupname = '/'
+
+ # If targetname got turned into blank, then it is the current directory.
+ if targetname == '':
+ targetname = '.'
+
+ # Open the hdf5 file and start writing the data (and making the
+ # group groupname at the same time if it doesn't exist). This is all
+ # wrapped in a try block, so that the file can be closed if any
+ # errors happen (the error is re-raised). The
+ # h5py.get_config().complex_names is changed to complex_names. The
+ # previous value is restored at the end. Obviously, this makes this
+ # whole function thread unsafe, as the setting is changed globally
+ # for h5py.
+
+ backup_complex_names = h5py.get_config().complex_names
+
+ try:
+ h5py.get_config().complex_names = options.complex_names
+
+ # If the file already exists, we just open it. If it doesn't
+ # exist yet and we are doing any MATLAB formatting, we need to
+ # allocate a 512 byte user block (need metadata for MATLAB to
+ # tell it is a valid .mat file). The user_block size is also
+ # grabbed right before closing, so that if there is a userblock
+ # and we are doing MATLAB formatting, we know to set it.
+
+ if os.path.isfile(filename) or not options.MATLAB_compatible:
+ f = h5py.File(filename)
+ else:
+ f = h5py.File(filename, mode='w', userblock_size=512)
+
+ if groupname not in f:
+ grp = f.require_group(groupname)
+ else:
+ grp = f[groupname]
+
+ write_data(f, grp, targetname, data,
+ None, options)
+ except:
+ print("Unexpected error:", sys.exc_info()[0])
+ raise
+ finally:
+ userblock_size = f.userblock_size
+ f.close()
+ h5py.get_config().complex_names = backup_complex_names
+
+ # If we are doing MATLAB formatting and there is a sufficiently
+ # large userblock, write the new userblock. The same sort of error
+ # handling is used.
+
+ if options.MATLAB_compatible and userblock_size >= 128:
+ # Get the time.
+ now = datetime.datetime.now()
+
+ # Construct the leading string. The MATLAB one looks like
+ #
+ # s = 'MATLAB 7.3 MAT-file, Platform: GLNXA64, Created on: ' \
+ # + now.strftime('%a %b %d %H:%M:%S %Y') \
+ # + ' HDF5 schema 1.00 .'
+ #
+ # Platform is going to be changed to CPython version
+
+ v = sys.version_info
+
+ s = 'MATLAB 7.3 MAT-file, Platform: CPython ' \
+ + '{0}.{1}.{2}'.format(v.major, v.minor, v.micro) \
+ + ', Created on: ' \
+ + now.strftime('%a %b %d %H:%M:%S %Y') \
+ + ' HDF5 schema 1.00 .'
+
+ # Make the bytearray while padding with spaces up to 128-12
+ # (the minus 12 is there since the last 12 bytes are special).
+
+ b = bytearray(s + (128-12-len(s))*' ', encoding='utf-8')
+
+ # Add 8 nulls (0) and the magic number (or something) that
+ # MATLAB uses.
+
+ b.extend(bytearray.fromhex('00000000 00000000 0002494D'))
+
+ # Now, write it to the beginning of the file.
+
+ try:
+ fd = open(filename, 'r+b')
+ fd.write(b)
+ except:
+ print("Unexpected error:", sys.exc_info()[0])
+ raise
+ finally:
+ fd.close()
+
+
+# Set an empty list of path-string_array pairs to set the
+# MATLAB_fields attributes on all the things that correspond to MATLAB
+# structures.
+
+_MATLAB_fields_pairs = []
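The userblock handling in write() above can be sanity checked by reading the first 128 bytes of the resulting file directly. A small sketch, assuming a file was just written with MATLAB_compatible=True:

    # Inspect the MAT-file header placed in the HDF5 userblock: 116
    # bytes of descriptive text padded with spaces, followed by 8 nulls
    # and the marker bytes 0x02 0x49 0x4D ('IM') written above.
    with open('data.h5', 'rb') as fd:
        header = fd.read(128)
    print(header[:116].decode('utf-8', errors='replace').rstrip())
    print(header[-4:].hex())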
diff --git a/hdf5storage/lowlevel.py b/hdf5storage/lowlevel.py
new file mode 100644
index 0000000..d5cbf88
--- /dev/null
+++ b/hdf5storage/lowlevel.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2013, Freja Nordsiek
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+import h5py
+
+from hdf5storage.utilities import *
+
+
+def write_data(f, grp, name, data, type_string, options):
+ # Get the marshaller for type(data).
+
+ tp = type(data)
+ m = options.marshaller_collection.get_marshaller_for_type(tp)
+
+ # If a marshaller was found, use it to write the data. Otherwise,
+ # return an error. If we get something other than None back, then we
+ # must recurse through the entries. Also, we must set the H5PATH
+ # attribute to be the path to the containing group.
+
+ if m is not None:
+ outputs = m.write(f, grp, name, data, type_string, options)
+ if outputs is not None:
+ if len(outputs) > 2:
+ _MATLAB_fields_pairs.extend(outputs[2])
+ for i, v in enumerate(outputs[1]):
+ if len(outputs[0]) == 1:
+ write_data(f, outputs[0][0], v[0], v[1], None,
+ options)
+ if options.MATLAB_compatible:
+ set_attribute_string(outputs[0][0][v[0]],
+ 'H5PATH',
+ outputs[0][0].name)
+ else:
+ del_attribute(outputs[0][0][v[0]], 'H5PATH')
+ else:
+ write_data(f, outputs[0][i], v[0], v[1], None,
+ options)
+ if options.MATLAB_compatible:
+ set_attribute_string(outputs[0][i][v[0]],
+ 'H5PATH',
+ outputs[0][i].name)
+ else:
+ del_attribute(outputs[0][i][v[0]], 'H5PATH')
+ else:
+ raise NotImplementedError("Can't write data type: " + str(tp))
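The recursion contract above may be easier to see with a direct (hypothetical) call; normally write_data() is only reached through hdf5storage.write(). A sketch, assuming core.Options as defined in this commit:

    import h5py
    from hdf5storage.core import Options
    from hdf5storage.lowlevel import write_data

    # A dict is turned into a Group by PythonDictMarshaller, whose
    # write() returns ([group], [(name, value), ...]); write_data()
    # then recurses over each pair and, with MATLAB_compatible on,
    # sets an H5PATH attribute on every child. Note that calling
    # write_data() directly skips the userblock handling in
    # core.write(), so the result is plain HDF5, not a .mat file.
    opts = Options()
    with h5py.File('demo.h5', 'a') as f:
        write_data(f, f['/'], 'params',
                   {'x': 1.0, 'label': 'test'}, None, opts)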
diff --git a/hdf5storage/utilities.py b/hdf5storage/utilities.py
new file mode 100644
index 0000000..c2c1609
--- /dev/null
+++ b/hdf5storage/utilities.py
@@ -0,0 +1,119 @@
+# Copyright (c) 2013, Freja Nordsiek
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+""" Module of functions to set and delete HDF5 attributes.
+
+"""
+
+import numpy as np
+import h5py
+
+
+def set_attribute(target, name, value):
+ """ Sets an attribute on a Dataset or Group.
+
+ If the attribute `name` doesn't exist yet, it is created. If it
+ already exists, it is overwritten if it differs from `value`.
+
+ Parameters
+ ----------
+ target : Dataset or Group
+ :py:class:`h5py.Dataset` or :py:class:`h5py.Group` to set the
+ attribute of.
+ name : str
+ Name of the attribute to set.
+ value : numpy type other than :py:class:`str_`
+ Value to set the attribute to.
+
+ """
+ if name not in target.attrs:
+ target.attrs.create(name, value)
+ elif target.attrs[name].dtype != value.dtype \
+ or target.attrs[name].shape != value.shape:
+ target.attrs.create(name, value)
+ elif np.any(target.attrs[name] != value):
+ target.attrs.modify(name, value)
+
+
+def set_attribute_string(target, name, value):
+ """ Sets an attribute to a string on a Dataset or Group.
+
+ If the attribute `name` doesn't exist yet, it is created. If it
+ already exists, it is overwritten if it differs from `value`.
+
+ Parameters
+ ----------
+ target : Dataset or Group
+ :py:class:`h5py.Dataset` or :py:class:`h5py.Group` to set the
+ attribute of.
+ name : str
+ Name of the attribute to set.
+ value : string
+ Value to set the attribute to. Can be any sort of string type
+ that will convert to a :py:class:`numpy.string_`
+
+ """
+ set_attribute(target, name, np.string_(value))
+
+
+def set_attribute_string_array(target, name, string_list):
+ """ Sets an attribute to an array of string on a Dataset or Group.
+
+ If the attribute `name` doesn't exist yet, it is created. If it
+ already exists, it is overwritten with the list of strings
+ `string_list` (they will be vlen strings).
+
+ Parameters
+ ----------
+ target : Dataset or Group
+ :py:class:`h5py.Dataset` or :py:class:`h5py.Group` to set the
+ attribute of.
+ name : str
+ Name of the attribute to set.
+ string_list : list, tuple
+ List of strings to set the attribute to. Can be any string type
+ that will convert to a :py:class:`numpy.string_`
+
+ """
+ target.attrs.create(name, np.string_(string_list),
+ dtype=h5py.special_dtype(vlen=bytes))
+
+
+def del_attribute(target, name):
+ """ Deletes an attribute on a Dataset or Group.
+
+ If the attribute `name` exists, it is deleted.
+
+ Parameters
+ ----------
+ target : Dataset or Group
+ :py:class:`h5py.Dataset` or :py:class:`h5py.Group` to delete
+ the attribute from.
+ name : str
+ Name of the attribute to delete.
+
+ """
+ if name in target.attrs:
+ del target.attrs[name]
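For completeness, the helpers above can also be used on their own; inside the package they are only called from the marshallers' write()/write_metadata() methods. A small hypothetical example:

    import h5py
    import numpy as np
    from hdf5storage.utilities import (set_attribute,
                                       set_attribute_string,
                                       del_attribute)

    with h5py.File('attrs_demo.h5', 'a') as f:
        grp = f.require_group('/g')
        set_attribute(grp, 'CPython.Shape', np.uint64([3, 4]))
        set_attribute_string(grp, 'CPython.Type', 'numpy.ndarray')
        del_attribute(grp, 'MATLAB_class')  # no-op if it is not there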
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-hdf5storage.git