[python-hdf5storage] 03/152: Initial set of python codes to write only (still need polishing)

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Feb 29 08:24:28 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 0.1
in repository python-hdf5storage.

commit e6d9c0c00eea41d787c10bbf1a025ff83546a4a9
Author: Freja Nordsiek <fnordsie at gmail.com>
Date:   Sun Dec 22 22:10:00 2013 -0500

    Initial set of python codes to write only (still need polishing)
---
 hdf5storage/Marshallers.py | 417 +++++++++++++++++++++++++++++++++++++++++++++
 hdf5storage/__init__.py    |  36 ++++
 hdf5storage/core.py        | 302 ++++++++++++++++++++++++++++++++
 hdf5storage/lowlevel.py    |  69 ++++++++
 hdf5storage/utilities.py   | 119 +++++++++++++
 5 files changed, 943 insertions(+)

diff --git a/hdf5storage/Marshallers.py b/hdf5storage/Marshallers.py
new file mode 100644
index 0000000..cc6c0b8
--- /dev/null
+++ b/hdf5storage/Marshallers.py
@@ -0,0 +1,417 @@
+# Copyright (c) 2013, Freja Nordsiek
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+import h5py
+
+from hdf5storage.utilities import *
+from hdf5storage.lowlevel import write_data
+
+
+class TypeMarshaller(object):
+    def __init__(self):
+        self.cpython_attributes = {'CPython.Type'}
+        self.matlab_attributes = {'H5PATH'}
+        self.types = []
+        self.cpython_type_strings = []
+
+    def get_type_string(self, data, type_string):
+        if type_string is not None:
+            return type_string
+        else:
+            i = self.types.index(type(data))
+            return self.cpython_type_strings[i]
+
+    def write(self, f, grp, name, data, type_string, options):
+        raise NotImplementedError("Can't write data type: "
+                                  + str(type(data)))
+
+    def write_metadata(self, f, grp, name, data, type_string, options):
+        # Make sure we have a complete type_string.
+        type_string = self.get_type_string(data, type_string)
+
+        # The metadata that is written depends on the format.
+
+        if options.store_type_information:
+            set_attribute_string(grp[name], 'CPython.Type', type_string)
+
+        # If we are not storing type information or doing MATLAB
+        # compatibility, then attributes not in the cpython and/or
+        # MATLAB lists need to be removed.
+
+        attributes_used = set()
+
+        if options.store_type_information:
+            attributes_used |= self.cpython_attributes
+
+        if options.MATLAB_compatible:
+            attributes_used |= self.matlab_attributes
+
+        for attribute in (set(grp[name].attrs.keys()) - attributes_used):
+            del_attribute(grp[name], attribute)
+
+    def read(self, f, grp, name, options):
+        raise NotImplementedError("Can't read data: " + name)
+
+
+class NumpyScalarArrayMarshaller(TypeMarshaller):
+    def __init__(self):
+        TypeMarshaller.__init__(self)
+        self.cpython_attributes |= {'CPython.Shape', 'CPython.Empty',
+                                    'CPython.numpy.UnderlyingType'}
+        self.matlab_attributes |= {'MATLAB_class', 'MATLAB_empty',
+                                   'MATLAB_int_decode'}
+        self.types = [np.ndarray, np.matrix,
+                      np.bool8,
+                      np.uint8, np.uint16, np.uint32, np.uint64,
+                      np.int8, np.int16, np.int32, np.int64,
+                      np.float16, np.float32, np.float64, np.float128,
+                      np.complex64, np.complex128, np.complex256,
+                      np.string_, np.unicode]
+        self.cpython_type_strings = ['numpy.ndarray', 'numpy.matrix',
+                                     'numpy.bool8',
+                                     'numpy.uint8', 'numpy.uint16',
+                                     'numpy.uint32', 'numpy.uint64',
+                                     'numpy.int8', 'numpy.int16',
+                                     'numpy.int32', 'numpy.int64',
+                                     'numpy.float16', 'numpy.float32',
+                                     'numpy.float64', 'numpy.float128',
+                                     'numpy.complex64',
+                                     'numpy.complex128',
+                                     'numpy.complex256',
+                                     'numpy.string_', 'numpy.unicode']
+
+        # If we are storing in MATLAB format, we will need to be able to
+        # set the MATLAB_class attribute. The different numpy types just
+        # need to be properly mapped to the right strings. Some types do
+        # not have a string since MATLAB does not support them.
+
+        self.__MATLAB_classes = {np.bool8: 'logical', np.uint8: 'uint8',
+                                 np.uint16: 'uint16',
+                                 np.uint32: 'uint32',
+                                 np.uint64: 'uint64', np.int8: 'int8',
+                                 np.int16: 'int16', np.int32: 'int32',
+                                 np.int64: 'int64', np.float32: 'single',
+                                 np.float64: 'double',
+                                 np.complex64: 'single',
+                                 np.complex128: 'double',
+                                 np.string_: 'char',
+                                 np.unicode: 'char'}
+
+
+    def write(self, f, grp, name, data, type_string, options):
+        # Need to make a set of data that will be stored. It will start
+        # out as a copy of data and then be steadily manipulated.
+
+        data_to_store = data.copy()
+
+        # Optionally convert ASCII strings to UTF-16. This is done by
+        # viewing the bytes as uint8 and then converting to uint16,
+        # which requires making the data at least 1 dimensional.
+
+        if options.convert_strings_to_utf16 and not (data.size == 0 \
+                and options.store_shape_for_empty) \
+                and data.dtype.type == np.string_:
+            data_to_store = np.uint16(np.atleast_1d( \
+                            data_to_store).view(np.uint8))
+
+        # As of 2013-12-13, h5py cannot write numpy.unicode (UTF-32
+        # encoding) types. If it is just a numpy.unicode object, we can
+        # force it to UTF-16 or just write it as uint32's. If it is an
+        # array, forcing it to UTF-16 is a bad idea because characters
+        # are not always 2 bytes long in UTF-16. So, converting them to
+        # uint32 makes the most sense.
+
+        if data.dtype.type == np.unicode and not (data.size == 0 \
+                and options.store_shape_for_empty):
+            data_to_store = np.atleast_1d(data_to_store).view(np.uint32)
+
+        # Convert scalars to arrays if that option is set.
+
+        if options.convert_scalars_to_arrays:
+            data_to_store = np.atleast_2d(data_to_store)
+
+        # If data is empty, we instead need to store the shape of the
+        # array if the appropriate option is set.
+
+        if options.store_shape_for_empty and data.size == 0:
+            data_to_store = np.uint64(data.shape)
+            if options.convert_scalars_to_arrays:
+                data_to_store = np.atleast_2d(data_to_store)
+
+        # Reverse the dimension order if that option is set.
+
+        if options.reverse_dimension_order:
+            data_to_store = data_to_store.T
+
+        # The data must first be written. If name is not present yet,
+        # then it must be created. If it is present, but not a Dataset,
+        # has the wrong dtype, or is the wrong shape, then it must be
+        # deleted and then written. Otherwise, it is just overwritten in
+        # place (note, this will not change any filters or chunking
+        # settings, but will keep the file from growing needlessly).
+
+        if name not in grp:
+            grp.create_dataset(name, data=data_to_store,
+                               **options.array_options)
+        elif not isinstance(grp[name], h5py.Dataset) \
+                or grp[name].dtype != data_to_store.dtype \
+                or grp[name].shape != data_to_store.shape:
+            del grp[name]
+            grp.create_dataset(name, data=data_to_store,
+                               **options.array_options)
+        else:
+            grp[name][...] = data_to_store
+
+        # Write the metadata using the inherited function (good enough).
+
+        self.write_metadata(f, grp, name, data, type_string, options)
+
+
+    def write_metadata(self, f, grp, name, data, type_string, options):
+        # First, call the inherited version to do most of the work.
+
+        TypeMarshaller.write_metadata(self, f, grp, name, data,
+                                      type_string, options)
+
+        # Write the underlying numpy type if we are storing type
+        # information.
+
+        if options.store_type_information:
+            set_attribute_string(grp[name],
+                                 'CPython.numpy.UnderlyingType',
+                                 data.dtype.name)
+
+        # If we are storing type information, the shape needs to be
+        # stored in CPython.Shape.
+
+        if options.store_type_information:
+            set_attribute(grp[name], 'CPython.Shape',
+                          np.uint64(data.shape))
+
+        # If data is empty and we are supposed to store shape info for
+        # empty data, we need to set the CPython.Empty and MATLAB_empty
+        # attributes to 1 if we are storing type info or making it
+        # MATLAB compatible. Otherwise, no empty attribute is set and
+        # existing ones must be deleted.
+
+        if options.store_shape_for_empty and data.size == 0:
+            if options.store_type_information:
+                set_attribute(grp[name], 'CPython.Empty',
+                                          np.uint8(1))
+            else:
+                del_attribute(grp[name], 'CPython.Empty')
+            if options.MATLAB_compatible:
+                set_attribute(grp[name], 'MATLAB_empty',
+                                          np.uint8(1))
+            else:
+                del_attribute(grp[name], 'MATLAB_empty')
+        else:
+            del_attribute(grp[name], 'CPython.Empty')
+            del_attribute(grp[name], 'MATLAB_empty')
+
+        # If we are making it MATLAB compatible, the MATLAB_class
+        # attribute needs to be set by looking up the data type
+        # (obtained from data.dtype.type), and if it is a string type,
+        # the MATLAB_int_decode attribute must be set properly.
+        # Otherwise, the attributes must be deleted.
+
+        if options.MATLAB_compatible:
+            tp = data.dtype.type
+            if tp in self.__MATLAB_classes:
+                set_attribute_string(grp[name], 'MATLAB_class',
+                                     self.__MATLAB_classes[tp])
+            else:
+                set_attribute_string(grp[name], 'MATLAB_class', '')
+
+            if tp in (np.string_, np.unicode):
+                set_attribute(grp[name], 'MATLAB_int_decode',
+                              {np.string_: 2, np.unicode: 4}[tp])
+            else:
+                del_attribute(grp[name], 'MATLAB_int_decode')
+
+
+class PythonScalarMarshaller(NumpyScalarArrayMarshaller):
+    def __init__(self):
+        NumpyScalarArrayMarshaller.__init__(self)
+        self.types = [bool, int, float, complex]
+        self.cpython_type_strings = ['bool', 'int', 'float', 'complex']
+
+    def write(self, f, grp, name, data, type_string, options):
+        # data just needs to be converted to the appropriate numpy type
+        # (pass it through np.array and then access [()] to get the
+        # scalar back as a scalar numpy type) and then pass it to the
+        # parent version of this function. The proper type_string needs
+        # to be grabbed now as the parent function will have a modified
+        # form of data to guess from if not given the right one
+        # explicitly.
+        NumpyScalarArrayMarshaller.write(self, f, grp, name,
+                                         np.array(data)[()],
+                                         self.get_type_string(data,
+                                         type_string), options)
+
+
+class PythonStringMarshaller(NumpyScalarArrayMarshaller):
+    def __init__(self):
+        NumpyScalarArrayMarshaller.__init__(self)
+        self.types = [str, bytes, bytearray]
+        self.cpython_type_strings = ['str', 'bytes', 'bytearray']
+
+    def write(self, f, grp, name, data, type_string, options):
+        # data just needs to be converted to a numpy string, unless it
+        # is a bytearray in which case it needs to be converted to a
+        # uint8 array.
+
+        if isinstance(data, bytearray):
+            cdata = np.uint8(data)
+        else:
+            cdata = np.string_(data)
+
+        # Now pass it to the parent version of this function to write
+        # it. The proper type_string needs to be grabbed now as the
+        # parent function will have a modified form of data to guess
+        # from if not given the right one explicitly.
+        NumpyScalarArrayMarshaller.write(self, f, grp, name, cdata,
+                                         self.get_type_string(data,
+                                         type_string), options)
+
+
+class PythonNoneMarshaller(NumpyScalarArrayMarshaller):
+    def __init__(self):
+        NumpyScalarArrayMarshaller.__init__(self)
+        self.types = [type(None)]
+        self.cpython_type_strings = ['builtins.NoneType']
+    def write(self, f, grp, name, data, type_string, options):
+        # Just going to use the parent function with an empty double
+        # (two dimensional so that MATLAB will import it as a []) as the
+        # data and the right type_string set (parent can't guess right
+        # from the modified form).
+        NumpyScalarArrayMarshaller.write(self, f, grp, name,
+                                         np.ndarray(shape=(0,0),
+                                         dtype='float64'),
+                                         self.get_type_string(data,
+                                         type_string), options)
+
+class PythonDictMarshaller(TypeMarshaller):
+    def __init__(self):
+        TypeMarshaller.__init__(self)
+        self.cpython_attributes |= {'CPython.Empty'}
+        self.matlab_attributes |= {'MATLAB_class', 'MATLAB_empty'}
+        self.types = [dict]
+        self.cpython_type_strings = ['dict']
+        self.__MATLAB_classes = ['struct']
+
+    def write(self, f, grp, name, data, type_string, options):
+        # If the group doesn't exist, it needs to be created. If it
+        # already exists but is not a group, it needs to be deleted
+        # before being created.
+
+        if name not in grp:
+            grp.create_group(name)
+        elif not isinstance(grp[name], h5py.Group):
+            del grp[name]
+            grp.create_group(name)
+
+        grp2 = grp[name]
+
+        # Write the metadata.
+        self.write_metadata(f, grp, name, data, type_string, options)
+
+        # Delete any Datasets/Groups not corresponding to a field name
+        # in data if that option is set.
+
+        if options.delete_unused_variables:
+            for field in {i for i in grp2}.difference({i for i in data}):
+                del grp2[field]
+
+        # Check for any field names that are not strings since they
+        # cannot be handled.
+
+        for fieldname in data:
+            if not isinstance(fieldname, str):
+                raise NotImplementedError('Dictionaries with non-string'
+                                          + ' keys are not supported: '
+                                          + repr(fieldname))
+
+        # Return a tuple holding the group to store in, all the elements
+        # of data, and their values to the calling function so that it
+        # can recurse over all the elements.
+
+        return ([grp2], [(n, v) for n, v in data.items()])
+
+    def write_metadata(self, f, grp, name, data, type_string, options):
+        # First, call the inherited version to do most of the work.
+
+        TypeMarshaller.write_metadata(self, f, grp, name, data,
+                                      type_string, options)
+
+        # If data is empty and we are supposed to store shape info for
+        # empty data, we need to set the CPython.Empty and MATLAB_empty
+        # attributes to 1 if we are storing type info or making it
+        # MATLAB compatible. Otherwise, no empty attribute is set and
+        # existing ones must be deleted.
+
+        if options.store_shape_for_empty and len(data) == 0:
+            if options.store_type_information:
+                set_attribute(grp[name], 'CPython.Empty',
+                                          np.uint8(1))
+            else:
+                del_attribute(grp[name], 'CPython.Empty')
+            if options.MATLAB_compatible:
+                set_attribute(grp[name], 'MATLAB_empty',
+                                          np.uint8(1))
+            else:
+                del_attribute(grp[name], 'MATLAB_empty')
+        else:
+            del_attribute(grp[name], 'CPython.Empty')
+            del_attribute(grp[name], 'MATLAB_empty')
+
+        # If we are making it MATLAB compatible, the MATLAB_class
+        # attribute needs to be set for the data type. Also, all the
+        # field names need to be stored in the attribute MATLAB_fields.
+        # If the type cannot be found, an error needs to be thrown. If
+        # we are not doing MATLAB compatibility, the attributes need to
+        # be deleted.
+
+        if options.MATLAB_compatible:
+            tp = type(data)
+            if tp in self.types:
+                set_attribute_string(grp[name], \
+                            'MATLAB_class', self.__MATLAB_classes[ \
+                            self.types.index(tp)])
+            else:
+                raise NotImplementedError("Can't write data type: "
+                                          + str(tp))
+
+            # Write an array of all the fields to the attribute that
+            # lists them.
+
+            # NOTE: Can't make it do a variable length set of strings
+            # like MATLAB likes. However, not including them seems to
+            # cause no problem.
+
+            # set_attribute_string_array(grp[name], \
+            #     'MATLAB_fields', [k for k in data])
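
As an aside (not part of the commit), the string handling in
NumpyScalarArrayMarshaller.write above is easiest to see in isolation.
A minimal sketch of the byte-widening trick the comments describe,
using the era's np.string_ alias (np.bytes_ in current NumPy); the
example string is arbitrary:

    import numpy as np

    s = np.string_('abc')               # numpy bytes scalar (ASCII)
    # View the bytes as uint8, then widen to uint16; for ASCII input
    # each byte becomes one UTF-16 code unit, which is what gets stored
    # when convert_strings_to_utf16 is enabled.
    utf16 = np.uint16(np.atleast_1d(s).view(np.uint8))
    # utf16 -> array([97, 98, 99], dtype=uint16)

    # UTF-32 (numpy unicode) data is instead reinterpreted as uint32
    # code points, since h5py could not write UTF-32 directly.
    utf32 = np.atleast_1d('abc').view(np.uint32)
    # utf32 -> array([97, 98, 99], dtype=uint32)
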
diff --git a/hdf5storage/__init__.py b/hdf5storage/__init__.py
new file mode 100644
index 0000000..5517971
--- /dev/null
+++ b/hdf5storage/__init__.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2013, Freja Nordsiek
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+This is the hdf5storage package, a Python package to read and write
+Python data types to HDF5 (Hierarchical Data Format) files beyond just
+NumPy types.
+
+Version 0.1
+"""
+
+__version__ = "0.1"
+
+from hdf5storage.core import write, MarshallerCollection
diff --git a/hdf5storage/core.py b/hdf5storage/core.py
new file mode 100644
index 0000000..9927934
--- /dev/null
+++ b/hdf5storage/core.py
@@ -0,0 +1,302 @@
+# Copyright (c) 2013, Freja Nordsiek
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+import os
+import posixpath
+import copy
+import inspect
+import datetime
+import numpy as np
+import h5py
+
+from hdf5storage import Marshallers
+from hdf5storage.utilities import *
+
+from hdf5storage.lowlevel import write_data
+
+
+class Options(object):
+    def __init__(self):
+        self.store_type_information = True
+        self.MATLAB_compatible = True
+        self.scalar_options = {}
+        self.array_options = {}
+        self.delete_unused_variables = True
+        self.convert_scalars_to_arrays = True
+        self.reverse_dimension_order = True
+        self.convert_strings_to_utf16 = True
+        self.store_shape_for_empty = True
+        self.complex_names = ('real', 'imag')
+        self.marshaller_collection = MarshallerCollection()
+
+
+class MarshallerCollection(object):
+    """ Represents, maintains, and retreives a set of marshallers.
+
+    Maintains a list of marshallers used to marshal data types to and
+    from HDF5 files. It includes the builtin marshallers from the
+    :py:mod:`hdf5storage.Marshallers` module as well as any user
+    supplied or added marshallers. While the builtin list cannot be
+    changed, user ones can be added or removed. Also has functions to
+    get the marshaller appropriate for the ``type`` or type_string of
+    a Python data type.
+
+    User marshallers must provide the same interface as
+    :py:class:`hdf5storage.Marshallers.TypeMarshaller`, which is
+    probably most easily done by inheriting from it.
+
+    Parameters
+    ----------
+    marshallers : marshaller or list of marshallers, optional
+        The user marshaller/s to add to the collection.
+
+    See Also
+    --------
+    hdf5storage.Marshallers
+    hdf5storage.Marshallers.TypeMarshaller
+
+    """
+    def __init__(self, marshallers=[]):
+        # Two lists of marshallers need to be maintained: one for the
+        # builtin ones in the Marshallers module, and another for user
+        # supplied ones.
+
+        # Grab all the marshallers in the Marshallers module (they are
+        # the classes) by inspection.
+        self._builtin_marshallers = [m() for key, m in dict(
+                                     inspect.getmembers(Marshallers,
+                                     inspect.isclass)).items()]
+        self._user_marshallers = []
+
+        # A list of all the marshallers will be needed along with
+        # dictionaries to look up the marshaller to use for given
+        # types or type strings (they are the keys).
+        self._marshallers = []
+        self._out = dict()
+        self._in = dict()
+
+        # Add any user given marshallers.
+        self.add_marshaller(copy.deepcopy(marshallers))
+
+    def _update_marshallers(self):
+        # Combine both sets of marshallers.
+        self._marshallers = self._builtin_marshallers.copy()
+        self._marshallers.extend(self._user_marshallers)
+
+        # Construct the dictionary to look up the appropriate marshaller
+        # by type.
+
+        self._out = {tp: m for m in self._marshallers for tp in m.types}
+
+        # The equivalent one to read data types given type strings needs
+        # to be created from it. Basically, we have to make the key be
+        # the cpython_type_string from it.
+
+        self._in = {type_string: m for key, m in self._out.items()
+                    for type_string in m.cpython_type_strings}
+
+    def add_marshaller(self, marshallers):
+        if not isinstance(marshallers, (list, tuple, set, frozenset)):
+            marshallers = [marshallers]
+        for m in marshallers:
+            if m not in self._user_marshallers:
+                self._user_marshallers.append(m)
+        self._update_marshallers()
+
+    def remove_marshaller(self, marshallers):
+        if not isinstance(marshallers, (list, tuple, set, frozenset)):
+            marshallers = [marshallers]
+        for m in marshallers:
+            if m in self._user_marshallers:
+                self._user_marshallers.remove(m)
+        self._update_marshallers()
+
+    def clear_marshallers(self):
+        """ Clears the list of user provided marshallers.
+
+        Removes all user provided marshallers, but not the builtin ones
+        from the :py:mod:`hdf5storage.Marshallers` module, from the list
+        of marshallers used.
+
+        """
+        self._user_marshallers.clear()
+        self._update_marshallers()
+
+    def get_marshaller_for_type(self, tp):
+        if tp in self._out:
+            return copy.deepcopy(self._out[tp])
+        else:
+            return None
+
+
+def write(filename='data.h5', name='/data', data=None,
+          store_type_information=True, MATLAB_compatible=True,
+          delete_unused_variables=False,
+          convert_scalars_to_arrays=False,
+          reverse_dimension_order=False,
+          convert_strings_to_utf16=False,
+          store_shape_for_empty=False,
+          complex_names=('r','i')):
+    # Pack the different options into an Options class.
+
+    options = Options()
+
+    options.store_type_information = store_type_information
+    options.MATLAB_compatible = MATLAB_compatible
+    options.scalar_options = {}
+    options.array_options = {}
+    options.delete_unused_variables = delete_unused_variables
+    options.convert_scalars_to_arrays = convert_scalars_to_arrays
+    options.reverse_dimension_order = reverse_dimension_order
+    options.convert_strings_to_utf16 = convert_strings_to_utf16
+    options.store_shape_for_empty = store_shape_for_empty
+    options.complex_names = complex_names
+
+    # Now, if we are doing MATLAB compatibility, certain options must be
+    # overridden.
+
+    if MATLAB_compatible:
+        options.delete_unused_variables = True
+        options.convert_scalars_to_arrays = True
+        options.convert_strings_to_utf16 = True
+        options.reverse_dimension_order = True
+        options.store_shape_for_empty = True
+        options.complex_names = ('real','imag')
+
+    # Reset the list of MATLAB_fields attributes to set.
+
+    _MATLAB_fields_pairs = []
+
+    # Remove double slashes and a non-root trailing slash.
+
+    name = posixpath.normpath(name)
+
+    # Extract the group name and the target name (the target will be a
+    # dataset if data can be mapped to one, but will end up being made
+    # into a group otherwise). As HDF5 files use POSIX path
+    # conventions, posixpath will do everything.
+    groupname = posixpath.dirname(name)
+    targetname = posixpath.basename(name)
+
+    # If groupname got turned into blank, then it is just root.
+    if groupname == '':
+        groupname = '/'
+
+    # If targetname ended up blank, then it is the current directory.
+    if targetname == '':
+        targetname = '.'
+
+    # Open the hdf5 file and start writing the data (and making the
+    # group groupname at the same time if it doesn't exist). This is all
+    # wrapped in a try block, so that the file can be closed if any
+    # errors happen (the error is re-raised). The
+    # h5py.get_config().complex_names is changed to complex_names. The
+    # previous value is restored at the end. Obviously, this makes this
+    # whole function thread unsafe as it changes it for h5py globally.
+
+    backup_complex_names = h5py.get_config().complex_names
+
+    try:
+        h5py.get_config().complex_names = options.complex_names
+
+        # If the file already exists, we just open it. If it doesn't
+        # exist yet and we are doing any MATLAB formatting, we need to
+        # allocate a 512 byte user block (need metadata for MATLAB to
+        # tell it is a valid .mat file). The user_block size is also
+        # grabbed right before closing, so that if there is a userblock
+        # and we are doing MATLAB formatting, we know to set it.
+
+        if os.path.isfile(filename) or not options.MATLAB_compatible:
+            f = h5py.File(filename)
+        else:
+            f = h5py.File(filename, mode='w', userblock_size=512)
+
+        if groupname not in f:
+            grp = f.require_group(groupname)
+        else:
+            grp = f[groupname]
+
+        write_data(f, grp, targetname, data,
+                   None, options)
+    except:
+        print("Unexpected error:", sys.exc_info()[0])
+        raise
+    finally:
+        userblock_size = f.userblock_size
+        f.close()
+        h5py.get_config().complex_names = backup_complex_names
+
+    # If we are doing MATLAB formatting and there is a sufficiently
+    # large userblock, write the new userblock. The same sort of error
+    # handling is used.
+
+    if options.MATLAB_compatible and userblock_size >= 128:
+        # Get the time.
+        now = datetime.datetime.now()
+
+        # Construct the leading string. The MATLAB one looks like
+        #
+        # s = 'MATLAB 7.3 MAT-file, Platform: GLNXA64, Created on: ' \
+        #     + now.strftime('%a %b %d %H:%M:%S %Y') \
+        #     + ' HDF5 schema 1.00 .'
+        #
+        # Platform is going to be changed to CPython version
+
+        v = sys.version_info
+
+        s = 'MATLAB 7.3 MAT-file, Platform: CPython ' \
+            + '{0}.{1}.{2}'.format(v.major, v.minor, v.micro) \
+            + ', Created on: ' \
+            + now.strftime('%a %b %d %H:%M:%S %Y') \
+            + ' HDF5 schema 1.00 .'
+
+        # Make the bytearray while padding with spaces up to 128-12
+        # (the minus 12 is there since the last 12 bytes are special).
+
+        b = bytearray(s + (128-12-len(s))*' ', encoding='utf-8')
+
+        # Add 8 nulls (the subsystem data offset), the version field
+        # (0x0200), and the 'IM' endian indicator that MATLAB expects.
+
+        b.extend(bytearray.fromhex('00000000 00000000 0002494D'))
+
+        # Now, write it to the beginning of the file.
+
+        try:
+            fd = open(filename, 'r+b')
+            fd.write(b)
+        except:
+            print("Unexpected error:", sys.exc_info()[0])
+            raise
+        finally:
+            fd.close()
+
+
+# Set an empty list of path-string_array pairs to set the
+# MATLAB_fields attributes on all the things that correspond to MATLAB
+# structures.
+
+_MATLAB_fields_pairs = []
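
For orientation (not part of the commit), a sketch of how this early
write-only API is meant to be called, based only on the signature of
write() above; the file name and data are made up, and the public API
changed in later revisions:

    import numpy as np
    import hdf5storage

    # Write a dict of numpy arrays to /data in example.h5 with the
    # default MATLAB-compatible settings; each key becomes a field of
    # a MATLAB struct.
    hdf5storage.write(filename='example.h5', name='/data',
                      data={'a': np.float64([[1.0, 2.0]]),
                            'b': np.int32([[1, 2], [3, 4]])})
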
diff --git a/hdf5storage/lowlevel.py b/hdf5storage/lowlevel.py
new file mode 100644
index 0000000..d5cbf88
--- /dev/null
+++ b/hdf5storage/lowlevel.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2013, Freja Nordsiek
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+import h5py
+
+from hdf5storage.utilities import *
+
+
+def write_data(f, grp, name, data, type_string, options):
+    # Get the marshaller for type(data).
+
+    tp = type(data)
+    m = options.marshaller_collection.get_marshaller_for_type(tp)
+
+    # If a marshaller was found, use it to write the data. Otherwise,
+    # raise an error. If we get something other than None back, then we
+    # must recurse through the entries. Also, we must set the H5PATH
+    # attribute to be the path to the containing group.
+
+    if m is not None:
+        outputs = m.write(f, grp, name, data, type_string, options)
+        if outputs is not None:
+            if len(outputs) > 2:
+                _MATLAB_fields_pairs.extend(outputs[2])
+            for i, v in enumerate(outputs[1]):
+                if len(outputs[0]) == 1:
+                    write_data(f, outputs[0][0], v[0], v[1], None,
+                               options)
+                    if options.MATLAB_compatible:
+                        set_attribute_string(outputs[0][0][v[0]],
+                                             'H5PATH',
+                                             outputs[0][0].name)
+                    else:
+                        del_attribute(outputs[0][0][v[0]], 'H5PATH')
+                else:
+                    write_data(f, outputs[0][i], v[0], v[1], None,
+                               options)
+                    if options.MATLAB_compatible:
+                        set_attribute_string(outputs[0][i][v[0]],
+                                             'H5PATH',
+                                             outputs[0][i].name)
+                    else:
+                        del_attribute(outputs[0][i][v[0]], 'H5PATH')
+    else:
+        raise NotImplementedError("Can't write data type: " + str(tp))
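
To make the recursion and H5PATH handling above concrete (this is an
expectation read off write_data and PythonDictMarshaller.write, not
captured output), writing a nested dict such as
{'outer': {'x': np.float64([1.0])}} to /data with MATLAB compatibility
enabled should lay the file out roughly as:

    /data           group    CPython.Type='dict', MATLAB_class='struct'
    /data/outer     group    H5PATH='/data', MATLAB_class='struct'
    /data/outer/x   dataset  H5PATH='/data/outer', MATLAB_class='double'

Each child gets its containing group's path in H5PATH, while the
top-level target itself gets no H5PATH since it has no marshalled
parent.
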
diff --git a/hdf5storage/utilities.py b/hdf5storage/utilities.py
new file mode 100644
index 0000000..c2c1609
--- /dev/null
+++ b/hdf5storage/utilities.py
@@ -0,0 +1,119 @@
+# Copyright (c) 2013, Freja Nordsiek
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+""" Module of functions to set and delete HDF5 attributes.
+
+"""
+
+import numpy as np
+import h5py
+
+
+def set_attribute(target, name, value):
+    """ Sets an attribute on a Dataset or Group.
+
+    If the attribute `name` doesn't exist yet, it is created. If it
+    already exists, it is overwritten if it differs from `value`.
+
+    Parameters
+    ----------
+    target : Dataset or Group
+        :py:class:`h5py.Dataset` or :py:class:`h5py.Group` to set the
+        attribute of.
+    name : str
+        Name of the attribute to set.
+    value : numpy type other than :py:class:`str_`
+        Value to set the attribute to.
+
+    """
+    if name not in target.attrs:
+        target.attrs.create(name, value)
+    elif target.attrs[name].dtype != value.dtype \
+            or target.attrs[name].shape != value.shape:
+        target.attrs.create(name, value)
+    elif np.any(target.attrs[name] != value):
+        target.attrs.modify(name, value)
+
+
+def set_attribute_string(target, name, value):
+    """ Sets an attribute to a string on a Dataset or Group.
+
+    If the attribute `name` doesn't exist yet, it is created. If it
+    already exists, it is overwritten if it differs from `value`.
+
+    Parameters
+    ----------
+    target : Dataset or Group
+        :py:class:`h5py.Dataset` or :py:class:`h5py.Group` to set the
+        attribute of.
+    name : str
+        Name of the attribute to set.
+    value : string
+        Value to set the attribute to. Can be any sort of string type
+        that will convert to a :py:class:`numpy.string_`
+
+    """
+    set_attribute(target, name, np.string_(value))
+
+
+def set_attribute_string_array(target, name, string_list):
+    """ Sets an attribute to an array of string on a Dataset or Group.
+
+    If the attribute `name` doesn't exist yet, it is created. If it
+    already exists, it is overwritten with the list of strings
+    `string_list` (they will be vlen strings).
+
+    Parameters
+    ----------
+    target : Dataset or Group
+        :py:class:`h5py.Dataset` or :py:class:`h5py.Group` to set the
+        attribute of.
+    name : str
+        Name of the attribute to set.
+    string_list : list, tuple
+        List of strings to set the attribute to. Can be any string type
+        that will convert to a :py:class:`numpy.string_`
+
+    """
+    target.attrs.create(name, np.string_(string_list),
+                        dtype=h5py.special_dtype(vlen=bytes))
+
+
+def del_attribute(target, name):
+    """ Deletes an attribute on a Dataset or Group.
+
+    If the attribute `name` exists, it is deleted.
+
+    Parameters
+    ----------
+    target : Dataset or Group
+        :py:class:`h5py.Dataset` or :py:class:`h5py.Group` to delete
+        the attribute of.
+    name : str
+        Name of the attribute to delete.
+
+    """
+    if name in target.attrs:
+        del target.attrs[name]
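
The helpers above are thin wrappers around h5py attribute access. A
hedged usage sketch (the file name and attribute values are made up
for illustration):

    import h5py
    import numpy as np
    from hdf5storage.utilities import (set_attribute,
                                       set_attribute_string,
                                       del_attribute)

    with h5py.File('attrs_demo.h5', 'w') as f:
        dset = f.create_dataset('x', data=np.arange(3))
        # Created on first call; later calls rewrite only if the value
        # actually differs.
        set_attribute_string(dset, 'CPython.Type', 'numpy.ndarray')
        set_attribute(dset, 'CPython.Shape', np.uint64(dset.shape))
        # Deleting is a no-op if the attribute is absent.
        del_attribute(dset, 'CPython.Type')
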

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-hdf5storage.git


