[h5py] 27/455: More transactions, broken for now

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:14 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit 046905455cc8d07fdebeaefcf5fa487efbbf00af
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Sat May 24 00:19:42 2008 +0000

    More transactions, broken for now
---
 h5py/h5t.pyx         |  33 ++----
 h5py/highlevel.py    | 316 +++++++++++++++++++++++++++++++++------------------
 h5py/transactions.py |   4 +
 3 files changed, 217 insertions(+), 136 deletions(-)

diff --git a/h5py/h5t.pyx b/h5py/h5t.pyx
index 99bbc67..856632b 100644
--- a/h5py/h5t.pyx
+++ b/h5py/h5t.pyx
@@ -690,27 +690,8 @@ def py_set_complex_names(char* real_name=NULL, char* imag_name=NULL):
         raise ValueError("Must be called with no arguments or exactly 2: STRING real_name, STRING imag_name")
 
     
-def py_h5t_to_dtype(hid_t type_id, object force_native=False, int force_string_length=-1, object compound_fields=None):
-    """ (INT type_id, BOOL force_native=False, INT force_string_length=-1,
-            TUPLE compound_fields=None) 
-        => INT type_id
-
-        Produce a Numpy dtype of the same general kind as an HDF5 datatype.
-        Note that the result is *NOT* guaranteed to be memory-compatible with
-        the HDF5 type; for that use py_dtype_to_h5t.  
-
-        If force_native is True, all byte-orders in the returned dtype will be
-        in native order. Variable-length (VLEN) strings are currently not
-        supported, but by providing a value for <force_string_length> they
-        can be converted to fixed-length strings compatible with Numpy.
-
-        If compound_fields is provided, it must be a tuple of names which 
-        correspond to fields in the HDF5 object.  Only HDF5 field names which
-        are present in this tuple will be copied, and will be inserted into the
-        dtype in the order that they appear in the tuple.  Fields which are
-        not present in the HDF5 type are discarded.  As a side effect, this
-        disables automatic conversion of compound types to complex numbers,
-        even if they have the appropriate names.
+def py_h5t_to_dtype(hid_t type_id, object byteorder=None, int string_length=-1, object compound_fields=None):
+    """ TODO: rework this.
     """
     cdef int classtype
     cdef int sign
@@ -735,10 +716,10 @@ def py_h5t_to_dtype(hid_t type_id, object force_native=False, int force_string_l
 
     elif classtype == H5T_STRING:
         if is_variable_str(type_id):
-            if force_string_length <= 0:
+            if string_length <= 0:
                 raise ConversionError("Variable-length strings are unsupported; try using a fixed size via force_string_length")
             else:
-                size = force_string_length
+                size = string_length
         else:
             size = get_size(type_id)
         typeobj = dtype("|S" + str(size))
@@ -755,7 +736,7 @@ def py_h5t_to_dtype(hid_t type_id, object force_native=False, int force_string_l
             tmp_id = get_member_type(type_id, i)
             try:
                 tmp_name = get_member_name(type_id, i)
-                field_list.append( (tmp_name, py_h5t_to_dtype(tmp_id, force_native, force_string_length)) )
+                field_list.append( (tmp_name, py_h5t_to_dtype(tmp_id, byteorder, string_length)) )
             finally:
                 H5Tclose(tmp_id)
 
@@ -805,8 +786,8 @@ def py_h5t_to_dtype(hid_t type_id, object force_native=False, int force_string_l
     else:
         raise ConversionError('Unsupported datatype class "%s"' % CLASS_MAPPER[classtype])
 
-    if force_native:
-        return typeobj.newbyteorder('=')
+    if byteorder is not None:
+        return typeobj.newbyteorder(byteorder)
     return typeobj
 
 def py_dtype_to_h5t(numpy.dtype dtype_in):
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index b16dfe0..b38b56e 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -59,20 +59,34 @@ import h5a
 import h5p
 from errors import H5Error
 
-from transactions import Action, TransactionManager, TransactionStateError
+from transactions import Action, TransactionManager, TransactionStateError, \
+                         IllegalTransactionError
+
+def tempname(prefix=""):
+    return prefix+"".join(random.sample(string.ascii_letters))
 
 # === Base classes / context manager support ==================================
 
-class NamedObject(object):
+class BaseNamed(object):
 
     """ Base class for objects which reside in HDF5 files.  Among other things,
         any named object is a valid context manager for Python's "with"
         statement, capable of tracking transactions in HDF5 files.
     """
 
+    def _set_manager(self, val):
+        self._manager = val
+        if hasattr(self, 'attrs'):
+            self.attrs.manager = val
+    
+    def _get_manager(self):
+        return self._manager
+
+    manager = property(_get_manager, _set_manager)
+
     def __init__(self):
         self.manager = None
-        self._auto = False
+        self._tr_active = False  # transaction active?
 
     def __enter__(self):
         """ Put the object in transaction mode.  If no transaction manager is
@@ -80,6 +94,9 @@ class NamedObject(object):
 
             Please don't call this manually.
         """
+        if self._tr_active:
+            raise TransactionStateError("A transaction is already in progress.")
+
         stat = h5g.get_objinfo(self.id, '.')
         token = (stat.fileno, stat.objno)
 
@@ -87,14 +104,18 @@ class NamedObject(object):
             self.manager = TransactionManager()
         
         self.manager.lock(token)
-        self._auto = True
+        self._tr_active = True
         return self.manager
 
     def __exit__(self, type_, value, tb):
-        """ Exit transaction mode.
+        """ Exit transaction mode.  Commits or rolls back, depending on the
+            given exception state, but does not destroy the transaction manager.
 
             Please don't call this manually.
         """
+        if not self._tr_active:
+            raise TransactionStateError("Exited transaction mode with no transaction in progress")
+
         if type_ is None:
             self.manager.commit()
         else:
@@ -104,8 +125,7 @@ class NamedObject(object):
         token = (stat.fileno, stat.objno)
 
         self.manager.unlock(token)
-        self.manager = None
-        self._auto = False
+        self._tr_active = False
 
     def begin_transaction(self, manager=None):
         """ Manually put the object into "transaction" mode.  Every API call 
@@ -125,25 +145,14 @@ class NamedObject(object):
             you can use any object of this class as the context manager in a 
             Python "with" statement.
         """
-        if self._auto:
-            raise TransactionStateError('No manual control inside a "with" block.')
-
-        if self.manager is not None:
-            raise TransactionStateError("Transaction already in progress.")
-
         self.manager = manager
         return self.__enter__()
 
     def end_transaction(self):
         """ Take the object out of "transaction" mode.  Implicitly commits any
             pending actions.  Raises TransactionStateError if the object is not
-            in transaction mode.
+            in transaction mode.  Never call this inside a "with" block.
         """
-        if self._auto:
-            raise TransactionStateError('No manual control inside a "with" block.')
-        if self.manager is None:
-            raise TransactionStateError("No transaction in progress.")
-
         self.__exit__(None, None, None)
 
 class WithWrapper(object):
@@ -165,7 +174,7 @@ class WithWrapper(object):
         return mgr
 
     def __exit__(self, type_, value, tb):
-        return any(obj.__exit__(type_, value, tb) for obj in objs)
+        return all(obj.__exit__(type_, value, tb) for obj in objs)
 
 def many(*args):
     """ Enables tracking of multiple named objects in Python's "with" statement.
@@ -177,58 +186,99 @@ def many(*args):
 
 # === Main classes (Dataset/Group/File) =======================================
 
-class Dataset(object):
+class Dataset(BaseNamed):
 
     """ High-level interface to an HDF5 dataset
 
-        A Dataset object is designed to permit "Numpy-like" access to the 
-        underlying HDF5 dataset.  It supports array-style indexing, which 
-        returns Numpy ndarrays.  For the case of arrays containing compound
-        data, it also allows a "compound mask" to be set, allowing you to 
-        only extract elements which match names in the mask.  The underlying
-        array can also be written to using the indexing syntax.
-
-        HDF5 attribute access is provided through the property obj.attrs.  See
-        the AttributeManager class documentation for more information.
-
-        Read-only properties:
-        names       Compound fields defined in this object (tuple or None)
-        names_mask  Current mask controlling compound access (tuple or None)
-        shape       Tuple containing array dimensions
-        dtype       A Numpy dtype representing the array data-type.
-
-        Writable properties:
-        force_native    
-            Returned data will be automatically converted
-            to the native platform byte order
-
-        force_string_length     
-            Variable-length strings will be converted to
-            Numpy strings of this length.
+        TODO: rework this
     """
 
     # --- Properties (Dataset) ------------------------------------------------
 
-    def _set_native(self, val):
-        self._force_native = bool(val) if val is not None else None
+    def _get_byteorder(self):
+        return self._byteorder
+
+    def _set_byteorder(self, val):
+        valid = [None, '<', '>', '=']
+        if not val in valid:
+            raise ValueError("Byte order must be one of %s (got %s)" % (", ".join(valid), str(val)))
+
+        if not self._tr_active:
+            self._byteorder = val
+        else:
+            backup = self._byteorder
+            action = Action("Set byte order to " + str(val), 
+                    (setattr, (self, '_byteorder', val), {}),
+                    (setattr, (self, '_byteorder', backup), ()),
+                    None)
+            self.manager.do(action)
+
+    def _get_string_length(self):
+        return self._string_length
 
     def _set_string_length(self, val):
-        self._string_length = val
+        if val is not None and val < 1:
+            raise ValueError("String length must be at least 1.")
 
-    names_mask = property(lambda self: self._fields)
-    names = property(lambda self: self.dtype.names)
+        if not self._tr_active:
+            self._string_length = val
+        else:
+            backup = self._string_length
+            action = Action("Set string length to "+str(val),
+                    (setattr, (self, '_string_length', val), {}),
+                    (setattr, (self, '_string_length', backup), ()),
+                    None)
+
+    def _get_names_mask(self):
+        return self._fields
 
+    def _set_names_mask(self, iterable):
+        """ Determine which fields of a compound datatype will be read. Only 
+            compound fields whose names match those provided by the given 
+            iterable will be read.  Any given names which do not exist in the
+            HDF5 compound type are simply ignored.
+
+            If the argument is a single string, it will be correctly processed
+            (i.e. not exploded).
+        """
+        if iterable == None:
+            val = None
+        else:
+            if isinstance(iterable, basestring):
+                iterable = (iterable,)    # not 'i','t','e','r','a','b','l','e'
+            val = tuple(iterable)
+
+        if not self._tr_active:
+            self._fields = val
+        else:
+            backup = self._string_length
+            action = Action("Set names mask to \"%s\"" % str(val),
+                    (setattr, (self, '_fields', val), {}),
+                    (setattr, (self, '_fields', backup), ()),
+                    None)
+
+
+    #: Byte order for data read/written by this object; can be None, <, >, =.
+    byteorder = property(_get_byteorder, _set_byteorder)
+
+    #: Convert vlen strings to fixed-width: None or >= 1.
+    string_length = property(_get_string_length, _set_string_length)
+
+    #: Restrict I/O to these fields.  None, iterable of strings, or single string.
+    names_mask = property(_get_names_mask, _set_names_mask)
+
+    #: Numpy-style shape tuple for this dataset. Readonly.
     shape = property(lambda self: h5d.py_shape(self.id))
-    dtype = property(lambda self: h5d.py_dtype(self.id))
 
-    force_native = property(lambda self: self._force_native, _set_native)
-    force_string_length = property(lambda self: self._string_length, _set_string_length)
+    #: Numpy dtype representing dataset's type. Readonly.
+    dtype = property(lambda self: h5d.py_dtype(self.id))
 
+    #: Attribute manager; see AttributeManager docstring.
     attrs = property(lambda self: self._attrs)
 
     # --- Public interface (Dataset) ------------------------------------------
 
-    def __init__(self, group, name, create=False, force=False,
+    def __init__(self, group, name, create=False,
                     data=None, dtype=None, shape=None, 
                     chunks=None, compression=None, shuffle=False, fletcher32=False):
         """ Create a new Dataset object.  There are two modes of operation:
@@ -244,11 +294,7 @@ class Dataset(object):
                 "dtype" (Numpy dtype object) and "shape" (tuple of dimensions).
                 Chunks/compression/shuffle/fletcher32 can also be specified.
 
-                By default, creating a dataset will fail if another of the
-                same name already exists. If you specify force=True, any 
-                existing dataset will be unlinked, and the new one created.
-                This is as close as possible to an atomic operation; if the 
-                dataset creation fails, the old dataset isn't destroyed.
+                If a dataset of the same name already exists, creation fails.
 
             Creation keywords (* is default):
 
@@ -257,32 +303,33 @@ class Dataset(object):
             shuffle:       Use the shuffle filter? (requires compression) T/F*
             fletcher32:    Enable Fletcher32 error detection? T/F*
         """
+        BaseNamed.__init__(self)
         if create:
-            if force and h5g.py_exists(group.id,name):
-                tmpname = 'h5py_temp_' + ''.join(random.sample(string.ascii_letters, 30))
-                tmpid = h5d.py_create(group.id, tmpname, data, shape, 
-                                    chunks, compression, shuffle, fletcher32)
-                h5g.unlink(group.id, name)
-                h5g.link(group.id, tmpname, name)
-                h5g.unlink(group.id, tmpname)
-
-            else:
+            if not group._tr_active:
                 self.id = h5d.py_create(group.id, name, data, shape, 
                                         chunks, compression, shuffle, fletcher32)
+            else:
+                action = Action("Create dataset \"%s\"" % name,
+                        (h5d.py_create, (group.id, name, data, shape, chunks,
+                                         compression, shuffle, fletcher32), {}),
+                        (h5g.unlink, (group.id, name), {}),
+                        None)
+                group.manager.do(action)
+
         else:
             if any((data,dtype,shape,chunks,compression,shuffle,fletcher32)):
                 raise ValueError('You cannot specify keywords when opening a dataset.')
             self.id = h5d.open(group.id, name)
 
-        self._fields = None
         self._attrs = AttributeManager(self)
-        self.force_native = None
-        self.force_string_length = None
+        self._fields = None
+        self._byteorder = None
+        self._string_length = None
 
     def __getitem__(self, *args):
         """ Read a slice from the underlying HDF5 array.  Currently only
             numerical slices are supported; for recarray-style access consider
-            using set_names_mask().
+            using the names_mask property.
         """
         if any( [isinstance(x, basestring) for x in args] ):
             raise TypeError("Slices must be numbers; recarray-style indexing is not yet supported.")
@@ -299,36 +346,34 @@ class Dataset(object):
             and the Numpy array's datatype must be convertible to the HDF5
             array's datatype.
         """
-        start, count, stride = _slices_to_tuples(args[0:len(args)-1])
-        h5d.py_write_slab(self.id, args[-1], start, stride)
-
-    def set_names_mask(self, iterable=None):
-        """ Determine which fields of a compound datatype will be read. Only 
-            compound fields whose names match those provided by the given 
-            iterable will be read.  Any given names which do not exist in the
-            HDF5 compound type are simply ignored.
-
-            If the argument is a single string, it will be correctly processed
-            (i.e. not exploded).
-        """
-        if iterable == None:
-            self._fields = None
+        val = args[-1]
+        slices = args[0:len(args)-1]
+        start, count, stride = _slices_to_tuples(slices)
+        if not self._tr_active:
+            h5d.py_write_slab(self.id, val, start, stride)
         else:
-            if isinstance(iterable, basestring):
-                iterable = (iterable,)    # not 'i','t','e','r','a','b','l','e'
-            self._fields = tuple(iterable)
+            backup = h5d.py_read_slab(self.id, start, count, stride)
+            action = Action("Write slice",
+                            (h5d.py_write_slab, (self.id, val, start, stride), {}),
+                            (h5d.py_write_slab, (self.id, backup, start, stride), {}),
+                            None )
+            self.manager.do(action)
 
     def close(self):
         """ Force the HDF5 library to close and free this object.  You 
             shouldn't need to do this in normal operation; HDF5 objects are 
             automatically closed when their Python counterparts are deallocated.
         """
+        if self._tr_active:
+            raise IllegalTransactionError("close() is not a transactable operation.")
         h5d.close(self.id)
 
     def __del__(self):
         try:
+            if self._tr_active:
+                self.manager.commit()
             h5d.close(self.id)
-        except H5Error:
+        except:
             pass
 
     def __str__(self):
@@ -337,7 +382,7 @@ class Dataset(object):
     def __repr__(self):
         return self.__str__()
 
-class Group(object):
+class Group(BaseNamed):
     """ Represents an HDF5 group object
 
         Group members are accessed through dictionary-style syntax.  Iterating
@@ -375,9 +420,17 @@ class Group(object):
             raising an exception if it doesn't exist.  If "create" is True,
             create a new HDF5 group and link it into the parent group.
         """
+        BaseNamed.__init__(self)
         self.id = 0
         if create:
-            self.id = h5g.create(parent_object.id, name)
+            if not parent_object._tr_active:
+                self.id = h5g.create(parent_object.id, name)
+            else:
+                action = Action('Create group "%s"' % name,
+                        (h5g.create, (parent_object.id, name), {}),
+                        (h5g.unlink, (parent_object.id, name), {}),
+                        None)
+                parent_object.manager.do(action)
         else:
             self.id = h5g.open(parent_object.id, name)
         
@@ -387,7 +440,10 @@ class Group(object):
     def __delitem__(self, name):
         """ Unlink a member from the HDF5 group.
         """
-        h5g.unlink(self.id, name)
+        if not self._tr_active:
+            h5g.unlink(self.id, name)
+        else:
+            raise IllegalTransactionError("Deleting members is not (yet) a transactable operation.")
 
     def __setitem__(self, name, obj):
         """ Add the given object to the group.  Here are the rules:
@@ -402,23 +458,27 @@ class Group(object):
                 dtype.
         """
         if isinstance(obj, Group) or isinstance(obj, Dataset):
-            h5g.link(self.id, name, h5i.get_name(obj.id), link_type=h5g.LINK_HARD)
+            objname = h5i.get_name(obj.id)
+            if not self._tr_active:
+                h5g.link(self.id, name, objname, link_type=h5g.LINK_HARD)
+            else:
+                action = Action('Create link "%s"' % name,
+                            (h5g.link, (self.id, name, objname), {}),
+                            (h5g.unlink, (self.id, name), {}),
+                             None )
+                self.manager.do(action)
+
+        else:
+            if not isinstance(obj, numpy.ndarray):
+                obj = numpy.array(obj)
 
-        elif isinstance(obj, numpy.ndarray):
             if h5t.py_can_convert_dtype(obj.dtype):
+                # Dataset creation is automatically recorded
                 dset = Dataset(self, name, data=obj, create=True, force=True)
                 dset.close()
             else:
                 raise ValueError("Don't know how to store data of this type in a dataset: " + repr(obj.dtype))
 
-        else:
-            arr = numpy.array(obj)
-            if h5t.py_can_convert_dtype(arr.dtype):
-                dset = Dataset(self, name, data=arr, create=True, force=True)
-                dset.close()
-            else:
-                raise ValueError("Don't know how to store data of this type in a dataset: " + repr(arr.dtype))
-
     def __getitem__(self, name):
         """ Retrive the Group or Dataset object.  If the Dataset is scalar,
             returns its value instead.
@@ -451,12 +511,15 @@ class Group(object):
             have to use this, as these objects are automatically closed when
             their Python equivalents are deallocated.
         """
-        h5g.close(self.id)
+        if not self._tr_active:
+            h5g.close(self.id)
+        else:
+            raise IllegalTransactionError("Can't close group while transaction is active.")
 
     def __del__(self):
         try:
             h5g.close(self.id)
-        except H5Error:
+        except:
             pass
 
     def __str__(self):
@@ -491,6 +554,7 @@ class File(Group):
             If "noclobber" is specified, file truncation (w/w+) will fail if 
             the file already exists.  Note this is NOT the default.
         """
+        BaseNamed.__init__(self)
         if not mode in self._modes:
             raise ValueError("Invalid mode; must be one of %s" % ', '.join(self._modes))
               
@@ -512,6 +576,7 @@ class File(Group):
         self.filename = name
         self.mode = mode
         self.noclobber = noclobber
+        self.attrs = AttributeManager(self)
 
     def close(self):
         """ Close this HDF5 object.  Note that any further access to objects
@@ -560,6 +625,7 @@ class AttributeManager(object):
     """
     def __init__(self, parent_object):
         self.id = parent_object.id
+        self.manager = None
 
     def __getitem__(self, name):
         obj = h5a.py_get(self.id, name)
@@ -570,12 +636,42 @@ class AttributeManager(object):
     def __setitem__(self, name, value):
         if not isinstance(value, numpy.ndarray):
             value = numpy.array(value)
-        if h5a.py_exists(self.id, name):
-            h5a.delete(self.id, name)
-        h5a.py_set(self.id, name, value)
+
+        def set_attribute(objid, name, newval):
+            backup = None
+            if h5a.py_exists(objid, name):
+                backup = h5a.py_get(objid, name)
+                h5a.delete(objid, name)
+            try:
+                h5a.py_set(objid, name, value)
+            except:
+                if backup is not None:
+                    h5a.py_set(objid, name, backup)
+                raise
+
+        if self.manager is not None:
+            set_attribute(self.id, name, value)
+        else:
+            if h5a.py_exists(self.id, name):
+                backup = h5a.py_get(self.id, name)
+                undo = (set_attribute, (self.id, name, backup), {})
+            else:
+                undo = (h5a.delete, (self.id, name), {})
+
+            action = Action("Set attribute \"%s\"" % name,
+                        (set_attribute, (self.id, name, value), {}),
+                        undo,
+                        None)
 
     def __delitem__(self, name):
-        h5a.delete(self.id, name)
+        if self.manager is None:
+            h5a.delete(self.id, name)
+        else:
+            backup = h5a.py_get(self.id, name)
+            action = Action("Delete \"%s\"" % name,
+                        (h5a.delete, (self.id, name) ,{}),
+                        (h5a.py_set, (self.id, name, backup), {}),
+                        None)
 
     def __len__(self):
         return h5a.get_num_attrs(self.id)
@@ -590,7 +686,7 @@ class AttributeManager(object):
     def __str__(self):
         return "Attributes: "+', '.join(['"%s"' % x for x in self])
 
-class NamedType(object):
+class NamedType(BaseNamed):
 
     """ Represents a named datatype, stored in a file.  
 
diff --git a/h5py/transactions.py b/h5py/transactions.py
index 323891b..d5cc34a 100644
--- a/h5py/transactions.py
+++ b/h5py/transactions.py
@@ -10,6 +10,10 @@ class TransactionStateError(TransactionError):
     """
     pass
 
+class IllegalTransactionError(TransactionError):
+    pass
+
+
 class Action(object):
 
     """ Represents the smallest component of a transaction.

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git



More information about the debian-science-commits mailing list