[h5py] 27/455: More transactions, broken for now
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:14 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.
commit 046905455cc8d07fdebeaefcf5fa487efbbf00af
Author: andrewcollette <andrew.collette at gmail.com>
Date: Sat May 24 00:19:42 2008 +0000
More transactions, broken for now
---
h5py/h5t.pyx | 33 ++----
h5py/highlevel.py | 316 +++++++++++++++++++++++++++++++++------------------
h5py/transactions.py | 4 +
3 files changed, 217 insertions(+), 136 deletions(-)
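For orientation, the highlevel.py hunks below give BaseNamed an __exit__ that commits when a "with" block finishes cleanly and rolls back when an exception escapes. The following is a minimal standalone sketch of just that policy (plain Python, not the actual h5py classes):

    class TxnSketch(object):
        def __init__(self):
            self.log = []
        def __enter__(self):
            return self
        def __exit__(self, type_, value, tb):
            # Same policy as BaseNamed.__exit__ in the diff below:
            # clean exit -> commit, exception -> rollback.
            self.log.append("commit" if type_ is None else "rollback")
            return False              # never suppress the caller's exception

    t = TxnSketch()
    with t:
        pass                          # clean exit
    try:
        with t:
            raise ValueError("boom")
    except ValueError:
        pass                          # the error still propagates, after a rollback
    assert t.log == ["commit", "rollback"]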
diff --git a/h5py/h5t.pyx b/h5py/h5t.pyx
index 99bbc67..856632b 100644
--- a/h5py/h5t.pyx
+++ b/h5py/h5t.pyx
@@ -690,27 +690,8 @@ def py_set_complex_names(char* real_name=NULL, char* imag_name=NULL):
raise ValueError("Must be called with no arguments or exactly 2: STRING real_name, STRING imag_name")
-def py_h5t_to_dtype(hid_t type_id, object force_native=False, int force_string_length=-1, object compound_fields=None):
- """ (INT type_id, BOOL force_native=False, INT force_string_length=-1,
- TUPLE compound_fields=None)
- => INT type_id
-
- Produce a Numpy dtype of the same general kind as an HDF5 datatype.
- Note that the result is *NOT* guaranteed to be memory-compatible with
- the HDF5 type; for that use py_dtype_to_h5t.
-
- If force_native is True, all byte-orders in the returned dtype will be
- in native order. Variable-length (VLEN) strings are currently not
- supported, but by providing a value for <force_string_length> they
- can be converted to fixed-length strings compatible with Numpy.
-
- If compound_fields is provided, it must be a tuple of names which
- correspond to fields in the HDF5 object. Only HDF5 field names which
- are present in this tuple will be copied, and will be inserted into the
- dtype in the order that they appear in the tuple. Fields which are
- not present in the HDF5 type are discarded. As a side effect, this
- disables automatic conversion of compound types to complex numbers,
- even if they have the appropriate names.
+def py_h5t_to_dtype(hid_t type_id, object byteorder=None, int string_length=-1, object compound_fields=None):
+ """ TODO: rework this.
"""
cdef int classtype
cdef int sign
@@ -735,10 +716,10 @@ def py_h5t_to_dtype(hid_t type_id, object force_native=False, int force_string_l
elif classtype == H5T_STRING:
if is_variable_str(type_id):
- if force_string_length <= 0:
+ if string_length <= 0:
raise ConversionError("Variable-length strings are unsupported; try using a fixed size via string_length")
else:
- size = force_string_length
+ size = string_length
else:
size = get_size(type_id)
typeobj = dtype("|S" + str(size))
@@ -755,7 +736,7 @@ def py_h5t_to_dtype(hid_t type_id, object force_native=False, int force_string_l
tmp_id = get_member_type(type_id, i)
try:
tmp_name = get_member_name(type_id, i)
- field_list.append( (tmp_name, py_h5t_to_dtype(tmp_id, force_native, force_string_length)) )
+ field_list.append( (tmp_name, py_h5t_to_dtype(tmp_id, byteorder, string_length)) )
finally:
H5Tclose(tmp_id)
@@ -805,8 +786,8 @@ def py_h5t_to_dtype(hid_t type_id, object force_native=False, int force_string_l
else:
raise ConversionError('Unsupported datatype class "%s"' % CLASS_MAPPER[classtype])
- if force_native:
- return typeobj.newbyteorder('=')
+ if byteorder is not None:
+ return typeobj.newbyteorder(byteorder)
return typeobj
def py_dtype_to_h5t(numpy.dtype dtype_in):
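The new byteorder argument above replaces force_native; any value given ('<', '>' or '=') is handed directly to numpy's dtype.newbyteorder. A short numpy-only illustration of what that conversion does (independent of h5py):

    import numpy as np

    dt = np.dtype('<i4')                  # explicitly little-endian 32-bit integer
    print(dt.newbyteorder('>'))           # big-endian variant of the same dtype
    print(dt.newbyteorder('='))           # native byte order for the running platform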
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index b16dfe0..b38b56e 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -59,20 +59,34 @@ import h5a
import h5p
from errors import H5Error
-from transactions import Action, TransactionManager, TransactionStateError
+from transactions import Action, TransactionManager, TransactionStateError, \
+ IllegalTransactionError
+
+def tempname(prefix=""):
+    return prefix+"".join(random.sample(string.ascii_letters, 30))
# === Base classes / context manager support ==================================
-class NamedObject(object):
+class BaseNamed(object):
""" Base class for objects which reside in HDF5 files. Among other things,
any named object is a valid context manager for Python's "with"
statement, capable of tracking transactions in HDF5 files.
"""
+ def _set_manager(self, val):
+ self._manager = val
+ if hasattr(self, 'attrs'):
+ self.attrs.manager = val
+
+ def _get_manager(self):
+ return self._manager
+
+ manager = property(_get_manager, _set_manager)
+
def __init__(self):
self.manager = None
- self._auto = False
+ self._tr_active = False # transaction active?
def __enter__(self):
""" Put the object in transaction mode. If no transaction manager is
@@ -80,6 +94,9 @@ class NamedObject(object):
Please don't call this manually.
"""
+ if self._tr_active:
+ raise TransactionStateError("A transaction is already in progress.")
+
stat = h5g.get_objinfo(self.id, '.')
token = (stat.fileno, stat.objno)
@@ -87,14 +104,18 @@ class NamedObject(object):
self.manager = TransactionManager()
self.manager.lock(token)
- self._auto = True
+ self._tr_active = True
return self.manager
def __exit__(self, type_, value, tb):
- """ Exit transaction mode.
+ """ Exit transaction mode. Commits or rolls back, depending on the
+ given exception state, but does not destroy the transaction manager.
Please don't call this manually.
"""
+ if not self._tr_active:
+ raise TransactionStateError("Exited transaction mode with no transaction in progress")
+
if type_ is None:
self.manager.commit()
else:
@@ -104,8 +125,7 @@ class NamedObject(object):
token = (stat.fileno, stat.objno)
self.manager.unlock(token)
- self.manager = None
- self._auto = False
+ self._tr_active = False
def begin_transaction(self, manager=None):
""" Manually put the object into "transaction" mode. Every API call
@@ -125,25 +145,14 @@ class NamedObject(object):
you can use any object of this class as the context manager in a
Python "with" statement.
"""
- if self._auto:
- raise TransactionStateError('No manual control inside a "with" block.')
-
- if self.manager is not None:
- raise TransactionStateError("Transaction already in progress.")
-
self.manager = manager
return self.__enter__()
def end_transaction(self):
""" Take the object out of "transaction" mode. Implicitly commits any
pending actions. Raises TransactionStateError if the object is not
- in transaction mode.
+ in transaction mode. Never call this inside a "with" block.
"""
- if self._auto:
- raise TransactionStateError('No manual control inside a "with" block.')
- if self.manager is None:
- raise TransactionStateError("No transaction in progress.")
-
self.__exit__(None, None, None)
class WithWrapper(object):
@@ -165,7 +174,7 @@ class WithWrapper(object):
return mgr
def __exit__(self, type_, value, tb):
- return any(obj.__exit__(type_, value, tb) for obj in objs)
+ return all(obj.__exit__(type_, value, tb) for obj in objs)
def many(*args):
""" Enables tracking of multiple named objects in Python's "with" statement.
@@ -177,58 +186,99 @@ def many(*args):
# === Main classes (Dataset/Group/File) =======================================
-class Dataset(object):
+class Dataset(BaseNamed):
""" High-level interface to an HDF5 dataset
- A Dataset object is designed to permit "Numpy-like" access to the
- underlying HDF5 dataset. It supports array-style indexing, which
- returns Numpy ndarrays. For the case of arrays containing compound
- data, it also allows a "compound mask" to be set, allowing you to
- only extract elements which match names in the mask. The underlying
- array can also be written to using the indexing syntax.
-
- HDF5 attribute access is provided through the property obj.attrs. See
- the AttributeManager class documentation for more information.
-
- Read-only properties:
- names Compound fields defined in this object (tuple or None)
- names_mask Current mask controlling compound access (tuple or None)
- shape Tuple containing array dimensions
- dtype A Numpy dtype representing the array data-type.
-
- Writable properties:
- force_native
- Returned data will be automatically converted
- to the native platform byte order
-
- force_string_length
- Variable-length strings will be converted to
- Numpy strings of this length.
+ TODO: rework this
"""
# --- Properties (Dataset) ------------------------------------------------
- def _set_native(self, val):
- self._force_native = bool(val) if val is not None else None
+ def _get_byteorder(self):
+ return self._byteorder
+
+ def _set_byteorder(self, val):
+ valid = [None, '<', '>', '=']
+        if val not in valid:
+            raise ValueError("Byte order must be one of %s (got %s)" % (", ".join(map(str, valid)), str(val)))
+
+ if not self._tr_active:
+ self._byteorder = val
+ else:
+ backup = self._byteorder
+ action = Action("Set byte order to " + str(val),
+ (setattr, (self, '_byteorder', val), {}),
+                            (setattr, (self, '_byteorder', backup), {}),
+ None)
+ self.manager.do(action)
+
+ def _get_string_length(self):
+ return self._string_length
def _set_string_length(self, val):
- self._string_length = val
+ if val is not None and val < 1:
+ raise ValueError("String length must be at least 1.")
- names_mask = property(lambda self: self._fields)
- names = property(lambda self: self.dtype.names)
+ if not self._tr_active:
+ self._string_length = val
+ else:
+ backup = self._string_length
+ action = Action("Set string length to "+str(val),
+ (setattr, (self, '_string_length', val), {}),
+                            (setattr, (self, '_string_length', backup), {}),
+ None)
+
+ def _get_names_mask(self):
+ return self._fields
+ def _set_names_mask(self, iterable):
+ """ Determine which fields of a compound datatype will be read. Only
+ compound fields whose names match those provided by the given
+ iterable will be read. Any given names which do not exist in the
+ HDF5 compound type are simply ignored.
+
+ If the argument is a single string, it will be correctly processed
+ (i.e. not exploded).
+ """
+ if iterable == None:
+ val = None
+ else:
+ if isinstance(iterable, basestring):
+ iterable = (iterable,) # not 'i','t','e','r','a','b','l','e'
+ val = tuple(iterable)
+
+ if not self._tr_active:
+ self._fields = val
+ else:
+            backup = self._fields
+            action = Action("Set names mask to \"%s\"" % str(val),
+                            (setattr, (self, '_fields', val), {}),
+                            (setattr, (self, '_fields', backup), {}),
+ None)
+
+
+ #: Byte order for data read/written by this object; can be None, <, >, =.
+ byteorder = property(_get_byteorder, _set_byteorder)
+
+ #: Convert vlen strings to fixed-width: None or >= 1.
+ string_length = property(_get_string_length, _set_string_length)
+
+    #: Restrict I/O to these fields. None, iterable of strings, or single string.
+ names_mask = property(_get_names_mask, _set_names_mask)
+
+ #: Numpy-style shape tuple for this dataset. Readonly.
shape = property(lambda self: h5d.py_shape(self.id))
- dtype = property(lambda self: h5d.py_dtype(self.id))
- force_native = property(lambda self: self._force_native, _set_native)
- force_string_length = property(lambda self: self._string_length, _set_string_length)
+ #: Numpy dtype representing dataset's type. Readonly.
+ dtype = property(lambda self: h5d.py_dtype(self.id))
+ #: Attribute manager; see AttributeManager docstring.
attrs = property(lambda self: self._attrs)
# --- Public interface (Dataset) ------------------------------------------
- def __init__(self, group, name, create=False, force=False,
+ def __init__(self, group, name, create=False,
data=None, dtype=None, shape=None,
chunks=None, compression=None, shuffle=False, fletcher32=False):
""" Create a new Dataset object. There are two modes of operation:
@@ -244,11 +294,7 @@ class Dataset(object):
"dtype" (Numpy dtype object) and "shape" (tuple of dimensions).
Chunks/compression/shuffle/fletcher32 can also be specified.
- By default, creating a dataset will fail if another of the
- same name already exists. If you specify force=True, any
- existing dataset will be unlinked, and the new one created.
- This is as close as possible to an atomic operation; if the
- dataset creation fails, the old dataset isn't destroyed.
+ If a dataset of the same name already exists, creation fails.
Creation keywords (* is default):
@@ -257,32 +303,33 @@ class Dataset(object):
shuffle: Use the shuffle filter? (requires compression) T/F*
fletcher32: Enable Fletcher32 error detection? T/F*
"""
+ BaseNamed.__init__(self)
if create:
- if force and h5g.py_exists(group.id,name):
- tmpname = 'h5py_temp_' + ''.join(random.sample(string.ascii_letters, 30))
- tmpid = h5d.py_create(group.id, tmpname, data, shape,
- chunks, compression, shuffle, fletcher32)
- h5g.unlink(group.id, name)
- h5g.link(group.id, tmpname, name)
- h5g.unlink(group.id, tmpname)
-
- else:
+ if not group._tr_active:
self.id = h5d.py_create(group.id, name, data, shape,
chunks, compression, shuffle, fletcher32)
+ else:
+ action = Action("Create dataset \"%s\"" % name,
+ (h5d.py_create, (group.id, name, data, shape, chunks,
+ compression, shuffle, fletcher32), {}),
+ (h5g.unlink, (group.id, name), {}),
+ None)
+ group.manager.do(action)
+
else:
if any((data,dtype,shape,chunks,compression,shuffle,fletcher32)):
raise ValueError('You cannot specify keywords when opening a dataset.')
self.id = h5d.open(group.id, name)
- self._fields = None
self._attrs = AttributeManager(self)
- self.force_native = None
- self.force_string_length = None
+ self._fields = None
+ self._byteorder = None
+ self._string_length = None
def __getitem__(self, *args):
""" Read a slice from the underlying HDF5 array. Currently only
numerical slices are supported; for recarray-style access consider
- using set_names_mask().
+ using the names_mask property.
"""
if any( [isinstance(x, basestring) for x in args] ):
raise TypeError("Slices must be numbers; recarray-style indexing is not yet supported.")
@@ -299,36 +346,34 @@ class Dataset(object):
and the Numpy array's datatype must be convertible to the HDF5
array's datatype.
"""
- start, count, stride = _slices_to_tuples(args[0:len(args)-1])
- h5d.py_write_slab(self.id, args[-1], start, stride)
-
- def set_names_mask(self, iterable=None):
- """ Determine which fields of a compound datatype will be read. Only
- compound fields whose names match those provided by the given
- iterable will be read. Any given names which do not exist in the
- HDF5 compound type are simply ignored.
-
- If the argument is a single string, it will be correctly processed
- (i.e. not exploded).
- """
- if iterable == None:
- self._fields = None
+ val = args[-1]
+ slices = args[0:len(args)-1]
+ start, count, stride = _slices_to_tuples(slices)
+ if not self._tr_active:
+ h5d.py_write_slab(self.id, val, start, stride)
else:
- if isinstance(iterable, basestring):
- iterable = (iterable,) # not 'i','t','e','r','a','b','l','e'
- self._fields = tuple(iterable)
+ backup = h5d.py_read_slab(self.id, start, count, stride)
+ action = Action("Write slice",
+ (h5d.py_write_slab, (self.id, val, start, stride), {}),
+ (h5d.py_write_slab, (self.id, backup, start, stride), {}),
+ None )
+ self.manager.do(action)
def close(self):
""" Force the HDF5 library to close and free this object. You
shouldn't need to do this in normal operation; HDF5 objects are
automatically closed when their Python counterparts are deallocated.
"""
+ if self._tr_active:
+ raise IllegalTransactionError("close() is not a transactable operation.")
h5d.close(self.id)
def __del__(self):
try:
+ if self._tr_active:
+ self.manager.commit()
h5d.close(self.id)
- except H5Error:
+ except:
pass
def __str__(self):
@@ -337,7 +382,7 @@ class Dataset(object):
def __repr__(self):
return self.__str__()
-class Group(object):
+class Group(BaseNamed):
""" Represents an HDF5 group object
Group members are accessed through dictionary-style syntax. Iterating
@@ -375,9 +420,17 @@ class Group(object):
raising an exception if it doesn't exist. If "create" is True,
create a new HDF5 group and link it into the parent group.
"""
+ BaseNamed.__init__(self)
self.id = 0
if create:
- self.id = h5g.create(parent_object.id, name)
+ if not parent_object._tr_active:
+ self.id = h5g.create(parent_object.id, name)
+ else:
+ action = Action('Create group "%s"' % name,
+ (h5g.create, (parent_object.id, name), {}),
+ (h5g.unlink, (parent_object.id, name), {}),
+ None)
+ parent_object.manager.do(action)
else:
self.id = h5g.open(parent_object.id, name)
@@ -387,7 +440,10 @@ class Group(object):
def __delitem__(self, name):
""" Unlink a member from the HDF5 group.
"""
- h5g.unlink(self.id, name)
+ if not self._tr_active:
+ h5g.unlink(self.id, name)
+ else:
+ raise IllegalTransactionError("Deleting members is not (yet) a transactable operation.")
def __setitem__(self, name, obj):
""" Add the given object to the group. Here are the rules:
@@ -402,23 +458,27 @@ class Group(object):
dtype.
"""
if isinstance(obj, Group) or isinstance(obj, Dataset):
- h5g.link(self.id, name, h5i.get_name(obj.id), link_type=h5g.LINK_HARD)
+ objname = h5i.get_name(obj.id)
+ if not self._tr_active:
+ h5g.link(self.id, name, objname, link_type=h5g.LINK_HARD)
+ else:
+ action = Action('Create link "%s"' % name,
+                            (h5g.link, (self.id, name, objname), {'link_type': h5g.LINK_HARD}),
+ (h5g.unlink, (self.id, name), {}),
+ None )
+ self.manager.do(action)
+
+ else:
+ if not isinstance(obj, numpy.ndarray):
+ obj = numpy.array(obj)
- elif isinstance(obj, numpy.ndarray):
if h5t.py_can_convert_dtype(obj.dtype):
+ # Dataset creation is automatically recorded
dset = Dataset(self, name, data=obj, create=True, force=True)
dset.close()
else:
raise ValueError("Don't know how to store data of this type in a dataset: " + repr(obj.dtype))
- else:
- arr = numpy.array(obj)
- if h5t.py_can_convert_dtype(arr.dtype):
- dset = Dataset(self, name, data=arr, create=True, force=True)
- dset.close()
- else:
- raise ValueError("Don't know how to store data of this type in a dataset: " + repr(arr.dtype))
-
def __getitem__(self, name):
""" Retrive the Group or Dataset object. If the Dataset is scalar,
returns its value instead.
@@ -451,12 +511,15 @@ class Group(object):
have to use this, as these objects are automatically closed when
their Python equivalents are deallocated.
"""
- h5g.close(self.id)
+ if not self._tr_active:
+ h5g.close(self.id)
+ else:
+ raise IllegalTransactionError("Can't close group while transaction is active.")
def __del__(self):
try:
h5g.close(self.id)
- except H5Error:
+ except:
pass
def __str__(self):
@@ -491,6 +554,7 @@ class File(Group):
If "noclobber" is specified, file truncation (w/w+) will fail if
the file already exists. Note this is NOT the default.
"""
+ BaseNamed.__init__(self)
if not mode in self._modes:
raise ValueError("Invalid mode; must be one of %s" % ', '.join(self._modes))
@@ -512,6 +576,7 @@ class File(Group):
self.filename = name
self.mode = mode
self.noclobber = noclobber
+ self.attrs = AttributeManager(self)
def close(self):
""" Close this HDF5 object. Note that any further access to objects
@@ -560,6 +625,7 @@ class AttributeManager(object):
"""
def __init__(self, parent_object):
self.id = parent_object.id
+ self.manager = None
def __getitem__(self, name):
obj = h5a.py_get(self.id, name)
@@ -570,12 +636,42 @@ class AttributeManager(object):
def __setitem__(self, name, value):
if not isinstance(value, numpy.ndarray):
value = numpy.array(value)
- if h5a.py_exists(self.id, name):
- h5a.delete(self.id, name)
- h5a.py_set(self.id, name, value)
+
+ def set_attribute(objid, name, newval):
+ backup = None
+ if h5a.py_exists(objid, name):
+ backup = h5a.py_get(objid, name)
+ h5a.delete(objid, name)
+ try:
+ h5a.py_set(objid, name, value)
+ except:
+ if backup is not None:
+ h5a.py_set(objid, name, backup)
+ raise
+
+        if self.manager is None:
+ set_attribute(self.id, name, value)
+ else:
+ if h5a.py_exists(self.id, name):
+ backup = h5a.py_get(self.id, name)
+ undo = (set_attribute, (self.id, name, backup), {})
+ else:
+ undo = (h5a.delete, (self.id, name), {})
+
+ action = Action("Set attribute \"%s\"" % name,
+ (set_attribute, (self.id, name, value), {}),
+ undo,
+ None)
def __delitem__(self, name):
- h5a.delete(self.id, name)
+ if self.manager is None:
+ h5a.delete(self.id, name)
+ else:
+ backup = h5a.py_get(self.id, name)
+ action = Action("Delete \"%s\"" % name,
+ (h5a.delete, (self.id, name) ,{}),
+ (h5a.py_set, (self.id, name, backup), {}),
+ None)
def __len__(self):
return h5a.get_num_attrs(self.id)
@@ -590,7 +686,7 @@ class AttributeManager(object):
def __str__(self):
return "Attributes: "+', '.join(['"%s"' % x for x in self])
-class NamedType(object):
+class NamedType(BaseNamed):
""" Represents a named datatype, stored in a file.
diff --git a/h5py/transactions.py b/h5py/transactions.py
index 323891b..d5cc34a 100644
--- a/h5py/transactions.py
+++ b/h5py/transactions.py
@@ -10,6 +10,10 @@ class TransactionStateError(TransactionError):
"""
pass
+class IllegalTransactionError(TransactionError):
+ pass
+
+
class Action(object):
""" Represents the smallest component of a transaction.
--