[h5py] 82/455: Low-level threading; read/writes release GIL; hash functions
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:20 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.
commit 576571bdca1b71ff18558f305fc00ee6086efaaa
Author: andrewcollette <andrew.collette at gmail.com>
Date: Sat Jul 26 20:12:58 2008 +0000
Low-level threading; read/writes release GIL; hash functions
---
h5py/h5.pxd | 12 ++++-
h5py/h5.pyx | 121 +++++++++++++++++++++++++++++++++++++------
h5py/h5d.pxd | 7 ++-
h5py/h5d.pyx | 97 +++++++++++++++++++++++++++++++---
h5py/h5f.pyx | 14 +++++
h5py/h5g.pxd | 4 ++
h5py/h5g.pyx | 16 ++++++
h5py/highlevel.py | 36 +------------
h5py/numpy.pxd | 1 +
h5py/tests/test_highlevel.py | 8 +++
10 files changed, 254 insertions(+), 62 deletions(-)
diff --git a/h5py/h5.pxd b/h5py/h5.pxd
index e84b45b..762051f 100644
--- a/h5py/h5.pxd
+++ b/h5py/h5.pxd
@@ -249,14 +249,22 @@ cdef int _disable_exceptions() except -1
cdef err_c pause_errors() except? NULL
cdef int resume_errors(err_c cookie) except -1
-# === Custom identifier wrappers ==============================================
+cdef object standard_richcmp(object self, object other, int how)
+
+cdef class H5PYConfig:
+
+ cdef object _rlock_type # RLock constructor or compatible
+ cdef object _complex_names # ('r','i')
+ cdef public object _lockdict # Weakref dict for RLock instances
cdef class ObjectID:
""" Base wrapper class for HDF5 object identifiers """
cdef object __weakref__
cdef readonly hid_t id
cdef readonly int _locked
-
+ cdef H5PYConfig _cfg # Used to cache a reference to the global config object
+ cdef object _hash # Used by subclasses to cache a hash value,
+ # which may be expensive to compute.
diff --git a/h5py/h5.pyx b/h5py/h5.pyx
index 9eb055a..8fdcbcb 100644
--- a/h5py/h5.pyx
+++ b/h5py/h5.pyx
@@ -34,6 +34,8 @@ include "conditions.pxi"
from python cimport PyErr_SetObject
import atexit
+import threading
+from weakref import WeakKeyDictionary
# Logging is only enabled when compiled with H5PY_DEBUG nonzero
IF H5PY_DEBUG:
@@ -80,7 +82,78 @@ def _open():
"""
H5open()
-# === Identifier wrappers =====================================================
+cdef class H5PYConfig:
+
+ """
+ Global configuration object for the h5py package.
+ """
+
+ def __init__(self):
+ self._lockdict = WeakKeyDictionary() # ObjectID weakref => RLock instance
+ self._complex_names = ('r','i')
+ self.RLock = threading.RLock
+
+ property RLock:
+ """ Callable returning a reentrant lock (default is threading.RLock).
+
+ Whatever you provide must support the Python context manager
+ protocol, and provide the methods acquire() and release(). It
+ also MUST be reentrant, or dataset reads/writes will deadlock.
+ """
+ def __get__(self):
+ return self._rlock_type
+
+ def __set__(self, val):
+ testlock = val()
+ if not (hasattr(testlock, 'acquire') and hasattr(testlock, 'release') and\
+ hasattr(testlock, '__enter__') and hasattr(testlock, '__exit__')):
+ raise ValueError("Generated locks must provide __enter__, __exit__, acquire, release")
+ self._rlock_type = val
+ self._lockdict.clear()
+
+ property complex_names:
+ """ Tuple (real, img) indicating names used to save complex types.
+ """
+ def __get__(self):
+ return self._complex_names
+
+ def __set__(self, val):
+ # TODO: validation
+ self._complex_names = val
+
+ def _get_lock(self, ObjectID key not None):
+ """ (ObjectID key) => LOCK
+
+ Obtain a reentrant lock instance. Guaranteed to be the same lock
+ for the same key. Keys are kept as weak references; when they
+ disappear, so do the lock objects.
+ """
+ # ObjectID instances which are both equal and hash to the same value
+ # are guaranteed to point to the same underlying HDF5 object.
+ lock = self._lockdict.get(key, None)
+ if lock is None:
+ lock = self._rlock_type()
+ self._lockdict[key] = lock
+ return lock
+
+config = H5PYConfig()
+
+cdef object standard_richcmp(object self, object other, int how):
+ # This needs to be shared because of weird CPython quirks involving
+ # subclasses and the __hash__ method.
+
+ if how == 2 or how == 3:
+
+ if not typecheck(self, ObjectID) and typecheck(other, ObjectID):
+ return NotImplemented
+
+ eq = (hash(self) == hash(other))
+
+ if how == 2:
+ return eq
+ return not eq
+
+ return NotImplemented
cdef class ObjectID:
@@ -101,6 +174,16 @@ cdef class ObjectID:
The truth value of an ObjectID (i.e. bool(obj_id)) indicates whether
the underlying HDF5 identifier is valid.
+
+ Rudimentary thread safety is provided by the property pylock, which is
+ an RLock instance shared by objects that point to the same underlying
+ HDF5 structure. In multithreaded programs, you should acquire this
+ lock before modifying the structure. Locks are independent of one another;
+ locking a file does not prevent access to its objects, nor a group to
+ its members.
+
+ ObjectID subclasses which release the GIL (e.g. around blocking I/O
+ operations) will lock themselves first.
"""
property _valid:
@@ -109,6 +192,15 @@ cdef class ObjectID:
def __get__(self):
return H5Iget_type(self.id) != H5I_BADID
+ property pylock:
+ """ RLock or equivalent for threads. The same lock is returned for
+ equal objects (objects which point to the same HDF5 structure).
+ """
+ def __get__(self):
+ if self._cfg is None:
+ self._cfg = config
+ return self._cfg._get_lock(self)
+
def __nonzero__(self):
""" Truth value for object identifiers (like _valid) """
return self._valid
@@ -145,18 +237,13 @@ cdef class ObjectID:
def __richcmp__(self, object other, int how):
""" Supports only == and != """
+ return standard_richcmp(self, other, how)
- if how == 2 or how == 3:
-
- if not hasattr(other, 'id'):
- return False
- eq = isinstance(other, type(self)) and self.id == other.id
-
- if how == 2:
- return eq
- return not eq
-
- raise TypeError("Only equality comparisons are supported.")
+ def __hash__(self):
+ """ Hash method defaults to the identifier, as this cannot change over
+ the life of the object.
+ """
+ return self.id
def __str__(self):
if self._valid:
@@ -174,10 +261,9 @@ cdef class ObjectID:
def __repr__(self):
return self.__str__()
-
# === Public exception hierarchy ==============================================
-class H5Error(EnvironmentError):
+class H5Error(Exception):
""" Base class for internal HDF5 library exceptions.
Subclass of EnvironmentError; errno is computed from the HDF5 major
and minor error numbers:
@@ -488,12 +574,13 @@ cdef herr_t extract_cb(int n, H5E_error_t *err_desc, void* data_in):
err_struct.min_num = err_desc.min_num
return 1
-cdef herr_t err_callback(void* client_data):
+cdef herr_t err_callback(void* client_data) with gil:
# Callback which sets Python exception based on the current error stack.
# Can't use the standard Pyrex raise because then the traceback
- # points here!
-
+ # points here. MUST be "with gil" as it can be called by nogil HDF5
+ # routines.
+
cdef H5E_error_t err_struct
cdef H5E_major_t mj
cdef H5E_minor_t mn
diff --git a/h5py/h5d.pxd b/h5py/h5d.pxd
index c597852..04148f1 100644
--- a/h5py/h5d.pxd
+++ b/h5py/h5d.pxd
@@ -76,10 +76,13 @@ cdef extern from "hdf5.h":
haddr_t H5Dget_offset(hid_t dset_id) except *
hsize_t H5Dget_storage_size(hid_t dset_id) except? 0
+ # These must have their return values checked manually. The functions
+ # H5PY_H5Dread and H5PY_H5Dwrite return -1 specifically, for use when
+ # the GIL is released and PyErr_Occurred() is inadvisable.
herr_t H5Dread(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id,
- hid_t file_space_id, hid_t plist_id, void *buf) except *
+ hid_t file_space_id, hid_t plist_id, void *buf) nogil
herr_t H5Dwrite(hid_t dset_id, hid_t mem_type, hid_t mem_space, hid_t
- file_space, hid_t xfer_plist, void* buf) except *
+ file_space, hid_t xfer_plist, void* buf) nogil
herr_t H5Dextend(hid_t dataset_id, hsize_t *size) except *
diff --git a/h5py/h5d.pyx b/h5py/h5d.pyx
index 9d0eb8a..3e71a2e 100644
--- a/h5py/h5d.pyx
+++ b/h5py/h5d.pyx
@@ -15,10 +15,11 @@
"""
# Pyrex compile-time imports
+from h5 cimport standard_richcmp
from h5s cimport H5S_ALL, H5S_UNLIMITED, H5S_SCALAR, H5S_SIMPLE, \
H5Sget_simple_extent_type, H5Sclose, H5Sselect_all, \
H5Sget_simple_extent_ndims, H5Sget_select_npoints
-from numpy cimport import_array, PyArray_DATA
+from numpy cimport import_array, PyArray_DATA, NPY_WRITEABLE
from utils cimport check_numpy_read, check_numpy_write, \
require_tuple, \
convert_tuple, \
@@ -29,6 +30,7 @@ from h5 cimport HADDR_UNDEF
import h5
import h5t
import h5s
+import h5g
import_array()
@@ -75,6 +77,30 @@ def open(ObjectID loc not None, char* name):
"""
return DatasetID(H5Dopen(loc.id, name))
+# --- Proxy functions for safe(r) threading -----------------------------------
+
+# It's not legal to call PyErr_Occurred() with nogil, so we can't use
+# the standard except * syntax. Trap negative return numbers and convert them
+# to something Pyrex can recognize.
+
+cdef int H5PY_H5Dread(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id,
+ hid_t file_space_id, hid_t plist_id, void *buf) nogil except -1:
+
+ cdef herr_t retval
+ retval = H5Dread(dset_id, mem_type_id,mem_space_id, file_space_id,
+ plist_id, buf)
+ if retval < 0:
+ return -1
+ return retval
+
+cdef int H5PY_H5Dwrite(hid_t dset_id, hid_t mem_type, hid_t mem_space, hid_t
+ file_space, hid_t xfer_plist, void* buf) nogil except -1:
+ cdef herr_t retval
+ retval = H5Dwrite(dset_id, mem_type, mem_space, file_space,
+ xfer_plist, buf)
+ if retval < 0:
+ return -1
+ return retval
# === Dataset I/O =============================================================
@@ -146,14 +172,38 @@ cdef class DatasetID(ObjectID):
wide variety of dataspace configurations are possible, this is not
checked. You can easily crash Python by reading in data from too
large a dataspace.
+
+ The GIL is released during the actual read; the array object is temporarily
+ marked read-only, but attempting to mutate it in another thread
+ is a bad idea. Also, this DatasetID object acquires its own lock
+ (obj.pylock) until the operation completes.
"""
cdef TypeID mtype
+ cdef hid_t self_id, mtype_id, mspace_id, fspace_id, plist_id
+ cdef void* data
+ cdef int oldflags
+
+ self.pylock.acquire()
+ try:
+ oldflags = arr_obj.flags
+ arr_obj.flags = oldflags & (~NPY_WRITEABLE) # Wish-it-was-a-mutex approach
+
+ mtype = h5t.py_create(arr_obj.dtype)
+ check_numpy_write(arr_obj, -1)
- mtype = h5t.py_create(arr_obj.dtype)
- check_numpy_write(arr_obj, -1)
+ self_id = self.id
+ mtype_id = mtype.id
+ mspace_id = mspace.id
+ fspace_id = fspace.id
+ plist_id = pdefault(dxpl)
+ data = PyArray_DATA(arr_obj)
- H5Dread(self.id, mtype.id, mspace.id, fspace.id, pdefault(dxpl), PyArray_DATA(arr_obj))
+ with nogil:
+ H5PY_H5Dread(self_id, mtype_id, mspace_id, fspace_id, plist_id, data)
+ finally:
+ arr_obj.flags = oldflags
+ self.pylock.release()
def write(self, SpaceID mspace not None, SpaceID fspace not None,
ndarray arr_obj not None, PropDXID dxpl=None):
@@ -166,13 +216,38 @@ cdef class DatasetID(ObjectID):
The provided Numpy array must be C-contiguous, and own its data.
If this is not the case, ValueError will be raised and the read
will fail.
+
+ The GIL is released during the actual write; the array object is temporarily
+ marked read-only, but attempting to mutate it in another thread
+ is a bad idea. Also, this DatasetID object acquires its own lock
+ (obj.pylock) until the operation completes.
"""
cdef TypeID mtype
+ cdef hid_t self_id, mtype_id, mspace_id, fspace_id, plist_id
+ cdef void* data
+ cdef int oldflags
+
+ self.pylock.acquire()
+ try:
+ oldflags = arr_obj.flags
+ arr_obj.flags = oldflags & (~NPY_WRITEABLE) # Wish-it-was-a-mutex approach
- mtype = h5t.py_create(arr_obj.dtype)
- check_numpy_read(arr_obj, -1)
+ mtype = h5t.py_create(arr_obj.dtype)
+ check_numpy_read(arr_obj, -1)
- H5Dwrite(self.id, mtype.id, mspace.id, fspace.id, pdefault(dxpl), PyArray_DATA(arr_obj))
+ self_id = self.id
+ mtype_id = mtype.id
+ mspace_id = mspace.id
+ fspace_id = fspace.id
+ plist_id = pdefault(dxpl)
+ data = PyArray_DATA(arr_obj)
+
+ with nogil:
+ H5PY_H5Dwrite(self_id, mtype_id, mspace_id, fspace_id, plist_id, data)
+
+ finally:
+ arr_obj.flags = oldflags
+ self.pylock.release()
def extend(self, object shape):
""" (TUPLE shape)
@@ -260,5 +335,13 @@ cdef class DatasetID(ObjectID):
"""
return H5Dget_storage_size(self.id)
+ def __richcmp__(self, object other, int how):
+ return standard_richcmp(self, other, how)
+
+ def __hash__(self):
+ if self._hash is None:
+ info = h5g.get_objinfo(self)
+ self._hash = hash( (info.fileno, info.objno) )
+ return self._hash
diff --git a/h5py/h5f.pyx b/h5py/h5f.pyx
index e65ff41..96ef323 100644
--- a/h5py/h5f.pyx
+++ b/h5py/h5f.pyx
@@ -15,6 +15,7 @@
"""
# Pyrex compile-time imports
+from h5 cimport standard_richcmp
from h5p cimport propwrap, pdefault, PropFAID, PropFCID, H5P_DEFAULT
from h5t cimport typewrap
from h5a cimport AttrID
@@ -26,6 +27,7 @@ from utils cimport emalloc, efree, pybool
# Runtime imports
import h5
+import h5g
# === Public constants and data structures ====================================
@@ -289,5 +291,17 @@ cdef class FileID(ObjectID):
only tracks free space until the file is closed.
"""
return H5Fget_freespace(self.id)
+
+ def __richcmp__(self, object other, int how):
+ return standard_richcmp(self, other, how)
+
+ def __hash__(self):
+ # Obtain the file number from the root group metadata
+ if self._hash is None:
+ info = h5g.get_objinfo(self)
+ self._hash = hash(info.fileno)
+ return self._hash
+
+
diff --git a/h5py/h5g.pxd b/h5py/h5g.pxd
index 6392348..dc1af0c 100644
--- a/h5py/h5g.pxd
+++ b/h5py/h5g.pxd
@@ -14,6 +14,7 @@
# license is available at licenses/pytables.txt, in the distribution root
# directory.
+include "conditions.pxi"
include "std_defs.pxi"
from h5 cimport class ObjectID
@@ -68,4 +69,7 @@ cdef extern from "hdf5.h":
herr_t H5Gset_comment(hid_t loc_id, char *name, char *comment ) except *
int H5Gget_comment(hid_t loc_id, char *name, size_t bufsize, char *comment ) except *
+ IF H5PY_18API:
+ hid_t H5Gcreate_anon( hid_t loc_id, hid_t gcpl_id, hid_t gapl_id ) except *
+
diff --git a/h5py/h5g.pyx b/h5py/h5g.pyx
index 0a0e126..93f7901 100644
--- a/h5py/h5g.pyx
+++ b/h5py/h5g.pyx
@@ -13,9 +13,12 @@
"""
Low-level HDF5 "H5G" group interface.
"""
+include "conditions.pxi"
# Pyrex compile-time imports
from utils cimport emalloc, efree
+from h5 cimport standard_richcmp
+from h5p cimport H5P_DEFAULT
# Runtime imports
import h5
@@ -104,6 +107,10 @@ def create(ObjectID loc not None, char* name, int size_hint=-1):
"""
return GroupID(H5Gcreate(loc.id, name, size_hint))
+IF H5PY_18API:
+ def create_anon(ObjectID loc not None):
+ return GroupID(H5Gcreate_anon(loc.id, H5P_DEFAULT, H5P_DEFAULT))
+
cdef herr_t iter_cb_helper(hid_t gid, char *name, object int_tpl) except -1:
# Callback function for H5Giterate
# Automatic exception propagation breaks in 1.8 for some reason, so
@@ -377,3 +384,12 @@ cdef class GroupID(ObjectID):
""" Number of group members """
return self.get_num_objs()
+ def __richcmp__(self, object other, int how):
+ return standard_richcmp(self, other, how)
+
+ def __hash__(self):
+ if self._hash is None:
+ info = get_objinfo(self)
+ self._hash = hash( (info.fileno, info.objno) )
+ return self._hash
+
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index 9c47055..58ff28c 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -70,40 +70,8 @@ class LockableObject(object):
Base class which provides rudimentary locking support.
"""
- __locks = WeakValueDictionary() # Key => RLock object
- __locks_lock = threading.RLock()
-
- def _get_lock(self):
- """ Get an reentrant lock object appropriate for this object.
-
- Returns the same lock for each unique underlying HDF5 object:
- 1. For named objects, use fileno/objno as key (guaranteed unique)
- 2. For transient objects, use the HDF5 integer identifier
-
- This has the following limitations:
- 1. File objects can be locked, but this is not very useful because
- there's no obvious way to represent the dependency relationship
- between files and the objects they contain.
-
- 2. In cases where different transient identifiers refer to the
- same object, it will not be properly locked. Currently no
- high-level objects are transient.
-
- Note this function does NOT acquire the lock.
- """
- with self.__locks_lock:
- #print "Locking %d" % self.id.id
- name = h5i.get_name(self.id)
- if name is None:
- key = self.id.id
- else:
- info = h5g.get_objinfo(self.id)
- key = (info.fileno, info.objno)
-
- return self.__locks.setdefault(key, threading.RLock())
-
- lock = property(_get_lock,
- doc = "A threading.RLock instance associated with this HDF5 structure")
+ lock = property(lambda self: self.id.pylock,
+ doc = "A reentrant lock associated with this HDF5 structure")
class HLObject(LockableObject):
diff --git a/h5py/numpy.pxd b/h5py/numpy.pxd
index 481d27f..e8e1784 100644
--- a/h5py/numpy.pxd
+++ b/h5py/numpy.pxd
@@ -94,6 +94,7 @@ cdef extern from "numpy/arrayobject.h":
int PyArray_SETITEM(object arr, void *itemptr, object obj)
dtype PyArray_DescrFromType(int type)
object PyArray_Scalar(void *data, dtype descr, object base)
+ long PyArray_NBYTES(object arr)
int PyArray_CheckScalar(object sclr)
void PyArray_ScalarAsCtype(object sclr, void* ptr)
diff --git a/h5py/tests/test_highlevel.py b/h5py/tests/test_highlevel.py
index 6745eed..35a90cf 100644
--- a/h5py/tests/test_highlevel.py
+++ b/h5py/tests/test_highlevel.py
@@ -241,6 +241,14 @@ class TestDataset(unittest.TestCase):
f.close()
os.unlink(fname)
+ def test_Dataset_exceptions(self):
+ # These trigger exceptions in H5Dread
+ ref = numpy.ones((10,10), dtype='<i4')
+ dsid = self.f.create_dataset('ds', data=ref)
+ arr = numpy.ndarray((10,10), dtype='|S6') # incompatible datatype
+ self.assertRaises(H5Error, dsid.id.read, h5s.ALL, h5s.ALL, arr)
+ # or it'll segfault...
+
class TestGroup(unittest.TestCase):
def setUp(self):
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git
More information about the debian-science-commits
mailing list