[h5py] 82/455: Low-level threading; read/writes release GIL; hash functions

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:20 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit 576571bdca1b71ff18558f305fc00ee6086efaaa
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Sat Jul 26 20:12:58 2008 +0000

    Low-level threading; read/writes release GIL; hash functions
 h5py/h5.pxd                  |  12 ++++-
 h5py/h5.pyx                  | 121 +++++++++++++++++++++++++++++++++++++------
 h5py/h5d.pxd                 |   7 ++-
 h5py/h5d.pyx                 |  97 +++++++++++++++++++++++++++++++---
 h5py/h5f.pyx                 |  14 +++++
 h5py/h5g.pxd                 |   4 ++
 h5py/h5g.pyx                 |  16 ++++++
 h5py/highlevel.py            |  36 +------------
 h5py/numpy.pxd               |   1 +
 h5py/tests/test_highlevel.py |   8 +++
 10 files changed, 254 insertions(+), 62 deletions(-)

diff --git a/h5py/h5.pxd b/h5py/h5.pxd
index e84b45b..762051f 100644
--- a/h5py/h5.pxd
+++ b/h5py/h5.pxd
@@ -249,14 +249,22 @@ cdef int _disable_exceptions() except -1
 cdef err_c pause_errors() except? NULL
 cdef int resume_errors(err_c cookie) except -1
-# === Custom identifier wrappers ==============================================
+cdef object standard_richcmp(object self, object other, int how)
+cdef class H5PYConfig:
+    cdef object _rlock_type         # Callable returning a reentrant lock (RLock constructor or compatible)
+    cdef object _complex_names      # Field names used for complex types; initialized to ('r','i')
+    cdef public object _lockdict    # Weak-keyed dict mapping ObjectID => lock instance
 cdef class ObjectID:
     """ Base wrapper class for HDF5 object identifiers """
     cdef object __weakref__
     cdef readonly hid_t id
     cdef readonly int _locked
+    cdef H5PYConfig _cfg        # Used to cache a reference to the global config object
+    cdef object _hash           # Used by subclasses to cache a hash value,
+                                # which may be expensive to compute.
diff --git a/h5py/h5.pyx b/h5py/h5.pyx
index 9eb055a..8fdcbcb 100644
--- a/h5py/h5.pyx
+++ b/h5py/h5.pyx
@@ -34,6 +34,8 @@ include "conditions.pxi"
 from python cimport PyErr_SetObject
 import atexit
+import threading
+from weakref import WeakKeyDictionary
 # Logging is only enabled when compiled with H5PY_DEBUG nonzero
@@ -80,7 +82,78 @@ def _open():
-# === Identifier wrappers =====================================================
+cdef class H5PYConfig:
+    """
+        Package-wide settings holder for h5py.  Carries the lock factory,
+        the per-object lock table and the complex-number field names.
+    """
+    def __init__(self):
+        self._complex_names = ('r','i')
+        # The lock table must exist before RLock is assigned, because the
+        # RLock setter clears it.
+        self._lockdict = WeakKeyDictionary()  # ObjectID weakref => RLock instance
+        self.RLock = threading.RLock
+    property RLock:
+        """ Factory callable producing reentrant locks (threading.RLock by
+            default).  Objects it returns must work as context managers and
+            expose acquire() and release().  They MUST also be reentrant;
+            otherwise dataset reads/writes will deadlock.
+        """
+        def __get__(self):
+            return self._rlock_type
+        def __set__(self, val):
+            # Build one throwaway lock to check what the factory produces.
+            probe = val()
+            for required in ('acquire', 'release', '__enter__', '__exit__'):
+                if not hasattr(probe, required):
+                    raise ValueError("Generated locks must provide __enter__, __exit__, acquire, release")
+            self._rlock_type = val
+            # Locks built by the previous factory are dropped.
+            self._lockdict.clear()
+    property complex_names:
+        """ Pair (real, img) of field names used when storing complex types.
+        """
+        def __get__(self):
+            return self._complex_names
+        def __set__(self, val):
+            # TODO: validation
+            self._complex_names = val
+    def _get_lock(self, ObjectID key not None):
+        """ (ObjectID key) => LOCK
+            Return the reentrant lock associated with the given key, creating
+            it on first request.  Keys are held through weak references, so
+            an entry (and its lock) goes away together with its key.
+        """
+        # ObjectID instances which are both equal and hash to the same value
+        # are guaranteed to point to the same underlying HDF5 object.
+        # NOTE(review): the lookup and the insertion below are two separate
+        # steps; confirm that two threads racing on a brand-new key cannot
+        # end up holding different lock objects.
+        existing = self._lockdict.get(key)
+        if existing is not None:
+            return existing
+        fresh = self._rlock_type()
+        self._lockdict[key] = fresh
+        return fresh
+config = H5PYConfig()
+cdef object standard_richcmp(object self, object other, int how):
+    # Shared rich-comparison helper for ObjectID and its subclasses.
+    # Only == (how == 2) and != (how == 3) are supported; two identifiers are
+    # equal exactly when their hashes are equal.  Any other operator, or a
+    # comparison in which either operand is not an ObjectID, returns
+    # NotImplemented so that Python applies its normal fallback.
+    # This needs to be shared because of weird CPython quirks involving
+    # subclasses and the __hash__ method.
+    if how != 2 and how != 3:
+        return NotImplemented
+    # BOTH operands have to be ObjectIDs.  The parentheses matter: without
+    # them "not" applies to the first typecheck only, and a comparison such
+    # as ObjectID == 5 would go on to compare hash values.
+    if not (typecheck(self, ObjectID) and typecheck(other, ObjectID)):
+        return NotImplemented
+    eq = (hash(self) == hash(other))
+    if how == 2:
+        return eq
+    return not eq
 cdef class ObjectID:
@@ -101,6 +174,16 @@ cdef class ObjectID:
         The truth value of an ObjectID (i.e. bool(obj_id)) indicates whether
         the underlying HDF5 identifier is valid.
+        Rudimentary thread safety is provided by the property pylock, which is
+        an RLock instance shared by objects that point to the same underlying
+        HDF5 structure.  In multithreaded programs, you should acquire this
+        lock before modifying the structure.  Locks have no relationship;
+        locking a file does not prevent access to its objects, nor a group to
+        its members.
+        ObjectID subclasses which release the GIL (e.g. around blocking I/O
+        operations) will lock themselves first.
     property _valid:
@@ -109,6 +192,15 @@ cdef class ObjectID:
         def __get__(self):
             return H5Iget_type(self.id) != H5I_BADID
+    property pylock:
+        """ Reentrant lock (RLock or equivalent) for coordinating threads.
+            Objects that compare equal, i.e. that point to the same HDF5
+            structure, are handed the same lock instance.
+        """
+        def __get__(self):
+            cdef H5PYConfig cfg
+            cfg = self._cfg
+            if cfg is None:
+                # First access: remember the module-level config object.
+                cfg = config
+                self._cfg = cfg
+            return cfg._get_lock(self)
     def __nonzero__(self):
         """ Truth value for object identifiers (like _valid) """
         return self._valid
@@ -145,18 +237,13 @@ cdef class ObjectID:
     def __richcmp__(self, object other, int how):
         """ Supports only == and != """
+        return standard_richcmp(self, other, how)
-        if how == 2 or how == 3:
-            if not hasattr(other, 'id'):
-                return False
-            eq = isinstance(other, type(self)) and self.id == other.id
-            if how == 2:
-                return eq
-            return not eq
-        raise TypeError("Only equality comparisons are supported.")
+    def __hash__(self):
+        """ Hash method defaults to the identifier, as this cannot change over
+            the life of the object.
+        """
+        return self.id
     def __str__(self):
         if self._valid:
@@ -174,10 +261,9 @@ cdef class ObjectID:
     def __repr__(self):
         return self.__str__()
 # === Public exception hierarchy ==============================================
-class H5Error(EnvironmentError):
+class H5Error(Exception):
     """ Base class for internal HDF5 library exceptions.
         Subclass of EnvironmentError; errno is computed from the HDF5 major
         and minor error numbers:
@@ -488,12 +574,13 @@ cdef herr_t extract_cb(int n, H5E_error_t *err_desc, void* data_in):
     err_struct.min_num = err_desc.min_num
     return 1
-cdef herr_t err_callback(void* client_data):
+cdef herr_t err_callback(void* client_data) with gil:
     # Callback which sets Python exception based on the current error stack.
     # Can't use the standard Pyrex raise because then the traceback
-    # points here!
+    # points here.  MUST be "with gil" as it can be called by nogil HDF5
+    # routines.
     cdef H5E_error_t err_struct
     cdef H5E_major_t mj
     cdef H5E_minor_t mn
diff --git a/h5py/h5d.pxd b/h5py/h5d.pxd
index c597852..04148f1 100644
--- a/h5py/h5d.pxd
+++ b/h5py/h5d.pxd
@@ -76,10 +76,13 @@ cdef extern from "hdf5.h":
   haddr_t   H5Dget_offset(hid_t dset_id) except *
   hsize_t   H5Dget_storage_size(hid_t dset_id) except? 0
+  # These must have their return values checked manually.  The functions
+  # H5PY_H5Dread and H5PY_H5Dwrite return -1 specifically, for use when
+  # the GIL is released and PyErr_Occurred() is inadvisable.
   herr_t    H5Dread(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id,
-                  hid_t file_space_id, hid_t plist_id, void *buf) except *
+                  hid_t file_space_id, hid_t plist_id, void *buf) nogil
   herr_t    H5Dwrite(hid_t dset_id, hid_t mem_type, hid_t mem_space, hid_t 
-                        file_space, hid_t xfer_plist, void* buf) except *
+                        file_space, hid_t xfer_plist, void* buf) nogil
   herr_t    H5Dextend(hid_t dataset_id, hsize_t *size) except *
diff --git a/h5py/h5d.pyx b/h5py/h5d.pyx
index 9d0eb8a..3e71a2e 100644
--- a/h5py/h5d.pyx
+++ b/h5py/h5d.pyx
@@ -15,10 +15,11 @@
 # Pyrex compile-time imports
+from h5 cimport standard_richcmp
 from h5s cimport H5S_ALL, H5S_UNLIMITED, H5S_SCALAR, H5S_SIMPLE, \
                     H5Sget_simple_extent_type, H5Sclose, H5Sselect_all, \
                     H5Sget_simple_extent_ndims, H5Sget_select_npoints
-from numpy cimport import_array, PyArray_DATA
+from numpy cimport import_array, PyArray_DATA, NPY_WRITEABLE
 from utils cimport  check_numpy_read, check_numpy_write, \
                     require_tuple, \
                     convert_tuple, \
@@ -29,6 +30,7 @@ from h5 cimport HADDR_UNDEF
 import h5
 import h5t
 import h5s
+import h5g
@@ -75,6 +77,30 @@ def open(ObjectID loc not None, char* name):
     return DatasetID(H5Dopen(loc.id, name))
+# --- Proxy functions for safe(r) threading -----------------------------------
+# It's not legal to call PyErr_Occurred() with nogil, so we can't use
+# the standard except * syntax.  Trap negative return numbers and convert them
+# to something Pyrex can recognize.
+cdef int H5PY_H5Dread(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id,
+                  hid_t file_space_id, hid_t plist_id, void *buf) nogil except -1:
+    # GIL-free proxy for H5Dread.  Every negative HDF5 status is collapsed
+    # to exactly -1, the value named in this function's "except -1" clause;
+    # non-negative statuses are passed through unchanged.
+    cdef herr_t status
+    status = H5Dread(dset_id, mem_type_id, mem_space_id, file_space_id,
+                        plist_id, buf)
+    if status >= 0:
+        return status
+    return -1
+cdef int H5PY_H5Dwrite(hid_t dset_id, hid_t mem_type, hid_t mem_space, hid_t 
+                        file_space, hid_t xfer_plist, void* buf) nogil except -1:
+    # GIL-free proxy for H5Dwrite.  Every negative HDF5 status is collapsed
+    # to exactly -1, the value named in this function's "except -1" clause;
+    # non-negative statuses are passed through unchanged.
+    cdef herr_t status
+    status = H5Dwrite(dset_id, mem_type, mem_space, file_space,
+                        xfer_plist, buf)
+    if status >= 0:
+        return status
+    return -1
 # === Dataset I/O =============================================================
@@ -146,14 +172,38 @@ cdef class DatasetID(ObjectID):
             wide variety of dataspace configurations are possible, this is not
             checked.  You can easily crash Python by reading in data from too
             large a dataspace.
+            The actual read is non-blocking; the array object is temporarily
+            marked read-only, but attempting to mutate it in another thread
+            is a bad idea.  Also, this DatasetID object acquires its own lock
+            (obj.pylock) until the operation completes.
         cdef TypeID mtype
+        cdef hid_t self_id, mtype_id, mspace_id, fspace_id, plist_id
+        cdef void* data
+        cdef int oldflags
+        self.pylock.acquire()
+        try:
+            oldflags = arr_obj.flags
+            arr_obj.flags = oldflags & (~NPY_WRITEABLE) # Wish-it-was-a-mutex approach
+            mtype = h5t.py_create(arr_obj.dtype)
+            check_numpy_write(arr_obj, -1)
-        mtype = h5t.py_create(arr_obj.dtype)
-        check_numpy_write(arr_obj, -1)
+            self_id = self.id
+            mtype_id = mtype.id
+            mspace_id = mspace.id
+            fspace_id = fspace.id
+            plist_id = pdefault(dxpl)
+            data = PyArray_DATA(arr_obj)
-        H5Dread(self.id, mtype.id, mspace.id, fspace.id, pdefault(dxpl), PyArray_DATA(arr_obj))
+            with nogil:
+                H5PY_H5Dread(self_id, mtype_id, mspace_id, fspace_id, plist_id, data)
+        finally:
+            arr_obj.flags = oldflags
+            self.pylock.release()
     def write(self, SpaceID mspace not None, SpaceID fspace not None, 
                     ndarray arr_obj not None, PropDXID dxpl=None):
@@ -166,13 +216,38 @@ cdef class DatasetID(ObjectID):
             The provided Numpy array must be C-contiguous, and own its data.  
             If this is not the case, ValueError will be raised and the read 
             will fail.
+            The actual write is non-blocking; the array object is temporarily
+            marked read-only, but attempting to mutate it in another thread
+            is a bad idea.  Also, this DatasetID object acquires its own lock
+            (obj.pylock) until the operation completes.
         cdef TypeID mtype
+        cdef hid_t self_id, mtype_id, mspace_id, fspace_id, plist_id
+        cdef void* data
+        cdef int oldflags
+        self.pylock.acquire()
+        try:
+            oldflags = arr_obj.flags
+            arr_obj.flags = oldflags & (~NPY_WRITEABLE) # Wish-it-was-a-mutex approach
-        mtype = h5t.py_create(arr_obj.dtype)
-        check_numpy_read(arr_obj, -1)
+            mtype = h5t.py_create(arr_obj.dtype)
+            check_numpy_read(arr_obj, -1)
-        H5Dwrite(self.id, mtype.id, mspace.id, fspace.id, pdefault(dxpl), PyArray_DATA(arr_obj))
+            self_id = self.id
+            mtype_id = mtype.id
+            mspace_id = mspace.id
+            fspace_id = fspace.id
+            plist_id = pdefault(dxpl)
+            data = PyArray_DATA(arr_obj)
+            with nogil:
+                H5PY_H5Dwrite(self_id, mtype_id, mspace_id, fspace_id, plist_id, data)
+        finally:
+            arr_obj.flags = oldflags
+            self.pylock.release()
     def extend(self, object shape):
         """ (TUPLE shape)
@@ -260,5 +335,13 @@ cdef class DatasetID(ObjectID):
         return H5Dget_storage_size(self.id)
+    def __richcmp__(self, object other, int how):
+        # Comparison is delegated to the helper cimported from h5.
+        return standard_richcmp(self, other, how)
+    def __hash__(self):
+        # The hash is derived from the (fileno, objno) pair reported by
+        # h5g.get_objinfo; it is computed once and then served from _hash.
+        if self._hash is not None:
+            return self._hash
+        info = h5g.get_objinfo(self)
+        self._hash = hash( (info.fileno, info.objno) )
+        return self._hash
diff --git a/h5py/h5f.pyx b/h5py/h5f.pyx
index e65ff41..96ef323 100644
--- a/h5py/h5f.pyx
+++ b/h5py/h5f.pyx
@@ -15,6 +15,7 @@
 # Pyrex compile-time imports
+from h5 cimport standard_richcmp
 from h5p cimport propwrap, pdefault, PropFAID, PropFCID, H5P_DEFAULT
 from h5t cimport typewrap
 from h5a cimport AttrID
@@ -26,6 +27,7 @@ from utils cimport emalloc, efree, pybool
 # Runtime imports
 import h5
+import h5g
 # === Public constants and data structures ====================================
@@ -289,5 +291,17 @@ cdef class FileID(ObjectID):
             only tracks free space until the file is closed.
         return H5Fget_freespace(self.id)
+    def __richcmp__(self, object other, int how):
+        # Comparison is delegated to the helper cimported from h5.
+        return standard_richcmp(self, other, how)
+    def __hash__(self):
+        # The hash is the file number taken from the root group metadata;
+        # it is computed once and then served from _hash.
+        if self._hash is not None:
+            return self._hash
+        info = h5g.get_objinfo(self)
+        self._hash = hash(info.fileno)
+        return self._hash
diff --git a/h5py/h5g.pxd b/h5py/h5g.pxd
index 6392348..dc1af0c 100644
--- a/h5py/h5g.pxd
+++ b/h5py/h5g.pxd
@@ -14,6 +14,7 @@
 # license is available at licenses/pytables.txt, in the distribution root
 # directory.
+include "conditions.pxi"
 include "std_defs.pxi"
 from h5 cimport class ObjectID
@@ -68,4 +69,7 @@ cdef extern from "hdf5.h":
   herr_t H5Gset_comment(hid_t loc_id, char *name, char *comment ) except *
   int H5Gget_comment(hid_t loc_id, char *name, size_t bufsize, char *comment ) except *
+  IF H5PY_18API:
+    hid_t H5Gcreate_anon( hid_t loc_id, hid_t gcpl_id, hid_t gapl_id  ) except *
diff --git a/h5py/h5g.pyx b/h5py/h5g.pyx
index 0a0e126..93f7901 100644
--- a/h5py/h5g.pyx
+++ b/h5py/h5g.pyx
@@ -13,9 +13,12 @@
     Low-level HDF5 "H5G" group interface.
+include "conditions.pxi"
 # Pyrex compile-time imports
 from utils cimport emalloc, efree
+from h5 cimport standard_richcmp
+from h5p cimport H5P_DEFAULT
 # Runtime imports
 import h5
@@ -104,6 +107,10 @@ def create(ObjectID loc not None, char* name, int size_hint=-1):
     return GroupID(H5Gcreate(loc.id, name, size_hint))
+   def create_anon(ObjectID loc not None):
+        return GroupID(H5Gcreate_anon(loc.id, H5P_DEFAULT, H5P_DEFAULT))
 cdef herr_t iter_cb_helper(hid_t gid, char *name, object int_tpl) except -1:
     # Callback function for H5Giterate
     # Automatic exception propagation breaks in 1.8 for some reason, so
@@ -377,3 +384,12 @@ cdef class GroupID(ObjectID):
         """ Number of group members """
         return self.get_num_objs()
+    def __richcmp__(self, object other, int how):
+        # Comparison is delegated to the helper cimported from h5.
+        return standard_richcmp(self, other, how)
+    def __hash__(self):
+        # The hash is derived from the (fileno, objno) pair reported by
+        # get_objinfo; it is computed once and then served from _hash.
+        if self._hash is not None:
+            return self._hash
+        info = get_objinfo(self)
+        self._hash = hash( (info.fileno, info.objno) )
+        return self._hash
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index 9c47055..58ff28c 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -70,40 +70,8 @@ class LockableObject(object):
         Base class which provides rudimentary locking support.
-    __locks = WeakValueDictionary()   # Key => RLock object
-    __locks_lock = threading.RLock()
-    def _get_lock(self):
-        """ Get an reentrant lock object appropriate for this object.
-            Returns the same lock for each unique underlying HDF5 object:
-             1. For named objects, use fileno/objno as key (guaranteed unique)
-             2. For transient objects, use the HDF5 integer identifier
-            This has the following limitations:
-             1. File objects can be locked, but this is not very useful because
-                there's no obvious way to represent the dependency relationship
-                between files and the objects they contain.
-             2. In cases where different transient identifiers refer to the
-                same object, it will not be properly locked.  Currently no
-                high-level objects are transient.
-            Note this function does NOT acquire the lock.
-        """
-        with self.__locks_lock:
-            #print "Locking %d" % self.id.id
-            name = h5i.get_name(self.id)
-            if name is None:
-                key = self.id.id
-            else:
-                info = h5g.get_objinfo(self.id)
-                key = (info.fileno, info.objno)
-            return self.__locks.setdefault(key, threading.RLock())
-    lock = property(_get_lock,
-        doc = "A threading.RLock instance associated with this HDF5 structure")
+    lock = property(lambda self: self.id.pylock,
+        doc = "A reentrant lock associated with this HDF5 structure")
 class HLObject(LockableObject):
diff --git a/h5py/numpy.pxd b/h5py/numpy.pxd
index 481d27f..e8e1784 100644
--- a/h5py/numpy.pxd
+++ b/h5py/numpy.pxd
@@ -94,6 +94,7 @@ cdef extern from "numpy/arrayobject.h":
   int PyArray_SETITEM(object arr, void *itemptr, object obj)
   dtype PyArray_DescrFromType(int type)
   object PyArray_Scalar(void *data, dtype descr, object base)
+  long PyArray_NBYTES(object arr)
   int PyArray_CheckScalar(object sclr)
   void PyArray_ScalarAsCtype(object sclr, void* ptr)
diff --git a/h5py/tests/test_highlevel.py b/h5py/tests/test_highlevel.py
index 6745eed..35a90cf 100644
--- a/h5py/tests/test_highlevel.py
+++ b/h5py/tests/test_highlevel.py
@@ -241,6 +241,14 @@ class TestDataset(unittest.TestCase):
+    def test_Dataset_exceptions(self):
+        # These trigger exceptions in H5Dread
+        ref = numpy.ones((10,10), dtype='<i4')
+        dsid = self.f.create_dataset('ds', data=ref)
+        arr = numpy.ndarray((10,10), dtype='|S6') # incompatible datatype
+        self.assertRaises(H5Error, dsid.id.read, h5s.ALL, h5s.ALL, arr)
+        # or it'll segfault...
 class TestGroup(unittest.TestCase):
     def setUp(self):

Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git

More information about the debian-science-commits mailing list