[h5py] 33/455: h5t tweaks and proxying

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:14 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit b2c3676dcd0c7b70e87930f26070870bd1d4ad30
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Mon May 26 17:37:19 2008 +0000

    h5t tweaks and proxying
---
 h5py/h5d.pyx  | 86 +++++++++++++++++++++++++++++++++++++++++++++++++------
 h5py/h5t.pyx  | 12 +++++---
 h5py/proxy.py | 92 ++++++++++++++++++++++++++++++-----------------------------
 3 files changed, 133 insertions(+), 57 deletions(-)

diff --git a/h5py/h5d.pyx b/h5py/h5d.pyx
index 0a30c9b..68b69a1 100644
--- a/h5py/h5d.pyx
+++ b/h5py/h5d.pyx
@@ -551,29 +551,99 @@ def py_rank(hid_t dset_id):
             H5Sclose(space_id)
     return rank
 
-def py_dtype(hid_t dset_id):
-    """ (INT dset_id) => DTYPE numpy_dtype
+def py_dtype(hid_t dset_id, **kwds):
+    """ (INT dset_id, **kwds) => DTYPE numpy_dtype
 
         Get the datatype of an HDF5 dataset, converted to a Numpy dtype.
+        Keywords are passed to py_h5t_to_dtype.
     """
     cdef hid_t type_id
     type_id = 0
     dtype_out = None
     try:
         type_id = get_type(dset_id)
-        dtype_out = h5t.py_h5t_to_dtype(type_id)
+        dtype_out = h5t.py_h5t_to_dtype(type_id, **kwds)
     finally:
         if type_id:
             H5Tclose(type_id)
     return dtype_out
 
-def py_patch(hid_t ds_source, hid_t ds_sink, hid_t space_id):
-    """ (INT ds_source, INT ds_sink, INT space_id)
+def py_patch(hid_t ds_source, hid_t ds_sink, hid_t transfer_space):
+    """ (INT ds_source, INT ds_sink, INT transfer_space)
 
-        Transfer selected elements from one dataset to another.
-        Not yet implemented.
+        Transfer selected elements from one dataset to another.  The transfer
+        selection must be compatible with both the source and sink datasets, or
+        an exception will be raised. 
+
+        This function will allocate a memory buffer large enough to hold the
+        entire selection at once.  Looping and memory limitation constraints 
+        are the caller's responsibility.
     """
-    pass
+    cdef hid_t source_space
+    cdef hid_t sink_space
+    cdef hid_t mem_space
+    cdef hid_t source_type
+    cdef void* xfer_buf
+
+    cdef hssize_t npoints
+    cdef size_t type_size
+    cdef herr_t retval
+
+    source_space = 0    
+    sink_space = 0
+    mem_space = 0
+    source_type = 0
+    xfer_buf = NULL
+
+    try:
+        source_space = get_space(ds_source)
+        sink_space = get_space(sink)
+        source_type = get_type(source)
+
+        npoints = h5s.get_select_npoints(space_id)
+        type_size = h5t.get_size(source_type)
+
+        mem_space = h5s.create_simple((npoints,))
+        h5s.select_all(mem_space)
+
+        # This assumes that reading into a contiguous buffer and then writing
+        # out again to the same selection preserves the arrangement of data
+        # elements.  I think this is a reasonable assumption.
+
+        xfer_buf = malloc(npoints*type_size)
+
+        # Let the HDF5 library do dataspace validation; the worst that can
+        # happen is that the write will fail after taking a while to read.
+
+        retval = H5Dread(ds_source, source_type, mem_space, transfer_space, H5P_DEFAULT, xfer_buf)
+        if retval < 0:
+            raise DatasetError("Source read failed.")
+
+        retval = H5Dwrite(ds_sink, source_type, mem_space, transfer_space, H5P_DEFAULT, xfer_buf)
+        if retval < 0:
+            raise DatasetError("Sink write failed.")
+
+    finally:
+        if source_space != 0:
+            H5Sclose(source_space)
+        if sink_space != 0:
+            H5Sclose(sink_space)
+        if mem_space != 0:
+            H5Sclose(mem_space)
+        if source_type != 0:
+            H5Tclose(source_type)
+        if xfer_buf != NULL:
+            free(xfer_buf)
+
+
+
+
+
+
+
+
+
+
 
 
 
diff --git a/h5py/h5t.pyx b/h5py/h5t.pyx
index fdd40ca..f8c09ab 100644
--- a/h5py/h5t.pyx
+++ b/h5py/h5t.pyx
@@ -284,12 +284,16 @@ def detect_class(hid_t type_id, int classtype):
     return bool(retval)
 
 
-def close(hid_t type_id):
-    " (INT type_id) "
-    
+def close(hid_t type_id, int force=0):
+    """ (INT type_id, BOOL force=False)
+
+        Close this datatype.  If "force" is True, ignore any errors.  Useful
+        for exception handlers, when you're not sure if you've got an immutable
+        datatype.
+    """
     cdef herr_t retval
     retval = H5Tclose(type_id)
-    if retval < 0:
+    if retval < 0 and force:
         raise DatatypeError("Failed to close datatype %d" % type_id)
 
 # === Atomic datatype operations ==============================================
diff --git a/h5py/proxy.py b/h5py/proxy.py
index c06e1bb..ebaad34 100644
--- a/h5py/proxy.py
+++ b/h5py/proxy.py
@@ -2,7 +2,7 @@ import tempfile
 import os
 import numpy
 
-import h5d, h5s, h5t, h5f
+import h5d, h5s, h5t, h5f, h5p, h5z
 
 class ProxyError(StandardError):
     pass
@@ -15,58 +15,62 @@ class DatasetProxy(object):
     """
     def begin_proxy(self):
 
-        # todo: modify plist to enforce late allocation and no compression
 
         if self.proxy_id is not None:
             raise ProxyError("Already proxying.")
 
         fid = 0
-        sid = 0
-        pid = 0
-        tid = 0
+        space_id = 0
+        plist_id = 0
+        type_id = 0
         proxy_id = 0
         fname = tempfile.mktemp('.hdf5')
 
         try:
-            sid = h5d.get_space(self.id)
-            pid = h5g.get_create_plist(self.id)
-            tid = h5g.get_type(self.id)
+            space_id = h5d.get_space(self.id)
+            type_id = h5g.get_type(self.id)
+            plist_id = h5g.get_create_plist(self.id)
+
+            h5p.remove_filter(plist_id, h5z.FILTER_ALL)
+            h5p.set_alloc_time(plist_id, h5p.ALLOC_TIME_INCR)
 
             fid = h5f.create(fname, h5f.ACC_RDWR)
-            proxy_id = h5d.create(fid, "PROXY", tid, sid, pid)
+            proxy_id = h5d.create(fid, "PROXY", type_id, space_id, plist_id)
         except:
             if fid != 0:
                 h5f.close(fid)
-            if sid != 0:
-                h5s.close(sid)
+            if space_id != 0:
+                h5s.close(space_id)
             raise
         finally:
-            if pid != 0:
-                h5p.close(pid)
-            if tid != 0:
-                h5t.close(tid)
+            if plist_id != 0:
+                h5p.close(plist_id)
+            if type_id != 0:
+                h5t.close(type_id)
 
-        self.fid = fid
-        self.space_id = sid
-        self.proxy_id = proxy_id
-        self.fname = fname
+        self._proxy_fid = fid
+        self._proxy_fname = fname
+        self._proxy_space = space_id
+        self._proxy_id = proxy_id
 
     def end_proxy(self):
 
-        if self.proxy_id is None:
+        if not hasattr(self, '_proxy_id') or self._proxy_id is None:
             raise ProxyError("Not proxying.")
 
-        h5s.close(self.space_id)
-        h5d.close(self.proxy_id)
-        h5f.close(self.fid)
-        os.unlink(self.fname)
-        self.proxy_id = None
-
-
-    def read(self, start, count, stride=None, **kwds):
+        h5s.close(self._proxy_space)
+        h5d.close(self._proxy_id)
+        h5f.close(self._proxy_fid)
+        self._proxy_id = None
+        os.unlink(self._proxy_fname)
 
-        # todo: argument validation
+    def _read(self, start, count, stride=None, **kwds):
+        """ Dataset read access.  In direct mode, simply reads data from 
+            self.id.  In proxy mode, reads unmodified data from self.id and
+            modified sections from self._proxy_id.
 
+            Don't call this directly.
+        """
         if self.proxy_id is None:
             return h5d.py_read_slab(self.id, start, count, stride, **kwds)
 
@@ -74,22 +78,20 @@ class DatasetProxy(object):
             mem_space = 0
             backing_space = 0
             patch_space = 0
-            tid = 0
             
             try:
                 mem_space = h5s.create_simple(count)    
 
                 # Create Numpy array
-                tid = h5d.get_type(self.proxy_id)
-                dtype = h5t.py_h5t_to_dtype(tid, **kwds)
+                dtype = h5t.py_dtype(self._proxy_id)
                 arr = numpy.ndarray(count, dtype=dtype)
 
-                patch_space = h5s.copy(self.space_id)
-                backing_space = h5s.copy(self.space_id)
+                patch_space = h5s.copy(self._proxy_space)
+                backing_space = h5s.copy(self._proxy_space)
 
                 # What needs to be read from the original dataset.
                 # This is all elements of the new selection which are not
-                # marked as modified.
+                # already selected in self._proxy_space
                 h5s.select_hyperslab(backing_space, start, count, stride, op=h5s.SELECT_NOTA)
 
                 # What needs to be read from the proxy dataset.
@@ -103,7 +105,7 @@ class DatasetProxy(object):
 
                 # Read the rest from the proxy dataset.
                 if h5s.get_select_npoints(patch_space) > 0:
-                    h5d.read(self.proxy_id, mem_space, patch_space, arr)
+                    h5d.read(self._proxy_id, mem_space, patch_space, arr)
 
             finally:
                 if mem_space != 0:
@@ -112,33 +114,33 @@ class DatasetProxy(object):
                     h5s.close(backing_space)
                 if patch_space != 0:
                     h5s.close(patch_space)
-                if tid != 0:
-                    h5t.close(tid)
 
             return arr
 
-    def write(self, arr, start, stride=None):
+    def _write(self, arr, start, stride=None):
         
         if self.proxy_id is None:
             h5d.py_write_slab(self.id, arr, start, stride)
         
         else:
             # We get free argument validation courtesy of this function.
-            h5d.py_write_slab(self.proxy_id, arr, start, stride)
+            h5d.py_write_slab(self._proxy_id, arr, start, stride)
 
             # Record this section of the dataspace as changed.
             count = arr.shape
-            h5s.select_hyperslab(self.space_id, start, count, stride, op=h5s.SELECT_OR)
+            h5s.select_hyperslab(self._proxy_space, start, count, stride, op=h5s.SELECT_OR)
 
     def commit(self):
 
-        # this will use the yet-unwritten h5d.py_patch function
-        pass
+        h5d.py_patch(self._proxy_id, self.id, self._proxy_space)
+        h5s.select_none(self._proxy_space)
 
     def rollback(self):
 
-        # fixme: this leaks file space
-        h5s.select_none(self.space_id)
+        # Proxy file doesn't shrink, but space will be re-used.
+        # Worst case == proxy file is size of the original dataset, sans
+        # compression
+        h5s.select_none(self._proxy_space)
             
         
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git



More information about the debian-science-commits mailing list