[h5py] 34/455: HL tweaks for new API; remove h5t silliness

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:14 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit 061e2819cb65818fcc6118bb11606df1b474cafe
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Mon May 26 23:31:22 2008 +0000

    HL tweaks for new API; remove h5t silliness
---
 h5py/h5d.pyx      |   8 +++--
 h5py/h5t.pyx      |  68 ++++++++++++++++++-----------------
 h5py/highlevel.py | 105 +++++++++++++++++++++++++-----------------------------
 h5py/proxy.py     |  29 +++++++++------
 4 files changed, 107 insertions(+), 103 deletions(-)

diff --git a/h5py/h5d.pyx b/h5py/h5d.pyx
index 68b69a1..dbdeb0b 100644
--- a/h5py/h5d.pyx
+++ b/h5py/h5d.pyx
@@ -375,11 +375,13 @@ def py_create(hid_t parent_id, char* name, object data=None, object dtype=None,
 
 def py_read_slab(hid_t ds_id, object start, object count, 
                  object stride=None, **kwds):
-    """ (INT ds_id, TUPLE start, TUPLE count, TUPLE stride=None,
-            STRING byteorder=None, TUPLE compound_names=None, 
-            TUPLE complex_names=None)
+    """ (INT ds_id, TUPLE start, TUPLE count, TUPLE stride=None, **kwds)
         => NDARRAY numpy_array_out
     
+        Keywords allowed:
+            STRING byteorder=None, TUPLE compound_names=None, 
+            TUPLE complex_names=None
+
         Read a hyperslab from an existing HDF5 dataset, and return it as a
         Numpy array. Dimensions are specified by:
 
diff --git a/h5py/h5t.pyx b/h5py/h5t.pyx
index f8c09ab..ca199dd 100644
--- a/h5py/h5t.pyx
+++ b/h5py/h5t.pyx
@@ -695,9 +695,9 @@ def _validate_names(names):
     """ Common validation function for compound object field names, which must
         be tuples of strings.
     """
-    if isinstance(item, tuple):
+    if isinstance(names, tuple):
         bad = False
-        for x in names:
+        for name in names:
             if not isinstance(name, str):
                 bad = True
                 break
@@ -746,6 +746,8 @@ def py_h5t_to_dtype(hid_t type_id, object byteorder=None,
     cdef int i
     cdef hid_t tmp_id
 
+    typeobj = None
+
     # Argument validation and defaults
 
     if byteorder is not None: 
@@ -786,51 +788,51 @@ def py_h5t_to_dtype(hid_t type_id, object byteorder=None,
         typeobj = dtype("|V" + str(size))
 
     elif classtype == H5T_COMPOUND:
-        # 1. Read field names and put them in a list
-        # 2. If a subset of names are requested, check to 
-        #       make sure they exist and use that list instead.
-        # 3. Alternately, if the type only has two fields, 
-        #       see if we should convert it as a complex number.
+
 
         nfields = get_nmembers(type_id)
-        field_list = []
+        field_names = []
+        field_types = []
 
+        # First step: read field names and their Numpy dtypes into 
+        # two separate arrays.
         for i from 0 <= i < nfields:
             tmp_id = get_member_type(type_id, i)
             try:
                 tmp_name = get_member_name(type_id, i)
-                field_list.append( (
-                                    tmp_name, 
-                                    py_h5t_to_dtype(tmp_id, byteorder,
-                                      None, complex_names)
-                                 ) )
+                field_names.append(tmp_name)
+                field_types.append(py_h5t_to_dtype(tmp_id, byteorder,
+                                        None, complex_names))
             finally:
                 H5Tclose(tmp_id)
 
+
+        # 1. Only a particular (ordered) subset is requested
         if compound_names is not None:
+            dt_list = []
             # Validate the requested fields
-            requested = set(compound_names)
-            present = set(field_list)
-            missing = requested - present
-            if len(missing) > 0:
-                raise ValueError("The following fields are not present in the given compound type:\n" + 
-                                 ", ".join(missing) )
-            field_list = compound_names
+            for name in compound_names:
+                try:
+                    idx = field_names.index(name)
+                except ValueError:
+                    raise ValueError('Field "%s" not found. Valid fields are:\n%s' % (name, ", ".join(field_names)))
+                dt_list.append( (name, field_types[idx]) )
             
-        elif len(field_list) == 2:
-            # Special case: complex type.  Note this changes "field_list" to a string.
-            if complex_names is not None and complex_names != () and \
-               field_list[0][1].str     == field_list[1][1].str and \
-               field_list[0][1].str[1]  == 'f'                  and \
-               field_list[0][0].lower() == complex_names[0]     and \
-               field_list[1][0].lower() == complex_names[1]:
+            typeobj = dtype(dt_list)
+
+        # 2. Check if it should be converted to a complex number
+        elif len(field_names) == 2 and tuple(field_names) == complex_names and \
+          field_types[0] == field_types[1] and field_types[0].kind == 'f':
 
-                    bstring = field_list[0][1].str
-                    blen = int(bstring[2:])
-                    nstring = bstring[0] + "c" + str(2*blen)
-                    field_list = nstring
+            bstring = field_types[0].str
+            blen = int(bstring[2:])
+            nstring = bstring[0] + "c" + str(2*blen)
 
-        typeobj = dtype(field_list)
+            typeobj = dtype(nstring)
+
+        # 3. Read all fields of the compound type, in HDF5 order.
+        else:
+            typeobj = dtype(zip(field_names, field_types))
 
     elif classtype == H5T_ENUM:
         # Enumerated types are treated as their parent type, with an additional
@@ -851,12 +853,12 @@ def py_h5t_to_dtype(hid_t type_id, object byteorder=None,
             H5Tclose(super_tid)
         shape = get_array_dims(type_id)
         typeobj = dtype( (base_dtype, shape) )
-
     else:
         raise ConversionError('Unsupported datatype class "%s"' % CLASS_MAPPER[classtype])
 
     if byteorder is not None:
         return typeobj.newbyteorder(byteorder)
+
     return typeobj
 
 def py_dtype_to_h5t(numpy.dtype dtype_in, object complex_names=None):
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index 955b38b..54ae1f2 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -66,47 +66,46 @@ class Dataset(object):
 
         A Dataset object is designed to permit "Numpy-like" access to the 
         underlying HDF5 dataset.  It supports array-style indexing, which 
-        returns Numpy ndarrays.  For the case of arrays containing compound
-        data, it also allows a "compound mask" to be set, allowing you to 
-        only extract elements which match names in the mask.  The underlying
-        array can also be written to using the indexing syntax.
+        returns Numpy ndarrays.  "Extended-recarray" slicing is also possible;
+        specify the names of fields you want along with the numerical slices.
+        The underlying array can also be written to using the indexing syntax.
 
         HDF5 attribute access is provided through the property obj.attrs.  See
         the AttributeManager class documentation for more information.
 
         Read-only properties:
-        names       Compound fields defined in this object (tuple or None)
-        names_mask  Current mask controlling compound access (tuple or None)
         shape       Tuple containing array dimensions
         dtype       A Numpy dtype representing the array data-type.
 
         Writable properties:
-        force_native    
-            Returned data will be automatically converted
-            to the native platform byte order
-
-        force_string_length     
-            Variable-length strings will be converted to
-            Numpy strings of this length.
+        cnames:     HDF5 compound names used for complex I/O
     """
 
     # --- Properties (Dataset) ------------------------------------------------
 
-    def _set_native(self, val):
-        self._force_native = bool(val) if val is not None else None
+    #: Numpy-style shape tuple giving dataset dimensions
+    shape = property(lambda self: h5d.py_shape(self.id))
 
-    def _set_string_length(self, val):
-        self._string_length = val
+    #: Numpy dtype representing the datatype
+    dtype = property(lambda self: h5d.py_dtype(self.id))
 
-    names_mask = property(lambda self: self._fields)
-    names = property(lambda self: self.dtype.names)
+    def _set_byteorder(self, order):
+        if order is not None:
+            h5t._validate_byteorder(order)
+        self._byteorder = order
+    
+    #: Set to <, > or = to coerce I/O to a particular byteorder, or None to use default.
+    byteorder = property(lambda self: self._byteorder, _set_byteorder)
 
-    shape = property(lambda self: h5d.py_shape(self.id))
-    dtype = property(lambda self: h5d.py_dtype(self.id))
+    def _set_cnames(self, names):
+        if names is not None:
+            h5t._validate_complex(names)
+        self._cnames = names
 
-    force_native = property(lambda self: self._force_native, _set_native)
-    force_string_length = property(lambda self: self._string_length, _set_string_length)
+    #: Set to (realname, imgname) to control I/O of Python complex numbers.
+    cnames = property(lambda self: self._cnames, _set_cnames)
 
+    #: Provides access to HDF5 attributes. See AttributeManager docstring.
     attrs = property(lambda self: self._attrs)
 
     # --- Public interface (Dataset) ------------------------------------------
@@ -157,49 +156,41 @@ class Dataset(object):
                 raise ValueError('You cannot specify keywords when opening a dataset.')
             self.id = h5d.open(group.id, name)
 
-        self._fields = None
         self._attrs = AttributeManager(self)
-        self.force_native = None
-        self.force_string_length = None
+        self._byteorder = None
+        self._cnames = None
 
-    def __getitem__(self, *args):
-        """ Read a slice from the underlying HDF5 array.  Currently only
-            numerical slices are supported; for recarray-style access consider
-            using set_names_mask().
-        """
-        if any( [isinstance(x, basestring) for x in args] ):
-            raise TypeError("Slices must be numbers; recarray-style indexing is not yet supported.")
+    def __getitem__(self, args):
+        """ Read a slice from the underlying HDF5 array.  Takes slices and
+            recarray-style field names (more than one is allowed!) in any
+            order.  Examples:
+
+            ds[0,0:15,:] => (1 x 15 x <all>) slice on 3-dimensional dataset.
 
-        start, count, stride = _slices_to_tuples(args)
+            ds[:] => All elements, regardless of dimension.
 
-        return h5d.py_read_slab(self.id, start, count, stride, 
-                                compound_fields=self.names_mask,
-                                force_native=self.force_native)
+            ds[0:3, 1:4, "a", "b"] => (3 x 3) slice, only including compound
+                                      elements "a" and "b".
+        """
+        start, count, stride, names = slicer(self.shape, args)
+
+        return h5d.py_read_slab(self.id, start, count, stride,
+                                byteorder = self._byteorder, 
+                                compound_names = names,
+                                complex_names = self._cnames)
 
-    def __setitem__(self, *args):
+    def __setitem__(self, args):
         """ Write to the underlying array from an existing Numpy array.  The
             shape of the Numpy array must match the shape of the selection,
             and the Numpy array's datatype must be convertible to the HDF5
             array's datatype.
         """
-        start, count, stride = _slices_to_tuples(args[0:len(args)-1])
-        h5d.py_write_slab(self.id, args[-1], start, stride)
-
-    def set_names_mask(self, iterable=None):
-        """ Determine which fields of a compound datatype will be read. Only 
-            compound fields whose names match those provided by the given 
-            iterable will be read.  Any given names which do not exist in the
-            HDF5 compound type are simply ignored.
+        val = args[-1]
+        start, count, stride, names = slicer(val.shape, args[:-1])
+        if names is not None:
+            raise ValueError("Field names are not allowed for write.")
 
-            If the argument is a single string, it will be correctly processed
-            (i.e. not exploded).
-        """
-        if iterable == None:
-            self._fields = None
-        else:
-            if isinstance(iterable, basestring):
-                iterable = (iterable,)    # not 'i','t','e','r','a','b','l','e'
-            self._fields = tuple(iterable)
+        h5d.py_write_slab(self.id, args[-1], start, stride)
 
     def close(self):
         """ Force the HDF5 library to close and free this object.  You 
@@ -814,11 +805,13 @@ def slicer(shape, args):
 
     nslices = len(count)
 
-    # Check for lone ":"
+    # Check for lone ":" or no numeric slices, which in Numpy means the whole thing.
     if nslices == len(rawslices) == 1:
         slice_ = rawslices[0]
         if slice_.stop == None and slice_.step == None and slice_.stop == None:
             return ((0,)*rank, shape, (1,)*rank, names)
+    if nslices == 0:
+            return ((0,)*rank, shape, (1,)*rank, names)
 
     if nslices != rank:
         raise ValueError("Not enough slices (%d); dataset is rank-%d" % (nslices, rank))
diff --git a/h5py/proxy.py b/h5py/proxy.py
index ebaad34..3019d91 100644
--- a/h5py/proxy.py
+++ b/h5py/proxy.py
@@ -16,7 +16,7 @@ class DatasetProxy(object):
     def begin_proxy(self):
 
 
-        if self.proxy_id is not None:
+        if self.proxy_active():
             raise ProxyError("Already proxying.")
 
         fid = 0
@@ -53,9 +53,14 @@ class DatasetProxy(object):
         self._proxy_space = space_id
         self._proxy_id = proxy_id
 
+    def proxy_active(self):
+        if hasattr(self, '_proxy_id') and self._proxy_id is not None:
+            return True
+        return False
+
     def end_proxy(self):
 
-        if not hasattr(self, '_proxy_id') or self._proxy_id is None:
+        if not self.proxy_active():
             raise ProxyError("Not proxying.")
 
         h5s.close(self._proxy_space)
@@ -67,11 +72,11 @@ class DatasetProxy(object):
     def _read(self, start, count, stride=None, **kwds):
         """ Dataset read access.  In direct mode, simply reads data from 
             self.id.  In proxy mode, reads unmodified data from self.id and
-            modified sections from self._proxy_id)
+            modified sections from self._proxy_id.
 
             Don't call this directly.
         """
-        if self.proxy_id is None:
+        if not self.proxy_active():
             return h5d.py_read_slab(self.id, start, count, stride, **kwds)
 
         else:
@@ -119,7 +124,7 @@ class DatasetProxy(object):
 
     def _write(self, arr, start, stride=None):
         
-        if self.proxy_id is None:
+        if not self.proxy_active():
             h5d.py_write_slab(self.id, arr, start, stride)
         
         else:
@@ -132,15 +137,17 @@ class DatasetProxy(object):
 
     def commit(self):
 
-        h5d.py_patch(self._proxy_id, self.id, self._proxy_space)
-        h5s.select_none(self._proxy_space)
+        if self.proxy_active():
+            h5d.py_patch(self._proxy_id, self.id, self._proxy_space)
+            h5s.select_none(self._proxy_space)
 
     def rollback(self):
 
-        # Proxy file doesn't shrink, but space will be re-used.
-        # Worst case == proxy file is size of the original dataset, sans
-        # compression
-        h5s.select_none(self._proxy_space)
+        if self.proxy_active():
+            # Proxy file doesn't shrink, but space will be re-used.
+            # Worst case == proxy file is size of the original dataset, sans
+            # compression
+            h5s.select_none(self._proxy_space)
             
         
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git



More information about the debian-science-commits mailing list