[h5py] 333/455: Add soft links, external links, references and region references

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:47 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit be835b35def2779245df2ce96798d17bb7b942f1
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Sat Dec 12 22:19:57 2009 +0000

    Add soft links, external links, references and region references
---
 h5py/__init__.py   |  21 +++--
 h5py/h5r.pxd       |   4 +
 h5py/h5r.pyx       |  37 ++++++---
 h5py/h5t.pyx       | 228 ++++++++++++++++++++++++++++++-----------------------
 h5py/highlevel.py  | 180 ++++++++++++++++++++++++++++++------------
 h5py/selections.py | 132 ++++++++++++++++++++++++-------
 6 files changed, 410 insertions(+), 192 deletions(-)

diff --git a/h5py/__init__.py b/h5py/__init__.py
index ea245c4..bf578b5 100644
--- a/h5py/__init__.py
+++ b/h5py/__init__.py
@@ -23,6 +23,8 @@ __doc__ = \
 try:
     import h5
 except ImportError, e:
+    # Many people try to load h5py after compiling, which fails in the
+    # presence of the source directory
     import os.path as op
     if op.exists('setup.py'):
         raise ImportError('Import error:\n"%s"\n\nBe sure to exit source directory before importing h5py' % e)
@@ -32,20 +34,27 @@ except ImportError, e:
 import h5, h5a, h5d, h5f, h5fd, h5g, h5l, h5o, h5i, h5p, h5r, h5s, h5t, h5z
 import highlevel, filters, selections, version
 
-# Re-export high-level interface to package level
-from highlevel import File, Group, Dataset, Datatype, AttributeManager, \
-                      is_hdf5, \
-                      new_vlen, new_enum, get_vlen, get_enum
-
 from h5 import get_config
 from h5e import H5Error
 
+from highlevel import File, Group, Dataset, Datatype, AttributeManager, is_hdf5
+
+# New way to handle special types
+from h5t import special_dtype, check_dtype
+
+# Deprecated way to handle special types
+# These are going away in 1.4
+from h5t import py_new_vlen as new_vlen
+from h5t import py_get_vlen as get_vlen
+from h5t import py_new_enum as new_enum
+from h5t import py_get_enum as get_enum
 
 __doc__ = __doc__ % (version.version, version.hdf5_version, version.api_version)
 
 __all__ = ['h5', 'h5f', 'h5g', 'h5s', 'h5t', 'h5d', 'h5a', 'h5p', 'h5r',
            'h5o', 'h5l', 'h5z', 'h5i', 'version', 'File', 'Group', 'Dataset',
-           'Datatype', 'AttributeManager', 'H5Error', 'get_config', 'is_hdf5']
+           'Datatype', 'AttributeManager', 'H5Error', 'get_config', 'is_hdf5',
+           'special_dtype', 'check_dtype']
 
 try:
     try:
diff --git a/h5py/h5r.pxd b/h5py/h5r.pxd
index bf9fd89..40c5ac1 100644
--- a/h5py/h5r.pxd
+++ b/h5py/h5r.pxd
@@ -21,3 +21,7 @@ cdef class Reference:
     cdef ref_u ref
     cdef readonly int typecode
 
+cdef class RegionReference(Reference):
+
+    pass
+
diff --git a/h5py/h5r.pyx b/h5py/h5r.pyx
index 5c124cb..cdc28e7 100644
--- a/h5py/h5r.pyx
+++ b/h5py/h5r.pyx
@@ -21,6 +21,8 @@ from h5 cimport init_hdf5, ObjectID
 from h5i cimport wrap_identifier
 from h5s cimport SpaceID
 
+from python cimport PyString_FromStringAndSize
+
 # Initialization
 init_hdf5()
 
@@ -50,7 +52,12 @@ def create(ObjectID loc not None, char* name, int ref_type, SpaceID space=None):
     """
     cdef hid_t space_id
     cdef Reference ref
-    ref = Reference()
+    if ref_type == H5R_OBJECT:
+        ref = Reference()
+    elif ref_type == H5R_DATASET_REGION:
+        ref = RegionReference()
+    else:
+        raise ValueError("Unknown reference typecode")
     if space is None:
         space_id = -1
     else:
@@ -70,14 +77,14 @@ def dereference(Reference ref not None, ObjectID id not None):
     in the file) must also be provided.  Returns None if the reference
     is zero-filled.
 
-    The reference type may be either OBJECT or DATASET_REGION.
+    The reference may be either Reference or RegionReference.
     """
     if not ref:
         return None
     return wrap_identifier(H5Rdereference(id.id, <H5R_type_t>ref.typecode, &ref.ref))
 
 
-def get_region(Reference ref not None, ObjectID id not None):
+def get_region(RegionReference ref not None, ObjectID id not None):
     """(Reference ref, ObjectID id) => SpaceID or None
 
     Retrieve the dataspace selection pointed to by the reference.
@@ -86,8 +93,8 @@ def get_region(Reference ref not None, ObjectID id not None):
     object in the file (including the dataset itself) must also be
     provided.
 
-    The reference object must be of type DATASET_REGION.  If it's not, or
-    if the reference is zero-filled, returns None.
+    The reference object must be a RegionReference.  If it is zero-filled,
+    returns None.
     """
     if ref.typecode != H5R_DATASET_REGION or not ref:
         return None
@@ -97,8 +104,8 @@ def get_region(Reference ref not None, ObjectID id not None):
 def get_obj_type(Reference ref not None, ObjectID id not None):
     """(Reference ref, ObjectID id) => INT obj_code or None
 
-    Determine what type of object the eference points to.  The
-    reference may be either type OBJECT or DATASET_REGION.  The file
+    Determine what type of object the reference points to.  The
+    reference may be a Reference or RegionReference.  The file
     identifier or the identifier of any object in the file must also
     be provided.
 
@@ -155,14 +162,24 @@ cdef class Reference:
         return False
 
     def __repr__(self):
-        empty_str = "non-empty" if self else "empty"
         if self.typecode == H5R_OBJECT:
-            return "<HDF5 object reference (%s)>" % empty_str
+            desc_str = PyString_FromStringAndSize(<char*>&self.ref, sizeof(hobj_ref_t))
+            return "<HDF5 object reference (%r)>" % desc_str
         elif self.typecode == H5R_DATASET_REGION:
-            return "<HDF5 dataset region reference (%s)>" % empty_str
+            desc_str = PyString_FromStringAndSize(<char*>&self.ref, sizeof(hdset_reg_ref_t))
+            return "<HDF5 dataset region reference (%r)>" % desc_str
         return "<Invalid HDF5 reference>"
 
+cdef class RegionReference(Reference):
+
+    """
+        Opaque representation of an HDF5 region reference.
+
+        This is a subclass of Reference which exists mainly for programming
+        convenience.
+    """
 
+    pass
 
 
 
diff --git a/h5py/h5t.pyx b/h5py/h5t.pyx
index 5f76522..ddef792 100644
--- a/h5py/h5t.pyx
+++ b/h5py/h5t.pyx
@@ -25,6 +25,7 @@ from h5 cimport init_hdf5, H5PYConfig, get_config
 from h5p cimport PropID, pdefault
 from numpy cimport dtype, ndarray
 from python_string cimport PyString_FromStringAndSize
+from h5r cimport Reference, RegionReference
 
 from utils cimport  emalloc, efree, \
                     require_tuple, convert_dims, convert_tuple
@@ -619,7 +620,7 @@ cdef class TypeStringID(TypeID):
     cdef object py_dtype(self):
         # Numpy translation function for string types
         if self.is_variable_str():
-            return py_new_vlen(str)
+            return special_dtype(vlen=str)
 
         return dtype("|S" + str(self.get_size()))
 
@@ -652,9 +653,9 @@ cdef class TypeReferenceID(TypeID):
     
     cdef object py_dtype(self):
         if H5Tequal(self.id, H5T_STD_REF_OBJ):
-            return py_new_ref(H5R_OBJECT)
+            return special_dtype(ref=Reference)
         elif H5Tequal(self.id, H5T_STD_REF_DSETREG):
-            return py_new_ref(H5R_DATASET_REGION)
+            return special_dtype(ref=RegionReference)
         else:
             raise TypeError("Unknown reference type")
 
@@ -1147,7 +1148,7 @@ cdef class TypeEnumID(TypeCompositeID):
         if members == ref:
             return dtype('bool')
     
-        return py_new_enum(basetype.py_dtype(), members)
+        return special_dtype(enum=(basetype.py_dtype(), members))
 
 
 # === Translation from NumPy dtypes to HDF5 type objects ======================
@@ -1317,10 +1318,10 @@ cdef TypeStringID _c_vlen_str(object basetype):
     H5Tset_size(tid, H5T_VARIABLE)
     return TypeStringID(tid)
 
-cdef TypeReferenceID _c_ref(int typecode):
-    if typecode == H5R_OBJECT:
+cdef TypeReferenceID _c_ref(object refclass):
+    if refclass is Reference:
         return STD_REF_OBJ
-    elif typecode == H5R_DATASET_REGION:
+    elif refclass is RegionReference:
         return STD_REF_DSETREG
     raise TypeError("Unrecognized reference code")
 
@@ -1353,7 +1354,7 @@ cpdef TypeID py_create(object dtype_in, bint logical=0):
 
         if logical:
             # Check for an enumeration hint
-            enum_vals = py_get_enum(dt)
+            enum_vals = check_dtype(enum=dt)
             if enum_vals is not None:
                 return _c_enum(dt, enum_vals)
 
@@ -1386,13 +1387,13 @@ cpdef TypeID py_create(object dtype_in, bint logical=0):
     elif kind == c'O':
 
         if logical:
-            vlen = py_get_vlen(dt)
+            vlen = check_dtype(vlen=dt)
             if vlen is not None:
                 return _c_vlen_str(vlen)
 
-            refcode = py_get_ref(dt)
-            if refcode is not None:
-                    return _c_ref(refcode)
+            refclass = check_dtype(ref=dt)
+            if refclass is not None:
+                    return _c_ref(refclass)
 
             raise TypeError("Object dtype %r has no native HDF5 equivalent" % (dt,))
 
@@ -1401,101 +1402,97 @@ cpdef TypeID py_create(object dtype_in, bint logical=0):
     # Unrecognized
     else:
         raise TypeError("No conversion path for dtype: %s" % repr(dt))
-
-cpdef dtype py_new_enum(object dt_in, dict enum_vals):
-    """ (DTYPE dt_in, DICT enum_vals) => DTYPE
-
-    Create a new NumPy integer dtype, which contains "hint" metadata for
-    an enum. Only dtypes of kind 'i' or 'u' are allowed.  The enum_vals
-    dict must consist only of string keys and integer values.
-    """
-
-    cdef dtype dt = dtype(dt_in)
-    if dt.kind != 'i' and dt.kind != 'u':
-        raise TypeError("Only integer types can be used as enums")
-
-    return dtype((dt, [( ({'vals': enum_vals},'enum'), dt )] ))
-    
-cpdef dict py_get_enum(object dt):
-    """ (DTYPE dt_in) => DICT
-
-    Determine the enum values associated with a "hinted" NumPy integer type.
-    Returns None if the type does not contain hints, or is of the wrong kind.
-    """
-    
-    if dt.kind != 'i' and dt.kind != 'u':
-        return None
-
-    if dt.fields is not None and 'enum' in dt.fields:
-        tpl = dt.fields['enum']
-        if len(tpl) == 3:
-            info_dict = tpl[2]
-            if 'vals' in info_dict:
-                return info_dict['vals']
-
-    return None
 
-cpdef dtype py_new_ref(int typecode):
-    """ (INT typecode) => DTYPE
+def special_dtype(**kwds):
+    """ Create a new h5py "special" type.  Only one keyword may be given.
 
-    Create a NumPy object type representing an HDF5 reference.  The typecode
-    should be one of:
-    
-    - h5r.OBJECT
-    - h5r.DATASET_REGION
-    """
-    return dtype(('O', [( ({'type': typecode},'hdf5ref'), 'O' )] ))
+    Legal keywords are:
+
+    vlen = basetype
+        Base type for HDF5 variable-length datatype.  Currently only the
+        builtin string class (str) is allowed.
+        Example: special_dtype( vlen=str )
 
-cpdef object py_get_ref(object dt_in):
-    """ (DTYPE dt_in) => INT typecode or None
+    enum = (basetype, values_dict)
+        Create a NumPy representation of an HDF5 enumerated type.  Provide
+        a 2-tuple containing an (integer) base dtype and a dict mapping
+        string names to integer values.
 
-    Determine what kind of reference this dtype represents.  Returns None
-    if it's not a reference.
+    ref = Reference | RegionReference
+        Create a NumPy representation of an HDF5 object or region reference
+        type.
     """
-    cdef dtype dt = dtype(dt_in)
+    
+    if len(kwds) != 1:
+        raise TypeError("Exactly one keyword may be provided")
 
-    if dt.kind != 'O':
-        return None
-
-    if dt.fields is not None and 'hdf5ref' in dt.fields:
-        tpl = dt.fields['hdf5ref']
-        if len(tpl) == 3:
-            hint_dict = tpl[2]
-            if 'type' in hint_dict:
-                return hint_dict['type']
-
-    return None
-
-cpdef dtype py_new_vlen(object kind):
-    """ (OBJECT kind) => DTYPE
-
-    Create a new NumPy object dtype, which contains "hint" metadata
-    identifying the proper HDF5 vlen base type.  For now, only the native
-    Python string object (str) is supported.
-    """
-    if kind is not str:
-        raise NotImplementedError("Only string vlens are currently supported")
-
-    return dtype(('O', [( ({'type': kind},'vlen'), 'O' )] ))
-
-cpdef object py_get_vlen(object dt_in):
-    """ (OBJECT dt_in) => TYPE
-
-    Determine the vlen "hint" type associated with a NumPy object type,
-    or None if the dtype does not contain a hint or is not of kind "O".
-    """
-    cdef dtype dt = dtype(dt_in)
+    name, val = kwds.popitem()
+
+    if name == 'vlen':
+        if val is not str:
+            raise NotImplementedError("Only string vlens are currently supported")
 
-    if dt.kind != 'O':
-        return None
+        return dtype(('O', [( ({'type': val},'vlen'), 'O' )] ))
+
+    if name == 'enum':
+        # Wrong-length sequences raise ValueError, non-iterables TypeError
+        try:
+            dt, enum_vals = val
+        except (TypeError, ValueError):
+            raise TypeError("Enums must be created from a 2-tuple (basetype, values_dict)")
+
+        dt = dtype(dt)
+        if dt.kind not in "iu":
+            raise TypeError("Only integer types can be used as enums")
 
-    if dt.fields is not None and 'vlen' in dt.fields:
-        tpl = dt.fields['vlen']
+        return dtype((dt, [( ({'vals': enum_vals},'enum'), dt )] ))
+
+    if name == 'ref':
+
+        if val not in (Reference, RegionReference):
+            raise ValueError("Ref class must be Reference or RegionReference")
+
+        return dtype(('O', [( ({'type': val},'hdf5ref'), 'O' )] ))
+
+    raise TypeError('Unknown special type "%s"' % name)
+   
+def check_dtype(**kwds):
+    """ Check a dtype for h5py special type "hint" information.  Only one
+    keyword may be given.
+
+    vlen = dtype
+        If the dtype represents an HDF5 vlen, returns the Python base class.
+        Currently only builtin string vlens (str) are supported.  Returns
+        None if the dtype does not represent an HDF5 vlen.
+
+    enum = dtype
+        If the dtype represents an HDF5 enumerated type, returns the dictionary
+        mapping string names to integer values.  Returns None if the dtype does
+        not represent an HDF5 enumerated type.
+
+    ref = dtype
+        If the dtype represents an HDF5 reference type, returns the reference
+        class (either Reference or RegionReference).  Returns None if the dtype
+        does not represent an HDF5 reference type.
+    """
+
+    if len(kwds) != 1:
+        raise TypeError("Exactly one keyword may be provided")
+
+    name, dt = kwds.popitem()
+
+    if name not in ('vlen', 'enum', 'ref'):
+        raise TypeError('Unknown special type "%s"' % name)
+
+    hintkey = 'vals' if name == 'enum' else 'type'
+    field = 'hdf5ref' if name == 'ref' else name  # special_dtype() stores refs under 'hdf5ref'
+    if dt.fields is not None and field in dt.fields:
+        tpl = dt.fields[field]
         if len(tpl) == 3:
             hint_dict = tpl[2]
-            if 'type' in hint_dict:
-                return hint_dict['type']
-
+            if hintkey in hint_dict:
+                return hint_dict[hintkey]
+
     return None
 
 def convert(TypeID src not None, TypeID dst not None, size_t n,
@@ -1538,5 +1535,42 @@ def find(TypeID src not None, TypeID dst not None):
         return (data[0].need_bkg,)
     except:
         return None
+
+# ============================================================================
+# Deprecated functions
+
+import warnings
+
+cpdef dtype py_new_enum(object dt_in, dict enum_vals):
+    """ (DTYPE dt_in, DICT enum_vals) => DTYPE
+
+    Deprecated; use special_dtype() instead.
+    """
+    warnings.warn("Deprecated; use special_dtype(enum=(dtype, values)) instead", DeprecationWarning)
+    return special_dtype(enum = (dt_in, enum_vals))
+
+cpdef dict py_get_enum(object dt):
+    """ (DTYPE dt_in) => DICT
+
+    Deprecated; use check_dtype() instead.
+    """
+    warnings.warn("Deprecated; use check_dtype(enum=dtype) instead", DeprecationWarning)
+    return check_dtype(enum=dt)
+
+cpdef dtype py_new_vlen(object kind):
+    """ (OBJECT kind) => DTYPE
+
+    Deprecated; use special_dtype() instead.
+    """
+    warnings.warn("Deprecated; use special_dtype(vlen=basetype) instead", DeprecationWarning)
+    return special_dtype(vlen=kind)
+
+cpdef object py_get_vlen(object dt_in):
+    """ (OBJECT dt_in) => TYPE
+
+    Deprecated; use check_dtype() instead.
+    """
+    warnings.warn("Deprecated; use check_dtype(vlen=dtype) instead", DeprecationWarning)
+    return check_dtype(vlen=dt_in)
 
 
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index 5c38253..69670e8 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -118,21 +118,10 @@ class HLObject(object):
     def _lock(self):
         return self.file._fidlock
 
-    def ref(self, path=None, selection=None):
-        """Create an object reference
-        """
-        return h5r.create(self.id, '.' if path is None else path, h5r.OBJECT)
-
-    def deref(self, ref):
-        """Dereference an object reference
-        """
-        kind = h5r.get_obj_type(ref, self.id)
-        if kind == h5g.GROUP:
-            return Group(self, None, _rawid=h5r.dereference(ref, self.id))
-        elif kind == h5g.DATASET:
-            return Dataset(self, None, _rawid=h5r.dereference(ref, self.id))
-        else:
-            raise TypeError("Unrecognized object type")
+    @property
+    def ref(self):
+        """ An (opaque) HDF5 reference to this object """
+        return h5r.create(self.id, '.', h5r.OBJECT)
         
     def __init__(self, parent):
         if not isinstance(self, File):
@@ -190,7 +179,7 @@ class _DictCompat(object):
             for x in self:
                 yield (x, self[x])
 
-    def get(self, name, default):
+    def get(self, name, default=None):
         """ Retrieve the member, or return default if it doesn't exist """
         with self._lock:
             if name in self:
@@ -261,9 +250,12 @@ class Group(HLObject, _DictCompat):
         The action taken depends on the type of object assigned:
 
         Named HDF5 object (Dataset, Group, Datatype)
-            A hard link is created in this group which points to the
+            A hard link is created at "name" which points to the
             given object.
 
+        SoftLink or ExternalLink instance
+            Create the corresponding link.
+
         Numpy ndarray
             The array is converted to a dataset object, with default
             settings (contiguous storage, etc.).
@@ -280,6 +272,12 @@ class Group(HLObject, _DictCompat):
             if isinstance(obj, Group) or isinstance(obj, Dataset) or isinstance(obj, Datatype):
                 self.id.link(h5i.get_name(obj.id), name, link_type=h5g.LINK_HARD)
 
+            elif isinstance(obj, SoftLink):
+                self.id.link(obj.path, name, link_type=h5g.LINK_SOFT)
+    
+            elif isinstance(obj, ExternalLink):
+                self.id.links.create_external(name, obj.filename, obj.path)
+
             elif isinstance(obj, numpy.dtype):
                 htype = h5t.py_create(obj)
                 htype.commit(self.id, name)
@@ -291,6 +289,18 @@ class Group(HLObject, _DictCompat):
         """ Open an object attached to this group. 
         """
         with self._lock:
+
+            if isinstance(name, h5r.Reference):
+                if not name:
+                    raise ValueError("Empty reference")
+                kind = h5r.get_obj_type(name, self.id)
+                if kind == h5g.GROUP:
+                    return Group(self, None, _rawid=h5r.dereference(name, self.id))
+                elif kind == h5g.DATASET:
+                    return Dataset(self, None, _rawid=h5r.dereference(name, self.id))
+                else:
+                    raise ValueError("Unrecognized object type")
+
             info = h5g.get_objinfo(self.id, name)
 
             if info.type == h5g.DATASET:
@@ -410,6 +420,59 @@ class Group(HLObject, _DictCompat):
             
             return dset
 
+    def get(self, name, default=None, getclass=False, dereference=True):
+        """ Retrieve item "name", or "default" if it's not in this group.
+
+        getclass
+            If True, returns the class of object (Group, Dataset, etc.)
+            instead of the object itself.
+
+        dereference
+            If True (default), follow soft and external links and retrieve
+            the objects they point to.  If False, return SoftLink and
+            ExternalLink instances instead.
+        """
+        with self._lock:
+
+            if name not in self:
+                return default
+
+            if config.API_18:
+                # Link queries go through the same self.id.links proxy used for create_external
+                linkinfo = self.id.links.get_info(name)
+
+                if dereference or linkinfo.type == h5l.TYPE_HARD:
+
+                    objinfo = h5o.get_info(self.id, name)
+                    cls = {h5o.TYPE_GROUP: Group, h5o.TYPE_DATASET: Dataset,
+                           h5o.TYPE_NAMED_DATATYPE: Datatype}.get(objinfo.type)
+                    if cls is None:
+                        raise TypeError("Unknown object type")
+
+                    return cls if getclass else cls(self, name)
+
+                else:
+                    if linkinfo.type == h5l.TYPE_SOFT:
+                        return SoftLink(self.id.links.get_val(name))
+                    elif linkinfo.type == h5l.TYPE_EXTERNAL:
+                        return ExternalLink(*self.id.links.get_val(name))
+
+                    raise TypeError("Unknown link class")
+
+            # API 1.6
+            info = h5g.get_objinfo(self.id, name, follow_link=dereference)
+
+            cls = {h5g.DATASET: Dataset, h5g.GROUP: Group,
+                   h5g.TYPE: Datatype}.get(info.type)
+
+            if cls is not None:
+                return cls if getclass else cls(self, name)
+
+            if not dereference and info.type == h5g.LINK:
+                return SoftLink(self.id.get_linkval(name))
+                    
+            raise TypeError("Unknown object type")
+
     # New 1.8.X methods
 
     def copy(self, source, dest, name=None):
@@ -709,6 +772,16 @@ class File(Group):
         except Exception:
             pass
 
+class _RegionProxy(object):
+
+    def __init__(self, dset):
+        self.id = dset.id
+
+    def __getitem__(self, args):
+        
+        selection = sel.select(self.id.shape, args, dsid=self.id)
+        return h5r.create(self.id, '.', h5r.DATASET_REGION, selection._id)
+
 class Dataset(HLObject):
 
     """ High-level interface to an HDF5 dataset.
@@ -781,6 +854,10 @@ class Dataset(HLObject):
             dims = space.get_simple_extent_dims(True)
             return tuple(x if x != h5s.UNLIMITED else None for x in dims)
 
+    @property
+    def regionref(self):
+        return self._regionproxy
+
     def __init__(self, group, name,
                     shape=None, dtype=None, data=None,
                     chunks=None, compression=None, shuffle=None,
@@ -888,6 +965,7 @@ class Dataset(HLObject):
                     self.id.write(h5s.ALL, h5s.ALL, data)
 
             self._attrs = AttributeManager(self)
+            self._regionproxy = _RegionProxy(self)
             plist = self.id.get_create_plist()
             self._filters = filters.get_filters(plist)
             if plist.get_layout() == h5d.CHUNKED:
@@ -995,7 +1073,7 @@ class Dataset(HLObject):
                 new_dtype = numpy.dtype([(name, basetype.fields[name][0]) for name in names])
 
             # Perform the dataspace selection.
-            selection = sel.select(self.shape, args)
+            selection = sel.select(self.shape, args, dsid=self.id)
 
             if selection.nselect == 0:
                 return numpy.ndarray((0,), dtype=new_dtype)
@@ -1059,7 +1137,7 @@ class Dataset(HLObject):
                 mtype = None
 
             # Perform the dataspace selection
-            selection = sel.select(self.shape, args)
+            selection = sel.select(self.shape, args, dsid=self.id)
 
             if selection.nselect == 0:
                 return
@@ -1097,13 +1175,13 @@ class Dataset(HLObject):
         if source_sel is None:
             source_sel = sel.SimpleSelection(self.shape)
         else:
-            source_sel = sel.select(self.shape, source_sel)  # for numpy.s_
+            source_sel = sel.select(self.shape, source_sel, self.id)  # for numpy.s_
         fspace = source_sel._id
 
         if dest_sel is None:
             dest_sel = sel.SimpleSelection(dest.shape)
         else:
-            dest_sel = sel.select(dest.shape, dest_sel)
+            dest_sel = sel.select(dest.shape, dest_sel, self.id)
 
         for mspace in dest_sel.broadcast(source_sel.mshape):
             self.id.read(mspace, fspace, dest)
@@ -1121,13 +1199,13 @@ class Dataset(HLObject):
         if source_sel is None:
             source_sel = sel.SimpleSelection(source.shape)
         else:
-            source_sel = sel.select(source.shape, source_sel)  # for numpy.s_
+            source_sel = sel.select(source.shape, source_sel, self.id)  # for numpy.s_
         mspace = source_sel._id
 
         if dest_sel is None:
             dest_sel = sel.SimpleSelection(self.shape)
         else:
-            dest_sel = sel.select(self.shape, dest_sel)
+            dest_sel = sel.select(self.shape, dest_sel, self.id)
 
         for fspace in dest_sel.broadcast(source_sel.mshape):
             self.id.write(mspace, fspace, source)
@@ -1324,43 +1402,45 @@ class Datatype(HLObject):
             except Exception:
                 return "<Closed HDF5 named type>"
 
+class SoftLink(object):
 
-# Re-export functions for new type infrastructure
+    """
+        Represents a symbolic ("soft") link in an HDF5 file.  The path
+        may be absolute or relative.  No checking is performed to ensure
+        that the target actually exists.
+    """
 
-def new_vlen(basetype):
-    """ Create a NumPy dtype representing a variable-length type.
+    @property
+    def path(self):
+        return self._path
 
-    Currently only the native string type (str) is allowed.
+    def __init__(self, path):
+        self._path = str(path)
 
-    The kind of the returned dtype is always "O"; metadata attached to the
-    dtype allows h5py to perform translation between HDF5 VL types and
-    native Python objects.
-    """
-    return h5t.py_new_vlen(basetype)
+    def __repr__(self):
+        return '<SoftLink to "%s">' % self.path
 
-def get_vlen(dtype):
-    """ Return the "base" type from a NumPy dtype which represents a 
-    variable-length type, or None if the type is not of variable length.
+class ExternalLink(object):
 
-    Currently only variable-length strings, created with new_vlen(), are
-    supported.
     """
-    return h5t.py_get_vlen(dtype)
-
-def new_enum(dtype, values):
-    """ Create a new enumerated type, from an integer base type and dictionary
-    of values.
-
-    The values dict should contain string keys and int/long values.
+        Represents an HDF5 external link.  Paths may be absolute or relative.
+        No checking is performed to ensure either the target or file exists.
     """
-    return h5t.py_new_enum(numpy.dtype(dtype), values)
 
-def get_enum(dtype):
-    """ Extract the values dictionary from an enumerated type, returning None
-    if the given dtype does not represent an enum.
-    """
-    return h5t.py_get_enum(dtype)
+    @property
+    def path(self):
+        return self._path
 
+    @property
+    def filename(self):
+        return self._filename
 
+    def __init__(self, filename, path):
+        if not config.API_18:
+            raise NotImplementedError("External links are only available as of HDF5 1.8")
+        self._filename = str(filename)
+        self._path = str(path)
 
+    def __repr__(self):
+        return '<ExternalLink to "%s" in file "%s">' % (self.path, self.filename)
 
diff --git a/h5py/selections.py b/h5py/selections.py
index 1080eca..c9319d7 100644
--- a/h5py/selections.py
+++ b/h5py/selections.py
@@ -16,7 +16,7 @@
 
 import numpy as np
 
-from h5py import h5s
+from h5py import h5s, h5r
 
 # Selection types for hyperslabs
 from h5py.h5s import SELECT_SET  as SET
@@ -26,25 +26,61 @@ from h5py.h5s import SELECT_XOR  as XOR
 from h5py.h5s import SELECT_NOTB as NOTB
 from h5py.h5s import SELECT_NOTA as NOTA
 
-def select(shape, args):
-    """ Automatically determine the correct selection class, perform the
-        selection, and return the selection instance.  Args may be a single
-        argument or a tuple of arguments.
+def select(shape, args, dsid):
+    """ High-level routine to generate a selection from arbitrary arguments
+    to __getitem__.  The arguments should be the following:
+
+    shape
+        Shape of the "source" dataspace.
+
+    args
+        Either a single argument or a tuple of arguments.  See below for
+        supported classes of argument.
+    
+    dsid
+        A h5py.h5d.DatasetID instance representing the source dataset.
+
+    Argument classes:
+
+    Single Selection instance
+        Returns the argument.
+
+    numpy.ndarray
+        Must be a boolean mask.  Returns a PointSelection instance.
+
+    RegionReference
+        Returns a Selection instance.
+
+    Indices, slices, ellipses only
+        Returns a SimpleSelection instance
+
+    Indices, slices, ellipses, lists or boolean index arrays
+        Returns a FancySelection instance.
     """
     if not isinstance(args, tuple):
         args = (args,)
 
+    # "Special" indexing objects
     if len(args) == 1:
+
         arg = args[0]
         if isinstance(arg, Selection):
-            if arg.shape == shape:
-                return arg
-            raise TypeError("Mismatched selection shape")
+            if arg.shape != shape:
+                raise TypeError("Mismatched selection shape")
+            return arg
+
         elif isinstance(arg, np.ndarray):
             sel = PointSelection(shape)
             sel[arg]
             return sel
 
+        elif isinstance(arg, h5r.RegionReference):
+            sid = h5r.get_region(arg, dsid)
+            if shape != sid.shape:
+                raise TypeError("Reference shape does not match dataset shape")
+                
+            return Selection(shape, spaceid=sid)
+
     for a in args:
         if not isinstance(a, slice) and a is not Ellipsis:
             try:
@@ -58,6 +94,29 @@ def select(shape, args):
     sel[args]
     return sel
 
+class _RegionProxy(object):
+
+    """
+        Thin proxy object which takes __getitem__-style index arguments and
+        produces RegionReference objects.  Example:
+
+        >>> dset = myfile['dataset']
+        >>> myref = dset.regionref[0:100,20:30]
+        >>> data = dset[myref]
+
+    """
+
+    def __init__(self, dsid):
+        """ Supply a h5py.h5d.DatasetID instance """
+        self.id = dsid
+
+    def __getitem__(self, args):
+        """ Takes arbitrary selection terms and produces a RegionReference
+        object.  Selection must be compatible with the dataset.
+        """
+        selection = select(self.id.shape, args, dsid=self.id)
+        return h5r.create(self.id, '.', h5r.DATASET_REGION, selection.id)
+
 class Selection(object):
 
     """
@@ -70,6 +129,7 @@ class Selection(object):
                              What args are allowed depends on the
                              particular subclass in use.
 
+        id (read-only) =>      h5py.h5s.SpaceID instance
         shape (read-only) =>   The shape of the dataspace.
         mshape  (read-only) => The shape of the selection region. 
                                Not guaranteed to fit within "shape", although
@@ -80,13 +140,25 @@ class Selection(object):
 
         broadcast(target_shape) => Return an iterable which yields dataspaces
                                    for read, based on target_shape.
+
+        The base class represents "unshaped" selections (1-D).
     """
 
-    def __init__(self, shape):
-        shape = tuple(shape)
-        self._id = h5s.create_simple(shape, (h5s.UNLIMITED,)*len(shape))
-        self._id.select_all()
-        self._shape = shape
+    def __init__(self, shape, spaceid=None):
+        """ Create a selection.  Shape may be None if spaceid is given. """
+        if spaceid is not None:
+            self._id = spaceid
+            self._shape = spaceid.shape
+        else:
+            shape = tuple(shape)
+            self._shape = shape
+            self._id = h5s.create_simple(shape, (h5s.UNLIMITED,)*len(shape))
+            self._id.select_all()
+
+    @property
+    def id(self):
+        """ SpaceID instance """
+        return self._id
 
     @property
     def shape(self):
@@ -98,15 +170,9 @@ class Selection(object):
         """ Number of elements currently selected """
         return self._id.get_select_npoints()
 
-class _Selection_1D(Selection):
-
-    """
-        Base class for selections which result in a 1-D shape, as with
-        NumPy indexing via boolean mask arrays.
-    """
-
     @property
     def mshape(self):
+        """ Shape of selection (always 1-D for this class) """
         return (self.nselect,)
 
     def broadcast(self, target_shape):
@@ -115,8 +181,10 @@ class _Selection_1D(Selection):
             raise TypeError("Broadcasting is not supported for point-wise selections")
         yield self._id
 
+    def __getitem__(self, args):
+        raise NotImplementedError("This class does not support indexing")
 
-class PointSelection(_Selection_1D):
+class PointSelection(Selection):
 
     """
         Represents a point-wise selection.  You can supply sequences of
@@ -168,11 +236,16 @@ class SimpleSelection(Selection):
         and integer arguments.  Can participate in broadcasting.
     """
 
-    def __init__(self, shape):
-        Selection.__init__(self, shape)
+    @property
+    def mshape(self):
+        """ Shape of current selection """
+        return self._mshape
+
+    def __init__(self, shape, *args, **kwds):
+        Selection.__init__(self, shape, *args, **kwds)
         rank = len(self.shape)
         self._sel = ((0,)*rank, self.shape, (1,)*rank, (False,)*rank)
-        self.mshape = self.shape
+        self._mshape = self.shape
 
     def __getitem__(self, args):
 
@@ -191,7 +264,7 @@ class SimpleSelection(Selection):
 
         self._sel = (start, count, step, scalar)
 
-        self.mshape = tuple(x for x, y in zip(count, scalar) if not y)
+        self._mshape = tuple(x for x, y in zip(count, scalar) if not y)
 
         return self
 
@@ -241,7 +314,7 @@ class SimpleSelection(Selection):
                 yield sid
 
 
-class HyperSelection(_Selection_1D):
+class HyperSelection(Selection):
 
     """
         Represents multiple overlapping rectangular selections, combined
@@ -317,14 +390,15 @@ class FancySelection(Selection):
 
         Broadcasting is not supported for these selections.
     """
-    def __init__(self, shape):
-        Selection.__init__(self, shape)
-        self._mshape = shape
 
     @property
     def mshape(self):
         return self._mshape
 
+    def __init__(self, shape, *args, **kwds):
+        Selection.__init__(self, shape, *args, **kwds)
+        self._mshape = self.shape
+
     def __getitem__(self, args):
 
         if not isinstance(args, tuple):

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git



More information about the debian-science-commits mailing list