[h5py] 197/455: Clean up and refactor selections

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:32 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit 72d740bbdc848a0f1e4d3bcde6830d62e1621770
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Tue Jan 27 01:20:06 2009 +0000

    Clean up and refactor selections
---
 h5py/highlevel.py            |  42 +++---
 h5py/selections.py           | 343 +++++++++++++++++++++++++++----------------
 h5py/tests/test_highlevel.py |   6 +-
 3 files changed, 240 insertions(+), 151 deletions(-)

diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index ebb512d..b82d6b2 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -870,12 +870,12 @@ class Dataset(HLObject):
 
             args = args if isinstance(args, tuple) else (args,)
 
-            # 1. Sort field indices from the rest of the args.
+            # Sort field indices from the rest of the args.
             names = tuple(x for x in args if isinstance(x, str))
             args = tuple(x for x in args if not isinstance(x, str))
 
-            # 2. Create NumPy datatype for read, using only the named fields
-            #    as specified by the user.
+            # Create NumPy datatype for read, using only the named fields
+            # as specified by the user.
             basetype = self.id.dtype
             if len(names) == 0:
                 new_dtype = basetype
@@ -885,19 +885,21 @@ class Dataset(HLObject):
                         raise ValueError("Field %s does not appear in this type." % name)
                 new_dtype = numpy.dtype([(name, basetype.fields[name][0]) for name in names])
 
-            # 3. Perform the dataspace selection.
-            selection = sel.FancySelection(self.shape)
-            selection[args] = sel.SET
+            # Perform the dataspace selection.
+            selection = sel.select(self.shape, args)
 
-            # 4. Create the output array using information from the selection.
+            if selection.nselect == 0:
+                return numpy.ndarray((0,), dtype=new_dtype)
+
+            # Create the output array using information from the selection.
             arr = numpy.ndarray(selection.mshape, new_dtype, order='C')
 
-            # 5. Perfom the actual read
+            # Perform the actual read
             mspace = h5s.create_simple(selection.mshape)
             fspace = selection._id
             self.id.read(mspace, fspace, arr)
 
-            # 6. Patch up the output for NumPy
+            # Patch up the output for NumPy
             if len(names) == 1:
                 arr = arr[names[0]]     # Single-field recarray convention
             if arr.shape == ():
@@ -914,25 +916,21 @@ class Dataset(HLObject):
 
             args = args if isinstance(args, tuple) else (args,)
 
-            # 1. Sort field indices from the slicing
+            # Sort field indices from the slicing
             names = tuple(x for x in args if isinstance(x, str))
             args = tuple(x for x in args if not isinstance(x, str))
 
-            # 2. Create new dtype (TODO)
-            if len(names) == 0:
-                pass
-            else:
-                raise NotImplementedError("Field name selections are not yet allowed for write.")
+            if len(names) != 0:
+                raise TypeError("Field name selections are not allowed for write.")
 
             # 3. Validate the input array
             val = numpy.asarray(val, order='C')
 
             # 4. Perform the dataspace selection
-            if sel.is_simple(args):
-                selection = sel.RectSelection(self.shape)
-            else:
-                selection = sel.FancySelection(self.shape)
-            selection[args]
+            selection = sel.select(self.shape, args)
+
+            if selection.nselect == 0:
+                return
 
             # 5. Broadcast scalars if necessary
             if val.shape == () and selection.mshape != ():
@@ -940,9 +938,9 @@ class Dataset(HLObject):
                 val2[...] = val
                 val = val2
             
-            # 5. Perform the write, with broadcasting
+            # 6. Perform the write, with broadcasting
             mspace = h5s.create_simple(val.shape, (h5s.UNLIMITED,)*len(val.shape))
-            for fspace in selection.shape_broadcast(val.shape):
+            for fspace in selection.broadcast(val.shape):
                 self.id.write(mspace, fspace, val)
 
     def read_direct(self, dest, source_sel=None, dest_sel=None):
diff --git a/h5py/selections.py b/h5py/selections.py
index 65a6a27..4665edb 100644
--- a/h5py/selections.py
+++ b/h5py/selections.py
@@ -14,28 +14,65 @@ from h5py.h5s import SELECT_XOR  as XOR
 from h5py.h5s import SELECT_NOTB as NOTB
 from h5py.h5s import SELECT_NOTA as NOTA
 
-def is_simple(args):
-    for arg in args:
-        if not isinstance(arg, slice):
+def select(shape, args):
+    """ Build and return the selection for args: a lone ndarray gives a
+        PointSelection, only ints/slices/Ellipsis a SimpleSelection, anything
+        else a FancySelection.  Args may be one argument or a tuple of them.
+    """
+    if not isinstance(args, tuple):
+        args = (args,)
+
+    if len(args) == 1 and isinstance(args[0], np.ndarray):
+        sel = PointSelection(shape)
+        sel[args[0]]
+        return sel
+    # Any arg that is not a slice, Ellipsis or int-like needs FancySelection.
+    for a in args:
+        if not isinstance(a, slice) and a is not Ellipsis:
             try:
-                long(arg)
+                int(a)
             except Exception:
-                return False
-    return True
+                sel = FancySelection(shape)
+                sel[args]
+                return sel
+
+    sel = SimpleSelection(shape)
+    sel[args]
+    return sel
 
 class Selection(object):
 
     """
-        Base class for HDF5 dataspace selections
+        Base class for HDF5 dataspace selections.  Subclasses support the
+        "selection protocol", which means they have at least the following
+        members:
+        
+        __init__(shape)   => Create a new selection on "shape"-tuple
+        __getitem__(args) => Perform a selection with the range specified.
+                             What args are allowed depends on the
+                             particular subclass in use.
+
+        shape (read-only) =>   The shape of the dataspace.
+        mshape  (read-only) => The shape of the selection region. 
+                               Not guaranteed to fit within "shape", although
+                               the total number of points is at most
+                               product(shape).
+        nselect (read-only) => Number of selected points.  Always equal to
+                               product(mshape).
+
+        broadcast(target_shape) => Return an iterable which yields dataspaces
+                                   for read, based on target_shape.
     """
 
     def __init__(self, shape):
         shape = tuple(shape)
         self._id = h5s.create_simple(shape, (h5s.UNLIMITED,)*len(shape))
+        self._id.select_all()
         self._shape = shape
 
     @property
     def shape(self):
+        """ Shape of whole dataspace """
         return self._shape
 
     @property
@@ -56,140 +93,75 @@ class Selection(object):
 
         raise TypeError("Selection invalid")
 
-    def shape_broadcast(self, shape):
-        """ Stub broadcasting method """
-        if not shape == self.shape:
-            raise TypeError("Broadcasting is only supported for simple selections")
+class _Selection_1D(Selection):
+
+    """
+        Base class for selections which result in a 1-D shape, as with
+        NumPy indexing via boolean mask arrays.
+    """
+
+    @property
+    def mshape(self):
+        return (self.nselect,)
+
+    def broadcast(self, target_shape):
+        """ Get an iterable for broadcasting """
+        if np.product(target_shape) != self.nselect:
+            raise TypeError("Broadcasting is not supported for point-wise selections")
+
         yield self._id
 
-class PointSelection(Selection):
+class PointSelection(_Selection_1D):
 
     """
-        Represents a point-wise selection.
+        Represents a point-wise selection.  You can supply sequences of
+        points to the three methods append(), prepend() and set(), or a
+        single boolean array to __getitem__.
     """
 
     def _perform_selection(self, points, op):
 
-        points = np.asarray(points, order='C')
+        points = np.asarray(points, order='C', dtype='u8')
         if len(points.shape) == 1:
             points.shape = (1,points.shape[0])
 
         if self._id.get_select_type() != h5s.SEL_POINTS:
             op = h5s.SELECT_SET
 
-        self._id.select_elements(points, op)
+        if len(points) == 0:
+            self._id.select_none()
+        else:
+            self._id.select_elements(points, op)
+
+    def __getitem__(self, arg):
+        """ Perform point-wise selection from a NumPy boolean array """
+        if not (isinstance(arg, np.ndarray) and arg.dtype.kind == 'b'):
+            raise TypeError("PointSelection __getitem__ only works with bool arrays")
+        if not arg.shape == self.shape:
+            raise TypeError("Boolean indexing array has incompatible shape")
+
+        points = np.transpose(arg.nonzero())
+        self.set(points)
+        return self
 
     def append(self, points):
+        """ Add the sequence of points to the end of the current selection """
         self._perform_selection(points, h5s.SELECT_APPEND)
 
     def prepend(self, points):
+        """ Add the sequence of points to the beginning of the current selection """
         self._perform_selection(points, h5s.SELECT_PREPEND)
 
     def set(self, points):
+        """ Replace the current selection with the given sequence of points"""
         self._perform_selection(points, h5s.SELECT_SET)
 
-
-class RectSelection(Selection):
-
-    """ A single "rectangular" (regular) selection composed of only slices
-        and integer arguments.  Can participate in broadcasting.
-    """
-
-    def __init__(self, *args, **kwds):
-        Selection.__init__(self, *args, **kwds)
-        rank = len(self.shape)
-        self._sel = ((0,)*rank, self.shape, (1,)*rank, (False,)*rank)
-        self.mshape = self.shape
-
-    def __getitem__(self, args):
-        if not isinstance(args, tuple):
-            args = (args,)
-  
-        start, count, step, scalar = self._handle_args(args)
-
-        self._id.select_hyperslab(start, count, step)
-
-        self._sel = (start, count, step, scalar)
-
-        self.mshape = tuple(x for x, y in zip(count, scalar) if not y)
-
-        return self._id
-
-
-    def shape_broadcast(self, target_shape):
-        """ Return an iterator over target dataspaces for broadcasting """
-
-        # count = (10,10,10)
-        # cshape = (1,1,5)
-
-        start, count, step, scalar = self._sel
-
-        rank = len(count)
-        target = list(target_shape)
-
-        tshape = []
-        for idx in xrange(1,rank+1):
-            if len(target) == 0 or scalar[-idx]:     # Skip scalar axes
-                tshape.append(1)
-            else:
-                t = target.pop()
-                if count[-idx] == t or t == 1:
-                    tshape.append(t)
-                else:
-                    raise TypeError("Can't broadcast %s -> %s [%s,%s,%s] %s\n%s" % (target_shape, count, count[-idx], t, -idx, tshape, self._sel))
-        tshape.reverse()
-        tshape = tuple(tshape)
-
-        chunks = tuple(x/y for x, y in zip(count, tshape))
-
-        #print tshape, chunks
-
-        nchunks = np.product(chunks)
-
-        sid = self._id.copy()
-        sid.select_hyperslab((0,)*rank, tshape, step)
-
-        for idx in xrange(nchunks):
-            offset = tuple(x*y*z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start))
-            sid.offset_simple(offset)
-            yield sid
-
-    def _handle_args(self, args):
-        """ Process a "simple" selection tuple, containing only slices and
-            integer objects.  Return is a 3-tuple with start, count, step tuples.
-
-            If "args" is shorter than "shape", the remaining axes are fully
-            selected.
-        """
-        args = _broadcast(args, len(self.shape))
-
-        start = []
-        count = []
-        step  = []
-        scalar = []
-
-        for arg, length in zip(args, self.shape):
-            if isinstance(arg, slice):
-                x,y,z = _translate_slice(arg, length)
-                s = False
-            else:
-                try:
-                    x,y,z = _translate_int(int(arg), length)
-                    s = True
-                except TypeError:
-                    raise TypeError('Illegal index "%s" (must be a slice or number)' % arg)
-            start.append(x)
-            count.append(y)
-            step.append(z)
-            scalar.append(s)
-
-        return tuple(start), tuple(count), tuple(step), tuple(scalar)
-
-class HyperSelection(RectSelection):
+class HyperSelection(_Selection_1D):
 
     """
         Represents multiple overlapping rectangular selections, combined
-        with set-like operators.
+        with set-like operators.  Result is a 1D shape, as with boolean array
+        selection.
 
         When created, the entire dataspace is selected.  To make
         adjustments to the selection, use the standard NumPy slicing
@@ -215,6 +187,10 @@ class HyperSelection(RectSelection):
   
     """
 
+    def __getitem__(self, args):
+        self[args] = SET
+        return self
+
     def __setitem__(self, args, op):
 
         if not isinstance(args, tuple):
@@ -244,26 +220,104 @@ class HyperSelection(RectSelection):
         self._id.select_hyperslab(start, count, step, op=op)
 
 
+class SimpleSelection(Selection):
+
+    """ A single "rectangular" (regular) selection composed of only slices
+        and integer arguments.  Can participate in broadcasting.
+    """
+
+    def __init__(self, shape):
+        Selection.__init__(self, shape)
+        rank = len(self.shape)
+        self._sel = ((0,)*rank, self.shape, (1,)*rank, (False,)*rank)
+        self.mshape = self.shape
+
+    def __getitem__(self, args):
+        if not isinstance(args, tuple):
+            args = (args,)
+  
+        start, count, step, scalar = _handle_simple(self.shape,args)
+
+        # HDF5 hyperslabs freak out with scalar selections
+        if self.shape == ():
+            if count == ():
+                self._id.select_all()
+                return self._id
+            else:
+                raise TypeError("Invalid scalar selection")
+
+        self._id.select_hyperslab(start, count, step)
+
+        self._sel = (start, count, step, scalar)
+
+        self.mshape = tuple(x for x, y in zip(count, scalar) if not y)
+
+        return self
+
+
+    def broadcast(self, target_shape):
+        """ Return an iterator over target dataspaces for broadcasting.
+
+        Follows the standard NumPy broadcasting rules against the current
+        selection shape (self.mshape).
+        """
+
+        start, count, step, scalar = self._sel
+
+        rank = len(count)
+        target = list(target_shape)
+
+        tshape = []
+        for idx in xrange(1,rank+1):
+            if len(target) == 0 or scalar[-idx]:     # Skip scalar axes
+                tshape.append(1)
+            else:
+                t = target.pop()
+                if t == 1 or count[-idx] == t:
+                    tshape.append(t)
+                else:
+                    raise TypeError("Can't broadcast %s -> %s" % (target_shape, count))
+        tshape.reverse()
+        tshape = tuple(tshape)
+
+        chunks = tuple(x/y for x, y in zip(count, tshape))
+
+        nchunks = np.product(chunks)
+
+        sid = self._id.copy()
+        sid.select_hyperslab((0,)*rank, tshape, step)
+
+        for idx in xrange(nchunks):
+            offset = tuple(x*y*z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start))
+            sid.offset_simple(offset)
+            yield sid
+
 
-class FancySelection(HyperSelection):
+class FancySelection(Selection):
 
     """
-        Implements advanced, NumPy-style selection operations.
+        Implements advanced NumPy-style selection operations in addition to
+        the standard slice-and-int behavior.
 
         Indexing arguments may be ints, slices, lists of indicies, or
-        boolean arrays (1-D).  The only permitted operation is SET.
+        per-axis (1D) boolean arrays.
 
-        Intended for internal use by the Dataset __getitem__ machinery.
+        Broadcasting is not supported for these selections.
     """
+    def __init__(self, shape):
+        Selection.__init__(self, shape)
+        self._mshape = shape
 
-    def __setitem__(self, args, op):
+    @property
+    def mshape(self):
+        return self._mshape
+
+    def __getitem__(self, args):
 
-        if op != SET:
-            raise ValueError("The only permitted operation is SET")
         if not isinstance(args, tuple):
             args = (args,)
 
-        args = _broadcast(args, len(self.shape))
+        args = _expand_ellipsis(args, len(self.shape))
 
         self._id.select_all()
 
@@ -349,11 +403,15 @@ class FancySelection(HyperSelection):
 
                     last_idx = select_idx
 
-        self.mshape = tuple(x for x in mshape if x != 0)
+        self._mshape = tuple(x for x in mshape if x != 0)
+
+    def broadcast(self, target_shape):
+        if not target_shape == self.mshape:
+            raise TypeError("Broadcasting is not supported for complex selections")
+        yield self._id
 
-def _broadcast(args, rank):
-    """ Expand ellipsis objects and fill in missing axes.  Returns the
-    new args tuple.
+def _expand_ellipsis(args, rank):
+    """ Expand ellipsis objects and fill in missing axes.
     """
     n_el = list(args).count(Ellipsis)
     if n_el > 1:
@@ -375,6 +433,39 @@ def _broadcast(args, rank):
 
     return final_args
 
+def _handle_simple(shape, args):
+    """ Process a "simple" selection tuple, containing only slices and
+        integer objects.  Return is a 4-tuple with tuples for start,
+        count, step, and a flag which tells if the axis is a "scalar"
+        selection (indexed by an integer).
+
+        If "args" is shorter than "shape", the remaining axes are fully
+        selected.
+    """
+    args = _expand_ellipsis(args, len(shape))
+
+    start = []
+    count = []
+    step  = []
+    scalar = []
+
+    for arg, length in zip(args, shape):
+        if isinstance(arg, slice):
+            x,y,z = _translate_slice(arg, length)
+            s = False
+        else:
+            try:
+                x,y,z = _translate_int(int(arg), length)
+                s = True
+            except TypeError:
+                raise TypeError('Illegal index "%s" (must be a slice or number)' % arg)
+        start.append(x)
+        count.append(y)
+        step.append(z)
+        scalar.append(s)
+
+    return tuple(start), tuple(count), tuple(step), tuple(scalar)
+
 def _translate_int(exp, length):
     """ Given an integer index, return a 3-tuple
         (start, count, step)
diff --git a/h5py/tests/test_highlevel.py b/h5py/tests/test_highlevel.py
index 18341cf..1e14898 100644
--- a/h5py/tests/test_highlevel.py
+++ b/h5py/tests/test_highlevel.py
@@ -455,9 +455,9 @@ class TestDataset(HDF5TestCase):
         slices += [ s[0:7:2,0:9:3,15:43:5], s[2:8:2,...] ]
         slices += [ s[0], s[1], s[9], s[0,0], s[4,5], s[:] ]
         slices += [ s[3,...], s[3,2,...] ]
-        #slices += [ numpy.random.random((10,10,50)) > 0.5 ]  # Truth array
-        #slices += [ numpy.zeros((10,10,50), dtype='bool') ]
-        #slices += [ s[0, 1, [2,3,6,7]], s[:,[1,2]], s[[1,2]], s[3:7,[1]]]
+        slices += [ numpy.random.random((10,10,50)) > 0.5 ]  # Truth array
+        slices += [ numpy.zeros((10,10,50), dtype='bool') ]
+        slices += [ s[0, 1, [2,3,6,7]], s[:,[1,2]], s[[1,2]], s[3:7,[1]]]
 
         for slc in slices:
             print "    Checking %s on %s" % ((slc,) if not isinstance(slc, numpy.ndarray) else 'ARRAY', srcarr.shape)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git



More information about the debian-science-commits mailing list