[h5py] 110/455: Replace .flat with more scalable CoordsList; fix bigslice test on 1.6.X

Thu Jul 2 18:19:23 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit 97ebd3a9b471993592ec6ca3f0c6caa064cfc63a
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Wed Aug 27 06:35:11 2008 +0000

    Replace .flat with more scalable CoordsList; fix bigslice test on 1.6.X
---
 h5py/highlevel.py            | 75 +++++++++-----------------------------------
 h5py/tests/test_h5s.py       |  4 +--
 h5py/tests/test_highlevel.py | 53 +------------------------------
 h5py/utils_hl.py             | 58 ++++++++++++++--------------------
 4 files changed, 40 insertions(+), 150 deletions(-)

diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index 57ed3ac..67f1499 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -53,13 +53,13 @@ import threading
 
 from h5py import h5, h5f, h5g, h5s, h5t, h5d, h5a, h5p, h5z, h5i, config
 from h5py.h5 import H5Error
-from utils_hl import slice_select, hbasename, strhdr, strlist, FlatIndexer, \
-                     guess_chunk
+from utils_hl import slice_select, hbasename, strhdr, strlist, guess_chunk
+from utils_hl import CoordsList
 from browse import _H5Browser
 
 
-__all__ = ["LockableObject", "HLObject", "File", "Group", "Dataset",
-           "Datatype", "AttributeManager"]
+__all__ = ["File", "Group", "Dataset",
+           "Datatype", "AttributeManager", "CoordsList"]
 
 try:
     # For interactive File.browse() capability
@@ -414,60 +414,6 @@ class File(Group):
                     readline.add_history(x)
         self._path = browser.path
 
-class FlatIndexProxy(object):
-
-    """
-        Utility class which allows 1-D indexing of datasets.
-
-        These come attached to Dataset objects as <obj>.flat.  They behave
-        like 1-D arrays; you can slice into them and assign to slices like
-        NumPy flatiter objects.  However, they are not iterable.
-
-        In addition to single indices and slices, you can also provide an
-        iterable which yields indices and slices.  The returned array will
-        be the union of these selections, in the order they were presented,
-        with duplicate entries skipped.
-
-        Examples:  (let dset be of shape (10,10))
-            >>> dset.flat[10]       # Equivalent to dset[1,0]
-            >>> dset.flat[5:15]     # Note you can't do this with dset[x,y]
-            >>> dset.flat[0,1,3,2]  # First 4 elements, in the specified order
-
-        Caveats:  At the HDF5 level, this works by explicitly listing the set
-        of points to be accessed.  For large, regularly strided selections,
-        you should use the standard n-D slicing syntax, which is significantly
-        faster.
-    """
-    
-    def __init__(self, dset):
-        self._dset = dset
-
-    def __getitem__(self, args):
-        """ Read from the dataset, treating it as a 1-D (C-contiguous) array.
-
-            Allowed slicing mechanisms:
-                1. Ints/longs
-                2. Extended slices
-                3. Sequences of ints/extended slices (e.g. flat[0,1,2])
-
-            Subsets which result in a single element are returned as scalars.
-        """
-        indexer = FlatIndexer(self._dset.shape, args)
-        arr = self._dset[indexer]
-
-        # NumPy does not respect the byteorder when slicing with .flat
-        return arr#.newbyteorder('=')
-
-    def __setitem__(self, args, val):
-        """ Write to the dataset, treating it as a 1-D (C-contiguous) array.
-
-            Allowed slicing mechanisms:
-                1. Ints/longs
-                2. Extended slices
-                3. Sequences of ints/extended slices (e.g. flat[0,1,2])
-        """
-        indexer = FlatIndexer(self._dset.shape, args)
-        self._dset[indexer] = val
 
 class Dataset(HLObject):
 
@@ -493,9 +439,6 @@ class Dataset(HLObject):
     dtype = property(lambda self: self.id.dtype,
         doc = "Numpy dtype representing the datatype")
 
-    flat = property(lambda self: FlatIndexProxy(self),
-        doc = "1-D read/write slicing access to the dataset.  Not iterable.")
-
     def _getval(self):
         with self._lock:
             arr = self[...]
@@ -636,6 +579,16 @@ class Dataset(HLObject):
             ds[:]
             ds[1,2,3,"a"]
             ds[0:5:2, ..., 0:2, "a", "b"]
+
+            Also supports:
+
+            * Boolean array indexing (True/False)
+            * Discrete point selection via CoordsList instance
+
+            Beware; these last two techniques work by explicitly enumerating
+            the points to be selected.  In the worst case, the selection list
+            for a boolean array can be every point in the dataset, with a 
+            2x to 3x memory overhead.
         """
         with self._lock:
 
diff --git a/h5py/tests/test_h5s.py b/h5py/tests/test_h5s.py
index 30b48b2..9c90831 100644
--- a/h5py/tests/test_h5s.py
+++ b/h5py/tests/test_h5s.py
@@ -16,8 +16,8 @@ import numpy
 from h5py import *
 from h5py.h5 import H5Error
 
-spaces = [(10,10), (1,1), (1,), (), (2**40,)]
-max_spaces = [(10,10), (3,4), (h5s.UNLIMITED,), (), (2**41,)]
+spaces = [(10,10), (1,1), (1,), (), (2**40,),(2**63-1,)]
+max_spaces = [(10,10), (3,4), (h5s.UNLIMITED,), (), (2**41,), (2**63-1,)]
 
 class TestH5S(unittest.TestCase):
 
diff --git a/h5py/tests/test_highlevel.py b/h5py/tests/test_highlevel.py
index e040bd5..e942d05 100644
--- a/h5py/tests/test_highlevel.py
+++ b/h5py/tests/test_highlevel.py
@@ -271,7 +271,7 @@ class TestDataset(unittest.TestCase):
                 print "    Testing base 2**%d" % numpy.log2(base)
 
                 empty = numpy.zeros(shp)
-                data = numpy.arange(numpy.product(shp)).reshape(shp)
+                data = numpy.arange(numpy.product(shp), dtype='=f4').reshape(shp)
 
                 dset[slc] = empty
                 arr = dset[slc]
@@ -332,57 +332,6 @@ class TestDataset(unittest.TestCase):
                 f.close()
                 os.unlink(fname)   
 
-    def test_Dataset_flat(self):
-        return
-        print ""
-
-        s = SliceFreezer()
-
-        flatindexing = [0, 1, 45, 355]
-        flatindexing += [s[0:500:2], s[0:644], s[3:99], s[35:655:3]]
-        flatindexing += [s[:45:], s[::3], s[:78:4]]
-
-        extended = [ (0,1,3,2) ]
-
-        for dt in TYPES1:
-
-            srcarr = numpy.arange(10*10*20, dtype=dt).reshape(10,10,20)
-            srcarr = srcarr + numpy.sin(srcarr)
-
-            fname = tempfile.mktemp('.hdf5')
-            f = File(fname, 'w')
-            try:
-                d = Dataset(f, "NewDataset", data=srcarr)
-                self.assertEqual(d.shape, srcarr.shape)
-                self.assertEqual(d.dtype, srcarr.dtype)
-                for idx in flatindexing:
-                    print "    Checking flat read %.20s %s" % (dt, idx)
-                    hresult = d.flat[idx]
-                    nresult = srcarr.flat[idx]
-                    if isinstance(nresult, numpy.ndarray):
-                        self.assertEqual(hresult.shape, nresult.shape)
-                        self.assertEqual(hresult.dtype, nresult.dtype)
-                    else:
-                        self.assert_(not isinstance(hresult, numpy.ndarray))
-                    self.assert_(numpy.all(hresult == nresult), "%s\n%s" % (hresult, nresult))
-
-                del f["NewDataset"]
-                d = Dataset(f, "NewDataset", data=srcarr)
-                for idx in flatindexing:
-                    print "    Checking flat write %.20s %s" % (dt, idx)
-                    srcarr.flat[idx] = numpy.cos(srcarr.flat[idx])
-                    d.flat[idx] = srcarr.flat[idx]
-                    self.assert_(numpy.all(d.value == srcarr))
-
-                del f["NewDataset"]
-                d = Dataset(f, "NewDataset", data=srcarr)
-                for seq in extended:
-                    subset = d.flat[seq]
-                    for idx, entry in enumerate(seq):
-                        self.assertEqual(subset[idx], srcarr.flat[entry])
-            finally:
-                f.close()
-                os.unlink(fname)   
 
     def test_Dataset_exceptions(self):
         # These trigger exceptions in H5Dread
diff --git a/h5py/utils_hl.py b/h5py/utils_hl.py
index 59f0438..9659d96 100644
--- a/h5py/utils_hl.py
+++ b/h5py/utils_hl.py
@@ -62,41 +62,23 @@ def guess_chunk(shape, typesize):
 
     return tuple(long(x) for x in chunks)
 
-class FlatIndexer(object):
+class CoordsList(object):
 
     """
-        Utility class which encapsulates a 1-D selection into an n-D array.
-
+        Wrapper class for efficient access to sequences of sparse or
+        irregular coordinates.  Construct from either a single index
+        (a rank-length sequence of numbers), or a sequence of such
+        indices.
     """
 
-    def __init__(self, shape, args):
-        """ Shape must be a tuple; args must be iterable.
+    def __init__(self, points):
+        """ Create a new list of explicitly selected points.
         """
-        if shape == ():
-            raise TypeError("Can't slice into a scalar array.")
 
         try:
-            args = tuple(iter(args))
-        except TypeError:
-            args = (args,)
-
-        points = []
-
-        scalarok = False
-        for arg in args:
-            if isinstance(arg, slice):
-                points.extend(xrange(*arg.indices(numpy.product(shape))))
-            else:
-                try:
-                    points.append(long(arg))
-                except TypeError:
-                    raise ValueError("Illegal index (ints, longs or slices only)")
-                scalarok = True
-
-        self.coords = numpy.array([numpy.unravel_index(x, shape) for x in points])
-
-        # A scalar value should result for a single integer index.
-        self.scalar = True if scalarok and len(args) == 1 else False
+            self.coords = numpy.asarray(points, dtype='=u8')
+        except ValueError:
+            raise ValueError("Selection should be an index or a sequence of equal-rank indices")
 
 
 def slice_select(space, args):
@@ -119,6 +101,9 @@ def slice_select(space, args):
         2. Boolean indicating if the slice should result in a scalar quantity
     """
 
+    shape = space.shape
+    rank = len(shape)
+
     if len(args) == 0 or (len(args) == 1 and args[0] is Ellipsis):
         # The only safe way to access a scalar dataspace
         space.select_all()
@@ -137,18 +122,21 @@ def slice_select(space, args):
             space.select_elements(indices)
             return h5s.create_simple((len(indices),)), False
 
-        if isinstance(argval, FlatIndexer):
-            # Flat indexing also uses discrete selection
-            # Scalar determination is made by the indexer
+        if isinstance(argval, CoordsList):
+            # Coords indexing also uses discrete selection
+            c_ndim = argval.coords.ndim
+            if c_ndim != rank:
+                if c_ndim == 1:
+                    argval.coords.resize((1,len(argval.coords)))
+                else:
+                    raise ValueError("Coordinate list must contain %d-rank indices (not %d-rank)" % (rank, c_ndim))
+
             space.select_elements(argval.coords)
             npoints = space.get_select_elem_npoints()
-            return h5s.create_simple((npoints,)), argval.scalar
+            return h5s.create_simple((npoints,)), len(argval.coords) == 1
 
     # Proceed to hyperslab selection
 
-    shape = space.shape
-    rank = len(shape)
-
     # First expand (at most 1) ellipsis object
 
     n_el = list(args).count(Ellipsis)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git