[h5py] 110/455: Replace .flat with more scalable CoordsList; fix bigslice test on 1.6.X
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:23 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.
commit 97ebd3a9b471993592ec6ca3f0c6caa064cfc63a
Author: andrewcollette <andrew.collette at gmail.com>
Date: Wed Aug 27 06:35:11 2008 +0000
Replace .flat with more scalable CoordsList; fix bigslice test on 1.6.X
---
h5py/highlevel.py | 75 +++++++++-----------------------------------
h5py/tests/test_h5s.py | 4 +--
h5py/tests/test_highlevel.py | 53 +------------------------------
h5py/utils_hl.py | 58 ++++++++++++++--------------------
4 files changed, 40 insertions(+), 150 deletions(-)
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index 57ed3ac..67f1499 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -53,13 +53,13 @@ import threading
from h5py import h5, h5f, h5g, h5s, h5t, h5d, h5a, h5p, h5z, h5i, config
from h5py.h5 import H5Error
-from utils_hl import slice_select, hbasename, strhdr, strlist, FlatIndexer, \
- guess_chunk
+from utils_hl import slice_select, hbasename, strhdr, strlist, guess_chunk
+from utils_hl import CoordsList
from browse import _H5Browser
-__all__ = ["LockableObject", "HLObject", "File", "Group", "Dataset",
- "Datatype", "AttributeManager"]
+__all__ = ["File", "Group", "Dataset",
+ "Datatype", "AttributeManager", "CoordsList"]
try:
# For interactive File.browse() capability
@@ -414,60 +414,6 @@ class File(Group):
readline.add_history(x)
self._path = browser.path
-class FlatIndexProxy(object):
-
- """
- Utility class which allows 1-D indexing of datasets.
-
- These come attached to Dataset objects as <obj>.flat. They behave
- like 1-D arrays; you can slice into them and assign to slices like
- NumPy flatiter objects. However, they are not iterable.
-
- In addition to single indices and slices, you can also provide an
- iterable which yields indices and slices. The returned array will
- be the union of these selections, in the order they were presented,
- with duplicate entries skipped.
-
- Examples: (let dset be of shape (10,10))
- >>> dset.flat[10] # Equivalent to dset[1,0]
- >>> dset.flat[5:15] # Note you can't do this with dset[x,y]
- >>> dset.flat[0,1,3,2] # First 4 elements, in the specified order
-
- Caveats: At the HDF5 level, this works by explicitly listing the set
- of points to be accessed. For large, regularly strided selections,
- you should use the standard n-D slicing syntax, which is significantly
- faster.
- """
-
- def __init__(self, dset):
- self._dset = dset
-
- def __getitem__(self, args):
- """ Read from the dataset, treating it as a 1-D (C-contiguous) array.
-
- Allowed slicing mechanisms:
- 1. Ints/longs
- 2. Extended slices
- 3. Sequences of ints/extended slices (e.g. flat[0,1,2])
-
- Subsets which result in a single element are returned as scalars.
- """
- indexer = FlatIndexer(self._dset.shape, args)
- arr = self._dset[indexer]
-
- # NumPy does not respect the byteorder when slicing with .flat
- return arr#.newbyteorder('=')
-
- def __setitem__(self, args, val):
- """ Write to the dataset, treating it as a 1-D (C-contiguous) array.
-
- Allowed slicing mechanisms:
- 1. Ints/longs
- 2. Extended slices
- 3. Sequences of ints/extended slices (e.g. flat[0,1,2])
- """
- indexer = FlatIndexer(self._dset.shape, args)
- self._dset[indexer] = val
class Dataset(HLObject):
@@ -493,9 +439,6 @@ class Dataset(HLObject):
dtype = property(lambda self: self.id.dtype,
doc = "Numpy dtype representing the datatype")
- flat = property(lambda self: FlatIndexProxy(self),
- doc = "1-D read/write slicing access to the dataset. Not iterable.")
-
def _getval(self):
with self._lock:
arr = self[...]
@@ -636,6 +579,16 @@ class Dataset(HLObject):
ds[:]
ds[1,2,3,"a"]
ds[0:5:2, ..., 0:2, "a", "b"]
+
+ Also supports:
+
+ * Boolean array indexing (True/False)
+ * Discrete point selection via CoordsList instance
+
+ Beware; these last two techniques work by explicitly enumerating
+ the points to be selected. In the worst case, the selection list
+ for a boolean array can be every point in the dataset, with a
+ 2x to 3x memory overhead.
"""
with self._lock:
diff --git a/h5py/tests/test_h5s.py b/h5py/tests/test_h5s.py
index 30b48b2..9c90831 100644
--- a/h5py/tests/test_h5s.py
+++ b/h5py/tests/test_h5s.py
@@ -16,8 +16,8 @@ import numpy
from h5py import *
from h5py.h5 import H5Error
-spaces = [(10,10), (1,1), (1,), (), (2**40,)]
-max_spaces = [(10,10), (3,4), (h5s.UNLIMITED,), (), (2**41,)]
+spaces = [(10,10), (1,1), (1,), (), (2**40,),(2**63-1,)]
+max_spaces = [(10,10), (3,4), (h5s.UNLIMITED,), (), (2**41,), (2**63-1,)]
class TestH5S(unittest.TestCase):
diff --git a/h5py/tests/test_highlevel.py b/h5py/tests/test_highlevel.py
index e040bd5..e942d05 100644
--- a/h5py/tests/test_highlevel.py
+++ b/h5py/tests/test_highlevel.py
@@ -271,7 +271,7 @@ class TestDataset(unittest.TestCase):
print " Testing base 2**%d" % numpy.log2(base)
empty = numpy.zeros(shp)
- data = numpy.arange(numpy.product(shp)).reshape(shp)
+ data = numpy.arange(numpy.product(shp), dtype='=f4').reshape(shp)
dset[slc] = empty
arr = dset[slc]
@@ -332,57 +332,6 @@ class TestDataset(unittest.TestCase):
f.close()
os.unlink(fname)
- def test_Dataset_flat(self):
- return
- print ""
-
- s = SliceFreezer()
-
- flatindexing = [0, 1, 45, 355]
- flatindexing += [s[0:500:2], s[0:644], s[3:99], s[35:655:3]]
- flatindexing += [s[:45:], s[::3], s[:78:4]]
-
- extended = [ (0,1,3,2) ]
-
- for dt in TYPES1:
-
- srcarr = numpy.arange(10*10*20, dtype=dt).reshape(10,10,20)
- srcarr = srcarr + numpy.sin(srcarr)
-
- fname = tempfile.mktemp('.hdf5')
- f = File(fname, 'w')
- try:
- d = Dataset(f, "NewDataset", data=srcarr)
- self.assertEqual(d.shape, srcarr.shape)
- self.assertEqual(d.dtype, srcarr.dtype)
- for idx in flatindexing:
- print " Checking flat read %.20s %s" % (dt, idx)
- hresult = d.flat[idx]
- nresult = srcarr.flat[idx]
- if isinstance(nresult, numpy.ndarray):
- self.assertEqual(hresult.shape, nresult.shape)
- self.assertEqual(hresult.dtype, nresult.dtype)
- else:
- self.assert_(not isinstance(hresult, numpy.ndarray))
- self.assert_(numpy.all(hresult == nresult), "%s\n%s" % (hresult, nresult))
-
- del f["NewDataset"]
- d = Dataset(f, "NewDataset", data=srcarr)
- for idx in flatindexing:
- print " Checking flat write %.20s %s" % (dt, idx)
- srcarr.flat[idx] = numpy.cos(srcarr.flat[idx])
- d.flat[idx] = srcarr.flat[idx]
- self.assert_(numpy.all(d.value == srcarr))
-
- del f["NewDataset"]
- d = Dataset(f, "NewDataset", data=srcarr)
- for seq in extended:
- subset = d.flat[seq]
- for idx, entry in enumerate(seq):
- self.assertEqual(subset[idx], srcarr.flat[entry])
- finally:
- f.close()
- os.unlink(fname)
def test_Dataset_exceptions(self):
# These trigger exceptions in H5Dread
diff --git a/h5py/utils_hl.py b/h5py/utils_hl.py
index 59f0438..9659d96 100644
--- a/h5py/utils_hl.py
+++ b/h5py/utils_hl.py
@@ -62,41 +62,23 @@ def guess_chunk(shape, typesize):
return tuple(long(x) for x in chunks)
-class FlatIndexer(object):
+class CoordsList(object):
"""
- Utility class which encapsulates a 1-D selection into an n-D array.
-
+ Wrapper class for efficient access to sequences of sparse or
+ irregular coordinates. Construct from either a single index
+ (a rank-length sequence of numbers), or a sequence of such
+ indices.
"""
- def __init__(self, shape, args):
- """ Shape must be a tuple; args must be iterable.
+ def __init__(self, points):
+ """ Create a new list of explicitly selected points.
"""
- if shape == ():
- raise TypeError("Can't slice into a scalar array.")
try:
- args = tuple(iter(args))
- except TypeError:
- args = (args,)
-
- points = []
-
- scalarok = False
- for arg in args:
- if isinstance(arg, slice):
- points.extend(xrange(*arg.indices(numpy.product(shape))))
- else:
- try:
- points.append(long(arg))
- except TypeError:
- raise ValueError("Illegal index (ints, longs or slices only)")
- scalarok = True
-
- self.coords = numpy.array([numpy.unravel_index(x, shape) for x in points])
-
- # A scalar value should result for a single integer index.
- self.scalar = True if scalarok and len(args) == 1 else False
+ self.coords = numpy.asarray(points, dtype='=u8')
+ except ValueError:
+ raise ValueError("Selection should be an index or a sequence of equal-rank indices")
def slice_select(space, args):
@@ -119,6 +101,9 @@ def slice_select(space, args):
2. Boolean indicating if the slice should result in a scalar quantity
"""
+ shape = space.shape
+ rank = len(shape)
+
if len(args) == 0 or (len(args) == 1 and args[0] is Ellipsis):
# The only safe way to access a scalar dataspace
space.select_all()
@@ -137,18 +122,21 @@ def slice_select(space, args):
space.select_elements(indices)
return h5s.create_simple((len(indices),)), False
- if isinstance(argval, FlatIndexer):
- # Flat indexing also uses discrete selection
- # Scalar determination is made by the indexer
+ if isinstance(argval, CoordsList):
+ # Coords indexing also uses discrete selection
+ c_ndim = argval.coords.ndim
+ if c_ndim != rank:
+ if c_ndim == 1:
+ argval.coords.resize((1,len(argval.coords)))
+ else:
+ raise ValueError("Coordinate list must contain %d-rank indices (not %d-rank)" % (rank, c_ndim))
+
space.select_elements(argval.coords)
npoints = space.get_select_elem_npoints()
- return h5s.create_simple((npoints,)), argval.scalar
+ return h5s.create_simple((npoints,)), len(argval.coords) == 1
# Proceed to hyperslab selection
- shape = space.shape
- rank = len(shape)
-
# First expand (at most 1) ellipsis object
n_el = list(args).count(Ellipsis)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git
More information about the debian-science-commits mailing list