[h5py] 106/455: Fix broken indexing; setup tweaks for better handling of --hdf5=

Thu Jul 2 18:19:22 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit 051d98d91018d63998f9c448829c60c0cb905550
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Sat Aug 23 02:48:13 2008 +0000

    Fix broken indexing; setup tweaks for better handling of --hdf5=
---
 h5py/highlevel.py            |  22 ++++---
 h5py/tests/test_highlevel.py |  17 +++--
 h5py/utils_hl.py             | 152 ++++++++++++++++++++-----------------------
 setup.py                     |   2 +-
 4 files changed, 95 insertions(+), 98 deletions(-)

diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index 1dcffe7..de53763 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -455,10 +455,8 @@ class FlatIndexProxy(object):
         indexer = FlatIndexer(self._dset.shape, args)
         arr = self._dset[indexer]
 
-        # These match the way NumPy behaves
-        if arr.shape == ():
-            return numpy.asscalar(arr)
-        return arr.newbyteorder('=')
+        # NumPy does not respect the byteorder when slicing with .flat
+        return arr#.newbyteorder('=')
 
     def __setitem__(self, args, val):
         """ Write to the dataset, treating it as a 1-D (C-contiguous) array.
@@ -649,9 +647,10 @@ class Dataset(HLObject):
 
             fspace = self.id.get_space()
 
-            # Perform selection on the dataset and retrieve the
-            # dataspace for NumPy to use
-            mspace = slice_select(fspace, slices)
+            # Perform selection on the dataset.  This returns
+            # 1. The proper HDF5 memory dataspace to use for the read
+            # 2. A flag which indicates if the result should be a scalar
+            mspace, scalar_result = slice_select(fspace, slices)
 
             # Create NumPy datatype for read, using the named type restrictions
             basetype = self.id.dtype
@@ -671,10 +670,13 @@ class Dataset(HLObject):
             # Perform the actual read
             self.id.read(mspace, fspace, arr)
 
+            # Match NumPy conventions
             if len(names) == 1:
-                # Match Numpy convention for recarray indexing
-                arr = arr[names[0]]
-            return arr.squeeze()
+                arr = arr[names[0]]     # Single-field recarray convention
+            arr = arr.squeeze()         # No "1" dimensions
+            if scalar_result:
+                arr = numpy.asscalar(arr)   # Scalar if slicing rules say it is
+            return arr
 
     def __setitem__(self, args, val):
         """ Write to the HDF5 dataset from a Numpy array.  The shape of the
diff --git a/h5py/tests/test_highlevel.py b/h5py/tests/test_highlevel.py
index 59f3cfd..1968fde 100644
--- a/h5py/tests/test_highlevel.py
+++ b/h5py/tests/test_highlevel.py
@@ -260,9 +260,9 @@ class TestDataset(unittest.TestCase):
         slices += [ s[9,9,49], s[9,:,49], s[9,:,:] ]
         slices += [ s[0, ..., 49], s[...], s[..., 49], s[9,...] ]
         slices += [ s[0:7:2,0:9:3,15:43:5], s[2:8:2,...] ]
-        slices += [ s[0], s[1], s[9], s[:] ] # Numpy convention
+        slices += [ s[0], s[1], s[9], s[0,0], s[4,5], s[:] ]
+        slices += [ s[3,...], s[3,2,...] ]
         slices += [ numpy.random.random((10,10,50)) > 0.5 ]  # Truth array
-       
         for dt in TYPES1:
 
             srcarr = numpy.arange(10*10*50, dtype=dt).reshape(10,10,50)
@@ -280,8 +280,11 @@ class TestDataset(unittest.TestCase):
                     print "    Checking read %.20s %s" % (dt, argtpl if not isinstance(argtpl, numpy.ndarray) else 'ARRAY')
                     hresult = d[argtpl]
                     nresult = srcarr[argtpl]
-                    self.assertEqual(hresult.shape, nresult.shape)
-                    self.assertEqual(hresult.dtype, nresult.dtype)
+                    if isinstance(nresult, numpy.ndarray):
+                        self.assertEqual(hresult.shape, nresult.shape)
+                        self.assertEqual(hresult.dtype, nresult.dtype)
+                    else:
+                        self.assert_(not isinstance(hresult, numpy.ndarray))
                     self.assert_(numpy.all(hresult == nresult))
 
                 del f["NewDataset"]
@@ -324,12 +327,12 @@ class TestDataset(unittest.TestCase):
                     print "    Checking flat read %.20s %s" % (dt, idx)
                     hresult = d.flat[idx]
                     nresult = srcarr.flat[idx]
-                    if hasattr(hresult, 'shape'):
+                    if isinstance(nresult, numpy.ndarray):
                         self.assertEqual(hresult.shape, nresult.shape)
                         self.assertEqual(hresult.dtype, nresult.dtype)
-                        self.assert_(numpy.all(hresult == nresult), "%s\n%s" % (hresult, nresult))
                     else:
-                        self.assertEqual(hresult, numpy.asscalar(nresult))
+                        self.assert_(not isinstance(hresult, numpy.ndarray))
+                    self.assert_(numpy.all(hresult == nresult), "%s\n%s" % (hresult, nresult))
 
                 del f["NewDataset"]
                 d = Dataset(f, "NewDataset", data=srcarr)
diff --git a/h5py/utils_hl.py b/h5py/utils_hl.py
index b7c56bb..974f174 100644
--- a/h5py/utils_hl.py
+++ b/h5py/utils_hl.py
@@ -73,136 +73,128 @@ class FlatIndexer(object):
         """ Shape must be a tuple; args must be iterable.
         """
         try:
-            args = iter(args)
+            args = tuple(iter(args))
         except TypeError:
             args = (args,)
 
         points = []
 
+        scalarok = False
         for arg in args:
             if isinstance(arg, slice):
                 points.extend(xrange(*arg.indices(numpy.product(shape))))
-            elif isinstance(arg, int) or isinstance(arg, long):
-                points.append(arg)
             else:
-                raise ValueError("Illegal index (ints, longs or slices only)")
+                try:
+                    points.append(long(arg))
+                except TypeError:
+                    raise ValueError("Illegal index (ints, longs or slices only)")
+                scalarok = True
 
         self.coords = numpy.array([numpy.unravel_index(x, shape) for x in points])
 
+        # A scalar value should result for a single integer index.
+        self.scalar = True if scalarok and len(args) == 1 else False
+
+
 def slice_select(space, args):
     """ Perform a selection on the given HDF5 dataspace, using a tuple
         of Python extended slice objects.  The dataspace may be scalar or
-        simple.  The slice argument may be:
+        simple.  The following selection mechanisms are implemented:
 
-        0-tuple:
-            Entire dataspace selected (compatible with scalar)
+        1. select_all:
+            0-tuple
+            1-tuple containing Ellipsis
 
-        1-tuple:
-            1. A single Ellipsis: entire dataspace selected
-            2. A single integer or slice (row-broadcasting)
-            3. A NumPy array: element-wise selection
-            4. A FlatIndexer instance containing a coordinate list
+        2. Hyperslab selection
+            n-tuple (n>1) containing slice/integer/Ellipsis objects
 
-        n-tuple:
-            1. slice objects
-            2. Ellipsis objects
-            3. Integers
+        3. Discrete element selection
+            1-tuple containing boolean array or FlatIndexer
 
-        The return value is the appropriate memory dataspace to use.
+        The return value is a 2-tuple:
+        1. Appropriate memory dataspace to use for new array
+        2. Boolean indicating if the slice should result in a scalar quantity
     """
 
     if len(args) == 0 or (len(args) == 1 and args[0] is Ellipsis):
         space.select_all()
-        return space.copy()
+        return space.copy(), False
 
     if len(args) == 1:
         argval = args[0]
 
         if isinstance(argval, numpy.ndarray):
-            # Catch element-wise selection
+            # Boolean array indexing is handled by discrete element selection
+            # It never results in a scalar value
             indices = numpy.transpose(argval.nonzero())
             space.select_elements(indices)
-            return h5s.create_simple((len(indices),))
+            return h5s.create_simple((len(indices),)), False
 
         if isinstance(argval, FlatIndexer):
+            # Flat indexing also uses discrete selection
+            # Scalar determination is made by the indexer
             space.select_elements(argval.coords)
             npoints = space.get_select_elem_npoints()
-            return h5s.create_simple((npoints,))
-
-        # Single-index obj[0] access is always equivalent to obj[0,...].
-        # Pack it back up and send it to the hyperslab machinery
-        args = (argval, Ellipsis)
+            return h5s.create_simple((npoints,)), argval.scalar
 
     # Proceed to hyperslab selection
 
     shape = space.shape
     rank = len(shape)
 
-    start = []
-    count = []
-    stride = []
+    # First expand (at most 1) ellipsis object
 
-    # Expand integers and ellipsis arguments to slices
-    for dim, arg in enumerate(args):
+    n_el = list(args).count(Ellipsis)
+    if n_el > 1:
+        raise ValueError("Only one ellipsis may be used.")
+    elif n_el == 0 and len(args) != rank:
+        args = args + (Ellipsis,)  # Simple version of NumPy broadcasting
 
-        if isinstance(arg, int) or isinstance(arg, long):
-            if arg < 0:
-                raise ValueError("Negative indices are not allowed.")
-            start.append(arg)
-            count.append(1)
-            stride.append(1)
+    final_args = []
+    n_args = len(args)
 
-        elif isinstance(arg, slice):
+    for idx, arg in enumerate(args):
 
-            # slice.indices() method clips, so do it the hard way...
+        if arg == Ellipsis:
+            final_args.extend( (slice(None,None,None),)*(rank-n_args+1) )
+        else:
+            final_args.append(arg)
 
-            # Start
-            if arg.start is None:
-                ss=0
-            else:
-                if arg.start < 0:
-                    raise ValueError("Negative dimensions are not allowed")
-                ss=arg.start
+    # Step through the expanded argument list and handle each axis
 
-            # Stride
-            if arg.step is None:
-                st = 1
-            else:
-                if arg.step <= 0:
-                    raise ValueError("Only positive step sizes allowed")
-                st = arg.step
+    start = []
+    count = []
+    stride = []
+    simple = []
+    for idx, (length, exp) in enumerate(zip(shape,final_args)):
+
+        if isinstance(exp, slice):
+            start_, stop_, step_ = exp.indices(length)
+            count_ = (stop_-start_)//step_
+            if (stop_-start_) % step_ != 0:
+                count_ += 1
+            simple_ = False
+        else:
+            try:
+                exp = long(exp)
+            except TypeError:
+                raise TypeError("Illegal index on axis %d: %r" % (idx, exp))
 
-            # Count
-            if arg.stop is None:
-                cc = shape[dim]/st
-            else:
-                if arg.stop < 0:
-                    raise ValueError("Negative dimensions are not allowed")
-                cc = (arg.stop-ss)/st
-                if ((arg.stop-ss) % st) != 0:
-                    cc += 1   # Be careful with integer division!
-            if cc == 0:
-                raise ValueError("Zero-length selections are not allowed")
-
-            start.append(ss)
-            stride.append(st)
-            count.append(cc)
-
-        elif arg == Ellipsis:
-            nslices = rank-(len(args)-1)
-            if nslices <= 0:
-                continue
-            for x in range(nslices):
-                idx = dim+x
-                start.append(0)
-                count.append(shape[dim+x])
-                stride.append(1)
+            if exp > length-1:
+                raise IndexError('Index %d out of bounds: "%d" (should be <= %d)' % (idx, exp, length-1))
 
-        else:
-            raise ValueError("Bad slice type %s" % repr(arg))
+            start_ = exp
+            step_ = 1
+            count_ = 1
+            simple_ = True
+
+        start.append(start_)
+        count.append(count_)
+        stride.append(step_)
+        simple.append(simple_)
 
     space.select_hyperslab(tuple(start), tuple(count), tuple(stride))
-    return h5s.create_simple(tuple(count))
+    return h5s.create_simple(tuple(count)), all(simple)
 
 def strhdr(line, char='-'):
     """ Print a line followed by an ASCII-art underline """
diff --git a/setup.py b/setup.py
index 827d936..6bdcb81 100644
--- a/setup.py
+++ b/setup.py
@@ -116,7 +116,7 @@ for arg in sys.argv[:]:
         splitarg = arg.split('=',1)
         if len(splitarg) != 2:
             fatal("HDF5 directory not understood (wants --hdf5=/path/to/hdf5)")
-        opts.HDF5_DIR = splitarg[1]
+        opts.HDF5_DIR = op.abspath(splitarg[1])
         sys.argv.remove(arg)
     elif arg.find('--io-nonblock') == 0:
         opts.ENABLE_PYREX=True

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git