[h5py] 109/455: More 32-bit restrictions fixed, incorrect squeeze() use, unit tests for 64-bit slicing
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:23 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.
commit 1be03cef2a6840d693422765bea87483458a735a
Author: andrewcollette <andrew.collette at gmail.com>
Date: Tue Aug 26 23:03:32 2008 +0000
More 32-bit restrictions fixed, incorrect squeeze() use, unit tests for 64-bit slicing
---
docs/source/datasets.rst | 135 +++++++++++++++++++++++++++++++++++++++++++
h5py/highlevel.py | 2 +-
h5py/tests/test_highlevel.py | 34 ++++++++++-
h5py/utils_hl.py | 28 ++++++++-
4 files changed, 195 insertions(+), 4 deletions(-)
diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst
new file mode 100644
index 0000000..d688571
--- /dev/null
+++ b/docs/source/datasets.rst
@@ -0,0 +1,135 @@
+****************
+Datasets in HDF5
+****************
+
+Datasets are where most of the information in an HDF5 file resides. Like
+NumPy arrays, they are homogeneous collections of data elements, with an
+immutable datatype and (hyper)rectangular shape. Unlike NumPy arrays, they
+support a variety of transparent storage features such as compression,
+error-detection, and chunked I/O.
+
+Metadata can be associated with an HDF5 dataset in the form of an "attribute".
+It's recommended that you use this scheme for any small bits of information
+you want to associate with the dataset. For example, a descriptive title,
+digitizer settings, or data collection time are appropriate things to store
+as HDF5 attributes.
+
+
+Opening an existing dataset
+===========================
+
+Since datasets reside in groups, the best way to retrieve a dataset is by
+indexing the group directly:
+
+ >>> dset = grp["Dataset Name"]
+
+You can also open a dataset by passing the group and name directly to the
+constructor:
+
+ >>> dset = Dataset(grp, "Dataset Name")
+
+No options can be specified when opening a dataset, as almost all properties
+of datasets are immutable.
+
+
+Creating a dataset
+==================
+
+There are two ways to create a dataset, with nearly identical syntax. The
+recommended procedure is to use a method on the Group object in which the
+dataset will be stored:
+
+ >>> dset = grp.create_dataset("Dataset Name", ...options...)
+
+Or you can call the Dataset constructor. When providing more than just the
+group and name, the constructor will try to create a new dataset:
+
+    >>> dset = Dataset(grp, "Dataset Name", ...options...)
+
+Bear in mind that if an object of the same name already exists in the group,
+you will have to manually unlink it first:
+
+ >>> "Dataset Name" in grp
+ True
+    >>> del grp["Dataset Name"]
+ >>> dset = grp.create_dataset("Dataset Name", ...options...)
+
+Logically, there are two ways to specify a dataset; you can tell HDF5 its
+shape and datatype explicitly, or you can provide an existing ndarray from
+which the shape, dtype and contents will be determined. The following options
+are used to communicate this information.
+
+
+Arguments and options
+---------------------
+
+All options below can be given to either the Dataset constructor or the
+Group method create_dataset. They are listed in the order the arguments are
+taken for both methods. Default values are in *italics*.
+
+* **shape** = *None* or tuple(<dimensions>)
+
+ A Numpy-style shape tuple giving the dataset dimensions. Required if
+ option **data** isn't provided.
+
+* **dtype** = *None* or NumPy dtype
+
+ A NumPy dtype, or anything from which a dtype can be determined.
+ This sets the datatype. If this is omitted, the dataset will
+ consist of single-precision floats, in native byte order ("=f4").
+
+* **data** = *None* or ndarray
+
+ A NumPy array. The dataset shape and dtype will be determined from
+ this array, and the dataset will be initialized to its contents.
+ Required if option **shape** isn't provided.
+
+* **chunks** = *None* or tuple(<chunk dimensions>)
+
+ Manually set the HDF5 chunk size.
+
+ When using any of the following options like compression or error-
+ detection, the dataset is stored in chunked format, as small atomic
+ pieces of data on which the filters operate. These chunks are then
+ indexed by B-trees. Ordinarily h5py will guess a chunk value. If you
+ know what you're doing, you can override that value here.
+
+* **compression** = *None* or int(0-9)
+
+ Enable the use of GZIP compression, at the given integer level. The
+ dataset will be stored in chunked format.
+
+* **shuffle** = True / *False*
+
+ Enable the shuffle filter, possibly increasing the GZIP compression
+ ratio. The dataset will be stored in chunked format.
+
+* **fletcher32** = True / *False*
+
+ Enable Fletcher32 error-detection. The dataset will be stored in
+ chunked format.
+
+* **maxshape** = *None* or tuple(<dimensions>)
+
+ If provided, the dataset will be stored in a chunked and extendible fashion.
+ The value provided should be a tuple of integers indicating the maximum
+ size of each axis. You can provide a value of "None" for any axis to
+ indicate that the maximum size of that dimension is unlimited.
+
+
+Slicing and data access
+=======================
+
+A subset of the NumPy extended slicing is supported. Slice specifications are
+translated directly to HDF5 *hyperslab* selections, and are a fast and
+efficient way to access data in the file.
+
+
+
+
+
+
+
+
+
+
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index 9e5c813..57ed3ac 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -673,7 +673,7 @@ class Dataset(HLObject):
# Match NumPy conventions
if len(names) == 1:
arr = arr[names[0]] # Single-field recarray convention
- arr = arr.squeeze() # No "1" dimensions
+
if scalar_result:
arr = numpy.asscalar(arr) # Scalar if slicing rules say it is
return arr
diff --git a/h5py/tests/test_highlevel.py b/h5py/tests/test_highlevel.py
index 1968fde..e040bd5 100644
--- a/h5py/tests/test_highlevel.py
+++ b/h5py/tests/test_highlevel.py
@@ -251,12 +251,44 @@ class TestDataset(unittest.TestCase):
self.assertEqual(idx+1, len(arr1))
self.assertRaises(TypeError, list, d2)
+ def test_Dataset_bigslice(self):
+ print ""
+
+ s = SliceFreezer()
+
+ bases = [1024, 2**37, 2**60]
+ shapes = [ (42,1), (100,100), (1,42), (1,1), (4,1025)]
+
+ for base in bases:
+ slices = [ s[base:base+x, base:base+y] for x, y in shapes]
+
+ if "dset" in self.f:
+ del self.f["dset"]
+
+ dset = self.f.create_dataset("dset", (2**62, 2**62), '=f4', maxshape=(None,None))
+
+ for shp, slc in zip(shapes, slices):
+ print " Testing base 2**%d" % numpy.log2(base)
+
+ empty = numpy.zeros(shp)
+ data = numpy.arange(numpy.product(shp)).reshape(shp)
+
+ dset[slc] = empty
+ arr = dset[slc]
+ self.assertEqual(arr.shape, shp)
+ self.assert_(numpy.all(arr == empty), "%r \n\n %r" % (arr, empty))
+
+ dset[slc] = data
+ arr = dset[slc]
+ self.assert_(numpy.all(arr == data), "%r \n\n %r" % (arr, data))
+
def test_Dataset_slicing(self):
print ''
s = SliceFreezer()
slices = [s[0,0,0], s[0,0,:], s[0,:,0], s[0,:,:]]
+ slices += [s[0:1,:,4:5], s[2:3,0,4:5], s[:,0,0:1], s[0,:,0:1]]
slices += [ s[9,9,49], s[9,:,49], s[9,:,:] ]
slices += [ s[0, ..., 49], s[...], s[..., 49], s[9,...] ]
slices += [ s[0:7:2,0:9:3,15:43:5], s[2:8:2,...] ]
@@ -301,7 +333,7 @@ class TestDataset(unittest.TestCase):
os.unlink(fname)
def test_Dataset_flat(self):
-
+ return
print ""
s = SliceFreezer()
diff --git a/h5py/utils_hl.py b/h5py/utils_hl.py
index 6fed636..59f0438 100644
--- a/h5py/utils_hl.py
+++ b/h5py/utils_hl.py
@@ -176,11 +176,30 @@ def slice_select(space, args):
for idx, (length, exp) in enumerate(zip(shape,final_args)):
if isinstance(exp, slice):
- start_, stop_, step_ = exp.indices(length)
+
+ # slice.indices() method is limited to long ints
+
+ start_, stop_, step_ = exp.start, exp.stop, exp.step
+ start_ = 0 if start_ is None else int(start_)
+ stop_ = length if stop_ is None else int(stop_)
+ step_ = 1 if step_ is None else int(step_)
+
+ if start_ < 0:
+ raise ValueError("Negative start index not allowed (got %d)" % start_)
+ if step_ < 1:
+ raise ValueError("Step must be >= 1 (got %d)" % step_)
+ if stop_ < 0:
+ raise ValueError("Negative stop index not allowed (got %d)" % stop_)
+
count_ = (stop_-start_)//step_
if (stop_-start_) % step_ != 0:
count_ += 1
+
+ if start_+count_ > length:
+ raise ValueError("Selection out of bounds on axis %d" % idx)
+
simple_ = False
+
else:
try:
exp = long(exp)
@@ -201,7 +220,12 @@ def slice_select(space, args):
simple.append(simple_)
space.select_hyperslab(tuple(start), tuple(count), tuple(stride))
- return h5s.create_simple(tuple(count)), all(simple)
+
+ # According to the NumPy rules, dimensions which are specified as an int
+ # do not result in a length-1 axis.
+ mem_shape = tuple(x for x, smpl in zip(count, simple) if not smpl)
+
+ return h5s.create_simple(mem_shape), all(simple)
def strhdr(line, char='-'):
""" Print a line followed by an ASCII-art underline """
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git
More information about the debian-science-commits
mailing list