[h5py] 156/455: Reworking resize()
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:28 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.
commit f40a57f8591534f7ca431aeda5857bec6145a352
Author: andrewcollette <andrew.collette at gmail.com>
Date: Wed Nov 19 15:35:27 2008 +0000
Reworking resize()
---
docs/source/guide/hl.rst | 49 +++++++++++++++++++++++++++++---------------
h5py/__init__.py | 3 ++-
h5py/defs.pxd | 1 +
h5py/h5d.pyx | 35 +++++++++++++++++++++++++++----
h5py/highlevel.py | 34 +++++++++++++++++++++++-------
h5py/tests/test_highlevel.py | 6 +++---
6 files changed, 97 insertions(+), 31 deletions(-)
diff --git a/docs/source/guide/hl.rst b/docs/source/guide/hl.rst
index 3feb871..f5e0654 100644
--- a/docs/source/guide/hl.rst
+++ b/docs/source/guide/hl.rst
@@ -480,6 +480,32 @@ points are selected is preserved.
example, it takes 40MB to express a 1-million point selection on a rank-3
array. Be careful, especially with boolean masks.
+Special features
+----------------
+
+Unlike memory-resident NumPy arrays, HDF5 datasets support a number of optional
+features. These are enabled by keywords provided to
+:meth:`Group.create_dataset`. Some of the more useful ones are:
+
+Resizing
+ You can specify a maximum size for the dataset when you create it, by
+ providing a "maxshape" tuple. Elements with the value ``None`` indicate
+ unlimited dimensions. Later calls to :meth:`Dataset.resize` will
+ modify the shape in-place::
+
+ >>> dset = grp.create_dataset("MyDataset", (10,10), '=f8', maxshape=(None, None))
+ >>> dset.shape
+ (10, 10)
+ >>> dset.resize((20,20))
+ >>> dset.shape
+ (20, 20)
+
+Compression
+ Transparent GZIP compression can substantially reduce the storage space
+ needed for the dataset. Supply an integer between 0 and 9. Using the
+ *shuffle* filter along with this option can improve the compression ratio
+ further.
+
Value attribute and scalar datasets
-----------------------------------
@@ -495,19 +521,8 @@ array, and a full n-dimensional array for all other cases:
[ 1., 1.]])
>>> f["ScalarDS"].value
1.0
-
-Extending Datasets
-------------------
-
-If the dataset is created with the *maxshape* option set, you can later expand
-its size. Simply call the *extend* method:
-
- >>> dset = f.create_dataset("MyDataset", (5,5), maxshape=(None,None))
- >>> dset.shape
- (5, 5)
- >>> dset.extend((15,20))
- >>> dset.shape
- (15, 20)
+ >>> f["ScalarDS"][...]
+ array(1.0)
Length and iteration
--------------------
@@ -577,10 +592,12 @@ Reference
Write to the dataset. See :ref:`slicing_access`.
- .. method:: extend(shape)
+ .. method:: resize(shape, axis=None)
- Expand the size of the dataset to this new shape. Must be compatible
- with the *maxshape* as specified when the dataset was created.
+ Change the size of the dataset to this new shape. Must be compatible
+ with the *maxshape* as specified when the dataset was created. If
+ the keyword *axis* is provided, the argument should be a single
+ integer instead; only that axis will be modified.
.. method:: __len__
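The reworked documentation above pairs resizing with compression. As a minimal end-to-end sketch of the API exactly as documented here (file name and dataset name are illustrative; an integer compression value selects a GZIP level 0-9 in this version of h5py):

    import h5py

    f = h5py.File("example.hdf5", "w")

    # maxshape=(None, None) marks both axes as unlimited; compression=6
    # enables GZIP level 6, and shuffle can improve the ratio further.
    dset = f.create_dataset("growable", (10, 10), '=f8',
                            maxshape=(None, None),
                            compression=6, shuffle=True)

    dset.resize((20, 20))    # grow both axes at once
    dset.resize(50, axis=0)  # new keyword form: grow only the first axis
    print(dset.shape)        # (50, 20)
    f.close()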
diff --git a/h5py/__init__.py b/h5py/__init__.py
index 8ea1225..fd086fa 100644
--- a/h5py/__init__.py
+++ b/h5py/__init__.py
@@ -32,12 +32,13 @@ except ImportError:
import utils, h5, h5a, h5d, h5f, h5g, h5i, h5p, h5r, h5s, h5t, h5z, highlevel, version
from highlevel import File, Group, Dataset, Datatype, AttributeManager, CoordsList
+from h5 import H5Error, get_config
__doc__ = __doc__ % (version.version, version.hdf5_version, version.api_version)
__all__ = ['h5', 'h5f', 'h5g', 'h5s', 'h5t', 'h5d', 'h5a', 'h5p', 'h5r',
'h5z', 'h5i', 'version', 'File', 'Group', 'Dataset',
- 'Datatype', 'AttributeManager', 'CoordsList']
+ 'Datatype', 'AttributeManager', 'CoordsList', 'H5Error', 'get_config']
if version.api_version_tuple >= (1,8):
import h5o, h5l
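With H5Error and get_config now exported at the package level, downstream code can catch HDF5 errors without importing h5py.h5 directly. A sketch mirroring the behavior exercised by the test suite below (names are illustrative, and the exception type applies to this version of h5py):

    import h5py

    f = h5py.File("capped.hdf5", "w")
    dset = f.create_dataset("capped", (5, 5), '=f8', maxshape=(10, 10))

    try:
        dset.resize((20, 20))  # exceeds maxshape; HDF5 reports an error
    except h5py.H5Error:
        print("resize rejected: beyond maxshape")

    cfg = h5py.get_config()    # runtime configuration object
    f.close()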
diff --git a/h5py/defs.pxd b/h5py/defs.pxd
index ee00992..c2cf75c 100644
--- a/h5py/defs.pxd
+++ b/h5py/defs.pxd
@@ -242,6 +242,7 @@ cdef extern from "hdf5.h":
hsize_t *point, void *operator_data)
herr_t H5Diterate(void *buf, hid_t type_id, hid_t space_id,
H5D_operator_t operator, void* operator_data) except *
+ herr_t H5Dset_extent(hid_t dset_id, hsize_t* size)
# === H5F - File API ==========================================================
diff --git a/h5py/h5d.pyx b/h5py/h5d.pyx
index 157b9af..2108e6c 100644
--- a/h5py/h5d.pyx
+++ b/h5py/h5d.pyx
@@ -256,10 +256,8 @@ cdef class DatasetID(ObjectID):
its dataspace, which are fixed when the dataset is created.
"""
cdef int rank
- cdef hid_t space_id
- cdef hsize_t* dims
- space_id = 0
- dims = NULL
+ cdef hid_t space_id = 0
+ cdef hsize_t* dims = NULL
try:
space_id = H5Dget_space(self.id)
@@ -278,6 +276,35 @@ cdef class DatasetID(ObjectID):
H5Sclose(space_id)
@sync
+ def set_extent(self, tuple shape):
+ """ (TUPLE shape)
+
+ Set the size of the dataspace to match the given shape. If the new
+ size is larger in any dimension, it must be compatible with the
+ maximum dataspace size.
+ """
+ cdef int rank
+ cdef hid_t space_id = 0
+ cdef hsize_t* dims = NULL
+
+ try:
+ space_id = H5Dget_space(self.id)
+ rank = H5Sget_simple_extent_ndims(space_id)
+
+ if len(shape) != rank:
+ raise TypeError("New shape length (%d) must match dataset rank (%d)" % (len(shape), rank))
+
+ dims = <hsize_t*>emalloc(sizeof(hsize_t)*rank)
+ convert_tuple(shape, dims, rank)
+ H5Dset_extent(self.id, dims)
+
+ finally:
+ efree(dims)
+ if space_id:
+ H5Sclose(space_id)
+
+
+ @sync
def get_space(self):
""" () => SpaceID
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index fb9cbc2..faed44d 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -51,6 +51,7 @@ import numpy
import inspect
import threading
import sys
+import warnings
from h5py import h5, h5f, h5g, h5s, h5t, h5d, h5a, h5p, h5z, h5i
from h5py.h5 import H5Error
@@ -298,7 +299,7 @@ class Group(HLObject, _DictCompat):
fletcher32: Enable Fletcher32 error detection? T/F*
maxshape: Tuple giving dataset maximum dimensions or None*.
You can grow each axis up to this limit using
- extend(). For each unlimited axis, provide None.
+ resize(). For each unlimited axis, provide None.
All these options require chunking. If a chunk tuple is not
provided, the constructor will guess an appropriate chunk shape.
@@ -669,7 +670,7 @@ class Dataset(HLObject):
fletcher32: Enable Fletcher32 error detection? T/F*
maxshape: Tuple giving dataset maximum dimensions or None*.
You can grow each axis up to this limit using
- extend(). For each unlimited axis, provide None.
+ resize(). For each unlimited axis, provide None.
All these options require chunking. If a chunk tuple is not
provided, the constructor will guess an appropriate chunk shape.
@@ -736,14 +737,33 @@ class Dataset(HLObject):
self._plist = self.id.get_create_plist()
def extend(self, shape):
- """ Resize the dataset so it's at least as big as "shape".
+ """ Deprecated. Use resize() instead. """
+ warnings.warn("extend() will be removed in 1.1; use resize() instead", DeprecationWarning)
+ self.resize(shape)
- Note that the new shape must be compatible with the "maxshape"
- argument provided when the dataset was created. Also, the rank of
- the dataset cannot be changed.
+ def resize(self, size, axis=None):
+ """ Resize the dataset, or the specified axis.
+
+ Argument should be either a new shape tuple, or an integer. The rank
+ of the dataset cannot be changed. Keep in mind the dataset can only
+ be resized up to the maximum dimensions provided when it was created.
+
+ Beware: if the array has more than one dimension, the indices of
+ existing data can change.
"""
+ if axis is not None:
+ if not 0 <= axis < self.id.rank:
+ raise ValueError("Invalid axis (0 to %d allowed)" % (self.id.rank-1))
+ try:
+ newlen = int(size)
+ except TypeError:
+ raise TypeError("Argument must be a single int if axis is specified")
+ size = list(self.shape)
+ size[axis] = newlen
+ size = tuple(size)
+
with self._lock:
- self.id.extend(shape)
+ self.id.set_extent(size)
def __len__(self):
""" The size of the first axis. TypeError if scalar.
diff --git a/h5py/tests/test_highlevel.py b/h5py/tests/test_highlevel.py
index ea569d7..ba6d163 100644
--- a/h5py/tests/test_highlevel.py
+++ b/h5py/tests/test_highlevel.py
@@ -317,7 +317,7 @@ class TestDataset(HDF5TestCase):
self.assert_(numpy.all(dset[:] == x))
del self.f['TEST_DATA']
- def test_Dataset_extend(self):
+ def test_Dataset_resize(self):
""" Test extending datasets """
self.output("")
@@ -344,13 +344,13 @@ class TestDataset(HDF5TestCase):
for final_shape in final_shapes[shape]:
self.output(" Extending %s to %s" % (shape, final_shape))
newarr = numpy.arange(numpy.product(final_shape)).reshape(final_shape)
- ds.extend(final_shape)
+ ds.resize(final_shape)
ds[...] = newarr
self.assertEqual(ds.shape, final_shape)
self.assert_(numpy.all(ds[...] == newarr))
for illegal_shape in illegal_shapes[shape]:
- self.assertRaises(H5Error, ds.extend, illegal_shape)
+ self.assertRaises(H5Error, ds.resize, illegal_shape)
def test_Dataset_len_iter(self):
""" Test new and old len(), iteration over rows """
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git