[h5py] 156/455: Reworking resize()

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:28 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit f40a57f8591534f7ca431aeda5857bec6145a352
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Wed Nov 19 15:35:27 2008 +0000

    Reworking resize()
---
 docs/source/guide/hl.rst     | 49 +++++++++++++++++++++++++++++---------------
 h5py/__init__.py             |  3 ++-
 h5py/defs.pxd                |  1 +
 h5py/h5d.pyx                 | 35 +++++++++++++++++++++++++++----
 h5py/highlevel.py            | 34 +++++++++++++++++++++++-------
 h5py/tests/test_highlevel.py |  6 +++---
 6 files changed, 97 insertions(+), 31 deletions(-)

diff --git a/docs/source/guide/hl.rst b/docs/source/guide/hl.rst
index 3feb871..f5e0654 100644
--- a/docs/source/guide/hl.rst
+++ b/docs/source/guide/hl.rst
@@ -480,6 +480,32 @@ points are selected is preserved.
     example, it takes 40MB to express a 1-million point selection on a rank-3
     array.  Be careful, especially with boolean masks.
 
+Special features
+----------------
+
+Unlike memory-resident NumPy arrays, HDF5 datasets support a number of optional
+features.  These are enabled by the keywords provided to
+:meth:`Group.create_dataset`.  Some of the more useful are:
+
+Resizing
+    You can specify a maximum size for the dataset when you create it, by
+    providing a "maxshape" tuple.  Elements with the value ``None`` indicate
+    unlimited dimensions.  Later calls to :meth:`Dataset.resize` will
+    modify the shape in-place::
+
+        >>> dset = grp.create_dataset("MyDS", (10,10), '=f8', maxshape=(None, None))
+        >>> dset.shape
+        (10, 10)
+        >>> dset.resize((20,20))
+        >>> dset.shape
+        (20, 20)
+
+Compression
+    Transparent GZIP compression can substantially reduce the storage space
+    needed for the dataset.  Supply an integer between 0 and 9.  Using the
+    *shuffle* filter along with this option can improve the compression ratio
+    further.
+
 Value attribute and scalar datasets
 -----------------------------------
 
@@ -495,19 +521,8 @@ array, and a full n-dimensional array for all other cases:
            [ 1.,  1.]])
     >>> f["ScalarDS"].value
     1.0
-
-Extending Datasets
-------------------
-
-If the dataset is created with the *maxshape* option set, you can later expand
-its size.  Simply call the *extend* method:
-
-    >>> dset = f.create_dataset("MyDataset", (5,5), maxshape=(None,None))
-    >>> dset.shape
-    (5, 5)
-    >>> dset.extend((15,20))
-    >>> dset.shape
-    (15, 20)
+    >>> f["ScalarDS"][...]
+    array(1.0)
 
 Length and iteration
 --------------------
@@ -577,10 +592,12 @@ Reference
 
         Write to the dataset.  See :ref:`slicing_access`.
 
-    .. method:: extend(shape)
+    .. method:: resize(shape, axis=None)
 
-        Expand the size of the dataset to this new shape.  Must be compatible
-        with the *maxshape* as specified when the dataset was created.
+        Change the size of the dataset to this new shape.  Must be compatible
+        with the *maxshape* as specified when the dataset was created.  If
+        the keyword *axis* is provided, the argument should be a single
+        integer instead; that axis only will be modified.
 
     .. method:: __len__
 
diff --git a/h5py/__init__.py b/h5py/__init__.py
index 8ea1225..fd086fa 100644
--- a/h5py/__init__.py
+++ b/h5py/__init__.py
@@ -32,12 +32,13 @@ except ImportError:
 import utils, h5, h5a, h5d, h5f, h5g, h5i, h5p, h5r, h5s, h5t, h5z, highlevel, version
 
 from highlevel import File, Group, Dataset, Datatype, AttributeManager, CoordsList
+from h5 import H5Error, get_config
 
 __doc__ = __doc__ % (version.version, version.hdf5_version, version.api_version)
 
 __all__ = ['h5', 'h5f', 'h5g', 'h5s', 'h5t', 'h5d', 'h5a', 'h5p', 'h5r',
            'h5z', 'h5i', 'version', 'File', 'Group', 'Dataset',
-           'Datatype', 'AttributeManager', 'CoordsList']
+           'Datatype', 'AttributeManager', 'CoordsList', 'H5Error', 'get_config']
 
 if version.api_version_tuple >= (1,8):
     import h5o, h5l
diff --git a/h5py/defs.pxd b/h5py/defs.pxd
index ee00992..c2cf75c 100644
--- a/h5py/defs.pxd
+++ b/h5py/defs.pxd
@@ -242,6 +242,7 @@ cdef extern from "hdf5.h":
                     hsize_t *point, void *operator_data)
   herr_t    H5Diterate(void *buf, hid_t type_id, hid_t space_id, 
                         H5D_operator_t operator, void* operator_data) except *
+  herr_t    H5Dset_extent(hid_t dset_id, hsize_t* size)
 
 
 # === H5F - File API ==========================================================
diff --git a/h5py/h5d.pyx b/h5py/h5d.pyx
index 157b9af..2108e6c 100644
--- a/h5py/h5d.pyx
+++ b/h5py/h5d.pyx
@@ -256,10 +256,8 @@ cdef class DatasetID(ObjectID):
             its dataspace, which are fixed when the dataset is created.
         """
         cdef int rank
-        cdef hid_t space_id
-        cdef hsize_t* dims
-        space_id = 0
-        dims = NULL
+        cdef hid_t space_id = 0
+        cdef hsize_t* dims = NULL
 
         try:
             space_id = H5Dget_space(self.id)
@@ -278,6 +276,35 @@ cdef class DatasetID(ObjectID):
                 H5Sclose(space_id)
 
     @sync
+    def set_extent(self, tuple shape):
+        """ (TUPLE shape)
+
+            Set the size of the dataspace to match the given shape.  If the new
+            size is larger in any dimension, it must be compatible with the
+            maximum dataspace size.
+        """
+        cdef int rank
+        cdef hid_t space_id = 0
+        cdef hsize_t* dims = NULL
+
+        try:
+            space_id = H5Dget_space(self.id)
+            rank = H5Sget_simple_extent_ndims(space_id)
+
+            if len(shape) != rank:
+                raise TypeError("New shape length (%d) must match dataset rank (%d)" % (len(shape), rank))
+
+            dims = <hsize_t*>emalloc(sizeof(hsize_t)*rank)
+            convert_tuple(shape, dims, rank)
+            H5Dset_extent(self.id, dims)
+
+        finally:
+            efree(dims)
+            if space_id:
+                H5Sclose(space_id)
+
+
+    @sync
     def get_space(self):
         """ () => SpaceID
 
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index fb9cbc2..faed44d 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -51,6 +51,7 @@ import numpy
 import inspect
 import threading
 import sys
+import warnings
 
 from h5py import h5, h5f, h5g, h5s, h5t, h5d, h5a, h5p, h5z, h5i
 from h5py.h5 import H5Error
@@ -298,7 +299,7 @@ class Group(HLObject, _DictCompat):
         fletcher32:    Enable Fletcher32 error detection? T/F*
         maxshape:      Tuple giving dataset maximum dimensions or None*.
                        You can grow each axis up to this limit using
-                       extend().  For each unlimited axis, provide None.
+                       resize().  For each unlimited axis, provide None.
 
         All these options require chunking.  If a chunk tuple is not
         provided, the constructor will guess an appropriate chunk shape.
@@ -669,7 +670,7 @@ class Dataset(HLObject):
         fletcher32:    Enable Fletcher32 error detection? T/F*
         maxshape:      Tuple giving dataset maximum dimensions or None*.
                        You can grow each axis up to this limit using
-                       extend().  For each unlimited axis, provide None.
+                       resize().  For each unlimited axis, provide None.
 
         All these options require chunking.  If a chunk tuple is not
         provided, the constructor will guess an appropriate chunk shape.
@@ -736,14 +737,33 @@ class Dataset(HLObject):
             self._plist = self.id.get_create_plist()
 
     def extend(self, shape):
-        """ Resize the dataset so it's at least as big as "shape".
+        """ Deprecated.  Use resize() instead. """
+        warnings.warn("extend() will be removed in 1.1; use resize() instead", DeprecationWarning)
+        self.resize(shape)
 
-        Note that the new shape must be compatible with the "maxshape"
-        argument provided when the dataset was created.  Also, the rank of
-        the dataset cannot be changed.
+    def resize(self, size, axis=None):
+        """ Resize the dataset, or the specified axis.
+
+        Argument should be either a new shape tuple, or an integer.  The rank
+        of the dataset cannot be changed.  Keep in mind the dataset can only
+        be resized up to the maximum dimensions provided when it was created.
+
+        Beware; if the array has more than one dimension, the indices of
+        existing data can change.
         """
+        if axis is not None:
+            if not (axis >= 0 and axis < self.id.rank):
+                raise ValueError("Invalid axis (0 to %s allowed)" % (self.id.rank - 1))
+            try:
+                newlen = int(size)
+            except TypeError:
+                raise TypeError("Argument must be a single int if axis is specified")
+            size = list(self.shape)
+            size[axis] = newlen
+            size = tuple(size)
+
         with self._lock:
-            self.id.extend(shape)
+            self.id.set_extent(size)
 
     def __len__(self):
         """ The size of the first axis.  TypeError if scalar.
diff --git a/h5py/tests/test_highlevel.py b/h5py/tests/test_highlevel.py
index ea569d7..ba6d163 100644
--- a/h5py/tests/test_highlevel.py
+++ b/h5py/tests/test_highlevel.py
@@ -317,7 +317,7 @@ class TestDataset(HDF5TestCase):
             self.assert_(numpy.all(dset[:] == x))
             del self.f['TEST_DATA']
 
-    def test_Dataset_extend(self):
+    def test_Dataset_resize(self):
         """ Test extending datasets """
 
         self.output("")
@@ -344,13 +344,13 @@ class TestDataset(HDF5TestCase):
             for final_shape in final_shapes[shape]:
                 self.output("    Extending %s to %s" % (shape, final_shape))
                 newarr = numpy.arange(numpy.product(final_shape)).reshape(final_shape)
-                ds.extend(final_shape)
+                ds.resize(final_shape)
                 ds[...] = newarr
                 self.assertEqual(ds.shape, final_shape)
                 self.assert_(numpy.all(ds[...] == newarr))
 
             for illegal_shape in illegal_shapes[shape]:
-                self.assertRaises(H5Error, ds.extend, illegal_shape)
+                self.assertRaises(H5Error, ds.resize, illegal_shape)
 
     def test_Dataset_len_iter(self):
         """ Test new and old len(), iteration over rows """

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git



More information about the debian-science-commits mailing list