[h5py] 79/455: Highlevel fixes, change Dataset __init__, fix H5P segfault

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:19 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit f0600f7ce240d9587ddffcd6512312aa35e13dcc
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Tue Jul 22 05:22:02 2008 +0000

    Highlevel fixes, change Dataset __init__, fix H5P segfault
---
 CHANGES.txt                  |  24 ++++++++--
 h5py/h5p.pxd                 |   2 +-
 h5py/h5p.pyx                 |  32 +++++++++++--
 h5py/highlevel.py            | 107 ++++++++++++++++++++++++++-----------------
 h5py/tests/test_highlevel.py |  25 +++++++---
 h5py/utils_hl.py             |   8 ++--
 6 files changed, 134 insertions(+), 64 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index e98121d..52ee829 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,15 +1,31 @@
+Version 0.2.2
+=============
+
+Low-level
+---------
+  - Fix segfault related to H5Pget_filter_by_idx
+
+High-level
+----------
+  - Fix Dataset single-value indexing
+  - Fix chunk values not working for Dataset creation
+  - Rearrange Dataset __init__ keywords to better match Numpy convention
+  - Add unit tests for extended Dataset options
+  - Catch all exceptions in highlevel __str__ methods
+  - Add names property for Group, AttributeManager objects
+
 Version 0.2.1
 =============
 
-General:
---------
+General
+-------
   - Moved version info into h5py.h5 module
   - Stub added for 1.8 API conditional compilation
   - Rewrote unit test framework to correctly use unittest methods
   - Remove VERSION.txt, add CHANGES.txt
 
-Low-level:
-----------
+Low-level
+---------
   - Normalize keyword naming for property list arguments
   - Change h5g.GroupID Python extensions to special methods
   - Additional property list methods
diff --git a/h5py/h5p.pxd b/h5py/h5p.pxd
index 14e9330..e23146f 100644
--- a/h5py/h5p.pxd
+++ b/h5py/h5p.pxd
@@ -177,7 +177,7 @@ cdef extern from "hdf5.h":
                               unsigned int *cd_values, size_t namelen, char name[]  ) except *
   herr_t        H5Pget_filter_by_id( hid_t plist_id, H5Z_filter_t filter, 
                                      unsigned int *flags, size_t *cd_nelmts, 
-                                     unsigned int cd_values[], size_t namelen, char name[]  ) except *
+                                     unsigned int cd_values[], size_t namelen, char name[]) except *
   herr_t        H5Pmodify_filter(hid_t plist, H5Z_filter_t filter, unsigned int flags,
                                  size_t cd_nelmts, unsigned int cd_values[]  ) except *
   herr_t        H5Premove_filter(hid_t plist, H5Z_filter_t filter  ) except *
diff --git a/h5py/h5p.pyx b/h5py/h5p.pyx
index 99c3487..52d49c8 100644
--- a/h5py/h5p.pyx
+++ b/h5py/h5p.pyx
@@ -414,7 +414,8 @@ cdef class PropDCID(PropInstanceID):
         if filter_idx < 0:
             raise ValueError("Filter index must be a non-negative integer")
 
-        filter_code = <int>H5Pget_filter(self.id, filter_idx, &flags, &nelements, cd_values, 256, name)
+        filter_code = <int>H5Pget_filter(self.id, filter_idx, &flags,
+                                         &nelements, cd_values, 256, name)
         name[256] = c'\0'  # in case it's > 256 chars
 
         vlist = []
@@ -423,11 +424,25 @@ cdef class PropDCID(PropInstanceID):
 
         return (filter_code, flags, tuple(vlist), name)
 
+    def _has_filter(self, int filter_code):
+        """ (INT filter_code)
+
+            Slow & stupid method to determine if a filter is used in this
+            property list.  Used because the HDF5 function H5Pget_filter_by_id
+            is broken.
+        """
+        cdef int nfilters
+        nfilters = self.get_nfilters()
+        for i from 0<=i<nfilters:
+            if self.get_filter(i)[0] == filter_code:
+                return True
+        return False
+
     def get_filter_by_id(self, int filter_code):
-        """ (INT filter_code) => TUPLE filter_info
+        """ (INT filter_code) => TUPLE filter_info or None
 
             Get information about a filter, identified by its code (one
-            of h5z.FILTER_*)
+            of h5z.FILTER_*).  If the filter doesn't exist, returns None.
 
             Tuple entries are:
             0: UINT flags (h5z.FLAG_*)
@@ -439,11 +454,18 @@ cdef class PropDCID(PropInstanceID):
         cdef size_t nelements
         cdef unsigned int cd_values[16]
         cdef char name[257]
+        cdef herr_t retval
         cdef int i
         nelements = 16 # HDF5 library actually complains if this is too big.
 
-        H5Pget_filter_by_id(self.id, <H5Z_filter_t>filter_code, &flags, &nelements, cd_values, 256, name)
-        name[256] = c'\0'  # in case it's > 256 chars
+        if not self._has_filter(filter_code):
+            return None
+
+        retval = H5Pget_filter_by_id(self.id, <H5Z_filter_t>filter_code,
+                                     &flags, &nelements, cd_values, 256, name)
+        assert nelements <= 16
+
+        name[256] = c'\0'  # In case HDF5 doesn't terminate it properly
 
         vlist = []
         for i from 0<=i<nelements:
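
For context, a rough usage sketch of the fixed property-list call, assuming the 0.2.2-era low-level modules shown in this diff (h5p, h5z); the particular filters used are illustrative only:

    from h5py import h5p, h5z

    # Dataset-creation property list with DEFLATE enabled, as the
    # high-level code in this commit sets it up.
    plist = h5p.create(h5p.DATASET_CREATE)
    plist.set_deflate(6)

    # After this commit, asking for a filter that is present returns the
    # usual info tuple, while asking for one that is absent returns None
    # instead of tripping the H5Pget_filter_by_id segfault.
    present = plist.get_filter_by_id(h5z.FILTER_DEFLATE)   # tuple of filter info
    missing = plist.get_filter_by_id(h5z.FILTER_SHUFFLE)   # None
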
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index a81c362..398e097 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -106,6 +106,9 @@ class Group(HLObject):
         the AttributeManager class.
     """
 
+    names = property(lambda self: tuple(self),
+        doc = "Tuple of group member names")
+
     def __init__(self, parent_object, name, create=False):
         """ Create a new Group object, from a parent object and a name.
 
@@ -208,20 +211,23 @@ class Group(HLObject):
         return Group(self, name, create=True)
 
     def create_dataset(self, name, *args, **kwds):
-        """ Create and return a dataset.  Arguments, in order:
-
-            You must specify either "data", or both "type" and "shape".
-             data:     Numpy array from which the dataset is constructed
-             dtype:    Numpy dtype giving the datatype
-             shape:    Numpy-style shape tuple giving the dataspace
-
-            Additional keyword options (* is default):
-             chunks:        Tuple of chunk dimensions or None*
-             compression:   DEFLATE (gzip) compression level, int or None*
-             shuffle:       Use the shuffle filter (needs compression) T/F*
-             fletcher32:    Enable Fletcher32 error detection T/F*
+        """ Create and return a new dataset, attached to this group.
+
+            create_dataset(name, shape, [dtype=<Numpy dtype>], **kwds)
+            create_dataset(name, data=<Numpy array>, **kwds)
+
+            If "dtype" is not specified, the default is single-precision
+            floating point, with native byte order ("=f4").
+
+            Creating a dataset will fail if another of the same name already 
+            exists. Additional keywords are:
+
+            chunks:        Tuple of chunk dimensions or None*
+            compression:   DEFLATE (gzip) compression level, int or None*
+            shuffle:       Use the shuffle filter? (requires compression) T/F*
+            fletcher32:    Enable Fletcher32 error detection? T/F*
         """
-        return Dataset(self, name, **kwds)
+        return Dataset(self, name, *args, **kwds)
 
     def desc(self):
         """ Extended (multi-line) description of this group, as a string.
@@ -241,9 +247,10 @@ class Group(HLObject):
         return outstr
         
     def __str__(self):
-        if self.id._valid:
+        try:
             return 'Group "%s" (%d members)' % (hbasename(self.name), len(self))
-        return "Closed group"
+        except:
+            return "Invalid group"
 
 
 class File(Group):
@@ -336,9 +343,10 @@ class File(Group):
             self.close()
         
     def __str__(self):
-        if self.id._valid:
+        try:
             return 'File "%s", root members: %s' % (self.name, ', '.join(['"%s"' % name for name in self]))
-        return "Closed file (%s)" % self.name
+        except:
+            return "Invalid file"
 
     def browse(self, dict=None):
         """ Open a command line shell to browse this file. If dict is not
@@ -408,54 +416,61 @@ class Dataset(HLObject):
         doc = "The entire dataset, as an array or scalar depending on the shape.")
 
     def __init__(self, group, name,
-                    data=None, dtype=None, shape=None, 
+                    shape=None, dtype=None, data=None,
                     chunks=None, compression=None, shuffle=False, fletcher32=False):
         """ Construct a Dataset object.  You might find it easier to use the
             Group methods: Group["name"] or Group.create_dataset().
 
             There are two modes of operation for this constructor:
 
-            1.  Open an existing dataset
-                If you only supply the required parameters "group" and "name",
-                the object will attempt to open an existing HDF5 dataset.
+            1.  Open an existing dataset:
+                  Dataset(group, name)
 
-            2.  Create a dataset
-                You supply "group", "name" and either:
-                - Keyword "data"; a Numpy array from which the shape, dtype and
-                    initial contents will be determined.
-                - Both "dtype" (Numpy dtype object) and "shape" (tuple of 
-                    dimensions).
+            2.  Create a dataset:
+                  Dataset(group, name, shape, [dtype=<Numpy dtype>], **kwds)
+                or
+                  Dataset(group, name, data=<Numpy array>, **kwds)
+
+                  If "dtype" is not specified, the default is single-precision
+                  floating point, with native byte order ("=f4").
 
             Creating a dataset will fail if another of the same name already 
             exists.  Also, chunks/compression/shuffle/fletcher32 may only be
             specified when creating a dataset.
 
-            Creation keywords (* is default):
+            Creation keywords (* is default); "chunks" is required for all:
 
             chunks:        Tuple of chunk dimensions or None*
             compression:   DEFLATE (gzip) compression level, int or None*
             shuffle:       Use the shuffle filter? (requires compression) T/F*
             fletcher32:    Enable Fletcher32 error detection? T/F*
         """
-        if data is None and dtype is None and shape is None:
+        if data is None and shape is None:
             if any((data,dtype,shape,chunks,compression,shuffle,fletcher32)):
                 raise ValueError('You cannot specify keywords when opening a dataset.')
             self.id = h5d.open(group.id, name)
         else:
-            if ((data is None) and (shape is None and dtype is None)) or \
-               ((data is not None) and (shape or dtype)):
-                raise ValueError("Either data or both shape and dtype must be specified.")
+            if ((data is None) and (shape is None)) or \
+               ((data is not None) and (shape is not None)):
+                raise ValueError("*Either* data *or* the shape must be specified.")
             
             if data is not None:
                 shape = data.shape
                 dtype = data.dtype
+            else:
+                if dtype is None:
+                    dtype = "=f4"
+            
+            dtype = numpy.dtype(dtype)
 
             plist = h5p.create(h5p.DATASET_CREATE)
             if chunks:
-                plist.set_chunks(chunks)
+                plist.set_chunk(chunks)
             if shuffle:
                 plist.set_shuffle()
-            if compression:
+            if compression is not None:
+                if compression is True:  # prevent accidental abuse
+                    compression = 6
                 plist.set_deflate(compression)
             if fletcher32:
                 plist.set_fletcher32()
@@ -542,6 +557,8 @@ class Dataset(HLObject):
         if len(names) != 0:
             raise ValueError("Field name selections are not allowed for write.")
 
+        val = numpy.array(val, dtype=self.dtype)
+
         if count != val.shape:
             # Allow assignments (1,10) => (10,)
             if numpy.product(count) != numpy.product(val.shape):
@@ -556,10 +573,11 @@ class Dataset(HLObject):
         self.id.write(mspace, fspace, numpy.array(val))
 
     def __str__(self):
-        if self.id._valid:
+        try:
             return 'Dataset "%s": %s %s' % (hbasename(self.name),
                     str(self.shape), repr(self.dtype))
-        return "Closed dataset"
+        except:
+            return "Invalid dataset"
 
 class AttributeManager(object):
 
@@ -586,6 +604,9 @@ class AttributeManager(object):
         understand.
     """
 
+    names = property(lambda self: tuple(self),
+        doc = "Tuple of attribute names")
+
     def __init__(self, parent):
         """ Private constructor; you should not create these.
         """
@@ -649,16 +670,15 @@ class AttributeManager(object):
         return h5a.py_exists(self.id, name)
 
     def __str__(self):
-        if self.id._valid:
+        try:
             rstr = 'Attributes of "%s": ' % hbasename(h5i.get_name(self.id))
             if len(self) == 0:
                 rstr += '(none)'
             else:
                 rstr += ', '.join(['"%s"' % x for x in self])
-        else:
-            rstr = "Attributes of closed object."
-
-        return rstr
+            return rstr
+        except:
+            return "Invalid attributes object"
 
     def __repr__(self):
         return str(self)
@@ -690,9 +710,10 @@ class Datatype(HLObject):
         self._attrs = AttributeManager(self)
 
     def __str__(self):
-        if self.id._valid:
+        try:
             return "Named datatype object (%s)" % str(self.dtype)
-        return "Closed datatype object"
+        except:
+            return "Invalid datatype object"
 
 
 
diff --git a/h5py/tests/test_highlevel.py b/h5py/tests/test_highlevel.py
index 6a0b774..f64fc34 100644
--- a/h5py/tests/test_highlevel.py
+++ b/h5py/tests/test_highlevel.py
@@ -36,13 +36,11 @@ TYPES1 = \
 
 TYPES2 = ["|S1", "|S2", "|S33", "|V1", "|V2", "|V33"]
 
-TYPES3 = [[(x, numpy.dtype(x)) for x in TYPES1]]
 
-TYPES = TYPES1 + TYPES2 + TYPES3
+TYPES = TYPES1 + TYPES2
 
 SHAPES = [(), (1,), (10,5), (1,10), (10,1), (100,1,100), (51,2,1025)]
 
-
  
 class TestFile(unittest.TestCase):
 
@@ -165,11 +163,14 @@ class TestDataset(unittest.TestCase):
         self.f.close()
         os.unlink(self.fname)
 
-    def test_Dataset_create_simple(self):
+    def test_Dataset_create(self):
         
         print ''
 
-        for shape in SHAPES:
+        shapes = [(), (1,), (10,5), (1,10), (10,1), (100,1,100), (51,2,1025)]
+        chunks = [None, (1,), (10,1), (1,1),  (1,1),  (50,1,100), (51,2,25)]
+
+        for shape, chunk in zip(shapes, chunks):
             for dt in TYPES:
                 print "    Creating %.20s %.40s" % (shape, dt)
                 dt = numpy.dtype(dt)
@@ -177,6 +178,16 @@ class TestDataset(unittest.TestCase):
                 self.assertEqual(d.shape, shape)
                 self.assertEqual(d.dtype, dt)
                 del self.f["NewDataset"]
+
+                if chunk is not None:
+                    print "        With chunk %s" % (chunk,)
+                    d = Dataset(self.f, "NewDataset", dtype=dt, shape=shape,
+                                chunks=chunk, shuffle=True, compression=6,
+                                fletcher32=True)
+                    self.assertEqual(d.shape, shape)
+                    self.assertEqual(d.dtype, dt)
+                    del self.f["NewDataset"]
+             
                 if 'V' not in dt.kind:
                     srcarr = numpy.ones(shape, dtype=dt)
                     d = Dataset(self.f, "NewDataset", data=srcarr)
@@ -184,7 +195,7 @@ class TestDataset(unittest.TestCase):
                     self.assertEqual(d.dtype, dt)
                     self.assert_(numpy.all(d.value == srcarr))
                     del self.f["NewDataset"]               
-            
+
     def test_Dataset_slicing(self):
 
         print ''
@@ -194,7 +205,7 @@ class TestDataset(unittest.TestCase):
         slices += [ s[9,9,49], s[9,:,49], s[9,:,:] ]
         slices += [ s[0, ..., 49], s[...], s[..., 49], s[9,...] ]
         slices += [ s[0:7:2,0:9:3,15:43:5], s[2:8:2,...] ]
-
+        slices += [ s[0], s[1], s[9], s[:] ] # Numpy convention
 
         for dt in TYPES1:
 
diff --git a/h5py/utils_hl.py b/h5py/utils_hl.py
index acdfa01..d0b761e 100644
--- a/h5py/utils_hl.py
+++ b/h5py/utils_hl.py
@@ -53,14 +53,14 @@ def slicer(shape, args):
     if len(slices) == 0:
         slices = [Ellipsis]
 
+    # Hack to allow Numpy-style row indexing
+    if len(slices) == 1 and slices[0] != Ellipsis:
+        slices.append(Ellipsis)
+
     start = []
     count = []
     stride = []
 
-    # Hack to allow Numpy-style row indexing
-    if len(slices) == 1 and slices[0] != Ellipsis:
-        args.append(Ellipsis)
-
     # Expand integers and ellipsis arguments to slices
     for dim, arg in enumerate(slices):
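
And a short sketch of the Numpy-style row indexing this slicer change enables, continuing the handles from the sketch above ("dset" is a 2-D dataset):

    row = dset[0]     # single index now behaves like dset[0, ...]
    whole = dset[:]   # whole dataset, equivalent to dset[...]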
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git


