[h5py] 79/455: Highlevel fixes, change Dataset __init__, fix H5P segfault
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:19 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.
commit f0600f7ce240d9587ddffcd6512312aa35e13dcc
Author: andrewcollette <andrew.collette at gmail.com>
Date: Tue Jul 22 05:22:02 2008 +0000
Highlevel fixes, change Dataset __init__, fix H5P segfault
---
CHANGES.txt | 24 ++++++++--
h5py/h5p.pxd | 2 +-
h5py/h5p.pyx | 32 +++++++++++--
h5py/highlevel.py | 107 ++++++++++++++++++++++++++-----------------
h5py/tests/test_highlevel.py | 25 +++++++---
h5py/utils_hl.py | 8 ++--
6 files changed, 134 insertions(+), 64 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index e98121d..52ee829 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,15 +1,31 @@
+Version 0.2.2
+=============
+
+Low-level
+---------
+ - Fix segfault related to H5Pget_filter_by_idx
+
+High-level
+----------
+ - Fix Dataset single-value indexing
+ - Fix chunk values not working for Dataset creation
+ - Rearrange Dataset __init__ keywords to better match Numpy convention
+ - Add unit tests for extended Dataset options
+ - Catch all exceptions in highlevel __str__ methods
+ - Add names property for Group, AttributeManager objects
+
Version 0.2.1
=============
-General:
---------
+General
+-------
- Moved version info into h5py.h5 module
- Stub added for 1.8 API conditional compilation
- Rewrote unit test framework to correctly use unittest methods
- Remove VERSION.txt, add CHANGES.txt
-Low-level:
-----------
+Low-level
+---------
- Normalize keyword naming for property list arguments
- Change h5g.GroupID Python extensions to special methods
- Additional property list methods
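
As a rough illustration of the 0.2.2 high-level entries above, exercised against the API shown later in this diff (the file name, dataset name and File open mode are made up for the sketch):

    from h5py.highlevel import File

    f = File("demo.hdf5", "w")                       # hypothetical file
    d = f.create_dataset("grid", (10, 5), "=f4",     # Numpy-style order: shape, then dtype
                         chunks=(2, 5))              # chunk values now take effect
    x = d[0, 0]                                      # single-value indexing fixed
    print f.names                                    # new Group.names property
    f.close()
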
diff --git a/h5py/h5p.pxd b/h5py/h5p.pxd
index 14e9330..e23146f 100644
--- a/h5py/h5p.pxd
+++ b/h5py/h5p.pxd
@@ -177,7 +177,7 @@ cdef extern from "hdf5.h":
unsigned int *cd_values, size_t namelen, char name[] ) except *
herr_t H5Pget_filter_by_id( hid_t plist_id, H5Z_filter_t filter,
unsigned int *flags, size_t *cd_nelmts,
- unsigned int cd_values[], size_t namelen, char name[] ) except *
+ unsigned int cd_values[], size_t namelen, char name[]) except *
herr_t H5Pmodify_filter(hid_t plist, H5Z_filter_t filter, unsigned int flags,
size_t cd_nelmts, unsigned int cd_values[] ) except *
herr_t H5Premove_filter(hid_t plist, H5Z_filter_t filter ) except *
diff --git a/h5py/h5p.pyx b/h5py/h5p.pyx
index 99c3487..52d49c8 100644
--- a/h5py/h5p.pyx
+++ b/h5py/h5p.pyx
@@ -414,7 +414,8 @@ cdef class PropDCID(PropInstanceID):
if filter_idx < 0:
raise ValueError("Filter index must be a non-negative integer")
- filter_code = <int>H5Pget_filter(self.id, filter_idx, &flags, &nelements, cd_values, 256, name)
+ filter_code = <int>H5Pget_filter(self.id, filter_idx, &flags,
+ &nelements, cd_values, 256, name)
name[256] = c'\0' # in case it's > 256 chars
vlist = []
@@ -423,11 +424,25 @@ cdef class PropDCID(PropInstanceID):
return (filter_code, flags, tuple(vlist), name)
+ def _has_filter(self, int filter_code):
+ """ (INT filter_code)
+
+ Slow & stupid method to determine if a filter is used in this
+ property list. Used because the HDF5 function H5Pget_filter_by_id
+ is broken.
+ """
+ cdef int nfilters
+ nfilters = self.get_nfilters()
+ for i from 0<=i<nfilters:
+ if self.get_filter(i)[0] == filter_code:
+ return True
+ return False
+
def get_filter_by_id(self, int filter_code):
- """ (INT filter_code) => TUPLE filter_info
+ """ (INT filter_code) => TUPLE filter_info or None
Get information about a filter, identified by its code (one
- of h5z.FILTER_*)
+ of h5z.FILTER_*). If the filter doesn't exist, returns None.
Tuple entries are:
0: UINT flags (h5z.FLAG_*)
@@ -439,11 +454,18 @@ cdef class PropDCID(PropInstanceID):
cdef size_t nelements
cdef unsigned int cd_values[16]
cdef char name[257]
+ cdef herr_t retval
cdef int i
nelements = 16 # HDF5 library actually complains if this is too big.
- H5Pget_filter_by_id(self.id, <H5Z_filter_t>filter_code, &flags, &nelements, cd_values, 256, name)
- name[256] = c'\0' # in case it's > 256 chars
+ if not self._has_filter(filter_code):
+ return None
+
+ retval = H5Pget_filter_by_id(self.id, <H5Z_filter_t>filter_code,
+ &flags, &nelements, cd_values, 256, name)
+ assert nelements <= 16
+
+ name[256] = c'\0' # In case HDF5 doesn't terminate it properly
vlist = []
for i from 0<=i<nelements:
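
A minimal low-level sketch of the new behaviour, assuming the usual h5py.h5p / h5py.h5z module layout of this series:

    from h5py import h5p, h5z

    plist = h5p.create(h5p.DATASET_CREATE)
    plist.set_deflate(6)
    plist.get_filter_by_id(h5z.FILTER_DEFLATE)   # filter present: returns the info tuple
    plist.get_filter_by_id(h5z.FILTER_SHUFFLE)   # filter absent: returns None without
                                                 # touching the broken H5Pget_filter_by_id
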
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index a81c362..398e097 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -106,6 +106,9 @@ class Group(HLObject):
the AttributeManager class.
"""
+ names = property(lambda self: tuple(self),
+ doc = "Tuple of group member names")
+
def __init__(self, parent_object, name, create=False):
""" Create a new Group object, from a parent object and a name.
@@ -208,20 +211,23 @@ class Group(HLObject):
return Group(self, name, create=True)
def create_dataset(self, name, *args, **kwds):
- """ Create and return a dataset. Arguments, in order:
-
- You must specify either "data", or both "type" and "shape".
- data: Numpy array from which the dataset is constructed
- dtype: Numpy dtype giving the datatype
- shape: Numpy-style shape tuple giving the dataspace
-
- Additional keyword options (* is default):
- chunks: Tuple of chunk dimensions or None*
- compression: DEFLATE (gzip) compression level, int or None*
- shuffle: Use the shuffle filter (needs compression) T/F*
- fletcher32: Enable Fletcher32 error detection T/F*
+ """ Create and return a new dataset, attached to this group.
+
+ create_dataset(name, shape, [dtype=<Numpy dtype>], **kwds)
+ create_dataset(name, data=<Numpy array>, **kwds)
+
+ If "dtype" is not specified, the default is single-precision
+ floating point, with native byte order ("=f4").
+
+ Creating a dataset will fail if another of the same name already
+ exists. Additional keywords are:
+
+ chunks: Tuple of chunk dimensions or None*
+ compression: DEFLATE (gzip) compression level, int or None*
+ shuffle: Use the shuffle filter? (requires compression) T/F*
+ fletcher32: Enable Fletcher32 error detection? T/F*
"""
- return Dataset(self, name, **kwds)
+ return Dataset(self, name, *args, **kwds)
def desc(self):
""" Extended (multi-line) description of this group, as a string.
@@ -241,9 +247,10 @@ class Group(HLObject):
return outstr
def __str__(self):
- if self.id._valid:
+ try:
return 'Group "%s" (%d members)' % (hbasename(self.name), len(self))
- return "Closed group"
+ except:
+ return "Invalid group"
class File(Group):
@@ -336,9 +343,10 @@ class File(Group):
self.close()
def __str__(self):
- if self.id._valid:
+ try:
return 'File "%s", root members: %s' % (self.name, ', '.join(['"%s"' % name for name in self]))
- return "Closed file (%s)" % self.name
+ except:
+ return "Invalid file"
def browse(self, dict=None):
""" Open a command line shell to browse this file. If dict is not
@@ -408,54 +416,61 @@ class Dataset(HLObject):
doc = "The entire dataset, as an array or scalar depending on the shape.")
def __init__(self, group, name,
- data=None, dtype=None, shape=None,
+ shape=None, dtype=None, data=None,
chunks=None, compression=None, shuffle=False, fletcher32=False):
""" Construct a Dataset object. You might find it easier to use the
Group methods: Group["name"] or Group.create_dataset().
There are two modes of operation for this constructor:
- 1. Open an existing dataset
- If you only supply the required parameters "group" and "name",
- the object will attempt to open an existing HDF5 dataset.
+ 1. Open an existing dataset:
+ Dataset(group, name)
- 2. Create a dataset
- You supply "group", "name" and either:
- - Keyword "data"; a Numpy array from which the shape, dtype and
- initial contents will be determined.
- - Both "dtype" (Numpy dtype object) and "shape" (tuple of
- dimensions).
+ 2. Create a dataset:
+ Dataset(group, name, shape, [dtype=<Numpy dtype>], **kwds)
+ or
+ Dataset(group, name, data=<Numpy array>, **kwds)
+
+ If "dtype" is not specified, the default is single-precision
+ floating point, with native byte order ("=f4").
Creating a dataset will fail if another of the same name already
exists. Also, chunks/compression/shuffle/fletcher32 may only be
specified when creating a dataset.
- Creation keywords (* is default):
+ Creation keywords (* is default); "chunks" is required for all:
chunks: Tuple of chunk dimensions or None*
compression: DEFLATE (gzip) compression level, int or None*
shuffle: Use the shuffle filter? (requires compression) T/F*
fletcher32: Enable Fletcher32 error detection? T/F*
"""
- if data is None and dtype is None and shape is None:
+ if data is None and shape is None:
if any((data,dtype,shape,chunks,compression,shuffle,fletcher32)):
raise ValueError('You cannot specify keywords when opening a dataset.')
self.id = h5d.open(group.id, name)
else:
- if ((data is None) and (shape is None and dtype is None)) or \
- ((data is not None) and (shape or dtype)):
- raise ValueError("Either data or both shape and dtype must be specified.")
+ if ((data is None) and (shape is None)) or \
+ ((data is not None) and (shape is not None)):
+ raise ValueError("*Either* data *or* the shape must be specified.")
if data is not None:
shape = data.shape
dtype = data.dtype
+ else:
+ if dtype is None:
+ dtype = "=f4"
+
+ dtype = numpy.dtype(dtype)
plist = h5p.create(h5p.DATASET_CREATE)
if chunks:
- plist.set_chunks(chunks)
+ plist.set_chunk(chunks)
if shuffle:
plist.set_shuffle()
- if compression:
+ if compression is not None:
+ if compression is True: # prevent accidental abuse
+ compression = 6
plist.set_deflate(compression)
if fletcher32:
plist.set_fletcher32()
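
The same two modes through the constructor directly (a sketch; note that per the guard above, compression=True is mapped to deflate level 6 rather than level 1):

    d = Dataset(f, "existing")                      # mode 1: open an existing dataset
    d = Dataset(f, "fresh", (50, 50), "=i4",        # mode 2: create; shape first, Numpy-style
                chunks=(10, 10), compression=True)  # treated as compression=6
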
@@ -542,6 +557,8 @@ class Dataset(HLObject):
if len(names) != 0:
raise ValueError("Field name selections are not allowed for write.")
+ val = numpy.array(val, dtype=self.dtype)
+
if count != val.shape:
# Allow assignments (1,10) => (10,)
if numpy.product(count) != numpy.product(val.shape):
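
With the coercion added above, an assignment from a plain Python sequence should be converted to the dataset's own dtype before the shape check runs (a sketch, assuming a 1-D dataset):

    d = f.create_dataset("v", (10,), "=i4")
    d[0:10] = range(10)     # list coerced via numpy.array(..., dtype=d.dtype)
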
@@ -556,10 +573,11 @@ class Dataset(HLObject):
self.id.write(mspace, fspace, numpy.array(val))
def __str__(self):
- if self.id._valid:
+ try:
return 'Dataset "%s": %s %s' % (hbasename(self.name),
str(self.shape), repr(self.dtype))
- return "Closed dataset"
+ except:
+ return "Invalid dataset"
class AttributeManager(object):
@@ -586,6 +604,9 @@ class AttributeManager(object):
understand.
"""
+ names = property(lambda self: tuple(self),
+ doc = "Tuple of attribute names")
+
def __init__(self, parent):
""" Private constructor; you should not create these.
"""
@@ -649,16 +670,15 @@ class AttributeManager(object):
return h5a.py_exists(self.id, name)
def __str__(self):
- if self.id._valid:
+ try:
rstr = 'Attributes of "%s": ' % hbasename(h5i.get_name(self.id))
if len(self) == 0:
rstr += '(none)'
else:
rstr += ', '.join(['"%s"' % x for x in self])
- else:
- rstr = "Attributes of closed object."
-
- return rstr
+ return rstr
+ except:
+ return "Invalid attributes object"
def __repr__(self):
return str(self)
@@ -690,9 +710,10 @@ class Datatype(HLObject):
self._attrs = AttributeManager(self)
def __str__(self):
- if self.id._valid:
+ try:
return "Named datatype object (%s)" % str(self.dtype)
- return "Closed datatype object"
+ except:
+ return "Invalid datatype object"
diff --git a/h5py/tests/test_highlevel.py b/h5py/tests/test_highlevel.py
index 6a0b774..f64fc34 100644
--- a/h5py/tests/test_highlevel.py
+++ b/h5py/tests/test_highlevel.py
@@ -36,13 +36,11 @@ TYPES1 = \
TYPES2 = ["|S1", "|S2", "|S33", "|V1", "|V2", "|V33"]
-TYPES3 = [[(x, numpy.dtype(x)) for x in TYPES1]]
-TYPES = TYPES1 + TYPES2 + TYPES3
+TYPES = TYPES1 + TYPES2
SHAPES = [(), (1,), (10,5), (1,10), (10,1), (100,1,100), (51,2,1025)]
-
class TestFile(unittest.TestCase):
@@ -165,11 +163,14 @@ class TestDataset(unittest.TestCase):
self.f.close()
os.unlink(self.fname)
- def test_Dataset_create_simple(self):
+ def test_Dataset_create(self):
print ''
- for shape in SHAPES:
+ shapes = [(), (1,), (10,5), (1,10), (10,1), (100,1,100), (51,2,1025)]
+ chunks = [None, (1,), (10,1), (1,1), (1,1), (50,1,100), (51,2,25)]
+
+ for shape, chunk in zip(shapes, chunks):
for dt in TYPES:
print " Creating %.20s %.40s" % (shape, dt)
dt = numpy.dtype(dt)
@@ -177,6 +178,16 @@ class TestDataset(unittest.TestCase):
self.assertEqual(d.shape, shape)
self.assertEqual(d.dtype, dt)
del self.f["NewDataset"]
+
+ if chunk is not None:
+ print " With chunk %s" % (chunk,)
+ d = Dataset(self.f, "NewDataset", dtype=dt, shape=shape,
+ chunks=chunk, shuffle=True, compression=6,
+ fletcher32=True)
+ self.assertEqual(d.shape, shape)
+ self.assertEqual(d.dtype, dt)
+ del self.f["NewDataset"]
+
if 'V' not in dt.kind:
srcarr = numpy.ones(shape, dtype=dt)
d = Dataset(self.f, "NewDataset", data=srcarr)
@@ -184,7 +195,7 @@ class TestDataset(unittest.TestCase):
self.assertEqual(d.dtype, dt)
self.assert_(numpy.all(d.value == srcarr))
del self.f["NewDataset"]
-
+
def test_Dataset_slicing(self):
print ''
@@ -194,7 +205,7 @@ class TestDataset(unittest.TestCase):
slices += [ s[9,9,49], s[9,:,49], s[9,:,:] ]
slices += [ s[0, ..., 49], s[...], s[..., 49], s[9,...] ]
slices += [ s[0:7:2,0:9:3,15:43:5], s[2:8:2,...] ]
-
+ slices += [ s[0], s[1], s[9], s[:] ] # Numpy convention
for dt in TYPES1:
diff --git a/h5py/utils_hl.py b/h5py/utils_hl.py
index acdfa01..d0b761e 100644
--- a/h5py/utils_hl.py
+++ b/h5py/utils_hl.py
@@ -53,14 +53,14 @@ def slicer(shape, args):
if len(slices) == 0:
slices = [Ellipsis]
+ # Hack to allow Numpy-style row indexing
+ if len(slices) == 1 and slices[0] != Ellipsis:
+ slices.append(Ellipsis)
+
start = []
count = []
stride = []
- # Hack to allow Numpy-style row indexing
- if len(slices) == 1 and slices[0] != Ellipsis:
- args.append(Ellipsis)
-
# Expand integers and ellipsis arguments to slices
for dim, arg in enumerate(slices):
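
The relocated hack above appends an Ellipsis when a single non-Ellipsis index is given (the old version appended to args instead of slices, so it never took effect), which is what makes the Numpy-style row selections added to the tests work (a sketch):

    d = f.create_dataset("m", (10, 5), "=f4")
    row = d[0]              # treated as d[0, ...]
    everything = d[:]       # treated as d[:, ...], the whole dataset
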
--