[h5py] 14/455: Checks for Numpy array I/O

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:12 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit e536176e7458814f7ec998c96ebc4b22c7303488
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Wed May 7 23:12:55 2008 +0000

    Checks for Numpy array I/O
---
 INSTALL.txt            | 25 ---------------
 h5py/h5.pxd            |  2 --
 h5py/h5.pyx            |  1 -
 h5py/h5a.pyx           | 46 +++++++++++++++++++++------
 h5py/h5d.pyx           | 44 +++++++++++++++++---------
 h5py/numpy.pxd         |  5 +++
 h5py/tests/common.py   |  7 +++-
 h5py/tests/test_h5a.py | 11 ++++++-
 h5py/utils.c           | 86 ++++++++++++++++++++++++++++++++++++++++++--------
 h5py/utils.h           |  6 ++--
 h5py/utils.pxd         |  4 +++
 11 files changed, 168 insertions(+), 69 deletions(-)

diff --git a/INSTALL.txt b/INSTALL.txt
deleted file mode 100644
index 03ac269..0000000
--- a/INSTALL.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Installation of h5py
-====================
-
-Requires:
----------
-* A Unix/Linux environment
-* Python 2.5
-* Pyrex
-  http://www.cosc.canterbury.ac.nz/greg.ewing/python/Pyrex/
-* HDF5 1.6.5 or higher (1.8 untested)
-
-Installation
-------------
-
-1.  Unpack the tarball and cd to the resulting directory
-2.  Run ``python setup.py build`` to build the package
-3.  [optional] Run ``python setup.py test`` to run unit tests
-4.  Run ``sudo python setup.py install`` to install into your main Python
-    package directory.
-
-Bugs
-----
-I expect there are many. :) You can start a ticket at h5py.googlecode.com, or
-email me ("h5py" at the domain "alfven dot org").  h5py is very new and still
-under development.
diff --git a/h5py/h5.pxd b/h5py/h5.pxd
index d649667..48f8805 100755
--- a/h5py/h5.pxd
+++ b/h5py/h5.pxd
@@ -19,7 +19,6 @@ from defs_c cimport size_t
 # Common structs and types from HDF5
 cdef extern from "hdf5.h":
 
-
   ctypedef int hid_t  # In H5Ipublic.h
   ctypedef int hbool_t
   ctypedef int herr_t
@@ -42,4 +41,3 @@ cdef extern from "hdf5.h":
   herr_t H5check_version(unsigned majnum, unsigned minnum,
                          unsigned relnum )
 
-
diff --git a/h5py/h5.pyx b/h5py/h5.pyx
index 52990e4..d460f66 100755
--- a/h5py/h5.pyx
+++ b/h5py/h5.pyx
@@ -20,7 +20,6 @@
     is imported at the top of every h5* sub-module.  Initializes the
     library and defines common version info, classes and functions.
 """
-
 from h5e cimport H5Eset_auto, H5E_walk_t, H5Ewalk, H5E_error_t, \
                       H5E_WALK_DOWNWARD
 
diff --git a/h5py/h5a.pyx b/h5py/h5a.pyx
index e4460fc..cd5a1d0 100755
--- a/h5py/h5a.pyx
+++ b/h5py/h5a.pyx
@@ -17,11 +17,14 @@
 """
 
 # Pyrex compile-time imports
+cimport h5
 from defs_c   cimport malloc, free
 from h5  cimport herr_t, hid_t
 from h5p cimport H5P_DEFAULT
 from h5t cimport H5Tclose
-from numpy cimport ndarray, import_array
+from h5s cimport H5Sclose
+from numpy cimport ndarray, import_array, PyArray_DATA
+from utils cimport check_numpy_read, check_numpy_write
 
 # Runtime imports
 import h5
@@ -95,42 +98,67 @@ def read(hid_t attr_id, ndarray arr_obj):
         
         Read the attribute data into the given Numpy array.  Note that the 
         Numpy array must have the same shape as the HDF5 attribute, and a 
-        conversion-compatible datatype.  It must also be writable and
-        C-contiguous.  This is not currently checked.
+        conversion-compatible datatype.
+
+        The Numpy array must be writable, C-contiguous and own its data.  If
+        this is not the case, ValueError will be raised and the read will fail.
     """
     cdef hid_t mtype_id
+    cdef hid_t space_id
     cdef herr_t retval
+    cdef int array_ok
     mtype_id = 0
+    space_id = 0
 
     try:
         mtype_id = h5t.py_dtype_to_h5t(arr_obj.dtype)
-        retval = H5Aread(attr_id, mtype_id, <void*>arr_obj.data)
+        space_id = get_space(attr_id)
+        array_ok = check_numpy_write(arr_obj, space_id)
+        if array_ok <= 0:
+            raise ValueError("Numpy array is not set up correctly.")
+
+        retval = H5Aread(attr_id, mtype_id, PyArray_DATA(arr_obj))
         if retval < 0:
             raise H5AttributeError("Error reading from attribute %d" % attr_id)
     finally:
-        if mtype_id:
+        if mtype_id != 0:
             H5Tclose(mtype_id)
+        if space_id != 0:
+            H5Sclose(space_id)
 
 def write(hid_t attr_id, ndarray arr_obj):
     """ (INT attr_id, NDARRAY arr_obj)
 
         Write the contents of a Numpy array too the attribute.  Note that the 
         Numpy array must have the same shape as the HDF5 attribute, and a 
-        conversion-compatible datatype.  The Numpy array must also be
-        C-contiguous; this is not currently checked.
+        conversion-compatible datatype.  
+
+        The Numpy array must be C-contiguous and own its data.  If this is not
+        the case, ValueError will be raised and the write will fail.
     """
     
     cdef hid_t mtype_id
+    cdef hid_t space_id
     cdef herr_t retval
+    cdef int array_ok
     mtype_id = 0
+    space_id = 0
+
     try:
         mtype_id = h5t.py_dtype_to_h5t(arr_obj.dtype)
-        retval = H5Awrite(attr_id, mtype_id, <void*>arr_obj.data)
+        space_id = get_space(attr_id)
+        array_ok = check_numpy_read(arr_obj, space_id)
+        if array_ok <= 0:
+            raise ValueError("Given Numpy array is not set up correctly.")
+
+        retval = H5Awrite(attr_id, mtype_id, PyArray_DATA(arr_obj))
         if retval < 0:
             raise H5AttributeError("Error writing to attribute %d" % attr_id)
     finally:
-        if mtype_id:
+        if mtype_id != 0:
             H5Tclose(mtype_id)
+        if space_id != 0:
+            H5Sclose(space_id)
 
 # === Attribute inspection ====================================================
 
diff --git a/h5py/h5d.pyx b/h5py/h5d.pyx
index aabbd83..d87cf6e 100755
--- a/h5py/h5d.pyx
+++ b/h5py/h5d.pyx
@@ -28,7 +28,8 @@ from h5  cimport herr_t, hid_t, size_t, hsize_t, htri_t
 from h5s cimport H5Sclose, H5S_ALL, H5S_UNLIMITED
 from h5t cimport H5Tclose
 from h5p cimport H5P_DEFAULT
-from numpy cimport ndarray, import_array
+from numpy cimport ndarray, import_array, PyArray_DATA
+from utils cimport check_numpy_read, check_numpy_write
 
 # Runtime imports
 import h5
@@ -127,28 +128,34 @@ def read(hid_t dset_id, hid_t mspace_id, hid_t fspace_id, ndarray arr_obj, hid_t
         flexibility, you can specify dataspaces for the file and the Numpy
         object. Keyword plist may be a dataset transfer property list.
 
-        It is your responsibility to ensure that the memory dataspace
-        provided is compatible with the shape of the Numpy array.  It is also
-        up to you to ensure that the Numpy array's dtype is conversion-
-        compatible with the file's datatype. 
-
-        The given Numpy array *must* be C-contiguous, writable and aligned 
-        ("NPY_BEHAVED").  This is not currently checked; anything else may
-        crash Python.
+        The provided Numpy array must be writable, C-contiguous, and own
+        its data.  If this is not the case, ValueError will be raised and the 
+        read will fail.
 
+        It is your responsibility to ensure that the memory dataspace
+        provided is compatible with the shape of the Numpy array.  Since a
+        wide variety of dataspace configurations are possible, this is not
+        checked.  You can easily crash Python by reading in data from too
+        large a dataspace.
+        
         For a friendlier version of this function, try py_read_slab().
     """
     cdef hid_t mtype_id
     cdef herr_t retval
+    cdef int array_ok
     mtype_id = 0
 
     try:
         mtype_id = h5t.py_dtype_to_h5t(arr_obj.dtype)
-        retval = H5Dread(dset_id, mtype_id, mspace_id, fspace_id, plist, <void*>arr_obj.data)
+        array_ok = check_numpy_write(arr_obj, -1)
+        if array_ok <= 0:
+            raise ValueError("Numpy array is not set up correctly.")
+
+        retval = H5Dread(dset_id, mtype_id, mspace_id, fspace_id, plist, PyArray_DATA(arr_obj))
         if retval < 0:
             raise DatasetError("Error reading from dataset %d" % dset_id)
     finally:
-        if mtype_id:
+        if mtype_id != 0:
             H5Tclose(mtype_id)
         
 def write(hid_t dset_id, hid_t mspace_id, hid_t fspace_id, ndarray arr_obj, hid_t plist=H5P_DEFAULT):
@@ -156,18 +163,25 @@ def write(hid_t dset_id, hid_t mspace_id, hid_t fspace_id, ndarray arr_obj, hid_
           INT plist=H5P_DEFAULT )
 
         Write data from a Numpy array to an HDF5 dataset. Keyword plist may be 
-        a dataset transfer property list.  All the caveats in h5d.read() apply 
-        here as well, in particular the restrictions on the data area of the 
-        Numpy array.
+        a dataset transfer property list.
+
+        The provided Numpy array must be C-contiguous, and own its data.  If 
+        this is not the case, ValueError will be raised and the write will fail.
 
         For a friendlier version of this function, try py_write_slab()
     """
     cdef hid_t mtype_id
     cdef herr_t retval
+    cdef int array_ok
     mtype_id = 0
+
     try:
         mtype_id = h5t.py_dtype_to_h5t(arr_obj.dtype)
-        retval = H5Dwrite(dset_id, mtype_id, mspace_id, fspace_id, plist, <void*>arr_obj.data)
+        array_ok = check_numpy_read(arr_obj, -1)
+        if array_ok <= 0:
+            raise ValueError("Numpy array is not set up correctly.")
+
+        retval = H5Dwrite(dset_id, mtype_id, mspace_id, fspace_id, plist, PyArray_DATA(arr_obj))
         if retval < 0:
             raise DatasetError("Error writing to dataset %d" % dset_id)
     finally:
diff --git a/h5py/numpy.pxd b/h5py/numpy.pxd
index def7164..92fefd3 100755
--- a/h5py/numpy.pxd
+++ b/h5py/numpy.pxd
@@ -96,4 +96,9 @@ cdef extern from "numpy/arrayobject.h":
   # The NumPy initialization function
   void import_array()
 
+  void* PyArray_DATA(ndarray arr)
+
+
+
+
 
diff --git a/h5py/tests/common.py b/h5py/tests/common.py
index 445ca55..cc5e558 100755
--- a/h5py/tests/common.py
+++ b/h5py/tests/common.py
@@ -13,7 +13,7 @@
 import tempfile
 import os
 import shutil
-from h5py import h5f
+from h5py import h5f, h5p
 
 def getcopy(filename):
     """ Create a temporary working copy of "filename". Return is a 2-tuple
@@ -21,7 +21,12 @@ def getcopy(filename):
     """
     newname = tempfile.mktemp('.hdf5')
     shutil.copy(filename, newname)
+
+    plist = h5p.create(h5p.CLASS_FILE_ACCESS)
+    h5p.set_fclose_degree(plist, h5f.CLOSE_STRONG)
     fid = h5f.open(newname, h5f.ACC_RDWR)
+    h5p.close(plist)
+
     return (fid, newname)
 
 def deletecopy(fid, newname):
diff --git a/h5py/tests/test_h5a.py b/h5py/tests/test_h5a.py
index d92b247..e290649 100755
--- a/h5py/tests/test_h5a.py
+++ b/h5py/tests/test_h5a.py
@@ -11,7 +11,7 @@
 #-
 
 import unittest
-from numpy import array, ndarray, dtype, all
+from numpy import array, ndarray, dtype, all, ones
 import os
 
 from h5py import h5a
@@ -50,6 +50,7 @@ class TestH5A(unittest.TestCase):
         obj = h5g.open(fid, OBJECTNAME)
         for name, (value, dt, shape) in NEW_ATTRIBUTES.iteritems():
             arr_ref = array(value, dtype=dt)
+            arr_fail = ones((15,15), dtype=dt)
 
             sid = h5s.create(h5s.CLASS_SCALAR)
             tid = h5t.py_dtype_to_h5t(dt)
@@ -57,6 +58,7 @@ class TestH5A(unittest.TestCase):
             aid = h5a.create(obj, name, tid, sid)
             self.assert_(self.is_attr(aid))
             h5a.write(aid, arr_ref)
+            self.assertRaises(ValueError, h5a.write, aid, arr_fail)
             h5a.close(aid)
 
             arr_val = h5a.py_get(obj,name)
@@ -100,12 +102,19 @@ class TestH5A(unittest.TestCase):
     def test_read(self):
         for name in ATTRIBUTES:
             value, dt, shape = ATTRIBUTES[name]
+
             aid = h5a.open_name(self.obj, name)
             arr_holder = ndarray(shape, dtype=dt)
             arr_reference = array(value, dtype=dt)
+
+            if len(shape) != 0:
+                arr_fail = ndarray((), dtype=dt)
+                self.assertRaises(ValueError, h5a.read, aid, arr_fail)
+
             h5a.read(aid, arr_holder)
             self.assert_( all(arr_holder == arr_reference),
                 errstr(arr_reference, arr_holder, 'Attr "%s"):\n' % name, ))
+
             h5a.close(aid)
         
     # h5a.write is done by test_create_write
diff --git a/h5py/utils.c b/h5py/utils.c
index b9a04dc..340ddad 100755
--- a/h5py/utils.c
+++ b/h5py/utils.c
@@ -25,19 +25,6 @@
 #include "hdf5.h"
 
 
-/* Check to make sure we can reliably copy data from this array. */
-int check_array(PyObject* arr){
-
-    if(!PyArray_Check(arr)) return 0;
-
-    if(!PyArray_ISCONTIGUOUS(arr)) return 0;
-
-    if(!PyArray_ISBEHAVED(arr)) return 0;
-
-    return 1;
-
-}
-
 /* Convert an hsize_t array to a Python tuple of long ints.
    Returns None on failure
 */
@@ -107,6 +94,79 @@ hsize_t* tuple_to_dims(PyObject* tpl){
       return NULL;
 }
 
+/* The functions
+
+    - check_numpy_write(PyArrayObject* arr, hid_t space_id)
+    - check_numpy_read(PyArrayObject* arr, hid_t space_id)
+
+   test whether or not a given array object is suitable for reading or writing.
+   If dataspace id is positive, it will be checked for compatibility with
+   the array object's shape.
+
+   Return values:
+    1:  Can read/write
+    0:  Can't read/write
+   -1:  Failed to determine (i.e. either the array or the space object is bad)
+*/
+int check_numpy(PyArrayObject* arr, hid_t space_id, int write){
+    /* Shared worker: write=1 for check_numpy_write, write=0 for check_numpy_read. */
+    int required_flags;
+    int arr_rank;
+    int space_rank;  /* signed: the H5Sget_simple_extent_* calls return <0 on error */
+    hsize_t *space_dims = NULL;
+    int retval = 0;  /* Default = not OK */
+    int i;
+
+    required_flags = NPY_C_CONTIGUOUS | NPY_OWNDATA;
+    /* That's not how you spell "writable" */
+    if(write) required_flags = required_flags | NPY_WRITEABLE;
+
+    /* Every required flag must be set, not merely any one of them. */
+    if((arr->flags & required_flags) != required_flags) goto out;
+
+    if(space_id > 0){
+        arr_rank = arr->nd;
+        space_rank = H5Sget_simple_extent_ndims(space_id);
+        if(space_rank < 0) goto failed;
+        if(arr_rank != space_rank) goto out;
+        /* Rank-0 (scalar) spaces have no extents to fetch or compare. */
+        if(space_rank > 0){
+            space_dims = (hsize_t*)malloc(sizeof(hsize_t)*space_rank);
+            if(space_dims == NULL) goto failed;
+            if(H5Sget_simple_extent_dims(space_id, space_dims, NULL) < 0) goto failed;
+        }
+
+        /* write: array may not be smaller than the space; read: may not be larger. */
+        for(i=0; i<space_rank; i++){
+            if(write){
+                if((hsize_t)PyArray_DIM(arr,i) < space_dims[i]) goto out;
+            } else {
+                if((hsize_t)PyArray_DIM(arr,i) > space_dims[i]) goto out;
+            }
+        }
+    }
+
+    retval = 1;  /* got here == success */
+
+  out:
+    free(space_dims);  /* free(NULL) is a no-op */
+    return retval;
+
+  failed:
+    /* could optionally print an error message */
+    free(space_dims);
+    return -1;
+}
+
+int check_numpy_write(PyArrayObject* arr, hid_t space_id){
+    return check_numpy(arr, space_id, 1);  /* write=1: NPY_WRITEABLE is also required */
+}
+
+int check_numpy_read(PyArrayObject* arr, hid_t space_id){
+    return check_numpy(arr, space_id, 0);  /* write=0: NPY_WRITEABLE is not required */
+}
+
+
 /* Rewritten versions of create_ieee_complex64/128 from Pytables, to support 
    standard array-interface typecodes and variable names for real/imag parts.  
    Also removed unneeded datatype copying.
diff --git a/h5py/utils.h b/h5py/utils.h
index 5cf78e8..5f58027 100755
--- a/h5py/utils.h
+++ b/h5py/utils.h
@@ -16,8 +16,9 @@
    distribution root directory.
 */
 
-
+#include "Python.h"
 #include "hdf5.h"
+#include "numpy/arrayobject.h"
 
 hid_t create_ieee_complex64(const char byteorder, const char* real_name, const char* img_name);
 hid_t create_ieee_complex128(const char byteorder, const char* real_name, const char* img_name);
@@ -26,5 +27,6 @@ hid_t create_ieee_complex128(const char byteorder, const char* real_name, const
 hsize_t* tuple_to_dims(PyObject* tpl);
 PyObject* dims_to_tuple(hsize_t* dims, hsize_t rank);
 
-int check_array(PyObject* arr);
+int check_numpy_read(PyArrayObject* arr, hid_t space_id);
+int check_numpy_write(PyArrayObject* arr, hid_t space_id);
 
diff --git a/h5py/utils.pxd b/h5py/utils.pxd
index 6922ede..f775f7f 100755
--- a/h5py/utils.pxd
+++ b/h5py/utils.pxd
@@ -11,6 +11,7 @@
 #-
 
 from h5 cimport hid_t, hsize_t
+from numpy cimport ndarray
 
 cdef extern from "utils.h":
 
@@ -18,3 +19,6 @@ cdef extern from "utils.h":
     hid_t create_ieee_complex128(char byteorder, char* real_name, char* img_name)
     hsize_t* tuple_to_dims(object tpl)
     object dims_to_tuple(hsize_t* dims, hsize_t rank)
+
+    int check_numpy_read(ndarray arr, hid_t space_id)
+    int check_numpy_write(ndarray arr, hid_t space_id)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git



More information about the debian-science-commits mailing list