[h5py] 243/455: Merge vlen additions back into trunk

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:37 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit 036777df758497600cff334e3e77240aa1a3da0b
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Sat May 16 21:26:18 2009 +0000

    Merge vlen additions back into trunk
---
 h5py/__init__.py           |   4 +
 h5py/defs.pxd              |  47 +++++
 h5py/h5.pxd                |  22 +++
 h5py/h5.pyx                |  14 ++
 h5py/h5a.pyx               |   6 +-
 h5py/h5d.pyx               |  31 +--
 h5py/h5t.pxd               |   2 +-
 h5py/h5t.pyx               | 173 +++++++++++++----
 h5py/highlevel.py          |  69 +++++--
 h5py/tests/data/strings.h5 | Bin 0 -> 8192 bytes
 h5py/tests/data/vlstra.h5  | Bin 0 -> 6144 bytes
 h5py/tests/test_h5t.py     |  11 --
 h5py/tests/test_vlen.py    |  99 ++++++++++
 h5py/typeconv.c            | 416 ++++++++++++++++++++++++++++++++++++++++
 h5py/typeconv.h            |  28 +++
 h5py/typeproxy.c           | 464 +++++++++++++++++++++++++++++++++++++++++++++
 h5py/typeproxy.h           |  55 ++++++
 setup.py                   |   4 +-
 18 files changed, 1357 insertions(+), 88 deletions(-)

diff --git a/h5py/__init__.py b/h5py/__init__.py
index 0ea2184..355d392 100644
--- a/h5py/__init__.py
+++ b/h5py/__init__.py
@@ -37,6 +37,10 @@ from h5e import H5Error
 
 import filters, selections
 
+# re-export custom vlen routines
+new_vlen = h5t.py_new_vlen
+get_vlen = h5t.py_get_vlen
+
 __doc__ = __doc__ % (version.version, version.hdf5_version, version.api_version)
 
 __all__ = ['h5', 'h5f', 'h5g', 'h5s', 'h5t', 'h5d', 'h5a', 'h5p', 'h5r',
diff --git a/h5py/defs.pxd b/h5py/defs.pxd
index 10028e5..03d11da 100644
--- a/h5py/defs.pxd
+++ b/h5py/defs.pxd
@@ -24,6 +24,7 @@ include "config.pxi"  # Needed for H5PY_*API defines
 
 # === Standard C library types and functions ==================================
 
+
 cdef extern from "stdlib.h":
   ctypedef long size_t
   void *malloc(size_t size)
@@ -963,6 +964,9 @@ cdef extern from "hdf5.h":
     H5T_DIR_ASCEND,
     H5T_DIR_DESCEND
 
+  # For vlen strings
+  cdef size_t H5T_VARIABLE
+
   # --- Predefined datatypes --------------------------------------------------
 
   cdef enum:
@@ -1126,6 +1130,7 @@ cdef extern from "hdf5.h":
   herr_t    H5Tset_tag(hid_t type_id, char* tag) except *
   char*     H5Tget_tag(hid_t type_id) except? NULL
 
+  # 1.8-specific functions
   IF H5PY_18API:
     hid_t H5Tdecode(unsigned char *buf) except *
     herr_t H5Tencode(hid_t obj_id, unsigned char *buf, size_t *nalloc) except *
@@ -1133,6 +1138,46 @@ cdef extern from "hdf5.h":
     herr_t H5Tcommit2(hid_t loc_id, char *name, hid_t dtype_id, hid_t lcpl_id,
             hid_t tcpl_id, hid_t tapl_id) 
 
+  # Type-conversion infrastructure
+
+  ctypedef enum H5T_pers_t:
+    H5T_PERS_DONTCARE	= -1,
+    H5T_PERS_HARD	= 0,	    # /*hard conversion function		     */
+    H5T_PERS_SOFT	= 1 	    # /*soft conversion function		     */
+
+  ctypedef enum H5T_cmd_t:
+    H5T_CONV_INIT	= 0,	#/*query and/or initialize private data	     */
+    H5T_CONV_CONV	= 1, 	#/*convert data from source to dest datatype */
+    H5T_CONV_FREE	= 2	    #/*function is being removed from path	     */
+
+  ctypedef enum H5T_bkg_t:
+    H5T_BKG_NO		= 0, 	#/*background buffer is not needed, send NULL */
+    H5T_BKG_TEMP	= 1,	#/*bkg buffer used as temp storage only       */
+    H5T_BKG_YES		= 2	    #/*init bkg buf with data before conversion   */
+
+  ctypedef struct H5T_cdata_t:
+    H5T_cmd_t		command     # /*what should the conversion function do?    */
+    H5T_bkg_t		need_bkg   #/*is the background buffer needed?	     */
+    hbool_t		recalc	        # /*recalculate private data		     */
+    void		*priv	        # /*private data				     */
+
+  ctypedef struct hvl_t:
+      size_t len # /* Length of VL data (in base type units) */
+      void *p    #/* Pointer to VL data */
+
+  ctypedef herr_t (*H5T_conv_t)(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
+      size_t nelmts, size_t buf_stride, size_t bkg_stride, void *buf,
+      void *bkg, hid_t dset_xfer_plist)
+
+  H5T_conv_t H5Tfind(hid_t src_id, hid_t dst_id, H5T_cdata_t **pcdata) 
+
+  herr_t    H5Tregister(H5T_pers_t pers, char *name, hid_t src_id,
+                        hid_t dst_id, H5T_conv_t func) except *
+  herr_t    H5Tunregister(H5T_pers_t pers, char *name, hid_t src_id,
+			            hid_t dst_id, H5T_conv_t func) except *
+
+
+
 # === H5Z - Filters ===========================================================
 
 cdef extern from "hdf5.h":
@@ -1241,3 +1286,5 @@ cdef extern from "hdf5.h":
     hsize_t H5Aget_storage_size(hid_t attr_id) except *
 
 
+
+
diff --git a/h5py/h5.pxd b/h5py/h5.pxd
index 89379d7..028a200 100644
--- a/h5py/h5.pxd
+++ b/h5py/h5.pxd
@@ -50,5 +50,27 @@ cdef class SmartStruct:
 # Library init.  Safe to call more than once.
 cdef int init_hdf5() except -1
 
+cdef extern from "typeconv.h":
+
+    hid_t h5py_object_type() except *
+    int h5py_register_conv() except -1
+
+cdef extern from "typeproxy.h":
+    ctypedef enum h5py_rw_t:
+        H5PY_WRITE = 0,
+        H5PY_READ
+
+    herr_t H5PY_dset_rw(hid_t dset, hid_t mtype, hid_t mspace_in, hid_t fspace_in,
+                   hid_t xfer_plist, void* buf, h5py_rw_t dir) except *
+
+    herr_t H5PY_attr_rw(hid_t attr, hid_t mtype, void* buf, h5py_rw_t dir) except *
+
+cdef hid_t get_object_type() except -1
+
+cdef herr_t attr_rw(hid_t attr_id, hid_t mem_type_id, void *buf, h5py_rw_t dir) except *
+
+cdef herr_t dset_rw(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, 
+                  hid_t file_space_id, hid_t xfer_plist_id, void *outbuf,
+                  h5py_rw_t dir) except *
 
 
diff --git a/h5py/h5.pyx b/h5py/h5.pyx
index 693353e..5458358 100644
--- a/h5py/h5.pyx
+++ b/h5py/h5.pyx
@@ -405,6 +405,19 @@ def _exithack():
 
 hdf5_inited = 0
 
+cdef hid_t get_object_type() except -1:
+    return h5py_object_type()
+
+cdef herr_t dset_rw(hid_t dataset_id, hid_t mem_type_id, hid_t mem_space_id, 
+                    hid_t file_space_id, hid_t xfer_plist_id, void *outbuf,
+                    h5py_rw_t dir) except *:
+
+    return H5PY_dset_rw(dataset_id, mem_type_id, mem_space_id, file_space_id,
+                        xfer_plist_id, outbuf, dir)
+
+cdef herr_t attr_rw(hid_t attr_id, hid_t mem_type_id, void *buf, h5py_rw_t dir) except *:
+    return H5PY_attr_rw(attr_id, mem_type_id, buf, dir)
+
 cdef int init_hdf5() except -1:
     # Initialize the library and register Python callbacks for exception
     # handling.  Safe to call more than once.
@@ -419,6 +432,7 @@ cdef int init_hdf5() except -1:
         if register_lzf() < 0:
             raise RuntimeError("Failed to register LZF filter")
         atexit.register(_exithack)
+        h5py_register_conv()
         hdf5_inited = 1
 
     return 0
diff --git a/h5py/h5a.pyx b/h5py/h5a.pyx
index f25e233..b3a2221 100644
--- a/h5py/h5a.pyx
+++ b/h5py/h5a.pyx
@@ -18,7 +18,7 @@ __doc__=\
 include "config.pxi"
 
 # Compile-time imports
-from h5 cimport init_hdf5, SmartStruct
+from h5 cimport init_hdf5, SmartStruct, attr_rw, H5PY_READ, H5PY_WRITE
 from h5t cimport TypeID, typewrap, py_create
 from h5s cimport SpaceID
 from h5p cimport PropID, pdefault
@@ -452,7 +452,7 @@ cdef class AttrID(ObjectID):
 
             mtype = py_create(arr.dtype)
 
-            H5Aread(self.id, mtype.id, PyArray_DATA(arr))
+            attr_rw(self.id, mtype.id, PyArray_DATA(arr), H5PY_READ)
 
         finally:
             if space_id:
@@ -478,7 +478,7 @@ cdef class AttrID(ObjectID):
             check_numpy_read(arr, space_id)
             mtype = py_create(arr.dtype)
 
-            H5Awrite(self.id, mtype.id, PyArray_DATA(arr))
+            attr_rw(self.id, mtype.id, PyArray_DATA(arr), H5PY_WRITE)
 
         finally:
             if space_id:
diff --git a/h5py/h5d.pyx b/h5py/h5d.pyx
index a651cb4..f7d598e 100644
--- a/h5py/h5d.pyx
+++ b/h5py/h5d.pyx
@@ -16,7 +16,7 @@ __doc__ = \
 include "config.pxi"
 
 # Compile-time imports
-from h5 cimport init_hdf5
+from h5 cimport init_hdf5, dset_rw, H5PY_READ, H5PY_WRITE
 from numpy cimport ndarray, import_array, PyArray_DATA, NPY_WRITEABLE
 from utils cimport  check_numpy_read, check_numpy_write, \
                     convert_tuple, emalloc, efree
@@ -79,29 +79,6 @@ def open(ObjectID loc not None, char* name):
 
 # --- Proxy functions for safe(r) threading -----------------------------------
 
-# It's not legal to call PyErr_Occurred() with nogil, so we can't use
-# the standard except * syntax.  Trap negative return numbers and convert them
-# to something Cython can recognize.
-
-cdef int H5PY_H5Dread(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id,
-                  hid_t file_space_id, hid_t plist_id, void *buf) nogil except -1:
-
-    cdef herr_t retval
-    retval = H5Dread(dset_id, mem_type_id,mem_space_id, file_space_id,
-                        plist_id, buf)
-    if retval < 0:
-        return -1
-    return retval
-
-cdef int H5PY_H5Dwrite(hid_t dset_id, hid_t mem_type, hid_t mem_space, hid_t 
-                        file_space, hid_t xfer_plist, void* buf) nogil except -1:
-    cdef herr_t retval
-    retval = H5Dwrite(dset_id, mem_type, mem_space, file_space,
-                        xfer_plist, buf)
-    if retval < 0:
-        return -1
-    return retval
-
 
 cdef class DatasetID(ObjectID):
 
@@ -200,8 +177,7 @@ cdef class DatasetID(ObjectID):
 
         arr_obj.flags &= (~NPY_WRITEABLE) # Wish-it-was-a-mutex approach
         try:
-            with nogil:
-                H5PY_H5Dread(self_id, mtype_id, mspace_id, fspace_id, plist_id, data)
+            dset_rw(self_id, mtype_id, mspace_id, fspace_id, plist_id, data, H5PY_READ)
         finally:
             arr_obj.flags |= NPY_WRITEABLE
 
@@ -240,8 +216,7 @@ cdef class DatasetID(ObjectID):
 
         arr_obj.flags &= (~NPY_WRITEABLE) # Wish-it-was-a-mutex approach
         try:
-            with nogil:
-                H5PY_H5Dwrite(self_id, mtype_id, mspace_id, fspace_id, plist_id, data)
+            dset_rw(self_id, mtype_id, mspace_id, fspace_id, plist_id, data, H5PY_WRITE)
         finally:
             arr_obj.flags |= NPY_WRITEABLE
 
diff --git a/h5py/h5t.pxd b/h5py/h5t.pxd
index 7d9fb7f..b5e384c 100644
--- a/h5py/h5t.pxd
+++ b/h5py/h5t.pxd
@@ -69,7 +69,7 @@ cdef class TypeCompoundID(TypeCompositeID):
 # === C API for other modules =================================================
 
 cdef TypeID typewrap(hid_t id_)
-cpdef TypeID py_create(object dtype, dict enum_vals=*)
+cpdef TypeID py_create(object dtype, bint logical=*)
 
 
 
diff --git a/h5py/h5t.pyx b/h5py/h5t.pyx
index cf0588d..203dec2 100644
--- a/h5py/h5t.pyx
+++ b/h5py/h5t.pyx
@@ -60,7 +60,7 @@ __doc__ = \
 include "config.pxi"
 
 # Pyrex compile-time imports
-from h5 cimport init_hdf5, H5PYConfig, get_config, PHIL, get_phil
+from h5 cimport init_hdf5, H5PYConfig, get_config, PHIL, get_phil, get_object_type
 from h5p cimport PropID, pdefault
 from numpy cimport dtype, ndarray
 from python_string cimport PyString_FromStringAndSize
@@ -229,6 +229,10 @@ STD_REF_DSETREG = lockid(H5T_STD_REF_DSETREG)
 # Null terminated (C) and Fortran string types
 C_S1 = lockid(H5T_C_S1)
 FORTRAN_S1 = lockid(H5T_FORTRAN_S1)
+VARIABLE = H5T_VARIABLE
+
+# Custom Python object pointer type
+PYTHON_OBJECT = lockid(get_object_type())
 
 # Translation tables for HDF5 -> NumPy dtype conversion
 cdef dict _order_map = { H5T_ORDER_NONE: '|', H5T_ORDER_LE: '<', H5T_ORDER_BE: '>'}
@@ -593,8 +597,6 @@ cdef class TypeStringID(TypeID):
         """() => BOOL is_variable
 
         Determine if the given string datatype is a variable-length string.
-        Please note that reading/writing data in this format is impossible;
-        only fixed-length strings are currently supported.
         """
         return <bint>(H5Tis_variable_str(self.id))
 
@@ -651,7 +653,7 @@ cdef class TypeStringID(TypeID):
     cdef object py_dtype(self):
         # Numpy translation function for string types
         if self.is_variable_str():
-            raise TypeError("Variable-length strings are not supported.")
+            return py_new_vlen(str)
 
         return dtype("|S" + str(self.get_size()))
 
@@ -1156,19 +1158,23 @@ cdef class TypeEnumID(TypeCompositeID):
     cdef object py_dtype(self):
         # Translation function for enum types
 
-        cdef TypeID tmp_type
-        tmp_type = self.get_super()
+        cdef TypeID basetype = self.get_super()
+
+        nmembers = self.get_nmembers()
+        members = {}
+
+        for idx in xrange(nmembers):
+            name = self.get_member_name(idx)
+            val = self.get_member_value(idx) 
+            members[name] = val
 
-        if self.get_nmembers() == 2:
-            members = {}
-            ref = {cfg._f_name: 0, cfg._t_name: 1}
-            for idx in range(2):
-                name = self.get_member_name(idx)
-                val = self.get_member_value(idx)
-                members[name] = val
-            if members == ref:
-                return dtype('bool')
-        return tmp_type.py_dtype()
+        ref = {cfg._f_name: 0, cfg._t_name: 1}
+
+        # Boolean types have priority over standard enums
+        if members == ref:
+            return dtype('bool')
+    
+        return py_new_enum(basetype.py_dtype(), members)
 
 
 # === Translation from NumPy dtypes to HDF5 type objects ======================
@@ -1249,14 +1255,14 @@ cdef TypeEnumID _c_bool(dtype dt):
 
     return out
 
-cdef TypeArrayID _c_array(dtype dt):
+cdef TypeArrayID _c_array(dtype dt, int logical):
     # Arrays
     cdef dtype base
     cdef TypeID type_base
     cdef tuple shape
 
     base, shape = dt.subdtype
-    type_base = py_create(base)
+    type_base = py_create(base, logical=logical)
     return array_create(type_base, shape)
 
 cdef TypeOpaqueID _c_opaque(dtype dt):
@@ -1311,7 +1317,7 @@ cdef TypeCompoundID _c_complex(dtype dt):
 
     return TypeCompoundID(tid)
 
-cdef TypeCompoundID _c_compound(dtype dt):
+cdef TypeCompoundID _c_compound(dtype dt, int logical):
     # Compound datatypes
 
     cdef hid_t tid
@@ -1326,13 +1332,24 @@ cdef TypeCompoundID _c_compound(dtype dt):
 
     for name in names:
         dt_tmp, offset = dt.fields[name]
-        type_tmp = py_create(dt_tmp)
+        type_tmp = py_create(dt_tmp, logical=logical)
         H5Tinsert(tid, name, offset, type_tmp.id)
 
     return TypeCompoundID(tid)
 
+cdef TypeOpaqueID _c_object(dtype dt):
+    # Object types are represented by a custom opaque type
+    # Currently no other logic is required
+    return PYTHON_OBJECT
+
+cdef TypeStringID _c_vlen_str(object basetype):
+    # Variable-length strings
+    cdef hid_t tid
+    tid = H5Tcopy(H5T_C_S1)
+    H5Tset_size(tid, H5T_VARIABLE)
+    return TypeStringID(tid)
 
-cpdef TypeID py_create(object dtype_in, dict enum_vals=None):
+cpdef TypeID py_create(object dtype_in, bint logical=0):
     """(OBJECT dtype_in, DICT enum_vals=None) => TypeID
 
     Given a Numpy dtype object, generate a byte-for-byte memory-compatible
@@ -1342,11 +1359,12 @@ cpdef TypeID py_create(object dtype_in, dict enum_vals=None):
     Argument dtype_in may be a dtype object, or anything which can be
     converted to a dtype, including strings like '<i4'.
 
-    enum_vals
-        A optional dictionary mapping names to integer values.  If the
-        type being converted is an integer (Numpy kind i/u), the resulting 
-        HDF5 type will be an enumeration with that base type, and the 
-        given values. Ignored for all other types.
+    logical
+        If this flag is set, instead of returning a byte-for-byte identical
+        representation of the type, the function returns the closest logically
+        appropriate HDF5 type.  For example, in the case of a "hinted" dtype
+        of kind "O" representing a string, it would return an HDF5 variable-
+        length string type.
     """
     cdef dtype dt = dtype(dtype_in)
     cdef char kind = dt.kind
@@ -1360,10 +1378,13 @@ cpdef TypeID py_create(object dtype_in, dict enum_vals=None):
         # Integer
         elif kind == c'u' or kind == c'i':
 
-            if enum_vals is not None:
-                return _c_enum(dt, enum_vals)
-            else:
-                return _c_int(dt)
+            if logical:
+                # Check for an enumeration hint
+                enum_vals = py_get_enum(dt)
+                if enum_vals is not None:
+                    return _c_enum(dt, enum_vals)
+
+            return _c_int(dt)
 
         # Complex
         elif kind == c'c':
@@ -1371,12 +1392,12 @@ cpdef TypeID py_create(object dtype_in, dict enum_vals=None):
 
         # Compound
         elif kind == c'V' and dt.names is not None:
-            return _c_compound(dt)
+            return _c_compound(dt, logical)
 
         # Array or opaque
         elif kind == c'V':
             if dt.subdtype is not None:
-                return _c_array(dt)
+                return _c_array(dt, logical)
             else:
                 return _c_opaque(dt)
 
@@ -1388,10 +1409,98 @@ cpdef TypeID py_create(object dtype_in, dict enum_vals=None):
         elif kind == c'b':
             return _c_bool(dt)
 
+        # Object types (including those with vlen hints)
+        elif kind == c'O':
+
+            if logical:
+                # Check for vlen hints
+                vlen = py_get_vlen(dt)
+                if vlen is not None:
+                    return _c_vlen_str(vlen)
+                raise TypeError("Object dtype has no native HDF5 equivalent")
+
+            return _c_object(dt)
+
         # Unrecognized
         else:
             raise TypeError("No conversion path for dtype: %s" % repr(dt))
+
     finally:
         phil.release()
 
+cpdef dtype py_new_enum(dtype dt_in, dict enum_vals):
+    """ (DTYPE dt_in, DICT enum_vals) => DTYPE
+
+    Create a new NumPy integer dtype, which contains "hint" metadata for
+    an enum. Only dtypes of kind 'i' or 'u' are allowed.  The enum_vals
+    dict must consist only of string keys and integer values.
+    """
+
+    cdef dtype dt = dtype(dt_in)
+    if dt.kind != 'i' and dt.kind != 'u':
+        raise TypeError("Only integer types can be used as enums")
+
+    return dtype((dt, [( ({'vals': enum_vals},'enum'), dt )] ))
+    
+cpdef dict py_get_enum(object dt):
+    """ (DTYPE dt_in) => DICT
+
+    Determine the enum values associated with a "hinted" NumPy integer type.
+    Returns None if the type does not contain hints, or is of the wrong kind.
+    """
+    
+    if dt.kind != 'i' and dt.kind != 'u':
+        return None
+
+    if dt.fields is not None and 'enum' in dt.fields:
+        tpl = dt.fields['enum']
+        if len(tpl) == 3:
+            info_dict = tpl[2]
+            if 'vals' in info_dict:
+                return info_dict['vals']
+
+    return None
+
+cpdef dtype py_new_vlen(object kind):
+    """ (OBJECT kind) => DTYPE
+
+    Create a new NumPy object dtype, which contains "hint" metadata
+    identifying the proper HDF5 vlen base type.  For now, only the native
+    Python string object (str) is supported.
+    """
+    if kind is not str:
+        raise NotImplementedError("Only string vlens are currently supported")
+
+    return dtype(('O', [( ({'type': kind},'vlen'), 'O' )] ))
+
+cpdef object py_get_vlen(object dt_in):
+    """ (OBJECT dt_in) => TYPE
+
+    Determine the vlen "hint" type associated with a NumPy object type,
+    or None if the dtype does not contain a hint or is not of kind "O".
+    """
+    cdef dtype dt = dtype(dt_in)
+
+    if dt.kind != 'O':
+        return None
+
+    if dt.fields is not None and 'vlen' in dt.fields:
+        tpl = dt.fields['vlen']
+        if len(tpl) == 3:
+            hint_dict = tpl[2]
+            if 'type' in hint_dict:
+                return hint_dict['type']
+
+    return None
+
+        
+def path_exists(TypeID src not None, TypeID dst not None):
+
+    cdef H5T_cdata_t *data
+    cdef H5T_conv_t result = NULL
+    
+    result = H5Tfind(src.id, dst.id, &data)
+    return result != NULL
+
+
 
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index cd0b92a..1f4a7ae 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -775,7 +775,7 @@ class Dataset(HLObject):
                     maxshape = tuple(x if x is not None else h5s.UNLIMITED for x in maxshape)
 
                 space_id = h5s.create_simple(shape, maxshape)
-                type_id = h5t.py_create(dtype)
+                type_id = h5t.py_create(dtype, logical=True)
 
                 self.id = h5d.create(group.id, name, type_id, space_id, plist)
                 if data is not None:
@@ -1049,30 +1049,75 @@ class AttributeManager(LockableObject, _DictCompat):
             return arr
 
     def __setitem__(self, name, value):
-        """ Set the value of an attribute, overwriting any previous value.
+        """ Set a new attribute, or change the value of an existing one.
 
-        The value you provide must be convertible to a Numpy array or scalar.
+        The value you provide must be compatible with the type of any existing
+        attribute.  If no attribute with the given name exists, one will be
+        automatically created based on the type and shape of the given data.
 
-        Any existing value is destroyed just before the call to h5a.create.
-        If the creation fails, the data is not recoverable.
+        To unconditionally overwrite an existing attribute, use the method
+        "create".
         """
         with self._lock:
             value = numpy.asarray(value, order='C')
 
-            space = h5s.create_simple(value.shape)
-            htype = h5t.py_create(value.dtype)
-
-            # TODO: some kind of transactions safeguard
             if name in self:
-                h5a.delete(self.id, name)
+                attr = h5a.open(self.id, name)
+
+                # Allow the case of () <-> (1,)
+                if (value.shape != attr.shape) and not \
+                   (numpy.product(value.shape)==1 and numpy.product(attr.shape)==1):
+                    raise TypeError("Shape of data is incompatible with existing attribute")
+                attr.write(value)
+            
+            else:
+                space = h5s.create_simple(value.shape)
+                htype = h5t.py_create(value.dtype, logical=True)
 
-            attr = h5a.create(self.id, name, htype, space)
-            attr.write(value)
+                attr = h5a.create(self.id, name, htype, space)
+                attr.write(value)
 
     def __delitem__(self, name):
         """ Delete an attribute (which must already exist). """
         h5a.delete(self.id, name)
 
+    def create(self, name, data=None, shape=None, dtype=None):
+        """ Create a new attribute, overwriting any existing attribute.
+
+        name:   Name of the new attribute (required)
+        data:   An array to initialize the attribute.
+                Required unless "shape" is given.
+        shape:  Shape of the attribute.  Overrides data.shape if both are
+                given.  The total number of points must be unchanged.
+        dtype:  Data type of the attribute.  Overrides data.dtype if both
+                are given.  Must be conversion-compatible with data.dtype.
+        """
+       
+        if data is not None:
+            data = numpy.asarray(data, order='C', dtype=dtype)
+            if shape is None:
+                shape = data.shape
+            elif numpy.product(shape) != numpy.product(data.shape):
+                raise ValueError("Shape of new attribute conflicts with shape of data")
+                
+            if dtype is None:
+                dtype = data.dtype
+
+        if dtype is None:
+            dtype = numpy.dtype('f')
+        if shape is None:
+            raise ValueError('At least one of "shape" or "data" must be given')
+
+        space = h5s.create_simple(shape)
+        htype = h5t.py_create(dtype, logical=True)
+
+        if name in self:
+            h5a.delete(self.id, name)
+
+        attr = h5a.create(self.id, name, htype, space)
+        if data is not None:
+            attr.write(data)
+
     def __len__(self):
         """ Number of attributes attached to the object. """
         # I expect we will not have more than 2**32 attributes
diff --git a/h5py/tests/data/strings.h5 b/h5py/tests/data/strings.h5
new file mode 100644
index 0000000..5cba643
Binary files /dev/null and b/h5py/tests/data/strings.h5 differ
diff --git a/h5py/tests/data/vlstra.h5 b/h5py/tests/data/vlstra.h5
new file mode 100644
index 0000000..ef58462
Binary files /dev/null and b/h5py/tests/data/vlstra.h5 differ
diff --git a/h5py/tests/test_h5t.py b/h5py/tests/test_h5t.py
index dfd32dd..c96537c 100644
--- a/h5py/tests/test_h5t.py
+++ b/h5py/tests/test_h5t.py
@@ -238,17 +238,6 @@ class TestH5T(HDF5TestCase):
             self.assertEqual(type(htype), kind_map[dt.kind])
             self.assertEqual(dt, htype.dtype)
 
-    def test_py_create_enum(self):
-        enum = {'A': 0, 'AA': 1, 'foo': 34, 'bizarre': 127}
-        enum_bases = [ x for x in simple_types if 'i' in x or 'u' in x]
-        for x in enum_bases:
-            dt = dtype(x)
-            htype = h5t.py_create(dt, enum_vals=enum)
-            self.assertEqual(type(htype), h5t.TypeEnumID)
-            self.assertEqual(dt, htype.dtype)
-            for name, val in enum.iteritems():
-                self.assertEqual(name, htype.enum_nameof(val))
-
     def test_py_create_array(self):
         shapes = [ (1,1), (1,), (4,5), (99,10,22) ]
         array_types = []
diff --git a/h5py/tests/test_vlen.py b/h5py/tests/test_vlen.py
new file mode 100644
index 0000000..584a45f
--- /dev/null
+++ b/h5py/tests/test_vlen.py
@@ -0,0 +1,99 @@
+
+import numpy as np
+import h5py
+import unittest
+import os.path as op
+import os
+from common import skip
+
+class TestVlen(unittest.TestCase):
+
+    def test_create(self):
+        dt = h5py.new_vlen(str)
+        self.assertEqual(str, h5py.get_vlen(dt))
+        self.assertEqual(dt.kind, "O")
+
+    def test_read_attr(self):
+        
+        f = h5py.File(op.join(op.dirname(h5py.__file__), 'tests/data/vlstra.h5'), 'r')
+
+        self.assertEqual(f.attrs['test_scalar'], "This is the string for the attribute")
+
+        aid = h5py.h5a.open(f.id, 'test_scalar')
+        self.assertEqual(aid.dtype, h5py.new_vlen(str))
+
+    def test_write_attr(self):
+
+        f = h5py.File('tmp.hdf5','w')
+        value = "This is the string!"
+        
+        dt = h5py.new_vlen(str)
+        f.attrs.create('test_string', value, dtype=dt)
+        self.assertEqual(f.attrs['test_string'], value)
+    
+        aid = h5py.h5a.open(f.id, 'test_string')
+        self.assertEqual(dt, aid.dtype)
+
+    def test_read_strings(self):
+
+        f = h5py.File(op.join(op.dirname(h5py.__file__), 'tests/data/strings.h5'), 'r')
+
+        refarr = np.array(["A fight is a contract that takes two people to honor.",
+                           "A combative stance means that you've accepted the contract.",
+                           "In which case, you deserve what you get.",
+                           "  --  Professor Cheng Man-ch'ing"], dtype="O")
+
+        print "\nReading vlen strings:\n"+"-"*60
+        dset = f["StringsEx"]
+
+        for idx, x in enumerate(dset):
+            print '%d  "%s"' % (idx, x)
+        print "-"*60
+
+        self.assert_(np.all(refarr == dset[...]))
+        self.assert_(np.all(refarr[2] == dset[2]))
+        self.assert_(np.all(refarr[1:3] == dset[1:3]))
+
+        self.assertEqual(dset.dtype, h5py.new_vlen(str))
+    
+    def test_write_strings(self):
+
+        f = h5py.File('tmp.hdf5', 'w')
+        dt = h5py.new_vlen(str)
+
+        data_arr = np.array(["Hello there!", "string 2", "", "!!!!!"], dtype=dt)
+
+        slices = [np.s_[0], np.s_[1:3], np.s_[...]]
+
+        try:
+            dset = f.create_dataset("vlen_ds", (4,), dt)
+            for s in slices:
+                print "slc %s data %s" % (s, data_arr[s])
+                dset[s] = data_arr[s]
+                self.assert_(np.all(dset[s] == data_arr[s]))
+        finally:
+            f.close()
+            #os.unlink('tmp.hdf5')
+
+    def test_compound(self):
+
+        vlen_dt = h5py.new_vlen(str)
+        dts = [ [('a_name','>i4'), ('vlen',vlen_dt), ('d_name', '>f4')],
+                [('a_name','=i8'), ('vlen',vlen_dt), ('d_name', '>f4')] ]
+
+        f = h5py.File('tmp.hdf5', 'w')
+        try:
+            for dt in dts:
+                if 'vlen_ds' in f:                 del f['vlen_ds']
+                data = np.ndarray((1,),dtype=dt)
+                data['a_name'] = 42
+                data['vlen'] = 'This is a variable-length string'
+                data['d_name'] = 34.5
+                dset = f.create_dataset("vlen_ds", data=data)
+                self.assert_(np.all(dset[...] == data))
+
+        finally:
+            f.close()
+
+
+
diff --git a/h5py/typeconv.c b/h5py/typeconv.c
new file mode 100644
index 0000000..14f1bba
--- /dev/null
+++ b/h5py/typeconv.c
@@ -0,0 +1,416 @@
+/***** Preamble block *********************************************************
+* 
+* This file is part of h5py, a low-level Python interface to the HDF5 library.
+* 
+* Copyright (C) 2008 Andrew Collette
+* http://h5py.alfven.org
+* License: BSD  (See LICENSE.txt for full license)
+* 
+* $Date$
+* 
+****** End preamble block ****************************************************/
+
+#include <stdio.h>
+#include <string.h>
+#include "Python.h"
+#include "hdf5.h"
+#include "typeconv.h"
+
+/*  Cached identifier of the Python-object type; 0 until it has been created
+    successfully. */
+hid_t _H5PY_OBJ = 0;
+
+/*  Return the (locked) canonical type corresponding to a Python object
+    pointer in memory.  This is an ephemeral type; it should never be stored
+    in a file.
+
+    The type is an opaque type of size sizeof(PyObject*) tagged
+    "PYTHON:OBJECT".  It is created on the first successful call and the same
+    identifier is returned afterwards.
+
+    Returns the type identifier, or a negative value if the type could not be
+    created, tagged or locked.  Nothing is cached on failure, so a later call
+    tries again. */
+hid_t h5py_object_type(void){
+
+    hid_t tid;
+
+    if(_H5PY_OBJ > 0) return _H5PY_OBJ;
+
+    tid = H5Tcreate(H5T_OPAQUE, sizeof(PyObject*));
+    if(tid < 0) return -1;
+
+    /* An untagged type would never be recognised by the conversion
+       callbacks, so treat a tagging or locking failure as fatal. */
+    if(H5Tset_tag(tid, "PYTHON:OBJECT") < 0 || H5Tlock(tid) < 0){
+        H5Tclose(tid);
+        return -1;
+    }
+
+    _H5PY_OBJ = tid;
+    return _H5PY_OBJ;
+}
+
+
+/* === Type-conversion callbacks & support === */
+
+/* Check types for Python string/vlen conversion.
+
+   Returns a positive value when "str" is a variable-length string type and
+   "pyptr" carries the tag "PYTHON:OBJECT", 0 when either condition does not
+   hold, and -1 when the variable-length query itself fails. */
+htri_t h5py_match_str_ptr(hid_t str, hid_t pyptr){
+
+    htri_t vlen_flag;
+    htri_t tag_matches = 0;
+    char* tag;
+
+    vlen_flag = H5Tis_variable_str(str);
+    if(vlen_flag < 0){
+        return -1;
+    }
+
+    /* A type without a tag simply does not match */
+    tag = H5Tget_tag(pyptr);
+    if(tag != NULL){
+        tag_matches = (strcmp(tag, "PYTHON:OBJECT") == 0);
+        free(tag);
+    }
+
+    return vlen_flag && tag_matches;
+}
+
+typedef struct {        /* State stored in cdata->priv by vlen_to_str and
+                           str_to_vlen between INIT and FREE */
+    size_t src_size;    /* H5Tget_size() of the source type */
+    size_t dst_size;    /* H5Tget_size() of the destination type */
+} conv_size_t;
+
+/*  Convert from HDF5 variable-length strings to Python string objects.
+
+    HDF5 conversion callback; the action is selected by cdata->command:
+
+    H5T_CONV_INIT   Accept the path only when src_id is a variable-length
+                    string and dst_id carries the "PYTHON:OBJECT" tag, then
+                    cache both element sizes in cdata->priv.
+    H5T_CONV_CONV   Replace each char* in buf, in place, with a newly created
+                    Python string.  The source string is freed and the object
+                    held in the matching background-buffer slot is released.
+    H5T_CONV_FREE   Release the cached sizes.
+
+    Returns 0 on success and -1 on failure.  If creating a Python string
+    fails part-way through, the elements already processed stay converted.
+*/
+herr_t vlen_to_str(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
+                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf,
+                    void *bkg, hid_t dxpl){
+
+    PyGILState_STATE gil;
+
+    /* Byte-addressed views; arithmetic on void* is not standard C */
+    char*       cbuf = (char*)buf;
+    char*       cbkg = (char*)bkg;
+
+    char**      str;
+    PyObject**  obj;
+    PyObject**  obj_bkg;
+    PyObject*   obj_tmp;
+
+    conv_size_t *sizes = NULL;
+
+    size_t i;       /* same type as nl, so large counts cannot overflow it */
+
+    switch(cdata->command){
+
+    /*  Determine if we can convert between src_id and dst_id; return 0 if
+        possible, -1 otherwise */
+    case H5T_CONV_INIT:
+
+        /*  Only accept the case of vlen H5T_STRING to Python string */
+        if(h5py_match_str_ptr(src_id, dst_id) <= 0) goto init_failed;
+
+        sizes = (conv_size_t*)malloc(sizeof(conv_size_t));
+        if(sizes==NULL) goto init_failed;
+
+        sizes->src_size = H5Tget_size(src_id);
+        if(sizes->src_size == 0) goto init_failed;
+        sizes->dst_size = H5Tget_size(dst_id);
+        if(sizes->dst_size == 0) goto init_failed;
+
+        /* Publish the state only once it is complete, so cdata->priv never
+           points at freed memory */
+        cdata->need_bkg = H5T_BKG_YES;
+        cdata->priv = sizes;
+
+        return 0;
+
+        init_failed:    /* Error target */
+
+        free(sizes);
+        return -1;
+
+    case H5T_CONV_CONV:
+
+        gil = PyGILState_Ensure();
+
+        sizes = (conv_size_t*)(cdata->priv);
+
+        /* A stride of 0 means the elements are packed */
+        if(buf_stride==0) buf_stride = sizes->src_size;
+        if(bkg_stride==0) bkg_stride = sizes->dst_size;
+
+        for(i=0;i<nl;i++){
+
+            /* Source and destination share the same slot */
+            obj = (PyObject**)(cbuf+(i*buf_stride));
+            str = (char**)(cbuf+(i*buf_stride));
+
+            if((*str)==NULL){
+                obj_tmp = PyString_FromString("");
+            } else {
+                obj_tmp = PyString_FromString(*str);
+            }
+            if(obj_tmp==NULL) goto conv_failed;
+
+            /* Since all data conversions are by convention in-place, it
+               is our responsibility to free the memory used by the vlens. */
+            free(*str);
+
+            /* Drop the object this element replaces, if a background buffer
+               was supplied */
+            if(cbkg!=NULL){
+                obj_bkg = (PyObject**)(cbkg+(i*bkg_stride));
+                Py_XDECREF(*obj_bkg);
+            }
+            *obj = obj_tmp;
+        }
+
+        PyGILState_Release(gil);
+        return 0;
+
+        conv_failed:    /* Error target */
+
+        PyGILState_Release(gil);
+        return -1;
+
+    case H5T_CONV_FREE:
+
+        free(cdata->priv);
+        cdata->priv = NULL;
+        return 0;
+
+    default:
+
+        return -1;
+    }
+}
+
+
+/*  Convert from Python strings to HDF5 vlens.
+
+    HDF5 conversion callback; the action is selected by cdata->command:
+
+    H5T_CONV_INIT   Accept the path only when src_id carries the
+                    "PYTHON:OBJECT" tag and dst_id is a variable-length
+                    string, then cache both element sizes in cdata->priv.
+    H5T_CONV_CONV   Replace each PyObject* in buf, in place, with a malloc'ed
+                    NUL-terminated copy of the string.  NULL and None become
+                    the empty string; any object that is not exactly a Python
+                    string makes the conversion fail.
+    H5T_CONV_FREE   Release the cached sizes.
+
+    Returns 0 on success and -1 on failure.  The copies are owned by whoever
+    holds the buffer afterwards.  If the conversion fails part-way through,
+    the elements already processed stay converted.
+*/
+herr_t str_to_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
+                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf,
+                    void *bkg, hid_t dset_xfer_plist){
+
+    PyGILState_STATE gil;
+
+    /* Byte-addressed view; arithmetic on void* is not standard C */
+    char*        cbuf = (char*)buf;
+
+    PyObject**   obj;
+    char**       str;
+    const char*  str_src;
+    char*        str_copy;
+    size_t       len;
+
+    conv_size_t *sizes = NULL;
+
+    size_t i;       /* same type as nl, so large counts cannot overflow it */
+
+    switch(cdata->command){
+
+    case H5T_CONV_INIT:
+
+        /*  Only accept Python string -> HDF5 vlen */
+        if(h5py_match_str_ptr(dst_id, src_id) <= 0) goto init_failed;
+
+        sizes = (conv_size_t*)malloc(sizeof(conv_size_t));
+        if(sizes==NULL) goto init_failed;
+
+        sizes->src_size = H5Tget_size(src_id);
+        if(sizes->src_size==0) goto init_failed;
+        sizes->dst_size = H5Tget_size(dst_id);
+        if(sizes->dst_size==0) goto init_failed;
+
+        /* Publish the state only once it is complete, so cdata->priv never
+           points at freed memory */
+        cdata->need_bkg = H5T_BKG_NO;
+        cdata->priv = sizes;
+
+        return 0;
+
+        init_failed:    /* Error target */
+
+        free(sizes);
+        return -1;
+
+    case H5T_CONV_CONV:
+
+        gil = PyGILState_Ensure();
+        sizes = (conv_size_t*)(cdata->priv);
+
+        /* A stride of 0 means the elements are packed */
+        if(buf_stride==0) buf_stride = sizes->src_size;
+
+        for(i=0;i<nl;i++){
+
+            /* Source and destination share the same slot, so the object
+               pointer must be consumed before the slot is overwritten */
+            obj = (PyObject**)(cbuf+(i*buf_stride));
+            str = (char**)(cbuf+(i*buf_stride));
+
+            if(*obj == NULL || *obj == Py_None){
+                str_src = "";
+                len = 1;
+            } else if(PyString_CheckExact(*obj)) {
+                str_src = PyString_AsString(*obj);
+                if(str_src==NULL) goto conv_failed;
+                len = (size_t)PyString_Size(*obj)+1;
+            } else {
+                goto conv_failed;
+            }
+
+            str_copy = (char*)malloc(len);  /* len already includes null term */
+            if(str_copy==NULL) goto conv_failed;
+            memcpy(str_copy, str_src, len);
+
+            *str = str_copy;
+        }
+
+        PyGILState_Release(gil);
+        return 0;
+
+        conv_failed:    /* Error target */
+
+        PyGILState_Release(gil);
+        return -1;
+
+    case H5T_CONV_FREE:
+
+        free(cdata->priv);
+        cdata->priv = NULL;
+        return 0;
+
+    default:
+
+        return -1;
+    }
+
+}
+
+/* Convert back & forth between enums and ints */
+
+typedef struct {
+    H5T_class_t src_cls;    /* H5Tget_class() of the source type */
+    size_t src_size;        /* H5Tget_size() of the source type */
+    size_t dst_size;        /* H5Tget_size() of the destination type */
+    hid_t int_src_id;   /* Integer type appropriate for source */
+    hid_t int_dst_id;   /* Integer type appropriate for destination */
+    int identical;      /* Tells if the above types are the same */
+} h5py_enum_conv_t;
+
+/* This function is registered on both paths ENUM -> INT and INT -> ENUM.
+
+   HDF5 conversion callback; the action is selected by cdata->command:
+
+   H5T_CONV_INIT   Work out the integer type on each side (the base type of
+                   the enum, or the integer type itself) and record whether
+                   the two are identical.
+   H5T_CONV_CONV   Do nothing if the integer types are identical; otherwise
+                   convert between them with H5Tconvert, going through a
+                   packed temporary buffer when the data is strided.
+   H5T_CONV_FREE   Currently a no-op (see the comment in that case).
+
+   Returns 0 on success and -1 on failure. */
+herr_t enum_int(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata,
+                    size_t nl, size_t buf_stride, size_t bkg_stride, void *buf,
+                    void *bkg, hid_t dset_xfer_plist){
+
+    h5py_enum_conv_t* info = NULL;
+
+    /* Byte-addressed views; arithmetic on void* is not standard C */
+    char* cbuf = (char*)buf;
+    char* conv_buf = NULL;
+    size_t nalloc;
+
+    herr_t cresult;
+    size_t i;       /* same type as nl, so large counts cannot overflow it */
+
+    switch(cdata->command){
+
+        case H5T_CONV_INIT:
+
+            cdata->need_bkg = H5T_BKG_NO;
+            info = (h5py_enum_conv_t*)malloc(sizeof(h5py_enum_conv_t));
+            if(info==NULL) goto init_failed;
+
+            info->int_src_id = 0;
+            info->int_dst_id = 0;
+
+            info->src_cls = H5Tget_class(src_id);
+            if(info->src_cls==H5T_NO_CLASS) goto init_failed;
+
+            /* H5Tget_size reports failure as 0; size_t can never be < 0 */
+            info->src_size = H5Tget_size(src_id);
+            if(info->src_size==0) goto init_failed;
+            info->dst_size = H5Tget_size(dst_id);
+            if(info->dst_size==0) goto init_failed;
+
+            if(info->src_cls == H5T_ENUM){
+                /* We're trying to convert an ENUM to an INT */
+                info->int_src_id = H5Tget_super(src_id);
+                info->int_dst_id = dst_id;
+                H5Iinc_ref(dst_id);
+            } else {
+                /* We're trying to convert an INT to an ENUM */
+                info->int_src_id = src_id;
+                info->int_dst_id = H5Tget_super(dst_id);
+                H5Iinc_ref(src_id);
+            }
+            if(info->int_src_id<0) goto init_failed;
+            if(info->int_dst_id<0) goto init_failed;
+
+            cresult = H5Tequal(info->int_src_id, info->int_dst_id);
+            if(cresult<0) goto init_failed;
+
+            info->identical = cresult;
+
+            /* Publish the state only once it is complete, so cdata->priv
+               never points at freed memory */
+            cdata->priv = info;
+
+            return 0;
+
+            init_failed:
+
+            if(info!=NULL){
+                if(info->int_src_id>0)  H5Idec_ref(info->int_src_id);
+                if(info->int_dst_id>0)  H5Idec_ref(info->int_dst_id);
+            }
+            free(info);
+
+            return -1;
+
+        case H5T_CONV_CONV:
+
+            info = (h5py_enum_conv_t*)(cdata->priv);
+
+            /* Shortcut */
+            if(info->identical) return 0;
+
+            if(buf_stride==0){
+                /*  Contiguous data: H5Tconvert can do this directly */
+
+                cresult = H5Tconvert(info->int_src_id,
+                                     info->int_dst_id,
+                                     nl, buf, NULL, dset_xfer_plist);
+                if(cresult<0) goto conv_failed;
+
+            } else {
+                /*  Can't tell H5Tconvert about strides; use a buffer */
+
+                /* Nothing to do; also avoids mistaking a NULL returned by
+                   malloc(0) for an allocation failure */
+                if(nl==0) return 0;
+
+                /* The buffer holds the larger of the two representations */
+                if( (info->src_size) > (info->dst_size)){
+                    nalloc = (info->src_size)*nl;
+                } else {
+                    nalloc = (info->dst_size)*nl;
+                }
+                conv_buf = (char*)malloc(nalloc);
+                if(conv_buf==NULL) goto conv_failed;
+
+                /* Copy into temp buffer */
+                for(i=0;i<nl;i++){
+                    memcpy(conv_buf+(i*(info->src_size)), cbuf+(i*buf_stride),
+                           info->src_size);
+                }
+
+                /* Convert in-place */
+                cresult = H5Tconvert(info->int_src_id,
+                                     info->int_dst_id,
+                                     nl, conv_buf, NULL, dset_xfer_plist);
+                if(cresult<0) goto conv_failed;
+
+                /*  Copy back out to source buffer.  Remember these elements
+                    are now of size info->dst_size. */
+                for(i=0;i<nl;i++){
+                    memcpy(cbuf+(i*buf_stride), conv_buf+(i*(info->dst_size)),
+                           info->dst_size);
+                }
+
+            } /* if ... else */
+
+            free(conv_buf);
+            return 0;
+
+            conv_failed:
+
+            free(conv_buf);
+            return -1;
+
+        case H5T_CONV_FREE:
+
+            /* Segfault on cleanup; something's wrong with cdata->priv */
+            /* NOTE(review): as a result the info struct and the two type
+               references taken in INIT are never released here */
+            return 0;
+
+        default:
+
+            return -1;
+
+    } /* case */
+
+}
+
+/*  Register the vlen-string <-> Python-string and enum <-> int conversion
+    callbacks with HDF5.
+
+    Returns 0 when every registration succeeded and -1 otherwise.  The
+    temporary types created here are closed in both cases. */
+int h5py_register_conv(void){
+
+    hid_t h5py_obj;
+    hid_t vlen_str = 0;
+    hid_t h5py_enum = 0;
+    int retval = -1;
+
+    h5py_obj = h5py_object_type();
+    if(h5py_obj<0) goto out;
+
+    vlen_str = H5Tcopy(H5T_C_S1);
+    if(vlen_str<0) goto out;
+    if(H5Tset_size(vlen_str, H5T_VARIABLE)<0) goto out;
+
+    h5py_enum = H5Tenum_create(H5T_NATIVE_INT);
+    if(h5py_enum<0) goto out;
+
+    /*  "Soft" registration means the conversion is tested for any two types
+        which match the given classes (in this case H5T_STRING and H5T_OPAQUE) */
+    if(H5Tregister(H5T_PERS_SOFT, "vlen_to_str", vlen_str, h5py_obj, vlen_to_str)<0) goto out;
+    if(H5Tregister(H5T_PERS_SOFT, "str_to_vlen", h5py_obj, vlen_str, str_to_vlen)<0) goto out;
+
+    if(H5Tregister(H5T_PERS_SOFT, "enum to int", h5py_enum, H5T_NATIVE_INT, enum_int)<0) goto out;
+    if(H5Tregister(H5T_PERS_SOFT, "int to enum", H5T_NATIVE_INT, h5py_enum, enum_int)<0) goto out;
+
+    retval = 0;
+
+    out:        /* Cleanup */
+
+    /* The template types are only needed while registering */
+    if(vlen_str>0)  H5Tclose(vlen_str);
+    if(h5py_enum>0) H5Tclose(h5py_enum);
+
+    return retval;
+}
+
+
+
+
+
+
+
+
diff --git a/h5py/typeconv.h b/h5py/typeconv.h
new file mode 100644
index 0000000..90b78c0
--- /dev/null
+++ b/h5py/typeconv.h
@@ -0,0 +1,28 @@
+/***** Preamble block *********************************************************
+* 
+* This file is part of h5py, a low-level Python interface to the HDF5 library.
+* 
+* Copyright (C) 2008 Andrew Collette
+* http://h5py.alfven.org
+* License: BSD  (See LICENSE.txt for full license)
+* 
+* $Date$
+* 
+****** End preamble block ****************************************************/
+
+/*
+    Implements low-level infrastructure for vlen and enum types
+*/
+
+#include "hdf5.h"
+
+#ifndef H5PY_TYPECONV_H
+#define H5PY_TYPECONV_H
+
+/* Register all new conversion functions */
+int h5py_register_conv(void);
+
+/* Return the canonical Python object pointer type */
+hid_t h5py_object_type(void);
+
+#endif
diff --git a/h5py/typeproxy.c b/h5py/typeproxy.c
new file mode 100644
index 0000000..145e374
--- /dev/null
+++ b/h5py/typeproxy.c
@@ -0,0 +1,464 @@
+/***** Preamble block *********************************************************
+* 
+* This file is part of h5py, a low-level Python interface to the HDF5 library.
+* 
+* Copyright (C) 2008 Andrew Collette
+* http://h5py.alfven.org
+* License: BSD  (See LICENSE.txt for full license)
+* 
+* $Date$
+* 
+****** End preamble block ****************************************************/
+
+/*
+    Contains proxy functions for reading and writing data from datasets and
+    attributes.  Importantly, these functions implement the proper workarounds
+    required for variable-length type support, as implemented in typeconv.c.
+*/
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "hdf5.h"
+#include "typeproxy.h"
+
+/* ------- Private function prototypes ------ */
+
+herr_t h5py_resolve_spaces(hid_t dset_id, hid_t ifspace, hid_t imspace,
+                           hid_t *ofspace, hid_t *omspace);
+
+void* h5py_setup_buffer(hid_t itype, hid_t otype, hid_t space_id, hsize_t* nl);
+
+htri_t h5py_detect_vlen(hid_t type_id);
+
+
+/* ------- Attribute read/write support ------- */
+
+/*  H5PY_attr_rw
+
+    Read or write an attribute, as selected by "dir", using "mtype" as the
+    in-memory type of "buf".
+
+    When the attribute's type contains no variable-length component, the call
+    goes straight to H5Aread/H5Awrite.  Otherwise the data moves through a
+    temporary buffer in the attribute's own type and is converted to or from
+    mtype with an explicit H5Tconvert call.
+
+    Returns 0 on success and -1 on failure.
+*/
+herr_t H5PY_attr_rw(hid_t attr, hid_t mtype, void *buf, h5py_rw_t dir){
+
+    hid_t   atype = 0;      /* Attribute data type */
+    hid_t   aspace = 0;     /* Attribute data space */
+
+    htri_t  vlen_present;
+    herr_t  status;         /* API function result */
+    herr_t  reclaim;        /* Result of freeing converted vlen data */
+    herr_t  retval;         /* Return value for this function */
+
+    hsize_t mtype_size;
+    hsize_t  nl;
+    void*   bkg_buf = NULL;
+    void*   conv_buf = NULL;
+
+    atype = H5Aget_type(attr);
+    if(atype<0) goto failed;
+
+    vlen_present = h5py_detect_vlen(atype);
+    if(vlen_present<0) goto failed;
+
+    if(!vlen_present){
+        /* Direct read/write */
+
+        switch(dir){
+        case H5PY_READ:
+            status = H5Aread(attr, mtype, buf);
+            break;
+        case H5PY_WRITE:
+            status = H5Awrite(attr, mtype, buf);
+            break;
+        default:
+            goto failed;
+        }
+        if(status<0) goto failed;
+
+    } else {
+        /* Buffered read/write */
+
+        aspace = H5Aget_space(attr);
+        if(aspace<0) goto failed;
+
+        /* Sized for the larger of the file and memory element types */
+        conv_buf = h5py_setup_buffer(atype, mtype, aspace, &nl);
+        if(conv_buf==NULL) goto failed;
+
+        mtype_size = H5Tget_size(mtype);
+        if(mtype_size==0) goto failed;
+
+        /* Background buffer: a copy of the caller's current data */
+        bkg_buf = malloc(mtype_size*nl);
+        if(bkg_buf==NULL) goto failed;
+
+        memcpy(bkg_buf, buf, mtype_size*nl);
+
+        switch(dir){
+
+        case H5PY_READ:
+            status = H5Aread(attr, atype, conv_buf);
+            if(status<0) goto failed;
+            status = H5Tconvert(atype, mtype, nl, conv_buf, bkg_buf, H5P_DEFAULT);
+            if(status<0) goto failed;
+            memcpy(buf, conv_buf, mtype_size*nl);
+            break;
+
+        case H5PY_WRITE:
+            memcpy(conv_buf, buf, mtype_size*nl);
+            status = H5Tconvert(mtype, atype, nl, conv_buf, bkg_buf, H5P_DEFAULT);
+            if(status<0) goto failed;
+            status = H5Awrite(attr, atype, conv_buf);
+            /* The conversion allocated the variable-length data now held in
+               conv_buf; free it whether or not the write succeeded.  The
+               default property list selects the system free(). */
+            reclaim = H5Dvlen_reclaim(atype, aspace, H5P_DEFAULT, conv_buf);
+            if(status<0) goto failed;
+            if(reclaim<0) goto failed;
+            break;
+
+        default:
+            goto failed;
+        }
+    }
+
+    retval = 0;
+
+    out:        /* Cleanup */
+
+    free(bkg_buf);
+    free(conv_buf);
+    if(atype>0)     H5Tclose(atype);
+    if(aspace>0)    H5Sclose(aspace);
+
+    return retval;
+
+    failed:     /* Error target */
+
+    retval = -1;
+    goto out;
+
+}
+
+
+
+/*  H5PY_dset_rw
+
+    Read & write datasets with proxy support for vlen bug.  "Direction"
+    determines whether to read or write data.
+
+    When the dataset's type contains no variable-length component, the call
+    goes straight to H5Dread/H5Dwrite.  Otherwise the selected elements move
+    through a packed temporary buffer in the dataset's own type: they are
+    gathered from / scattered to "buf" according to the memory selection and
+    converted to or from "mtype" with an explicit H5Tconvert call.
+
+    Returns 0 on success and -1 on failure.
+*/
+herr_t H5PY_dset_rw(hid_t dset, hid_t mtype, hid_t mspace_in, hid_t fspace_in,
+                   hid_t xfer_plist, void* buf, h5py_rw_t dir){
+
+    hid_t   dstype = 0;
+    hid_t   mspace = 0, fspace =0;
+    htri_t  vlen_present;
+    herr_t  status;             /* Status flag for API calls */
+    herr_t  reclaim;            /* Result of freeing converted vlen data */
+    herr_t  retval;             /* Return value for this function */
+
+    hsize_t nl;                 /* Number of elements for read/write */
+    size_t  mtype_size;
+    hid_t cspace = 0;           /* Dataspace for conversion buffer */
+    void* conv_buf = NULL;      /* Conversion buffer */
+    void* bkg_buf = NULL;       /* Backing buffer */
+
+
+    dstype = H5Dget_type(dset);
+    if(dstype<0) goto failed;
+
+    vlen_present = h5py_detect_vlen(dstype);
+    if(vlen_present<0) goto failed;
+
+    if(!vlen_present){
+        /* Standard read/write */
+
+        switch(dir){
+        case H5PY_READ:
+            status = H5Dread(dset, mtype, mspace_in, fspace_in, xfer_plist, buf);
+            break;
+        case H5PY_WRITE:
+            status = H5Dwrite(dset, mtype, mspace_in, fspace_in, xfer_plist, buf);
+            break;
+        default:
+            goto failed;
+        }
+        if(status<0) goto failed;
+
+    } else {
+        /* Buffered read/write */
+
+        /* Replace any H5S_ALL with a real dataspace */
+        status = h5py_resolve_spaces(dset, fspace_in, mspace_in, &fspace, &mspace);
+        if(status<0) goto failed;
+
+        /* Sized for the larger of the file and memory element types */
+        conv_buf = h5py_setup_buffer(dstype, mtype, fspace, &nl);
+        if(conv_buf==NULL) goto failed;
+
+        /* The conversion buffer is packed: describe it as 1-D of nl items */
+        cspace = H5Screate_simple(1, &nl, NULL);
+        if(cspace<0) goto failed;
+
+        /* Populate the backing buffer with in-memory data */
+        /* TODO: skip unless (1) reading (any type), or (2) writing compound */
+        mtype_size = H5Tget_size(mtype);
+        if(mtype_size==0) goto failed;
+
+        bkg_buf = malloc(mtype_size*nl);
+        if(bkg_buf==NULL) goto failed;
+
+        status = h5py_copy(mtype, mspace, bkg_buf, buf, H5PY_GATHER);
+        if(status<0) goto failed;
+
+        switch(dir){
+
+        case H5PY_READ:
+            status = H5Dread(dset, dstype, cspace, fspace, xfer_plist, conv_buf);
+            if(status<0) goto failed;
+            status = H5Tconvert(dstype, mtype, nl, conv_buf, bkg_buf, xfer_plist);
+            if(status<0) goto failed;
+            status = h5py_copy(mtype, mspace, conv_buf, buf, H5PY_SCATTER);
+            if(status<0) goto failed;
+            break;
+
+        case H5PY_WRITE:
+            status = h5py_copy(mtype, mspace, conv_buf, buf, H5PY_GATHER);
+            if(status<0) goto failed;
+            status = H5Tconvert(mtype, dstype, nl, conv_buf, bkg_buf, xfer_plist);
+            if(status<0) goto failed;
+            status = H5Dwrite(dset, dstype, cspace, fspace, xfer_plist, conv_buf);
+            /* The conversion allocated the variable-length data now held in
+               conv_buf; free it whether or not the write succeeded.  The
+               default property list selects the system free(). */
+            reclaim = H5Dvlen_reclaim(dstype, cspace, H5P_DEFAULT, conv_buf);
+            if(status<0) goto failed;
+            if(reclaim<0) goto failed;
+            break;
+
+        default:
+            goto failed;
+        }
+
+    }
+
+    retval = 0;
+
+    out:        /* Cleanup */
+
+    free(conv_buf);
+    free(bkg_buf);
+
+    if(dstype>0)    H5Tclose(dstype);
+    if(fspace>0)    H5Sclose(fspace);
+    if(mspace>0)    H5Sclose(mspace);
+    if(cspace>0)    H5Sclose(cspace);
+
+    return retval;
+
+    failed:     /* Error target */
+
+    retval = -1;
+    goto out;
+
+}
+
+/* ------- Support functions ------- */
+
+
+/*  Normalize a pair of file and memory dataspaces to get rid of H5S_ALL's.
+    The new dataspaces returned via ofspace and omspace must be closed.
+
+    A file space of H5S_ALL becomes the dataset's own dataspace; a memory
+    space of H5S_ALL becomes a copy of the resolved file space.  Anything
+    else is copied.
+
+    Returns 0 on success.  On failure returns -1, leaves *ofspace and
+    *omspace untouched and closes anything opened along the way. */
+herr_t h5py_resolve_spaces(hid_t dset_id, hid_t ifspace, hid_t imspace,
+                           hid_t *ofspace, hid_t *omspace){
+
+    hid_t of_tmp = -1, om_tmp = -1;
+
+    if(ifspace==H5S_ALL){
+        of_tmp = H5Dget_space(dset_id);
+    } else {
+        of_tmp = H5Scopy(ifspace);
+    }
+    if(of_tmp<0) goto failed;
+
+    if(imspace==H5S_ALL){
+        om_tmp = H5Scopy(of_tmp);
+    } else {
+        om_tmp = H5Scopy(imspace);
+    }
+    if(om_tmp<0) goto failed;
+
+    *ofspace = of_tmp;
+    *omspace = om_tmp;
+
+    return 0;
+
+    failed:
+
+    /* Only the file space can be open at this point */
+    if(of_tmp>0) H5Sclose(of_tmp);
+
+    return -1;
+}
+
+/*  Allocate a conversion buffer large enough to hold every element selected
+    in space_id in either of the two types (whichever is larger).
+
+    On success returns the malloc'ed buffer, which the caller must free, and
+    stores the number of selected elements in *nl.  Returns NULL on failure,
+    in which case *nl is not modified.  An empty selection still yields a
+    valid (1-byte) buffer, so NULL always means an error. */
+void* h5py_setup_buffer(hid_t itype, hid_t otype, hid_t space_id, hsize_t* nl){
+
+    void*       buf;
+    size_t      isize, osize, buflen;
+    size_t      count, nbytes;
+    hssize_t    nelements;
+
+    isize = H5Tget_size(itype);
+    if(isize==0) return NULL;
+
+    osize = H5Tget_size(otype);
+    if(osize==0) return NULL;
+
+    if(isize>osize){
+        buflen = isize;
+    } else {
+        buflen = osize;
+    }
+
+    nelements = H5Sget_select_npoints(space_id);
+    if(nelements<0) return NULL;
+
+    /* Refuse sizes that do not fit in size_t instead of wrapping around */
+    count = (size_t)nelements;
+    if((hssize_t)count != nelements) return NULL;
+    if(count!=0 && buflen > ((size_t)-1)/count) return NULL;
+
+    /* malloc(0) may legitimately return NULL; ask for at least one byte */
+    nbytes = count*buflen;
+    if(nbytes==0) nbytes = 1;
+
+    buf = malloc(nbytes);
+    if(buf==NULL) return NULL;
+
+    *nl = nelements;
+    return buf;
+
+}
+
+
+/*
+    Determine if a type is variable-length (H5T_STRING or H5T_VLEN) or in the
+    case of compound or array types, contains one.
+
+    Returns a positive value if so, 0 if not, and a negative value when a
+    type query fails.  Array base types and compound members are examined
+    recursively; every type identifier opened here is closed before return.
+*/
+htri_t h5py_detect_vlen(hid_t type_id){
+
+    H5T_class_t  typeclass;
+    htri_t       retval;
+
+    hid_t   stype=0;
+    int     nmembers;
+    int     i;
+
+    typeclass = H5Tget_class(type_id);
+    if(typeclass<0) goto failed;
+
+    switch(typeclass){
+
+        case H5T_STRING:
+            retval = H5Tis_variable_str(type_id);
+            break;
+
+        case H5T_VLEN:
+            retval = 1;
+            break;
+
+        case H5T_ARRAY:
+            stype = H5Tget_super(type_id);
+            if(stype<0) goto failed;
+            retval = h5py_detect_vlen(stype);
+            break;
+
+        case H5T_COMPOUND:
+            nmembers = H5Tget_nmembers(type_id);
+            if(nmembers<0) goto failed;
+
+            /* A compound without members contains no vlen */
+            retval = 0;
+
+            for(i=0;i<nmembers;i++){
+                stype = H5Tget_member_type(type_id, (unsigned)i);
+                if(stype<0) goto failed;
+
+                retval = h5py_detect_vlen(stype);
+
+                /* Close each member as soon as it has been examined, so only
+                   one member type is open at a time */
+                H5Tclose(stype);
+                stype = 0;
+
+                /* Stop at the first vlen member or the first error */
+                if(retval!=0){
+                    break;
+                }
+            }
+            break;
+
+        default:
+            retval = 0;
+
+    } /* switch */
+
+
+    out:        /* cleanup */
+
+    if(stype>0)     H5Tclose(stype);
+
+    return retval;
+
+    failed:     /* error target */
+
+    retval = -1;
+    goto out;
+
+}
+
+
+/* ------ Implements buffer-to-buffer scatter/gather operations ------- */
+
+typedef struct {        /* State shared by the scatter/gather callbacks */
+    size_t  i;          /* Index of the next element in buf; starts at 0 and
+                           is advanced by one on every callback */
+    size_t  el_size;    /* Size of one element, from H5Tget_size() */
+    void*   buf;        /* The contiguous buffer */
+} h5py_scatter_t;
+
+/*  H5Diterate callback: copy the next element of the contiguous buffer into
+    the selected element "elem", then advance the running index.
+    operator_data must point to an h5py_scatter_t; type_id, ndim and point
+    are not used.  Always returns 0 so that iteration continues. */
+herr_t h5py_scatter_cb(void* elem, hid_t type_id, unsigned ndim,
+                             const hsize_t *point, void *operator_data){
+
+    h5py_scatter_t* info = (h5py_scatter_t*)operator_data;
+
+    /* Address in bytes; arithmetic on void* is not standard C */
+    const char* src = (const char*)(info->buf) + (info->i)*(info->el_size);
+
+    memcpy(elem, src, info->el_size);
+
+    info->i++;
+
+    return 0;
+}
+
+/*  H5Diterate callback: copy the selected element "elem" into the next slot
+    of the contiguous buffer, then advance the running index.
+    operator_data must point to an h5py_scatter_t; type_id, ndim and point
+    are not used.  Always returns 0 so that iteration continues. */
+herr_t h5py_gather_cb(void* elem, hid_t type_id, unsigned ndim,
+                             const hsize_t *point, void *operator_data){
+
+    h5py_scatter_t* info = (h5py_scatter_t*)operator_data;
+
+    /* Address in bytes; arithmetic on void* is not standard C */
+    char* dst = (char*)(info->buf) + (info->i)*(info->el_size);
+
+    memcpy(dst, elem, info->el_size);
+
+    info->i++;
+
+    return 0;
+}
+
+/*  Copy elements between a contiguous buffer and the elements of
+    scatter_buf selected by space_id, visiting them with H5Diterate.
+    H5PY_SCATTER copies from contig_buf into the selection; H5PY_GATHER
+    copies from the selection into contig_buf.
+
+    Returns 0 on success, -1 if the element size or selection size cannot be
+    determined, if op is not a known operation, or if the iteration fails. */
+herr_t h5py_copy(hid_t type_id, hid_t space_id, void* contig_buf, 
+                 void* scatter_buf, h5py_copy_t op){
+
+    h5py_scatter_t  state;
+    H5D_operator_t  visit;
+
+    state.el_size = H5Tget_size(type_id);
+    if(state.el_size==0) return -1;
+
+    /* Only used to validate the selection; the count itself is not needed */
+    if(H5Sget_select_npoints(space_id)<0) return -1;
+
+    state.i = 0;
+    state.buf = contig_buf;
+
+    if(op==H5PY_SCATTER){
+        visit = h5py_scatter_cb;
+    } else if(op==H5PY_GATHER){
+        visit = h5py_gather_cb;
+    } else {
+        return -1;
+    }
+
+    if(H5Diterate(scatter_buf, type_id, space_id, visit, &state)<0) return -1;
+
+    return 0;
+}
+
+
+
+
+
+    
diff --git a/h5py/typeproxy.h b/h5py/typeproxy.h
new file mode 100644
index 0000000..f2b7c7b
--- /dev/null
+++ b/h5py/typeproxy.h
@@ -0,0 +1,55 @@
+/***** Preamble block *********************************************************
+* 
+* This file is part of h5py, a low-level Python interface to the HDF5 library.
+* 
+* Copyright (C) 2008 Andrew Collette
+* http://h5py.alfven.org
+* License: BSD  (See LICENSE.txt for full license)
+* 
+* $Date$
+* 
+****** End preamble block ****************************************************/
+
+/*
+    Conversion routines designed to support the use of variable-length,
+    reference and other types which suffer from the HDF5 type-conversion
+    bug.
+*/
+
+#include "hdf5.h"
+
+#ifndef H5PY_TYPEPROXY_H
+#define H5PY_TYPEPROXY_H
+
+/* Proxy functions for reading and writing datasets and attributes */
+
+typedef enum {         /* Direction of an H5PY_*_rw transfer */
+    H5PY_WRITE = 0,    /* Store the caller's buffer (H5Dwrite / H5Awrite) */
+    H5PY_READ          /* Fill the caller's buffer (H5Dread / H5Aread) */
+} h5py_rw_t;
+
+herr_t H5PY_dset_rw(hid_t dset, hid_t mtype, hid_t mspace_in, hid_t fspace_in,
+                   hid_t xfer_plist, void* buf, h5py_rw_t dir);
+
+herr_t H5PY_attr_rw(hid_t attr, hid_t mtype, void* buf, h5py_rw_t dir);
+
+
+/*  Copy data back & forth between a contiguous buffer and a dataspace 
+    selection.  The dataspace must be a "real" dataspace; the value
+    H5S_ALL is not acceptable. */
+
+typedef enum {         /* Direction of an h5py_copy operation */
+    H5PY_SCATTER = 0,  /* Contiguous buffer -> selected elements */
+    H5PY_GATHER        /* Selected elements -> contiguous buffer */
+} h5py_copy_t;
+
+herr_t h5py_copy(hid_t type_id, hid_t space_id, void* contig_buf, 
+                 void* scatter_buf, h5py_copy_t op);
+
+
+#endif
+
+
+
+
+
diff --git a/setup.py b/setup.py
index b4edf09..0d60214 100644
--- a/setup.py
+++ b/setup.py
@@ -61,7 +61,9 @@ MODULES = ['h5', 'h5e', 'h5f', 'h5g', 'h5s', 'h5t', 'h5d', 'h5a', 'h5p', 'h5z',
 
 EXTRA_SRC = {'h5': [ localpath("lzf/lzf_filter.c"), 
                      localpath("lzf/lzf/lzf_c.c"),
-                     localpath("lzf/lzf/lzf_d.c") ]    }
+                     localpath("lzf/lzf/lzf_d.c"),
+                     localpath("h5py/typeproxy.c"),
+                     localpath("h5py/typeconv.c") ]}
 
 # --- Imports -----------------------------------------------------------------
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git



More information about the debian-science-commits mailing list