[python-hdf5storage] 137/152: Added Python 2.7 support.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Feb 29 08:24:42 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 0.1
in repository python-hdf5storage.

commit f64d310db129c694cc88382b82369929e7b0cabc
Author: Freja Nordsiek <fnordsie at gmail.com>
Date:   Sat Feb 15 23:56:18 2014 -0500

    Added Python 2.7 support.
---
 README.rst                    |  23 +++++++--
 doc/source/storage_format.rst |  12 +++--
 hdf5storage/Marshallers.py    | 114 ++++++++++++++++++++++++++++++------------
 hdf5storage/utilities.py      |  48 +++++++++++-------
 setup.py                      |   3 +-
 tests/asserts.py              |  15 ++++--
 tests/test_write_readback.py  |  43 ++++++++++------
 7 files changed, 183 insertions(+), 75 deletions(-)

diff --git a/README.rst b/README.rst
index e195ae7..6657a6e 100644
--- a/README.rst
+++ b/README.rst
@@ -16,15 +16,31 @@ http://opensource.org/licenses/BSD-2-Clause).
 Installation
 ============
 
-This package will not work on Python < 3.0.
+This package will probably not work on Python < 2.7.
 
-This package requires the numpy and h5py (>= 2.0) packages. An optional
+This package requires the numpy and h5py (>= 2.1) packages. An optional
 dependency is the scipy package.
 
 To install hdf5storage, download the package and run the command::
 
     python3 setup.py install
 
+Python 2.7
+==========
+
+This package was designed and written for Python 3, with Python 2.7
+support added later. This does mean that a few things are a little
+clunky in Python 2. Examples include only supporting ``unicode`` keys for
+dictionaries, not being able to import a structured ``numpy.ndarray`` if
+any of its fields contain characters outside of ASCII, the ``int`` and
+``long`` types both being mapped to the Python 3 ``int`` type, etc. The
+storage format's metadata looks more familiar from a Python 3 standpoint
+as well.
+
+All documentation and examples are written in terms of Python 3 syntax
+and types. Important Python 2 information beyond direct translations of
+syntax and types will be pointed out.
+
 Hierarchical Data Format 5 (HDF5)
 =================================
 
@@ -129,7 +145,7 @@ np.recarray    0.1      structured np.ndarray       [5]_ [6]_    0.1 [5]_
        (or implicitly through doing MATLAB compatibility), it will be
        stored as ``np.uint16`` in UTF-16 encoding. Otherwise, it is just
        written as ``np.bytes_``.
-.. [4] All keys must be ``str``.
+.. [4] All keys must be ``str`` in Python 3 or ``unicode`` in Python 2.
 .. [5] Container types are only supported if their underlying dtype is
        supported. Data conversions are done based on its dtype.
 .. [6] Structured ``np.ndarray`` s (have fields in their dtypes) can be
@@ -159,6 +175,7 @@ int8             0.1      np.int8
 int16            0.1      np.int16
 int32            0.1      np.int32
 int64            0.1      np.int64
+char             0.1      np.str\_
 struct           0.1      structured np.ndarray
 cell             0.1      np.object\_
 canonical empty  0.1      ``np.float64([])``
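
As a quick illustration of the caveats described in the new Python 2.7
section above, here is a minimal sketch. It is not part of the commit; it
assumes the package's top-level ``hdf5storage.write`` call (data, path,
filename) shown in the README and a throwaway file name ``example.h5``::

    import sys

    import hdf5storage

    # Dictionary keys must be unicode under Python 2 (str under Python 3);
    # other key types raise NotImplementedError.
    hdf5storage.write({u'a': 1, u'b': 2.5}, '/', 'example.h5')

    if sys.hexversion < 0x03000000:
        # Python 2 can save both int and long; Python 3 maps both type
        # strings back to its single int type when reading.
        hdf5storage.write({u'n': long(10)}, '/nested', 'example.h5')
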
diff --git a/doc/source/storage_format.rst b/doc/source/storage_format.rst
index 586b8ec..451362e 100644
--- a/doc/source/storage_format.rst
+++ b/doc/source/storage_format.rst
@@ -23,11 +23,11 @@ must have a 512 byte userblock, of which 128 bytes are used. The 128
 bytes consists of a 116 byte string (spaces pad the end) followed by a
 specific 12 byte sequence (magic number). On MATLAB, the 116 byte string, depending on the computer system and the date, looks like ::
 
-    'MATLAB 7.3 MAT-file, Platform: GLNXA64, Created on: Fri Feb 07 02:29:00 2014 HDF5 schema 1.00 .'
+    b'MATLAB 7.3 MAT-file, Platform: GLNXA64, Created on: Fri Feb 07 02:29:00 2014 HDF5 schema 1.00 .'
 
 This package just changes the Platform part to ::
 
-    'CPython A.B.C'
+    b'CPython A.B.C'
 
 Where A, B, and C are the major, minor, and micro version numbers of the Python interpreter (e.g. 3.3.0).
 
@@ -104,7 +104,7 @@ np.recarray    0.1      structured np.ndarray [5]_             Dataset or Group [
        ``matlab_compatible == True``), it will be stored as
        ``np.uint16`` in UTF-16 encoding. Otherwise, it is just written
        as ``np.bytes_``.
-.. [4] All keys must be ``str``.
+.. [4] All keys must be ``str`` in Python 3 or ``unicode`` in Python 2.
 .. [5] If it doesn't have any fields in its dtype or if
        :py:attr:`Options.structured_numpy_ndarray_as_struct` is not set, it
        is not converted and is written as is as a Dataset. Otherwise, it
@@ -326,6 +326,11 @@ under the field name in the Groups.
    can't be read back from the file accurately. The dtype for all the
    fields will become 'object' instead of what they originally were.
 
+.. note::
+
+   In Python 2, structured ``np.ndarray`` s cannot be imported if any
+   of their fields have characters outside of ASCII.
+
 
 Optional Data Transformations
 =============================
@@ -462,6 +467,7 @@ int8             0.1      np.int8
 int16            0.1      np.int16
 int32            0.1      np.int32
 int64            0.1      np.int64
+char             0.1      np.str\_
 struct           0.1      structured np.ndarray
 cell             0.1      np.object\_
 canonical empty  0.1      ``np.float64([])``
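
The 128 byte header described above sits in the userblock in front of the
HDF5 data, so it can be inspected with plain file I/O. A minimal sketch,
assuming a MATLAB compatible file has already been written to ``data.h5``::

    with open('data.h5', 'rb') as f:
        header = f.read(116)   # 116 byte string, padded at the end with spaces
        magic = f.read(12)     # the specific 12 byte sequence (magic number)

    # Prints something like
    # b'MATLAB 7.3 MAT-file, Platform: CPython 3.3.0, Created on: ...'
    print(header.rstrip(b' '))
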
diff --git a/hdf5storage/Marshallers.py b/hdf5storage/Marshallers.py
index 859de1c..9d01e4f 100644
--- a/hdf5storage/Marshallers.py
+++ b/hdf5storage/Marshallers.py
@@ -28,6 +28,7 @@
 
 """
 
+import sys
 import posixpath
 import collections
 
@@ -463,6 +464,9 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
                                    'Python.Fields'}
         self.matlab_attributes |= {'MATLAB_class', 'MATLAB_empty',
                                    'MATLAB_int_decode'}
+        # As np.str_ is the unicode string type in Python 3 but the bare
+        # bytes string type in Python 2, np.unicode_ is used instead,
+        # since it is (or aliases) the unicode type in both versions.
         self.types = [np.ndarray, np.matrix,
                       np.chararray, np.core.records.recarray,
                       np.bool_, np.void,
@@ -470,7 +474,8 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
                       np.int8, np.int16, np.int32, np.int64,
                       np.float16, np.float32, np.float64,
                       np.complex64, np.complex128,
-                      np.bytes_, np.str_, np.object_]
+                      np.bytes_, np.unicode_, np.object_]
+        # Using Python 3 type strings.
         self.python_type_strings = ['numpy.ndarray', 'numpy.matrix',
                                     'numpy.chararray',
                                     'numpy.recarray',
@@ -505,7 +510,7 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
                                  np.complex64: 'single',
                                  np.complex128: 'double',
                                  np.bytes_: 'char',
-                                 np.str_: 'char',
+                                 np.unicode_: 'char',
                                  np.object_: 'cell'}
 
         # Make a dict to look up the opposite direction (given a matlab
@@ -522,7 +527,7 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
                                          'int64': np.int64,
                                          'single': np.float32,
                                          'double': np.float64,
-                                         'char': np.str_,
+                                         'char': np.unicode_,
                                          'cell': np.object_,
                                          'canonical empty': np.float64}
 
@@ -570,16 +575,17 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
                     data_to_store).view(np.uint8))
 
         # As of 2013-12-13, h5py cannot write numpy.str_ (UTF-32
-        # encoding) types. If the option is set to try to convert them
-        # to UTF-16, then an attempt at the conversion is made. If no
-        # conversion is to be done, the conversion throws an exception
-        # (a UTF-32 character had no UTF-16 equivalent), or a UTF-32
-        # character gets turned into a UTF-16 doublet (the increase in
-        # the number of columns will be by a factor more than the length
-        # of the strings); then it will be simply converted to uint32's
-        # byte for byte instead.
-
-        if data.dtype.type == np.str_:
+        # encoding) types (it's numpy.unicode_ in Python 2, which is an
+        # alias for it in Python 3). If the option is set to try to
+        # convert them to UTF-16, then an attempt at the conversion is
+        # made. If no conversion is to be done, if the conversion throws
+        # an exception (a UTF-32 character has no UTF-16 equivalent), or
+        # if a UTF-32 character gets turned into a UTF-16 doublet (the
+        # number of columns increases by a factor of more than the
+        # length of the strings), then it is simply converted to
+        # uint32's byte for byte instead.
+
+        if data.dtype.type == np.unicode_:
             new_data = None
             if options.convert_numpy_str_to_utf16:
                 try:
@@ -776,9 +782,16 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
         if options.store_python_metadata:
             set_attribute(grp[name], 'Python.Shape',
                           np.uint64(data.shape))
-            set_attribute_string(grp[name],
-                                 'Python.numpy.UnderlyingType',
-                                 data.dtype.name)
+
+            # Now, in Python 3, the dtype names for bare bytes and
+            # unicode strings start with 'bytes' and 'str' respectively,
+            # but in Python 2, they start with 'string' and 'unicode'
+            # respectively. The Python 2 ones must be converted to the
+            # Python 3 ones for writing.
+            set_attribute_string(grp[name], \
+                'Python.numpy.UnderlyingType', \
+                data.dtype.name.replace('string', 'bytes').replace( \
+                'unicode', 'str'))
             if isinstance(data, np.matrix):
                 container = 'matrix'
             elif isinstance(data, np.chararray):
@@ -896,7 +909,9 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
             # than H5PATH since that means that the fields are the
             # values (single element structured ndarray), as opposed to
             # Reference arrays to all the values (multi-element structured
-            # ndarray).
+            # ndarray). In Python 2, the field names need to be
+            # converted to ASCII from unicode when storing the fields in
+            # struct_data.
             struct_data = dict()
             is_multi_element = True
             for k in grp[name]:
@@ -913,7 +928,12 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
                         != 0:
                     is_multi_element = False
                 try:
-                    struct_data[k] = read_data(f, grp[name], k, options)
+                    if sys.hexversion >= 0x03000000:
+                        struct_data[k] = read_data(f, grp[name], k,
+                                                   options)
+                    else:
+                        struct_data[k.encode()] = read_data(f, \
+                            grp[name], k, options)
                 except:
                     pass
 
@@ -1042,7 +1062,7 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
             elif underlying_type.startswith('str') \
                     or matlab_class == 'char':
                 if underlying_type == 'str':
-                    data = np.str_('')
+                    data = np.unicode_('')
                 elif underlying_type.startswith('str'):
                     data = convert_to_numpy_str(data, \
                         length=int(underlying_type[3:])//32)
@@ -1073,7 +1093,7 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
                         data = data.flatten()[0]
                 elif underlying_type.startswith('str'):
                     if python_empty == 1:
-                        data = np.bytes_(b'')
+                        data = np.unicode_('')
                     elif isinstance(data, np.ndarray):
                         data = data.flatten()[0]
                 else:
@@ -1124,8 +1144,18 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
 class PythonScalarMarshaller(NumpyScalarArrayMarshaller):
     def __init__(self):
         NumpyScalarArrayMarshaller.__init__(self)
-        self.types = [bool, int, float, complex]
-        self.python_type_strings = ['bool', 'int', 'float', 'complex']
+
+        # In Python 3, there is only a single integer type int, which is
+        # variable width. In Python 2, there is the fixed width int and
+        # the variable width long. Python 2 needs to be able to save
+        # with either, but Python 3 needs to map both to int, which is
+        # done by putting int in both of their entries in types.
+        if sys.hexversion >= 0x03000000:
+            self.types = [bool, int, int, float, complex]
+        else:
+            self.types = [bool, int, long, float, complex]
+        self.python_type_strings = ['bool', 'int', 'long', 'float',
+                                    'complex']
         # As the parent class already has MATLAB strings handled, there
         # are no MATLAB classes that this marshaller should be used for.
         self.matlab_classes = []
@@ -1166,7 +1196,14 @@ class PythonScalarMarshaller(NumpyScalarArrayMarshaller):
 class PythonStringMarshaller(NumpyScalarArrayMarshaller):
     def __init__(self):
         NumpyScalarArrayMarshaller.__init__(self)
-        self.types = [str, bytes, bytearray]
+        # In Python 3, the unicode and bare bytes string types are str
+        # and bytes, but in Python 2, they are unicode and str
+        # respectively. The Python 3 python_type_strings will be used
+        # in both cases, though.
+        if sys.hexversion >= 0x03000000:
+            self.types = [str, bytes, bytearray]
+        else:
+            self.types = [unicode, str, bytearray]
         self.python_type_strings = ['str', 'bytes', 'bytearray']
         # As the parent class already has MATLAB strings handled, there
         # are no MATLAB classes that this marshaller should be used for.
@@ -1200,7 +1237,10 @@ class PythonStringMarshaller(NumpyScalarArrayMarshaller):
             else:
                 return data.decode()
         elif type_string == 'bytes':
-            return bytes(data)
+            if sys.hexversion >= 0x03000000:
+                return bytes(data)
+            else:
+                return str(data)
         elif type_string == 'bytearray':
             return bytearray(data)
         else:
@@ -1265,14 +1305,24 @@ class PythonDictMarshaller(TypeMarshaller):
             for field in {i for i in grp2}.difference({i for i in data}):
                 del grp2[field]
 
-        # Check for any field names that are not strings since they
-        # cannot be handled.
-
-        for fieldname in data:
-            if not isinstance(fieldname, str):
-                raise NotImplementedError('Dictionaries with non-string'
-                                          + ' keys are not supported: '
-                                          + repr(fieldname))
+        # Check for any field names that are not unicode since they
+        # cannot be handled. How it is checked (what type it is) and the
+        # error message are different for each Python version.
+
+        if sys.hexversion >= 0x03000000:
+            for fieldname in data:
+                if not isinstance(fieldname, str):
+                    raise NotImplementedError('Dictionaries with non-'
+                                              + 'str keys are not '
+                                              + 'supported: '
+                                              + repr(fieldname))
+        else:
+            for fieldname in data:
+                if not isinstance(fieldname, unicode):
+                    raise NotImplementedError('Dictionaries with non-'
+                                              + 'unicode keys are not '
+                                              + 'supported: '
+                                              + repr(fieldname))
 
         # Go through all the elements of data and write them. The H5PATH
         # needs to be set as the path of grp2 on all of them if we are
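
The switch to ``np.unicode_`` and the ``replace()`` calls on
``Python.numpy.UnderlyingType`` in the marshaller above can both be seen
directly from the dtype names. A minimal standalone sketch, using the
numpy names as they existed at the time::

    import numpy as np

    # np.unicode_ is the UTF-32 string scalar type in both Python versions;
    # np.str_ only aliases it in Python 3 (in Python 2 it is the bytes type).
    s = np.unicode_(u'abc')
    print(s.dtype.name)   # 'str96' in Python 3, 'unicode96' in Python 2

    # The same normalization applied before the attribute is written.
    print(s.dtype.name.replace('string', 'bytes').replace('unicode', 'str'))
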
diff --git a/hdf5storage/utilities.py b/hdf5storage/utilities.py
index fed65c5..eb35905 100644
--- a/hdf5storage/utilities.py
+++ b/hdf5storage/utilities.py
@@ -28,6 +28,8 @@
 
 """
 
+import sys
+import copy
 import string
 import random
 
@@ -215,14 +217,14 @@ def convert_to_str(data):
     # assuming it is in ASCII. Otherwise, data has to be returned as is.
 
     if isinstance(data, (np.ndarray, np.uint8, np.uint16, np.uint32,
-                  np.bytes_, np.str_)):
+                  np.bytes_, np.unicode_)):
         if data.dtype.name == 'uint8':
             return data.flatten().tostring().decode(encoding='ASCII')
         elif data.dtype.name == 'uint16':
             return data.tostring().decode(encoding='UTF-16')
         elif data.dtype.name == 'uint32':
             return data.flatten().tostring().decode(encoding='UTF-32')
-        elif data.dtype.name.startswith('bytes'):
+        elif data.dtype.char == 'S':
             return data.decode(encoding='ASCII')
         else:
             if isinstance(data, np.ndarray):
@@ -285,23 +287,25 @@ def convert_to_numpy_str(data, length=None):
 
     """
     # The method of conversion depends on its type.
-    if isinstance(data, np.str_) or (isinstance(data, np.ndarray) \
+    if isinstance(data, np.unicode_) or (isinstance(data, np.ndarray) \
             and data.dtype.char == 'U'):
         # It is already an np.str_ or array of them, so nothing needs to
         # be done.
         return data
-    elif isinstance(data, str):
+    elif (sys.hexversion >= 0x03000000 and isinstance(data, str)) \
+           or (sys.hexversion < 0x03000000 \
+           and isinstance(data, unicode)):
         # Easily converted through constructor.
-        return np.str_(data)
+        return np.unicode_(data)
     elif isinstance(data, (bytes, bytearray, np.bytes_)):
         # All of them can be decoded and then passed through the
         # constructor.
-        return np.str_(data.decode())
+        return np.unicode_(data.decode())
     elif isinstance(data, (np.uint8, np.uint16)):
         # They are single ASCII or UTF-16 scalars, and are easily
         # converted to a UTF-8 string and then passed through the
         # constructor.
-        return np.str_(convert_to_str(data))
+        return np.unicode_(convert_to_str(data))
     elif isinstance(data, np.uint32):
         # It is just the uint32 version of the character, so it just
         # needs to have the dtype essentially changed by having its
@@ -313,7 +317,7 @@ def convert_to_numpy_str(data, length=None):
         new_data = np.zeros(shape=data.shape,
                             dtype='U' + str(data.dtype.itemsize))
         for index, x in np.ndenumerate(data):
-            new_data[index] = np.str_(x.decode())
+            new_data[index] = np.unicode_(x.decode())
         return new_data
     elif isinstance(data, np.ndarray) \
             and data.dtype.name in ('uint8', 'uint16', 'uint32'):
@@ -342,7 +346,7 @@ def convert_to_numpy_str(data, length=None):
         else:
             if length is None:
                 length = shape[-1]
-            new_shape = shape.copy()
+            new_shape = copy.deepcopy(shape)
             new_shape[-1] //= length
 
         # The new array can be made as all zeros (nulls) with enough
@@ -364,7 +368,7 @@ def convert_to_numpy_str(data, length=None):
                                          dtype=new_data.dtype,
                                          buffer=chunk.tostring())[()]
             else:
-                new_data[i] = np.str_(convert_to_str(chunk))
+                new_data[i] = np.unicode_(convert_to_str(chunk))
 
         # Only thing is left is to reshape it.
         return new_data.reshape(tuple(new_shape))
@@ -424,9 +428,12 @@ def convert_to_numpy_bytes(data, length=None):
         # It is already an np.bytes_ or array of them, so nothing needs
         # to be done.
         return data
-    elif isinstance(data, (str, bytes, bytearray)):
+    elif (sys.hexversion >= 0x03000000 \
+            and isinstance(data, (str, bytes, bytearray))) \
+            or (sys.hexversion < 0x03000000 \
+            and isinstance(data, (unicode, bytes, bytearray))):
         # Easily converted through constructor.
-        return np.str_(data)
+        return np.bytes_(data)
     elif isinstance(data, (np.uint16, np.uint32)):
         # They are single UTF-16 or UTF-32 scalars, and are easily
         # converted to a UTF-8 string and then passed through the
@@ -473,7 +480,7 @@ def convert_to_numpy_bytes(data, length=None):
         else:
             if length is None:
                 length = shape[-1]
-            new_shape = shape.copy()
+            new_shape = copy.deepcopy(shape)
             new_shape[-1] //= length
 
         # The new array can be made as all zeros (nulls) with enough
@@ -679,11 +686,13 @@ def get_attribute_string(target, name):
     value = get_attribute(target, name)
     if value is None:
         return value
-    elif isinstance(value, str):
+    elif (sys.hexversion >= 0x03000000 and isinstance(value, str)) \
+            or (sys.hexversion < 0x03000000 \
+            and isinstance(value, unicode)):
         return value
     elif isinstance(value, bytes):
         return value.decode()
-    elif isinstance(value, np.str_):
+    elif isinstance(value, np.unicode_):
         return str(value)
     elif isinstance(value, np.bytes_):
         return value.decode()
@@ -779,8 +788,13 @@ def set_attribute_string_array(target, name, string_list):
         List of strings to set the attribute to. Strings must be ``str``
 
     """
-    target.attrs.create(name, string_list,
-                        dtype=h5py.special_dtype(vlen=str))
+    s_list = [convert_to_str(s) for s in string_list]
+    if sys.hexversion >= 0x03000000:
+        target.attrs.create(name, s_list,
+                            dtype=h5py.special_dtype(vlen=str))
+    else:
+        target.attrs.create(name, s_list,
+                            dtype=h5py.special_dtype(vlen=unicode))
 
 
 def del_attribute(target, name):
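
The version switch in ``set_attribute_string_array`` above is the usual
h5py idiom for variable-length unicode attributes. A minimal standalone
sketch of the same pattern, assuming an already-open ``h5py.File`` handle
``f`` and a hypothetical attribute name::

    import sys

    import h5py

    def vlen_unicode_dtype():
        # h5py wants the platform's unicode text type: str on Python 3,
        # unicode on Python 2.
        if sys.hexversion >= 0x03000000:
            return h5py.special_dtype(vlen=str)
        else:
            return h5py.special_dtype(vlen=unicode)  # name only exists in Python 2

    f.attrs.create('Python.Fields', [u'a', u'b'],
                   dtype=vlen_unicode_dtype())
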
diff --git a/setup.py b/setup.py
index 30f82a0..18cfcc6 100644
--- a/setup.py
+++ b/setup.py
@@ -14,10 +14,11 @@ setup(name='hdf5storage',
       author_email='fnordsie at gmail dt com',
       url='https://github.com/frejanordsiek/hdf5storage',
       packages=['hdf5storage'],
-      requires=['numpy', 'h5py (>= 2.0)'],
+      requires=['numpy', 'h5py (>= 2.1)'],
       license='BSD',
       keywords='hdf5 matlab',
       classifiers=[
+          "Programming Language :: Python :: 2.7"
           "Programming Language :: Python :: 3",
           "Development Status :: 3 - Alpha",
           "License :: OSI Approved :: BSD License",
diff --git a/tests/asserts.py b/tests/asserts.py
index 891e26b..81e6725 100644
--- a/tests/asserts.py
+++ b/tests/asserts.py
@@ -24,6 +24,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import sys
 import collections
 
 import numpy as np
@@ -95,7 +96,10 @@ def assert_equal_none_format(a, b):
             assert type(a) == np.ndarray
             assert a.dtype == np.float64([]).dtype
             assert a.shape == (0, )
-        elif isinstance(b, (bytes, str, bytearray)):
+        elif (sys.hexversion >= 0x03000000 \
+                and isinstance(b, (bytes, str, bytearray))) \
+                or (sys.hexversion < 0x03000000 \
+                and isinstance(b, (bytes, unicode, bytearray))):
             assert a == np.bytes_(b)
         else:
             assert_equal_none_format(a, np.array(b)[()])
@@ -159,13 +163,16 @@ def assert_equal_matlab_format(a, b):
             assert type(a) == np.ndarray
             assert a.dtype == np.dtype('float64')
             assert a.shape == (1, 0)
-        elif isinstance(b, (bytes, str, bytearray)):
+        elif (sys.hexversion >= 0x03000000 \
+                and isinstance(b, (bytes, str, bytearray))) \
+                or (sys.hexversion < 0x03000000 \
+                and isinstance(b, (bytes, unicode, bytearray))):
             if len(b) == 0:
                 assert_equal(a, np.zeros(shape=(1, 0), dtype='U'))
             elif isinstance(b, (bytes, bytearray)):
-                assert_equal(a, np.atleast_2d(np.str_(b.decode())))
+                assert_equal(a, np.atleast_2d(np.unicode_(b.decode())))
             else:
-                assert_equal(a, np.atleast_2d(np.str_(b)))
+                assert_equal(a, np.atleast_2d(np.unicode_(b)))
         else:
             assert_equal(a, np.atleast_2d(np.array(b)))
     else:
diff --git a/tests/test_write_readback.py b/tests/test_write_readback.py
index f0a6cf2..3b46425 100644
--- a/tests/test_write_readback.py
+++ b/tests/test_write_readback.py
@@ -24,6 +24,8 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import sys
+import copy
 import os
 import os.path
 import posixpath
@@ -53,11 +55,14 @@ class TestPythonMatlabFormat(object):
         self.options = hdf5storage.Options()
 
         # Need a list of the supported numeric dtypes to test, excluding
-        # those not supported by MATLAB.
+        # those not supported by MATLAB. 'S' and 'U' dtype chars have to
+        # be used for the bare byte and unicode string dtypes since the
+        # dtype strings (but not chars) are not the same in Python 2 and
+        # 3.
         self.dtypes = ['bool', 'uint8', 'uint16', 'uint32', 'uint64',
                        'int8', 'int16', 'int32', 'int64',
                        'float32', 'float64', 'complex64', 'complex128',
-                       'bytes', 'str']
+                       'S', 'U']
 
         # Define the sizes of random datasets to use.
         self.max_string_length = 10
@@ -75,8 +80,14 @@ class TestPythonMatlabFormat(object):
 
     def random_str_ascii(self, length):
         # Makes a random ASCII str of the specified length.
-        ltrs = string.ascii_letters + string.digits
-        return ''.join([random.choice(ltrs) for i in range(0, length)])
+        if sys.hexversion >= 0x03000000:
+            ltrs = string.ascii_letters + string.digits
+            return ''.join([random.choice(ltrs) for i in \
+                range(0, length)])
+        else:
+            ltrs = unicode(string.ascii_letters + string.digits)
+            return u''.join([random.choice(ltrs) for i in \
+                range(0, length)])
 
     def random_bytes(self, length):
         # Makes a random sequence of bytes of the specified length from
@@ -101,17 +112,17 @@ class TestPythonMatlabFormat(object):
         # any other type, then it is just a matter of constructing the
         # right sized ndarray from a random sequence of bytes (all must
         # be forced to 0 and 1 for bool).
-        if dtype in 'bytes':
+        if dtype == 'S':
             length = random.randint(1, self.max_string_length)
             data = np.zeros(shape=shape, dtype='S' + str(length))
             for x in np.nditer(data, op_flags=['readwrite']):
                 x[...] = np.bytes_(self.random_bytes(length))
             return data
-        elif dtype == 'str':
+        elif dtype == 'U':
             length = random.randint(1, self.max_string_length)
             data = np.zeros(shape=shape, dtype='U' + str(length))
             for x in np.nditer(data, op_flags=['readwrite']):
-                x[...] = np.str_(self.random_str_ascii(length))
+                x[...] = np.unicode_(self.random_str_ascii(length))
             return data
         elif dtype == 'object':
             data = np.zeros(shape=shape, dtype='object')
@@ -134,12 +145,13 @@ class TestPythonMatlabFormat(object):
         # How a random scalar is made depends on the type. For most, it
         # is just a single number. But for the string types, it is a
         # string of any length.
-        if dtype == 'bytes':
+        if dtype == 'S':
             return np.bytes_(self.random_bytes(random.randint(1,
                              self.max_string_length)))
-        elif dtype == 'str':
-            return np.str_(self.random_str_ascii(
-                           random.randint(1, self.max_string_length)))
+        elif dtype == 'U':
+            return np.unicode_(self.random_str_ascii(
+                               random.randint(1,
+                               self.max_string_length)))
         else:
             return self.random_numpy(tuple(), dtype)[()]
 
@@ -169,7 +181,8 @@ class TestPythonMatlabFormat(object):
         data = dict()
         for i in range(0, random.randint(self.min_dict_keys, \
                 self.max_dict_keys)):
-            data[self.random_str_ascii(self.max_dict_key_length)] = \
+            name = self.random_str_ascii(self.max_dict_key_length)
+            data[name] = \
                 self.random_numpy(self.random_numpy_shape( \
                 self.dict_value_subarray_dimensions, \
                 self.max_dict_value_subarray_axis_length), \
@@ -356,19 +369,19 @@ class TestPythonMatlabFormat(object):
             yield self.check_numpy_scalar, dt
 
     def test_numpy_array_1d(self):
-        dtypes = self.dtypes.copy()
+        dtypes = copy.deepcopy(self.dtypes)
         dtypes.append('object')
         for dt in dtypes:
             yield self.check_numpy_array, dt, 1
 
     def test_numpy_array_2d(self):
-        dtypes = self.dtypes.copy()
+        dtypes = copy.deepcopy(self.dtypes)
         dtypes.append('object')
         for dt in dtypes:
             yield self.check_numpy_array, dt, 2
 
     def test_numpy_array_3d(self):
-        dtypes = self.dtypes.copy()
+        dtypes = copy.deepcopy(self.dtypes)
         dtypes.append('object')
         for dt in dtypes:
             yield self.check_numpy_array, dt, 3
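
The tests above switch from dtype names to the dtype characters 'S' and
'U' because those characters are spelled the same in Python 2 and 3,
unlike the 'bytes'/'str' versus 'string'/'unicode' names. A minimal
standalone sketch of the same kind of construction, with hypothetical
sizes::

    import random
    import string

    import numpy as np

    length = 5
    letters = string.ascii_letters + string.digits

    # Fixed-width unicode array built from the 'U' dtype character, which
    # reads the same under both Python versions.
    data = np.array([u''.join(random.choice(letters) for _ in range(length))
                     for _ in range(3)], dtype='U' + str(length))
    print(data.dtype)   # '<U5' on little-endian systems
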

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-hdf5storage.git


