[python-hdf5storage] 53/152: Fixed bugs with writing and reading empty strings, in addition to a small file closing bug.

Mon Feb 29 08:24:33 UTC 2016

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 0.1
in repository python-hdf5storage.

commit 0a8db475b05b3460600c43b8463fc8adc6b77467
Author: Freja Nordsiek <fnordsie at gmail.com>
Date:   Tue Jan 28 00:41:32 2014 -0500

    Fixed bugs with writing and reading empty strings, in addition to a small file closing bug.
---
 hdf5storage/Marshallers.py   | 67 +++++++++++++++++++++++++++++---------------
 hdf5storage/__init__.py      |  4 ++-
 hdf5storage/utilities.py     |  8 ++++--
 tests/test_write_readback.py |  3 --
 4 files changed, 54 insertions(+), 28 deletions(-)

diff --git a/hdf5storage/Marshallers.py b/hdf5storage/Marshallers.py
index 0e2e3d8..97a5c38 100644
--- a/hdf5storage/Marshallers.py
+++ b/hdf5storage/Marshallers.py
@@ -376,11 +376,13 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
         # simply converting to uint16's. This will require making them
         # at least 1 dimensinal.
 
-        if options.convert_strings_to_utf16 and not (data.size == 0 \
-                and options.store_shape_for_empty) \
-                and data.dtype.type == np.bytes_:
-            data_to_store = np.uint16(np.atleast_1d( \
-                            data_to_store).view(np.uint8))
+        if data.dtype.type == np.bytes_ \
+                and options.convert_strings_to_utf16:
+            if data_to_store.nbytes == 0:
+                data_to_store = np.uint16([])
+            else:
+                data_to_store = np.uint16(np.atleast_1d( \
+                    data_to_store).view(np.uint8))
 
         # As of 2013-12-13, h5py cannot write numpy.unicode (UTF-32
         # encoding) types. If it is just a numpy.unicode object, we can
@@ -389,9 +391,12 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
         # are not always 2 bytes long in UTF-16. So, converting them to
         # uint32 makes the most sense.
 
-        if data.dtype.type == np.str_ and not (data.size == 0 \
-                and options.store_shape_for_empty):
-            data_to_store = np.atleast_1d(data_to_store).view(np.uint32)
+        if data.dtype.type == np.str_:
+            if data_to_store.nbytes == 0:
+                data_to_store = np.uint32([])
+            else:
+                data_to_store = np.atleast_1d( \
+                    data_to_store).view(np.uint32)
 
         # Convert scalars to arrays if that option is set.
 
@@ -411,8 +416,11 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
         # If data is empty, we instead need to store the shape of the
         # array if the appropriate option is set.
 
-        if options.store_shape_for_empty and data.size == 0:
-            data_to_store = np.uint64(data.shape)
+        if options.store_shape_for_empty and (data.size == 0 \
+                or ((data.dtype.type == np.bytes_ \
+                or data.dtype.type == np.str_) \
+                and data.nbytes == 0)):
+            data_to_store = np.uint64(data_to_store.shape)
 
         # If it is a complex type, then it needs to be encoded to have
         # the proper complex field names.
@@ -472,13 +480,14 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
             set_attribute_string(grp[name], 'CPython.numpy.Container',
                                  container)
 
-        # If data is empty and we are supposed to store shape info for
-        # empty data, we need to set the CPython.Empty and MATLAB_empty
-        # attributes to 1 if we are storing type info or making it
-        # MATLAB compatible. Otherwise, no empty attribute is set and
-        # existing ones must be deleted.
+        # If data is empty, we need to set the CPython.Empty and
+        # MATLAB_empty attributes to 1 if we are storing type info or
+        # making it MATLAB compatible. Otherwise, no empty attribute is
+        # set and existing ones must be deleted.
 
-        if options.store_shape_for_empty and data.size == 0:
+        if data.size == 0  or ((data.dtype.type == np.bytes_ \
+                or data.dtype.type == np.str_)
+                and data.nbytes == 0):
             if options.store_type_information:
                 set_attribute(grp[name], 'CPython.Empty',
                                           np.uint8(1))
@@ -552,10 +561,15 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
             # If it is empty ('CPython.Empty' set to 1), then the shape
             # information is stored in data and we need to set data to
             # the empty array of the proper type (in underlying_type)
-            # and the given shape.
+            # and the given shape. If we are going to transpose it
+            # later, we need to transpose it now so that it still keeps
+            # the right shape.
             if cpython_empty == 1:
-                data = np.zeros(tuple(np.uint64(data)),
+                data = np.zeros(tuple(shape),
                                 dtype=underlying_type)
+                if matlab_class is not None or \
+                        options.reverse_dimension_order:
+                    data = data.T
 
             # If MATLAB attributes are present or the reverse dimension
             # order option was given, the dimension order needs to be
@@ -584,9 +598,15 @@ class NumpyScalarArrayMarshaller(TypeMarshaller):
                 data = np.bool_(data)
 
             # Convert to scalar, matrix, or ndarray depending on the
-            # container type.
+            # container type. For an empty scalar string, it needs to be
+            # manually set to '' and b'' or there will be problems.
             if container == 'scalar':
                 data = data[()]
+                if cpython_empty == 1:
+                    if underlying_type == 'bytes':
+                        data = np.bytes_(b'')
+                    elif underlying_type == 'str':
+                        data = np.str_('')
             elif container == 'matrix':
                 data = np.asmatrix(data)
             elif container == 'ndarray':
@@ -710,11 +730,14 @@ class PythonStringMarshaller(NumpyScalarArrayMarshaller):
         # as is.
         type_string = get_attribute_string(grp[name], 'CPython.Type')
         if type_string == 'str':
-            return data.tostring().decode()
+            if isinstance(data, np.ndarray):
+                return data.tostring().decode()
+            else:
+                return data.decode()
         elif type_string == 'bytes':
-            return data.tostring()
+            return bytes(data)
         elif type_string == 'bytearray':
-            return bytearray(data.tostring())
+            return bytearray(data)
         else:
             return data
 
diff --git a/hdf5storage/__init__.py b/hdf5storage/__init__.py
index 6e19f6c..a6b4a0e 100644
--- a/hdf5storage/__init__.py
+++ b/hdf5storage/__init__.py
@@ -1013,6 +1013,7 @@ def read(name='/', filename='data.h5',
     # in a try block, so that the file can be closed if any errors
     # happen (the error is re-raised).
     try:
+        f = None
         f = h5py.File(filename, mode='r')
 
         # Check that the containing group is in f and is indeed a
@@ -1027,6 +1028,7 @@ def read(name='/', filename='data.h5',
     except:
         raise
     finally:
-        f.close()
+        if f is not None:
+            f.close()
 
     return data
diff --git a/hdf5storage/utilities.py b/hdf5storage/utilities.py
index 0d8a399..d4c7a32 100644
--- a/hdf5storage/utilities.py
+++ b/hdf5storage/utilities.py
@@ -75,8 +75,12 @@ def decode_to_str(data):
         elif data.dtype.name.startswith('bytes'):
             return data.decode(encoding='ASCII')
         else:
-            return data.encode(encoding='UTF-32').decode( \
-                encoding='UTF-32', errors='replace')
+            if isinstance(data, np.ndarray):
+                return data.tostring().decode(encoding='UTF-32',
+                                              errors='replace')
+            else:
+                return data.encode(encoding='UTF-32').decode( \
+                    encoding='UTF-32', errors='replace')
 
     if isinstance(data, bytes):
         return data.decode(encoding='ASCII')
diff --git a/tests/test_write_readback.py b/tests/test_write_readback.py
index 3e283a9..b7036ed 100755
--- a/tests/test_write_readback.py
+++ b/tests/test_write_readback.py
@@ -139,7 +139,6 @@ class TestWriteReadbackCpythonMatlab(unittest.TestCase):
                                       self.options)
         self.assert_equal_str(data, out)
 
-    @unittest.expectedFailure
     def test_str_empty(self):
         data = ''
         out = self.write_readback_str(data, self.random_name(),
@@ -152,7 +151,6 @@ class TestWriteReadbackCpythonMatlab(unittest.TestCase):
                                         self.options)
         self.assert_equal_bytes(data, out)
 
-    @unittest.expectedFailure
     def test_bytes_empty(self):
         data = b''
         out = self.write_readback_bytes(data, self.random_name(),
@@ -165,7 +163,6 @@ class TestWriteReadbackCpythonMatlab(unittest.TestCase):
                                             self.options)
         self.assert_equal_bytearray(data, out)
 
-    @unittest.expectedFailure
     def test_bytearray_empty(self):
         data = bytearray(b'')
         out = self.write_readback_bytearray(data, self.random_name(),

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-hdf5storage.git