[python-hdf5storage] 48/84: Added tests for write-readback of bytes encoded in UTF-8 and structured numpy.ndarray with non-ascii or UTF-8 encoded fields.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Feb 29 08:25:02 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 0.1.10
in repository python-hdf5storage.

commit 88caa63aea708edd1cf45d860e8a69eae06f5ff4
Author: Freja Nordsiek <fnordsie at gmail.com>
Date:   Sun Apr 26 04:40:19 2015 -0400

    Added tests for write-readback of bytes encoded in UTF-8 and structured numpy.ndarray with non-ascii or UTF-8 encoded fields.
---
 tests/asserts.py             |  3 ++-
 tests/test_write_readback.py | 48 +++++++++++++++++++++++++++++++++-----------
 2 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/tests/asserts.py b/tests/asserts.py
index 72b034b..2d76482 100644
--- a/tests/asserts.py
+++ b/tests/asserts.py
@@ -194,7 +194,8 @@ def assert_equal_matlab_format(a, b):
             if len(b) == 0:
                 assert_equal(a, np.zeros(shape=(1, 0), dtype='U'))
             elif isinstance(b, (bytes, bytearray)):
-                assert_equal(a, np.atleast_2d(np.unicode_(b.decode())))
+                assert_equal(a, np.atleast_2d(np.unicode_( \
+                    b.decode('UTF-8'))))
             else:
                 assert_equal(a, np.atleast_2d(np.unicode_(b)))
         else:
diff --git a/tests/test_write_readback.py b/tests/test_write_readback.py
index c98cab1..2751553 100644
--- a/tests/test_write_readback.py
+++ b/tests/test_write_readback.py
@@ -101,14 +101,12 @@ class TestPythonMatlabFormat(object):
     def random_str_some_unicode(self, length):
         # Makes a random ASCII+limited unicode str of the specified
         # length.
+        ltrs = self.random_str_ascii(10)
         if sys.hexversion >= 0x03000000:
-            ltrs = '\u03c0\u03c9\xe9'
-            return ''.join([random.choice(ltrs) for i in \
-                range(0, length)])
+            ltrs += ''.join([chr(500 + i) for i in range(100)])
         else:
-            ltrs = u'\u03c0\u03c9\xe9'
-            return u''.join([random.choice(ltrs) for i in \
-                range(0, length)])
+            ltrs += u''.join([unichr(500 + i) for i in range(100)])
+        return ''.join([random.choice(ltrs) for i in range(0, length)])
 
     def random_bytes(self, length):
         # Makes a random sequence of bytes of the specified length from
@@ -217,20 +215,27 @@ class TestPythonMatlabFormat(object):
                 dtype=random.choice(self.dtypes))
         return data
     
-    def random_structured_numpy_array(self, shape, field_shapes=None):
-        # Make a random field names, dtypes, and sizes. Though, if
+    def random_structured_numpy_array(self, shape, field_shapes=None,
+                                      nonascii_fields=False):
+        # Make random field names, dtypes, and sizes. Though, if
         # field_shapes is explicitly given, the sizes should be
         # random. The field names must all be of type str, not unicode
-        # in Python 2. String types will not be used due to the
-        # difficulty in assigning the length.
-        names = [self.random_str_ascii(
+        # in Python 2. Optionally include non-ascii characters in the
+        # field names (will have to be encoded in Python 2.x). String
+        # types will not be used due to the difficulty in assigning the
+        # length.
+        if nonascii_fields:
+            name_func = self.random_str_some_unicode
+        else:
+            name_func = self.random_str_ascii
+        names = [name_func(
                  self.max_structured_ndarray_field_lengths)
                  for i in range(0, random.randint(
                  self.min_structured_ndarray_fields,
                  self.max_structured_ndarray_fields))]
         if sys.hexversion < 0x03000000:
             for i, name in enumerate(names):
-                names[i] = name.encode()
+                names[i] = name.encode('UTF-8')
         dtypes = [random.choice(list(set(self.dtypes)
                   - set(('S', 'U'))))
                   for i in range(len(names))]
@@ -439,6 +444,13 @@ class TestPythonMatlabFormat(object):
                                   self.options)
         self.assert_equal(out, data)
 
+    def test_str_ascii_encoded_utf8(self):
+        data = self.random_str_some_unicode(random.randint(1,
+            self.max_string_length)).encode('UTF-8')
+        out = self.write_readback(data, self.random_name(),
+                                  self.options)
+        self.assert_equal(out, data)
+
     def test_str_unicode(self):
         data = self.random_str_some_unicode(random.randint(1,
                                             self.max_string_length))
@@ -512,6 +524,18 @@ class TestPythonMatlabFormat(object):
         for i in range(1, 4):
             yield self.check_numpy_structured_array_empty, i
 
+    def test_numpy_structured_array_unicode_fields(self):
+        # Makes a random 1d structured ndarray with non-ascii characters
+        # in its fields, writes it and reads it back, and then compares
+        # it.
+        shape = self.random_numpy_shape(1, \
+            self.max_structured_ndarray_axis_length)
+        data = self.random_structured_numpy_array(shape,
+                                                  nonascii_fields=True)
+        out = self.write_readback(data, self.random_name(),
+                                  self.options)
+        self.assert_equal(out, data)
+
     def test_python_collection(self):
         for tp in (list, tuple, set, frozenset, collections.deque):
             yield self.check_python_collection, tp

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-hdf5storage.git



More information about the debian-science-commits mailing list