[h5py] 207/455: More tests; fix string type conversion to use STR_NULLPAD

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:33 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit 0fb1d6fcdc8c2b991bb066fd43cdb6b57c484f2c
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Thu Jan 29 01:10:05 2009 +0000

    More tests; fix string type conversion to use STR_NULLPAD
---
 h5py/h5t.pyx                 |   1 +
 h5py/tests/common.py         |  30 ++++++++++
 h5py/tests/test_dataset.py   |  50 ++++++++++++++++
 h5py/tests/test_filters.py   |  42 ++++++++++---
 h5py/tests/test_highlevel.py | 139 -------------------------------------------
 h5py/tests/test_slicing.py   |  82 ++++++++++++++++++-------
 6 files changed, 178 insertions(+), 166 deletions(-)

diff --git a/h5py/h5t.pyx b/h5py/h5t.pyx
index 918c727..cf0588d 100644
--- a/h5py/h5t.pyx
+++ b/h5py/h5t.pyx
@@ -1269,6 +1269,7 @@ cdef TypeStringID _c_string(dtype dt):
 
     tid = H5Tcopy(H5T_C_S1)
     H5Tset_size(tid, dt.itemsize)
+    H5Tset_strpad(tid, H5T_STR_NULLPAD)
     return TypeStringID(tid)
 
 cdef TypeCompoundID _c_complex(dtype dt):
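For context, the one-line change above switches fixed-width string types from the HDF5 default padding (null-terminated) to null-padded, matching how NumPy stores 'S' dtypes. A minimal sketch of the observable effect through h5py's low-level h5t API (py_create, get_strpad and the STR_NULLPAD constant are existing low-level names; treat the snippet as illustrative, not part of this commit):

import numpy as np
import h5py

# py_create maps a NumPy dtype to an HDF5 type object; 'S' dtypes go
# through the _c_string() helper patched above.
tid = h5py.h5t.py_create(np.dtype('|S4'))
print tid.get_strpad() == h5py.h5t.STR_NULLPAD    # True with this fix

# NumPy pads short values with NULs and lets a value occupy every byte
# ('abcd' in S4); NULLPAD preserves such values, while a null-terminated
# type can lose the final character during conversion.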
diff --git a/h5py/tests/common.py b/h5py/tests/common.py
index b7ad4b5..79a30ac 100644
--- a/h5py/tests/common.py
+++ b/h5py/tests/common.py
@@ -56,6 +56,36 @@ def delhdf(f):
     f.close()
     os.unlink(fname)
 
+import numpy as np
+from nose.tools import assert_equal
+EPSILON = 1e-5
+
+INTS = ('i', 'i1', '<i2', '>i2', '<i4', '>i4')
+FLOATS = ('f', '<f4', '>f4', '<f8', '>f8')
+COMPLEX = ('<c8', '>c8', '<c16', '>c16')
+STRINGS = ('|S1', '|S2', '|S17', '|S100')
+VOIDS = ('|V4', '|V8')
+
+def assert_arr_equal(dset, arr, message=None, precision=None):
+    """ Make sure dset and arr have the same shape, dtype and contents, to
+        within the given precision.
+
+        Note that dset may be a NumPy array or an HDF5 dataset.
+    """
+    if precision is None:
+        precision = EPSILON
+    if message is None:
+        message = ''
+
+    if np.isscalar(dset) or np.isscalar(arr):
+        assert np.isscalar(dset) and np.isscalar(arr), "%r %r" % (dset, arr)
+        assert abs(dset - arr) < precision, message
+        return
+
+    assert_equal(dset.shape, arr.shape, message)
+    assert_equal(dset.dtype, arr.dtype, message)
+    assert np.all(np.abs(dset[...] - arr[...]) < precision), message or "%s %s" % (dset[...], arr[...])
+
 class HDF5TestCase(unittest.TestCase):
 
     """
diff --git a/h5py/tests/test_dataset.py b/h5py/tests/test_dataset.py
new file mode 100644
index 0000000..ad8d713
--- /dev/null
+++ b/h5py/tests/test_dataset.py
@@ -0,0 +1,50 @@
+
+"""
+
+    Test basic behavior of h5py.highlevel.Dataset, not including slicing
+    or keyword arguments
+"""
+
+import numpy as np
+
+from common import makehdf, delhdf, assert_arr_equal,\
+                   INTS, FLOATS, COMPLEX, STRINGS
+
+class TestDataset(object):
+
+    def setUp(self):
+        self.f = makehdf()
+    
+    def tearDown(self):
+        delhdf(self.f)
+
+    def make_dset(self, *args, **kwds):
+        if 'dset' in self.f:
+            del self.f['dset']
+        return self.f.create_dataset('dset', *args, **kwds)
+
+    def test_create(self):
+        # Test dataset creation from shape and type, or raw data
+
+        types = INTS + FLOATS + COMPLEX + STRINGS
+        shapes = ( (), (1,), (10,), (20,1,15), (7,200,1) )
+
+        for s in shapes:
+            srcdata = np.arange(np.product(s)).reshape(s)
+
+            for t in types:
+                print "test %s %s" % (s, t)
+                data = srcdata.astype(t)
+
+                dset = self.make_dset(s, t)
+
+                dset[...] = data
+
+                assert np.all(dset[...] == data), "%r %r" % (dset[...], data)
+
+                dset = self.make_dset(data=data)
+ 
+                assert np.all(dset[...] == data)
+
+                
+                
diff --git a/h5py/tests/test_filters.py b/h5py/tests/test_filters.py
index 0e1a360..351214b 100644
--- a/h5py/tests/test_filters.py
+++ b/h5py/tests/test_filters.py
@@ -23,10 +23,37 @@ class TestFilters(object):
             dtype = 'f'
         return self.f.create_dataset('dset', shape, dtype, **kwds)
         
-    def test_compression(self):
-        """ Dataset compression keywords """
+    def test_chunks(self):
+        # Check chunk behavior, including auto-chunking
+
+        # Test auto-chunking
+        pairs = [ ( {'chunks': None, 'compression': None}, False  ),
+                  ( {'chunks': True, 'compression': None},  True  ),
+                  ( {'chunks': None, 'compression': 'gzip'}, True ),
+                  ( {'fletcher32': True}, True ),
+                  ( {'shuffle': True}, True ),
+                  ( {'maxshape': (None, None)}, True),
+                  ( {}, False ) ]
+
+        for kwds, result in pairs:
+            dset = self.make_dset((10,10), **kwds)
+            assert_equal(bool(dset.chunks), result)
+
+        # Test user-defined chunking
+        shapes = [(), (1,), (10,5), (1,10), (2**60, 2**60, 2**34)]
+        chunks = {(): [None],
+                  (1,): [None, (1,)],
+                  (10,5): [None, (5,5), (10,1)],
+                  (1,10): [None, (1,10), (1,3)],
+                  (2**60, 2**60, 2**34): [(128, 64, 256)] }
+
+        for shape in shapes:
+            for chunk in chunks[shape]:
+                dset = self.make_dset(shape, chunks=chunk)
+                assert_equal(dset.chunks, chunk)
 
-        # 1. Test compression keyword only
+    def test_compression(self):
+        # Dataset compression keywords only
 
         settings = (0, 9, 4, 'gzip', 'lzf', None)
         results  = ('gzip', 'gzip', 'gzip', 'gzip', 'lzf', None)
@@ -44,7 +71,7 @@ class TestFilters(object):
             assert_equal(dset.compression_opts, o)
 
     def test_compression_opts(self):
-        """ Dataset compression keywords & options """
+        # Dataset compression keywords & options
 
         types = ('gzip', 'lzf')
         opts = {'gzip': (0, 9, 5), 'lzf': (None,)}
@@ -61,7 +88,8 @@ class TestFilters(object):
                 assert_equal(dset.compression_opts, o)
 
     def test_fletcher32_shuffle(self):
-        
+        # Check fletcher32 and shuffle, including auto-shuffle
+
         settings = (None, False, True)
         results = (False, False, True)
 
@@ -80,10 +108,10 @@ class TestFilters(object):
         assert_equal(dset.shuffle, False)
 
     def test_data(self):
-        """ Ensure data can be read/written with filters """
+        # Ensure data can be read/written with filters
 
         compression = (None, 'gzip', 'lzf')
-        shapes = ((), (10,), (10,10), (200,200,10))
+        shapes = ((), (10,), (10,10), (200,200))
         # Filter settings should be ignored for scalar shapes
 
         types = ('f','i', 'c')
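As a concrete instance of the keyword combinations covered above (sketch; assumes an open File object f): every filter keyword forces chunked storage, which is why test_chunks expects bool(dset.chunks) to be True whenever compression, shuffle, fletcher32 or maxshape is supplied.

dset = f.create_dataset('d', (200, 200), 'f',
                        compression='gzip', compression_opts=5,
                        shuffle=True, fletcher32=True)
print dset.chunks                                # auto-chosen, not None
print dset.compression, dset.compression_opts    # 'gzip', 5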
diff --git a/h5py/tests/test_highlevel.py b/h5py/tests/test_highlevel.py
index c14df98..0c92a51 100644
--- a/h5py/tests/test_highlevel.py
+++ b/h5py/tests/test_highlevel.py
@@ -187,127 +187,6 @@ class TestDataset(HDF5TestCase):
         dset1.id._close()
         repr(dset1)
         str(dset1)
-
-    def test_create(self):
-        """ Test the constructor and public properties """
-
-        def new_dset(*args, **kwds):
-            """ Create a dataset from constructor arguments.
-
-                Return is a 2-tuple (template, dataset).
-            """
-            # "None" means the keyword is "not provided"
-            kwds = dict((x,y) for x,y in kwds.iteritems() if y is not None)
-
-            template = testfiles.Dataset(*args, **kwds)
-
-            self.output(str(template.kwds))
-
-            if 'TEST_DSET' in self.f:
-                del self.f['TEST_DSET']
-            dset = Dataset(self.f, 'TEST_DSET', *args, **kwds)
-
-            return (template, dset)
-
-        def verify_ds(hdf, template):
-            """ Compare a real dataset to a template """
-
-            # Make sure the shape and dtype of the real dataset match the
-            # template's description.
-            if 'shape' in template.kwds:
-                shape = template.kwds['shape']
-            else:
-                shape = template.kwds['data'].shape
-            if 'dtype' in template.kwds:
-                dtype = template.kwds['dtype']
-            else:
-                dtype = template.kwds['data'].dtype
-
-            self.assertEqual(hdf.dtype, dtype, "dtype mismatch %s %s" % (hdf.dtype, dtype))
-            self.assertEqual(hdf.shape, shape, "shape mismatch %s %s" % (hdf.shape, shape))
-
-            # If data was given, make sure it's identical
-            if 'data' in template.kwds:
-                self.assert_(numpy.all(hdf.value == template.kwds['data']))
-
-            # If other keywords were given (chunks, etc), make sure they are
-            # correctly recorded.
-            for name, value in template.kwds.iteritems():
-                if name == 'data':
-                    continue
-                elif value is True:
-                    self.assert_(getattr(hdf, name) is not None,
-                      "True kwd ignored: %s" % name)
-                elif name == 'compression' and value in range(10):
-                    cname = getattr(hdf,name)
-                    cval = getattr(hdf, 'compression_opts')
-                    self.assertEqual(cname, 'gzip')
-                    self.assertEqual(cval,value)
-                else:
-                    self.assertEqual(getattr(hdf, name), value,
-                      "kwd mismatch: %s: %s %s" % (name, getattr(hdf, name), value))
-
-            # Make sure all the public properties work
-            for name in ('shape', 'dtype', 'chunks', 'compression', 'shuffle',
-              'fletcher32', 'maxshape'):
-                getattr(hdf, name)
-
-            # If a chunks-requiring keyword is used, make sure it's honored
-            for name in ('chunks', 'compression', 'shuffle', 'fletcher32'):
-                if template.kwds.get(name, False):
-                    self.assert_(hdf.chunks is not None, "chunks missing for arg %s" % name)
-
-        # === Begin constructor test ===
-
-        # Method 1: specify shape and dtype
-        shapes = [(), (1,), (10,5), (1,10), (100,1,100), (51,2,1025),
-                  (2**60, 2**60, 2**34)]
-
-        for shape in shapes:
-            for dtype in TYPES1+TYPES1:
-                template, dset = new_dset(shape, dtype)
-                verify_ds(dset, template)
-
-        # Method 2: specify actual data
-        for shape in shapes[0:6]:
-            for dtype in TYPES1:
-                arr = numpy.arange(numpy.product(shape), dtype=dtype).reshape(shape)
-                template, dset = new_dset(data=arr)
-                verify_ds(dset, template)
-
-        # Test shape-related keywords
-        maxshapes = { (): [None, ()],
-                      (1,): [None, (1,)],
-                      (10,5): [None, (10,5), (20,20)],
-                      (1,10): [None, (2,10), (None,20)],
-                      (100,1,100): [None, (100,2,100), (None, None, None)],
-                      (51, 2, 1025): [None, (2**60, 2**40, None)],
-                      (2**60, 2**60, 2**34): [(2**62, 2**62, 2**35)] }
-
-        chunks = { (): [None],
-                  (1,): [None, (1,)],
-                  (10,5): [None, (5,5), (10,1)],
-                  (1,10): [None, True, (1,10), (1,3)],
-                  (100,1,100): [None, (50,1,10)],
-                  (51, 2, 1025): [None],
-                  (2**60, 2**60, 2**34): [(128,64, 256)]}
-
-        for shape in shapes:
-            for ms in maxshapes[shape]:
-                for chunk in chunks[shape]:
-                    template, dset = new_dset(shape, '<i4', chunks=chunk, maxshape=ms)
-                    verify_ds(dset, template)
-
-        # Other keywords
-        compression = [None, True, 5, 9, 'lzf']
-        fletcher32 = [True, False]
-        shuffle = [True, False]
-
-        for comp in compression:
-            for f in fletcher32:
-                for sh in [x if comp else None for x in shuffle]:
-                    template, dset = new_dset((100,100), '<i4', compression=comp, fletcher32=f, shuffle=sh)
-                    verify_ds(dset, template)
     
     def test_Dataset_order(self):
         """ Test order coercion """
@@ -412,24 +291,6 @@ class TestDataset(HDF5TestCase):
                 self.assert_(numpy.all(arr == data), "%r \n\n %r" % (arr, data))
 
 
-    def test_slice_names(self):
-        """ Test slicing with named fields """
-
-        srcarr = numpy.ndarray((10,10), dtype=[('a', '<i4'), ('b', '<f8')])
-        srcarr['a'] = numpy.arange(100).reshape((10,10))
-        srcarr['b'] = 100*numpy.arange(100).reshape((10,10))
-
-        dset = self.f.create_dataset('TEST', data=srcarr)
-
-        pairs = \
-            [ (dset[:], srcarr[:]), (dset['a'], srcarr['a']),
-              (dset[5,5,'a'], srcarr['a'][5,5]),
-              (dset[2,:,'b'], srcarr['b'][2,:]),
-              (dset['b', ..., 5], srcarr[...,5]['b']) ]
-
-        for i, (d, n) in enumerate(pairs):
-            self.assert_(numpy.all(d == n), "Index %d mismatch" % i)
-
     @skip
     def test_slice_coords(self):
         """ Test slicing with CoordsList instances """
diff --git a/h5py/tests/test_slicing.py b/h5py/tests/test_slicing.py
index 74b2e2e..528f742 100644
--- a/h5py/tests/test_slicing.py
+++ b/h5py/tests/test_slicing.py
@@ -2,23 +2,10 @@ import numpy as np
 import os
 from nose.tools import assert_equal
 
-from common import makehdf, delhdf
+from common import makehdf, delhdf, assert_arr_equal
 
 import h5py
 
-def check_arr_equal(dset, arr):
-    """ Make sure dset and arr have the same shape, dtype and contents.
-
-        Note that dset may be a NumPy array or an HDF5 dataset
-    """
-    if np.isscalar(dset) or np.isscalar(arr):
-        assert np.isscalar(dset) and np.isscalar(arr)
-        assert dset == arr
-        return
-
-    assert_equal(dset.shape, arr.shape)
-    assert_equal(dset.dtype, arr.dtype)
-    assert np.all(dset[...] == arr[...]), "%s %s" % (dset[...], arr[...])
 
 class SliceFreezer(object):
     """ Necessary because numpy.s_ clips slices > 2**32 """
@@ -36,19 +23,24 @@ class TestSlicing(object):
         delhdf(self.f)
 
     def generate(self, shape, dtype):
-        if 'dset' in self.f:
-            del self.f['dset']
 
         size = np.product(shape)
-        dset = self.f.create_dataset('dset', shape, dtype)
+        dset = self.generate_dset(shape, dtype)
         arr = np.arange(size, dtype=dtype).reshape(shape)
         return dset, arr
 
+    def generate_dset(self, shape, dtype, **kwds):
+        if 'dset' in self.f:
+            del self.f['dset']
+        return self.f.create_dataset('dset', shape, dtype, **kwds)
+        
     def generate_rand(self, shape, dtype='f'):
         return np.random.random(shape).astype(dtype)
 
 
     def test_slices(self):
+        # Test integer, slice, array and list indices
+
         dset, arr = self.generate((10,10,50),'f')
 
         slices = [s[0,0,0], s[0,0,:], s[0,:,0], s[0,:,:]]
@@ -70,15 +62,40 @@ class TestSlicing(object):
             dset[slc] = arr[slc]
             
             print "check write %s" % (slc,)
-            check_arr_equal(dset, arr)
+            assert_arr_equal(dset, arr)
 
             out = dset[slc]
 
             print "check read %s" % (slc,)
-            check_arr_equal(out, arr[slc])
+            assert_arr_equal(out, arr[slc])
+
+    def test_slices_big(self):
+        # Test slicing behavior for indices larger than 2**32
+
+        shape = (2**62, 2**62)
+        dtype = 'f'
+
+        bases = [1024, 2**37, 2**60]
+        regions = [ (42,1), (100,100), (1,42), (1,1), (4,1025)]
+
+        for base in bases:
+            print "Testing base 2**%d" % np.log2(base)
+
+            slices = [ s[base:base+x, base:base+y] for x, y in regions]
 
+            dset = self.generate_dset(shape, dtype, maxshape=(None, None))
+
+            for region, slc in zip(regions, slices):
+                print "    Testing shape %s slice %s" % (region, slc,)
+        
+                data = np.arange(np.product(region), dtype=dtype).reshape(region)
+
+                dset[slc] = data
+
+                assert_arr_equal(dset[slc], data)
 
     def test_scalars(self):
+        # Confirm correct behavior for scalar datasets
 
         dset, arr = self.generate((),'i')
         dset[...] = arr[...] = 42
@@ -88,6 +105,7 @@ class TestSlicing(object):
 
 
     def test_broadcast(self):
+        # Test broadcasting to HDF5
 
         dset, arr = self.generate((20,10,30),'f')
         dset[...] = arr[...]
@@ -105,7 +123,31 @@ class TestSlicing(object):
             print "broadcast %s %s" % (slc, shape)
             dset[slc] = subarr
             arr[slc] = subarr
-            check_arr_equal(dset, arr)
+            assert_arr_equal(dset, arr)
+
+    def test_slice_names(self):
+        # Test slicing in conjunction with named fields
+
+        shape = (10,10)
+        size = np.product(shape)
+        dtype = [('a', 'i'), ('b', 'f')]
+
+        srcarr = np.ndarray(shape, dtype)
+
+        srcarr['a'] = np.arange(size).reshape(shape)
+        srcarr['b'] = np.arange(size).reshape(shape)*100
+
+        dset = self.f.create_dataset('TEST', data=srcarr)
+
+        pairs = [  (s[:], srcarr[:]),
+                   (s['a'], srcarr['a']),
+                   (s[5,5,'a'], srcarr['a'][5,5]),
+                   (s[2,:,'b'], srcarr['b'][2,:]),
+                   (s['b',...,5], srcarr[...,5]['b']) ]
+
+        for slc, result in pairs:
+            print "slicing %s" % (slc,)
+            assert np.all(dset[slc] == result)
 
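The s object used throughout these tests is a SliceFreezer instance; its body is elided in the hunks above, but a minimal implementation consistent with its use would be (the class exists in this file; the one-line body shown here is an assumption):

class SliceFreezer(object):
    """ Returns indexing arguments unchanged; numpy.s_ clips slice
        bounds above 2**32, which would break test_slices_big """
    def __getitem__(self, args):
        return args

s = SliceFreezer()
print s[2**60 : 2**60 + 42, 100:200]   # ordinary slice objects, unclipped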
 
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git


