[h5py] 203/455: Filter tests & cleanup; fix idiotic bug in LZF compressor

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:33 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit 444af4acf967cd11c2e20d8c68bdb9cffaf40c45
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Wed Jan 28 07:09:20 2009 +0000

    Filter tests & cleanup; fix idiotic bug in LZF compressor
---
 h5py/__init__.py           |   4 +-
 h5py/browse.py             | 223 ------------------------
 h5py/filters.py            | 218 ++++++++++++++++++++++++
 h5py/highlevel.py          |  34 ++--
 h5py/lzf_filter.c          |  80 +++++----
 h5py/tests/common.py       |  10 ++
 h5py/tests/test_slicing.py |  13 +-
 h5py/utils_hl.py           | 414 ---------------------------------------------
 8 files changed, 298 insertions(+), 698 deletions(-)

diff --git a/h5py/__init__.py b/h5py/__init__.py
index 7522156..2936154 100644
--- a/h5py/__init__.py
+++ b/h5py/__init__.py
@@ -31,14 +31,14 @@ except ImportError, e:
 
 import utils, h5, h5a, h5d, h5f, h5fd, h5g, h5i, h5p, h5r, h5s, h5t, h5z, highlevel, version
 
-from highlevel import File, Group, Dataset, Datatype, AttributeManager, CoordsList, is_hdf5
+from highlevel import File, Group, Dataset, Datatype, AttributeManager, is_hdf5
 from h5 import H5Error, get_config
 
 __doc__ = __doc__ % (version.version, version.hdf5_version, version.api_version)
 
 __all__ = ['h5', 'h5f', 'h5g', 'h5s', 'h5t', 'h5d', 'h5a', 'h5p', 'h5r',
            'h5z', 'h5i', 'version', 'File', 'Group', 'Dataset',
-           'Datatype', 'AttributeManager', 'CoordsList', 'H5Error', 'get_config', 'is_hdf5']
+           'Datatype', 'AttributeManager', 'H5Error', 'get_config', 'is_hdf5']
 
 if version.api_version_tuple >= (1,8):
     import h5o, h5l
diff --git a/h5py/browse.py b/h5py/browse.py
deleted file mode 100644
index a1cf0e4..0000000
--- a/h5py/browse.py
+++ /dev/null
@@ -1,223 +0,0 @@
-#+
-# 
-# This file is part of h5py, a low-level Python interface to the HDF5 library.
-# 
-# Copyright (C) 2008 Andrew Collette
-# http://h5py.alfven.org
-# License: BSD  (See LICENSE.txt for full license)
-# 
-# $Date$
-# 
-#-
-
-"""
-    Internal module which provides the guts of the File.browse() method
-"""
-
-from cmd import Cmd
-from posixpath import join, basename, dirname, normpath, isabs
-from getopt import gnu_getopt, GetoptError
-import shlex
-import os
-import re
-import sys
-
-from utils_hl import hbasename
-
-from h5py import h5g
-
-NAMES = {h5g.DATASET: "Dataset", h5g.GROUP: "Group", h5g.TYPE: "Named Type"}
-LS_FORMAT = " %-20s    %-10s"
-
-class CmdError(StandardError):
-    pass
-
-# Why the hell doesn't Cmd inherit from object?  Properties don't work!
-class _H5Browser(Cmd, object):
-
-    """
-        HDF5 file browser class which holds state between sessions.
-    """
-    def _setpath(self, path):
-        self.prompt = "HDF5: %s> " % (hbasename(path))
-        self._path = path
-
-    path = property(lambda self: self._path, _setpath)
-
-    def __init__(self, fileobj, path=None, importdict=None):
-        """ Browse the file, putting any imported names into importdict. """
-        Cmd.__init__(self)
-        self.file = fileobj
-
-        self.path = path if path is not None else '/'
-
-        self.importdict = importdict
-        self.cmdloop('Browsing "%s". Type "help" for commands, "exit" to exit.' % os.path.basename(self.file.name))
-
-    def onecmd(self, line):
-        retval = False
-        try:
-            retval = Cmd.onecmd(self, line)
-        except (CmdError, GetoptError), e:
-            print "Error: "+e.args[0]
-        return retval
-
-    def abspath(self, path):
-        """ Correctly interpret the given path fragment, relative to the
-            current path.
-        """
-        return normpath(join(self.path,path))
-
-    def do_exit(self, line):
-        """ Exit back to Python """
-        return True
-
-    def do_EOF(self, line):
-        """ (Ctrl-D) Exit back to Python """
-        return True
-
-    def do_pwd(self, line):
-        """ Print name of current group """
-        print self.path
-
-    def do_cd(self, line):
-        """ cd [group] """
-        args = shlex.split(line)
-        if len(args) > 1:
-            raise CmdError("Too many arguments")
-        path = args[0] if len(args) == 1 else ''
-
-        path = self.abspath(path)
-        dname = dirname(path)
-        bname = basename(path)
-        try:
-            if bname != '' and not h5g.get_objinfo(self.file[dname].id, bname).type == h5g.GROUP:
-                raise CmdError('"%s" is not an HDF5 group' % bname)
-            else:
-                self.path = path
-        except:
-            raise CmdError('Can\'t open group "%s"' % path)
-
-    def complete_cd(self, text, line, begidx, endidx):
-        text = text.strip()
-        grpname = self.abspath(dirname(text))
-        targetname = basename(text)
-
-        grp = self.file[grpname]
-        rval = [join(grpname,x) for x in grp \
-                    if x.find(targetname) == 0 and \
-                    h5g.get_objinfo(grp.id,x).type == h5g.GROUP]
-        return rval
-
-    def do_ls(self, line):
-        """ ls [-l] [group] """
-
-        LONG_STYLE = False
-        opts, args = gnu_getopt(shlex.split(line), 'l')
-
-        if '-l' in [ opt[0] for opt in opts]:
-            LONG_STYLE = True
-        if len(args) == 0:
-            grpname = self.path
-        elif len(args) == 1:
-            grpname = self.abspath(args[0])
-        else:
-            self._error("Too many arguments")
-            return
-
-        try:
-            grp = self.file[grpname]
-            if LONG_STYLE:
-                print 'Group "%s" in file "%s":' % (hbasename(grpname), os.path.basename(self.file.name))
-                print LS_FORMAT % ("Name", "Type")
-                print LS_FORMAT % ("----", "----")
-            for name in grp:
-                typecode = h5g.get_objinfo(grp.id, name).type
-                pname = name if typecode != h5g.GROUP else name+'/'
-                if LONG_STYLE:
-                    print LS_FORMAT % (pname, NAMES[typecode])
-                else:
-                    print pname
-        except:
-            raise CmdError('Can\'t list contents of group "%s"' % hbasename(grpname))
-        
-    def do_info(self, line):
-
-        opts, args = gnu_getopt(shlex.split(line),'')
-
-        for arg in args:
-            name = self.abspath(arg)
-            try:
-                obj = self.file[name]
-                print obj.desc()
-            except:
-                raise CmdError("Can't get info on object \"%s\"" % hbasename(name))
-
-    def complete_info(self, text, line, begidx, endidx):
-        text = text.strip()
-        grpname = self.abspath(dirname(text))
-        targetname = basename(text)
-
-        grp = self.file[grpname]
-        rval = [join(grpname,x) for x in grp \
-                    if x.find(targetname) == 0]
-        return rval
-
-
-    def do_import(self, line):
-        """ import name [as python_name] 
- import name1 name2 name3 name4 ...
-        """
-        if self.importdict is None:
-            raise CmdError("No import dictionary provided")
-
-        opts, args = gnu_getopt(shlex.split(line),'')
-        
-        pynames = []
-        hnames = []
-
-        importdict = {}   # [Python name] => HDF5 object
-
-        if len(args) == 3 and args[1] == 'as':
-            pynames.append(args[2])
-            hnames.append(args[0])
-        else:
-            for arg in args:
-                absname = self.abspath(arg)
-                pynames.append(basename(absname))
-                hnames.append(absname)
-
-        for pyname, hname in zip(pynames, hnames):
-            try:
-                obj = self.file[hname]
-            except Exception, e:
-                raise CmdError("Can't import %s" % pyname)
-
-            if len(re.sub('[A-Za-z_][A-Za-z0-9_]*','',pyname)) != 0:
-                raise CmdError("%s is not a valid Python identifier" % pyname)
-
-            if pyname in self.importdict:
-                if not raw_input("Name %s already in use. Really import (y/N)?  " % pyname).strip().lower().startswith('y'):
-                    continue
-
-            importdict[pyname] = obj
-
-        self.importdict.update(importdict)
-
-    def complete_import(self, text, line, begidx, endidx):
-        text = text.strip()
-        grpname = self.abspath(dirname(text))
-        targetname = basename(text)
-
-        grp = self.file[grpname]
-        rval = [join(grpname,x) for x in grp \
-                    if x.find(targetname) == 0]
-        return rval
-
-
-    def complete_ls(self, *args):
-        return self.complete_cd(*args)
-
-
-
-
diff --git a/h5py/filters.py b/h5py/filters.py
new file mode 100644
index 0000000..e29cf32
--- /dev/null
+++ b/h5py/filters.py
@@ -0,0 +1,218 @@
+
+"""
+    Filter pipeline and chunking utilities for the high-level interface.
+"""
+from __future__ import with_statement
+from h5py import h5s, h5z, h5p, h5d
+import numpy as np
+
+CHUNK_BASE = 16*1024    # Multiplier by which chunks are adjusted
+CHUNK_MIN = 8*1024      # Soft lower limit (8k)
+CHUNK_MAX = 1024*1024   # Hard upper limit (1M)
+
+_COMP_FILTERS = {'gzip': h5z.FILTER_DEFLATE,
+                 'szip': h5z.FILTER_SZIP,
+                 'lzf': h5z.FILTER_LZF}
+
+DEFAULT_GZIP = 4
+DEFAULT_SZIP = ('nn', 8)
+
+def _gen_filter_tuples():
+    decode = []
+    encode = []
+    for name, code in _COMP_FILTERS.iteritems():
+        if h5z.filter_avail(code):
+            info = h5z.get_filter_info(code)
+            if info & h5z.FILTER_CONFIG_ENCODE_ENABLED:
+                encode.append(name)
+            if info & h5z.FILTER_CONFIG_DECODE_ENABLED:
+                decode.append(name)
+
+    return tuple(decode), tuple(encode)
+
+decode, encode = _gen_filter_tuples()
+
+def generate_dcpl(shape, dtype, chunks, compression, compression_opts,
+                  shuffle, fletcher32, maxshape):
+    """ Generate a dataset creation property list.
+
+        Checks range and correctness of each argument.  Does not check
+        for disallowed arguments.
+
+        chunks:         None or tuple with len == len(shape)
+        compression:    None or in 'gzip', 'lzf', 'szip'
+        compression_opts: None or <arbitrary>
+        shuffle:        T/F
+        fletcher32:     T/F
+        maxshape:       None or tuple with len == len(shape)
+    """
+
+    # Validate and normalize arguments
+
+    if shape == ():
+        if maxshape and maxshape != ():
+            raise TypeError("Scalar datasets cannot be extended")
+        return h5p.create(h5p.DATASET_CREATE)
+
+    fletcher32 = bool(fletcher32)
+
+    def rq_tuple(tpl, name):
+        if tpl not in (None, True):
+            try:
+                tpl = tuple(tpl)
+                if len(tpl) != len(shape):
+                    raise ValueError('"%s" must have same rank as dataset shape' % name)
+            except TypeError:
+                raise TypeError('"%s" argument must be None or a sequence object' % name) 
+ 
+    rq_tuple(chunks, 'chunks')
+    rq_tuple(maxshape, 'maxshape')
+
+    if compression is not None:
+
+        if shuffle is None:
+            shuffle = True
+
+        if compression not in _COMP_FILTERS:
+            raise ValueError("Compression method must be one of %s" % ", ".join(_COMP_FILTERS))
+        if compression == 'gzip':
+            if compression_opts is None:
+                gzip_level = DEFAULT_GZIP
+            elif compression_opts in range(10):
+                gzip_level = compression_opts
+            else:
+                raise ValueError("GZIP setting must be an integer from 0-9, not %r" % compression_opts)
+        elif compression == 'lzf':
+            if compression_opts is not None:
+                raise ValueError("LZF compression filter accepts no options")
+        elif compression == 'szip':
+            if compression_opts is None:
+                compression_opts = DEFAULT_SZIP
+
+            err = "SZIP options must be a 2-tuple ('ec'|'nn', even integer 0-32)"
+            try:
+                szmethod, szpix = compression_opts
+            except TypeError:
+                raise TypeError(err)
+            if szmethod not in ('ec', 'nn'):
+                raise ValueError(err)
+            if not (0 < szpix <= 32 and szpix % 2 == 0):
+                raise ValueError(err)
+
+    # End argument validation
+
+    if (chunks is True) or \
+    (chunks is None and any((shuffle, fletcher32, compression, maxshape))):
+        chunks = guess_chunk(shape, dtype.itemsize)
+        
+    if maxshape is True:
+        maxshape = (None,)*len(shape)
+
+    plist = h5p.create(h5p.DATASET_CREATE)
+    if chunks is not None:
+        plist.set_chunk(chunks)
+        plist.set_fill_time(h5d.FILL_TIME_ALLOC)
+
+    if shuffle:
+        plist.set_shuffle()
+
+    if compression == 'gzip':
+        plist.set_deflate(gzip_level)
+    elif compression == 'lzf':
+        plist.set_filter(h5z.FILTER_LZF, h5z.FLAG_OPTIONAL)
+    elif compression == 'szip':
+        opts = {'ec': h5z.SZIP_EC_OPTION_MASK, 'nn': h5z.SZIP_NN_OPTION_MASK}
+        plist.set_szip(opts[szmethod], szpix)
+
+    if fletcher32:
+        plist.set_fletcher32()
+
+    return plist
+
+def get_filters(plist):
+    """ Extract a dictionary of active filters from a DCPL, along with
+    their settings
+    """
+
+    filters = {h5z.FILTER_DEFLATE: 'gzip', h5z.FILTER_SZIP: 'szip',
+               h5z.FILTER_SHUFFLE: 'shuffle', h5z.FILTER_FLETCHER32: 'fletcher32',
+               h5z.FILTER_LZF: 'lzf'}
+    szopts = {h5z.SZIP_EC_OPTION_MASK: 'ec', h5z.SZIP_NN_OPTION_MASK: 'nn'}
+
+    pipeline = {}
+
+    nfilters = plist.get_nfilters()
+
+    for i in range(nfilters):
+
+        code, flags, vals, desc = plist.get_filter(i)
+
+        if code == h5z.FILTER_DEFLATE:
+            vals = vals[0] # gzip level
+
+        elif code == h5z.FILTER_SZIP:
+            mask, pixels = vals[0:2]
+            if mask & h5z.SZIP_EC_OPTION_MASK:
+                mask = 'ec'
+            elif mask & h5z.SZIP_NN_OPTION_MASK:
+                mask = 'nn'
+            else:
+                raise TypeError("Unknown SZIP configuration")
+            vals = (mask, pixels)
+        else:
+            if len(vals) == 0:
+                vals = None
+
+        pipeline[filters.get(code, str(code))] = vals
+
+    return pipeline
+
+def guess_chunk(shape, typesize):
+    """ Guess an appropriate chunk layout for a dataset, given its shape and
+        the size of each element in bytes.  Will allocate chunks only as large
+        as CHUNK_MAX.  Chunks are generally close to some power-of-2 fraction of
+        each axis, slightly favoring bigger values for the last index.
+    """
+
+    ndims = len(shape)
+    if ndims == 0:
+        raise ValueError("Chunks not allowed for scalar datasets.")
+
+    chunks = np.array(shape, dtype='=f8')
+
+    # Determine the optimal chunk size in bytes using a PyTables expression.
+    # This is kept as a float.
+    dset_size = np.product(chunks)*typesize
+    target_size = CHUNK_BASE * (2**np.log10(dset_size/(1024.*1024)))
+
+    if target_size > CHUNK_MAX:
+        target_size = CHUNK_MAX
+    elif target_size < CHUNK_MIN:
+        target_size = CHUNK_MIN
+
+    idx = 0
+    while True:
+        # Repeatedly loop over the axes, dividing them by 2.  Stop when:
+        # 1a. We're smaller than the target chunk size, OR
+        # 1b. We're within 50% of the target chunk size, AND
+        #  2. The chunk is smaller than the maximum chunk size
+
+        chunk_bytes = np.product(chunks)*typesize
+
+        if (chunk_bytes < target_size or \
+         abs(chunk_bytes-target_size)/target_size < 0.5) and \
+         chunk_bytes < CHUNK_MAX:
+            break
+
+        chunks[idx%ndims] = np.ceil(chunks[idx%ndims] / 2.0)
+        idx += 1
+
+    return tuple(long(x) for x in chunks)
+
+
+
+
+
+
+
+
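
[Editor's note] For orientation, a minimal sketch of how the new filters
module fits together, assuming this revision of h5py is built and
importable: guess_chunk proposes a chunk shape between the 8 KiB soft floor
and 1 MiB hard ceiling, and generate_dcpl folds chunking, compression,
shuffle and checksum settings into one dataset creation property list.

    import numpy as np
    from h5py import filters

    shape = (1000, 1000)
    dtype = np.dtype('f8')

    # Axes are halved round-robin until the chunk lands near the target size.
    print(filters.guess_chunk(shape, dtype.itemsize))   # e.g. (63, 63)

    # chunks=None plus a compressor triggers auto-chunking, and the new
    # shuffle=None default is promoted to True whenever compression is set.
    plist = filters.generate_dcpl(shape, dtype, None, 'gzip', 6,
                                  None, False, None)
    print(filters.get_filters(plist))   # expect gzip (level 6) plus shuffle
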
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index 98f2f8e..8afdd92 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -46,26 +46,30 @@ from __future__ import with_statement
 
 import os
 import numpy
-import inspect
 import threading
 import sys
 import warnings
 
+import os.path as op
+import posixpath as pp
+
 from h5py import h5, h5f, h5g, h5s, h5t, h5d, h5a, h5p, h5z, h5i
 from h5py.h5 import H5Error
-import utils_hl as uhl
-from utils_hl import slice_select, hbasename, guess_chunk
-from utils_hl import CoordsList
-from browse import _H5Browser
 import h5py.selections as sel
-import posixpath as pp
+
+import filters
 
 config = h5.get_config()
 if config.API_18:
     from h5py import h5o, h5l
 
 __all__ = ["File", "Group", "Dataset",
-           "Datatype", "AttributeManager", "CoordsList"]
+           "Datatype", "AttributeManager"]
+
+def _hbasename(name):
+    """ Basename function with more readable handling of trailing slashes"""
+    name = pp.basename(pp.normpath(name))
+    return name if name != '' else '/'
 
 def is_hdf5(fname):
     fname = os.path.abspath(fname)
@@ -477,7 +481,7 @@ class Group(HLObject, _DictCompat):
         with self._lock:
             try:
                 return '<HDF5 group "%s" (%d members)>' % \
-                    (hbasename(self.name), len(self))
+                    (_hbasename(self.name), len(self))
             except Exception:
                 return "<Closed HDF5 group>"
 
@@ -674,8 +678,8 @@ class Dataset(HLObject):
 
     def __init__(self, group, name,
                     shape=None, dtype=None, data=None,
-                    chunks=None, compression=None, shuffle=False,
-                    fletcher32=False, maxshape=None, compression_opts=None):
+                    chunks=None, compression=None, shuffle=None,
+                    fletcher32=None, maxshape=None, compression_opts=None):
         """ Open or create a new dataset in the file.
 
         It's recommended you use the Group methods (open via Group["name"],
@@ -761,7 +765,7 @@ class Dataset(HLObject):
 
                 # Generate the dataset creation property list
                 # This also validates the keyword arguments
-                plist = uhl.generate_dcpl(shape, dtype, chunks, compression,
+                plist = filters.generate_dcpl(shape, dtype, chunks, compression,
                             compression_opts, shuffle, fletcher32, maxshape)
 
                 if maxshape is not None:
@@ -776,7 +780,7 @@ class Dataset(HLObject):
 
             self._attrs = AttributeManager(self)
             plist = self.id.get_create_plist()
-            self._filters = uhl.get_filters(plist)
+            self._filters = filters.get_filters(plist)
             if plist.get_layout() == h5d.CHUNKED:
                 self._chunks = plist.get_chunk()
             else:
@@ -996,7 +1000,7 @@ class Dataset(HLObject):
         with self._lock:
             try:
                 return '<HDF5 dataset "%s": shape %s, type "%s">' % \
-                    (hbasename(self.name), self.shape, self.dtype.str)
+                    (_hbasename(self.name), self.shape, self.dtype.str)
             except Exception:
                 return "<Closed HDF5 dataset>"
 
@@ -1095,7 +1099,7 @@ class AttributeManager(LockableObject, _DictCompat):
         with self._lock:
             try:
                 return '<Attributes of HDF5 object "%s" (%d)>' % \
-                    (hbasename(h5i.get_name(self.id)), len(self))
+                    (_hbasename(h5i.get_name(self.id)), len(self))
             except Exception:
                 return "<Attributes of closed HDF5 object>"
 
@@ -1133,7 +1137,7 @@ class Datatype(HLObject):
         with self._lock:
             try:
                 return '<HDF5 named type "%s" (dtype %s)>' % \
-                    (hbasename(self.name), self.dtype.str)
+                    (_hbasename(self.name), self.dtype.str)
             except Exception:
                 return "<Closed HDF5 named type>"
 
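
[Editor's note] The keyword-default change above (shuffle=None and
fletcher32=None instead of False) means the pipeline can now be filled in by
filters.generate_dcpl rather than forced off. A minimal sketch, with
hypothetical file and dataset names, assuming Group.create_dataset forwards
these keywords as in released 1.x versions:

    import h5py
    from h5py import filters

    f = h5py.File('demo.hdf5', 'w')            # hypothetical file name
    dset = f.create_dataset('data', shape=(100, 100), dtype='f4',
                            compression='gzip')   # shuffle left at None
    # Shuffle should appear in the pipeline even though it was never
    # requested explicitly, because compression implies shuffle=True.
    print(filters.get_filters(dset.id.get_create_plist()))
    f.close()
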
diff --git a/h5py/lzf_filter.c b/h5py/lzf_filter.c
index df5be88..a662f55 100644
--- a/h5py/lzf_filter.c
+++ b/h5py/lzf_filter.c
@@ -82,6 +82,18 @@ int register_lzf(void){
     return retval;
 }
 
+void printbytes(char *buffer, int nbytes){
+
+    int i;
+    unsigned char c;
+    for(i=0; i<nbytes; i++){
+        c = buffer[i];
+        fprintf(stderr, "%03u ", c);
+        if(i%20==0){
+            fprintf(stderr, "\n");
+        }
+    }
+}
 /* The filter function */
 size_t lzf_filter(unsigned flags, size_t cd_nelmts,
 		    const unsigned cd_values[], size_t nbytes,
@@ -92,7 +104,6 @@ size_t lzf_filter(unsigned flags, size_t cd_nelmts,
 
     unsigned int status = 0;        /* Return code from lzf routines */
 
-
     /* We're compressing */
     if(!(flags & H5Z_FLAG_REVERSE)){
 
@@ -107,56 +118,51 @@ size_t lzf_filter(unsigned flags, size_t cd_nelmts,
 
         status = lzf_compress(*buf, nbytes, outbuf, outbuf_size);
 
-        if(status == 0){
-            free(outbuf);
-        }
-
-        return status;
-    }
-
     /* We're decompressing */
+    } else {
 
-    outbuf_size = (*buf_size);
+        outbuf_size = (*buf_size);
 
-    while(!status){
-    
-        free(outbuf);
-        outbuf = malloc(outbuf_size);
+        while(!status){
+        
+            free(outbuf);
+            outbuf = malloc(outbuf_size);
 
-        status = lzf_decompress(*buf, nbytes, outbuf, outbuf_size);
+            status = lzf_decompress(*buf, nbytes, outbuf, outbuf_size);
 
-        /* compression failed */
-        if(!status){
+            /* compression failed */
+            if(!status){
 
-            /* Output buffer too small; make it bigger */
-            if(errno == E2BIG){
+                /* Output buffer too small; make it bigger */
+                if(errno == E2BIG){
 #ifdef H5PY_LZF_DEBUG
-                fprintf(stderr, "LZF filter: Buffer guess too small: %d", outbuf_size);
+                    fprintf(stderr, "LZF filter: Buffer guess too small: %d", outbuf_size);
 #endif
-                outbuf_size += (*buf_size);
-                if(outbuf_size > H5PY_LZF_MAX_BUF){
-                    PUSH_ERR("lzf_filter", H5E_CALLBACK, "Requested LZF buffer too big");
+                    outbuf_size += (*buf_size);
+                    if(outbuf_size > H5PY_LZF_MAX_BUF){
+                        PUSH_ERR("lzf_filter", H5E_CALLBACK, "Requested LZF buffer too big");
+                        goto failed;
+                    }
+
+                /* Horrible internal error (data corruption) */
+                } else if(errno == EINVAL) {
+
+                    PUSH_ERR("lzf_filter", H5E_CALLBACK, "Invalid data for LZF decompression");
                     goto failed;
-                }
 
-            /* Horrible internal error (data corruption) */
-            } else if(errno == EINVAL) {
-                PUSH_ERR("lzf_filter", H5E_CALLBACK, "Invalid data for LZF decompression");
-                goto failed;
+                /* Unknown error */
+                } else {
+                    PUSH_ERR("lzf_filter", H5E_CALLBACK, "Unknown LZF decompression error");
+                    goto failed;
+                }
 
-            /* Unknown error */
-            } else {
-                PUSH_ERR("lzf_filter", H5E_CALLBACK, "Unknown LZF decompression error");
-                goto failed;
-            }
+            } /* if !status */
 
-        } /* if !status */
+        } /* while !status */
 
-    } /* while !status */
-    
+    } /* compressing vs decompressing */
 
-    /* If compression/decompression successful, swap buffers */
-    if(status){
+    if(status != 0){
 
         free(*buf);
         *buf = outbuf;
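
[Editor's note] The bug fixed here is visible in the removed lines: the
compression branch returned the result of lzf_compress() directly, so on
success it never reached the buffer swap at the end of lzf_filter(). HDF5
therefore kept the uncompressed input (tagged with the compressed length)
while the buffer actually holding the compressed bytes leaked. Restructuring
into an if/else lets both the compress and decompress paths fall through to
the common swap. A quick round-trip check, with hypothetical names, assuming
the LZF filter was registered when this h5py build was imported:

    import numpy as np
    import h5py

    f = h5py.File('lzf_check.hdf5', 'w')       # hypothetical file name
    a = np.arange(10000, dtype='f8').reshape(100, 100)
    dset = f.create_dataset('x', data=a, compression='lzf')
    # Read-back runs lzf_filter() in decompress mode; equality shows the
    # compress path now hands its output buffer back to HDF5.
    assert np.all(dset[...] == a)
    f.close()
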
diff --git a/h5py/tests/common.py b/h5py/tests/common.py
index 0e0b704..b7ad4b5 100644
--- a/h5py/tests/common.py
+++ b/h5py/tests/common.py
@@ -46,6 +46,16 @@ def covers(*args):
 
     return wrap
 
+def makehdf():
+    fname = tempfile.mktemp('.hdf5')
+    f = h5py.File(fname, 'w')
+    return f
+
+def delhdf(f):
+    fname = f.name
+    f.close()
+    os.unlink(fname)
+
 class HDF5TestCase(unittest.TestCase):
 
     """
diff --git a/h5py/tests/test_slicing.py b/h5py/tests/test_slicing.py
index e025e47..74b2e2e 100644
--- a/h5py/tests/test_slicing.py
+++ b/h5py/tests/test_slicing.py
@@ -1,9 +1,11 @@
 import numpy as np
-import h5py
-import tempfile
 import os
 from nose.tools import assert_equal
 
+from common import makehdf, delhdf
+
+import h5py
+
 def check_arr_equal(dset, arr):
     """ Make sure dset and arr have the same shape, dtype and contents.
 
@@ -28,13 +30,10 @@ s = SliceFreezer()
 class TestSlicing(object):
 
     def setUp(self):
-        fname = tempfile.mktemp('.hdf5')
-        self.f = h5py.File(fname, 'w')
-        self.fname = fname
+        self.f = makehdf()
 
     def tearDown(self):
-        self.f.close()
-        os.unlink(self.fname)
+        delhdf(self.f)
 
     def generate(self, shape, dtype):
         if 'dset' in self.f:
diff --git a/h5py/utils_hl.py b/h5py/utils_hl.py
deleted file mode 100644
index c5cfe23..0000000
--- a/h5py/utils_hl.py
+++ /dev/null
@@ -1,414 +0,0 @@
-
-"""
-    Utility functions for high-level modules.
-"""
-from __future__ import with_statement
-from h5py import h5s, h5z, h5p, h5d
-
-from posixpath import basename, normpath
-import numpy
-
-CHUNK_BASE = 16*1024    # Multiplier by which chunks are adjusted
-MIN_CHUNK = 8*1024      # Soft lower limit (8k)
-MAX_CHUNK = 1024*1024   # Hard upper limit (1M)
-
-def hbasename(name):
-    """ Basename function with more readable handling of trailing slashes"""
-    bname = normpath(name)
-    bname = basename(bname)
-    if bname == '':
-        bname = '/'
-    return bname
-
-COMP_FILTERS = {'gzip': h5z.FILTER_DEFLATE,
-                'szip': h5z.FILTER_SZIP,
-                'lzf': h5z.FILTER_LZF }
-
-def generate_dcpl(shape, dtype, chunks, compression, compression_opts,
-                  shuffle, fletcher32, maxshape):
-    """ Generate a dataset creation property list.
-
-        Checks range and correctness of each argument.  Does not check
-        for disallowed arguments.
-
-        chunks:         None or tuple with len == len(shape)
-        compression:    None or in 'gzip', 'lzf', 'szip'
-        compression_opts: None or <arbitrary>
-        shuffle:        T/F
-        fletcher32:     T/F
-        maxshape:       None or tuple with len == len(shape)
-    """
-
-    # Validate and normalize arguments
-
-    shuffle = bool(shuffle)
-    fletcher32 = bool(fletcher32)
-
-    def rq_tuple(tpl, name):
-        if tpl not in (None, True):
-            try:
-                tpl = tuple(tpl)
-                if len(tpl) != len(shape):
-                    raise ValueError('"%s" must have same rank as dataset shape' % name)
-            except TypeError:
-                raise TypeError('"%s" argument must be None or a sequence object' % name) 
- 
-    rq_tuple(chunks, 'chunks')
-    rq_tuple(maxshape, 'maxshape')
-
-    if compression is not None:
-        if compression not in COMP_FILTERS:
-            raise ValueError("Compression method must be one of %s" % ", ".join(COMP_FILTERS))
-        if compression == 'gzip':
-            if compression_opts is None:
-                gzip_level = 4
-            elif compression_opts in range(10):
-                gzip_level = compression_opts
-            else:
-                raise ValueError("GZIP setting must be an integer from 0-9, not %r" % compression_opts)
-        elif compression == 'lzf':
-            if compression_opts is not None:
-                raise ValueError("LZF compression filter accepts no options")
-        elif compression == 'szip':
-            if compression_opts is None:
-                compression_opts = (h5z.SZIP_NN_OPTION_MASK, 8)
-            else:
-                err = "SZIP options must be a 2-tuple ('ec'|'nn', even integer 0-32)"
-                try:
-                    szmethod, szpix = compression_opts
-                except TypeError:
-                    raise TypeError(err)
-                if szmethod not in ('ec', 'nn'):
-                    raise ValueError(err)
-                if not (0<szpix<32 and szpix%2 != 0):
-                    raise ValueError(err)
-
-    # End argument validation
-
-    if (chunks is True) or \
-    (chunks is None and any((shuffle, fletcher32, compression, maxshape))):
-        if shape == ():
-            raise TypeError("Compression cannot be used with scalar datasets")
-        chunks = guess_chunk(shape, dtype.itemsize)
-        
-    if maxshape is True:
-        maxshape = (None,)*len(shape)
-
-    plist = h5p.create(h5p.DATASET_CREATE)
-    if chunks is not None:
-        plist.set_chunk(chunks)
-        plist.set_fill_time(h5d.FILL_TIME_ALLOC)
-
-    if shuffle:
-        plist.set_shuffle()
-
-    if compression == 'gzip':
-        plist.set_deflate(gzip_level)
-    elif compression == 'lzf':
-        plist.set_filter(h5z.FILTER_LZF, h5z.FLAG_OPTIONAL)
-    elif compression == 'szip':
-        opts = {'ec': h5z.SZIP_EC_OPTION_MASK, 'nn': h5z.SZIP_NN_OPTION_MASK}
-        plist.set_szip(opts[szmethod], szpix)
-
-    if fletcher32:
-        plist.set_fletcher32()
-
-    return plist
-
-def get_filters(plist):
-    """ Extract a dictionary of active filters from a DCPL, along with
-    their settings
-    """
-
-    filters = {h5z.FILTER_DEFLATE: 'gzip', h5z.FILTER_SZIP: 'szip',
-               h5z.FILTER_SHUFFLE: 'shuffle', h5z.FILTER_FLETCHER32: 'fletcher32',
-               h5z.FILTER_LZF: 'lzf'}
-
-    pipeline = {}
-
-    nfilters = plist.get_nfilters()
-
-    for i in range(nfilters):
-        code, flags, vals, desc = plist.get_filter(i)
-        if len(vals) == 0:
-            vals = None
-        elif len(vals) == 1:
-            vals = vals[0]
-        pipeline[filters.get(code, str(code))] = vals
-
-    return pipeline
-
-def guess_chunk(shape, typesize):
-    """ Guess an appropriate chunk layout for a dataset, given its shape and
-        the size of each element in bytes.  Will allocate chunks only as large
-        as MAX_SIZE.  Chunks are generally close to some power-of-2 fraction of
-        each axis, slightly favoring bigger values for the last index.
-    """
-
-    ndims = len(shape)
-    if ndims == 0:
-        raise ValueError("Chunks not allowed for scalar datasets.")
-
-    chunks = numpy.array(shape, dtype='=f8')
-
-    # Determine the optimal chunk size in bytes using a PyTables expression.
-    # This is kept as a float.
-    dset_size = numpy.product(chunks)*typesize
-    target_size = CHUNK_BASE * (2**numpy.log10(dset_size/(1024.*1024)))
-
-    if target_size > MAX_CHUNK:
-        target_size = MAX_CHUNK
-    elif target_size < MIN_CHUNK:
-        target_size = MIN_CHUNK
-
-    idx = 0
-    while True:
-        # Repeatedly loop over the axes, dividing them by 2.  Stop when:
-        # 1a. We're smaller than the target chunk size, OR
-        # 1b. We're within 50% of the target chunk size, AND
-        #  2. The chunk is smaller than the maximum chunk size
-
-        chunk_bytes = numpy.product(chunks)*typesize
-
-        if (chunk_bytes < target_size or \
-         abs(chunk_bytes-target_size)/target_size < 0.5) and \
-         chunk_bytes < MAX_CHUNK:
-            break
-
-        chunks[idx%ndims] = numpy.ceil(chunks[idx%ndims] / 2.0)
-        idx += 1
-
-    return tuple(long(x) for x in chunks)
-
-class CoordsList(object):
-
-    """
-        Wrapper class for efficient access to sequences of sparse or
-        irregular coordinates.  Construct from either a single index
-        (a rank-length sequence of numbers), or a sequence of such
-        indices:
-
-        CoordsList( (0,1,4) )               # Single index
-        CoordsList( [ (1,2,3), (7,8,9) ] )  # Multiple indices
-    """
-
-    npoints = property(lambda self: len(self.coords),
-        doc = "Number of selected points")
-
-    def __init__(self, points):
-        """ Create a new list of explicitly selected points.
-
-            CoordsList( (0,1,4) )               # Single index
-            CoordsList( [ (1,2,3), (7,8,9) ] )  # Multiple indices
-        """
-
-        try:
-            self.coords = numpy.asarray(points, dtype='=u8')
-        except ValueError:
-            raise ValueError("Selection should be an index or a sequence of equal-rank indices")
-
-        if len(self.coords) == 0:
-            pass # This will be caught at index-time
-        elif self.coords.ndim == 1:
-            self.coords.resize((1,len(self.coords)))
-        elif self.coords.ndim != 2:
-            raise ValueError("Selection should be an index or a sequence of equal-rank indices")
-
-
-def slice_select(space, args):
-    """ Perform a selection on the given HDF5 dataspace, using a tuple
-        of Python extended slice objects.  The dataspace may be scalar or
-        simple.  The following selection mechanisms are implemented:
-
-        1. select_all:
-            0-tuple
-            1-tuple containing Ellipsis
-
-        2. Hyperslab selection
-            n-tuple (n>1) containing slice/integer/Ellipsis objects
-
-        3. Discrete element selection
-            1-tuple containing boolean array or FlatIndexer
-
-        The return value is a 2-tuple:
-        1. Appropriate memory dataspace to use for new array
-        2. Boolean indicating if the slice should result in a scalar quantity
-    """
-    shape = space.shape
-    rank = len(shape)
-    space.set_extent_simple(shape, (h5s.UNLIMITED,)*rank)
-
-    if len(args) == 0 or (len(args) == 1 and args[0] is Ellipsis):
-        # The only safe way to access a scalar dataspace
-        space.select_all()
-        return space.copy(), False
-    else:
-        if space.get_simple_extent_type() == h5s.SCALAR:
-            raise TypeError('Can\'t slice a scalar dataset (only fields and "..." allowed)')
-
-    if len(args) == 1:
-        argval = args[0]
-
-        if isinstance(argval, numpy.ndarray):
-            # Boolean array indexing is handled by discrete element selection
-            # It never results in a scalar value
-            indices = numpy.transpose(argval.nonzero())
-            if len(indices) == 0:
-                space.select_none()
-            else:
-                space.select_elements(indices)
-            return h5s.create_simple((len(indices),), (h5s.UNLIMITED,)), False
-
-        if isinstance(argval, CoordsList):
-            # Coords indexing also uses discrete selection
-            if len(argval.coords) == 0:
-                space.select_none()
-                npoints = 0
-            elif argval.coords.ndim != 2 or argval.coords.shape[1] != rank:
-                raise ValueError("Coordinate list incompatible with %d-rank dataset" % rank)
-            else:
-                space.select_elements(argval.coords)
-                npoints = space.get_select_elem_npoints()
-            return h5s.create_simple((npoints,), (h5s.UNLIMITED,)), len(argval.coords) == 1
-
-    # Proceed to hyperslab selection
-
-    # First expand (at most 1) ellipsis object
-
-    n_el = list(args).count(Ellipsis)
-    if n_el > 1:
-        raise ValueError("Only one ellipsis may be used.")
-    elif n_el == 0 and len(args) != rank:
-        args = args + (Ellipsis,)  # Simple version of NumPy broadcasting
-
-    final_args = []
-    n_args = len(args)
-
-    for idx, arg in enumerate(args):
-
-        if arg == Ellipsis:
-            final_args.extend( (slice(None,None,None),)*(rank-n_args+1) )
-        else:
-            final_args.append(arg)
-
-
-    # Hyperslab selection
-
-    space.select_all()
-
-    def perform_selection(start, count, step, idx, op=h5s.SELECT_AND):
-        """ Performs a selection using start/count/step in the given axis.
-
-        All other axes have their full range selected.  The selection is
-        added to the current dataspace selection using the given operator,
-        defaulting to AND.
-
-        All arguments are ints.
-        """
-
-        shape = space.shape
-
-        start = tuple(0 if i != idx else start for i, x in enumerate(shape) )
-        count = tuple(x if i != idx else count for i, x in enumerate(shape) )
-        step  = tuple(1 if i != idx else step  for i, x in enumerate(shape) )
-
-        space.select_hyperslab(start, count, step, op=op)
-
-    def validate_number(num, length):
-        """ Make sure the given object can be converted to a positive int
-        smaller than the length.
-        """
-        try:
-            num = long(num)
-        except TypeError:
-            raise TypeError("Illegal index: %r" % num)
-        if num > length-1:
-            raise IndexError('Index out of bounds: %d' % num)
-        if num < 0:
-            raise IndexError('Negative index not allowed: %d' % num)
-
-    mshape = []
-
-    for idx, (length, exp) in enumerate(zip(shape,final_args)):
-
-        if isinstance(exp, slice):
-
-            start, stop, step = exp.start, exp.stop, exp.step
-            start = 0 if start is None else int(start)
-            stop = length if stop is None else int(stop)
-            step = 1 if step is None else int(step)
-
-            if start < 0:
-                raise ValueError("Negative start index not allowed (got %d)" % start)
-            if step < 1:
-                raise ValueError("Step must be >= 1 (got %d)" % step)
-            if stop < 0:
-                raise ValueError("Negative stop index not allowed (got %d)" % stop)
-
-            count = (stop-start)//step
-            if (stop-start) % step != 0:
-                count += 1
-
-            if start+count > length:
-                raise ValueError("Selection out of bounds on axis %d" % idx)
-
-            perform_selection(start, count, step, idx)
-
-            mshape.append(count)
-
-        else:  # either an index or list of indices
-
-            if not isinstance(exp, list):
-                exp = [exp]
-                mshape.append(0)
-            else:
-                mshape.append(len(exp))
-
-            if len(exp) == 0:
-                raise TypeError("Empty selections are not allowed (axis %d)" % idx)
-
-            if sorted(exp) != exp:
-                raise TypeError("Selection list must be provided in increasing order (axis %d)" % idx)
-
-            for x in exp:
-                validate_number(x, length)
-
-            for select_idx in xrange(len(exp)+1):
-
-                if select_idx == 0:
-                    start = 0
-                    count = exp[0]
-                elif select_idx == len(exp):
-                    start = exp[-1]+1
-                    count = length-start
-                else:
-                    start = exp[select_idx-1]+1
-                    count = exp[select_idx] - start
-                if count > 0:
-                    perform_selection(start, count, 1, idx, op=h5s.SELECT_NOTB)
-
-    mshape_final = tuple(x for x in mshape if x != 0)
-    mspace = h5s.create_simple(mshape_final, (h5s.UNLIMITED,)*len(mshape_final))
-
-    return mspace, (len(mshape_final) == 0)
-
-def strhdr(line, char='-'):
-    """ Print a line followed by an ASCII-art underline """
-    return line + "\n%s\n" % (char*len(line))
-
-def strlist(lst, keywidth=10):
-    """ Print a list of (key: value) pairs, with column alignment. """
-    format = "%-"+str(keywidth)+"s %s\n"
-
-    outstr = ''
-    for key, val in lst:
-        outstr += format % (key+':',val)
-
-    return outstr
-
-
-
-
-
-
-

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git


