[h5py] 203/455: Filter tests & cleanup; fix idiotic bug in LZF compressor
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:33 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.
commit 444af4acf967cd11c2e20d8c68bdb9cffaf40c45
Author: andrewcollette <andrew.collette at gmail.com>
Date: Wed Jan 28 07:09:20 2009 +0000
Filter tests & cleanup; fix idiotic bug in LZF compressor
---
h5py/__init__.py | 4 +-
h5py/browse.py | 223 ------------------------
h5py/filters.py | 218 ++++++++++++++++++++++++
h5py/highlevel.py | 34 ++--
h5py/lzf_filter.c | 80 +++++----
h5py/tests/common.py | 10 ++
h5py/tests/test_slicing.py | 13 +-
h5py/utils_hl.py | 414 ---------------------------------------------
8 files changed, 298 insertions(+), 698 deletions(-)
diff --git a/h5py/__init__.py b/h5py/__init__.py
index 7522156..2936154 100644
--- a/h5py/__init__.py
+++ b/h5py/__init__.py
@@ -31,14 +31,14 @@ except ImportError, e:
import utils, h5, h5a, h5d, h5f, h5fd, h5g, h5i, h5p, h5r, h5s, h5t, h5z, highlevel, version
-from highlevel import File, Group, Dataset, Datatype, AttributeManager, CoordsList, is_hdf5
+from highlevel import File, Group, Dataset, Datatype, AttributeManager, is_hdf5
from h5 import H5Error, get_config
__doc__ = __doc__ % (version.version, version.hdf5_version, version.api_version)
__all__ = ['h5', 'h5f', 'h5g', 'h5s', 'h5t', 'h5d', 'h5a', 'h5p', 'h5r',
'h5z', 'h5i', 'version', 'File', 'Group', 'Dataset',
- 'Datatype', 'AttributeManager', 'CoordsList', 'H5Error', 'get_config', 'is_hdf5']
+ 'Datatype', 'AttributeManager', 'H5Error', 'get_config', 'is_hdf5']
if version.api_version_tuple >= (1,8):
import h5o, h5l
diff --git a/h5py/browse.py b/h5py/browse.py
deleted file mode 100644
index a1cf0e4..0000000
--- a/h5py/browse.py
+++ /dev/null
@@ -1,223 +0,0 @@
-#+
-#
-# This file is part of h5py, a low-level Python interface to the HDF5 library.
-#
-# Copyright (C) 2008 Andrew Collette
-# http://h5py.alfven.org
-# License: BSD (See LICENSE.txt for full license)
-#
-# $Date$
-#
-#-
-
-"""
- Internal module which provides the guts of the File.browse() method
-"""
-
-from cmd import Cmd
-from posixpath import join, basename, dirname, normpath, isabs
-from getopt import gnu_getopt, GetoptError
-import shlex
-import os
-import re
-import sys
-
-from utils_hl import hbasename
-
-from h5py import h5g
-
-NAMES = {h5g.DATASET: "Dataset", h5g.GROUP: "Group", h5g.TYPE: "Named Type"}
-LS_FORMAT = " %-20s %-10s"
-
-class CmdError(StandardError):
- pass
-
-# Why the hell doesn't Cmd inherit from object? Properties don't work!
-class _H5Browser(Cmd, object):
-
- """
- HDF5 file browser class which holds state between sessions.
- """
- def _setpath(self, path):
- self.prompt = "HDF5: %s> " % (hbasename(path))
- self._path = path
-
- path = property(lambda self: self._path, _setpath)
-
- def __init__(self, fileobj, path=None, importdict=None):
- """ Browse the file, putting any imported names into importdict. """
- Cmd.__init__(self)
- self.file = fileobj
-
- self.path = path if path is not None else '/'
-
- self.importdict = importdict
- self.cmdloop('Browsing "%s". Type "help" for commands, "exit" to exit.' % os.path.basename(self.file.name))
-
- def onecmd(self, line):
- retval = False
- try:
- retval = Cmd.onecmd(self, line)
- except (CmdError, GetoptError), e:
- print "Error: "+e.args[0]
- return retval
-
- def abspath(self, path):
- """ Correctly interpret the given path fragment, relative to the
- current path.
- """
- return normpath(join(self.path,path))
-
- def do_exit(self, line):
- """ Exit back to Python """
- return True
-
- def do_EOF(self, line):
- """ (Ctrl-D) Exit back to Python """
- return True
-
- def do_pwd(self, line):
- """ Print name of current group """
- print self.path
-
- def do_cd(self, line):
- """ cd [group] """
- args = shlex.split(line)
- if len(args) > 1:
- raise CmdError("Too many arguments")
- path = args[0] if len(args) == 1 else ''
-
- path = self.abspath(path)
- dname = dirname(path)
- bname = basename(path)
- try:
- if bname != '' and not h5g.get_objinfo(self.file[dname].id, bname).type == h5g.GROUP:
- raise CmdError('"%s" is not an HDF5 group' % bname)
- else:
- self.path = path
- except:
- raise CmdError('Can\'t open group "%s"' % path)
-
- def complete_cd(self, text, line, begidx, endidx):
- text = text.strip()
- grpname = self.abspath(dirname(text))
- targetname = basename(text)
-
- grp = self.file[grpname]
- rval = [join(grpname,x) for x in grp \
- if x.find(targetname) == 0 and \
- h5g.get_objinfo(grp.id,x).type == h5g.GROUP]
- return rval
-
- def do_ls(self, line):
- """ ls [-l] [group] """
-
- LONG_STYLE = False
- opts, args = gnu_getopt(shlex.split(line), 'l')
-
- if '-l' in [ opt[0] for opt in opts]:
- LONG_STYLE = True
- if len(args) == 0:
- grpname = self.path
- elif len(args) == 1:
- grpname = self.abspath(args[0])
- else:
- self._error("Too many arguments")
- return
-
- try:
- grp = self.file[grpname]
- if LONG_STYLE:
- print 'Group "%s" in file "%s":' % (hbasename(grpname), os.path.basename(self.file.name))
- print LS_FORMAT % ("Name", "Type")
- print LS_FORMAT % ("----", "----")
- for name in grp:
- typecode = h5g.get_objinfo(grp.id, name).type
- pname = name if typecode != h5g.GROUP else name+'/'
- if LONG_STYLE:
- print LS_FORMAT % (pname, NAMES[typecode])
- else:
- print pname
- except:
- raise CmdError('Can\'t list contents of group "%s"' % hbasename(grpname))
-
- def do_info(self, line):
-
- opts, args = gnu_getopt(shlex.split(line),'')
-
- for arg in args:
- name = self.abspath(arg)
- try:
- obj = self.file[name]
- print obj.desc()
- except:
- raise CmdError("Can't get info on object \"%s\"" % hbasename(name))
-
- def complete_info(self, text, line, begidx, endidx):
- text = text.strip()
- grpname = self.abspath(dirname(text))
- targetname = basename(text)
-
- grp = self.file[grpname]
- rval = [join(grpname,x) for x in grp \
- if x.find(targetname) == 0]
- return rval
-
-
- def do_import(self, line):
- """ import name [as python_name]
- import name1 name2 name3 name4 ...
- """
- if self.importdict is None:
- raise CmdError("No import dictionary provided")
-
- opts, args = gnu_getopt(shlex.split(line),'')
-
- pynames = []
- hnames = []
-
- importdict = {} # [Python name] => HDF5 object
-
- if len(args) == 3 and args[1] == 'as':
- pynames.append(args[2])
- hnames.append(args[0])
- else:
- for arg in args:
- absname = self.abspath(arg)
- pynames.append(basename(absname))
- hnames.append(absname)
-
- for pyname, hname in zip(pynames, hnames):
- try:
- obj = self.file[hname]
- except Exception, e:
- raise CmdError("Can't import %s" % pyname)
-
- if len(re.sub('[A-Za-z_][A-Za-z0-9_]*','',pyname)) != 0:
- raise CmdError("%s is not a valid Python identifier" % pyname)
-
- if pyname in self.importdict:
- if not raw_input("Name %s already in use. Really import (y/N)? " % pyname).strip().lower().startswith('y'):
- continue
-
- importdict[pyname] = obj
-
- self.importdict.update(importdict)
-
- def complete_import(self, text, line, begidx, endidx):
- text = text.strip()
- grpname = self.abspath(dirname(text))
- targetname = basename(text)
-
- grp = self.file[grpname]
- rval = [join(grpname,x) for x in grp \
- if x.find(targetname) == 0]
- return rval
-
-
- def complete_ls(self, *args):
- return self.complete_cd(*args)
-
-
-
-
diff --git a/h5py/filters.py b/h5py/filters.py
new file mode 100644
index 0000000..e29cf32
--- /dev/null
+++ b/h5py/filters.py
@@ -0,0 +1,218 @@
+
+"""
+ Utility functions for high-level modules.
+"""
+from __future__ import with_statement
+from h5py import h5s, h5z, h5p, h5d
+import numpy as np
+
+CHUNK_BASE = 16*1024 # Multiplier by which chunks are adjusted
+CHUNK_MIN = 8*1024 # Soft lower limit (8k)
+CHUNK_MAX = 1024*1024 # Hard upper limit (1M)
+
+_COMP_FILTERS = {'gzip': h5z.FILTER_DEFLATE,
+ 'szip': h5z.FILTER_SZIP,
+ 'lzf': h5z.FILTER_LZF }
+
+DEFAULT_GZIP = 4
+DEFAULT_SZIP = ('nn', 8)
+
+def _gen_filter_tuples():
+ decode = []
+ encode = []
+ for name, code in _COMP_FILTERS.iteritems():
+ if h5z.filter_avail(code):
+ info = h5z.get_filter_info(code)
+ if info & h5z.FILTER_CONFIG_ENCODE_ENABLED:
+ encode.append(name)
+ if info & h5z.FILTER_CONFIG_DECODE_ENABLED:
+ decode.append(name)
+
+ return tuple(decode), tuple(encode)
+
+decode, encode = _gen_filter_tuples()
+
+def generate_dcpl(shape, dtype, chunks, compression, compression_opts,
+ shuffle, fletcher32, maxshape):
+ """ Generate a dataset creation property list.
+
+ Checks range and correctness of each argument. Does not check
+ for disallowed arguments.
+
+ chunks: None or tuple with len == len(shape)
+ compression: None or in 'gzip', 'lzf', 'szip'
+ compression_opts: None or <arbitrary>
+ shuffle: T/F
+ fletcher32: T/F
+ maxshape: None or tuple with len == len(shape)
+ """
+
+ # Validate and normalize arguments
+
+ if shape == ():
+ if maxshape and maxshape != ():
+ raise TypeError("Scalar datasets cannot be extended")
+ return h5p.create(h5p.DATASET_CREATE)
+
+ fletcher32 = bool(fletcher32)
+
+ def rq_tuple(tpl, name):
+ if tpl not in (None, True):
+ try:
+ tpl = tuple(tpl)
+ if len(tpl) != len(shape):
+ raise ValueError('"%s" must have same rank as dataset shape' % name)
+ except TypeError:
+ raise TypeError('"%s" argument must be None or a sequence object' % name)
+
+ rq_tuple(chunks, 'chunks')
+ rq_tuple(maxshape, 'maxshape')
+
+ if compression is not None:
+
+ if shuffle is None:
+ shuffle = True
+
+ if compression not in _COMP_FILTERS:
+ raise ValueError("Compression method must be one of %s" % ", ".join(_COMP_FILTERS))
+ if compression == 'gzip':
+ if compression_opts is None:
+ gzip_level = DEFAULT_GZIP
+ elif compression_opts in range(10):
+ gzip_level = compression_opts
+ else:
+ raise ValueError("GZIP setting must be an integer from 0-9, not %r" % compression_opts)
+ elif compression == 'lzf':
+ if compression_opts is not None:
+ raise ValueError("LZF compression filter accepts no options")
+ elif compression == 'szip':
+ if compression_opts is None:
+ compression_opts = DEFAULT_SZIP
+
+ err = "SZIP options must be a 2-tuple ('ec'|'nn', even integer 0-32)"
+ try:
+ szmethod, szpix = compression_opts
+ except TypeError:
+ raise TypeError(err)
+ if szmethod not in ('ec', 'nn'):
+ raise ValueError(err)
+ if not (0<szpix<32 and szpix%2 == 0):
+ raise ValueError(err)
+
+ # End argument validation
+
+ if (chunks is True) or \
+ (chunks is None and any((shuffle, fletcher32, compression, maxshape))):
+ chunks = guess_chunk(shape, dtype.itemsize)
+
+ if maxshape is True:
+ maxshape = (None,)*len(shape)
+
+ plist = h5p.create(h5p.DATASET_CREATE)
+ if chunks is not None:
+ plist.set_chunk(chunks)
+ plist.set_fill_time(h5d.FILL_TIME_ALLOC)
+
+ if shuffle:
+ plist.set_shuffle()
+
+ if compression == 'gzip':
+ plist.set_deflate(gzip_level)
+ elif compression == 'lzf':
+ plist.set_filter(h5z.FILTER_LZF, h5z.FLAG_OPTIONAL)
+ elif compression == 'szip':
+ opts = {'ec': h5z.SZIP_EC_OPTION_MASK, 'nn': h5z.SZIP_NN_OPTION_MASK}
+ plist.set_szip(opts[szmethod], szpix)
+
+ if fletcher32:
+ plist.set_fletcher32()
+
+ return plist
+
+def get_filters(plist):
+ """ Extract a dictionary of active filters from a DCPL, along with
+ their settings
+ """
+
+ filters = {h5z.FILTER_DEFLATE: 'gzip', h5z.FILTER_SZIP: 'szip',
+ h5z.FILTER_SHUFFLE: 'shuffle', h5z.FILTER_FLETCHER32: 'fletcher32',
+ h5z.FILTER_LZF: 'lzf'}
+ szopts = {h5z.SZIP_EC_OPTION_MASK: 'ec', h5z.SZIP_NN_OPTION_MASK: 'nn'}
+
+ pipeline = {}
+
+ nfilters = plist.get_nfilters()
+
+ for i in range(nfilters):
+
+ code, flags, vals, desc = plist.get_filter(i)
+
+ if code == h5z.FILTER_DEFLATE:
+ vals = vals[0] # gzip level
+
+ elif code == h5z.FILTER_SZIP:
+ mask, pixels = vals[0:2]
+ if mask & h5z.SZIP_EC_OPTION_MASK:
+ mask = 'ec'
+ elif mask & h5z.SZIP_NN_OPTION_MASK:
+ mask = 'nn'
+ else:
+ raise TypeError("Unknown SZIP configuration")
+ vals = (mask, pixels)
+ else:
+ if len(vals) == 0:
+ vals = None
+
+ pipeline[filters.get(code, str(code))] = vals
+
+ return pipeline
+
+def guess_chunk(shape, typesize):
+ """ Guess an appropriate chunk layout for a dataset, given its shape and
+ the size of each element in bytes. Will allocate chunks only as large
+        as CHUNK_MAX.  Chunks are generally close to some power-of-2 fraction of
+ each axis, slightly favoring bigger values for the last index.
+ """
+
+ ndims = len(shape)
+ if ndims == 0:
+ raise ValueError("Chunks not allowed for scalar datasets.")
+
+ chunks = np.array(shape, dtype='=f8')
+
+ # Determine the optimal chunk size in bytes using a PyTables expression.
+ # This is kept as a float.
+ dset_size = np.product(chunks)*typesize
+ target_size = CHUNK_BASE * (2**np.log10(dset_size/(1024.*1024)))
+
+ if target_size > CHUNK_MAX:
+ target_size = CHUNK_MAX
+ elif target_size < CHUNK_MIN:
+ target_size = CHUNK_MIN
+
+ idx = 0
+ while True:
+ # Repeatedly loop over the axes, dividing them by 2. Stop when:
+ # 1a. We're smaller than the target chunk size, OR
+ # 1b. We're within 50% of the target chunk size, AND
+ # 2. The chunk is smaller than the maximum chunk size
+
+ chunk_bytes = np.product(chunks)*typesize
+
+ if (chunk_bytes < target_size or \
+ abs(chunk_bytes-target_size)/target_size < 0.5) and \
+ chunk_bytes < CHUNK_MAX:
+ break
+
+ chunks[idx%ndims] = np.ceil(chunks[idx%ndims] / 2.0)
+ idx += 1
+
+ return tuple(long(x) for x in chunks)
+
+
+
+
+
+
+
+
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index 98f2f8e..8afdd92 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -46,26 +46,30 @@ from __future__ import with_statement
import os
import numpy
-import inspect
import threading
import sys
import warnings
+import os.path as op
+import posixpath as pp
+
from h5py import h5, h5f, h5g, h5s, h5t, h5d, h5a, h5p, h5z, h5i
from h5py.h5 import H5Error
-import utils_hl as uhl
-from utils_hl import slice_select, hbasename, guess_chunk
-from utils_hl import CoordsList
-from browse import _H5Browser
import h5py.selections as sel
-import posixpath as pp
+
+import filters
config = h5.get_config()
if config.API_18:
from h5py import h5o, h5l
__all__ = ["File", "Group", "Dataset",
- "Datatype", "AttributeManager", "CoordsList"]
+ "Datatype", "AttributeManager"]
+
+def _hbasename(name):
+ """ Basename function with more readable handling of trailing slashes"""
+ name = pp.basename(pp.normpath(name))
+ return name if name != '' else '/'
def is_hdf5(fname):
fname = os.path.abspath(fname)
@@ -477,7 +481,7 @@ class Group(HLObject, _DictCompat):
with self._lock:
try:
return '<HDF5 group "%s" (%d members)>' % \
- (hbasename(self.name), len(self))
+ (_hbasename(self.name), len(self))
except Exception:
return "<Closed HDF5 group>"
@@ -674,8 +678,8 @@ class Dataset(HLObject):
def __init__(self, group, name,
shape=None, dtype=None, data=None,
- chunks=None, compression=None, shuffle=False,
- fletcher32=False, maxshape=None, compression_opts=None):
+ chunks=None, compression=None, shuffle=None,
+ fletcher32=None, maxshape=None, compression_opts=None):
""" Open or create a new dataset in the file.
It's recommended you use the Group methods (open via Group["name"],
@@ -761,7 +765,7 @@ class Dataset(HLObject):
# Generate the dataset creation property list
# This also validates the keyword arguments
- plist = uhl.generate_dcpl(shape, dtype, chunks, compression,
+ plist = filters.generate_dcpl(shape, dtype, chunks, compression,
compression_opts, shuffle, fletcher32, maxshape)
if maxshape is not None:
@@ -776,7 +780,7 @@ class Dataset(HLObject):
self._attrs = AttributeManager(self)
plist = self.id.get_create_plist()
- self._filters = uhl.get_filters(plist)
+ self._filters = filters.get_filters(plist)
if plist.get_layout() == h5d.CHUNKED:
self._chunks = plist.get_chunk()
else:
@@ -996,7 +1000,7 @@ class Dataset(HLObject):
with self._lock:
try:
return '<HDF5 dataset "%s": shape %s, type "%s">' % \
- (hbasename(self.name), self.shape, self.dtype.str)
+ (_hbasename(self.name), self.shape, self.dtype.str)
except Exception:
return "<Closed HDF5 dataset>"
@@ -1095,7 +1099,7 @@ class AttributeManager(LockableObject, _DictCompat):
with self._lock:
try:
return '<Attributes of HDF5 object "%s" (%d)>' % \
- (hbasename(h5i.get_name(self.id)), len(self))
+ (_hbasename(h5i.get_name(self.id)), len(self))
except Exception:
return "<Attributes of closed HDF5 object>"
@@ -1133,7 +1137,7 @@ class Datatype(HLObject):
with self._lock:
try:
return '<HDF5 named type "%s" (dtype %s)>' % \
- (hbasename(self.name), self.dtype.str)
+ (_hbasename(self.name), self.dtype.str)
except Exception:
return "<Closed HDF5 named type>"
diff --git a/h5py/lzf_filter.c b/h5py/lzf_filter.c
index df5be88..a662f55 100644
--- a/h5py/lzf_filter.c
+++ b/h5py/lzf_filter.c
@@ -82,6 +82,18 @@ int register_lzf(void){
return retval;
}
+void printbytes(char *buffer, int nbytes){
+
+ int i;
+ char c;
+ for(i=0; i<nbytes; i++){
+ c = buffer[i];
+ fprintf(stderr, "%03u ", c);
+ if(i%20==0){
+ fprintf(stderr, "\n");
+ }
+ }
+}
/* The filter function */
size_t lzf_filter(unsigned flags, size_t cd_nelmts,
const unsigned cd_values[], size_t nbytes,
@@ -92,7 +104,6 @@ size_t lzf_filter(unsigned flags, size_t cd_nelmts,
unsigned int status = 0; /* Return code from lzf routines */
-
/* We're compressing */
if(!(flags & H5Z_FLAG_REVERSE)){
@@ -107,56 +118,51 @@ size_t lzf_filter(unsigned flags, size_t cd_nelmts,
status = lzf_compress(*buf, nbytes, outbuf, outbuf_size);
- if(status == 0){
- free(outbuf);
- }
-
- return status;
- }
-
/* We're decompressing */
+ } else {
- outbuf_size = (*buf_size);
+ outbuf_size = (*buf_size);
- while(!status){
-
- free(outbuf);
- outbuf = malloc(outbuf_size);
+ while(!status){
+
+ free(outbuf);
+ outbuf = malloc(outbuf_size);
- status = lzf_decompress(*buf, nbytes, outbuf, outbuf_size);
+ status = lzf_decompress(*buf, nbytes, outbuf, outbuf_size);
- /* compression failed */
- if(!status){
+ /* compression failed */
+ if(!status){
- /* Output buffer too small; make it bigger */
- if(errno == E2BIG){
+ /* Output buffer too small; make it bigger */
+ if(errno == E2BIG){
#ifdef H5PY_LZF_DEBUG
- fprintf(stderr, "LZF filter: Buffer guess too small: %d", outbuf_size);
+ fprintf(stderr, "LZF filter: Buffer guess too small: %d", outbuf_size);
#endif
- outbuf_size += (*buf_size);
- if(outbuf_size > H5PY_LZF_MAX_BUF){
- PUSH_ERR("lzf_filter", H5E_CALLBACK, "Requested LZF buffer too big");
+ outbuf_size += (*buf_size);
+ if(outbuf_size > H5PY_LZF_MAX_BUF){
+ PUSH_ERR("lzf_filter", H5E_CALLBACK, "Requested LZF buffer too big");
+ goto failed;
+ }
+
+ /* Horrible internal error (data corruption) */
+ } else if(errno == EINVAL) {
+
+ PUSH_ERR("lzf_filter", H5E_CALLBACK, "Invalid data for LZF decompression");
goto failed;
- }
- /* Horrible internal error (data corruption) */
- } else if(errno == EINVAL) {
- PUSH_ERR("lzf_filter", H5E_CALLBACK, "Invalid data for LZF decompression");
- goto failed;
+ /* Unknown error */
+ } else {
+ PUSH_ERR("lzf_filter", H5E_CALLBACK, "Unknown LZF decompression error");
+ goto failed;
+ }
- /* Unknown error */
- } else {
- PUSH_ERR("lzf_filter", H5E_CALLBACK, "Unknown LZF decompression error");
- goto failed;
- }
+ } /* if !status */
- } /* if !status */
+ } /* while !status */
- } /* while !status */
-
+ } /* compressing vs decompressing */
- /* If compression/decompression successful, swap buffers */
- if(status){
+ if(status != 0){
free(*buf);
*buf = outbuf;
diff --git a/h5py/tests/common.py b/h5py/tests/common.py
index 0e0b704..b7ad4b5 100644
--- a/h5py/tests/common.py
+++ b/h5py/tests/common.py
@@ -46,6 +46,16 @@ def covers(*args):
return wrap
+def makehdf():
+ fname = tempfile.mktemp('.hdf5')
+ f = h5py.File(fname, 'w')
+ return f
+
+def delhdf(f):
+ fname = f.name
+ f.close()
+ os.unlink(fname)
+
class HDF5TestCase(unittest.TestCase):
"""
diff --git a/h5py/tests/test_slicing.py b/h5py/tests/test_slicing.py
index e025e47..74b2e2e 100644
--- a/h5py/tests/test_slicing.py
+++ b/h5py/tests/test_slicing.py
@@ -1,9 +1,11 @@
import numpy as np
-import h5py
-import tempfile
import os
from nose.tools import assert_equal
+from common import makehdf, delhdf
+
+import h5py
+
def check_arr_equal(dset, arr):
""" Make sure dset and arr have the same shape, dtype and contents.
@@ -28,13 +30,10 @@ s = SliceFreezer()
class TestSlicing(object):
def setUp(self):
- fname = tempfile.mktemp('.hdf5')
- self.f = h5py.File(fname, 'w')
- self.fname = fname
+ self.f = makehdf()
def tearDown(self):
- self.f.close()
- os.unlink(self.fname)
+ delhdf(self.f)
def generate(self, shape, dtype):
if 'dset' in self.f:
diff --git a/h5py/utils_hl.py b/h5py/utils_hl.py
deleted file mode 100644
index c5cfe23..0000000
--- a/h5py/utils_hl.py
+++ /dev/null
@@ -1,414 +0,0 @@
-
-"""
- Utility functions for high-level modules.
-"""
-from __future__ import with_statement
-from h5py import h5s, h5z, h5p, h5d
-
-from posixpath import basename, normpath
-import numpy
-
-CHUNK_BASE = 16*1024 # Multiplier by which chunks are adjusted
-MIN_CHUNK = 8*1024 # Soft lower limit (8k)
-MAX_CHUNK = 1024*1024 # Hard upper limit (1M)
-
-def hbasename(name):
- """ Basename function with more readable handling of trailing slashes"""
- bname = normpath(name)
- bname = basename(bname)
- if bname == '':
- bname = '/'
- return bname
-
-COMP_FILTERS = {'gzip': h5z.FILTER_DEFLATE,
- 'szip': h5z.FILTER_SZIP,
- 'lzf': h5z.FILTER_LZF }
-
-def generate_dcpl(shape, dtype, chunks, compression, compression_opts,
- shuffle, fletcher32, maxshape):
- """ Generate a dataset creation property list.
-
- Checks range and correctness of each argument. Does not check
- for disallowed arguments.
-
- chunks: None or tuple with len == len(shape)
- compression: None or in 'gzip', 'lzf', 'szip'
- compression_opts: None or <arbitrary>
- shuffle: T/F
- fletcher32: T/F
- maxshape: None or tuple with len == len(shape)
- """
-
- # Validate and normalize arguments
-
- shuffle = bool(shuffle)
- fletcher32 = bool(fletcher32)
-
- def rq_tuple(tpl, name):
- if tpl not in (None, True):
- try:
- tpl = tuple(tpl)
- if len(tpl) != len(shape):
- raise ValueError('"%s" must have same rank as dataset shape' % name)
- except TypeError:
- raise TypeError('"%s" argument must be None or a sequence object' % name)
-
- rq_tuple(chunks, 'chunks')
- rq_tuple(maxshape, 'maxshape')
-
- if compression is not None:
- if compression not in COMP_FILTERS:
- raise ValueError("Compression method must be one of %s" % ", ".join(COMP_FILTERS))
- if compression == 'gzip':
- if compression_opts is None:
- gzip_level = 4
- elif compression_opts in range(10):
- gzip_level = compression_opts
- else:
- raise ValueError("GZIP setting must be an integer from 0-9, not %r" % compression_opts)
- elif compression == 'lzf':
- if compression_opts is not None:
- raise ValueError("LZF compression filter accepts no options")
- elif compression == 'szip':
- if compression_opts is None:
- compression_opts = (h5z.SZIP_NN_OPTION_MASK, 8)
- else:
- err = "SZIP options must be a 2-tuple ('ec'|'nn', even integer 0-32)"
- try:
- szmethod, szpix = compression_opts
- except TypeError:
- raise TypeError(err)
- if szmethod not in ('ec', 'nn'):
- raise ValueError(err)
- if not (0<szpix<32 and szpix%2 != 0):
- raise ValueError(err)
-
- # End argument validation
-
- if (chunks is True) or \
- (chunks is None and any((shuffle, fletcher32, compression, maxshape))):
- if shape == ():
- raise TypeError("Compression cannot be used with scalar datasets")
- chunks = guess_chunk(shape, dtype.itemsize)
-
- if maxshape is True:
- maxshape = (None,)*len(shape)
-
- plist = h5p.create(h5p.DATASET_CREATE)
- if chunks is not None:
- plist.set_chunk(chunks)
- plist.set_fill_time(h5d.FILL_TIME_ALLOC)
-
- if shuffle:
- plist.set_shuffle()
-
- if compression == 'gzip':
- plist.set_deflate(gzip_level)
- elif compression == 'lzf':
- plist.set_filter(h5z.FILTER_LZF, h5z.FLAG_OPTIONAL)
- elif compression == 'szip':
- opts = {'ec': h5z.SZIP_EC_OPTION_MASK, 'nn': h5z.SZIP_NN_OPTION_MASK}
- plist.set_szip(opts[szmethod], szpix)
-
- if fletcher32:
- plist.set_fletcher32()
-
- return plist
-
-def get_filters(plist):
- """ Extract a dictionary of active filters from a DCPL, along with
- their settings
- """
-
- filters = {h5z.FILTER_DEFLATE: 'gzip', h5z.FILTER_SZIP: 'szip',
- h5z.FILTER_SHUFFLE: 'shuffle', h5z.FILTER_FLETCHER32: 'fletcher32',
- h5z.FILTER_LZF: 'lzf'}
-
- pipeline = {}
-
- nfilters = plist.get_nfilters()
-
- for i in range(nfilters):
- code, flags, vals, desc = plist.get_filter(i)
- if len(vals) == 0:
- vals = None
- elif len(vals) == 1:
- vals = vals[0]
- pipeline[filters.get(code, str(code))] = vals
-
- return pipeline
-
-def guess_chunk(shape, typesize):
- """ Guess an appropriate chunk layout for a dataset, given its shape and
- the size of each element in bytes. Will allocate chunks only as large
- as MAX_SIZE. Chunks are generally close to some power-of-2 fraction of
- each axis, slightly favoring bigger values for the last index.
- """
-
- ndims = len(shape)
- if ndims == 0:
- raise ValueError("Chunks not allowed for scalar datasets.")
-
- chunks = numpy.array(shape, dtype='=f8')
-
- # Determine the optimal chunk size in bytes using a PyTables expression.
- # This is kept as a float.
- dset_size = numpy.product(chunks)*typesize
- target_size = CHUNK_BASE * (2**numpy.log10(dset_size/(1024.*1024)))
-
- if target_size > MAX_CHUNK:
- target_size = MAX_CHUNK
- elif target_size < MIN_CHUNK:
- target_size = MIN_CHUNK
-
- idx = 0
- while True:
- # Repeatedly loop over the axes, dividing them by 2. Stop when:
- # 1a. We're smaller than the target chunk size, OR
- # 1b. We're within 50% of the target chunk size, AND
- # 2. The chunk is smaller than the maximum chunk size
-
- chunk_bytes = numpy.product(chunks)*typesize
-
- if (chunk_bytes < target_size or \
- abs(chunk_bytes-target_size)/target_size < 0.5) and \
- chunk_bytes < MAX_CHUNK:
- break
-
- chunks[idx%ndims] = numpy.ceil(chunks[idx%ndims] / 2.0)
- idx += 1
-
- return tuple(long(x) for x in chunks)
-
-class CoordsList(object):
-
- """
- Wrapper class for efficient access to sequences of sparse or
- irregular coordinates. Construct from either a single index
- (a rank-length sequence of numbers), or a sequence of such
- indices:
-
- CoordsList( (0,1,4) ) # Single index
- CoordsList( [ (1,2,3), (7,8,9) ] ) # Multiple indices
- """
-
- npoints = property(lambda self: len(self.coords),
- doc = "Number of selected points")
-
- def __init__(self, points):
- """ Create a new list of explicitly selected points.
-
- CoordsList( (0,1,4) ) # Single index
- CoordsList( [ (1,2,3), (7,8,9) ] ) # Multiple indices
- """
-
- try:
- self.coords = numpy.asarray(points, dtype='=u8')
- except ValueError:
- raise ValueError("Selection should be an index or a sequence of equal-rank indices")
-
- if len(self.coords) == 0:
- pass # This will be caught at index-time
- elif self.coords.ndim == 1:
- self.coords.resize((1,len(self.coords)))
- elif self.coords.ndim != 2:
- raise ValueError("Selection should be an index or a sequence of equal-rank indices")
-
-
-def slice_select(space, args):
- """ Perform a selection on the given HDF5 dataspace, using a tuple
- of Python extended slice objects. The dataspace may be scalar or
- simple. The following selection mechanisms are implemented:
-
- 1. select_all:
- 0-tuple
- 1-tuple containing Ellipsis
-
- 2. Hyperslab selection
- n-tuple (n>1) containing slice/integer/Ellipsis objects
-
- 3. Discrete element selection
- 1-tuple containing boolean array or FlatIndexer
-
- The return value is a 2-tuple:
- 1. Appropriate memory dataspace to use for new array
- 2. Boolean indicating if the slice should result in a scalar quantity
- """
- shape = space.shape
- rank = len(shape)
- space.set_extent_simple(shape, (h5s.UNLIMITED,)*rank)
-
- if len(args) == 0 or (len(args) == 1 and args[0] is Ellipsis):
- # The only safe way to access a scalar dataspace
- space.select_all()
- return space.copy(), False
- else:
- if space.get_simple_extent_type() == h5s.SCALAR:
- raise TypeError('Can\'t slice a scalar dataset (only fields and "..." allowed)')
-
- if len(args) == 1:
- argval = args[0]
-
- if isinstance(argval, numpy.ndarray):
- # Boolean array indexing is handled by discrete element selection
- # It never results in a scalar value
- indices = numpy.transpose(argval.nonzero())
- if len(indices) == 0:
- space.select_none()
- else:
- space.select_elements(indices)
- return h5s.create_simple((len(indices),), (h5s.UNLIMITED,)), False
-
- if isinstance(argval, CoordsList):
- # Coords indexing also uses discrete selection
- if len(argval.coords) == 0:
- space.select_none()
- npoints = 0
- elif argval.coords.ndim != 2 or argval.coords.shape[1] != rank:
- raise ValueError("Coordinate list incompatible with %d-rank dataset" % rank)
- else:
- space.select_elements(argval.coords)
- npoints = space.get_select_elem_npoints()
- return h5s.create_simple((npoints,), (h5s.UNLIMITED,)), len(argval.coords) == 1
-
- # Proceed to hyperslab selection
-
- # First expand (at most 1) ellipsis object
-
- n_el = list(args).count(Ellipsis)
- if n_el > 1:
- raise ValueError("Only one ellipsis may be used.")
- elif n_el == 0 and len(args) != rank:
- args = args + (Ellipsis,) # Simple version of NumPy broadcasting
-
- final_args = []
- n_args = len(args)
-
- for idx, arg in enumerate(args):
-
- if arg == Ellipsis:
- final_args.extend( (slice(None,None,None),)*(rank-n_args+1) )
- else:
- final_args.append(arg)
-
-
- # Hyperslab selection
-
- space.select_all()
-
- def perform_selection(start, count, step, idx, op=h5s.SELECT_AND):
- """ Performs a selection using start/count/step in the given axis.
-
- All other axes have their full range selected. The selection is
- added to the current dataspace selection using the given operator,
- defaulting to AND.
-
- All arguments are ints.
- """
-
- shape = space.shape
-
- start = tuple(0 if i != idx else start for i, x in enumerate(shape) )
- count = tuple(x if i != idx else count for i, x in enumerate(shape) )
- step = tuple(1 if i != idx else step for i, x in enumerate(shape) )
-
- space.select_hyperslab(start, count, step, op=op)
-
- def validate_number(num, length):
- """ Make sure the given object can be converted to a positive int
- smaller than the length.
- """
- try:
- num = long(num)
- except TypeError:
- raise TypeError("Illegal index: %r" % num)
- if num > length-1:
- raise IndexError('Index out of bounds: %d' % num)
- if num < 0:
- raise IndexError('Negative index not allowed: %d' % num)
-
- mshape = []
-
- for idx, (length, exp) in enumerate(zip(shape,final_args)):
-
- if isinstance(exp, slice):
-
- start, stop, step = exp.start, exp.stop, exp.step
- start = 0 if start is None else int(start)
- stop = length if stop is None else int(stop)
- step = 1 if step is None else int(step)
-
- if start < 0:
- raise ValueError("Negative start index not allowed (got %d)" % start)
- if step < 1:
- raise ValueError("Step must be >= 1 (got %d)" % step)
- if stop < 0:
- raise ValueError("Negative stop index not allowed (got %d)" % stop)
-
- count = (stop-start)//step
- if (stop-start) % step != 0:
- count += 1
-
- if start+count > length:
- raise ValueError("Selection out of bounds on axis %d" % idx)
-
- perform_selection(start, count, step, idx)
-
- mshape.append(count)
-
- else: # either an index or list of indices
-
- if not isinstance(exp, list):
- exp = [exp]
- mshape.append(0)
- else:
- mshape.append(len(exp))
-
- if len(exp) == 0:
- raise TypeError("Empty selections are not allowed (axis %d)" % idx)
-
- if sorted(exp) != exp:
- raise TypeError("Selection list must be provided in increasing order (axis %d)" % idx)
-
- for x in exp:
- validate_number(x, length)
-
- for select_idx in xrange(len(exp)+1):
-
- if select_idx == 0:
- start = 0
- count = exp[0]
- elif select_idx == len(exp):
- start = exp[-1]+1
- count = length-start
- else:
- start = exp[select_idx-1]+1
- count = exp[select_idx] - start
- if count > 0:
- perform_selection(start, count, 1, idx, op=h5s.SELECT_NOTB)
-
- mshape_final = tuple(x for x in mshape if x != 0)
- mspace = h5s.create_simple(mshape_final, (h5s.UNLIMITED,)*len(mshape_final))
-
- return mspace, (len(mshape_final) == 0)
-
-def strhdr(line, char='-'):
- """ Print a line followed by an ASCII-art underline """
- return line + "\n%s\n" % (char*len(line))
-
-def strlist(lst, keywidth=10):
- """ Print a list of (key: value) pairs, with column alignment. """
- format = "%-"+str(keywidth)+"s %s\n"
-
- outstr = ''
- for key, val in lst:
- outstr += format % (key+':',val)
-
- return outstr
-
-
-
-
-
-
-
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git
More information about the debian-science-commits
mailing list