[h5py] 179/455: Start redoing HL compression interface
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:30 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.
commit bffedbb29b3ae4d76db4136e244604f2f4b2f387
Author: andrewcollette <andrew.collette at gmail.com>
Date: Wed Dec 10 03:49:18 2008 +0000
Start redoing HL compression interface
---
h5py/highlevel.py | 83 +++++++++++++++---------------
h5py/tests/test_highlevel.py | 5 ++
h5py/utils_hl.py | 120 ++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 165 insertions(+), 43 deletions(-)
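For orientation before the diff: under the reworked interface, "compression" names a strategy and the filter's setting travels separately in "compression_opts". A minimal, hedged sketch of the intended usage (it assumes the usual Group.create_dataset wrapper forwards these keywords to the Dataset constructor patched below; the filename is illustrative):

    import numpy
    import h5py

    f = h5py.File('example.h5', 'w')   # illustrative filename
    try:
        data = numpy.arange(100).reshape((10, 10))

        # New-style keywords: a strategy name plus a separate setting
        dset = f.create_dataset('gzipped', data=data,
                                compression='gzip', compression_opts=4)

        # LZF takes no setting; shuffle and fletcher32 stay booleans
        f.create_dataset('lzf', data=data, compression='lzf',
                         shuffle=True, fletcher32=True)

        print dset.compression        # 'gzip'
        print dset.compression_opts   # 4
    finally:
        f.close()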
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index 354274f..bece59d 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -53,6 +53,7 @@ import warnings
 from h5py import h5, h5f, h5g, h5s, h5t, h5d, h5a, h5p, h5z, h5i
 from h5py.h5 import H5Error
+import utils_hl as uhl
 from utils_hl import slice_select, hbasename, guess_chunk
 from utils_hl import CoordsList
 from browse import _H5Browser
@@ -614,31 +615,30 @@ class Dataset(HLObject):
     @property
     def chunks(self):
         """Dataset chunks (or None)"""
-        try:
-            return self._plist.get_chunk()
-        except H5Error:
-            return None
+        return self._chunks

     @property
     def compression(self):
-        """Compression level (or None)"""
-        filt = self._plist.get_filter_by_id(h5z.FILTER_DEFLATE)
-        if filt is not None:
-            return filt[1][0]
-        filt = self._plist.get_filter_by_id(h5z.FILTER_LZF)
-        if filt is not None:
-            return 'lzf'
+        """Compression strategy (or None)"""
+        for x in ('gzip','lzf','szip'):
+            if x in self._filters:
+                return x
         return None

     @property
+    def compression_opts(self):
+        """ Compression setting. Int(0-9) for gzip, 2-tuple for szip. """
+        return self._filters.get(self.compression, None)
+
+    @property
     def shuffle(self):
         """Shuffle filter present (T/F)"""
-        return self._plist.get_filter_by_id(h5z.FILTER_SHUFFLE) is not None
+        return 'shuffle' in self._filters

     @property
     def fletcher32(self):
         """Fletcher32 filter is present (T/F)"""
-        return self._plist.get_filter_by_id(h5z.FILTER_FLETCHER32) is not None
+        return 'fletcher32' in self._filters

     @property
     def maxshape(self):
@@ -649,7 +649,7 @@
     def __init__(self, group, name,
                 shape=None, dtype=None, data=None,
                 chunks=None, compression=None, shuffle=False,
-                fletcher32=False, maxshape=None):
+                fletcher32=False, maxshape=None, compression_opts=None):
         """ Open or create a new dataset in the file.

         It's recommended you use the Group methods (open via Group["name"],
@@ -675,12 +675,14 @@ class Dataset(HLObject):
         Creation keywords (* is default):

         chunks:        Tuple of chunk dimensions, True, or None*
-        compression:   DEFLATE (gzip) compression level, int or None*
+        compression:   "gzip", "lzf", or "szip" (if available)
         shuffle:       Use the shuffle filter? (requires compression) T/F*
         fletcher32:    Enable Fletcher32 error detection? T/F*
         maxshape:      Tuple giving dataset maximum dimensions or None*.
                        You can grow each axis up to this limit using
                        resize(). For each unlimited axis, provide None.
+
+        compression_opts: Optional setting for the compression filter

         All these options require chunking. If a chunk tuple is not
         provided, the constructor will guess an appropriate chunk shape.
@@ -688,7 +690,7 @@ class Dataset(HLObject):
         """
         with group._lock:
             if data is None and shape is None:
-                if any((data,dtype,shape,chunks,compression,shuffle,fletcher32)):
+                if any((dtype,chunks,compression,shuffle,fletcher32)):
                     raise ValueError('You cannot specify keywords when opening a dataset.')
                 self.id = h5d.open(group.id, name)
             else:
@@ -702,7 +704,6 @@ class Dataset(HLObject):
                     if data is None:
                         raise TypeError("Either data or shape must be specified")
                     shape = data.shape
-
                 else:
                     shape = tuple(shape)
                     if data is not None and (numpy.product(shape) != numpy.product(data.shape)):
@@ -716,33 +717,26 @@ class Dataset(HLObject):
                 else:
                     dtype = numpy.dtype(dtype)

-                # Generate chunks if necessary
-                if any((compression, shuffle, fletcher32, maxshape)) or chunks is True:
+                # Legacy
+                if any((compression, shuffle, fletcher32, maxshape)):
                     if chunks is False:
                         raise ValueError("Chunked format required for given storage options")
-                    if chunks in (True, None):
-                        chunks = guess_chunk(shape, dtype.itemsize)
-
-                if chunks and shape == ():
-                    raise ValueError("Filter options cannot be used with scalar datasets.")
-
-                plist = h5p.create(h5p.DATASET_CREATE)
-                if chunks:
-                    plist.set_chunk(tuple(chunks))
-                    plist.set_fill_time(h5d.FILL_TIME_ALLOC)
-                if shuffle:
-                    plist.set_shuffle()
-                if compression:
-                    if compression is True:
-                        compression = 6
-                    if compression in range(10):
-                        plist.set_deflate(compression)
-                    elif compression == 'lzf':
-                        plist.set_filter(h5z.FILTER_LZF, h5z.FLAG_OPTIONAL)
+
+                # Legacy
+                if compression in range(10) or compression is True:
+                    if compression_opts is None:
+                        if compression is True:
+                            compression_opts = 4
+                        else:
+                            compression_opts = compression
                     else:
-                        raise ValueError('Compression must be 0-9 or "lzf"')
-                if fletcher32:
-                    plist.set_fletcher32()
+                        raise TypeError("Conflict in compression options")
+                    compression = 'gzip'
+
+                # Generate the dataset creation property list
+                # This also validates the keyword arguments
+                plist = uhl.generate_dcpl(shape, dtype, chunks, compression,
+                                          compression_opts, shuffle, fletcher32, maxshape)

                 if maxshape is not None:
                     maxshape = tuple(x if x is not None else h5s.UNLIMITED for x in maxshape)
@@ -755,7 +749,12 @@ class Dataset(HLObject):
                     self.id.write(h5s.ALL, h5s.ALL, data)

             self._attrs = AttributeManager(self)
-            self._plist = self.id.get_create_plist()
+            plist = self.id.get_create_plist()
+            self._filters = uhl.get_filters(plist)
+            if plist.get_layout() == h5d.CHUNKED:
+                self._chunks = plist.get_chunk()
+            else:
+                self._chunks = None

     def extend(self, shape):
         """ Deprecated. Use resize() instead. """
diff --git a/h5py/tests/test_highlevel.py b/h5py/tests/test_highlevel.py
index 986543c..78c0262 100644
--- a/h5py/tests/test_highlevel.py
+++ b/h5py/tests/test_highlevel.py
@@ -236,6 +236,11 @@ class TestDataset(HDF5TestCase):
             elif value is True:
                 self.assert_(getattr(hdf, name) is not None,
                              "True kwd ignored: %s" % name)
+            elif name == 'compression':
+                cname = getattr(hdf,name)
+                cval = getattr(hdf, 'compression_opts')
+                self.assertEqual(cname, 'gzip')
+                self.assertEqual(cval, value)
             else:
                 self.assertEqual(getattr(hdf, name), value,
                                  "kwd mismatch: %s: %s %s" % (name, getattr(hdf, name), value))
diff --git a/h5py/utils_hl.py b/h5py/utils_hl.py
index a2ffe46..c5cfe23 100644
--- a/h5py/utils_hl.py
+++ b/h5py/utils_hl.py
@@ -3,7 +3,7 @@
     Utility functions for high-level modules.
 """
 from __future__ import with_statement
-from h5py import h5s
+from h5py import h5s, h5z, h5p, h5d
 from posixpath import basename, normpath
 import numpy
@@ -20,6 +20,124 @@ def hbasename(name):
         bname = '/'
     return bname

+COMP_FILTERS = {'gzip': h5z.FILTER_DEFLATE,
+                'szip': h5z.FILTER_SZIP,
+                'lzf':  h5z.FILTER_LZF}
+
+def generate_dcpl(shape, dtype, chunks, compression, compression_opts,
+                  shuffle, fletcher32, maxshape):
+    """ Generate a dataset creation property list.
+
+        Checks range and correctness of each argument. Does not check
+        for disallowed arguments.
+
+        chunks:           None or tuple with len == len(shape)
+        compression:      None or one of 'gzip', 'lzf', 'szip'
+        compression_opts: None or <arbitrary>
+        shuffle:          T/F
+        fletcher32:       T/F
+        maxshape:         None or tuple with len == len(shape)
+    """
+
+    # Validate and normalize arguments
+
+    shuffle = bool(shuffle)
+    fletcher32 = bool(fletcher32)
+
+    def rq_tuple(tpl, name):
+        if tpl not in (None, True):
+            try:
+                tpl = tuple(tpl)
+                if len(tpl) != len(shape):
+                    raise ValueError('"%s" must have same rank as dataset shape' % name)
+            except TypeError:
+                raise TypeError('"%s" argument must be None or a sequence object' % name)
+
+    rq_tuple(chunks, 'chunks')
+    rq_tuple(maxshape, 'maxshape')
+
+    if compression is not None:
+        if compression not in COMP_FILTERS:
+            raise ValueError("Compression method must be one of %s" % ", ".join(COMP_FILTERS))
+        if compression == 'gzip':
+            if compression_opts is None:
+                gzip_level = 4
+            elif compression_opts in range(10):
+                gzip_level = compression_opts
+            else:
+                raise ValueError("GZIP setting must be an integer from 0-9, not %r" % compression_opts)
+        elif compression == 'lzf':
+            if compression_opts is not None:
+                raise ValueError("LZF compression filter accepts no options")
+        elif compression == 'szip':
+            if compression_opts is None:
+                compression_opts = ('nn', 8)
+            err = "SZIP options must be a 2-tuple ('ec'|'nn', even integer 0-32)"
+            try:
+                szmethod, szpix = compression_opts
+            except TypeError:
+                raise TypeError(err)
+            if szmethod not in ('ec', 'nn'):
+                raise ValueError(err)
+            if not (0 < szpix <= 32 and szpix % 2 == 0):
+                raise ValueError(err)
+
+    # End argument validation
+
+    if (chunks is True) or \
+       (chunks is None and any((shuffle, fletcher32, compression, maxshape))):
+        if shape == ():
+            raise TypeError("Compression cannot be used with scalar datasets")
+        chunks = guess_chunk(shape, dtype.itemsize)
+
+    if maxshape is True:
+        maxshape = (None,)*len(shape)
+
+    plist = h5p.create(h5p.DATASET_CREATE)
+    if chunks is not None:
+        plist.set_chunk(chunks)
+        plist.set_fill_time(h5d.FILL_TIME_ALLOC)
+
+    if shuffle:
+        plist.set_shuffle()
+
+    if compression == 'gzip':
+        plist.set_deflate(gzip_level)
+    elif compression == 'lzf':
+        plist.set_filter(h5z.FILTER_LZF, h5z.FLAG_OPTIONAL)
+    elif compression == 'szip':
+        opts = {'ec': h5z.SZIP_EC_OPTION_MASK, 'nn': h5z.SZIP_NN_OPTION_MASK}
+        plist.set_szip(opts[szmethod], szpix)
+
+    if fletcher32:
+        plist.set_fletcher32()
+
+    return plist
+
+def get_filters(plist):
+    """ Extract a dictionary of active filters from a DCPL, along with
+        their settings
+    """
+
+    filters = {h5z.FILTER_DEFLATE: 'gzip', h5z.FILTER_SZIP: 'szip',
+               h5z.FILTER_SHUFFLE: 'shuffle', h5z.FILTER_FLETCHER32: 'fletcher32',
+               h5z.FILTER_LZF: 'lzf'}
+
+    pipeline = {}
+
+    nfilters = plist.get_nfilters()
+
+    for i in range(nfilters):
+        code, flags, vals, desc = plist.get_filter(i)
+        if len(vals) == 0:
+            vals = None
+        elif len(vals) == 1:
+            vals = vals[0]
+        pipeline[filters.get(code, str(code))] = vals
+
+    return pipeline
+
 def guess_chunk(shape, typesize):
     """ Guess an appropriate chunk layout for a dataset, given its shape and
     the size of each element in bytes. Will allocate chunks only as large
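get_filters() flattens the DCPL filter pipeline into a name -> setting dict, which the new Dataset properties then probe. A small sketch of that consumption, using a hand-built dict imitating a DCPL carrying gzip level 4, shuffle, and fletcher32:

    filters = {'gzip': 4, 'shuffle': None, 'fletcher32': None}

    def compression(filters):
        # First matching strategy wins, as in Dataset.compression
        for name in ('gzip', 'lzf', 'szip'):
            if name in filters:
                return name
        return None

    assert compression(filters) == 'gzip'
    assert filters.get(compression(filters)) == 4  # compression_opts
    assert 'shuffle' in filters                    # shuffle property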
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git