[h5py] 175/455: Implement LZF compression filter
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:29 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.
commit 7ce4bacdcf3fe10b2300b42af556bc0eeef63e57
Author: andrewcollette <andrew.collette at gmail.com>
Date: Wed Dec 3 06:38:42 2008 +0000
Implement LZF compression filter
---
h5py/defs.pxd | 5 +
h5py/h5.pyx | 2 +
h5py/h5z.pyx | 3 +-
h5py/highlevel.py | 10 +-
h5py/lzf/lzf.h | 100 ++++++++++++++++++
h5py/lzf/lzfP.h | 159 +++++++++++++++++++++++++++++
h5py/lzf/lzf_c.c | 295 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
h5py/lzf/lzf_d.c | 148 +++++++++++++++++++++++++++
h5py/lzf_filter.c | 166 ++++++++++++++++++++++++++++++
h5py/lzf_filter.h | 26 +++++
setup.py | 9 +-
11 files changed, 918 insertions(+), 5 deletions(-)
diff --git a/h5py/defs.pxd b/h5py/defs.pxd
index 9116dd2..7f41c26 100644
--- a/h5py/defs.pxd
+++ b/h5py/defs.pxd
@@ -68,6 +68,11 @@ cdef extern from "compat.h":
size_t h5py_offset_n128_real
size_t h5py_offset_n128_imag
+cdef extern from "lzf_filter.h":
+
+ int H5PY_FILTER_LZF
+ int register_lzf() except *
+
# === H5 - Common definitions and library functions ===========================
cdef extern from "hdf5.h":
diff --git a/h5py/h5.pyx b/h5py/h5.pyx
index 7e2539b..7fbe03f 100644
--- a/h5py/h5.pyx
+++ b/h5py/h5.pyx
@@ -709,6 +709,8 @@ cdef int init_hdf5() except -1:
raise RuntimeError("Failed to initialize the HDF5 library.")
if H5Eset_auto(err_callback, NULL) < 0:
raise RuntimeError("Failed to register HDF5 exception callback.")
+ if register_lzf() < 0:
+ raise RuntimeError("Failed to register LZF filter")
atexit.register(_exithack)
hdf5_inited = 1
diff --git a/h5py/h5z.pyx b/h5py/h5z.pyx
index 656c335..f4c288f 100644
--- a/h5py/h5z.pyx
+++ b/h5py/h5z.pyx
@@ -24,9 +24,10 @@ from h5 cimport init_hdf5
# Initialization
init_hdf5()
-
# === Public constants and data structures ====================================
+FILTER_LZF = H5PY_FILTER_LZF
+
FILTER_ERROR = H5Z_FILTER_ERROR
FILTER_NONE = H5Z_FILTER_NONE
FILTER_ALL = H5Z_FILTER_ALL
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index dab04af..354274f 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -625,6 +625,9 @@ class Dataset(HLObject):
filt = self._plist.get_filter_by_id(h5z.FILTER_DEFLATE)
if filt is not None:
return filt[1][0]
+ filt = self._plist.get_filter_by_id(h5z.FILTER_LZF)
+ if filt is not None:
+ return 'lzf'
return None
@property
@@ -732,7 +735,12 @@ class Dataset(HLObject):
if compression:
if compression is True:
compression = 6
- plist.set_deflate(compression)
+ if compression in range(10):
+ plist.set_deflate(compression)
+ elif compression == 'lzf':
+ plist.set_filter(h5z.FILTER_LZF, h5z.FLAG_OPTIONAL)
+ else:
+ raise ValueError('Compression must be 0-9 or "lzf"')
if fletcher32:
plist.set_fletcher32()
diff --git a/h5py/lzf/lzf.h b/h5py/lzf/lzf.h
new file mode 100644
index 0000000..919b6e6
--- /dev/null
+++ b/h5py/lzf/lzf.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp at schmorp.de>
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#ifndef LZF_H
+#define LZF_H
+
+/***********************************************************************
+**
+** lzf -- an extremely fast/free compression/decompression-method
+** http://liblzf.plan9.de/
+**
+** This algorithm is believed to be patent-free.
+**
+***********************************************************************/
+
+#define LZF_VERSION 0x0105 /* 1.5, API version */
+
+/*
+ * Compress in_len bytes stored at the memory block starting at
+ * in_data and write the result to out_data, up to a maximum length
+ * of out_len bytes.
+ *
+ * If the output buffer is not large enough or any error occurs return 0,
+ * otherwise return the number of bytes used, which might be considerably
+ * more than in_len (but less than 104% of the original size), so it
+ * makes sense to always use out_len == in_len - 1, to ensure _some_
+ * compression, and store the data uncompressed otherwise (with a flag, of
+ * course).
+ *
+ * lzf_compress might use different algorithms on different systems and
+ * even different runs, thus might result in different compressed strings
+ * depending on the phase of the moon or similar factors. However, all
+ * these strings are architecture-independent and will result in the
+ * original data when decompressed using lzf_decompress.
+ *
+ * The buffers must not be overlapping.
+ *
+ * If the option LZF_STATE_ARG is enabled, an extra argument must be
+ * supplied which is not reflected in this header file. Refer to lzfP.h
+ * and lzf_c.c.
+ *
+ */
+unsigned int
+lzf_compress (const void *const in_data, unsigned int in_len,
+ void *out_data, unsigned int out_len);
+
+/*
+ * Decompress data compressed with some version of the lzf_compress
+ * function and stored at location in_data and length in_len. The result
+ * will be stored at out_data up to a maximum of out_len characters.
+ *
+ * If the output buffer is not large enough to hold the decompressed
+ * data, a 0 is returned and errno is set to E2BIG. Otherwise the number
+ * of decompressed bytes (i.e. the original length of the data) is
+ * returned.
+ *
+ * If an error in the compressed data is detected, a zero is returned and
+ * errno is set to EINVAL.
+ *
+ * This function is very fast, about as fast as a copying loop.
+ */
+unsigned int
+lzf_decompress (const void *const in_data, unsigned int in_len,
+ void *out_data, unsigned int out_len);
+
+#endif
+
diff --git a/h5py/lzf/lzfP.h b/h5py/lzf/lzfP.h
new file mode 100644
index 0000000..d533f18
--- /dev/null
+++ b/h5py/lzf/lzfP.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp at schmorp.de>
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#ifndef LZFP_h
+#define LZFP_h
+
+#define STANDALONE 1 /* at the moment, this is ok. */
+
+#ifndef STANDALONE
+# include "lzf.h"
+#endif
+
+/*
+ * Size of hashtable is (1 << HLOG) * sizeof (char *)
+ * decompression is independent of the hash table size
+ * the difference between 15 and 14 is very small
+ * for small blocks (and 14 is usually a bit faster).
+ * For a low-memory/faster configuration, use HLOG == 13;
+ * For best compression, use 15 or 16 (or more, up to 23).
+ */
+#ifndef HLOG
+# define HLOG 16
+#endif
+
+/*
+ * Sacrifice very little compression quality in favour of compression speed.
+ * This gives almost the same compression as the default code, and is
+ * (very roughly) 15% faster. This is the preferred mode of operation.
+ */
+#ifndef VERY_FAST
+# define VERY_FAST 1
+#endif
+
+/*
+ * Sacrifice some more compression quality in favour of compression speed.
+ * (roughly 1-2% worse compression for large blocks and
+ * 9-10% for small, redundant, blocks and >>20% better speed in both cases)
+ * In short: when in need for speed, enable this for binary data,
+ * possibly disable this for text data.
+ */
+#ifndef ULTRA_FAST
+# define ULTRA_FAST 0
+#endif
+
+/*
+ * Unconditionally aligning does not cost very much, so do it if unsure
+ */
+#ifndef STRICT_ALIGN
+# define STRICT_ALIGN !(defined(__i386) || defined (__amd64))
+#endif
+
+/*
+ * You may choose to pre-set the hash table (might be faster on some
+ * modern cpus and large (>>64k) blocks, and also makes compression
+ * deterministic/repeatable when the configuration otherwise is the same).
+ */
+#ifndef INIT_HTAB
+# define INIT_HTAB 0
+#endif
+
+/*
+ * Avoid assigning values to the errno variable? For some embedding purposes
+ * (linux kernel for example), this is necessary. NOTE: this breaks
+ * the documentation in lzf.h.
+ */
+#ifndef AVOID_ERRNO
+# define AVOID_ERRNO 0
+#endif
+
+/*
+ * Whether to pass the LZF_STATE variable as argument, or allocate it
+ * on the stack. For small-stack environments, define this to 1.
+ * NOTE: this breaks the prototype in lzf.h.
+ */
+#ifndef LZF_STATE_ARG
+# define LZF_STATE_ARG 0
+#endif
+
+/*
+ * Whether to add extra checks for input validity in lzf_decompress
+ * and return EINVAL if the input stream has been corrupted. This
+ * only shields against overflowing the input buffer and will not
+ * detect most corrupted streams.
+ * This check is not normally noticeable on modern hardware
+ * (<1% slowdown), but might slow down older cpus considerably.
+ */
+#ifndef CHECK_INPUT
+# define CHECK_INPUT 1
+#endif
+
+/*****************************************************************************/
+/* nothing should be changed below */
+
+typedef unsigned char u8;
+
+typedef const u8 *LZF_STATE[1 << (HLOG)];
+
+#if !STRICT_ALIGN
+/* for unaligned accesses we need a 16 bit datatype. */
+# include <limits.h>
+# if USHRT_MAX == 65535
+ typedef unsigned short u16;
+# elif UINT_MAX == 65535
+ typedef unsigned int u16;
+# else
+# undef STRICT_ALIGN
+# define STRICT_ALIGN 1
+# endif
+#endif
+
+#if ULTRA_FAST
+# if defined(VERY_FAST)
+# undef VERY_FAST
+# endif
+#endif
+
+#if INIT_HTAB
+# ifdef __cplusplus
+# include <cstring>
+# else
+# include <string.h>
+# endif
+#endif
+
+#endif
+
diff --git a/h5py/lzf/lzf_c.c b/h5py/lzf/lzf_c.c
new file mode 100644
index 0000000..99dab09
--- /dev/null
+++ b/h5py/lzf/lzf_c.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp at schmorp.de>
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#include "lzfP.h"
+
+#define HSIZE (1 << (HLOG))
+
+/*
+ * don't play with this unless you benchmark!
+ * decompression is not dependent on the hash function
+ * the hashing function might seem strange, just believe me
+ * it works ;)
+ */
+#ifndef FRST
+# define FRST(p) (((p[0]) << 8) | p[1])
+# define NEXT(v,p) (((v) << 8) | p[2])
+# if ULTRA_FAST
+# define IDX(h) ((( h >> (3*8 - HLOG)) - h ) & (HSIZE - 1))
+# elif VERY_FAST
+# define IDX(h) ((( h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# else
+# define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# endif
+#endif
+/*
+ * IDX works because it is very similar to a multiplicative hash, e.g.
+ * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1))
+ * the latter is also quite fast on newer CPUs, and compresses similarly.
+ *
+ * the next one is also quite good, albeit slow ;)
+ * (int)(cos(h & 0xffffff) * 1e6)
+ */
+
+#if 0
+/* original lzv-like hash function, much worse and thus slower */
+# define FRST(p) (p[0] << 5) ^ p[1]
+# define NEXT(v,p) ((v) << 5) ^ p[2]
+# define IDX(h) ((h) & (HSIZE - 1))
+#endif
+
+#define MAX_LIT (1 << 5)
+#define MAX_OFF (1 << 13)
+#define MAX_REF ((1 << 8) + (1 << 3))
+
+#if __GNUC__ >= 3
+# define expect(expr,value) __builtin_expect ((expr),(value))
+# define inline inline
+#else
+# define expect(expr,value) (expr)
+# define inline static
+#endif
+
+#define expect_false(expr) expect ((expr) != 0, 0)
+#define expect_true(expr) expect ((expr) != 0, 1)
+
+/*
+ * compressed format
+ *
+ * 000LLLLL <L+1> ; literal
+ * LLLooooo oooooooo ; backref L
+ * 111ooooo LLLLLLLL oooooooo ; backref L+7
+ *
+ */
+
+unsigned int
+lzf_compress (const void *const in_data, unsigned int in_len,
+ void *out_data, unsigned int out_len
+#if LZF_STATE_ARG
+ , LZF_STATE htab
+#endif
+ )
+{
+#if !LZF_STATE_ARG
+ LZF_STATE htab;
+#endif
+ const u8 **hslot;
+ const u8 *ip = (const u8 *)in_data;
+ u8 *op = (u8 *)out_data;
+ const u8 *in_end = ip + in_len;
+ u8 *out_end = op + out_len;
+ const u8 *ref;
+
+ /* off requires a type wide enough to hold a general pointer difference.
+ * ISO C doesn't have that (size_t might not be enough and ptrdiff_t only
+ * works for differences within a single object). We also assume that
+ * no bit pattern traps. Since the only platform that is both non-POSIX
+ * and fails to support both assumptions is windows 64 bit, we make a
+ * special workaround for it.
+ */
+#if defined (WIN32) && defined (_M_X64)
+ unsigned _int64 off; /* workaround for missing POSIX compliance */
+#else
+ unsigned long off;
+#endif
+ unsigned int hval;
+ int lit;
+
+ if (!in_len || !out_len)
+ return 0;
+
+#if INIT_HTAB
+ memset (htab, 0, sizeof (htab));
+# if 0
+ for (hslot = htab; hslot < htab + HSIZE; hslot++)
+ *hslot++ = ip;
+# endif
+#endif
+
+ lit = 0; op++; /* start run */
+
+ hval = FRST (ip);
+ while (ip < in_end - 2)
+ {
+ hval = NEXT (hval, ip);
+ hslot = htab + IDX (hval);
+ ref = *hslot; *hslot = ip;
+
+ if (1
+#if INIT_HTAB
+ && ref < ip /* the next test will actually take care of this, but this is faster */
+#endif
+ && (off = ip - ref - 1) < MAX_OFF
+ && ip + 4 < in_end
+ && ref > (u8 *)in_data
+#if STRICT_ALIGN
+ && ref[0] == ip[0]
+ && ref[1] == ip[1]
+ && ref[2] == ip[2]
+#else
+ && *(u16 *)ref == *(u16 *)ip
+ && ref[2] == ip[2]
+#endif
+ )
+ {
+ /* match found at *ref++ */
+ unsigned int len = 2;
+ unsigned int maxlen = in_end - ip - len;
+ maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
+
+ op [- lit - 1] = lit - 1; /* stop run */
+ op -= !lit; /* undo run if length is zero */
+
+ if (expect_false (op + 3 + 1 >= out_end))
+ return 0;
+
+ for (;;)
+ {
+ if (expect_true (maxlen > 16))
+ {
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ len++; if (ref [len] != ip [len]) break;
+ }
+
+ do
+ len++;
+ while (len < maxlen && ref[len] == ip[len]);
+
+ break;
+ }
+
+ len -= 2; /* len is now #octets - 1 */
+ ip++;
+
+ if (len < 7)
+ {
+ *op++ = (off >> 8) + (len << 5);
+ }
+ else
+ {
+ *op++ = (off >> 8) + ( 7 << 5);
+ *op++ = len - 7;
+ }
+
+ *op++ = off;
+ lit = 0; op++; /* start run */
+
+ ip += len + 1;
+
+ if (expect_false (ip >= in_end - 2))
+ break;
+
+#if ULTRA_FAST || VERY_FAST
+ --ip;
+# if VERY_FAST && !ULTRA_FAST
+ --ip;
+# endif
+ hval = FRST (ip);
+
+ hval = NEXT (hval, ip);
+ htab[IDX (hval)] = ip;
+ ip++;
+
+# if VERY_FAST && !ULTRA_FAST
+ hval = NEXT (hval, ip);
+ htab[IDX (hval)] = ip;
+ ip++;
+# endif
+#else
+ ip -= len + 1;
+
+ do
+ {
+ hval = NEXT (hval, ip);
+ htab[IDX (hval)] = ip;
+ ip++;
+ }
+ while (len--);
+#endif
+ }
+ else
+ {
+ /* one more literal byte we must copy */
+ if (expect_false (op >= out_end))
+ return 0;
+
+ lit++; *op++ = *ip++;
+
+ if (expect_false (lit == MAX_LIT))
+ {
+ op [- lit - 1] = lit - 1; /* stop run */
+ lit = 0; op++; /* start run */
+ }
+ }
+ }
+
+ if (op + 3 > out_end) /* at most 3 bytes can be missing here */
+ return 0;
+
+ while (ip < in_end)
+ {
+ lit++; *op++ = *ip++;
+
+ if (expect_false (lit == MAX_LIT))
+ {
+ op [- lit - 1] = lit - 1; /* stop run */
+ lit = 0; op++; /* start run */
+ }
+ }
+
+ op [- lit - 1] = lit - 1; /* end run */
+ op -= !lit; /* undo run if length is zero */
+
+ return op - (u8 *)out_data;
+}
+
diff --git a/h5py/lzf/lzf_d.c b/h5py/lzf/lzf_d.c
new file mode 100644
index 0000000..9e2cd82
--- /dev/null
+++ b/h5py/lzf/lzf_d.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp at schmorp.de>
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#include "lzfP.h"
+
+#if AVOID_ERRNO
+# define SET_ERRNO(n)
+#else
+# include <errno.h>
+# define SET_ERRNO(n) errno = (n)
+#endif
+
+#if (__i386 || __amd64) && __GNUC__ >= 3
+# define lzf_movsb(dst, src, len) \
+ asm ("rep movsb" \
+ : "=D" (dst), "=S" (src), "=c" (len) \
+ : "0" (dst), "1" (src), "2" (len));
+#endif
+
+unsigned int
+lzf_decompress (const void *const in_data, unsigned int in_len,
+ void *out_data, unsigned int out_len)
+{
+ u8 const *ip = (const u8 *)in_data;
+ u8 *op = (u8 *)out_data;
+ u8 const *const in_end = ip + in_len;
+ u8 *const out_end = op + out_len;
+
+ do
+ {
+ unsigned int ctrl = *ip++;
+
+ if (ctrl < (1 << 5)) /* literal run */
+ {
+ ctrl++;
+
+ if (op + ctrl > out_end)
+ {
+ SET_ERRNO (E2BIG);
+ return 0;
+ }
+
+#if CHECK_INPUT
+ if (ip + ctrl > in_end)
+ {
+ SET_ERRNO (EINVAL);
+ return 0;
+ }
+#endif
+
+#ifdef lzf_movsb
+ lzf_movsb (op, ip, ctrl);
+#else
+ do
+ *op++ = *ip++;
+ while (--ctrl);
+#endif
+ }
+ else /* back reference */
+ {
+ unsigned int len = ctrl >> 5;
+
+ u8 *ref = op - ((ctrl & 0x1f) << 8) - 1;
+
+#if CHECK_INPUT
+ if (ip >= in_end)
+ {
+ SET_ERRNO (EINVAL);
+ return 0;
+ }
+#endif
+ if (len == 7)
+ {
+ len += *ip++;
+#if CHECK_INPUT
+ if (ip >= in_end)
+ {
+ SET_ERRNO (EINVAL);
+ return 0;
+ }
+#endif
+ }
+
+ ref -= *ip++;
+
+ if (op + len + 2 > out_end)
+ {
+ SET_ERRNO (E2BIG);
+ return 0;
+ }
+
+ if (ref < (u8 *)out_data)
+ {
+ SET_ERRNO (EINVAL);
+ return 0;
+ }
+
+#ifdef lzf_movsb
+ len += 2;
+ lzf_movsb (op, ref, len);
+#else
+ *op++ = *ref++;
+ *op++ = *ref++;
+
+ do
+ *op++ = *ref++;
+ while (--len);
+#endif
+ }
+ }
+ while (ip < in_end);
+
+ return op - (u8 *)out_data;
+}
+
diff --git a/h5py/lzf_filter.c b/h5py/lzf_filter.c
new file mode 100644
index 0000000..62153ad
--- /dev/null
+++ b/h5py/lzf_filter.c
@@ -0,0 +1,166 @@
+/***** Preamble block *********************************************************
+*
+* This file is part of h5py, a low-level Python interface to the HDF5 library.
+*
+* Copyright (C) 2008 Andrew Collette
+* http://h5py.alfven.org
+* License: BSD (See LICENSE.txt for full license)
+*
+* $Date$
+*
+****** End preamble block ****************************************************/
+
+/*
+ Implements an LZF filter module for HDF5, using the BSD-licensed library
+ by Marc Alexander Lehmann (http://www.goof.com/pcg/marc/liblzf.html).
+
+ No Python-specific code is used. The filter behaves like the DEFLATE
+ filter, in that it is called for every type and space, and returns 0
+ if the data cannot be compressed.
+
+ The only public function is (int) register_lzf(void), which passes on
+ the result from H5Zregister.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include "hdf5.h"
+#include "lzf/lzf.h"
+#include "lzf_filter.h"
+
+/* In HDF5, one filter function handles both compression and decompression */
+size_t lzf_filter(unsigned flags, size_t cd_nelmts,
+ const unsigned cd_values[], size_t nbytes,
+ size_t *buf_size, void **buf);
+
+
+/* Try to register the filter, passing on the HDF5 return value */
+int register_lzf(void){
+
+/* Thanks to PyTables for this */
+#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR < 7
+ /* 1.6.x */
+ H5Z_class_t filter_class = {
+ (H5Z_filter_t)(H5PY_FILTER_LZF), /* filter_id */
+ "lzf", /* comment */
+ NULL, /* can_apply_func */
+ NULL, /* set_local_func */
+ (H5Z_func_t)(lzf_filter) /* filter_func */
+ };
+#else
+ /* 1.7.x */
+ H5Z_class_t filter_class = {
+ H5Z_CLASS_T_VERS, /* H5Z_class_t version */
+ (H5Z_filter_t)(H5PY_FILTER_LZF), /* filter_id */
+ 1, 1, /* Encoding and decoding enabled */
+ "lzf", /* comment */
+ NULL, /* can_apply_func */
+ NULL, /* set_local_func */
+ (H5Z_func_t)(lzf_filter) /* filter_func */
+ };
+#endif /* if H5_VERSION < "1.7" */
+
+ return H5Zregister(&filter_class);
+}
+
+#define H5PY_LZF_MAX_BUF (100L*1024L*1024L) /* 100MB chunks are outrageous */
+
+static size_t historical_buf_size = 0;
+
+/* The filter function */
+size_t lzf_filter(unsigned flags, size_t cd_nelmts,
+ const unsigned cd_values[], size_t nbytes,
+ size_t *buf_size, void **buf){
+
+ void* outbuf = NULL;
+ size_t outbuf_size = 0;
+ unsigned int status = 0; /* Return code from lzf routines */
+
+
+ /* If we're compressing */
+ if(!(flags & H5Z_FLAG_REVERSE)){
+
+ /* Allocate an output buffer exactly as long as the input data; if
+ the result is larger, we simply return 0.
+ */
+ outbuf_size = nbytes;
+ outbuf = malloc(outbuf_size);
+
+ status = lzf_compress(*buf, nbytes, outbuf, outbuf_size);
+
+ /* If we're decompressing */
+ } else {
+
+ /* Initialize to our last guess */
+ if(historical_buf_size == 0){
+ historical_buf_size = *buf_size;
+ }
+ outbuf_size = historical_buf_size;
+
+ while(!status){
+
+ free(outbuf);
+ outbuf = malloc(outbuf_size);
+
+ status = lzf_decompress(*buf, nbytes, outbuf, outbuf_size);
+
+ /* decompression failed */
+ if(!status){
+
+ /* Output buffer too small */
+ if(errno == E2BIG){
+ outbuf_size += (*buf_size);
+ if(outbuf_size > H5PY_LZF_MAX_BUF){
+ fprintf(stderr, "Can't allocate buffer for LZF decompression");
+ goto failed;
+ }
+ historical_buf_size = outbuf_size;
+
+ /* Horrible internal error */
+ } else if(errno == EINVAL) {
+ fprintf(stderr, "LZF decompression error");
+ goto failed;
+
+ /* Unknown error */
+ } else {
+ fprintf(stderr, "Unspecified LZF error %d", errno);
+ goto failed;
+ }
+
+ } /* if !status */
+
+ } /* while !status */
+
+ } /* if decompressing */
+
+
+ /* If compression/decompression successful, swap buffers */
+ if(status){
+
+ free(*buf);
+ *buf = outbuf;
+ *buf_size = outbuf_size;
+
+ return status; /* Size of compressed/decompressed data */
+ }
+
+ failed:
+ /* Could put a Python exception call here */
+ free(outbuf);
+ return 0;
+
+} /* End filter function */
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/h5py/lzf_filter.h b/h5py/lzf_filter.h
new file mode 100644
index 0000000..9e8951c
--- /dev/null
+++ b/h5py/lzf_filter.h
@@ -0,0 +1,26 @@
+/***** Preamble block *********************************************************
+*
+* This file is part of h5py, a low-level Python interface to the HDF5 library.
+*
+* Copyright (C) 2008 Andrew Collette
+* http://h5py.alfven.org
+* License: BSD (See LICENSE.txt for full license)
+*
+* $Date$
+*
+****** End preamble block ****************************************************/
+
+/*
+ Filter code is chosen in an ad-hoc manner to avoid conflict
+ with PyTables LZO/BZIP2 implementation.
+*/
+
+#ifndef H5PY_LZF_FILTER
+#define H5PY_LZF_FILTER
+
+#define H5PY_FILTER_LZF 315
+
+int register_lzf(void);
+
+#endif
+
diff --git a/setup.py b/setup.py
index 314b860..2f626ef 100644
--- a/setup.py
+++ b/setup.py
@@ -48,7 +48,7 @@ from distutils.cmd import Command
# Basic package options
NAME = 'h5py' # Software title
-VERSION = '1.0.0'
+VERSION = '1.1.0'
MIN_NUMPY = '1.0.3'
MIN_CYTHON = '0.9.8.1.1'
SRC_PATH = 'h5py' # Name of directory with .pyx files
@@ -61,6 +61,7 @@ MODULES = {16: ['h5', 'h5f', 'h5g', 'h5s', 'h5t', 'h5d', 'h5a', 'h5p', 'h5z',
'h5i', 'h5r', 'h5fd', 'utils'],
18: ['h5', 'h5f', 'h5g', 'h5s', 'h5t', 'h5d', 'h5a', 'h5p', 'h5z',
'h5i', 'h5r', 'h5fd', 'utils', 'h5o', 'h5l']}
+EXTRA_SRC = {'h5': ["lzf_filter.c", "lzf/lzf_c.c", "lzf/lzf_d.c"]}
def version_check(vers, required):
""" Compare versions between two "."-separated strings. """
@@ -151,11 +152,13 @@ class ExtensionCreator(object):
self.extra_link_args = []
- def create_extension(self, name, extra_src=[]):
+ def create_extension(self, name, extra_src=None):
""" Create a distutils Extension object for the given module. A list
of C source files to be included in the compilation can also be
provided.
"""
+ if extra_src is None:
+ extra_src = []
sources = [op.join(SRC_PATH, name+'.c')]+[op.join(SRC_PATH,x) for x in extra_src]
return Extension(NAME+'.'+name,
sources,
@@ -230,7 +233,7 @@ class cybuild(build):
modules = MODULES[self.api]
creator = ExtensionCreator(self.hdf5)
- extensions = [creator.create_extension(x) for x in modules]
+ extensions = [creator.create_extension(x, EXTRA_SRC.get(x, None)) for x in modules]
self.distribution.ext_modules = extensions
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git
More information about the debian-science-commits
mailing list