[h5py] 175/455: Implement LZF compression filter

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 2 18:19:29 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to annotated tag 1.3.0
in repository h5py.

commit 7ce4bacdcf3fe10b2300b42af556bc0eeef63e57
Author: andrewcollette <andrew.collette at gmail.com>
Date:   Wed Dec 3 06:38:42 2008 +0000

    Implement LZF compression filter
---
 h5py/defs.pxd     |   5 +
 h5py/h5.pyx       |   2 +
 h5py/h5z.pyx      |   3 +-
 h5py/highlevel.py |  10 +-
 h5py/lzf/lzf.h    | 100 ++++++++++++++++++
 h5py/lzf/lzfP.h   | 159 +++++++++++++++++++++++++++++
 h5py/lzf/lzf_c.c  | 295 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 h5py/lzf/lzf_d.c  | 148 +++++++++++++++++++++++++++
 h5py/lzf_filter.c | 166 ++++++++++++++++++++++++++++++
 h5py/lzf_filter.h |  26 +++++
 setup.py          |   9 +-
 11 files changed, 918 insertions(+), 5 deletions(-)

diff --git a/h5py/defs.pxd b/h5py/defs.pxd
index 9116dd2..7f41c26 100644
--- a/h5py/defs.pxd
+++ b/h5py/defs.pxd
@@ -68,6 +68,11 @@ cdef extern from "compat.h":
   size_t h5py_offset_n128_real
   size_t h5py_offset_n128_imag
 
+cdef extern from "lzf_filter.h":
+
+  int H5PY_FILTER_LZF
+  int register_lzf() except *
+
 # === H5 - Common definitions and library functions ===========================
 
 cdef extern from "hdf5.h":
diff --git a/h5py/h5.pyx b/h5py/h5.pyx
index 7e2539b..7fbe03f 100644
--- a/h5py/h5.pyx
+++ b/h5py/h5.pyx
@@ -709,6 +709,8 @@ cdef int init_hdf5() except -1:
             raise RuntimeError("Failed to initialize the HDF5 library.")
         if H5Eset_auto(err_callback, NULL) < 0:
             raise RuntimeError("Failed to register HDF5 exception callback.")
+        if register_lzf() < 0:
+            raise RuntimeError("Failed to register LZF filter")
         atexit.register(_exithack)
         hdf5_inited = 1
 
diff --git a/h5py/h5z.pyx b/h5py/h5z.pyx
index 656c335..f4c288f 100644
--- a/h5py/h5z.pyx
+++ b/h5py/h5z.pyx
@@ -24,9 +24,10 @@ from h5 cimport init_hdf5
 # Initialization
 init_hdf5()
 
-
 # === Public constants and data structures ====================================
 
+FILTER_LZF = H5PY_FILTER_LZF
+
 FILTER_ERROR    = H5Z_FILTER_ERROR
 FILTER_NONE     = H5Z_FILTER_NONE
 FILTER_ALL      = H5Z_FILTER_ALL
diff --git a/h5py/highlevel.py b/h5py/highlevel.py
index dab04af..354274f 100644
--- a/h5py/highlevel.py
+++ b/h5py/highlevel.py
@@ -625,6 +625,9 @@ class Dataset(HLObject):
         filt = self._plist.get_filter_by_id(h5z.FILTER_DEFLATE)
         if filt is not None:
             return filt[1][0]
+        filt = self._plist.get_filter_by_id(h5z.FILTER_LZF)
+        if filt is not None:
+            return 'lzf'
         return None
 
     @property
@@ -732,7 +735,12 @@ class Dataset(HLObject):
                 if compression:
                     if compression is True:
                         compression = 6
-                    plist.set_deflate(compression)
+                    if compression in range(10):
+                        plist.set_deflate(compression)
+                    elif compression == 'lzf':
+                        plist.set_filter(h5z.FILTER_LZF, h5z.FLAG_OPTIONAL)
+                    else:
+                        raise ValueError('Compression must be 0-9 or "lzf"')
                 if fletcher32:
                     plist.set_fletcher32()
 
diff --git a/h5py/lzf/lzf.h b/h5py/lzf/lzf.h
new file mode 100644
index 0000000..919b6e6
--- /dev/null
+++ b/h5py/lzf/lzf.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp at schmorp.de>
+ * 
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ * 
+ *   1.  Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ * 
+ *   2.  Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#ifndef LZF_H
+#define LZF_H
+
+/***********************************************************************
+**
+**	lzf -- an extremely fast/free compression/decompression-method
+**	http://liblzf.plan9.de/
+**
+**	This algorithm is believed to be patent-free.
+**
+***********************************************************************/
+
+#define LZF_VERSION 0x0105 /* 1.5, API version */
+
+/*
+ * Compress in_len bytes stored at the memory block starting at
+ * in_data and write the result to out_data, up to a maximum length
+ * of out_len bytes.
+ *
+ * If the output buffer is not large enough or any error occurs return 0,
+ * otherwise return the number of bytes used, which might be considerably
+ * more than in_len (but less than 104% of the original size), so it
+ * makes sense to always use out_len == in_len - 1, to ensure _some_
+ * compression, and store the data uncompressed otherwise (with a flag, of
+ * course).
+ *
+ * lzf_compress might use different algorithms on different systems and
+ * even different runs, thus might result in different compressed strings
+ * depending on the phase of the moon or similar factors. However, all
+ * these strings are architecture-independent and will result in the
+ * original data when decompressed using lzf_decompress.
+ *
+ * The buffers must not be overlapping.
+ *
+ * If the option LZF_STATE_ARG is enabled, an extra argument must be
+ * supplied which is not reflected in this header file. Refer to lzfP.h
+ * and lzf_c.c.
+ *
+ */
+unsigned int 
+lzf_compress (const void *const in_data,  unsigned int in_len,
+              void             *out_data, unsigned int out_len);
+
+/*
+ * Decompress data compressed with some version of the lzf_compress
+ * function and stored at location in_data and length in_len. The result
+ * will be stored at out_data up to a maximum of out_len characters.
+ *
+ * If the output buffer is not large enough to hold the decompressed
+ * data, a 0 is returned and errno is set to E2BIG. Otherwise the number
+ * of decompressed bytes (i.e. the original length of the data) is
+ * returned.
+ *
+ * If an error in the compressed data is detected, a zero is returned and
+ * errno is set to EINVAL.
+ *
+ * This function is very fast, about as fast as a copying loop.
+ */
+unsigned int 
+lzf_decompress (const void *const in_data,  unsigned int in_len,
+                void             *out_data, unsigned int out_len);
+
+#endif
+
diff --git a/h5py/lzf/lzfP.h b/h5py/lzf/lzfP.h
new file mode 100644
index 0000000..d533f18
--- /dev/null
+++ b/h5py/lzf/lzfP.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp at schmorp.de>
+ * 
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ * 
+ *   1.  Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ * 
+ *   2.  Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#ifndef LZFP_h
+#define LZFP_h
+
+#define STANDALONE 1 /* at the moment, this is ok. */
+
+#ifndef STANDALONE
+# include "lzf.h"
+#endif
+
+/*
+ * Size of hashtable is (1 << HLOG) * sizeof (char *)
+ * decompression is independent of the hash table size
+ * the difference between 15 and 14 is very small
+ * for small blocks (and 14 is usually a bit faster).
+ * For a low-memory/faster configuration, use HLOG == 13;
+ * For best compression, use 15 or 16 (or more, up to 23).
+ */
+#ifndef HLOG
+# define HLOG 16
+#endif
+
+/*
+ * Sacrifice very little compression quality in favour of compression speed.
+ * This gives almost the same compression as the default code, and is
+ * (very roughly) 15% faster. This is the preferred mode of operation.
+ */
+#ifndef VERY_FAST
+# define VERY_FAST 1
+#endif
+
+/*
+ * Sacrifice some more compression quality in favour of compression speed.
+ * (roughly 1-2% worse compression for large blocks and
+ * 9-10% for small, redundant, blocks and >>20% better speed in both cases)
+ * In short: when in need for speed, enable this for binary data,
+ * possibly disable this for text data.
+ */
+#ifndef ULTRA_FAST
+# define ULTRA_FAST 0
+#endif
+
+/*
+ * Unconditionally aligning does not cost very much, so do it if unsure
+ */
+#ifndef STRICT_ALIGN
+# define STRICT_ALIGN !(defined(__i386) || defined (__amd64))
+#endif
+
+/*
+ * You may choose to pre-set the hash table (might be faster on some
+ * modern cpus and large (>>64k) blocks, and also makes compression
+ * deterministic/repeatable when the configuration otherwise is the same).
+ */
+#ifndef INIT_HTAB
+# define INIT_HTAB 0
+#endif
+
+/*
+ * Avoid assigning values to errno variable? for some embedding purposes
+ * (linux kernel for example), this is necessary. NOTE: this breaks
+ * the documentation in lzf.h.
+ */
+#ifndef AVOID_ERRNO
+# define AVOID_ERRNO 0
+#endif
+
+/*
+ * Whether to pass the LZF_STATE variable as argument, or allocate it
+ * on the stack. For small-stack environments, define this to 1.
+ * NOTE: this breaks the prototype in lzf.h.
+ */
+#ifndef LZF_STATE_ARG
+# define LZF_STATE_ARG 0
+#endif
+
+/*
+ * Whether to add extra checks for input validity in lzf_decompress
+ * and return EINVAL if the input stream has been corrupted. This
+ * only shields against overflowing the input buffer and will not
+ * detect most corrupted streams.
+ * This check is not normally noticeable on modern hardware
+ * (<1% slowdown), but might slow down older cpus considerably.
+ */
+#ifndef CHECK_INPUT
+# define CHECK_INPUT 1
+#endif
+
+/*****************************************************************************/
+/* nothing should be changed below */
+
+typedef unsigned char u8;
+
+typedef const u8 *LZF_STATE[1 << (HLOG)];
+
+#if !STRICT_ALIGN
+/* for unaligned accesses we need a 16 bit datatype. */
+# include <limits.h>
+# if USHRT_MAX == 65535
+    typedef unsigned short u16;
+# elif UINT_MAX == 65535
+    typedef unsigned int u16;
+# else
+#  undef STRICT_ALIGN
+#  define STRICT_ALIGN 1
+# endif
+#endif
+
+#if ULTRA_FAST
+# if defined(VERY_FAST)
+#  undef VERY_FAST
+# endif
+#endif
+
+#if INIT_HTAB
+# ifdef __cplusplus
+#  include <cstring>
+# else
+#  include <string.h>
+# endif
+#endif
+
+#endif
+
diff --git a/h5py/lzf/lzf_c.c b/h5py/lzf/lzf_c.c
new file mode 100644
index 0000000..99dab09
--- /dev/null
+++ b/h5py/lzf/lzf_c.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp at schmorp.de>
+ * 
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ * 
+ *   1.  Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ * 
+ *   2.  Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#include "lzfP.h"
+
+#define HSIZE (1 << (HLOG))
+
+/*
+ * don't play with this unless you benchmark!
+ * decompression is not dependent on the hash function
+ * the hashing function might seem strange, just believe me
+ * it works ;)
+ */
+#ifndef FRST
+# define FRST(p) (((p[0]) << 8) | p[1])
+# define NEXT(v,p) (((v) << 8) | p[2])
+# if ULTRA_FAST
+#  define IDX(h) ((( h             >> (3*8 - HLOG)) - h  ) & (HSIZE - 1))
+# elif VERY_FAST
+#  define IDX(h) ((( h             >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# else
+#  define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
+# endif
+#endif
+/*
+ * IDX works because it is very similar to a multiplicative hash, e.g.
+ * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1))
+ * the latter is also quite fast on newer CPUs, and compresses similarly.
+ *
+ * the next one is also quite good, albeit slow ;)
+ * (int)(cos(h & 0xffffff) * 1e6)
+ */
+
+#if 0
+/* original lzv-like hash function, much worse and thus slower */
+# define FRST(p) (p[0] << 5) ^ p[1]
+# define NEXT(v,p) ((v) << 5) ^ p[2]
+# define IDX(h) ((h) & (HSIZE - 1))
+#endif
+
+#define        MAX_LIT        (1 <<  5)
+#define        MAX_OFF        (1 << 13)
+#define        MAX_REF        ((1 << 8) + (1 << 3))
+
+#if __GNUC__ >= 3
+# define expect(expr,value)         __builtin_expect ((expr),(value))
+# define inline                     inline
+#else
+# define expect(expr,value)         (expr)
+# define inline                     static
+#endif
+
+#define expect_false(expr) expect ((expr) != 0, 0)
+#define expect_true(expr)  expect ((expr) != 0, 1)
+
+/*
+ * compressed format: a control byte followed by its payload
+ *
+ * 000LLLLL <L+1>    ; literal run, L+1 raw bytes follow
+ * LLLooooo oooooooo ; backref, L+2 bytes copied from o+1 bytes back
+ * 111ooooo LLLLLLLL oooooooo ; long backref, L+7+2 bytes from o+1 bytes back
+ *
+ */
+/* Compress in_data into out_data; returns bytes written, or 0 if a length is 0 or out_data is too small. */
+unsigned int
+lzf_compress (const void *const in_data, unsigned int in_len,
+	      void *out_data, unsigned int out_len
+#if LZF_STATE_ARG
+              , LZF_STATE htab
+#endif
+              )
+{
+#if !LZF_STATE_ARG
+  LZF_STATE htab;
+#endif
+  const u8 **hslot; /* hash-table slot for the position being examined */
+  const u8 *ip = (const u8 *)in_data; /* read cursor */
+        u8 *op = (u8 *)out_data; /* write cursor */
+  const u8 *in_end  = ip + in_len;
+        u8 *out_end = op + out_len;
+  const u8 *ref; /* earlier input position proposed by the hash table */
+
+  /* off requires a type wide enough to hold a general pointer difference.
+   * ISO C doesn't have that (size_t might not be enough and ptrdiff_t only
+   * works for differences within a single object). We also assume that no
+   * bit pattern traps. Since the only platform that is both non-POSIX
+   * and fails to support both assumptions is windows 64 bit, we make a
+   * special workaround for it.
+   */
+#if defined (WIN32) && defined (_M_X64)
+  unsigned _int64 off; /* workaround for missing POSIX compliance */
+#else
+  unsigned long off;
+#endif
+  unsigned int hval; /* rolling hash input built by FRST/NEXT */
+  int lit; /* number of bytes in the literal run currently being built */
+
+  if (!in_len || !out_len)
+    return 0; /* nothing to compress, or no room for any output */
+
+#if INIT_HTAB
+  memset (htab, 0, sizeof (htab));
+# if 0
+  for (hslot = htab; hslot < htab + HSIZE; hslot++)
+    *hslot++ = ip;
+# endif
+#endif
+
+  lit = 0; op++; /* start run: skip the byte that will hold the run length */
+
+  hval = FRST (ip);
+  while (ip < in_end - 2) /* NEXT reads ip[2], so stop two bytes early */
+    {
+      hval = NEXT (hval, ip);
+      hslot = htab + IDX (hval);
+      ref = *hslot; *hslot = ip; /* take the old entry as candidate, record ip */
+
+      if (1
+#if INIT_HTAB
+          && ref < ip /* the next test will actually take care of this, but this is faster */
+#endif
+          && (off = ip - ref - 1) < MAX_OFF /* distance must fit in 13 bits */
+          && ip + 4 < in_end
+          && ref > (u8 *)in_data /* candidate must lie after the input start */
+#if STRICT_ALIGN
+          && ref[0] == ip[0]
+          && ref[1] == ip[1]
+          && ref[2] == ip[2]
+#else
+          && *(u16 *)ref == *(u16 *)ip
+          && ref[2] == ip[2]
+#endif
+        )
+        {
+          /* match found at *ref++ */
+          unsigned int len = 2;
+          unsigned int maxlen = in_end - ip - len;
+          maxlen = maxlen > MAX_REF ? MAX_REF : maxlen; /* clamp to MAX_REF */
+
+          op [- lit - 1] = lit - 1; /* stop run */
+          op -= !lit; /* undo run if length is zero */
+
+          if (expect_false (op + 3 + 1 >= out_end))
+            return 0; /* no room for a back reference plus the next run header */
+
+          for (;;)
+            {
+              if (expect_true (maxlen > 16))
+                {
+                  /* unrolled comparison of the next 16 bytes */
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                  len++; if (ref [len] != ip [len]) break;
+                }
+
+              do
+                len++;
+              while (len < maxlen && ref[len] == ip[len]);
+
+              break;
+            }
+
+          len -= 2; /* len is now #octets - 1 */
+          ip++;
+
+          if (len < 7)
+            {
+              *op++ = (off >> 8) + (len << 5); /* short form: LLLooooo */
+            }
+          else
+            {
+              *op++ = (off >> 8) + (  7 << 5); /* long form: 111ooooo */
+              *op++ = len - 7; /* extended length byte */
+            }
+
+          *op++ = off; /* low 8 bits of the offset (op is a byte pointer) */
+          lit = 0; op++; /* start run */
+
+          ip += len + 1;
+
+          if (expect_false (ip >= in_end - 2))
+            break; /* fewer than 3 bytes left: remainder is emitted as literals */
+
+#if ULTRA_FAST || VERY_FAST
+          --ip;
+# if VERY_FAST && !ULTRA_FAST
+          --ip;
+# endif
+          hval = FRST (ip);
+
+          hval = NEXT (hval, ip);
+          htab[IDX (hval)] = ip;
+          ip++;
+
+# if VERY_FAST && !ULTRA_FAST
+          hval = NEXT (hval, ip);
+          htab[IDX (hval)] = ip;
+          ip++;
+# endif
+#else
+          ip -= len + 1;
+
+          do
+            {
+              hval = NEXT (hval, ip);
+              htab[IDX (hval)] = ip;
+              ip++;
+            }
+          while (len--);
+#endif
+        }
+      else
+        {
+          /* one more literal byte we must copy */
+          if (expect_false (op >= out_end))
+            return 0; /* output buffer exhausted */
+
+          lit++; *op++ = *ip++;
+
+          if (expect_false (lit == MAX_LIT))
+            {
+              op [- lit - 1] = lit - 1; /* stop run */
+              lit = 0; op++; /* start run */
+            }
+        }
+    }
+
+  if (op + 3 > out_end) /* at most 3 bytes can be missing here */
+    return 0;
+
+  while (ip < in_end) /* copy whatever input remains as literals */
+    {
+      lit++; *op++ = *ip++;
+
+      if (expect_false (lit == MAX_LIT))
+        {
+          op [- lit - 1] = lit - 1; /* stop run */
+          lit = 0; op++; /* start run */
+        }
+    }
+
+  op [- lit - 1] = lit - 1; /* end run */
+  op -= !lit; /* undo run if length is zero */
+
+  return op - (u8 *)out_data; /* number of bytes written */
+}
+
diff --git a/h5py/lzf/lzf_d.c b/h5py/lzf/lzf_d.c
new file mode 100644
index 0000000..9e2cd82
--- /dev/null
+++ b/h5py/lzf/lzf_d.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp at schmorp.de>
+ * 
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ * 
+ *   1.  Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ * 
+ *   2.  Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License ("GPL") version 2 or any later version,
+ * in which case the provisions of the GPL are applicable instead of
+ * the above. If you wish to allow the use of your version of this file
+ * only under the terms of the GPL and not to allow others to use your
+ * version of this file under the BSD license, indicate your decision
+ * by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL. If you do not delete the
+ * provisions above, a recipient may use your version of this file under
+ * either the BSD or the GPL.
+ */
+
+#include "lzfP.h"
+
+#if AVOID_ERRNO
+# define SET_ERRNO(n)
+#else
+# include <errno.h>
+# define SET_ERRNO(n) errno = (n)
+#endif
+
+#if (__i386 || __amd64) && __GNUC__ >= 3
+# define lzf_movsb(dst, src, len)                \
+   asm ("rep movsb"                              \
+        : "=D" (dst), "=S" (src), "=c" (len)     \
+        :  "0" (dst),  "1" (src),  "2" (len));
+#endif
+/* Decompress in_data into out_data; returns bytes written, or 0 with errno set to E2BIG/EINVAL (unless AVOID_ERRNO). */
+unsigned int 
+lzf_decompress (const void *const in_data,  unsigned int in_len,
+                void             *out_data, unsigned int out_len)
+{
+  u8 const *ip = (const u8 *)in_data; /* read cursor */
+  u8       *op = (u8 *)out_data; /* write cursor */
+  u8 const *const in_end  = ip + in_len;
+  u8       *const out_end = op + out_len;
+
+  do
+    {
+      unsigned int ctrl = *ip++; /* control byte: literal run or back reference */
+
+      if (ctrl < (1 << 5)) /* literal run */
+        {
+          ctrl++; /* run length is stored minus one */
+
+          if (op + ctrl > out_end)
+            {
+              SET_ERRNO (E2BIG); /* output buffer too small */
+              return 0;
+            }
+
+#if CHECK_INPUT
+          if (ip + ctrl > in_end)
+            {
+              SET_ERRNO (EINVAL); /* run extends past the end of the input */
+              return 0;
+            }
+#endif
+
+#ifdef lzf_movsb
+          lzf_movsb (op, ip, ctrl);
+#else
+          do
+            *op++ = *ip++;
+          while (--ctrl);
+#endif
+        }
+      else /* back reference */
+        {
+          unsigned int len = ctrl >> 5; /* top 3 bits: length field, 7 = extended */
+
+          u8 *ref = op - ((ctrl & 0x1f) << 8) - 1; /* low 5 bits: high part of the offset */
+
+#if CHECK_INPUT
+          if (ip >= in_end)
+            {
+              SET_ERRNO (EINVAL); /* input ends inside a back reference */
+              return 0;
+            }
+#endif
+          if (len == 7)
+            {
+              len += *ip++; /* extended form: add the extra length byte */
+#if CHECK_INPUT
+              if (ip >= in_end)
+                {
+                  SET_ERRNO (EINVAL);
+                  return 0;
+                }
+#endif
+            }
+
+          ref -= *ip++; /* low 8 bits of the offset */
+
+          if (op + len + 2 > out_end) /* a back reference copies len + 2 bytes */
+            {
+              SET_ERRNO (E2BIG);
+              return 0;
+            }
+
+          if (ref < (u8 *)out_data) /* reference points before the output start */
+            {
+              SET_ERRNO (EINVAL);
+              return 0;
+            }
+
+#ifdef lzf_movsb
+          len += 2;
+          lzf_movsb (op, ref, len);
+#else
+          *op++ = *ref++;
+          *op++ = *ref++;
+
+          do
+            *op++ = *ref++;
+          while (--len);
+#endif
+        }
+    }
+  while (ip < in_end);
+
+  return op - (u8 *)out_data; /* number of bytes written */
+}
+
diff --git a/h5py/lzf_filter.c b/h5py/lzf_filter.c
new file mode 100644
index 0000000..62153ad
--- /dev/null
+++ b/h5py/lzf_filter.c
@@ -0,0 +1,166 @@
+/***** Preamble block *********************************************************
+* 
+* This file is part of h5py, a low-level Python interface to the HDF5 library.
+* 
+* Copyright (C) 2008 Andrew Collette
+* http://h5py.alfven.org
+* License: BSD  (See LICENSE.txt for full license)
+* 
+* $Date$
+* 
+****** End preamble block ****************************************************/
+
+/*
+    Implements an LZF filter module for HDF5, using the BSD-licensed library
+    by Marc Alexander Lehmann (http://www.goof.com/pcg/marc/liblzf.html).
+
+    No Python-specific code is used.  The filter behaves like the DEFLATE
+    filter, in that it is called for every type and space, and returns 0
+    if the data cannot be compressed.
+
+    The only public function is (int) register_lzf(void), which passes on
+    the result from H5Zregister.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include "hdf5.h"
+#include "lzf/lzf.h"
+#include "lzf_filter.h"
+
+/* In HDF5, one filter function handles both compression and decompression */
+size_t lzf_filter(unsigned flags, size_t cd_nelmts,
+		    const unsigned cd_values[], size_t nbytes,
+		    size_t *buf_size, void **buf);
+
+
+/* Register lzf_filter with HDF5 under id H5PY_FILTER_LZF; returns H5Zregister's result unchanged. */
+int register_lzf(void){
+
+/* Pick the H5Z_class_t initializer matching the HDF5 headers in use (thanks to PyTables for this) */
+#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR < 7
+   /* 1.6.x: five-field initializer */
+    H5Z_class_t filter_class = {
+        (H5Z_filter_t)(H5PY_FILTER_LZF),    /* filter_id */
+        "lzf",                         /* comment */
+        NULL,                          /* can_apply_func (none supplied) */
+        NULL,                          /* set_local_func (none supplied) */
+        (H5Z_func_t)(lzf_filter)      /* filter_func */
+    };
+#else
+   /* 1.7.x and later: adds a struct version and encoder/decoder flags */
+    H5Z_class_t filter_class = {
+        H5Z_CLASS_T_VERS,             /* H5Z_class_t version */
+        (H5Z_filter_t)(H5PY_FILTER_LZF),   /* filter_id */
+        1, 1,                         /* Encoding and decoding enabled */
+        "lzf",	 		  /* comment */
+        NULL,                         /* can_apply_func (none supplied) */
+        NULL,                         /* set_local_func (none supplied) */
+        (H5Z_func_t)(lzf_filter)     /* filter_func */
+    };
+#endif /* if H5_VERSION < "1.7" */
+
+    return H5Zregister(&filter_class); /* init_hdf5 in h5.pyx treats a negative value as failure */
+}
+
+#define H5PY_LZF_MAX_BUF (100L*1024L*1024L) /* 100MB chunks are outrageous */
+
+static size_t historical_buf_size = 0;
+
+/* HDF5 filter callback: LZF-compress or -decompress one chunk.
+ *
+ *   flags      H5Z_FLAG_REVERSE set => decompress, otherwise compress
+ *   cd_nelmts, cd_values   filter client data (not used by this filter)
+ *   nbytes     number of valid input bytes in *buf
+ *   buf_size   in: size of *buf; out: size of the replacement buffer
+ *   buf        in: input data; out: malloc'd buffer holding the result
+ *
+ * Returns the number of valid bytes in the new *buf, or 0 on failure, in
+ * which case *buf and *buf_size are left untouched.
+ */
+size_t lzf_filter(unsigned flags, size_t cd_nelmts,
+		    const unsigned cd_values[], size_t nbytes,
+		    size_t *buf_size, void **buf){
+
+    void* outbuf = NULL;
+    size_t outbuf_size = 0;
+    unsigned int status = 0;        /* Return code from lzf routines */
+
+    /* The lzf routines take unsigned int lengths; refuse input whose size
+       would be silently truncated by that conversion. */
+    if((size_t)(unsigned int)nbytes != nbytes) goto failed;
+
+    /* If we're compressing */
+    if(!(flags & H5Z_FLAG_REVERSE)){
+
+        /* Allocate an output buffer exactly as long as the input data; if
+           the result is larger, we simply return 0. */
+        outbuf_size = nbytes;
+        outbuf = malloc(outbuf_size);
+        if(outbuf == NULL) goto failed;   /* never hand NULL to lzf_compress */
+
+        status = lzf_compress(*buf, nbytes, outbuf, outbuf_size);
+
+    /* If we're decompressing */
+    } else {
+
+        /* Initialize to our last guess */
+        if(historical_buf_size == 0){
+            historical_buf_size = *buf_size;
+        }
+        outbuf_size = historical_buf_size;
+
+        while(!status){
+
+            free(outbuf);
+            outbuf = malloc(outbuf_size);
+            if(outbuf == NULL){
+                fprintf(stderr, "Can't allocate buffer for LZF decompression\n");
+                goto failed;
+            }
+
+            status = lzf_decompress(*buf, nbytes, outbuf, outbuf_size);
+
+            /* decompression failed */
+            if(!status){
+
+                /* Output buffer too small: grow it and retry */
+                if(errno == E2BIG){
+                    outbuf_size += (*buf_size);
+                    if(outbuf_size > H5PY_LZF_MAX_BUF){
+                        fprintf(stderr, "Can't allocate buffer for LZF decompression\n");
+                        goto failed;
+                    }
+                    historical_buf_size = outbuf_size;
+
+                /* Horrible internal error */
+                } else if(errno == EINVAL) {
+                    fprintf(stderr, "LZF decompression error\n");
+                    goto failed;
+
+                /* Unknown error */
+                } else {
+                    fprintf(stderr, "Unspecified LZF error %d\n", errno);
+                    goto failed;
+                }
+            } /* if !status */
+        } /* while !status */
+    } /* if decompressing */
+
+    /* If compression/decompression successful, swap buffers */
+    if(status){
+
+        free(*buf);
+        *buf = outbuf;
+        *buf_size = outbuf_size;
+
+        return status;  /* Size of compressed/decompressed data */
+    }
+
+    failed:
+        /* Could put a Python exception call here */
+        free(outbuf);
+        return 0;
+
+} /* End filter function */
diff --git a/h5py/lzf_filter.h b/h5py/lzf_filter.h
new file mode 100644
index 0000000..9e8951c
--- /dev/null
+++ b/h5py/lzf_filter.h
@@ -0,0 +1,26 @@
+/***** Preamble block *********************************************************
+* 
+* This file is part of h5py, a low-level Python interface to the HDF5 library.
+* 
+* Copyright (C) 2008 Andrew Collette
+* http://h5py.alfven.org
+* License: BSD  (See LICENSE.txt for full license)
+* 
+* $Date$
+* 
+****** End preamble block ****************************************************/
+
+/*
+    Filter code is chosen in an ad-hoc manner to avoid conflict
+    with PyTables LZO/BZIP2 implementation.
+*/
+
+#ifndef H5PY_LZF_FILTER
+#define H5PY_LZF_FILTER
+
+#define H5PY_FILTER_LZF 315
+
+int register_lzf(void);
+
+#endif
+
diff --git a/setup.py b/setup.py
index 314b860..2f626ef 100644
--- a/setup.py
+++ b/setup.py
@@ -48,7 +48,7 @@ from distutils.cmd import Command
 
 # Basic package options
 NAME = 'h5py'               # Software title
-VERSION = '1.0.0'
+VERSION = '1.1.0'
 MIN_NUMPY = '1.0.3'
 MIN_CYTHON = '0.9.8.1.1'
 SRC_PATH = 'h5py'           # Name of directory with .pyx files
@@ -61,6 +61,7 @@ MODULES = {16:  ['h5', 'h5f', 'h5g', 'h5s', 'h5t', 'h5d', 'h5a', 'h5p', 'h5z',
                  'h5i', 'h5r', 'h5fd', 'utils'],
            18:  ['h5', 'h5f', 'h5g', 'h5s', 'h5t', 'h5d', 'h5a', 'h5p', 'h5z',
                  'h5i', 'h5r', 'h5fd', 'utils', 'h5o', 'h5l']}
+EXTRA_SRC = {'h5': ["lzf_filter.c", "lzf/lzf_c.c", "lzf/lzf_d.c"]}
 
 def version_check(vers, required):
     """ Compare versions between two "."-separated strings. """
@@ -151,11 +152,13 @@ class ExtensionCreator(object):
             self.extra_link_args = []
 
     
-    def create_extension(self, name, extra_src=[]):
+    def create_extension(self, name, extra_src=None):
         """ Create a distutils Extension object for the given module.  A list
             of C source files to be included in the compilation can also be
             provided.
         """
+        if extra_src is None:
+            extra_src = []
         sources = [op.join(SRC_PATH, name+'.c')]+[op.join(SRC_PATH,x) for x in extra_src]
         return Extension(NAME+'.'+name,
                             sources, 
@@ -230,7 +233,7 @@ class cybuild(build):
 
         modules = MODULES[self.api]
         creator = ExtensionCreator(self.hdf5)
-        extensions = [creator.create_extension(x) for x in modules]
+        extensions = [creator.create_extension(x, EXTRA_SRC.get(x, None)) for x in modules]
 
         self.distribution.ext_modules = extensions
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/h5py.git



More information about the debian-science-commits mailing list